From 639e12d4e30a89abb5010be4f65ef58c0df39f20 Mon Sep 17 00:00:00 2001
From: Christina Holland
Date: Thu, 11 Dec 2025 11:52:54 -0800
Subject: [PATCH 1/4] update LiveAPI models

---
 packages/ai/integration/constants.ts |  6 +--
 packages/ai/integration/live.test.ts | 57 +++++++++++++++-------------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/packages/ai/integration/constants.ts b/packages/ai/integration/constants.ts
index c1bf74770c..60d23b001a 100644
--- a/packages/ai/integration/constants.ts
+++ b/packages/ai/integration/constants.ts
@@ -43,7 +43,7 @@ function formatConfigAsString(config: { ai: AI; model: string }): string {
 }
 
 const backends: readonly Backend[] = [
-  new GoogleAIBackend(),
+  // new GoogleAIBackend(),
   new VertexAIBackend('global')
 ];
 
@@ -56,8 +56,8 @@
 const modelNames: readonly string[] = ['gemini-2.0-flash', 'gemini-2.5-flash'];
 
 // The Live API requires a different set of models, and they're different for each backend.
 const liveModelNames: Map<BackendType, string[]> = new Map([
-  [BackendType.GOOGLE_AI, ['gemini-live-2.5-flash-preview']],
-  [BackendType.VERTEX_AI, ['gemini-2.0-flash-live-preview-04-09']]
+  [BackendType.GOOGLE_AI, ['gemini-2.5-flash-native-audio-preview-09-2025']],
+  [BackendType.VERTEX_AI, ['gemini-live-2.5-flash-preview-native-audio-09-2025']]
 ]);
 
 /**
diff --git a/packages/ai/integration/live.test.ts b/packages/ai/integration/live.test.ts
index 2a35795164..b38d4fd7f4 100644
--- a/packages/ai/integration/live.test.ts
+++ b/packages/ai/integration/live.test.ts
@@ -28,7 +28,7 @@ import { liveTestConfigs } from './constants';
 import { HELLO_AUDIO_PCM_BASE64 } from './sample-data/hello-audio';
 
 // A helper function to consume the generator and collect text parts from one turn.
-async function nextTurnText(
+async function nextTurnValue(
   stream: AsyncGenerator<
     LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation
   >
 ): Promise<string> {
@@ -38,6 +38,7 @@
   // We want to keep the generator open so that we can pass it to this function again to get the
   // next turn's text.
   let result = await stream.next();
+  console.log('result', result);
   while (!result.done) {
     const chunk = result.value as
       | LiveServerContent
       | LiveServerToolCall
       | LiveServerToolCallCancellation;
     switch (chunk.type) {
       case 'serverContent':
         if (chunk.turnComplete) {
           return text;
         }
         const parts = chunk.modelTurn?.parts;
         if (parts) {
           parts.forEach(part => {
             if (part.text) {
               text += part.text;
             } else {
               throw Error(`Expected TextPart but got ${JSON.stringify(part)}`);
             }
           });
         }
@@ -73,35 +74,37 @@ describe('Live', function () {
   this.timeout(20000);
 
-  const textLiveGenerationConfig: LiveGenerationConfig = {
-    responseModalities: [ResponseModality.TEXT],
-    temperature: 0,
-    topP: 0
+  const audioLiveGenerationConfig: LiveGenerationConfig = {
+    responseModalities: [ResponseModality.AUDIO]
   };
 
   liveTestConfigs.forEach(testConfig => {
     describe(`${testConfig.toString()}`, () => {
       describe('Live', () => {
-        it('should connect, send a message, receive a response, and close', async () => {
+        it.only('should connect, send a message, receive a response, and close', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: textLiveGenerationConfig
+            generationConfig: audioLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
-          await session.send(
-            'Where is Google headquarters located? Answer with the city name only.'
-          );
-          const responseText = await responsePromise;
-          expect(responseText).to.exist;
-          expect(responseText).to.include('Mountain View');
+          const responsePromise = nextTurnValue(session.receive());
+          await session.send([
+            {
+              inlineData: {
+                data: HELLO_AUDIO_PCM_BASE64,
+                mimeType: 'audio/pcm'
+              }
+            }
+          ]);
+          const responseValue = await responsePromise;
+          expect(responseValue).to.exist;
           await session.close();
         });
         it('should handle multiple messages in a session', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: textLiveGenerationConfig
+            generationConfig: audioLiveGenerationConfig
           });
           const session = await model.connect();
           const generator = session.receive();
@@ -110,7 +113,7 @@ describe('Live', function () {
             'Where is Google headquarters located? Answer with the city name only.'
           );
 
-          const responsePromise1 = nextTurnText(generator);
+          const responsePromise1 = nextTurnValue(generator);
           const responseText1 = await responsePromise1; // Wait for the turn to complete
           expect(responseText1).to.include('Mountain View');
 
@@ -118,7 +121,7 @@ describe('Live', function () {
             'What state is that in? Answer with the state name only.'
           );
 
-          const responsePromise2 = nextTurnText(generator);
+          const responsePromise2 = nextTurnValue(generator);
           const responseText2 = await responsePromise2; // Wait for the second turn to complete
           expect(responseText2).to.include('California');
 
@@ -154,10 +157,10 @@ describe('Live', function () {
         it('should send a single text chunk and receive a response', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: textLiveGenerationConfig
+            generationConfig: audioLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnValue(session.receive());
 
           await session.sendTextRealtime('Are you an AI? Yes or No.');
 
@@ -172,10 +175,10 @@ describe('Live', function () {
         it('should send a single audio chunk and receive a response', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: textLiveGenerationConfig
+            generationConfig: audioLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnValue(session.receive());
 
           await session.sendAudioRealtime({
             data: HELLO_AUDIO_PCM_BASE64, // "Hey, can you hear me?"
@@ -193,10 +196,10 @@ describe('Live', function () { it('should send a single audio chunk and receive a response', async () => { const model = getLiveGenerativeModel(testConfig.ai, { model: testConfig.model, - generationConfig: textLiveGenerationConfig + generationConfig: audioLiveGenerationConfig }); const session = await model.connect(); - const responsePromise = nextTurnText(session.receive()); + const responsePromise = nextTurnValue(session.receive()); await session.sendMediaChunks([ { @@ -214,10 +217,10 @@ describe('Live', function () { it('should send multiple audio chunks in a single batch call', async () => { const model = getLiveGenerativeModel(testConfig.ai, { model: testConfig.model, - generationConfig: textLiveGenerationConfig + generationConfig: audioLiveGenerationConfig }); const session = await model.connect(); - const responsePromise = nextTurnText(session.receive()); + const responsePromise = nextTurnValue(session.receive()); // TODO (dlarocque): Pass two PCM files with different audio, and validate that the model // heard both. @@ -237,10 +240,10 @@ describe('Live', function () { it('should consume a stream with multiple chunks and receive a response', async () => { const model = getLiveGenerativeModel(testConfig.ai, { model: testConfig.model, - generationConfig: textLiveGenerationConfig + generationConfig: audioLiveGenerationConfig }); const session = await model.connect(); - const responsePromise = nextTurnText(session.receive()); + const responsePromise = nextTurnValue(session.receive()); // TODO (dlarocque): Pass two PCM files with different audio, and validate that the model // heard both. From 4ddb5f0f221e25d70093bf0b44b7019c118bd6e1 Mon Sep 17 00:00:00 2001 From: Christina Holland Date: Thu, 11 Dec 2025 17:34:33 -0800 Subject: [PATCH 2/4] update integration tests --- packages/ai/integration/constants.ts | 12 ++- packages/ai/integration/live.test.ts | 131 +++++++++++++++++---------- 2 files changed, 91 insertions(+), 52 deletions(-) diff --git a/packages/ai/integration/constants.ts b/packages/ai/integration/constants.ts index 60d23b001a..fc8b00146a 100644 --- a/packages/ai/integration/constants.ts +++ b/packages/ai/integration/constants.ts @@ -43,10 +43,18 @@ function formatConfigAsString(config: { ai: AI; model: string }): string { } const backends: readonly Backend[] = [ - // new GoogleAIBackend(), + new GoogleAIBackend(), new VertexAIBackend('global') ]; +/** + * Vertex Live API only works on us-central1 at the moment. + */ +const liveBackends: readonly Backend[] = [ + new GoogleAIBackend(), + new VertexAIBackend('us-central1') +]; + const backendNames: Map = new Map([ [BackendType.GOOGLE_AI, 'Google AI'], [BackendType.VERTEX_AI, 'Vertex AI'] @@ -78,7 +86,7 @@ export const testConfigs: readonly TestConfig[] = backends.flatMap(backend => { /** * Test configurations used for the Live API integration tests. 
  */
-export const liveTestConfigs: readonly TestConfig[] = backends.flatMap(
+export const liveTestConfigs: readonly TestConfig[] = liveBackends.flatMap(
   backend => {
     const testConfigs: TestConfig[] = [];
     liveModelNames.get(backend.backendType)!.forEach(modelName => {
diff --git a/packages/ai/integration/live.test.ts b/packages/ai/integration/live.test.ts
index b38d4fd7f4..be0e7c6766 100644
--- a/packages/ai/integration/live.test.ts
+++ b/packages/ai/integration/live.test.ts
@@ -28,17 +28,22 @@ import { liveTestConfigs } from './constants';
 import { HELLO_AUDIO_PCM_BASE64 } from './sample-data/hello-audio';
 
 // A helper function to consume the generator and collect text parts from one turn.
-async function nextTurnValue(
+async function nextTurnData(
   stream: AsyncGenerator<
     LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation
   >
-): Promise<string> {
+): Promise<{
+  text: string;
+  hasAudioData: boolean;
+  hasThinking: boolean;
+}> {
   let text = '';
+  let hasAudioData = false;
+  let hasThinking = false;
   // We don't use `for await...of` on the generator, because that would automatically close the generator.
   // We want to keep the generator open so that we can pass it to this function again to get the
   // next turn's text.
   let result = await stream.next();
-  console.log('result', result);
   while (!result.done) {
     const chunk = result.value as
       | LiveServerContent
@@ -47,14 +52,25 @@
     switch (chunk.type) {
       case 'serverContent':
         if (chunk.turnComplete) {
-          return text;
+          return {
+            text,
+            hasAudioData,
+            hasThinking
+          };
         }
         const parts = chunk.modelTurn?.parts;
         if (parts) {
           parts.forEach(part => {
             if (part.text) {
+              if (part.thought) {
+                hasThinking = true;
+              }
               text += part.text;
+            } else if (part.inlineData) {
+              if (part.inlineData.mimeType.startsWith('audio')) {
+                hasAudioData = true;
+              }
             } else {
               throw Error(`Expected TextPart but got ${JSON.stringify(part)}`);
             }
           });
         }
@@ -68,43 +84,46 @@
     result = await stream.next();
   }
 
-  return text;
+  return {
+    text,
+    hasAudioData,
+    hasThinking
+  };
 }
 
 describe('Live', function () {
   this.timeout(20000);
 
-  const audioLiveGenerationConfig: LiveGenerationConfig = {
-    responseModalities: [ResponseModality.AUDIO]
+  const textLiveGenerationConfig: LiveGenerationConfig = {
+    responseModalities: [ResponseModality.AUDIO],
+    temperature: 0,
+    topP: 0
   };
 
   liveTestConfigs.forEach(testConfig => {
     describe(`${testConfig.toString()}`, () => {
       describe('Live', () => {
-        it.only('should connect, send a message, receive a response, and close', async () => {
+        it('should connect, send a message, receive a response, and close', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: audioLiveGenerationConfig
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnValue(session.receive());
-          await session.send([
-            {
-              inlineData: {
-                data: HELLO_AUDIO_PCM_BASE64,
-                mimeType: 'audio/pcm'
-              }
-            }
-          ]);
-          const responseValue = await responsePromise;
-          expect(responseValue).to.exist;
+          const responsePromise = nextTurnData(session.receive());
+          await session.send(
+            'Where is Google headquarters located? Answer with the city name only.'
+          );
+          const responseData = await responsePromise;
+          expect(responseData).to.exist;
+          expect(responseData.hasAudioData).to.be
+            .true;
           await session.close();
         });
         it('should handle multiple messages in a session', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: audioLiveGenerationConfig
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
           const generator = session.receive();
 
           await session.send(
             'Where is Google headquarters located? Answer with the city name only.'
           );
 
-          const responsePromise1 = nextTurnValue(generator);
-          const responseText1 = await responsePromise1; // Wait for the turn to complete
-          expect(responseText1).to.include('Mountain View');
+          const responsePromise1 = nextTurnData(generator);
+          const responseData1 = await responsePromise1; // Wait for the turn to complete
+          expect(responseData1.hasAudioData).to.be
+            .true;
 
           await session.send(
             'What state is that in? Answer with the state name only.'
           );
 
-          const responsePromise2 = nextTurnValue(generator);
-          const responseText2 = await responsePromise2; // Wait for the second turn to complete
-          expect(responseText2).to.include('California');
+          const responsePromise2 = nextTurnData(generator);
+          const responseData2 = await responsePromise2; // Wait for the second turn to complete
+          expect(responseData2.hasAudioData).to.be
+            .true;
 
           await session.close();
         });
 
         it('close() should be idempotent and terminate the stream', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
-            model: testConfig.model
+            model: testConfig.model,
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
           const generator = session.receive();
 
         it('should send a single text chunk and receive a response', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: audioLiveGenerationConfig
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnValue(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           await session.sendTextRealtime('Are you an AI? Yes or No.');
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          expect(responseData.hasAudioData).to.be
+            .true;
 
           await session.close();
         });
 
         it('should send a single audio chunk and receive a response', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: audioLiveGenerationConfig
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnValue(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           await session.sendAudioRealtime({
             data: HELLO_AUDIO_PCM_BASE64, // "Hey, can you hear me?"
             mimeType: 'audio/pcm'
           });
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          expect(responseData.hasAudioData).to.be
+            .true;
 
           await session.close();
         });
 
         it('should send a single audio chunk and receive a response', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: audioLiveGenerationConfig
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnValue(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           await session.sendMediaChunks([
             {
               data: HELLO_AUDIO_PCM_BASE64,
               mimeType: 'audio/pcm'
             }
           ]);
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          expect(responseData.hasAudioData).to.be
+            .true;
 
           await session.close();
         });
 
         it('should send multiple audio chunks in a single batch call', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: audioLiveGenerationConfig
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnValue(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           // TODO (dlarocque): Pass two PCM files with different audio, and validate that the model
           // heard both.
           await session.sendMediaChunks([
             { data: HELLO_AUDIO_PCM_BASE64, mimeType: 'audio/pcm' },
             { data: HELLO_AUDIO_PCM_BASE64, mimeType: 'audio/pcm' }
           ]);
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          // Sometimes it responds with only thinking. Developer API may
+          // have trouble handling the double audio?
+          expect(responseData.hasAudioData || responseData.hasThinking).to.be
+            .true;
 
           await session.close();
         });
 
         it('should consume a stream with multiple chunks and receive a response', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
             model: testConfig.model,
-            generationConfig: audioLiveGenerationConfig
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnValue(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           // TODO (dlarocque): Pass two PCM files with different audio, and validate that the model
           // heard both.
           const testStream = new ReadableStream({
             start(controller) {
               controller.enqueue({
                 data: HELLO_AUDIO_PCM_BASE64,
                 mimeType: 'audio/pcm'
               });
               controller.enqueue({
                 data: HELLO_AUDIO_PCM_BASE64,
                 mimeType: 'audio/pcm'
               });
               controller.close();
             }
           });
           await session.sendMediaStream(testStream);
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          // Sometimes it responds with only thinking. Developer API may
+          // have trouble handling the double audio?
+          expect(responseData.hasAudioData || responseData.hasThinking).to.be
+            .true;
 
           await session.close();
         });
 
@@ -403,8 +434,8 @@ describe('Live', function () {
           // Send a message that should trigger a function call to fetchWeather
           await session.send('Whats the weather on June 15, 2025 in Toronto?');
 
-          const finalResponseText = await streamPromise;
-          expect(finalResponseText).to.include('22'); // Should include the result of our function call
+          const finalresponseData = await streamPromise;
+          expect(finalresponseData).to.include('22'); // Should include the result of our function call
 
           await session.close();
         });

From edf262f3537135141dece6e8b8988e2f744b7cdd Mon Sep 17 00:00:00 2001
From: Christina Holland
Date: Thu, 11 Dec 2025 17:46:06 -0800
Subject: [PATCH 3/4] Small changes from Gemini PR comments

---
 packages/ai/integration/live.test.ts | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/packages/ai/integration/live.test.ts b/packages/ai/integration/live.test.ts
index be0e7c6766..f8237658ee 100644
--- a/packages/ai/integration/live.test.ts
+++ b/packages/ai/integration/live.test.ts
@@ -72,7 +72,11 @@ async function nextTurnData(
                 hasAudioData = true;
               }
             } else {
-              throw Error(`Expected TextPart but got ${JSON.stringify(part)}`);
+              throw Error(
+                `Expected TextPart or InlineDataPart but got ${JSON.stringify(
+                  part
+                )}`
+              );
             }
           });
         }
@@ -116,8 +120,7 @@ describe('Live', function () {
           );
           const responseData = await responsePromise;
           expect(responseData).to.exist;
-          expect(responseData.hasAudioData).to.be
-            .true;
+          expect(responseData.hasAudioData).to.be.true;
           await session.close();
         });
         it('should handle multiple messages in a session', async () => {
 
           const responsePromise1 = nextTurnData(generator);
           const responseData1 = await responsePromise1; // Wait for the turn to complete
-          expect(responseData1.hasAudioData).to.be
-            .true;
+          expect(responseData1.hasAudioData).to.be.true;
 
           await session.send(
             'What state is that in? Answer with the state name only.'
           );
 
           const responsePromise2 = nextTurnData(generator);
           const responseData2 = await responsePromise2; // Wait for the second turn to complete
-          expect(responseData2.hasAudioData).to.be
-            .true;
+          expect(responseData2.hasAudioData).to.be.true;
 
           await session.close();
         });
 
           await session.sendTextRealtime('Are you an AI? Yes or No.');
 
           const responseData = await responsePromise;
-          expect(responseData.hasAudioData).to.be
-            .true;
+          expect(responseData.hasAudioData).to.be.true;
 
           await session.close();
         });
 
           await session.sendAudioRealtime({
             data: HELLO_AUDIO_PCM_BASE64, // "Hey, can you hear me?"
             mimeType: 'audio/pcm'
           });
 
           const responseData = await responsePromise;
-          expect(responseData.hasAudioData).to.be
-            .true;
+          expect(responseData.hasAudioData).to.be.true;
 
           await session.close();
         });
 
           await session.sendMediaChunks([
             {
               data: HELLO_AUDIO_PCM_BASE64,
               mimeType: 'audio/pcm'
             }
           ]);
 
           const responseData = await responsePromise;
-          expect(responseData.hasAudioData).to.be
-            .true;
+          expect(responseData.hasAudioData).to.be.true;
 
           await session.close();
         });
 
           // Send a message that should trigger a function call to fetchWeather
           await session.send('Whats the weather on June 15, 2025 in Toronto?');
 
-          const finalresponseData = await streamPromise;
-          expect(finalresponseData).to.include('22'); // Should include the result of our function call
+          const finalResponseData = await streamPromise;
+          expect(finalResponseData).to.include('22'); // Should include the result of our function call
 
           await session.close();
         });

From 5bc3a6611f0d129dadf11fe6be8844c255285c1e Mon Sep 17 00:00:00 2001
From: Christina Holland
Date: Thu, 11 Dec 2025 17:47:48 -0800
Subject: [PATCH 4/4] yarn format

---
 packages/ai/integration/constants.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/packages/ai/integration/constants.ts b/packages/ai/integration/constants.ts
index fc8b00146a..87208f77ee 100644
--- a/packages/ai/integration/constants.ts
+++ b/packages/ai/integration/constants.ts
@@ -65,7 +65,10 @@ const modelNames: readonly string[] = ['gemini-2.0-flash', 'gemini-2.5-flash'];
 // The Live API requires a different set of models, and they're different for each backend.
 const liveModelNames: Map<BackendType, string[]> = new Map([
   [BackendType.GOOGLE_AI, ['gemini-2.5-flash-native-audio-preview-09-2025']],
-  [BackendType.VERTEX_AI, ['gemini-live-2.5-flash-preview-native-audio-09-2025']]
+  [
+    BackendType.VERTEX_AI,
+    ['gemini-live-2.5-flash-preview-native-audio-09-2025']
+  ]
 ]);
 
 /**
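
Note: the `nextTurnData` helper in these patches deliberately avoids `for await...of`, because exiting that loop (via `break` or `return`) invokes the generator's `return()` and closes the stream, which would make it impossible to read a second turn from the same session. Below is a minimal, self-contained sketch of the same pattern in plain TypeScript — the `turns` generator and `nextTurn` helper are hypothetical stand-ins with no Firebase dependencies, not part of the SDK:

```ts
// A stand-in for session.receive(): yields parts, with a marker between turns.
async function* turns(): AsyncGenerator<string> {
  yield 'turn 1 text';
  yield 'TURN_COMPLETE';
  yield 'turn 2 text';
  yield 'TURN_COMPLETE';
}

// Collects one turn's parts using manual next() calls. Returning from this
// function does NOT close the generator, so it can be called again later.
async function nextTurn(stream: AsyncGenerator<string>): Promise<string[]> {
  const parts: string[] = [];
  let result = await stream.next();
  while (!result.done) {
    if (result.value === 'TURN_COMPLETE') {
      return parts; // plain return: the generator stays open
    }
    parts.push(result.value);
    result = await stream.next();
  }
  return parts;
}

async function demo(): Promise<void> {
  const stream = turns();
  console.log(await nextTurn(stream)); // ['turn 1 text']
  console.log(await nextTurn(stream)); // ['turn 2 text'] — same generator, still open
}

demo().catch(console.error);
```

By contrast, `for await (const part of stream) { ...; break; }` would end the stream on `break`, so only the first turn could ever be read — which is why the tests pass the same generator back into the helper for each subsequent turn.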