firebase · hsubox76 · Dec 11, 2025 · Dec 12, 2025 · Dec 12, 2025 · Dec 12, 2025
@@ -47,6 +47,14 @@ const backends: readonly Backend[] = [
   new VertexAIBackend('global')
 ];
 
+/**
+ * Vertex Live API only works on us-central1 at the moment.
+ */
+const liveBackends: readonly Backend[] = [
+  new GoogleAIBackend(),
+  new VertexAIBackend('us-central1')
+];
+
 const backendNames: Map<BackendType, string> = new Map([
   [BackendType.GOOGLE_AI, 'Google AI'],
   [BackendType.VERTEX_AI, 'Vertex AI']
@@ -56,8 +64,11 @@ const modelNames: readonly string[] = ['gemini-2.0-flash', 'gemini-2.5-flash'];
 
 // The Live API requires a different set of models, and they're different for each backend.
 const liveModelNames: Map<BackendType, string[]> = new Map([
-  [BackendType.GOOGLE_AI, ['gemini-live-2.5-flash-preview']],
-  [BackendType.VERTEX_AI, ['gemini-2.0-flash-live-preview-04-09']]
+  [BackendType.GOOGLE_AI, ['gemini-2.5-flash-native-audio-preview-09-2025']],
+  [
+    BackendType.VERTEX_AI,
+    ['gemini-live-2.5-flash-preview-native-audio-09-2025']
+  ]
 ]);
 
 /**
@@ -78,7 +89,7 @@ export const testConfigs: readonly TestConfig[] = backends.flatMap(backend => {
 /**
  * Test configurations used for the Live API integration tests.
  */
-export const liveTestConfigs: readonly TestConfig[] = backends.flatMap(
+export const liveTestConfigs: readonly TestConfig[] = liveBackends.flatMap(
   backend => {
     const testConfigs: TestConfig[] = [];
     liveModelNames.get(backend.backendType)!.forEach(modelName => {

@@ -28,12 +28,18 @@ import { liveTestConfigs } from './constants';
 import { HELLO_AUDIO_PCM_BASE64 } from './sample-data/hello-audio';
 
 // A helper function to consume the generator and collect text parts from one turn.
-async function nextTurnText(
+async function nextTurnData(
   stream: AsyncGenerator<
     LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation
   >
-): Promise<string> {
+): Promise<{
+  text: string;
+  hasAudioData: boolean;
+  hasThinking: boolean;
+}> {
   let text = '';
+  let hasAudioData = false;
+  let hasThinking = false;
   // We don't use `for await...of` on the generator, because that would automatically close the generator.
   // We want to keep the generator open so that we can pass it to this function again to get the
   // next turn's text.
@@ -46,16 +52,31 @@ async function nextTurnText(
     switch (chunk.type) {
       case 'serverContent':
         if (chunk.turnComplete) {
-          return text;
+          return {
+            text,
+            hasAudioData,
+            hasThinking
+          };
         }
 
         const parts = chunk.modelTurn?.parts;
         if (parts) {
           parts.forEach(part => {
             if (part.text) {
+              if (part.thought) {
+                hasThinking = true;
+              }
               text += part.text;
+            } else if (part.inlineData) {
+              if (part.inlineData.mimeType.startsWith('audio')) {
+                hasAudioData = true;
+              }
             } else {
-              throw Error(`Expected TextPart but got ${JSON.stringify(part)}`);
+              throw Error(
+                `Expected TextPart or InlineDataPart but got ${JSON.stringify(
+                  part
+                )}`
+              );
             }
           });
         }
@@ -67,14 +88,18 @@ async function nextTurnText(
     result = await stream.next();
   }
 
-  return text;
+  return {
+    text,
+    hasAudioData,
+    hasThinking
+  };
 }
 
 describe('Live', function () {
   this.timeout(20000);
 
   const textLiveGenerationConfig: LiveGenerationConfig = {
-    responseModalities: [ResponseModality.TEXT],
+    responseModalities: [ResponseModality.AUDIO],
     temperature: 0,
     topP: 0
   };
@@ -89,13 +114,13 @@ describe('Live', function () {
           });
 
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnData(session.receive());
           await session.send(
             'Where is Google headquarters located? Answer with the city name only.'
           );
-          const responseText = await responsePromise;
-          expect(responseText).to.exist;
-          expect(responseText).to.include('Mountain View');
+          const responseData = await responsePromise;
+          expect(responseData).to.exist;
+          expect(responseData.hasAudioData).to.be.true;
           await session.close();
         });
         it('should handle multiple messages in a session', async () => {
@@ -110,24 +135,25 @@ describe('Live', function () {
             'Where is Google headquarters located? Answer with the city name only.'
           );
 
-          const responsePromise1 = nextTurnText(generator);
-          const responseText1 = await responsePromise1; // Wait for the turn to complete
-          expect(responseText1).to.include('Mountain View');
+          const responsePromise1 = nextTurnData(generator);
+          const responseData1 = await responsePromise1; // Wait for the turn to complete
+          expect(responseData1.hasAudioData).to.be.true;
 
           await session.send(
             'What state is that in? Answer with the state name only.'
           );
 
-          const responsePromise2 = nextTurnText(generator);
-          const responseText2 = await responsePromise2; // Wait for the second turn to complete
-          expect(responseText2).to.include('California');
+          const responsePromise2 = nextTurnData(generator);
+          const responseData2 = await responsePromise2; // Wait for the second turn to complete
+          expect(responseData2.hasAudioData).to.be.true;
 
           await session.close();
         });
 
         it('close() should be idempotent and terminate the stream', async () => {
           const model = getLiveGenerativeModel(testConfig.ai, {
-            model: testConfig.model
+            model: testConfig.model,
+            generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
           const generator = session.receive();
@@ -157,12 +183,12 @@ describe('Live', function () {
             generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           await session.sendTextRealtime('Are you an AI? Yes or No.');
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          expect(responseData.hasAudioData).to.be.true;
 
           await session.close();
         });
@@ -175,15 +201,15 @@ describe('Live', function () {
             generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           await session.sendAudioRealtime({
             data: HELLO_AUDIO_PCM_BASE64, // "Hey, can you hear me?"
             mimeType: 'audio/pcm'
           });
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          expect(responseData.hasAudioData).to.be.true;
 
           await session.close();
         });
@@ -196,7 +222,7 @@ describe('Live', function () {
             generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           await session.sendMediaChunks([
             {
@@ -205,8 +231,8 @@ describe('Live', function () {
             }
           ]);
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          expect(responseData.hasAudioData).to.be.true;
 
           await session.close();
         });
@@ -217,7 +243,7 @@ describe('Live', function () {
             generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           // TODO (dlarocque): Pass two PCM files with different audio, and validate that the model
           // heard both.
@@ -226,8 +252,11 @@ describe('Live', function () {
             { data: HELLO_AUDIO_PCM_BASE64, mimeType: 'audio/pcm' }
           ]);
 
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          // Sometimes it responds with only thinking. Developer API may
+          // have trouble handling the double audio?
+          expect(responseData.hasAudioData || responseData.hasThinking).to.be
+            .true;
 
           await session.close();
         });
@@ -240,7 +269,7 @@ describe('Live', function () {
             generationConfig: textLiveGenerationConfig
           });
           const session = await model.connect();
-          const responsePromise = nextTurnText(session.receive());
+          const responsePromise = nextTurnData(session.receive());
 
           // TODO (dlarocque): Pass two PCM files with different audio, and validate that the model
           // heard both.
@@ -259,8 +288,11 @@ describe('Live', function () {
           });
 
           await session.sendMediaStream(testStream);
-          const responseText = await responsePromise;
-          expect(responseText).to.include('Yes');
+          const responseData = await responsePromise;
+          // Sometimes it responds with only thinking. Developer API may
+          // have trouble handling the double audio?
+          expect(responseData.hasAudioData || responseData.hasThinking).to.be
+            .true;
 
           await session.close();
         });
@@ -400,8 +432,8 @@ describe('Live', function () {
           // Send a message that should trigger a function call to fetchWeather
           await session.send('Whats the weather on June 15, 2025 in Toronto?');
 
-          const finalResponseText = await streamPromise;
-          expect(finalResponseText).to.include('22'); // Should include the result of our function call
+          const finalResponseData = await streamPromise;
+          expect(finalResponseData).to.include('22'); // Should include the result of our function call
 
           await session.close();
         });