Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions packages/ai/integration/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ const backends: readonly Backend[] = [
new VertexAIBackend('global')
];

/**
* Vertex Live API only works on us-central1 at the moment.
*/
const liveBackends: readonly Backend[] = [
new GoogleAIBackend(),
new VertexAIBackend('us-central1')
];

const backendNames: Map<BackendType, string> = new Map([
[BackendType.GOOGLE_AI, 'Google AI'],
[BackendType.VERTEX_AI, 'Vertex AI']
Expand All @@ -56,8 +64,11 @@ const modelNames: readonly string[] = ['gemini-2.0-flash', 'gemini-2.5-flash'];

// The Live API requires a different set of models, and they're different for each backend.
const liveModelNames: Map<BackendType, string[]> = new Map([
[BackendType.GOOGLE_AI, ['gemini-live-2.5-flash-preview']],
[BackendType.VERTEX_AI, ['gemini-2.0-flash-live-preview-04-09']]
[BackendType.GOOGLE_AI, ['gemini-2.5-flash-native-audio-preview-09-2025']],
[
BackendType.VERTEX_AI,
['gemini-live-2.5-flash-preview-native-audio-09-2025']
]
]);

/**
Expand All @@ -78,7 +89,7 @@ export const testConfigs: readonly TestConfig[] = backends.flatMap(backend => {
/**
* Test configurations used for the Live API integration tests.
*/
export const liveTestConfigs: readonly TestConfig[] = backends.flatMap(
export const liveTestConfigs: readonly TestConfig[] = liveBackends.flatMap(
backend => {
const testConfigs: TestConfig[] = [];
liveModelNames.get(backend.backendType)!.forEach(modelName => {
Expand Down
100 changes: 66 additions & 34 deletions packages/ai/integration/live.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,18 @@ import { liveTestConfigs } from './constants';
import { HELLO_AUDIO_PCM_BASE64 } from './sample-data/hello-audio';

// A helper function to consume the generator and collect text parts from one turn.
async function nextTurnText(
async function nextTurnData(
stream: AsyncGenerator<
LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation
>
): Promise<string> {
): Promise<{
text: string;
hasAudioData: boolean;
hasThinking: boolean;
}> {
let text = '';
let hasAudioData = false;
let hasThinking = false;
// We don't use `for await...of` on the generator, because that would automatically close the generator.
// We want to keep the generator open so that we can pass it to this function again to get the
// next turn's text.
Expand All @@ -46,16 +52,31 @@ async function nextTurnText(
switch (chunk.type) {
case 'serverContent':
if (chunk.turnComplete) {
return text;
return {
text,
hasAudioData,
hasThinking
};
}

const parts = chunk.modelTurn?.parts;
if (parts) {
parts.forEach(part => {
if (part.text) {
if (part.thought) {
hasThinking = true;
}
text += part.text;
} else if (part.inlineData) {
if (part.inlineData.mimeType.startsWith('audio')) {
hasAudioData = true;
}
} else {
throw Error(`Expected TextPart but got ${JSON.stringify(part)}`);
throw Error(
`Expected TextPart or InlineDataPart but got ${JSON.stringify(
part
)}`
);
}
});
}
Expand All @@ -67,14 +88,18 @@ async function nextTurnText(
result = await stream.next();
}

return text;
return {
text,
hasAudioData,
hasThinking
};
}

describe('Live', function () {
this.timeout(20000);

const textLiveGenerationConfig: LiveGenerationConfig = {
responseModalities: [ResponseModality.TEXT],
responseModalities: [ResponseModality.AUDIO],
temperature: 0,
topP: 0
};
Expand All @@ -89,13 +114,13 @@ describe('Live', function () {
});

const session = await model.connect();
const responsePromise = nextTurnText(session.receive());
const responsePromise = nextTurnData(session.receive());
await session.send(
'Where is Google headquarters located? Answer with the city name only.'
);
const responseText = await responsePromise;
expect(responseText).to.exist;
expect(responseText).to.include('Mountain View');
const responseData = await responsePromise;
expect(responseData).to.exist;
expect(responseData.hasAudioData).to.be.true;
await session.close();
});
it('should handle multiple messages in a session', async () => {
Expand All @@ -110,24 +135,25 @@ describe('Live', function () {
'Where is Google headquarters located? Answer with the city name only.'
);

const responsePromise1 = nextTurnText(generator);
const responseText1 = await responsePromise1; // Wait for the turn to complete
expect(responseText1).to.include('Mountain View');
const responsePromise1 = nextTurnData(generator);
const responseData1 = await responsePromise1; // Wait for the turn to complete
expect(responseData1.hasAudioData).to.be.true;

await session.send(
'What state is that in? Answer with the state name only.'
);

const responsePromise2 = nextTurnText(generator);
const responseText2 = await responsePromise2; // Wait for the second turn to complete
expect(responseText2).to.include('California');
const responsePromise2 = nextTurnData(generator);
const responseData2 = await responsePromise2; // Wait for the second turn to complete
expect(responseData2.hasAudioData).to.be.true;

await session.close();
});

it('close() should be idempotent and terminate the stream', async () => {
const model = getLiveGenerativeModel(testConfig.ai, {
model: testConfig.model
model: testConfig.model,
generationConfig: textLiveGenerationConfig
});
const session = await model.connect();
const generator = session.receive();
Expand Down Expand Up @@ -157,12 +183,12 @@ describe('Live', function () {
generationConfig: textLiveGenerationConfig
});
const session = await model.connect();
const responsePromise = nextTurnText(session.receive());
const responsePromise = nextTurnData(session.receive());

await session.sendTextRealtime('Are you an AI? Yes or No.');

const responseText = await responsePromise;
expect(responseText).to.include('Yes');
const responseData = await responsePromise;
expect(responseData.hasAudioData).to.be.true;

await session.close();
});
Expand All @@ -175,15 +201,15 @@ describe('Live', function () {
generationConfig: textLiveGenerationConfig
});
const session = await model.connect();
const responsePromise = nextTurnText(session.receive());
const responsePromise = nextTurnData(session.receive());

await session.sendAudioRealtime({
data: HELLO_AUDIO_PCM_BASE64, // "Hey, can you hear me?"
mimeType: 'audio/pcm'
});

const responseText = await responsePromise;
expect(responseText).to.include('Yes');
const responseData = await responsePromise;
expect(responseData.hasAudioData).to.be.true;

await session.close();
});
Expand All @@ -196,7 +222,7 @@ describe('Live', function () {
generationConfig: textLiveGenerationConfig
});
const session = await model.connect();
const responsePromise = nextTurnText(session.receive());
const responsePromise = nextTurnData(session.receive());

await session.sendMediaChunks([
{
Expand All @@ -205,8 +231,8 @@ describe('Live', function () {
}
]);

const responseText = await responsePromise;
expect(responseText).to.include('Yes');
const responseData = await responsePromise;
expect(responseData.hasAudioData).to.be.true;

await session.close();
});
Expand All @@ -217,7 +243,7 @@ describe('Live', function () {
generationConfig: textLiveGenerationConfig
});
const session = await model.connect();
const responsePromise = nextTurnText(session.receive());
const responsePromise = nextTurnData(session.receive());

// TODO (dlarocque): Pass two PCM files with different audio, and validate that the model
// heard both.
Expand All @@ -226,8 +252,11 @@ describe('Live', function () {
{ data: HELLO_AUDIO_PCM_BASE64, mimeType: 'audio/pcm' }
]);

const responseText = await responsePromise;
expect(responseText).to.include('Yes');
const responseData = await responsePromise;
// Sometimes it responds with only thinking. Developer API may
// have trouble handling the double audio?
expect(responseData.hasAudioData || responseData.hasThinking).to.be
.true;

await session.close();
});
Expand All @@ -240,7 +269,7 @@ describe('Live', function () {
generationConfig: textLiveGenerationConfig
});
const session = await model.connect();
const responsePromise = nextTurnText(session.receive());
const responsePromise = nextTurnData(session.receive());

// TODO (dlarocque): Pass two PCM files with different audio, and validate that the model
// heard both.
Expand All @@ -259,8 +288,11 @@ describe('Live', function () {
});

await session.sendMediaStream(testStream);
const responseText = await responsePromise;
expect(responseText).to.include('Yes');
const responseData = await responsePromise;
// Sometimes it responds with only thinking. Developer API may
// have trouble handling the double audio?
expect(responseData.hasAudioData || responseData.hasThinking).to.be
.true;

await session.close();
});
Expand Down Expand Up @@ -400,8 +432,8 @@ describe('Live', function () {
// Send a message that should trigger a function call to fetchWeather
await session.send('Whats the weather on June 15, 2025 in Toronto?');

const finalResponseText = await streamPromise;
expect(finalResponseText).to.include('22'); // Should include the result of our function call
const finalResponseData = await streamPromise;
expect(finalResponseData).to.include('22'); // Should include the result of our function call

await session.close();
});
Expand Down
Loading