diff --git a/google/genai/_live_converters.py b/google/genai/_live_converters.py index e3a242587..33e370565 100644 --- a/google/genai/_live_converters.py +++ b/google/genai/_live_converters.py @@ -292,7 +292,13 @@ def _GenerationConfig_to_vertex( setv(to_object, ['seed'], getv(from_object, ['seed'])) if getv(from_object, ['speech_config']) is not None: - setv(to_object, ['speechConfig'], getv(from_object, ['speech_config'])) + setv( + to_object, + ['speechConfig'], + _SpeechConfig_to_vertex( + getv(from_object, ['speech_config']), to_object + ), + ) if getv(from_object, ['stop_sequences']) is not None: setv(to_object, ['stopSequences'], getv(from_object, ['stop_sequences'])) @@ -959,7 +965,10 @@ def _LiveConnectConfig_to_vertex( setv( parent_object, ['setup', 'generationConfig', 'speechConfig'], - t.t_live_speech_config(getv(from_object, ['speech_config'])), + _SpeechConfig_to_vertex( + t.t_live_speech_config(getv(from_object, ['speech_config'])), + to_object, + ), ) if getv(from_object, ['thinking_config']) is not None: @@ -1391,6 +1400,24 @@ def _LiveServerMessage_from_vertex( return to_object +def _MultiSpeakerVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['speaker_voice_configs']) is not None: + setv( + to_object, + ['speakerVoiceConfigs'], + [ + _SpeakerVoiceConfig_to_vertex(item, to_object) + for item in getv(from_object, ['speaker_voice_configs']) + ], + ) + + return to_object + + def _Part_to_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -1531,6 +1558,32 @@ def _Part_to_vertex( return to_object +def _ReplicatedVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['mime_type']) is not None: + setv(to_object, ['mimeType'], getv(from_object, ['mime_type'])) + + if getv(from_object, ['voice_sample_audio']) is not None: + setv( + to_object, + ['voiceSampleAudio'], + getv(from_object, ['voice_sample_audio']), + ) + + if getv(from_object, ['consent_audio']) is not None: + raise ValueError('consent_audio parameter is not supported in Vertex AI.') + + if getv(from_object, ['voice_consent_signature']) is not None: + raise ValueError( + 'voice_consent_signature parameter is not supported in Vertex AI.' + ) + + return to_object + + def _SessionResumptionConfig_to_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -1545,6 +1598,51 @@ def _SessionResumptionConfig_to_mldev( return to_object +def _SpeakerVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['speaker']) is not None: + setv(to_object, ['speaker'], getv(from_object, ['speaker'])) + + if getv(from_object, ['voice_config']) is not None: + setv( + to_object, + ['voiceConfig'], + _VoiceConfig_to_vertex(getv(from_object, ['voice_config']), to_object), + ) + + return to_object + + +def _SpeechConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['voice_config']) is not None: + setv( + to_object, + ['voiceConfig'], + _VoiceConfig_to_vertex(getv(from_object, ['voice_config']), to_object), + ) + + if getv(from_object, ['language_code']) is not None: + setv(to_object, ['languageCode'], getv(from_object, ['language_code'])) + + if getv(from_object, ['multi_speaker_voice_config']) is not None: + setv( + to_object, + ['multiSpeakerVoiceConfig'], + _MultiSpeakerVoiceConfig_to_vertex( + getv(from_object, ['multi_speaker_voice_config']), to_object + ), + ) + + return to_object + + def _Tool_to_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -1775,3 +1873,27 @@ def _VoiceActivity_from_vertex( setv(to_object, ['voice_activity_type'], getv(from_object, ['type'])) return to_object + + +def _VoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['replicated_voice_config']) is not None: + setv( + to_object, + ['replicatedVoiceConfig'], + _ReplicatedVoiceConfig_to_vertex( + getv(from_object, ['replicated_voice_config']), to_object + ), + ) + + if getv(from_object, ['prebuilt_voice_config']) is not None: + setv( + to_object, + ['prebuiltVoiceConfig'], + getv(from_object, ['prebuilt_voice_config']), + ) + + return to_object diff --git a/google/genai/live.py b/google/genai/live.py index 93953a02f..b35e918ce 100644 --- a/google/genai/live.py +++ b/google/genai/live.py @@ -92,10 +92,12 @@ def __init__( api_client: BaseApiClient, websocket: ClientConnection, session_id: Optional[str] = None, + setup_complete: Optional[types.LiveServerSetupComplete] = None, ): self._api_client = api_client self._ws = websocket self.session_id = session_id + self.setup_complete = setup_complete async def send( self, @@ -1131,12 +1133,15 @@ async def connect( ) if setup_response.setup_complete: session_id = setup_response.setup_complete.session_id + setup_complete = setup_response.setup_complete else: session_id = None + setup_complete = None yield AsyncSession( api_client=self._api_client, websocket=ws, session_id=session_id, + setup_complete=setup_complete, ) diff --git a/google/genai/models.py b/google/genai/models.py index c01e8d686..c932b729c 100644 --- a/google/genai/models.py +++ b/google/genai/models.py @@ -1468,7 +1468,11 @@ def _GenerateContentConfig_to_vertex( setv( to_object, ['speechConfig'], - t.t_speech_config(getv(from_object, ['speech_config'])), + _SpeechConfig_to_vertex( + t.t_speech_config(getv(from_object, ['speech_config'])), + to_object, + root_object, + ), ) if getv(from_object, ['audio_timestamp']) is not None: @@ -2735,7 +2739,13 @@ def _GenerationConfig_to_vertex( setv(to_object, ['seed'], getv(from_object, ['seed'])) if getv(from_object, ['speech_config']) is not None: - setv(to_object, ['speechConfig'], getv(from_object, ['speech_config'])) + setv( + to_object, + ['speechConfig'], + _SpeechConfig_to_vertex( + getv(from_object, ['speech_config']), to_object, root_object + ), + ) if getv(from_object, ['stop_sequences']) is not None: setv(to_object, ['stopSequences'], getv(from_object, ['stop_sequences'])) @@ -3301,6 +3311,25 @@ def _Model_from_vertex( return to_object +def _MultiSpeakerVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['speaker_voice_configs']) is not None: + setv( + to_object, + ['speakerVoiceConfigs'], + [ + _SpeakerVoiceConfig_to_vertex(item, to_object, root_object) + for item in getv(from_object, ['speaker_voice_configs']) + ], + ) + + return to_object + + def _Part_to_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -3681,6 +3710,33 @@ def _ReferenceImageAPI_to_vertex( return to_object +def _ReplicatedVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['mime_type']) is not None: + setv(to_object, ['mimeType'], getv(from_object, ['mime_type'])) + + if getv(from_object, ['voice_sample_audio']) is not None: + setv( + to_object, + ['voiceSampleAudio'], + getv(from_object, ['voice_sample_audio']), + ) + + if getv(from_object, ['consent_audio']) is not None: + raise ValueError('consent_audio parameter is not supported in Vertex AI.') + + if getv(from_object, ['voice_consent_signature']) is not None: + raise ValueError( + 'voice_consent_signature parameter is not supported in Vertex AI.' + ) + + return to_object + + def _SafetyAttributes_from_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -3883,6 +3939,59 @@ def _SegmentImageSource_to_vertex( return to_object +def _SpeakerVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['speaker']) is not None: + setv(to_object, ['speaker'], getv(from_object, ['speaker'])) + + if getv(from_object, ['voice_config']) is not None: + setv( + to_object, + ['voiceConfig'], + _VoiceConfig_to_vertex( + getv(from_object, ['voice_config']), to_object, root_object + ), + ) + + return to_object + + +def _SpeechConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['voice_config']) is not None: + setv( + to_object, + ['voiceConfig'], + _VoiceConfig_to_vertex( + getv(from_object, ['voice_config']), to_object, root_object + ), + ) + + if getv(from_object, ['language_code']) is not None: + setv(to_object, ['languageCode'], getv(from_object, ['language_code'])) + + if getv(from_object, ['multi_speaker_voice_config']) is not None: + setv( + to_object, + ['multiSpeakerVoiceConfig'], + _MultiSpeakerVoiceConfig_to_vertex( + getv(from_object, ['multi_speaker_voice_config']), + to_object, + root_object, + ), + ) + + return to_object + + def _ToolConfig_to_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -4481,6 +4590,33 @@ def _Video_to_vertex( return to_object +def _VoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['replicated_voice_config']) is not None: + setv( + to_object, + ['replicatedVoiceConfig'], + _ReplicatedVoiceConfig_to_vertex( + getv(from_object, ['replicated_voice_config']), + to_object, + root_object, + ), + ) + + if getv(from_object, ['prebuilt_voice_config']) is not None: + setv( + to_object, + ['prebuiltVoiceConfig'], + getv(from_object, ['prebuilt_voice_config']), + ) + + return to_object + + class Models(_api_module.BaseModule): def _generate_content( diff --git a/google/genai/tests/live/test_live.py b/google/genai/tests/live/test_live.py index 2d59ce900..513a90a3f 100644 --- a/google/genai/tests/live/test_live.py +++ b/google/genai/tests/live/test_live.py @@ -2154,3 +2154,106 @@ async def mock_connect(uri, additional_headers=None, **kwargs): assert capture['headers']['x-goog-api-key'] == 'TEST_API_KEY' assert 'BidiGenerateContent' in capture['uri'] + +@pytest.mark.parametrize('vertexai', [True, False]) +@pytest.mark.asyncio +async def test_async_session_setup_complete_with_voice_consent_signature( + vertexai, +): + mock_ws = AsyncMock() + mock_ws.send = AsyncMock() + mock_ws.recv = AsyncMock( + return_value=( + b'{"setupComplete": {"sessionId": "test_session_id",' + b' "voiceConsentSignature": {"signature": "test_sig_abc123"}}}' + ) + ) + mock_ws.close = AsyncMock() + + mock_google_auth_default = Mock(return_value=(None, None)) + mock_creds = Mock(token='test_token') + mock_google_auth_default.return_value = (mock_creds, None) + + @contextlib.asynccontextmanager + async def mock_connect(uri, additional_headers=None, **kwargs): + yield mock_ws + + @patch('google.auth.default', new=mock_google_auth_default) + @patch.object(live, 'ws_connect', new=mock_connect) + async def _test_connect(): + live_module = live.AsyncLive(mock_api_client(vertexai=vertexai)) + async with live_module.connect(model='test_model') as session: + assert session.setup_complete is not None + assert session.setup_complete.session_id == 'test_session_id' + assert session.setup_complete.voice_consent_signature is not None + assert ( + session.setup_complete.voice_consent_signature.signature + == 'test_sig_abc123' + ) + + await _test_connect() + + +@pytest.mark.parametrize('vertexai', [False]) +@pytest.mark.asyncio +async def test_bidi_setup_replicated_voice_config_with_consent(vertexai): + config = types.LiveConnectConfig( + response_modalities=['AUDIO'], + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + replicated_voice_config=types.ReplicatedVoiceConfig( + mime_type='audio/wav', + voice_sample_audio=b'fake_audio_data', + consent_audio=b'fake_consent_data', + ) + ) + ), + ) + result = await get_connect_message( + mock_api_client(vertexai=vertexai), model='test_model', config=config + ) + + setup = result.get('setup', {}) + gen_config = setup.get('generationConfig', {}) + speech_config = gen_config.get('speechConfig', {}) + voice_config = speech_config.get('voice_config', {}) + replicated = voice_config.get('replicated_voice_config', {}) + + assert replicated.get('mime_type') == 'audio/wav' + assert replicated.get('voice_sample_audio') is not None + assert replicated.get('consent_audio') is not None + + config_with_sig = types.LiveConnectConfig( + response_modalities=['AUDIO'], + speech_config=types.SpeechConfig( + voice_config=types.VoiceConfig( + replicated_voice_config=types.ReplicatedVoiceConfig( + mime_type='audio/wav', + voice_sample_audio=b'fake_audio_data', + voice_consent_signature=types.VoiceConsentSignature( + signature='test_sig_abc123' + ), + ) + ) + ), + ) + result_with_sig = await get_connect_message( + mock_api_client(vertexai=vertexai), + model='test_model', + config=config_with_sig, + ) + + setup_sig = result_with_sig.get('setup', {}) + gen_config_sig = setup_sig.get('generationConfig', {}) + speech_config_sig = gen_config_sig.get('speechConfig', {}) + voice_config_sig = speech_config_sig.get('voice_config', {}) + replicated_sig = voice_config_sig.get('replicated_voice_config', {}) + + assert replicated_sig.get('mime_type') == 'audio/wav' + assert replicated_sig.get('voice_sample_audio') is not None + assert replicated_sig.get('voice_consent_signature') is not None + assert ( + replicated_sig['voice_consent_signature'].get('signature') + == 'test_sig_abc123' + ) + diff --git a/google/genai/tunings.py b/google/genai/tunings.py index d6dfb0642..9566ebb43 100644 --- a/google/genai/tunings.py +++ b/google/genai/tunings.py @@ -832,7 +832,13 @@ def _GenerationConfig_to_vertex( setv(to_object, ['seed'], getv(from_object, ['seed'])) if getv(from_object, ['speech_config']) is not None: - setv(to_object, ['speechConfig'], getv(from_object, ['speech_config'])) + setv( + to_object, + ['speechConfig'], + _SpeechConfig_to_vertex( + getv(from_object, ['speech_config']), to_object, root_object + ), + ) if getv(from_object, ['stop_sequences']) is not None: setv(to_object, ['stopSequences'], getv(from_object, ['stop_sequences'])) @@ -1013,6 +1019,105 @@ def _ListTuningJobsResponse_from_vertex( return to_object +def _MultiSpeakerVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['speaker_voice_configs']) is not None: + setv( + to_object, + ['speakerVoiceConfigs'], + [ + _SpeakerVoiceConfig_to_vertex(item, to_object, root_object) + for item in getv(from_object, ['speaker_voice_configs']) + ], + ) + + return to_object + + +def _ReplicatedVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['mime_type']) is not None: + setv(to_object, ['mimeType'], getv(from_object, ['mime_type'])) + + if getv(from_object, ['voice_sample_audio']) is not None: + setv( + to_object, + ['voiceSampleAudio'], + getv(from_object, ['voice_sample_audio']), + ) + + if getv(from_object, ['consent_audio']) is not None: + raise ValueError('consent_audio parameter is not supported in Vertex AI.') + + if getv(from_object, ['voice_consent_signature']) is not None: + raise ValueError( + 'voice_consent_signature parameter is not supported in Vertex AI.' + ) + + return to_object + + +def _SpeakerVoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['speaker']) is not None: + setv(to_object, ['speaker'], getv(from_object, ['speaker'])) + + if getv(from_object, ['voice_config']) is not None: + setv( + to_object, + ['voiceConfig'], + _VoiceConfig_to_vertex( + getv(from_object, ['voice_config']), to_object, root_object + ), + ) + + return to_object + + +def _SpeechConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['voice_config']) is not None: + setv( + to_object, + ['voiceConfig'], + _VoiceConfig_to_vertex( + getv(from_object, ['voice_config']), to_object, root_object + ), + ) + + if getv(from_object, ['language_code']) is not None: + setv(to_object, ['languageCode'], getv(from_object, ['language_code'])) + + if getv(from_object, ['multi_speaker_voice_config']) is not None: + setv( + to_object, + ['multiSpeakerVoiceConfig'], + _MultiSpeakerVoiceConfig_to_vertex( + getv(from_object, ['multi_speaker_voice_config']), + to_object, + root_object, + ), + ) + + return to_object + + def _TunedModel_from_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -1376,6 +1481,33 @@ def _TuningValidationDataset_to_vertex( return to_object +def _VoiceConfig_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['replicated_voice_config']) is not None: + setv( + to_object, + ['replicatedVoiceConfig'], + _ReplicatedVoiceConfig_to_vertex( + getv(from_object, ['replicated_voice_config']), + to_object, + root_object, + ), + ) + + if getv(from_object, ['prebuilt_voice_config']) is not None: + setv( + to_object, + ['prebuiltVoiceConfig'], + getv(from_object, ['prebuilt_voice_config']), + ) + + return to_object + + class Tunings(_api_module.BaseModule): def _get( diff --git a/google/genai/types.py b/google/genai/types.py index 41da322c2..35f5e5ffb 100644 --- a/google/genai/types.py +++ b/google/genai/types.py @@ -4956,32 +4956,85 @@ class ToolConfigDict(TypedDict, total=False): ToolConfigOrDict = Union[ToolConfig, ToolConfigDict] +class VoiceConsentSignature(_common.BaseModel): + """The signature of the voice consent check.""" + + signature: Optional[str] = Field( + default=None, + description="""The signature string. + """, + ) + + +class VoiceConsentSignatureDict(TypedDict, total=False): + """The signature of the voice consent check.""" + + signature: Optional[str] + """The signature string. + """ + + +VoiceConsentSignatureOrDict = Union[ + VoiceConsentSignature, VoiceConsentSignatureDict +] + + class ReplicatedVoiceConfig(_common.BaseModel): - """ReplicatedVoiceConfig is used to configure replicated voice.""" + """The configuration for the replicated voice to use.""" mime_type: Optional[str] = Field( default=None, - description="""The mime type of the replicated voice. + description="""The mimetype of the voice sample. The only currently supported + value is `audio/wav`. This represents 16-bit signed little-endian wav + data, with a 24kHz sampling rate. """, ) voice_sample_audio: Optional[bytes] = Field( default=None, - description="""The sample audio of the replicated voice. + description="""The sample of the custom voice. """, ) + consent_audio: Optional[bytes] = Field( + default=None, + description="""Recorded consent verifying ownership of the voice. This + represents 16-bit signed little-endian wav data, with a 24kHz sampling + rate.""", + ) + voice_consent_signature: Optional[VoiceConsentSignature] = Field( + default=None, + description="""Signature of a previously verified consent audio. This should be + populated with a signature generated by the server for a previous + request containing the consent_audio field. When provided, the + signature is verified instead of the consent_audio field to reduce + latency. Requests will fail if the signature is invalid or expired.""", + ) class ReplicatedVoiceConfigDict(TypedDict, total=False): - """ReplicatedVoiceConfig is used to configure replicated voice.""" + """The configuration for the replicated voice to use.""" mime_type: Optional[str] - """The mime type of the replicated voice. + """The mimetype of the voice sample. The only currently supported + value is `audio/wav`. This represents 16-bit signed little-endian wav + data, with a 24kHz sampling rate. """ voice_sample_audio: Optional[bytes] - """The sample audio of the replicated voice. + """The sample of the custom voice. """ + consent_audio: Optional[bytes] + """Recorded consent verifying ownership of the voice. This + represents 16-bit signed little-endian wav data, with a 24kHz sampling + rate.""" + + voice_consent_signature: Optional[VoiceConsentSignatureDict] + """Signature of a previously verified consent audio. This should be + populated with a signature generated by the server for a previous + request containing the consent_audio field. When provided, the + signature is verified instead of the consent_audio field to reduce + latency. Requests will fail if the signature is invalid or expired.""" + ReplicatedVoiceConfigOrDict = Union[ ReplicatedVoiceConfig, ReplicatedVoiceConfigDict @@ -5007,10 +5060,13 @@ class PrebuiltVoiceConfigDict(TypedDict, total=False): class VoiceConfig(_common.BaseModel): + """The configuration for the voice to use.""" replicated_voice_config: Optional[ReplicatedVoiceConfig] = Field( default=None, - description="""If true, the model will use a replicated voice for the response.""", + description="""The configuration for a replicated voice, which is a clone of a + user's voice that can be used for speech synthesis. If this is unset, a + default voice is used.""", ) prebuilt_voice_config: Optional[PrebuiltVoiceConfig] = Field( default=None, description="""The configuration for a prebuilt voice.""" @@ -5018,9 +5074,12 @@ class VoiceConfig(_common.BaseModel): class VoiceConfigDict(TypedDict, total=False): + """The configuration for the voice to use.""" replicated_voice_config: Optional[ReplicatedVoiceConfigDict] - """If true, the model will use a replicated voice for the response.""" + """The configuration for a replicated voice, which is a clone of a + user's voice that can be used for speech synthesis. If this is unset, a + default voice is used.""" prebuilt_voice_config: Optional[PrebuiltVoiceConfigDict] """The configuration for a prebuilt voice.""" @@ -5077,10 +5136,11 @@ class MultiSpeakerVoiceConfigDict(TypedDict, total=False): class SpeechConfig(_common.BaseModel): + """Config for speech generation and transcription.""" voice_config: Optional[VoiceConfig] = Field( default=None, - description="""Configuration for the voice of the response.""", + description="""The configuration in case of single-voice output.""", ) language_code: Optional[str] = Field( default=None, @@ -5093,9 +5153,10 @@ class SpeechConfig(_common.BaseModel): class SpeechConfigDict(TypedDict, total=False): + """Config for speech generation and transcription.""" voice_config: Optional[VoiceConfigDict] - """Configuration for the voice of the response.""" + """The configuration in case of single-voice output.""" language_code: Optional[str] """Optional. The language code (ISO 639-1) for the speech synthesis.""" @@ -17789,6 +17850,13 @@ class LiveServerSetupComplete(_common.BaseModel): session_id: Optional[str] = Field( default=None, description="""The session id of the live session.""" ) + voice_consent_signature: Optional[VoiceConsentSignature] = Field( + default=None, + description="""Signature of the verified consent audio. This is populated when the + request has a ReplicatedVoiceConfig with consent_audio set, if the consent + verification was successful. This may be used in a subsequent request + instead of the consent_audio to verify the same consent.""", + ) class LiveServerSetupCompleteDict(TypedDict, total=False): @@ -17797,6 +17865,12 @@ class LiveServerSetupCompleteDict(TypedDict, total=False): session_id: Optional[str] """The session id of the live session.""" + voice_consent_signature: Optional[VoiceConsentSignatureDict] + """Signature of the verified consent audio. This is populated when the + request has a ReplicatedVoiceConfig with consent_audio set, if the consent + verification was successful. This may be used in a subsequent request + instead of the consent_audio to verify the same consent.""" + LiveServerSetupCompleteOrDict = Union[ LiveServerSetupComplete, LiveServerSetupCompleteDict