Skip to content

Commit 69a02c4

Browse files
MarkDaoust and copybara-github
authored and committed
feat: [Python] Add consent_audio and voice_consent_signature and AsyncSession.setup_complete
PiperOrigin-RevId: 888366910
1 parent: 674b837 · commit: 69a02c4

File tree

6 files changed, with 587 additions (+587) and 15 deletions (-15).

google/genai/_live_converters.py

Lines changed: 124 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,13 @@ def _GenerationConfig_to_vertex(
292292
setv(to_object, ['seed'], getv(from_object, ['seed']))
293293

294294
if getv(from_object, ['speech_config']) is not None:
295-
setv(to_object, ['speechConfig'], getv(from_object, ['speech_config']))
295+
setv(
296+
to_object,
297+
['speechConfig'],
298+
_SpeechConfig_to_vertex(
299+
getv(from_object, ['speech_config']), to_object
300+
),
301+
)
296302

297303
if getv(from_object, ['stop_sequences']) is not None:
298304
setv(to_object, ['stopSequences'], getv(from_object, ['stop_sequences']))
@@ -959,7 +965,10 @@ def _LiveConnectConfig_to_vertex(
959965
setv(
960966
parent_object,
961967
['setup', 'generationConfig', 'speechConfig'],
962-
t.t_live_speech_config(getv(from_object, ['speech_config'])),
968+
_SpeechConfig_to_vertex(
969+
t.t_live_speech_config(getv(from_object, ['speech_config'])),
970+
to_object,
971+
),
963972
)
964973

965974
if getv(from_object, ['thinking_config']) is not None:
@@ -1391,6 +1400,24 @@ def _LiveServerMessage_from_vertex(
13911400
return to_object
13921401

13931402

1403+
def _MultiSpeakerVoiceConfig_to_vertex(
1404+
from_object: Union[dict[str, Any], object],
1405+
parent_object: Optional[dict[str, Any]] = None,
1406+
) -> dict[str, Any]:
1407+
to_object: dict[str, Any] = {}
1408+
if getv(from_object, ['speaker_voice_configs']) is not None:
1409+
setv(
1410+
to_object,
1411+
['speakerVoiceConfigs'],
1412+
[
1413+
_SpeakerVoiceConfig_to_vertex(item, to_object)
1414+
for item in getv(from_object, ['speaker_voice_configs'])
1415+
],
1416+
)
1417+
1418+
return to_object
1419+
1420+
13941421
def _Part_to_mldev(
13951422
from_object: Union[dict[str, Any], object],
13961423
parent_object: Optional[dict[str, Any]] = None,
@@ -1531,6 +1558,32 @@ def _Part_to_vertex(
15311558
return to_object
15321559

15331560

1561+
def _ReplicatedVoiceConfig_to_vertex(
1562+
from_object: Union[dict[str, Any], object],
1563+
parent_object: Optional[dict[str, Any]] = None,
1564+
) -> dict[str, Any]:
1565+
to_object: dict[str, Any] = {}
1566+
if getv(from_object, ['mime_type']) is not None:
1567+
setv(to_object, ['mimeType'], getv(from_object, ['mime_type']))
1568+
1569+
if getv(from_object, ['voice_sample_audio']) is not None:
1570+
setv(
1571+
to_object,
1572+
['voiceSampleAudio'],
1573+
getv(from_object, ['voice_sample_audio']),
1574+
)
1575+
1576+
if getv(from_object, ['consent_audio']) is not None:
1577+
raise ValueError('consent_audio parameter is not supported in Vertex AI.')
1578+
1579+
if getv(from_object, ['voice_consent_signature']) is not None:
1580+
raise ValueError(
1581+
'voice_consent_signature parameter is not supported in Vertex AI.'
1582+
)
1583+
1584+
return to_object
1585+
1586+
15341587
def _SessionResumptionConfig_to_mldev(
15351588
from_object: Union[dict[str, Any], object],
15361589
parent_object: Optional[dict[str, Any]] = None,
@@ -1545,6 +1598,51 @@ def _SessionResumptionConfig_to_mldev(
15451598
return to_object
15461599

15471600

1601+
def _SpeakerVoiceConfig_to_vertex(
1602+
from_object: Union[dict[str, Any], object],
1603+
parent_object: Optional[dict[str, Any]] = None,
1604+
) -> dict[str, Any]:
1605+
to_object: dict[str, Any] = {}
1606+
if getv(from_object, ['speaker']) is not None:
1607+
setv(to_object, ['speaker'], getv(from_object, ['speaker']))
1608+
1609+
if getv(from_object, ['voice_config']) is not None:
1610+
setv(
1611+
to_object,
1612+
['voiceConfig'],
1613+
_VoiceConfig_to_vertex(getv(from_object, ['voice_config']), to_object),
1614+
)
1615+
1616+
return to_object
1617+
1618+
1619+
def _SpeechConfig_to_vertex(
1620+
from_object: Union[dict[str, Any], object],
1621+
parent_object: Optional[dict[str, Any]] = None,
1622+
) -> dict[str, Any]:
1623+
to_object: dict[str, Any] = {}
1624+
if getv(from_object, ['voice_config']) is not None:
1625+
setv(
1626+
to_object,
1627+
['voiceConfig'],
1628+
_VoiceConfig_to_vertex(getv(from_object, ['voice_config']), to_object),
1629+
)
1630+
1631+
if getv(from_object, ['language_code']) is not None:
1632+
setv(to_object, ['languageCode'], getv(from_object, ['language_code']))
1633+
1634+
if getv(from_object, ['multi_speaker_voice_config']) is not None:
1635+
setv(
1636+
to_object,
1637+
['multiSpeakerVoiceConfig'],
1638+
_MultiSpeakerVoiceConfig_to_vertex(
1639+
getv(from_object, ['multi_speaker_voice_config']), to_object
1640+
),
1641+
)
1642+
1643+
return to_object
1644+
1645+
15481646
def _Tool_to_mldev(
15491647
from_object: Union[dict[str, Any], object],
15501648
parent_object: Optional[dict[str, Any]] = None,
@@ -1775,3 +1873,27 @@ def _VoiceActivity_from_vertex(
17751873
setv(to_object, ['voice_activity_type'], getv(from_object, ['type']))
17761874

17771875
return to_object
1876+
1877+
1878+
def _VoiceConfig_to_vertex(
1879+
from_object: Union[dict[str, Any], object],
1880+
parent_object: Optional[dict[str, Any]] = None,
1881+
) -> dict[str, Any]:
1882+
to_object: dict[str, Any] = {}
1883+
if getv(from_object, ['replicated_voice_config']) is not None:
1884+
setv(
1885+
to_object,
1886+
['replicatedVoiceConfig'],
1887+
_ReplicatedVoiceConfig_to_vertex(
1888+
getv(from_object, ['replicated_voice_config']), to_object
1889+
),
1890+
)
1891+
1892+
if getv(from_object, ['prebuilt_voice_config']) is not None:
1893+
setv(
1894+
to_object,
1895+
['prebuiltVoiceConfig'],
1896+
getv(from_object, ['prebuilt_voice_config']),
1897+
)
1898+
1899+
return to_object

google/genai/live.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,10 +92,12 @@ def __init__(
9292
api_client: BaseApiClient,
9393
websocket: ClientConnection,
9494
session_id: Optional[str] = None,
95+
setup_complete: Optional[types.LiveServerSetupComplete] = None,
9596
):
9697
self._api_client = api_client
9798
self._ws = websocket
9899
self.session_id = session_id
100+
self.setup_complete = setup_complete
99101

100102
async def send(
101103
self,
@@ -1131,12 +1133,15 @@ async def connect(
11311133
)
11321134
if setup_response.setup_complete:
11331135
session_id = setup_response.setup_complete.session_id
1136+
setup_complete = setup_response.setup_complete
11341137
else:
11351138
session_id = None
1139+
setup_complete = None
11361140
yield AsyncSession(
11371141
api_client=self._api_client,
11381142
websocket=ws,
11391143
session_id=session_id,
1144+
setup_complete=setup_complete,
11401145
)
11411146

11421147

google/genai/models.py

Lines changed: 138 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1468,7 +1468,11 @@ def _GenerateContentConfig_to_vertex(
14681468
setv(
14691469
to_object,
14701470
['speechConfig'],
1471-
t.t_speech_config(getv(from_object, ['speech_config'])),
1471+
_SpeechConfig_to_vertex(
1472+
t.t_speech_config(getv(from_object, ['speech_config'])),
1473+
to_object,
1474+
root_object,
1475+
),
14721476
)
14731477

14741478
if getv(from_object, ['audio_timestamp']) is not None:
@@ -2735,7 +2739,13 @@ def _GenerationConfig_to_vertex(
27352739
setv(to_object, ['seed'], getv(from_object, ['seed']))
27362740

27372741
if getv(from_object, ['speech_config']) is not None:
2738-
setv(to_object, ['speechConfig'], getv(from_object, ['speech_config']))
2742+
setv(
2743+
to_object,
2744+
['speechConfig'],
2745+
_SpeechConfig_to_vertex(
2746+
getv(from_object, ['speech_config']), to_object, root_object
2747+
),
2748+
)
27392749

27402750
if getv(from_object, ['stop_sequences']) is not None:
27412751
setv(to_object, ['stopSequences'], getv(from_object, ['stop_sequences']))
@@ -3301,6 +3311,25 @@ def _Model_from_vertex(
33013311
return to_object
33023312

33033313

3314+
def _MultiSpeakerVoiceConfig_to_vertex(
3315+
from_object: Union[dict[str, Any], object],
3316+
parent_object: Optional[dict[str, Any]] = None,
3317+
root_object: Optional[Union[dict[str, Any], object]] = None,
3318+
) -> dict[str, Any]:
3319+
to_object: dict[str, Any] = {}
3320+
if getv(from_object, ['speaker_voice_configs']) is not None:
3321+
setv(
3322+
to_object,
3323+
['speakerVoiceConfigs'],
3324+
[
3325+
_SpeakerVoiceConfig_to_vertex(item, to_object, root_object)
3326+
for item in getv(from_object, ['speaker_voice_configs'])
3327+
],
3328+
)
3329+
3330+
return to_object
3331+
3332+
33043333
def _Part_to_mldev(
33053334
from_object: Union[dict[str, Any], object],
33063335
parent_object: Optional[dict[str, Any]] = None,
@@ -3681,6 +3710,33 @@ def _ReferenceImageAPI_to_vertex(
36813710
return to_object
36823711

36833712

3713+
def _ReplicatedVoiceConfig_to_vertex(
3714+
from_object: Union[dict[str, Any], object],
3715+
parent_object: Optional[dict[str, Any]] = None,
3716+
root_object: Optional[Union[dict[str, Any], object]] = None,
3717+
) -> dict[str, Any]:
3718+
to_object: dict[str, Any] = {}
3719+
if getv(from_object, ['mime_type']) is not None:
3720+
setv(to_object, ['mimeType'], getv(from_object, ['mime_type']))
3721+
3722+
if getv(from_object, ['voice_sample_audio']) is not None:
3723+
setv(
3724+
to_object,
3725+
['voiceSampleAudio'],
3726+
getv(from_object, ['voice_sample_audio']),
3727+
)
3728+
3729+
if getv(from_object, ['consent_audio']) is not None:
3730+
raise ValueError('consent_audio parameter is not supported in Vertex AI.')
3731+
3732+
if getv(from_object, ['voice_consent_signature']) is not None:
3733+
raise ValueError(
3734+
'voice_consent_signature parameter is not supported in Vertex AI.'
3735+
)
3736+
3737+
return to_object
3738+
3739+
36843740
def _SafetyAttributes_from_mldev(
36853741
from_object: Union[dict[str, Any], object],
36863742
parent_object: Optional[dict[str, Any]] = None,
@@ -3883,6 +3939,59 @@ def _SegmentImageSource_to_vertex(
38833939
return to_object
38843940

38853941

3942+
def _SpeakerVoiceConfig_to_vertex(
3943+
from_object: Union[dict[str, Any], object],
3944+
parent_object: Optional[dict[str, Any]] = None,
3945+
root_object: Optional[Union[dict[str, Any], object]] = None,
3946+
) -> dict[str, Any]:
3947+
to_object: dict[str, Any] = {}
3948+
if getv(from_object, ['speaker']) is not None:
3949+
setv(to_object, ['speaker'], getv(from_object, ['speaker']))
3950+
3951+
if getv(from_object, ['voice_config']) is not None:
3952+
setv(
3953+
to_object,
3954+
['voiceConfig'],
3955+
_VoiceConfig_to_vertex(
3956+
getv(from_object, ['voice_config']), to_object, root_object
3957+
),
3958+
)
3959+
3960+
return to_object
3961+
3962+
3963+
def _SpeechConfig_to_vertex(
3964+
from_object: Union[dict[str, Any], object],
3965+
parent_object: Optional[dict[str, Any]] = None,
3966+
root_object: Optional[Union[dict[str, Any], object]] = None,
3967+
) -> dict[str, Any]:
3968+
to_object: dict[str, Any] = {}
3969+
if getv(from_object, ['voice_config']) is not None:
3970+
setv(
3971+
to_object,
3972+
['voiceConfig'],
3973+
_VoiceConfig_to_vertex(
3974+
getv(from_object, ['voice_config']), to_object, root_object
3975+
),
3976+
)
3977+
3978+
if getv(from_object, ['language_code']) is not None:
3979+
setv(to_object, ['languageCode'], getv(from_object, ['language_code']))
3980+
3981+
if getv(from_object, ['multi_speaker_voice_config']) is not None:
3982+
setv(
3983+
to_object,
3984+
['multiSpeakerVoiceConfig'],
3985+
_MultiSpeakerVoiceConfig_to_vertex(
3986+
getv(from_object, ['multi_speaker_voice_config']),
3987+
to_object,
3988+
root_object,
3989+
),
3990+
)
3991+
3992+
return to_object
3993+
3994+
38863995
def _ToolConfig_to_mldev(
38873996
from_object: Union[dict[str, Any], object],
38883997
parent_object: Optional[dict[str, Any]] = None,
@@ -4481,6 +4590,33 @@ def _Video_to_vertex(
44814590
return to_object
44824591

44834592

4593+
def _VoiceConfig_to_vertex(
4594+
from_object: Union[dict[str, Any], object],
4595+
parent_object: Optional[dict[str, Any]] = None,
4596+
root_object: Optional[Union[dict[str, Any], object]] = None,
4597+
) -> dict[str, Any]:
4598+
to_object: dict[str, Any] = {}
4599+
if getv(from_object, ['replicated_voice_config']) is not None:
4600+
setv(
4601+
to_object,
4602+
['replicatedVoiceConfig'],
4603+
_ReplicatedVoiceConfig_to_vertex(
4604+
getv(from_object, ['replicated_voice_config']),
4605+
to_object,
4606+
root_object,
4607+
),
4608+
)
4609+
4610+
if getv(from_object, ['prebuilt_voice_config']) is not None:
4611+
setv(
4612+
to_object,
4613+
['prebuiltVoiceConfig'],
4614+
getv(from_object, ['prebuilt_voice_config']),
4615+
)
4616+
4617+
return to_object
4618+
4619+
44844620
class Models(_api_module.BaseModule):
44854621

44864622
def _generate_content(

0 commit comments

Comments
 (0)