Skip to content

Commit aae87a9

Browse files
MarkDaoust and sasha-gitg
authored and committed
feat: add multi-speaker voice config
PiperOrigin-RevId: 759431774
1 parent 08cf7d9 commit aae87a9

File tree

6 files changed

+210
-2
lines changed

6 files changed

+210
-2
lines changed

live_converters.go

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

live_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,35 @@ func TestLiveConnect(t *testing.T) {
9191
},
9292
wantRequestBody: `{"setup":{"generationConfig":{"temperature":0.5},"model":"models/test-model","systemInstruction":{"parts":[{"text":"test instruction"}]},"tools":[{"googleSearch":{}}]}}`,
9393
},
94+
{
95+
desc: "Fail if multispeaker config.",
96+
client: mldevClient,
97+
config: &LiveConnectConfig{
98+
SpeechConfig: &SpeechConfig{
99+
MultiSpeakerVoiceConfig: &MultiSpeakerVoiceConfig{
100+
SpeakerVoiceConfigs: []*SpeakerVoiceConfig{
101+
{
102+
Speaker: "Alice",
103+
VoiceConfig: &VoiceConfig{
104+
PrebuiltVoiceConfig: &PrebuiltVoiceConfig{VoiceName: "kore"},
105+
},
106+
},
107+
{
108+
Speaker: "Bob",
109+
VoiceConfig: &VoiceConfig{
110+
PrebuiltVoiceConfig: &PrebuiltVoiceConfig{VoiceName: "puck"},
111+
},
112+
},
113+
},
114+
},
115+
},
116+
Temperature: Ptr[float32](0.5),
117+
SystemInstruction: &Content{Parts: []*Part{{Text: "test instruction"}}},
118+
Tools: []*Tool{{GoogleSearch: &GoogleSearch{}}},
119+
},
120+
wantErr: true,
121+
wantErrMessage: "multiSpeakerVoiceConfig is not supported",
122+
},
94123
{
95124
desc: "successful connection with http options mldev",
96125
client: mldevClient,

models.go

Lines changed: 73 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

models_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,61 @@ func TestModelsGenerateContentAudio(t *testing.T) {
149149
}
150150
}
151151

152+
func TestModelsGenerateContentMultiSpeakerVoiceConfigAudio(t *testing.T) {
153+
if *mode != apiMode {
154+
t.Skip("Skip. This test is only in the API mode")
155+
}
156+
ctx := context.Background()
157+
for _, backend := range backends {
158+
t.Run(backend.name, func(t *testing.T) {
159+
t.Parallel()
160+
if isDisabledTest(t) {
161+
t.Skip("Skip: disabled test")
162+
}
163+
client, err := NewClient(ctx, &ClientConfig{Backend: backend.Backend})
164+
if err != nil {
165+
t.Fatal(err)
166+
}
167+
config := &GenerateContentConfig{
168+
ResponseModalities: []string{"AUDIO"},
169+
SpeechConfig: &SpeechConfig{
170+
MultiSpeakerVoiceConfig: &MultiSpeakerVoiceConfig{
171+
SpeakerVoiceConfigs: []*SpeakerVoiceConfig{
172+
{
173+
Speaker: "Alice",
174+
VoiceConfig: &VoiceConfig{
175+
PrebuiltVoiceConfig: &PrebuiltVoiceConfig{
176+
VoiceName: "Aoede",
177+
},
178+
},
179+
},
180+
{
181+
Speaker: "Bob",
182+
VoiceConfig: &VoiceConfig{
183+
PrebuiltVoiceConfig: &PrebuiltVoiceConfig{
184+
VoiceName: "Kore",
185+
},
186+
},
187+
},
188+
},
189+
},
190+
LanguageCode: "en-US",
191+
},
192+
}
193+
result, err := client.Models.GenerateContent(ctx, "gemini-2.0-flash", Text("say something nice to me"), config)
194+
if err != nil {
195+
t.Errorf("GenerateContent failed unexpectedly: %v", err)
196+
}
197+
if result == nil {
198+
t.Fatalf("expected at least one response, got none")
199+
}
200+
if len(result.Candidates) == 0 {
201+
t.Errorf("expected at least one candidate, got none")
202+
}
203+
})
204+
}
205+
}
206+
152207
func TestModelsGenerateVideosText2VideoPoll(t *testing.T) {
153208
if *mode != apiMode {
154209
t.Skip("Skip. This test is only in the API mode")

transformer.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,19 @@ func tSpeechConfig(_ *apiClient, speechConfig any) (any, error) {
125125
return speechConfig, nil
126126
}
127127

128+
func tLiveSpeechConfig(_ *apiClient, speechConfig any) (any, error) {
129+
switch config := speechConfig.(type) {
130+
case map[string]any:
131+
if _, ok := config["multiSpeakerVoiceConfig"]; ok {
132+
return nil, fmt.Errorf("multiSpeakerVoiceConfig is not supported in the live API")
133+
}
134+
return config, nil
135+
case nil:
136+
return nil, nil
137+
default:
138+
return nil, fmt.Errorf("unsupported speechConfig type: %T", speechConfig)
139+
}
140+
}
128141
func tBytes(_ *apiClient, fromImageBytes any) (any, error) {
129142
// TODO(b/389133914): Remove dummy bytes converter.
130143
return fromImageBytes, nil

types.go

Lines changed: 20 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)