mirror of
https://github.com/openai/codex.git
synced 2026-04-27 16:15:09 +00:00
Add realtime voice selection (#17176)
- Add realtime voice selection for realtime/start.
- Expose the supported v1/v2 voice lists and cover explicit, configured, default, and invalid voice paths.
This commit is contained in:
@@ -144,6 +144,8 @@ pub struct ConversationStartParams {
|
||||
pub session_id: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub transport: Option<ConversationStartTransport>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub voice: Option<RealtimeVoice>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
|
||||
@@ -176,6 +178,101 @@ mod conversation_start_prompt_serde {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(
|
||||
Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS, Ord, PartialOrd,
|
||||
)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
#[ts(rename_all = "snake_case")]
|
||||
pub enum RealtimeVoice {
|
||||
Alloy,
|
||||
Arbor,
|
||||
Ash,
|
||||
Ballad,
|
||||
Breeze,
|
||||
Cedar,
|
||||
Coral,
|
||||
Cove,
|
||||
Echo,
|
||||
Ember,
|
||||
Juniper,
|
||||
Maple,
|
||||
Marin,
|
||||
Sage,
|
||||
Shimmer,
|
||||
Sol,
|
||||
Spruce,
|
||||
Vale,
|
||||
Verse,
|
||||
}
|
||||
|
||||
impl RealtimeVoice {
|
||||
pub fn wire_name(self) -> &'static str {
|
||||
match self {
|
||||
Self::Alloy => "alloy",
|
||||
Self::Arbor => "arbor",
|
||||
Self::Ash => "ash",
|
||||
Self::Ballad => "ballad",
|
||||
Self::Breeze => "breeze",
|
||||
Self::Cedar => "cedar",
|
||||
Self::Coral => "coral",
|
||||
Self::Cove => "cove",
|
||||
Self::Echo => "echo",
|
||||
Self::Ember => "ember",
|
||||
Self::Juniper => "juniper",
|
||||
Self::Maple => "maple",
|
||||
Self::Marin => "marin",
|
||||
Self::Sage => "sage",
|
||||
Self::Shimmer => "shimmer",
|
||||
Self::Sol => "sol",
|
||||
Self::Spruce => "spruce",
|
||||
Self::Vale => "vale",
|
||||
Self::Verse => "verse",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
pub struct RealtimeVoicesList {
|
||||
pub v1: Vec<RealtimeVoice>,
|
||||
pub v2: Vec<RealtimeVoice>,
|
||||
pub default_v1: RealtimeVoice,
|
||||
pub default_v2: RealtimeVoice,
|
||||
}
|
||||
|
||||
impl RealtimeVoicesList {
|
||||
pub fn builtin() -> Self {
|
||||
Self {
|
||||
v1: vec![
|
||||
RealtimeVoice::Juniper,
|
||||
RealtimeVoice::Maple,
|
||||
RealtimeVoice::Spruce,
|
||||
RealtimeVoice::Ember,
|
||||
RealtimeVoice::Vale,
|
||||
RealtimeVoice::Breeze,
|
||||
RealtimeVoice::Arbor,
|
||||
RealtimeVoice::Sol,
|
||||
RealtimeVoice::Cove,
|
||||
],
|
||||
v2: vec![
|
||||
RealtimeVoice::Alloy,
|
||||
RealtimeVoice::Ash,
|
||||
RealtimeVoice::Ballad,
|
||||
RealtimeVoice::Coral,
|
||||
RealtimeVoice::Echo,
|
||||
RealtimeVoice::Sage,
|
||||
RealtimeVoice::Shimmer,
|
||||
RealtimeVoice::Verse,
|
||||
RealtimeVoice::Marin,
|
||||
RealtimeVoice::Cedar,
|
||||
],
|
||||
default_v1: RealtimeVoice::Cove,
|
||||
default_v2: RealtimeVoice::Marin,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
pub struct RealtimeAudioFrame {
|
||||
pub data: String,
|
||||
@@ -271,6 +368,9 @@ pub enum Op {
|
||||
/// Close the running realtime conversation stream.
|
||||
RealtimeConversationClose,
|
||||
|
||||
/// Request the list of voices supported by realtime conversation streams.
|
||||
RealtimeConversationListVoices,
|
||||
|
||||
/// Legacy user input.
|
||||
///
|
||||
/// Prefer [`Op::UserTurn`] so the caller provides full turn context
|
||||
@@ -617,6 +717,7 @@ impl Op {
|
||||
Self::RealtimeConversationAudio(_) => "realtime_conversation_audio",
|
||||
Self::RealtimeConversationText(_) => "realtime_conversation_text",
|
||||
Self::RealtimeConversationClose => "realtime_conversation_close",
|
||||
Self::RealtimeConversationListVoices => "realtime_conversation_list_voices",
|
||||
Self::UserInput { .. } => "user_input",
|
||||
Self::UserTurn { .. } => "user_turn",
|
||||
Self::InterAgentCommunication { .. } => "inter_agent_communication",
|
||||
@@ -1398,6 +1499,9 @@ pub enum EventMsg {
|
||||
/// List of skills available to the agent.
|
||||
ListSkillsResponse(ListSkillsResponseEvent),
|
||||
|
||||
/// List of voices supported by realtime conversation streams.
|
||||
RealtimeConversationListVoicesResponse(RealtimeConversationListVoicesResponseEvent),
|
||||
|
||||
/// Notification that skill data may have been updated and clients may want to reload.
|
||||
SkillsUpdateAvailable,
|
||||
|
||||
@@ -3147,6 +3251,11 @@ pub struct ListSkillsResponseEvent {
|
||||
pub skills: Vec<SkillsListEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
pub struct RealtimeConversationListVoicesResponseEvent {
|
||||
pub voices: RealtimeVoicesList,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema, TS)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
#[ts(rename_all = "lowercase")]
|
||||
@@ -4441,6 +4550,7 @@ mod tests {
|
||||
prompt: Some(Some("be helpful".to_string())),
|
||||
session_id: Some("conv_1".to_string()),
|
||||
transport: None,
|
||||
voice: None,
|
||||
});
|
||||
let webrtc_start = Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: Some(Some("be helpful".to_string())),
|
||||
@@ -4448,6 +4558,7 @@ mod tests {
|
||||
transport: Some(ConversationStartTransport::Webrtc {
|
||||
sdp: "v=offer\r\n".to_string(),
|
||||
}),
|
||||
voice: Some(RealtimeVoice::Cove),
|
||||
});
|
||||
let text = Op::RealtimeConversationText(ConversationTextParams {
|
||||
text: "hello".to_string(),
|
||||
@@ -4457,12 +4568,15 @@ mod tests {
|
||||
prompt: None,
|
||||
session_id: None,
|
||||
transport: None,
|
||||
voice: None,
|
||||
});
|
||||
let null_prompt_start = Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: Some(None),
|
||||
session_id: None,
|
||||
transport: None,
|
||||
voice: None,
|
||||
});
|
||||
let list_voices = Op::RealtimeConversationListVoices;
|
||||
|
||||
assert_eq!(
|
||||
serde_json::to_value(&start).unwrap(),
|
||||
@@ -4526,6 +4640,16 @@ mod tests {
|
||||
serde_json::from_value::<Op>(serde_json::to_value(&close).unwrap()).unwrap(),
|
||||
close
|
||||
);
|
||||
assert_eq!(
|
||||
serde_json::to_value(&list_voices).unwrap(),
|
||||
json!({
|
||||
"type": "realtime_conversation_list_voices"
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
serde_json::from_value::<Op>(serde_json::to_value(&list_voices).unwrap()).unwrap(),
|
||||
list_voices
|
||||
);
|
||||
assert_eq!(
|
||||
serde_json::to_value(&webrtc_start).unwrap(),
|
||||
json!({
|
||||
@@ -4535,11 +4659,46 @@ mod tests {
|
||||
"transport": {
|
||||
"type": "webrtc",
|
||||
"sdp": "v=offer\r\n"
|
||||
}
|
||||
},
|
||||
"voice": "cove"
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn realtime_voice_list_is_stable() {
    // The expected catalog is spelled out independently of `builtin()` so that
    // any change to membership, ordering, or defaults has to be acknowledged
    // by editing this test as well.
    let expected = RealtimeVoicesList {
        v1: vec![
            RealtimeVoice::Juniper,
            RealtimeVoice::Maple,
            RealtimeVoice::Spruce,
            RealtimeVoice::Ember,
            RealtimeVoice::Vale,
            RealtimeVoice::Breeze,
            RealtimeVoice::Arbor,
            RealtimeVoice::Sol,
            RealtimeVoice::Cove,
        ],
        v2: vec![
            RealtimeVoice::Alloy,
            RealtimeVoice::Ash,
            RealtimeVoice::Ballad,
            RealtimeVoice::Coral,
            RealtimeVoice::Echo,
            RealtimeVoice::Sage,
            RealtimeVoice::Shimmer,
            RealtimeVoice::Verse,
            RealtimeVoice::Marin,
            RealtimeVoice::Cedar,
        ],
        default_v1: RealtimeVoice::Cove,
        default_v2: RealtimeVoice::Marin,
    };

    assert_eq!(RealtimeVoicesList::builtin(), expected);
}
|
||||
|
||||
#[test]
|
||||
fn user_input_serialization_omits_final_output_json_schema_when_none() -> Result<()> {
|
||||
let op = Op::UserInput {
|
||||
|
||||
Reference in New Issue
Block a user