mirror of
https://github.com/openai/codex.git
synced 2026-04-26 23:55:25 +00:00
Wire realtime api to core (#12268)
- Introduce `RealtimeConversationManager` for realtime API management.
- Add `op::conversation` to start a conversation, insert audio, insert text, and close the conversation.
- Emit conversation lifecycle and realtime events.
- Move shared realtime payload types into codex-protocol and add core e2e websocket tests for start/replace/transport-close paths.

Things to consider:
- Should we use the same `op::` and `Events` channel to carry audio? I think we should try this simple approach first; later we can create a separate one if the channels get congested.
- Sending text updates to the client: we can start simple and restrict that later.
- Provider auth is intentionally not wired for now.
This commit is contained in:
360
codex-rs/core/tests/suite/realtime_conversation.rs
Normal file
360
codex-rs/core/tests/suite/realtime_conversation.rs
Normal file
@@ -0,0 +1,360 @@
|
||||
use anyhow::Result;
|
||||
use codex_core::protocol::CodexErrorInfo;
|
||||
use codex_core::protocol::ConversationAudioParams;
|
||||
use codex_core::protocol::ConversationStartParams;
|
||||
use codex_core::protocol::ConversationTextParams;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::RealtimeAudioFrame;
|
||||
use codex_core::protocol::RealtimeConversationRealtimeEvent;
|
||||
use codex_core::protocol::RealtimeEvent;
|
||||
use core_test_support::responses::start_websocket_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::time::Duration;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn conversation_start_audio_text_close_round_trip() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_websocket_server(vec![
|
||||
vec![],
|
||||
vec![
|
||||
vec![json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_1" }
|
||||
})],
|
||||
vec![],
|
||||
vec![
|
||||
json!({
|
||||
"type": "response.output_audio.delta",
|
||||
"delta": "AQID",
|
||||
"sample_rate": 24000,
|
||||
"num_channels": 1
|
||||
}),
|
||||
json!({
|
||||
"type": "conversation.item.added",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "text", "text": "hi"}]
|
||||
}
|
||||
}),
|
||||
],
|
||||
],
|
||||
])
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build_with_websocket_server(&server).await?;
|
||||
assert!(server.wait_for_handshakes(1, Duration::from_secs(2)).await);
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "backend prompt".to_string(),
|
||||
session_id: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationStarted(started) => Some(Ok(started.clone())),
|
||||
EventMsg::Error(err) => Some(Err(err.clone())),
|
||||
_ => None,
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|err: ErrorEvent| panic!("conversation start failed: {err:?}"));
|
||||
assert!(started.session_id.is_some());
|
||||
|
||||
let session_created = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) => Some(session_id.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(session_created, "sess_1");
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationAudio(ConversationAudioParams {
|
||||
frame: RealtimeAudioFrame {
|
||||
data: "AQID".to_string(),
|
||||
sample_rate: 24000,
|
||||
num_channels: 1,
|
||||
samples_per_channel: Some(480),
|
||||
},
|
||||
}))
|
||||
.await?;
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationText(ConversationTextParams {
|
||||
text: "hello".to_string(),
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let audio_out = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::AudioOut(frame),
|
||||
}) => Some(frame.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(audio_out.data, "AQID");
|
||||
|
||||
let connections = server.connections();
|
||||
assert_eq!(connections.len(), 2);
|
||||
let connection = &connections[1];
|
||||
assert_eq!(connection.len(), 3);
|
||||
assert_eq!(
|
||||
connection[0].body_json()["type"].as_str(),
|
||||
Some("session.create")
|
||||
);
|
||||
assert_eq!(
|
||||
connection[0].body_json()["session"]["conversation_id"]
|
||||
.as_str()
|
||||
.expect("session.create conversation_id"),
|
||||
started
|
||||
.session_id
|
||||
.as_deref()
|
||||
.expect("started session id should be present")
|
||||
);
|
||||
let request_types = [
|
||||
connection[1].body_json()["type"]
|
||||
.as_str()
|
||||
.expect("request type")
|
||||
.to_string(),
|
||||
connection[2].body_json()["type"]
|
||||
.as_str()
|
||||
.expect("request type")
|
||||
.to_string(),
|
||||
];
|
||||
assert_eq!(
|
||||
request_types,
|
||||
[
|
||||
"conversation.item.create".to_string(),
|
||||
"response.input_audio.delta".to_string(),
|
||||
]
|
||||
);
|
||||
|
||||
test.codex.submit(Op::RealtimeConversationClose).await?;
|
||||
let closed = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationClosed(closed) => Some(closed.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert!(matches!(
|
||||
closed.reason.as_deref(),
|
||||
Some("requested" | "transport_closed")
|
||||
));
|
||||
|
||||
server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn conversation_transport_close_emits_closed_event() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let session_created = vec![json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_1" }
|
||||
})];
|
||||
let server = start_websocket_server(vec![vec![], vec![session_created]]).await;
|
||||
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build_with_websocket_server(&server).await?;
|
||||
assert!(server.wait_for_handshakes(1, Duration::from_secs(2)).await);
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "backend prompt".to_string(),
|
||||
session_id: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationStarted(started) => Some(Ok(started.clone())),
|
||||
EventMsg::Error(err) => Some(Err(err.clone())),
|
||||
_ => None,
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|err: ErrorEvent| panic!("conversation start failed: {err:?}"));
|
||||
assert!(started.session_id.is_some());
|
||||
|
||||
let session_created = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) => Some(session_id.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(session_created, "sess_1");
|
||||
|
||||
let closed = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationClosed(closed) => Some(closed.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(closed.reason.as_deref(), Some("transport_closed"));
|
||||
|
||||
server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn conversation_audio_before_start_emits_error() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_websocket_server(vec![]).await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build_with_websocket_server(&server).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationAudio(ConversationAudioParams {
|
||||
frame: RealtimeAudioFrame {
|
||||
data: "AQID".to_string(),
|
||||
sample_rate: 24000,
|
||||
num_channels: 1,
|
||||
samples_per_channel: Some(480),
|
||||
},
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let err = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::Error(err) => Some(err.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(err.codex_error_info, Some(CodexErrorInfo::BadRequest));
|
||||
assert_eq!(err.message, "conversation is not running");
|
||||
|
||||
server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn conversation_text_before_start_emits_error() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_websocket_server(vec![]).await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build_with_websocket_server(&server).await?;
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationText(ConversationTextParams {
|
||||
text: "hello".to_string(),
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let err = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::Error(err) => Some(err.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
assert_eq!(err.codex_error_info, Some(CodexErrorInfo::BadRequest));
|
||||
assert_eq!(err.message, "conversation is not running");
|
||||
|
||||
server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn conversation_second_start_replaces_runtime() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_websocket_server(vec![
|
||||
vec![],
|
||||
vec![vec![json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_old" }
|
||||
})]],
|
||||
vec![
|
||||
vec![json!({
|
||||
"type": "session.created",
|
||||
"session": { "id": "sess_new" }
|
||||
})],
|
||||
vec![json!({
|
||||
"type": "response.output_audio.delta",
|
||||
"delta": "AQID",
|
||||
"sample_rate": 24000,
|
||||
"num_channels": 1
|
||||
})],
|
||||
],
|
||||
])
|
||||
.await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build_with_websocket_server(&server).await?;
|
||||
assert!(server.wait_for_handshakes(1, Duration::from_secs(2)).await);
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "old".to_string(),
|
||||
session_id: Some("conv_old".to_string()),
|
||||
}))
|
||||
.await?;
|
||||
wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) if session_id == "sess_old" => Some(Ok(())),
|
||||
EventMsg::Error(err) => Some(Err(err.clone())),
|
||||
_ => None,
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|err: ErrorEvent| panic!("first conversation start failed: {err:?}"));
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationStart(ConversationStartParams {
|
||||
prompt: "new".to_string(),
|
||||
session_id: Some("conv_new".to_string()),
|
||||
}))
|
||||
.await?;
|
||||
wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::SessionCreated { session_id },
|
||||
}) if session_id == "sess_new" => Some(Ok(())),
|
||||
EventMsg::Error(err) => Some(Err(err.clone())),
|
||||
_ => None,
|
||||
})
|
||||
.await
|
||||
.unwrap_or_else(|err: ErrorEvent| panic!("second conversation start failed: {err:?}"));
|
||||
|
||||
test.codex
|
||||
.submit(Op::RealtimeConversationAudio(ConversationAudioParams {
|
||||
frame: RealtimeAudioFrame {
|
||||
data: "AQID".to_string(),
|
||||
sample_rate: 24000,
|
||||
num_channels: 1,
|
||||
samples_per_channel: Some(480),
|
||||
},
|
||||
}))
|
||||
.await?;
|
||||
let _ = wait_for_event_match(&test.codex, |msg| match msg {
|
||||
EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
|
||||
payload: RealtimeEvent::AudioOut(frame),
|
||||
}) if frame.data == "AQID" => Some(()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let connections = server.connections();
|
||||
assert_eq!(connections.len(), 3);
|
||||
assert_eq!(connections[1].len(), 1);
|
||||
assert_eq!(
|
||||
connections[1][0].body_json()["session"]["conversation_id"].as_str(),
|
||||
Some("conv_old")
|
||||
);
|
||||
assert_eq!(connections[2].len(), 2);
|
||||
assert_eq!(
|
||||
connections[2][0].body_json()["session"]["conversation_id"].as_str(),
|
||||
Some("conv_new")
|
||||
);
|
||||
assert_eq!(
|
||||
connections[2][1].body_json()["type"].as_str(),
|
||||
Some("response.input_audio.delta")
|
||||
);
|
||||
|
||||
server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user