Add realtime silence tool (#18635)

## Summary

Adds a second realtime v2 function tool, `remain_silent`, so the
realtime model has an explicit non-speaking action when the
collaboration mode or latest context says it should not answer aloud.
This is stacked on #18597.

## Design

- Advertise `remain_silent` alongside `background_agent` in realtime v2
conversational sessions.
- Parse `remain_silent` function calls into a typed
`RealtimeEvent::NoopRequested` event.
- Have core answer that function call with an empty
`function_call_output` and deliberately avoid `response.create`, so no
follow-up realtime response is requested.
- Keep the event hidden from app-server/TUI surfaces; it is operational
plumbing, not user-visible conversation content.
This commit is contained in:
guinness-oai
2026-04-20 15:43:20 -07:00
committed by GitHub
parent a718b6fd47
commit 1029742cf7
10 changed files with 242 additions and 39 deletions

View File

@@ -22,6 +22,7 @@ use codex_protocol::protocol::RealtimeAudioFrame;
use codex_protocol::protocol::RealtimeConversationRealtimeEvent;
use codex_protocol::protocol::RealtimeConversationVersion;
use codex_protocol::protocol::RealtimeEvent;
use codex_protocol::protocol::RealtimeNoopRequested;
use codex_protocol::protocol::RealtimeOutputModality;
use codex_protocol::protocol::RealtimeVoice;
use codex_protocol::protocol::RolloutItem;
@@ -2121,6 +2122,92 @@ async fn conversation_user_text_turn_is_capped_when_mirrored_to_realtime() -> Re
Ok(())
}
/// Checks how a realtime v2 session reacts to a `remain_silent` function call.
///
/// The mock realtime server scripts a `session.updated` event followed by a
/// completed `remain_silent` function call. The test then asserts that:
///
/// 1. a `RealtimeEvent::NoopRequested` event carrying the scripted call id is
///    emitted,
/// 2. the second websocket request on the first connection is a
///    `function_call_output` item with an empty `output` for that call id, and
/// 3. no `response.create` request is observed within a 200 ms window.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn realtime_v2_noop_tool_call_returns_empty_function_output_without_response() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let api_server = start_mock_server().await;

    // Events the mock realtime server sends on the first connection.
    let session_updated = json!({
        "type": "session.updated",
        "session": { "id": "sess_silent", "instructions": "backend prompt" }
    });
    let silent_tool_call = json!({
        "type": "conversation.item.done",
        "item": {
            "id": "item_silent",
            "type": "function_call",
            "name": "remain_silent",
            "call_id": "call_silent",
            "arguments": "{}"
        }
    });
    let realtime_server =
        start_websocket_server(vec![vec![vec![session_updated, silent_tool_call], vec![]]]).await;

    // Point the session under test at the mock websocket server, using realtime v2.
    let realtime_base_url = realtime_server.uri().to_string();
    let mut builder = test_codex().with_config(move |config| {
        config.experimental_realtime_ws_base_url = Some(realtime_base_url);
        config.realtime.version = RealtimeWsVersion::V2;
    });
    let test = builder.build(&api_server).await?;

    let start_params = ConversationStartParams {
        output_modality: RealtimeOutputModality::Audio,
        prompt: Some(Some("backend prompt".to_string())),
        session_id: None,
        transport: None,
        voice: None,
    };
    test.codex
        .submit(Op::RealtimeConversationStart(start_params))
        .await?;

    // Block until the noop event for the scripted call id has been emitted.
    let _ = wait_for_event_match(&test.codex, |msg| {
        matches!(
            msg,
            EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
                payload: RealtimeEvent::NoopRequested(RealtimeNoopRequested { call_id, .. }),
            }) if call_id == "call_silent"
        )
        .then_some(())
    })
    .await;

    // The request at index 1 on connection 0 must be the empty function output.
    let noop_reply = realtime_server
        .wait_for_request(/*connection_index*/ 0, /*request_index*/ 1)
        .await;
    let expected_reply = json!({
        "type": "conversation.item.create",
        "item": {
            "type": "function_call_output",
            "call_id": "call_silent",
            "output": ""
        }
    });
    assert_eq!(noop_reply.body_json(), expected_reply);

    // Negative check: waiting for a `response.create` request has to time out.
    let unexpected_response = timeout(
        Duration::from_millis(200),
        wait_for_matching_websocket_request(
            &realtime_server,
            "unexpected realtime response request for noop tool call",
            |request| request.body_json()["type"].as_str() == Some("response.create"),
        ),
    )
    .await;
    assert!(
        unexpected_response.is_err(),
        "noop tool calls should not request a realtime response"
    );

    realtime_server.shutdown().await;
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Result<()> {
skip_if_no_network!(Ok(()));