mirror of
https://github.com/openai/codex.git
synced 2026-04-30 09:26:44 +00:00
Add realtime silence tool (#18635)
## Summary

Adds a second realtime v2 function tool, `remain_silent`, so the realtime model has an explicit non-speaking action when the collaboration mode or latest context says it should not answer aloud.

This is stacked on #18597.

## Design

- Advertise `remain_silent` alongside `background_agent` in realtime v2 conversational sessions.
- Parse `remain_silent` function calls into a typed `RealtimeEvent::NoopRequested` event.
- Have core answer that function call with an empty `function_call_output` and deliberately avoid `response.create`, so no follow-up realtime response is requested.
- Keep the event hidden from app-server/TUI surfaces; it is operational plumbing, not user-visible conversation content.
This commit is contained in:
@@ -1155,7 +1155,7 @@ async fn realtime_webrtc_start_emits_sdp_notification() -> Result<()> {
|
||||
Some("multipart/form-data; boundary=codex-realtime-call-boundary")
|
||||
);
|
||||
let body = String::from_utf8(request.body).context("multipart body should be utf-8")?;
|
||||
let session = r#"{"tool_choice":"auto","type":"realtime","model":"gpt-realtime-1.5","instructions":"backend prompt\n\nstartup context","output_modalities":["audio"],"audio":{"input":{"format":{"type":"audio/pcm","rate":24000},"noise_reduction":{"type":"near_field"},"transcription":{"model":"gpt-4o-mini-transcribe"},"turn_detection":{"type":"server_vad","interrupt_response":true,"create_response":true,"silence_duration_ms":500}},"output":{"format":{"type":"audio/pcm","rate":24000},"voice":"marin"}},"tools":[{"type":"function","name":"background_agent","description":"Send a user request to the background agent. Use this as the default action. Do not rephrase the user's ask or rewrite it in your own words; pass along the user's own words. If the background agent is idle, this starts a new task and returns the final result to the user. If the background agent is already working on a task, this sends the request as guidance to steer that previous task. If the user asks to do something next, later, after this, or once current work finishes, call this tool so the work is actually queued instead of merely promising to do it later.","parameters":{"type":"object","properties":{"prompt":{"type":"string","description":"The user request to delegate to the background agent."}},"required":["prompt"],"additionalProperties":false}}]}"#;
|
||||
let session = r#"{"tool_choice":"auto","type":"realtime","model":"gpt-realtime-1.5","instructions":"backend prompt\n\nstartup context","output_modalities":["audio"],"audio":{"input":{"format":{"type":"audio/pcm","rate":24000},"noise_reduction":{"type":"near_field"},"transcription":{"model":"gpt-4o-mini-transcribe"},"turn_detection":{"type":"server_vad","interrupt_response":true,"create_response":true,"silence_duration_ms":500}},"output":{"format":{"type":"audio/pcm","rate":24000},"voice":"marin"}},"tools":[{"type":"function","name":"background_agent","description":"Send a user request to the background agent. Use this as the default action. Do not rephrase the user's ask or rewrite it in your own words; pass along the user's own words. If the background agent is idle, this starts a new task and returns the final result to the user. If the background agent is already working on a task, this sends the request as guidance to steer that previous task. If the user asks to do something next, later, after this, or once current work finishes, call this tool so the work is actually queued instead of merely promising to do it later.","parameters":{"type":"object","properties":{"prompt":{"type":"string","description":"The user request to delegate to the background agent."}},"required":["prompt"],"additionalProperties":false}},{"type":"function","name":"remain_silent","description":"Call this when the best response is to say nothing. Use it instead of speaking after hidden system/control messages, after background agent updates in silent modes, or whenever acknowledging aloud would be distracting. This tool has no user-visible effect.","parameters":{"type":"object","properties":{},"additionalProperties":false}}]}"#;
|
||||
let session = normalized_json_string(session)?;
|
||||
assert_eq!(
|
||||
body,
|
||||
@@ -2260,6 +2260,10 @@ fn assert_v2_session_update(request: &Value) -> Result<()> {
|
||||
request["session"]["tools"][0]["name"].as_str(),
|
||||
Some("background_agent")
|
||||
);
|
||||
assert_eq!(
|
||||
request["session"]["tools"][1]["name"].as_str(),
|
||||
Some("remain_silent")
|
||||
);
|
||||
assert_eq!(
|
||||
request["session"]["audio"]["input"]["transcription"]["model"].as_str(),
|
||||
Some("gpt-4o-mini-transcribe")
|
||||
|
||||
Reference in New Issue
Block a user