Merge branch 'main' into codex/prefix-compaction-prewarm

This commit is contained in:
rka-oai
2026-04-10 10:19:48 -07:00
committed by GitHub
163 changed files with 8390 additions and 1220 deletions

View File

@@ -24,6 +24,7 @@ pub const WS_REQUEST_HEADER_TRACESTATE_CLIENT_METADATA_KEY: &str = "ws_request_h
pub struct CompactionInput<'a> {
pub model: &'a str,
pub input: &'a [ResponseItem],
#[serde(skip_serializing_if = "str::is_empty")]
pub instructions: &'a str,
pub tools: Vec<Value>,
pub parallel_tool_calls: bool,
@@ -155,6 +156,7 @@ impl From<VerbosityConfig> for OpenAiVerbosity {
#[derive(Debug, Serialize, Clone, PartialEq)]
pub struct ResponsesApiRequest {
pub model: String,
#[serde(skip_serializing_if = "String::is_empty")]
pub instructions: String,
pub input: Vec<ResponseItem>,
pub tools: Vec<serde_json::Value>,
@@ -200,6 +202,7 @@ impl From<&ResponsesApiRequest> for ResponseCreateWsRequest {
#[derive(Debug, Serialize)]
pub struct ResponseCreateWsRequest {
pub model: String,
#[serde(skip_serializing_if = "String::is_empty")]
pub instructions: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub previous_response_id: Option<String>,

View File

@@ -419,7 +419,9 @@ impl RealtimeWebsocketEvents {
}
RealtimeEvent::SessionUpdated { .. }
| RealtimeEvent::AudioOut(_)
| RealtimeEvent::ResponseCreated(_)
| RealtimeEvent::ResponseCancelled(_)
| RealtimeEvent::ResponseDone(_)
| RealtimeEvent::ConversationItemAdded(_)
| RealtimeEvent::ConversationItemDone { .. }
| RealtimeEvent::Error(_) => {}
@@ -724,6 +726,8 @@ mod tests {
use codex_protocol::protocol::RealtimeHandoffRequested;
use codex_protocol::protocol::RealtimeInputAudioSpeechStarted;
use codex_protocol::protocol::RealtimeResponseCancelled;
use codex_protocol::protocol::RealtimeResponseCreated;
use codex_protocol::protocol::RealtimeResponseDone;
use codex_protocol::protocol::RealtimeVoice;
use http::HeaderValue;
use pretty_assertions::assert_eq;
@@ -868,7 +872,7 @@ mod tests {
"item": {
"id": "item_123",
"type": "function_call",
"name": "codex",
"name": "background_agent",
"call_id": "call_123",
"arguments": "{\"prompt\":\"delegate this\"}"
}
@@ -982,14 +986,14 @@ mod tests {
}
#[test]
fn parse_realtime_v2_response_done_handoff_event() {
fn parse_realtime_v2_response_done_event() {
let payload = json!({
"type": "response.done",
"response": {
"output": [{
"id": "item_123",
"type": "function_call",
"name": "codex",
"name": "background_agent",
"call_id": "call_123",
"arguments": "{\"prompt\":\"delegate from done\"}"
}]
@@ -999,11 +1003,8 @@ mod tests {
assert_eq!(
parse_realtime_event(payload.as_str(), RealtimeEventParser::RealtimeV2),
Some(RealtimeEvent::HandoffRequested(RealtimeHandoffRequested {
handoff_id: "call_123".to_string(),
item_id: "item_123".to_string(),
input_transcript: "delegate from done".to_string(),
active_transcript: Vec::new(),
Some(RealtimeEvent::ResponseDone(RealtimeResponseDone {
response_id: None
}))
);
}
@@ -1018,10 +1019,9 @@ mod tests {
assert_eq!(
parse_realtime_event(payload.as_str(), RealtimeEventParser::RealtimeV2),
Some(RealtimeEvent::ConversationItemAdded(json!({
"type": "response.created",
"response": {"id": "resp_created_1"}
})))
Some(RealtimeEvent::ResponseCreated(RealtimeResponseCreated {
response_id: Some("resp_created_1".to_string())
}))
);
}
@@ -1288,7 +1288,7 @@ mod tests {
assert_eq!(fourth_json["handoff_id"], "handoff_1");
assert_eq!(
fourth_json["output_text"],
"\"Agent Final Message\":\n\nhello from codex"
"\"Agent Final Message\":\n\nhello from background agent"
);
ws.send(Message::Text(
@@ -1412,7 +1412,7 @@ mod tests {
connection
.send_conversation_handoff_append(
"handoff_1".to_string(),
"hello from codex".to_string(),
"hello from background agent".to_string(),
)
.await
.expect("send handoff");
@@ -1498,7 +1498,7 @@ mod tests {
}
#[tokio::test]
async fn realtime_v2_session_update_includes_codex_tool_and_handoff_output_item() {
async fn realtime_v2_session_update_includes_background_agent_tool_and_handoff_output_item() {
let listener = TcpListener::bind("127.0.0.1:0").await.expect("bind");
let addr = listener.local_addr().expect("local addr");
@@ -1558,7 +1558,7 @@ mod tests {
);
assert_eq!(
first_json["session"]["tools"][0]["name"],
Value::String("codex".to_string())
Value::String("background_agent".to_string())
);
assert_eq!(
first_json["session"]["tools"][0]["parameters"]["required"],

View File

@@ -27,8 +27,8 @@ use serde_json::json;
const REALTIME_V2_OUTPUT_MODALITY_AUDIO: &str = "audio";
const REALTIME_V2_TOOL_CHOICE: &str = "auto";
const REALTIME_V2_CODEX_TOOL_NAME: &str = "codex";
const REALTIME_V2_CODEX_TOOL_DESCRIPTION: &str = "Delegate a request to Codex and return the final result to the user. Use this as the default action. If the user asks to do something next, later, after this, or once current work finishes, call this tool so the work is actually queued instead of merely promising to do it later.";
const REALTIME_V2_BACKGROUND_AGENT_TOOL_NAME: &str = "background_agent";
const REALTIME_V2_BACKGROUND_AGENT_TOOL_DESCRIPTION: &str = "Send a user request to the background agent. Use this as the default action. If the background agent is idle, this starts a new task and returns the final result to the user. If the background agent is already working on a task, this sends the request as guidance to steer that previous task. If the user asks to do something next, later, after this, or once current work finishes, call this tool so the work is actually queued instead of merely promising to do it later.";
pub(super) fn conversation_item_create_message(text: String) -> RealtimeOutboundMessage {
RealtimeOutboundMessage::ConversationItemCreate {
@@ -93,14 +93,14 @@ pub(super) fn session_update_session(
},
tools: Some(vec![SessionFunctionTool {
r#type: SessionToolType::Function,
name: REALTIME_V2_CODEX_TOOL_NAME.to_string(),
description: REALTIME_V2_CODEX_TOOL_DESCRIPTION.to_string(),
name: REALTIME_V2_BACKGROUND_AGENT_TOOL_NAME.to_string(),
description: REALTIME_V2_BACKGROUND_AGENT_TOOL_DESCRIPTION.to_string(),
parameters: json!({
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": "The user request to delegate to Codex."
"description": "The user request to delegate to the background agent."
}
},
"required": ["prompt"],

View File

@@ -7,11 +7,13 @@ use codex_protocol::protocol::RealtimeEvent;
use codex_protocol::protocol::RealtimeHandoffRequested;
use codex_protocol::protocol::RealtimeInputAudioSpeechStarted;
use codex_protocol::protocol::RealtimeResponseCancelled;
use codex_protocol::protocol::RealtimeResponseCreated;
use codex_protocol::protocol::RealtimeResponseDone;
use serde_json::Map as JsonMap;
use serde_json::Value;
use tracing::debug;
const CODEX_TOOL_NAME: &str = "codex";
const BACKGROUND_AGENT_TOOL_NAME: &str = "background_agent";
const DEFAULT_AUDIO_SAMPLE_RATE: u32 = 24_000;
const DEFAULT_AUDIO_CHANNELS: u16 = 1;
const TOOL_ARGUMENT_KEYS: [&str; 5] = ["input_transcript", "input", "text", "prompt", "query"];
@@ -47,24 +49,17 @@ pub(super) fn parse_realtime_event_v2(payload: &str) -> Option<RealtimeEvent> {
.cloned()
.map(RealtimeEvent::ConversationItemAdded),
"conversation.item.done" => parse_conversation_item_done_event(&parsed),
"response.created" => Some(RealtimeEvent::ConversationItemAdded(parsed)),
"response.done" => parse_response_done_event(parsed),
"response.created" => Some(RealtimeEvent::ResponseCreated(RealtimeResponseCreated {
response_id: parse_response_event_response_id(&parsed),
})),
"response.cancelled" => Some(RealtimeEvent::ResponseCancelled(
RealtimeResponseCancelled {
response_id: parsed
.get("response")
.and_then(Value::as_object)
.and_then(|response| response.get("id"))
.and_then(Value::as_str)
.map(str::to_string)
.or_else(|| {
parsed
.get("response_id")
.and_then(Value::as_str)
.map(str::to_string)
}),
response_id: parse_response_event_response_id(&parsed),
},
)),
"response.done" => Some(RealtimeEvent::ResponseDone(RealtimeResponseDone {
response_id: parse_response_event_response_id(&parsed),
})),
"error" => parse_error_event(&parsed),
_ => {
debug!("received unsupported realtime v2 event type: {message_type}, data: {payload}");
@@ -73,6 +68,21 @@ pub(super) fn parse_realtime_event_v2(payload: &str) -> Option<RealtimeEvent> {
}
}
/// Extracts the response identifier carried by a realtime response event payload.
///
/// The nested `response.id` string is preferred (it is only consulted when `response` is a
/// JSON object); otherwise the top-level `response_id` string is used. Returns `None` when
/// neither location holds a string.
fn parse_response_event_response_id(parsed: &Value) -> Option<String> {
    // Keep both candidates as borrowed `&str` and allocate once, after choosing.
    let nested_id = parsed
        .get("response")
        .and_then(Value::as_object)
        .and_then(|response| response.get("id"))
        .and_then(Value::as_str);

    nested_id
        .or_else(|| parsed.get("response_id").and_then(Value::as_str))
        .map(str::to_string)
}
fn parse_output_audio_delta_event(parsed: &Value) -> Option<RealtimeEvent> {
let data = parsed
.get("delta")
@@ -116,34 +126,10 @@ fn parse_conversation_item_done_event(parsed: &Value) -> Option<RealtimeEvent> {
.map(|item_id| RealtimeEvent::ConversationItemDone { item_id })
}
/// Maps a `response.done` payload to a realtime event.
///
/// When `parse_response_done_handoff_requested_event` produces an event for the payload, that
/// event is returned; otherwise the whole payload is wrapped in
/// `RealtimeEvent::ConversationItemAdded`. The result is therefore always `Some`.
fn parse_response_done_event(parsed: Value) -> Option<RealtimeEvent> {
    Some(
        parse_response_done_handoff_requested_event(&parsed)
            // `parsed` is only moved into the fallback when no handoff was found.
            .unwrap_or_else(|| RealtimeEvent::ConversationItemAdded(parsed)),
    )
}
/// Looks for a handoff request inside the `response.output` array of a `response.done` payload.
///
/// Picks the first output item whose `type` is `"function_call"` and whose `name` equals
/// `CODEX_TOOL_NAME`, then hands that item to `parse_handoff_requested_event`. Returns `None`
/// when `response` is not an object, `output` is not an array, or no item matches.
fn parse_response_done_handoff_requested_event(parsed: &Value) -> Option<RealtimeEvent> {
    let outputs = parsed
        .get("response")
        .and_then(Value::as_object)
        .and_then(|response| response.get("output"))
        .and_then(Value::as_array)?;

    // First item satisfying both predicates, in array order.
    let handoff_item = outputs
        .iter()
        .filter(|item| item.get("type").and_then(Value::as_str) == Some("function_call"))
        .find(|item| item.get("name").and_then(Value::as_str) == Some(CODEX_TOOL_NAME))?
        .as_object()?;

    parse_handoff_requested_event(handoff_item)
}
fn parse_handoff_requested_event(item: &JsonMap<String, Value>) -> Option<RealtimeEvent> {
let item_type = item.get("type").and_then(Value::as_str);
let item_name = item.get("name").and_then(Value::as_str);
if item_type != Some("function_call") || item_name != Some(CODEX_TOOL_NAME) {
if item_type != Some("function_call") || item_name != Some(BACKGROUND_AGENT_TOOL_NAME) {
return None;
}

View File

@@ -92,6 +92,7 @@ pub fn parse_rate_limit_for_limit(
primary,
secondary,
credits,
spend_control: None,
plan_type: None,
})
}
@@ -155,6 +156,7 @@ pub fn parse_rate_limit_event(payload: &str) -> Option<RateLimitSnapshot> {
primary,
secondary,
credits,
spend_control: None,
plan_type: event.plan_type,
})
}