Files
codex/codex-rs/core/tests/suite/items.rs
neil-oai a92a5085bd Forward app-server turn clientMetadata to Responses (#16009)
## Summary
App-server v2 already receives turn-scoped `clientMetadata`, but the
Rust app-server was dropping it before the outbound Responses request.
This change keeps the fix lightweight by threading that metadata through
the existing turn-metadata path rather than inventing a new transport.

## What we're trying to do and why
We want turn-scoped metadata from the app-server protocol layer,
especially fields like Hermes/GAAS run IDs, to survive all the way to
the actual Responses API request so it is visible in downstream
websocket request logging and analytics.

The specific bug was:
- app-server protocol uses camelCase `clientMetadata`
- Responses transport already has an existing turn metadata carrier:
`x-codex-turn-metadata`
- websocket transport already rewrites that header into
`request.request_body.client_metadata["x-codex-turn-metadata"]`
- but the Rust app-server never parsed or stored `clientMetadata`, so
nothing from the app-server request was making it into that existing
path

This PR fixes that without adding a new header or a second metadata
channel.

## How we did it
### Protocol surface
- Add optional `clientMetadata` to v2 `TurnStartParams` and
`TurnSteerParams`
- Regenerate the JSON schema / TypeScript fixtures
- Update app-server docs to describe the field and its behavior

### Runtime plumbing
- Add a dedicated core op for app-server user input carrying turn-scoped
metadata: `Op::UserInputWithClientMetadata`
- Wire `turn/start` and `turn/steer` through that op / signature path
instead of dropping the metadata at the message-processor boundary
- Store the metadata in `TurnMetadataState`

### Transport behavior
- Reuse the existing serialized `x-codex-turn-metadata` payload
- Merge the new app-server `clientMetadata` into that JSON additively
- Do **not** replace built-in reserved fields already present in the
turn metadata payload
- Keep websocket behavior unchanged at the outer shape level: it still
sends only `client_metadata["x-codex-turn-metadata"]`, but that JSON
string now contains the merged fields
- Keep HTTP fallback behavior unchanged except that the existing
`x-codex-turn-metadata` header now includes the merged fields too

### Request shape before / after
Before, a websocket `response.create` looked like:
```json
{
  "type": "response.create",
  "client_metadata": {
    "x-codex-turn-metadata": "{\"session_id\":\"...\",\"turn_id\":\"...\"}"
  }
}
```
Even if the app-server caller supplied `clientMetadata`, it was not
represented there.

After, the same request shape is preserved, but the serialized payload
now includes the new turn-scoped fields:
```json
{
  "type": "response.create",
  "client_metadata": {
    "x-codex-turn-metadata": "{\"session_id\":\"...\",\"turn_id\":\"...\",\"fiber_run_id\":\"fiber-start-123\",\"origin\":\"gaas\"}"
  }
}
```

## Validation
### Targeted tests added / updated
- protocol round-trip coverage for `clientMetadata` on `turn/start` and
`turn/steer`
- protocol round-trip coverage for `Op::UserInputWithClientMetadata`
- `TurnMetadataState` merge test proving client metadata is added
without overwriting reserved built-in fields
- websocket request-shape test proving outbound `response.create`
contains merged metadata inside
`client_metadata["x-codex-turn-metadata"]`
- app-server integration tests proving:
- `turn/start` forwards `clientMetadata` into the outbound Responses
request path
  - websocket warmup + real turn request both behave correctly
  - `turn/steer` updates the follow-up request metadata

### Commands run
- `just write-app-server-schema`
- `cargo test -p codex-app-server-protocol`
- `cargo test -p codex-protocol`
- `cargo test -p codex-core
turn_metadata_state_merges_client_metadata_without_replacing_reserved_fields
--lib`
- `cargo test -p codex-core --test all
responses_websocket_preserves_custom_turn_metadata_fields`
- `cargo test -p codex-app-server --test all client_metadata`
- `cargo test -p codex-app-server --test all
turn_start_forwards_client_metadata_to_responses_websocket_request_body_v2
-- --nocapture`
- `just fmt`
- `just fix -p codex-core -p codex-protocol -p codex-app-server-protocol
-p codex-app-server`
- `just fix -p codex-exec -p codex-tui-app-server`
- `just argument-comment-lint`

### Full suite note
`cargo test` in `codex-rs` still fails in:
-
`suite::v2::turn_interrupt::turn_interrupt_resolves_pending_command_approval_request`

I verified that same failure on a clean detached `HEAD` worktree with an
isolated `CARGO_TARGET_DIR`, so it is not caused by this patch.
2026-04-09 11:52:37 -07:00

1185 lines
38 KiB
Rust

#![cfg(not(target_os = "windows"))]
use anyhow::Ok;
use codex_protocol::config_types::CollaborationMode;
use codex_protocol::config_types::ModeKind;
use codex_protocol::config_types::Settings;
use codex_protocol::items::AgentMessageContent;
use codex_protocol::items::TurnItem;
use codex_protocol::models::WebSearchAction;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ItemCompletedEvent;
use codex_protocol::protocol::ItemStartedEvent;
use codex_protocol::protocol::Op;
use codex_protocol::user_input::ByteRange;
use codex_protocol::user_input::TextElement;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_image_generation_call;
use core_test_support::responses::ev_message_item_added;
use core_test_support::responses::ev_output_text_delta;
use core_test_support::responses::ev_reasoning_item;
use core_test_support::responses::ev_reasoning_item_added;
use core_test_support::responses::ev_reasoning_summary_text_delta;
use core_test_support::responses::ev_reasoning_text_delta;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::ev_web_search_call_added_partial;
use core_test_support::responses::ev_web_search_call_done;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
use std::path::Path;
use std::path::PathBuf;
fn image_generation_artifact_path(codex_home: &Path, session_id: &str, call_id: &str) -> PathBuf {
fn sanitize(value: &str) -> String {
let mut sanitized: String = value
.chars()
.map(|ch| {
if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' {
ch
} else {
'_'
}
})
.collect();
if sanitized.is_empty() {
sanitized = "generated_image".to_string();
}
sanitized
}
codex_home
.join("generated_images")
.join(sanitize(session_id))
.join(format!("{}.png", sanitize(call_id)))
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_message_item_is_emitted() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex { codex, .. } = test_codex().build(&server).await?;
let first_response = sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]);
mount_sse_once(&server, first_response).await;
let text_elements = vec![TextElement::new(
ByteRange { start: 0, end: 6 },
Some("<file>".into()),
)];
let expected_input = UserInput::Text {
text: "please inspect sample.txt".into(),
text_elements: text_elements.clone(),
};
codex
.submit(Op::UserInput {
items: vec![expected_input.clone()],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let started_item = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::UserMessage(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
let completed_item = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::UserMessage(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
assert_eq!(started_item.id, completed_item.id);
assert_eq!(started_item.content, vec![expected_input.clone()]);
assert_eq!(completed_item.content, vec![expected_input]);
let legacy_message = wait_for_event_match(&codex, |ev| match ev {
EventMsg::UserMessage(event) => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(legacy_message.message, "please inspect sample.txt");
assert_eq!(legacy_message.text_elements, text_elements);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn assistant_message_item_is_emitted() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex { codex, .. } = test_codex().build(&server).await?;
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_assistant_message("msg-1", "all done"),
ev_completed("resp-1"),
]);
mount_sse_once(&server, first_response).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "please summarize results".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let started = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::AgentMessage(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
let completed = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::AgentMessage(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
assert_eq!(started.id, completed.id);
let Some(codex_protocol::items::AgentMessageContent::Text { text }) = completed.content.first()
else {
panic!("expected agent message text content");
};
assert_eq!(text, "all done");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_item_is_emitted() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex { codex, .. } = test_codex().build(&server).await?;
let reasoning_item = ev_reasoning_item(
"reasoning-1",
&["Consider inputs", "Compute output"],
&["Detailed reasoning trace"],
);
let first_response = sse(vec![
ev_response_created("resp-1"),
reasoning_item,
ev_completed("resp-1"),
]);
mount_sse_once(&server, first_response).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "explain your reasoning".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let started = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::Reasoning(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
let completed = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::Reasoning(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
assert_eq!(started.id, completed.id);
assert_eq!(
completed.summary_text,
vec!["Consider inputs".to_string(), "Compute output".to_string()]
);
assert_eq!(
completed.raw_content,
vec!["Detailed reasoning trace".to_string()]
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn web_search_item_is_emitted() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex { codex, .. } = test_codex().build(&server).await?;
let web_search_added = ev_web_search_call_added_partial("web-search-1", "in_progress");
let web_search_done = ev_web_search_call_done("web-search-1", "completed", "weather seattle");
let first_response = sse(vec![
ev_response_created("resp-1"),
web_search_added,
web_search_done,
ev_completed("resp-1"),
]);
mount_sse_once(&server, first_response).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "find the weather".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let begin = wait_for_event_match(&codex, |ev| match ev {
EventMsg::WebSearchBegin(event) => Some(event.clone()),
_ => None,
})
.await;
let completed = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::WebSearch(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
assert_eq!(begin.call_id, "web-search-1");
assert_eq!(completed.id, begin.call_id);
assert_eq!(
completed.action,
WebSearchAction::Search {
query: Some("weather seattle".to_string()),
queries: None,
}
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn image_generation_call_event_is_emitted() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
config,
session_configured,
..
} = test_codex().build(&server).await?;
let call_id = "ig_image_saved_to_temp_dir_default";
let expected_saved_path = image_generation_artifact_path(
config.codex_home.as_path(),
&session_configured.session_id.to_string(),
call_id,
);
let _ = std::fs::remove_file(&expected_saved_path);
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_image_generation_call(call_id, "completed", "A tiny blue square", "Zm9v"),
ev_completed("resp-1"),
]);
mount_sse_once(&server, first_response).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "generate a tiny blue square".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let begin = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ImageGenerationBegin(event) => Some(event.clone()),
_ => None,
})
.await;
let end = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ImageGenerationEnd(event) => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(begin.call_id, call_id);
assert_eq!(end.call_id, call_id);
assert_eq!(end.status, "completed");
assert_eq!(end.revised_prompt, Some("A tiny blue square".to_string()));
assert_eq!(end.result, "Zm9v");
assert_eq!(
end.saved_path,
Some(expected_saved_path.to_string_lossy().into_owned())
);
assert_eq!(std::fs::read(&expected_saved_path)?, b"foo");
let _ = std::fs::remove_file(&expected_saved_path);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn image_generation_call_event_is_emitted_when_image_save_fails() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
config,
session_configured,
..
} = test_codex().build(&server).await?;
let expected_saved_path = image_generation_artifact_path(
config.codex_home.as_path(),
&session_configured.session_id.to_string(),
"ig_invalid",
);
let _ = std::fs::remove_file(&expected_saved_path);
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_image_generation_call("ig_invalid", "completed", "broken payload", "_-8"),
ev_completed("resp-1"),
]);
mount_sse_once(&server, first_response).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "generate an image".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let begin = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ImageGenerationBegin(event) => Some(event.clone()),
_ => None,
})
.await;
let end = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ImageGenerationEnd(event) => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(begin.call_id, "ig_invalid");
assert_eq!(end.call_id, "ig_invalid");
assert_eq!(end.status, "completed");
assert_eq!(end.revised_prompt, Some("broken payload".to_string()));
assert_eq!(end.result, "_-8");
assert_eq!(end.saved_path, None);
assert!(!expected_saved_path.exists());
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn agent_message_content_delta_has_item_metadata() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
session_configured,
..
} = test_codex().build(&server).await?;
let stream = sse(vec![
ev_response_created("resp-1"),
ev_message_item_added("msg-1", ""),
ev_output_text_delta("streamed response"),
ev_assistant_message("msg-1", "streamed response"),
ev_completed("resp-1"),
]);
mount_sse_once(&server, stream).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "please stream text".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let (started_turn_id, started_item) = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(ItemStartedEvent {
turn_id,
item: TurnItem::AgentMessage(item),
..
}) => Some((turn_id.clone(), item.clone())),
_ => None,
})
.await;
let delta_event = wait_for_event_match(&codex, |ev| match ev {
EventMsg::AgentMessageContentDelta(event) => Some(event.clone()),
_ => None,
})
.await;
let legacy_delta = wait_for_event_match(&codex, |ev| match ev {
EventMsg::AgentMessageDelta(event) => Some(event.clone()),
_ => None,
})
.await;
let completed_item = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::AgentMessage(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
let session_id = session_configured.session_id.to_string();
assert_eq!(delta_event.thread_id, session_id);
assert_eq!(delta_event.turn_id, started_turn_id);
assert_eq!(delta_event.item_id, started_item.id);
assert_eq!(delta_event.delta, "streamed response");
assert_eq!(legacy_delta.delta, "streamed response");
assert_eq!(completed_item.id, started_item.id);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn plan_mode_emits_plan_item_from_proposed_plan_block() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
session_configured,
..
} = test_codex().build(&server).await?;
let plan_block = "<proposed_plan>\n- Step 1\n- Step 2\n</proposed_plan>\n";
let full_message = format!("Intro\n{plan_block}Outro");
let stream = sse(vec![
ev_response_created("resp-1"),
ev_message_item_added("msg-1", ""),
ev_output_text_delta(&full_message),
ev_assistant_message("msg-1", &full_message),
ev_completed("resp-1"),
]);
mount_sse_once(&server, stream).await;
let collaboration_mode = CollaborationMode {
mode: ModeKind::Plan,
settings: Settings {
model: session_configured.model.clone(),
reasoning_effort: None,
developer_instructions: None,
},
};
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please plan".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: std::env::current_dir()?,
approval_policy: codex_protocol::protocol::AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess,
model: session_configured.model.clone(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: Some(collaboration_mode),
personality: None,
})
.await?;
let plan_delta = wait_for_event_match(&codex, |ev| match ev {
EventMsg::PlanDelta(event) => Some(event.clone()),
_ => None,
})
.await;
let plan_completed = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::Plan(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
assert_eq!(
plan_delta.thread_id,
session_configured.session_id.to_string()
);
assert_eq!(plan_delta.delta, "- Step 1\n- Step 2\n");
assert_eq!(plan_completed.text, "- Step 1\n- Step 2\n");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn plan_mode_strips_plan_from_agent_messages() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
session_configured,
..
} = test_codex().build(&server).await?;
let plan_block = "<proposed_plan>\n- Step 1\n- Step 2\n</proposed_plan>\n";
let full_message = format!("Intro\n{plan_block}Outro");
let stream = sse(vec![
ev_response_created("resp-1"),
ev_message_item_added("msg-1", ""),
ev_output_text_delta(&full_message),
ev_assistant_message("msg-1", &full_message),
ev_completed("resp-1"),
]);
mount_sse_once(&server, stream).await;
let collaboration_mode = CollaborationMode {
mode: ModeKind::Plan,
settings: Settings {
model: session_configured.model.clone(),
reasoning_effort: None,
developer_instructions: None,
},
};
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please plan".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: std::env::current_dir()?,
approval_policy: codex_protocol::protocol::AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess,
model: session_configured.model.clone(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: Some(collaboration_mode),
personality: None,
})
.await?;
let mut agent_deltas = Vec::new();
let mut plan_delta = None;
let mut agent_item = None;
let mut plan_item = None;
while plan_delta.is_none() || agent_item.is_none() || plan_item.is_none() {
let ev = wait_for_event(&codex, |_| true).await;
match ev {
EventMsg::AgentMessageContentDelta(event) => {
agent_deltas.push(event.delta);
}
EventMsg::PlanDelta(event) => {
plan_delta = Some(event.delta);
}
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::AgentMessage(item),
..
}) => {
agent_item = Some(item);
}
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::Plan(item),
..
}) => {
plan_item = Some(item);
}
_ => {}
}
}
let agent_text = agent_deltas.concat();
assert_eq!(agent_text, "Intro\nOutro");
assert_eq!(plan_delta.unwrap(), "- Step 1\n- Step 2\n");
assert_eq!(plan_item.unwrap().text, "- Step 1\n- Step 2\n");
let agent_text_from_item: String = agent_item
.unwrap()
.content
.iter()
.map(|entry| match entry {
AgentMessageContent::Text { text } => text.as_str(),
})
.collect();
assert_eq!(agent_text_from_item, "Intro\nOutro");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn plan_mode_streaming_citations_are_stripped_across_added_deltas_and_done()
-> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
session_configured,
..
} = test_codex().build(&server).await?;
let added_text = "Intro <oai-mem-";
let deltas = [
"citation>outer-doc</oai-mem-citation>\n<proposed",
"_plan>\n- Step 1<oai-mem-",
"citation>plan-doc</oai-mem-citation>\n- Step 2\n</proposed_plan>\nOu",
"tro",
];
let full_message = format!("{added_text}{}", deltas.concat());
let mut events = vec![
ev_response_created("resp-1"),
ev_message_item_added("msg-1", added_text),
];
for delta in deltas {
events.push(ev_output_text_delta(delta));
}
events.push(ev_assistant_message("msg-1", &full_message));
events.push(ev_completed("resp-1"));
mount_sse_once(&server, sse(events)).await;
let collaboration_mode = CollaborationMode {
mode: ModeKind::Plan,
settings: Settings {
model: session_configured.model.clone(),
reasoning_effort: None,
developer_instructions: None,
},
};
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please plan with citations".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: std::env::current_dir()?,
approval_policy: codex_protocol::protocol::AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess,
model: session_configured.model.clone(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: Some(collaboration_mode),
personality: None,
})
.await?;
let mut agent_started = None;
let mut agent_started_idx = None;
let mut agent_completed = None;
let mut agent_completed_idx = None;
let mut plan_started = None;
let mut plan_started_idx = None;
let mut plan_completed = None;
let mut plan_completed_idx = None;
let mut agent_deltas = Vec::new();
let mut plan_deltas = Vec::new();
let mut first_agent_delta_idx = None;
let mut last_agent_delta_idx = None;
let mut first_plan_delta_idx = None;
let mut last_plan_delta_idx = None;
let mut idx = 0usize;
let turn_complete_idx = loop {
let ev = wait_for_event(&codex, |_| true).await;
match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::AgentMessage(item),
..
}) => {
agent_started_idx = Some(idx);
agent_started = Some(item);
}
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::Plan(item),
..
}) => {
plan_started_idx = Some(idx);
plan_started = Some(item);
}
EventMsg::AgentMessageContentDelta(event) => {
if first_agent_delta_idx.is_none() {
first_agent_delta_idx = Some(idx);
}
last_agent_delta_idx = Some(idx);
agent_deltas.push(event.delta);
}
EventMsg::PlanDelta(event) => {
if first_plan_delta_idx.is_none() {
first_plan_delta_idx = Some(idx);
}
last_plan_delta_idx = Some(idx);
plan_deltas.push(event.delta);
}
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::AgentMessage(item),
..
}) => {
agent_completed_idx = Some(idx);
agent_completed = Some(item);
}
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::Plan(item),
..
}) => {
plan_completed_idx = Some(idx);
plan_completed = Some(item);
}
EventMsg::TurnComplete(_) => {
break idx;
}
_ => {}
}
idx += 1;
};
let agent_started = agent_started.expect("agent item start should be emitted");
let agent_completed = agent_completed.expect("agent item completion should be emitted");
let plan_started = plan_started.expect("plan item start should be emitted");
let plan_completed = plan_completed.expect("plan item completion should be emitted");
assert_eq!(agent_started.id, agent_completed.id);
assert_eq!(plan_started.id, plan_completed.id);
assert_eq!(plan_started.text, "");
let agent_started_text: String = agent_started
.content
.iter()
.map(|entry| match entry {
AgentMessageContent::Text { text } => text.as_str(),
})
.collect();
let agent_completed_text: String = agent_completed
.content
.iter()
.map(|entry| match entry {
AgentMessageContent::Text { text } => text.as_str(),
})
.collect();
let agent_delta_text = agent_deltas.concat();
let plan_delta_text = plan_deltas.concat();
assert_eq!(agent_started_text, "");
assert_eq!(agent_delta_text, "Intro \nOutro");
assert_eq!(agent_completed_text, "Intro \nOutro");
assert_eq!(plan_delta_text, "- Step 1\n- Step 2\n");
assert_eq!(plan_completed.text, "- Step 1\n- Step 2\n");
for text in [
agent_started_text.as_str(),
agent_delta_text.as_str(),
agent_completed_text.as_str(),
plan_delta_text.as_str(),
plan_completed.text.as_str(),
] {
assert!(!text.contains("<oai-mem-citation>"));
assert!(!text.contains("</oai-mem-citation>"));
}
let agent_started_idx = agent_started_idx.expect("agent start index");
let agent_completed_idx = agent_completed_idx.expect("agent completion index");
let plan_started_idx = plan_started_idx.expect("plan start index");
let plan_completed_idx = plan_completed_idx.expect("plan completion index");
let first_agent_delta_idx = first_agent_delta_idx.expect("agent delta index");
let last_agent_delta_idx = last_agent_delta_idx.expect("agent delta index");
let first_plan_delta_idx = first_plan_delta_idx.expect("plan delta index");
let last_plan_delta_idx = last_plan_delta_idx.expect("plan delta index");
assert!(agent_started_idx < first_agent_delta_idx);
assert!(plan_started_idx < first_plan_delta_idx);
assert!(last_agent_delta_idx < agent_completed_idx);
assert!(last_plan_delta_idx < plan_completed_idx);
assert!(agent_completed_idx < turn_complete_idx);
assert!(plan_completed_idx < turn_complete_idx);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn plan_mode_streaming_proposed_plan_tag_split_across_added_and_delta_is_parsed()
-> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
session_configured,
..
} = test_codex().build(&server).await?;
let added_text = "Intro\n<proposed";
let deltas = ["_plan>\n- Step 1\n</proposed_plan>\nOutro"];
let full_message = format!("{added_text}{}", deltas.concat());
let mut events = vec![
ev_response_created("resp-1"),
ev_message_item_added("msg-1", added_text),
];
for delta in deltas {
events.push(ev_output_text_delta(delta));
}
events.push(ev_assistant_message("msg-1", &full_message));
events.push(ev_completed("resp-1"));
mount_sse_once(&server, sse(events)).await;
let collaboration_mode = CollaborationMode {
mode: ModeKind::Plan,
settings: Settings {
model: session_configured.model.clone(),
reasoning_effort: None,
developer_instructions: None,
},
};
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please plan".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: std::env::current_dir()?,
approval_policy: codex_protocol::protocol::AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess,
model: session_configured.model.clone(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: Some(collaboration_mode),
personality: None,
})
.await?;
let mut agent_started = None;
let mut agent_completed = None;
let mut plan_started = None;
let mut plan_completed = None;
let mut agent_deltas = Vec::new();
let mut plan_deltas = Vec::new();
loop {
let ev = wait_for_event(&codex, |_| true).await;
match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::AgentMessage(item),
..
}) => agent_started = Some(item),
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::Plan(item),
..
}) => plan_started = Some(item),
EventMsg::AgentMessageContentDelta(event) => agent_deltas.push(event.delta),
EventMsg::PlanDelta(event) => plan_deltas.push(event.delta),
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::AgentMessage(item),
..
}) => agent_completed = Some(item),
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::Plan(item),
..
}) => plan_completed = Some(item),
EventMsg::TurnComplete(_) => break,
_ => {}
}
}
let agent_started = agent_started.expect("agent item start should be emitted");
let agent_completed = agent_completed.expect("agent item completion should be emitted");
let plan_started = plan_started.expect("plan item start should be emitted");
let plan_completed = plan_completed.expect("plan item completion should be emitted");
let agent_started_text: String = agent_started
.content
.iter()
.map(|entry| match entry {
AgentMessageContent::Text { text } => text.as_str(),
})
.collect();
let agent_completed_text: String = agent_completed
.content
.iter()
.map(|entry| match entry {
AgentMessageContent::Text { text } => text.as_str(),
})
.collect();
assert_eq!(agent_started_text, "");
assert_eq!(agent_deltas.concat(), "Intro\nOutro");
assert_eq!(agent_completed_text, "Intro\nOutro");
assert_eq!(plan_started.text, "");
assert_eq!(plan_deltas.concat(), "- Step 1\n");
assert_eq!(plan_completed.text, "- Step 1\n");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn plan_mode_handles_missing_plan_close_tag() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex {
codex,
session_configured,
..
} = test_codex().build(&server).await?;
let full_message = "Intro\n<proposed_plan>\n- Step 1\n";
let stream = sse(vec![
ev_response_created("resp-1"),
ev_message_item_added("msg-1", ""),
ev_output_text_delta(full_message),
ev_assistant_message("msg-1", full_message),
ev_completed("resp-1"),
]);
mount_sse_once(&server, stream).await;
let collaboration_mode = CollaborationMode {
mode: ModeKind::Plan,
settings: Settings {
model: session_configured.model.clone(),
reasoning_effort: None,
developer_instructions: None,
},
};
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please plan".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd: std::env::current_dir()?,
approval_policy: codex_protocol::protocol::AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess,
model: session_configured.model.clone(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: Some(collaboration_mode),
personality: None,
})
.await?;
let mut plan_delta = None;
let mut plan_item = None;
let mut agent_item = None;
while plan_delta.is_none() || plan_item.is_none() || agent_item.is_none() {
let ev = wait_for_event(&codex, |_| true).await;
match ev {
EventMsg::PlanDelta(event) => {
plan_delta = Some(event.delta);
}
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::Plan(item),
..
}) => {
plan_item = Some(item);
}
EventMsg::ItemCompleted(ItemCompletedEvent {
item: TurnItem::AgentMessage(item),
..
}) => {
agent_item = Some(item);
}
_ => {}
}
}
assert_eq!(plan_delta.unwrap(), "- Step 1\n");
assert_eq!(plan_item.unwrap().text, "- Step 1\n");
let agent_text_from_item: String = agent_item
.unwrap()
.content
.iter()
.map(|entry| match entry {
AgentMessageContent::Text { text } => text.as_str(),
})
.collect();
assert_eq!(agent_text_from_item, "Intro\n");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_content_delta_has_item_metadata() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex { codex, .. } = test_codex().build(&server).await?;
let stream = sse(vec![
ev_response_created("resp-1"),
ev_reasoning_item_added("reasoning-1", &[""]),
ev_reasoning_summary_text_delta("step one"),
ev_reasoning_item("reasoning-1", &["step one"], &[]),
ev_completed("resp-1"),
]);
mount_sse_once(&server, stream).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "reason through it".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let reasoning_item = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::Reasoning(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
let delta_event = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ReasoningContentDelta(event) => Some(event.clone()),
_ => None,
})
.await;
let legacy_delta = wait_for_event_match(&codex, |ev| match ev {
EventMsg::AgentReasoningDelta(event) => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(delta_event.item_id, reasoning_item.id);
assert_eq!(delta_event.delta, "step one");
assert_eq!(legacy_delta.delta, "step one");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_raw_content_delta_respects_flag() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
config.show_raw_agent_reasoning = true;
})
.build(&server)
.await?;
let stream = sse(vec![
ev_response_created("resp-1"),
ev_reasoning_item_added("reasoning-raw", &[""]),
ev_reasoning_text_delta("raw detail"),
ev_reasoning_item("reasoning-raw", &["complete"], &["raw detail"]),
ev_completed("resp-1"),
]);
mount_sse_once(&server, stream).await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "show raw reasoning".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
let reasoning_item = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ItemStarted(ItemStartedEvent {
item: TurnItem::Reasoning(item),
..
}) => Some(item.clone()),
_ => None,
})
.await;
let delta_event = wait_for_event_match(&codex, |ev| match ev {
EventMsg::ReasoningRawContentDelta(event) => Some(event.clone()),
_ => None,
})
.await;
let legacy_delta = wait_for_event_match(&codex, |ev| match ev {
EventMsg::AgentReasoningRawContentDelta(event) => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(delta_event.item_id, reasoning_item.id);
assert_eq!(delta_event.delta, "raw detail");
assert_eq!(legacy_delta.delta, "raw detail");
Ok(())
}