#![cfg(not(target_os = "windows"))] use anyhow::Ok; use codex_features::Feature; use codex_protocol::config_types::CollaborationMode; use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::Settings; use codex_protocol::items::AgentMessageContent; use codex_protocol::items::TurnItem; use codex_protocol::models::WebSearchAction; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::ItemCompletedEvent; use codex_protocol::protocol::ItemStartedEvent; use codex_protocol::protocol::Op; use codex_protocol::protocol::SandboxPolicy; use codex_protocol::user_input::ByteRange; use codex_protocol::user_input::TextElement; use codex_protocol::user_input::UserInput; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; use core_test_support::responses::ev_image_generation_call; use core_test_support::responses::ev_message_item_added; use core_test_support::responses::ev_output_text_delta; use core_test_support::responses::ev_reasoning_item; use core_test_support::responses::ev_reasoning_item_added; use core_test_support::responses::ev_reasoning_summary_text_delta; use core_test_support::responses::ev_reasoning_text_delta; use core_test_support::responses::ev_response_created; use core_test_support::responses::ev_web_search_call_added_partial; use core_test_support::responses::ev_web_search_call_done; use core_test_support::responses::mount_sse_once; use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_match; use pretty_assertions::assert_eq; use serde_json::Value; use serde_json::json; use 
std::path::Path; use std::path::PathBuf; use tempfile::tempdir; use tokio::time::Duration; use tokio::time::Instant; use tokio::time::sleep; fn user_message_item_by_text<'a>(input: &'a [Value], text: &str) -> &'a Value { input .iter() .find(|item| { if item.get("type").and_then(Value::as_str) != Some("message") || item.get("role").and_then(Value::as_str) != Some("user") { return false; } item.get("content") .and_then(Value::as_array) .is_some_and(|content| { content.iter().any(|entry| { entry.get("type").and_then(Value::as_str) == Some("input_text") && entry.get("text").and_then(Value::as_str) == Some(text) }) }) }) .unwrap_or_else(|| panic!("submitted user message input item not found for text: {text}")) } fn image_generation_artifact_path(codex_home: &Path, session_id: &str, call_id: &str) -> PathBuf { fn sanitize(value: &str) -> String { let mut sanitized: String = value .chars() .map(|ch| { if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' { ch } else { '_' } }) .collect(); if sanitized.is_empty() { sanitized = "generated_image".to_string(); } sanitized } codex_home .join("generated_images") .join(sanitize(session_id)) .join(format!("{}.png", sanitize(call_id))) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn user_message_item_is_emitted() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, .. 
} = test_codex().build(&server).await?; let first_response = sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]); mount_sse_once(&server, first_response).await; let text_elements = vec![TextElement::new( ByteRange { start: 0, end: 6 }, Some("".into()), )]; let expected_input = UserInput::Text { text: "please inspect sample.txt".into(), text_elements: text_elements.clone(), }; codex .submit(Op::UserInput { items: vec![expected_input.clone()], final_output_json_schema: None, }) .await?; let started_item = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::UserMessage(item), .. }) => Some(item.clone()), _ => None, }) .await; let completed_item = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::UserMessage(item), .. }) => Some(item.clone()), _ => None, }) .await; assert_eq!(started_item.id, completed_item.id); assert_eq!(started_item.content, vec![expected_input.clone()]); assert_eq!(completed_item.content, vec![expected_input]); let legacy_message = wait_for_event_match(&codex, |ev| match ev { EventMsg::UserMessage(event) => Some(event.clone()), _ => None, }) .await; assert_eq!(legacy_message.message, "please inspect sample.txt"); assert_eq!(legacy_message.text_elements, text_elements); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn user_message_type_metadata_is_emitted_when_feature_enabled() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, .. 
} = test_codex() .with_config(|config| { config .features .enable(Feature::ItemMetadata) .expect("feature flag should be enabled for this test"); }) .build(&server) .await?; let first_response = sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]); let req = mount_sse_once(&server, first_response).await; let expected_input = UserInput::Text { text: "please inspect sample.txt".into(), text_elements: Vec::new(), }; codex .submit(Op::UserInput { items: vec![expected_input], final_output_json_schema: None, }) .await?; let started_item = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::UserMessage(item), .. }) => Some(item.clone()), _ => None, }) .await; let completed_item = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::UserMessage(item), .. }) => Some(item.clone()), _ => None, }) .await; assert_eq!(started_item.id, completed_item.id); assert_eq!(started_item.content, completed_item.content); let deadline = Instant::now() + Duration::from_secs(2); while req.requests().is_empty() { if Instant::now() >= deadline { panic!("timed out waiting for responses request capture"); } sleep(Duration::from_millis(10)).await; } let body: Value = req.single_request().body_json(); let input = body .get("input") .and_then(Value::as_array) .expect("request input array"); let user_message = input .iter() .find(|item| { if item.get("type").and_then(Value::as_str) != Some("message") || item.get("role").and_then(Value::as_str) != Some("user") { return false; } item.get("content") .and_then(Value::as_array) .is_some_and(|content| { content.iter().any(|entry| { entry.get("type").and_then(Value::as_str) == Some("input_text") && entry.get("text").and_then(Value::as_str) == Some("please inspect sample.txt") }) }) }) .expect("submitted user message input item"); assert_eq!( user_message .get("metadata") .and_then(|metadata| metadata.get("user_message_type")) 
.and_then(Value::as_str), Some("prompt") ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn user_message_type_prompt_steering_metadata_is_emitted_when_feature_enabled() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let temp = tempdir()?; let unblock_path = temp.path().join("unblock-steering"); let command = format!( "while [ ! -f \"{}\" ]; do sleep 0.01; done; echo done", unblock_path.display() ); let call_id = "shell-steering-call"; let responses = mount_sse_sequence( &server, vec![ sse(vec![ ev_response_created("resp-1"), ev_function_call( call_id, "shell", &serde_json::to_string(&json!({ "command": ["/bin/sh", "-c", command], }))?, ), ev_completed("resp-1"), ]), sse(vec![ ev_assistant_message("msg-2", "done"), ev_completed("resp-2"), ]), ], ) .await; let test = test_codex() .with_model("gpt-5") .with_config(|config| { config .features .enable(Feature::ItemMetadata) .expect("feature flag should be enabled for this test"); }) .build(&server) .await?; let codex = test.codex.clone(); let turn_model = test.session_configured.model.clone(); codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "start steering flow".into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: test.cwd_path().to_path_buf(), approval_policy: AskForApproval::Never, sandbox_policy: SandboxPolicy::DangerFullAccess, model: turn_model, effort: None, summary: None, service_tier: None, collaboration_mode: None, personality: None, }) .await?; let turn_id = wait_for_event_match(&codex, |ev| match ev { EventMsg::TurnStarted(event) => Some(event.turn_id.clone()), _ => None, }) .await; wait_for_event_match(&codex, |ev| match ev { EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(()), _ => None, }) .await; let steering_text = "steering metadata check"; let steered_turn_id = codex .steer_input( vec![UserInput::Text { text: steering_text.into(), text_elements: Vec::new(), }], 
Some(turn_id.as_str()), ) .await .expect("steer should succeed on active turn"); assert_eq!(steered_turn_id, turn_id); std::fs::write(&unblock_path, "go")?; wait_for_event(&codex, |ev| match ev { EventMsg::TurnComplete(event) => event.turn_id == turn_id, _ => false, }) .await; let deadline = Instant::now() + Duration::from_secs(3); while responses.requests().len() < 2 { if Instant::now() >= deadline { panic!("timed out waiting for second responses request"); } sleep(Duration::from_millis(10)).await; } let requests = responses.requests(); let second_body = requests .get(1) .expect("second responses request should be present") .body_json(); let input = second_body .get("input") .and_then(Value::as_array) .expect("request input array"); let steered_message = user_message_item_by_text(input, steering_text); assert_eq!( steered_message .get("metadata") .and_then(|metadata| metadata.get("user_message_type")) .and_then(Value::as_str), Some("prompt_steering") ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn assistant_message_item_is_emitted() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, .. } = test_codex().build(&server).await?; let first_response = sse(vec![ ev_response_created("resp-1"), ev_assistant_message("msg-1", "all done"), ev_completed("resp-1"), ]); mount_sse_once(&server, first_response).await; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "please summarize results".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; let started = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::AgentMessage(item), .. }) => Some(item.clone()), _ => None, }) .await; let completed = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::AgentMessage(item), .. 
}) => Some(item.clone()), _ => None, }) .await; assert_eq!(started.id, completed.id); let Some(codex_protocol::items::AgentMessageContent::Text { text }) = completed.content.first() else { panic!("expected agent message text content"); }; assert_eq!(text, "all done"); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn reasoning_item_is_emitted() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, .. } = test_codex().build(&server).await?; let reasoning_item = ev_reasoning_item( "reasoning-1", &["Consider inputs", "Compute output"], &["Detailed reasoning trace"], ); let first_response = sse(vec![ ev_response_created("resp-1"), reasoning_item, ev_completed("resp-1"), ]); mount_sse_once(&server, first_response).await; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "explain your reasoning".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; let started = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::Reasoning(item), .. }) => Some(item.clone()), _ => None, }) .await; let completed = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::Reasoning(item), .. }) => Some(item.clone()), _ => None, }) .await; assert_eq!(started.id, completed.id); assert_eq!( completed.summary_text, vec!["Consider inputs".to_string(), "Compute output".to_string()] ); assert_eq!( completed.raw_content, vec!["Detailed reasoning trace".to_string()] ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn web_search_item_is_emitted() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, .. 
} = test_codex().build(&server).await?; let web_search_added = ev_web_search_call_added_partial("web-search-1", "in_progress"); let web_search_done = ev_web_search_call_done("web-search-1", "completed", "weather seattle"); let first_response = sse(vec![ ev_response_created("resp-1"), web_search_added, web_search_done, ev_completed("resp-1"), ]); mount_sse_once(&server, first_response).await; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "find the weather".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; let begin = wait_for_event_match(&codex, |ev| match ev { EventMsg::WebSearchBegin(event) => Some(event.clone()), _ => None, }) .await; let completed = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::WebSearch(item), .. }) => Some(item.clone()), _ => None, }) .await; assert_eq!(begin.call_id, "web-search-1"); assert_eq!(completed.id, begin.call_id); assert_eq!( completed.action, WebSearchAction::Search { query: Some("weather seattle".to_string()), queries: None, } ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn image_generation_call_event_is_emitted() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, config, session_configured, .. 
} = test_codex().build(&server).await?; let call_id = "ig_image_saved_to_temp_dir_default"; let expected_saved_path = image_generation_artifact_path( config.codex_home.as_path(), &session_configured.session_id.to_string(), call_id, ); let _ = std::fs::remove_file(&expected_saved_path); let first_response = sse(vec![ ev_response_created("resp-1"), ev_image_generation_call(call_id, "completed", "A tiny blue square", "Zm9v"), ev_completed("resp-1"), ]); mount_sse_once(&server, first_response).await; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "generate a tiny blue square".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; let begin = wait_for_event_match(&codex, |ev| match ev { EventMsg::ImageGenerationBegin(event) => Some(event.clone()), _ => None, }) .await; let end = wait_for_event_match(&codex, |ev| match ev { EventMsg::ImageGenerationEnd(event) => Some(event.clone()), _ => None, }) .await; assert_eq!(begin.call_id, call_id); assert_eq!(end.call_id, call_id); assert_eq!(end.status, "completed"); assert_eq!(end.revised_prompt, Some("A tiny blue square".to_string())); assert_eq!(end.result, "Zm9v"); assert_eq!( end.saved_path, Some(expected_saved_path.to_string_lossy().into_owned()) ); assert_eq!(std::fs::read(&expected_saved_path)?, b"foo"); let _ = std::fs::remove_file(&expected_saved_path); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn image_generation_call_event_is_emitted_when_image_save_fails() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, config, session_configured, .. 
} = test_codex().build(&server).await?; let expected_saved_path = image_generation_artifact_path( config.codex_home.as_path(), &session_configured.session_id.to_string(), "ig_invalid", ); let _ = std::fs::remove_file(&expected_saved_path); let first_response = sse(vec![ ev_response_created("resp-1"), ev_image_generation_call("ig_invalid", "completed", "broken payload", "_-8"), ev_completed("resp-1"), ]); mount_sse_once(&server, first_response).await; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "generate an image".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; let begin = wait_for_event_match(&codex, |ev| match ev { EventMsg::ImageGenerationBegin(event) => Some(event.clone()), _ => None, }) .await; let end = wait_for_event_match(&codex, |ev| match ev { EventMsg::ImageGenerationEnd(event) => Some(event.clone()), _ => None, }) .await; assert_eq!(begin.call_id, "ig_invalid"); assert_eq!(end.call_id, "ig_invalid"); assert_eq!(end.status, "completed"); assert_eq!(end.revised_prompt, Some("broken payload".to_string())); assert_eq!(end.result, "_-8"); assert_eq!(end.saved_path, None); assert!(!expected_saved_path.exists()); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn agent_message_content_delta_has_item_metadata() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, session_configured, .. 
} = test_codex().build(&server).await?; let stream = sse(vec![ ev_response_created("resp-1"), ev_message_item_added("msg-1", ""), ev_output_text_delta("streamed response"), ev_assistant_message("msg-1", "streamed response"), ev_completed("resp-1"), ]); mount_sse_once(&server, stream).await; codex .submit(Op::UserInput { items: vec![UserInput::Text { text: "please stream text".into(), text_elements: Vec::new(), }], final_output_json_schema: None, }) .await?; let (started_turn_id, started_item) = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemStarted(ItemStartedEvent { turn_id, item: TurnItem::AgentMessage(item), .. }) => Some((turn_id.clone(), item.clone())), _ => None, }) .await; let delta_event = wait_for_event_match(&codex, |ev| match ev { EventMsg::AgentMessageContentDelta(event) => Some(event.clone()), _ => None, }) .await; let legacy_delta = wait_for_event_match(&codex, |ev| match ev { EventMsg::AgentMessageDelta(event) => Some(event.clone()), _ => None, }) .await; let completed_item = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::AgentMessage(item), .. }) => Some(item.clone()), _ => None, }) .await; let session_id = session_configured.session_id.to_string(); assert_eq!(delta_event.thread_id, session_id); assert_eq!(delta_event.turn_id, started_turn_id); assert_eq!(delta_event.item_id, started_item.id); assert_eq!(delta_event.delta, "streamed response"); assert_eq!(legacy_delta.delta, "streamed response"); assert_eq!(completed_item.id, started_item.id); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn plan_mode_emits_plan_item_from_proposed_plan_block() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, session_configured, .. 
} = test_codex().build(&server).await?; let plan_block = "\n- Step 1\n- Step 2\n\n"; let full_message = format!("Intro\n{plan_block}Outro"); let stream = sse(vec![ ev_response_created("resp-1"), ev_message_item_added("msg-1", ""), ev_output_text_delta(&full_message), ev_assistant_message("msg-1", &full_message), ev_completed("resp-1"), ]); mount_sse_once(&server, stream).await; let collaboration_mode = CollaborationMode { mode: ModeKind::Plan, settings: Settings { model: session_configured.model.clone(), reasoning_effort: None, developer_instructions: None, }, }; codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please plan".into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: std::env::current_dir()?, approval_policy: codex_protocol::protocol::AskForApproval::Never, sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess, model: session_configured.model.clone(), effort: None, summary: None, service_tier: None, collaboration_mode: Some(collaboration_mode), personality: None, }) .await?; let plan_delta = wait_for_event_match(&codex, |ev| match ev { EventMsg::PlanDelta(event) => Some(event.clone()), _ => None, }) .await; let plan_completed = wait_for_event_match(&codex, |ev| match ev { EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::Plan(item), .. }) => Some(item.clone()), _ => None, }) .await; assert_eq!( plan_delta.thread_id, session_configured.session_id.to_string() ); assert_eq!(plan_delta.delta, "- Step 1\n- Step 2\n"); assert_eq!(plan_completed.text, "- Step 1\n- Step 2\n"); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn plan_mode_strips_plan_from_agent_messages() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, session_configured, .. 
} = test_codex().build(&server).await?; let plan_block = "\n- Step 1\n- Step 2\n\n"; let full_message = format!("Intro\n{plan_block}Outro"); let stream = sse(vec![ ev_response_created("resp-1"), ev_message_item_added("msg-1", ""), ev_output_text_delta(&full_message), ev_assistant_message("msg-1", &full_message), ev_completed("resp-1"), ]); mount_sse_once(&server, stream).await; let collaboration_mode = CollaborationMode { mode: ModeKind::Plan, settings: Settings { model: session_configured.model.clone(), reasoning_effort: None, developer_instructions: None, }, }; codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please plan".into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: std::env::current_dir()?, approval_policy: codex_protocol::protocol::AskForApproval::Never, sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess, model: session_configured.model.clone(), effort: None, summary: None, service_tier: None, collaboration_mode: Some(collaboration_mode), personality: None, }) .await?; let mut agent_deltas = Vec::new(); let mut plan_delta = None; let mut agent_item = None; let mut plan_item = None; while plan_delta.is_none() || agent_item.is_none() || plan_item.is_none() { let ev = wait_for_event(&codex, |_| true).await; match ev { EventMsg::AgentMessageContentDelta(event) => { agent_deltas.push(event.delta); } EventMsg::PlanDelta(event) => { plan_delta = Some(event.delta); } EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::AgentMessage(item), .. }) => { agent_item = Some(item); } EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::Plan(item), .. 
}) => { plan_item = Some(item); } _ => {} } } let agent_text = agent_deltas.concat(); assert_eq!(agent_text, "Intro\nOutro"); assert_eq!(plan_delta.unwrap(), "- Step 1\n- Step 2\n"); assert_eq!(plan_item.unwrap().text, "- Step 1\n- Step 2\n"); let agent_text_from_item: String = agent_item .unwrap() .content .iter() .map(|entry| match entry { AgentMessageContent::Text { text } => text.as_str(), }) .collect(); assert_eq!(agent_text_from_item, "Intro\nOutro"); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn plan_mode_streaming_citations_are_stripped_across_added_deltas_and_done() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, session_configured, .. } = test_codex().build(&server).await?; let added_text = "Intro outer-doc\n\n- Step 1plan-doc\n- Step 2\n\nOu", "tro", ]; let full_message = format!("{added_text}{}", deltas.concat()); let mut events = vec![ ev_response_created("resp-1"), ev_message_item_added("msg-1", added_text), ]; for delta in deltas { events.push(ev_output_text_delta(delta)); } events.push(ev_assistant_message("msg-1", &full_message)); events.push(ev_completed("resp-1")); mount_sse_once(&server, sse(events)).await; let collaboration_mode = CollaborationMode { mode: ModeKind::Plan, settings: Settings { model: session_configured.model.clone(), reasoning_effort: None, developer_instructions: None, }, }; codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please plan with citations".into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: std::env::current_dir()?, approval_policy: codex_protocol::protocol::AskForApproval::Never, sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess, model: session_configured.model.clone(), effort: None, summary: None, service_tier: None, collaboration_mode: Some(collaboration_mode), personality: None, }) .await?; let mut agent_started = None; let mut agent_started_idx = 
None; let mut agent_completed = None; let mut agent_completed_idx = None; let mut plan_started = None; let mut plan_started_idx = None; let mut plan_completed = None; let mut plan_completed_idx = None; let mut agent_deltas = Vec::new(); let mut plan_deltas = Vec::new(); let mut first_agent_delta_idx = None; let mut last_agent_delta_idx = None; let mut first_plan_delta_idx = None; let mut last_plan_delta_idx = None; let mut idx = 0usize; let turn_complete_idx = loop { let ev = wait_for_event(&codex, |_| true).await; match ev { EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::AgentMessage(item), .. }) => { agent_started_idx = Some(idx); agent_started = Some(item); } EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::Plan(item), .. }) => { plan_started_idx = Some(idx); plan_started = Some(item); } EventMsg::AgentMessageContentDelta(event) => { if first_agent_delta_idx.is_none() { first_agent_delta_idx = Some(idx); } last_agent_delta_idx = Some(idx); agent_deltas.push(event.delta); } EventMsg::PlanDelta(event) => { if first_plan_delta_idx.is_none() { first_plan_delta_idx = Some(idx); } last_plan_delta_idx = Some(idx); plan_deltas.push(event.delta); } EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::AgentMessage(item), .. }) => { agent_completed_idx = Some(idx); agent_completed = Some(item); } EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::Plan(item), .. 
}) => { plan_completed_idx = Some(idx); plan_completed = Some(item); } EventMsg::TurnComplete(_) => { break idx; } _ => {} } idx += 1; }; let agent_started = agent_started.expect("agent item start should be emitted"); let agent_completed = agent_completed.expect("agent item completion should be emitted"); let plan_started = plan_started.expect("plan item start should be emitted"); let plan_completed = plan_completed.expect("plan item completion should be emitted"); assert_eq!(agent_started.id, agent_completed.id); assert_eq!(plan_started.id, plan_completed.id); assert_eq!(plan_started.text, ""); let agent_started_text: String = agent_started .content .iter() .map(|entry| match entry { AgentMessageContent::Text { text } => text.as_str(), }) .collect(); let agent_completed_text: String = agent_completed .content .iter() .map(|entry| match entry { AgentMessageContent::Text { text } => text.as_str(), }) .collect(); let agent_delta_text = agent_deltas.concat(); let plan_delta_text = plan_deltas.concat(); assert_eq!(agent_started_text, ""); assert_eq!(agent_delta_text, "Intro \nOutro"); assert_eq!(agent_completed_text, "Intro \nOutro"); assert_eq!(plan_delta_text, "- Step 1\n- Step 2\n"); assert_eq!(plan_completed.text, "- Step 1\n- Step 2\n"); for text in [ agent_started_text.as_str(), agent_delta_text.as_str(), agent_completed_text.as_str(), plan_delta_text.as_str(), plan_completed.text.as_str(), ] { assert!(!text.contains("")); assert!(!text.contains("")); } let agent_started_idx = agent_started_idx.expect("agent start index"); let agent_completed_idx = agent_completed_idx.expect("agent completion index"); let plan_started_idx = plan_started_idx.expect("plan start index"); let plan_completed_idx = plan_completed_idx.expect("plan completion index"); let first_agent_delta_idx = first_agent_delta_idx.expect("agent delta index"); let last_agent_delta_idx = last_agent_delta_idx.expect("agent delta index"); let first_plan_delta_idx = first_plan_delta_idx.expect("plan delta 
index"); let last_plan_delta_idx = last_plan_delta_idx.expect("plan delta index"); assert!(agent_started_idx < first_agent_delta_idx); assert!(plan_started_idx < first_plan_delta_idx); assert!(last_agent_delta_idx < agent_completed_idx); assert!(last_plan_delta_idx < plan_completed_idx); assert!(agent_completed_idx < turn_complete_idx); assert!(plan_completed_idx < turn_complete_idx); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn plan_mode_streaming_proposed_plan_tag_split_across_added_and_delta_is_parsed() -> anyhow::Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; let TestCodex { codex, session_configured, .. } = test_codex().build(&server).await?; let added_text = "Intro\n\n- Step 1\n\nOutro"]; let full_message = format!("{added_text}{}", deltas.concat()); let mut events = vec![ ev_response_created("resp-1"), ev_message_item_added("msg-1", added_text), ]; for delta in deltas { events.push(ev_output_text_delta(delta)); } events.push(ev_assistant_message("msg-1", &full_message)); events.push(ev_completed("resp-1")); mount_sse_once(&server, sse(events)).await; let collaboration_mode = CollaborationMode { mode: ModeKind::Plan, settings: Settings { model: session_configured.model.clone(), reasoning_effort: None, developer_instructions: None, }, }; codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "please plan".into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: std::env::current_dir()?, approval_policy: codex_protocol::protocol::AskForApproval::Never, sandbox_policy: codex_protocol::protocol::SandboxPolicy::DangerFullAccess, model: session_configured.model.clone(), effort: None, summary: None, service_tier: None, collaboration_mode: Some(collaboration_mode), personality: None, }) .await?; let mut agent_started = None; let mut agent_completed = None; let mut plan_started = None; let mut plan_completed = None; let mut agent_deltas = Vec::new(); let mut plan_deltas = 
Vec::new(); loop { let ev = wait_for_event(&codex, |_| true).await; match ev { EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::AgentMessage(item), .. }) => agent_started = Some(item), EventMsg::ItemStarted(ItemStartedEvent { item: TurnItem::Plan(item), .. }) => plan_started = Some(item), EventMsg::AgentMessageContentDelta(event) => agent_deltas.push(event.delta), EventMsg::PlanDelta(event) => plan_deltas.push(event.delta), EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::AgentMessage(item), .. }) => agent_completed = Some(item), EventMsg::ItemCompleted(ItemCompletedEvent { item: TurnItem::Plan(item), .. }) => plan_completed = Some(item), EventMsg::TurnComplete(_) => break, _ => {} } } let agent_started = agent_started.expect("agent item start should be emitted"); let agent_completed = agent_completed.expect("agent item completion should be emitted"); let plan_started = plan_started.expect("plan item start should be emitted"); let plan_completed = plan_completed.expect("plan item completion should be emitted"); let agent_started_text: String = agent_started .content .iter() .map(|entry| match entry { AgentMessageContent::Text { text } => text.as_str(), }) .collect(); let agent_completed_text: String = agent_completed .content .iter() .map(|entry| match entry { AgentMessageContent::Text { text } => text.as_str(), }) .collect(); assert_eq!(agent_started_text, ""); assert_eq!(agent_deltas.concat(), "Intro\nOutro"); assert_eq!(agent_completed_text, "Intro\nOutro"); assert_eq!(plan_started.text, ""); assert_eq!(plan_deltas.concat(), "- Step 1\n"); assert_eq!(plan_completed.text, "- Step 1\n"); Ok(()) }

/// Submits a `ModeKind::Plan` user turn against a mocked SSE stream whose
/// assistant text is `full_message`, then reads events until a plan delta, a
/// completed plan item and a completed agent message have all been observed.
///
/// Asserts that the plan delta and the plan item text are both `"- Step 1\n"`
/// and that the completed agent message text is `"Intro\n"`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn plan_mode_handles_missing_plan_close_tag() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let TestCodex {
        codex,
        session_configured,
        ..
    } = test_codex().build(&server).await?;

    // NOTE(review): as written, this literal contains no plan tag at all;
    // presumably an opening tag (without its closing counterpart) is meant to
    // sit on the empty line between "Intro" and the step — confirm.
    let full_message = "Intro\n\n- Step 1\n";
    let events = vec![
        ev_response_created("resp-1"),
        ev_message_item_added("msg-1", ""),
        ev_output_text_delta(full_message),
        ev_assistant_message("msg-1", full_message),
        ev_completed("resp-1"),
    ];
    mount_sse_once(&server, sse(events)).await;

    // The configured model is needed both in the collaboration settings and
    // on the turn itself.
    let model = session_configured.model.clone();
    let plan_mode = CollaborationMode {
        mode: ModeKind::Plan,
        settings: Settings {
            model: model.clone(),
            reasoning_effort: None,
            developer_instructions: None,
        },
    };
    let prompt = UserInput::Text {
        text: "please plan".into(),
        text_elements: Vec::new(),
    };
    let turn = Op::UserTurn {
        items: vec![prompt],
        final_output_json_schema: None,
        cwd: std::env::current_dir()?,
        approval_policy: AskForApproval::Never,
        sandbox_policy: SandboxPolicy::DangerFullAccess,
        model,
        effort: None,
        summary: None,
        service_tier: None,
        collaboration_mode: Some(plan_mode),
        personality: None,
    };
    codex.submit(turn).await?;

    // Keep pulling events (no filtering) until each of the three slots is
    // filled. A later plan delta replaces an earlier one.
    let mut last_plan_delta = None;
    let mut finished_plan = None;
    let mut finished_agent_message = None;
    loop {
        let everything_seen = last_plan_delta.is_some()
            && finished_plan.is_some()
            && finished_agent_message.is_some();
        if everything_seen {
            break;
        }

        match wait_for_event(&codex, |_| true).await {
            EventMsg::PlanDelta(delta_event) => last_plan_delta = Some(delta_event.delta),
            EventMsg::ItemCompleted(ItemCompletedEvent { item, .. }) => match item {
                TurnItem::Plan(plan) => finished_plan = Some(plan),
                TurnItem::AgentMessage(message) => finished_agent_message = Some(message),
                _ => {}
            },
            _ => {}
        }
    }

    assert_eq!(last_plan_delta.unwrap(), "- Step 1\n");
    assert_eq!(finished_plan.unwrap().text, "- Step 1\n");

    // Concatenate every text entry of the completed agent message.
    let agent_message = finished_agent_message.unwrap();
    let mut agent_text = String::new();
    for entry in agent_message.content.iter() {
        let AgentMessageContent::Text { text } = entry;
        agent_text.push_str(text);
    }
    assert_eq!(agent_text, "Intro\n");

    Ok(())
}

/// Streams a reasoning item with a single summary text delta (`"step one"`)
/// and checks the delta events that come out.
///
/// Asserts that the `ReasoningContentDelta` event carries the id of the
/// reasoning item announced by `ItemStarted`, and that both it and the legacy
/// `AgentReasoningDelta` event deliver `"step one"`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_content_delta_has_item_metadata() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let TestCodex { codex, .. } = test_codex().build(&server).await?;

    let events = vec![
        ev_response_created("resp-1"),
        ev_reasoning_item_added("reasoning-1", &[""]),
        ev_reasoning_summary_text_delta("step one"),
        ev_reasoning_item("reasoning-1", &["step one"], &[]),
        ev_completed("resp-1"),
    ];
    mount_sse_once(&server, sse(events)).await;

    let prompt = UserInput::Text {
        text: "reason through it".into(),
        text_elements: Vec::new(),
    };
    codex
        .submit(Op::UserInput {
            items: vec![prompt],
            final_output_json_schema: None,
        })
        .await?;

    // Awaited strictly in this order: item start, new-style delta, legacy delta.
    let started_item = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemStarted(ItemStartedEvent {
            item: TurnItem::Reasoning(reasoning),
            ..
        }) = event
        {
            Some(reasoning.clone())
        } else {
            None
        }
    })
    .await;
    let content_delta = wait_for_event_match(&codex, |event| {
        if let EventMsg::ReasoningContentDelta(delta) = event {
            Some(delta.clone())
        } else {
            None
        }
    })
    .await;
    let legacy_event = wait_for_event_match(&codex, |event| {
        if let EventMsg::AgentReasoningDelta(delta) = event {
            Some(delta.clone())
        } else {
            None
        }
    })
    .await;

    assert_eq!(content_delta.item_id, started_item.id);
    assert_eq!(content_delta.delta, "step one");
    assert_eq!(legacy_event.delta, "step one");

    Ok(())
}

/// Builds the session with `config.show_raw_agent_reasoning = true`, streams
/// a reasoning item with one raw text delta (`"raw detail"`), and checks the
/// raw delta events that come out. Only the enabled case is exercised here.
///
/// Asserts that the `ReasoningRawContentDelta` event carries the id of the
/// reasoning item announced by `ItemStarted`, and that both it and the legacy
/// `AgentReasoningRawContentDelta` event deliver `"raw detail"`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_raw_content_delta_respects_flag() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.show_raw_agent_reasoning = true;
        })
        .build(&server)
        .await?;

    let events = vec![
        ev_response_created("resp-1"),
        ev_reasoning_item_added("reasoning-raw", &[""]),
        ev_reasoning_text_delta("raw detail"),
        ev_reasoning_item("reasoning-raw", &["complete"], &["raw detail"]),
        ev_completed("resp-1"),
    ];
    mount_sse_once(&server, sse(events)).await;

    let prompt = UserInput::Text {
        text: "show raw reasoning".into(),
        text_elements: Vec::new(),
    };
    codex
        .submit(Op::UserInput {
            items: vec![prompt],
            final_output_json_schema: None,
        })
        .await?;

    // Awaited strictly in this order: item start, new-style delta, legacy delta.
    let started_item = wait_for_event_match(&codex, |event| {
        if let EventMsg::ItemStarted(ItemStartedEvent {
            item: TurnItem::Reasoning(reasoning),
            ..
        }) = event
        {
            Some(reasoning.clone())
        } else {
            None
        }
    })
    .await;
    let raw_delta = wait_for_event_match(&codex, |event| {
        if let EventMsg::ReasoningRawContentDelta(delta) = event {
            Some(delta.clone())
        } else {
            None
        }
    })
    .await;
    let legacy_event = wait_for_event_match(&codex, |event| {
        if let EventMsg::AgentReasoningRawContentDelta(delta) = event {
            Some(delta.clone())
        } else {
            None
        }
    })
    .await;

    assert_eq!(raw_delta.item_id, started_item.id);
    assert_eq!(raw_delta.delta, "raw detail");
    assert_eq!(legacy_event.delta, "raw detail");

    Ok(())
}