fix: attempting to resume an existing conversation in ConversationManager should reuse it

Added model summary and risk assessment for commands that violate sandbox policy (#5536)
This PR adds support for a model-based summary and risk assessment for commands that violate the sandbox policy and require user approval. This aids the user in evaluating whether the command should be approved. The feature works by passing the failed command back to the model and asking it to summarize the command and to assign it a risk level (low, medium, or high) and a risk category (e.g. "data deletion" or "data exfiltration"). It uses a new conversation thread so the context in the existing thread doesn't influence the answer. If the call to the model fails or takes longer than 5 seconds, it falls back to the current behavior. For now, this is an experimental feature and is gated by a config key `experimental_sandbox_command_assessment`. Here is a screenshot of the approval prompt showing the risk assessment and summary. <img width="723" height="282" alt="image" src="https://github.com/user-attachments/assets/4597dd7c-d5a0-4e9f-9d13-414bd082fd6b" />
2026-03-20 21:53:53 +00:00 · 2025-10-24 16:12:43 -07:00 · 2025-10-24 15:23:44 -07:00 · 2025-10-24 19:12:03 +00:00 · 2025-10-24 11:30:34 -07:00 · 2025-10-24 11:17:46 -07:00
90 changed files with 4516 additions and 797 deletions
--- a/README.md
+++ b/README.md
@@ -33,6 +33,8 @@ Then simply run `codex` to get started:
 codex
 ```

+If you're running into upgrade issues with Homebrew, see the [FAQ entry on brew upgrade codex](./docs/faq.md#brew-update-codex-isnt-upgrading-me).
+
 <details>
 <summary>You can also go to the <a href="https://github.com/openai/codex/releases/latest">latest GitHub Release</a> and download the appropriate binary for your platform.</summary>

--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -1066,6 +1066,7 @@ dependencies = [
 "codex-rmcp-client",
 "codex-utils-pty",
 "codex-utils-string",
+ "codex-utils-tokenizer",
 "core-foundation 0.9.4",
 "core_test_support",
 "dirs",
@@ -1074,6 +1075,7 @@ dependencies = [
 "escargot",
 "eventsource-stream",
 "futures",
+ "http",
 "indexmap 2.10.0",
 "landlock",
 "libc",
@@ -1633,6 +1635,7 @@ dependencies = [
 "anyhow",
 "assert_cmd",
 "codex-core",
+ "codex-protocol",
 "notify",
 "regex-lite",
 "serde_json",
@@ -4952,9 +4955,9 @@ dependencies = [

 [[package]]
 name = "rmcp"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e35d31f89beb59c83bc31363426da25b323ce0c2e5b53c7bf29867d16ee7898"
+checksum = "1fdad1258f7259fdc0f2dfc266939c82c3b5d1fd72bcde274d600cdc27e60243"
 dependencies = [
 "base64",
 "bytes",
@@ -4986,9 +4989,9 @@ dependencies = [

 [[package]]
 name = "rmcp-macros"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d88518b38110c439a03f0f4eee40e5105d648a530711cb87f98991e3f324a664"
+checksum = "ede0589a208cc7ce81d1be68aa7e74b917fcd03c81528408bab0457e187dcd9b"
 dependencies = [
 "darling 0.21.3",
 "proc-macro2",
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -116,6 +116,7 @@ env_logger = "0.11.5"
 escargot = "0.5"
 eventsource-stream = "0.2.3"
 futures = { version = "0.3", default-features = false }
+http = "1.3.1"
 icu_decimal = "2.0.0"
 icu_locale_core = "2.0.0"
 ignore = "0.4.23"
@@ -153,7 +154,7 @@ ratatui = "0.29.0"
 ratatui-macros = "0.6.0"
 regex-lite = "0.1.7"
 reqwest = "0.12"
-rmcp = { version = "0.8.2", default-features = false }
+rmcp = { version = "0.8.3", default-features = false }
 schemars = "0.8.22"
 seccompiler = "0.5.0"
 sentry = "0.34.0"
--- a/codex-rs/app-server-protocol/src/export.rs
+++ b/codex-rs/app-server-protocol/src/export.rs
@@ -23,6 +23,7 @@ use std::io::Write;
 use std::path::Path;
 use std::path::PathBuf;
 use std::process::Command;
+use ts_rs::ExportError;
 use ts_rs::TS;

 const HEADER: &str = "// GENERATED CODE! DO NOT MODIFY BY HAND!\n\n";
@@ -104,6 +105,19 @@ macro_rules! for_each_schema_type {
    };
 }

+/// Runs `export`, naming `label` when a dependency type cannot be exported.
+fn export_ts_with_context<F>(label: &str, export: F) -> Result<()>
+where
+    F: FnOnce() -> std::result::Result<(), ExportError>,
+{
+    export().map_err(|err| match err {
+        ExportError::CannotBeExported(ty) => {
+            anyhow!("failed to export {label}: dependency {ty} cannot be exported")
+        }
+        other => other.into(),
+    })
+}
+
 pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
    generate_ts(out_dir, prettier)?;
    generate_json(out_dir)?;
@@ -113,13 +127,17 @@ pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
 pub fn generate_ts(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
    ensure_dir(out_dir)?;

-    ClientRequest::export_all_to(out_dir)?;
-    export_client_responses(out_dir)?;
-    ClientNotification::export_all_to(out_dir)?;
+    export_ts_with_context("ClientRequest", || ClientRequest::export_all_to(out_dir))?;
+    export_ts_with_context("client responses", || export_client_responses(out_dir))?;
+    export_ts_with_context("ClientNotification", || {
+        ClientNotification::export_all_to(out_dir)
+    })?;

-    ServerRequest::export_all_to(out_dir)?;
-    export_server_responses(out_dir)?;
-    ServerNotification::export_all_to(out_dir)?;
+    export_ts_with_context("ServerRequest", || ServerRequest::export_all_to(out_dir))?;
+    export_ts_with_context("server responses", || export_server_responses(out_dir))?;
+    export_ts_with_context("ServerNotification", || {
+        ServerNotification::export_all_to(out_dir)
+    })?;

    generate_index_ts(out_dir)?;

--- a/codex-rs/app-server-protocol/src/protocol.rs
+++ b/codex-rs/app-server-protocol/src/protocol.rs
@@ -5,6 +5,7 @@ use crate::JSONRPCNotification;
 use crate::JSONRPCRequest;
 use crate::RequestId;
 use codex_protocol::ConversationId;
+use codex_protocol::account::Account;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::ReasoningEffort;
 use codex_protocol::config_types::ReasoningSummary;
@@ -16,6 +17,7 @@ use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::FileChange;
 use codex_protocol::protocol::RateLimitSnapshot;
 use codex_protocol::protocol::ReviewDecision;
+use codex_protocol::protocol::SandboxCommandAssessment;
 use codex_protocol::protocol::SandboxPolicy;
 use codex_protocol::protocol::TurnAbortReason;
 use paste::paste;
@@ -93,6 +95,43 @@ macro_rules! client_request_definitions {
 }

 client_request_definitions! {
+    /// NEW APIs
+    #[serde(rename = "model/list")]
+    #[ts(rename = "model/list")]
+    ListModels {
+        params: ListModelsParams,
+        response: ListModelsResponse,
+    },
+
+    #[serde(rename = "account/login")]
+    #[ts(rename = "account/login")]
+    LoginAccount {
+        params: LoginAccountParams,
+        response: LoginAccountResponse,
+    },
+
+    #[serde(rename = "account/logout")]
+    #[ts(rename = "account/logout")]
+    LogoutAccount {
+        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
+        response: LogoutAccountResponse,
+    },
+
+    #[serde(rename = "account/rateLimits/read")]
+    #[ts(rename = "account/rateLimits/read")]
+    GetAccountRateLimits {
+        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
+        response: GetAccountRateLimitsResponse,
+    },
+
+    #[serde(rename = "account/read")]
+    #[ts(rename = "account/read")]
+    GetAccount {
+        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
+        response: GetAccountResponse,
+    },
+
+    /// DEPRECATED APIs below
    Initialize {
        params: InitializeParams,
        response: InitializeResponse,
@@ -106,13 +145,6 @@ client_request_definitions! {
        params: ListConversationsParams,
        response: ListConversationsResponse,
    },
-    #[serde(rename = "model/list")]
-    #[ts(rename = "model/list")]
-    /// List available Codex models along with display metadata.
-    ListModels {
-        params: ListModelsParams,
-        response: ListModelsResponse,
-    },
    /// Resume a recorded Codex conversation from a rollout file.
    ResumeConversation {
        params: ResumeConversationParams,
@@ -191,12 +223,6 @@ client_request_definitions! {
        params: ExecOneOffCommandParams,
        response: ExecOneOffCommandResponse,
    },
-    #[serde(rename = "account/rateLimits/read")]
-    #[ts(rename = "account/rateLimits/read")]
-    GetAccountRateLimits {
-        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
-        response: GetAccountRateLimitsResponse,
-    },
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
@@ -352,6 +378,38 @@ pub struct ListModelsResponse {
    pub next_cursor: Option<String>,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(tag = "type")]
+#[ts(tag = "type")]
+pub enum LoginAccountParams {
+    #[serde(rename = "apiKey")]
+    #[ts(rename = "apiKey")]
+    ApiKey {
+        #[serde(rename = "apiKey")]
+        #[ts(rename = "apiKey")]
+        api_key: String,
+    },
+    #[serde(rename = "chatgpt")]
+    #[ts(rename = "chatgpt")]
+    ChatGpt,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct LoginAccountResponse {
+    /// Only set if the login method is ChatGPT.
+    #[schemars(with = "String")]
+    pub login_id: Option<Uuid>,
+
+    /// URL the client should open in a browser to initiate the OAuth flow.
+    /// Only set if the login method is ChatGPT.
+    pub auth_url: Option<String>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct LogoutAccountResponse {}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct ResumeConversationParams {
@@ -477,6 +535,12 @@ pub struct GetAccountRateLimitsResponse {
    pub rate_limits: RateLimitSnapshot,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(transparent)]
+#[ts(export)]
+#[ts(type = "Account | null")]
+pub struct GetAccountResponse(#[ts(type = "Account | null")] pub Option<Account>);
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct GetAuthStatusResponse {
@@ -784,6 +848,8 @@ pub struct ExecCommandApprovalParams {
    pub cwd: PathBuf,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub risk: Option<SandboxCommandAssessment>,
    pub parsed_cmd: Vec<ParsedCommand>,
 }

@@ -875,6 +941,13 @@ pub struct AuthStatusChangeNotification {
 #[serde(tag = "method", content = "params", rename_all = "camelCase")]
 #[strum(serialize_all = "camelCase")]
 pub enum ServerNotification {
+    /// NEW NOTIFICATIONS
+    #[serde(rename = "account/rateLimits/updated")]
+    #[ts(rename = "account/rateLimits/updated")]
+    #[strum(serialize = "account/rateLimits/updated")]
+    AccountRateLimitsUpdated(RateLimitSnapshot),
+
+    /// DEPRECATED NOTIFICATIONS below
    /// Authentication status changed
    AuthStatusChange(AuthStatusChangeNotification),

@@ -888,6 +961,7 @@ pub enum ServerNotification {
 impl ServerNotification {
    pub fn to_params(self) -> Result<serde_json::Value, serde_json::Error> {
        match self {
+            ServerNotification::AccountRateLimitsUpdated(params) => serde_json::to_value(params),
            ServerNotification::AuthStatusChange(params) => serde_json::to_value(params),
            ServerNotification::LoginChatGptComplete(params) => serde_json::to_value(params),
            ServerNotification::SessionConfigured(params) => serde_json::to_value(params),
@@ -992,6 +1066,7 @@ mod tests {
            command: vec!["echo".to_string(), "hello".to_string()],
            cwd: PathBuf::from("/tmp"),
            reason: Some("because tests".to_string()),
+            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "echo hello".to_string(),
            }],
@@ -1043,16 +1118,89 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn serialize_account_login_api_key() -> Result<()> {
+        let request = ClientRequest::LoginAccount {
+            request_id: RequestId::Integer(2),
+            params: LoginAccountParams::ApiKey {
+                api_key: "secret".to_string(),
+            },
+        };
+        assert_eq!(
+            json!({
+                "method": "account/login",
+                "id": 2,
+                "params": {
+                    "type": "apiKey",
+                    "apiKey": "secret"
+                }
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_account_login_chatgpt() -> Result<()> {
+        let request = ClientRequest::LoginAccount {
+            request_id: RequestId::Integer(3),
+            params: LoginAccountParams::ChatGpt,
+        };
+        assert_eq!(
+            json!({
+                "method": "account/login",
+                "id": 3,
+                "params": {
+                    "type": "chatgpt"
+                }
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_account_logout() -> Result<()> {
+        let request = ClientRequest::LogoutAccount {
+            request_id: RequestId::Integer(4),
+            params: None,
+        };
+        assert_eq!(
+            json!({
+                "method": "account/logout",
+                "id": 4,
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_get_account() -> Result<()> {
+        let request = ClientRequest::GetAccount {
+            request_id: RequestId::Integer(5),
+            params: None,
+        };
+        assert_eq!(
+            json!({
+                "method": "account/read",
+                "id": 5,
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
    #[test]
    fn serialize_list_models() -> Result<()> {
        let request = ClientRequest::ListModels {
-            request_id: RequestId::Integer(2),
+            request_id: RequestId::Integer(6),
            params: ListModelsParams::default(),
        };
        assert_eq!(
            json!({
                "method": "model/list",
-                "id": 2,
+                "id": 6,
                "params": {}
            }),
            serde_json::to_value(&request)?,
--- a/codex-rs/app-server/src/codex_message_processor.rs
+++ b/codex-rs/app-server/src/codex_message_processor.rs
@@ -176,6 +176,27 @@ impl CodexMessageProcessor {
            ClientRequest::ListModels { request_id, params } => {
                self.list_models(request_id, params).await;
            }
+            ClientRequest::LoginAccount {
+                request_id,
+                params: _,
+            } => {
+                self.send_unimplemented_error(request_id, "account/login")
+                    .await;
+            }
+            ClientRequest::LogoutAccount {
+                request_id,
+                params: _,
+            } => {
+                self.send_unimplemented_error(request_id, "account/logout")
+                    .await;
+            }
+            ClientRequest::GetAccount {
+                request_id,
+                params: _,
+            } => {
+                self.send_unimplemented_error(request_id, "account/read")
+                    .await;
+            }
            ClientRequest::ResumeConversation { request_id, params } => {
                self.handle_resume_conversation(request_id, params).await;
            }
@@ -257,6 +278,15 @@ impl CodexMessageProcessor {
        }
    }

+    async fn send_unimplemented_error(&self, request_id: RequestId, method: &str) {
+        let error = JSONRPCErrorError {
+            code: INTERNAL_ERROR_CODE,
+            message: format!("{method} is not implemented yet"),
+            data: None,
+        };
+        self.outgoing.send_error(request_id, error).await;
+    }
+
    async fn login_api_key(&mut self, request_id: RequestId, params: LoginApiKeyParams) {
        if matches!(
            self.config.forced_login_method,
@@ -1417,6 +1447,7 @@ async fn apply_bespoke_event_handling(
            command,
            cwd,
            reason,
+            risk,
            parsed_cmd,
        }) => {
            let params = ExecCommandApprovalParams {
@@ -1425,6 +1456,7 @@ async fn apply_bespoke_event_handling(
                command,
                cwd,
                reason,
+                risk,
                parsed_cmd,
            };
            let rx = outgoing
@@ -1436,6 +1468,15 @@ async fn apply_bespoke_event_handling(
                on_exec_approval_response(event_id, rx, conversation).await;
            });
        }
+        EventMsg::TokenCount(token_count_event) => {
+            if let Some(rate_limits) = token_count_event.rate_limits {
+                outgoing
+                    .send_server_notification(ServerNotification::AccountRateLimitsUpdated(
+                        rate_limits,
+                    ))
+                    .await;
+            }
+        }
        // If this is a TurnAborted, reply to any pending interrupt requests.
        EventMsg::TurnAborted(turn_aborted_event) => {
            let pending = {
@@ -1484,6 +1525,7 @@ async fn derive_config_from_params(
        include_view_image_tool: None,
        show_raw_agent_reasoning: None,
        tools_web_search_request: None,
+        experimental_sandbox_command_assessment: None,
        additional_writable_roots: Vec::new(),
    };

--- a/codex-rs/app-server/src/fuzzy_file_search.rs
+++ b/codex-rs/app-server/src/fuzzy_file_search.rs
@@ -46,6 +46,7 @@ pub(crate) async fn run_fuzzy_file_search(
                threads,
                cancel_flag,
                COMPUTE_INDICES,
+                true,
            ) {
                Ok(res) => Ok((root, res)),
                Err(err) => Err((root, err)),
--- a/codex-rs/app-server/src/outgoing_message.rs
+++ b/codex-rs/app-server/src/outgoing_message.rs
@@ -142,6 +142,8 @@ pub(crate) struct OutgoingError {
 #[cfg(test)]
 mod tests {
    use codex_app_server_protocol::LoginChatGptCompleteNotification;
+    use codex_protocol::protocol::RateLimitSnapshot;
+    use codex_protocol::protocol::RateLimitWindow;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use uuid::Uuid;
@@ -171,4 +173,34 @@ mod tests {
            "ensure the strum macros serialize the method field correctly"
        );
    }
+
+    #[test]
+    fn verify_account_rate_limits_notification_serialization() {
+        let notification = ServerNotification::AccountRateLimitsUpdated(RateLimitSnapshot {
+            primary: Some(RateLimitWindow {
+                used_percent: 25.0,
+                window_minutes: Some(15),
+                resets_at: Some(123),
+            }),
+            secondary: None,
+        });
+
+        let jsonrpc_notification = OutgoingMessage::AppServerNotification(notification);
+        assert_eq!(
+            json!({
+                "method": "account/rateLimits/updated",
+                "params": {
+                    "primary": {
+                        "used_percent": 25.0,
+                        "window_minutes": 15,
+                        "resets_at": 123,
+                    },
+                    "secondary": null,
+                },
+            }),
+            serde_json::to_value(jsonrpc_notification)
+                .expect("ensure the notification serializes correctly"),
+            "ensure the notification serializes correctly"
+        );
+    }
 }
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -311,6 +311,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            ],
            cwd: working_directory.clone(),
            reason: None,
+            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "python3 -c 'print(42)'".to_string()
            }],
--- a/codex-rs/apply-patch/tests/suite/mod.rs
+++ b/codex-rs/apply-patch/tests/suite/mod.rs
@@ -1 +1,3 @@
 mod cli;
+#[cfg(not(target_os = "windows"))]
+mod tool;
--- a/codex-rs/apply-patch/tests/suite/tool.rs
+++ b/codex-rs/apply-patch/tests/suite/tool.rs
@@ -0,0 +1,257 @@
+use assert_cmd::Command;
+use pretty_assertions::assert_eq;
+use std::fs;
+use std::path::Path;
+use tempfile::tempdir;
+
+/// Runs `apply_patch` in `dir` with `patch` as its sole argument and returns the assertion handle.
+fn run_apply_patch_in_dir(dir: &Path, patch: &str) -> anyhow::Result<assert_cmd::assert::Assert> {
+    Ok(apply_patch_command(dir)?.arg(patch).assert())
+}
+
+/// Builds an `apply_patch` command whose working directory is `dir`; callers add the arguments.
+fn apply_patch_command(dir: &Path) -> anyhow::Result<Command> {
+    let mut cmd = Command::cargo_bin("apply_patch")?;
+    cmd.current_dir(dir);
+    Ok(cmd)
+}
+
+#[test]
+fn test_apply_patch_cli_applies_multiple_operations() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let modify_path = tmp.path().join("modify.txt");
+    let delete_path = tmp.path().join("delete.txt");
+
+    fs::write(&modify_path, "line1\nline2\n")?;
+    fs::write(&delete_path, "obsolete\n")?;
+
+    let patch = "*** Begin Patch\n*** Add File: nested/new.txt\n+created\n*** Delete File: delete.txt\n*** Update File: modify.txt\n@@\n-line2\n+changed\n*** End Patch";
+
+    run_apply_patch_in_dir(tmp.path(), patch)?.success().stdout(
+        "Success. Updated the following files:\nA nested/new.txt\nM modify.txt\nD delete.txt\n",
+    );
+
+    assert_eq!(
+        fs::read_to_string(tmp.path().join("nested/new.txt"))?,
+        "created\n"
+    );
+    assert_eq!(fs::read_to_string(&modify_path)?, "line1\nchanged\n");
+    assert!(!delete_path.exists());
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_applies_multiple_chunks() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let target_path = tmp.path().join("multi.txt");
+    fs::write(&target_path, "line1\nline2\nline3\nline4\n")?;
+
+    let patch = "*** Begin Patch\n*** Update File: multi.txt\n@@\n-line2\n+changed2\n@@\n-line4\n+changed4\n*** End Patch";
+
+    run_apply_patch_in_dir(tmp.path(), patch)?
+        .success()
+        .stdout("Success. Updated the following files:\nM multi.txt\n");
+
+    assert_eq!(
+        fs::read_to_string(&target_path)?,
+        "line1\nchanged2\nline3\nchanged4\n"
+    );
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_moves_file_to_new_directory() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let original_path = tmp.path().join("old/name.txt");
+    let new_path = tmp.path().join("renamed/dir/name.txt");
+    fs::create_dir_all(original_path.parent().expect("parent should exist"))?;
+    fs::write(&original_path, "old content\n")?;
+
+    let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-old content\n+new content\n*** End Patch";
+
+    run_apply_patch_in_dir(tmp.path(), patch)?
+        .success()
+        .stdout("Success. Updated the following files:\nM renamed/dir/name.txt\n");
+
+    assert!(!original_path.exists());
+    assert_eq!(fs::read_to_string(&new_path)?, "new content\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_rejects_empty_patch() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr("No files were modified.\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_reports_missing_context() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let target_path = tmp.path().join("modify.txt");
+    fs::write(&target_path, "line1\nline2\n")?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Update File: modify.txt\n@@\n-missing\n+changed\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr("Failed to find expected lines in modify.txt:\nmissing\n");
+    assert_eq!(fs::read_to_string(&target_path)?, "line1\nline2\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_rejects_missing_file_delete() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Delete File: missing.txt\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr("Failed to delete file missing.txt\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_rejects_empty_update_hunk() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Update File: foo.txt\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr("Invalid patch hunk on line 2: Update file hunk for path 'foo.txt' is empty\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_requires_existing_file_for_update() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr(
+            "Failed to read file to update missing.txt: No such file or directory (os error 2)\n",
+        );
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_move_overwrites_existing_destination() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let original_path = tmp.path().join("old/name.txt");
+    let destination = tmp.path().join("renamed/dir/name.txt");
+    fs::create_dir_all(original_path.parent().expect("parent should exist"))?;
+    fs::create_dir_all(destination.parent().expect("parent should exist"))?;
+    fs::write(&original_path, "from\n")?;
+    fs::write(&destination, "existing\n")?;
+
+    run_apply_patch_in_dir(
+        tmp.path(),
+        "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-from\n+new\n*** End Patch",
+    )?
+    .success()
+    .stdout("Success. Updated the following files:\nM renamed/dir/name.txt\n");
+
+    assert!(!original_path.exists());
+    assert_eq!(fs::read_to_string(&destination)?, "new\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_add_overwrites_existing_file() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let path = tmp.path().join("duplicate.txt");
+    fs::write(&path, "old content\n")?;
+
+    run_apply_patch_in_dir(
+        tmp.path(),
+        "*** Begin Patch\n*** Add File: duplicate.txt\n+new content\n*** End Patch",
+    )?
+    .success()
+    .stdout("Success. Updated the following files:\nA duplicate.txt\n");
+
+    assert_eq!(fs::read_to_string(&path)?, "new content\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_delete_directory_fails() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    fs::create_dir(tmp.path().join("dir"))?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Delete File: dir\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr("Failed to delete file dir\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_rejects_invalid_hunk_header() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Frobnicate File: foo\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr("Invalid patch hunk on line 2: '*** Frobnicate File: foo' is not a valid hunk header. Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_updates_file_appends_trailing_newline() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let target_path = tmp.path().join("no_newline.txt");
+    fs::write(&target_path, "no newline at end")?;
+
+    run_apply_patch_in_dir(
+        tmp.path(),
+        "*** Begin Patch\n*** Update File: no_newline.txt\n@@\n-no newline at end\n+first line\n+second line\n*** End Patch",
+    )?
+    .success()
+    .stdout("Success. Updated the following files:\nM no_newline.txt\n");
+
+    let contents = fs::read_to_string(&target_path)?;
+    assert!(contents.ends_with('\n'));
+    assert_eq!(contents, "first line\nsecond line\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_failure_after_partial_success_leaves_changes() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let new_file = tmp.path().join("created.txt");
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Add File: created.txt\n+hello\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch")
+        .assert()
+        .failure()
+        .stdout("")
+        .stderr("Failed to read file to update missing.txt: No such file or directory (os error 2)\n");
+
+    assert_eq!(fs::read_to_string(&new_file)?, "hello\n");
+
+    Ok(())
+}
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -28,11 +28,13 @@ codex-rmcp-client = { workspace = true }
 codex-async-utils = { workspace = true }
 codex-utils-string = { workspace = true }
 codex-utils-pty = { workspace = true }
+codex-utils-tokenizer = { workspace = true }
 dirs = { workspace = true }
 dunce = { workspace = true }
 env-flags = { workspace = true }
 eventsource-stream = { workspace = true }
 futures = { workspace = true }
+http = { workspace = true }
 indexmap = { workspace = true }
 libc = { workspace = true }
 mcp-types = { workspace = true }
--- a/codex-rs/core/src/auth.rs
+++ b/codex-rs/core/src/auth.rs
@@ -21,6 +21,7 @@ use codex_app_server_protocol::AuthMode;
 use codex_protocol::config_types::ForcedLoginMethod;

 use crate::config::Config;
+use crate::default_client::CodexHttpClient;
 use crate::token_data::PlanType;
 use crate::token_data::TokenData;
 use crate::token_data::parse_id_token;
@@ -32,7 +33,7 @@ pub struct CodexAuth {
    pub(crate) api_key: Option<String>,
    pub(crate) auth_dot_json: Arc<Mutex<Option<AuthDotJson>>>,
    pub(crate) auth_file: PathBuf,
-    pub(crate) client: reqwest::Client,
+    pub(crate) client: CodexHttpClient,
 }

 impl PartialEq for CodexAuth {
@@ -43,6 +44,8 @@ impl PartialEq for CodexAuth {

 impl CodexAuth {
    pub async fn refresh_token(&self) -> Result<String, std::io::Error> {
+        tracing::info!("Refreshing token");
+
        let token_data = self
            .get_current_token_data()
            .ok_or(std::io::Error::other("Token data is not available."))?;
@@ -180,7 +183,7 @@ impl CodexAuth {
        }
    }

-    fn from_api_key_with_client(api_key: &str, client: reqwest::Client) -> Self {
+    fn from_api_key_with_client(api_key: &str, client: CodexHttpClient) -> Self {
        Self {
            api_key: Some(api_key.to_owned()),
            mode: AuthMode::ApiKey,
@@ -400,7 +403,7 @@ async fn update_tokens(

 async fn try_refresh_token(
    refresh_token: String,
-    client: &reqwest::Client,
+    client: &CodexHttpClient,
 ) -> std::io::Result<RefreshResponse> {
    let refresh_request = RefreshRequest {
        client_id: CLIENT_ID,
@@ -916,7 +919,10 @@ impl AuthManager {
                self.reload();
                Ok(Some(token))
            }
-            Err(e) => Err(e),
+            Err(e) => {
+                tracing::error!("Failed to refresh token: {}", e);
+                Err(e)
+            }
        }
    }

--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -4,6 +4,7 @@ use crate::ModelProviderInfo;
 use crate::client_common::Prompt;
 use crate::client_common::ResponseEvent;
 use crate::client_common::ResponseStream;
+use crate::default_client::CodexHttpClient;
 use crate::error::CodexErr;
 use crate::error::ConnectionFailedError;
 use crate::error::ResponseStreamFailed;
@@ -36,7 +37,7 @@ use tracing::trace;
 pub(crate) async fn stream_chat_completions(
    prompt: &Prompt,
    model_family: &ModelFamily,
-    client: &reqwest::Client,
+    client: &CodexHttpClient,
    provider: &ModelProviderInfo,
    otel_event_manager: &OtelEventManager,
 ) -> Result<ResponseStream> {
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -39,6 +39,7 @@ use crate::client_common::ResponsesApiRequest;
 use crate::client_common::create_reasoning_param_for_request;
 use crate::client_common::create_text_param_for_request;
 use crate::config::Config;
+use crate::default_client::CodexHttpClient;
 use crate::default_client::create_client;
 use crate::error::CodexErr;
 use crate::error::ConnectionFailedError;
@@ -81,7 +82,7 @@ pub struct ModelClient {
    config: Arc<Config>,
    auth_manager: Option<Arc<AuthManager>>,
    otel_event_manager: OtelEventManager,
-    client: reqwest::Client,
+    client: CodexHttpClient,
    provider: ModelProviderInfo,
    conversation_id: ConversationId,
    effort: Option<ReasoningEffortConfig>,
@@ -133,6 +134,14 @@ impl ModelClient {
        self.stream_with_task_kind(prompt, TaskKind::Regular).await
    }

+    pub fn config(&self) -> Arc<Config> {
+        Arc::clone(&self.config) // hands out another handle to the same Config
+    }
+
+    pub fn provider(&self) -> &ModelProviderInfo {
+        &self.provider // borrowed; a caller that needs ownership clones it
+    }
+
    pub(crate) async fn stream_with_task_kind(
        &self,
        prompt: &Prompt,
@@ -300,6 +309,7 @@ impl ModelClient {
            "POST to {}: {:?}",
            self.provider.get_full_url(&auth),
            serde_json::to_string(payload_json)
+                .unwrap_or("<unable to serialize payload>".to_string())
        );

        let mut req_builder = self
@@ -335,13 +345,6 @@ impl ModelClient {
                .headers()
                .get("cf-ray")
                .map(|v| v.to_str().unwrap_or_default().to_string());
-
-            debug!(
-                "Response status: {}, cf-ray: {:?}, version: {:?}",
-                resp.status(),
-                request_id,
-                resp.version()
-            );
        }

        match res {
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -8,8 +8,10 @@ use crate::AuthManager;
 use crate::client_common::REVIEW_PROMPT;
 use crate::function_tool::FunctionCallError;
 use crate::mcp::auth::McpAuthStatusEntry;
+use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
 use crate::parse_command::parse_command;
 use crate::parse_turn_item;
+use crate::response_processing::process_items;
 use crate::review_format::format_review_findings_block;
 use crate::terminal;
 use crate::user_notification::UserNotifier;
@@ -86,6 +88,7 @@ use crate::protocol::Op;
 use crate::protocol::RateLimitSnapshot;
 use crate::protocol::ReviewDecision;
 use crate::protocol::ReviewOutputEvent;
+use crate::protocol::SandboxCommandAssessment;
 use crate::protocol::SandboxPolicy;
 use crate::protocol::SessionConfiguredEvent;
 use crate::protocol::StreamErrorEvent;
@@ -753,6 +756,32 @@ impl Session {
        }
    }

+    pub(crate) async fn assess_sandbox_command(
+        &self,
+        turn_context: &TurnContext,
+        call_id: &str,
+        command: &[String],
+        failure_message: Option<&str>,
+    ) -> Option<SandboxCommandAssessment> {
+        let config = turn_context.client.config(); // taken from this turn's model client
+        let provider = turn_context.client.provider().clone(); // owned copy for the helper
+        let auth_manager = Arc::clone(&self.services.auth_manager); // session-wide services
+        let otel = self.services.otel_event_manager.clone();
+        crate::sandboxing::assessment::assess_command(
+            config,
+            provider,
+            auth_manager,
+            &otel,
+            self.conversation_id,
+            call_id,
+            command,
+            &turn_context.sandbox_policy,
+            &turn_context.cwd,
+            failure_message,
+        )
+        .await // returned unchanged; NOTE(review): confirm when assess_command yields None
+    }
+
    /// Emit an exec approval request event and await the user's decision.
    ///
    /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
@@ -765,6 +794,7 @@ impl Session {
        command: Vec<String>,
        cwd: PathBuf,
        reason: Option<String>,
+        risk: Option<SandboxCommandAssessment>,
    ) -> ReviewDecision {
        let sub_id = turn_context.sub_id.clone();
        // Add the tx_approve callback to the map before sending the request.
@@ -790,6 +820,7 @@ impl Session {
            command,
            cwd,
            reason,
+            risk,
            parsed_cmd,
        });
        self.send_event(turn_context, event).await;
@@ -855,7 +886,7 @@ impl Session {

    /// Records input items: always append to conversation history and
    /// persist these response items to rollout.
-    async fn record_conversation_items(&self, items: &[ResponseItem]) {
+    pub(crate) async fn record_conversation_items(&self, items: &[ResponseItem]) {
        self.record_into_history(items).await;
        self.persist_rollout_response_items(items).await;
    }
@@ -1608,109 +1639,13 @@ pub(crate) async fn run_task(
                let token_limit_reached = total_usage_tokens
                    .map(|tokens| tokens >= limit)
                    .unwrap_or(false);
-                let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
-                let mut responses = Vec::<ResponseInputItem>::new();
-                for processed_response_item in processed_items {
-                    let ProcessedResponseItem { item, response } = processed_response_item;
-                    match (&item, &response) {
-                        (ResponseItem::Message { role, .. }, None) if role == "assistant" => {
-                            // If the model returned a message, we need to record it.
-                            items_to_record_in_conversation_history.push(item);
-                        }
-                        (
-                            ResponseItem::LocalShellCall { .. },
-                            Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
-                        ) => {
-                            items_to_record_in_conversation_history.push(item);
-                            items_to_record_in_conversation_history.push(
-                                ResponseItem::FunctionCallOutput {
-                                    call_id: call_id.clone(),
-                                    output: output.clone(),
-                                },
-                            );
-                        }
-                        (
-                            ResponseItem::FunctionCall { .. },
-                            Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
-                        ) => {
-                            items_to_record_in_conversation_history.push(item);
-                            items_to_record_in_conversation_history.push(
-                                ResponseItem::FunctionCallOutput {
-                                    call_id: call_id.clone(),
-                                    output: output.clone(),
-                                },
-                            );
-                        }
-                        (
-                            ResponseItem::CustomToolCall { .. },
-                            Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
-                        ) => {
-                            items_to_record_in_conversation_history.push(item);
-                            items_to_record_in_conversation_history.push(
-                                ResponseItem::CustomToolCallOutput {
-                                    call_id: call_id.clone(),
-                                    output: output.clone(),
-                                },
-                            );
-                        }
-                        (
-                            ResponseItem::FunctionCall { .. },
-                            Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
-                        ) => {
-                            items_to_record_in_conversation_history.push(item);
-                            let output = match result {
-                                Ok(call_tool_result) => {
-                                    convert_call_tool_result_to_function_call_output_payload(
-                                        call_tool_result,
-                                    )
-                                }
-                                Err(err) => FunctionCallOutputPayload {
-                                    content: err.clone(),
-                                    success: Some(false),
-                                },
-                            };
-                            items_to_record_in_conversation_history.push(
-                                ResponseItem::FunctionCallOutput {
-                                    call_id: call_id.clone(),
-                                    output,
-                                },
-                            );
-                        }
-                        (
-                            ResponseItem::Reasoning {
-                                id,
-                                summary,
-                                content,
-                                encrypted_content,
-                            },
-                            None,
-                        ) => {
-                            items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
-                                id: id.clone(),
-                                summary: summary.clone(),
-                                content: content.clone(),
-                                encrypted_content: encrypted_content.clone(),
-                            });
-                        }
-                        _ => {
-                            warn!("Unexpected response item: {item:?} with response: {response:?}");
-                        }
-                    };
-                    if let Some(response) = response {
-                        responses.push(response);
-                    }
-                }
-
-                // Only attempt to take the lock if there is something to record.
-                if !items_to_record_in_conversation_history.is_empty() {
-                    if is_review_mode {
-                        review_thread_history
-                            .record_items(items_to_record_in_conversation_history.iter());
-                    } else {
-                        sess.record_conversation_items(&items_to_record_in_conversation_history)
-                            .await;
-                    }
-                }
+                let (responses, items_to_record_in_conversation_history) = process_items(
+                    processed_items,
+                    is_review_mode,
+                    &mut review_thread_history,
+                    &sess,
+                )
+                .await;

                if token_limit_reached {
                    if auto_compact_recently_attempted {
@@ -1749,7 +1684,16 @@ pub(crate) async fn run_task(
                }
                continue;
            }
-            Err(CodexErr::TurnAborted) => {
+            Err(CodexErr::TurnAborted {
+                dangling_artifacts: processed_items,
+            }) => {
+                let _ = process_items(
+                    processed_items,
+                    is_review_mode,
+                    &mut review_thread_history,
+                    &sess,
+                )
+                .await;
                // Aborted turn is reported via a different event.
                break;
            }
@@ -1850,7 +1794,13 @@ async fn run_turn(
        .await
        {
            Ok(output) => return Ok(output),
-            Err(CodexErr::TurnAborted) => return Err(CodexErr::TurnAborted),
+            Err(CodexErr::TurnAborted {
+                dangling_artifacts: processed_items,
+            }) => {
+                return Err(CodexErr::TurnAborted {
+                    dangling_artifacts: processed_items,
+                });
+            }
            Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
            Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
            Err(e @ CodexErr::Fatal(_)) => return Err(e),
@@ -1903,9 +1853,9 @@ async fn run_turn(
 /// "handled" such that it produces a `ResponseInputItem` that needs to be
 /// sent back to the model on the next turn.
 #[derive(Debug)]
-pub(crate) struct ProcessedResponseItem {
-    pub(crate) item: ResponseItem,
-    pub(crate) response: Option<ResponseInputItem>,
+pub struct ProcessedResponseItem {
+    pub item: ResponseItem,
+    pub response: Option<ResponseInputItem>,
 }

 #[derive(Debug)]
@@ -1954,7 +1904,15 @@ async fn try_run_turn(
        // Poll the next item from the model stream. We must inspect *both* Ok and Err
        // cases so that transient stream failures (e.g., dropped SSE connection before
        // `response.completed`) bubble up and trigger the caller's retry logic.
-        let event = stream.next().or_cancel(&cancellation_token).await?;
+        let event = match stream.next().or_cancel(&cancellation_token).await {
+            Ok(event) => event,
+            Err(codex_async_utils::CancelErr::Cancelled) => {
+                let processed_items = output.try_collect().await?;
+                return Err(CodexErr::TurnAborted {
+                    dangling_artifacts: processed_items,
+                });
+            }
+        };

        let event = match event {
            Some(res) => res?,
@@ -1978,7 +1936,8 @@ async fn try_run_turn(
                        let payload_preview = call.payload.log_payload().into_owned();
                        tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);

-                        let response = tool_runtime.handle_tool_call(call);
+                        let response =
+                            tool_runtime.handle_tool_call(call, cancellation_token.child_token());

                        output.push_back(
                            async move {
@@ -2060,12 +2019,7 @@ async fn try_run_turn(
            } => {
                sess.update_token_usage_info(turn_context.as_ref(), token_usage.as_ref())
                    .await;
-
-                let processed_items = output
-                    .try_collect()
-                    .or_cancel(&cancellation_token)
-                    .await??;
-
+                let processed_items = output.try_collect().await?;
                let unified_diff = {
                    let mut tracker = turn_diff_tracker.lock().await;
                    tracker.get_unified_diff()
@@ -2169,7 +2123,7 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
        }
    })
 }
-fn convert_call_tool_result_to_function_call_output_payload(
+pub(crate) fn convert_call_tool_result_to_function_call_output_payload(
    call_tool_result: &CallToolResult,
 ) -> FunctionCallOutputPayload {
    let CallToolResult {
@@ -2275,12 +2229,24 @@ fn mcp_init_error_display(
        // That means that the user has to specify a personal access token either via bearer_token_env_var or http_headers.
        // https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
        format!(
-            "GitHub MCP does not support OAuth. Log in by adding `bearer_token_env_var = CODEX_GITHUB_PAT` in the `mcp_servers.{server_name}` section of your config.toml"
+            "GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
        )
    } else if is_mcp_client_auth_required_error(err) {
        format!(
            "The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
        )
+    } else if is_mcp_client_startup_timeout_error(err) {
+        let startup_timeout_secs = match entry {
+            Some(entry) => match entry.config.startup_timeout_sec {
+                Some(timeout) => timeout,
+                None => DEFAULT_STARTUP_TIMEOUT,
+            },
+            None => DEFAULT_STARTUP_TIMEOUT,
+        }
+        .as_secs();
+        format!(
+            "MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
+        )
    } else {
        format!("MCP client for `{server_name}` failed to start: {err:#}")
    }
@@ -2291,6 +2257,12 @@ fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
    error.to_string().contains("Auth required")
 }

+fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
+    let rendered = error.to_string(); // Display text of the error is what gets searched
+    let markers = ["request timed out", "timed out handshaking with MCP server"];
+    markers.iter().any(|marker| rendered.contains(*marker))
+}
+
 #[cfg(test)]
 pub(crate) use tests::make_session_and_context;

@@ -2316,7 +2288,11 @@ mod tests {
    use crate::tools::MODEL_FORMAT_MAX_LINES;
    use crate::tools::MODEL_FORMAT_TAIL_LINES;
    use crate::tools::ToolRouter;
-    use crate::tools::handle_container_exec_with_params;
+    use crate::tools::context::ToolInvocation;
+    use crate::tools::context::ToolOutput;
+    use crate::tools::context::ToolPayload;
+    use crate::tools::handlers::ShellHandler;
+    use crate::tools::registry::ToolHandler;
    use crate::turn_diff_tracker::TurnDiffTracker;
    use codex_app_server_protocol::AuthMode;
    use codex_protocol::models::ContentItem;
@@ -3039,15 +3015,26 @@ mod tests {
        let tool_name = "shell";
        let call_id = "test-call".to_string();

-        let resp = handle_container_exec_with_params(
-            tool_name,
-            params,
-            Arc::clone(&session),
-            Arc::clone(&turn_context),
-            Arc::clone(&turn_diff_tracker),
-            call_id,
-        )
-        .await;
+        let handler = ShellHandler;
+        let resp = handler
+            .handle(ToolInvocation {
+                session: Arc::clone(&session),
+                turn: Arc::clone(&turn_context),
+                tracker: Arc::clone(&turn_diff_tracker),
+                call_id,
+                tool_name: tool_name.to_string(),
+                payload: ToolPayload::Function {
+                    arguments: serde_json::json!({
+                        "command": params.command.clone(),
+                        "workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
+                        "timeout_ms": params.timeout_ms,
+                        "with_escalated_permissions": params.with_escalated_permissions,
+                        "justification": params.justification.clone(),
+                    })
+                    .to_string(),
+                },
+            })
+            .await;

        let Err(FunctionCallError::RespondToModel(output)) = resp else {
            panic!("expected error result");
@@ -3066,17 +3053,30 @@ mod tests {
            .expect("unique turn context Arc")
            .sandbox_policy = SandboxPolicy::DangerFullAccess;

-        let resp2 = handle_container_exec_with_params(
-            tool_name,
-            params2,
-            Arc::clone(&session),
-            Arc::clone(&turn_context),
-            Arc::clone(&turn_diff_tracker),
-            "test-call-2".to_string(),
-        )
-        .await;
+        let resp2 = handler
+            .handle(ToolInvocation {
+                session: Arc::clone(&session),
+                turn: Arc::clone(&turn_context),
+                tracker: Arc::clone(&turn_diff_tracker),
+                call_id: "test-call-2".to_string(),
+                tool_name: tool_name.to_string(),
+                payload: ToolPayload::Function {
+                    arguments: serde_json::json!({
+                        "command": params2.command.clone(),
+                        "workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
+                        "timeout_ms": params2.timeout_ms,
+                        "with_escalated_permissions": params2.with_escalated_permissions,
+                        "justification": params2.justification.clone(),
+                    })
+                    .to_string(),
+                },
+            })
+            .await;

-        let output = resp2.expect("expected Ok result");
+        let output = match resp2.expect("expected Ok result") {
+            ToolOutput::Function { content, .. } => content,
+            _ => panic!("unexpected tool output"),
+        };

        #[derive(Deserialize, PartialEq, Eq, Debug)]
        struct ResponseExecMetadata {
@@ -3120,7 +3120,7 @@ mod tests {
        let display = mcp_init_error_display(server_name, Some(&entry), &err);

        let expected = format!(
-            "GitHub MCP does not support OAuth. Log in by adding `bearer_token_env_var = CODEX_GITHUB_PAT` in the `mcp_servers.{server_name}` section of your config.toml"
+            "GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
        );

        assert_eq!(expected, display);
@@ -3167,4 +3167,17 @@ mod tests {

        assert_eq!(expected, display);
    }
+
+    #[test]
+    fn mcp_init_error_display_includes_startup_timeout_hint() {
+        let server_name = "slow";
+        let err = anyhow::anyhow!("request timed out"); // text the timeout detector looks for
+
+        let display = mcp_init_error_display(server_name, None, &err); // None => default timeout
+
+        assert_eq!(
+            "MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX",
+            display // expected text assumes DEFAULT_STARTUP_TIMEOUT is 10 seconds
+        );
+    }
 }
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -223,6 +223,9 @@ pub struct Config {

    pub tools_web_search_request: bool,

+    /// When `true`, run a model-based assessment for commands denied by the sandbox.
+    pub experimental_sandbox_command_assessment: bool,
+
    pub use_experimental_streamable_shell_tool: bool,

    /// If set to `true`, used only the experimental unified exec tool.
@@ -958,6 +961,7 @@ pub struct ConfigToml {
    pub experimental_use_unified_exec_tool: Option<bool>,
    pub experimental_use_rmcp_client: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
 }

 impl From<ConfigToml> for UserSavedConfig {
@@ -1118,6 +1122,7 @@ pub struct ConfigOverrides {
    pub include_view_image_tool: Option<bool>,
    pub show_raw_agent_reasoning: Option<bool>,
    pub tools_web_search_request: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    /// Additional directories that should be treated as writable roots for this session.
    pub additional_writable_roots: Vec<PathBuf>,
 }
@@ -1147,6 +1152,7 @@ impl Config {
            include_view_image_tool: include_view_image_tool_override,
            show_raw_agent_reasoning,
            tools_web_search_request: override_tools_web_search_request,
+            experimental_sandbox_command_assessment: sandbox_command_assessment_override,
            additional_writable_roots,
        } = overrides;

@@ -1172,6 +1178,7 @@ impl Config {
            include_apply_patch_tool: include_apply_patch_tool_override,
            include_view_image_tool: include_view_image_tool_override,
            web_search_request: override_tools_web_search_request,
+            experimental_sandbox_command_assessment: sandbox_command_assessment_override,
        };

        let features = Features::from_config(&cfg, &config_profile, feature_overrides);
@@ -1269,6 +1276,8 @@ impl Config {
        let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
        let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
        let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
+        let experimental_sandbox_command_assessment =
+            features.enabled(Feature::SandboxCommandAssessment);

        let forced_chatgpt_workspace_id =
            cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
@@ -1390,6 +1399,7 @@ impl Config {
            forced_login_method,
            include_apply_patch_tool: include_apply_patch_tool_flag,
            tools_web_search_request,
+            experimental_sandbox_command_assessment,
            use_experimental_streamable_shell_tool,
            use_experimental_unified_exec_tool,
            use_experimental_use_rmcp_client,
@@ -2873,6 +2883,7 @@ model_verbosity = "high"
                forced_login_method: None,
                include_apply_patch_tool: false,
                tools_web_search_request: false,
+                experimental_sandbox_command_assessment: false,
                use_experimental_streamable_shell_tool: false,
                use_experimental_unified_exec_tool: false,
                use_experimental_use_rmcp_client: false,
@@ -2941,6 +2952,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
@@ -3024,6 +3036,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
@@ -3093,6 +3106,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
--- a/codex-rs/core/src/config_profile.rs
+++ b/codex-rs/core/src/config_profile.rs
@@ -26,6 +26,7 @@ pub struct ConfigProfile {
    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_rmcp_client: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    pub tools_web_search: Option<bool>,
    pub tools_view_image: Option<bool>,
    /// Optional feature toggles scoped to this profile.
--- a/codex-rs/core/src/conversation_history.rs
+++ b/codex-rs/core/src/conversation_history.rs
@@ -1,5 +1,7 @@
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
+use codex_protocol::protocol::TokenUsage;
+use codex_protocol::protocol::TokenUsageInfo;
 use tracing::error;

 /// Transcript of conversation history
@@ -7,11 +9,28 @@ use tracing::error;
 pub(crate) struct ConversationHistory {
    /// The oldest items are at the beginning of the vector.
    items: Vec<ResponseItem>,
+    token_info: Option<TokenUsageInfo>,
 }

 impl ConversationHistory {
    pub(crate) fn new() -> Self {
-        Self { items: Vec::new() }
+        Self {
+            items: Vec::new(),
+            token_info: TokenUsageInfo::new_or_append(&None, &None, None),
+        }
+    }
+
+    pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
+        self.token_info.clone() // owned copy; the stored value is left untouched
+    }
+
+    pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
+        match &mut self.token_info {
+            Some(info) => info.fill_to_context_window(context_window), // mutate existing info
+            None => { // nothing stored yet: build a fresh value from the window size
+                self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
+            }
+        }
     }

    /// `items` is ordered from oldest to newest.
@@ -301,6 +320,18 @@ impl ConversationHistory {
            self.items.remove(pos);
        }
    }
+
+    pub(crate) fn update_token_info(
+        &mut self,
+        usage: &TokenUsage,
+        model_context_window: Option<i64>,
+    ) {
+        self.token_info = TokenUsageInfo::new_or_append(
+            &self.token_info,
+            &Some(usage.clone()), // helper takes `&Option<TokenUsage>`, hence the clone
+            model_context_window,
+        ); // the stored info is replaced by whatever new_or_append returns
+    }
 }

 #[inline]
--- a/codex-rs/core/src/conversation_manager.rs
+++ b/codex-rs/core/src/conversation_manager.rs
@@ -32,8 +32,34 @@ pub struct NewConversation {

 /// [`ConversationManager`] is responsible for creating conversations and
 /// maintaining them in memory.
+#[derive(Clone)]
+struct ConversationEntry {
+    conversation: Arc<CodexConversation>,
+    session_configured: SessionConfiguredEvent,
+}
+
+impl ConversationEntry {
+    fn new(
+        conversation: Arc<CodexConversation>,
+        session_configured: SessionConfiguredEvent,
+    ) -> Self {
+        Self {
+            conversation,
+            session_configured,
+        }
+    }
+
+    fn to_new_conversation(&self, conversation_id: ConversationId) -> NewConversation {
+        NewConversation {
+            conversation_id,
+            conversation: self.conversation.clone(),
+            session_configured: self.session_configured.clone(),
+        }
+    }
+}
+
 pub struct ConversationManager {
-    conversations: Arc<RwLock<HashMap<ConversationId, Arc<CodexConversation>>>>,
+    conversations: Arc<RwLock<HashMap<ConversationId, ConversationEntry>>>,
    auth_manager: Arc<AuthManager>,
    session_source: SessionSource,
 }
@@ -99,10 +125,11 @@ impl ConversationManager {
        };

        let conversation = Arc::new(CodexConversation::new(codex));
+        let entry = ConversationEntry::new(conversation.clone(), session_configured.clone());
        self.conversations
            .write()
            .await
-            .insert(conversation_id, conversation.clone());
+            .insert(conversation_id, entry);

        Ok(NewConversation {
            conversation_id,
@@ -118,7 +145,7 @@ impl ConversationManager {
        let conversations = self.conversations.read().await;
        conversations
            .get(&conversation_id)
-            .cloned()
+            .map(|entry| entry.conversation.clone())
            .ok_or_else(|| CodexErr::ConversationNotFound(conversation_id))
    }

@@ -129,11 +156,22 @@ impl ConversationManager {
        auth_manager: Arc<AuthManager>,
    ) -> CodexResult<NewConversation> {
        let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?;
-        let CodexSpawnOk {
-            codex,
-            conversation_id,
-        } = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
-        self.finalize_spawn(codex, conversation_id).await
+        if let InitialHistory::Resumed(resumed) = &initial_history
+            && let Some(existing) = self
+                .conversations
+                .read()
+                .await
+                .get(&resumed.conversation_id)
+                .cloned()
+        {
+            Ok(existing.to_new_conversation(resumed.conversation_id))
+        } else {
+            let CodexSpawnOk {
+                codex,
+                conversation_id,
+            } = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
+            self.finalize_spawn(codex, conversation_id).await
+        }
    }

    /// Removes the conversation from the manager's internal map, though the
@@ -144,7 +182,11 @@ impl ConversationManager {
        &self,
        conversation_id: &ConversationId,
    ) -> Option<Arc<CodexConversation>> {
-        self.conversations.write().await.remove(conversation_id)
+        self.conversations
+            .write()
+            .await
+            .remove(conversation_id)
+            .map(|entry| entry.conversation)
    }

    /// Fork an existing conversation by taking messages up to the given position
--- a/codex-rs/core/src/default_client.rs
+++ b/codex-rs/core/src/default_client.rs
@@ -1,5 +1,13 @@
 use crate::spawn::CODEX_SANDBOX_ENV_VAR;
+use http::Error as HttpError;
+use reqwest::IntoUrl;
+use reqwest::Method;
+use reqwest::Response;
+use reqwest::header::HeaderName;
 use reqwest::header::HeaderValue;
+use serde::Serialize;
+use std::collections::HashMap;
+use std::fmt::Display;
 use std::sync::LazyLock;
 use std::sync::Mutex;
 use std::sync::OnceLock;
@@ -22,6 +30,130 @@ use std::sync::OnceLock;
 pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));
 pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs";
 pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
+
+#[derive(Clone, Debug)]
+pub struct CodexHttpClient {
+    inner: reqwest::Client,
+}
+
+impl CodexHttpClient {
+    fn new(inner: reqwest::Client) -> Self {
+        Self { inner }
+    }
+
+    pub fn get<U>(&self, url: U) -> CodexRequestBuilder
+    where
+        U: IntoUrl,
+    {
+        self.request(Method::GET, url)
+    }
+
+    pub fn post<U>(&self, url: U) -> CodexRequestBuilder
+    where
+        U: IntoUrl,
+    {
+        self.request(Method::POST, url)
+    }
+
+    pub fn request<U>(&self, method: Method, url: U) -> CodexRequestBuilder
+    where
+        U: IntoUrl,
+    {
+        let url_str = url.as_str().to_string();
+        CodexRequestBuilder::new(self.inner.request(method.clone(), url), method, url_str)
+    }
+}
+
+#[must_use = "requests are not sent unless `send` is awaited"]
+#[derive(Debug)]
+pub struct CodexRequestBuilder {
+    builder: reqwest::RequestBuilder,
+    method: Method,
+    url: String,
+}
+
+impl CodexRequestBuilder {
+    fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self {
+        Self {
+            builder,
+            method,
+            url,
+        }
+    }
+
+    fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self {
+        Self {
+            builder: f(self.builder),
+            method: self.method,
+            url: self.url,
+        }
+    }
+
+    pub fn header<K, V>(self, key: K, value: V) -> Self
+    where
+        HeaderName: TryFrom<K>,
+        <HeaderName as TryFrom<K>>::Error: Into<HttpError>,
+        HeaderValue: TryFrom<V>,
+        <HeaderValue as TryFrom<V>>::Error: Into<HttpError>,
+    {
+        self.map(|builder| builder.header(key, value))
+    }
+
+    pub fn bearer_auth<T>(self, token: T) -> Self
+    where
+        T: Display,
+    {
+        self.map(|builder| builder.bearer_auth(token))
+    }
+
+    pub fn json<T>(self, value: &T) -> Self
+    where
+        T: ?Sized + Serialize,
+    {
+        self.map(|builder| builder.json(value))
+    }
+
+    pub async fn send(self) -> Result<Response, reqwest::Error> {
+        match self.builder.send().await {
+            Ok(response) => {
+                let request_ids = Self::extract_request_ids(&response);
+                tracing::debug!(
+                    method = %self.method,
+                    url = %self.url,
+                    status = %response.status(),
+                    request_ids = ?request_ids,
+                    version = ?response.version(),
+                    "Request completed"
+                );
+
+                Ok(response)
+            }
+            Err(error) => {
+                let status = error.status();
+                tracing::debug!(
+                    method = %self.method,
+                    url = %self.url,
+                    status = status.map(|s| s.as_u16()),
+                    error = %error,
+                    "Request failed"
+                );
+                Err(error)
+            }
+        }
+    }
+
+    fn extract_request_ids(response: &Response) -> HashMap<String, String> {
+        ["cf-ray", "x-request-id", "x-oai-request-id"]
+            .iter()
+            .filter_map(|&name| {
+                let header_name = HeaderName::from_static(name);
+                let value = response.headers().get(header_name)?;
+                let value = value.to_str().ok()?.to_owned();
+                Some((name.to_owned(), value))
+            })
+            .collect()
+    }
+}
 #[derive(Debug, Clone)]
 pub struct Originator {
    pub value: String,
@@ -124,8 +256,8 @@ fn sanitize_user_agent(candidate: String, fallback: &str) -> String {
    }
 }

-/// Create a reqwest client with default `originator` and `User-Agent` headers set.
-pub fn create_client() -> reqwest::Client {
+/// Create an HTTP client with default `originator` and `User-Agent` headers set.
+pub fn create_client() -> CodexHttpClient {
    use reqwest::header::HeaderMap;

    let mut headers = HeaderMap::new();
@@ -140,7 +272,8 @@ pub fn create_client() -> reqwest::Client {
        builder = builder.no_proxy();
    }

-    builder.build().unwrap_or_else(|_| reqwest::Client::new())
+    let inner = builder.build().unwrap_or_else(|_| reqwest::Client::new());
+    CodexHttpClient::new(inner)
 }

 fn is_sandboxed() -> bool {
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -1,3 +1,4 @@
+use crate::codex::ProcessedResponseItem;
 use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
@@ -53,8 +54,11 @@ pub enum SandboxErr {

 #[derive(Error, Debug)]
 pub enum CodexErr {
+    // todo(aibrahim): get rid of this error carrying the dangling artifacts
    #[error("turn aborted")]
-    TurnAborted,
+    TurnAborted {
+        dangling_artifacts: Vec<ProcessedResponseItem>,
+    },

    /// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP
    /// handshake has succeeded but **before** it finished emitting `response.completed`.
@@ -158,7 +162,9 @@ pub enum CodexErr {

 impl From<CancelErr> for CodexErr {
    fn from(_: CancelErr) -> Self {
-        CodexErr::TurnAborted
+        CodexErr::TurnAborted {
+            dangling_artifacts: Vec::new(),
+        }
    }
 }

--- a/codex-rs/core/src/features.rs
+++ b/codex-rs/core/src/features.rs
@@ -39,6 +39,8 @@ pub enum Feature {
    ViewImageTool,
    /// Allow the model to request web searches.
    WebSearchRequest,
+    /// Enable the model-based risk assessments for sandboxed commands.
+    SandboxCommandAssessment,
 }

 impl Feature {
@@ -73,6 +75,7 @@ pub struct FeatureOverrides {
    pub include_apply_patch_tool: Option<bool>,
    pub include_view_image_tool: Option<bool>,
    pub web_search_request: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
 }

 impl FeatureOverrides {
@@ -137,6 +140,7 @@ impl Features {
        let mut features = Features::with_defaults();

        let base_legacy = LegacyFeatureToggles {
+            experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
            experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
            experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
            experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
@@ -154,6 +158,8 @@ impl Features {
        let profile_legacy = LegacyFeatureToggles {
            include_apply_patch_tool: config_profile.include_apply_patch_tool,
            include_view_image_tool: config_profile.include_view_image_tool,
+            experimental_sandbox_command_assessment: config_profile
+                .experimental_sandbox_command_assessment,
            experimental_use_freeform_apply_patch: config_profile
                .experimental_use_freeform_apply_patch,
            experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
@@ -236,4 +242,10 @@ pub const FEATURES: &[FeatureSpec] = &[
        stage: Stage::Stable,
        default_enabled: false,
    },
+    FeatureSpec {
+        id: Feature::SandboxCommandAssessment,
+        key: "experimental_sandbox_command_assessment",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
 ];
--- a/codex-rs/core/src/features/legacy.rs
+++ b/codex-rs/core/src/features/legacy.rs
@@ -9,6 +9,10 @@ struct Alias {
 }

 const ALIASES: &[Alias] = &[
+    Alias {
+        legacy_key: "experimental_sandbox_command_assessment",
+        feature: Feature::SandboxCommandAssessment,
+    },
    Alias {
        legacy_key: "experimental_use_unified_exec_tool",
        feature: Feature::UnifiedExec,
@@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
 pub struct LegacyFeatureToggles {
    pub include_apply_patch_tool: Option<bool>,
    pub include_view_image_tool: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_unified_exec_tool: Option<bool>,
@@ -69,6 +74,12 @@ impl LegacyFeatureToggles {
            self.include_apply_patch_tool,
            "include_apply_patch_tool",
        );
+        set_if_some(
+            features,
+            Feature::SandboxCommandAssessment,
+            self.experimental_sandbox_command_assessment,
+            "experimental_sandbox_command_assessment",
+        );
        set_if_some(
            features,
            Feature::ApplyPatchFreeform,
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -36,6 +36,7 @@ mod mcp_tool_call;
 mod message_history;
 mod model_provider_info;
 pub mod parse_command;
+mod response_processing;
 pub mod sandboxing;
 pub mod token_data;
 mod truncate;
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -49,7 +49,7 @@ const MCP_TOOL_NAME_DELIMITER: &str = "__";
 const MAX_TOOL_NAME_LENGTH: usize = 64;

 /// Default timeout for initializing MCP server & initially listing tools.
-const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
+pub const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);

 /// Default timeout for individual tool calls.
 const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60);
--- a/codex-rs/core/src/model_provider_info.rs
+++ b/codex-rs/core/src/model_provider_info.rs
@@ -6,6 +6,8 @@
 //!      key. These override or extend the defaults at runtime.

 use crate::CodexAuth;
+use crate::default_client::CodexHttpClient;
+use crate::default_client::CodexRequestBuilder;
 use codex_app_server_protocol::AuthMode;
 use serde::Deserialize;
 use serde::Serialize;
@@ -95,7 +97,7 @@ pub struct ModelProviderInfo {

 impl ModelProviderInfo {
    /// Construct a `POST` RequestBuilder for the given URL using the provided
-    /// reqwest Client applying:
+    /// [`CodexHttpClient`] applying:
    ///   • provider-specific headers (static + env based)
    ///   • Bearer auth header when an API key is available.
    ///   • Auth token for OAuth.
@@ -104,9 +106,9 @@ impl ModelProviderInfo {
    /// one produced by [`ModelProviderInfo::api_key`].
    pub async fn create_request_builder<'a>(
        &'a self,
-        client: &'a reqwest::Client,
+        client: &'a CodexHttpClient,
        auth: &Option<CodexAuth>,
-    ) -> crate::error::Result<reqwest::RequestBuilder> {
+    ) -> crate::error::Result<CodexRequestBuilder> {
        let effective_auth = if let Some(secret_key) = &self.experimental_bearer_token {
            Some(CodexAuth::from_api_key(secret_key))
        } else {
@@ -187,9 +189,9 @@ impl ModelProviderInfo {
    }

    /// Apply provider-specific HTTP headers (both static and environment-based)
-    /// onto an existing `reqwest::RequestBuilder` and return the updated
+    /// onto an existing [`CodexRequestBuilder`] and return the updated
    /// builder.
-    fn apply_http_headers(&self, mut builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
+    fn apply_http_headers(&self, mut builder: CodexRequestBuilder) -> CodexRequestBuilder {
        if let Some(extra) = &self.http_headers {
            for (k, v) in extra {
                builder = builder.header(k, v);
--- a/codex-rs/core/src/response_processing.rs
+++ b/codex-rs/core/src/response_processing.rs
@@ -0,0 +1,112 @@
+use crate::codex::Session;
+use crate::conversation_history::ConversationHistory;
+use codex_protocol::models::FunctionCallOutputPayload;
+use codex_protocol::models::ResponseInputItem;
+use codex_protocol::models::ResponseItem;
+use tracing::warn;
+
+/// Process streamed `ResponseItem`s from the model into the pair of:
+/// - items we should record in conversation history; and
+/// - `ResponseInputItem`s to send back to the model on the next turn.
+pub(crate) async fn process_items(
+    processed_items: Vec<crate::codex::ProcessedResponseItem>,
+    is_review_mode: bool,
+    review_thread_history: &mut ConversationHistory,
+    sess: &Session,
+) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
+    let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
+    let mut responses = Vec::<ResponseInputItem>::new();
+    for processed_response_item in processed_items {
+        let crate::codex::ProcessedResponseItem { item, response } = processed_response_item;
+        match (&item, &response) {
+            (ResponseItem::Message { role, .. }, None) if role == "assistant" => {
+                // If the model returned a message, we need to record it.
+                items_to_record_in_conversation_history.push(item);
+            }
+            (
+                ResponseItem::LocalShellCall { .. },
+                Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
+            ) => {
+                items_to_record_in_conversation_history.push(item);
+                items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: output.clone(),
+                });
+            }
+            (
+                ResponseItem::FunctionCall { .. },
+                Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
+            ) => {
+                items_to_record_in_conversation_history.push(item);
+                items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: output.clone(),
+                });
+            }
+            (
+                ResponseItem::CustomToolCall { .. },
+                Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
+            ) => {
+                items_to_record_in_conversation_history.push(item);
+                items_to_record_in_conversation_history.push(ResponseItem::CustomToolCallOutput {
+                    call_id: call_id.clone(),
+                    output: output.clone(),
+                });
+            }
+            (
+                ResponseItem::FunctionCall { .. },
+                Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
+            ) => {
+                items_to_record_in_conversation_history.push(item);
+                let output = match result {
+                    Ok(call_tool_result) => {
+                        crate::codex::convert_call_tool_result_to_function_call_output_payload(
+                            call_tool_result,
+                        )
+                    }
+                    Err(err) => FunctionCallOutputPayload {
+                        content: err.clone(),
+                        success: Some(false),
+                    },
+                };
+                items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output,
+                });
+            }
+            (
+                ResponseItem::Reasoning {
+                    id,
+                    summary,
+                    content,
+                    encrypted_content,
+                },
+                None,
+            ) => {
+                items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
+                    id: id.clone(),
+                    summary: summary.clone(),
+                    content: content.clone(),
+                    encrypted_content: encrypted_content.clone(),
+                });
+            }
+            _ => {
+                warn!("Unexpected response item: {item:?} with response: {response:?}");
+            }
+        };
+        if let Some(response) = response {
+            responses.push(response);
+        }
+    }
+
+    // Only attempt to take the lock if there is something to record.
+    if !items_to_record_in_conversation_history.is_empty() {
+        if is_review_mode {
+            review_thread_history.record_items(items_to_record_in_conversation_history.iter());
+        } else {
+            sess.record_conversation_items(&items_to_record_in_conversation_history)
+                .await;
+        }
+    }
+    (responses, items_to_record_in_conversation_history)
+}
--- a/codex-rs/core/src/rollout/list.rs
+++ b/codex-rs/core/src/rollout/list.rs
@@ -1,12 +1,11 @@
 use std::cmp::Reverse;
 use std::io::{self};
+use std::num::NonZero;
 use std::path::Path;
 use std::path::PathBuf;
-
-use codex_file_search as file_search;
-use std::num::NonZero;
 use std::sync::Arc;
 use std::sync::atomic::AtomicBool;
+
 use time::OffsetDateTime;
 use time::PrimitiveDateTime;
 use time::format_description::FormatItem;
@@ -15,6 +14,7 @@ use uuid::Uuid;

 use super::SESSIONS_SUBDIR;
 use crate::protocol::EventMsg;
+use codex_file_search as file_search;
 use codex_protocol::protocol::RolloutItem;
 use codex_protocol::protocol::RolloutLine;
 use codex_protocol::protocol::SessionSource;
@@ -515,6 +515,7 @@ pub async fn find_conversation_path_by_id_str(
        threads,
        cancel,
        compute_indices,
+        false,
    )
    .map_err(|e| io::Error::other(format!("file search failed: {e}")))?;

--- a/codex-rs/core/src/sandboxing/assessment.rs
+++ b/codex-rs/core/src/sandboxing/assessment.rs
@@ -0,0 +1,275 @@
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::Duration;
+use std::time::Instant;
+
+use crate::AuthManager;
+use crate::ModelProviderInfo;
+use crate::client::ModelClient;
+use crate::client_common::Prompt;
+use crate::client_common::ResponseEvent;
+use crate::config::Config;
+use crate::protocol::SandboxPolicy;
+use askama::Template;
+use codex_otel::otel_event_manager::OtelEventManager;
+use codex_protocol::ConversationId;
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::protocol::SandboxCommandAssessment;
+use futures::StreamExt;
+use serde_json::json;
+use tokio::time::timeout;
+use tracing::warn;
+
+const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
+
+const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
+    "data_deletion",
+    "data_exfiltration",
+    "privilege_escalation",
+    "system_modification",
+    "network_access",
+    "resource_exhaustion",
+    "compliance",
+];
+
+#[derive(Template)]
+#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
+struct SandboxAssessmentPromptTemplate<'a> {
+    platform: &'a str,
+    sandbox_policy: &'a str,
+    filesystem_roots: Option<&'a str>,
+    working_directory: &'a str,
+    command_argv: &'a str,
+    command_joined: &'a str,
+    sandbox_failure_message: Option<&'a str>,
+}
+
+#[allow(clippy::too_many_arguments)]
+pub(crate) async fn assess_command(
+    config: Arc<Config>,
+    provider: ModelProviderInfo,
+    auth_manager: Arc<AuthManager>,
+    parent_otel: &OtelEventManager,
+    conversation_id: ConversationId,
+    call_id: &str,
+    command: &[String],
+    sandbox_policy: &SandboxPolicy,
+    cwd: &Path,
+    failure_message: Option<&str>,
+) -> Option<SandboxCommandAssessment> {
+    if !config.experimental_sandbox_command_assessment || command.is_empty() {
+        return None;
+    }
+
+    let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
+    let command_joined =
+        shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
+    let failure = failure_message
+        .map(str::trim)
+        .filter(|msg| !msg.is_empty())
+        .map(str::to_string);
+
+    let cwd_str = cwd.to_string_lossy().to_string();
+    let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
+    let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
+    roots.sort();
+    roots.dedup();
+
+    let platform = std::env::consts::OS;
+    let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
+    let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
+        collected if collected.is_empty() => None,
+        collected => Some(collected.join(", ")),
+    };
+
+    let prompt_template = SandboxAssessmentPromptTemplate {
+        platform,
+        sandbox_policy: sandbox_summary.as_str(),
+        filesystem_roots: filesystem_roots.as_deref(),
+        working_directory: cwd_str.as_str(),
+        command_argv: command_json.as_str(),
+        command_joined: command_joined.as_str(),
+        sandbox_failure_message: failure.as_deref(),
+    };
+    let rendered_prompt = match prompt_template.render() {
+        Ok(rendered) => rendered,
+        Err(err) => {
+            warn!("failed to render sandbox assessment prompt: {err}");
+            return None;
+        }
+    };
+    let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
+        Some(split) => split,
+        None => {
+            warn!("rendered sandbox assessment prompt missing separator");
+            return None;
+        }
+    };
+    let system_prompt = system_prompt_section
+        .strip_prefix("System Prompt:\n")
+        .unwrap_or(system_prompt_section)
+        .trim()
+        .to_string();
+    let user_prompt = user_prompt_section
+        .strip_prefix("User Prompt:\n")
+        .unwrap_or(user_prompt_section)
+        .trim()
+        .to_string();
+
+    let prompt = Prompt {
+        input: vec![ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ContentItem::InputText { text: user_prompt }],
+        }],
+        tools: Vec::new(),
+        parallel_tool_calls: false,
+        base_instructions_override: Some(system_prompt),
+        output_schema: Some(sandbox_assessment_schema()),
+    };
+
+    let child_otel =
+        parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str());
+
+    let client = ModelClient::new(
+        Arc::clone(&config),
+        Some(auth_manager),
+        child_otel,
+        provider,
+        config.model_reasoning_effort,
+        config.model_reasoning_summary,
+        conversation_id,
+    );
+
+    let start = Instant::now();
+    let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
+        let mut stream = client.stream(&prompt).await?;
+        let mut last_json: Option<String> = None;
+        while let Some(event) = stream.next().await {
+            match event {
+                Ok(ResponseEvent::OutputItemDone(item)) => {
+                    if let Some(text) = response_item_text(&item) {
+                        last_json = Some(text);
+                    }
+                }
+                Ok(ResponseEvent::RateLimits(_)) => {}
+                Ok(ResponseEvent::Completed { .. }) => break,
+                Ok(_) => continue,
+                Err(err) => return Err(err),
+            }
+        }
+        Ok(last_json)
+    })
+    .await;
+    let duration = start.elapsed();
+    parent_otel.sandbox_assessment_latency(call_id, duration);
+
+    match assessment_result {
+        Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
+            Ok(assessment) => {
+                parent_otel.sandbox_assessment(
+                    call_id,
+                    "success",
+                    Some(assessment.risk_level),
+                    &assessment.risk_categories,
+                    duration,
+                );
+                return Some(assessment);
+            }
+            Err(err) => {
+                warn!("failed to parse sandbox assessment JSON: {err}");
+                parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration);
+            }
+        },
+        Ok(Ok(None)) => {
+            warn!("sandbox assessment response did not include any message");
+            parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration);
+        }
+        Ok(Err(err)) => {
+            warn!("sandbox assessment failed: {err}");
+            parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration);
+        }
+        Err(_) => {
+            warn!("sandbox assessment timed out");
+            parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration);
+        }
+    }
+
+    None
+}
+
+fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
+    match policy {
+        SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
+        SandboxPolicy::ReadOnly => "read-only".to_string(),
+        SandboxPolicy::WorkspaceWrite { network_access, .. } => {
+            let network = if *network_access {
+                "network"
+            } else {
+                "no-network"
+            };
+            format!("workspace-write (network_access={network})")
+        }
+    }
+}
+
+fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
+    let mut roots = vec![cwd.to_path_buf()];
+    if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy {
+        roots.extend(writable_roots.iter().cloned());
+    }
+    roots
+}
+
+fn sandbox_assessment_schema() -> serde_json::Value {
+    json!({
+        "type": "object",
+        "required": ["description", "risk_level", "risk_categories"],
+        "properties": {
+            "description": {
+                "type": "string",
+                "minLength": 1,
+                "maxLength": 500
+            },
+            "risk_level": {
+                "type": "string",
+                "enum": ["low", "medium", "high"]
+            },
+            "risk_categories": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                    "enum": SANDBOX_RISK_CATEGORY_VALUES
+                }
+            }
+        },
+        "additionalProperties": false
+    })
+}
+
+fn response_item_text(item: &ResponseItem) -> Option<String> {
+    match item {
+        ResponseItem::Message { content, .. } => {
+            let mut buffers: Vec<&str> = Vec::new();
+            for segment in content {
+                match segment {
+                    ContentItem::InputText { text } | ContentItem::OutputText { text } => {
+                        if !text.is_empty() {
+                            buffers.push(text);
+                        }
+                    }
+                    ContentItem::InputImage { .. } => {}
+                }
+            }
+            if buffers.is_empty() {
+                None
+            } else {
+                Some(buffers.join("\n"))
+            }
+        }
+        ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
+        _ => None,
+    }
+}
--- a/codex-rs/core/src/sandboxing/mod.rs
+++ b/codex-rs/core/src/sandboxing/mod.rs
@@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns low‑level
 sandbox placement and transformation of portable CommandSpec into a
 ready‑to‑spawn environment.
 */
+
+pub mod assessment;
+
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -12,7 +12,6 @@ use crate::protocol::TokenUsageInfo;
 pub(crate) struct SessionState {
    pub(crate) session_configuration: SessionConfiguration,
    pub(crate) history: ConversationHistory,
-    pub(crate) token_info: Option<TokenUsageInfo>,
    pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
 }

@@ -22,7 +21,6 @@ impl SessionState {
        Self {
            session_configuration,
            history: ConversationHistory::new(),
-            token_info: None,
            latest_rate_limits: None,
        }
    }
@@ -54,11 +52,11 @@ impl SessionState {
        usage: &TokenUsage,
        model_context_window: Option<i64>,
    ) {
-        self.token_info = TokenUsageInfo::new_or_append(
-            &self.token_info,
-            &Some(usage.clone()),
-            model_context_window,
-        );
+        self.history.update_token_info(usage, model_context_window);
+    }
+
+    pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
+        self.history.token_info()
    }

    pub(crate) fn set_rate_limits(&mut self, snapshot: RateLimitSnapshot) {
@@ -68,17 +66,10 @@ impl SessionState {
    pub(crate) fn token_info_and_rate_limits(
        &self,
    ) -> (Option<TokenUsageInfo>, Option<RateLimitSnapshot>) {
-        (self.token_info.clone(), self.latest_rate_limits.clone())
+        (self.token_info(), self.latest_rate_limits.clone())
    }

    pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
-        match &mut self.token_info {
-            Some(info) => info.fill_to_context_window(context_window),
-            None => {
-                self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
-            }
-        }
+        self.history.set_token_usage_full(context_window);
    }
-
-    // Pending input/approval moved to TurnState.
 }
--- a/codex-rs/core/src/tools/events.rs
+++ b/codex-rs/core/src/tools/events.rs
@@ -1,6 +1,9 @@
 use crate::codex::Session;
 use crate::codex::TurnContext;
+use crate::error::CodexErr;
+use crate::error::SandboxErr;
 use crate::exec::ExecToolCallOutput;
+use crate::function_tool::FunctionCallError;
 use crate::parse_command::parse_command;
 use crate::protocol::EventMsg;
 use crate::protocol::ExecCommandBeginEvent;
@@ -10,6 +13,7 @@ use crate::protocol::PatchApplyBeginEvent;
 use crate::protocol::PatchApplyEndEvent;
 use crate::protocol::TurnDiffEvent;
 use crate::tools::context::SharedTurnDiffTracker;
+use crate::tools::sandboxing::ToolError;
 use std::collections::HashMap;
 use std::path::Path;
 use std::path::PathBuf;
@@ -196,12 +200,103 @@ impl ToolEmitter {
            ) => {
                emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
            }
-            (Self::UnifiedExec { command, cwd, .. }, _) => {
-                // TODO(jif) add end and failures.
+            (Self::UnifiedExec { command, cwd, .. }, ToolEventStage::Begin) => {
                emit_exec_command_begin(ctx, &[command.to_string()], cwd.as_path()).await;
            }
+            (Self::UnifiedExec { .. }, ToolEventStage::Success(output)) => {
+                emit_exec_end(
+                    ctx,
+                    output.stdout.text.clone(),
+                    output.stderr.text.clone(),
+                    output.aggregated_output.text.clone(),
+                    output.exit_code,
+                    output.duration,
+                    format_exec_output_str(&output),
+                )
+                .await;
+            }
+            (
+                Self::UnifiedExec { .. },
+                ToolEventStage::Failure(ToolEventFailure::Output(output)),
+            ) => {
+                emit_exec_end(
+                    ctx,
+                    output.stdout.text.clone(),
+                    output.stderr.text.clone(),
+                    output.aggregated_output.text.clone(),
+                    output.exit_code,
+                    output.duration,
+                    format_exec_output_str(&output),
+                )
+                .await;
+            }
+            (
+                Self::UnifiedExec { .. },
+                ToolEventStage::Failure(ToolEventFailure::Message(message)),
+            ) => {
+                emit_exec_end(
+                    ctx,
+                    String::new(),
+                    (*message).to_string(),
+                    (*message).to_string(),
+                    -1,
+                    Duration::ZERO,
+                    format_exec_output(&message),
+                )
+                .await;
+            }
        }
    }
+
+    pub async fn begin(&self, ctx: ToolEventCtx<'_>) {
+        self.emit(ctx, ToolEventStage::Begin).await;
+    }
+
+    pub async fn finish(
+        &self,
+        ctx: ToolEventCtx<'_>,
+        out: Result<ExecToolCallOutput, ToolError>,
+    ) -> Result<String, FunctionCallError> {
+        let event;
+        let result = match out {
+            Ok(output) => {
+                let content = super::format_exec_output_for_model(&output);
+                let exit_code = output.exit_code;
+                event = ToolEventStage::Success(output);
+                if exit_code == 0 {
+                    Ok(content)
+                } else {
+                    Err(FunctionCallError::RespondToModel(content))
+                }
+            }
+            Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
+            | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
+                let response = super::format_exec_output_for_model(&output);
+                event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
+                Err(FunctionCallError::RespondToModel(response))
+            }
+            Err(ToolError::Codex(err)) => {
+                let message = format!("execution error: {err:?}");
+                let response = super::format_exec_output(&message);
+                event = ToolEventStage::Failure(ToolEventFailure::Message(message));
+                Err(FunctionCallError::RespondToModel(response))
+            }
+            Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
+                // Normalize common rejection messages for exec tools so tests and
+                // users see a clear, consistent phrase.
+                let normalized = if msg == "rejected by user" {
+                    "exec command rejected by user".to_string()
+                } else {
+                    msg
+                };
+                let response = super::format_exec_output(&normalized);
+                event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
+                Err(FunctionCallError::RespondToModel(response))
+            }
+        };
+        self.emit(ctx, event).await;
+        result
+    }
 }

 async fn emit_exec_end(
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -1,19 +1,24 @@
 use std::collections::BTreeMap;
-use std::collections::HashMap;
-use std::sync::Arc;

+use crate::apply_patch;
+use crate::apply_patch::InternalApplyPatchInvocation;
+use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::client_common::tools::FreeformTool;
 use crate::client_common::tools::FreeformToolFormat;
 use crate::client_common::tools::ResponsesApiTool;
 use crate::client_common::tools::ToolSpec;
-use crate::exec::ExecParams;
 use crate::function_tool::FunctionCallError;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
-use crate::tools::handle_container_exec_with_params;
+use crate::tools::events::ToolEmitter;
+use crate::tools::events::ToolEventCtx;
+use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::registry::ToolHandler;
 use crate::tools::registry::ToolKind;
+use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
+use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
+use crate::tools::sandboxing::ToolCtx;
 use crate::tools::spec::ApplyPatchToolArgs;
 use crate::tools::spec::JsonSchema;
 use async_trait::async_trait;
@@ -64,30 +69,85 @@ impl ToolHandler for ApplyPatchHandler {
            }
        };

-        let exec_params = ExecParams {
-            command: vec!["apply_patch".to_string(), patch_input.clone()],
-            cwd: turn.cwd.clone(),
-            timeout_ms: None,
-            env: HashMap::new(),
-            with_escalated_permissions: None,
-            justification: None,
-            arg0: None,
-        };
+        // Re-parse and verify the patch so we can compute changes and approval.
+        // Only the command vector and cwd are needed here, so skip constructing a full ExecParams.
+        let cwd = turn.cwd.clone();
+        let command = vec!["apply_patch".to_string(), patch_input.clone()];
+        match codex_apply_patch::maybe_parse_apply_patch_verified(&command, &cwd) {
+            codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
+                match apply_patch::apply_patch(session.as_ref(), turn.as_ref(), &call_id, changes)
+                    .await
+                {
+                    InternalApplyPatchInvocation::Output(item) => {
+                        let content = item?;
+                        Ok(ToolOutput::Function {
+                            content,
+                            success: Some(true),
+                        })
+                    }
+                    InternalApplyPatchInvocation::DelegateToExec(apply) => {
+                        let emitter = ToolEmitter::apply_patch(
+                            convert_apply_patch_to_protocol(&apply.action),
+                            !apply.user_explicitly_approved_this_action,
+                        );
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        emitter.begin(event_ctx).await;

-        let content = handle_container_exec_with_params(
-            tool_name.as_str(),
-            exec_params,
-            Arc::clone(&session),
-            Arc::clone(&turn),
-            Arc::clone(&tracker),
-            call_id.clone(),
-        )
-        .await?;
+                        let req = ApplyPatchRequest {
+                            patch: apply.action.patch.clone(),
+                            cwd: apply.action.cwd.clone(),
+                            timeout_ms: None,
+                            user_explicitly_approved: apply.user_explicitly_approved_this_action,
+                            codex_exe: turn.codex_linux_sandbox_exe.clone(),
+                        };

-        Ok(ToolOutput::Function {
-            content,
-            success: Some(true),
-        })
+                        let mut orchestrator = ToolOrchestrator::new();
+                        let mut runtime = ApplyPatchRuntime::new();
+                        let tool_ctx = ToolCtx {
+                            session: session.as_ref(),
+                            turn: turn.as_ref(),
+                            call_id: call_id.clone(),
+                            tool_name: tool_name.to_string(),
+                        };
+                        let out = orchestrator
+                            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
+                            .await;
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        let content = emitter.finish(event_ctx, out).await?;
+                        Ok(ToolOutput::Function {
+                            content,
+                            success: Some(true),
+                        })
+                    }
+                }
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
+                Err(FunctionCallError::RespondToModel(format!(
+                    "apply_patch verification failed: {parse_error}"
+                )))
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::ShellParseError(error) => {
+                tracing::trace!("Failed to parse apply_patch input, {error:?}");
+                Err(FunctionCallError::RespondToModel(
+                    "apply_patch handler received invalid patch input".to_string(),
+                ))
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::NotApplyPatch => {
+                Err(FunctionCallError::RespondToModel(
+                    "apply_patch handler received non-apply_patch input".to_string(),
+                ))
+            }
+        }
    }
 }

--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -2,6 +2,9 @@ use async_trait::async_trait;
 use codex_protocol::models::ShellToolCallParams;
 use std::sync::Arc;

+use crate::apply_patch;
+use crate::apply_patch::InternalApplyPatchInvocation;
+use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::codex::TurnContext;
 use crate::exec::ExecParams;
 use crate::exec_env::create_env;
@@ -9,9 +12,16 @@ use crate::function_tool::FunctionCallError;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
-use crate::tools::handle_container_exec_with_params;
+use crate::tools::events::ToolEmitter;
+use crate::tools::events::ToolEventCtx;
+use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::registry::ToolHandler;
 use crate::tools::registry::ToolKind;
+use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
+use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
+use crate::tools::runtimes::shell::ShellRequest;
+use crate::tools::runtimes::shell::ShellRuntime;
+use crate::tools::sandboxing::ToolCtx;

 pub struct ShellHandler;

@@ -61,35 +71,27 @@ impl ToolHandler for ShellHandler {
                        ))
                    })?;
                let exec_params = Self::to_exec_params(params, turn.as_ref());
-                let content = handle_container_exec_with_params(
+                Self::run_exec_like(
                    tool_name.as_str(),
                    exec_params,
-                    Arc::clone(&session),
-                    Arc::clone(&turn),
-                    Arc::clone(&tracker),
-                    call_id.clone(),
+                    session,
+                    turn,
+                    tracker,
+                    call_id,
                )
-                .await?;
-                Ok(ToolOutput::Function {
-                    content,
-                    success: Some(true),
-                })
+                .await
            }
            ToolPayload::LocalShell { params } => {
                let exec_params = Self::to_exec_params(params, turn.as_ref());
-                let content = handle_container_exec_with_params(
+                Self::run_exec_like(
                    tool_name.as_str(),
                    exec_params,
-                    Arc::clone(&session),
-                    Arc::clone(&turn),
-                    Arc::clone(&tracker),
-                    call_id.clone(),
+                    session,
+                    turn,
+                    tracker,
+                    call_id,
                )
-                .await?;
-                Ok(ToolOutput::Function {
-                    content,
-                    success: Some(true),
-                })
+                .await
            }
            _ => Err(FunctionCallError::RespondToModel(format!(
                "unsupported payload for shell handler: {tool_name}"
@@ -97,3 +99,134 @@ impl ToolHandler for ShellHandler {
        }
    }
 }
+
+impl ShellHandler {
+    async fn run_exec_like(
+        tool_name: &str,
+        exec_params: ExecParams,
+        session: Arc<crate::codex::Session>,
+        turn: Arc<TurnContext>,
+        tracker: crate::tools::context::SharedTurnDiffTracker,
+        call_id: String,
+    ) -> Result<ToolOutput, FunctionCallError> {
+        // Approval policy guard for explicit escalation in non-OnRequest modes.
+        if exec_params.with_escalated_permissions.unwrap_or(false)
+            && !matches!(
+                turn.approval_policy,
+                codex_protocol::protocol::AskForApproval::OnRequest
+            )
+        {
+            return Err(FunctionCallError::RespondToModel(format!(
+                "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
+                policy = turn.approval_policy
+            )));
+        }
+
+        // Intercept apply_patch if present.
+        match codex_apply_patch::maybe_parse_apply_patch_verified(
+            &exec_params.command,
+            &exec_params.cwd,
+        ) {
+            codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
+                match apply_patch::apply_patch(session.as_ref(), turn.as_ref(), &call_id, changes)
+                    .await
+                {
+                    InternalApplyPatchInvocation::Output(item) => {
+                        // Programmatic apply_patch path; return its result.
+                        let content = item?;
+                        return Ok(ToolOutput::Function {
+                            content,
+                            success: Some(true),
+                        });
+                    }
+                    InternalApplyPatchInvocation::DelegateToExec(apply) => {
+                        let emitter = ToolEmitter::apply_patch(
+                            convert_apply_patch_to_protocol(&apply.action),
+                            !apply.user_explicitly_approved_this_action,
+                        );
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        emitter.begin(event_ctx).await;
+
+                        let req = ApplyPatchRequest {
+                            patch: apply.action.patch.clone(),
+                            cwd: apply.action.cwd.clone(),
+                            timeout_ms: exec_params.timeout_ms,
+                            user_explicitly_approved: apply.user_explicitly_approved_this_action,
+                            codex_exe: turn.codex_linux_sandbox_exe.clone(),
+                        };
+                        let mut orchestrator = ToolOrchestrator::new();
+                        let mut runtime = ApplyPatchRuntime::new();
+                        let tool_ctx = ToolCtx {
+                            session: session.as_ref(),
+                            turn: turn.as_ref(),
+                            call_id: call_id.clone(),
+                            tool_name: tool_name.to_string(),
+                        };
+                        let out = orchestrator
+                            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
+                            .await;
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        let content = emitter.finish(event_ctx, out).await?;
+                        return Ok(ToolOutput::Function {
+                            content,
+                            success: Some(true),
+                        });
+                    }
+                }
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "apply_patch verification failed: {parse_error}"
+                )));
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::ShellParseError(error) => {
+                tracing::trace!("Failed to parse shell command, {error:?}");
+                // Fall through to regular shell execution.
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::NotApplyPatch => {
+                // Fall through to regular shell execution.
+            }
+        }
+
+        // Regular shell execution path.
+        let emitter = ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone());
+        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
+        emitter.begin(event_ctx).await;
+
+        let req = ShellRequest {
+            command: exec_params.command.clone(),
+            cwd: exec_params.cwd.clone(),
+            timeout_ms: exec_params.timeout_ms,
+            env: exec_params.env.clone(),
+            with_escalated_permissions: exec_params.with_escalated_permissions,
+            justification: exec_params.justification.clone(),
+        };
+        let mut orchestrator = ToolOrchestrator::new();
+        let mut runtime = ShellRuntime::new();
+        let tool_ctx = ToolCtx {
+            session: session.as_ref(),
+            turn: turn.as_ref(),
+            call_id: call_id.clone(),
+            tool_name: tool_name.to_string(),
+        };
+        let out = orchestrator
+            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
+            .await;
+        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
+        let content = emitter.finish(event_ctx, out).await?;
+        Ok(ToolOutput::Function {
+            content,
+            success: Some(true),
+        })
+    }
+}
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -5,6 +5,9 @@ use serde::Deserialize;
 use serde::Serialize;

 use crate::function_tool::FunctionCallError;
+use crate::protocol::EventMsg;
+use crate::protocol::ExecCommandOutputDeltaEvent;
+use crate::protocol::ExecOutputStream;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
@@ -87,11 +90,7 @@ impl ToolHandler for UnifiedExecHandler {
        };

        let manager: &UnifiedExecSessionManager = &session.services.unified_exec_manager;
-        let context = UnifiedExecContext {
-            session: &session,
-            turn: turn.as_ref(),
-            call_id: &call_id,
-        };
+        let context = UnifiedExecContext::new(session.clone(), turn.clone(), call_id.clone());

        let response = match tool_name.as_str() {
            "exec_command" => {
@@ -101,8 +100,12 @@ impl ToolHandler for UnifiedExecHandler {
                    ))
                })?;

-                let event_ctx =
-                    ToolEventCtx::new(context.session, context.turn, context.call_id, None);
+                let event_ctx = ToolEventCtx::new(
+                    context.session.as_ref(),
+                    context.turn.as_ref(),
+                    &context.call_id,
+                    None,
+                );
                let emitter =
                    ToolEmitter::unified_exec(args.cmd.clone(), context.turn.cwd.clone(), true);
                emitter.emit(event_ctx, ToolEventStage::Begin).await;
@@ -148,6 +151,18 @@ impl ToolHandler for UnifiedExecHandler {
            }
        };

+        // Emit a delta event with the chunk of output we just produced, if any.
+        if !response.output.is_empty() {
+            let delta = ExecCommandOutputDeltaEvent {
+                call_id: response.event_call_id.clone(),
+                stream: ExecOutputStream::Stdout,
+                chunk: response.output.as_bytes().to_vec(),
+            };
+            session
+                .send_event(turn.as_ref(), EventMsg::ExecCommandOutputDelta(delta))
+                .await;
+        }
+
        let content = serialize_response(&response).map_err(|err| {
            FunctionCallError::RespondToModel(format!(
                "failed to serialize unified exec output: {err:?}"
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -9,37 +9,11 @@ pub mod runtimes;
 pub mod sandboxing;
 pub mod spec;

-use crate::apply_patch;
-use crate::apply_patch::InternalApplyPatchInvocation;
-use crate::apply_patch::convert_apply_patch_to_protocol;
-use crate::codex::Session;
-use crate::codex::TurnContext;
-use crate::error::CodexErr;
-use crate::error::SandboxErr;
-use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
-use crate::function_tool::FunctionCallError;
-use crate::tools::context::SharedTurnDiffTracker;
-use crate::tools::events::ToolEmitter;
-use crate::tools::events::ToolEventCtx;
-use crate::tools::events::ToolEventFailure;
-use crate::tools::events::ToolEventStage;
-use crate::tools::orchestrator::ToolOrchestrator;
-use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
-use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
-use crate::tools::runtimes::shell::ShellRequest;
-use crate::tools::runtimes::shell::ShellRuntime;
-use crate::tools::sandboxing::ToolCtx;
-use crate::tools::sandboxing::ToolError;
-use codex_apply_patch::MaybeApplyPatchVerified;
-use codex_apply_patch::maybe_parse_apply_patch_verified;
-use codex_protocol::protocol::AskForApproval;
 use codex_utils_string::take_bytes_at_char_boundary;
 use codex_utils_string::take_last_bytes_at_char_boundary;
 pub use router::ToolRouter;
 use serde::Serialize;
-use std::sync::Arc;
-use tracing::trace;

 // Model-formatting limits: clients get full streams; only content sent to the model is truncated.
 pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
@@ -54,186 +28,6 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
 pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
    "[... telemetry preview truncated ...]";

-// TODO(jif) break this down
-pub(crate) async fn handle_container_exec_with_params(
-    tool_name: &str,
-    params: ExecParams,
-    sess: Arc<Session>,
-    turn_context: Arc<TurnContext>,
-    turn_diff_tracker: SharedTurnDiffTracker,
-    call_id: String,
-) -> Result<String, FunctionCallError> {
-    let _otel_event_manager = turn_context.client.get_otel_event_manager();
-
-    if params.with_escalated_permissions.unwrap_or(false)
-        && !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
-    {
-        return Err(FunctionCallError::RespondToModel(format!(
-            "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
-            policy = turn_context.approval_policy
-        )));
-    }
-
-    // check if this was a patch, and apply it if so
-    let apply_patch_exec = match maybe_parse_apply_patch_verified(&params.command, &params.cwd) {
-        MaybeApplyPatchVerified::Body(changes) => {
-            match apply_patch::apply_patch(sess.as_ref(), turn_context.as_ref(), &call_id, changes)
-                .await
-            {
-                InternalApplyPatchInvocation::Output(item) => return item,
-                InternalApplyPatchInvocation::DelegateToExec(apply_patch_exec) => {
-                    Some(apply_patch_exec)
-                }
-            }
-        }
-        MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
-            // It looks like an invocation of `apply_patch`, but we
-            // could not resolve it into a patch that would apply
-            // cleanly. Return to model for resample.
-            return Err(FunctionCallError::RespondToModel(format!(
-                "apply_patch verification failed: {parse_error}"
-            )));
-        }
-        MaybeApplyPatchVerified::ShellParseError(error) => {
-            trace!("Failed to parse shell command, {error:?}");
-            None
-        }
-        MaybeApplyPatchVerified::NotApplyPatch => None,
-    };
-
-    let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
-        Some(exec) => (
-            ToolEmitter::apply_patch(
-                convert_apply_patch_to_protocol(&exec.action),
-                !exec.user_explicitly_approved_this_action,
-            ),
-            Some(&turn_diff_tracker),
-        ),
-        None => (
-            ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
-            None,
-        ),
-    };
-
-    let event_ctx = ToolEventCtx::new(sess.as_ref(), turn_context.as_ref(), &call_id, diff_opt);
-    event_emitter.emit(event_ctx, ToolEventStage::Begin).await;
-
-    // Build runtime contexts only when needed (shell/apply_patch below).
-
-    if let Some(exec) = apply_patch_exec {
-        // Route apply_patch execution through the new orchestrator/runtime.
-        let req = ApplyPatchRequest {
-            patch: exec.action.patch.clone(),
-            cwd: params.cwd.clone(),
-            timeout_ms: params.timeout_ms,
-            user_explicitly_approved: exec.user_explicitly_approved_this_action,
-            codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
-        };
-
-        let mut orchestrator = ToolOrchestrator::new();
-        let mut runtime = ApplyPatchRuntime::new();
-        let tool_ctx = ToolCtx {
-            session: sess.as_ref(),
-            turn: turn_context.as_ref(),
-            call_id: call_id.clone(),
-            tool_name: tool_name.to_string(),
-        };
-
-        let out = orchestrator
-            .run(
-                &mut runtime,
-                &req,
-                &tool_ctx,
-                &turn_context,
-                turn_context.approval_policy,
-            )
-            .await;
-
-        handle_exec_outcome(&event_emitter, event_ctx, out).await
-    } else {
-        // Route shell execution through the new orchestrator/runtime.
-        let req = ShellRequest {
-            command: params.command.clone(),
-            cwd: params.cwd.clone(),
-            timeout_ms: params.timeout_ms,
-            env: params.env.clone(),
-            with_escalated_permissions: params.with_escalated_permissions,
-            justification: params.justification.clone(),
-        };
-
-        let mut orchestrator = ToolOrchestrator::new();
-        let mut runtime = ShellRuntime::new();
-        let tool_ctx = ToolCtx {
-            session: sess.as_ref(),
-            turn: turn_context.as_ref(),
-            call_id: call_id.clone(),
-            tool_name: tool_name.to_string(),
-        };
-
-        let out = orchestrator
-            .run(
-                &mut runtime,
-                &req,
-                &tool_ctx,
-                &turn_context,
-                turn_context.approval_policy,
-            )
-            .await;
-
-        handle_exec_outcome(&event_emitter, event_ctx, out).await
-    }
-}
-
-async fn handle_exec_outcome(
-    event_emitter: &ToolEmitter,
-    event_ctx: ToolEventCtx<'_>,
-    out: Result<ExecToolCallOutput, ToolError>,
-) -> Result<String, FunctionCallError> {
-    let event;
-    let result = match out {
-        Ok(output) => {
-            let content = format_exec_output_for_model(&output);
-            let exit_code = output.exit_code;
-            event = ToolEventStage::Success(output);
-            if exit_code == 0 {
-                Ok(content)
-            } else {
-                Err(FunctionCallError::RespondToModel(content))
-            }
-        }
-        Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
-        | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
-            let response = format_exec_output_for_model(&output);
-            event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
-            Err(FunctionCallError::RespondToModel(response))
-        }
-        Err(ToolError::Codex(err)) => {
-            let message = format!("execution error: {err:?}");
-            let response = format_exec_output(&message);
-            event = ToolEventStage::Failure(ToolEventFailure::Message(message));
-            Err(FunctionCallError::RespondToModel(format_exec_output(
-                &response,
-            )))
-        }
-        Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
-            // Normalize common rejection messages for exec tools so tests and
-            // users see a clear, consistent phrase.
-            let normalized = if msg == "rejected by user" {
-                "exec command rejected by user".to_string()
-            } else {
-                msg
-            };
-            let response = format_exec_output(&normalized);
-            event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
-            Err(FunctionCallError::RespondToModel(format_exec_output(
-                &response,
-            )))
-        }
-    };
-    event_emitter.emit(event_ctx, event).await;
-    result
-}
-
 /// Format the combined exec output for sending back to the model.
 /// Includes exit code and duration metadata; truncates large bodies safely.
 pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
@@ -363,6 +157,7 @@ fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::function_tool::FunctionCallError;
    use regex_lite::Regex;

    fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
--- a/codex-rs/core/src/tools/orchestrator.rs
+++ b/codex-rs/core/src/tools/orchestrator.rs
@@ -7,9 +7,11 @@ retry without sandbox on denial (no re‑approval thanks to caching).
 */
 use crate::error::CodexErr;
 use crate::error::SandboxErr;
+use crate::error::get_error_message_ui;
 use crate::exec::ExecToolCallOutput;
 use crate::sandboxing::SandboxManager;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
 use crate::tools::sandboxing::ToolCtx;
 use crate::tools::sandboxing::ToolError;
@@ -38,6 +40,7 @@ impl ToolOrchestrator {
    ) -> Result<Out, ToolError>
    where
        T: ToolRuntime<Rq, Out>,
+        Rq: ProvidesSandboxRetryData,
    {
        let otel = turn_ctx.client.get_otel_event_manager();
        let otel_tn = &tool_ctx.tool_name;
@@ -56,6 +59,7 @@ impl ToolOrchestrator {
                turn: turn_ctx,
                call_id: &tool_ctx.call_id,
                retry_reason: None,
+                risk: None,
            };
            let decision = tool.start_approval_async(req, approval_ctx).await;

@@ -98,21 +102,42 @@ impl ToolOrchestrator {
                        "sandbox denied and no retry".to_string(),
                    ));
                }
-                // Under `Never`, do not retry without sandbox; surface a concise message
+                // If the tool will not seek a no-sandbox approval under this policy (by default `Never` or `OnRequest`), do not retry without sandbox; surface a concise message
                // derived from the actual output (platform-agnostic).
-                if matches!(approval_policy, AskForApproval::Never) {
+                if !tool.wants_no_sandbox_approval(approval_policy) {
                    let msg = build_never_denied_message_from_output(output.as_ref());
                    return Err(ToolError::SandboxDenied(msg));
                }

                // Ask for approval before retrying without sandbox.
                if !tool.should_bypass_approval(approval_policy, already_approved) {
+                    let mut risk = None;
+
+                    if let Some(metadata) = req.sandbox_retry_data() {
+                        let err = SandboxErr::Denied {
+                            output: output.clone(),
+                        };
+                        let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
+                        let failure_summary = format!("failed in sandbox: {friendly}");
+
+                        risk = tool_ctx
+                            .session
+                            .assess_sandbox_command(
+                                turn_ctx,
+                                &tool_ctx.call_id,
+                                &metadata.command,
+                                Some(failure_summary.as_str()),
+                            )
+                            .await;
+                    }
+
                    let reason_msg = build_denial_reason_from_output(output.as_ref());
                    let approval_ctx = ApprovalCtx {
                        session: tool_ctx.session,
                        turn: turn_ctx,
                        call_id: &tool_ctx.call_id,
                        retry_reason: Some(reason_msg),
+                        risk,
                    };

                    let decision = tool.start_approval_async(req, approval_ctx).await;
--- a/codex-rs/core/src/tools/parallel.rs
+++ b/codex-rs/core/src/tools/parallel.rs
@@ -2,6 +2,7 @@ use std::sync::Arc;

 use tokio::sync::RwLock;
 use tokio_util::either::Either;
+use tokio_util::sync::CancellationToken;
 use tokio_util::task::AbortOnDropHandle;

 use crate::codex::Session;
@@ -9,8 +10,10 @@ use crate::codex::TurnContext;
 use crate::error::CodexErr;
 use crate::function_tool::FunctionCallError;
 use crate::tools::context::SharedTurnDiffTracker;
+use crate::tools::context::ToolPayload;
 use crate::tools::router::ToolCall;
 use crate::tools::router::ToolRouter;
+use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;

 pub(crate) struct ToolCallRuntime {
@@ -40,6 +43,7 @@ impl ToolCallRuntime {
    pub(crate) fn handle_tool_call(
        &self,
        call: ToolCall,
+        cancellation_token: CancellationToken,
    ) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
        let supports_parallel = self.router.tool_supports_parallel(&call.tool_name);

@@ -48,18 +52,24 @@ impl ToolCallRuntime {
        let turn = Arc::clone(&self.turn_context);
        let tracker = Arc::clone(&self.tracker);
        let lock = Arc::clone(&self.parallel_execution);
+        let aborted_response = Self::aborted_response(&call);

        let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
            AbortOnDropHandle::new(tokio::spawn(async move {
-                let _guard = if supports_parallel {
-                    Either::Left(lock.read().await)
-                } else {
-                    Either::Right(lock.write().await)
-                };
+                tokio::select! {
+                    _ = cancellation_token.cancelled() => Ok(aborted_response),
+                    res = async {
+                        let _guard = if supports_parallel {
+                            Either::Left(lock.read().await)
+                        } else {
+                            Either::Right(lock.write().await)
+                        };

-                router
-                    .dispatch_tool_call(session, turn, tracker, call)
-                    .await
+                        router
+                            .dispatch_tool_call(session, turn, tracker, call)
+                            .await
+                    } => res,
+                }
            }));

        async move {
@@ -74,3 +84,25 @@ impl ToolCallRuntime {
        }
    }
 }
+
+impl ToolCallRuntime {
+    fn aborted_response(call: &ToolCall) -> ResponseInputItem {
+        match &call.payload {
+            ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
+                call_id: call.call_id.clone(),
+                output: "aborted".to_string(),
+            },
+            ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput {
+                call_id: call.call_id.clone(),
+                result: Err("aborted".to_string()),
+            },
+            _ => ResponseInputItem::FunctionCallOutput {
+                call_id: call.call_id.clone(),
+                output: FunctionCallOutputPayload {
+                    content: "aborted".to_string(),
+                    success: None,
+                },
+            },
+        }
+    }
+}
--- a/codex-rs/core/src/tools/runtimes/apply_patch.rs
+++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs
@@ -10,13 +10,16 @@ use crate::sandboxing::CommandSpec;
 use crate::sandboxing::execute_env;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
 use crate::tools::sandboxing::ToolError;
 use crate::tools::sandboxing::ToolRuntime;
 use crate::tools::sandboxing::with_cached_approval;
+use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::ReviewDecision;
 use futures::future::BoxFuture;
 use std::collections::HashMap;
@@ -31,6 +34,12 @@ pub struct ApplyPatchRequest {
    pub codex_exe: Option<PathBuf>,
 }

+impl ProvidesSandboxRetryData for ApplyPatchRequest {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
+        None
+    }
+}
+
 #[derive(Default)]
 pub struct ApplyPatchRuntime;

@@ -105,9 +114,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
        let call_id = ctx.call_id.to_string();
        let cwd = req.cwd.clone();
        let retry_reason = ctx.retry_reason.clone();
+        let risk = ctx.risk.clone();
        let user_explicitly_approved = req.user_explicitly_approved;
        Box::pin(async move {
-            with_cached_approval(&session.services, key, || async move {
+            with_cached_approval(&session.services, key, move || async move {
                if let Some(reason) = retry_reason {
                    session
                        .request_command_approval(
@@ -116,6 +126,7 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
                            vec!["apply_patch".to_string()],
                            cwd,
                            Some(reason),
+                            risk,
                        )
                        .await
                } else if user_explicitly_approved {
@@ -127,6 +138,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
            .await
        })
    }
+
+    fn wants_no_sandbox_approval(&self, policy: AskForApproval) -> bool {
+        !matches!(policy, AskForApproval::Never)
+    }
 }

 impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
--- a/codex-rs/core/src/tools/runtimes/shell.rs
+++ b/codex-rs/core/src/tools/runtimes/shell.rs
@@ -12,7 +12,9 @@ use crate::sandboxing::execute_env;
 use crate::tools::runtimes::build_command_spec;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
@@ -34,6 +36,15 @@ pub struct ShellRequest {
    pub justification: Option<String>,
 }

+impl ProvidesSandboxRetryData for ShellRequest {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
+        Some(SandboxRetryData {
+            command: self.command.clone(),
+            cwd: self.cwd.clone(),
+        })
+    }
+}
+
 #[derive(Default)]
 pub struct ShellRuntime;

@@ -90,13 +101,14 @@ impl Approvable<ShellRequest> for ShellRuntime {
            .retry_reason
            .clone()
            .or_else(|| req.justification.clone());
+        let risk = ctx.risk.clone();
        let session = ctx.session;
        let turn = ctx.turn;
        let call_id = ctx.call_id.to_string();
        Box::pin(async move {
-            with_cached_approval(&session.services, key, || async move {
+            with_cached_approval(&session.services, key, move || async move {
                session
-                    .request_command_approval(turn, call_id, command, cwd, reason)
+                    .request_command_approval(turn, call_id, command, cwd, reason, risk)
                    .await
            })
            .await
--- a/codex-rs/core/src/tools/runtimes/unified_exec.rs
+++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs
@@ -9,7 +9,9 @@ use crate::error::SandboxErr;
 use crate::tools::runtimes::build_command_spec;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
@@ -31,6 +33,15 @@ pub struct UnifiedExecRequest {
    pub env: HashMap<String, String>,
 }

+impl ProvidesSandboxRetryData for UnifiedExecRequest {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
+        Some(SandboxRetryData {
+            command: self.command.clone(),
+            cwd: self.cwd.clone(),
+        })
+    }
+}
+
 #[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
 pub struct UnifiedExecApprovalKey {
    pub command: Vec<String>,
@@ -85,10 +96,11 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
        let command = req.command.clone();
        let cwd = req.cwd.clone();
        let reason = ctx.retry_reason.clone();
+        let risk = ctx.risk.clone();
        Box::pin(async move {
            with_cached_approval(&session.services, key, || async move {
                session
-                    .request_command_approval(turn, call_id, command, cwd, reason)
+                    .request_command_approval(turn, call_id, command, cwd, reason, risk)
                    .await
            })
            .await
--- a/codex-rs/core/src/tools/sandboxing.rs
+++ b/codex-rs/core/src/tools/sandboxing.rs
@@ -7,6 +7,7 @@
 use crate::codex::Session;
 use crate::codex::TurnContext;
 use crate::error::CodexErr;
+use crate::protocol::SandboxCommandAssessment;
 use crate::protocol::SandboxPolicy;
 use crate::sandboxing::CommandSpec;
 use crate::sandboxing::SandboxManager;
@@ -18,6 +19,7 @@ use std::collections::HashMap;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::path::Path;
+use std::path::PathBuf;

 use futures::Future;
 use futures::future::BoxFuture;
@@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> {
    pub turn: &'a TurnContext,
    pub call_id: &'a str,
    pub retry_reason: Option<String>,
+    pub risk: Option<SandboxCommandAssessment>,
 }

 pub(crate) trait Approvable<Req> {
@@ -121,6 +124,11 @@ pub(crate) trait Approvable<Req> {
        }
    }

+    /// Decide whether we can request an approval for no-sandbox execution under `policy`.
+    fn wants_no_sandbox_approval(&self, policy: AskForApproval) -> bool {
+        !matches!(policy, AskForApproval::Never | AskForApproval::OnRequest)
+    }
+
    fn start_approval_async<'a>(
        &'a mut self,
        req: &'a Req,
@@ -151,6 +159,17 @@ pub(crate) struct ToolCtx<'a> {
    pub tool_name: String,
 }

+/// Captures the command metadata needed to re-run a tool request without sandboxing.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct SandboxRetryData {
+    pub command: Vec<String>,
+    pub cwd: PathBuf,
+}
+
+pub(crate) trait ProvidesSandboxRetryData {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData>;
+}
+
 #[derive(Debug)]
 pub(crate) enum ToolError {
    Rejected(String),
--- a/codex-rs/core/src/truncate.rs
+++ b/codex-rs/core/src/truncate.rs
@@ -1,18 +1,35 @@
 //! Utilities for truncating large chunks of output while preserving a prefix
 //! and suffix on UTF-8 boundaries.

+use codex_utils_tokenizer::Tokenizer;
+
 /// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
 /// preserving the beginning and the end. Returns the possibly truncated
-/// string and `Some(original_token_count)` (estimated at 4 bytes/token)
+/// string and `Some(original_token_count)` (counted with the local tokenizer;
+/// falls back to a 4-bytes-per-token estimate if the tokenizer cannot load)
 /// if truncation occurred; otherwise returns the original string and `None`.
 pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>) {
    if s.len() <= max_bytes {
        return (s.to_string(), None);
    }

-    let est_tokens = (s.len() as u64).div_ceil(4);
+    // Build a tokenizer for counting (default to o200k_base; fall back to cl100k_base).
+    // If both fail, fall back to a 4-bytes-per-token estimate.
+    let tok = Tokenizer::try_default().ok();
+    let token_count = |text: &str| -> u64 {
+        if let Some(ref t) = tok {
+            t.count(text) as u64
+        } else {
+            (text.len() as u64).div_ceil(4)
+        }
+    };
+
+    let total_tokens = token_count(s);
    if max_bytes == 0 {
-        return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
+        return (
+            format!("…{total_tokens} tokens truncated…"),
+            Some(total_tokens),
+        );
    }

    fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
@@ -50,13 +67,17 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>
        idx
    }

-    let mut guess_tokens = est_tokens;
+    // Iterate to stabilize marker length → keep budget → boundaries.
+    let mut guess_tokens: u64 = 1;
    for _ in 0..4 {
        let marker = format!("…{guess_tokens} tokens truncated…");
        let marker_len = marker.len();
        let keep_budget = max_bytes.saturating_sub(marker_len);
        if keep_budget == 0 {
-            return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
+            return (
+                format!("…{total_tokens} tokens truncated…"),
+                Some(total_tokens),
+            );
        }

        let left_budget = keep_budget / 2;
@@ -67,59 +88,72 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>
            suffix_start = prefix_end;
        }

-        let kept_content_bytes = prefix_end + (s.len() - suffix_start);
-        let truncated_content_bytes = s.len().saturating_sub(kept_content_bytes);
-        let new_tokens = (truncated_content_bytes as u64).div_ceil(4);
+        // Tokens actually removed (middle slice) using the real tokenizer.
+        let removed_tokens = token_count(&s[prefix_end..suffix_start]);

-        if new_tokens == guess_tokens {
-            let mut out = String::with_capacity(marker_len + kept_content_bytes + 1);
+        // If the marker built from the real removed-token count has the same byte length as
+        // the guessed marker, the keep budget is unchanged and we can finalize output.
+        let final_marker = format!("…{removed_tokens} tokens truncated…");
+        if final_marker.len() == marker_len {
+            let kept_content_bytes = prefix_end + (s.len() - suffix_start);
+            let mut out = String::with_capacity(final_marker.len() + kept_content_bytes + 1);
            out.push_str(&s[..prefix_end]);
-            out.push_str(&marker);
+            out.push_str(&final_marker);
            out.push('\n');
            out.push_str(&s[suffix_start..]);
-            return (out, Some(est_tokens));
+            return (out, Some(total_tokens));
        }

-        guess_tokens = new_tokens;
+        guess_tokens = removed_tokens;
    }

+    // Fallback build after iterations: compute with the last guess.
    let marker = format!("…{guess_tokens} tokens truncated…");
    let marker_len = marker.len();
    let keep_budget = max_bytes.saturating_sub(marker_len);
    if keep_budget == 0 {
-        return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
+        return (
+            format!("…{total_tokens} tokens truncated…"),
+            Some(total_tokens),
+        );
    }

    let left_budget = keep_budget / 2;
    let right_budget = keep_budget - left_budget;
    let prefix_end = pick_prefix_end(s, left_budget);
-    let suffix_start = pick_suffix_start(s, right_budget);
+    let mut suffix_start = pick_suffix_start(s, right_budget);
+    if suffix_start < prefix_end {
+        suffix_start = prefix_end;
+    }

    let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1);
    out.push_str(&s[..prefix_end]);
    out.push_str(&marker);
    out.push('\n');
    out.push_str(&s[suffix_start..]);
-    (out, Some(est_tokens))
+    (out, Some(total_tokens))
 }

 #[cfg(test)]
 mod tests {
    use super::truncate_middle;
+    use codex_utils_tokenizer::Tokenizer;

    #[test]
    fn truncate_middle_no_newlines_fallback() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ*";
        let max_bytes = 32;
        let (out, original) = truncate_middle(s, max_bytes);
        assert!(out.starts_with("abc"));
        assert!(out.contains("tokens truncated"));
        assert!(out.ends_with("XYZ*"));
-        assert_eq!(original, Some((s.len() as u64).div_ceil(4)));
+        assert_eq!(original, Some(tok.count(s) as u64));
    }

    #[test]
    fn truncate_middle_prefers_newline_boundaries() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        let mut s = String::new();
        for i in 1..=20 {
            s.push_str(&format!("{i:03}\n"));
@@ -131,50 +165,36 @@ mod tests {
        assert!(out.starts_with("001\n002\n003\n004\n"));
        assert!(out.contains("tokens truncated"));
        assert!(out.ends_with("017\n018\n019\n020\n"));
-        assert_eq!(tokens, Some(20));
+        assert_eq!(tokens, Some(tok.count(&s) as u64));
    }

    #[test]
    fn truncate_middle_handles_utf8_content() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n";
        let max_bytes = 32;
        let (out, tokens) = truncate_middle(s, max_bytes);

        assert!(out.contains("tokens truncated"));
        assert!(!out.contains('\u{fffd}'));
-        assert_eq!(tokens, Some((s.len() as u64).div_ceil(4)));
+        assert_eq!(tokens, Some(tok.count(s) as u64));
    }

    #[test]
    fn truncate_middle_prefers_newline_boundaries_2() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        // Build a multi-line string of 20 numbered lines (each "NNN\n").
        let mut s = String::new();
        for i in 1..=20 {
            s.push_str(&format!("{i:03}\n"));
        }
-        // Total length: 20 lines * 4 bytes per line = 80 bytes.
        assert_eq!(s.len(), 80);

-        // Choose a cap that forces truncation while leaving room for
-        // a few lines on each side after accounting for the marker.
        let max_bytes = 64;
-        // Expect exact output: first 4 lines, marker, last 4 lines, and correct token estimate (80/4 = 20).
-        assert_eq!(
-            truncate_middle(&s, max_bytes),
-            (
-                r#"001
-002
-003
-004
-…12 tokens truncated…
-017
-018
-019
-020
-"#
-                .to_string(),
-                Some(20)
-            )
-        );
+        let (out, total) = truncate_middle(&s, max_bytes);
+        assert!(out.starts_with("001\n002\n003\n004\n"));
+        assert!(out.contains("tokens truncated"));
+        assert!(out.ends_with("017\n018\n019\n020\n"));
+        assert_eq!(total, Some(tok.count(&s) as u64));
    }
 }
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -22,6 +22,8 @@
 //! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling.

 use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::Arc;
 use std::sync::atomic::AtomicI32;
 use std::time::Duration;

@@ -45,10 +47,20 @@ pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000;
 pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000;
 pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB

-pub(crate) struct UnifiedExecContext<'a> {
-    pub session: &'a Session,
-    pub turn: &'a TurnContext,
-    pub call_id: &'a str,
+pub(crate) struct UnifiedExecContext {
+    pub session: Arc<Session>,
+    pub turn: Arc<TurnContext>,
+    pub call_id: String,
+}
+
+impl UnifiedExecContext {
+    pub fn new(session: Arc<Session>, turn: Arc<TurnContext>, call_id: String) -> Self {
+        Self {
+            session,
+            turn,
+            call_id,
+        }
+    }
 }

 #[derive(Debug)]
@@ -70,6 +82,7 @@ pub(crate) struct WriteStdinRequest<'a> {

 #[derive(Debug, Clone, PartialEq)]
 pub(crate) struct UnifiedExecResponse {
+    pub event_call_id: String,
    pub chunk_id: String,
    pub wall_time: Duration,
    pub output: String,
@@ -78,10 +91,20 @@ pub(crate) struct UnifiedExecResponse {
    pub original_token_count: Option<usize>,
 }

-#[derive(Debug, Default)]
+#[derive(Default)]
 pub(crate) struct UnifiedExecSessionManager {
    next_session_id: AtomicI32,
-    sessions: Mutex<HashMap<i32, session::UnifiedExecSession>>,
+    sessions: Mutex<HashMap<i32, SessionEntry>>,
+}
+
+struct SessionEntry {
+    session: session::UnifiedExecSession,
+    session_ref: Arc<Session>,
+    turn_ref: Arc<TurnContext>,
+    call_id: String,
+    command: String,
+    cwd: PathBuf,
+    started_at: tokio::time::Instant,
 }

 pub(crate) fn clamp_yield_time(yield_time_ms: Option<u64>) -> u64 {
@@ -163,11 +186,8 @@ mod tests {
        cmd: &str,
        yield_time_ms: Option<u64>,
    ) -> Result<UnifiedExecResponse, UnifiedExecError> {
-        let context = UnifiedExecContext {
-            session,
-            turn: turn.as_ref(),
-            call_id: "call",
-        };
+        let context =
+            UnifiedExecContext::new(Arc::clone(session), Arc::clone(turn), "call".to_string());

        session
            .services
--- a/codex-rs/core/src/unified_exec/session_manager.rs
+++ b/codex-rs/core/src/unified_exec/session_manager.rs
@@ -5,8 +5,13 @@ use tokio::sync::mpsc;
 use tokio::time::Duration;
 use tokio::time::Instant;

+use crate::exec::ExecToolCallOutput;
+use crate::exec::StreamOutput;
 use crate::exec_env::create_env;
 use crate::sandboxing::ExecEnv;
+use crate::tools::events::ToolEmitter;
+use crate::tools::events::ToolEventCtx;
+use crate::tools::events::ToolEventStage;
 use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
 use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
@@ -14,6 +19,7 @@ use crate::tools::sandboxing::ToolCtx;

 use super::ExecCommandRequest;
 use super::MIN_YIELD_TIME_MS;
+use super::SessionEntry;
 use super::UnifiedExecContext;
 use super::UnifiedExecError;
 use super::UnifiedExecResponse;
@@ -30,7 +36,7 @@ impl UnifiedExecSessionManager {
    pub(crate) async fn exec_command(
        &self,
        request: ExecCommandRequest<'_>,
-        context: &UnifiedExecContext<'_>,
+        context: &UnifiedExecContext,
    ) -> Result<UnifiedExecResponse, UnifiedExecError> {
        let shell_flag = if request.login { "-lc" } else { "-c" };
        let command = vec![
@@ -59,17 +65,36 @@ impl UnifiedExecSessionManager {
        let session_id = if session.has_exited() {
            None
        } else {
-            Some(self.store_session(session).await)
+            Some(
+                self.store_session(session, context, request.command, start)
+                    .await,
+            )
        };

-        Ok(UnifiedExecResponse {
+        let response = UnifiedExecResponse {
+            event_call_id: context.call_id.clone(),
            chunk_id,
            wall_time,
            output,
            session_id,
            exit_code,
            original_token_count,
-        })
+        };
+
+        // If the command completed during this call, emit an ExecCommandEnd via the emitter.
+        if response.session_id.is_none() {
+            let exit = response.exit_code.unwrap_or(-1);
+            Self::emit_exec_end_from_context(
+                context,
+                request.command.to_string(),
+                response.output.clone(),
+                exit,
+                response.wall_time,
+            )
+            .await;
+        }
+
+        Ok(response)
    }

    pub(crate) async fn write_stdin(
@@ -98,37 +123,60 @@ impl UnifiedExecSessionManager {
        let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
        let chunk_id = generate_chunk_id();

-        let (session_id, exit_code) = self.refresh_session_state(session_id).await;
+        let status = self.refresh_session_state(session_id).await;
+        let (session_id, exit_code, completion_entry, event_call_id) = match status {
+            SessionStatus::Alive { exit_code, call_id } => {
+                (Some(session_id), exit_code, None, call_id)
+            }
+            SessionStatus::Exited { exit_code, entry } => {
+                let call_id = entry.call_id.clone();
+                (None, exit_code, Some(*entry), call_id)
+            }
+            SessionStatus::Unknown => {
+                return Err(UnifiedExecError::UnknownSessionId { session_id });
+            }
+        };

-        Ok(UnifiedExecResponse {
+        let response = UnifiedExecResponse {
+            event_call_id,
            chunk_id,
            wall_time,
            output,
            session_id,
            exit_code,
            original_token_count,
-        })
-    }
+        };

-    async fn refresh_session_state(&self, session_id: i32) -> (Option<i32>, Option<i32>) {
-        let mut sessions = self.sessions.lock().await;
-        if !sessions.contains_key(&session_id) {
-            return (None, None);
+        if let (Some(exit), Some(entry)) = (response.exit_code, completion_entry) {
+            let total_duration = Instant::now().saturating_duration_since(entry.started_at);
+            Self::emit_exec_end_from_entry(entry, response.output.clone(), exit, total_duration)
+                .await;
        }

-        let has_exited = sessions
-            .get(&session_id)
-            .map(UnifiedExecSession::has_exited)
-            .unwrap_or(false);
-        let exit_code = sessions
-            .get(&session_id)
-            .and_then(UnifiedExecSession::exit_code);
+        Ok(response)
+    }

-        if has_exited {
-            sessions.remove(&session_id);
-            (None, exit_code)
+    async fn refresh_session_state(&self, session_id: i32) -> SessionStatus {
+        let mut sessions = self.sessions.lock().await;
+        let Some(entry) = sessions.get(&session_id) else {
+            return SessionStatus::Unknown;
+        };
+
+        let exit_code = entry.session.exit_code();
+
+        if entry.session.has_exited() {
+            let Some(entry) = sessions.remove(&session_id) else {
+                return SessionStatus::Unknown;
+            };
+            SessionStatus::Exited {
+                exit_code,
+                entry: Box::new(entry),
+            }
        } else {
-            (Some(session_id), exit_code)
+            SessionStatus::Alive {
+                exit_code,
+                call_id: entry.call_id.clone(),
+            }
        }
    }

@@ -138,9 +186,9 @@ impl UnifiedExecSessionManager {
    ) -> Result<(mpsc::Sender<Vec<u8>>, OutputBuffer, Arc<Notify>), UnifiedExecError> {
        let sessions = self.sessions.lock().await;
        let (output_buffer, output_notify, writer_tx) =
-            if let Some(session) = sessions.get(&session_id) {
-                let (buffer, notify) = session.output_handles();
-                (buffer, notify, session.writer_sender())
+            if let Some(entry) = sessions.get(&session_id) {
+                let (buffer, notify) = entry.session.output_handles();
+                (buffer, notify, entry.session.writer_sender())
            } else {
                return Err(UnifiedExecError::UnknownSessionId { session_id });
            };
@@ -158,14 +206,82 @@ impl UnifiedExecSessionManager {
            .map_err(|_| UnifiedExecError::WriteToStdin)
    }

-    async fn store_session(&self, session: UnifiedExecSession) -> i32 {
+    async fn store_session(
+        &self,
+        session: UnifiedExecSession,
+        context: &UnifiedExecContext,
+        command: &str,
+        started_at: Instant,
+    ) -> i32 {
        let session_id = self
            .next_session_id
            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
-        self.sessions.lock().await.insert(session_id, session);
+        let entry = SessionEntry {
+            session,
+            session_ref: Arc::clone(&context.session),
+            turn_ref: Arc::clone(&context.turn),
+            call_id: context.call_id.clone(),
+            command: command.to_string(),
+            cwd: context.turn.cwd.clone(),
+            started_at,
+        };
+        self.sessions.lock().await.insert(session_id, entry);
        session_id
    }

+    async fn emit_exec_end_from_entry(
+        entry: SessionEntry,
+        aggregated_output: String,
+        exit_code: i32,
+        duration: Duration,
+    ) {
+        let output = ExecToolCallOutput {
+            exit_code,
+            stdout: StreamOutput::new(aggregated_output.clone()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(aggregated_output),
+            duration,
+            timed_out: false,
+        };
+        let event_ctx = ToolEventCtx::new(
+            entry.session_ref.as_ref(),
+            entry.turn_ref.as_ref(),
+            &entry.call_id,
+            None,
+        );
+        let emitter = ToolEmitter::unified_exec(entry.command, entry.cwd, true);
+        emitter
+            .emit(event_ctx, ToolEventStage::Success(output))
+            .await;
+    }
+
+    async fn emit_exec_end_from_context(
+        context: &UnifiedExecContext,
+        command: String,
+        aggregated_output: String,
+        exit_code: i32,
+        duration: Duration,
+    ) {
+        let output = ExecToolCallOutput {
+            exit_code,
+            stdout: StreamOutput::new(aggregated_output.clone()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(aggregated_output),
+            duration,
+            timed_out: false,
+        };
+        let event_ctx = ToolEventCtx::new(
+            context.session.as_ref(),
+            context.turn.as_ref(),
+            &context.call_id,
+            None,
+        );
+        let emitter = ToolEmitter::unified_exec(command, context.turn.cwd.clone(), true);
+        emitter
+            .emit(event_ctx, ToolEventStage::Success(output))
+            .await;
+    }
+
    pub(crate) async fn open_session_with_exec_env(
        &self,
        env: &ExecEnv,
@@ -184,7 +300,7 @@ impl UnifiedExecSessionManager {
    pub(super) async fn open_session_with_sandbox(
        &self,
        command: Vec<String>,
-        context: &UnifiedExecContext<'_>,
+        context: &UnifiedExecContext,
    ) -> Result<UnifiedExecSession, UnifiedExecError> {
        let mut orchestrator = ToolOrchestrator::new();
        let mut runtime = UnifiedExecRuntime::new(self);
@@ -194,9 +310,9 @@ impl UnifiedExecSessionManager {
            create_env(&context.turn.shell_environment_policy),
        );
        let tool_ctx = ToolCtx {
-            session: context.session,
-            turn: context.turn,
-            call_id: context.call_id.to_string(),
+            session: context.session.as_ref(),
+            turn: context.turn.as_ref(),
+            call_id: context.call_id.clone(),
            tool_name: "exec_command".to_string(),
        };
        orchestrator
@@ -204,7 +320,7 @@ impl UnifiedExecSessionManager {
                &mut runtime,
                &req,
                &tool_ctx,
-                context.turn,
+                context.turn.as_ref(),
                context.turn.approval_policy,
            )
            .await
@@ -255,3 +371,15 @@ impl UnifiedExecSessionManager {
        collected
    }
 }
+
+/// Classification of a unified exec session.
+///
+/// NOTE(review): the code that constructs and matches on this enum is outside
+/// this hunk; the per-variant notes below are inferred from names and field
+/// types and should be confirmed against the call sites.
+enum SessionStatus {
+    /// Presumably the session is still running. Carries an optional exit code
+    /// and the `call_id` associated with the session.
+    Alive {
+        exit_code: Option<i32>,
+        call_id: String,
+    },
+    /// Presumably the session has terminated. Carries an optional exit code
+    /// and the boxed `SessionEntry` for the session.
+    Exited {
+        exit_code: Option<i32>,
+        entry: Box<SessionEntry>,
+    },
+    /// Presumably used when the session could not be classified — confirm.
+    Unknown,
+}
--- a/codex-rs/core/templates/sandboxing/assessment_prompt.md
+++ b/codex-rs/core/templates/sandboxing/assessment_prompt.md
@@ -0,0 +1,27 @@
+You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
+- description (concise summary, at most two sentences)
+- risk_level ("low", "medium", or "high")
+- risk_categories (optional array of zero or more category strings)
+Risk level examples:
+- low: read-only inspections, listing files, printing configuration
+- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
+- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
+Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
+Use multiple categories when appropriate.
+If information is insufficient, choose the most cautious risk level supported by the evidence.
+Respond with JSON only, without markdown code fences or extra commentary.
+
+---
+
+Command metadata:
+Platform: {{ platform }}
+Sandbox policy: {{ sandbox_policy }}
+{% if let Some(roots) = filesystem_roots %}
+Filesystem roots: {{ roots }}
+{% endif %}
+Working directory: {{ working_directory }}
+Command argv: {{ command_argv }}
+Command (joined): {{ command_joined }}
+{% if let Some(message) = sandbox_failure_message %}
+Sandbox failure message: {{ message }}
+{% endif %}
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -10,6 +10,7 @@ path = "lib.rs"
 anyhow = { workspace = true }
 assert_cmd = { workspace = true }
 codex-core = { workspace = true }
+codex-protocol = { workspace = true }
 notify = { workspace = true }
 regex-lite = { workspace = true }
 serde_json = { workspace = true }
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -35,6 +35,22 @@ impl ResponseMock {
    pub fn requests(&self) -> Vec<ResponsesRequest> {
        self.requests.lock().unwrap().clone()
    }
+
+    /// Reports whether a `function_call` item carrying `call_id` appears in at
+    /// least one of the requests captured so far.
+    pub fn saw_function_call(&self, call_id: &str) -> bool {
+        let captured = self.requests();
+        for request in &captured {
+            if request.has_function_call(call_id) {
+                return true;
+            }
+        }
+        false
+    }
+
+    /// Scans the captured requests in order and yields the `output` string of
+    /// the first `function_call_output` found for `call_id`, or `None` when no
+    /// request provides one.
+    pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
+        for request in self.requests() {
+            if let Some(text) = request.function_call_output_text(call_id) {
+                return Some(text);
+            }
+        }
+        None
+    }
 }

 #[derive(Debug, Clone)]
@@ -70,6 +86,28 @@ impl ResponsesRequest {
            .unwrap_or_else(|| panic!("function call output {call_id} item not found in request"))
    }

+    /// Reports whether the `input` array of this request holds an item whose
+    /// `type` is `function_call` and whose `call_id` equals `call_id`.
+    pub fn has_function_call(&self, call_id: &str) -> bool {
+        let items = self.input();
+        items.iter().any(|entry| {
+            let kind = entry.get("type").and_then(Value::as_str);
+            let id = entry.get("call_id").and_then(Value::as_str);
+            kind == Some("function_call") && id == Some(call_id)
+        })
+    }
+
+    /// Looks up the first `function_call_output` item for `call_id` in this
+    /// request's `input` and returns its `output` as an owned string.
+    ///
+    /// Yields `None` when no such item exists, or when the first matching
+    /// item has no string-valued `output`.
+    pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
+        let items = self.input();
+        for entry in items.iter() {
+            let kind = entry.get("type").and_then(Value::as_str);
+            let id = entry.get("call_id").and_then(Value::as_str);
+            if kind == Some("function_call_output") && id == Some(call_id) {
+                // Only the first match is consulted; later items are not tried
+                // even if this one lacks a string `output`.
+                return entry
+                    .get("output")
+                    .and_then(Value::as_str)
+                    .map(str::to_string);
+            }
+        }
+        None
+    }
+
    pub fn header(&self, name: &str) -> Option<String> {
        self.0
            .headers
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@@ -1,17 +1,30 @@
 use std::mem::swap;
+use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;

+use anyhow::Result;
 use codex_core::CodexAuth;
 use codex_core::CodexConversation;
 use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
 use codex_core::built_in_model_providers;
 use codex_core::config::Config;
+use codex_core::features::Feature;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol::SessionConfiguredEvent;
+use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::user_input::UserInput;
+use serde_json::Value;
 use tempfile::TempDir;
+use wiremock::MockServer;

 use crate::load_default_config_for_test;
+use crate::responses::start_mock_server;
+use crate::wait_for_event;

 type ConfigMutator = dyn FnOnce(&mut Config) + Send;

@@ -96,6 +109,12 @@ impl TestCodexBuilder {
            mutator(&mut config);
        }

+        if config.include_apply_patch_tool {
+            config.features.enable(Feature::ApplyPatchFreeform);
+        } else {
+            config.features.disable(Feature::ApplyPatchFreeform);
+        }
+
        Ok((config, cwd))
    }
 }
@@ -107,6 +126,139 @@ pub struct TestCodex {
    pub session_configured: SessionConfiguredEvent,
 }

+impl TestCodex {
+    /// Borrows the path of the working directory owned by this fixture.
+    pub fn cwd_path(&self) -> &Path {
+        self.cwd.path()
+    }
+
+    /// Resolves `rel` against the fixture's working directory.
+    pub fn workspace_path(&self, rel: impl AsRef<Path>) -> PathBuf {
+        let root = self.cwd_path();
+        root.join(rel)
+    }
+
+    /// Submits `prompt` as a user turn under `SandboxPolicy::DangerFullAccess`
+    /// and waits for the turn to complete.
+    pub async fn submit_turn(&self, prompt: &str) -> Result<()> {
+        let policy = SandboxPolicy::DangerFullAccess;
+        self.submit_turn_with_policy(prompt, policy).await
+    }
+
+    /// Submits `prompt` as a user turn under the given `sandbox_policy`, using
+    /// the session's configured model and `AskForApproval::Never`, then waits
+    /// until a `TaskComplete` event is observed.
+    pub async fn submit_turn_with_policy(
+        &self,
+        prompt: &str,
+        sandbox_policy: SandboxPolicy,
+    ) -> Result<()> {
+        let user_turn = Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: prompt.into(),
+            }],
+            final_output_json_schema: None,
+            cwd: self.cwd_path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy,
+            model: self.session_configured.model.clone(),
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        };
+        self.codex.submit(user_turn).await?;
+
+        // Block until the agent reports that the turn has finished.
+        wait_for_event(&self.codex, |event| {
+            matches!(event, EventMsg::TaskComplete(_))
+        })
+        .await;
+        Ok(())
+    }
+}
+
+/// Bundles a wiremock `MockServer` with the `TestCodex` fixture built against
+/// it, so tests can drive turns and inspect the requests the server received.
+pub struct TestCodexHarness {
+    // Mock server the fixture was built against.
+    server: MockServer,
+    // Codex fixture built against `server`.
+    test: TestCodex,
+}
+
+impl TestCodexHarness {
+    /// Creates a harness from the default `test_codex()` builder.
+    pub async fn new() -> Result<Self> {
+        let builder = test_codex();
+        Self::with_builder(builder).await
+    }
+
+    /// Creates a harness whose configuration is adjusted by `mutator`.
+    pub async fn with_config(mutator: impl FnOnce(&mut Config) + Send + 'static) -> Result<Self> {
+        let builder = test_codex().with_config(mutator);
+        Self::with_builder(builder).await
+    }
+
+    /// Starts a mock server and builds the fixture from `builder` against it.
+    pub async fn with_builder(mut builder: TestCodexBuilder) -> Result<Self> {
+        let server = start_mock_server().await;
+        let test = builder.build(&server).await?;
+        let harness = Self { server, test };
+        Ok(harness)
+    }
+
+    /// The mock server backing this harness.
+    pub fn server(&self) -> &MockServer {
+        &self.server
+    }
+
+    /// The Codex fixture owned by this harness.
+    pub fn test(&self) -> &TestCodex {
+        &self.test
+    }
+
+    /// Working directory of the underlying fixture.
+    pub fn cwd(&self) -> &Path {
+        self.test.cwd_path()
+    }
+
+    /// Resolves `rel` against the fixture's working directory.
+    pub fn path(&self, rel: impl AsRef<Path>) -> PathBuf {
+        self.test.workspace_path(rel)
+    }
+
+    /// Forwards to `TestCodex::submit_turn`.
+    pub async fn submit(&self, prompt: &str) -> Result<()> {
+        self.test.submit_turn(prompt).await
+    }
+
+    /// Forwards to `TestCodex::submit_turn_with_policy`.
+    pub async fn submit_with_policy(
+        &self,
+        prompt: &str,
+        sandbox_policy: SandboxPolicy,
+    ) -> Result<()> {
+        let test = &self.test;
+        test.submit_turn_with_policy(prompt, sandbox_policy).await
+    }
+
+    /// Parses the body of every request received by the mock server as JSON.
+    ///
+    /// Panics if the received requests are unavailable or a body is not valid
+    /// JSON.
+    pub async fn request_bodies(&self) -> Vec<Value> {
+        let received = self.server.received_requests().await.expect("requests");
+        let mut bodies = Vec::with_capacity(received.len());
+        for request in received {
+            let body = serde_json::from_slice(&request.body).expect("request body json");
+            bodies.push(body);
+        }
+        bodies
+    }
+
+    /// Returns a copy of the `function_call_output` item recorded for
+    /// `call_id`; panics when no request contains one.
+    pub async fn function_call_output_value(&self, call_id: &str) -> Value {
+        let bodies = self.request_bodies().await;
+        let item = function_call_output(&bodies, call_id);
+        item.clone()
+    }
+
+    /// Returns the string stored under `output` in the `function_call_output`
+    /// item for `call_id`; panics when it is missing or not a string.
+    pub async fn function_call_stdout(&self, call_id: &str) -> String {
+        let item = self.function_call_output_value(call_id).await;
+        let stdout = item
+            .get("output")
+            .and_then(Value::as_str)
+            .expect("output string");
+        stdout.to_string()
+    }
+}
+
+/// Finds the first `function_call_output` item for `call_id` across the
+/// `input` arrays of `bodies`, in order.
+///
+/// Bodies without an array-valued `input` are skipped. Panics when no body
+/// contains a matching item.
+fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {
+    bodies
+        .iter()
+        .filter_map(|body| body.get("input").and_then(Value::as_array))
+        .flatten()
+        .find(|item| {
+            let kind = item.get("type").and_then(Value::as_str);
+            let id = item.get("call_id").and_then(Value::as_str);
+            kind == Some("function_call_output") && id == Some(call_id)
+        })
+        .unwrap_or_else(|| panic!("function_call_output {call_id} not found"))
+}
+
 pub fn test_codex() -> TestCodexBuilder {
    TestCodexBuilder {
        config_mutators: vec![],
--- a/codex-rs/core/tests/suite/abort_tasks.rs
+++ b/codex-rs/core/tests/suite/abort_tasks.rs
@@ -1,3 +1,4 @@
+use std::sync::Arc;
 use std::time::Duration;

 use codex_core::protocol::EventMsg;
@@ -5,7 +6,9 @@ use codex_core::protocol::Op;
 use codex_protocol::user_input::UserInput;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
 use core_test_support::responses::mount_sse_once;
+use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
 use core_test_support::test_codex::test_codex;
@@ -67,3 +70,98 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
    )
    .await;
 }
+
+/// After an interrupt we expect the next request to the model to include both
+/// the original tool call and an `"aborted"` `function_call_output`. This test
+/// exercises the follow-up flow: it sends another user turn, inspects the mock
+/// responses server, and ensures the model receives the synthesized abort.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn interrupt_tool_records_history_entries() {
+    // A long-running shell command so the turn is still in flight when the
+    // interrupt is submitted.
+    let command = vec![
+        "bash".to_string(),
+        "-lc".to_string(),
+        "sleep 60".to_string(),
+    ];
+    let call_id = "call-history";
+
+    let args = json!({
+        "command": command,
+        "timeout_ms": 60_000
+    })
+    .to_string();
+    // First response: the model asks for the `shell` tool call.
+    let first_body = sse(vec![
+        ev_response_created("resp-history"),
+        ev_function_call(call_id, "shell", &args),
+        ev_completed("resp-history"),
+    ]);
+    // Second response: completes immediately, for the follow-up turn.
+    let follow_up_body = sse(vec![
+        ev_response_created("resp-followup"),
+        ev_completed("resp-followup"),
+    ]);
+
+    let server = start_mock_server().await;
+    let response_mock = mount_sse_sequence(&server, vec![first_body, follow_up_body]).await;
+
+    let fixture = test_codex().build(&server).await.unwrap();
+    let codex = Arc::clone(&fixture.codex);
+
+    let wait_timeout = Duration::from_millis(100);
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: "start history recording".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    // Wait until the tool call has started before interrupting.
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::ExecCommandBegin(_)),
+        wait_timeout,
+    )
+    .await;
+
+    codex.submit(Op::Interrupt).await.unwrap();
+
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::TurnAborted(_)),
+        wait_timeout,
+    )
+    .await;
+
+    // Follow-up turn: its request is the one inspected by the assertions below.
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: "follow up".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::TaskComplete(_)),
+        wait_timeout,
+    )
+    .await;
+
+    let requests = response_mock.requests();
+    assert!(
+        requests.len() == 2,
+        "expected two calls to the responses API, got {}",
+        requests.len()
+    );
+
+    // Both helpers search across every captured request.
+    assert!(
+        response_mock.saw_function_call(call_id),
+        "function call not recorded in responses payload"
+    );
+    assert_eq!(
+        response_mock.function_call_output_text(call_id).as_deref(),
+        Some("aborted"),
+        "aborted function call output not recorded in responses payload"
+    );
+}
--- a/codex-rs/core/tests/suite/apply_patch_cli.rs
+++ b/codex-rs/core/tests/suite/apply_patch_cli.rs
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -247,7 +247,11 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
        session_configured,
        ..
    } = conversation_manager
-        .resume_conversation_from_rollout(config, session_path.clone(), auth_manager)
+        .resume_conversation_from_rollout(
+            config.clone(),
+            session_path.clone(),
+            auth_manager.clone(),
+        )
        .await
        .expect("resume conversation");

@@ -260,6 +264,23 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
    let expected_initial_json = json!([]);
    assert_eq!(initial_json, expected_initial_json);

+    let NewConversation {
+        conversation: codex_again,
+        session_configured: session_configured_again,
+        ..
+    } = conversation_manager
+        .resume_conversation_from_rollout(
+            config.clone(),
+            session_path.clone(),
+            auth_manager.clone(),
+        )
+        .await
+        .expect("resume existing conversation");
+    assert!(Arc::ptr_eq(&codex, &codex_again));
+    let session_configured_json = serde_json::to_value(&session_configured).unwrap();
+    let session_configured_again_json = serde_json::to_value(&session_configured_again).unwrap();
+    assert_eq!(session_configured_json, session_configured_again_json);
+
    // 2) Submit new input; the request body must include the prior item followed by the new user input.
    codex
        .submit(Op::UserInput {
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -3,6 +3,8 @@
 #[cfg(not(target_os = "windows"))]
 mod abort_tasks;
 #[cfg(not(target_os = "windows"))]
+mod apply_patch_cli;
+#[cfg(not(target_os = "windows"))]
 mod approvals;
 mod cli_stream;
 mod client;
--- a/codex-rs/core/tests/suite/rollout_list_find.rs
+++ b/codex-rs/core/tests/suite/rollout_list_find.rs
@@ -1,5 +1,6 @@
 #![allow(clippy::unwrap_used, clippy::expect_used)]
 use std::io::Write;
+use std::path::Path;
 use std::path::PathBuf;

 use codex_core::find_conversation_path_by_id_str;
@@ -8,8 +9,8 @@ use uuid::Uuid;

 /// Create sessions/YYYY/MM/DD and write a minimal rollout file containing the
 /// provided conversation id in the SessionMeta line. Returns the absolute path.
-fn write_minimal_rollout_with_id(codex_home: &TempDir, id: Uuid) -> PathBuf {
-    let sessions = codex_home.path().join("sessions/2024/01/01");
+fn write_minimal_rollout_with_id(codex_home: &Path, id: Uuid) -> PathBuf {
+    let sessions = codex_home.join("sessions/2024/01/01");
    std::fs::create_dir_all(&sessions).unwrap();

    let file = sessions.join(format!("rollout-2024-01-01T00-00-00-{id}.jsonl"));
@@ -40,7 +41,7 @@ fn write_minimal_rollout_with_id(codex_home: &TempDir, id: Uuid) -> PathBuf {
 async fn find_locates_rollout_file_by_id() {
    let home = TempDir::new().unwrap();
    let id = Uuid::new_v4();
-    let expected = write_minimal_rollout_with_id(&home, id);
+    let expected = write_minimal_rollout_with_id(home.path(), id);

    let found = find_conversation_path_by_id_str(home.path(), &id.to_string())
        .await
@@ -48,3 +49,33 @@ async fn find_locates_rollout_file_by_id() {

    assert_eq!(found.unwrap(), expected);
 }
+
+/// The rollout file must still be found when a `.gitignore` in the parent
+/// directory of the codex home ignores everything under `.codex/`.
+#[tokio::test]
+async fn find_handles_gitignore_covering_codex_home_directory() {
+    let repo = TempDir::new().unwrap();
+    let codex_home = repo.path().join(".codex");
+    std::fs::create_dir_all(&codex_home).unwrap();
+    // The ignore file lives in the parent of the codex home, not inside it.
+    std::fs::write(repo.path().join(".gitignore"), ".codex/**\n").unwrap();
+    let id = Uuid::new_v4();
+    let expected = write_minimal_rollout_with_id(&codex_home, id);
+
+    let found = find_conversation_path_by_id_str(&codex_home, &id.to_string())
+        .await
+        .unwrap();
+
+    assert_eq!(found, Some(expected));
+}
+
+/// The rollout file must still be found when a `.gitignore` inside the
+/// `sessions` directory ignores all `*.jsonl` files.
+#[tokio::test]
+async fn find_ignores_granular_gitignore_rules() {
+    let home = TempDir::new().unwrap();
+    let id = Uuid::new_v4();
+    // The rollout is written first so the `sessions` directory exists before
+    // the `.gitignore` is placed inside it.
+    let expected = write_minimal_rollout_with_id(home.path(), id);
+    std::fs::write(home.path().join("sessions/.gitignore"), "*.jsonl\n").unwrap();
+
+    let found = find_conversation_path_by_id_str(home.path(), &id.to_string())
+        .await
+        .unwrap();
+
+    assert_eq!(found, Some(expected));
+}
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -133,6 +133,262 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
    Ok(())
 }

+/// Runs `/bin/echo END-EVENT` through `exec_command`, polls the session once
+/// via `write_stdin` with empty input, and expects an `ExecCommandEnd` event
+/// for the original call with exit code 0 and the marker in its aggregated
+/// output.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn unified_exec_emits_exec_command_end_event() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+        config.features.enable(Feature::UnifiedExec);
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let call_id = "uexec-end-event";
+    let args = json!({
+        "cmd": "/bin/echo END-EVENT".to_string(),
+        "yield_time_ms": 250,
+    });
+    // Empty `chars`: the second tool call writes nothing to session 0.
+    let poll_call_id = "uexec-end-event-poll";
+    let poll_args = json!({
+        "chars": "",
+        "session_id": 0,
+        "yield_time_ms": 250,
+    });
+
+    // Scripted model responses: exec_command, then write_stdin, then a final
+    // assistant message.
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-2"),
+            ev_function_call(
+                poll_call_id,
+                "write_stdin",
+                &serde_json::to_string(&poll_args)?,
+            ),
+            ev_completed("resp-2"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-3"),
+            ev_assistant_message("msg-1", "finished"),
+            ev_completed("resp-3"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "emit end event".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    // The end event is matched on the `exec_command` call id, not the poll's.
+    let end_event = wait_for_event_match(&codex, |msg| match msg {
+        EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
+        _ => None,
+    })
+    .await;
+
+    assert_eq!(end_event.exit_code, 0);
+    assert!(
+        end_event.aggregated_output.contains("END-EVENT"),
+        "expected aggregated output to contain marker"
+    );
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+    Ok(())
+}
+
+/// Runs `printf 'HELLO-UEXEC'` through `exec_command` and expects an
+/// `ExecCommandOutputDelta` event for that call whose chunk contains the
+/// printed marker.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn unified_exec_emits_output_delta_for_exec_command() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+        config.features.enable(Feature::UnifiedExec);
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let call_id = "uexec-delta-1";
+    let args = json!({
+        "cmd": "printf 'HELLO-UEXEC'",
+        "yield_time_ms": 250,
+    });
+
+    // Scripted model responses: the exec_command call, then a final assistant
+    // message.
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-2"),
+            ev_assistant_message("msg-1", "finished"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "emit delta".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    // Takes the first delta event for this call id; the assertion below then
+    // requires that chunk to contain the marker.
+    let delta = wait_for_event_match(&codex, |msg| match msg {
+        EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == call_id => Some(ev.clone()),
+        _ => None,
+    })
+    .await;
+
+    let text = String::from_utf8_lossy(&delta.chunk).to_string();
+    assert!(
+        text.contains("HELLO-UEXEC"),
+        "delta chunk missing expected text: {text:?}"
+    );
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+    Ok(())
+}
+
+/// Opens an interactive `/bin/bash -i` session via `exec_command`, writes to
+/// it via `write_stdin`, and expects an `ExecCommandOutputDelta` event that
+/// contains the marker text and carries the call id of the opening
+/// `exec_command`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn unified_exec_emits_output_delta_for_write_stdin() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+        config.features.enable(Feature::UnifiedExec);
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let open_call_id = "uexec-open";
+    let open_args = json!({
+        "cmd": "/bin/bash -i",
+        "yield_time_ms": 200,
+    });
+
+    let stdin_call_id = "uexec-stdin-delta";
+    // NOTE(review): `\\n` here is a literal backslash followed by `n`, not a
+    // newline (compare `"bye-END\n"` in the session-dies-via-stdin test) —
+    // confirm this is intended.
+    let stdin_args = json!({
+        "chars": "echo WSTDIN-MARK\\n",
+        "session_id": 0,
+        "yield_time_ms": 800,
+    });
+
+    // Scripted model responses: open the session, write to its stdin, then a
+    // final assistant message.
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(
+                open_call_id,
+                "exec_command",
+                &serde_json::to_string(&open_args)?,
+            ),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-2"),
+            ev_function_call(
+                stdin_call_id,
+                "write_stdin",
+                &serde_json::to_string(&stdin_args)?,
+            ),
+            ev_completed("resp-2"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-3"),
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-3"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "stdin delta".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    // Expect a delta event corresponding to the write_stdin call.
+    // The event is matched on `open_call_id` (the call that opened the
+    // session), and only chunks containing the marker are accepted.
+    let delta = wait_for_event_match(&codex, |msg| match msg {
+        EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == open_call_id => {
+            let text = String::from_utf8_lossy(&ev.chunk);
+            if text.contains("WSTDIN-MARK") {
+                Some(ev.clone())
+            } else {
+                None
+            }
+        }
+        _ => None,
+    })
+    .await;
+
+    let text = String::from_utf8_lossy(&delta.chunk).to_string();
+    assert!(
+        text.contains("WSTDIN-MARK"),
+        "stdin delta chunk missing expected text: {text:?}"
+    );
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
    use tokio::time::Duration;
@@ -516,6 +772,110 @@ async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
    Ok(())
 }

+/// Starts `/bin/cat` via `exec_command`, writes a line to it, then writes
+/// U+0004 via `write_stdin`, and expects an `ExecCommandEnd` event carrying
+/// the call id of the original `exec_command` with exit code 0.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn unified_exec_emits_end_event_when_session_dies_via_stdin() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+        config.features.enable(Feature::UnifiedExec);
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let start_call_id = "uexec-end-on-exit-start";
+    let start_args = serde_json::json!({
+        "cmd": "/bin/cat",
+        "yield_time_ms": 200,
+    });
+
+    let echo_call_id = "uexec-end-on-exit-echo";
+    let echo_args = serde_json::json!({
+        "chars": "bye-END\n",
+        "session_id": 0,
+        "yield_time_ms": 300,
+    });
+
+    // U+0004 (Ctrl-D); presumably read by `cat` as end-of-input so it exits.
+    let exit_call_id = "uexec-end-on-exit";
+    let exit_args = serde_json::json!({
+        "chars": "\u{0004}",
+        "session_id": 0,
+        "yield_time_ms": 500,
+    });
+
+    // Scripted model responses: start the session, write a line, write
+    // U+0004, then a final assistant message.
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(
+                start_call_id,
+                "exec_command",
+                &serde_json::to_string(&start_args)?,
+            ),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-2"),
+            ev_function_call(
+                echo_call_id,
+                "write_stdin",
+                &serde_json::to_string(&echo_args)?,
+            ),
+            ev_completed("resp-2"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-3"),
+            ev_function_call(
+                exit_call_id,
+                "write_stdin",
+                &serde_json::to_string(&exit_args)?,
+            ),
+            ev_completed("resp-3"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-4"),
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-4"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "end on exit".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    // We expect the ExecCommandEnd event to match the initial exec_command call_id.
+    let end_event = wait_for_event_match(&codex, |msg| match msg {
+        EventMsg::ExecCommandEnd(ev) if ev.call_id == start_call_id => Some(ev.clone()),
+        _ => None,
+    })
+    .await;
+
+    assert_eq!(end_event.exit_code, 0);
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
    skip_if_no_network!(Ok(()));
--- a/codex-rs/exec/src/lib.rs
+++ b/codex-rs/exec/src/lib.rs
@@ -179,6 +179,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        include_view_image_tool: None,
        show_raw_agent_reasoning: oss.then_some(true),
        tools_web_search_request: None,
+        experimental_sandbox_command_assessment: None,
        additional_writable_roots: Vec::new(),
    };
    // Parse `-c` overrides.
--- a/codex-rs/feedback/src/lib.rs
+++ b/codex-rs/feedback/src/lib.rs
@@ -12,7 +12,7 @@ use anyhow::anyhow;
 use codex_protocol::ConversationId;
 use tracing_subscriber::fmt::writer::MakeWriter;

-const DEFAULT_MAX_BYTES: usize = 2 * 1024 * 1024; // 2 MiB
+const DEFAULT_MAX_BYTES: usize = 4 * 1024 * 1024; // 4 MiB
 const SENTRY_DSN: &str =
    "https://ae32ed50620d7a7792c1ce5df38b3e3e@o33249.ingest.us.sentry.io/4510195390611458";
 const UPLOAD_TIMEOUT_SECS: u64 = 10;
--- a/codex-rs/file-search/src/lib.rs
+++ b/codex-rs/file-search/src/lib.rs
@@ -105,6 +105,7 @@ pub async fn run_main<T: Reporter>(
        threads,
        cancel_flag,
        compute_indices,
+        true,
    )?;
    let match_count = matches.len();
    let matches_truncated = total_match_count > match_count;
@@ -121,6 +122,7 @@ pub async fn run_main<T: Reporter>(

 /// The worker threads will periodically check `cancel_flag` to see if they
 /// should stop processing files.
+#[allow(clippy::too_many_arguments)]
 pub fn run(
    pattern_text: &str,
    limit: NonZero<usize>,
@@ -129,6 +131,7 @@ pub fn run(
    threads: NonZero<usize>,
    cancel_flag: Arc<AtomicBool>,
    compute_indices: bool,
+    respect_gitignore: bool,
 ) -> anyhow::Result<FileSearchResults> {
    let pattern = create_pattern(pattern_text);
    // Create one BestMatchesList per worker thread so that each worker can
@@ -157,6 +160,14 @@ pub fn run(
        .hidden(false)
        // Don't require git to be present to apply to apply git-related ignore rules.
        .require_git(false);
+    if !respect_gitignore {
+        walk_builder
+            .git_ignore(false)
+            .git_global(false)
+            .git_exclude(false)
+            .ignore(false)
+            .parents(false);
+    }

    if !exclude.is_empty() {
        let mut override_builder = OverrideBuilder::new(search_directory);
--- a/codex-rs/mcp-server/src/codex_tool_config.rs
+++ b/codex-rs/mcp-server/src/codex_tool_config.rs
@@ -158,6 +158,7 @@ impl CodexToolCallParam {
            include_view_image_tool: None,
            show_raw_agent_reasoning: None,
            tools_web_search_request: None,
+            experimental_sandbox_command_assessment: None,
            additional_writable_roots: Vec::new(),
        };

--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -178,6 +178,7 @@ async fn run_codex_tool_session_inner(
                        cwd,
                        call_id,
                        reason: _,
+                        risk,
                        parsed_cmd,
                    }) => {
                        handle_exec_approval_request(
@@ -190,6 +191,7 @@ async fn run_codex_tool_session_inner(
                            event.id.clone(),
                            call_id,
                            parsed_cmd,
+                            risk,
                        )
                        .await;
                        continue;
--- a/codex-rs/mcp-server/src/exec_approval.rs
+++ b/codex-rs/mcp-server/src/exec_approval.rs
@@ -4,6 +4,7 @@ use std::sync::Arc;
 use codex_core::CodexConversation;
 use codex_core::protocol::Op;
 use codex_core::protocol::ReviewDecision;
+use codex_core::protocol::SandboxCommandAssessment;
 use codex_protocol::parse_command::ParsedCommand;
 use mcp_types::ElicitRequest;
 use mcp_types::ElicitRequestParamsRequestedSchema;
@@ -37,6 +38,8 @@ pub struct ExecApprovalElicitRequestParams {
    pub codex_command: Vec<String>,
    pub codex_cwd: PathBuf,
    pub codex_parsed_cmd: Vec<ParsedCommand>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub codex_risk: Option<SandboxCommandAssessment>,
 }

 // TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
@@ -59,6 +62,7 @@ pub(crate) async fn handle_exec_approval_request(
    event_id: String,
    call_id: String,
    codex_parsed_cmd: Vec<ParsedCommand>,
+    codex_risk: Option<SandboxCommandAssessment>,
 ) {
    let escaped_command =
        shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
@@ -81,6 +85,7 @@ pub(crate) async fn handle_exec_approval_request(
        codex_command: command,
        codex_cwd: cwd,
        codex_parsed_cmd,
+        codex_risk,
    };
    let params_json = match serde_json::to_value(&params) {
        Ok(value) => value,
--- a/codex-rs/mcp-server/tests/suite/codex_tool.rs
+++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs
@@ -196,6 +196,7 @@ fn create_expected_elicitation_request(
            codex_cwd: workdir.to_path_buf(),
            codex_call_id: "call1234".to_string(),
            codex_parsed_cmd,
+            codex_risk: None,
        })?),
    })
 }
--- a/codex-rs/otel/src/otel_event_manager.rs
+++ b/codex-rs/otel/src/otel_event_manager.rs
@@ -8,6 +8,8 @@ use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::ReviewDecision;
 use codex_protocol::protocol::SandboxPolicy;
+use codex_protocol::protocol::SandboxRiskCategory;
+use codex_protocol::protocol::SandboxRiskLevel;
 use codex_protocol::user_input::UserInput;
 use eventsource_stream::Event as StreamEvent;
 use eventsource_stream::EventStreamError as StreamError;
@@ -366,6 +368,63 @@ impl OtelEventManager {
        );
    }

+    pub fn sandbox_assessment(
+        &self,
+        call_id: &str,
+        status: &str,
+        risk_level: Option<SandboxRiskLevel>, // optional; logged via its `as_str` label
+        risk_categories: &[SandboxRiskCategory], // logged as a single ", "-joined string
+        duration: Duration, // logged as whole milliseconds
+    ) {
+        let level = risk_level.map(|level| level.as_str());
+        let categories = if risk_categories.is_empty() {
+            String::new()
+        } else {
+            risk_categories
+                .iter()
+                .map(SandboxRiskCategory::as_str)
+                .collect::<Vec<_>>()
+                .join(", ")
+        };
+        // Emit one INFO event: `self.metadata` fields plus this assessment's outcome.
+        tracing::event!(
+            tracing::Level::INFO,
+            event.name = "codex.sandbox_assessment",
+            event.timestamp = %timestamp(),
+            conversation.id = %self.metadata.conversation_id,
+            app.version = %self.metadata.app_version,
+            auth_mode = self.metadata.auth_mode,
+            user.account_id = self.metadata.account_id,
+            user.email = self.metadata.account_email,
+            terminal.type = %self.metadata.terminal_type,
+            model = %self.metadata.model,
+            slug = %self.metadata.slug,
+            call_id = %call_id,
+            status = %status,
+            risk_level = level,
+            risk_categories = categories,
+            duration_ms = %duration.as_millis(),
+        );
+    }
+
+    pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) {
+        tracing::event!(
+            tracing::Level::INFO,
+            event.name = "codex.sandbox_assessment_latency", // timing-only INFO event
+            event.timestamp = %timestamp(),
+            conversation.id = %self.metadata.conversation_id,
+            app.version = %self.metadata.app_version,
+            auth_mode = self.metadata.auth_mode,
+            user.account_id = self.metadata.account_id,
+            user.email = self.metadata.account_email,
+            terminal.type = %self.metadata.terminal_type,
+            model = %self.metadata.model,
+            slug = %self.metadata.slug,
+            call_id = %call_id, // besides metadata, only call_id and duration are logged
+            duration_ms = %duration.as_millis(), // whole milliseconds
+        );
+    }
+
    pub async fn log_tool_result<F, Fut, E>(
        &self,
        tool_name: &str,
--- a/codex-rs/protocol/src/account.rs
+++ b/codex-rs/protocol/src/account.rs
@@ -0,0 +1,35 @@
+use schemars::JsonSchema;
+use serde::Deserialize;
+use serde::Serialize;
+use ts_rs::TS;
+
+#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, TS, Default)]
+#[serde(rename_all = "lowercase")] // variants (de)serialize as "free", "plus", "pro", ...
+#[ts(rename_all = "lowercase")]
+pub enum PlanType {
+    #[default]
+    Free, // the value returned by `PlanType::default()`
+    Plus,
+    Pro,
+    Team,
+    Business,
+    Enterprise,
+    Edu,
+    #[serde(other)]
+    Unknown, // catch-all: any unrecognized plan string deserializes to this variant
+}
+
+#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema, TS)]
+#[serde(tag = "type")] // internally tagged: the variant name is stored in a "type" field
+#[ts(tag = "type")]
+pub enum Account {
+    ApiKey {
+        api_key: String,
+    },
+    #[serde(rename = "chatgpt")] // tag value is "chatgpt" instead of the variant name
+    #[ts(rename = "chatgpt")]
+    ChatGpt {
+        email: Option<String>,
+        plan_type: PlanType,
+    },
+}
--- a/codex-rs/protocol/src/approvals.rs
+++ b/codex-rs/protocol/src/approvals.rs
@@ -0,0 +1,91 @@
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use crate::parse_command::ParsedCommand;
+use crate::protocol::FileChange;
+use schemars::JsonSchema;
+use serde::Deserialize;
+use serde::Serialize;
+use ts_rs::TS;
+
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
+#[serde(rename_all = "snake_case")] // serialized as "low", "medium", "high"
+pub enum SandboxRiskLevel {
+    Low,
+    Medium,
+    High,
+}
+
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
+#[serde(rename_all = "snake_case")] // serialized as e.g. "data_deletion", "network_access"
+pub enum SandboxRiskCategory {
+    DataDeletion,
+    DataExfiltration,
+    PrivilegeEscalation,
+    SystemModification,
+    NetworkAccess,
+    ResourceExhaustion,
+    Compliance,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
+pub struct SandboxCommandAssessment {
+    pub description: String,
+    pub risk_level: SandboxRiskLevel,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")] // optional on input; omitted when empty
+    pub risk_categories: Vec<SandboxRiskCategory>,
+}
+
+impl SandboxRiskLevel {
+    pub fn as_str(&self) -> &'static str {
+        match self { // lowercase label for each level
+            Self::Low => "low",
+            Self::Medium => "medium",
+            Self::High => "high",
+        }
+    }
+}
+
+impl SandboxRiskCategory {
+    pub fn as_str(&self) -> &'static str {
+        match self { // snake_case label for each category
+            Self::DataDeletion => "data_deletion",
+            Self::DataExfiltration => "data_exfiltration",
+            Self::PrivilegeEscalation => "privilege_escalation",
+            Self::SystemModification => "system_modification",
+            Self::NetworkAccess => "network_access",
+            Self::ResourceExhaustion => "resource_exhaustion",
+            Self::Compliance => "compliance",
+        }
+    }
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
+pub struct ExecApprovalRequestEvent {
+    /// Identifier for the associated exec call, if available.
+    pub call_id: String,
+    /// The command to be executed.
+    pub command: Vec<String>,
+    /// The command's working directory.
+    pub cwd: PathBuf,
+    /// Optional human-readable reason for the approval (e.g. retry without sandbox).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reason: Option<String>,
+    /// Optional model-provided risk assessment describing the blocked command.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub risk: Option<SandboxCommandAssessment>,
+    pub parsed_cmd: Vec<ParsedCommand>, // NOTE(review): presumably the parsed form of `command` - confirm
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
+pub struct ApplyPatchApprovalRequestEvent {
+    /// Responses API call id for the associated patch apply call, if available.
+    pub call_id: String,
+    pub changes: HashMap<PathBuf, FileChange>, // one `FileChange` entry per path
+    /// Optional explanatory reason (e.g. request for extra write access).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reason: Option<String>,
+    /// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub grant_root: Option<PathBuf>,
+}
--- a/codex-rs/protocol/src/lib.rs
+++ b/codex-rs/protocol/src/lib.rs
@@ -1,5 +1,7 @@
+pub mod account;
 mod conversation_id;
 pub use conversation_id::ConversationId;
+pub mod approvals;
 pub mod config_types;
 pub mod custom_prompts;
 pub mod items;
--- a/codex-rs/protocol/src/protocol.rs
+++ b/codex-rs/protocol/src/protocol.rs
@@ -34,6 +34,12 @@ use serde_with::serde_as;
 use strum_macros::Display;
 use ts_rs::TS;

+pub use crate::approvals::ApplyPatchApprovalRequestEvent;
+pub use crate::approvals::ExecApprovalRequestEvent;
+pub use crate::approvals::SandboxCommandAssessment;
+pub use crate::approvals::SandboxRiskCategory;
+pub use crate::approvals::SandboxRiskLevel;
+
 /// Open/close tags for special user-input blocks. Used across crates to avoid
 /// duplicated hardcoded strings.
 pub const USER_INSTRUCTIONS_OPEN_TAG: &str = "<user_instructions>";
@@ -1126,33 +1132,6 @@ pub struct ExecCommandOutputDeltaEvent {
    pub chunk: Vec<u8>,
 }

-#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
-pub struct ExecApprovalRequestEvent {
-    /// Identifier for the associated exec call, if available.
-    pub call_id: String,
-    /// The command to be executed.
-    pub command: Vec<String>,
-    /// The command's working directory.
-    pub cwd: PathBuf,
-    /// Optional human-readable reason for the approval (e.g. retry without sandbox).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub reason: Option<String>,
-    pub parsed_cmd: Vec<ParsedCommand>,
-}
-
-#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
-pub struct ApplyPatchApprovalRequestEvent {
-    /// Responses API call id for the associated patch apply call, if available.
-    pub call_id: String,
-    pub changes: HashMap<PathBuf, FileChange>,
-    /// Optional explanatory reason (e.g. request for extra write access).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub reason: Option<String>,
-    /// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub grant_root: Option<PathBuf>,
-}
-
 #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
 pub struct BackgroundEventEvent {
    pub message: String,
--- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs
+++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs
@@ -19,6 +19,9 @@ use crate::render::renderable::Renderable;
 use codex_core::protocol::FileChange;
 use codex_core::protocol::Op;
 use codex_core::protocol::ReviewDecision;
+use codex_core::protocol::SandboxCommandAssessment;
+use codex_core::protocol::SandboxRiskCategory;
+use codex_core::protocol::SandboxRiskLevel;
 use crossterm::event::KeyCode;
 use crossterm::event::KeyEvent;
 use crossterm::event::KeyEventKind;
@@ -38,6 +41,7 @@ pub(crate) enum ApprovalRequest {
        id: String,
        command: Vec<String>,
        reason: Option<String>,
+        risk: Option<SandboxCommandAssessment>,
    },
    ApplyPatch {
        id: String,
@@ -285,12 +289,17 @@ impl From<ApprovalRequest> for ApprovalRequestState {
                id,
                command,
                reason,
+                risk,
            } => {
+                let reason = reason.filter(|item| !item.is_empty());
+                let has_reason = reason.is_some();
                let mut header: Vec<Line<'static>> = Vec::new();
-                if let Some(reason) = reason
-                    && !reason.is_empty()
-                {
+                if let Some(reason) = reason {
                    header.push(Line::from(vec!["Reason: ".into(), reason.italic()]));
+                }
+                if let Some(risk) = risk.as_ref() {
+                    header.extend(render_risk_lines(risk));
+                } else if has_reason {
                    header.push(Line::from(""));
                }
                let full_cmd = strip_bash_lc_and_escape(&command);
@@ -330,6 +339,52 @@ impl From<ApprovalRequest> for ApprovalRequestState {
    }
 }

+fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> {
+    let level_span = match risk.risk_level {
+        SandboxRiskLevel::Low => "LOW".green().bold(),
+        SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(),
+        SandboxRiskLevel::High => "HIGH".red().bold(),
+    };
+    // Result layout: optional "Summary: ..." line, a "Risk: ..." line, then one empty line.
+    let mut lines = Vec::new();
+    // The summary line is skipped when the description is empty or whitespace-only.
+    let description = risk.description.trim();
+    if !description.is_empty() {
+        lines.push(Line::from(vec![
+            "Summary: ".into(),
+            description.to_string().into(),
+        ]));
+    }
+    // Categories, if any, follow the level as a parenthesized, comma-separated list.
+    let mut spans: Vec<Span<'static>> = vec!["Risk: ".into(), level_span];
+    if !risk.risk_categories.is_empty() {
+        spans.push(" (".into());
+        for (idx, category) in risk.risk_categories.iter().enumerate() {
+            if idx > 0 {
+                spans.push(", ".into());
+            }
+            spans.push(risk_category_label(*category).into());
+        }
+        spans.push(")".into());
+    }
+
+    lines.push(Line::from(spans));
+    lines.push(Line::from(""));
+    lines
+}
+
+fn risk_category_label(category: SandboxRiskCategory) -> &'static str {
+    match category { // lowercase, space-separated display label per category
+        SandboxRiskCategory::DataDeletion => "data deletion",
+        SandboxRiskCategory::DataExfiltration => "data exfiltration",
+        SandboxRiskCategory::PrivilegeEscalation => "privilege escalation",
+        SandboxRiskCategory::SystemModification => "system modification",
+        SandboxRiskCategory::NetworkAccess => "network access",
+        SandboxRiskCategory::ResourceExhaustion => "resource exhaustion",
+        SandboxRiskCategory::Compliance => "compliance",
+    }
+}
+
 #[derive(Clone)]
 enum ApprovalVariant {
    Exec { id: String, command: Vec<String> },
@@ -404,6 +459,7 @@ mod tests {
            id: "test".to_string(),
            command: vec!["echo".to_string(), "hi".to_string()],
            reason: Some("reason".to_string()),
+            risk: None,
        }
    }

@@ -445,6 +501,7 @@ mod tests {
            id: "test".into(),
            command,
            reason: None,
+            risk: None,
        };

        let view = ApprovalOverlay::new(exec_request, tx);
--- a/codex-rs/tui/src/bottom_pane/chat_composer.rs
+++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs
@@ -2069,6 +2069,35 @@ mod tests {
        }
    }

+    #[test]
+    fn ascii_prefix_survives_non_ascii_followup() {
+        use crossterm::event::KeyCode;
+        use crossterm::event::KeyEvent;
+        use crossterm::event::KeyModifiers;
+
+        let (tx, _rx) = unbounded_channel::<AppEvent>();
+        let sender = AppEventSender::new(tx);
+        let mut composer = ChatComposer::new(
+            true,
+            sender,
+            false,
+            "Ask Codex to do anything".to_string(),
+            false,
+        );
+        // Typing a single ASCII char puts the composer into paste-burst state.
+        let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('1'), KeyModifiers::NONE));
+        assert!(composer.is_in_paste_burst());
+        // Follow it with a non-ASCII char.
+        let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('あ'), KeyModifiers::NONE));
+        // Enter must submit both chars: the earlier ASCII prefix must not be dropped.
+        let (result, _) =
+            composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE));
+        match result {
+            InputResult::Submitted(text) => assert_eq!(text, "1あ"),
+            _ => panic!("expected Submitted"),
+        }
+    }
+
    #[test]
    fn handle_paste_small_inserts_text() {
        use crossterm::event::KeyCode;
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -557,6 +557,7 @@ mod tests {
            id: "1".to_string(),
            command: vec!["echo".into(), "ok".into()],
            reason: None,
+            risk: None,
        }
    }

--- a/codex-rs/tui/src/bottom_pane/paste_burst.rs
+++ b/codex-rs/tui/src/bottom_pane/paste_burst.rs
@@ -198,12 +198,15 @@ impl PasteBurst {

    /// Before applying modified/non-char input: flush buffered burst immediately.
    pub fn flush_before_modified_input(&mut self) -> Option<String> {
-        if self.is_active() {
-            self.active = false;
-            Some(std::mem::take(&mut self.buffer))
-        } else {
-            None
+        if !self.is_active() {
+            return None;
        }
+        self.active = false;
+        let mut out = std::mem::take(&mut self.buffer);
+        if let Some((ch, _at)) = self.pending_first_char.take() {
+            out.push(ch); // a still-pending first char is appended after the buffered text
+        }
+        Some(out)
    }

    /// Clear only the timing window and any pending first-char.
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -745,9 +745,8 @@ impl ChatWidget {
                &ev.call_id,
                CommandOutput {
                    exit_code: ev.exit_code,
-                    stdout: ev.stdout.clone(),
-                    stderr: ev.stderr.clone(),
                    formatted_output: ev.formatted_output.clone(),
+                    aggregated_output: ev.aggregated_output.clone(),
                },
                ev.duration,
            );
@@ -778,6 +777,7 @@ impl ChatWidget {
            id,
            command: ev.command,
            reason: ev.reason,
+            risk: ev.risk,
        };
        self.bottom_pane.push_approval_request(request);
        self.request_redraw();
@@ -1632,6 +1632,7 @@ impl ChatWidget {
            context_usage,
            &self.conversation_id,
            self.rate_limit_snapshot.as_ref(),
+            Local::now(),
        ));
    }

--- a/codex-rs/tui/src/chatwidget/tests.rs
+++ b/codex-rs/tui/src/chatwidget/tests.rs
@@ -71,18 +71,26 @@ fn upgrade_event_payload_for_tests(mut payload: serde_json::Value) -> serde_json
        && let Some(m) = msg.as_object_mut()
    {
        let ty = m.get("type").and_then(|v| v.as_str()).unwrap_or("");
-        if ty == "exec_command_end" && !m.contains_key("formatted_output") {
+        if ty == "exec_command_end" {
            let stdout = m.get("stdout").and_then(|v| v.as_str()).unwrap_or("");
            let stderr = m.get("stderr").and_then(|v| v.as_str()).unwrap_or("");
-            let formatted = if stderr.is_empty() {
+            let aggregated = if stderr.is_empty() {
                stdout.to_string()
            } else {
                format!("{stdout}{stderr}")
            };
-            m.insert(
-                "formatted_output".to_string(),
-                serde_json::Value::String(formatted),
-            );
+            if !m.contains_key("formatted_output") {
+                m.insert(
+                    "formatted_output".to_string(),
+                    serde_json::Value::String(aggregated.clone()),
+                );
+            }
+            if !m.contains_key("aggregated_output") {
+                m.insert(
+                    "aggregated_output".to_string(),
+                    serde_json::Value::String(aggregated),
+                );
+            }
        }
    }
    payload
@@ -394,6 +402,7 @@ fn exec_approval_emits_proposed_command_and_decision_history() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -436,6 +445,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -484,6 +494,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
        command: vec!["bash".into(), "-lc".into(), long],
        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
        reason: None,
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -1413,6 +1424,7 @@ fn approval_modal_exec_snapshot() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -1457,6 +1469,7 @@ fn approval_modal_exec_without_reason_snapshot() {
        command: vec!["bash".into(), "-lc".into(), "echo hello world".into()],
        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
        reason: None,
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -1667,6 +1680,7 @@ fn status_widget_and_approval_modal_snapshot() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
--- a/codex-rs/tui/src/exec_cell/model.rs
+++ b/codex-rs/tui/src/exec_cell/model.rs
@@ -3,11 +3,12 @@ use std::time::Instant;

 use codex_protocol::parse_command::ParsedCommand;

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)] // Default: exit_code 0 with both strings empty
 pub(crate) struct CommandOutput {
    pub(crate) exit_code: i32,
-    pub(crate) stdout: String,
-    pub(crate) stderr: String,
+    /// The aggregated stderr + stdout interleaved.
+    pub(crate) aggregated_output: String,
+    /// The formatted output of the command, as seen by the model.
    pub(crate) formatted_output: String,
 }

@@ -82,9 +83,8 @@ impl ExecCell {
                call.duration = Some(elapsed);
                call.output = Some(CommandOutput {
                    exit_code: 1,
-                    stdout: String::new(),
-                    stderr: String::new(),
                    formatted_output: String::new(),
+                    aggregated_output: String::new(),
                });
            }
        }
--- a/codex-rs/tui/src/exec_cell/render.rs
+++ b/codex-rs/tui/src/exec_cell/render.rs
@@ -28,7 +28,6 @@ use unicode_width::UnicodeWidthStr;
 pub(crate) const TOOL_CALL_MAX_LINES: usize = 5;

 pub(crate) struct OutputLinesParams {
-    pub(crate) only_err: bool,
    pub(crate) include_angle_pipe: bool,
    pub(crate) include_prefix: bool,
 }
@@ -59,22 +58,12 @@ pub(crate) fn output_lines(
    params: OutputLinesParams,
 ) -> OutputLines {
    let OutputLinesParams {
-        only_err,
        include_angle_pipe,
        include_prefix,
    } = params;
    let CommandOutput {
-        exit_code,
-        stdout,
-        stderr,
-        ..
+        aggregated_output, ..
    } = match output {
-        Some(output) if only_err && output.exit_code == 0 => {
-            return OutputLines {
-                lines: Vec::new(),
-                omitted: None,
-            };
-        }
        Some(output) => output,
        None => {
            return OutputLines {
@@ -84,7 +73,7 @@ pub(crate) fn output_lines(
        }
    };

-    let src = if *exit_code == 0 { stdout } else { stderr };
+    let src = aggregated_output;
    let lines: Vec<&str> = src.lines().collect();
    let total = lines.len();
    let limit = TOOL_CALL_MAX_LINES;
@@ -398,7 +387,6 @@ impl ExecCell {
            let raw_output = output_lines(
                Some(output),
                OutputLinesParams {
-                    only_err: false,
                    include_angle_pipe: false,
                    include_prefix: false,
                },
--- a/codex-rs/tui/src/file_search.rs
+++ b/codex-rs/tui/src/file_search.rs
@@ -172,6 +172,7 @@ impl FileSearchManager {
                NUM_FILE_SEARCH_THREADS,
                cancellation_token.clone(),
                compute_indices,
+                true,
            )
            .map(|res| res.matches)
            .unwrap_or_default();
--- a/codex-rs/tui/src/history_cell.rs
+++ b/codex-rs/tui/src/history_cell.rs
@@ -1293,12 +1293,10 @@ pub(crate) fn new_patch_apply_failure(stderr: String) -> PlainHistoryCell {
        let output = output_lines(
            Some(&CommandOutput {
                exit_code: 1,
-                stdout: String::new(),
-                stderr,
                formatted_output: String::new(),
+                aggregated_output: stderr,
            }),
            OutputLinesParams {
-                only_err: true,
                include_angle_pipe: true,
                include_prefix: true,
            },
@@ -1739,16 +1737,7 @@ mod tests {
            duration: None,
        });
        // Mark call complete so markers are ✓
-        cell.complete_call(
-            &call_id,
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));

        let lines = cell.display_lines(80);
        let rendered = render_lines(&lines).join("\n");
@@ -1770,16 +1759,7 @@ mod tests {
            duration: None,
        });
        // Call 1: Search only
-        cell.complete_call(
-            "c1",
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call("c1", CommandOutput::default(), Duration::from_millis(1));
        // Call 2: Read A
        cell = cell
            .with_added_call(
@@ -1792,16 +1772,7 @@ mod tests {
                }],
            )
            .unwrap();
-        cell.complete_call(
-            "c2",
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call("c2", CommandOutput::default(), Duration::from_millis(1));
        // Call 3: Read B
        cell = cell
            .with_added_call(
@@ -1814,16 +1785,7 @@ mod tests {
                }],
            )
            .unwrap();
-        cell.complete_call(
-            "c3",
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call("c3", CommandOutput::default(), Duration::from_millis(1));

        let lines = cell.display_lines(80);
        let rendered = render_lines(&lines).join("\n");
@@ -1856,16 +1818,7 @@ mod tests {
            start_time: Some(Instant::now()),
            duration: None,
        });
-        cell.complete_call(
-            "c1",
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call("c1", CommandOutput::default(), Duration::from_millis(1));
        let lines = cell.display_lines(80);
        let rendered = render_lines(&lines).join("\n");
        insta::assert_snapshot!(rendered);
@@ -1885,16 +1838,7 @@ mod tests {
            duration: None,
        });
        // Mark call complete so it renders as "Ran"
-        cell.complete_call(
-            &call_id,
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));

        // Small width to force wrapping on both lines
        let width: u16 = 28;
@@ -1914,16 +1858,7 @@ mod tests {
            start_time: Some(Instant::now()),
            duration: None,
        });
-        cell.complete_call(
-            &call_id,
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
        // Wide enough that it fits inline
        let lines = cell.display_lines(80);
        let rendered = render_lines(&lines).join("\n");
@@ -1942,16 +1877,7 @@ mod tests {
            start_time: Some(Instant::now()),
            duration: None,
        });
-        cell.complete_call(
-            &call_id,
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
        let lines = cell.display_lines(24);
        let rendered = render_lines(&lines).join("\n");
        insta::assert_snapshot!(rendered);
@@ -1969,16 +1895,7 @@ mod tests {
            start_time: Some(Instant::now()),
            duration: None,
        });
-        cell.complete_call(
-            &call_id,
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
        let lines = cell.display_lines(80);
        let rendered = render_lines(&lines).join("\n");
        insta::assert_snapshot!(rendered);
@@ -1997,16 +1914,7 @@ mod tests {
            start_time: Some(Instant::now()),
            duration: None,
        });
-        cell.complete_call(
-            &call_id,
-            CommandOutput {
-                exit_code: 0,
-                stdout: String::new(),
-                stderr: String::new(),
-                formatted_output: String::new(),
-            },
-            Duration::from_millis(1),
-        );
+        cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
        let lines = cell.display_lines(28);
        let rendered = render_lines(&lines).join("\n");
        insta::assert_snapshot!(rendered);
@@ -2033,9 +1941,8 @@ mod tests {
            &call_id,
            CommandOutput {
                exit_code: 1,
-                stdout: String::new(),
-                stderr,
                formatted_output: String::new(),
+                aggregated_output: stderr,
            },
            Duration::from_millis(1),
        );
@@ -2077,9 +1984,8 @@ mod tests {
            &call_id,
            CommandOutput {
                exit_code: 1,
-                stdout: String::new(),
-                stderr,
                formatted_output: String::new(),
+                aggregated_output: stderr,
            },
            Duration::from_millis(5),
        );
--- a/codex-rs/tui/src/key_hint.rs
+++ b/codex-rs/tui/src/key_hint.rs
@@ -6,6 +6,9 @@ use ratatui::style::Style;
 use ratatui::style::Stylize;
 use ratatui::text::Span;

+#[cfg(target_os = "macos")]
+const ALT_PREFIX: &str = "⌥ + ";
+#[cfg(not(target_os = "macos"))]
 const ALT_PREFIX: &str = "alt + ";
 const CTRL_PREFIX: &str = "ctrl + ";
 const SHIFT_PREFIX: &str = "shift + ";
--- a/codex-rs/tui/src/lib.rs
+++ b/codex-rs/tui/src/lib.rs
@@ -148,6 +148,7 @@ pub async fn run_main(
        include_view_image_tool: None,
        show_raw_agent_reasoning: cli.oss.then_some(true),
        tools_web_search_request: cli.web_search.then_some(true),
+        experimental_sandbox_command_assessment: None,
        additional_writable_roots: additional_dirs,
    };
    let raw_overrides = cli.config_overrides.raw_overrides.clone();
--- a/codex-rs/tui/src/pager_overlay.rs
+++ b/codex-rs/tui/src/pager_overlay.rs
@@ -724,8 +724,7 @@ mod tests {
            "exec-1",
            CommandOutput {
                exit_code: 0,
-                stdout: "src\nREADME.md\n".into(),
-                stderr: String::new(),
+                aggregated_output: "src\nREADME.md\n".into(),
                formatted_output: "src\nREADME.md\n".into(),
            },
            Duration::from_millis(420),
--- a/codex-rs/tui/src/snapshots/codex_tui__status_indicator_widgettestsrenders_with_queued_messages@macos.snap
+++ b/codex-rs/tui/src/snapshots/codex_tui__status_indicator_widgettestsrenders_with_queued_messages@macos.snap
@@ -0,0 +1,13 @@
+--- 
+source: tui/src/status_indicator_widget.rs
+assertion_line: 289
+expression: terminal.backend()
+---
+"• Working (0s • esc to interrupt)                                               "
+"                                                                                "
+" ↳ first                                                                        "
+" ↳ second                                                                       "
+"   ⌥ + ↑ edit                                                                   "
+"                                                                                "
+"                                                                                "
+"                                                                                "
--- a/codex-rs/tui/src/status/card.rs
+++ b/codex-rs/tui/src/status/card.rs
@@ -3,6 +3,8 @@ use crate::history_cell::HistoryCell;
 use crate::history_cell::PlainHistoryCell;
 use crate::history_cell::with_border_with_inner_width;
 use crate::version::CODEX_CLI_VERSION;
+use chrono::DateTime;
+use chrono::Local;
 use codex_common::create_config_summary_entries;
 use codex_core::config::Config;
 use codex_core::protocol::SandboxPolicy;
@@ -25,6 +27,7 @@ use super::helpers::format_directory_display;
 use super::helpers::format_tokens_compact;
 use super::rate_limits::RateLimitSnapshotDisplay;
 use super::rate_limits::StatusRateLimitData;
+use super::rate_limits::StatusRateLimitRow;
 use super::rate_limits::compose_rate_limit_data;
 use super::rate_limits::format_status_limit_summary;
 use super::rate_limits::render_status_limit_progress_bar;
@@ -64,9 +67,17 @@ pub(crate) fn new_status_output(
    context_usage: Option<&TokenUsage>,
    session_id: &Option<ConversationId>,
    rate_limits: Option<&RateLimitSnapshotDisplay>,
+    now: DateTime<Local>,
 ) -> CompositeHistoryCell {
    let command = PlainHistoryCell::new(vec!["/status".magenta().into()]);
-    let card = StatusHistoryCell::new(config, total_usage, context_usage, session_id, rate_limits);
+    let card = StatusHistoryCell::new(
+        config,
+        total_usage,
+        context_usage,
+        session_id,
+        rate_limits,
+        now,
+    );

    CompositeHistoryCell::new(vec![Box::new(command), Box::new(card)])
 }
@@ -78,6 +89,7 @@ impl StatusHistoryCell {
        context_usage: Option<&TokenUsage>,
        session_id: &Option<ConversationId>,
        rate_limits: Option<&RateLimitSnapshotDisplay>,
+        now: DateTime<Local>,
    ) -> Self {
        let config_entries = create_config_summary_entries(config);
        let (model_name, model_details) = compose_model_display(config, &config_entries);
@@ -108,7 +120,7 @@ impl StatusHistoryCell {
            output: total_usage.output_tokens,
            context_window,
        };
-        let rate_limits = compose_rate_limit_data(rate_limits);
+        let rate_limits = compose_rate_limit_data(rate_limits, now);

        Self {
            model_name,
@@ -171,47 +183,66 @@ impl StatusHistoryCell {
                    ];
                }

-                let mut lines = Vec::with_capacity(rows_data.len() * 2);
-
-                for row in rows_data {
-                    let value_spans = vec![
-                        Span::from(render_status_limit_progress_bar(row.percent_used)),
-                        Span::from(" "),
-                        Span::from(format_status_limit_summary(row.percent_used)),
-                    ];
-                    let base_spans = formatter.full_spans(row.label.as_str(), value_spans);
-                    let base_line = Line::from(base_spans.clone());
-
-                    if let Some(resets_at) = row.resets_at.as_ref() {
-                        let resets_span = Span::from(format!("(resets {resets_at})")).dim();
-                        let mut inline_spans = base_spans.clone();
-                        inline_spans.push(Span::from(" ").dim());
-                        inline_spans.push(resets_span.clone());
-
-                        if line_display_width(&Line::from(inline_spans.clone()))
-                            <= available_inner_width
-                        {
-                            lines.push(Line::from(inline_spans));
-                        } else {
-                            lines.push(base_line);
-                            lines.push(formatter.continuation(vec![resets_span]));
-                        }
-                    } else {
-                        lines.push(base_line);
-                    }
-                }
-
+                self.rate_limit_row_lines(rows_data, available_inner_width, formatter)
+            }
+            StatusRateLimitData::Stale(rows_data) => {
+                let mut lines =
+                    self.rate_limit_row_lines(rows_data, available_inner_width, formatter);
+                lines.push(formatter.line(
+                    "Warning",
+                    vec![Span::from("limits may be stale - start new turn to refresh.").dim()],
+                ));
                lines
            }
            StatusRateLimitData::Missing => {
                vec![formatter.line(
                    "Limits",
-                    vec![Span::from("send a message to load usage data").dim()],
+                    vec![
+                        Span::from("visit ").dim(),
+                        "chatgpt.com/codex/settings/usage".cyan().underlined(),
+                    ],
                )]
            }
        }
    }

+    fn rate_limit_row_lines(
+        &self,
+        rows: &[StatusRateLimitRow],
+        available_inner_width: usize,
+        formatter: &FieldFormatter,
+    ) -> Vec<Line<'static>> {
+        let mut lines = Vec::with_capacity(rows.len().saturating_mul(2));
+
+        for row in rows {
+            let value_spans = vec![
+                Span::from(render_status_limit_progress_bar(row.percent_used)),
+                Span::from(" "),
+                Span::from(format_status_limit_summary(row.percent_used)),
+            ];
+            let base_spans = formatter.full_spans(row.label.as_str(), value_spans);
+            let base_line = Line::from(base_spans.clone());
+
+            if let Some(resets_at) = row.resets_at.as_ref() {
+                let resets_span = Span::from(format!("(resets {resets_at})")).dim();
+                let mut inline_spans = base_spans.clone();
+                inline_spans.push(Span::from(" ").dim());
+                inline_spans.push(resets_span.clone());
+
+                if line_display_width(&Line::from(inline_spans.clone())) <= available_inner_width {
+                    lines.push(Line::from(inline_spans));
+                } else {
+                    lines.push(base_line);
+                    lines.push(formatter.continuation(vec![resets_span]));
+                }
+            } else {
+                lines.push(base_line);
+            }
+        }
+
+        lines
+    }
+
    fn collect_rate_limit_labels(&self, seen: &mut BTreeSet<String>, labels: &mut Vec<String>) {
        match &self.rate_limits {
            StatusRateLimitData::Available(rows) => {
@@ -223,6 +254,12 @@ impl StatusHistoryCell {
                    }
                }
            }
+            StatusRateLimitData::Stale(rows) => {
+                for row in rows {
+                    push_label(labels, seen, row.label.as_str());
+                }
+                push_label(labels, seen, "Warning");
+            }
            StatusRateLimitData::Missing => push_label(labels, seen, "Limits"),
        }
    }
--- a/codex-rs/tui/src/status/rate_limits.rs
+++ b/codex-rs/tui/src/status/rate_limits.rs
@@ -2,6 +2,7 @@ use crate::chatwidget::get_limits_duration;

 use super::helpers::format_reset_timestamp;
 use chrono::DateTime;
+use chrono::Duration as ChronoDuration;
 use chrono::Local;
 use chrono::Utc;
 use codex_core::protocol::RateLimitSnapshot;
@@ -21,9 +22,12 @@ pub(crate) struct StatusRateLimitRow {
 #[derive(Debug, Clone)]
 pub(crate) enum StatusRateLimitData {
    Available(Vec<StatusRateLimitRow>),
+    Stale(Vec<StatusRateLimitRow>),
    Missing,
 }

+pub(crate) const RATE_LIMIT_STALE_THRESHOLD_MINUTES: i64 = 15;
+
 #[derive(Debug, Clone)]
 pub(crate) struct RateLimitWindowDisplay {
    pub used_percent: f64,
@@ -49,6 +53,7 @@ impl RateLimitWindowDisplay {

 #[derive(Debug, Clone)]
 pub(crate) struct RateLimitSnapshotDisplay {
+    pub captured_at: DateTime<Local>,
    pub primary: Option<RateLimitWindowDisplay>,
    pub secondary: Option<RateLimitWindowDisplay>,
 }
@@ -58,6 +63,7 @@ pub(crate) fn rate_limit_snapshot_display(
    captured_at: DateTime<Local>,
 ) -> RateLimitSnapshotDisplay {
    RateLimitSnapshotDisplay {
+        captured_at,
        primary: snapshot
            .primary
            .as_ref()
@@ -71,6 +77,7 @@ pub(crate) fn rate_limit_snapshot_display(

 pub(crate) fn compose_rate_limit_data(
    snapshot: Option<&RateLimitSnapshotDisplay>,
+    now: DateTime<Local>,
 ) -> StatusRateLimitData {
    match snapshot {
        Some(snapshot) => {
@@ -102,8 +109,13 @@ pub(crate) fn compose_rate_limit_data(
                });
            }

+            let is_stale = now.signed_duration_since(snapshot.captured_at)
+                > ChronoDuration::minutes(RATE_LIMIT_STALE_THRESHOLD_MINUTES);
+
            if rows.is_empty() {
                StatusRateLimitData::Available(vec![])
+            } else if is_stale {
+                StatusRateLimitData::Stale(rows)
            } else {
                StatusRateLimitData::Available(rows)
            }
--- a/codex-rs/tui/src/status/snapshots/codex_tuistatustests__status_snapshot_shows_missing_limits_message.snap
+++ b/codex-rs/tui/src/status/snapshots/codex_tuistatustests__status_snapshot_shows_missing_limits_message.snap
@@ -15,5 +15,5 @@ expression: sanitized
 │                                                                 │
 │  Token usage:      750 total  (500 input + 250 output)          │
 │  Context window:   100% left (750 used / 272K)                  │
-│  Limits:           send a message to load usage data            │
+│  Limits:           visit chatgpt.com/codex/settings/usage       │
 ╰─────────────────────────────────────────────────────────────────╯
--- a/codex-rs/tui/src/status/snapshots/codex_tuistatustests__status_snapshot_shows_stale_limits_message.snap
+++ b/codex-rs/tui/src/status/snapshots/codex_tuistatustests__status_snapshot_shows_stale_limits_message.snap
@@ -0,0 +1,21 @@
+---
+source: tui/src/status/tests.rs
+expression: sanitized
+---
+/status
+
+╭─────────────────────────────────────────────────────────────────────╮
+│  >_ OpenAI Codex (v0.0.0)                                           │
+│                                                                     │
+│  Model:            gpt-5-codex (reasoning none, summaries auto)     │
+│  Directory: [[workspace]]                                           │
+│  Approval:         on-request                                       │
+│  Sandbox:          read-only                                        │
+│  Agents.md:        <none>                                           │
+│                                                                     │
+│  Token usage:      1.9K total  (1K input + 900 output)              │
+│  Context window:   100% left (2.1K used / 272K)                     │
+│  5h limit:         [███████████████░░░░░] 72% used (resets 03:14)   │
+│  Weekly limit:     [████████░░░░░░░░░░░░] 40% used (resets 03:34)   │
+│  Warning:          limits may be stale - start new turn to refresh. │
+╰─────────────────────────────────────────────────────────────────────╯
--- a/codex-rs/tui/src/status/tests.rs
+++ b/codex-rs/tui/src/status/tests.rs
@@ -111,7 +111,14 @@ fn status_snapshot_includes_reasoning_details() {
    };
    let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);

-    let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
+    let composite = new_status_output(
+        &config,
+        &usage,
+        Some(&usage),
+        &None,
+        Some(&rate_display),
+        captured_at,
+    );
    let mut rendered_lines = render_lines(&composite.display_lines(80));
    if cfg!(windows) {
        for line in &mut rendered_lines {
@@ -152,7 +159,14 @@ fn status_snapshot_includes_monthly_limit() {
    };
    let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);

-    let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
+    let composite = new_status_output(
+        &config,
+        &usage,
+        Some(&usage),
+        &None,
+        Some(&rate_display),
+        captured_at,
+    );
    let mut rendered_lines = render_lines(&composite.display_lines(80));
    if cfg!(windows) {
        for line in &mut rendered_lines {
@@ -178,7 +192,12 @@ fn status_card_token_usage_excludes_cached_tokens() {
        total_tokens: 2_100,
    };

-    let composite = new_status_output(&config, &usage, Some(&usage), &None, None);
+    let now = chrono::Local
+        .with_ymd_and_hms(2024, 1, 1, 0, 0, 0)
+        .single()
+        .expect("timestamp");
+
+    let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
    let rendered = render_lines(&composite.display_lines(120));

    assert!(
@@ -219,7 +238,14 @@ fn status_snapshot_truncates_in_narrow_terminal() {
    };
    let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);

-    let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
+    let composite = new_status_output(
+        &config,
+        &usage,
+        Some(&usage),
+        &None,
+        Some(&rate_display),
+        captured_at,
+    );
    let mut rendered_lines = render_lines(&composite.display_lines(46));
    if cfg!(windows) {
        for line in &mut rendered_lines {
@@ -246,7 +272,12 @@ fn status_snapshot_shows_missing_limits_message() {
        total_tokens: 750,
    };

-    let composite = new_status_output(&config, &usage, Some(&usage), &None, None);
+    let now = chrono::Local
+        .with_ymd_and_hms(2024, 2, 3, 4, 5, 6)
+        .single()
+        .expect("timestamp");
+
+    let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
    let mut rendered_lines = render_lines(&composite.display_lines(80));
    if cfg!(windows) {
        for line in &mut rendered_lines {
@@ -282,7 +313,66 @@ fn status_snapshot_shows_empty_limits_message() {
        .expect("timestamp");
    let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);

-    let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
+    let composite = new_status_output(
+        &config,
+        &usage,
+        Some(&usage),
+        &None,
+        Some(&rate_display),
+        captured_at,
+    );
+    let mut rendered_lines = render_lines(&composite.display_lines(80));
+    if cfg!(windows) {
+        for line in &mut rendered_lines {
+            *line = line.replace('\\', "/");
+        }
+    }
+    let sanitized = sanitize_directory(rendered_lines).join("\n");
+    assert_snapshot!(sanitized);
+}
+
+#[test]
+fn status_snapshot_shows_stale_limits_message() {
+    let temp_home = TempDir::new().expect("temp home");
+    let mut config = test_config(&temp_home);
+    config.model = "gpt-5-codex".to_string();
+    config.cwd = PathBuf::from("/workspace/tests");
+
+    let usage = TokenUsage {
+        input_tokens: 1_200,
+        cached_input_tokens: 200,
+        output_tokens: 900,
+        reasoning_output_tokens: 150,
+        total_tokens: 2_250,
+    };
+
+    let captured_at = chrono::Local
+        .with_ymd_and_hms(2024, 1, 2, 3, 4, 5)
+        .single()
+        .expect("timestamp");
+    let snapshot = RateLimitSnapshot {
+        primary: Some(RateLimitWindow {
+            used_percent: 72.5,
+            window_minutes: Some(300),
+            resets_at: Some(reset_at_from(&captured_at, 600)),
+        }),
+        secondary: Some(RateLimitWindow {
+            used_percent: 40.0,
+            window_minutes: Some(10_080),
+            resets_at: Some(reset_at_from(&captured_at, 1_800)),
+        }),
+    };
+    let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
+    let now = captured_at + ChronoDuration::minutes(20);
+
+    let composite = new_status_output(
+        &config,
+        &usage,
+        Some(&usage),
+        &None,
+        Some(&rate_display),
+        now,
+    );
    let mut rendered_lines = render_lines(&composite.display_lines(80));
    if cfg!(windows) {
        for line in &mut rendered_lines {
@@ -314,7 +404,12 @@ fn status_context_window_uses_last_usage() {
        total_tokens: 13_679,
    };

-    let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None);
+    let now = chrono::Local
+        .with_ymd_and_hms(2024, 6, 1, 12, 0, 0)
+        .single()
+        .expect("timestamp");
+
+    let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None, now);
    let rendered_lines = render_lines(&composite.display_lines(80));
    let context_line = rendered_lines
        .into_iter()
--- a/codex-rs/tui/src/status_indicator_widget.rs
+++ b/codex-rs/tui/src/status_indicator_widget.rs
@@ -284,6 +284,11 @@ mod tests {
        terminal
            .draw(|f| w.render_ref(f.area(), f.buffer_mut()))
            .expect("draw");
+        #[cfg(target_os = "macos")]
+        insta::with_settings!({ snapshot_suffix => "macos" }, {
+            insta::assert_snapshot!(terminal.backend());
+        });
+        #[cfg(not(target_os = "macos"))]
        insta::assert_snapshot!(terminal.backend());
    }

--- a/codex-rs/utils/tokenizer/src/lib.rs
+++ b/codex-rs/utils/tokenizer/src/lib.rs
@@ -55,8 +55,13 @@ impl Tokenizer {
        Ok(Self { inner })
    }

+    /// Build a tokenizer using the default `O200kBase` encoding.
+    pub fn try_default() -> Result<Self, TokenizerError> {
+        Self::new(EncodingKind::O200kBase)
+    }
+
    /// Build a tokenizer using an `OpenAI` model name (maps to an encoding).
-    /// Falls back to the `o200k_base` encoding when the model is unknown.
+    /// Falls back to the `O200kBase` encoding when the model is unknown.
    pub fn for_model(model: &str) -> Result<Self, TokenizerError> {
        match tiktoken_rs::get_bpe_from_model(model) {
            Ok(inner) => Ok(Self { inner }),
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -42,3 +42,14 @@ Running Codex directly on Windows may work, but is not officially supported. We
 ### Where should I start after installation?

 Follow the quick setup in [Install & build](./install.md) and then jump into [Getting started](./getting-started.md) for interactive usage tips, prompt examples, and AGENTS.md guidance.
+
+### `brew upgrade codex` isn't upgrading me
+
+If you're running Codex v0.46.0 or older, `brew upgrade codex` will not move you to the latest version because we migrated from a Homebrew formula to a cask. To upgrade, uninstall the existing outdated formula and then install the new cask:
+
+```bash
+brew uninstall --formula codex
+brew install --cask codex
+```
+
+After reinstalling, `brew upgrade --cask codex` will keep future releases up to date.
Author	SHA1	Message	Date
Michael Bolin	4a62376e6b	fix: attempting to resume an existing conversation in ConversationManager should reuse it	2025-10-24 16:12:43 -07:00
Eric Traut	f8af4f5c8d	Added model summary and risk assessment for commands that violate sandbox policy (#5536 ) This PR adds support for a model-based summary and risk assessment for commands that violate the sandbox policy and require user approval. This aids the user in evaluating whether the command should be approved. The feature works by taking a failed command and passing it back to the model and asking it to summarize the command, give it a risk level (low, medium, high) and a risk category (e.g. "data deletion" or "data exfiltration"). It uses a new conversation thread so the context in the existing thread doesn't influence the answer. If the call to the model fails or takes longer than 5 seconds, it falls back to the current behavior. For now, this is an experimental feature and is gated by a config key `experimental_sandbox_command_assessment`. Here is a screen shot of the approval prompt showing the risk assessment and summary. <img width="723" height="282" alt="image" src="https://github.com/user-attachments/assets/4597dd7c-d5a0-4e9f-9d13-414bd082fd6b" />	2025-10-24 15:23:44 -07:00
pakrym-oai	a4be4d78b9	Log more types of request IDs (#5645 ) Different services return different sets of IDs, log all of them to simplify debugging.	2025-10-24 19:12:03 +00:00
Shijie Rao	00c1de0c56	Add instruction for upgrading codex with brew (#5640 ) Include instruction for upgrading codex with brew when there is switch from formula to cask.	2025-10-24 11:30:34 -07:00
Owen Lin	190e7eb104	[app-server] fix account/read response annotation (#5642 ) The API schema export is currently broken: ``` > cargo run -p codex-app-server-protocol --bin export -- --out DIR Error: this type cannot be exported ``` This PR fixes the error message so we get more info: ``` > cargo run -p codex-app-server-protocol --bin export -- --out DIR Error: failed to export client responses: dependency core::option::Option<codex_protocol::account::Account> cannot be exported ``` And fixes the root cause which is the `account/read` response.	2025-10-24 11:17:46 -07:00
pakrym-oai	061862a0e2	Add CodexHttpClient wrapper with request logging (#5564 ) ## Summary - wrap the default reqwest::Client inside a new CodexHttpClient/CodexRequestBuilder pair and log the HTTP method, URL, and status for each request - update the auth/model/provider plumbing to use the new builder helpers so headers and bearer auth continue to be applied consistently - add the shared `http` dependency that backs the header conversion helpers ## Testing - `CODEX_SANDBOX=seatbelt CODEX_SANDBOX_NETWORK_DISABLED=1 cargo test -p codex-core` - `CODEX_SANDBOX=seatbelt CODEX_SANDBOX_NETWORK_DISABLED=1 cargo test -p codex-chatgpt` - `CODEX_SANDBOX=seatbelt CODEX_SANDBOX_NETWORK_DISABLED=1 cargo test -p codex-tui` ------ https://chatgpt.com/codex/tasks/task_i_68fa5038c17483208b1148661c5873be	2025-10-24 09:47:52 -07:00
zhao-oai	c72b2ad766	adding messaging for stale rate limits + when no rate limits are cached (#5570 )	2025-10-24 08:46:31 -07:00
jif-oai	80783a7bb9	fix: flaky tests (#5625 )	2025-10-24 13:56:41 +01:00
Gabriel Peal	ed77d2d977	[MCP] Improve startup errors for timeouts and github (#5595 ) 1. I have seen too many reports of people hitting startup timeout errors and thinking Codex is broken. Hopefully this will help people self-serve. We may also want to consider raising the timeout to ~15s. 2. Make it more clear what PAT is (personal access token) in the GitHub error <img width="2378" height="674" alt="CleanShot 2025-10-23 at 22 05 06" src="https://github.com/user-attachments/assets/d148ce1d-ade3-4511-84a4-c164aefdb5c5" />	2025-10-24 01:54:45 -04:00
Gabriel Peal	abccd3e367	[MCP] Update rmcp to 0.8.3 (#5542 ) Picks up modelcontextprotocol/rust-sdk#497 which fixes #5208 by allowing 204 response to MCP initialize notifications instead of just 202.	2025-10-23 20:45:29 -07:00
Ahmed Ibrahim	0f4fd33ddd	Moving `token_info` to `ConversationHistory` (#5581 ) I want to centralize input processing and management to `ConversationHistory`. This would need `ConversationHistory` to have access to `token_info` (i.e. preventing adding a big input to the history). Besides, it makes more sense to have it on `ConversationHistory` than `state`.	2025-10-23 20:30:58 -07:00
Josh McKinney	e258f0f044	Use Option symbol for mac key hints (#5582 ) ## Summary - show the Option (⌥) symbol in key hints when the TUI is built for macOS so the shortcut text matches the platform terminology ## Testing - cargo test -p codex-tui ------ https://chatgpt.com/codex/tasks/task_i_68fab7505530832992780a9e13fb707b	2025-10-23 20:04:15 -07:00
jif-oai	a6b9471548	feat: end events on unified exec (#5551 )	2025-10-23 18:51:34 +01:00
Thibault Sottiaux	3059373e06	fix: resume lookup for gitignored CODEX_HOME (#5311 ) Walk the sessions tree instead of using file_search so gitignored CODEX_HOME directories can resume sessions. Add a regression test that covers a .gitignore'd sessions directory. Fixes #5247 Fixes #5412 --------- Co-authored-by: Owen Lin <owen@openai.com>	2025-10-23 17:04:40 +00:00
jif-oai	0b4527146e	feat: use actual tokenizer for unified_exec truncation (#5514 )	2025-10-23 17:08:06 +01:00
jif-oai	6745b12427	chore: testing on apply_path (#5557 )	2025-10-23 17:00:48 +01:00
Ahmed Ibrahim	f59978ed3d	Handle cancelling/aborting while processing a turn (#5543 ) Currently we collect all turn items in a vector, then we add it to the history on success. This results in losing those items on errors including aborting `ctrl+c`. This PR: - Adds the ability for the tool call to handle cancellation - bubble the turn items up to where we are recording this info Admittedly, this logic is an ad-hoc logic that doesn't handle a lot of error edge cases. The right thing to do is recording to the history on the spot as `items`/`tool calls output` come. However, this isn't possible because of having different `task_kind` that has different `conversation_histories`. The `try_run_turn` has no idea what thread are we using. We cannot also pass an `arc` to the `conversation_histories` because it's a private element of `state`. That said, `abort` is the most common case and we should cover it until we remove `task kind`	2025-10-23 08:47:10 -07:00
Jeremy Rose	3ab6028e80	tui: show aggregated output in display (#5539 ) This shows the aggregated (stdout + stderr) buffer regardless of exit code. Many commands output useful / relevant info on stdout when returning a non-zero exit code, or the same on stderr when returning an exit code of 0. Often, useful info is present on both stdout AND stderr. Also, the model sees both. So it is confusing to see commands listed as "(no output)" that in fact do have output, just on the stream that doesn't match the exit status, or to see some sort of trivial output like "Tests failed" but lacking any information about the actual failure. As such, always display the aggregated output in the display. Transcript mode remains unchanged as it was already displaying the text that the model sees, which seems correct for transcript mode.	2025-10-23 08:05:08 -07:00
jif-oai	892eaff46d	fix: approval issue (#5525 )	2025-10-23 11:13:53 +01:00
jif-oai	8e291a1706	chore: clean `handle_container_exec_with_params` (#5516 ) Drop `handle_container_exec_with_params` to have simpler and more straight forward execution path	2025-10-23 09:24:01 +01:00
Owen Lin	aee321f62b	[app-server] add new account method API stubs (#5527 ) These are the schema definitions for the new JSON-RPC APIs associated with accounts. These are not wired up to business logic yet and will currently throw an internal error indicating these are unimplemented.	2025-10-22 15:36:11 -07:00
Genki Takiuchi	ed32da04d7	Fix IME submissions dropping leading digits (#4359 ) - ensure paste burst flush preserves ASCII characters before IME commits - add regression test covering digit followed by Japanese text submission Fixes openai/codex#4356 Co-authored-by: Josh McKinney <joshka@openai.com>	2025-10-22 22:18:17 +00:00
Owen Lin	8ae3949072	[app-server] send account/rateLimits/updated notifications (#5477 ) Codex will now send an `account/rateLimits/updated` notification whenever the user's rate limits are updated. This is implemented by just transforming the existing TokenCount event.	2025-10-22 20:12:40 +00:00