mirror of
https://github.com/openai/codex.git
synced 2026-02-06 00:43:40 +00:00
Compare commits
41 Commits
patch-1
...
bug-sweep-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
62cade4e09 | ||
|
|
d6ea909acc | ||
|
|
973ae57206 | ||
|
|
2680ee6416 | ||
|
|
53140bfd3d | ||
|
|
d18953e4e4 | ||
|
|
c85ca592cf | ||
|
|
964220ac94 | ||
|
|
2f58e69997 | ||
|
|
ec69a4a810 | ||
|
|
ad09c138b9 | ||
|
|
e00eb50db3 | ||
|
|
40f38ddb4e | ||
|
|
883a108624 | ||
|
|
ae5150c37a | ||
|
|
dcf5489659 | ||
|
|
bde8a63906 | ||
|
|
f294813b7b | ||
|
|
32eb5adfd3 | ||
|
|
ea2246082d | ||
|
|
2b2577144c | ||
|
|
633be44842 | ||
|
|
eb5b793ea2 | ||
|
|
c0ccfeea76 | ||
|
|
6040759357 | ||
|
|
62cedda883 | ||
|
|
0b8a3ecc19 | ||
|
|
a915190b38 | ||
|
|
b567e5628d | ||
|
|
3a90a112c4 | ||
|
|
ad88013ac4 | ||
|
|
1cf533496d | ||
|
|
33e1730cd7 | ||
|
|
e4cbd96b04 | ||
|
|
23169b8a96 | ||
|
|
9f091e7b37 | ||
|
|
b6bdc7292b | ||
|
|
226ef2e7d5 | ||
|
|
c2a6347cdc | ||
|
|
88ccfb46b7 | ||
|
|
3b7f4d25ef |
4
codex-rs/Cargo.lock
generated
4
codex-rs/Cargo.lock
generated
@@ -1452,6 +1452,7 @@ dependencies = [
|
||||
"diffy",
|
||||
"dirs",
|
||||
"dunce",
|
||||
"futures",
|
||||
"image",
|
||||
"insta",
|
||||
"itertools 0.14.0",
|
||||
@@ -1465,7 +1466,9 @@ dependencies = [
|
||||
"rand 0.9.2",
|
||||
"ratatui",
|
||||
"ratatui-macros",
|
||||
"regex",
|
||||
"regex-lite",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serial_test",
|
||||
@@ -1475,6 +1478,7 @@ dependencies = [
|
||||
"supports-color",
|
||||
"tempfile",
|
||||
"textwrap 0.16.2",
|
||||
"time",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"toml",
|
||||
|
||||
@@ -159,6 +159,7 @@ rand = "0.9"
|
||||
ratatui = "0.29.0"
|
||||
ratatui-macros = "0.6.0"
|
||||
regex-lite = "0.1.7"
|
||||
regex = "1.11.1"
|
||||
reqwest = "0.12"
|
||||
rmcp = { version = "0.8.5", default-features = false }
|
||||
schemars = "0.8.22"
|
||||
|
||||
@@ -8,6 +8,7 @@ use codex_protocol::config_types::ReasoningEffort;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
|
||||
use codex_protocol::items::TurnItem as CoreTurnItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
|
||||
use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
|
||||
use codex_protocol::user_input::UserInput as CoreUserInput;
|
||||
@@ -290,11 +291,39 @@ pub struct ThreadStartResponse {
|
||||
pub thread: Thread,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
/// There are three ways to resume a thread:
|
||||
/// 1. By thread_id: load the thread from disk by thread_id and resume it.
|
||||
/// 2. By history: instantiate the thread from memory and resume it.
|
||||
/// 3. By path: load the thread from disk by path and resume it.
|
||||
///
|
||||
/// The precedence is: history > path > thread_id.
|
||||
/// If using history or path, the thread_id param will be ignored.
|
||||
///
|
||||
/// Prefer using thread_id whenever possible.
|
||||
pub struct ThreadResumeParams {
|
||||
pub thread_id: String,
|
||||
|
||||
/// [UNSTABLE] FOR CODEX CLOUD - DO NOT USE.
|
||||
/// If specified, the thread will be resumed with the provided history
|
||||
/// instead of loaded from disk.
|
||||
pub history: Option<Vec<ResponseItem>>,
|
||||
|
||||
/// [UNSTABLE] Specify the rollout path to resume from.
|
||||
/// If specified, the thread_id param will be ignored.
|
||||
pub path: Option<PathBuf>,
|
||||
|
||||
/// Configuration overrides for the resumed thread, if any.
|
||||
pub model: Option<String>,
|
||||
pub model_provider: Option<String>,
|
||||
pub cwd: Option<String>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub sandbox: Option<SandboxMode>,
|
||||
pub config: Option<HashMap<String, serde_json::Value>>,
|
||||
pub base_instructions: Option<String>,
|
||||
pub developer_instructions: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
@@ -361,6 +390,8 @@ pub struct Thread {
|
||||
pub model_provider: String,
|
||||
/// Unix timestamp (in seconds) when the thread was created.
|
||||
pub created_at: i64,
|
||||
/// [UNSTABLE] Path to the thread on disk.
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
|
||||
@@ -1201,21 +1201,17 @@ impl CodexMessageProcessor {
|
||||
}
|
||||
|
||||
async fn thread_start(&mut self, request_id: RequestId, params: ThreadStartParams) {
|
||||
// Build ConfigOverrides directly from ThreadStartParams for config derivation.
|
||||
let cli_overrides = params.config;
|
||||
let overrides = ConfigOverrides {
|
||||
model: params.model,
|
||||
cwd: params.cwd.map(PathBuf::from),
|
||||
approval_policy: params.approval_policy.map(AskForApproval::to_core),
|
||||
sandbox_mode: params.sandbox.map(SandboxMode::to_core),
|
||||
model_provider: params.model_provider,
|
||||
codex_linux_sandbox_exe: self.codex_linux_sandbox_exe.clone(),
|
||||
base_instructions: params.base_instructions,
|
||||
developer_instructions: params.developer_instructions,
|
||||
..Default::default()
|
||||
};
|
||||
let overrides = self.build_thread_config_overrides(
|
||||
params.model,
|
||||
params.model_provider,
|
||||
params.cwd,
|
||||
params.approval_policy,
|
||||
params.sandbox,
|
||||
params.base_instructions,
|
||||
params.developer_instructions,
|
||||
);
|
||||
|
||||
let config = match derive_config_from_params(overrides, cli_overrides).await {
|
||||
let config = match derive_config_from_params(overrides, params.config).await {
|
||||
Ok(config) => config,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
@@ -1244,16 +1240,15 @@ impl CodexMessageProcessor {
|
||||
{
|
||||
Ok(summary) => summary_to_thread(summary),
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"failed to load summary for new thread {}: {}",
|
||||
conversation_id, err
|
||||
);
|
||||
Thread {
|
||||
id: conversation_id.to_string(),
|
||||
preview: String::new(),
|
||||
model_provider: self.config.model_provider_id.clone(),
|
||||
created_at: chrono::Utc::now().timestamp(),
|
||||
}
|
||||
self.send_internal_error(
|
||||
request_id,
|
||||
format!(
|
||||
"failed to load rollout `{}` for conversation {conversation_id}: {err}",
|
||||
rollout_path.display()
|
||||
),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1292,6 +1287,31 @@ impl CodexMessageProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn build_thread_config_overrides(
|
||||
&self,
|
||||
model: Option<String>,
|
||||
model_provider: Option<String>,
|
||||
cwd: Option<String>,
|
||||
approval_policy: Option<codex_app_server_protocol::AskForApproval>,
|
||||
sandbox: Option<SandboxMode>,
|
||||
base_instructions: Option<String>,
|
||||
developer_instructions: Option<String>,
|
||||
) -> ConfigOverrides {
|
||||
ConfigOverrides {
|
||||
model,
|
||||
model_provider,
|
||||
cwd: cwd.map(PathBuf::from),
|
||||
approval_policy: approval_policy
|
||||
.map(codex_app_server_protocol::AskForApproval::to_core),
|
||||
sandbox_mode: sandbox.map(SandboxMode::to_core),
|
||||
codex_linux_sandbox_exe: self.codex_linux_sandbox_exe.clone(),
|
||||
base_instructions,
|
||||
developer_instructions,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
async fn thread_archive(&mut self, request_id: RequestId, params: ThreadArchiveParams) {
|
||||
let conversation_id = match ConversationId::from_string(¶ms.thread_id) {
|
||||
Ok(id) => id,
|
||||
@@ -1374,88 +1394,147 @@ impl CodexMessageProcessor {
|
||||
}
|
||||
|
||||
async fn thread_resume(&mut self, request_id: RequestId, params: ThreadResumeParams) {
|
||||
let conversation_id = match ConversationId::from_string(¶ms.thread_id) {
|
||||
Ok(id) => id,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("invalid thread id: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
let ThreadResumeParams {
|
||||
thread_id,
|
||||
history,
|
||||
path,
|
||||
model,
|
||||
model_provider,
|
||||
cwd,
|
||||
approval_policy,
|
||||
sandbox,
|
||||
config: cli_overrides,
|
||||
base_instructions,
|
||||
developer_instructions,
|
||||
} = params;
|
||||
|
||||
let overrides_requested = model.is_some()
|
||||
|| model_provider.is_some()
|
||||
|| cwd.is_some()
|
||||
|| approval_policy.is_some()
|
||||
|| sandbox.is_some()
|
||||
|| cli_overrides.is_some()
|
||||
|| base_instructions.is_some()
|
||||
|| developer_instructions.is_some();
|
||||
|
||||
let config = if overrides_requested {
|
||||
let overrides = self.build_thread_config_overrides(
|
||||
model,
|
||||
model_provider,
|
||||
cwd,
|
||||
approval_policy,
|
||||
sandbox,
|
||||
base_instructions,
|
||||
developer_instructions,
|
||||
);
|
||||
match derive_config_from_params(overrides, cli_overrides).await {
|
||||
Ok(config) => config,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("error deriving config: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.config.as_ref().clone()
|
||||
};
|
||||
|
||||
let conversation_history = if let Some(history) = history {
|
||||
if history.is_empty() {
|
||||
self.send_invalid_request_error(
|
||||
request_id,
|
||||
"history must not be empty".to_string(),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
InitialHistory::Forked(history.into_iter().map(RolloutItem::ResponseItem).collect())
|
||||
} else if let Some(path) = path {
|
||||
match RolloutRecorder::get_rollout_history(&path).await {
|
||||
Ok(initial_history) => initial_history,
|
||||
Err(err) => {
|
||||
self.send_invalid_request_error(
|
||||
request_id,
|
||||
format!("failed to load rollout `{}`: {err}", path.display()),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let existing_conversation_id = match ConversationId::from_string(&thread_id) {
|
||||
Ok(id) => id,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("invalid thread id: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let path = match find_conversation_path_by_id_str(
|
||||
&self.config.codex_home,
|
||||
&existing_conversation_id.to_string(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(p)) => p,
|
||||
Ok(None) => {
|
||||
self.send_invalid_request_error(
|
||||
request_id,
|
||||
format!("no rollout found for conversation id {existing_conversation_id}"),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
Err(err) => {
|
||||
self.send_invalid_request_error(
|
||||
request_id,
|
||||
format!(
|
||||
"failed to locate conversation id {existing_conversation_id}: {err}"
|
||||
),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
match RolloutRecorder::get_rollout_history(&path).await {
|
||||
Ok(initial_history) => initial_history,
|
||||
Err(err) => {
|
||||
self.send_invalid_request_error(
|
||||
request_id,
|
||||
format!("failed to load rollout `{}`: {err}", path.display()),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let path = match find_conversation_path_by_id_str(
|
||||
&self.config.codex_home,
|
||||
&conversation_id.to_string(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(p)) => p,
|
||||
Ok(None) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("no rollout found for conversation id {conversation_id}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("failed to locate conversation id {conversation_id}: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let fallback_provider = self.config.model_provider_id.as_str();
|
||||
let summary = match read_summary_from_rollout(&path, fallback_provider).await {
|
||||
Ok(s) => s,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("failed to load rollout `{}`: {err}", path.display()),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let initial_history = match RolloutRecorder::get_rollout_history(&summary.path).await {
|
||||
Ok(initial_history) => initial_history,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!(
|
||||
"failed to load rollout `{}` for conversation {conversation_id}: {err}",
|
||||
summary.path.display()
|
||||
),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
let fallback_model_provider = config.model_provider_id.clone();
|
||||
|
||||
match self
|
||||
.conversation_manager
|
||||
.resume_conversation_with_history(
|
||||
self.config.as_ref().clone(),
|
||||
initial_history,
|
||||
config,
|
||||
conversation_history,
|
||||
self.auth_manager.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
let thread = summary_to_thread(summary);
|
||||
|
||||
Ok(NewConversation {
|
||||
conversation_id,
|
||||
session_configured,
|
||||
..
|
||||
}) => {
|
||||
// Auto-attach a conversation listener when resuming a thread.
|
||||
if let Err(err) = self
|
||||
.attach_conversation_listener(conversation_id, false)
|
||||
@@ -1468,6 +1547,25 @@ impl CodexMessageProcessor {
|
||||
);
|
||||
}
|
||||
|
||||
let thread = match read_summary_from_rollout(
|
||||
session_configured.rollout_path.as_path(),
|
||||
fallback_model_provider.as_str(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(summary) => summary_to_thread(summary),
|
||||
Err(err) => {
|
||||
self.send_internal_error(
|
||||
request_id,
|
||||
format!(
|
||||
"failed to load rollout `{}` for conversation {conversation_id}: {err}",
|
||||
session_configured.rollout_path.display()
|
||||
),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
let response = ThreadResumeResponse { thread };
|
||||
self.outgoing.send_response(request_id, response).await;
|
||||
}
|
||||
@@ -1878,6 +1976,15 @@ impl CodexMessageProcessor {
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
}
|
||||
|
||||
async fn send_internal_error(&self, request_id: RequestId, message: String) {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INTERNAL_ERROR_CODE,
|
||||
message,
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
}
|
||||
|
||||
async fn archive_conversation(
|
||||
&mut self,
|
||||
request_id: RequestId,
|
||||
@@ -2867,6 +2974,7 @@ fn parse_datetime(timestamp: Option<&str>) -> Option<DateTime<Utc>> {
|
||||
fn summary_to_thread(summary: ConversationSummary) -> Thread {
|
||||
let ConversationSummary {
|
||||
conversation_id,
|
||||
path,
|
||||
preview,
|
||||
timestamp,
|
||||
model_provider,
|
||||
@@ -2880,6 +2988,7 @@ fn summary_to_thread(summary: ConversationSummary) -> Thread {
|
||||
preview,
|
||||
model_provider,
|
||||
created_at: created_at.map(|dt| dt.timestamp()).unwrap_or(0),
|
||||
path,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,13 +8,15 @@ use codex_app_server_protocol::ThreadResumeParams;
|
||||
use codex_app_server_protocol::ThreadResumeResponse;
|
||||
use codex_app_server_protocol::ThreadStartParams;
|
||||
use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::timeout;
|
||||
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_resume_returns_existing_thread() -> Result<()> {
|
||||
async fn thread_resume_returns_original_thread() -> Result<()> {
|
||||
let server = create_mock_chat_completions_server(vec![]).await;
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
@@ -40,6 +42,7 @@ async fn thread_resume_returns_existing_thread() -> Result<()> {
|
||||
let resume_id = mcp
|
||||
.send_thread_resume_request(ThreadResumeParams {
|
||||
thread_id: thread.id.clone(),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let resume_resp: JSONRPCResponse = timeout(
|
||||
@@ -54,6 +57,105 @@ async fn thread_resume_returns_existing_thread() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
|
||||
let server = create_mock_chat_completions_server(vec![]).await;
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let start_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(start_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
let thread_path = thread.path.clone();
|
||||
let resume_id = mcp
|
||||
.send_thread_resume_request(ThreadResumeParams {
|
||||
thread_id: "not-a-valid-thread-id".to_string(),
|
||||
path: Some(thread_path),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
let resume_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(resume_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadResumeResponse { thread: resumed } =
|
||||
to_response::<ThreadResumeResponse>(resume_resp)?;
|
||||
assert_eq!(resumed, thread);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_resume_supports_history_and_overrides() -> Result<()> {
|
||||
let server = create_mock_chat_completions_server(vec![]).await;
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
// Start a thread.
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let start_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(start_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
let history_text = "Hello from history";
|
||||
let history = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: history_text.to_string(),
|
||||
}],
|
||||
}];
|
||||
|
||||
// Resume with explicit history and override the model.
|
||||
let resume_id = mcp
|
||||
.send_thread_resume_request(ThreadResumeParams {
|
||||
thread_id: thread.id,
|
||||
history: Some(history),
|
||||
model: Some("mock-model".to_string()),
|
||||
model_provider: Some("mock_provider".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let resume_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(resume_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadResumeResponse { thread: resumed } =
|
||||
to_response::<ThreadResumeResponse>(resume_resp)?;
|
||||
assert!(!resumed.id.is_empty());
|
||||
assert_eq!(resumed.model_provider, "mock_provider");
|
||||
assert_eq!(resumed.preview, history_text);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Helper to create a config.toml pointing at the mock model server.
|
||||
fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> {
|
||||
let config_toml = codex_home.join("config.toml");
|
||||
|
||||
310
codex-rs/core/gpt_5_1_prompt.md
Normal file
310
codex-rs/core/gpt_5_1_prompt.md
Normal file
@@ -0,0 +1,310 @@
|
||||
You are a coding agent running in the Codex CLI, a terminal-based coding assistant. Codex CLI is an open source project led by OpenAI. You are expected to be precise, safe, and helpful.
|
||||
|
||||
Your capabilities:
|
||||
|
||||
- Receive user prompts and other context provided by the harness, such as files in the workspace.
|
||||
- Communicate with the user by streaming thinking & responses, and by making & updating plans.
|
||||
- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
|
||||
|
||||
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
|
||||
|
||||
# How you work
|
||||
|
||||
## Personality
|
||||
|
||||
Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work.
|
||||
|
||||
# AGENTS.md spec
|
||||
- Repos often contain AGENTS.md files. These files can appear anywhere within the repository.
|
||||
- These files are a way for humans to give you (the agent) instructions or tips for working within the container.
|
||||
- Some examples might be: coding conventions, info about how code is organized, or instructions for how to run or test code.
|
||||
- Instructions in AGENTS.md files:
|
||||
- The scope of an AGENTS.md file is the entire directory tree rooted at the folder that contains it.
|
||||
- For every file you touch in the final patch, you must obey instructions in any AGENTS.md file whose scope includes that file.
|
||||
- Instructions about code style, structure, naming, etc. apply only to code within the AGENTS.md file's scope, unless the file states otherwise.
|
||||
- More-deeply-nested AGENTS.md files take precedence in the case of conflicting instructions.
|
||||
- Direct system/developer/user instructions (as part of a prompt) take precedence over AGENTS.md instructions.
|
||||
- The contents of the AGENTS.md file at the root of the repo and any directories from the CWD up to the root are included with the developer message and don't need to be re-read. When working in a subdirectory of CWD, or a directory outside the CWD, check for any AGENTS.md files that may be applicable.
|
||||
|
||||
## Responsiveness
|
||||
|
||||
### Preamble messages
|
||||
|
||||
Before making tool calls, send a brief preamble to the user explaining what you’re about to do. When sending preamble messages, follow these principles and examples:
|
||||
|
||||
- **Logically group related actions**: if you’re about to run several related commands, describe them together in one preamble rather than sending a separate note for each.
|
||||
- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words for quick updates).
|
||||
- **Build on prior context**: if this is not your first tool call, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.
|
||||
- **Keep your tone light, friendly and curious**: add small touches of personality so preambles feel collaborative and engaging.
|
||||
- **Exception**: Avoid adding a preamble for every trivial read (e.g., `cat` a single file) unless it’s part of a larger grouped action.
|
||||
|
||||
**Examples:**
|
||||
|
||||
- “I’ve explored the repo; now checking the API route definitions.”
|
||||
- “Next, I’ll patch the config and update the related tests.”
|
||||
- “I’m about to scaffold the CLI commands and helper functions.”
|
||||
- “Ok cool, so I’ve wrapped my head around the repo. Now digging into the API routes.”
|
||||
- “Config’s looking tidy. Next up is patching helpers to keep things in sync.”
|
||||
- “Finished poking at the DB gateway. I will now chase down error handling.”
|
||||
- “Alright, build pipeline order is interesting. Checking how it reports failures.”
|
||||
- “Spotted a clever caching util; now hunting where it gets used.”
|
||||
|
||||
## Planning
|
||||
|
||||
You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go.
|
||||
|
||||
Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
|
||||
|
||||
Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
|
||||
|
||||
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
|
||||
|
||||
Use a plan when:
|
||||
|
||||
- The task is non-trivial and will require multiple actions over a long time horizon.
|
||||
- There are logical phases or dependencies where sequencing matters.
|
||||
- The work has ambiguity that benefits from outlining high-level goals.
|
||||
- You want intermediate checkpoints for feedback and validation.
|
||||
- When the user asked you to do more than one thing in a single prompt
|
||||
- The user has asked you to use the plan tool (aka "TODOs")
|
||||
- You generate additional steps while working, and plan to do them before yielding to the user
|
||||
|
||||
### Examples
|
||||
|
||||
**High-quality plans**
|
||||
|
||||
Example 1:
|
||||
|
||||
1. Add CLI entry with file args
|
||||
2. Parse Markdown via CommonMark library
|
||||
3. Apply semantic HTML template
|
||||
4. Handle code blocks, images, links
|
||||
5. Add error handling for invalid files
|
||||
|
||||
Example 2:
|
||||
|
||||
1. Define CSS variables for colors
|
||||
2. Add toggle with localStorage state
|
||||
3. Refactor components to use variables
|
||||
4. Verify all views for readability
|
||||
5. Add smooth theme-change transition
|
||||
|
||||
Example 3:
|
||||
|
||||
1. Set up Node.js + WebSocket server
|
||||
2. Add join/leave broadcast events
|
||||
3. Implement messaging with timestamps
|
||||
4. Add usernames + mention highlighting
|
||||
5. Persist messages in lightweight DB
|
||||
6. Add typing indicators + unread count
|
||||
|
||||
**Low-quality plans**
|
||||
|
||||
Example 1:
|
||||
|
||||
1. Create CLI tool
|
||||
2. Add Markdown parser
|
||||
3. Convert to HTML
|
||||
|
||||
Example 2:
|
||||
|
||||
1. Add dark mode toggle
|
||||
2. Save preference
|
||||
3. Make styles look good
|
||||
|
||||
Example 3:
|
||||
|
||||
1. Create single-file HTML game
|
||||
2. Run quick sanity check
|
||||
3. Summarize usage instructions
|
||||
|
||||
If you need to write a plan, only write high quality plans, not low quality ones.
|
||||
|
||||
## Task execution
|
||||
|
||||
You are a coding agent. Please keep going until the query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. Autonomously resolve the query to the best of your ability, using the tools available to you, before coming back to the user. Do NOT guess or make up an answer.
|
||||
|
||||
You MUST adhere to the following criteria when solving queries:
|
||||
|
||||
- Working on the repo(s) in the current environment is allowed, even if they are proprietary.
|
||||
- Analyzing code for vulnerabilities is allowed.
|
||||
- Showing user code and tool call details is allowed.
|
||||
- Use the `apply_patch` tool to edit files (NEVER try `applypatch` or `apply-patch`, only `apply_patch`): {"command":["apply_patch","*** Begin Patch\\n*** Update File: path/to/file.py\\n@@ def example():\\n- pass\\n+ return 123\\n*** End Patch"]}
|
||||
|
||||
If completing the user's task requires writing or modifying files, your code and final answer should follow these coding guidelines, though user instructions (i.e. AGENTS.md) may override these guidelines:
|
||||
|
||||
- Fix the problem at the root cause rather than applying surface-level patches, when possible.
|
||||
- Avoid unneeded complexity in your solution.
|
||||
- Do not attempt to fix unrelated bugs or broken tests. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
|
||||
- Update documentation as necessary.
|
||||
- Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task.
|
||||
- Use `git log` and `git blame` to search the history of the codebase if additional context is required.
|
||||
- NEVER add copyright or license headers unless specifically requested.
|
||||
- Do not waste tokens by re-reading files after calling `apply_patch` on them. The tool call will fail if it didn't work. The same goes for making folders, deleting folders, etc.
|
||||
- Do not `git commit` your changes or create new git branches unless explicitly requested.
|
||||
- Do not add inline comments within code unless explicitly requested.
|
||||
- Do not use one-letter variable names unless explicitly requested.
|
||||
- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor.
|
||||
|
||||
## Sandbox and approvals
|
||||
|
||||
The Codex CLI harness supports several different sandboxing and approval configurations that the user can choose from.
|
||||
|
||||
Filesystem sandboxing prevents you from editing files without user approval. The options are:
|
||||
|
||||
- **read-only**: You can only read files.
|
||||
- **workspace-write**: You can read files. You can write to files in your workspace folder, but not outside it.
|
||||
- **danger-full-access**: No filesystem sandboxing.
|
||||
|
||||
Network sandboxing prevents you from accessing network without approval. Options are
|
||||
|
||||
- **restricted**
|
||||
- **enabled**
|
||||
|
||||
Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task. Approval options are
|
||||
|
||||
- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
|
||||
- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
|
||||
- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
|
||||
- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
|
||||
|
||||
When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
|
||||
|
||||
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /tmp)
|
||||
- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
|
||||
- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
|
||||
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval.
|
||||
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
|
||||
- (For all of these, you should weigh alternative paths that do not require approval.)
|
||||
|
||||
Note that when sandboxing is set to read-only, you'll need to request approval for any command that isn't a read.
|
||||
|
||||
You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing ON, and approval on-failure.
|
||||
|
||||
## Validating your work
|
||||
|
||||
If the codebase has tests or the ability to build or run, consider using them to verify that your work is complete.
|
||||
|
||||
When testing, your philosophy should be to start as specific as possible to the code you changed so that you can catch issues efficiently, then make your way to broader tests as you build confidence. If there's no test for the code you changed, and if the adjacent patterns in the codebases show that there's a logical place for you to add a test, you may do so. However, do not add tests to codebases with no tests.
|
||||
|
||||
Similarly, once you're confident in correctness, you can suggest or use formatting commands to ensure that your code is well formatted. If there are issues you can iterate up to 3 times to get formatting right, but if you still can't manage it's better to save the user time and present them a correct solution where you call out the formatting in your final message. If the codebase does not have a formatter configured, do not add one.
|
||||
|
||||
For all of testing, running, building, and formatting, do not attempt to fix unrelated bugs. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
|
||||
|
||||
Be mindful of whether to run validation commands proactively. In the absence of behavioral guidance:
|
||||
|
||||
- When running in non-interactive approval modes like **never** or **on-failure**, proactively run tests, lint and do whatever you need to ensure you've completed the task.
|
||||
- When working in interactive approval modes like **untrusted**, or **on-request**, hold off on running tests or lint commands until the user is ready for you to finalize your output, because these commands take time to run and slow down iteration. Instead suggest what you want to do next, and let the user confirm first.
|
||||
- When working on test-related tasks, such as adding tests, fixing tests, or reproducing a bug to verify behavior, you may proactively run tests regardless of approval mode. Use your judgement to decide whether this is a test-related task.
|
||||
|
||||
## Ambition vs. precision
|
||||
|
||||
For tasks that have no prior context (i.e. the user is starting something brand new), you should feel free to be ambitious and demonstrate creativity with your implementation.
|
||||
|
||||
If you're operating in an existing codebase, you should make sure you do exactly what the user asks with surgical precision. Treat the surrounding codebase with respect, and don't overstep (i.e. changing filenames or variables unnecessarily). You should balance being sufficiently ambitious and proactive when completing tasks of this nature.
|
||||
|
||||
You should use judicious initiative to decide on the right level of detail and complexity to deliver based on the user's needs. This means showing good judgment that you're capable of doing the right extras without gold-plating. This might be demonstrated by high-value, creative touches when scope of the task is vague; while being surgical and targeted when scope is tightly specified.
|
||||
|
||||
## Sharing progress updates
|
||||
|
||||
For especially longer tasks that you work on (i.e. requiring many tool calls, or a plan with multiple steps), you should provide progress updates back to the user at reasonable intervals. These updates should be structured as a concise sentence or two (no more than 8-10 words long) recapping progress so far in plain language: this update demonstrates your understanding of what needs to be done, progress so far (i.e. files explored, subtasks complete), and where you're going next.
|
||||
|
||||
Before doing large chunks of work that may incur latency as experienced by the user (i.e. writing a new file), you should send a concise message to the user with an update indicating what you're about to do to ensure they know what you're spending time on. Don't start editing or writing large files before informing the user what you are doing and why.
|
||||
|
||||
The messages you send before tool calls should describe what is immediately about to be done next in very concise language. If there was previous work done, this preamble message should also include a note about the work done so far to bring the user along.
|
||||
|
||||
## Presenting your work and final message
|
||||
|
||||
Your final message should read naturally, like an update from a concise teammate. For casual conversation, brainstorming tasks, or quick questions from the user, respond in a friendly, conversational tone. You should ask questions, suggest ideas, and adapt to the user’s style. If you've finished a large amount of work, when describing what you've done to the user, you should follow the final answer formatting guidelines to communicate substantive changes. You don't need to add structured formatting for one-word answers, greetings, or purely conversational exchanges.
|
||||
|
||||
You can skip heavy formatting for single, simple actions or confirmations. In these cases, respond in plain sentences with any relevant next step or quick option. Reserve multi-section structured responses for results that need grouping or explanation.
|
||||
|
||||
The user is working on the same computer as you, and has access to your work. As such there's no need to show the full contents of large files you have already written unless the user explicitly asks for them. Similarly, if you've created or modified files using `apply_patch`, there's no need to tell users to "save the file" or "copy the code into a file"—just reference the file path.
|
||||
|
||||
If there's something that you think you could help with as a logical next step, concisely ask the user if they want you to do so. Good examples of this are running tests, committing changes, or building out the next logical component. If there’s something that you couldn't do (even with approval) but that the user might want to do (such as verifying changes by running the app), include those instructions succinctly.
|
||||
|
||||
Brevity is very important as a default. You should be very concise (i.e. no more than 10 lines), but can relax this requirement for tasks where additional detail and comprehensiveness is important for the user's understanding.
|
||||
|
||||
### Final answer structure and style guidelines
|
||||
|
||||
You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
|
||||
|
||||
**Section Headers**
|
||||
|
||||
- Use only when they improve clarity — they are not mandatory for every answer.
|
||||
- Choose descriptive names that fit the content
|
||||
- Keep headers short (1–3 words) and in `**Title Case**`. Always start headers with `**` and end with `**`
|
||||
- Leave no blank line before the first bullet under a header.
|
||||
- Section headers should only be used where they genuinely improve scanability; avoid fragmenting the answer.
|
||||
|
||||
**Bullets**
|
||||
|
||||
- Use `-` followed by a space for every bullet.
|
||||
- Merge related points when possible; avoid a bullet for every trivial detail.
|
||||
- Keep bullets to one line unless breaking for clarity is unavoidable.
|
||||
- Group into short lists (4–6 bullets) ordered by importance.
|
||||
- Use consistent keyword phrasing and formatting across sections.
|
||||
|
||||
**Monospace**
|
||||
|
||||
- Wrap all commands, file paths, env vars, and code identifiers in backticks (`` `...` ``).
|
||||
- Apply to inline examples and to bullet keywords if the keyword itself is a literal file/command.
|
||||
- Never mix monospace and bold markers; choose one based on whether it’s a keyword (`**`) or inline code/path (`` ` ``).
|
||||
|
||||
**File References**
|
||||
When referencing files in your response, make sure to include the relevant start line and always follow the below rules:
|
||||
* Use inline code to make file paths clickable.
|
||||
* Each reference should have a standalone path, even if it's the same file.
|
||||
* Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix.
|
||||
* Line/column (1‑based, optional): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
|
||||
* Do not use URIs like file://, vscode://, or https://.
|
||||
* Do not provide range of lines
|
||||
* Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5
|
||||
|
||||
**Structure**
|
||||
|
||||
- Place related bullets together; don’t mix unrelated concepts in the same section.
|
||||
- Order sections from general → specific → supporting info.
|
||||
- For subsections (e.g., “Binaries” under “Rust Workspace”), introduce with a bolded keyword bullet, then list items under it.
|
||||
- Match structure to complexity:
|
||||
- Multi-part or detailed results → use clear headers and grouped bullets.
|
||||
- Simple results → minimal headers, possibly just a short list or paragraph.
|
||||
|
||||
**Tone**
|
||||
|
||||
- Keep the voice collaborative and natural, like a coding partner handing off work.
|
||||
- Be concise and factual — no filler or conversational commentary and avoid unnecessary repetition
|
||||
- Use present tense and active voice (e.g., “Runs tests” not “This will run tests”).
|
||||
- Keep descriptions self-contained; don’t refer to “above” or “below”.
|
||||
- Use parallel structure in lists for consistency.
|
||||
|
||||
**Don’t**
|
||||
|
||||
- Don’t use literal words “bold” or “monospace” in the content.
|
||||
- Don’t nest bullets or create deep hierarchies.
|
||||
- Don’t output ANSI escape codes directly — the CLI renderer applies them.
|
||||
- Don’t cram unrelated keywords into a single bullet; split for clarity.
|
||||
- Don’t let keyword lists run long — wrap or reformat for scanability.
|
||||
|
||||
Generally, ensure your final answers adapt their shape and depth to the request. For example, answers to code explanations should have a precise, structured explanation with code references that answer the question directly. For tasks with a simple implementation, lead with the outcome and supplement only with what’s needed for clarity. Larger changes can be presented as a logical walkthrough of your approach, grouping related steps, explaining rationale where it adds value, and highlighting next actions to accelerate the user. Your answers should provide the right level of detail while being easily scannable.
|
||||
|
||||
For casual greetings, acknowledgements, or other one-off conversational messages that are not delivering substantive information or structured results, respond naturally without section headers or bullet formatting.
|
||||
|
||||
# Tool Guidelines
|
||||
|
||||
## Shell commands
|
||||
|
||||
When using the shell, you must adhere to the following guidelines:
|
||||
|
||||
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
|
||||
- Read files in chunks with a max chunk size of 250 lines. Do not use python scripts to attempt to output larger chunks of a file. Command line output will be truncated after 10 kilobytes or 256 lines of output, regardless of the command used.
|
||||
|
||||
## `update_plan`
|
||||
|
||||
A tool named `update_plan` is available to you. You can use it to keep an up‑to‑date, step‑by‑step plan for the task.
|
||||
|
||||
To create a new plan, call `update_plan` with a short list of 1‑sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
|
||||
|
||||
When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
|
||||
|
||||
If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
|
||||
@@ -227,6 +227,14 @@ impl CodexAuth {
|
||||
})
|
||||
}
|
||||
|
||||
/// Raw plan string from the ID token (including unknown/new plan types).
|
||||
pub fn raw_plan_type(&self) -> Option<String> {
|
||||
self.get_plan_type().map(|plan| match plan {
|
||||
InternalPlanType::Known(k) => format!("{k:?}"),
|
||||
InternalPlanType::Unknown(raw) => raw,
|
||||
})
|
||||
}
|
||||
|
||||
/// Raw internal plan value from the ID token.
|
||||
/// Exposes the underlying `token_data::PlanType` without mapping it to the
|
||||
/// public `AccountPlanType`. Use this when downstream code needs to inspect
|
||||
@@ -335,7 +343,10 @@ pub fn save_auth(
|
||||
}
|
||||
|
||||
/// Load CLI auth data using the configured credential store backend.
|
||||
/// Returns `None` when no credentials are stored.
|
||||
/// Returns `None` when no credentials are stored. This function is
|
||||
/// provided only for tests. Production code should not directly load
|
||||
/// from the auth.json storage. It should use the AuthManager abstraction
|
||||
/// instead.
|
||||
pub fn load_auth_dot_json(
|
||||
codex_home: &Path,
|
||||
auth_credentials_store_mode: AuthCredentialsStoreMode,
|
||||
|
||||
@@ -338,7 +338,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
debug!(
|
||||
"POST to {}: {}",
|
||||
provider.get_full_url(&None),
|
||||
serde_json::to_string_pretty(&payload).unwrap_or_default()
|
||||
payload.to_string()
|
||||
);
|
||||
|
||||
let mut attempt = 0;
|
||||
|
||||
@@ -294,10 +294,9 @@ impl ModelClient {
|
||||
let auth = auth_manager.as_ref().and_then(|m| m.auth());
|
||||
|
||||
trace!(
|
||||
"POST to {}: {:?}",
|
||||
"POST to {}: {}",
|
||||
self.provider.get_full_url(&auth),
|
||||
serde_json::to_string(payload_json)
|
||||
.unwrap_or("<unable to serialize payload>".to_string())
|
||||
payload_json.to_string()
|
||||
);
|
||||
|
||||
let mut req_builder = self
|
||||
|
||||
@@ -421,6 +421,10 @@ mod tests {
|
||||
slug: "gpt-5",
|
||||
expects_apply_patch_instructions: true,
|
||||
},
|
||||
InstructionsTestCase {
|
||||
slug: "gpt-5.1",
|
||||
expects_apply_patch_instructions: false,
|
||||
},
|
||||
InstructionsTestCase {
|
||||
slug: "codex-mini-latest",
|
||||
expects_apply_patch_instructions: true,
|
||||
@@ -433,6 +437,10 @@ mod tests {
|
||||
slug: "gpt-5-codex",
|
||||
expects_apply_patch_instructions: false,
|
||||
},
|
||||
InstructionsTestCase {
|
||||
slug: "gpt-5.1-codex",
|
||||
expects_apply_patch_instructions: false,
|
||||
},
|
||||
];
|
||||
for test_case in test_cases {
|
||||
let model_family = find_family_for_model(test_case.slug).expect("known model slug");
|
||||
|
||||
@@ -84,6 +84,11 @@ mod user_notification;
|
||||
mod user_shell_command;
|
||||
pub mod util;
|
||||
|
||||
/// Shared jittered exponential backoff used across Codex retries.
|
||||
pub fn default_retry_backoff(attempt: u64) -> std::time::Duration {
|
||||
util::backoff(attempt)
|
||||
}
|
||||
|
||||
pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
|
||||
pub use command_safety::is_safe_command;
|
||||
pub use safety::get_platform_sandbox;
|
||||
|
||||
@@ -5,7 +5,9 @@ use crate::tools::spec::ConfigShellToolType;
|
||||
/// The `instructions` field in the payload sent to a model should always start
|
||||
/// with this content.
|
||||
const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
|
||||
|
||||
const GPT_5_CODEX_INSTRUCTIONS: &str = include_str!("../gpt_5_codex_prompt.md");
|
||||
const GPT_5_1_INSTRUCTIONS: &str = include_str!("../gpt_5_1_prompt.md");
|
||||
|
||||
/// A model family is a group of models that share certain characteristics.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
@@ -154,7 +156,10 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
)
|
||||
|
||||
// Production models.
|
||||
} else if slug.starts_with("gpt-5-codex") || slug.starts_with("codex-") {
|
||||
} else if slug.starts_with("gpt-5-codex")
|
||||
|| slug.starts_with("gpt-5.1-codex")
|
||||
|| slug.starts_with("codex-")
|
||||
{
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -163,6 +168,14 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
|
||||
support_verbosity: false,
|
||||
)
|
||||
} else if slug.starts_with("gpt-5.1") {
|
||||
model_family!(
|
||||
slug, "gpt-5.1",
|
||||
supports_reasoning_summaries: true,
|
||||
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
|
||||
support_verbosity: true,
|
||||
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
|
||||
)
|
||||
} else if slug.starts_with("gpt-5") {
|
||||
model_family!(
|
||||
slug, "gpt-5",
|
||||
|
||||
@@ -70,7 +70,7 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
|
||||
// https://platform.openai.com/docs/models/gpt-3.5-turbo
|
||||
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),
|
||||
|
||||
_ if slug.starts_with("gpt-5-codex") => {
|
||||
_ if slug.starts_with("gpt-5-codex") || slug.starts_with("gpt-5.1-codex") => {
|
||||
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
|
||||
}
|
||||
|
||||
|
||||
@@ -42,6 +42,10 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
)
|
||||
}
|
||||
|
||||
fn is_mutating(&self, _invocation: &ToolInvocation) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
|
||||
@@ -117,6 +117,7 @@ async fn run_rg_search(
|
||||
limit: usize,
|
||||
cwd: &Path,
|
||||
) -> Result<Vec<String>, FunctionCallError> {
|
||||
// First attempt: regex search
|
||||
let mut command = Command::new("rg");
|
||||
command
|
||||
.current_dir(cwd)
|
||||
@@ -148,8 +149,49 @@ async fn run_rg_search(
|
||||
Some(1) => Ok(Vec::new()),
|
||||
_ => {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let stderr_trimmed = stderr.trim();
|
||||
// Retry with fixed-strings if the regex failed to parse.
|
||||
if stderr_trimmed.contains("regex parse error")
|
||||
|| stderr_trimmed.contains("error parsing regex")
|
||||
|| stderr_trimmed.contains("unclosed group")
|
||||
{
|
||||
let mut fixed = Command::new("rg");
|
||||
fixed
|
||||
.current_dir(cwd)
|
||||
.arg("--files-with-matches")
|
||||
.arg("--sortr=modified")
|
||||
.arg("--fixed-strings")
|
||||
.arg(pattern)
|
||||
.arg("--no-messages");
|
||||
if let Some(glob) = include {
|
||||
fixed.arg("--glob").arg(glob);
|
||||
}
|
||||
fixed.arg("--").arg(search_path);
|
||||
let second = timeout(COMMAND_TIMEOUT, fixed.output())
|
||||
.await
|
||||
.map_err(|_| {
|
||||
FunctionCallError::RespondToModel(
|
||||
"rg timed out after 30 seconds".to_string(),
|
||||
)
|
||||
})?
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to launch rg: {err}. Ensure ripgrep is installed and on PATH."
|
||||
))
|
||||
})?;
|
||||
return match second.status.code() {
|
||||
Some(0) => Ok(parse_results(&second.stdout, limit)),
|
||||
Some(1) => Ok(Vec::new()),
|
||||
_ => {
|
||||
let second_stderr = String::from_utf8_lossy(&second.stderr);
|
||||
Err(FunctionCallError::RespondToModel(format!(
|
||||
"rg failed: {second_stderr}"
|
||||
)))
|
||||
}
|
||||
};
|
||||
}
|
||||
Err(FunctionCallError::RespondToModel(format!(
|
||||
"rg failed: {stderr}"
|
||||
"rg failed: {stderr_trimmed}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ use crate::codex::TurnContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -77,6 +78,18 @@ impl ToolHandler for ShellHandler {
|
||||
)
|
||||
}
|
||||
|
||||
fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
|
||||
match &invocation.payload {
|
||||
ToolPayload::Function { arguments } => {
|
||||
serde_json::from_str::<ShellToolCallParams>(arguments)
|
||||
.map(|params| !is_known_safe_command(¶ms.command))
|
||||
.unwrap_or(true)
|
||||
}
|
||||
ToolPayload::LocalShell { params } => !is_known_safe_command(¶ms.command),
|
||||
_ => true, // unknown payloads => assume mutating
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandOutputDeltaEvent;
|
||||
use crate::protocol::ExecOutputStream;
|
||||
@@ -20,6 +18,8 @@ use crate::unified_exec::UnifiedExecContext;
|
||||
use crate::unified_exec::UnifiedExecResponse;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::unified_exec::WriteStdinRequest;
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
|
||||
pub struct UnifiedExecHandler;
|
||||
|
||||
@@ -74,6 +74,19 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
)
|
||||
}
|
||||
|
||||
fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
|
||||
let (ToolPayload::Function { arguments } | ToolPayload::UnifiedExec { arguments }) =
|
||||
&invocation.payload
|
||||
else {
|
||||
return true;
|
||||
};
|
||||
|
||||
let Ok(params) = serde_json::from_str::<ExecCommandArgs>(arguments) else {
|
||||
return true;
|
||||
};
|
||||
!is_known_safe_command(&["bash".to_string(), "-lc".to_string(), params.cmd])
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
|
||||
@@ -16,7 +16,6 @@ use crate::tools::router::ToolCall;
|
||||
use crate::tools::router::ToolRouter;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_utils_readiness::Readiness;
|
||||
|
||||
pub(crate) struct ToolCallRuntime {
|
||||
router: Arc<ToolRouter>,
|
||||
@@ -55,7 +54,6 @@ impl ToolCallRuntime {
|
||||
let tracker = Arc::clone(&self.tracker);
|
||||
let lock = Arc::clone(&self.parallel_execution);
|
||||
let started = Instant::now();
|
||||
let readiness = self.turn_context.tool_call_gate.clone();
|
||||
|
||||
let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
|
||||
AbortOnDropHandle::new(tokio::spawn(async move {
|
||||
@@ -65,9 +63,6 @@ impl ToolCallRuntime {
|
||||
Ok(Self::aborted_response(&call, secs))
|
||||
},
|
||||
res = async {
|
||||
tracing::trace!("waiting for tool gate");
|
||||
readiness.wait_ready().await;
|
||||
tracing::trace!("tool gate released");
|
||||
let _guard = if supports_parallel {
|
||||
Either::Left(lock.read().await)
|
||||
} else {
|
||||
|
||||
@@ -2,15 +2,15 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_utils_readiness::Readiness;
|
||||
use tracing::warn;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum ToolKind {
|
||||
@@ -30,6 +30,10 @@ pub trait ToolHandler: Send + Sync {
|
||||
)
|
||||
}
|
||||
|
||||
fn is_mutating(&self, _invocation: &ToolInvocation) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError>;
|
||||
}
|
||||
|
||||
@@ -106,6 +110,11 @@ impl ToolRegistry {
|
||||
let output_cell = &output_cell;
|
||||
let invocation = invocation;
|
||||
async move {
|
||||
if handler.is_mutating(&invocation) {
|
||||
tracing::trace!("waiting for tool gate");
|
||||
invocation.turn.tool_call_gate.wait_ready().await;
|
||||
tracing::trace!("tool gate released");
|
||||
}
|
||||
match handler.handle(invocation).await {
|
||||
Ok(output) => {
|
||||
let preview = output.log_preview();
|
||||
|
||||
@@ -128,4 +128,32 @@ async fn model_selects_expected_tools() {
|
||||
],
|
||||
"gpt-5-codex should expose the apply_patch tool",
|
||||
);
|
||||
|
||||
let gpt51_codex_tools = collect_tool_identifiers_for_model("gpt-5.1-codex").await;
|
||||
assert_eq!(
|
||||
gpt51_codex_tools,
|
||||
vec![
|
||||
"shell".to_string(),
|
||||
"list_mcp_resources".to_string(),
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"apply_patch".to_string()
|
||||
],
|
||||
"gpt-5-codex should expose the apply_patch tool",
|
||||
);
|
||||
|
||||
let gpt51_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
|
||||
assert_eq!(
|
||||
gpt51_tools,
|
||||
vec![
|
||||
"shell".to_string(),
|
||||
"list_mcp_resources".to_string(),
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"apply_patch".to_string()
|
||||
],
|
||||
"gpt-5-codex should expose the apply_patch tool",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -233,6 +233,18 @@ async fn prompt_tools_are_consistent_across_requests() {
|
||||
"view_image",
|
||||
],
|
||||
),
|
||||
(
|
||||
"gpt-5.1",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"view_image",
|
||||
],
|
||||
),
|
||||
(
|
||||
"gpt-5-codex",
|
||||
vec![
|
||||
@@ -245,6 +257,18 @@ async fn prompt_tools_are_consistent_across_requests() {
|
||||
"view_image",
|
||||
],
|
||||
),
|
||||
(
|
||||
"gpt-5.1-codex",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"view_image",
|
||||
],
|
||||
),
|
||||
]);
|
||||
let expected_tools_names = tools_by_model
|
||||
.get(OPENAI_DEFAULT_MODEL)
|
||||
@@ -277,6 +301,7 @@ async fn prompt_tools_are_consistent_across_requests() {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[ignore = "flaky on ubuntu-24.04-arm - aarch64-unknown-linux-gpu"]
|
||||
async fn prefixes_context_and_instructions_once_and_consistently_across_requests() {
|
||||
skip_if_no_network!();
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
@@ -62,6 +62,7 @@ ratatui = { workspace = true, features = [
|
||||
] }
|
||||
ratatui-macros = { workspace = true }
|
||||
regex-lite = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true, features = ["preserve_order"] }
|
||||
shlex = { workspace = true }
|
||||
@@ -71,6 +72,7 @@ supports-color = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
textwrap = { workspace = true }
|
||||
tokio = { workspace = true, features = [
|
||||
"fs",
|
||||
"io-std",
|
||||
"macros",
|
||||
"process",
|
||||
@@ -87,6 +89,9 @@ tree-sitter-highlight = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
unicode-width = { workspace = true }
|
||||
url = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
time = { workspace = true, features = ["serde"] }
|
||||
|
||||
codex-windows-sandbox = { workspace = true }
|
||||
|
||||
|
||||
@@ -559,6 +559,62 @@ impl App {
|
||||
AppEvent::OpenReviewCustomPrompt => {
|
||||
self.chat_widget.show_review_custom_prompt();
|
||||
}
|
||||
AppEvent::OpenSecurityReviewPathPrompt(mode) => {
|
||||
self.chat_widget.show_security_review_path_prompt(mode);
|
||||
}
|
||||
AppEvent::StartSecurityReview {
|
||||
mode,
|
||||
include_paths,
|
||||
scope_prompt,
|
||||
force_new,
|
||||
} => {
|
||||
self.chat_widget.start_security_review(
|
||||
mode,
|
||||
include_paths,
|
||||
scope_prompt,
|
||||
force_new,
|
||||
);
|
||||
}
|
||||
AppEvent::ResumeSecurityReview {
|
||||
output_root,
|
||||
metadata,
|
||||
} => {
|
||||
self.chat_widget
|
||||
.resume_security_review(output_root, metadata);
|
||||
}
|
||||
AppEvent::SecurityReviewAutoScopeConfirm {
|
||||
mode,
|
||||
prompt,
|
||||
selections,
|
||||
responder,
|
||||
} => {
|
||||
self.chat_widget
|
||||
.show_security_review_scope_confirmation(mode, prompt, selections, responder);
|
||||
}
|
||||
AppEvent::SecurityReviewScopeResolved { paths } => {
|
||||
self.chat_widget.on_security_review_scope_resolved(paths);
|
||||
}
|
||||
AppEvent::OpenRegistrationPrompt { url, responder } => {
|
||||
self.chat_widget.show_registration_prompt(url, responder);
|
||||
}
|
||||
AppEvent::SecurityReviewCommandStatus {
|
||||
id,
|
||||
summary,
|
||||
state,
|
||||
preview,
|
||||
} => {
|
||||
self.chat_widget
|
||||
.on_security_review_command_status(id, summary, state, preview);
|
||||
}
|
||||
AppEvent::SecurityReviewLog(message) => {
|
||||
self.chat_widget.on_security_review_log(message);
|
||||
}
|
||||
AppEvent::SecurityReviewComplete { result } => {
|
||||
self.chat_widget.on_security_review_complete(result);
|
||||
}
|
||||
AppEvent::SecurityReviewFailed { error } => {
|
||||
self.chat_widget.on_security_review_failed(error);
|
||||
}
|
||||
AppEvent::FullScreenApprovalRequest(request) => match request {
|
||||
ApprovalRequest::ApplyPatch { cwd, changes, .. } => {
|
||||
let _ = tui.enter_alt_screen();
|
||||
|
||||
@@ -2,16 +2,34 @@ use std::path::PathBuf;
|
||||
|
||||
use codex_common::approval_presets::ApprovalPreset;
|
||||
use codex_common::model_presets::ModelPreset;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::ConversationPathResponseEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::protocol_config_types::ReasoningEffort;
|
||||
use codex_file_search::FileMatch;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use crate::bottom_pane::ApprovalRequest;
|
||||
use crate::history_cell::HistoryCell;
|
||||
use crate::security_review::SecurityReviewFailure;
|
||||
use crate::security_review::SecurityReviewMetadata;
|
||||
use crate::security_review::SecurityReviewMode;
|
||||
use crate::security_review::SecurityReviewResult;
|
||||
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::protocol_config_types::ReasoningEffort;
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct SecurityReviewAutoScopeSelection {
|
||||
pub display_path: String,
|
||||
pub reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum SecurityReviewCommandState {
|
||||
Running,
|
||||
Matches,
|
||||
NoMatches,
|
||||
Error,
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug)]
|
||||
@@ -138,7 +156,6 @@ pub(crate) enum AppEvent {
|
||||
|
||||
/// Open the approval popup.
|
||||
FullScreenApprovalRequest(ApprovalRequest),
|
||||
|
||||
/// Open the feedback note entry overlay after the user selects a category.
|
||||
OpenFeedbackNote {
|
||||
category: FeedbackCategory,
|
||||
@@ -149,6 +166,64 @@ pub(crate) enum AppEvent {
|
||||
OpenFeedbackConsent {
|
||||
category: FeedbackCategory,
|
||||
},
|
||||
|
||||
/// Open the scoped path input for security reviews.
|
||||
OpenSecurityReviewPathPrompt(SecurityReviewMode),
|
||||
|
||||
/// Begin running a security review with the given mode and optional scoped paths.
|
||||
StartSecurityReview {
|
||||
mode: SecurityReviewMode,
|
||||
include_paths: Vec<String>,
|
||||
scope_prompt: Option<String>,
|
||||
force_new: bool,
|
||||
},
|
||||
|
||||
/// Resume a previously generated security review from disk.
|
||||
ResumeSecurityReview {
|
||||
output_root: PathBuf,
|
||||
metadata: SecurityReviewMetadata,
|
||||
},
|
||||
|
||||
/// Prompt the user to confirm auto-detected scope selections.
|
||||
SecurityReviewAutoScopeConfirm {
|
||||
mode: SecurityReviewMode,
|
||||
prompt: String,
|
||||
selections: Vec<SecurityReviewAutoScopeSelection>,
|
||||
responder: oneshot::Sender<bool>,
|
||||
},
|
||||
|
||||
/// Prompt the user to register at least two accounts and paste credentials.
|
||||
/// The responder receives `Some(raw_input)` when the user submits text, or `None` if dismissed.
|
||||
OpenRegistrationPrompt {
|
||||
url: Option<String>,
|
||||
responder: oneshot::Sender<Option<String>>,
|
||||
},
|
||||
|
||||
/// Notify that the security review scope has been resolved to specific paths.
|
||||
SecurityReviewScopeResolved {
|
||||
paths: Vec<String>,
|
||||
},
|
||||
|
||||
/// Update the command status display for running security review shell commands.
|
||||
SecurityReviewCommandStatus {
|
||||
id: u64,
|
||||
summary: String,
|
||||
state: SecurityReviewCommandState,
|
||||
preview: Vec<String>,
|
||||
},
|
||||
|
||||
/// Append a progress log emitted during the security review.
|
||||
SecurityReviewLog(String),
|
||||
|
||||
/// Security review completed successfully.
|
||||
SecurityReviewComplete {
|
||||
result: SecurityReviewResult,
|
||||
},
|
||||
|
||||
/// Security review failed prior to completion.
|
||||
SecurityReviewFailed {
|
||||
error: SecurityReviewFailure,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
|
||||
@@ -1522,6 +1522,10 @@ impl ChatComposer {
|
||||
self.is_task_running = running;
|
||||
}
|
||||
|
||||
pub(crate) fn set_placeholder_text(&mut self, placeholder: String) {
|
||||
self.placeholder_text = placeholder;
|
||||
}
|
||||
|
||||
pub(crate) fn set_context_window_percent(&mut self, percent: Option<i64>) {
|
||||
if self.context_window_percent != percent {
|
||||
self.context_window_percent = percent;
|
||||
|
||||
@@ -35,10 +35,13 @@ mod paste_burst;
|
||||
pub mod popup_consts;
|
||||
mod queued_user_messages;
|
||||
mod scroll_state;
|
||||
mod security_review_scope_confirm_view;
|
||||
mod selection_popup_common;
|
||||
mod textarea;
|
||||
pub(crate) use feedback_view::FeedbackNoteView;
|
||||
|
||||
pub(crate) use security_review_scope_confirm_view::SecurityReviewScopeConfirmView;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub(crate) enum CancellationEvent {
|
||||
Handled,
|
||||
@@ -50,6 +53,7 @@ pub(crate) use chat_composer::InputResult;
|
||||
use codex_protocol::custom_prompts::CustomPrompt;
|
||||
|
||||
use crate::status_indicator_widget::StatusIndicatorWidget;
|
||||
pub(crate) use crate::status_indicator_widget::StatusSnapshot;
|
||||
pub(crate) use list_selection_view::SelectionAction;
|
||||
pub(crate) use list_selection_view::SelectionItem;
|
||||
|
||||
@@ -74,6 +78,8 @@ pub(crate) struct BottomPane {
|
||||
status: Option<StatusIndicatorWidget>,
|
||||
/// Queued user messages to show above the composer while a turn is running.
|
||||
queued_user_messages: QueuedUserMessages,
|
||||
/// Recent log messages shown beneath the status header.
|
||||
status_logs: Vec<String>,
|
||||
context_window_percent: Option<i64>,
|
||||
}
|
||||
|
||||
@@ -105,6 +111,7 @@ impl BottomPane {
|
||||
ctrl_c_quit_hint: false,
|
||||
status: None,
|
||||
queued_user_messages: QueuedUserMessages::new(),
|
||||
status_logs: Vec::new(),
|
||||
esc_backtrack_hint: false,
|
||||
context_window_percent: None,
|
||||
}
|
||||
@@ -216,6 +223,11 @@ impl BottomPane {
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn set_placeholder_text(&mut self, text: String) {
|
||||
self.composer.set_placeholder_text(text);
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn clear_composer_for_ctrl_c(&mut self) {
|
||||
self.composer.clear_for_ctrl_c();
|
||||
self.request_redraw();
|
||||
@@ -236,6 +248,22 @@ impl BottomPane {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn update_status_snapshot(&mut self, snapshot: StatusSnapshot) {
|
||||
self.status_logs = snapshot.logs.clone();
|
||||
if let Some(status) = self.status.as_mut() {
|
||||
status.update_snapshot(snapshot);
|
||||
} else {
|
||||
self.update_status_header(snapshot.header);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn update_status_logs(&mut self, logs: Vec<String>) {
|
||||
self.status_logs = logs.clone();
|
||||
if let Some(status) = self.status.as_mut() {
|
||||
status.set_logs(logs);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn show_ctrl_c_quit_hint(&mut self) {
|
||||
self.ctrl_c_quit_hint = true;
|
||||
self.composer
|
||||
@@ -284,18 +312,21 @@ impl BottomPane {
|
||||
|
||||
if running {
|
||||
if self.status.is_none() {
|
||||
self.status_logs.clear();
|
||||
self.status = Some(StatusIndicatorWidget::new(
|
||||
self.app_event_tx.clone(),
|
||||
self.frame_requester.clone(),
|
||||
));
|
||||
}
|
||||
if let Some(status) = self.status.as_mut() {
|
||||
status.set_logs(self.status_logs.clone());
|
||||
status.set_interrupt_hint_visible(true);
|
||||
}
|
||||
self.request_redraw();
|
||||
} else {
|
||||
// Hide the status indicator when a task completes, but keep other modal views.
|
||||
self.hide_status_indicator();
|
||||
self.status_logs.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
use crossterm::event::KeyModifiers;
|
||||
use ratatui::buffer::Buffer;
|
||||
use ratatui::layout::Rect;
|
||||
use ratatui::style::Stylize;
|
||||
use ratatui::text::Line;
|
||||
use ratatui::text::Span;
|
||||
use ratatui::widgets::Clear;
|
||||
use ratatui::widgets::Paragraph;
|
||||
use ratatui::widgets::Widget;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use crate::app_event::SecurityReviewAutoScopeSelection;
|
||||
use crate::render::renderable::Renderable;
|
||||
use crate::security_review::SecurityReviewMode;
|
||||
use crate::text_formatting::truncate_text;
|
||||
|
||||
use super::CancellationEvent;
|
||||
use super::bottom_pane_view::BottomPaneView;
|
||||
|
||||
pub(crate) struct SecurityReviewScopeConfirmView {
|
||||
mode: SecurityReviewMode,
|
||||
prompt: String,
|
||||
selections: Vec<SecurityReviewAutoScopeSelection>,
|
||||
responder: Option<oneshot::Sender<bool>>,
|
||||
complete: bool,
|
||||
}
|
||||
|
||||
impl SecurityReviewScopeConfirmView {
|
||||
pub(crate) fn new(
|
||||
mode: SecurityReviewMode,
|
||||
prompt: String,
|
||||
selections: Vec<SecurityReviewAutoScopeSelection>,
|
||||
responder: oneshot::Sender<bool>,
|
||||
) -> Self {
|
||||
Self {
|
||||
mode,
|
||||
prompt,
|
||||
selections,
|
||||
responder: Some(responder),
|
||||
complete: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn send_response(&mut self, accept: bool) {
|
||||
if let Some(responder) = self.responder.take() {
|
||||
let _ = responder.send(accept);
|
||||
}
|
||||
self.complete = true;
|
||||
}
|
||||
}
|
||||
|
||||
impl BottomPaneView for SecurityReviewScopeConfirmView {
|
||||
fn handle_key_event(&mut self, key_event: KeyEvent) {
|
||||
match key_event.code {
|
||||
KeyCode::Char('y') | KeyCode::Char('Y') | KeyCode::Enter => {
|
||||
self.send_response(true);
|
||||
}
|
||||
KeyCode::Char('n') | KeyCode::Char('N') | KeyCode::Esc => {
|
||||
self.send_response(false);
|
||||
}
|
||||
_ if key_event.modifiers.contains(KeyModifiers::CONTROL) => {}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn on_ctrl_c(&mut self) -> CancellationEvent {
|
||||
self.send_response(false);
|
||||
CancellationEvent::Handled
|
||||
}
|
||||
|
||||
fn is_complete(&self) -> bool {
|
||||
self.complete
|
||||
}
|
||||
|
||||
fn handle_paste(&mut self, _pasted: String) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Renderable for SecurityReviewScopeConfirmView {
|
||||
fn desired_height(&self, _width: u16) -> u16 {
|
||||
let base_lines: u16 = 5;
|
||||
let selection_lines = if self.selections.is_empty() {
|
||||
1
|
||||
} else {
|
||||
self.selections.len() as u16
|
||||
};
|
||||
base_lines.saturating_add(selection_lines)
|
||||
}
|
||||
|
||||
fn render(&self, area: Rect, buf: &mut Buffer) {
|
||||
if area.width == 0 || area.height == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
Clear.render(area, buf);
|
||||
|
||||
let mut lines: Vec<Line> = Vec::new();
|
||||
lines.push(vec!["Confirm auto-detected scope".bold()].into());
|
||||
lines.push(vec![format!("Mode: {}", self.mode.as_str()).dim()].into());
|
||||
|
||||
if !self.prompt.trim().is_empty() {
|
||||
let summary = truncate_text(self.prompt.trim(), 96);
|
||||
lines.push(vec!["Prompt: ".dim(), Span::from(summary)].into());
|
||||
}
|
||||
|
||||
if self.selections.is_empty() {
|
||||
lines.push(
|
||||
vec!["No specific directories selected; review the entire repository.".dim()]
|
||||
.into(),
|
||||
);
|
||||
} else {
|
||||
for (idx, selection) in self.selections.iter().enumerate() {
|
||||
let label = format!("{:>2}. {}", idx + 1, selection.display_path);
|
||||
let mut spans: Vec<Span> = vec![Span::from(label)];
|
||||
if let Some(reason) = selection.reason.as_ref() {
|
||||
spans.push(" — ".dim());
|
||||
spans.push(Span::from(reason.clone()).dim());
|
||||
}
|
||||
lines.push(spans.into());
|
||||
}
|
||||
}
|
||||
|
||||
lines.push(Line::from(Vec::<Span>::new()));
|
||||
lines.push(
|
||||
vec![
|
||||
"Continue with these paths? ".into(),
|
||||
"(y)es".bold(),
|
||||
" / ".into(),
|
||||
"(n)o to refine scope".bold(),
|
||||
]
|
||||
.into(),
|
||||
);
|
||||
|
||||
Paragraph::new(lines).render(area, buf);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,5 +3,5 @@ source: tui/src/chatwidget/tests.rs
|
||||
expression: terminal.backend()
|
||||
---
|
||||
" "
|
||||
" "
|
||||
" "
|
||||
"• Thinking - **Thinking** (0s • esc to i"
|
||||
"› Ask Codex to do anything "
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
assertion_line: 2062
|
||||
expression: terminal.backend()
|
||||
---
|
||||
" "
|
||||
" "
|
||||
" "
|
||||
@@ -9,7 +9,7 @@ expression: term.backend().vt100().screen().contents()
|
||||
└ Search Change Approved
|
||||
Read diff_render.rs
|
||||
|
||||
• Investigating rendering code (0s • esc to interrupt)
|
||||
• Investigating rendering code - **Investigating rendering code** (0s • esc to i
|
||||
|
||||
|
||||
› Summarize recent commits
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
assertion_line: 1577
|
||||
expression: terminal.backend()
|
||||
---
|
||||
" "
|
||||
"• Analyzing (0s • esc to interrupt) "
|
||||
"• Analyzing - **Analyzing** (0s • esc to interrupt) "
|
||||
" "
|
||||
" "
|
||||
"› Ask Codex to do anything "
|
||||
|
||||
@@ -278,12 +278,13 @@ fn make_chatwidget_manual() -> (
|
||||
let app_event_tx = AppEventSender::new(tx_raw);
|
||||
let (op_tx, op_rx) = unbounded_channel::<Op>();
|
||||
let cfg = test_config();
|
||||
let placeholder = "Ask Codex to do anything".to_string();
|
||||
let bottom = BottomPane::new(BottomPaneParams {
|
||||
app_event_tx: app_event_tx.clone(),
|
||||
frame_requester: FrameRequester::test_dummy(),
|
||||
has_input_focus: true,
|
||||
enhanced_keys_supported: false,
|
||||
placeholder_text: "Ask Codex to do anything".to_string(),
|
||||
placeholder_text: placeholder.clone(),
|
||||
disable_paste_burst: false,
|
||||
});
|
||||
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test"));
|
||||
@@ -307,15 +308,23 @@ fn make_chatwidget_manual() -> (
|
||||
reasoning_buffer: String::new(),
|
||||
full_reasoning_buffer: String::new(),
|
||||
current_status_header: String::from("Working"),
|
||||
status_progress: None,
|
||||
status_thinking_lines: Vec::new(),
|
||||
status_tool_calls: Vec::new(),
|
||||
retry_status_header: None,
|
||||
conversation_id: None,
|
||||
frame_requester: FrameRequester::test_dummy(),
|
||||
default_placeholder: placeholder,
|
||||
show_welcome_banner: true,
|
||||
queued_user_messages: VecDeque::new(),
|
||||
suppress_session_configured_redraw: false,
|
||||
pending_notification: None,
|
||||
is_review_mode: false,
|
||||
needs_final_message_separator: false,
|
||||
security_review_task: None,
|
||||
security_review_context: None,
|
||||
security_review_artifacts: None,
|
||||
security_review_follow_up: None,
|
||||
last_rendered_width: std::cell::Cell::new(None),
|
||||
feedback: codex_feedback::CodexFeedback::new(),
|
||||
current_rollout_path: None,
|
||||
|
||||
@@ -54,11 +54,15 @@ pub mod live_wrap;
|
||||
mod markdown;
|
||||
mod markdown_render;
|
||||
mod markdown_stream;
|
||||
mod mermaid;
|
||||
pub mod onboarding;
|
||||
mod pager_overlay;
|
||||
pub mod public_widgets;
|
||||
mod render;
|
||||
mod resume_picker;
|
||||
mod security_prompts;
|
||||
mod security_report_viewer;
|
||||
mod security_review;
|
||||
mod selection_list;
|
||||
mod session_log;
|
||||
mod shimmer;
|
||||
|
||||
874
codex-rs/tui/src/mermaid.rs
Normal file
874
codex-rs/tui/src/mermaid.rs
Normal file
@@ -0,0 +1,874 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Captures;
|
||||
use regex::Regex;
|
||||
|
||||
type FixFn = Box<dyn Fn(&str) -> String + Send + Sync>;
|
||||
|
||||
struct Issue {
|
||||
line_no: usize,
|
||||
#[allow(dead_code)]
|
||||
start: usize,
|
||||
#[allow(dead_code)]
|
||||
end: usize,
|
||||
#[allow(dead_code)]
|
||||
message: String,
|
||||
fix: Option<FixFn>,
|
||||
}
|
||||
|
||||
impl Issue {
|
||||
fn new(
|
||||
line_no: usize,
|
||||
start: usize,
|
||||
end: usize,
|
||||
message: impl Into<String>,
|
||||
fix: FixFn,
|
||||
) -> Self {
|
||||
Self {
|
||||
line_no,
|
||||
start,
|
||||
end,
|
||||
message: message.into(),
|
||||
fix: Some(fix),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct MermaidLinter {
|
||||
lines: Vec<String>,
|
||||
}
|
||||
|
||||
impl MermaidLinter {
|
||||
fn new(source: &str) -> Self {
|
||||
let lines = if source.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
source
|
||||
.split('\n')
|
||||
.map(std::string::ToString::to_string)
|
||||
.collect()
|
||||
};
|
||||
Self { lines }
|
||||
}
|
||||
|
||||
fn lint(&mut self) -> Vec<Issue> {
|
||||
let mut issues: Vec<Issue> = Vec::new();
|
||||
let mut in_pie = false;
|
||||
let mut in_sequence = false;
|
||||
let mut in_diagram = false;
|
||||
let mut pending_updates: HashMap<usize, String> = HashMap::new();
|
||||
|
||||
let mut lines_copy = self.lines.clone();
|
||||
for (idx, line) in lines_copy.iter().enumerate() {
|
||||
let line_no = idx + 1;
|
||||
let trimmed = line.trim();
|
||||
let lowered = trimmed.to_lowercase();
|
||||
|
||||
if lowered.starts_with("pie") {
|
||||
in_pie = true;
|
||||
in_diagram = true;
|
||||
}
|
||||
if lowered.starts_with("sequencediagram") {
|
||||
in_sequence = true;
|
||||
in_diagram = true;
|
||||
} else if lowered.starts_with("graph")
|
||||
|| lowered.starts_with("flowchart")
|
||||
|| lowered.starts_with("classdiagram")
|
||||
|| lowered.starts_with("erdiagram")
|
||||
|| lowered.starts_with("gantt")
|
||||
{
|
||||
in_sequence = false;
|
||||
in_pie = false;
|
||||
in_diagram = true;
|
||||
}
|
||||
|
||||
if STYLE_RE.is_match(line) {
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
0,
|
||||
line.len(),
|
||||
"Unsupported 'style' directive; removing line.",
|
||||
Box::new(|_| String::new()),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
|
||||
for (pos, _) in line.match_indices('\t') {
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
pos,
|
||||
pos + 1,
|
||||
"Tab character found; use spaces instead.",
|
||||
Box::new(|line_text: &str| line_text.replacen('\t', " ", 1)),
|
||||
));
|
||||
}
|
||||
|
||||
if line.trim_end() != *line {
|
||||
let trimmed_line = line.trim_end().to_string();
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
trimmed_line.len(),
|
||||
line.len(),
|
||||
"Trailing whitespace.",
|
||||
Box::new(move |_| trimmed_line.clone()),
|
||||
));
|
||||
}
|
||||
|
||||
if !in_sequence {
|
||||
let label_spans = compute_label_spans(line);
|
||||
for arrow in ARROW_RE.find_iter(line) {
|
||||
let start = arrow.start();
|
||||
let end = arrow.end();
|
||||
if label_spans.iter().any(|(a, b)| start >= *a && end <= *b) {
|
||||
continue;
|
||||
}
|
||||
if is_within_double_quotes(line, start, end) {
|
||||
continue;
|
||||
}
|
||||
if arrow.as_str() != "-->" {
|
||||
let message =
|
||||
format!("Inconsistent arrow style '{}'; use '-->'.", arrow.as_str());
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
start,
|
||||
end,
|
||||
message,
|
||||
make_replace_span(start, end, "-->".to_string()),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !in_diagram {
|
||||
for arrow in ARROW_RE.find_iter(line) {
|
||||
let start = arrow.start();
|
||||
let end = arrow.end();
|
||||
let lhs = &line[..start];
|
||||
let rhs = &line[end..];
|
||||
let left_tok = lhs
|
||||
.split_whitespace()
|
||||
.last()
|
||||
.map(str::to_string)
|
||||
.unwrap_or_default();
|
||||
let right_tok = rhs
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.map(str::to_string)
|
||||
.unwrap_or_default();
|
||||
|
||||
for node_tok in [left_tok, right_tok] {
|
||||
if node_tok.is_empty() || NODE_ID_VALID_RE.is_match(&node_tok) {
|
||||
continue;
|
||||
}
|
||||
if let Some(span_start) = line.rfind(&node_tok) {
|
||||
let span_end = span_start + node_tok.len();
|
||||
let sanitized = sanitize_node_id(&node_tok);
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
span_start,
|
||||
span_end,
|
||||
"Node identifier should be lower_snake_case.",
|
||||
make_replace_span(span_start, span_end, sanitized),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if in_sequence {
|
||||
let first_arrow = SEQ_ARROW_RE.find(line).or_else(|| ARROW_RE.find(line));
|
||||
if let Some(arrow_match) = first_arrow {
|
||||
let mut before = line[..arrow_match.start()].to_string();
|
||||
let mut after = line[arrow_match.end()..].to_string();
|
||||
let arrow_text = arrow_match.as_str();
|
||||
let mut changed = false;
|
||||
|
||||
if let Some(caps) = SEQ_SENDER_UNDERSCORE_RE.captures(&before)
|
||||
&& let (Some(group), Some(full)) = (caps.get(1), caps.get(0))
|
||||
{
|
||||
let replacement = group.as_str().trim_end_matches('_').to_string();
|
||||
let mut new_before = before[..group.start()].to_string();
|
||||
new_before.push_str(&replacement);
|
||||
new_before.push_str(&before[full.end()..]);
|
||||
before = new_before;
|
||||
changed = true;
|
||||
}
|
||||
|
||||
if let Some(caps) = SEQ_RECEIVER_UNDERSCORE_RE.captures(&after) {
|
||||
if let (Some(recv), Some(rest)) = (caps.get(2), caps.get(4)) {
|
||||
after = format!("{}: {}", recv.as_str(), rest.as_str().trim_start());
|
||||
changed = true;
|
||||
}
|
||||
} else if let Some(caps) = SEQ_RECEIVER_MISSING_COLON_RE.captures(&after)
|
||||
&& !after.trim_start().starts_with(':')
|
||||
&& let (Some(recv), Some(rest)) = (caps.get(2), caps.get(4))
|
||||
{
|
||||
after = format!("{}: {}", recv.as_str(), rest.as_str().trim_start());
|
||||
changed = true;
|
||||
}
|
||||
|
||||
if changed {
|
||||
let updated = format!("{before}{arrow_text}{after}");
|
||||
pending_updates.insert(idx, updated);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(arrow_match) = SEQ_ARROW_RE.find(line).or_else(|| ARROW_RE.find(line))
|
||||
&& let Some(rel_colon) = line[arrow_match.end()..].find(':')
|
||||
{
|
||||
let colon_pos = arrow_match.end() + rel_colon;
|
||||
if line[colon_pos + 1..].contains(';') {
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
colon_pos + 1,
|
||||
line.len(),
|
||||
"Semicolons in sequence message; use commas or split lines.",
|
||||
make_replace_after_colon(colon_pos, ';', ','),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if in_diagram && !in_pie && !in_sequence {
|
||||
for captures in SQUARE_LABEL_RE.captures_iter(line) {
|
||||
if let Some(span) = captures.get(1) {
|
||||
let raw = span.as_str();
|
||||
if is_already_quoted(raw) {
|
||||
continue;
|
||||
}
|
||||
let start_idx = span.start();
|
||||
let end_idx = span.end();
|
||||
let replacement = format!("\"{}\"", raw.replace('"', "'"));
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
start_idx,
|
||||
end_idx,
|
||||
"Quote node label inside [] to allow punctuation.",
|
||||
make_replace_span(start_idx, end_idx, replacement),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
for captures in PAR2_LABEL_RE.captures_iter(line) {
|
||||
if let Some(mat) = captures.get(1) {
|
||||
if is_within_double_quotes(line, mat.start(), mat.end()) {
|
||||
continue;
|
||||
}
|
||||
let raw = mat.as_str();
|
||||
if is_already_quoted(raw) {
|
||||
continue;
|
||||
}
|
||||
let start_idx = mat.start();
|
||||
let end_idx = mat.end();
|
||||
let replacement = format!("\"{}\"", raw.replace('"', "'"));
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
start_idx,
|
||||
end_idx,
|
||||
"Quote node label inside (( )) to allow punctuation.",
|
||||
make_replace_span(start_idx, end_idx, replacement),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
for captures in PAR1_LABEL_RE.captures_iter(line) {
|
||||
if let Some(mat) = captures.get(1) {
|
||||
if is_within_double_quotes(line, mat.start(), mat.end()) {
|
||||
continue;
|
||||
}
|
||||
let raw = mat.as_str();
|
||||
if is_already_quoted(raw) {
|
||||
continue;
|
||||
}
|
||||
let start_idx = mat.start();
|
||||
let end_idx = mat.end();
|
||||
let replacement = format!("\"{}\"", raw.replace('"', "'"));
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
start_idx,
|
||||
end_idx,
|
||||
"Quote node label inside () to allow punctuation.",
|
||||
make_replace_span(start_idx, end_idx, replacement),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if in_pie && let Some(caps) = PIE_LINE_RE.captures(line) {
|
||||
let indent = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
|
||||
let label = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string();
|
||||
let value = caps.get(3).map(|m| m.as_str()).unwrap_or("").to_string();
|
||||
|
||||
let mut fixed_label = label.clone();
|
||||
for _ in 0..3 {
|
||||
let updated = PIE_INNER_QUOTE_RE
|
||||
.replace_all(&fixed_label, "$1$2$3")
|
||||
.into_owned();
|
||||
if updated == fixed_label {
|
||||
break;
|
||||
}
|
||||
fixed_label = updated;
|
||||
}
|
||||
|
||||
if fixed_label != label {
|
||||
let replacement_line = format!("{indent}\"{fixed_label}\": {value}");
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
0,
|
||||
line.len(),
|
||||
"Pie label contains quoted numeric count; removing inner quotes.",
|
||||
Box::new(move |_| replacement_line.clone()),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(caps) = NODE_ID_RE.captures(line)
|
||||
&& let Some(group) = caps.get(1)
|
||||
{
|
||||
let bad_id = group.as_str();
|
||||
if !NODE_ID_VALID_RE.is_match(bad_id) {
|
||||
let sanitized = sanitize_node_id(bad_id);
|
||||
issues.push(Issue::new(
|
||||
line_no,
|
||||
group.start(),
|
||||
group.end(),
|
||||
"Node identifier should be lower_snake_case.",
|
||||
make_replace_span(group.start(), group.end(), sanitized),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (idx, updated) in pending_updates {
|
||||
if idx < lines_copy.len() {
|
||||
lines_copy[idx] = updated;
|
||||
}
|
||||
}
|
||||
|
||||
self.lines = lines_copy;
|
||||
issues
|
||||
}
|
||||
|
||||
/// Drives lint/fix cycles until no fixable issues remain or the pass
/// budget is exhausted. Returns the number of fix passes executed.
fn apply_fixes(&mut self, mut issues: Vec<Issue>) -> usize {
    const MAX_PASSES: usize = 10;

    for pass in 1..=MAX_PASSES {
        if issues.is_empty() {
            // Nothing left to fix before this pass started.
            return pass - 1;
        }
        let unfixed = self.apply_fixes_inner(&issues);
        if unfixed == 0 {
            return pass;
        }
        // Re-lint so spans reflect the lines just rewritten.
        issues = self.lint();
    }

    MAX_PASSES
}
|
||||
|
||||
fn apply_fixes_inner(&mut self, issues: &[Issue]) -> usize {
|
||||
let mut issue_map: HashMap<usize, Vec<&Issue>> = HashMap::new();
|
||||
for issue in issues {
|
||||
issue_map.entry(issue.line_no).or_default().push(issue);
|
||||
}
|
||||
|
||||
let mut new_lines: Vec<String> = Vec::with_capacity(self.lines.len());
|
||||
let mut unfixed = 0usize;
|
||||
|
||||
for (idx, line) in self.lines.iter().enumerate() {
|
||||
let line_no = idx + 1;
|
||||
let Some(issues_on_line) = issue_map.get(&line_no) else {
|
||||
new_lines.push(line.clone());
|
||||
continue;
|
||||
};
|
||||
|
||||
if issues_on_line.len() > 1 {
|
||||
let (first_issue, rest) = issues_on_line.split_first().expect("non-empty slice");
|
||||
if let Some(fix) = &first_issue.fix {
|
||||
let fixed = fix(line);
|
||||
if !fixed.is_empty() {
|
||||
new_lines.push(fixed);
|
||||
}
|
||||
} else {
|
||||
unfixed += 1;
|
||||
new_lines.push(line.clone());
|
||||
}
|
||||
unfixed += rest.len();
|
||||
continue;
|
||||
}
|
||||
|
||||
let issue = issues_on_line[0];
|
||||
if let Some(fix) = &issue.fix {
|
||||
let fixed_line = fix(line);
|
||||
if !fixed_line.is_empty() {
|
||||
new_lines.push(fixed_line);
|
||||
}
|
||||
} else {
|
||||
unfixed += 1;
|
||||
new_lines.push(line.clone());
|
||||
}
|
||||
}
|
||||
|
||||
self.lines = new_lines;
|
||||
unfixed
|
||||
}
|
||||
}
|
||||
|
||||
// Pre-compiled patterns used by the mermaid linter. Compiling once here
// avoids re-building regexes per lint pass.
lazy_static! {
    // `style ...` directive lines (removed by the linter).
    static ref STYLE_RE: Regex = Regex::new(r"(?i)^\s*style\b").expect("valid regex");
    // Any dash-arrow form, e.g. `->`, `-->`, `-x>`.
    static ref ARROW_RE: Regex = Regex::new(r"-{1,}[^-]*>").expect("valid regex");
    // Sequence-diagram arrows: `->`, `->>`, `-->`, `-->>`.
    static ref SEQ_ARROW_RE: Regex = Regex::new(r"-{1,2}(?:>>|>)").expect("valid regex");
    // Line-leading node identifier followed by `[` or `(` label opener.
    static ref NODE_ID_RE: Regex = Regex::new(r"^\s*([a-zA-Z0-9_]+)\s*[\[(]").expect("valid regex");
    // Identifiers made only of word characters are accepted as-is.
    static ref NODE_ID_VALID_RE: Regex = Regex::new(r"^[A-Za-z0-9_]+$").expect("valid regex");
    // `id[label]` — capture group 1 is the label text.
    static ref SQUARE_LABEL_RE: Regex =
        Regex::new(r"[A-Za-z0-9_]+\s*\[(.*?)\]").expect("valid regex");
    // `id((label))` double-paren (circle) node labels.
    static ref PAR2_LABEL_RE: Regex =
        Regex::new(r"[A-Za-z0-9_]+\s*\(\((.*?)\)\)").expect("valid regex");
    // `id(label)` single-paren node labels (no nested parens inside).
    static ref PAR1_LABEL_RE: Regex =
        Regex::new(r"[A-Za-z0-9_]+\s*\(([^()]*?)\)").expect("valid regex");
    // Sender token accidentally ending in `_` before an arrow.
    static ref SEQ_SENDER_UNDERSCORE_RE: Regex =
        Regex::new(r"([A-Za-z0-9_]+)_\s*$").expect("valid regex");
    // Receiver written as `Name_ message` instead of `Name: message`.
    static ref SEQ_RECEIVER_UNDERSCORE_RE: Regex =
        Regex::new(r"^(\s*([A-Za-z0-9_]+))_(\s*)(.*)$").expect("valid regex");
    // Receiver followed directly by the message with the colon missing.
    static ref SEQ_RECEIVER_MISSING_COLON_RE: Regex =
        Regex::new(r"^(\s*([A-Za-z0-9_]+))(\s+)(.*)$").expect("valid regex");
    // Pie entry: indent, quoted label, and numeric value.
    static ref PIE_LINE_RE: Regex =
        Regex::new(r#"^(\s*)"(.+)"\s*:\s*([0-9]+(?:\.[0-9]+)?)\s*$"#).expect("valid regex");
    // A quoted number inside brackets/parens within a pie label.
    static ref PIE_INNER_QUOTE_RE: Regex =
        Regex::new(r#"([\(\[])\s*['"](\d+(?:\.\d+)?)['"]\s*([\)\]])"#).expect("valid regex");
    // Whole ```mermaid fenced blocks (dot matches newlines via `s`).
    static ref MERMAID_FENCE_RE: Regex =
        Regex::new(r"(?is)```mermaid(.*?)```").expect("valid regex");
    // Any fenced code block, capturing its language tag and body.
    static ref GENERIC_FENCE_RE: Regex =
        Regex::new(r"(?is)```([a-zA-Z0-9_+-]*)\n(.*?)```").expect("valid regex");
    // First line of a diagram body (flowchart/graph/sequence/...).
    static ref HEADER_RE: Regex =
        Regex::new(r"(?i)^\s*(flowchart|graph|sequenceDiagram|classDiagram|erDiagram|gantt)\b")
            .expect("valid regex");
    // Header with a `title` crammed onto the same line; split by
    // normalize_header_titles.
    static ref HEADER_TITLE_SAME_LINE_RE: Regex = Regex::new(
        r"(?im)^(?P<indent>\s*)(?P<keyword>flowchart|graph)\s+(?P<dir>TB|TD|LR|RL|BT)\s+title\s+(?P<title>.+)$",
    )
    .expect("valid regex");
}
|
||||
|
||||
/// Converts an arbitrary token into a lower_snake_case node identifier:
/// every character that is not ASCII-alphanumeric or `_` becomes `_`,
/// and letters are lowercased.
fn sanitize_node_id(value: &str) -> String {
    value
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || ch == '_' {
                ch.to_ascii_lowercase()
            } else {
                '_'
            }
        })
        .collect()
}
|
||||
|
||||
fn make_replace_span(start: usize, end: usize, replacement: String) -> FixFn {
|
||||
Box::new(move |line: &str| {
|
||||
let mut result = String::with_capacity(line.len() - (end - start) + replacement.len());
|
||||
result.push_str(&line[..start]);
|
||||
result.push_str(&replacement);
|
||||
result.push_str(&line[end..]);
|
||||
result
|
||||
})
|
||||
}
|
||||
|
||||
fn make_replace_after_colon(colon_pos: usize, find: char, replace_with: char) -> FixFn {
|
||||
Box::new(move |line: &str| {
|
||||
let mut result = String::with_capacity(line.len());
|
||||
result.push_str(&line[..=colon_pos]);
|
||||
let tail = line[colon_pos + 1..].replace(find, &replace_with.to_string());
|
||||
result.push_str(&tail);
|
||||
result
|
||||
})
|
||||
}
|
||||
|
||||
fn compute_label_spans(line: &str) -> Vec<(usize, usize)> {
|
||||
let mut spans: Vec<(usize, usize)> = Vec::new();
|
||||
for caps in SQUARE_LABEL_RE.captures_iter(line) {
|
||||
if let Some(inner) = caps.get(1) {
|
||||
spans.push((inner.start(), inner.end()));
|
||||
}
|
||||
}
|
||||
for caps in PAR2_LABEL_RE.captures_iter(line) {
|
||||
if let Some(inner) = caps.get(1) {
|
||||
spans.push((inner.start(), inner.end()));
|
||||
}
|
||||
}
|
||||
for caps in PAR1_LABEL_RE.captures_iter(line) {
|
||||
if let Some(inner) = caps.get(1) {
|
||||
spans.push((inner.start(), inner.end()));
|
||||
}
|
||||
}
|
||||
spans
|
||||
}
|
||||
|
||||
fn leading_indent(haystack: &str, m: ®ex::Match) -> String {
|
||||
let start = m.start();
|
||||
let line_start = haystack[..start]
|
||||
.rfind('\n')
|
||||
.map(|idx| idx.saturating_add(1))
|
||||
.unwrap_or(0);
|
||||
haystack[line_start..start]
|
||||
.chars()
|
||||
.take_while(|ch| ch.is_whitespace())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Prefixes every line of `block` with `indent`; empty lines become the
/// bare indent. With an empty indent the block is returned unchanged.
fn indent_block(block: &str, indent: &str) -> String {
    if indent.is_empty() {
        return block.to_string();
    }

    let indented: Vec<String> = block
        .lines()
        .map(|line| {
            if line.is_empty() {
                indent.to_string()
            } else {
                format!("{indent}{line}")
            }
        })
        .collect();
    indented.join("\n")
}
|
||||
|
||||
/// Reports whether the trimmed text is already wrapped in a matching
/// pair of double or single quotes.
fn is_already_quoted(raw: &str) -> bool {
    let trimmed = raw.trim();
    if trimmed.len() < 2 {
        return false;
    }
    let double_quoted = trimmed.starts_with('"') && trimmed.ends_with('"');
    let single_quoted = trimmed.starts_with('\'') && trimmed.ends_with('\'');
    double_quoted || single_quoted
}
|
||||
|
||||
/// Reports whether the byte span `start..end` falls entirely inside a
/// pair of double quotes on `line`. Quotes are paired greedily left to
/// right; an unmatched trailing quote opens no span.
fn is_within_double_quotes(line: &str, start: usize, end: usize) -> bool {
    let mut open_quote: Option<usize> = None;
    for (idx, ch) in line.char_indices() {
        if ch != '"' {
            continue;
        }
        match open_quote.take() {
            None => open_quote = Some(idx),
            Some(quote_start) => {
                // A quote pair just closed; check containment immediately
                // instead of collecting every span first.
                if start >= quote_start && end <= idx {
                    return true;
                }
            }
        }
    }
    false
}
|
||||
|
||||
fn normalize_header_titles(source: &str) -> String {
|
||||
HEADER_TITLE_SAME_LINE_RE
|
||||
.replace_all(source, |caps: &Captures| {
|
||||
let indent = caps.name("indent").map(|m| m.as_str()).unwrap_or("");
|
||||
let keyword = caps
|
||||
.name("keyword")
|
||||
.map(|m| m.as_str())
|
||||
.unwrap_or("flowchart");
|
||||
let dir = caps.name("dir").map(|m| m.as_str()).unwrap_or("TD");
|
||||
let title = caps.name("title").map(|m| m.as_str()).unwrap_or("");
|
||||
format!("{indent}{keyword} {dir}\n{indent} title {title}")
|
||||
})
|
||||
.into_owned()
|
||||
}
|
||||
|
||||
fn lint_and_wrap(code: &str) -> String {
|
||||
let normalized = normalize_header_titles(code);
|
||||
let mut linter = MermaidLinter::new(&normalized);
|
||||
let issues = linter.lint();
|
||||
linter.apply_fixes(issues);
|
||||
let fixed = if linter.lines.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
linter.lines.join("\n")
|
||||
};
|
||||
format!("```mermaid\n{fixed}\n```")
|
||||
}
|
||||
|
||||
/// Rewrites every mermaid diagram in a markdown string so it renders
/// reliably: lints/fixes fenced ```mermaid blocks, upgrades generic
/// fenced blocks whose first line is a diagram header, and wraps bare
/// (unfenced) diagram blocks in a mermaid fence. Non-mermaid content is
/// returned unchanged.
pub(crate) fn fix_mermaid_blocks(input: &str) -> String {
    if input.trim().is_empty() {
        return input.to_string();
    }

    // Pass 1: lint and re-emit explicit ```mermaid fenced blocks,
    // preserving each block's original leading indentation.
    let after_fenced = MERMAID_FENCE_RE
        .replace_all(input, |caps: &Captures| {
            let full_match = caps
                .get(0)
                .expect("full match is always present for fenced mermaid block");
            let indent = leading_indent(input, &full_match);
            let body = caps
                .get(1)
                .map(|m| m.as_str())
                .unwrap_or("")
                .trim_matches('\n');
            let wrapped = lint_and_wrap(body);
            if indent.is_empty() {
                wrapped
            } else {
                indent_block(&wrapped, &indent)
            }
        })
        .into_owned();

    // Pass 2: any other fenced block whose body starts with a diagram
    // header gets converted into a linted mermaid fence; everything else
    // is passed through verbatim.
    let after_generic = GENERIC_FENCE_RE
        .replace_all(&after_fenced, |caps: &Captures| {
            let full_match = caps
                .get(0)
                .expect("full match is always present for fenced code block");
            let indent = leading_indent(&after_fenced, &full_match);
            let lang = caps
                .get(1)
                .map(|m| m.as_str())
                .unwrap_or("")
                .trim()
                .to_lowercase();
            if lang == "mermaid" {
                // Already handled by pass 1; leave untouched.
                return caps
                    .get(0)
                    .map(|m| m.as_str().to_string())
                    .unwrap_or_default();
            }
            let body = caps
                .get(2)
                .map(|m| m.as_str())
                .unwrap_or("")
                .trim_matches('\n');
            let head = body.lines().next().unwrap_or("").trim().to_lowercase();
            if [
                "flowchart",
                "graph",
                "sequencediagram",
                "classdiagram",
                "erdiagram",
                "gantt",
            ]
            .iter()
            .any(|prefix| head.starts_with(prefix))
            {
                let wrapped = lint_and_wrap(body);
                if indent.is_empty() {
                    wrapped
                } else {
                    indent_block(&wrapped, &indent)
                }
            } else {
                caps.get(0)
                    .map(|m| m.as_str().to_string())
                    .unwrap_or_default()
            }
        })
        .into_owned();

    // Pass 3: scan line-by-line for bare (unfenced) diagram blocks.
    let lines: Vec<String> = after_generic
        .split('\n')
        .map(std::string::ToString::to_string)
        .collect();

    if lines.is_empty() {
        return after_generic;
    }

    let mut out_lines: Vec<String> = Vec::new();
    let mut in_code_block = false;
    let mut idx = 0usize;

    while idx < lines.len() {
        let line = &lines[idx];
        let trimmed = line.trim();
        if trimmed.starts_with("```") {
            // Toggle fence state so headers inside code blocks are ignored.
            in_code_block = !in_code_block;
            out_lines.push(line.clone());
            idx += 1;
            continue;
        }

        if !in_code_block && HEADER_RE.is_match(trimmed) {
            // Collect the contiguous diagram block: it ends at a blank
            // line or at the start of a fence.
            let start = idx;
            let mut end = idx;
            while end < lines.len() {
                let current = lines[end].trim();
                if current.starts_with("```") || current.is_empty() {
                    break;
                }
                end += 1;
            }
            let block = lines[start..end].join("\n");
            out_lines.push(lint_and_wrap(block.trim_matches('\n')));
            idx = end;
            // Always add a blank line separator after a mermaid block to avoid
            // back-to-back fenced blocks which some renderers mishandle.
            if idx < lines.len() {
                if lines[idx].trim().is_empty() {
                    out_lines.push(lines[idx].clone());
                    idx += 1;
                } else {
                    out_lines.push(String::new());
                }
            }
            continue;
        }

        out_lines.push(line.clone());
        idx += 1;
    }

    out_lines.join("\n")
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // End-to-end coverage for fix_mermaid_blocks: fenced and unfenced
    // inputs, label quoting, sequence repairs, pie cleanup, and no-op
    // round-trips.
    use super::fix_mermaid_blocks;
    use pretty_assertions::assert_eq;
    use regex::Regex;

    // Bare flowchart input gets fenced and its labels double-quoted.
    #[test]
    fn flowchart_nodes_are_quoted_even_when_unfenced() {
        let raw = [
            "flowchart LR",
            " A[Caller Service or SDK] --> B[ekm_client Encryptor]",
            " B --> C[KeyProviderDefault (HTTP)]",
            " C --> D[EKM FastAPI Service]",
            " D --> E[Provider Selector]",
            " E --> F[Cloud KMS (AWS/GCP/Azure)]",
            " B --> G[ekm_client_cpp V1Header build/parse]",
            " B --> H[Tink AEAD (streaming/non-streaming)]",
        ]
        .join("\n");
        let fixed = fix_mermaid_blocks(&raw);
        assert!(fixed.contains(r#"A["Caller Service or SDK"]"#));
        assert!(fixed.contains(r#"B["ekm_client Encryptor"]"#));
        assert!(fixed.contains(r#"C["KeyProviderDefault (HTTP)"]"#));
        assert!(fixed.contains(r#"D["EKM FastAPI Service"]"#));
        assert!(fixed.contains(r#"E["Provider Selector"]"#));
        assert!(fixed.contains(r#"F["Cloud KMS (AWS/GCP/Azure)"]"#));
        assert!(fixed.contains(r#"G["ekm_client_cpp V1Header build/parse"]"#));
        assert!(fixed.contains(r#"H["Tink AEAD (streaming/non-streaming)"]"#));
    }

    // Semicolons after the message colon become commas.
    #[test]
    fn sequence_semicolons_removed_in_messages() {
        let raw = "```mermaid\nsequenceDiagram\n Ingress->>Ingress: Sanitize logs; reject non-HTTPS with 403\n```";
        let fixed = fix_mermaid_blocks(raw);
        let message_re = Regex::new(r"Ingress->>Ingress:\s*(.*)").unwrap();
        let msg = message_re
            .captures(&fixed)
            .and_then(|caps| caps.get(1))
            .map(|m| m.as_str())
            .unwrap_or_default();
        assert!(!msg.contains(';'));
        assert!(msg.contains(','));
    }

    // `pie` is not in HEADER_RE, so a bare pie chart stays unfenced.
    #[test]
    fn unfenced_pie_chart_is_not_wrapped() {
        let raw = r#"pie
 "High ("12")": 12
 "Medium ("39")": 39
 "Low ("20")": 20
"#;
        let fixed = fix_mermaid_blocks(raw);
        assert!(!fixed.contains("```mermaid"));
    }

    // Quoted numbers nested inside pie labels lose their inner quotes.
    #[test]
    fn pie_chart_inner_quotes_removed_when_fenced() {
        let raw = r#"```mermaid
pie
 "High ("12")": 12
 "Medium ("39")": 39
 "Low ("20")": 20
```"#;
        let fixed = fix_mermaid_blocks(raw);
        assert!(fixed.contains(r#""High (12)": 12"#));
        assert!(fixed.contains(r#""Medium (39)": 39"#));
        assert!(fixed.contains(r#""Low (20)": 20"#));
    }

    // Both (( )) and ( ) label forms get quoted.
    #[test]
    fn flowchart_paren_labels_are_quoted() {
        let raw = "```mermaid\nflowchart LR\n A((Start node)) --> B(Account)\n```";
        let fixed = fix_mermaid_blocks(raw);
        assert!(fixed.contains(r#"A(("Start node"))"#));
        assert!(fixed.contains(r#"B("Account")"#));
    }

    // `graph` headers are treated like `flowchart` headers.
    #[test]
    fn graph_nodes_are_quoted_even_when_unfenced() {
        let raw = "graph LR\n A[Client App] --> B[API Server]\n B --> C[DB (primary)]\n";
        let fixed = fix_mermaid_blocks(raw);
        assert!(fixed.contains(r#"A["Client App"]"#));
        assert!(fixed.contains(r#"B["API Server"]"#));
        assert!(fixed.contains(r#"C["DB (primary)"]"#));
    }

    // Arrow-like text inside labels (e.g. `post -> messages`) must not be
    // rewritten to `-->` when the label gets quoted.
    #[test]
    fn flowchart_quotes_and_preserves_inner_arrows() {
        let raw = [
            "flowchart LR",
            " ClientApp[Client App - React] --> CalpicoState[Calpico State (signals + React Query)]",
            " CalpicoState --> APIServer[API Server (/api/calpico)]",
            " CalpicoState --> WebSocket[WebSocket Events]",
            " CalpicoState --> FileService[File Upload Service]",
            " APIServer --> CalpicoUtils[calpico_utils (post -> messages)]",
            " CalpicoUtils --> APIServer",
            " ClientApp --> UIComponents[UI Components (Composer, Thread, Sidebar)]",
            " WebSocket --> CalpicoState",
        ]
        .join("\n");
        let fixed = fix_mermaid_blocks(&raw);
        assert!(fixed.contains(r#"ClientApp["Client App - React"]"#));
        assert!(fixed.contains(r#"CalpicoState["Calpico State (signals + React Query)"]"#));
        assert!(fixed.contains(r#"APIServer["API Server (/api/calpico)"]"#));
        assert!(fixed.contains(r#"UIComponents["UI Components (Composer, Thread, Sidebar)"]"#));
        assert!(fixed.contains(r#"CalpicoUtils["calpico_utils (post -> messages)"]"#));
        assert!(fixed.contains("post -> messages"));
        assert!(!fixed.contains("post --> messages"));
    }

    // Quotes that are already part of a sequence message survive linting.
    #[test]
    fn sequence_message_quotes_preserved() {
        let raw = r#"```mermaid
sequenceDiagram
 API->>FileMgr: sanitize & upload files ("if any")
```"#;
        let fixed = fix_mermaid_blocks(raw);
        assert!(fixed.contains(r#"API->>FileMgr: sanitize & upload files ("if any")"#));
    }

    // `Receiver_ message` typos are repaired to `Receiver: message`.
    #[test]
    fn sequence_receiver_underscore_repaired_to_colon() {
        let raw = r#"```mermaid
sequenceDiagram
 ClientApp->>APIServer_ POST /api/calpico/rooms/{id}/messages
 APIServer->>MessageWriter_ validate membership, persist message
 MessageWriter-->>APIServer_ message record
```"#;
        let fixed = fix_mermaid_blocks(raw);
        assert!(fixed.contains("ClientApp->>APIServer: POST /api/calpico/rooms/{id}/messages"));
        assert!(fixed.contains("APIServer->>MessageWriter: validate membership, persist message"));
        assert!(fixed.contains("MessageWriter-->>APIServer: message record"));
    }

    // `flowchart TD title ...` is split into header + title lines.
    #[test]
    fn header_title_on_same_line_is_split() {
        let raw = "```mermaid\nflowchart TD title Component request flow - end-to-end platform\n Client[\"Tenant client / automation workflow\"] --> ChatService[\"packages/chat-service\"]\n```";
        let fixed = fix_mermaid_blocks(raw);
        // Ensure we no longer have a single line with both the direction and title.
        assert!(!fixed.contains("flowchart TD title"));

        // We expect a header line followed by a separate title directive line.
        assert!(
            fixed.contains("flowchart TD\n  title Component request flow - end-to-end platform")
        );
    }

    // Inputs with no mermaid content pass through byte-identically.
    #[test]
    fn round_trip_no_mermaid_returns_input() {
        let raw = "This markdown has no mermaid.\n\n```rust\nfn main() {}\n```\n";
        let fixed = fix_mermaid_blocks(raw);
        assert_eq!(raw, fixed);
    }
}
|
||||
274
codex-rs/tui/src/security_prompts.rs
Normal file
274
codex-rs/tui/src/security_prompts.rs
Normal file
@@ -0,0 +1,274 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
// Centralized prompt strings for the security review feature.
|
||||
|
||||
// Auto-scope prompts
|
||||
pub(crate) const AUTO_SCOPE_SYSTEM_PROMPT: &str = "You are an application security engineer helping select the minimal set of directories that should be examined for a security review. Only respond with JSON lines that follow the requested schema.";
|
||||
pub(crate) const AUTO_SCOPE_PROMPT_TEMPLATE: &str = r#"
|
||||
You are assisting with an application security review. Identify the minimal set of directories that should be in scope.
|
||||
|
||||
# Repository overview
|
||||
{repo_overview}
|
||||
|
||||
# Request
|
||||
<intent>{user_query}</intent>
|
||||
|
||||
# Request keywords
|
||||
{keywords}
|
||||
|
||||
# Conversation history
|
||||
{conversation}
|
||||
|
||||
# Available tools
|
||||
- SEARCH: respond with `SEARCH: literal:<term>` or `SEARCH: regex:<pattern>` to run ripgrep over the repository root (returns colored matches with line numbers).
|
||||
- GREP_FILES: respond with `GREP_FILES: {"pattern":"needle","include":"*.rs","path":"subdir","limit":200}` to list files whose contents match. Fields:
|
||||
- pattern: regex string (required)
|
||||
- include: optional glob filter (ripgrep --glob)
|
||||
- path: optional directory/file to search (defaults to repo root)
|
||||
- limit: optional max paths to return (default 100, max 2000)
|
||||
- READ: respond with `READ: <relative path>#L<start>-L<end>` to inspect source code (omit the range to read roughly {read_window} lines starting at the top of the file).
|
||||
|
||||
Issue at most one tool command per message and wait for the tool output before continuing. When you have gathered enough information, respond only with JSON Lines as described below.
|
||||
|
||||
# Selection rules
|
||||
- Prefer code that serves production traffic, handles external input, or configures deployed infrastructure.
|
||||
- Return directories (not files). Use the highest level that contains the relevant implementation; avoid returning both a parent and its child.
|
||||
- Skip tests, docs, vendored dependencies, caches, build artefacts, editor configuration, or directories that do not exist.
|
||||
- Limit to the most relevant 3–8 directories when possible.
|
||||
- Before including a directory, confirm it clearly relates to <intent>{user_query}</intent>; use SEARCH or READ to look for matching terminology (README, module names, config files) when uncertain.
|
||||
|
||||
# Output format
|
||||
Return JSON Lines: each line must be a single JSON object with keys {"path", "include", "reason"}. Omit fences and additional commentary. If unsure, set include=false and explain in reason. Output `ALL` alone on one line to include the entire repository.
|
||||
"#;
|
||||
pub(crate) const AUTO_SCOPE_JSON_GUARD: &str =
|
||||
"Respond only with JSON Lines as described. Do not include markdown fences, prose, or lists.";
|
||||
pub(crate) const AUTO_SCOPE_KEYWORD_SYSTEM_PROMPT: &str = "You expand security review prompts into concise code search keywords. Respond only with JSON Lines.";
|
||||
pub(crate) const AUTO_SCOPE_KEYWORD_PROMPT_TEMPLATE: &str = r#"
|
||||
Determine the most relevant search keywords for the repository request below. Produce at most {max_keywords} keywords.
|
||||
|
||||
Request:
|
||||
{user_query}
|
||||
|
||||
Guidelines:
|
||||
- Prefer feature, component, service, or technology names that are likely to appear in directory names.
|
||||
- Keep each keyword to 1–3 words; follow repository naming conventions (snake_case, kebab-case) when obvious.
|
||||
- Skip generic words like "security", "review", "code", "bug", or "analysis".
|
||||
- If nothing applies, return a single JSON object {{"keyword": "{fallback_keyword}"}} that restates the subject clearly.
|
||||
|
||||
Output format: JSON Lines, each {{"keyword": "<term>"}}. Do not add commentary or fences.
|
||||
"#;
|
||||
|
||||
// Spec generation prompts
|
||||
pub(crate) const SPEC_SYSTEM_PROMPT: &str = "You are an application security engineer documenting how a project is built. Produce an architecture specification that focuses on components, flows, and controls. Stay within the provided code locations, ground claims in concrete evidence, and keep the output in markdown.";
|
||||
pub(crate) const SPEC_COMBINE_SYSTEM_PROMPT: &str = "You are consolidating multiple specification drafts into a single, cohesive project specification. Merge overlapping content, keep terminology consistent, and follow the supplied template. Preserve every security-relevant detail; when in doubt, include rather than summarize away content.";
|
||||
pub(crate) const SPEC_PROMPT_TEMPLATE: &str = "You have access to the source code inside the following locations:\n{project_locations}\n\nFocus on {target_label}.\nGenerate a security-focused project specification. Parallelize discovery when enumerating files and avoid spending time on tests, vendored dependencies, or build artefacts.\n\n# Available tools\n- READ: respond with `READ: <relative path>#Lstart-Lend` (range optional) to open code files within the in-scope locations.\n- SEARCH: respond with `SEARCH: literal:<term>` or `SEARCH: regex:<pattern>` to run ripgrep over the repository root and inspect matches.\n- GREP_FILES: respond with `GREP_FILES: {\"pattern\":\"needle\",\"include\":\"*.rs\",\"path\":\"subdir\",\"limit\":200}` to list files whose contents match.\nEmit at most one tool command per message and wait for the tool output before continuing. Use these tools to ground API entry points, components, and data flows in actual code instead of speculation.\n\nWhen you have gathered enough evidence, follow the template exactly and return only markdown (no tool commands).\n\nTemplate:\n{spec_template}\n";
|
||||
pub(crate) const CONVERT_CLASSIFICATION_TO_JSON_PROMPT_TEMPLATE: &str = r#"
|
||||
Read the project specification below and extract a normalized Data Classification list.
|
||||
|
||||
<specification>
|
||||
{spec_markdown}
|
||||
</specification>
|
||||
|
||||
# Goal
|
||||
Produce newline-delimited JSON (NDJSON), one object per classified data type with keys:
|
||||
- data_type (string — e.g., PII, PHI, PCI, credentials, secrets, telemetry)
|
||||
- sensitivity (exactly one of: high, medium, low)
|
||||
- storage_location (string)
|
||||
- retention (short policy or duration)
|
||||
- encryption_at_rest (string; use "unknown" if not stated)
|
||||
- in_transit (string; use "unknown" if not stated)
|
||||
- accessed_by (string describing services/roles/users)
|
||||
|
||||
# Guidance
|
||||
- Prefer the specification's Data Classification section; infer from context when necessary.
|
||||
- Merge duplicate data types, choosing the strictest sensitivity.
|
||||
- Keep values concise and human-readable.
|
||||
|
||||
# Output
|
||||
Emit only NDJSON lines. Each JSON object must contain exactly the keys listed above (no arrays, extra keys, or prose).
|
||||
"#;
|
||||
|
||||
// Validation plan prompts
|
||||
/// System prompt for planning minimal, safe validations of high-risk
/// findings; the model must reply with JSON Lines only (no markdown/prose).
pub(crate) const VALIDATION_PLAN_SYSTEM_PROMPT: &str = "You are an application security engineer planning minimal, safe validations for high-risk findings. Respond ONLY with JSON Lines as requested; do not include markdown or prose.";
|
||||
/// User prompt for the validation-planning step. `{findings}` is replaced
/// with the serialized findings context. The expected reply is JSON Lines:
/// at most one account-setup object plus one object per validation.
pub(crate) const VALIDATION_PLAN_PROMPT_TEMPLATE: &str = r#"
Before any checks, create two test accounts if the app requires login. Prefer a short Python script that calls a signup endpoint or automates the registration form headlessly. If this is not feasible, return a `manual` instruction with a `login_url`.

Then select ONLY high-risk findings to validate. For each, choose the minimal tool and target:
- Use the Playwright MCP tool for web_browser checks (supply a reachable URL in `target`).
- Use tool "curl" for network_api checks (supply full URL in `target`).
- Use tool "python" only if a short, non-destructive PoC is essential (include inline script text in `script`).

Rules:
- Keep requests minimal and non-destructive; no state-changing actions.
- Prefer headless checks (e.g., page loads, HTTP status, presence of a marker string).
- Max 5 requests total; prioritize Critical/High severity or lowest risk_rank.

Context (findings):
{findings}

Output format (one JSON object per line, no fences):
- For account setup (emit at most one line): {"id_kind":"setup","action":"register|manual","login_url":"<string, optional>","tool":"python|manual","script":"<string, optional>"}
- For validations: {"id_kind":"risk_rank|summary_id","id_value":<int>,"tool":"playwright|curl|python","target":"<string, optional>","script":"<string, optional>"}
"#;
|
||||
|
||||
// Account setup planning (standalone, used when needed)
|
||||
/// System prompt for planning creation of two test accounts before
/// validation; replies must be JSON Lines only.
pub(crate) const VALIDATION_ACCOUNTS_SYSTEM_PROMPT: &str = "You plan how to create two test accounts for a typical web app. Respond ONLY with JSON Lines; no prose.";
|
||||
/// Standalone prompt for planning test-account creation (used when the
/// validation plan needs it). `{findings}` is replaced with the findings
/// context; the reply is one JSON line (automated `register` or `manual`).
pub(crate) const VALIDATION_ACCOUNTS_PROMPT_TEMPLATE: &str = r#"
Goal: ensure two test accounts exist prior to validation. Prefer a short Python script that registers accounts via HTTP or a headless flow; otherwise return a manual login URL.

Constraints:
- The script must be non-destructive and idempotent.
- Print credentials to stdout as JSON: {"accounts":[{"username":"...","password":"..."},{"username":"...","password":"..."}]}.
- If you cannot identify a safe automated path, return a single JSON line: {"action":"manual","login_url":"https://..."}.

Context (findings):
{findings}

Output format (one JSON object per line, no fences):
- Automated: {"action":"register","tool":"python","login_url":"<string, optional>","script":"<python script>"}
- Manual: {"action":"manual","login_url":"<string>"}
"#;
|
||||
/// Guard section appended to prompts to force markdown-only output: no
/// planning/chain-of-thought, no echoed prompt headings, no angle-bracket
/// blocks, no whole-response code fences, no AI disclaimers.
pub(crate) const MARKDOWN_OUTPUT_GUARD: &str = "\n# Output Guard (strict)\n - Output only the final markdown content requested.\n - Do not include goal, analysis, planning, chain-of-thought, or step lists.\n - Do not echo prompt sections like \"Task\", \"Steps\", \"Output\", or \"Important\".\n - Do not include any XML/angle-bracket blocks (e.g., <...> inputs) in the output.\n - Do not wrap the entire response in code fences; use code fences only for code snippets.\n - Do not include apologies, disclaimers, or references to being an AI model.\n";
|
||||
/// System prompt for a markdown-polishing pass: fix formatting (numbering,
/// bullets, fences, diagrams) without adding or removing analysis content.
pub(crate) const MARKDOWN_FIX_SYSTEM_PROMPT: &str = "You are a meticulous technical editor. Polish markdown formatting while preserving the original security analysis content. Focus on fixing numbering, bullet spacing, code fences, and diagram syntax without adding or removing information.";
|
||||
/// Prompt for merging per-location specification drafts into one project-wide
/// specification. Placeholders: `{project_locations}`, `{spec_drafts}`,
/// `{combined_template}`. The `(from {location_label})` text is literal
/// guidance to the model, not a substitution performed here.
pub(crate) const SPEC_COMBINE_PROMPT_TEMPLATE: &str = "You previously generated specification drafts for the following code locations:\n{project_locations}\n\nDraft content (each draft may include an \"API Entry Points\" section summarizing externally exposed interfaces):\n{spec_drafts}\n\nTask: merge these drafts into one comprehensive specification that describes the entire project. Remove duplication, keep terminology consistent, and ensure the final document reads as a single report that preserves API coverage. Follow the template exactly and return only markdown.\n\nNon-negotiable requirements:\n- Carry forward every concrete security-relevant fact, list, table, code block, and data classification entry from the drafts unless it is an exact duplicate.\n- When multiple drafts contribute to the same template section, include the union of their paragraphs and bullet points. If details differ, keep both and attribute them with inline labels such as `(from {location_label})` rather than dropping information.\n- Preserve API entry points verbatim (including tables) and incorporate them into the appropriate section without shortening columns.\n- Keep all identifiers (component names, queue names, environment variables, secrets, external services, metric names) exactly as written; do not rename or generalize.\n- Follow the template's structure exactly: populate every section, create the requested subsections, and include the explicit `Sources:` lines and bullet styles. Do not leave the instructional text in place or drop mandatory sections.\n- Populate the \"Relevant Source Files\" section with bullet points that reference each draft's location label and any concrete file paths mentioned in the drafts.\n- Ensure the \"Data Classification\" section exists even when the drafts were sparse; aggregate and preserve every classification detail there.\n- If multiple drafts contain tabular data (APIs, components, data classification), merge rows from all drafts and maintain duplicates when the sources disagree so the consumer can reconcile manually.\n- Do not introduce new speculation or remove nuance from mitigations, caveats, or risk descriptions provided in the drafts. Err on the side of length; the final document should be at least as detailed as the most verbose draft.\n\n# Available tools\n- READ: respond with `READ: <relative path>#Lstart-Lend` (range optional) to open code or draft files. Use paths relative to the repository root.\n- GREP_FILES: respond with `GREP_FILES: {\"pattern\": \"...\", \"include\": \"*.rs\", \"path\": \"subdir\", \"limit\": 200}` to list files whose contents match.\nEmit at most one tool command in a single message and wait for the tool output before continuing. Prefer READ for prose context; SEARCH is not available during this step.\n\nTemplate:\n{combined_template}\n";
|
||||
/// System prompt for triaging which directories to include in the
/// specification pass; the reply is a newline-separated path list or `ALL`.
pub(crate) const SPEC_DIR_FILTER_SYSTEM_PROMPT: &str = r#"
You triage directories for a security review specification. Only choose directories that hold core product or security-relevant code.
- Prefer application source directories (services, packages, libs).
- Exclude build artifacts, vendored dependencies, generated code, or documentation-only folders.
- Limit the selection to the most critical directories (ideally 3-8).
Respond with a newline-separated list containing only the directory paths chosen from the provided list. Respond with `ALL` if every directory should be included. Do not add quotes or extra commentary.
"#;
|
||||
/// Markdown template for a single-location specification draft.
/// Placeholders: `{target_label}`, `{model_name}`, `{date}`,
/// `{project_locations}`.
pub(crate) const SPEC_MARKDOWN_TEMPLATE: &str = "# Project Specification\n- Location: {target_label}\n- Prepared by: {model_name}\n- Date: {date}\n- In-scope paths:\n```\n{project_locations}\n```\n\n## Overview\nSummarize the product or service, primary users, and the business problem it solves. Highlight the most security relevant entry points.\n\n## Architecture Summary\nDescribe the high-level system architecture, major services, data stores, and external integrations. Include a concise mermaid flowchart when it improves clarity. If the specification uses more than one mermaid diagram, add a `title Component request flow` line (with a descriptive label) inside each diagram so the rendered report shows distinct titles.\n\n## Components\nList 5-8 major components. For each, note the role, responsibilities, key dependencies, and security-critical behavior.\n\n## Business Flows\nDocument up to 5 important flows (CRUD, external integrations, workflow orchestration). For each flow capture triggers, main steps, data touched, and security notes. Include a short mermaid sequence diagram if helpful.\n\n## Tech Stack\nCapture languages, frameworks, and infrastructure used by each major component. Tabulate runtimes, key libraries, storage technologies, and deployment targets.\n\n## Authentication\nExplain how principals authenticate, token lifecycles, libraries used, and how secrets are managed.\n\n## Authorization\nDescribe the authorization model, enforcement points, privileged roles, and escalation paths.\n\n## Data Classification\nIdentify sensitive data types handled by the project and where they are stored or transmitted.\n\n## Infrastructure and Deployment\nSummarize infrastructure-as-code, runtime platforms, and configuration or secret handling that affects security posture.\n\n## API Entry Points\nList externally reachable interfaces (HTTP/gRPC endpoints, message queues, CLIs, SDK methods) and how they handle security.\n\n### Server APIs\nProvide a markdown table with the exact columns:\n- endpoint path\n- authN method\n- authZ type\n- request parameters\n- example request (params, body, or method)\n- code location\n- parsing/validation logic\nIf the project exposes no server APIs, write `- None identified.` instead of a table.\n\n### Client APIs (optional)\nInclude a markdown table when the project ships an SDK, CLI, or other callable client surface. Columns:\n- api name (module.func or Class.method)\n- module/package\n- summary\n- parameters (omit if noisy)\n- returns (omit if noisy)\n- stability (public/official/internal)\n- code location\nIf there is no public client surface, state `- None.`\n";
|
||||
/// Markdown template for the combined, whole-project specification built by
/// merging the per-location drafts (used with `SPEC_COMBINE_PROMPT_TEMPLATE`).
pub(crate) const SPEC_COMBINED_MARKDOWN_TEMPLATE: &str = r#"# Project Specification
Provide a 2–3 sentence executive overview summarizing the system's purpose, primary users, and the highest-value assets or flows that matter for security.

## Relevant Source Files
List bullet points for the key files and directories covered by the drafts. Use inline code formatting for paths (for example, `src/service.rs`) and briefly note what each covers. Ensure every draft's location label appears at least once.

## Architecture Components and Flow
Provide a concise overview of how control and data move through the system, highlighting major services, external dependencies, and trust boundaries.
Include exactly one overarching mermaid diagram here that captures the end-to-end flow (no per-component or sequence diagrams in this section).
Move any detailed or per-component diagrams to the relevant component subsections below.
If the specification contains additional mermaid diagrams, add a `title Component request flow` line (with a descriptive label) inside each diagram so the rendered report labels them distinctly.
End with a `Sources:` line enumerating the files or modules that support this description.

## Core Components
Create `### <Component name>` subsections for the 4–8 major components, using sensible parent folder or service names (for example, `service-a/`, `packages/foo`, or `cometset-gateway/cometset_gateway`). Avoid file- or module-level subsections and do not title components after specific file paths. Do not create separate subsections for generic concepts like "Data Models" or individual routers/controllers; fold such details into the relevant component's bullets if truly necessary.
Within each subsection, provide bullet points covering:
- Role or responsibility
- Key dependencies and integrations
- Security-relevant behavior or controls
Place any detailed flows or sequence diagrams for that component here (not in the Architecture section) when they clarify behavior.
End every subsection with a line that starts with `Sources:` referencing the supporting directories (prefer directories over individual file paths).

## External Interfaces
Detail HTTP/gRPC endpoints, CLI commands, message queues, or other integration points. Use markdown tables when listing multiple endpoints and note required authentication/authorization and input validation.
Include a `Sources:` line referencing the defining modules.

## Data Classification
Summarize sensitive data types, storage locations, retention policies, and encryption/transport guarantees. Prefer markdown tables that consolidate the drafts' entries when possible.
Include a `Sources:` line showing where each data entry was documented.

## Security Controls
Organize subsections as `### Authentication`, `### Authorization`, `### Secrets`, and `### Auditing & Observability` when applicable. For each, explain mechanisms, critical libraries, enforcement points, and failure handling.
Each subsection must end with a `Sources:` line citing the relevant files.

## Operational Considerations
Discuss deployment topology, runtime dependencies, background jobs, scaling, resiliency patterns, and monitoring or alerting hooks. Call out infrastructure-as-code or runtime configuration that affects security posture.
Include a `Sources:` line referencing infrastructure or operational files.

"#;
|
||||
|
||||
// Threat model prompts
|
||||
/// System prompt for threat-model generation from the architecture
/// specification and repository summary.
pub(crate) const THREAT_MODEL_SYSTEM_PROMPT: &str = "You are a senior application security engineer preparing a threat model. Use the provided architecture specification and repository summary to enumerate realistic threats, prioritised by risk.";
|
||||
/// User prompt for the threat-model task. Placeholders:
/// `{repository_summary}`, `{combined_spec}`, `{locations}`. Output is a
/// summary paragraph plus a `Threat Model` markdown table.
pub(crate) const THREAT_MODEL_PROMPT_TEMPLATE: &str = "# Repository Summary\n{repository_summary}\n\n# Architecture Specification\n{combined_spec}\n\n# In-Scope Locations\n{locations}\n\n# Task\nConstruct a concise threat model for the system. Focus on meaningful attacker goals and concrete impacts.\n\n## Output Requirements\n- Start with a short paragraph summarising the most important threat themes and high-risk areas.\n- Follow with a markdown table named `Threat Model` with columns: `Threat ID`, `Threat source`, `Prerequisites`, `Threat action`, `Threat impact`, `Impacted assets`, `Priority`, `Recommended mitigations`.\n- Use integer IDs starting at 1. Priority must be one of high, medium, low.\n- Keep prerequisite and mitigation text succinct (single sentence each).\n- Do not include any other sections or commentary outside the summary paragraph and table.\n";
|
||||
|
||||
// Bug analysis prompts
|
||||
/// System prompt for the bug-analysis pass: demands concrete exploitable
/// findings with file/line citations, dataflow, severity, a `TAXONOMY:` JSON
/// line, and the exact fallback `no bugs found` when nothing is found.
pub(crate) const BUGS_SYSTEM_PROMPT: &str = "You are an application security engineer reviewing a codebase.\nYou read the provided project context and code excerpts to identify concrete, exploitable security vulnerabilities.\nFor each vulnerability you find, produce a thorough, actionable write-up that a security team could ship directly to engineers.\n\nStrict requirements:\n- Only report real vulnerabilities with a plausible attacker-controlled input and a meaningful impact.\n- Quote exact file paths and GitHub-style line fragments, e.g. `src/server/auth.ts#L42-L67`.\n- Provide dataflow analysis (source, propagation, sink) where relevant.\n- Include a severity rating (high, medium, low, ignore) plus impact and likelihood reasoning.\n- Include a taxonomy line exactly as `TAXONOMY: {...}` containing JSON with keys vuln_class, cwe_ids[], owasp_categories[], vuln_tag.\n- If you cannot find a security-relevant issue, respond with exactly `no bugs found`.\n- Do not invent commits or authors if unavailable; leave fields blank instead.\n- Keep the response in markdown.";
|
||||
|
||||
// The body of the bug analysis user prompt that follows the repository summary.
|
||||
/// Body of the bug-analysis user prompt that follows the repository summary.
/// `{code_context}` is replaced with the code excerpts under review.
/// NOTE(review): the doubled braces in the TAXONOMY line (`{{`/`}}`) look like
/// escapes for the template-substitution step — confirm against the caller.
pub(crate) const BUGS_USER_CODE_AND_TASK: &str = r#"
# Code excerpts
{code_context}

# Task
Evaluate the project for concrete, exploitable security vulnerabilities. Prefer precise, production-relevant issues to theoretical concerns.

Follow these rules:
- Read this file in full and review the provided context to understand intended behavior before judging safety.
- Start locally: prefer `READ` to open the current file and its immediate neighbors (imports, same directory/module, referenced configs) before using `GREP_FILES`. Use `GREP_FILES` only when you need to locate unknown files across the repository.
- When you reference a function, method, or class, look up its definition and usages across files: search by the identifier, then open the definition and a few call sites to verify behavior end-to-end.
- Use the search tools below to inspect additional in-scope files when tracing data flows or confirming a hypothesis; cite the relevant variables, functions, and any validation or sanitization steps you discover.
- Trace attacker-controlled inputs through the call graph to the ultimate sink. Highlight any sanitization or missing validation along the way.
- Ignore unit tests, example scripts, or tooling unless they ship to production in this repo.
- Only report real vulnerabilities that an attacker can trigger with meaningful impact. If none are found, respond with exactly `no bugs found` (no additional text).
- Quote code snippets and locations using GitHub-style ranges (e.g. `src/service.rs#L10-L24`). Include git blame details when you have them: `<short-sha> <author> <YYYY-MM-DD> L<start>-L<end>`.
- Keep all output in markdown and avoid generic disclaimers.
- If you need more repository context, request it explicitly while staying within the provided scope:
- Prefer `READ: <relative path>` to inspect specific files (start with the current file and immediate neighbors).
- Use `SEARCH: literal:<identifier>` or `SEARCH: regex:<pattern>` to locate definitions and call sites across files; then `READ` the most relevant results to confirm the dataflow.
- Use `GREP_FILES: {"pattern":"needle","include":"*.rs","path":"subdir","limit":200}` to discover candidate locations across the repository; prefer meaningful identifiers over generic terms.

# Output format
For each vulnerability, emit a markdown block:

### <short title>
- **File & Lines:** `<relative path>#Lstart-Lend`
- **Severity:** <high|medium|low|ignore>
- **Impact:** <concise impact analysis>
- **Likelihood:** <likelihood analysis>
- **Description:** Detailed narrative with annotated code references explaining the bug.
- **Snippet:** Fenced code block (specify language) showing only the relevant lines with inline comments or numbered markers that you reference in the description.
- **Dataflow:** Describe sources, propagation, sanitization, and sinks using relative paths and `L<start>-L<end>` ranges.
- **PoC:** Concrete steps or payload to reproduce (or `n/a` if infeasible).
- **Recommendation:** Actionable remediation guidance.
- **Verification Type:** JSON array subset of ["network_api", "crash_poc", "web_browser"].
- TAXONOMY: {{"vuln_class": "...", "cwe_ids": [...], "owasp_categories": [...], "vuln_tag": "..."}}

Ensure severity selections are justified by the described impact and likelihood."#;
|
||||
|
||||
// Bug rerank prompts
|
||||
/// System prompt for re-ranking findings by customer-facing risk; replies
/// must be JSON Lines only.
pub(crate) const BUG_RERANK_SYSTEM_PROMPT: &str = "You are a senior application security engineer triaging review findings. Reassess customer-facing risk using the supplied repository context and previously generated specs. Only respond with JSON Lines.";
|
||||
/// User prompt for re-ranking findings. Placeholders: `{repository_summary}`,
/// `{spec_excerpt}`, `{findings}`. The reply is one JSON object per finding,
/// in input order, with risk_score/severity/reason.
/// NOTE(review): the doubled braces in the output-format line (`{{`/`}}`)
/// look like escapes for the template-substitution step — confirm in caller.
pub(crate) const BUG_RERANK_PROMPT_TEMPLATE: &str = r#"
Repository summary (trimmed):
{repository_summary}

Spec excerpt (trimmed; pull in concrete details or note if unavailable):
{spec_excerpt}

Examples:
- External unauthenticated remote code execution on a production API ⇒ risk_score 95, severity "High", reason "unauth RCE takeover".
- Stored XSS on user dashboards that leaks session tokens ⇒ risk_score 72, severity "High", reason "persistent session theft".
- Originally escalated CSRF on an internal admin tool behind SSO ⇒ risk_score 28, severity "Low", reason "internal-only with SSO".
- Header injection in a deprecated endpoint with response sanitization ⇒ risk_score 18, severity "Informational", reason "sanitized legacy endpoint".
- Static analysis high alert that only touches dead code ⇒ risk_score 10, severity "Informational", reason "dead code path".
- High-severity SQL injection finding that uses fully parameterized queries ⇒ risk_score 20, severity "Low", reason "parameterized queries".
- SSRF flagged as critical but the target requires internal metadata access tokens ⇒ risk_score 24, severity "Low", reason "internal metadata token".
- Critical-looking command injection in an internal-only CLI guarded by SSO and audited logging ⇒ risk_score 22, severity "Low", reason "internal CLI".
- Reported secret leak found in sample dev config with rotate-on-startup hook ⇒ risk_score 12, severity "Informational", reason "sample config only".

# Available tools
- READ: respond with `READ: <relative path>#Lstart-Lend` (range optional) to inspect specific source code.
- SEARCH: respond with `SEARCH: literal:<term>` or `SEARCH: regex:<pattern>` to run ripgrep over the repository root (returns colored matches with line numbers).
- GREP_FILES: respond with `GREP_FILES: {"pattern":"needle","include":"*.rs","path":"subdir","limit":200}` to list files whose contents match, ordered by modification time.
- Issue at most one tool command per round and wait for the tool output before continuing. Reuse earlier tool outputs when possible.

Instructions:
- Output severity **only** from ["High","Medium","Low","Informational"]. Map "critical"/"p0" to "High".
- Produce `risk_score` between 0-100 (higher means greater customer impact) and use the full range for comparability.
- Review the repository summary, spec excerpt, blame metadata, and file locations before requesting anything new; reuse existing specs or context attachments when possible.
- If you still lack certainty, request concrete follow-up (e.g., repo_search, read_file, git blame) in the reason and cite the spec section you need.
- Reference concrete evidence (spec section, tool name, log line) in the reason when you confirm mitigations or reclassify a finding.
- Prefer reusing existing tool outputs and cached specs before launching new expensive calls; only request fresh tooling when the supplied artifacts truly lack the needed context.
- Down-rank issues when mitigations or limited blast radius materially reduce customer risk, even if the initial triage labeled them "High".
- Upgrade issues when exploitability or exposure was understated, or when multiple components amplify the blast radius.
- Respond with one JSON object per finding, **in the same order**, formatted exactly as:
{{"id": <number>, "risk_score": <0-100>, "severity": "<High|Medium|Low|Informational>", "reason": "<≤12 words>"}}

Findings:
{findings}
"#;
|
||||
|
||||
// File triage prompts
|
||||
/// System prompt for triaging which source files deserve deep security
/// review; instructs the model to err toward inclusion when uncertain.
pub(crate) const FILE_TRIAGE_SYSTEM_PROMPT: &str = "You are an application security engineer triaging source files to decide which ones warrant deep security review.\nFocus on entry points, authentication and authorization, network or process interactions, secrets handling, and other security-sensitive functionality.\nWhen uncertain, err on the side of including a file for further analysis.";
|
||||
/// User prompt for file triage: one JSON line per candidate file with
/// `id`/`include`/`reason`. `{files}` is replaced with the candidate list.
pub(crate) const FILE_TRIAGE_PROMPT_TEMPLATE: &str = "You will receive JSON objects describing candidate files from a repository. For each object, output a single JSON line with the same `id`, a boolean `include`, and a short `reason`.\n- Use include=true for files that likely influence production behaviour, handle user input, touch the network/filesystem, perform authentication/authorization, execute commands, or otherwise impact security.\n- Use include=false for files that are clearly documentation, tests, generated artefacts, or otherwise irrelevant to security review.\n\nReply with one JSON object per line in this exact form:\n{\"id\": <number>, \"include\": true|false, \"reason\": \"...\"}\n\nFiles:\n{files}";
|
||||
1213
codex-rs/tui/src/security_report_assets/highlight.min.js
vendored
Normal file
1213
codex-rs/tui/src/security_report_assets/highlight.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
6
codex-rs/tui/src/security_report_assets/marked.min.js
vendored
Normal file
6
codex-rs/tui/src/security_report_assets/marked.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
2029
codex-rs/tui/src/security_report_assets/mermaid.min.js
vendored
Normal file
2029
codex-rs/tui/src/security_report_assets/mermaid.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1800
codex-rs/tui/src/security_report_assets/script.js
Normal file
1800
codex-rs/tui/src/security_report_assets/script.js
Normal file
File diff suppressed because it is too large
Load Diff
439
codex-rs/tui/src/security_report_assets/styles.css
Normal file
439
codex-rs/tui/src/security_report_assets/styles.css
Normal file
@@ -0,0 +1,439 @@
|
||||
/* Light-theme design tokens (colors, shadows, scrollbar, severity palette). */
:root {
  --bg: #ffffff;
  --bg-elev: #fafafa;
  --bg-muted: #f5f5f7;
  --text: #1f2937;
  --text-muted: #4b5563;
  --border: #e5e7eb;
  --accent: #2563eb;
  --accent-hover: #1d4ed8;
  --shadow: 0 4px 10px rgba(0, 0, 0, 0.06);
  --inline-code-bg: rgba(2, 6, 23, 0.06);
  --inline-code-color: #d73a49; /* GitHub-like reddish for inline code (light) */
  /* Scrollbar */
  --scroll-thumb: rgba(100, 116, 139, 0.45);
  --scroll-thumb-hover: rgba(100, 116, 139, 0.7);
  --scroll-track: transparent;
  /* Severity palette (light) */
  --sev-high: #dc2626; /* red-600 */
  --sev-high-bg: #fee2e2; /* red-100 */
  --sev-med: #eab308; /* yellow-500 */
  --sev-med-bg: #fef9c3; /* yellow-100 */
  --sev-low: #059669; /* emerald-600 */
  --sev-low-bg: #d1fae5; /* emerald-100 */
  --sev-ign: #64748b; /* slate-500 */
  --sev-ign-bg: #e5e7eb; /* gray-200 */
}
|
||||
|
||||
/* Dark-theme overrides for the same token set as :root. */
[data-theme="dark"] {
  --bg: #0b0f14;
  --bg-elev: #0f141b;
  --bg-muted: #121923;
  --text: #e5e7eb;
  --text-muted: #cbd5e1;
  --border: #202a36;
  --accent: #3b82f6;
  --accent-hover: #60a5fa;
  --shadow: 0 4px 10px rgba(0, 0, 0, 0.35);
  --inline-code-bg: rgba(148, 163, 184, 0.18);
  --inline-code-color: #ffa657; /* GitHub dark theme orange for inline code */
  /* Scrollbar */
  --scroll-thumb: rgba(148, 163, 184, 0.4);
  --scroll-thumb-hover: rgba(148, 163, 184, 0.65);
  --scroll-track: transparent;
  /* Severity palette (dark) */
  --sev-high: #f87171; /* red-400 */
  --sev-high-bg: rgba(248, 113, 113, 0.12);
  --sev-med: #facc15; /* yellow-400 */
  --sev-med-bg: rgba(250, 204, 21, 0.12);
  --sev-low: #34d399; /* emerald-400 */
  --sev-low-bg: rgba(52, 211, 153, 0.12);
  --sev-ign: #94a3b8; /* slate-400 */
  --sev-ign-bg: rgba(148, 163, 184, 0.12);
}
|
||||
|
||||
/* Base element styles, scrollbars, top bar, sidebar layout, and content
   typography for the security report page. */
* { box-sizing: border-box; }
html, body { height: 100%; }
body {
  margin: 0;
  color: var(--text);
  background: var(--bg);
  font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, Noto Sans, "Apple Color Emoji", "Segoe UI Emoji";
  line-height: 1.6;
}

/* Minimal, pretty scrollbars (Firefox + WebKit) */
html {
  scrollbar-width: thin; /* Firefox */
  scrollbar-color: var(--scroll-thumb) var(--scroll-track);
}
.toc-inner, pre, .chat-panel, .job-progress .jp-logs, .content, body {
  scrollbar-width: thin; /* Firefox for scrollable containers */
  scrollbar-color: var(--scroll-thumb) var(--scroll-track);
}
*::-webkit-scrollbar {
  width: 10px;
  height: 10px;
}
*::-webkit-scrollbar-track {
  background: var(--scroll-track);
}
*::-webkit-scrollbar-thumb {
  background-color: var(--scroll-thumb);
  border-radius: 999px;
  border: 2px solid transparent;
  background-clip: padding-box;
}
*::-webkit-scrollbar-thumb:hover {
  background-color: var(--scroll-thumb-hover);
}

a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }

/* Global nav links */
.nav-links { display:flex; gap:10px; }
.nav-links a { padding:6px 10px; border-radius:8px; color:var(--text); border:1px solid var(--border); background:var(--bg); }
.nav-links a.active, .nav-links a:hover { background: var(--bg-elev); text-decoration: none; }

.topbar {
  position: sticky;
  top: 0;
  z-index: 30;
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 10px 16px;
  border-bottom: 1px solid var(--border);
  background: var(--bg-elev);
  backdrop-filter: saturate(180%) blur(8px);
}
.brand { display: flex; align-items: center; gap: 10px; }
.site-path { font-weight: 600; color: var(--text); }

.nav-toggle {
  display: inline-flex;
  border: 1px solid var(--border);
  background: transparent;
  border-radius: 8px;
  padding: 4px 6px;
  align-items: center;
  justify-content: center;
}

.top-actions { display: flex; align-items: center; gap: 6px; }

.search-input {
  border: 1px solid var(--border);
  background: var(--bg);
  color: var(--text);
  padding: 8px 10px;
  border-radius: 10px;
  outline: none;
  width: min(260px, 40vw);
}
.search-input:focus { box-shadow: var(--shadow); }

/* Sidebar placement of the search */
.toc-inner .toc-search { padding: 6px 2px 8px; }
.toc-inner .search-input {
  width: 100%;
}

.btn {
  border: 1px solid var(--border);
  background: var(--bg);
  color: var(--text);
  padding: 6px 10px;
  border-radius: 8px;
  cursor: pointer;
  display: inline-flex;
  align-items: center;
  gap: 6px;
  line-height: 1.2;
  font: inherit;
  margin: 0;
}
.btn:hover { box-shadow: var(--shadow); }
.btn.primary {
  background: var(--accent);
  color: white;
  border-color: transparent;
}
.btn.primary:hover { background: var(--accent-hover); }
.icon-btn { border: none; background: transparent; color: var(--text); cursor: pointer; border-radius: 8px; padding: 4px; }
.icon-btn:hover { background: var(--bg-muted); }
.btn.small { padding: 3px 8px; font-size: 12px; border-radius: 8px; }

.layout {
  display: grid;
  grid-template-columns: 280px minmax(0, 1fr);
  gap: 0;
  width: 100%;
  max-width: 100%;
  margin: 0;
}

/* Collapsible left sidebar: keep a thin rail with the toggle */
body.sidebar-collapsed .layout { grid-template-columns: 44px minmax(0, 1fr); }
body.sidebar-collapsed .sidebar { display: block; }
body.sidebar-collapsed .toc-inner {
  padding: 8px 6px;
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 6px;
}
body.sidebar-collapsed .toc-inner .nav-title { width: 100%; display: flex; align-items: center; justify-content: center !important; }
body.sidebar-collapsed .toc-inner .nav-title span { display: none; }
body.sidebar-collapsed .toc-inner .toc-search,
body.sidebar-collapsed .toc-inner #jobProgressHost,
body.sidebar-collapsed .toc-inner #tocList { display: none; }
body.sidebar-collapsed .nav-toggle { padding: 6px; }

.sidebar {
  min-height: calc(100vh - 56px);
  position: relative;
}
.sidebar.right { border-right: 1px solid var(--border); border-left: none; grid-column: 1; grid-row: 1; }

.nav-inner, .toc-inner {
  position: sticky;
  top: 56px;
  padding: 10px 8px;
  height: calc(100vh - 56px);
  overflow: auto;
  background: var(--bg-muted);
}

.nav-title {
  font-size: 11px;
  text-transform: uppercase;
  letter-spacing: 0.08em;
  color: var(--text-muted);
  margin-bottom: 6px;
}

.content {
  padding: 24px 40px;
  background: var(--bg);
  grid-column: 2;
  grid-row: 1;
}
.content.editing { outline: 2px dashed var(--border); outline-offset: -2px; background: var(--bg-elev); }
.content > *:first-child { margin-top: 0; }

.content h1 { font-size: 2.2rem; margin: 0.6em 0 0.4em; line-height: 1.2; }
.content h2 { font-size: 1.6rem; margin: 1.6em 0 0.5em; border-top: 1px solid var(--border); padding-top: 1em; }
.content h3 { font-size: 1.25rem; margin: 1.2em 0 0.4em; }
.content h4 { font-size: 1.05rem; margin: 1em 0 0.3em; }
.content h5 { font-size: 0.95rem; margin: 0.9em 0 0.25em; }
.content h6 { font-size: 0.9rem; margin: 0.8em 0 0.2em; }
.content p { color: var(--text); margin: 0.7em 0; }
|
||||
.content :not(pre) > code { background: transparent; color: inherit; padding: 0.15em 0.35em; border-radius: 6px; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; }
|
||||
/* Highlight only explicit inline code marks */
|
||||
.content code.inline-code { background: var(--inline-code-bg); }
|
||||
/* Only color inline code in typical body contexts (not headings or bold text) */
|
||||
/* Inline code color applied only when JS marks it explicitly */
|
||||
.content code.inline-code { color: var(--inline-code-color); }
|
||||
.content code { font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace; }
|
||||
.content pre { background: var(--bg-muted); border: 1px solid var(--border); border-radius: 10px; padding: 14px; overflow: auto; position: relative; }
|
||||
.content pre code { background: transparent; padding: 0; }
|
||||
.content .mermaid { display: block; margin: 1em auto; background: var(--bg-elev); border: 1px solid var(--border); border-radius: 10px; padding: 12px; }
|
||||
.content ul, .content ol { padding-left: 1.2em; }
|
||||
.content ul.task-list { list-style: none; padding-left: 0; }
|
||||
.content .task-list-item { display: flex; align-items: flex-start; gap: 8px; padding: 4px 0; }
|
||||
.content .task-list-item input[type="checkbox"] { margin-top: 3px; }
|
||||
.content del { opacity: 0.7; }
|
||||
.copy-btn {
|
||||
position: absolute; top: 8px; right: 8px;
|
||||
font-size: 12px; border: 1px solid var(--border); background: var(--bg); color: var(--text);
|
||||
border-radius: 8px; padding: 4px 8px; cursor: pointer;
|
||||
}
|
||||
.copy-btn:hover { box-shadow: var(--shadow); }
|
||||
|
||||
/* Bug ticket widget */
|
||||
.ticket-box {
|
||||
margin: 12px 0 8px;
|
||||
padding: 10px 12px;
|
||||
border: 1px solid var(--border);
|
||||
background: var(--bg-elev);
|
||||
border-radius: 10px;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
align-items: center;
|
||||
gap: 10px 12px;
|
||||
}
|
||||
.ticket-box label { font-size: 12px; color: var(--text-muted); display: flex; align-items: center; gap: 6px; }
|
||||
.ticket-box input[type="text"], .ticket-box select {
|
||||
border: 1px solid var(--border);
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
border-radius: 8px;
|
||||
padding: 6px 8px;
|
||||
}
|
||||
.ticket-box .ticket-btn { border: 1px solid transparent; background: var(--accent); color: #fff; border-radius: 8px; padding: 8px 10px; cursor: pointer; }
|
||||
.ticket-box .ticket-btn:hover { background: var(--accent-hover); }
|
||||
.ticket-box .ticket-status { font-size: 12px; color: var(--text-muted); }
|
||||
|
||||
/* (Reverted) per-bug floating FAB styles removed */
|
||||
|
||||
/* Severity badges */
|
||||
.badge {
|
||||
display: inline-block;
|
||||
font-size: 12px;
|
||||
line-height: 1;
|
||||
padding: 4px 8px;
|
||||
border-radius: 999px;
|
||||
font-weight: 600;
|
||||
border: 1px solid transparent;
|
||||
}
|
||||
.sev-high { color: var(--sev-high); background: var(--sev-high-bg); border-color: transparent; }
|
||||
.sev-med { color: var(--sev-med); background: var(--sev-med-bg); border-color: transparent; }
|
||||
.sev-low { color: var(--sev-low); background: var(--sev-low-bg); border-color: transparent; }
|
||||
.sev-ign { color: var(--sev-ign); background: var(--sev-ign-bg); border-color: transparent; }
|
||||
|
||||
#navList, #tocList { list-style: none; padding: 0; margin: 0; }
|
||||
#tocList ul { list-style: none; padding-left: 0; margin: 0; }
|
||||
#navList a, #tocList a {
|
||||
display: block; padding: 4px 6px; border-radius: 6px; color: var(--text);
|
||||
}
|
||||
#navList a:hover, #tocList a:hover { background: var(--bg-elev); text-decoration: none; }
|
||||
.nav-item { font-size: 14px; }
|
||||
.nav-item.depth-2 { padding-left: 12px; }
|
||||
.nav-item.depth-3 { padding-left: 24px; }
|
||||
.nav-item.depth-4 { padding-left: 36px; }
|
||||
.nav-item.depth-5 { padding-left: 48px; }
|
||||
.nav-item.depth-6 { padding-left: 60px; }
|
||||
.nav-item.active > a, .toc-item.active > a { font-weight: 700; color: var(--accent); }
|
||||
.nav-item.active > a { border-left: 3px solid var(--accent); margin-left: -3px; }
|
||||
|
||||
/* Right TOC enhanced selection visuals */
|
||||
.toc-item > a { position: relative; transition: background 160ms ease, color 160ms ease, padding-left 160ms ease; font-size: 13px; line-height: 1.3; }
|
||||
.toc-item.depth-1 > a { font-weight: 700; }
|
||||
.toc-item.depth-2 { padding-left: 14px; }
|
||||
.toc-item.depth-3 { padding-left: 28px; }
|
||||
.toc-item.depth-4 { padding-left: 42px; }
|
||||
.toc-item.depth-5 { padding-left: 56px; }
|
||||
.toc-item.depth-6 { padding-left: 70px; }
|
||||
.toc-item.active > a {
|
||||
color: var(--accent);
|
||||
background: color-mix(in srgb, var(--accent) 12%, transparent);
|
||||
padding-left: 10px;
|
||||
}
|
||||
.toc-item.active > a::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
left: 0;
|
||||
top: 4px;
|
||||
bottom: 4px;
|
||||
width: 3px;
|
||||
border-radius: 2px;
|
||||
background: var(--accent);
|
||||
}
|
||||
.toc-item > a:focus-visible {
|
||||
outline: 2px solid color-mix(in srgb, var(--accent) 50%, transparent);
|
||||
outline-offset: 2px;
|
||||
}
|
||||
|
||||
.content table { width: 100%; border-collapse: collapse; margin: 1em 0; }
|
||||
.content th, .content td { border: 1px solid var(--border); padding: 8px 10px; }
|
||||
.content thead th { background: var(--bg-muted); font-weight: 700; }
|
||||
.content tbody tr:nth-child(even) { background: var(--bg-elev); }
|
||||
|
||||
.content blockquote { border-left: 4px solid var(--border); margin: 1em 0; padding: 0.6em 1em; background: var(--bg-elev); border-radius: 6px; }
|
||||
.callout { border: 1px solid var(--border); border-left: 4px solid var(--accent); background: var(--bg-elev); border-radius: 10px; overflow: hidden; }
|
||||
.callout .callout-header { display: flex; align-items: center; gap: 8px; padding: 8px 12px; cursor: pointer; user-select: none; font-weight: 600; }
|
||||
.callout .callout-body { padding: 0 12px 12px; display: none; }
|
||||
.callout.open .callout-body { display: block; animation: fadeIn 160ms ease-in; }
|
||||
@keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } }
|
||||
|
||||
.footer { border-top: 1px solid var(--border); color: var(--text-muted); font-size: 14px; padding: 16px; background: var(--bg-elev); }
|
||||
|
||||
.drop-overlay { position: fixed; inset: 0; background: rgba(37, 99, 235, 0.08); border: 2px dashed var(--accent); display: none; align-items: center; justify-content: center; z-index: 50; }
|
||||
.drop-overlay .drop-message { background: var(--bg); color: var(--text); padding: 16px 22px; border-radius: 12px; border: 1px dashed var(--accent); box-shadow: var(--shadow); }
|
||||
.drop-overlay.show { display: flex; }
|
||||
|
||||
@media (max-width: 1100px) {
|
||||
.layout { grid-template-columns: 220px minmax(0, 1fr); }
|
||||
.sidebar.right { display: block; }
|
||||
.content { grid-column: 2; }
|
||||
}
|
||||
|
||||
@media (max-width: 800px) {
|
||||
.layout { grid-template-columns: 200px minmax(0, 1fr); }
|
||||
.sidebar { display: block; }
|
||||
.content { padding: 14px; grid-column: 2; }
|
||||
}
|
||||
|
||||
/* Fullscreen viewer mode */
|
||||
body.fullviewer .topbar { display: none; }
|
||||
body.fullviewer .layout { grid-template-columns: minmax(0, 1fr) !important; }
|
||||
body.fullviewer .sidebar { display: none !important; }
|
||||
body.fullviewer .content { grid-column: 1; padding: 20px 28px; }
|
||||
|
||||
/* Floating ChatGPT widget */
|
||||
.chat-widget { position: fixed; left: 50%; transform: translateX(-50%); bottom: 20px; z-index: 70; }
|
||||
.chat-toggle {
|
||||
border: 1px solid transparent;
|
||||
background: linear-gradient(135deg, var(--accent), color-mix(in srgb, var(--accent) 75%, #fff));
|
||||
color: #fff; border-radius: 999px; padding: 12px 18px; cursor: pointer; box-shadow: var(--shadow);
|
||||
font-weight: 700; letter-spacing: .02em;
|
||||
transition: transform .12s ease, box-shadow .12s ease, filter .2s ease;
|
||||
}
|
||||
.chat-toggle:hover { transform: translateY(-1px); filter: brightness(1.04); }
|
||||
.chat-panel {
|
||||
position: absolute; left: 50%; transform: translate(-50%, -12px);
|
||||
bottom: 52px; width: min(680px, 94vw);
|
||||
background: color-mix(in srgb, var(--bg-elev) 85%, transparent);
|
||||
color: var(--text);
|
||||
border: 1px solid var(--border); border-radius: 14px; box-shadow: var(--shadow);
|
||||
padding: 12px; display: grid; gap: 10px; grid-template-rows: auto auto 1fr auto auto;
|
||||
backdrop-filter: blur(6px) saturate(120%);
|
||||
}
|
||||
.chat-panel[hidden] { display: none !important; }
|
||||
.chat-panel:not([hidden]) { animation: popIn 140ms ease-out; }
|
||||
@keyframes popIn { from { opacity: 0; transform: translate(-50%, 0) scale(.98); } to { opacity: 1; transform: translate(-50%, -12px) scale(1); } }
|
||||
.chat-header { display: flex; align-items: center; justify-content: space-between; padding: 4px 2px; }
|
||||
.chat-title { font-weight: 800; font-size: 14px; color: var(--text); letter-spacing: 0.02em; }
|
||||
.chat-close { border: none; background: transparent; color: var(--text); font-size: 18px; cursor: pointer; }
|
||||
.chat-controls { display: flex; gap: 10px; align-items: center; }
|
||||
.chat-controls label { display: flex; gap: 8px; align-items: center; font-size: 12px; color: var(--text-muted); }
|
||||
#chatModel { border: 1px solid var(--border); background: var(--bg); color: var(--text); border-radius: 10px; padding: 6px 10px; }
|
||||
#chatInput { width: 100%; min-height: 90px; border: 1px solid var(--border); background: var(--bg); color: var(--text); border-radius: 12px; padding: 10px 12px; resize: vertical; font-family: inherit; box-shadow: inset 0 1px 0 rgba(0,0,0,.02); }
|
||||
.chat-actions { display: flex; align-items: center; gap: 8px; }
|
||||
.chat-actions .spacer { flex: 1 1 auto; }
|
||||
.include-context { font-size: 12px; color: var(--text-muted); }
|
||||
.chat-status { font-size: 12px; color: var(--text-muted); min-height: 1em; }
|
||||
|
||||
/* Sidebar job progress card */
|
||||
.job-progress { border: 1px solid var(--border); background: var(--bg-elev); border-radius: 10px; padding: 10px; margin: 6px 0 10px; }
|
||||
.job-progress .jp-title { display:flex; align-items:center; justify-content:space-between; gap:8px; font-weight:700; font-size:13px; }
|
||||
.job-progress .jp-status { font-size: 12px; color: var(--text-muted); margin-top: 4px; }
|
||||
.job-progress .jp-bar { height: 6px; background: var(--bg); border:1px solid var(--border); border-radius: 999px; overflow:hidden; margin-top: 8px; }
|
||||
.job-progress .jp-bar > span { display:block; height:100%; width:0%; background: var(--accent); transition: width .2s ease; }
|
||||
.job-progress .jp-logs { font-size: 12px; max-height: 120px; overflow:auto; margin-top:8px; }
|
||||
|
||||
/* Modal dialog */
|
||||
.modal-overlay { position: fixed; inset: 0; background: rgba(0,0,0,0.35); backdrop-filter: blur(2px); display: none; align-items: center; justify-content: center; z-index: 80; }
|
||||
.modal-overlay.show { display: flex; }
|
||||
.modal { width: min(700px, 94vw); background: var(--bg-elev); color: var(--text); border: 1px solid var(--border); border-radius: 14px; box-shadow: var(--shadow); padding: 12px; }
|
||||
.modal-header { display:flex; align-items:center; justify-content:space-between; padding: 6px 4px 10px; }
|
||||
.modal-title { font-weight: 800; letter-spacing: .02em; }
|
||||
.modal-body { display:grid; gap: 10px; }
|
||||
.modal-row { display:flex; gap: 10px; align-items:center; }
|
||||
.modal-row label { min-width: 160px; font-size: 12px; color: var(--text-muted); }
|
||||
.modal-row input, .modal-row select { flex: 1; border:1px solid var(--border); background:var(--bg); color:var(--text); border-radius:10px; padding:8px 10px; }
|
||||
.modal-actions { display:flex; gap:8px; justify-content:flex-end; margin-top: 12px; }
|
||||
|
||||
/* highlight.js GitHub theme */
|
||||
pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}/*!
|
||||
Theme: GitHub
|
||||
Description: Light theme as seen on github.com
|
||||
Author: github.com
|
||||
Maintainer: @Hirse
|
||||
Updated: 2021-05-15
|
||||
|
||||
Outdated base version: https://github.com/primer/github-syntax-light
|
||||
Current colors taken from GitHub's CSS
|
||||
*/.hljs{color:#24292e;background:#fff}.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-template-tag,.hljs-template-variable,.hljs-type,.hljs-variable.language_{color:#d73a49}.hljs-title,.hljs-title.class_,.hljs-title.class_.inherited__,.hljs-title.function_{color:#6f42c1}.hljs-attr,.hljs-attribute,.hljs-literal,.hljs-meta,.hljs-number,.hljs-operator,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-variable{color:#005cc5}.hljs-meta .hljs-string,.hljs-regexp,.hljs-string{color:#032f62}.hljs-built_in,.hljs-symbol{color:#e36209}.hljs-code,.hljs-comment,.hljs-formula{color:#6a737d}.hljs-name,.hljs-quote,.hljs-selector-pseudo,.hljs-selector-tag{color:#22863a}.hljs-subst{color:#24292e}.hljs-section{color:#005cc5;font-weight:700}.hljs-bullet{color:#735c0f}.hljs-emphasis{color:#24292e;font-style:italic}.hljs-strong{color:#24292e;font-weight:700}.hljs-addition{color:#22863a;background-color:#f0fff4}.hljs-deletion{color:#b31d28;background-color:#ffeef0}
|
||||
123
codex-rs/tui/src/security_report_viewer.rs
Normal file
123
codex-rs/tui/src/security_report_viewer.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
|
||||
// Static assets for the standalone security-report HTML page. `include_str!`
// embeds each file's contents at compile time, so the generated report is a
// single self-contained document with no network or filesystem dependencies.
const REPORT_STYLES: &str = include_str!("security_report_assets/styles.css");
const REPORT_SCRIPT: &str = include_str!("security_report_assets/script.js");
// Vendored third-party libraries: markdown rendering, syntax highlighting,
// and diagram support, inlined for offline viewing.
const MARKED_JS: &str = include_str!("security_report_assets/marked.min.js");
const HIGHLIGHT_JS: &str = include_str!("security_report_assets/highlight.min.js");
const MERMAID_JS: &str = include_str!("security_report_assets/mermaid.min.js");
|
||||
|
||||
/// Escape the five HTML-significant characters in `input` so it can be
/// embedded safely in HTML text or attribute positions.
///
/// NOTE: the extraction that produced this file had decoded the HTML
/// entities inside these string literals (e.g. `"&amp;"` appeared as `"&"`,
/// and `"&quot;"` became the syntactically invalid `"""`), which both broke
/// compilation and defeated the escaping. Restored to the standard entity
/// replacements.
fn escape_html(input: &str) -> String {
    // Worst case grows per character, but input.len() is a good lower bound.
    let mut out = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            _ => out.push(ch),
        }
    }
    out
}
|
||||
|
||||
/// Render a complete, self-contained HTML document for a security report.
///
/// The markdown body is base64-encoded and embedded in an inline script that
/// decodes it into `window.REPORT_MD` at load time — via `TextDecoder` for
/// correct UTF-8 handling when available, with a percent-encoding fallback
/// otherwise. All CSS/JS assets are inlined, so the resulting page works
/// offline from a single file.
///
/// NOTE(review): the extraction that produced this view decoded HTML
/// entities; verify literals such as "Drag & drop" against the upstream
/// source (it may originally have read "Drag &amp; drop").
pub(crate) fn build_report_html(title: &str, markdown: &str) -> String {
    // The title is interpolated into HTML text positions, so escape it.
    let escaped_title = escape_html(title);
    // Base64 sidesteps any need to escape the markdown for embedding in JS.
    let report_payload = BASE64_STANDARD.encode(markdown);
    let styles = REPORT_STYLES;
    let script = REPORT_SCRIPT;
    // Literal `{`/`}` in the embedded JavaScript are doubled (`{{`/`}}`)
    // because this raw string is a `format!` template.
    format!(
        r#"<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>{escaped_title}</title>
<style>{styles}</style>
</head>
<body>
<header class="topbar">
<div class="brand">
<div class="site-path" id="site-path">/ Report / {escaped_title}</div>
</div>
<div class="top-actions">
<button id="shareBtn" class="btn primary">Share</button>
<button id="editToggle" class="btn" aria-pressed="false" title="Toggle edit mode">Edit</button>
<label class="file-btn btn" for="fileInput">Open</label>
<input id="fileInput" type="file" accept=".md,.markdown,.txt" hidden />
<button id="themeToggle" class="icon-btn" title="Toggle dark mode" aria-label="Toggle dark mode">
<svg viewBox="0 0 24 24" width="20" height="20" aria-hidden="true">
<path id="themeIcon" fill="currentColor" d="M21.64 13a1 1 0 0 0-1.11-.27 8 8 0 0 1-10.26-10.26 1 1 0 0 0-1.38-1.26 10 10 0 1 0 13 13 1 1 0 0 0-.25-1.21Z"/>
</svg>
</button>
</div>
</header>

<div class="drop-overlay" id="dropOverlay" aria-hidden="true">
<div class="drop-message">
Drop a .md file to load
</div>
</div>

<main class="layout">
<article class="content" id="content">
</article>
<aside class="sidebar right" id="rightToc" aria-label="Table of contents">
<div class="toc-inner">
<div class="nav-title" style="display:flex;align-items:center;justify-content:space-between;gap:8px;">
<span>Outline</span>
<button id="navToggle" class="icon-btn nav-toggle" aria-pressed="false" aria-label="Collapse sidebar" title="Collapse sidebar">
<svg viewBox="0 0 24 24" width="18" height="18" aria-hidden="true">
<path id="navIcon" fill="currentColor" d="M9 6l6 6-6 6"/>
</svg>
</button>
</div>
<div class="toc-search">
<input id="sectionSearch" class="search-input" type="search" placeholder="Jump to section" aria-label="Jump to section" />
</div>
<div id="jobProgressHost"></div>
<nav id="tocList"></nav>
</div>
</aside>
</main>

<footer class="footer">
<div>Drag & drop a Markdown file anywhere, or use Open.</div>
</footer>

<script>
(function() {{
const base64 = "{report_payload}";
try {{
const binary = atob(base64);
if (typeof TextDecoder === "function") {{
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i += 1) {{
bytes[i] = binary.charCodeAt(i);
}}
window.REPORT_MD = new TextDecoder("utf-8").decode(bytes);
}} else {{
const percentEncoded = Array.prototype.map
.call(binary, function (ch) {{
const code = ch.charCodeAt(0).toString(16).padStart(2, "0");
return "%" + code;
}})
.join("");
window.REPORT_MD = decodeURIComponent(percentEncoded);
}}
}} catch (err) {{
console.error("Failed to decode embedded report markdown", err);
window.REPORT_MD = "";
}}
}})();
</script>
<script>{MARKED_JS}</script>
<script>{HIGHLIGHT_JS}</script>
<script>{MERMAID_JS}</script>
<script>{script}</script>
</body>
</html>
"#
    )
}
|
||||
10048
codex-rs/tui/src/security_review.rs
Normal file
10048
codex-rs/tui/src/security_review.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -15,6 +15,10 @@ pub enum SlashCommand {
|
||||
Model,
|
||||
Approvals,
|
||||
Review,
|
||||
#[strum(serialize = "secreview")]
|
||||
SecReview,
|
||||
/// Validate high-risk findings from the last security review
|
||||
Validate,
|
||||
New,
|
||||
Init,
|
||||
Compact,
|
||||
@@ -28,6 +32,7 @@ pub enum SlashCommand {
|
||||
Exit,
|
||||
Feedback,
|
||||
Rollout,
|
||||
#[cfg(debug_assertions)]
|
||||
TestApproval,
|
||||
}
|
||||
|
||||
@@ -40,7 +45,9 @@ impl SlashCommand {
|
||||
SlashCommand::Init => "create an AGENTS.md file with instructions for Codex",
|
||||
SlashCommand::Compact => "summarize conversation to prevent hitting the context limit",
|
||||
SlashCommand::Review => "review my current changes and find issues",
|
||||
SlashCommand::Undo => "ask Codex to undo a turn",
|
||||
SlashCommand::SecReview => "run an AppSec security review over the repo",
|
||||
SlashCommand::Validate => "validate high-risk findings (web + api)",
|
||||
SlashCommand::Undo => "restore the workspace to the last Codex snapshot",
|
||||
SlashCommand::Quit | SlashCommand::Exit => "exit Codex",
|
||||
SlashCommand::Diff => "show git diff (including untracked files)",
|
||||
SlashCommand::Mention => "mention a file",
|
||||
@@ -50,6 +57,7 @@ impl SlashCommand {
|
||||
SlashCommand::Mcp => "list configured MCP tools",
|
||||
SlashCommand::Logout => "log out of Codex",
|
||||
SlashCommand::Rollout => "print the rollout file path",
|
||||
#[cfg(debug_assertions)]
|
||||
SlashCommand::TestApproval => "test approval request",
|
||||
}
|
||||
}
|
||||
@@ -70,6 +78,8 @@ impl SlashCommand {
|
||||
| SlashCommand::Model
|
||||
| SlashCommand::Approvals
|
||||
| SlashCommand::Review
|
||||
| SlashCommand::SecReview
|
||||
| SlashCommand::Validate
|
||||
| SlashCommand::Logout => false,
|
||||
SlashCommand::Diff
|
||||
| SlashCommand::Mention
|
||||
@@ -77,8 +87,10 @@ impl SlashCommand {
|
||||
| SlashCommand::Mcp
|
||||
| SlashCommand::Feedback
|
||||
| SlashCommand::Quit
|
||||
| SlashCommand::Exit => true,
|
||||
SlashCommand::Rollout => true,
|
||||
| SlashCommand::Exit
|
||||
| SlashCommand::Rollout => true,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
SlashCommand::TestApproval => true,
|
||||
}
|
||||
}
|
||||
@@ -93,8 +105,20 @@ impl SlashCommand {
|
||||
|
||||
/// Return all built-in commands in a Vec paired with their command string.
|
||||
pub fn built_in_slash_commands() -> Vec<(&'static str, SlashCommand)> {
|
||||
let show_beta_features = beta_features_enabled();
|
||||
|
||||
SlashCommand::iter()
|
||||
.filter(|command| command.is_visible())
|
||||
.filter(|cmd| {
|
||||
if *cmd == SlashCommand::Undo {
|
||||
show_beta_features
|
||||
} else {
|
||||
cmd.is_visible()
|
||||
}
|
||||
})
|
||||
.map(|c| (c.command(), c))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn beta_features_enabled() -> bool {
|
||||
std::env::var_os("BETA_FEATURE").is_some()
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ use super::rate_limits::format_status_limit_summary;
|
||||
use super::rate_limits::render_status_limit_progress_bar;
|
||||
use crate::wrapping::RtOptions;
|
||||
use crate::wrapping::word_wrap_lines;
|
||||
use codex_core::AuthManager;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct StatusContextWindowData {
|
||||
@@ -65,6 +66,7 @@ struct StatusHistoryCell {
|
||||
|
||||
pub(crate) fn new_status_output(
|
||||
config: &Config,
|
||||
auth_manager: &AuthManager,
|
||||
total_usage: &TokenUsage,
|
||||
context_usage: Option<&TokenUsage>,
|
||||
session_id: &Option<ConversationId>,
|
||||
@@ -74,6 +76,7 @@ pub(crate) fn new_status_output(
|
||||
let command = PlainHistoryCell::new(vec!["/status".magenta().into()]);
|
||||
let card = StatusHistoryCell::new(
|
||||
config,
|
||||
auth_manager,
|
||||
total_usage,
|
||||
context_usage,
|
||||
session_id,
|
||||
@@ -87,6 +90,7 @@ pub(crate) fn new_status_output(
|
||||
impl StatusHistoryCell {
|
||||
fn new(
|
||||
config: &Config,
|
||||
auth_manager: &AuthManager,
|
||||
total_usage: &TokenUsage,
|
||||
context_usage: Option<&TokenUsage>,
|
||||
session_id: &Option<ConversationId>,
|
||||
@@ -106,7 +110,7 @@ impl StatusHistoryCell {
|
||||
SandboxPolicy::WorkspaceWrite { .. } => "workspace-write".to_string(),
|
||||
};
|
||||
let agents_summary = compose_agents_summary(config);
|
||||
let account = compose_account_display(config);
|
||||
let account = compose_account_display(auth_manager);
|
||||
let session_id = session_id.as_ref().map(std::string::ToString::to_string);
|
||||
let context_window = config.model_context_window.and_then(|window| {
|
||||
context_usage.map(|usage| StatusContextWindowData {
|
||||
|
||||
@@ -2,7 +2,8 @@ use crate::exec_command::relativize_to_home;
|
||||
use crate::text_formatting;
|
||||
use chrono::DateTime;
|
||||
use chrono::Local;
|
||||
use codex_core::auth::load_auth_dot_json;
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::project_doc::discover_project_doc_paths;
|
||||
use std::path::Path;
|
||||
@@ -82,24 +83,17 @@ pub(crate) fn compose_agents_summary(config: &Config) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn compose_account_display(config: &Config) -> Option<StatusAccountDisplay> {
|
||||
let auth =
|
||||
load_auth_dot_json(&config.codex_home, config.cli_auth_credentials_store_mode).ok()??;
|
||||
pub(crate) fn compose_account_display(auth_manager: &AuthManager) -> Option<StatusAccountDisplay> {
|
||||
let auth = auth_manager.auth()?;
|
||||
|
||||
if let Some(tokens) = auth.tokens.as_ref() {
|
||||
let info = &tokens.id_token;
|
||||
let email = info.email.clone();
|
||||
let plan = info.get_chatgpt_plan_type().as_deref().map(title_case);
|
||||
return Some(StatusAccountDisplay::ChatGpt { email, plan });
|
||||
match auth.mode {
|
||||
AuthMode::ChatGPT => {
|
||||
let email = auth.get_account_email();
|
||||
let plan = auth.raw_plan_type().map(|plan| title_case(plan.as_str()));
|
||||
Some(StatusAccountDisplay::ChatGpt { email, plan })
|
||||
}
|
||||
AuthMode::ApiKey => Some(StatusAccountDisplay::ApiKey),
|
||||
}
|
||||
|
||||
if let Some(key) = auth.openai_api_key
|
||||
&& !key.is_empty()
|
||||
{
|
||||
return Some(StatusAccountDisplay::ApiKey);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub(crate) fn format_tokens_compact(value: i64) -> String {
|
||||
|
||||
@@ -4,6 +4,7 @@ use crate::history_cell::HistoryCell;
|
||||
use chrono::Duration as ChronoDuration;
|
||||
use chrono::TimeZone;
|
||||
use chrono::Utc;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config::ConfigToml;
|
||||
@@ -27,6 +28,14 @@ fn test_config(temp_home: &TempDir) -> Config {
|
||||
.expect("load config")
|
||||
}
|
||||
|
||||
fn test_auth_manager(config: &Config) -> AuthManager {
|
||||
AuthManager::new(
|
||||
config.codex_home.clone(),
|
||||
false,
|
||||
config.cli_auth_credentials_store_mode,
|
||||
)
|
||||
}
|
||||
|
||||
fn render_lines(lines: &[Line<'static>]) -> Vec<String> {
|
||||
lines
|
||||
.iter()
|
||||
@@ -85,6 +94,7 @@ fn status_snapshot_includes_reasoning_details() {
|
||||
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 1_200,
|
||||
cached_input_tokens: 200,
|
||||
@@ -113,6 +123,7 @@ fn status_snapshot_includes_reasoning_details() {
|
||||
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
@@ -137,6 +148,7 @@ fn status_snapshot_includes_monthly_limit() {
|
||||
config.model_provider_id = "openai".to_string();
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 800,
|
||||
cached_input_tokens: 0,
|
||||
@@ -161,6 +173,7 @@ fn status_snapshot_includes_monthly_limit() {
|
||||
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
@@ -184,6 +197,7 @@ fn status_card_token_usage_excludes_cached_tokens() {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 1_200,
|
||||
cached_input_tokens: 200,
|
||||
@@ -197,7 +211,15 @@ fn status_card_token_usage_excludes_cached_tokens() {
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
None,
|
||||
now,
|
||||
);
|
||||
let rendered = render_lines(&composite.display_lines(120));
|
||||
|
||||
assert!(
|
||||
@@ -216,6 +238,7 @@ fn status_snapshot_truncates_in_narrow_terminal() {
|
||||
config.model_reasoning_summary = ReasoningSummary::Detailed;
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 1_200,
|
||||
cached_input_tokens: 200,
|
||||
@@ -240,6 +263,7 @@ fn status_snapshot_truncates_in_narrow_terminal() {
|
||||
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
@@ -264,6 +288,7 @@ fn status_snapshot_shows_missing_limits_message() {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 500,
|
||||
cached_input_tokens: 0,
|
||||
@@ -277,7 +302,15 @@ fn status_snapshot_shows_missing_limits_message() {
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
None,
|
||||
now,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -295,6 +328,7 @@ fn status_snapshot_shows_empty_limits_message() {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 500,
|
||||
cached_input_tokens: 0,
|
||||
@@ -315,6 +349,7 @@ fn status_snapshot_shows_empty_limits_message() {
|
||||
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
@@ -338,6 +373,7 @@ fn status_snapshot_shows_stale_limits_message() {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 1_200,
|
||||
cached_input_tokens: 200,
|
||||
@@ -367,6 +403,7 @@ fn status_snapshot_shows_stale_limits_message() {
|
||||
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
@@ -389,6 +426,7 @@ fn status_context_window_uses_last_usage() {
|
||||
let mut config = test_config(&temp_home);
|
||||
config.model_context_window = Some(272_000);
|
||||
|
||||
let auth_manager = test_auth_manager(&config);
|
||||
let total_usage = TokenUsage {
|
||||
input_tokens: 12_800,
|
||||
cached_input_tokens: 0,
|
||||
@@ -409,7 +447,15 @@ fn status_context_window_uses_last_usage() {
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None, now);
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&auth_manager,
|
||||
&total_usage,
|
||||
Some(&last_usage),
|
||||
&None,
|
||||
None,
|
||||
now,
|
||||
);
|
||||
let rendered_lines = render_lines(&composite.display_lines(80));
|
||||
let context_line = rendered_lines
|
||||
.into_iter()
|
||||
|
||||
@@ -10,7 +10,7 @@ use ratatui::buffer::Buffer;
|
||||
use ratatui::layout::Rect;
|
||||
use ratatui::style::Stylize;
|
||||
use ratatui::text::Line;
|
||||
use ratatui::widgets::WidgetRef;
|
||||
use ratatui::widgets::Paragraph;
|
||||
|
||||
use crate::app_event::AppEvent;
|
||||
use crate::app_event_sender::AppEventSender;
|
||||
@@ -20,9 +20,27 @@ use crate::render::renderable::Renderable;
|
||||
use crate::shimmer::shimmer_spans;
|
||||
use crate::tui::FrameRequester;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct StatusSnapshot {
|
||||
pub(crate) header: String,
|
||||
pub(crate) progress: Option<f32>,
|
||||
pub(crate) thinking: Vec<String>,
|
||||
pub(crate) tool_calls: Vec<String>,
|
||||
pub(crate) logs: Vec<String>,
|
||||
}
|
||||
|
||||
pub(crate) struct StatusIndicatorWidget {
|
||||
/// Animated header text (defaults to "Working").
|
||||
header: String,
|
||||
/// Percentage progress to display, if available.
|
||||
progress: Option<f32>,
|
||||
/// Recent reasoning lines emitted by the model.
|
||||
thinking_lines: Vec<String>,
|
||||
/// Labels of in-flight tool calls.
|
||||
tool_calls: Vec<String>,
|
||||
/// Recent log messages emitted by long-running tasks.
|
||||
logs: Vec<String>,
|
||||
/// Whether to show the interrupt key hint.
|
||||
show_interrupt_hint: bool,
|
||||
|
||||
elapsed_running: Duration,
|
||||
@@ -53,6 +71,10 @@ impl StatusIndicatorWidget {
|
||||
pub(crate) fn new(app_event_tx: AppEventSender, frame_requester: FrameRequester) -> Self {
|
||||
Self {
|
||||
header: String::from("Working"),
|
||||
progress: None,
|
||||
thinking_lines: Vec::new(),
|
||||
tool_calls: Vec::new(),
|
||||
logs: Vec::new(),
|
||||
show_interrupt_hint: true,
|
||||
elapsed_running: Duration::ZERO,
|
||||
last_resume_at: Instant::now(),
|
||||
@@ -77,6 +99,15 @@ impl StatusIndicatorWidget {
|
||||
&self.header
|
||||
}
|
||||
|
||||
pub(crate) fn update_snapshot(&mut self, snapshot: StatusSnapshot) {
|
||||
self.update_header(snapshot.header);
|
||||
self.progress = snapshot.progress;
|
||||
self.thinking_lines = snapshot.thinking;
|
||||
self.tool_calls = snapshot.tool_calls;
|
||||
self.logs = snapshot.logs;
|
||||
self.frame_requester.schedule_frame();
|
||||
}
|
||||
|
||||
pub(crate) fn set_interrupt_hint_visible(&mut self, visible: bool) {
|
||||
self.show_interrupt_hint = visible;
|
||||
}
|
||||
@@ -86,6 +117,11 @@ impl StatusIndicatorWidget {
|
||||
self.show_interrupt_hint
|
||||
}
|
||||
|
||||
pub(crate) fn set_logs(&mut self, logs: Vec<String>) {
|
||||
self.logs = logs;
|
||||
self.frame_requester.schedule_frame();
|
||||
}
|
||||
|
||||
pub(crate) fn pause_timer(&mut self) {
|
||||
self.pause_timer_at(Instant::now());
|
||||
}
|
||||
@@ -129,8 +165,34 @@ impl StatusIndicatorWidget {
|
||||
}
|
||||
|
||||
impl Renderable for StatusIndicatorWidget {
|
||||
fn desired_height(&self, _width: u16) -> u16 {
|
||||
1
|
||||
fn desired_height(&self, width: u16) -> u16 {
|
||||
let inner_width = width.max(1) as usize;
|
||||
let mut total: u16 = 1; // status line
|
||||
|
||||
// Additional thinking/tool call lines beyond the latest one shown inline.
|
||||
let extra_thinking = self
|
||||
.thinking_lines
|
||||
.len()
|
||||
.saturating_sub(usize::from(self.thinking_lines.last().is_some()))
|
||||
as u16;
|
||||
let extra_tool_calls =
|
||||
self.tool_calls
|
||||
.len()
|
||||
.saturating_sub(usize::from(self.tool_calls.last().is_some())) as u16;
|
||||
total = total.saturating_add(extra_thinking);
|
||||
total = total.saturating_add(extra_tool_calls);
|
||||
|
||||
let text_width = inner_width.saturating_sub(3); // account for " ↳ " prefix
|
||||
if text_width > 0 {
|
||||
for log in &self.logs {
|
||||
let wrapped = textwrap::wrap(log, text_width);
|
||||
total = total.saturating_add(wrapped.len() as u16);
|
||||
}
|
||||
} else {
|
||||
total = total.saturating_add(self.logs.len() as u16);
|
||||
}
|
||||
|
||||
total
|
||||
}
|
||||
|
||||
fn render(&self, area: Rect, buf: &mut Buffer) {
|
||||
@@ -145,11 +207,28 @@ impl Renderable for StatusIndicatorWidget {
|
||||
let elapsed_duration = self.elapsed_duration_at(now);
|
||||
let pretty_elapsed = fmt_elapsed_compact(elapsed_duration.as_secs());
|
||||
|
||||
// Plain rendering: no borders or padding so the live cell is visually indistinguishable from terminal scrollback.
|
||||
let mut spans = Vec::with_capacity(5);
|
||||
// Plain rendering: no borders or padding so the live cell is visually
|
||||
// indistinguishable from terminal scrollback.
|
||||
let latest_thinking = self.thinking_lines.last().map(String::as_str);
|
||||
let latest_tool_call = self.tool_calls.last().map(String::as_str);
|
||||
|
||||
let mut spans = Vec::with_capacity(9);
|
||||
spans.push(spinner(Some(self.last_resume_at)));
|
||||
spans.push(" ".into());
|
||||
spans.extend(shimmer_spans(&self.header));
|
||||
if let Some(progress) = self.progress {
|
||||
let pct = (progress.clamp(0.0, 1.0) * 100.0).round();
|
||||
spans.push(" ".into());
|
||||
spans.push(format!("{pct:.0}%").dim());
|
||||
}
|
||||
if let Some(thinking) = latest_thinking {
|
||||
spans.push(" - ".into());
|
||||
spans.push(thinking.to_string().magenta());
|
||||
}
|
||||
if let Some(tool) = latest_tool_call {
|
||||
spans.push(" - ".into());
|
||||
spans.push(tool.to_string().cyan());
|
||||
}
|
||||
spans.push(" ".into());
|
||||
if self.show_interrupt_hint {
|
||||
spans.extend(vec![
|
||||
@@ -161,7 +240,47 @@ impl Renderable for StatusIndicatorWidget {
|
||||
spans.push(format!("({pretty_elapsed})").dim());
|
||||
}
|
||||
|
||||
Line::from(spans).render_ref(area, buf);
|
||||
let mut lines: Vec<Line<'static>> = Vec::new();
|
||||
lines.push(Line::from(spans));
|
||||
|
||||
let extra_thinking = self
|
||||
.thinking_lines
|
||||
.len()
|
||||
.saturating_sub(usize::from(latest_thinking.is_some()));
|
||||
if extra_thinking > 0 {
|
||||
for thinking in self.thinking_lines.iter().take(extra_thinking) {
|
||||
lines.push(vec![" ↺ ".magenta(), thinking.clone().magenta()].into());
|
||||
}
|
||||
}
|
||||
|
||||
let extra_tool_calls = self
|
||||
.tool_calls
|
||||
.len()
|
||||
.saturating_sub(usize::from(latest_tool_call.is_some()));
|
||||
if extra_tool_calls > 0 {
|
||||
for call in self.tool_calls.iter().take(extra_tool_calls) {
|
||||
lines.push(vec![" ↳ ".cyan(), call.clone().cyan()].into());
|
||||
}
|
||||
}
|
||||
|
||||
let text_width = area.width.saturating_sub(3); // " ↳ " prefix
|
||||
if !self.logs.is_empty() {
|
||||
if text_width > 0 {
|
||||
for log in &self.logs {
|
||||
let wrapped = textwrap::wrap(log, text_width as usize);
|
||||
for (i, piece) in wrapped.iter().enumerate() {
|
||||
let prefix = if i == 0 { " ↳ ".dim() } else { " ".dim() };
|
||||
lines.push(vec![prefix, piece.to_string().into()].into());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for log in &self.logs {
|
||||
lines.push(vec![" ↳ ".dim(), log.clone().into()].into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Paragraph::new(lines).render(area, buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user