diff --git a/codex-rs/tui/src/security_prompts.rs b/codex-rs/tui/src/security_prompts.rs index 7bb93952ae..6863e2901e 100644 --- a/codex-rs/tui/src/security_prompts.rs +++ b/codex-rs/tui/src/security_prompts.rs @@ -59,9 +59,9 @@ Output format: JSON Lines, each {{"keyword": ""}}. Do not add commentary o "#; // Spec generation prompts -pub(crate) const SPEC_SYSTEM_PROMPT: &str = "You are an application security engineer documenting how a project is built. Produce an architecture specification that focuses on components, flows, and controls. Stay within the provided code locations and keep the output in markdown."; +pub(crate) const SPEC_SYSTEM_PROMPT: &str = "You are an application security engineer documenting how a project is built. Produce an architecture specification that focuses on components, flows, and controls. Stay within the provided code locations, ground claims in concrete evidence, and keep the output in markdown."; pub(crate) const SPEC_COMBINE_SYSTEM_PROMPT: &str = "You are consolidating multiple specification drafts into a single, cohesive project specification. Merge overlapping content, keep terminology consistent, and follow the supplied template. Preserve every security-relevant detail; when in doubt, include rather than summarize away content."; -pub(crate) const SPEC_PROMPT_TEMPLATE: &str = "You have access to the source code inside the following locations:\n{project_locations}\n\nFocus on {target_label}.\nGenerate a security-focused project specification. Parallelize discovery when enumerating files and avoid spending time on tests, vendored dependencies, or build artefacts. Follow the template exactly and return only markdown.\n\nTemplate:\n{spec_template}\n"; +pub(crate) const SPEC_PROMPT_TEMPLATE: &str = "You have access to the source code inside the following locations:\n{project_locations}\n\nFocus on {target_label}.\nGenerate a security-focused project specification. Parallelize discovery when enumerating files and avoid spending time on tests, vendored dependencies, or build artefacts.\n\n# Available tools\n- READ: respond with `READ: #Lstart-Lend` (range optional) to open code files within the in-scope locations.\n- SEARCH: respond with `SEARCH: literal:` or `SEARCH: regex:` to run ripgrep over the repository root and inspect matches.\n- GREP_FILES: respond with `GREP_FILES: {\"pattern\":\"needle\",\"include\":\"*.rs\",\"path\":\"subdir\",\"limit\":200}` to list files whose contents match.\nEmit at most one tool command per message and wait for the tool output before continuing. Use these tools to ground API entry points, components, and data flows in actual code instead of speculation.\n\nWhen you have gathered enough evidence, follow the template exactly and return only markdown (no tool commands).\n\nTemplate:\n{spec_template}\n"; pub(crate) const CONVERT_CLASSIFICATION_TO_JSON_PROMPT_TEMPLATE: &str = r#" Read the project specification below and extract a normalized Data Classification list. diff --git a/codex-rs/tui/src/security_review.rs b/codex-rs/tui/src/security_review.rs index 887243cff0..9cafafcde4 100644 --- a/codex-rs/tui/src/security_review.rs +++ b/codex-rs/tui/src/security_review.rs @@ -4034,43 +4034,284 @@ async fn generate_spec_for_location( let date = OffsetDateTime::now_utc() .format(&Rfc3339) .unwrap_or_else(|_| "unknown-date".to_string()); - let prompt = build_spec_prompt_text( + let base_prompt = build_spec_prompt_text( &project_locations, &location_label, SPEC_GENERATION_MODEL, &date, ); - let response = call_model( - client, - &provider, - &auth, - SPEC_GENERATION_MODEL, - SPEC_SYSTEM_PROMPT, - &prompt, - metrics.clone(), - 0.1, - ) - .await - .map_err(|err| SecurityReviewFailure { - message: format!("Specification generation failed for {location_label}: {err}"), - logs: Vec::new(), - })?; - if let Some(reasoning) = response.reasoning.as_ref() { - for line in reasoning - .lines() - .map(str::trim) - .filter(|line| !line.is_empty()) - { - let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); - let msg = format!("Model reasoning: {truncated}"); - if let Some(tx) = progress_sender.as_ref() { - tx.send(AppEvent::SecurityReviewLog(msg.clone())); - } - logs.push(msg); + let mut conversation: Vec = Vec::new(); + let mut seen_search_requests: HashSet = HashSet::new(); + let mut seen_read_requests: HashSet = HashSet::new(); + let mut tool_rounds = 0usize; + let mut command_error_count = 0usize; + + let raw_spec = loop { + if tool_rounds > SPEC_COMBINE_MAX_TOOL_ROUNDS { + return Err(SecurityReviewFailure { + message: format!( + "Spec generation for {location_label} exceeded {SPEC_COMBINE_MAX_TOOL_ROUNDS} tool rounds.", + ), + logs, + }); } - } - let mut sanitized = fix_mermaid_blocks(&response.text); + + let mut prompt = base_prompt.clone(); + if !conversation.is_empty() { + prompt.push_str("\n\n# Conversation history\n"); + prompt.push_str(&conversation.join("\n\n")); + } + + let response = match call_model( + client, + &provider, + &auth, + SPEC_GENERATION_MODEL, + SPEC_SYSTEM_PROMPT, + &prompt, + metrics.clone(), + 0.1, + ) + .await + { + Ok(output) => output, + Err(err) => { + return Err(SecurityReviewFailure { + message: format!("Specification generation failed for {location_label}: {err}"), + logs, + }); + } + }; + + if let Some(reasoning) = response.reasoning.as_ref() { + for line in reasoning + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + { + let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); + let msg = format!("Model reasoning: {truncated}"); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + } + } + + let assistant_reply = response.text.trim().to_string(); + if assistant_reply.is_empty() { + conversation.push("Assistant:".to_string()); + } else { + conversation.push(format!("Assistant:\n{assistant_reply}")); + } + + let (after_read, read_requests) = extract_read_requests(&response.text); + let (cleaned_text, search_requests) = parse_search_requests(&after_read); + + let mut executed_command = false; + + for request in read_requests { + let key = request.dedupe_key(); + if !seen_read_requests.insert(key) { + let msg = format!( + "Spec READ `{}` skipped (already provided).", + request.path.display() + ); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg.clone()); + conversation.push(format!( + "Tool READ `{}` already provided earlier.", + request.path.display() + )); + executed_command = true; + continue; + } + + executed_command = true; + match execute_auto_scope_read( + &repo_root, + &request.path, + request.start, + request.end, + metrics.as_ref(), + ) + .await + { + Ok(output) => { + let msg = format!("Spec READ `{}` returned content.", request.path.display()); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + conversation.push(format!( + "Tool READ `{}`:\n{}", + request.path.display(), + output + )); + } + Err(err) => { + let msg = format!("Spec READ `{}` failed: {err}", request.path.display()); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg.clone()); + conversation.push(format!( + "Tool READ `{}` error: {err}", + request.path.display() + )); + command_error_count += 1; + } + } + } + + for request in search_requests { + let key = request.dedupe_key(); + if !seen_search_requests.insert(key) { + match &request { + ToolRequest::Content { term, mode, .. } => { + let display_term = summarize_search_term(term, 80); + let msg = format!( + "Spec SEARCH `{display_term}` ({}) skipped (already provided).", + mode.as_str() + ); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg.clone()); + conversation.push(format!( + "Tool SEARCH `{display_term}` ({}) already provided earlier.", + mode.as_str() + )); + } + ToolRequest::GrepFiles { args, .. } => { + let mut shown = serde_json::json!({ "pattern": args.pattern }); + if let Some(ref inc) = args.include { + shown["include"] = serde_json::Value::String(inc.clone()); + } + if let Some(ref path) = args.path { + shown["path"] = serde_json::Value::String(path.clone()); + } + if let Some(limit) = args.limit { + shown["limit"] = + serde_json::Value::Number(serde_json::Number::from(limit as u64)); + } + let msg = format!("Spec GREP_FILES {shown} skipped (already provided).",); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg.clone()); + conversation + .push(format!("Tool GREP_FILES {shown} already provided earlier.")); + } + } + executed_command = true; + continue; + } + + executed_command = true; + match request { + ToolRequest::Content { term, mode, .. } => { + let display_term = summarize_search_term(&term, 80); + let (log_line, output) = + match run_content_search(&repo_root, &term, mode, &metrics).await { + SearchResult::Matches(text) => ( + format!( + "Spec SEARCH `{display_term}` ({}) returned matches.", + mode.as_str() + ), + text, + ), + SearchResult::NoMatches => ( + format!( + "Spec SEARCH `{display_term}` ({}) returned no matches.", + mode.as_str() + ), + "No matches found.".to_string(), + ), + SearchResult::Error(err) => ( + format!( + "Spec SEARCH `{display_term}` ({}) failed: {err}", + mode.as_str() + ), + format!("Search error: {err}"), + ), + }; + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(log_line.clone())); + } + logs.push(log_line); + conversation.push(format!( + "Tool SEARCH `{display_term}` ({}) results:\n{}", + mode.as_str(), + output + )); + } + ToolRequest::GrepFiles { args, .. } => { + let mut shown = serde_json::json!({ "pattern": args.pattern }); + if let Some(ref inc) = args.include { + shown["include"] = serde_json::Value::String(inc.clone()); + } + if let Some(ref path) = args.path { + shown["path"] = serde_json::Value::String(path.clone()); + } + if let Some(limit) = args.limit { + shown["limit"] = + serde_json::Value::Number(serde_json::Number::from(limit as u64)); + } + let (log_line, output) = match run_grep_files(&repo_root, &args, &metrics).await + { + SearchResult::Matches(text) => { + (format!("Spec GREP_FILES {shown} returned results."), text) + } + SearchResult::NoMatches => ( + format!("Spec GREP_FILES {shown} returned no matches.",), + "No matches found.".to_string(), + ), + SearchResult::Error(err) => ( + format!("Spec GREP_FILES {shown} failed: {err}"), + format!("Search error: {err}"), + ), + }; + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(log_line.clone())); + } + logs.push(log_line); + conversation.push(format!("Tool GREP_FILES {shown}:\n{output}")); + } + } + } + + if command_error_count >= SPEC_COMBINE_MAX_COMMAND_ERRORS { + return Err(SecurityReviewFailure { + message: format!( + "Spec generation for {location_label} hit {SPEC_COMBINE_MAX_COMMAND_ERRORS} tool errors.", + ), + logs, + }); + } + + if executed_command { + tool_rounds = tool_rounds.saturating_add(1); + continue; + } + + let final_text = cleaned_text.trim(); + if final_text.is_empty() { + return Err(SecurityReviewFailure { + message: format!( + "Spec generation for {location_label} produced an empty response.", + ), + logs, + }); + } + + break final_text.to_string(); + }; + + let mut sanitized = fix_mermaid_blocks(&raw_spec); if !sanitized.trim().is_empty() { let polish_message = format!("Polishing specification markdown for {location_label}.");