Compare commits

...

11 Commits

Author SHA1 Message Date
Ahmed Ibrahim
a9365c2efa feedback 2025-10-24 22:49:30 -07:00
Ahmed Ibrahim
c1346e5226 feedback 2025-10-24 22:46:57 -07:00
Ahmed Ibrahim
6de3084c96 feedback 2025-10-24 22:45:12 -07:00
Ahmed Ibrahim
105353c42d Merge branch 'main' into followup-feedback 2025-10-24 22:37:01 -07:00
Ahmed Ibrahim
8f1d453320 chore: squash changes since base main 2025-10-24 22:35:35 -07:00
Ahmed Ibrahim
71f838389b Improve feedback (#5661)
<img width="1099" height="153" alt="image"
src="https://github.com/user-attachments/assets/2c901884-8baf-4b1b-b2c4-bcb61ff42be8"
/>

<img width="1082" height="125" alt="image"
src="https://github.com/user-attachments/assets/6336e6c9-9ace-46df-a383-a807ceffa524"
/>

<img width="1102" height="103" alt="image"
src="https://github.com/user-attachments/assets/78883682-7e44-4fa3-9e04-57f7df4766fd"
/>
2025-10-24 22:28:14 -07:00
Eric Traut
0533bd2e7c Fixed flaky unit test (#5654)
This PR fixes a test that is sporadically failing in CI.

The problem is that two unit tests (the older `login_and_cancel_chatgpt`
and a recently added
`login_chatgpt_includes_forced_workspace_query_param`) exercise code
paths that start the login server. The server binds to a hard-coded
localhost port number, so attempts to start more than one server at the
same time will fail. If these two tests happen to run concurrently, one
of them will fail.

To fix this, I've added a simple mutex. We can use this same mutex for
future tests that use the same pattern.
2025-10-24 16:31:24 -07:00
Anton Panasenko
6af83d86ff [codex][app-server] introduce codex/event/raw_item events (#5578) 2025-10-24 22:41:52 +00:00
Gabriel Peal
e2e1b65da6 [MCP] Properly gate login after mcp add with experimental_use_rmcp_client (#5653)
There was supposed to be a check here like in other places.
2025-10-24 18:32:15 -04:00
Gabriel Peal
817d1508bc [MCP] Redact environment variable values in /mcp and mcp get (#5648)
Fixes #5524
2025-10-24 18:30:20 -04:00
Eric Traut
f8af4f5c8d Added model summary and risk assessment for commands that violate sandbox policy (#5536)
This PR adds support for a model-based summary and risk assessment for
commands that violate the sandbox policy and require user approval. This
aids the user in evaluating whether the command should be approved.

The feature works by taking a failed command and passing it back to the
model and asking it to summarize the command, give it a risk level (low,
medium, high) and a risk category (e.g. "data deletion" or "data
exfiltration"). It uses a new conversation thread so the context in the
existing thread doesn't influence the answer. If the call to the model
fails or takes longer than 5 seconds, it falls back to the current
behavior.

For now, this is an experimental feature and is gated by a config key
`experimental_sandbox_command_assessment`.

Here is a screen shot of the approval prompt showing the risk assessment
and summary.

<img width="723" height="282" alt="image"
src="https://github.com/user-attachments/assets/4597dd7c-d5a0-4e9f-9d13-414bd082fd6b"
/>
2025-10-24 15:23:44 -07:00
57 changed files with 1908 additions and 295 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -853,6 +853,7 @@ dependencies = [
"pretty_assertions",
"serde",
"serde_json",
"serial_test",
"tempfile",
"tokio",
"toml",

View File

@@ -17,6 +17,7 @@ use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxCommandAssessment;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::TurnAbortReason;
use paste::paste;
@@ -716,6 +717,8 @@ pub struct SendUserMessageResponse {}
#[serde(rename_all = "camelCase")]
pub struct AddConversationListenerParams {
pub conversation_id: ConversationId,
#[serde(default)]
pub experimental_raw_events: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -847,6 +850,8 @@ pub struct ExecCommandApprovalParams {
pub cwd: PathBuf,
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}
@@ -1063,6 +1068,7 @@ mod tests {
command: vec!["echo".to_string(), "hello".to_string()],
cwd: PathBuf::from("/tmp"),
reason: Some("because tests".to_string()),
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "echo hello".to_string(),
}],

View File

@@ -47,6 +47,7 @@ base64 = { workspace = true }
core_test_support = { workspace = true }
os_info = { workspace = true }
pretty_assertions = { workspace = true }
serial_test = { workspace = true }
tempfile = { workspace = true }
toml = { workspace = true }
wiremock = { workspace = true }

View File

@@ -1256,7 +1256,10 @@ impl CodexMessageProcessor {
request_id: RequestId,
params: AddConversationListenerParams,
) {
let AddConversationListenerParams { conversation_id } = params;
let AddConversationListenerParams {
conversation_id,
experimental_raw_events,
} = params;
let Ok(conversation) = self
.conversation_manager
.get_conversation(conversation_id)
@@ -1293,6 +1296,11 @@ impl CodexMessageProcessor {
}
};
if let EventMsg::RawResponseItem(_) = &event.msg
&& !experimental_raw_events {
continue;
}
// For now, we send a notification for every event,
// JSON-serializing the `Event` as-is, but these should
// be migrated to be variants of `ServerNotification`
@@ -1447,6 +1455,7 @@ async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
parsed_cmd,
}) => {
let params = ExecCommandApprovalParams {
@@ -1455,6 +1464,7 @@ async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
parsed_cmd,
};
let rx = outgoing
@@ -1523,6 +1533,7 @@ async fn derive_config_from_params(
include_view_image_tool: None,
show_raw_agent_reasoning: None,
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};

View File

@@ -103,7 +103,10 @@ async fn test_codex_jsonrpc_conversation_flow() {
// 2) addConversationListener
let add_listener_id = mcp
.send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
.send_add_conversation_listener_request(AddConversationListenerParams {
conversation_id,
experimental_raw_events: false,
})
.await
.expect("send addConversationListener");
let add_listener_resp: JSONRPCResponse = timeout(
@@ -252,7 +255,10 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
// 2) addConversationListener
let add_listener_id = mcp
.send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
.send_add_conversation_listener_request(AddConversationListenerParams {
conversation_id,
experimental_raw_events: false,
})
.await
.expect("send addConversationListener");
let _: AddConversationSubscriptionResponse =
@@ -311,6 +317,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
],
cwd: working_directory.clone(),
reason: None,
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "python3 -c 'print(42)'".to_string()
}],
@@ -458,7 +465,10 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
.expect("deserialize newConversation response");
let add_listener_id = mcp
.send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
.send_add_conversation_listener_request(AddConversationListenerParams {
conversation_id,
experimental_raw_events: false,
})
.await
.expect("send addConversationListener");
timeout(

View File

@@ -67,7 +67,10 @@ async fn test_conversation_create_and_send_message_ok() {
// Add a listener so we receive notifications for this conversation (not strictly required for this test).
let add_listener_id = mcp
.send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
.send_add_conversation_listener_request(AddConversationListenerParams {
conversation_id,
experimental_raw_events: false,
})
.await
.expect("send addConversationListener");
let _sub: AddConversationSubscriptionResponse =

View File

@@ -88,7 +88,10 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
// 2) addConversationListener
let add_listener_id = mcp
.send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
.send_add_conversation_listener_request(AddConversationListenerParams {
conversation_id,
experimental_raw_events: false,
})
.await?;
let _add_listener_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,

View File

@@ -13,6 +13,7 @@ use codex_app_server_protocol::LoginChatGptResponse;
use codex_app_server_protocol::LogoutChatGptResponse;
use codex_app_server_protocol::RequestId;
use codex_login::login_with_api_key;
use serial_test::serial;
use tempfile::TempDir;
use tokio::time::timeout;
@@ -94,6 +95,8 @@ async fn logout_chatgpt_removes_auth() {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// Serialize tests that launch the login server since it binds to a fixed port.
#[serial(login_port)]
async fn login_and_cancel_chatgpt() {
let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
@@ -208,6 +211,8 @@ async fn login_chatgpt_rejected_when_forced_api() {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// Serialize tests that launch the login server since it binds to a fixed port.
#[serial(login_port)]
async fn login_chatgpt_includes_forced_workspace_query_param() {
let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
create_config_toml_forced_workspace(codex_home.path(), "ws-forced")

View File

@@ -15,6 +15,8 @@ use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::SendUserMessageParams;
use codex_app_server_protocol::SendUserMessageResponse;
use codex_protocol::ConversationId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use pretty_assertions::assert_eq;
use tempfile::TempDir;
use tokio::time::timeout;
@@ -62,7 +64,10 @@ async fn test_send_message_success() {
// 2) addConversationListener
let add_listener_id = mcp
.send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
.send_add_conversation_listener_request(AddConversationListenerParams {
conversation_id,
experimental_raw_events: false,
})
.await
.expect("send addConversationListener");
let add_listener_resp: JSONRPCResponse = timeout(
@@ -124,6 +129,105 @@ async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut
.expect("should have conversationId"),
&serde_json::Value::String(conversation_id.to_string())
);
let raw_attempt = tokio::time::timeout(
std::time::Duration::from_millis(200),
mcp.read_stream_until_notification_message("codex/event/raw_response_item"),
)
.await;
assert!(
raw_attempt.is_err(),
"unexpected raw item notification when not opted in"
);
}
#[tokio::test]
async fn test_send_message_raw_notifications_opt_in() {
let responses = vec![
create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
];
let server = create_mock_chat_completions_server(responses).await;
let codex_home = TempDir::new().expect("create temp dir");
create_config_toml(codex_home.path(), &server.uri()).expect("write config.toml");
let mut mcp = McpProcess::new(codex_home.path())
.await
.expect("spawn mcp process");
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
.await
.expect("init timed out")
.expect("init failed");
let new_conv_id = mcp
.send_new_conversation_request(NewConversationParams::default())
.await
.expect("send newConversation");
let new_conv_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
)
.await
.expect("newConversation timeout")
.expect("newConversation resp");
let NewConversationResponse {
conversation_id, ..
} = to_response::<_>(new_conv_resp).expect("deserialize newConversation response");
let add_listener_id = mcp
.send_add_conversation_listener_request(AddConversationListenerParams {
conversation_id,
experimental_raw_events: true,
})
.await
.expect("send addConversationListener");
let add_listener_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
)
.await
.expect("addConversationListener timeout")
.expect("addConversationListener resp");
let AddConversationSubscriptionResponse { subscription_id: _ } =
to_response::<_>(add_listener_resp).expect("deserialize addConversationListener response");
let send_id = mcp
.send_send_user_message_request(SendUserMessageParams {
conversation_id,
items: vec![InputItem::Text {
text: "Hello".to_string(),
}],
})
.await
.expect("send sendUserMessage");
let instructions = read_raw_response_item(&mut mcp, conversation_id).await;
assert_instructions_message(&instructions);
let environment = read_raw_response_item(&mut mcp, conversation_id).await;
assert_environment_message(&environment);
let response: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
)
.await
.expect("sendUserMessage response timeout")
.expect("sendUserMessage response error");
let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)
.expect("deserialize sendUserMessage response");
let user_message = read_raw_response_item(&mut mcp, conversation_id).await;
assert_user_message(&user_message, "Hello");
let assistant_message = read_raw_response_item(&mut mcp, conversation_id).await;
assert_assistant_message(&assistant_message, "Done");
let _ = tokio::time::timeout(
std::time::Duration::from_millis(250),
mcp.read_stream_until_notification_message("codex/event/task_complete"),
)
.await;
}
#[tokio::test]
@@ -184,3 +288,108 @@ stream_max_retries = 0
),
)
}
#[expect(clippy::expect_used)]
async fn read_raw_response_item(
mcp: &mut McpProcess,
conversation_id: ConversationId,
) -> ResponseItem {
let raw_notification: JSONRPCNotification = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("codex/event/raw_response_item"),
)
.await
.expect("codex/event/raw_response_item notification timeout")
.expect("codex/event/raw_response_item notification resp");
let serde_json::Value::Object(params) = raw_notification
.params
.expect("codex/event/raw_response_item should have params")
else {
panic!("codex/event/raw_response_item should have params");
};
let conversation_id_value = params
.get("conversationId")
.and_then(|value| value.as_str())
.expect("raw response item should include conversationId");
assert_eq!(
conversation_id_value,
conversation_id.to_string(),
"raw response item conversation mismatch"
);
let msg_value = params
.get("msg")
.cloned()
.expect("raw response item should include msg payload");
serde_json::from_value(msg_value).expect("deserialize raw response item")
}
fn assert_instructions_message(item: &ResponseItem) {
match item {
ResponseItem::Message { role, content, .. } => {
assert_eq!(role, "user");
let texts = content_texts(content);
assert!(
texts
.iter()
.any(|text| text.contains("<user_instructions>")),
"expected instructions message, got {texts:?}"
);
}
other => panic!("expected instructions message, got {other:?}"),
}
}
fn assert_environment_message(item: &ResponseItem) {
match item {
ResponseItem::Message { role, content, .. } => {
assert_eq!(role, "user");
let texts = content_texts(content);
assert!(
texts
.iter()
.any(|text| text.contains("<environment_context>")),
"expected environment context message, got {texts:?}"
);
}
other => panic!("expected environment message, got {other:?}"),
}
}
fn assert_user_message(item: &ResponseItem, expected_text: &str) {
match item {
ResponseItem::Message { role, content, .. } => {
assert_eq!(role, "user");
let texts = content_texts(content);
assert_eq!(texts, vec![expected_text]);
}
other => panic!("expected user message, got {other:?}"),
}
}
fn assert_assistant_message(item: &ResponseItem, expected_text: &str) {
match item {
ResponseItem::Message { role, content, .. } => {
assert_eq!(role, "assistant");
let texts = content_texts(content);
assert_eq!(texts, vec![expected_text]);
}
other => panic!("expected assistant message, got {other:?}"),
}
}
fn content_texts(content: &[ContentItem]) -> Vec<&str> {
content
.iter()
.filter_map(|item| match item {
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
Some(text.as_str())
}
_ => None,
})
.collect()
}

View File

@@ -274,19 +274,33 @@ async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Re
http_headers,
env_http_headers,
} = transport
&& matches!(supports_oauth_login(&url).await, Ok(true))
{
println!("Detected OAuth support. Starting OAuth flow…");
perform_oauth_login(
&name,
&url,
config.mcp_oauth_credentials_store_mode,
http_headers.clone(),
env_http_headers.clone(),
&Vec::new(),
)
.await?;
println!("Successfully logged in.");
match supports_oauth_login(&url).await {
Ok(true) => {
if !config.features.enabled(Feature::RmcpClient) {
println!(
"MCP server supports login. Add `experimental_use_rmcp_client = true` \
to your config.toml and run `codex mcp login {name}` to login."
);
} else {
println!("Detected OAuth support. Starting OAuth flow…");
perform_oauth_login(
&name,
&url,
config.mcp_oauth_credentials_store_mode,
http_headers.clone(),
env_http_headers.clone(),
&Vec::new(),
)
.await?;
println!("Successfully logged in.");
}
}
Ok(false) => {}
Err(_) => println!(
"MCP server may or may not require login. Run `codex mcp login {name}` to login."
),
}
}
Ok(())
@@ -523,10 +537,12 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
.map(|entry| entry.auth_status)
.unwrap_or(McpAuthStatus::Unsupported)
.to_string();
let bearer_token_display =
bearer_token_env_var.as_deref().unwrap_or("-").to_string();
http_rows.push([
name.clone(),
url.clone(),
bearer_token_env_var.clone().unwrap_or("-".to_string()),
bearer_token_display,
status,
auth_status,
]);
@@ -752,15 +768,15 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
} => {
println!(" transport: streamable_http");
println!(" url: {url}");
let env_var = bearer_token_env_var.as_deref().unwrap_or("-");
println!(" bearer_token_env_var: {env_var}");
let bearer_token_display = bearer_token_env_var.as_deref().unwrap_or("-");
println!(" bearer_token_env_var: {bearer_token_display}");
let headers_display = match http_headers {
Some(map) if !map.is_empty() => {
let mut pairs: Vec<_> = map.iter().collect();
pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
pairs
.into_iter()
.map(|(k, v)| format!("{k}={v}"))
.map(|(k, _)| format!("{k}=*****"))
.collect::<Vec<_>>()
.join(", ")
}
@@ -773,7 +789,7 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
pairs
.into_iter()
.map(|(k, v)| format!("{k}={v}"))
.map(|(k, var)| format!("{k}={var}"))
.collect::<Vec<_>>()
.join(", ")
}

View File

@@ -68,9 +68,9 @@ async fn list_and_get_render_expected_output() -> Result<()> {
assert!(stdout.contains("Name"));
assert!(stdout.contains("docs"));
assert!(stdout.contains("docs-server"));
assert!(stdout.contains("TOKEN=secret"));
assert!(stdout.contains("APP_TOKEN=$APP_TOKEN"));
assert!(stdout.contains("WORKSPACE_ID=$WORKSPACE_ID"));
assert!(stdout.contains("TOKEN=*****"));
assert!(stdout.contains("APP_TOKEN=*****"));
assert!(stdout.contains("WORKSPACE_ID=*****"));
assert!(stdout.contains("Status"));
assert!(stdout.contains("Auth"));
assert!(stdout.contains("enabled"));
@@ -119,9 +119,9 @@ async fn list_and_get_render_expected_output() -> Result<()> {
assert!(stdout.contains("transport: stdio"));
assert!(stdout.contains("command: docs-server"));
assert!(stdout.contains("args: --port 4000"));
assert!(stdout.contains("env: TOKEN=secret"));
assert!(stdout.contains("APP_TOKEN=$APP_TOKEN"));
assert!(stdout.contains("WORKSPACE_ID=$WORKSPACE_ID"));
assert!(stdout.contains("env: TOKEN=*****"));
assert!(stdout.contains("APP_TOKEN=*****"));
assert!(stdout.contains("WORKSPACE_ID=*****"));
assert!(stdout.contains("enabled: true"));
assert!(stdout.contains("remove: codex mcp remove docs"));

View File

@@ -6,15 +6,11 @@ pub fn format_env_display(env: Option<&HashMap<String, String>>, env_vars: &[Str
if let Some(map) = env {
let mut pairs: Vec<_> = map.iter().collect();
pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
parts.extend(
pairs
.into_iter()
.map(|(key, value)| format!("{key}={value}")),
);
parts.extend(pairs.into_iter().map(|(key, _)| format!("{key}=*****")));
}
if !env_vars.is_empty() {
parts.extend(env_vars.iter().map(|var| format!("{var}=${var}")));
parts.extend(env_vars.iter().map(|var| format!("{var}=*****")));
}
if parts.is_empty() {
@@ -42,14 +38,14 @@ mod tests {
env.insert("B".to_string(), "two".to_string());
env.insert("A".to_string(), "one".to_string());
assert_eq!(format_env_display(Some(&env), &[]), "A=one, B=two");
assert_eq!(format_env_display(Some(&env), &[]), "A=*****, B=*****");
}
#[test]
fn formats_env_vars_with_dollar_prefix() {
let vars = vec!["TOKEN".to_string(), "PATH".to_string()];
assert_eq!(format_env_display(None, &vars), "TOKEN=$TOKEN, PATH=$PATH");
assert_eq!(format_env_display(None, &vars), "TOKEN=*****, PATH=*****");
}
#[test]
@@ -60,7 +56,7 @@ mod tests {
assert_eq!(
format_env_display(Some(&env), &vars),
"HOME=/tmp, TOKEN=$TOKEN"
"HOME=*****, TOKEN=*****"
);
}
}

View File

@@ -134,6 +134,14 @@ impl ModelClient {
self.stream_with_task_kind(prompt, TaskKind::Regular).await
}
pub fn config(&self) -> Arc<Config> {
Arc::clone(&self.config)
}
pub fn provider(&self) -> &ModelProviderInfo {
&self.provider
}
pub(crate) async fn stream_with_task_kind(
&self,
prompt: &Prompt,

View File

@@ -88,6 +88,7 @@ use crate::protocol::Op;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::ReviewDecision;
use crate::protocol::ReviewOutputEvent;
use crate::protocol::SandboxCommandAssessment;
use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
use crate::protocol::StreamErrorEvent;
@@ -569,7 +570,6 @@ impl Session {
// Dispatch the SessionConfiguredEvent first and then report any errors.
// If resuming, include converted initial messages in the payload so UIs can render them immediately.
let initial_messages = initial_history.get_event_msgs();
sess.record_initial_history(initial_history).await;
let events = std::iter::once(Event {
id: INITIAL_SUBMIT_ID.to_owned(),
@@ -588,6 +588,9 @@ impl Session {
sess.send_event_raw(event).await;
}
// record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted.
sess.record_initial_history(initial_history).await;
Ok(sess)
}
@@ -608,7 +611,7 @@ impl Session {
InitialHistory::New => {
// Build and record initial items (user instructions + environment context)
let items = self.build_initial_context(&turn_context);
self.record_conversation_items(&items).await;
self.record_conversation_items(&turn_context, &items).await;
}
InitialHistory::Resumed(_) | InitialHistory::Forked(_) => {
let rollout_items = conversation_history.get_rollout_items();
@@ -755,6 +758,32 @@ impl Session {
}
}
pub(crate) async fn assess_sandbox_command(
&self,
turn_context: &TurnContext,
call_id: &str,
command: &[String],
failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
let config = turn_context.client.config();
let provider = turn_context.client.provider().clone();
let auth_manager = Arc::clone(&self.services.auth_manager);
let otel = self.services.otel_event_manager.clone();
crate::sandboxing::assessment::assess_command(
config,
provider,
auth_manager,
&otel,
self.conversation_id,
call_id,
command,
&turn_context.sandbox_policy,
&turn_context.cwd,
failure_message,
)
.await
}
/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
@@ -767,6 +796,7 @@ impl Session {
command: Vec<String>,
cwd: PathBuf,
reason: Option<String>,
risk: Option<SandboxCommandAssessment>,
) -> ReviewDecision {
let sub_id = turn_context.sub_id.clone();
// Add the tx_approve callback to the map before sending the request.
@@ -792,6 +822,7 @@ impl Session {
command,
cwd,
reason,
risk,
parsed_cmd,
});
self.send_event(turn_context, event).await;
@@ -857,9 +888,14 @@ impl Session {
/// Records input items: always append to conversation history and
/// persist these response items to rollout.
pub(crate) async fn record_conversation_items(&self, items: &[ResponseItem]) {
pub(crate) async fn record_conversation_items(
&self,
turn_context: &TurnContext,
items: &[ResponseItem],
) {
self.record_into_history(items).await;
self.persist_rollout_response_items(items).await;
self.send_raw_response_items(turn_context, items).await;
}
fn reconstruct_history_from_rollout(
@@ -909,6 +945,13 @@ impl Session {
self.persist_rollout_items(&rollout_items).await;
}
async fn send_raw_response_items(&self, turn_context: &TurnContext, items: &[ResponseItem]) {
for item in items {
self.send_event(turn_context, EventMsg::RawResponseItem(item.clone()))
.await;
}
}
pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
let mut items = Vec::<ResponseItem>::with_capacity(2);
if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
@@ -1004,7 +1047,7 @@ impl Session {
) {
let response_item: ResponseItem = response_input.clone().into();
// Add to conversation history and persist response item to rollout
self.record_conversation_items(std::slice::from_ref(&response_item))
self.record_conversation_items(turn_context, std::slice::from_ref(&response_item))
.await;
// Derive user message events and persist only UserMessage to rollout
@@ -1195,8 +1238,11 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
if let Some(env_item) = sess
.build_environment_update_item(previous_context.as_ref(), &current_context)
{
sess.record_conversation_items(std::slice::from_ref(&env_item))
.await;
sess.record_conversation_items(
&current_context,
std::slice::from_ref(&env_item),
)
.await;
}
sess.spawn_task(Arc::clone(&current_context), items, RegularTask)
@@ -1568,7 +1614,8 @@ pub(crate) async fn run_task(
}
review_thread_history.get_history()
} else {
sess.record_conversation_items(&pending_input).await;
sess.record_conversation_items(&turn_context, &pending_input)
.await;
sess.history_snapshot().await
};
@@ -1615,6 +1662,7 @@ pub(crate) async fn run_task(
is_review_mode,
&mut review_thread_history,
&sess,
&turn_context,
)
.await;
@@ -1663,6 +1711,7 @@ pub(crate) async fn run_task(
is_review_mode,
&mut review_thread_history,
&sess,
&turn_context,
)
.await;
// Aborted turn is reported via a different event.
@@ -2173,11 +2222,14 @@ pub(crate) async fn exit_review_mode(
}
session
.record_conversation_items(&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: user_message }],
}])
.record_conversation_items(
&turn_context,
&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: user_message }],
}],
)
.await;
}
@@ -2772,13 +2824,19 @@ mod tests {
EventMsg::ExitedReviewMode(ev) => assert!(ev.review_output.is_none()),
other => panic!("unexpected first event: {other:?}"),
}
let second = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
.await
.expect("timeout waiting for second event")
.expect("second event");
match second.msg {
EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason),
other => panic!("unexpected second event: {other:?}"),
loop {
let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
.await
.expect("timeout waiting for next event")
.expect("event");
match evt.msg {
EventMsg::RawResponseItem(_) => continue,
EventMsg::TurnAborted(e) => {
assert_eq!(TurnAbortReason::Interrupted, e.reason);
break;
}
other => panic!("unexpected second event: {other:?}"),
}
}
let history = sess.history_snapshot().await;

View File

@@ -223,6 +223,9 @@ pub struct Config {
pub tools_web_search_request: bool,
/// When `true`, run a model-based assessment for commands denied by the sandbox.
pub experimental_sandbox_command_assessment: bool,
pub use_experimental_streamable_shell_tool: bool,
/// If set to `true`, used only the experimental unified exec tool.
@@ -958,6 +961,7 @@ pub struct ConfigToml {
pub experimental_use_unified_exec_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
}
impl From<ConfigToml> for UserSavedConfig {
@@ -1118,6 +1122,7 @@ pub struct ConfigOverrides {
pub include_view_image_tool: Option<bool>,
pub show_raw_agent_reasoning: Option<bool>,
pub tools_web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
/// Additional directories that should be treated as writable roots for this session.
pub additional_writable_roots: Vec<PathBuf>,
}
@@ -1147,6 +1152,7 @@ impl Config {
include_view_image_tool: include_view_image_tool_override,
show_raw_agent_reasoning,
tools_web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
additional_writable_roots,
} = overrides;
@@ -1172,6 +1178,7 @@ impl Config {
include_apply_patch_tool: include_apply_patch_tool_override,
include_view_image_tool: include_view_image_tool_override,
web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
};
let features = Features::from_config(&cfg, &config_profile, feature_overrides);
@@ -1269,6 +1276,8 @@ impl Config {
let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
let experimental_sandbox_command_assessment =
features.enabled(Feature::SandboxCommandAssessment);
let forced_chatgpt_workspace_id =
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
@@ -1390,6 +1399,7 @@ impl Config {
forced_login_method,
include_apply_patch_tool: include_apply_patch_tool_flag,
tools_web_search_request,
experimental_sandbox_command_assessment,
use_experimental_streamable_shell_tool,
use_experimental_unified_exec_tool,
use_experimental_use_rmcp_client,
@@ -2873,6 +2883,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
@@ -2941,6 +2952,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
@@ -3024,6 +3036,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
@@ -3093,6 +3106,7 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,

View File

@@ -26,6 +26,7 @@ pub struct ConfigProfile {
pub experimental_use_exec_command_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub tools_web_search: Option<bool>,
pub tools_view_image: Option<bool>,
/// Optional feature toggles scoped to this profile.

View File

@@ -55,7 +55,7 @@ pub enum SandboxErr {
#[derive(Error, Debug)]
pub enum CodexErr {
// todo(aibrahim): git rid of this error carrying the dangling artifacts
#[error("turn aborted")]
#[error("turn aborted. Something went wrong? Hit `/feedback` to report the issue.")]
TurnAborted {
dangling_artifacts: Vec<ProcessedResponseItem>,
},
@@ -91,7 +91,7 @@ pub enum CodexErr {
/// Returned by run_command_stream when the user pressed CtrlC (SIGINT). Session uses this to
/// surface a polite FunctionCallOutput back to the model instead of crashing the CLI.
#[error("interrupted (Ctrl-C)")]
#[error("interrupted (Ctrl-C). Something went wrong? Hit `/feedback` to report the issue.")]
Interrupted,
/// Unexpected HTTP status code.

View File

@@ -39,6 +39,8 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
}
impl Feature {
@@ -73,6 +75,7 @@ pub struct FeatureOverrides {
pub include_apply_patch_tool: Option<bool>,
pub include_view_image_tool: Option<bool>,
pub web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
}
impl FeatureOverrides {
@@ -137,6 +140,7 @@ impl Features {
let mut features = Features::with_defaults();
let base_legacy = LegacyFeatureToggles {
experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
@@ -154,6 +158,8 @@ impl Features {
let profile_legacy = LegacyFeatureToggles {
include_apply_patch_tool: config_profile.include_apply_patch_tool,
include_view_image_tool: config_profile.include_view_image_tool,
experimental_sandbox_command_assessment: config_profile
.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: config_profile
.experimental_use_freeform_apply_patch,
experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
@@ -236,4 +242,10 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",
stage: Stage::Experimental,
default_enabled: false,
},
];

View File

@@ -9,6 +9,10 @@ struct Alias {
}
const ALIASES: &[Alias] = &[
Alias {
legacy_key: "experimental_sandbox_command_assessment",
feature: Feature::SandboxCommandAssessment,
},
Alias {
legacy_key: "experimental_use_unified_exec_tool",
feature: Feature::UnifiedExec,
@@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
pub struct LegacyFeatureToggles {
pub include_apply_patch_tool: Option<bool>,
pub include_view_image_tool: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_use_exec_command_tool: Option<bool>,
pub experimental_use_unified_exec_tool: Option<bool>,
@@ -69,6 +74,12 @@ impl LegacyFeatureToggles {
self.include_apply_patch_tool,
"include_apply_patch_tool",
);
set_if_some(
features,
Feature::SandboxCommandAssessment,
self.experimental_sandbox_command_assessment,
"experimental_sandbox_command_assessment",
);
set_if_some(
features,
Feature::ApplyPatchFreeform,

View File

@@ -1,4 +1,5 @@
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::conversation_history::ConversationHistory;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
@@ -13,6 +14,7 @@ pub(crate) async fn process_items(
is_review_mode: bool,
review_thread_history: &mut ConversationHistory,
sess: &Session,
turn_context: &TurnContext,
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
let mut responses = Vec::<ResponseInputItem>::new();
@@ -104,7 +106,7 @@ pub(crate) async fn process_items(
if is_review_mode {
review_thread_history.record_items(items_to_record_in_conversation_history.iter());
} else {
sess.record_conversation_items(&items_to_record_in_conversation_history)
sess.record_conversation_items(turn_context, &items_to_record_in_conversation_history)
.await;
}
}

View File

@@ -50,6 +50,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::AgentReasoningDelta(_)
| EventMsg::AgentReasoningRawContentDelta(_)
| EventMsg::AgentReasoningSectionBreak(_)
| EventMsg::RawResponseItem(_)
| EventMsg::SessionConfigured(_)
| EventMsg::McpToolCallBegin(_)
| EventMsg::McpToolCallEnd(_)

View File

@@ -0,0 +1,275 @@
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use crate::AuthManager;
use crate::ModelProviderInfo;
use crate::client::ModelClient;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::config::Config;
use crate::protocol::SandboxPolicy;
use askama::Template;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::SandboxCommandAssessment;
use futures::StreamExt;
use serde_json::json;
use tokio::time::timeout;
use tracing::warn;
const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
"data_deletion",
"data_exfiltration",
"privilege_escalation",
"system_modification",
"network_access",
"resource_exhaustion",
"compliance",
];
#[derive(Template)]
#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
struct SandboxAssessmentPromptTemplate<'a> {
platform: &'a str,
sandbox_policy: &'a str,
filesystem_roots: Option<&'a str>,
working_directory: &'a str,
command_argv: &'a str,
command_joined: &'a str,
sandbox_failure_message: Option<&'a str>,
}
#[allow(clippy::too_many_arguments)]
pub(crate) async fn assess_command(
config: Arc<Config>,
provider: ModelProviderInfo,
auth_manager: Arc<AuthManager>,
parent_otel: &OtelEventManager,
conversation_id: ConversationId,
call_id: &str,
command: &[String],
sandbox_policy: &SandboxPolicy,
cwd: &Path,
failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
if !config.experimental_sandbox_command_assessment || command.is_empty() {
return None;
}
let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
let command_joined =
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
let failure = failure_message
.map(str::trim)
.filter(|msg| !msg.is_empty())
.map(str::to_string);
let cwd_str = cwd.to_string_lossy().to_string();
let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
roots.sort();
roots.dedup();
let platform = std::env::consts::OS;
let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
collected if collected.is_empty() => None,
collected => Some(collected.join(", ")),
};
let prompt_template = SandboxAssessmentPromptTemplate {
platform,
sandbox_policy: sandbox_summary.as_str(),
filesystem_roots: filesystem_roots.as_deref(),
working_directory: cwd_str.as_str(),
command_argv: command_json.as_str(),
command_joined: command_joined.as_str(),
sandbox_failure_message: failure.as_deref(),
};
let rendered_prompt = match prompt_template.render() {
Ok(rendered) => rendered,
Err(err) => {
warn!("failed to render sandbox assessment prompt: {err}");
return None;
}
};
let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
Some(split) => split,
None => {
warn!("rendered sandbox assessment prompt missing separator");
return None;
}
};
let system_prompt = system_prompt_section
.strip_prefix("System Prompt:\n")
.unwrap_or(system_prompt_section)
.trim()
.to_string();
let user_prompt = user_prompt_section
.strip_prefix("User Prompt:\n")
.unwrap_or(user_prompt_section)
.trim()
.to_string();
let prompt = Prompt {
input: vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: user_prompt }],
}],
tools: Vec::new(),
parallel_tool_calls: false,
base_instructions_override: Some(system_prompt),
output_schema: Some(sandbox_assessment_schema()),
};
let child_otel =
parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str());
let client = ModelClient::new(
Arc::clone(&config),
Some(auth_manager),
child_otel,
provider,
config.model_reasoning_effort,
config.model_reasoning_summary,
conversation_id,
);
let start = Instant::now();
let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
let mut stream = client.stream(&prompt).await?;
let mut last_json: Option<String> = None;
while let Some(event) = stream.next().await {
match event {
Ok(ResponseEvent::OutputItemDone(item)) => {
if let Some(text) = response_item_text(&item) {
last_json = Some(text);
}
}
Ok(ResponseEvent::RateLimits(_)) => {}
Ok(ResponseEvent::Completed { .. }) => break,
Ok(_) => continue,
Err(err) => return Err(err),
}
}
Ok(last_json)
})
.await;
let duration = start.elapsed();
parent_otel.sandbox_assessment_latency(call_id, duration);
match assessment_result {
Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
Ok(assessment) => {
parent_otel.sandbox_assessment(
call_id,
"success",
Some(assessment.risk_level),
&assessment.risk_categories,
duration,
);
return Some(assessment);
}
Err(err) => {
warn!("failed to parse sandbox assessment JSON: {err}");
parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration);
}
},
Ok(Ok(None)) => {
warn!("sandbox assessment response did not include any message");
parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration);
}
Ok(Err(err)) => {
warn!("sandbox assessment failed: {err}");
parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration);
}
Err(_) => {
warn!("sandbox assessment timed out");
parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration);
}
}
None
}
fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
match policy {
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
SandboxPolicy::ReadOnly => "read-only".to_string(),
SandboxPolicy::WorkspaceWrite { network_access, .. } => {
let network = if *network_access {
"network"
} else {
"no-network"
};
format!("workspace-write (network_access={network})")
}
}
}
fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
let mut roots = vec![cwd.to_path_buf()];
if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy {
roots.extend(writable_roots.iter().cloned());
}
roots
}
fn sandbox_assessment_schema() -> serde_json::Value {
json!({
"type": "object",
"required": ["description", "risk_level", "risk_categories"],
"properties": {
"description": {
"type": "string",
"minLength": 1,
"maxLength": 500
},
"risk_level": {
"type": "string",
"enum": ["low", "medium", "high"]
},
"risk_categories": {
"type": "array",
"items": {
"type": "string",
"enum": SANDBOX_RISK_CATEGORY_VALUES
}
}
},
"additionalProperties": false
})
}
fn response_item_text(item: &ResponseItem) -> Option<String> {
match item {
ResponseItem::Message { content, .. } => {
let mut buffers: Vec<&str> = Vec::new();
for segment in content {
match segment {
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
if !text.is_empty() {
buffers.push(text);
}
}
ContentItem::InputImage { .. } => {}
}
}
if buffers.is_empty() {
None
} else {
Some(buffers.join("\n"))
}
}
ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
_ => None,
}
}

View File

@@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns lowlevel
sandbox placement and transformation of portable CommandSpec into a
readytospawn environment.
*/
pub mod assessment;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;

View File

@@ -7,9 +7,11 @@ retry without sandbox on denial (no reapproval thanks to caching).
*/
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
@@ -38,6 +40,7 @@ impl ToolOrchestrator {
) -> Result<Out, ToolError>
where
T: ToolRuntime<Rq, Out>,
Rq: ProvidesSandboxRetryData,
{
let otel = turn_ctx.client.get_otel_event_manager();
let otel_tn = &tool_ctx.tool_name;
@@ -56,6 +59,7 @@ impl ToolOrchestrator {
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: None,
risk: None,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
@@ -107,12 +111,33 @@ impl ToolOrchestrator {
// Ask for approval before retrying without sandbox.
if !tool.should_bypass_approval(approval_policy, already_approved) {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
let err = SandboxErr::Denied {
output: output.clone(),
};
let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
let failure_summary = format!("failed in sandbox: {friendly}");
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
Some(failure_summary.as_str()),
)
.await;
}
let reason_msg = build_denial_reason_from_output(output.as_ref());
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: Some(reason_msg),
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;

View File

@@ -10,7 +10,9 @@ use crate::sandboxing::CommandSpec;
use crate::sandboxing::execute_env;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -32,6 +34,12 @@ pub struct ApplyPatchRequest {
pub codex_exe: Option<PathBuf>,
}
impl ProvidesSandboxRetryData for ApplyPatchRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
None
}
}
#[derive(Default)]
pub struct ApplyPatchRuntime;
@@ -106,9 +114,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
let call_id = ctx.call_id.to_string();
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
let risk = ctx.risk.clone();
let user_explicitly_approved = req.user_explicitly_approved;
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
with_cached_approval(&session.services, key, move || async move {
if let Some(reason) = retry_reason {
session
.request_command_approval(
@@ -117,6 +126,7 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
vec!["apply_patch".to_string()],
cwd,
Some(reason),
risk,
)
.await
} else if user_explicitly_approved {

View File

@@ -12,7 +12,9 @@ use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -34,6 +36,15 @@ pub struct ShellRequest {
pub justification: Option<String>,
}
impl ProvidesSandboxRetryData for ShellRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
Some(SandboxRetryData {
command: self.command.clone(),
cwd: self.cwd.clone(),
})
}
}
#[derive(Default)]
pub struct ShellRuntime;
@@ -90,13 +101,14 @@ impl Approvable<ShellRequest> for ShellRuntime {
.retry_reason
.clone()
.or_else(|| req.justification.clone());
let risk = ctx.risk.clone();
let session = ctx.session;
let turn = ctx.turn;
let call_id = ctx.call_id.to_string();
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
with_cached_approval(&session.services, key, move || async move {
session
.request_command_approval(turn, call_id, command, cwd, reason)
.request_command_approval(turn, call_id, command, cwd, reason, risk)
.await
})
.await

View File

@@ -9,7 +9,9 @@ use crate::error::SandboxErr;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -31,6 +33,15 @@ pub struct UnifiedExecRequest {
pub env: HashMap<String, String>,
}
impl ProvidesSandboxRetryData for UnifiedExecRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
Some(SandboxRetryData {
command: self.command.clone(),
cwd: self.cwd.clone(),
})
}
}
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
pub struct UnifiedExecApprovalKey {
pub command: Vec<String>,
@@ -85,10 +96,11 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
let command = req.command.clone();
let cwd = req.cwd.clone();
let reason = ctx.retry_reason.clone();
let risk = ctx.risk.clone();
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
session
.request_command_approval(turn, call_id, command, cwd, reason)
.request_command_approval(turn, call_id, command, cwd, reason, risk)
.await
})
.await

View File

@@ -7,6 +7,7 @@
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::error::CodexErr;
use crate::protocol::SandboxCommandAssessment;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::SandboxManager;
@@ -18,6 +19,7 @@ use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::Hash;
use std::path::Path;
use std::path::PathBuf;
use futures::Future;
use futures::future::BoxFuture;
@@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> {
pub turn: &'a TurnContext,
pub call_id: &'a str,
pub retry_reason: Option<String>,
pub risk: Option<SandboxCommandAssessment>,
}
pub(crate) trait Approvable<Req> {
@@ -156,6 +159,17 @@ pub(crate) struct ToolCtx<'a> {
pub tool_name: String,
}
/// Captures the command metadata needed to re-run a tool request without sandboxing.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SandboxRetryData {
pub command: Vec<String>,
pub cwd: PathBuf,
}
pub(crate) trait ProvidesSandboxRetryData {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData>;
}
#[derive(Debug)]
pub(crate) enum ToolError {
Rejected(String),

View File

@@ -0,0 +1,27 @@
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
- description (concise summary, at most two sentences)
- risk_level ("low", "medium", or "high")
- risk_categories (optional array of zero or more category strings)
Risk level examples:
- low: read-only inspections, listing files, printing configuration
- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
Use multiple categories when appropriate.
If information is insufficient, choose the most cautious risk level supported by the evidence.
Respond with JSON only, without markdown code fences or extra commentary.
---
Command metadata:
Platform: {{ platform }}
Sandbox policy: {{ sandbox_policy }}
{% if let Some(roots) = filesystem_roots %}
Filesystem roots: {{ roots }}
{% endif %}
Working directory: {{ working_directory }}
Command argv: {{ command_argv }}
Command (joined): {{ command_joined }}
{% if let Some(message) = sandbox_failure_message %}
Sandbox failure message: {{ message }}
{% endif %}

View File

@@ -519,6 +519,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
EventMsg::AgentReasoningRawContentDelta(_) => {}
EventMsg::ItemStarted(_) => {}
EventMsg::ItemCompleted(_) => {}
EventMsg::RawResponseItem(_) => {}
}
CodexStatus::Running
}

View File

@@ -179,6 +179,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
include_view_image_tool: None,
show_raw_agent_reasoning: oss.then_some(true),
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};
// Parse `-c` overrides.

View File

@@ -167,8 +167,17 @@ impl CodexLogSnapshot {
Ok(path)
}
pub fn upload_to_sentry(&self) -> Result<()> {
/// Upload feedback to Sentry with optional attachments.
pub fn upload_feedback(
&self,
classification: &str,
reason: Option<&str>,
cli_version: &str,
include_logs: bool,
rollout_path: Option<&std::path::Path>,
) -> Result<()> {
use std::collections::BTreeMap;
use std::fs;
use std::str::FromStr;
use std::sync::Arc;
@@ -182,36 +191,90 @@ impl CodexLogSnapshot {
use sentry::transports::DefaultTransportFactory;
use sentry::types::Dsn;
// Build Sentry client
let client = Client::from_config(ClientOptions {
dsn: Some(Dsn::from_str(SENTRY_DSN).map_err(|e| anyhow!("invalid DSN: {}", e))?),
dsn: Some(Dsn::from_str(SENTRY_DSN).map_err(|e| anyhow!("invalid DSN: {e}"))?),
transport: Some(Arc::new(DefaultTransportFactory {})),
..Default::default()
});
let tags = BTreeMap::from([(String::from("thread_id"), self.thread_id.to_string())]);
let mut tags = BTreeMap::from([
(String::from("thread_id"), self.thread_id.to_string()),
(String::from("classification"), classification.to_string()),
(String::from("cli_version"), cli_version.to_string()),
]);
if let Some(r) = reason {
tags.insert(String::from("reason"), r.to_string());
}
let event = Event {
level: Level::Error,
message: Some("Codex Log Upload ".to_string() + &self.thread_id),
let level = match classification {
"bug" | "bad_result" => Level::Error,
_ => Level::Info,
};
let mut envelope = Envelope::new();
let title = format!(
"[{}]: Codex session {}",
display_classification(classification),
self.thread_id
);
let mut event = Event {
level,
message: Some(title.clone()),
tags,
..Default::default()
};
let mut envelope = Envelope::new();
if let Some(r) = reason {
use sentry::protocol::Exception;
use sentry::protocol::Values;
event.exception = Values::from(vec![Exception {
ty: title.clone(),
value: Some(r.to_string()),
..Default::default()
}]);
}
envelope.add_item(EnvelopeItem::Event(event));
envelope.add_item(EnvelopeItem::Attachment(Attachment {
buffer: self.bytes.clone(),
filename: String::from("codex-logs.log"),
content_type: Some("text/plain".to_string()),
ty: None,
}));
if include_logs {
envelope.add_item(EnvelopeItem::Attachment(Attachment {
buffer: self.bytes.clone(),
filename: String::from("codex-logs.log"),
content_type: Some("text/plain".to_string()),
ty: None,
}));
}
if let Some((path, data)) = rollout_path.and_then(|p| fs::read(p).ok().map(|d| (p, d))) {
let fname = path
.file_name()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_else(|| "rollout.jsonl".to_string());
let content_type = "text/plain".to_string();
envelope.add_item(EnvelopeItem::Attachment(Attachment {
buffer: data,
filename: fname,
content_type: Some(content_type),
ty: None,
}));
}
client.send_envelope(envelope);
client.flush(Some(Duration::from_secs(UPLOAD_TIMEOUT_SECS)));
Ok(())
}
}
fn display_classification(classification: &str) -> String {
match classification {
"bug" => "Bug".to_string(),
"bad_result" => "Bad result".to_string(),
"good_result" => "Good result".to_string(),
_ => "Other".to_string(),
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -158,6 +158,7 @@ impl CodexToolCallParam {
include_view_image_tool: None,
show_raw_agent_reasoning: None,
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};

View File

@@ -178,6 +178,7 @@ async fn run_codex_tool_session_inner(
cwd,
call_id,
reason: _,
risk,
parsed_cmd,
}) => {
handle_exec_approval_request(
@@ -190,6 +191,7 @@ async fn run_codex_tool_session_inner(
event.id.clone(),
call_id,
parsed_cmd,
risk,
)
.await;
continue;
@@ -283,6 +285,7 @@ async fn run_codex_tool_session_inner(
| EventMsg::UserMessage(_)
| EventMsg::ShutdownComplete
| EventMsg::ViewImageToolCall(_)
| EventMsg::RawResponseItem(_)
| EventMsg::EnteredReviewMode(_)
| EventMsg::ItemStarted(_)
| EventMsg::ItemCompleted(_)

View File

@@ -4,6 +4,7 @@ use std::sync::Arc;
use codex_core::CodexConversation;
use codex_core::protocol::Op;
use codex_core::protocol::ReviewDecision;
use codex_core::protocol::SandboxCommandAssessment;
use codex_protocol::parse_command::ParsedCommand;
use mcp_types::ElicitRequest;
use mcp_types::ElicitRequestParamsRequestedSchema;
@@ -37,6 +38,8 @@ pub struct ExecApprovalElicitRequestParams {
pub codex_command: Vec<String>,
pub codex_cwd: PathBuf,
pub codex_parsed_cmd: Vec<ParsedCommand>,
#[serde(skip_serializing_if = "Option::is_none")]
pub codex_risk: Option<SandboxCommandAssessment>,
}
// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
@@ -59,6 +62,7 @@ pub(crate) async fn handle_exec_approval_request(
event_id: String,
call_id: String,
codex_parsed_cmd: Vec<ParsedCommand>,
codex_risk: Option<SandboxCommandAssessment>,
) {
let escaped_command =
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
@@ -81,6 +85,7 @@ pub(crate) async fn handle_exec_approval_request(
codex_command: command,
codex_cwd: cwd,
codex_parsed_cmd,
codex_risk,
};
let params_json = match serde_json::to_value(&params) {
Ok(value) => value,

View File

@@ -196,6 +196,7 @@ fn create_expected_elicitation_request(
codex_cwd: workdir.to_path_buf(),
codex_call_id: "call1234".to_string(),
codex_parsed_cmd,
codex_risk: None,
})?),
})
}

View File

@@ -8,6 +8,8 @@ use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SandboxRiskCategory;
use codex_protocol::protocol::SandboxRiskLevel;
use codex_protocol::user_input::UserInput;
use eventsource_stream::Event as StreamEvent;
use eventsource_stream::EventStreamError as StreamError;
@@ -366,6 +368,63 @@ impl OtelEventManager {
);
}
pub fn sandbox_assessment(
&self,
call_id: &str,
status: &str,
risk_level: Option<SandboxRiskLevel>,
risk_categories: &[SandboxRiskCategory],
duration: Duration,
) {
let level = risk_level.map(|level| level.as_str());
let categories = if risk_categories.is_empty() {
String::new()
} else {
risk_categories
.iter()
.map(SandboxRiskCategory::as_str)
.collect::<Vec<_>>()
.join(", ")
};
tracing::event!(
tracing::Level::INFO,
event.name = "codex.sandbox_assessment",
event.timestamp = %timestamp(),
conversation.id = %self.metadata.conversation_id,
app.version = %self.metadata.app_version,
auth_mode = self.metadata.auth_mode,
user.account_id = self.metadata.account_id,
user.email = self.metadata.account_email,
terminal.type = %self.metadata.terminal_type,
model = %self.metadata.model,
slug = %self.metadata.slug,
call_id = %call_id,
status = %status,
risk_level = level,
risk_categories = categories,
duration_ms = %duration.as_millis(),
);
}
pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) {
tracing::event!(
tracing::Level::INFO,
event.name = "codex.sandbox_assessment_latency",
event.timestamp = %timestamp(),
conversation.id = %self.metadata.conversation_id,
app.version = %self.metadata.app_version,
auth_mode = self.metadata.auth_mode,
user.account_id = self.metadata.account_id,
user.email = self.metadata.account_email,
terminal.type = %self.metadata.terminal_type,
model = %self.metadata.model,
slug = %self.metadata.slug,
call_id = %call_id,
duration_ms = %duration.as_millis(),
);
}
pub async fn log_tool_result<F, Fut, E>(
&self,
tool_name: &str,

View File

@@ -0,0 +1,91 @@
use std::collections::HashMap;
use std::path::PathBuf;
use crate::parse_command::ParsedCommand;
use crate::protocol::FileChange;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use ts_rs::TS;
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum SandboxRiskLevel {
Low,
Medium,
High,
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum SandboxRiskCategory {
DataDeletion,
DataExfiltration,
PrivilegeEscalation,
SystemModification,
NetworkAccess,
ResourceExhaustion,
Compliance,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
pub struct SandboxCommandAssessment {
pub description: String,
pub risk_level: SandboxRiskLevel,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub risk_categories: Vec<SandboxRiskCategory>,
}
impl SandboxRiskLevel {
pub fn as_str(&self) -> &'static str {
match self {
Self::Low => "low",
Self::Medium => "medium",
Self::High => "high",
}
}
}
impl SandboxRiskCategory {
pub fn as_str(&self) -> &'static str {
match self {
Self::DataDeletion => "data_deletion",
Self::DataExfiltration => "data_exfiltration",
Self::PrivilegeEscalation => "privilege_escalation",
Self::SystemModification => "system_modification",
Self::NetworkAccess => "network_access",
Self::ResourceExhaustion => "resource_exhaustion",
Self::Compliance => "compliance",
}
}
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ExecApprovalRequestEvent {
/// Identifier for the associated exec call, if available.
pub call_id: String,
/// The command to be executed.
pub command: Vec<String>,
/// The command's working directory.
pub cwd: PathBuf,
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
/// Optional model-provided risk assessment describing the blocked command.
#[serde(skip_serializing_if = "Option::is_none")]
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ApplyPatchApprovalRequestEvent {
/// Responses API call id for the associated patch apply call, if available.
pub call_id: String,
pub changes: HashMap<PathBuf, FileChange>,
/// Optional explanatory reason (e.g. request for extra write access).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
#[serde(skip_serializing_if = "Option::is_none")]
pub grant_root: Option<PathBuf>,
}

View File

@@ -1,6 +1,7 @@
pub mod account;
mod conversation_id;
pub use conversation_id::ConversationId;
pub mod approvals;
pub mod config_types;
pub mod custom_prompts;
pub mod items;

View File

@@ -34,6 +34,12 @@ use serde_with::serde_as;
use strum_macros::Display;
use ts_rs::TS;
pub use crate::approvals::ApplyPatchApprovalRequestEvent;
pub use crate::approvals::ExecApprovalRequestEvent;
pub use crate::approvals::SandboxCommandAssessment;
pub use crate::approvals::SandboxRiskCategory;
pub use crate::approvals::SandboxRiskLevel;
/// Open/close tags for special user-input blocks. Used across crates to avoid
/// duplicated hardcoded strings.
pub const USER_INSTRUCTIONS_OPEN_TAG: &str = "<user_instructions>";
@@ -521,6 +527,8 @@ pub enum EventMsg {
/// Exited review mode with an optional final result to apply.
ExitedReviewMode(ExitedReviewModeEvent),
RawResponseItem(ResponseItem),
ItemStarted(ItemStartedEvent),
ItemCompleted(ItemCompletedEvent),
}
@@ -1126,33 +1134,6 @@ pub struct ExecCommandOutputDeltaEvent {
pub chunk: Vec<u8>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ExecApprovalRequestEvent {
/// Identifier for the associated exec call, if available.
pub call_id: String,
/// The command to be executed.
pub command: Vec<String>,
/// The command's working directory.
pub cwd: PathBuf,
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
pub parsed_cmd: Vec<ParsedCommand>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ApplyPatchApprovalRequestEvent {
/// Responses API call id for the associated patch apply call, if available.
pub call_id: String,
pub changes: HashMap<PathBuf, FileChange>,
/// Optional explanatory reason (e.g. request for extra write access).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
#[serde(skip_serializing_if = "Option::is_none")]
pub grant_root: Option<PathBuf>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct BackgroundEventEvent {
pub message: String,

View File

@@ -360,6 +360,15 @@ impl App {
AppEvent::OpenFullAccessConfirmation { preset } => {
self.chat_widget.open_full_access_confirmation(preset);
}
AppEvent::OpenFeedbackNote {
category,
include_logs,
} => {
self.chat_widget.open_feedback_note(category, include_logs);
}
AppEvent::OpenFeedbackConsent { category } => {
self.chat_widget.open_feedback_consent(category);
}
AppEvent::PersistModelSelection { model, effort } => {
let profile = self.active_profile.as_deref();
match persist_model_selection(&self.config.codex_home, profile, &model, effort)

View File

@@ -101,4 +101,23 @@ pub(crate) enum AppEvent {
/// Open the approval popup.
FullScreenApprovalRequest(ApprovalRequest),
/// Open the feedback note entry overlay after the user selects a category.
OpenFeedbackNote {
category: FeedbackCategory,
include_logs: bool,
},
/// Open the upload consent popup for feedback after selecting a category.
OpenFeedbackConsent {
category: FeedbackCategory,
},
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FeedbackCategory {
BadResult,
GoodResult,
Bug,
Other,
}

View File

@@ -19,6 +19,9 @@ use crate::render::renderable::Renderable;
use codex_core::protocol::FileChange;
use codex_core::protocol::Op;
use codex_core::protocol::ReviewDecision;
use codex_core::protocol::SandboxCommandAssessment;
use codex_core::protocol::SandboxRiskCategory;
use codex_core::protocol::SandboxRiskLevel;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyEventKind;
@@ -38,6 +41,7 @@ pub(crate) enum ApprovalRequest {
id: String,
command: Vec<String>,
reason: Option<String>,
risk: Option<SandboxCommandAssessment>,
},
ApplyPatch {
id: String,
@@ -285,12 +289,17 @@ impl From<ApprovalRequest> for ApprovalRequestState {
id,
command,
reason,
risk,
} => {
let reason = reason.filter(|item| !item.is_empty());
let has_reason = reason.is_some();
let mut header: Vec<Line<'static>> = Vec::new();
if let Some(reason) = reason
&& !reason.is_empty()
{
if let Some(reason) = reason {
header.push(Line::from(vec!["Reason: ".into(), reason.italic()]));
}
if let Some(risk) = risk.as_ref() {
header.extend(render_risk_lines(risk));
} else if has_reason {
header.push(Line::from(""));
}
let full_cmd = strip_bash_lc_and_escape(&command);
@@ -330,6 +339,52 @@ impl From<ApprovalRequest> for ApprovalRequestState {
}
}
fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> {
let level_span = match risk.risk_level {
SandboxRiskLevel::Low => "LOW".green().bold(),
SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(),
SandboxRiskLevel::High => "HIGH".red().bold(),
};
let mut lines = Vec::new();
let description = risk.description.trim();
if !description.is_empty() {
lines.push(Line::from(vec![
"Summary: ".into(),
description.to_string().into(),
]));
}
let mut spans: Vec<Span<'static>> = vec!["Risk: ".into(), level_span];
if !risk.risk_categories.is_empty() {
spans.push(" (".into());
for (idx, category) in risk.risk_categories.iter().enumerate() {
if idx > 0 {
spans.push(", ".into());
}
spans.push(risk_category_label(*category).into());
}
spans.push(")".into());
}
lines.push(Line::from(spans));
lines.push(Line::from(""));
lines
}
fn risk_category_label(category: SandboxRiskCategory) -> &'static str {
match category {
SandboxRiskCategory::DataDeletion => "data deletion",
SandboxRiskCategory::DataExfiltration => "data exfiltration",
SandboxRiskCategory::PrivilegeEscalation => "privilege escalation",
SandboxRiskCategory::SystemModification => "system modification",
SandboxRiskCategory::NetworkAccess => "network access",
SandboxRiskCategory::ResourceExhaustion => "resource exhaustion",
SandboxRiskCategory::Compliance => "compliance",
}
}
#[derive(Clone)]
enum ApprovalVariant {
Exec { id: String, command: Vec<String> },
@@ -404,6 +459,7 @@ mod tests {
id: "test".to_string(),
command: vec!["echo".to_string(), "hi".to_string()],
reason: Some("reason".to_string()),
risk: None,
}
}
@@ -445,6 +501,7 @@ mod tests {
id: "test".into(),
command,
reason: None,
risk: None,
};
let view = ApprovalOverlay::new(exec_request, tx);

View File

@@ -1,165 +1,448 @@
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
use crate::history_cell;
use crate::history_cell::PlainHistoryCell;
use crate::render::renderable::Renderable;
use std::cell::RefCell;
use std::path::PathBuf;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyModifiers;
use ratatui::buffer::Buffer;
use ratatui::layout::Rect;
use ratatui::style::Stylize;
use ratatui::text::Line;
use std::path::PathBuf;
use ratatui::text::Span;
use ratatui::widgets::Clear;
use ratatui::widgets::Paragraph;
use ratatui::widgets::StatefulWidgetRef;
use ratatui::widgets::Widget;
use super::BottomPane;
use super::SelectionAction;
use super::SelectionItem;
use super::SelectionViewParams;
use crate::app_event::AppEvent;
use crate::app_event::FeedbackCategory;
use crate::app_event_sender::AppEventSender;
use crate::history_cell;
use crate::render::renderable::Renderable;
use super::CancellationEvent;
use super::bottom_pane_view::BottomPaneView;
use super::popup_consts::standard_popup_hint_line;
use super::textarea::TextArea;
use super::textarea::TextAreaState;
const BASE_ISSUE_URL: &str = "https://github.com/openai/codex/issues/new?template=2-bug-report.yml";
pub(crate) struct FeedbackView;
/// Minimal input overlay to collect an optional feedback note, then upload
/// both logs and rollout with classification + metadata.
pub(crate) struct FeedbackNoteView {
category: FeedbackCategory,
snapshot: codex_feedback::CodexLogSnapshot,
rollout_path: Option<PathBuf>,
app_event_tx: AppEventSender,
include_logs: bool,
impl FeedbackView {
pub fn show(
bottom_pane: &mut BottomPane,
file_path: PathBuf,
// UI state
textarea: TextArea,
textarea_state: RefCell<TextAreaState>,
complete: bool,
}
impl FeedbackNoteView {
pub(crate) fn new(
category: FeedbackCategory,
snapshot: codex_feedback::CodexLogSnapshot,
) {
bottom_pane.show_selection_view(Self::selection_params(file_path, snapshot));
rollout_path: Option<PathBuf>,
app_event_tx: AppEventSender,
include_logs: bool,
) -> Self {
Self {
category,
snapshot,
rollout_path,
app_event_tx,
include_logs,
textarea: TextArea::new(),
textarea_state: RefCell::new(TextAreaState::default()),
complete: false,
}
}
fn selection_params(
file_path: PathBuf,
snapshot: codex_feedback::CodexLogSnapshot,
) -> SelectionViewParams {
let header = FeedbackHeader::new(file_path);
fn submit(&mut self) {
let note = self.textarea.text().trim().to_string();
let reason_opt = if note.is_empty() {
None
} else {
Some(note.as_str())
};
let rollout_path_ref = self.rollout_path.as_deref();
let classification = feedback_classification(self.category);
let thread_id = snapshot.thread_id.clone();
let cli_version = crate::version::CODEX_CLI_VERSION;
let mut thread_id = self.snapshot.thread_id.clone();
let upload_action_tread_id = thread_id.clone();
let upload_action: SelectionAction = Box::new(move |tx: &AppEventSender| {
match snapshot.upload_to_sentry() {
Ok(()) => {
let issue_url = format!(
"{BASE_ISSUE_URL}&steps=Uploaded%20thread:%20{upload_action_tread_id}",
);
tx.send(AppEvent::InsertHistoryCell(Box::new(PlainHistoryCell::new(vec![
Line::from(
"• Codex logs uploaded. Please open an issue using the following URL:",
),
let result = self.snapshot.upload_feedback(
classification,
reason_opt,
cli_version,
self.include_logs,
if self.include_logs {
rollout_path_ref
} else {
None
},
);
match result {
Ok(()) => {
let issue_url = format!("{BASE_ISSUE_URL}&steps=Uploaded%20thread:%20{thread_id}");
let prefix = if self.include_logs {
"• Feedback uploaded."
} else {
"• Feedback recorded (no logs)."
};
self.app_event_tx.send(AppEvent::InsertHistoryCell(Box::new(
history_cell::PlainHistoryCell::new(vec![
Line::from(format!(
"{prefix} Please open an issue using the following URL:"
)),
"".into(),
Line::from(vec![" ".into(), issue_url.cyan().underlined()]),
"".into(),
Line::from(vec![" Or mention your thread ID ".into(), upload_action_tread_id.clone().bold(), " in an existing issue.".into()])
]))));
}
Err(e) => {
tx.send(AppEvent::InsertHistoryCell(Box::new(
history_cell::new_error_event(format!("Failed to upload logs: {e}")),
)));
}
}
});
let upload_item = SelectionItem {
name: "Yes".to_string(),
description: Some(
"Share the current Codex session logs with the team for troubleshooting."
.to_string(),
),
actions: vec![upload_action],
dismiss_on_select: true,
..Default::default()
};
let no_action: SelectionAction = Box::new(move |tx: &AppEventSender| {
let issue_url = format!("{BASE_ISSUE_URL}&steps=Thread%20ID:%20{thread_id}",);
tx.send(AppEvent::InsertHistoryCell(Box::new(
PlainHistoryCell::new(vec![
Line::from("• Please open an issue using the following URL:"),
"".into(),
Line::from(vec![" ".into(), issue_url.cyan().underlined()]),
"".into(),
Line::from(vec![
" Or mention your thread ID ".into(),
thread_id.clone().bold(),
" in an existing issue.".into(),
Line::from(vec![
" Or mention your thread ID ".into(),
std::mem::take(&mut thread_id).bold(),
" in an existing issue.".into(),
]),
]),
]),
)));
});
)));
}
Err(e) => {
self.app_event_tx.send(AppEvent::InsertHistoryCell(Box::new(
history_cell::new_error_event(format!("Failed to upload feedback: {e}")),
)));
}
}
self.complete = true;
}
}
let no_item = SelectionItem {
name: "No".to_string(),
actions: vec![no_action],
dismiss_on_select: true,
..Default::default()
};
let cancel_item = SelectionItem {
name: "Cancel".to_string(),
dismiss_on_select: true,
..Default::default()
};
SelectionViewParams {
header: Box::new(header),
items: vec![upload_item, no_item, cancel_item],
..Default::default()
impl BottomPaneView for FeedbackNoteView {
fn handle_key_event(&mut self, key_event: KeyEvent) {
match key_event {
KeyEvent {
code: KeyCode::Esc, ..
} => {
self.on_ctrl_c();
}
KeyEvent {
code: KeyCode::Enter,
modifiers: KeyModifiers::NONE,
..
} => {
self.submit();
}
KeyEvent {
code: KeyCode::Enter,
..
} => {
self.textarea.input(key_event);
}
other => {
self.textarea.input(other);
}
}
}
}
struct FeedbackHeader {
file_path: PathBuf,
}
impl FeedbackHeader {
fn new(file_path: PathBuf) -> Self {
Self { file_path }
fn on_ctrl_c(&mut self) -> CancellationEvent {
self.complete = true;
CancellationEvent::Handled
}
fn lines(&self) -> Vec<Line<'static>> {
vec![
Line::from("Do you want to upload logs before reporting issue?".bold()),
"".into(),
Line::from(
"Logs may include the full conversation history of this Codex process, including prompts, tool calls, and their results.",
),
Line::from(
"These logs are retained for 90 days and are used solely for troubleshooting and diagnostic purposes.",
),
"".into(),
Line::from(vec![
"You can review the exact content of the logs before theyre uploaded at:".into(),
]),
Line::from(self.file_path.display().to_string().dim()),
"".into(),
]
fn is_complete(&self) -> bool {
self.complete
}
fn handle_paste(&mut self, pasted: String) -> bool {
if pasted.is_empty() {
return false;
}
self.textarea.insert_str(&pasted);
true
}
fn cursor_pos(&self, area: Rect) -> Option<(u16, u16)> {
if area.height < 2 || area.width <= 2 {
return None;
}
let text_area_height = self.input_height(area.width).saturating_sub(1);
if text_area_height == 0 {
return None;
}
let top_line_count = 1u16; // title only
let textarea_rect = Rect {
x: area.x.saturating_add(2),
y: area.y.saturating_add(top_line_count).saturating_add(1),
width: area.width.saturating_sub(2),
height: text_area_height,
};
let state = *self.textarea_state.borrow();
self.textarea.cursor_pos_with_state(textarea_rect, state)
}
}
impl Renderable for FeedbackHeader {
impl Renderable for FeedbackNoteView {
fn desired_height(&self, width: u16) -> u16 {
1u16 + self.input_height(width) + 3u16
}
fn render(&self, area: Rect, buf: &mut Buffer) {
if area.width == 0 || area.height == 0 {
if area.height == 0 || area.width == 0 {
return;
}
for (i, line) in self.lines().into_iter().enumerate() {
let y = area.y.saturating_add(i as u16);
if y >= area.y.saturating_add(area.height) {
break;
let (title, placeholder) = feedback_title_and_placeholder(self.category);
let input_height = self.input_height(area.width);
// Title line
let title_area = Rect {
x: area.x,
y: area.y,
width: area.width,
height: 1,
};
let title_spans: Vec<Span<'static>> = vec![gutter(), title.bold()];
Paragraph::new(Line::from(title_spans)).render(title_area, buf);
// Input line
let input_area = Rect {
x: area.x,
y: area.y.saturating_add(1),
width: area.width,
height: input_height,
};
if input_area.width >= 2 {
for row in 0..input_area.height {
Paragraph::new(Line::from(vec![gutter()])).render(
Rect {
x: input_area.x,
y: input_area.y.saturating_add(row),
width: 2,
height: 1,
},
buf,
);
}
let line_area = Rect::new(area.x, y, area.width, 1).intersection(area);
line.render(line_area, buf);
let text_area_height = input_area.height.saturating_sub(1);
if text_area_height > 0 {
if input_area.width > 2 {
let blank_rect = Rect {
x: input_area.x.saturating_add(2),
y: input_area.y,
width: input_area.width.saturating_sub(2),
height: 1,
};
Clear.render(blank_rect, buf);
}
let textarea_rect = Rect {
x: input_area.x.saturating_add(2),
y: input_area.y.saturating_add(1),
width: input_area.width.saturating_sub(2),
height: text_area_height,
};
let mut state = self.textarea_state.borrow_mut();
StatefulWidgetRef::render_ref(&(&self.textarea), textarea_rect, buf, &mut state);
if self.textarea.text().is_empty() {
Paragraph::new(Line::from(placeholder.dim())).render(textarea_rect, buf);
}
}
}
let hint_blank_y = input_area.y.saturating_add(input_height);
if hint_blank_y < area.y.saturating_add(area.height) {
let blank_area = Rect {
x: area.x,
y: hint_blank_y,
width: area.width,
height: 1,
};
Clear.render(blank_area, buf);
}
let hint_y = hint_blank_y.saturating_add(1);
if hint_y < area.y.saturating_add(area.height) {
Paragraph::new(standard_popup_hint_line()).render(
Rect {
x: area.x,
y: hint_y,
width: area.width,
height: 1,
},
buf,
);
}
}
}
fn desired_height(&self, width: u16) -> u16 {
self.lines()
.iter()
.map(|line| line.desired_height(width))
.sum()
impl FeedbackNoteView {
fn input_height(&self, width: u16) -> u16 {
let usable_width = width.saturating_sub(2);
let text_height = self.textarea.desired_height(usable_width).clamp(1, 8);
text_height.saturating_add(1).min(9)
}
}
fn gutter() -> Span<'static> {
"".cyan()
}
fn feedback_title_and_placeholder(category: FeedbackCategory) -> (String, String) {
match category {
FeedbackCategory::BadResult => (
"Tell us more (bad result)".to_string(),
"(optional) Write a short description to help us further".to_string(),
),
FeedbackCategory::GoodResult => (
"Tell us more (good result)".to_string(),
"(optional) Write a short description to help us further".to_string(),
),
FeedbackCategory::Bug => (
"Tell us more (bug)".to_string(),
"(optional) Write a short description to help us further".to_string(),
),
FeedbackCategory::Other => (
"Tell us more (other)".to_string(),
"(optional) Write a short description to help us further".to_string(),
),
}
}
fn feedback_classification(category: FeedbackCategory) -> &'static str {
match category {
FeedbackCategory::BadResult => "bad_result",
FeedbackCategory::GoodResult => "good_result",
FeedbackCategory::Bug => "bug",
FeedbackCategory::Other => "other",
}
}
// Build the selection popup params for feedback categories.
pub(crate) fn feedback_selection_params(
app_event_tx: AppEventSender,
) -> super::SelectionViewParams {
super::SelectionViewParams {
title: Some("How was this?".to_string()),
items: vec![
make_feedback_item(
app_event_tx.clone(),
"bug",
"Crash, error message, hang, or broken UI/behavior.",
FeedbackCategory::Bug,
),
make_feedback_item(
app_event_tx.clone(),
"bad result",
"Output was off-target, incorrect, incomplete, or unhelpful.",
FeedbackCategory::BadResult,
),
make_feedback_item(
app_event_tx.clone(),
"good result",
"Helpful, correct, highquality, or delightful result worth celebrating.",
FeedbackCategory::GoodResult,
),
make_feedback_item(
app_event_tx,
"other",
"Slowness, feature suggestion, UX feedback, or anything else.",
FeedbackCategory::Other,
),
],
..Default::default()
}
}
fn make_feedback_item(
app_event_tx: AppEventSender,
name: &str,
description: &str,
category: FeedbackCategory,
) -> super::SelectionItem {
let action: super::SelectionAction = Box::new(move |_sender: &AppEventSender| {
app_event_tx.send(AppEvent::OpenFeedbackConsent { category });
});
super::SelectionItem {
name: name.to_string(),
description: Some(description.to_string()),
actions: vec![action],
dismiss_on_select: true,
..Default::default()
}
}
/// Build the upload consent popup params for a given feedback category.
pub(crate) fn feedback_upload_consent_params(
app_event_tx: AppEventSender,
category: FeedbackCategory,
rollout_path: Option<std::path::PathBuf>,
) -> super::SelectionViewParams {
use super::popup_consts::standard_popup_hint_line;
let yes_action: super::SelectionAction = Box::new({
let tx = app_event_tx.clone();
move |sender: &AppEventSender| {
let _ = sender;
tx.send(AppEvent::OpenFeedbackNote {
category,
include_logs: true,
});
}
});
let no_action: super::SelectionAction = Box::new({
let tx = app_event_tx;
move |sender: &AppEventSender| {
let _ = sender;
tx.send(AppEvent::OpenFeedbackNote {
category,
include_logs: false,
});
}
});
// Build header listing files that would be sent if user consents.
let mut header_lines: Vec<Box<dyn crate::render::renderable::Renderable>> = vec![
Line::from("Upload logs?".bold()).into(),
Line::from("").into(),
Line::from("The following files will be sent:".dim()).into(),
Line::from(vec!["".into(), "codex-logs.log".into()]).into(),
];
if let Some(path) = rollout_path.as_deref()
&& let Some(name) = path.file_name().map(|s| s.to_string_lossy().to_string())
{
header_lines.push(Line::from(vec!["".into(), name.into()]).into());
}
super::SelectionViewParams {
footer_hint: Some(standard_popup_hint_line()),
items: vec![
super::SelectionItem {
name: "Yes".to_string(),
description: Some(
"Share the current Codex session logs with the team for troubleshooting."
.to_string(),
),
actions: vec![yes_action],
dismiss_on_select: true,
..Default::default()
},
super::SelectionItem {
name: "No".to_string(),
description: Some("".to_string()),
actions: vec![no_action],
dismiss_on_select: true,
..Default::default()
},
],
header: Box::new(crate::render::renderable::ColumnRenderable::with(
header_lines,
)),
..Default::default()
}
}
@@ -167,22 +450,19 @@ impl Renderable for FeedbackHeader {
mod tests {
use super::*;
use crate::app_event::AppEvent;
use crate::bottom_pane::list_selection_view::ListSelectionView;
use crate::style::user_message_style;
use codex_feedback::CodexFeedback;
use codex_protocol::ConversationId;
use insta::assert_snapshot;
use ratatui::buffer::Buffer;
use ratatui::layout::Rect;
use ratatui::style::Color;
use tokio::sync::mpsc::unbounded_channel;
use crate::app_event_sender::AppEventSender;
fn buffer_to_string(buffer: &Buffer) -> String {
(0..buffer.area.height)
fn render(view: &FeedbackNoteView, width: u16) -> String {
let height = view.desired_height(width);
let area = Rect::new(0, 0, width, height);
let mut buf = Buffer::empty(area);
view.render(area, &mut buf);
let mut lines: Vec<String> = (0..area.height)
.map(|row| {
let mut line = String::new();
for col in 0..buffer.area.width {
let symbol = buffer[(buffer.area.x + col, buffer.area.y + row)].symbol();
for col in 0..area.width {
let symbol = buf[(area.x + col, area.y + row)].symbol();
if symbol.is_empty() {
line.push(' ');
} else {
@@ -191,34 +471,49 @@ mod tests {
}
line.trim_end().to_string()
})
.collect::<Vec<_>>()
.join("\n")
.collect();
while lines.first().is_some_and(|l| l.trim().is_empty()) {
lines.remove(0);
}
while lines.last().is_some_and(|l| l.trim().is_empty()) {
lines.pop();
}
lines.join("\n")
}
fn make_view(category: FeedbackCategory) -> FeedbackNoteView {
let (tx_raw, _rx) = tokio::sync::mpsc::unbounded_channel::<AppEvent>();
let tx = AppEventSender::new(tx_raw);
let snapshot = codex_feedback::CodexFeedback::new().snapshot(None);
FeedbackNoteView::new(category, snapshot, None, tx, true)
}
#[test]
fn renders_feedback_view_header() {
let (tx_raw, _rx) = unbounded_channel::<AppEvent>();
let app_event_tx = AppEventSender::new(tx_raw);
let snapshot = CodexFeedback::new().snapshot(Some(
ConversationId::from_string("550e8400-e29b-41d4-a716-446655440000").unwrap(),
));
let file_path = PathBuf::from("/tmp/codex-feedback.log");
fn feedback_view_bad_result() {
let view = make_view(FeedbackCategory::BadResult);
let rendered = render(&view, 60);
insta::assert_snapshot!("feedback_view_bad_result", rendered);
}
let params = FeedbackView::selection_params(file_path.clone(), snapshot);
let view = ListSelectionView::new(params, app_event_tx);
#[test]
fn feedback_view_good_result() {
let view = make_view(FeedbackCategory::GoodResult);
let rendered = render(&view, 60);
insta::assert_snapshot!("feedback_view_good_result", rendered);
}
let width = 72;
let height = view.desired_height(width).max(1);
let area = Rect::new(0, 0, width, height);
let mut buf = Buffer::empty(area);
view.render(area, &mut buf);
#[test]
fn feedback_view_bug() {
let view = make_view(FeedbackCategory::Bug);
let rendered = render(&view, 60);
insta::assert_snapshot!("feedback_view_bug", rendered);
}
let rendered =
buffer_to_string(&buf).replace(&file_path.display().to_string(), "<LOG_PATH>");
assert_snapshot!("feedback_view_render", rendered);
let cell_style = buf[(area.x, area.y)].style();
let expected_bg = user_message_style().bg.unwrap_or(Color::Reset);
assert_eq!(cell_style.bg.unwrap_or(Color::Reset), expected_bg);
#[test]
fn feedback_view_other() {
let view = make_view(FeedbackCategory::Other);
let rendered = render(&view, 60);
insta::assert_snapshot!("feedback_view_other", rendered);
}
}

View File

@@ -28,12 +28,14 @@ mod list_selection_view;
mod prompt_args;
pub(crate) use list_selection_view::SelectionViewParams;
mod feedback_view;
pub(crate) use feedback_view::feedback_selection_params;
pub(crate) use feedback_view::feedback_upload_consent_params;
mod paste_burst;
pub mod popup_consts;
mod scroll_state;
mod selection_popup_common;
mod textarea;
pub(crate) use feedback_view::FeedbackView;
pub(crate) use feedback_view::FeedbackNoteView;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum CancellationEvent {
@@ -557,6 +559,7 @@ mod tests {
id: "1".to_string(),
command: vec!["echo".into(), "ok".into()],
reason: None,
risk: None,
}
}

View File

@@ -0,0 +1,9 @@
---
source: tui/src/bottom_pane/feedback_view.rs
expression: rendered
---
▌ Tell us more (bad result)
▌ (optional) Write a short description to help us further
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,9 @@
---
source: tui/src/bottom_pane/feedback_view.rs
expression: rendered
---
▌ Tell us more (bug)
▌ (optional) Write a short description to help us further
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,9 @@
---
source: tui/src/bottom_pane/feedback_view.rs
expression: rendered
---
▌ Tell us more (good result)
▌ (optional) Write a short description to help us further
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,9 @@
---
source: tui/src/bottom_pane/feedback_view.rs
expression: rendered
---
▌ Tell us more (other)
▌ (optional) Write a short description to help us further
Press enter to confirm or esc to go back

View File

@@ -276,6 +276,8 @@ pub(crate) struct ChatWidget {
last_rendered_width: std::cell::Cell<Option<usize>>,
// Feedback sink for /feedback
feedback: codex_feedback::CodexFeedback,
// Current session rollout path (if known)
current_rollout_path: Option<PathBuf>,
}
struct UserMessage {
@@ -322,6 +324,7 @@ impl ChatWidget {
self.bottom_pane
.set_history_metadata(event.history_log_id, event.history_entry_count);
self.conversation_id = Some(event.session_id);
self.current_rollout_path = Some(event.rollout_path.clone());
let initial_messages = event.initial_messages.clone();
let model_for_header = event.model.clone();
self.session_header.set_model(&model_for_header);
@@ -343,6 +346,39 @@ impl ChatWidget {
}
}
pub(crate) fn open_feedback_note(
&mut self,
category: crate::app_event::FeedbackCategory,
include_logs: bool,
) {
// Build a fresh snapshot at the time of opening the note overlay.
let snapshot = self.feedback.snapshot(self.conversation_id);
let rollout = if include_logs {
self.current_rollout_path.clone()
} else {
None
};
let view = crate::bottom_pane::FeedbackNoteView::new(
category,
snapshot,
rollout,
self.app_event_tx.clone(),
include_logs,
);
self.bottom_pane.show_view(Box::new(view));
self.request_redraw();
}
pub(crate) fn open_feedback_consent(&mut self, category: crate::app_event::FeedbackCategory) {
let params = crate::bottom_pane::feedback_upload_consent_params(
self.app_event_tx.clone(),
category,
self.current_rollout_path.clone(),
);
self.bottom_pane.show_selection_view(params);
self.request_redraw();
}
fn on_agent_message(&mut self, message: String) {
// If we have a stream_controller, then the final agent message is redundant and will be a
// duplicate of what has already been streamed.
@@ -496,7 +532,7 @@ impl ChatWidget {
if reason != TurnAbortReason::ReviewEnded {
self.add_to_history(history_cell::new_error_event(
"Conversation interrupted - tell the model what to do differently".to_owned(),
"Conversation interrupted - tell the model what to do differently. Something went wrong? Hit `/feedback` to report the issue.".to_owned(),
));
}
@@ -777,6 +813,7 @@ impl ChatWidget {
id,
command: ev.command,
reason: ev.reason,
risk: ev.risk,
};
self.bottom_pane.push_approval_request(request);
self.request_redraw();
@@ -957,6 +994,7 @@ impl ChatWidget {
needs_final_message_separator: false,
last_rendered_width: std::cell::Cell::new(None),
feedback,
current_rollout_path: None,
}
}
@@ -1024,6 +1062,7 @@ impl ChatWidget {
needs_final_message_separator: false,
last_rendered_width: std::cell::Cell::new(None),
feedback,
current_rollout_path: None,
}
}
@@ -1128,23 +1167,11 @@ impl ChatWidget {
}
match cmd {
SlashCommand::Feedback => {
let snapshot = self.feedback.snapshot(self.conversation_id);
match snapshot.save_to_temp_file() {
Ok(path) => {
crate::bottom_pane::FeedbackView::show(
&mut self.bottom_pane,
path,
snapshot,
);
self.request_redraw();
}
Err(e) => {
self.add_to_history(history_cell::new_error_event(format!(
"Failed to save feedback logs: {e}"
)));
self.request_redraw();
}
}
// Step 1: pick a category (UI built in feedback_view)
let params =
crate::bottom_pane::feedback_selection_params(self.app_event_tx.clone());
self.bottom_pane.show_selection_view(params);
self.request_redraw();
}
SlashCommand::New => {
self.app_event_tx.send(AppEvent::NewSession);
@@ -1497,7 +1524,9 @@ impl ChatWidget {
self.on_entered_review_mode(review_request)
}
EventMsg::ExitedReviewMode(review) => self.on_exited_review_mode(review),
EventMsg::ItemStarted(_) | EventMsg::ItemCompleted(_) => {}
EventMsg::RawResponseItem(_)
| EventMsg::ItemStarted(_)
| EventMsg::ItemCompleted(_) => {}
}
}

View File

@@ -0,0 +1,11 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
How was this?
1. bug Crash, error message, hang, or broken UI/behavior.
2. bad result Output was off-target, incorrect, incomplete, or unhelpful.
3. good result Helpful, correct, highquality, or delightful result worth
celebrating.
4. other Slowness, feature suggestion, UX feedback, or anything else.

View File

@@ -0,0 +1,14 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Upload logs?
The following files will be sent:
• codex-logs.log
1. Yes Share the current Codex session logs with the team for
troubleshooting.
2. No
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,5 @@
---
source: tui/src/chatwidget/tests.rs
expression: last
---
■ Conversation interrupted - tell the model what to do differently. Something went wrong? Hit `/feedback` to report the issue.

View File

@@ -299,6 +299,7 @@ fn make_chatwidget_manual() -> (
needs_final_message_separator: false,
last_rendered_width: std::cell::Cell::new(None),
feedback: codex_feedback::CodexFeedback::new(),
current_rollout_path: None,
};
(widget, rx, op_rx)
}
@@ -402,6 +403,7 @@ fn exec_approval_emits_proposed_command_and_decision_history() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -444,6 +446,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -492,6 +495,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
command: vec!["bash".into(), "-lc".into(), long],
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
reason: None,
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -995,6 +999,37 @@ fn interrupt_exec_marks_failed_snapshot() {
assert_snapshot!("interrupt_exec_marks_failed", exec_blob);
}
// Snapshot test: after an interrupted turn, a gentle error message is inserted
// suggesting the user to tell the model what to do differently and to use /feedback.
#[test]
fn interrupted_turn_error_message_snapshot() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();
// Simulate an in-progress task so the widget is in a running state.
chat.handle_codex_event(Event {
id: "task-1".into(),
msg: EventMsg::TaskStarted(TaskStartedEvent {
model_context_window: None,
}),
});
// Abort the turn (like pressing Esc) and drain inserted history.
chat.handle_codex_event(Event {
id: "task-1".into(),
msg: EventMsg::TurnAborted(codex_core::protocol::TurnAbortedEvent {
reason: TurnAbortReason::Interrupted,
}),
});
let cells = drain_insert_history(&mut rx);
assert!(
!cells.is_empty(),
"expected error message to be inserted after interruption"
);
let last = lines_to_single_string(cells.last().unwrap());
assert_snapshot!("interrupted_turn_error_message", last);
}
/// Opening custom prompt from the review popup, pressing Esc returns to the
/// parent popup, pressing Esc again dismisses all panels (back to normal mode).
#[test]
@@ -1172,6 +1207,28 @@ fn model_reasoning_selection_popup_snapshot() {
assert_snapshot!("model_reasoning_selection_popup", popup);
}
#[test]
fn feedback_selection_popup_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual();
// Open the feedback category selection popup via slash command.
chat.dispatch_command(SlashCommand::Feedback);
let popup = render_bottom_popup(&chat, 80);
assert_snapshot!("feedback_selection_popup", popup);
}
#[test]
fn feedback_upload_consent_popup_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual();
// Open the consent popup directly for a chosen category.
chat.open_feedback_consent(crate::app_event::FeedbackCategory::Bug);
let popup = render_bottom_popup(&chat, 80);
assert_snapshot!("feedback_upload_consent_popup", popup);
}
#[test]
fn reasoning_popup_escape_returns_to_model_popup() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual();
@@ -1421,6 +1478,7 @@ fn approval_modal_exec_snapshot() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -1465,6 +1523,7 @@ fn approval_modal_exec_without_reason_snapshot() {
command: vec!["bash".into(), "-lc".into(), "echo hello world".into()],
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
reason: None,
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -1675,6 +1734,7 @@ fn status_widget_and_approval_modal_snapshot() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {

View File

@@ -1047,7 +1047,10 @@ pub(crate) fn new_mcp_tools_output(
return PlainHistoryCell { lines };
}
for (server, cfg) in config.mcp_servers.iter() {
let mut servers: Vec<_> = config.mcp_servers.iter().collect();
servers.sort_by(|(a, _), (b, _)| a.cmp(b));
for (server, cfg) in servers {
let prefix = format!("mcp__{server}__");
let mut names: Vec<String> = tools
.keys()
@@ -1111,7 +1114,7 @@ pub(crate) fn new_mcp_tools_output(
pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
let display = pairs
.into_iter()
.map(|(name, value)| format!("{name}={value}"))
.map(|(name, _)| format!("{name}=*****"))
.collect::<Vec<_>>()
.join(", ");
lines.push(vec![" • HTTP headers: ".into(), display.into()].into());
@@ -1123,7 +1126,7 @@ pub(crate) fn new_mcp_tools_output(
pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
let display = pairs
.into_iter()
.map(|(name, env_var)| format!("{name}={env_var}"))
.map(|(name, var)| format!("{name}={var}"))
.collect::<Vec<_>>()
.join(", ");
lines.push(vec![" • Env HTTP headers: ".into(), display.into()].into());
@@ -1415,14 +1418,20 @@ mod tests {
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::config::ConfigToml;
use codex_core::config_types::McpServerConfig;
use codex_core::config_types::McpServerTransportConfig;
use codex_core::protocol::McpAuthStatus;
use codex_protocol::parse_command::ParsedCommand;
use dirs::home_dir;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::collections::HashMap;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
use mcp_types::Tool;
use mcp_types::ToolInputSchema;
fn test_config() -> Config {
Config::load_from_base_config_with_overrides(
@@ -1449,6 +1458,91 @@ mod tests {
render_lines(&cell.transcript_lines(u16::MAX))
}
#[test]
fn mcp_tools_output_masks_sensitive_values() {
let mut config = test_config();
let mut env = HashMap::new();
env.insert("TOKEN".to_string(), "secret".to_string());
let stdio_config = McpServerConfig {
transport: McpServerTransportConfig::Stdio {
command: "docs-server".to_string(),
args: vec![],
env: Some(env),
env_vars: vec!["APP_TOKEN".to_string()],
cwd: None,
},
enabled: true,
startup_timeout_sec: None,
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
};
config.mcp_servers.insert("docs".to_string(), stdio_config);
let mut headers = HashMap::new();
headers.insert("Authorization".to_string(), "Bearer secret".to_string());
let mut env_headers = HashMap::new();
env_headers.insert("X-API-Key".to_string(), "API_KEY_ENV".to_string());
let http_config = McpServerConfig {
transport: McpServerTransportConfig::StreamableHttp {
url: "https://example.com/mcp".to_string(),
bearer_token_env_var: Some("MCP_TOKEN".to_string()),
http_headers: Some(headers),
env_http_headers: Some(env_headers),
},
enabled: true,
startup_timeout_sec: None,
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
};
config.mcp_servers.insert("http".to_string(), http_config);
let mut tools: HashMap<String, Tool> = HashMap::new();
tools.insert(
"mcp__docs__list".to_string(),
Tool {
annotations: None,
description: None,
input_schema: ToolInputSchema {
properties: None,
required: None,
r#type: "object".to_string(),
},
name: "list".to_string(),
output_schema: None,
title: None,
},
);
tools.insert(
"mcp__http__ping".to_string(),
Tool {
annotations: None,
description: None,
input_schema: ToolInputSchema {
properties: None,
required: None,
r#type: "object".to_string(),
},
name: "ping".to_string(),
output_schema: None,
title: None,
},
);
let auth_statuses: HashMap<String, McpAuthStatus> = HashMap::new();
let cell = new_mcp_tools_output(
&config,
tools,
HashMap::new(),
HashMap::new(),
&auth_statuses,
);
let rendered = render_lines(&cell.display_lines(120)).join("\n");
insta::assert_snapshot!(rendered);
}
#[test]
fn empty_agent_message_cell_transcript() {
let cell = AgentMessageCell::new(vec![Line::default()], false);

View File

@@ -148,6 +148,7 @@ pub async fn run_main(
include_view_image_tool: None,
show_raw_agent_reasoning: cli.oss.then_some(true),
tools_web_search_request: cli.web_search.then_some(true),
experimental_sandbox_command_assessment: None,
additional_writable_roots: additional_dirs,
};
let raw_overrides = cli.config_overrides.raw_overrides.clone();

View File

@@ -0,0 +1,27 @@
---
source: tui/src/history_cell.rs
assertion_line: 1540
expression: rendered
---
/mcp
🔌 MCP Tools
• docs
• Status: enabled
• Auth: Unsupported
• Command: docs-server
• Env: TOKEN=*****, APP_TOKEN=*****
• Tools: list
• Resources: (none)
• Resource templates: (none)
• http
• Status: enabled
• Auth: Unsupported
• URL: https://example.com/mcp
• HTTP headers: Authorization=*****
• Env HTTP headers: X-API-Key=API_KEY_ENV
• Tools: ping
• Resources: (none)
• Resource templates: (none)