Compare commits

..

1 Commits

Author SHA1 Message Date
Dylan Hurd
cff47cb1d5 fix(windows) parse powershell commands 2025-11-19 04:08:18 -08:00
183 changed files with 2419 additions and 7852 deletions

View File

@@ -69,37 +69,6 @@ Codex can access MCP servers. To configure them, refer to the [config docs](./do
Codex CLI supports a rich set of configuration options, with preferences stored in `~/.codex/config.toml`. For full configuration options, see [Configuration](./docs/config.md).
### Execpolicy Quickstart
Codex can enforce your own rules-based execution policy before it runs shell commands.
1. Create a policy directory: `mkdir -p ~/.codex/policy`.
2. Create one or more `.codexpolicy` files in that folder. Codex automatically loads every `.codexpolicy` file in there on startup.
3. Write `prefix_rule` entries to describe the commands you want to allow, prompt, or block:
```starlark
prefix_rule(
pattern = ["git", ["push", "fetch"]],
decision = "prompt", # allow | prompt | forbidden
match = [["git", "push", "origin", "main"]], # examples that must match
not_match = [["git", "status"]], # examples that must not match
)
```
- `pattern` is a list of shell tokens, evaluated from left to right; wrap tokens in a nested list to express alternatives (e.g., match both `push` and `fetch`).
- `decision` sets the severity; Codex picks the strictest decision when multiple rules match (forbidden > prompt > allow).
- `match` and `not_match` act as (optional) unit tests. Codex validates them when it loads your policy, so you get feedback if an example has unexpected behavior.
In this example rule, if Codex wants to run commands with the prefix `git push` or `git fetch`, it will first ask for user approval.
Use [`execpolicy2` CLI](./codex-rs/execpolicy2/README.md) to preview decisions for policy files:
```shell
cargo run -p codex-execpolicy2 -- check --policy ~/.codex/policy/default.codexpolicy git push origin main
```
Pass multiple `--policy` flags to test how several files combine. See the [`codex-rs/execpolicy2` README](./codex-rs/execpolicy2/README.md) for a more detailed walkthrough of the available syntax.
---
### Docs & FAQ

34
codex-rs/Cargo.lock generated
View File

@@ -849,7 +849,6 @@ dependencies = [
"codex-login",
"codex-protocol",
"codex-utils-json-to-toml",
"codex-windows-sandbox",
"core_test_support",
"mcp-types",
"opentelemetry-appender-tracing",
@@ -1086,7 +1085,6 @@ dependencies = [
"codex-apply-patch",
"codex-arg0",
"codex-async-utils",
"codex-execpolicy",
"codex-file-search",
"codex-git",
"codex-keyring-store",
@@ -1189,7 +1187,6 @@ name = "codex-exec-server"
version = "0.0.0"
dependencies = [
"anyhow",
"async-trait",
"clap",
"codex-core",
"libc",
@@ -1198,7 +1195,6 @@ dependencies = [
"rmcp",
"serde",
"serde_json",
"shlex",
"socket2 0.6.0",
"tempfile",
"tokio",
@@ -1209,21 +1205,6 @@ dependencies = [
[[package]]
name = "codex-execpolicy"
version = "0.0.0"
dependencies = [
"anyhow",
"clap",
"multimap",
"pretty_assertions",
"serde",
"serde_json",
"shlex",
"starlark",
"thiserror 2.0.17",
]
[[package]]
name = "codex-execpolicy-legacy"
version = "0.0.0"
dependencies = [
"allocative",
"anyhow",
@@ -1241,6 +1222,21 @@ dependencies = [
"tempfile",
]
[[package]]
name = "codex-execpolicy2"
version = "0.0.0"
dependencies = [
"anyhow",
"clap",
"multimap",
"pretty_assertions",
"serde",
"serde_json",
"shlex",
"starlark",
"thiserror 2.0.17",
]
[[package]]
name = "codex-feedback"
version = "0.0.0"

View File

@@ -18,7 +18,7 @@ members = [
"exec",
"exec-server",
"execpolicy",
"execpolicy-legacy",
"execpolicy2",
"keyring-store",
"file-search",
"linux-sandbox",
@@ -67,7 +67,6 @@ codex-chatgpt = { path = "chatgpt" }
codex-common = { path = "common" }
codex-core = { path = "core" }
codex-exec = { path = "exec" }
codex-execpolicy = { path = "execpolicy" }
codex-feedback = { path = "feedback" }
codex-file-search = { path = "file-search" }
codex-git = { path = "utils/git" }

View File

@@ -7,6 +7,5 @@ pub use export::generate_ts;
pub use export::generate_types;
pub use jsonrpc_lite::*;
pub use protocol::common::*;
pub use protocol::thread_history::*;
pub use protocol::v1::*;
pub use protocol::v2::*;

View File

@@ -438,13 +438,6 @@ server_request_definitions! {
response: v2::CommandExecutionRequestApprovalResponse,
},
/// Sent when approval is requested for a specific file change.
/// This request is used for Turns started via turn/start.
FileChangeRequestApproval => "item/fileChange/requestApproval" {
params: v2::FileChangeRequestApprovalParams,
response: v2::FileChangeRequestApprovalResponse,
},
/// DEPRECATED APIs below
/// Request to approve a patch.
/// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
@@ -501,9 +494,6 @@ server_notification_definitions! {
ReasoningSummaryPartAdded => "item/reasoning/summaryPartAdded" (v2::ReasoningSummaryPartAddedNotification),
ReasoningTextDelta => "item/reasoning/textDelta" (v2::ReasoningTextDeltaNotification),
/// Notifies the user of world-writable directories on Windows, which cannot be protected by the sandbox.
WindowsWorldWritableWarning => "windows/worldWritableWarning" (v2::WindowsWorldWritableWarningNotification),
#[serde(rename = "account/login/completed")]
#[ts(rename = "account/login/completed")]
#[strum(serialize = "account/login/completed")]
@@ -538,7 +528,7 @@ mod tests {
let request = ClientRequest::NewConversation {
request_id: RequestId::Integer(42),
params: v1::NewConversationParams {
model: Some("gpt-5.1-codex-max".to_string()),
model: Some("gpt-5.1-codex".to_string()),
model_provider: None,
profile: None,
cwd: None,
@@ -556,7 +546,7 @@ mod tests {
"method": "newConversation",
"id": 42,
"params": {
"model": "gpt-5.1-codex-max",
"model": "gpt-5.1-codex",
"modelProvider": null,
"profile": null,
"cwd": null,

View File

@@ -2,6 +2,5 @@
// Exposes protocol pieces used by `lib.rs` via `pub use protocol::common::*;`.
pub mod common;
pub mod thread_history;
pub mod v1;
pub mod v2;

View File

@@ -1,409 +0,0 @@
use crate::protocol::v2::ThreadItem;
use crate::protocol::v2::Turn;
use crate::protocol::v2::TurnStatus;
use crate::protocol::v2::UserInput;
use codex_protocol::protocol::AgentReasoningEvent;
use codex_protocol::protocol::AgentReasoningRawContentEvent;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::TurnAbortedEvent;
use codex_protocol::protocol::UserMessageEvent;
/// Convert persisted [`EventMsg`] entries into a sequence of [`Turn`] values.
///
/// The purpose of this is to convert the EventMsgs persisted in a rollout file
/// into a sequence of Turns and ThreadItems, which allows the client to render
/// the historical messages when resuming a thread.
pub fn build_turns_from_event_msgs(events: &[EventMsg]) -> Vec<Turn> {
let mut builder = ThreadHistoryBuilder::new();
for event in events {
builder.handle_event(event);
}
builder.finish()
}
struct ThreadHistoryBuilder {
turns: Vec<Turn>,
current_turn: Option<PendingTurn>,
next_turn_index: i64,
next_item_index: i64,
}
impl ThreadHistoryBuilder {
fn new() -> Self {
Self {
turns: Vec::new(),
current_turn: None,
next_turn_index: 1,
next_item_index: 1,
}
}
fn finish(mut self) -> Vec<Turn> {
self.finish_current_turn();
self.turns
}
/// This function should handle all EventMsg variants that can be persisted in a rollout file.
/// See `should_persist_event_msg` in `codex-rs/core/rollout/policy.rs`.
fn handle_event(&mut self, event: &EventMsg) {
match event {
EventMsg::UserMessage(payload) => self.handle_user_message(payload),
EventMsg::AgentMessage(payload) => self.handle_agent_message(payload.message.clone()),
EventMsg::AgentReasoning(payload) => self.handle_agent_reasoning(payload),
EventMsg::AgentReasoningRawContent(payload) => {
self.handle_agent_reasoning_raw_content(payload)
}
EventMsg::TokenCount(_) => {}
EventMsg::EnteredReviewMode(_) => {}
EventMsg::ExitedReviewMode(_) => {}
EventMsg::UndoCompleted(_) => {}
EventMsg::TurnAborted(payload) => self.handle_turn_aborted(payload),
_ => {}
}
}
fn handle_user_message(&mut self, payload: &UserMessageEvent) {
self.finish_current_turn();
let mut turn = self.new_turn();
let id = self.next_item_id();
let content = self.build_user_inputs(payload);
turn.items.push(ThreadItem::UserMessage { id, content });
self.current_turn = Some(turn);
}
fn handle_agent_message(&mut self, text: String) {
if text.is_empty() {
return;
}
let id = self.next_item_id();
self.ensure_turn()
.items
.push(ThreadItem::AgentMessage { id, text });
}
fn handle_agent_reasoning(&mut self, payload: &AgentReasoningEvent) {
if payload.text.is_empty() {
return;
}
// If the last item is a reasoning item, add the new text to the summary.
if let Some(ThreadItem::Reasoning { summary, .. }) = self.ensure_turn().items.last_mut() {
summary.push(payload.text.clone());
return;
}
// Otherwise, create a new reasoning item.
let id = self.next_item_id();
self.ensure_turn().items.push(ThreadItem::Reasoning {
id,
summary: vec![payload.text.clone()],
content: Vec::new(),
});
}
fn handle_agent_reasoning_raw_content(&mut self, payload: &AgentReasoningRawContentEvent) {
if payload.text.is_empty() {
return;
}
// If the last item is a reasoning item, add the new text to the content.
if let Some(ThreadItem::Reasoning { content, .. }) = self.ensure_turn().items.last_mut() {
content.push(payload.text.clone());
return;
}
// Otherwise, create a new reasoning item.
let id = self.next_item_id();
self.ensure_turn().items.push(ThreadItem::Reasoning {
id,
summary: Vec::new(),
content: vec![payload.text.clone()],
});
}
fn handle_turn_aborted(&mut self, _payload: &TurnAbortedEvent) {
let Some(turn) = self.current_turn.as_mut() else {
return;
};
turn.status = TurnStatus::Interrupted;
}
fn finish_current_turn(&mut self) {
if let Some(turn) = self.current_turn.take() {
if turn.items.is_empty() {
return;
}
self.turns.push(turn.into());
}
}
fn new_turn(&mut self) -> PendingTurn {
PendingTurn {
id: self.next_turn_id(),
items: Vec::new(),
status: TurnStatus::Completed,
}
}
fn ensure_turn(&mut self) -> &mut PendingTurn {
if self.current_turn.is_none() {
let turn = self.new_turn();
return self.current_turn.insert(turn);
}
if let Some(turn) = self.current_turn.as_mut() {
return turn;
}
unreachable!("current turn must exist after initialization");
}
fn next_turn_id(&mut self) -> String {
let id = format!("turn-{}", self.next_turn_index);
self.next_turn_index += 1;
id
}
fn next_item_id(&mut self) -> String {
let id = format!("item-{}", self.next_item_index);
self.next_item_index += 1;
id
}
fn build_user_inputs(&self, payload: &UserMessageEvent) -> Vec<UserInput> {
let mut content = Vec::new();
if !payload.message.trim().is_empty() {
content.push(UserInput::Text {
text: payload.message.clone(),
});
}
if let Some(images) = &payload.images {
for image in images {
content.push(UserInput::Image { url: image.clone() });
}
}
content
}
}
struct PendingTurn {
id: String,
items: Vec<ThreadItem>,
status: TurnStatus,
}
impl From<PendingTurn> for Turn {
fn from(value: PendingTurn) -> Self {
Self {
id: value.id,
items: value.items,
status: value.status,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use codex_protocol::protocol::AgentMessageEvent;
use codex_protocol::protocol::AgentReasoningEvent;
use codex_protocol::protocol::AgentReasoningRawContentEvent;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::protocol::TurnAbortedEvent;
use codex_protocol::protocol::UserMessageEvent;
use pretty_assertions::assert_eq;
#[test]
fn builds_multiple_turns_with_reasoning_items() {
let events = vec![
EventMsg::UserMessage(UserMessageEvent {
message: "First turn".into(),
images: Some(vec!["https://example.com/one.png".into()]),
}),
EventMsg::AgentMessage(AgentMessageEvent {
message: "Hi there".into(),
}),
EventMsg::AgentReasoning(AgentReasoningEvent {
text: "thinking".into(),
}),
EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent {
text: "full reasoning".into(),
}),
EventMsg::UserMessage(UserMessageEvent {
message: "Second turn".into(),
images: None,
}),
EventMsg::AgentMessage(AgentMessageEvent {
message: "Reply two".into(),
}),
];
let turns = build_turns_from_event_msgs(&events);
assert_eq!(turns.len(), 2);
let first = &turns[0];
assert_eq!(first.id, "turn-1");
assert_eq!(first.status, TurnStatus::Completed);
assert_eq!(first.items.len(), 3);
assert_eq!(
first.items[0],
ThreadItem::UserMessage {
id: "item-1".into(),
content: vec![
UserInput::Text {
text: "First turn".into(),
},
UserInput::Image {
url: "https://example.com/one.png".into(),
}
],
}
);
assert_eq!(
first.items[1],
ThreadItem::AgentMessage {
id: "item-2".into(),
text: "Hi there".into(),
}
);
assert_eq!(
first.items[2],
ThreadItem::Reasoning {
id: "item-3".into(),
summary: vec!["thinking".into()],
content: vec!["full reasoning".into()],
}
);
let second = &turns[1];
assert_eq!(second.id, "turn-2");
assert_eq!(second.items.len(), 2);
assert_eq!(
second.items[0],
ThreadItem::UserMessage {
id: "item-4".into(),
content: vec![UserInput::Text {
text: "Second turn".into()
}],
}
);
assert_eq!(
second.items[1],
ThreadItem::AgentMessage {
id: "item-5".into(),
text: "Reply two".into(),
}
);
}
#[test]
fn splits_reasoning_when_interleaved() {
let events = vec![
EventMsg::UserMessage(UserMessageEvent {
message: "Turn start".into(),
images: None,
}),
EventMsg::AgentReasoning(AgentReasoningEvent {
text: "first summary".into(),
}),
EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent {
text: "first content".into(),
}),
EventMsg::AgentMessage(AgentMessageEvent {
message: "interlude".into(),
}),
EventMsg::AgentReasoning(AgentReasoningEvent {
text: "second summary".into(),
}),
];
let turns = build_turns_from_event_msgs(&events);
assert_eq!(turns.len(), 1);
let turn = &turns[0];
assert_eq!(turn.items.len(), 4);
assert_eq!(
turn.items[1],
ThreadItem::Reasoning {
id: "item-2".into(),
summary: vec!["first summary".into()],
content: vec!["first content".into()],
}
);
assert_eq!(
turn.items[3],
ThreadItem::Reasoning {
id: "item-4".into(),
summary: vec!["second summary".into()],
content: Vec::new(),
}
);
}
#[test]
fn marks_turn_as_interrupted_when_aborted() {
let events = vec![
EventMsg::UserMessage(UserMessageEvent {
message: "Please do the thing".into(),
images: None,
}),
EventMsg::AgentMessage(AgentMessageEvent {
message: "Working...".into(),
}),
EventMsg::TurnAborted(TurnAbortedEvent {
reason: TurnAbortReason::Replaced,
}),
EventMsg::UserMessage(UserMessageEvent {
message: "Let's try again".into(),
images: None,
}),
EventMsg::AgentMessage(AgentMessageEvent {
message: "Second attempt complete.".into(),
}),
];
let turns = build_turns_from_event_msgs(&events);
assert_eq!(turns.len(), 2);
let first_turn = &turns[0];
assert_eq!(first_turn.status, TurnStatus::Interrupted);
assert_eq!(first_turn.items.len(), 2);
assert_eq!(
first_turn.items[0],
ThreadItem::UserMessage {
id: "item-1".into(),
content: vec![UserInput::Text {
text: "Please do the thing".into()
}],
}
);
assert_eq!(
first_turn.items[1],
ThreadItem::AgentMessage {
id: "item-2".into(),
text: "Working...".into(),
}
);
let second_turn = &turns[1];
assert_eq!(second_turn.status, TurnStatus::Completed);
assert_eq!(second_turn.items.len(), 2);
assert_eq!(
second_turn.items[0],
ThreadItem::UserMessage {
id: "item-3".into(),
content: vec![UserInput::Text {
text: "Let's try again".into()
}],
}
);
assert_eq!(
second_turn.items[1],
ThreadItem::AgentMessage {
id: "item-4".into(),
text: "Second attempt complete.".into(),
}
);
}
}

View File

@@ -11,7 +11,6 @@ use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
use codex_protocol::items::TurnItem as CoreTurnItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
use codex_protocol::protocol::CreditsSnapshot as CoreCreditsSnapshot;
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
use codex_protocol::user_input::UserInput as CoreUserInput;
@@ -518,10 +517,6 @@ pub struct Thread {
pub created_at: i64,
/// [UNSTABLE] Path to the thread on disk.
pub path: PathBuf,
/// Only populated on a `thread/resume` response.
/// For all other responses and notifications returning a Thread,
/// the turns field will be an empty list.
pub turns: Vec<Turn>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -536,9 +531,8 @@ pub struct AccountUpdatedNotification {
#[ts(export_to = "v2/")]
pub struct Turn {
pub id: String,
/// Only populated on a `thread/resume` response.
/// For all other responses and notifications returning a Turn,
/// the items field will be an empty list.
/// This is currently only populated for resumed threads.
/// TODO: properly populate items for all turns.
pub items: Vec<ThreadItem>,
#[serde(flatten)]
pub status: TurnStatus,
@@ -794,23 +788,20 @@ pub struct FileUpdateChange {
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "camelCase")]
#[ts(tag = "type")]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum PatchChangeKind {
Add,
Delete,
Update { move_path: Option<PathBuf> },
Update,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum PatchApplyStatus {
InProgress,
Completed,
Failed,
Declined,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -943,15 +934,6 @@ pub struct McpToolCallProgressNotification {
pub message: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct WindowsWorldWritableWarningNotification {
pub sample_paths: Vec<String>,
pub extra_count: usize,
pub failed_scan: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -985,26 +967,6 @@ pub struct CommandExecutionRequestApprovalResponse {
pub accept_settings: Option<CommandExecutionRequestAcceptSettings>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct FileChangeRequestApprovalParams {
pub thread_id: String,
pub turn_id: String,
pub item_id: String,
/// Optional explanatory reason (e.g. request for extra write access).
pub reason: Option<String>,
/// [UNSTABLE] When set, the agent is asking the user to allow writes under this root
/// for the remainder of the session (unclear if this is honored today).
pub grant_root: Option<PathBuf>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[ts(export_to = "v2/")]
pub struct FileChangeRequestApprovalResponse {
pub decision: ApprovalDecision,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -1018,7 +980,6 @@ pub struct AccountRateLimitsUpdatedNotification {
pub struct RateLimitSnapshot {
pub primary: Option<RateLimitWindow>,
pub secondary: Option<RateLimitWindow>,
pub credits: Option<CreditsSnapshot>,
}
impl From<CoreRateLimitSnapshot> for RateLimitSnapshot {
@@ -1026,7 +987,6 @@ impl From<CoreRateLimitSnapshot> for RateLimitSnapshot {
Self {
primary: value.primary.map(RateLimitWindow::from),
secondary: value.secondary.map(RateLimitWindow::from),
credits: value.credits.map(CreditsSnapshot::from),
}
}
}
@@ -1050,25 +1010,6 @@ impl From<CoreRateLimitWindow> for RateLimitWindow {
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct CreditsSnapshot {
pub has_credits: bool,
pub unlimited: bool,
pub balance: Option<String>,
}
impl From<CoreCreditsSnapshot> for CreditsSnapshot {
fn from(value: CoreCreditsSnapshot) -> Self {
Self {
has_credits: value.has_credits,
unlimited: value.unlimited,
balance: value.balance,
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]

View File

@@ -24,8 +24,6 @@ use codex_app_server_protocol::ClientRequest;
use codex_app_server_protocol::CommandExecutionRequestAcceptSettings;
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::FileChangeRequestApprovalParams;
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
use codex_app_server_protocol::GetAccountRateLimitsResponse;
use codex_app_server_protocol::InitializeParams;
use codex_app_server_protocol::InitializeResponse;
@@ -679,9 +677,6 @@ impl CodexClient {
ServerRequest::CommandExecutionRequestApproval { request_id, params } => {
self.handle_command_execution_request_approval(request_id, params)?;
}
ServerRequest::FileChangeRequestApproval { request_id, params } => {
self.approve_file_change_request(request_id, params)?;
}
other => {
bail!("received unsupported server request: {other:?}");
}
@@ -722,37 +717,6 @@ impl CodexClient {
Ok(())
}
fn approve_file_change_request(
&mut self,
request_id: RequestId,
params: FileChangeRequestApprovalParams,
) -> Result<()> {
let FileChangeRequestApprovalParams {
thread_id,
turn_id,
item_id,
reason,
grant_root,
} = params;
println!(
"\n< fileChange approval requested for thread {thread_id}, turn {turn_id}, item {item_id}"
);
if let Some(reason) = reason.as_deref() {
println!("< reason: {reason}");
}
if let Some(grant_root) = grant_root.as_deref() {
println!("< grant root: {}", grant_root.display());
}
let response = FileChangeRequestApprovalResponse {
decision: ApprovalDecision::Accept,
};
self.send_server_request_response(request_id, &response)?;
println!("< approved fileChange request for item {item_id}");
Ok(())
}
fn send_server_request_response<T>(&mut self, request_id: RequestId, response: &T) -> Result<()>
where
T: Serialize,

View File

@@ -40,7 +40,6 @@ tracing = { workspace = true, features = ["log"] }
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
opentelemetry-appender-tracing = { workspace = true }
uuid = { workspace = true, features = ["serde", "v7"] }
codex-windows-sandbox.workspace = true
[dev-dependencies]
app_test_support = { workspace = true }

View File

@@ -15,17 +15,12 @@ use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::ExecCommandApprovalParams;
use codex_app_server_protocol::ExecCommandApprovalResponse;
use codex_app_server_protocol::FileChangeRequestApprovalParams;
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
use codex_app_server_protocol::FileUpdateChange;
use codex_app_server_protocol::InterruptConversationResponse;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::McpToolCallError;
use codex_app_server_protocol::McpToolCallResult;
use codex_app_server_protocol::McpToolCallStatus;
use codex_app_server_protocol::PatchApplyStatus;
use codex_app_server_protocol::PatchChangeKind as V2PatchChangeKind;
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
use codex_app_server_protocol::ReasoningTextDeltaNotification;
@@ -45,7 +40,6 @@ use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecApprovalRequestEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::FileChange as CoreFileChange;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::Op;
@@ -53,9 +47,7 @@ use codex_core::protocol::ReviewDecision;
use codex_core::review_format::format_review_findings_block;
use codex_protocol::ConversationId;
use codex_protocol::protocol::ReviewOutputEvent;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::oneshot;
use tracing::error;
@@ -78,74 +70,24 @@ pub(crate) async fn apply_bespoke_event_handling(
}
EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
call_id,
turn_id,
changes,
reason,
grant_root,
}) => match api_version {
ApiVersion::V1 => {
let params = ApplyPatchApprovalParams {
conversation_id,
call_id,
file_changes: changes.clone(),
reason,
grant_root,
};
let rx = outgoing
.send_request(ServerRequestPayload::ApplyPatchApproval(params))
.await;
tokio::spawn(async move {
on_patch_approval_response(event_id, rx, conversation).await;
});
}
ApiVersion::V2 => {
// Until we migrate the core to be aware of a first class FileChangeItem
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
let item_id = call_id.clone();
let patch_changes = convert_patch_changes(&changes);
let first_start = {
let mut map = turn_summary_store.lock().await;
let summary = map.entry(conversation_id).or_default();
summary.file_change_started.insert(item_id.clone())
};
if first_start {
let item = ThreadItem::FileChange {
id: item_id.clone(),
changes: patch_changes.clone(),
status: PatchApplyStatus::InProgress,
};
let notification = ItemStartedNotification { item };
outgoing
.send_server_notification(ServerNotification::ItemStarted(notification))
.await;
}
let params = FileChangeRequestApprovalParams {
thread_id: conversation_id.to_string(),
turn_id: turn_id.clone(),
item_id: item_id.clone(),
reason,
grant_root,
};
let rx = outgoing
.send_request(ServerRequestPayload::FileChangeRequestApproval(params))
.await;
tokio::spawn(async move {
on_file_change_request_approval_response(
event_id,
conversation_id,
item_id,
patch_changes,
rx,
conversation,
outgoing,
turn_summary_store,
)
.await;
});
}
},
}) => {
let params = ApplyPatchApprovalParams {
conversation_id,
call_id,
file_changes: changes,
reason,
grant_root,
};
let rx = outgoing
.send_request(ServerRequestPayload::ApplyPatchApproval(params))
.await;
tokio::spawn(async move {
on_patch_approval_response(event_id, rx, conversation).await;
});
}
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
call_id,
turn_id,
@@ -302,49 +244,6 @@ pub(crate) async fn apply_bespoke_event_handling(
.send_server_notification(ServerNotification::ItemCompleted(notification))
.await;
}
EventMsg::PatchApplyBegin(patch_begin_event) => {
// Until we migrate the core to be aware of a first class FileChangeItem
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
let item_id = patch_begin_event.call_id.clone();
let first_start = {
let mut map = turn_summary_store.lock().await;
let summary = map.entry(conversation_id).or_default();
summary.file_change_started.insert(item_id.clone())
};
if first_start {
let item = ThreadItem::FileChange {
id: item_id.clone(),
changes: convert_patch_changes(&patch_begin_event.changes),
status: PatchApplyStatus::InProgress,
};
let notification = ItemStartedNotification { item };
outgoing
.send_server_notification(ServerNotification::ItemStarted(notification))
.await;
}
}
EventMsg::PatchApplyEnd(patch_end_event) => {
// Until we migrate the core to be aware of a first class FileChangeItem
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
let item_id = patch_end_event.call_id.clone();
let status = if patch_end_event.success {
PatchApplyStatus::Completed
} else {
PatchApplyStatus::Failed
};
let changes = convert_patch_changes(&patch_end_event.changes);
complete_file_change_item(
conversation_id,
item_id,
changes,
status,
outgoing.as_ref(),
&turn_summary_store,
)
.await;
}
EventMsg::ExecCommandBegin(exec_command_begin_event) => {
let item = ThreadItem::CommandExecution {
id: exec_command_begin_event.call_id.clone(),
@@ -466,32 +365,6 @@ async fn emit_turn_completed_with_status(
.await;
}
async fn complete_file_change_item(
conversation_id: ConversationId,
item_id: String,
changes: Vec<FileUpdateChange>,
status: PatchApplyStatus,
outgoing: &OutgoingMessageSender,
turn_summary_store: &TurnSummaryStore,
) {
{
let mut map = turn_summary_store.lock().await;
if let Some(summary) = map.get_mut(&conversation_id) {
summary.file_change_started.remove(&item_id);
}
}
let item = ThreadItem::FileChange {
id: item_id,
changes,
status,
};
let notification = ItemCompletedNotification { item };
outgoing
.send_server_notification(ServerNotification::ItemCompleted(notification))
.await;
}
async fn find_and_remove_turn_summary(
conversation_id: ConversationId,
turn_summary_store: &TurnSummaryStore,
@@ -639,110 +512,6 @@ fn render_review_output_text(output: &ReviewOutputEvent) -> String {
}
}
fn convert_patch_changes(changes: &HashMap<PathBuf, CoreFileChange>) -> Vec<FileUpdateChange> {
let mut converted: Vec<FileUpdateChange> = changes
.iter()
.map(|(path, change)| FileUpdateChange {
path: path.to_string_lossy().into_owned(),
kind: map_patch_change_kind(change),
diff: format_file_change_diff(change),
})
.collect();
converted.sort_by(|a, b| a.path.cmp(&b.path));
converted
}
fn map_patch_change_kind(change: &CoreFileChange) -> V2PatchChangeKind {
match change {
CoreFileChange::Add { .. } => V2PatchChangeKind::Add,
CoreFileChange::Delete { .. } => V2PatchChangeKind::Delete,
CoreFileChange::Update { move_path, .. } => V2PatchChangeKind::Update {
move_path: move_path.clone(),
},
}
}
fn format_file_change_diff(change: &CoreFileChange) -> String {
match change {
CoreFileChange::Add { content } => content.clone(),
CoreFileChange::Delete { content } => content.clone(),
CoreFileChange::Update {
unified_diff,
move_path,
} => {
if let Some(path) = move_path {
format!("{unified_diff}\n\nMoved to: {}", path.display())
} else {
unified_diff.clone()
}
}
}
}
#[allow(clippy::too_many_arguments)]
async fn on_file_change_request_approval_response(
event_id: String,
conversation_id: ConversationId,
item_id: String,
changes: Vec<FileUpdateChange>,
receiver: oneshot::Receiver<JsonValue>,
codex: Arc<CodexConversation>,
outgoing: Arc<OutgoingMessageSender>,
turn_summary_store: TurnSummaryStore,
) {
let response = receiver.await;
let (decision, completion_status) = match response {
Ok(value) => {
let response = serde_json::from_value::<FileChangeRequestApprovalResponse>(value)
.unwrap_or_else(|err| {
error!("failed to deserialize FileChangeRequestApprovalResponse: {err}");
FileChangeRequestApprovalResponse {
decision: ApprovalDecision::Decline,
}
});
let (decision, completion_status) = match response.decision {
ApprovalDecision::Accept => (ReviewDecision::Approved, None),
ApprovalDecision::Decline => {
(ReviewDecision::Denied, Some(PatchApplyStatus::Declined))
}
ApprovalDecision::Cancel => {
(ReviewDecision::Abort, Some(PatchApplyStatus::Declined))
}
};
// Allow EventMsg::PatchApplyEnd to emit ItemCompleted for accepted patches.
// Only short-circuit on declines/cancels/failures.
(decision, completion_status)
}
Err(err) => {
error!("request failed: {err:?}");
(ReviewDecision::Denied, Some(PatchApplyStatus::Failed))
}
};
if let Some(status) = completion_status {
complete_file_change_item(
conversation_id,
item_id,
changes,
status,
outgoing.as_ref(),
&turn_summary_store,
)
.await;
}
if let Err(err) = codex
.submit(Op::PatchApproval {
id: event_id,
decision,
})
.await
{
error!("failed to submit PatchApproval: {err}");
}
}
async fn on_command_execution_request_approval_response(
event_id: String,
receiver: oneshot::Receiver<JsonValue>,

View File

@@ -91,8 +91,6 @@ use codex_app_server_protocol::TurnStatus;
use codex_app_server_protocol::UserInfoResponse;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_app_server_protocol::UserSavedConfig;
use codex_app_server_protocol::WindowsWorldWritableWarningNotification;
use codex_app_server_protocol::build_turns_from_event_msgs;
use codex_backend_client::Client as BackendClient;
use codex_core::AuthManager;
use codex_core::CodexConversation;
@@ -113,7 +111,6 @@ use codex_core::config_loader::load_config_as_toml;
use codex_core::default_client::get_codex_user_agent;
use codex_core::exec::ExecParams;
use codex_core::exec_env::create_env;
use codex_core::features::Feature;
use codex_core::find_conversation_path_by_id_str;
use codex_core::get_platform_sandbox;
use codex_core::git_info::git_diff_to_remote;
@@ -139,7 +136,6 @@ use codex_protocol::protocol::USER_MESSAGE_BEGIN;
use codex_protocol::user_input::UserInput as CoreInputItem;
use codex_utils_json_to_toml::json_to_toml;
use std::collections::HashMap;
use std::collections::HashSet;
use std::ffi::OsStr;
use std::io::Error as IoError;
use std::path::Path;
@@ -148,6 +144,7 @@ use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::time::Duration;
use tokio::select;
use tokio::sync::Mutex;
use tokio::sync::oneshot;
use tracing::error;
@@ -162,7 +159,6 @@ pub(crate) type PendingInterrupts = Arc<Mutex<HashMap<ConversationId, PendingInt
#[derive(Default, Clone)]
pub(crate) struct TurnSummary {
pub(crate) last_error_message: Option<String>,
pub(crate) file_change_started: HashSet<String>,
}
pub(crate) type TurnSummaryStore = Arc<Mutex<HashMap<ConversationId, TurnSummary>>>;
@@ -1241,7 +1237,7 @@ impl CodexMessageProcessor {
let overrides = ConfigOverrides {
model,
config_profile: profile,
cwd: cwd.clone().map(PathBuf::from),
cwd: cwd.map(PathBuf::from),
approval_policy,
sandbox_mode,
model_provider,
@@ -1253,17 +1249,7 @@ impl CodexMessageProcessor {
..Default::default()
};
// Persist windows sandbox feature.
// TODO: persist default config in general.
let mut cli_overrides = cli_overrides.unwrap_or_default();
if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) {
cli_overrides.insert(
"features.enable_experimental_windows_sandbox".to_string(),
serde_json::json!(true),
);
}
let config = match derive_config_from_params(overrides, Some(cli_overrides)).await {
let config = match derive_config_from_params(overrides, cli_overrides).await {
Ok(config) => config,
Err(err) => {
let error = JSONRPCErrorError {
@@ -1275,10 +1261,6 @@ impl CodexMessageProcessor {
return;
}
};
if cfg!(windows) && config.features.enabled(Feature::WindowsSandbox) {
self.handle_windows_world_writable_warning(config.cwd.clone())
.await;
}
match self.conversation_manager.new_conversation(config).await {
Ok(conversation_id) => {
@@ -1659,11 +1641,6 @@ impl CodexMessageProcessor {
session_configured,
..
}) => {
let SessionConfiguredEvent {
rollout_path,
initial_messages,
..
} = session_configured;
// Auto-attach a conversation listener when resuming a thread.
if let Err(err) = self
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
@@ -1676,8 +1653,8 @@ impl CodexMessageProcessor {
);
}
let mut thread = match read_summary_from_rollout(
rollout_path.as_path(),
let thread = match read_summary_from_rollout(
session_configured.rollout_path.as_path(),
fallback_model_provider.as_str(),
)
.await
@@ -1688,17 +1665,13 @@ impl CodexMessageProcessor {
request_id,
format!(
"failed to load rollout `{}` for conversation {conversation_id}: {err}",
rollout_path.display()
session_configured.rollout_path.display()
),
)
.await;
return;
}
};
thread.turns = initial_messages
.as_deref()
.map_or_else(Vec::new, build_turns_from_event_msgs);
let response = ThreadResumeResponse {
thread,
model: session_configured.model,
@@ -1708,7 +1681,6 @@ impl CodexMessageProcessor {
sandbox: session_configured.sandbox_policy.into(),
reasoning_effort: session_configured.reasoning_effort,
};
self.outgoing.send_response(request_id, response).await;
}
Err(err) => {
@@ -1959,15 +1931,6 @@ impl CodexMessageProcessor {
include_apply_patch_tool,
} = overrides;
// Persist windows sandbox feature.
let mut cli_overrides = cli_overrides.unwrap_or_default();
if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) {
cli_overrides.insert(
"features.enable_experimental_windows_sandbox".to_string(),
serde_json::json!(true),
);
}
let overrides = ConfigOverrides {
model,
config_profile: profile,
@@ -1983,7 +1946,7 @@ impl CodexMessageProcessor {
..Default::default()
};
derive_config_from_params(overrides, Some(cli_overrides)).await
derive_config_from_params(overrides, cli_overrides).await
}
None => Ok(self.config.as_ref().clone()),
};
@@ -1998,10 +1961,6 @@ impl CodexMessageProcessor {
return;
}
};
if cfg!(windows) && config.features.enabled(Feature::WindowsSandbox) {
self.handle_windows_world_writable_warning(config.cwd.clone())
.await;
}
let conversation_history = if let Some(path) = path {
match RolloutRecorder::get_rollout_history(&path).await {
@@ -2238,25 +2197,51 @@ impl CodexMessageProcessor {
.await
{
info!("conversation {conversation_id} was active; shutting down");
// Do not wait on conversation.next_event(); the listener task already consumes
// the stream. Request shutdown and ensure the rollout file is flushed before moving it.
if let Err(err) = conversation.submit(Op::Shutdown).await {
error!("failed to submit Shutdown to conversation {conversation_id}: {err}");
}
let conversation_clone = conversation.clone();
let notify = Arc::new(tokio::sync::Notify::new());
let notify_clone = notify.clone();
let flush_result =
tokio::time::timeout(Duration::from_secs(5), conversation.flush_rollout()).await;
match flush_result {
Ok(Ok(())) => {}
Ok(Err(err)) => {
warn!(
"conversation {conversation_id} rollout flush failed before archive: {err}"
);
// Establish the listener for ShutdownComplete before submitting
// Shutdown so it is not missed.
let is_shutdown = tokio::spawn(async move {
// Create the notified future outside the loop to avoid losing notifications.
let notified = notify_clone.notified();
tokio::pin!(notified);
loop {
select! {
_ = &mut notified => { break; }
event = conversation_clone.next_event() => {
match event {
Ok(event) => {
if matches!(event.msg, EventMsg::ShutdownComplete) { break; }
}
// Break on errors to avoid tight loops when the agent loop has exited.
Err(_) => { break; }
}
}
}
}
Err(_) => {
warn!(
"conversation {conversation_id} rollout flush timed out; proceeding with archive"
);
});
// Request shutdown.
match conversation.submit(Op::Shutdown).await {
Ok(_) => {
// Successfully submitted Shutdown; wait before proceeding.
select! {
_ = is_shutdown => {
// Normal shutdown: proceed with archive.
}
_ = tokio::time::sleep(Duration::from_secs(10)) => {
warn!("conversation {conversation_id} shutdown timed out; proceeding with archive");
// Wake any waiter; use notify_waiters to avoid missing the signal.
notify.notify_waiters();
// Perhaps we lost a shutdown race, so let's continue to
// clean up the .jsonl file.
}
}
}
Err(err) => {
error!("failed to submit Shutdown to conversation {conversation_id}: {err}");
notify.notify_waiters();
}
}
}
@@ -2268,8 +2253,7 @@ impl CodexMessageProcessor {
.codex_home
.join(codex_core::ARCHIVED_SESSIONS_SUBDIR);
tokio::fs::create_dir_all(&archive_folder).await?;
let destination = archive_folder.join(&file_name);
tokio::fs::rename(&canonical_rollout_path, &destination).await?;
tokio::fs::rename(&canonical_rollout_path, &archive_folder.join(&file_name)).await?;
Ok(())
}
.await;
@@ -2835,53 +2819,6 @@ impl CodexMessageProcessor {
Err(_) => None,
}
}
/// On Windows, when using the experimental sandbox, we need to warn the user about world-writable directories.
async fn handle_windows_world_writable_warning(&self, cwd: PathBuf) {
if !cfg!(windows) {
return;
}
if !self.config.features.enabled(Feature::WindowsSandbox) {
return;
}
if !matches!(
self.config.sandbox_policy,
codex_protocol::protocol::SandboxPolicy::WorkspaceWrite { .. }
| codex_protocol::protocol::SandboxPolicy::ReadOnly
) {
return;
}
if self
.config
.notices
.hide_world_writable_warning
.unwrap_or(false)
{
return;
}
// This function is stubbed out to return None on non-Windows platforms
if let Some((sample_paths, extra_count, failed_scan)) =
codex_windows_sandbox::world_writable_warning_details(
self.config.codex_home.as_path(),
cwd,
)
{
tracing::warn!("world writable warning: {sample_paths:?} {extra_count} {failed_scan}");
self.outgoing
.send_server_notification(ServerNotification::WindowsWorldWritableWarning(
WindowsWorldWritableWarningNotification {
sample_paths,
extra_count,
failed_scan,
},
))
.await;
}
}
}
async fn derive_config_from_params(
@@ -3035,7 +2972,6 @@ fn summary_to_thread(summary: ConversationSummary) -> Thread {
model_provider,
created_at: created_at.map(|dt| dt.timestamp()).unwrap_or(0),
path,
turns: Vec::new(),
}
}

View File

@@ -6,6 +6,7 @@ use crate::outgoing_message::OutgoingMessageSender;
use codex_app_server_protocol::ClientInfo;
use codex_app_server_protocol::ClientRequest;
use codex_app_server_protocol::InitializeResponse;
use codex_app_server_protocol::JSONRPCError;
use codex_app_server_protocol::JSONRPCErrorError;
use codex_app_server_protocol::JSONRPCNotification;
@@ -117,7 +118,6 @@ impl MessageProcessor {
self.outgoing.send_response(request_id, response).await;
self.initialized = true;
return;
}
}

View File

@@ -229,7 +229,6 @@ mod tests {
resets_at: Some(123),
}),
secondary: None,
credits: None,
},
});
@@ -244,8 +243,7 @@ mod tests {
"windowDurationMins": 15,
"resetsAt": 123
},
"secondary": null,
"credits": null
"secondary": null
}
},
}),

View File

@@ -46,7 +46,6 @@ pub fn create_fake_rollout(
instructions: None,
source: SessionSource::Cli,
model_provider: model_provider.map(str::to_string),
name: None,
})?;
let lines = [

View File

@@ -27,7 +27,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
std::fs::write(
config_toml,
r#"
model = "gpt-5.1-codex-max"
model = "gpt-5.1-codex"
approval_policy = "on-request"
sandbox_mode = "workspace-write"
model_reasoning_summary = "detailed"
@@ -87,7 +87,7 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
}),
forced_chatgpt_workspace_id: Some("12345678-0000-0000-0000-000000000000".into()),
forced_login_method: Some(ForcedLoginMethod::Chatgpt),
model: Some("gpt-5.1-codex-max".into()),
model: Some("gpt-5.1-codex".into()),
model_reasoning_effort: Some(ReasoningEffort::High),
model_reasoning_summary: Some(ReasoningSummary::Detailed),
model_verbosity: Some(Verbosity::Medium),

View File

@@ -57,7 +57,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
std::fs::write(
config_toml,
r#"
model = "gpt-5.1-codex-max"
model = "gpt-5.1-codex"
model_reasoning_effort = "medium"
"#,
)

View File

@@ -45,33 +45,6 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
} = to_response::<ModelListResponse>(response)?;
let expected_models = vec![
Model {
id: "gpt-5.1-codex-max".to_string(),
model: "gpt-5.1-codex-max".to_string(),
display_name: "gpt-5.1-codex-max".to_string(),
description: "Latest Codex-optimized flagship for deep and fast reasoning.".to_string(),
supported_reasoning_efforts: vec![
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Low,
description: "Fast responses with lighter reasoning".to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Medium,
description: "Balances speed and reasoning depth for everyday tasks"
.to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::High,
description: "Maximizes reasoning depth for complex problems".to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::XHigh,
description: "Extra high reasoning depth for complex problems".to_string(),
},
],
default_reasoning_effort: ReasoningEffort::Medium,
is_default: true,
},
Model {
id: "gpt-5.1-codex".to_string(),
model: "gpt-5.1-codex".to_string(),
@@ -93,7 +66,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
},
],
default_reasoning_effort: ReasoningEffort::Medium,
is_default: false,
is_default: true,
},
Model {
id: "gpt-5.1-codex-mini".to_string(),
@@ -174,7 +147,7 @@ async fn list_models_pagination_works() -> Result<()> {
} = to_response::<ModelListResponse>(first_response)?;
assert_eq!(first_items.len(), 1);
assert_eq!(first_items[0].id, "gpt-5.1-codex-max");
assert_eq!(first_items[0].id, "gpt-5.1-codex");
let next_cursor = first_cursor.ok_or_else(|| anyhow!("cursor for second page"))?;
let second_request = mcp
@@ -196,7 +169,7 @@ async fn list_models_pagination_works() -> Result<()> {
} = to_response::<ModelListResponse>(second_response)?;
assert_eq!(second_items.len(), 1);
assert_eq!(second_items[0].id, "gpt-5.1-codex");
assert_eq!(second_items[0].id, "gpt-5.1-codex-mini");
let third_cursor = second_cursor.ok_or_else(|| anyhow!("cursor for third page"))?;
let third_request = mcp
@@ -218,30 +191,8 @@ async fn list_models_pagination_works() -> Result<()> {
} = to_response::<ModelListResponse>(third_response)?;
assert_eq!(third_items.len(), 1);
assert_eq!(third_items[0].id, "gpt-5.1-codex-mini");
let fourth_cursor = third_cursor.ok_or_else(|| anyhow!("cursor for fourth page"))?;
let fourth_request = mcp
.send_list_models_request(ModelListParams {
limit: Some(1),
cursor: Some(fourth_cursor.clone()),
})
.await?;
let fourth_response: JSONRPCResponse = timeout(
DEFAULT_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(fourth_request)),
)
.await??;
let ModelListResponse {
data: fourth_items,
next_cursor: fourth_cursor,
} = to_response::<ModelListResponse>(fourth_response)?;
assert_eq!(fourth_items.len(), 1);
assert_eq!(fourth_items[0].id, "gpt-5.1");
assert!(fourth_cursor.is_none());
assert_eq!(third_items[0].id, "gpt-5.1");
assert!(third_cursor.is_none());
Ok(())
}

View File

@@ -152,7 +152,6 @@ async fn get_account_rate_limits_returns_snapshot() -> Result<()> {
window_duration_mins: Some(1440),
resets_at: Some(secondary_reset_timestamp),
}),
credits: None,
},
};
assert_eq!(received, expected);

View File

@@ -1,17 +1,13 @@
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_fake_rollout;
use app_test_support::create_mock_chat_completions_server;
use app_test_support::to_response;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadResumeParams;
use codex_app_server_protocol::ThreadResumeResponse;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStatus;
use codex_app_server_protocol::UserInput;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use tempfile::TempDir;
@@ -31,7 +27,7 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
// Start a thread.
let start_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("gpt-5.1-codex-max".to_string()),
model: Some("gpt-5.1-codex".to_string()),
..Default::default()
})
.await?;
@@ -62,65 +58,6 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
Ok(())
}
#[tokio::test]
async fn thread_resume_returns_rollout_history() -> Result<()> {
let server = create_mock_chat_completions_server(vec![]).await;
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
let preview = "Saved user message";
let conversation_id = create_fake_rollout(
codex_home.path(),
"2025-01-05T12-00-00",
"2025-01-05T12:00:00Z",
preview,
Some("mock_provider"),
)?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let resume_id = mcp
.send_thread_resume_request(ThreadResumeParams {
thread_id: conversation_id.clone(),
..Default::default()
})
.await?;
let resume_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(resume_id)),
)
.await??;
let ThreadResumeResponse { thread, .. } = to_response::<ThreadResumeResponse>(resume_resp)?;
assert_eq!(thread.id, conversation_id);
assert_eq!(thread.preview, preview);
assert_eq!(thread.model_provider, "mock_provider");
assert!(thread.path.is_absolute());
assert_eq!(
thread.turns.len(),
1,
"expected rollouts to include one turn"
);
let turn = &thread.turns[0];
assert_eq!(turn.status, TurnStatus::Completed);
assert_eq!(turn.items.len(), 1, "expected user message item");
match &turn.items[0] {
ThreadItem::UserMessage { content, .. } => {
assert_eq!(
content,
&vec![UserInput::Text {
text: preview.to_string()
}]
);
}
other => panic!("expected user message item, got {other:?}"),
}
Ok(())
}
#[tokio::test]
async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
let server = create_mock_chat_completions_server(vec![]).await;
@@ -132,7 +69,7 @@ async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
let start_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("gpt-5.1-codex-max".to_string()),
model: Some("gpt-5.1-codex".to_string()),
..Default::default()
})
.await?;
@@ -177,7 +114,7 @@ async fn thread_resume_supports_history_and_overrides() -> Result<()> {
// Start a thread.
let start_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("gpt-5.1-codex-max".to_string()),
model: Some("gpt-5.1-codex".to_string()),
..Default::default()
})
.await?;

View File

@@ -1,20 +1,14 @@
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_apply_patch_sse_response;
use app_test_support::create_final_assistant_message_sse_response;
use app_test_support::create_mock_chat_completions_server;
use app_test_support::create_mock_chat_completions_server_unchecked;
use app_test_support::create_shell_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::ApprovalDecision;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::JSONRPCNotification;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::PatchApplyStatus;
use codex_app_server_protocol::PatchChangeKind;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ServerRequest;
use codex_app_server_protocol::ThreadItem;
@@ -477,300 +471,6 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
Ok(())
}
#[tokio::test]
async fn turn_start_file_change_approval_v2() -> Result<()> {
skip_if_no_network!(Ok(()));
let tmp = TempDir::new()?;
let codex_home = tmp.path().join("codex_home");
std::fs::create_dir(&codex_home)?;
let workspace = tmp.path().join("workspace");
std::fs::create_dir(&workspace)?;
let patch = r#"*** Begin Patch
*** Add File: README.md
+new line
*** End Patch
"#;
let responses = vec![
create_apply_patch_sse_response(patch, "patch-call")?,
create_final_assistant_message_sse_response("patch applied")?,
];
let server = create_mock_chat_completions_server(responses).await;
create_config_toml(&codex_home, &server.uri(), "untrusted")?;
let mut mcp = McpProcess::new(&codex_home).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let start_req = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("mock-model".to_string()),
cwd: Some(workspace.to_string_lossy().into_owned()),
..Default::default()
})
.await?;
let start_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(start_req)),
)
.await??;
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
let turn_req = mcp
.send_turn_start_request(TurnStartParams {
thread_id: thread.id.clone(),
input: vec![V2UserInput::Text {
text: "apply patch".into(),
}],
cwd: Some(workspace.clone()),
..Default::default()
})
.await?;
let turn_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
)
.await??;
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
let started_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let started_notif = mcp
.read_stream_until_notification_message("item/started")
.await?;
let started: ItemStartedNotification =
serde_json::from_value(started_notif.params.clone().expect("item/started params"))?;
if let ThreadItem::FileChange { .. } = started.item {
return Ok::<ThreadItem, anyhow::Error>(started.item);
}
}
})
.await??;
let ThreadItem::FileChange {
ref id,
status,
ref changes,
} = started_file_change
else {
unreachable!("loop ensures we break on file change items");
};
assert_eq!(id, "patch-call");
assert_eq!(status, PatchApplyStatus::InProgress);
let started_changes = changes.clone();
let server_req = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_request_message(),
)
.await??;
let ServerRequest::FileChangeRequestApproval { request_id, params } = server_req else {
panic!("expected FileChangeRequestApproval request")
};
assert_eq!(params.item_id, "patch-call");
assert_eq!(params.thread_id, thread.id);
assert_eq!(params.turn_id, turn.id);
let expected_readme_path = workspace.join("README.md");
let expected_readme_path = expected_readme_path.to_string_lossy().into_owned();
pretty_assertions::assert_eq!(
started_changes,
vec![codex_app_server_protocol::FileUpdateChange {
path: expected_readme_path.clone(),
kind: PatchChangeKind::Add,
diff: "new line\n".to_string(),
}]
);
mcp.send_response(
request_id,
serde_json::to_value(FileChangeRequestApprovalResponse {
decision: ApprovalDecision::Accept,
})?,
)
.await?;
let completed_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let completed_notif = mcp
.read_stream_until_notification_message("item/completed")
.await?;
let completed: ItemCompletedNotification = serde_json::from_value(
completed_notif
.params
.clone()
.expect("item/completed params"),
)?;
if let ThreadItem::FileChange { .. } = completed.item {
return Ok::<ThreadItem, anyhow::Error>(completed.item);
}
}
})
.await??;
let ThreadItem::FileChange { ref id, status, .. } = completed_file_change else {
unreachable!("loop ensures we break on file change items");
};
assert_eq!(id, "patch-call");
assert_eq!(status, PatchApplyStatus::Completed);
timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("codex/event/task_complete"),
)
.await??;
let readme_contents = std::fs::read_to_string(expected_readme_path)?;
assert_eq!(readme_contents, "new line\n");
Ok(())
}
#[tokio::test]
async fn turn_start_file_change_approval_decline_v2() -> Result<()> {
skip_if_no_network!(Ok(()));
let tmp = TempDir::new()?;
let codex_home = tmp.path().join("codex_home");
std::fs::create_dir(&codex_home)?;
let workspace = tmp.path().join("workspace");
std::fs::create_dir(&workspace)?;
let patch = r#"*** Begin Patch
*** Add File: README.md
+new line
*** End Patch
"#;
let responses = vec![
create_apply_patch_sse_response(patch, "patch-call")?,
create_final_assistant_message_sse_response("patch declined")?,
];
let server = create_mock_chat_completions_server(responses).await;
create_config_toml(&codex_home, &server.uri(), "untrusted")?;
let mut mcp = McpProcess::new(&codex_home).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let start_req = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("mock-model".to_string()),
cwd: Some(workspace.to_string_lossy().into_owned()),
..Default::default()
})
.await?;
let start_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(start_req)),
)
.await??;
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
let turn_req = mcp
.send_turn_start_request(TurnStartParams {
thread_id: thread.id.clone(),
input: vec![V2UserInput::Text {
text: "apply patch".into(),
}],
cwd: Some(workspace.clone()),
..Default::default()
})
.await?;
let turn_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
)
.await??;
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
let started_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let started_notif = mcp
.read_stream_until_notification_message("item/started")
.await?;
let started: ItemStartedNotification =
serde_json::from_value(started_notif.params.clone().expect("item/started params"))?;
if let ThreadItem::FileChange { .. } = started.item {
return Ok::<ThreadItem, anyhow::Error>(started.item);
}
}
})
.await??;
let ThreadItem::FileChange {
ref id,
status,
ref changes,
} = started_file_change
else {
unreachable!("loop ensures we break on file change items");
};
assert_eq!(id, "patch-call");
assert_eq!(status, PatchApplyStatus::InProgress);
let started_changes = changes.clone();
let server_req = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_request_message(),
)
.await??;
let ServerRequest::FileChangeRequestApproval { request_id, params } = server_req else {
panic!("expected FileChangeRequestApproval request")
};
assert_eq!(params.item_id, "patch-call");
assert_eq!(params.thread_id, thread.id);
assert_eq!(params.turn_id, turn.id);
let expected_readme_path = workspace.join("README.md");
let expected_readme_path_str = expected_readme_path.to_string_lossy().into_owned();
pretty_assertions::assert_eq!(
started_changes,
vec![codex_app_server_protocol::FileUpdateChange {
path: expected_readme_path_str.clone(),
kind: PatchChangeKind::Add,
diff: "new line\n".to_string(),
}]
);
mcp.send_response(
request_id,
serde_json::to_value(FileChangeRequestApprovalResponse {
decision: ApprovalDecision::Decline,
})?,
)
.await?;
let completed_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let completed_notif = mcp
.read_stream_until_notification_message("item/completed")
.await?;
let completed: ItemCompletedNotification = serde_json::from_value(
completed_notif
.params
.clone()
.expect("item/completed params"),
)?;
if let ThreadItem::FileChange { .. } = completed.item {
return Ok::<ThreadItem, anyhow::Error>(completed.item);
}
}
})
.await??;
let ThreadItem::FileChange { ref id, status, .. } = completed_file_change else {
unreachable!("loop ensures we break on file change items");
};
assert_eq!(id, "patch-call");
assert_eq!(status, PatchApplyStatus::Declined);
timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("codex/event/task_complete"),
)
.await??;
assert!(
!expected_readme_path.exists(),
"declined patch should not be applied"
);
Ok(())
}
// Helper to create a config.toml pointing at the mock model server.
fn create_config_toml(
codex_home: &Path,

View File

@@ -1,5 +1,4 @@
use crate::types::CodeTaskDetailsResponse;
use crate::types::CreditStatusDetails;
use crate::types::PaginatedListTaskListItem;
use crate::types::RateLimitStatusPayload;
use crate::types::RateLimitWindowSnapshot;
@@ -7,7 +6,6 @@ use crate::types::TurnAttemptsSiblingTurnsResponse;
use anyhow::Result;
use codex_core::auth::CodexAuth;
use codex_core::default_client::get_codex_user_agent;
use codex_protocol::protocol::CreditsSnapshot;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_protocol::protocol::RateLimitWindow;
use reqwest::header::AUTHORIZATION;
@@ -274,23 +272,19 @@ impl Client {
// rate limit helpers
fn rate_limit_snapshot_from_payload(payload: RateLimitStatusPayload) -> RateLimitSnapshot {
let rate_limit_details = payload
let Some(details) = payload
.rate_limit
.and_then(|inner| inner.map(|boxed| *boxed));
let (primary, secondary) = if let Some(details) = rate_limit_details {
(
Self::map_rate_limit_window(details.primary_window),
Self::map_rate_limit_window(details.secondary_window),
)
} else {
(None, None)
.and_then(|inner| inner.map(|boxed| *boxed))
else {
return RateLimitSnapshot {
primary: None,
secondary: None,
};
};
RateLimitSnapshot {
primary,
secondary,
credits: Self::map_credits(payload.credits),
primary: Self::map_rate_limit_window(details.primary_window),
secondary: Self::map_rate_limit_window(details.secondary_window),
}
}
@@ -312,19 +306,6 @@ impl Client {
})
}
fn map_credits(credits: Option<Option<Box<CreditStatusDetails>>>) -> Option<CreditsSnapshot> {
let details = match credits {
Some(Some(details)) => *details,
_ => return None,
};
Some(CreditsSnapshot {
has_credits: details.has_credits,
unlimited: details.unlimited,
balance: details.balance.and_then(|inner| inner),
})
}
fn window_minutes_from_seconds(seconds: i32) -> Option<i64> {
if seconds <= 0 {
return None;

View File

@@ -1,4 +1,3 @@
pub use codex_backend_openapi_models::models::CreditStatusDetails;
pub use codex_backend_openapi_models::models::PaginatedListTaskListItem;
pub use codex_backend_openapi_models::models::PlanType;
pub use codex_backend_openapi_models::models::RateLimitStatusDetails;

View File

@@ -100,9 +100,6 @@ enum Subcommand {
/// Resume a previous interactive session (picker by default; use --last to continue the most recent).
Resume(ResumeCommand),
/// Fork an existing session into a new conversation.
Fork(ForkCommand),
/// [EXPERIMENTAL] Browse tasks from Codex Cloud and apply changes locally.
#[clap(name = "cloud", alias = "cloud-tasks")]
Cloud(CloudTasksCli),
@@ -145,16 +142,6 @@ struct ResumeCommand {
config_overrides: TuiCli,
}
#[derive(Debug, Parser)]
struct ForkCommand {
/// Resume from a saved session name or rollout id, but start a new conversation.
#[arg(value_name = "ID|NAME")]
target: String,
#[clap(flatten)]
config_overrides: TuiCli,
}
#[derive(Debug, Parser)]
struct SandboxArgs {
#[command(subcommand)]
@@ -479,19 +466,6 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
handle_app_exit(exit_info)?;
}
Some(Subcommand::Fork(ForkCommand {
target,
config_overrides,
})) => {
interactive = finalize_fork_interactive(
interactive,
root_config_overrides.clone(),
target,
config_overrides,
);
let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
handle_app_exit(exit_info)?;
}
Some(Subcommand::Login(mut login_cli)) => {
prepend_config_flags(
&mut login_cli.config_overrides,
@@ -653,7 +627,6 @@ fn finalize_resume_interactive(
interactive.resume_last = last;
interactive.resume_session_id = resume_session_id;
interactive.resume_show_all = show_all;
interactive.fork_source = None;
// Merge resume-scoped flags and overrides with highest precedence.
merge_resume_cli_flags(&mut interactive, resume_cli);
@@ -664,21 +637,6 @@ fn finalize_resume_interactive(
interactive
}
fn finalize_fork_interactive(
mut interactive: TuiCli,
root_config_overrides: CliConfigOverrides,
target: String,
fork_cli: TuiCli,
) -> TuiCli {
interactive.resume_picker = false;
interactive.resume_last = false;
interactive.resume_session_id = None;
interactive.fork_source = Some(target);
merge_resume_cli_flags(&mut interactive, fork_cli);
prepend_config_flags(&mut interactive.config_overrides, root_config_overrides);
interactive
}
/// Merge flags provided to `codex resume` so they take precedence over any
/// root-level flags. Only overrides fields explicitly set on the resume-scoped
/// CLI. Also appends `-c key=value` overrides with highest precedence.
@@ -769,26 +727,6 @@ mod tests {
)
}
fn fork_from_args(args: &[&str]) -> TuiCli {
let cli = MultitoolCli::try_parse_from(args).expect("parse");
let MultitoolCli {
interactive,
config_overrides: root_overrides,
subcommand,
feature_toggles: _,
} = cli;
let Subcommand::Fork(ForkCommand {
target,
config_overrides,
}) = subcommand.expect("fork present")
else {
unreachable!()
};
finalize_fork_interactive(interactive, root_overrides, target, config_overrides)
}
fn sample_exit_info(conversation: Option<&str>) -> AppExitInfo {
let token_usage = TokenUsage {
output_tokens: 2,
@@ -881,15 +819,6 @@ mod tests {
assert!(interactive.resume_show_all);
}
#[test]
fn fork_sets_target_and_disables_resume_controls() {
let interactive = fork_from_args(["codex", "fork", "saved"].as_ref());
assert_eq!(interactive.fork_source.as_deref(), Some("saved"));
assert!(!interactive.resume_picker);
assert!(!interactive.resume_last);
assert!(interactive.resume_session_id.is_none());
}
#[test]
fn resume_merges_option_flags_and_full_auto() {
let interactive = finalize_from_args(

View File

@@ -15,12 +15,13 @@ pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, Stri
if config.model_provider.wire_api == WireApi::Responses
&& config.model_family.supports_reasoning_summaries
{
let reasoning_effort = config
.model_reasoning_effort
.or(config.model_family.default_reasoning_effort)
.map(|effort| effort.to_string())
.unwrap_or_else(|| "none".to_string());
entries.push(("reasoning effort", reasoning_effort));
entries.push((
"reasoning effort",
config
.model_reasoning_effort
.map(|effort| effort.to_string())
.unwrap_or_else(|| "none".to_string()),
));
entries.push((
"reasoning summaries",
config.model_reasoning_summary.to_string(),

View File

@@ -4,10 +4,6 @@ use codex_app_server_protocol::AuthMode;
use codex_core::protocol_config_types::ReasoningEffort;
use once_cell::sync::Lazy;
pub const HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG: &str = "hide_gpt5_1_migration_prompt";
pub const HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG: &str =
"hide_gpt-5.1-codex-max_migration_prompt";
/// A reasoning effort option that can be surfaced for a model.
#[derive(Debug, Clone, Copy)]
pub struct ReasoningEffortPreset {
@@ -21,7 +17,6 @@ pub struct ReasoningEffortPreset {
pub struct ModelUpgrade {
pub id: &'static str,
pub reasoning_effort_mapping: Option<HashMap<ReasoningEffort, ReasoningEffort>>,
pub migration_config_key: &'static str,
}
/// Metadata describing a Codex-supported model.
@@ -43,40 +38,10 @@ pub struct ModelPreset {
pub is_default: bool,
/// recommended upgrade model
pub upgrade: Option<ModelUpgrade>,
/// Whether this preset should appear in the picker UI.
pub show_in_picker: bool,
}
static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
vec![
ModelPreset {
id: "gpt-5.1-codex-max",
model: "gpt-5.1-codex-max",
display_name: "gpt-5.1-codex-max",
description: "Latest Codex-optimized flagship for deep and fast reasoning.",
default_reasoning_effort: ReasoningEffort::Medium,
supported_reasoning_efforts: &[
ReasoningEffortPreset {
effort: ReasoningEffort::Low,
description: "Fast responses with lighter reasoning",
},
ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: "Balances speed and reasoning depth for everyday tasks",
},
ReasoningEffortPreset {
effort: ReasoningEffort::High,
description: "Maximizes reasoning depth for complex problems",
},
ReasoningEffortPreset {
effort: ReasoningEffort::XHigh,
description: "Extra high reasoning depth for complex problems",
},
],
is_default: true,
upgrade: None,
show_in_picker: true,
},
ModelPreset {
id: "gpt-5.1-codex",
model: "gpt-5.1-codex",
@@ -97,13 +62,8 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
description: "Maximizes reasoning depth for complex or ambiguous problems",
},
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: true,
is_default: true,
upgrade: None,
},
ModelPreset {
id: "gpt-5.1-codex-mini",
@@ -122,12 +82,7 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
},
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: true,
upgrade: None,
},
ModelPreset {
id: "gpt-5.1",
@@ -150,12 +105,7 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
},
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: true,
upgrade: None,
},
// Deprecated models.
ModelPreset {
@@ -180,11 +130,9 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
id: "gpt-5.1-codex",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: false,
},
ModelPreset {
id: "gpt-5-codex-mini",
@@ -206,9 +154,7 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-mini",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: false,
},
ModelPreset {
id: "gpt-5",
@@ -236,22 +182,21 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
id: "gpt-5.1",
reasoning_effort_mapping: Some(HashMap::from([(
ReasoningEffort::Minimal,
ReasoningEffort::Low,
)])),
}),
show_in_picker: false,
},
]
});
pub fn builtin_model_presets(auth_mode: Option<AuthMode>) -> Vec<ModelPreset> {
pub fn builtin_model_presets(_auth_mode: Option<AuthMode>) -> Vec<ModelPreset> {
// leave auth mode for later use
PRESETS
.iter()
.filter(|preset| match auth_mode {
Some(AuthMode::ApiKey) => preset.show_in_picker && preset.id != "gpt-5.1-codex-max",
_ => preset.show_in_picker,
})
.filter(|preset| preset.upgrade.is_none())
.cloned()
.collect()
}
@@ -263,21 +208,10 @@ pub fn all_model_presets() -> &'static Vec<ModelPreset> {
#[cfg(test)]
mod tests {
use super::*;
use codex_app_server_protocol::AuthMode;
#[test]
fn only_one_default_model_is_configured() {
let default_models = PRESETS.iter().filter(|preset| preset.is_default).count();
assert!(default_models == 1);
}
#[test]
fn gpt_5_1_codex_max_hidden_for_api_key_auth() {
let presets = builtin_model_presets(Some(AuthMode::ApiKey));
assert!(
presets
.iter()
.all(|preset| preset.id != "gpt-5.1-codex-max")
);
}
}

View File

@@ -22,7 +22,6 @@ chrono = { workspace = true, features = ["serde"] }
codex-app-server-protocol = { workspace = true }
codex-apply-patch = { workspace = true }
codex-async-utils = { workspace = true }
codex-execpolicy = { workspace = true }
codex-file-search = { workspace = true }
codex-git = { workspace = true }
codex-keyring-store = { workspace = true }

View File

@@ -1,117 +0,0 @@
You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.
## General
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
## Editing constraints
- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.
- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).
- You may be in a dirty git worktree.
* NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
* If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
* If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
* If the changes are in unrelated files, just ignore them and don't revert them.
- Do not amend a commit unless explicitly requested to do so.
- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
## Plan tool
When using the planning tool:
- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
- Do not make single-step plans.
- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
## Codex CLI harness, sandboxing, and approvals
The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.
Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:
- **read-only**: The sandbox only permits reading files.
- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.
- **danger-full-access**: No filesystem sandboxing - all commands are permitted.
Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:
- **restricted**: Requires approval
- **enabled**: No approval needed
Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are
- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
- (for all of these, you should weigh alternative paths that do not require approval)
When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read.
You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure.
Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals.
When requesting approval to execute a command that will require escalated privileges:
- Provide the `with_escalated_permissions` parameter with the boolean value true
- Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter
## Special user requests
- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
- If the user asks for a "review", default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.
## Frontend tasks
When doing frontend design tasks, avoid collapsing into "AI slop" or safe, average-looking layouts.
Aim for interfaces that feel intentional, bold, and a bit surprising.
- Typography: Use expressive, purposeful fonts and avoid default stacks (Inter, Roboto, Arial, system).
- Color & Look: Choose a clear visual direction; define CSS variables; avoid purple-on-white defaults. No purple bias or dark mode bias.
- Motion: Use a few meaningful animations (page-load, staggered reveals) instead of generic micro-motions.
- Background: Don't rely on flat, single-color backgrounds; use gradients, shapes, or subtle patterns to build atmosphere.
- Overall: Avoid boilerplate layouts and interchangeable UI patterns. Vary themes, type families, and visual languages across outputs.
- Ensure the page loads properly on both desktop and mobile
Exception: If working within an existing website or design system, preserve the established patterns, structure, and visual language.
## Presenting your work and final message
You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
- Default: be very concise; friendly coding teammate tone.
- Ask only when needed; suggest ideas; mirror the user's style.
- For substantial work, summarize clearly; follow finalanswer formatting.
- Skip heavy formatting for simple confirmations.
- Don't dump large files you've written; reference paths only.
- No "save/copy this file" - User is on the same machine.
- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.
- For code changes:
* Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with "summary", just jump right in.
* If there are natural next steps the user may want to take, suggest them at the end of your response. Do not make suggestions if there are no natural next steps.
* When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.
- The user does not command execution outputs. When asked to show the output of a command (e.g. `git show`), relay the important details in your answer or summarize the key lines so the user understands the result.
### Final answer structure and style guidelines
- Plain text; CLI handles styling. Use structure only when it helps scanability.
- Headers: optional; short Title Case (1-3 words) wrapped in **…**; no blank line before the first bullet; add only if they truly help.
- Bullets: use - ; merge related points; keep to one line when possible; 46 per list ordered by importance; keep phrasing consistent.
- Monospace: backticks for commands/paths/env vars/code ids and inline examples; use for literal keyword bullets; never combine with **.
- Code samples or multi-line snippets should be wrapped in fenced code blocks; include an info string as often as possible.
- Structure: group related bullets; order sections general → specific → supporting; for subsections, start with a bolded keyword bullet, then items; match complexity to the task.
- Tone: collaborative, concise, factual; present tense, active voice; selfcontained; no "above/below"; parallel wording.
- Don'ts: no nested bullets/hierarchies; no ANSI codes; don't cram unrelated keywords; keep keyword lists short—wrap/reformat if long; avoid naming formatting styles in answers.
- Adaptation: code explanations → precise, structured with code refs; simple tasks → lead with outcome; big changes → logical walkthrough + rationale + next actions; casual one-offs → plain sentences, no headers/bullets.
- File References: When referencing files in your response follow the below rules:
* Use inline code to make file paths clickable.
* Each reference should have a stand alone path. Even if it's the same file.
* Accepted: absolute, workspacerelative, a/ or b/ diff prefixes, or bare filename/suffix.
* Optionally include line/column (1based): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
* Do not use URIs like file://, vscode://, or https://.
* Do not provide range of lines
* Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5

View File

@@ -56,7 +56,6 @@ use crate::model_family::ModelFamily;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::openai_model_info::get_model_info;
use crate::protocol::CreditsSnapshot;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::RateLimitWindow;
use crate::protocol::TokenUsage;
@@ -727,13 +726,7 @@ fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
"x-codex-secondary-reset-at",
);
let credits = parse_credits_snapshot(headers);
Some(RateLimitSnapshot {
primary,
secondary,
credits,
})
Some(RateLimitSnapshot { primary, secondary })
}
fn parse_rate_limit_window(
@@ -760,20 +753,6 @@ fn parse_rate_limit_window(
})
}
fn parse_credits_snapshot(headers: &HeaderMap) -> Option<CreditsSnapshot> {
let has_credits = parse_header_bool(headers, "x-codex-credits-has-credits")?;
let unlimited = parse_header_bool(headers, "x-codex-credits-unlimited")?;
let balance = parse_header_str(headers, "x-codex-credits-balance")
.map(str::trim)
.filter(|value| !value.is_empty())
.map(std::string::ToString::to_string);
Some(CreditsSnapshot {
has_credits,
unlimited,
balance,
})
}
fn parse_header_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
parse_header_str(headers, name)?
.parse::<f64>()
@@ -785,17 +764,6 @@ fn parse_header_i64(headers: &HeaderMap, name: &str) -> Option<i64> {
parse_header_str(headers, name)?.parse::<i64>().ok()
}
fn parse_header_bool(headers: &HeaderMap, name: &str) -> Option<bool> {
let raw = parse_header_str(headers, name)?;
if raw.eq_ignore_ascii_case("true") || raw == "1" {
Some(true)
} else if raw.eq_ignore_ascii_case("false") || raw == "0" {
Some(false)
} else {
None
}
}
fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
headers.get(name)?.to_str().ok()
}

View File

@@ -136,7 +136,7 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
}
fn is_shell_tool_name(name: &str) -> bool {
matches!(name, "shell" | "container.exec")
matches!(name, "shell" | "container.exec" | "shell_command")
}
#[derive(Deserialize)]
@@ -165,9 +165,11 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
));
let mut output = parsed.output.clone();
if let Some((stripped, total_lines)) = strip_total_output_header(&parsed.output) {
if let Some(total_lines) = extract_total_output_lines(&parsed.output) {
sections.push(format!("Total output lines: {total_lines}"));
output = stripped.to_string();
if let Some(stripped) = strip_total_output_header(&output) {
output = stripped.to_string();
}
}
sections.push("Output:".to_string());
@@ -176,12 +178,19 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
sections.join("\n")
}
fn strip_total_output_header(output: &str) -> Option<(&str, u32)> {
fn extract_total_output_lines(output: &str) -> Option<u32> {
let marker_start = output.find("[... omitted ")?;
let marker = &output[marker_start..];
let (_, after_of) = marker.split_once(" of ")?;
let (total_segment, _) = after_of.split_once(' ')?;
total_segment.parse::<u32>().ok()
}
fn strip_total_output_header(output: &str) -> Option<&str> {
let after_prefix = output.strip_prefix("Total output lines: ")?;
let (total_segment, remainder) = after_prefix.split_once('\n')?;
let total_lines = total_segment.parse::<u32>().ok()?;
let (_, remainder) = after_prefix.split_once('\n')?;
let remainder = remainder.strip_prefix('\n').unwrap_or(remainder);
Some((remainder, total_lines))
Some(remainder)
}
#[derive(Debug)]
@@ -422,7 +431,7 @@ mod tests {
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "gpt-5.1-codex-max",
slug: "gpt-5.1-codex",
expects_apply_patch_instructions: false,
},
];

View File

@@ -7,9 +7,6 @@ use std::sync::atomic::AtomicU64;
use crate::AuthManager;
use crate::client_common::REVIEW_PROMPT;
use crate::compact;
use crate::compact::run_inline_auto_compact_task;
use crate::compact::should_use_remote_compact_task;
use crate::compact_remote::run_inline_remote_auto_compact_task;
use crate::features::Feature;
use crate::function_tool::FunctionCallError;
use crate::parse_command::parse_command;
@@ -100,8 +97,6 @@ use crate::protocol::TurnDiffEvent;
use crate::protocol::WarningEvent;
use crate::rollout::RolloutRecorder;
use crate::rollout::RolloutRecorderParams;
use crate::saved_sessions::build_saved_session_entry;
use crate::saved_sessions::upsert_saved_session;
use crate::shell;
use crate::state::ActiveTurn;
use crate::state::SessionServices;
@@ -123,7 +118,6 @@ use crate::user_instructions::UserInstructions;
use crate::user_notification::UserNotification;
use crate::util::backoff;
use codex_async_utils::OrCancelExt;
use codex_execpolicy::Policy as ExecPolicy;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
@@ -136,8 +130,6 @@ use codex_protocol::user_input::UserInput;
use codex_utils_readiness::Readiness;
use codex_utils_readiness::ReadinessFlag;
use codex_utils_tokenizer::warm_model_cache;
use reqwest::StatusCode;
use std::path::Path;
/// The high-level interface to the Codex system.
/// It operates as a queue pair where you send submissions and receive events.
@@ -153,7 +145,6 @@ pub struct Codex {
pub struct CodexSpawnOk {
pub codex: Codex,
pub conversation_id: ConversationId,
pub(crate) session: Arc<Session>,
}
pub(crate) const INITIAL_SUBMIT_ID: &str = "";
@@ -172,10 +163,6 @@ impl Codex {
let user_instructions = get_user_instructions(&config).await;
let exec_policy = crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
.await
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy: {err}")))?;
let config = Arc::new(config);
let session_configuration = SessionConfiguration {
@@ -192,7 +179,6 @@ impl Codex {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: config.features.clone(),
exec_policy,
session_source,
};
@@ -214,8 +200,7 @@ impl Codex {
let conversation_id = session.conversation_id;
// This task will run until Op::Shutdown is received.
let submission_session = Arc::clone(&session);
tokio::spawn(submission_loop(submission_session, config, rx_sub));
tokio::spawn(submission_loop(session, config, rx_sub));
let codex = Codex {
next_id: AtomicU64::new(0),
tx_sub,
@@ -225,7 +210,6 @@ impl Codex {
Ok(CodexSpawnOk {
codex,
conversation_id,
session,
})
}
@@ -292,7 +276,6 @@ pub(crate) struct TurnContext {
pub(crate) final_output_json_schema: Option<Value>,
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
pub(crate) exec_policy: Arc<ExecPolicy>,
pub(crate) truncation_policy: TruncationPolicy,
}
@@ -349,8 +332,6 @@ pub(crate) struct SessionConfiguration {
/// Set of feature flags for this session
features: Features,
/// Execpolicy policy, applied only when enabled by feature flag.
exec_policy: Arc<ExecPolicy>,
// TODO(pakrym): Remove config from here
original_config_do_not_use: Arc<Config>,
@@ -451,7 +432,6 @@ impl Session {
final_output_json_schema: None,
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy: session_configuration.exec_policy.clone(),
truncation_policy: TruncationPolicy::new(&per_turn_config),
}
}
@@ -639,68 +619,18 @@ impl Session {
}
/// Ensure all rollout writes are durably flushed.
pub(crate) async fn flush_rollout(&self) -> std::io::Result<()> {
pub(crate) async fn flush_rollout(&self) {
let recorder = {
let guard = self.services.rollout.lock().await;
guard.clone()
};
if let Some(rec) = recorder {
rec.flush().await
} else {
Ok(())
if let Some(rec) = recorder
&& let Err(e) = rec.flush().await
{
warn!("failed to flush rollout recorder: {e}");
}
}
pub(crate) async fn set_session_name(&self, name: Option<String>) -> std::io::Result<()> {
let recorder = {
let guard = self.services.rollout.lock().await;
guard.clone()
};
if let Some(rec) = recorder {
rec.set_session_name(name).await
} else {
Ok(())
}
}
pub(crate) async fn rollout_path(&self) -> CodexResult<PathBuf> {
let guard = self.services.rollout.lock().await;
let Some(rec) = guard.as_ref() else {
return Err(CodexErr::Fatal(
"Rollout recorder is not initialized; cannot save session.".to_string(),
));
};
Ok(rec.rollout_path.clone())
}
pub(crate) async fn model(&self) -> String {
let state = self.state.lock().await;
state.session_configuration.model.clone()
}
pub(crate) async fn save_session(
&self,
codex_home: &Path,
name: &str,
) -> CodexResult<crate::SavedSessionEntry> {
let trimmed = name.trim();
if trimmed.is_empty() {
return Err(CodexErr::Fatal("Usage: /save <name>".to_string()));
}
let rollout_path = self.rollout_path().await?;
self.flush_rollout()
.await
.map_err(|e| CodexErr::Fatal(format!("failed to flush rollout recorder: {e}")))?;
self.set_session_name(Some(trimmed.to_string()))
.await
.map_err(|e| CodexErr::Fatal(format!("failed to write session name: {e}")))?;
let entry =
build_saved_session_entry(trimmed.to_string(), rollout_path, self.model().await)
.await?;
upsert_saved_session(codex_home, entry.clone()).await?;
Ok(entry)
}
fn next_internal_sub_id(&self) -> String {
let id = self
.next_internal_sub_id
@@ -716,9 +646,7 @@ impl Session {
let items = self.build_initial_context(&turn_context);
self.record_conversation_items(&turn_context, &items).await;
// Ensure initial items are visible to immediate readers (e.g., tests, forks).
if let Err(e) = self.flush_rollout().await {
warn!("failed to flush rollout recorder: {e}");
}
self.flush_rollout().await;
}
InitialHistory::Resumed(_) | InitialHistory::Forked(_) => {
let rollout_items = conversation_history.get_rollout_items();
@@ -762,18 +690,10 @@ impl Session {
// If persisting, persist all rollout items as-is (recorder filters)
if persist && !rollout_items.is_empty() {
// Drop legacy SessionMeta lines from the source rollout so the forked
// session only contains its own SessionMeta written by the recorder.
let filtered =
InitialHistory::Forked(rollout_items.clone()).without_session_meta();
if !filtered.is_empty() {
self.persist_rollout_items(&filtered).await;
}
self.persist_rollout_items(&rollout_items).await;
}
// Flush after seeding history and any persisted rollout copy.
if let Err(e) = self.flush_rollout().await {
warn!("failed to flush rollout recorder: {e}");
}
self.flush_rollout().await;
}
}
}
@@ -986,7 +906,6 @@ impl Session {
let event = EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
changes,
reason,
grant_root,
@@ -1163,14 +1082,11 @@ impl Session {
self.send_token_count_event(turn_context).await;
}
pub(crate) async fn recompute_token_usage(&self, turn_context: &TurnContext) {
let Some(estimated_total_tokens) = self
.clone_history()
.await
.estimate_token_count(turn_context)
else {
return;
};
pub(crate) async fn override_last_token_usage_estimate(
&self,
turn_context: &TurnContext,
estimated_total_tokens: i64,
) {
{
let mut state = self.state.lock().await;
let mut info = state.token_info().unwrap_or(TokenUsageInfo {
@@ -1413,10 +1329,7 @@ impl Session {
}
async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiver<Submission>) {
// Seed with context in case there is an OverrideTurnContext first.
let mut previous_context: Option<Arc<TurnContext>> =
Some(sess.new_turn(SessionSettingsUpdate::default()).await);
let mut previous_context: Option<Arc<TurnContext>> = None;
// To break out of this loop, send Op::Shutdown.
while let Ok(sub) = rx_sub.recv().await {
debug!(?sub, "Submission");
@@ -1492,9 +1405,6 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
Op::Review { review_request } => {
handlers::review(&sess, &config, sub.id.clone(), review_request).await;
}
Op::SaveSession { name } => {
handlers::save_session(&sess, &config, sub.id.clone(), name).await;
}
_ => {} // Ignore unknown ops; enum is non_exhaustive to allow extensions.
}
}
@@ -1522,7 +1432,6 @@ mod handlers {
use codex_protocol::protocol::Op;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::ReviewRequest;
use codex_protocol::protocol::SaveSessionResponseEvent;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::user_input::UserInput;
@@ -1744,38 +1653,6 @@ mod handlers {
.await;
}
pub async fn save_session(
sess: &Arc<Session>,
config: &Arc<crate::config::Config>,
sub_id: String,
name: String,
) {
match sess.save_session(&config.codex_home, &name).await {
Ok(entry) => {
let event = Event {
id: sub_id,
msg: EventMsg::SaveSessionResponse(SaveSessionResponseEvent {
name: entry.name,
rollout_path: entry.rollout_path,
conversation_id: entry.conversation_id,
}),
};
sess.send_event_raw(event).await;
}
Err(err) => {
let message = format!("Failed to save session '{name}': {err}");
let event = Event {
id: sub_id,
msg: EventMsg::Error(ErrorEvent {
message,
http_status_code: None,
}),
};
sess.send_event_raw(event).await;
}
}
}
pub async fn shutdown(sess: &Arc<Session>, sub_id: String) -> bool {
sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
info!("Shutting down Codex instance");
@@ -1899,7 +1776,6 @@ async fn spawn_review_thread(
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy: parent_turn_context.exec_policy.clone(),
truncation_policy: TruncationPolicy::new(&per_turn_config),
};
@@ -2014,12 +1890,7 @@ pub(crate) async fn run_task(
// as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
if token_limit_reached {
if should_use_remote_compact_task(&sess).await {
run_inline_remote_auto_compact_task(sess.clone(), turn_context.clone())
.await;
} else {
run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
}
compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
continue;
}
@@ -2103,14 +1974,12 @@ async fn run_turn(
let mut base_instructions = turn_context.base_instructions.clone();
if parallel_tool_calls {
static INSTRUCTIONS: &str = include_str!("../templates/parallel/instructions.md");
if let Some(family) =
find_family_for_model(&sess.state.lock().await.session_configuration.model)
{
let mut new_instructions = base_instructions.unwrap_or(family.base_instructions);
new_instructions.push_str(INSTRUCTIONS);
base_instructions = Some(new_instructions);
}
static INSERTION_SPOT: &str = "## Editing constraints";
base_instructions
.as_mut()
.map(|base| base.replace(INSERTION_SPOT, INSTRUCTIONS));
}
let prompt = Prompt {
input,
tools: router.specs(),
@@ -2602,9 +2471,8 @@ mod tests {
duration: StdDuration::from_secs(1),
timed_out: true,
};
let (_, turn_context) = make_session_and_context();
let out = format_exec_output_str(&exec, turn_context.truncation_policy);
let out = format_exec_output_str(&exec);
assert_eq!(
out,
@@ -2720,7 +2588,6 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy: Arc::new(ExecPolicy::empty()),
session_source: SessionSource::Exec,
};
@@ -2798,7 +2665,6 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy: Arc::new(ExecPolicy::empty()),
session_source: SessionSource::Exec,
};

View File

@@ -1,26 +1,22 @@
use crate::codex::Codex;
use crate::codex::Session;
use crate::error::Result as CodexResult;
use crate::protocol::Event;
use crate::protocol::Op;
use crate::protocol::Submission;
use std::path::PathBuf;
use std::sync::Arc;
pub struct CodexConversation {
codex: Codex,
rollout_path: PathBuf,
session: Arc<Session>,
}
/// Conduit for the bidirectional stream of messages that compose a conversation
/// in Codex.
impl CodexConversation {
pub(crate) fn new(codex: Codex, rollout_path: PathBuf, session: Arc<Session>) -> Self {
pub(crate) fn new(codex: Codex, rollout_path: PathBuf) -> Self {
Self {
codex,
rollout_path,
session,
}
}
@@ -40,24 +36,4 @@ impl CodexConversation {
pub fn rollout_path(&self) -> PathBuf {
self.rollout_path.clone()
}
pub async fn flush_rollout(&self) -> CodexResult<()> {
Ok(self.session.flush_rollout().await?)
}
pub async fn set_session_name(&self, name: Option<String>) -> CodexResult<()> {
Ok(self.session.set_session_name(name).await?)
}
pub async fn model(&self) -> String {
self.session.model().await
}
pub async fn save_session(
&self,
codex_home: &std::path::Path,
name: &str,
) -> CodexResult<crate::SavedSessionEntry> {
self.session.save_session(codex_home, name).await
}
}

View File

@@ -1,8 +1,6 @@
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use crate::sandboxing::SandboxPermissions;
use crate::bash::parse_shell_lc_plain_commands;
use crate::is_safe_command::is_known_safe_command;
@@ -10,7 +8,7 @@ pub fn requires_initial_appoval(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
command: &[String],
sandbox_permissions: SandboxPermissions,
with_escalated_permissions: bool,
) -> bool {
if is_known_safe_command(command) {
return false;
@@ -26,7 +24,8 @@ pub fn requires_initial_appoval(
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
// nonescalated, nondangerous commands — let the sandbox enforce
// restrictions (e.g., block network/write) without a user prompt.
if sandbox_permissions.requires_escalated_permissions() {
let wants_escalation: bool = with_escalated_permissions;
if wants_escalation {
return true;
}
command_might_be_dangerous(command)

View File

@@ -7,7 +7,6 @@ use crate::codex::TurnContext;
use crate::codex::get_last_assistant_message_from_turn;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::features::Feature;
use crate::protocol::AgentMessageEvent;
use crate::protocol::CompactedItem;
use crate::protocol::ErrorEvent;
@@ -19,7 +18,6 @@ use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::truncate_text;
use crate::util::backoff;
use codex_app_server_protocol::AuthMode;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseInputItem;
@@ -33,22 +31,12 @@ pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt
pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
pub(crate) async fn should_use_remote_compact_task(session: &Session) -> bool {
session
.services
.auth_manager
.auth()
.is_some_and(|auth| auth.mode == AuthMode::ChatGPT)
&& session.enabled(Feature::RemoteCompaction).await
}
pub(crate) async fn run_inline_auto_compact_task(
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
) {
let prompt = turn_context.compact_prompt().to_string();
let input = vec![UserInput::Text { text: prompt }];
run_compact_task_inner(sess, turn_context, input).await;
}
@@ -56,12 +44,13 @@ pub(crate) async fn run_compact_task(
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
input: Vec<UserInput>,
) {
) -> Option<String> {
let start_event = EventMsg::TaskStarted(TaskStartedEvent {
model_context_window: turn_context.client.get_model_context_window(),
});
sess.send_event(&turn_context, start_event).await;
run_compact_task_inner(sess.clone(), turn_context, input).await;
None
}
async fn run_compact_task_inner(
@@ -171,7 +160,15 @@ async fn run_compact_task_inner(
.collect();
new_history.extend(ghost_snapshots);
sess.replace_history(new_history).await;
sess.recompute_token_usage(&turn_context).await;
if let Some(estimated_tokens) = sess
.clone_history()
.await
.estimate_token_count(&turn_context)
{
sess.override_last_token_usage_estimate(&turn_context, estimated_tokens)
.await;
}
let rollout_item = RolloutItem::Compacted(CompactedItem {
message: summary_text.clone(),

View File

@@ -12,32 +12,34 @@ use crate::protocol::RolloutItem;
use crate::protocol::TaskStartedEvent;
use codex_protocol::models::ResponseItem;
pub(crate) async fn run_inline_remote_auto_compact_task(
pub(crate) async fn run_remote_compact_task(
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
) {
run_remote_compact_task_inner(&sess, &turn_context).await;
}
pub(crate) async fn run_remote_compact_task(sess: Arc<Session>, turn_context: Arc<TurnContext>) {
) -> Option<String> {
let start_event = EventMsg::TaskStarted(TaskStartedEvent {
model_context_window: turn_context.client.get_model_context_window(),
});
sess.send_event(&turn_context, start_event).await;
run_remote_compact_task_inner(&sess, &turn_context).await;
}
async fn run_remote_compact_task_inner(sess: &Arc<Session>, turn_context: &Arc<TurnContext>) {
if let Err(err) = run_remote_compact_task_inner_impl(sess, turn_context).await {
let event = EventMsg::Error(ErrorEvent {
message: format!("Error running remote compact task: {err}"),
});
sess.send_event(turn_context, event).await;
match run_remote_compact_task_inner(&sess, &turn_context).await {
Ok(()) => {
let event = EventMsg::AgentMessage(AgentMessageEvent {
message: "Compact task completed".to_string(),
});
sess.send_event(&turn_context, event).await;
}
Err(err) => {
let event = EventMsg::Error(ErrorEvent {
message: err.to_string(),
});
sess.send_event(&turn_context, event).await;
}
}
None
}
async fn run_remote_compact_task_inner_impl(
async fn run_remote_compact_task_inner(
sess: &Arc<Session>,
turn_context: &Arc<TurnContext>,
) -> CodexResult<()> {
@@ -66,7 +68,15 @@ async fn run_remote_compact_task_inner_impl(
new_history.extend(ghost_snapshots);
}
sess.replace_history(new_history.clone()).await;
sess.recompute_token_usage(turn_context).await;
if let Some(estimated_tokens) = sess
.clone_history()
.await
.estimate_token_count(turn_context.as_ref())
{
sess.override_last_token_usage_estimate(turn_context.as_ref(), estimated_tokens)
.await;
}
let compacted_item = CompactedItem {
message: String::new(),
@@ -74,11 +84,5 @@ async fn run_remote_compact_task_inner_impl(
};
sess.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)])
.await;
let event = EventMsg::AgentMessage(AgentMessageEvent {
message: "Compact task completed".to_string(),
});
sess.send_event(turn_context, event).await;
Ok(())
}

View File

@@ -845,36 +845,6 @@ hide_gpt5_1_migration_prompt = true
assert_eq!(contents, expected);
}
#[test]
fn blocking_set_hide_gpt_5_1_codex_max_migration_prompt_preserves_table() {
let tmp = tempdir().expect("tmpdir");
let codex_home = tmp.path();
std::fs::write(
codex_home.join(CONFIG_TOML_FILE),
r#"[notice]
existing = "value"
"#,
)
.expect("seed");
apply_blocking(
codex_home,
None,
&[ConfigEdit::SetNoticeHideModelMigrationPrompt(
"hide_gpt-5.1-codex-max_migration_prompt".to_string(),
true,
)],
)
.expect("persist");
let contents =
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
let expected = r#"[notice]
existing = "value"
"hide_gpt-5.1-codex-max_migration_prompt" = true
"#;
assert_eq!(contents, expected);
}
#[test]
fn blocking_replace_mcp_servers_round_trips() {
let tmp = tempdir().expect("tmpdir");

View File

@@ -61,6 +61,9 @@ pub mod edit;
pub mod profile;
pub mod types;
#[cfg(target_os = "windows")]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1";
#[cfg(not(target_os = "windows"))]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex";
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex";
@@ -78,7 +81,7 @@ pub struct Config {
/// Optional override of model selection.
pub model: String,
/// Model used specifically for review sessions. Defaults to "gpt-5.1-codex-max".
/// Model used specifically for review sessions. Defaults to "gpt-5.1-codex".
pub review_model: String,
pub model_family: ModelFamily,

View File

@@ -378,9 +378,6 @@ pub struct Notice {
pub hide_rate_limit_model_nudge: Option<bool>,
/// Tracks whether the user has seen the model migration prompt
pub hide_gpt5_1_migration_prompt: Option<bool>,
/// Tracks whether the user has seen the gpt-5.1-codex-max migration prompt
#[serde(rename = "hide_gpt-5.1-codex-max_migration_prompt")]
pub hide_gpt_5_1_codex_max_migration_prompt: Option<bool>,
}
impl Notice {

View File

@@ -145,17 +145,13 @@ impl ContextManager {
}
fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem {
let policy_with_serialization_budget = policy.mul(1.2);
match item {
ResponseItem::FunctionCallOutput { call_id, output } => {
let truncated =
truncate_text(output.content.as_str(), policy_with_serialization_budget);
let truncated_items = output.content_items.as_ref().map(|items| {
truncate_function_output_items_with_policy(
items,
policy_with_serialization_budget,
)
});
let truncated = truncate_text(output.content.as_str(), policy);
let truncated_items = output
.content_items
.as_ref()
.map(|items| truncate_function_output_items_with_policy(items, policy));
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
@@ -166,7 +162,7 @@ impl ContextManager {
}
}
ResponseItem::CustomToolCallOutput { call_id, output } => {
let truncated = truncate_text(output, policy_with_serialization_budget);
let truncated = truncate_text(output, policy);
ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: truncated,

View File

@@ -12,8 +12,8 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
const EXEC_FORMAT_MAX_LINES: usize = 256;
const EXEC_FORMAT_MAX_BYTES: usize = 10_000;
const EXEC_FORMAT_MAX_TOKENS: usize = 2_500;
fn assistant_msg(text: &str) -> ResponseItem {
ResponseItem::Message {
@@ -56,10 +56,6 @@ fn reasoning_msg(text: &str) -> ResponseItem {
}
}
fn truncate_exec_output(content: &str) -> String {
truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
}
#[test]
fn filters_non_api_messages() {
let mut h = ContextManager::default();
@@ -232,7 +228,7 @@ fn normalization_retains_local_shell_outputs() {
ResponseItem::FunctionCallOutput {
call_id: "shell-1".to_string(),
output: FunctionCallOutputPayload {
content: "Total output lines: 1\n\nok".to_string(),
content: "ok".to_string(),
..Default::default()
},
},
@@ -334,8 +330,8 @@ fn record_items_respects_custom_token_limit() {
assert!(stored.content.contains("tokens truncated"));
}
fn assert_truncated_message_matches(message: &str, line: &str, expected_removed: usize) {
let pattern = truncated_message_pattern(line);
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
let pattern = truncated_message_pattern(line, total_lines);
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
panic!("failed to compile regex {pattern}: {err}");
});
@@ -351,18 +347,23 @@ fn assert_truncated_message_matches(message: &str, line: &str, expected_removed:
"body exceeds byte limit: {} bytes",
body.len()
);
let removed: usize = captures
.name("removed")
.expect("missing removed capture")
.as_str()
.parse()
.unwrap_or_else(|err| panic!("invalid removed tokens: {err}"));
assert_eq!(removed, expected_removed, "mismatched removed token count");
}
fn truncated_message_pattern(line: &str) -> String {
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
let head_lines = EXEC_FORMAT_MAX_LINES / 2;
let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines;
let head_take = head_lines.min(total_lines);
let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
let omitted = total_lines.saturating_sub(head_take + tail_take);
let escaped_line = regex_lite::escape(line);
format!(r"(?s)^(?P<body>{escaped_line}.*?)(?:\r?)?…(?P<removed>\d+) tokens truncated…(?:.*)?$")
if omitted == 0 {
return format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$",
);
}
format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
)
}
#[test]
@@ -370,18 +371,27 @@ fn format_exec_output_truncates_large_error() {
let line = "very long execution error line that should trigger truncation\n";
let large_error = line.repeat(2_500); // way beyond both byte and line limits
let truncated = truncate_exec_output(&large_error);
let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES);
assert_truncated_message_matches(&truncated, line, 36250);
let total_lines = large_error.lines().count();
assert_truncated_message_matches(&truncated, line, total_lines);
assert_ne!(truncated, large_error);
}
#[test]
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 10000);
let truncated = truncate_exec_output(&long_line);
let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50);
let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES);
assert_ne!(truncated, long_line);
assert_truncated_message_matches(&truncated, "a", 2500);
let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES);
let marker_line = format!(
"[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]"
);
assert!(
truncated.contains(&marker_line),
"missing byte truncation marker: {truncated}"
);
assert!(
!truncated.contains("omitted"),
"line omission marker should not appear when no lines were dropped: {truncated}"
@@ -391,25 +401,34 @@ fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
#[test]
fn format_exec_output_returns_original_when_within_limits() {
let content = "example output\n".repeat(10);
assert_eq!(truncate_exec_output(&content), content);
assert_eq!(
truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES),
content
);
}
#[test]
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
let total_lines = 2_000;
let filler = "x".repeat(64);
let total_lines = EXEC_FORMAT_MAX_LINES + 100;
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}-{filler}\n"))
.map(|idx| format!("line-{idx}\n"))
.collect();
let truncated = truncate_exec_output(&content);
assert_truncated_message_matches(&truncated, "line-0-", 34_723);
let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
let omitted = total_lines - EXEC_FORMAT_MAX_LINES;
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
assert!(
truncated.contains("line-0-"),
truncated.contains(&expected_marker),
"missing omitted marker: {truncated}"
);
assert!(
truncated.contains("line-0\n"),
"expected head line to remain: {truncated}"
);
let last_line = format!("line-{}-", total_lines - 1);
let last_line = format!("line-{}\n", total_lines - 1);
assert!(
truncated.contains(&last_line),
"expected tail line to remain: {truncated}"
@@ -418,15 +437,22 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
#[test]
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
let total_lines = 300;
let total_lines = EXEC_FORMAT_MAX_LINES + 42;
let long_line = "x".repeat(256);
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}-{long_line}\n"))
.collect();
let truncated = truncate_exec_output(&content);
let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
assert_truncated_message_matches(&truncated, "line-0-", 17_423);
assert!(
truncated.contains("[... omitted 42 of 298 lines ...]"),
"expected omitted marker when line count exceeds limit: {truncated}"
);
assert!(
!truncated.contains("byte limit"),
"line omission marker should take precedence over byte marker: {truncated}"
);
}
//TODO(aibrahim): run CI in release mode.

View File

@@ -1,4 +1,5 @@
mod history;
mod normalize;
pub(crate) use crate::truncate::truncate_with_line_bytes_budget;
pub(crate) use history::ContextManager;

View File

@@ -3,7 +3,6 @@ use crate::CodexAuth;
use crate::codex::Codex;
use crate::codex::CodexSpawnOk;
use crate::codex::INITIAL_SUBMIT_ID;
use crate::codex::Session;
use crate::codex_conversation::CodexConversation;
use crate::config::Config;
use crate::error::CodexErr;
@@ -12,7 +11,6 @@ use crate::protocol::Event;
use crate::protocol::EventMsg;
use crate::protocol::SessionConfiguredEvent;
use crate::rollout::RolloutRecorder;
use crate::saved_sessions::resolve_rollout_path;
use codex_protocol::ConversationId;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ResponseItem;
@@ -20,7 +18,6 @@ use codex_protocol::protocol::InitialHistory;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SessionSource;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
@@ -59,7 +56,6 @@ impl ConversationManager {
)
}
/// Start a brand new conversation with default initial history.
pub async fn new_conversation(&self, config: Config) -> CodexResult<NewConversation> {
self.spawn_conversation(config, self.auth_manager.clone())
.await
@@ -73,7 +69,6 @@ impl ConversationManager {
let CodexSpawnOk {
codex,
conversation_id,
session,
} = Codex::spawn(
config,
auth_manager,
@@ -81,14 +76,13 @@ impl ConversationManager {
self.session_source.clone(),
)
.await?;
self.finalize_spawn(codex, conversation_id, session).await
self.finalize_spawn(codex, conversation_id).await
}
async fn finalize_spawn(
&self,
codex: Codex,
conversation_id: ConversationId,
session: Arc<Session>,
) -> CodexResult<NewConversation> {
// The first event must be `SessionInitialized`. Validate and forward it
// to the caller so that they can display it in the conversation
@@ -107,7 +101,6 @@ impl ConversationManager {
let conversation = Arc::new(CodexConversation::new(
codex,
session_configured.rollout_path.clone(),
session,
));
self.conversations
.write()
@@ -132,7 +125,6 @@ impl ConversationManager {
.ok_or_else(|| CodexErr::ConversationNotFound(conversation_id))
}
/// Resume a conversation from an on-disk rollout file.
pub async fn resume_conversation_from_rollout(
&self,
config: Config,
@@ -144,23 +136,6 @@ impl ConversationManager {
.await
}
/// Resume a conversation by saved-session name or rollout id string.
pub async fn resume_conversation_from_identifier(
&self,
config: Config,
identifier: &str,
auth_manager: Arc<AuthManager>,
) -> CodexResult<NewConversation> {
let Some(path) = resolve_rollout_path(&config.codex_home, identifier).await? else {
return Err(CodexErr::Fatal(format!(
"No saved session or rollout found for '{identifier}'"
)));
};
self.resume_conversation_from_rollout(config, path, auth_manager)
.await
}
/// Resume a conversation from provided rollout history items.
pub async fn resume_conversation_with_history(
&self,
config: Config,
@@ -170,7 +145,6 @@ impl ConversationManager {
let CodexSpawnOk {
codex,
conversation_id,
session,
} = Codex::spawn(
config,
auth_manager,
@@ -178,54 +152,7 @@ impl ConversationManager {
self.session_source.clone(),
)
.await?;
self.finalize_spawn(codex, conversation_id, session).await
}
/// Fork a new conversation from the given rollout path.
pub async fn fork_from_rollout(
&self,
config: Config,
path: PathBuf,
auth_manager: Arc<AuthManager>,
) -> CodexResult<NewConversation> {
let initial_history = RolloutRecorder::get_rollout_history(&path).await?;
let forked = match initial_history {
InitialHistory::Resumed(resumed) => InitialHistory::Forked(resumed.history),
InitialHistory::Forked(items) => InitialHistory::Forked(items),
InitialHistory::New => InitialHistory::New,
};
self.resume_conversation_with_history(config, forked, auth_manager)
.await
}
/// Fork a new conversation from a saved-session name or rollout id string.
pub async fn fork_from_identifier(
&self,
config: Config,
identifier: &str,
auth_manager: Arc<AuthManager>,
) -> CodexResult<NewConversation> {
let Some(path) = resolve_rollout_path(&config.codex_home, identifier).await? else {
return Err(CodexErr::Fatal(format!(
"No saved session or rollout found for '{identifier}'"
)));
};
self.fork_from_rollout(config, path, auth_manager).await
}
/// Persist a human-friendly session name and record it in saved_sessions.json.
pub async fn save_session(
&self,
conversation_id: ConversationId,
codex_home: &Path,
name: &str,
) -> CodexResult<crate::SavedSessionEntry> {
let trimmed = name.trim();
if trimmed.is_empty() {
return Err(CodexErr::Fatal("Usage: /save <name>".to_string()));
}
let conversation = self.get_conversation(conversation_id).await?;
conversation.save_session(codex_home, trimmed).await
self.finalize_spawn(codex, conversation_id).await
}
/// Removes the conversation from the manager's internal map, though the
@@ -258,10 +185,9 @@ impl ConversationManager {
let CodexSpawnOk {
codex,
conversation_id,
session,
} = Codex::spawn(config, auth_manager, history, self.session_source.clone()).await?;
self.finalize_spawn(codex, conversation_id, session).await
self.finalize_spawn(codex, conversation_id).await
}
}

View File

@@ -499,7 +499,6 @@ mod tests {
window_minutes: Some(120),
resets_at: Some(secondary_reset_at),
}),
credits: None,
}
}

View File

@@ -1,365 +0,0 @@
use std::io::ErrorKind;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use codex_execpolicy::Decision;
use codex_execpolicy::Evaluation;
use codex_execpolicy::Policy;
use codex_execpolicy::PolicyParser;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use thiserror::Error;
use tokio::fs;
use crate::bash::parse_shell_lc_plain_commands;
use crate::features::Feature;
use crate::features::Features;
use crate::sandboxing::SandboxPermissions;
use crate::tools::sandboxing::ApprovalRequirement;
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
const POLICY_DIR_NAME: &str = "policy";
const POLICY_EXTENSION: &str = "codexpolicy";
#[derive(Debug, Error)]
pub enum ExecPolicyError {
#[error("failed to read execpolicy files from {dir}: {source}")]
ReadDir {
dir: PathBuf,
source: std::io::Error,
},
#[error("failed to read execpolicy file {path}: {source}")]
ReadFile {
path: PathBuf,
source: std::io::Error,
},
#[error("failed to parse execpolicy file {path}: {source}")]
ParsePolicy {
path: String,
source: codex_execpolicy::Error,
},
}
pub(crate) async fn exec_policy_for(
features: &Features,
codex_home: &Path,
) -> Result<Arc<Policy>, ExecPolicyError> {
if !features.enabled(Feature::ExecPolicy) {
return Ok(Arc::new(Policy::empty()));
}
let policy_dir = codex_home.join(POLICY_DIR_NAME);
let policy_paths = collect_policy_files(&policy_dir).await?;
let mut parser = PolicyParser::new();
for policy_path in &policy_paths {
let contents =
fs::read_to_string(policy_path)
.await
.map_err(|source| ExecPolicyError::ReadFile {
path: policy_path.clone(),
source,
})?;
let identifier = policy_path.to_string_lossy().to_string();
parser
.parse(&identifier, &contents)
.map_err(|source| ExecPolicyError::ParsePolicy {
path: identifier,
source,
})?;
}
let policy = Arc::new(parser.build());
tracing::debug!(
"loaded execpolicy from {} files in {}",
policy_paths.len(),
policy_dir.display()
);
Ok(policy)
}
fn evaluate_with_policy(
policy: &Policy,
command: &[String],
approval_policy: AskForApproval,
) -> Option<ApprovalRequirement> {
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
let evaluation = policy.check_multiple(commands.iter());
match evaluation {
Evaluation::Match { decision, .. } => match decision {
Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
reason: FORBIDDEN_REASON.to_string(),
}),
Decision::Prompt => {
let reason = PROMPT_REASON.to_string();
if matches!(approval_policy, AskForApproval::Never) {
Some(ApprovalRequirement::Forbidden { reason })
} else {
Some(ApprovalRequirement::NeedsApproval {
reason: Some(reason),
})
}
}
Decision::Allow => Some(ApprovalRequirement::Skip),
},
Evaluation::NoMatch => None,
}
}
pub(crate) fn create_approval_requirement_for_command(
policy: &Policy,
command: &[String],
approval_policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
sandbox_permissions: SandboxPermissions,
) -> ApprovalRequirement {
if let Some(requirement) = evaluate_with_policy(policy, command, approval_policy) {
return requirement;
}
if requires_initial_appoval(
approval_policy,
sandbox_policy,
command,
sandbox_permissions,
) {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
async fn collect_policy_files(dir: &Path) -> Result<Vec<PathBuf>, ExecPolicyError> {
let mut read_dir = match fs::read_dir(dir).await {
Ok(read_dir) => read_dir,
Err(err) if err.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
Err(source) => {
return Err(ExecPolicyError::ReadDir {
dir: dir.to_path_buf(),
source,
});
}
};
let mut policy_paths = Vec::new();
while let Some(entry) =
read_dir
.next_entry()
.await
.map_err(|source| ExecPolicyError::ReadDir {
dir: dir.to_path_buf(),
source,
})?
{
let path = entry.path();
let file_type = entry
.file_type()
.await
.map_err(|source| ExecPolicyError::ReadDir {
dir: dir.to_path_buf(),
source,
})?;
if path
.extension()
.and_then(|ext| ext.to_str())
.is_some_and(|ext| ext == POLICY_EXTENSION)
&& file_type.is_file()
{
policy_paths.push(path);
}
}
policy_paths.sort();
Ok(policy_paths)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::features::Feature;
use crate::features::Features;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use pretty_assertions::assert_eq;
use std::fs;
use tempfile::tempdir;
#[tokio::test]
async fn returns_empty_policy_when_feature_disabled() {
let mut features = Features::with_defaults();
features.disable(Feature::ExecPolicy);
let temp_dir = tempdir().expect("create temp dir");
let policy = exec_policy_for(&features, temp_dir.path())
.await
.expect("policy result");
let commands = [vec!["rm".to_string()]];
assert!(matches!(
policy.check_multiple(commands.iter()),
Evaluation::NoMatch
));
assert!(!temp_dir.path().join(POLICY_DIR_NAME).exists());
}
#[tokio::test]
async fn collect_policy_files_returns_empty_when_dir_missing() {
let temp_dir = tempdir().expect("create temp dir");
let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
let files = collect_policy_files(&policy_dir)
.await
.expect("collect policy files");
assert!(files.is_empty());
}
#[tokio::test]
async fn loads_policies_from_policy_subdirectory() {
let temp_dir = tempdir().expect("create temp dir");
let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
fs::create_dir_all(&policy_dir).expect("create policy dir");
fs::write(
policy_dir.join("deny.codexpolicy"),
r#"prefix_rule(pattern=["rm"], decision="forbidden")"#,
)
.expect("write policy file");
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
.await
.expect("policy result");
let command = [vec!["rm".to_string()]];
assert!(matches!(
policy.check_multiple(command.iter()),
Evaluation::Match { .. }
));
}
#[tokio::test]
async fn ignores_policies_outside_policy_dir() {
let temp_dir = tempdir().expect("create temp dir");
fs::write(
temp_dir.path().join("root.codexpolicy"),
r#"prefix_rule(pattern=["ls"], decision="prompt")"#,
)
.expect("write policy file");
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
.await
.expect("policy result");
let command = [vec!["ls".to_string()]];
assert!(matches!(
policy.check_multiple(command.iter()),
Evaluation::NoMatch
));
}
#[test]
fn evaluates_bash_lc_inner_commands() {
let policy_src = r#"
prefix_rule(pattern=["rm"], decision="forbidden")
"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let forbidden_script = vec![
"bash".to_string(),
"-lc".to_string(),
"rm -rf /tmp".to_string(),
];
let requirement =
evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
.expect("expected match for forbidden command");
assert_eq!(
requirement,
ApprovalRequirement::Forbidden {
reason: FORBIDDEN_REASON.to_string()
}
);
}
#[test]
fn approval_requirement_prefers_execpolicy_match() {
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let command = vec!["rm".to_string()];
let requirement = create_approval_requirement_for_command(
&policy,
&command,
AskForApproval::OnRequest,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
);
assert_eq!(
requirement,
ApprovalRequirement::NeedsApproval {
reason: Some(PROMPT_REASON.to_string())
}
);
}
#[test]
fn approval_requirement_respects_approval_policy() {
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let command = vec!["rm".to_string()];
let requirement = create_approval_requirement_for_command(
&policy,
&command,
AskForApproval::Never,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
);
assert_eq!(
requirement,
ApprovalRequirement::Forbidden {
reason: PROMPT_REASON.to_string()
}
);
}
#[test]
fn approval_requirement_falls_back_to_heuristics() {
let command = vec!["python".to_string()];
let empty_policy = Policy::empty();
let requirement = create_approval_requirement_for_command(
&empty_policy,
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
);
assert_eq!(
requirement,
ApprovalRequirement::NeedsApproval { reason: None }
);
}
}

View File

@@ -42,8 +42,6 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Gate the execpolicy enforcement for shell/unified exec.
ExecPolicy,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
/// Enable Windows sandbox (restricted token) on Windows.
@@ -262,12 +260,6 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: true,
},
FeatureSpec {
id: Feature::ViewImageTool,
key: "view_image_tool",
stage: Stage::Stable,
default_enabled: true,
},
// Unstable features.
FeatureSpec {
id: Feature::UnifiedExec,
@@ -293,18 +285,18 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Beta,
default_enabled: false,
},
FeatureSpec {
id: Feature::ViewImageTool,
key: "view_image_tool",
stage: Stage::Stable,
default_enabled: true,
},
FeatureSpec {
id: Feature::WebSearchRequest,
key: "web_search_request",
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::ExecPolicy,
key: "exec_policy",
stage: Stage::Experimental,
default_enabled: true,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",

View File

@@ -25,7 +25,6 @@ mod environment_context;
pub mod error;
pub mod exec;
pub mod exec_env;
mod exec_policy;
pub mod features;
mod flags;
pub mod git_info;
@@ -66,7 +65,6 @@ mod openai_model_info;
pub mod project_doc;
mod rollout;
pub(crate) mod safety;
pub mod saved_sessions;
pub mod seatbelt;
pub mod shell;
pub mod spawn;
@@ -84,12 +82,6 @@ pub use rollout::list::ConversationsPage;
pub use rollout::list::Cursor;
pub use rollout::list::parse_cursor;
pub use rollout::list::read_head_for_summary;
pub use saved_sessions::SavedSessionEntry;
pub use saved_sessions::build_saved_session_entry;
pub use saved_sessions::list_saved_sessions;
pub use saved_sessions::resolve_rollout_path;
pub use saved_sessions::resolve_saved_session;
pub use saved_sessions::upsert_saved_session;
mod function_tool;
mod state;
mod tasks;

View File

@@ -12,7 +12,6 @@ const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
const GPT_5_CODEX_INSTRUCTIONS: &str = include_str!("../gpt_5_codex_prompt.md");
const GPT_5_1_INSTRUCTIONS: &str = include_str!("../gpt_5_1_prompt.md");
const GPT_5_1_CODEX_MAX_INSTRUCTIONS: &str = include_str!("../gpt-5.1-codex-max_prompt.md");
/// A model family is a group of models that share certain characteristics.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -175,18 +174,6 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
)
// Production models.
} else if slug.starts_with("gpt-5.1-codex-max") {
model_family!(
slug, slug,
supports_reasoning_summaries: true,
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
support_verbosity: false,
truncation_policy: TruncationPolicy::Tokens(10_000),
)
} else if slug.starts_with("gpt-5-codex")
|| slug.starts_with("gpt-5.1-codex")
|| slug.starts_with("codex-")
@@ -220,7 +207,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
slug, "gpt-5",
supports_reasoning_summaries: true,
needs_special_apply_patch_instructions: true,
shell_type: ConfigShellToolType::Default,
shell_type: ConfigShellToolType::ShellCommand,
support_verbosity: true,
truncation_policy: TruncationPolicy::Bytes(10_000),
)

View File

@@ -70,10 +70,7 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
// https://platform.openai.com/docs/models/gpt-3.5-turbo
"gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),
_ if slug.starts_with("gpt-5-codex")
|| slug.starts_with("gpt-5.1-codex")
|| slug.starts_with("gpt-5.1-codex-max") =>
{
_ if slug.starts_with("gpt-5-codex") || slug.starts_with("gpt-5.1-codex") => {
Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
}

View File

@@ -919,6 +919,51 @@ mod tests {
}],
);
}
#[test]
fn powershell_cd_is_stripped() {
assert_parsed(
&vec_str(&["powershell", "-Command", "cd foo; Get-ChildItem"]),
vec![ParsedCommand::Unknown {
cmd: "Get-ChildItem".to_string(),
}],
);
}
#[test]
fn powershell_pipes_are_preserved() {
assert_parsed(
&vec_str(&[
"powershell",
"-Command",
"Get-Content core/src/parse_command.rs | Select-Object -First 980 | Select-Object -Last 140",
]),
vec![
ParsedCommand::Unknown {
cmd: "Get-Content core/src/parse_command.rs".to_string(),
},
ParsedCommand::Unknown {
cmd: "Select-Object -First 980".to_string(),
},
ParsedCommand::Unknown {
cmd: "Select-Object -Last 140".to_string(),
},
],
);
}
#[test]
fn powershell_at_escaping_is_preserved() {
assert_parsed(
&vec_str(&[
"powershell",
"-Command",
"@'\\nprint('Hello, world!')\\n'@ | python -",
]),
vec![ParsedCommand::Unknown {
cmd: "@'\\nprint('Hello, world!')\\n'@ | python -".to_string(),
}],
);
}
}
pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
@@ -926,12 +971,6 @@ pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
return commands;
}
if let Some((_, script)) = extract_powershell_command(command) {
return vec![ParsedCommand::Unknown {
cmd: script.to_string(),
}];
}
let normalized = normalize_tokens(command);
let parts = if contains_connectors(&normalized) {
@@ -1238,8 +1277,8 @@ fn parse_find_query_and_path(tail: &[String]) -> (Option<String>, Option<String>
}
fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
// Only handle bash/zsh here; PowerShell is stripped separately without bash parsing.
let (_, script) = extract_bash_command(original)?;
let (_, script) =
extract_bash_command(original).or_else(|| extract_powershell_command(original))?;
if let Some(tree) = try_parse_shell(script)
&& let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)

View File

@@ -66,7 +66,6 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::ExecApprovalRequest(_)
| EventMsg::ApplyPatchApprovalRequest(_)
| EventMsg::BackgroundEvent(_)
| EventMsg::SaveSessionResponse(_)
| EventMsg::StreamError(_)
| EventMsg::PatchApplyBegin(_)
| EventMsg::PatchApplyEnd(_)

View File

@@ -11,8 +11,6 @@ use serde_json::Value;
use time::OffsetDateTime;
use time::format_description::FormatItem;
use time::macros::format_description;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::AsyncWriteExt;
use tokio::sync::mpsc::Sender;
use tokio::sync::mpsc::{self};
@@ -72,10 +70,6 @@ enum RolloutCmd {
Shutdown {
ack: oneshot::Sender<()>,
},
SetName {
name: Option<String>,
ack: oneshot::Sender<std::io::Result<()>>,
},
}
impl RolloutRecorderParams {
@@ -154,14 +148,11 @@ impl RolloutRecorder {
instructions,
source,
model_provider: Some(config.model_provider_id.clone()),
name: None,
}),
)
}
RolloutRecorderParams::Resume { path } => (
tokio::fs::OpenOptions::new()
.read(true)
.write(true)
.append(true)
.open(&path)
.await?,
@@ -205,21 +196,6 @@ impl RolloutRecorder {
.map_err(|e| IoError::other(format!("failed to queue rollout items: {e}")))
}
/// Update the session name stored in the rollout's SessionMeta line.
pub async fn set_session_name(&self, name: Option<String>) -> std::io::Result<()> {
let (tx, rx) = oneshot::channel();
self.tx
.send(RolloutCmd::SetName { name, ack: tx })
.await
.map_err(|e| IoError::other(format!("failed to queue session name update: {e}")))?;
match rx.await {
Ok(result) => result,
Err(e) => Err(IoError::other(format!(
"failed waiting for session name update: {e}"
))),
}
}
/// Flush all queued writes and wait until they are committed by the writer task.
pub async fn flush(&self) -> std::io::Result<()> {
let (tx, rx) = oneshot::channel();
@@ -358,7 +334,6 @@ fn create_log_file(
let path = dir.join(filename);
let file = std::fs::OpenOptions::new()
.read(true)
.append(true)
.create(true)
.open(&path)?;
@@ -414,10 +389,6 @@ async fn rollout_writer(
RolloutCmd::Shutdown { ack } => {
let _ = ack.send(());
}
RolloutCmd::SetName { name, ack } => {
let result = rewrite_session_meta_name(&mut writer.file, name).await;
let _ = ack.send(result);
}
}
}
@@ -451,232 +422,3 @@ impl JsonlWriter {
Ok(())
}
}
async fn rewrite_session_meta_name(
file: &mut tokio::fs::File,
name: Option<String>,
) -> std::io::Result<()> {
use std::io::SeekFrom;
file.flush().await?;
file.seek(SeekFrom::Start(0)).await?;
let mut contents = Vec::new();
file.read_to_end(&mut contents).await?;
if contents.is_empty() {
return Err(IoError::other("empty rollout file"));
}
let newline_idx = contents
.iter()
.position(|&b| b == b'\n')
.ok_or_else(|| IoError::other("rollout missing newline after SessionMeta"))?;
let first_line = &contents[..newline_idx];
let mut rollout_line: RolloutLine = serde_json::from_slice(first_line)
.map_err(|e| IoError::other(format!("failed to parse SessionMeta: {e}")))?;
let RolloutItem::SessionMeta(ref mut session_meta_line) = rollout_line.item else {
return Err(IoError::other("first rollout item is not SessionMeta"));
};
session_meta_line.meta.name = name;
let mut updated = serde_json::to_vec(&rollout_line)?;
updated.push(b'\n');
updated.extend_from_slice(&contents[newline_idx + 1..]);
file.set_len(0).await?;
file.seek(SeekFrom::Start(0)).await?;
file.write_all(&updated).await?;
file.flush().await?;
file.seek(SeekFrom::End(0)).await?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::rewrite_session_meta_name;
use codex_protocol::ConversationId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::RolloutLine;
use codex_protocol::protocol::SessionMeta;
use codex_protocol::protocol::SessionMetaLine;
use tempfile::NamedTempFile;
use tokio::fs::OpenOptions;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::AsyncWriteExt;
fn sample_meta(name: Option<&str>) -> RolloutItem {
RolloutItem::SessionMeta(SessionMetaLine {
meta: SessionMeta {
id: ConversationId::from_string("00000000-0000-4000-8000-000000000001")
.expect("conversation id"),
timestamp: "2025-01-01T00:00:00.000Z".to_string(),
cwd: "/tmp".into(),
originator: "tester".to_string(),
cli_version: "1.0.0".to_string(),
instructions: None,
source: codex_protocol::protocol::SessionSource::Cli,
model_provider: Some("provider".to_string()),
name: name.map(str::to_string),
},
git: None,
})
}
fn sample_line() -> RolloutLine {
RolloutLine {
timestamp: "2025-01-01T00:00:00.000Z".to_string(),
item: sample_meta(None),
}
}
async fn write_rollout(lines: &[RolloutLine]) -> (NamedTempFile, tokio::fs::File) {
let temp = NamedTempFile::new().expect("temp file");
let mut file = OpenOptions::new()
.read(true)
.write(true)
.truncate(true)
.create(true)
.open(temp.path())
.await
.expect("open temp file");
for line in lines {
let mut json = serde_json::to_vec(line).expect("serialize line");
json.push(b'\n');
file.write_all(&json).await.expect("write line");
}
file.seek(std::io::SeekFrom::Start(0))
.await
.expect("rewind");
(temp, file)
}
async fn read_first_line(path: &std::path::Path) -> RolloutLine {
let mut contents = String::new();
let mut file = OpenOptions::new()
.read(true)
.open(path)
.await
.expect("open for read");
file.read_to_string(&mut contents).await.expect("read file");
let first = contents.lines().next().expect("first line");
serde_json::from_str(first).expect("parse first line")
}
#[tokio::test]
async fn updates_meta_name_and_preserves_rest() {
let events = vec![
sample_line(),
RolloutLine {
timestamp: "2025-01-01T00:00:01.000Z".to_string(),
item: RolloutItem::ResponseItem(ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: "hello".to_string(),
}],
}),
},
];
let (temp, mut file) = write_rollout(&events).await;
rewrite_session_meta_name(&mut file, Some("renamed".to_string()))
.await
.expect("rewrite ok");
let first = read_first_line(temp.path()).await;
let RolloutItem::SessionMeta(meta_line) = first.item else {
panic!("expected SessionMeta line");
};
assert_eq!(meta_line.meta.name.as_deref(), Some("renamed"));
let contents = tokio::fs::read_to_string(temp.path())
.await
.expect("read file");
let lines: Vec<_> = contents.lines().collect();
assert_eq!(lines.len(), 2);
let parsed: RolloutLine = serde_json::from_str(lines[1]).expect("parse second line");
let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = parsed.item
else {
panic!("expected response item");
};
assert_eq!(role, "assistant");
assert_eq!(
content,
vec![ContentItem::OutputText {
text: "hello".to_string()
}]
);
}
#[tokio::test]
async fn clearing_name_sets_none() {
let mut first = sample_line();
first.item = sample_meta(Some("existing"));
let (temp, mut file) = write_rollout(&[first]).await;
rewrite_session_meta_name(&mut file, None)
.await
.expect("rewrite ok");
let first = read_first_line(temp.path()).await;
let RolloutItem::SessionMeta(meta_line) = first.item else {
panic!("expected SessionMeta line");
};
assert_eq!(meta_line.meta.name, None);
}
#[tokio::test]
async fn errors_on_empty_file() {
let temp = NamedTempFile::new().expect("temp file");
let mut file = OpenOptions::new()
.read(true)
.write(true)
.open(temp.path())
.await
.expect("open temp file");
let err = rewrite_session_meta_name(&mut file, Some("x".to_string()))
.await
.expect_err("expected error");
assert!(format!("{err}").contains("empty rollout file"));
}
#[tokio::test]
async fn errors_when_first_line_not_session_meta() {
let wrong = RolloutLine {
timestamp: "t".to_string(),
item: RolloutItem::ResponseItem(ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: "hello".to_string(),
}],
}),
};
let (_temp, mut file) = write_rollout(&[wrong]).await;
let err = rewrite_session_meta_name(&mut file, Some("x".to_string()))
.await
.expect_err("expected error");
assert!(format!("{err}").contains("first rollout item is not SessionMeta"));
// ensure file pointer is rewound to end after failure paths
let pos = file
.seek(std::io::SeekFrom::Current(0))
.await
.expect("seek");
assert!(pos > 0);
}
#[tokio::test]
async fn errors_when_missing_newline() {
let temp = NamedTempFile::new().expect("temp file");
let mut file = OpenOptions::new()
.read(true)
.write(true)
.open(temp.path())
.await
.expect("open temp file");
file.write_all(b"no newline").await.expect("write");
let err = rewrite_session_meta_name(&mut file, Some("x".to_string()))
.await
.expect_err("expected error");
assert!(format!("{err}").contains("rollout missing newline after SessionMeta"));
}
}

View File

@@ -594,7 +594,6 @@ async fn test_tail_includes_last_response_items() -> Result<()> {
cli_version: "test_version".into(),
source: SessionSource::VSCode,
model_provider: Some("test-provider".into()),
name: None,
},
git: None,
}),
@@ -688,7 +687,6 @@ async fn test_tail_handles_short_sessions() -> Result<()> {
cli_version: "test_version".into(),
source: SessionSource::VSCode,
model_provider: Some("test-provider".into()),
name: None,
},
git: None,
}),
@@ -783,7 +781,6 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
cli_version: "test_version".into(),
source: SessionSource::VSCode,
model_provider: Some("test-provider".into()),
name: None,
},
git: None,
}),

View File

@@ -26,28 +26,6 @@ use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SandboxPermissions {
UseDefault,
RequireEscalated,
}
impl SandboxPermissions {
pub fn requires_escalated_permissions(self) -> bool {
matches!(self, SandboxPermissions::RequireEscalated)
}
}
impl From<bool> for SandboxPermissions {
fn from(with_escalated_permissions: bool) -> Self {
if with_escalated_permissions {
SandboxPermissions::RequireEscalated
} else {
SandboxPermissions::UseDefault
}
}
}
#[derive(Clone, Debug)]
pub struct CommandSpec {
pub program: String,

View File

@@ -1,144 +0,0 @@
use crate::error::Result;
use crate::find_conversation_path_by_id_str;
use crate::rollout::list::read_head_for_summary;
use codex_protocol::ConversationId;
use codex_protocol::protocol::SessionMetaLine;
use serde::Deserialize;
use serde::Serialize;
use std::collections::BTreeMap;
use std::io::Error as IoError;
use std::io::ErrorKind;
use std::path::Path;
use std::path::PathBuf;
use time::OffsetDateTime;
use time::format_description::well_known::Rfc3339;
use tracing::warn;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SavedSessionEntry {
pub name: String,
pub conversation_id: ConversationId,
pub rollout_path: PathBuf,
pub cwd: PathBuf,
pub model: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub model_provider: Option<String>,
pub saved_at: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub created_at: Option<String>,
}
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SavedSessionsFile {
#[serde(default)]
entries: BTreeMap<String, SavedSessionEntry>,
}
fn saved_sessions_path(codex_home: &Path) -> PathBuf {
codex_home.join("saved_sessions.json")
}
async fn load_saved_sessions_file(path: &Path) -> Result<SavedSessionsFile> {
match tokio::fs::read_to_string(path).await {
Ok(text) => {
let parsed = serde_json::from_str(&text)
.map_err(|e| IoError::other(format!("failed to parse saved sessions: {e}")))?;
Ok(parsed)
}
Err(err) if err.kind() == ErrorKind::NotFound => Ok(SavedSessionsFile::default()),
Err(err) => Err(err.into()),
}
}
async fn write_saved_sessions_file(path: &Path, file: &SavedSessionsFile) -> Result<()> {
if let Some(parent) = path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
let json = serde_json::to_string_pretty(file)
.map_err(|e| IoError::other(format!("failed to serialize saved sessions: {e}")))?;
let tmp_path = path.with_extension("json.tmp");
tokio::fs::write(&tmp_path, json).await?;
tokio::fs::rename(tmp_path, path).await?;
Ok(())
}
/// Create a new entry from the rollout's SessionMeta line.
pub async fn build_saved_session_entry(
name: String,
rollout_path: PathBuf,
model: String,
) -> Result<SavedSessionEntry> {
let head = read_head_for_summary(&rollout_path).await?;
let first = head.first().ok_or_else(|| {
IoError::other(format!(
"rollout at {} has no SessionMeta",
rollout_path.display()
))
})?;
let SessionMetaLine { mut meta, .. } = serde_json::from_value::<SessionMetaLine>(first.clone())
.map_err(|e| IoError::other(format!("failed to parse SessionMeta: {e}")))?;
meta.name = Some(name.clone());
let saved_at = OffsetDateTime::now_utc()
.format(&Rfc3339)
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
let created_at = if meta.timestamp.is_empty() {
None
} else {
Some(meta.timestamp.clone())
};
Ok(SavedSessionEntry {
name,
conversation_id: meta.id,
rollout_path,
cwd: meta.cwd,
model,
model_provider: meta.model_provider,
saved_at,
created_at,
})
}
/// Insert or replace a saved session entry in `saved_sessions.json`.
pub async fn upsert_saved_session(codex_home: &Path, entry: SavedSessionEntry) -> Result<()> {
let path = saved_sessions_path(codex_home);
let mut file = load_saved_sessions_file(&path).await?;
file.entries.insert(entry.name.clone(), entry);
write_saved_sessions_file(&path, &file).await
}
/// Lookup a saved session by name, if present.
pub async fn resolve_saved_session(
codex_home: &Path,
name: &str,
) -> Result<Option<SavedSessionEntry>> {
let path = saved_sessions_path(codex_home);
let file = load_saved_sessions_file(&path).await?;
Ok(file.entries.get(name).cloned())
}
/// Return all saved sessions ordered by newest `saved_at` first.
pub async fn list_saved_sessions(codex_home: &Path) -> Result<Vec<SavedSessionEntry>> {
let path = saved_sessions_path(codex_home);
let file = load_saved_sessions_file(&path).await?;
let mut entries: Vec<SavedSessionEntry> = file.entries.values().cloned().collect();
entries.sort_by(|a, b| b.saved_at.cmp(&a.saved_at));
Ok(entries)
}
/// Resolve a rollout path from either a saved-session name or rollout id string.
/// Returns `Ok(None)` when nothing matches.
pub async fn resolve_rollout_path(codex_home: &Path, identifier: &str) -> Result<Option<PathBuf>> {
if let Some(entry) = resolve_saved_session(codex_home, identifier).await? {
if entry.rollout_path.exists() {
return Ok(Some(entry.rollout_path));
}
warn!(
"saved session '{}' points to missing rollout at {}",
identifier,
entry.rollout_path.display()
);
}
Ok(find_conversation_path_by_id_str(codex_home, identifier).await?)
}

View File

@@ -204,21 +204,10 @@ pub async fn default_user_shell() -> Shell {
if cfg!(windows) {
get_shell(ShellType::PowerShell, None).unwrap_or(Shell::Unknown)
} else {
let user_default_shell = get_user_shell_path()
get_user_shell_path()
.and_then(|shell| detect_shell_type(&shell))
.and_then(|shell_type| get_shell(shell_type, None));
let shell_with_fallback = if cfg!(target_os = "macos") {
user_default_shell
.or_else(|| get_shell(ShellType::Zsh, None))
.or_else(|| get_shell(ShellType::Bash, None))
} else {
user_default_shell
.or_else(|| get_shell(ShellType::Bash, None))
.or_else(|| get_shell(ShellType::Zsh, None))
};
shell_with_fallback.unwrap_or(Shell::Unknown)
.and_then(|shell_type| get_shell(shell_type, None))
.unwrap_or(Shell::Unknown)
}
}

View File

@@ -3,8 +3,10 @@ use std::sync::Arc;
use super::SessionTask;
use super::SessionTaskContext;
use crate::codex::TurnContext;
use crate::features::Feature;
use crate::state::TaskKind;
use async_trait::async_trait;
use codex_app_server_protocol::AuthMode;
use codex_protocol::user_input::UserInput;
use tokio_util::sync::CancellationToken;
@@ -25,12 +27,16 @@ impl SessionTask for CompactTask {
_cancellation_token: CancellationToken,
) -> Option<String> {
let session = session.clone_session();
if crate::compact::should_use_remote_compact_task(&session).await {
if session
.services
.auth_manager
.auth()
.is_some_and(|auth| auth.mode == AuthMode::ChatGPT)
&& session.enabled(Feature::RemoteCompaction).await
{
crate::compact_remote::run_remote_compact_task(session, ctx).await
} else {
crate::compact::run_compact_task(session, ctx, input).await
}
None
}
}

View File

@@ -1,14 +1,10 @@
use crate::codex::TurnContext;
use crate::protocol::EventMsg;
use crate::protocol::WarningEvent;
use crate::state::TaskKind;
use crate::tasks::SessionTask;
use crate::tasks::SessionTaskContext;
use async_trait::async_trait;
use codex_git::CreateGhostCommitOptions;
use codex_git::GhostSnapshotReport;
use codex_git::GitToolingError;
use codex_git::capture_ghost_snapshot_report;
use codex_git::create_ghost_commit;
use codex_protocol::models::ResponseItem;
use codex_protocol::user_input::UserInput;
@@ -43,27 +39,6 @@ impl SessionTask for GhostSnapshotTask {
_ = cancellation_token.cancelled() => true,
_ = async {
let repo_path = ctx_for_task.cwd.clone();
// First, compute a snapshot report so we can warn about
// large untracked directories before running the heavier
// snapshot logic.
if let Ok(Ok(report)) = tokio::task::spawn_blocking({
let repo_path = repo_path.clone();
move || {
let options = CreateGhostCommitOptions::new(&repo_path);
capture_ghost_snapshot_report(&options)
}
})
.await
&& let Some(message) = format_large_untracked_warning(&report) {
session
.session
.send_event(
&ctx_for_task,
EventMsg::Warning(WarningEvent { message }),
)
.await;
}
// Required to run in a dedicated blocking pool.
match tokio::task::spawn_blocking(move || {
let options = CreateGhostCommitOptions::new(&repo_path);
@@ -81,18 +56,23 @@ impl SessionTask for GhostSnapshotTask {
.await;
info!("ghost commit captured: {}", ghost_commit.id());
}
Ok(Err(err)) => match err {
GitToolingError::NotAGitRepository { .. } => info!(
Ok(Err(err)) => {
warn!(
sub_id = ctx_for_task.sub_id.as_str(),
"skipping ghost snapshot because current directory is not a Git repository"
),
_ => {
warn!(
sub_id = ctx_for_task.sub_id.as_str(),
"failed to capture ghost snapshot: {err}"
);
}
},
"failed to capture ghost snapshot: {err}"
);
let message = match err {
GitToolingError::NotAGitRepository { .. } => {
"Snapshots disabled: current directory is not a Git repository."
.to_string()
}
_ => format!("Snapshots disabled after ghost snapshot error: {err}."),
};
session
.session
.notify_background_event(&ctx_for_task, message)
.await;
}
Err(err) => {
warn!(
sub_id = ctx_for_task.sub_id.as_str(),
@@ -128,22 +108,3 @@ impl GhostSnapshotTask {
Self { token }
}
}
fn format_large_untracked_warning(report: &GhostSnapshotReport) -> Option<String> {
if report.large_untracked_dirs.is_empty() {
return None;
}
const MAX_DIRS: usize = 3;
let mut parts: Vec<String> = Vec::new();
for dir in report.large_untracked_dirs.iter().take(MAX_DIRS) {
parts.push(format!("{} ({} files)", dir.path.display(), dir.file_count));
}
if report.large_untracked_dirs.len() > MAX_DIRS {
let remaining = report.large_untracked_dirs.len() - MAX_DIRS;
parts.push(format!("{remaining} more"));
}
Some(format!(
"Repository snapshot encountered large untracked directories: {}. This can slow Codex; consider adding these paths to .gitignore or disabling undo in your config.",
parts.join(", ")
))
}

View File

@@ -128,9 +128,7 @@ impl Session {
task_cancellation_token.child_token(),
)
.await;
if let Err(e) = session_ctx.clone_session().flush_rollout().await {
tracing::warn!("failed to flush rollout recorder: {e}");
}
session_ctx.clone_session().flush_rollout().await;
if !task_cancellation_token.is_cancelled() {
// Emit completion uniformly from spawn site so all tasks share the same lifecycle.
let sess = session_ctx.clone_session();

View File

@@ -122,11 +122,7 @@ impl SessionTask for UserShellCommandTask {
duration: Duration::ZERO,
timed_out: false,
};
let output_items = [user_shell_command_record_item(
&raw_command,
&exec_output,
&turn_context,
)];
let output_items = [user_shell_command_record_item(&raw_command, &exec_output)];
session
.record_conversation_items(turn_context.as_ref(), &output_items)
.await;
@@ -168,19 +164,12 @@ impl SessionTask for UserShellCommandTask {
aggregated_output: output.aggregated_output.text.clone(),
exit_code: output.exit_code,
duration: output.duration,
formatted_output: format_exec_output_str(
&output,
turn_context.truncation_policy,
),
formatted_output: format_exec_output_str(&output),
}),
)
.await;
let output_items = [user_shell_command_record_item(
&raw_command,
&output,
&turn_context,
)];
let output_items = [user_shell_command_record_item(&raw_command, &output)];
session
.record_conversation_items(turn_context.as_ref(), &output_items)
.await;
@@ -212,18 +201,11 @@ impl SessionTask for UserShellCommandTask {
aggregated_output: exec_output.aggregated_output.text.clone(),
exit_code: exec_output.exit_code,
duration: exec_output.duration,
formatted_output: format_exec_output_str(
&exec_output,
turn_context.truncation_policy,
),
formatted_output: format_exec_output_str(&exec_output),
}),
)
.await;
let output_items = [user_shell_command_record_item(
&raw_command,
&exec_output,
&turn_context,
)];
let output_items = [user_shell_command_record_item(&raw_command, &exec_output)];
session
.record_conversation_items(turn_context.as_ref(), &output_items)
.await;

View File

@@ -88,7 +88,6 @@ pub(crate) enum ToolEmitter {
cwd: PathBuf,
source: ExecCommandSource,
parsed_cmd: Vec<ParsedCommand>,
freeform: bool,
},
ApplyPatch {
changes: HashMap<PathBuf, FileChange>,
@@ -104,19 +103,13 @@ pub(crate) enum ToolEmitter {
}
impl ToolEmitter {
pub fn shell(
command: Vec<String>,
cwd: PathBuf,
source: ExecCommandSource,
freeform: bool,
) -> Self {
pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
let parsed_cmd = parse_command(&command);
Self::Shell {
command,
cwd,
source,
parsed_cmd,
freeform,
}
}
@@ -151,7 +144,6 @@ impl ToolEmitter {
cwd,
source,
parsed_cmd,
..
},
stage,
) => {
@@ -179,17 +171,15 @@ impl ToolEmitter {
ctx.turn,
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id: ctx.call_id.to_string(),
turn_id: ctx.turn.sub_id.clone(),
auto_approved: *auto_approved,
changes: changes.clone(),
}),
)
.await;
}
(Self::ApplyPatch { changes, .. }, ToolEventStage::Success(output)) => {
(Self::ApplyPatch { .. }, ToolEventStage::Success(output)) => {
emit_patch_end(
ctx,
changes.clone(),
output.stdout.text.clone(),
output.stderr.text.clone(),
output.exit_code == 0,
@@ -197,12 +187,11 @@ impl ToolEmitter {
.await;
}
(
Self::ApplyPatch { changes, .. },
Self::ApplyPatch { .. },
ToolEventStage::Failure(ToolEventFailure::Output(output)),
) => {
emit_patch_end(
ctx,
changes.clone(),
output.stdout.text.clone(),
output.stderr.text.clone(),
output.exit_code == 0,
@@ -210,17 +199,10 @@ impl ToolEmitter {
.await;
}
(
Self::ApplyPatch { changes, .. },
Self::ApplyPatch { .. },
ToolEventStage::Failure(ToolEventFailure::Message(message)),
) => {
emit_patch_end(
ctx,
changes.clone(),
String::new(),
(*message).to_string(),
false,
)
.await;
emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
}
(
Self::UnifiedExec {
@@ -252,19 +234,6 @@ impl ToolEmitter {
self.emit(ctx, ToolEventStage::Begin).await;
}
fn format_exec_output_for_model(
&self,
output: &ExecToolCallOutput,
ctx: ToolEventCtx<'_>,
) -> String {
match self {
Self::Shell { freeform: true, .. } => {
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
}
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
}
}
pub async fn finish(
&self,
ctx: ToolEventCtx<'_>,
@@ -272,7 +241,7 @@ impl ToolEmitter {
) -> Result<String, FunctionCallError> {
let (event, result) = match out {
Ok(output) => {
let content = self.format_exec_output_for_model(&output, ctx);
let content = super::format_exec_output_for_model(&output);
let exit_code = output.exit_code;
let event = ToolEventStage::Success(output);
let result = if exit_code == 0 {
@@ -284,7 +253,7 @@ impl ToolEmitter {
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
let response = self.format_exec_output_for_model(&output, ctx);
let response = super::format_exec_output_for_model(&output);
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
let result = Err(FunctionCallError::RespondToModel(response));
(event, result)
@@ -373,7 +342,7 @@ async fn emit_exec_stage(
aggregated_output: output.aggregated_output.text.clone(),
exit_code: output.exit_code,
duration: output.duration,
formatted_output: format_exec_output_str(&output, ctx.turn.truncation_policy),
formatted_output: format_exec_output_str(&output),
};
emit_exec_end(ctx, exec_input, exec_result).await;
}
@@ -419,23 +388,15 @@ async fn emit_exec_end(
.await;
}
async fn emit_patch_end(
ctx: ToolEventCtx<'_>,
changes: HashMap<PathBuf, FileChange>,
stdout: String,
stderr: String,
success: bool,
) {
async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) {
ctx.session
.send_event(
ctx.turn,
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: ctx.call_id.to_string(),
turn_id: ctx.turn.sub_id.clone(),
stdout,
stderr,
success,
changes,
}),
)
.await;

View File

@@ -9,11 +9,9 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::codex::TurnContext;
use crate::exec::ExecParams;
use crate::exec_env::create_env;
use crate::exec_policy::create_approval_requirement_for_command;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::SandboxPermissions;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -119,7 +117,6 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
false,
)
.await
}
@@ -132,7 +129,6 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
false,
)
.await
}
@@ -180,7 +176,6 @@ impl ToolHandler for ShellCommandHandler {
turn,
tracker,
call_id,
true,
)
.await
}
@@ -194,7 +189,6 @@ impl ShellHandler {
turn: Arc<TurnContext>,
tracker: crate::tools::context::SharedTurnDiffTracker,
call_id: String,
freeform: bool,
) -> Result<ToolOutput, FunctionCallError> {
// Approval policy guard for explicit escalation in non-OnRequest modes.
if exec_params.with_escalated_permissions.unwrap_or(false)
@@ -288,12 +282,8 @@ impl ShellHandler {
}
let source = ExecCommandSource::Agent;
let emitter = ToolEmitter::shell(
exec_params.command.clone(),
exec_params.cwd.clone(),
source,
freeform,
);
let emitter =
ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone(), source);
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
emitter.begin(event_ctx).await;
@@ -304,13 +294,6 @@ impl ShellHandler {
env: exec_params.env.clone(),
with_escalated_permissions: exec_params.with_escalated_permissions,
justification: exec_params.justification.clone(),
approval_requirement: create_approval_requirement_for_command(
&turn.exec_policy,
&exec_params.command,
turn.approval_policy,
&turn.sandbox_policy,
SandboxPermissions::from(exec_params.with_escalated_permissions.unwrap_or(false)),
),
};
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = ShellRuntime::new();

View File

@@ -9,10 +9,8 @@ pub mod runtimes;
pub mod sandboxing;
pub mod spec;
use crate::context_manager::truncate_with_line_bytes_budget;
use crate::exec::ExecToolCallOutput;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use crate::truncate::truncate_text;
pub use router::ToolRouter;
use serde::Serialize;
@@ -22,12 +20,12 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
"[... telemetry preview truncated ...]";
// TODO(aibrahim): migrate shell tool to use truncate text and respect config value
const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model_structured(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
exit_code,
duration,
@@ -49,7 +47,7 @@ pub fn format_exec_output_for_model_structured(
// round to 1 decimal place
let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
let formatted_output = format_exec_output_str(exec_output, truncation_policy);
let formatted_output = format_exec_output_str(exec_output);
let payload = ExecOutput {
output: &formatted_output,
@@ -63,35 +61,7 @@ pub fn format_exec_output_for_model_structured(
serde_json::to_string(&payload).expect("serialize ExecOutput")
}
pub fn format_exec_output_for_model_freeform(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
// round to 1 decimal place
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;
let total_lines = exec_output.aggregated_output.text.lines().count();
let formatted_output = truncate_text(&exec_output.aggregated_output.text, truncation_policy);
let mut sections = Vec::new();
sections.push(format!("Exit code: {}", exec_output.exit_code));
sections.push(format!("Wall time: {duration_seconds} seconds"));
if total_lines != formatted_output.lines().count() {
sections.push(format!("Total output lines: {total_lines}"));
}
sections.push("Output:".to_string());
sections.push(formatted_output);
sections.join("\n")
}
pub fn format_exec_output_str(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
aggregated_output, ..
} = exec_output;
@@ -108,5 +78,5 @@ pub fn format_exec_output_str(
};
// Truncate for model consumption before serialization.
formatted_truncate_text(&body, truncation_policy)
truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES)
}

View File

@@ -11,13 +11,11 @@ use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::default_approval_requirement;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
@@ -51,52 +49,40 @@ impl ToolOrchestrator {
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
// 1) Approval
let needs_initial_approval =
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
let mut already_approved = false;
let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
});
match requirement {
ApprovalRequirement::Skip => {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
ApprovalRequirement::Forbidden { reason } => {
return Err(ToolError::Rejected(reason));
}
ApprovalRequirement::NeedsApproval { reason } => {
let mut risk = None;
if needs_initial_approval {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
None,
)
.await;
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
.await;
}
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: None,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
}
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: reason,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
}
already_approved = true;
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
}
already_approved = true;
} else {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
// 2) First attempt under the selected sandbox.

View File

@@ -4,12 +4,13 @@ Runtime: shell
Executes shell requests under the orchestrator: asks for approval when needed,
builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use crate::exec::ExecToolCallOutput;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -19,6 +20,7 @@ use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::path::PathBuf;
@@ -31,7 +33,6 @@ pub struct ShellRequest {
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub approval_requirement: ApprovalRequirement,
}
impl ProvidesSandboxRetryData for ShellRequest {
@@ -113,8 +114,18 @@ impl Approvable<ShellRequest> for ShellRuntime {
})
}
fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
fn wants_initial_approval(
&self,
req: &ShellRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
}
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {

View File

@@ -1,3 +1,4 @@
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
/*
Runtime: unified exec
@@ -9,7 +10,6 @@ use crate::error::SandboxErr;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -22,7 +22,9 @@ use crate::tools::sandboxing::with_cached_approval;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecSession;
use crate::unified_exec::UnifiedExecSessionManager;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
@@ -34,7 +36,6 @@ pub struct UnifiedExecRequest {
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub approval_requirement: ApprovalRequirement,
}
impl ProvidesSandboxRetryData for UnifiedExecRequest {
@@ -64,7 +65,6 @@ impl UnifiedExecRequest {
env: HashMap<String, String>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
approval_requirement: ApprovalRequirement,
) -> Self {
Self {
command,
@@ -72,7 +72,6 @@ impl UnifiedExecRequest {
env,
with_escalated_permissions,
justification,
approval_requirement,
}
}
}
@@ -130,8 +129,18 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
})
}
fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
fn wants_initial_approval(
&self,
req: &UnifiedExecRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
}
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {

View File

@@ -86,37 +86,6 @@ pub(crate) struct ApprovalCtx<'a> {
pub risk: Option<SandboxCommandAssessment>,
}
// Specifies what tool orchestrator should do with a given tool call.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum ApprovalRequirement {
/// No approval required for this tool call
Skip,
/// Approval required for this tool call
NeedsApproval { reason: Option<String> },
/// Execution forbidden for this tool call
Forbidden { reason: String },
}
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
pub(crate) fn default_approval_requirement(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> ApprovalRequirement {
let needs_approval = match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
};
if needs_approval {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
pub(crate) trait Approvable<Req> {
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
@@ -137,11 +106,22 @@ pub(crate) trait Approvable<Req> {
matches!(policy, AskForApproval::Never)
}
/// Override the default approval requirement. Return `Some(_)` to specify
/// a custom requirement, or `None` to fall back to
/// policy-based default.
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
None
/// Decide whether an initial user approval should be requested before the
/// first attempt. Defaults to the orchestrator's behavior (prerefactor):
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
fn wants_initial_approval(
&self,
_req: &Req,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
}
}
/// Decide we can request an approval for no-sandbox execution.

View File

@@ -1395,22 +1395,6 @@ mod tests {
);
}
#[test]
fn test_gpt_5_defaults() {
assert_model_tools(
"gpt-5",
&Features::with_defaults(),
&[
"shell",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"view_image",
],
);
}
#[test]
fn test_gpt_5_1_defaults() {
assert_model_tools(

View File

@@ -2,8 +2,11 @@
//! and suffix on UTF-8 boundaries, and helpers for line/tokenbased truncation
//! used across the core crate.
use crate::config::Config;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_utils_string::take_bytes_at_char_boundary;
use codex_utils_string::take_last_bytes_at_char_boundary;
use crate::config::Config;
const APPROX_BYTES_PER_TOKEN: usize = 4;
@@ -14,18 +17,6 @@ pub enum TruncationPolicy {
}
impl TruncationPolicy {
/// Scale the underlying budget by `multiplier`, rounding up to avoid under-budgeting.
pub fn mul(self, multiplier: f64) -> Self {
match self {
TruncationPolicy::Bytes(bytes) => {
TruncationPolicy::Bytes((bytes as f64 * multiplier).ceil() as usize)
}
TruncationPolicy::Tokens(tokens) => {
TruncationPolicy::Tokens((tokens as f64 * multiplier).ceil() as usize)
}
}
}
pub fn new(config: &Config) -> Self {
let config_token_limit = config.tool_output_token_limit;
@@ -74,20 +65,34 @@ impl TruncationPolicy {
}
}
pub(crate) fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String {
if content.len() <= policy.byte_budget() {
/// Format a block of exec/tool output for model consumption, truncating by
/// lines and bytes while preserving head and tail segments.
pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String {
// TODO(aibrahim): to be removed
let lines_budget = 256;
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
if content.len() <= bytes_budget && total_lines <= lines_budget {
return content.to_string();
}
let total_lines = content.lines().count();
let result = truncate_text(content, policy);
format!("Total output lines: {total_lines}\n\n{result}")
let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget);
format!("Total output lines: {total_lines}\n\n{output}")
}
pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
match policy {
TruncationPolicy::Bytes(_) => truncate_with_byte_estimate(content, policy),
TruncationPolicy::Tokens(_) => {
let (truncated, _) = truncate_with_token_budget(content, policy);
TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(
content,
bytes,
TruncationSource::Policy(TruncationPolicy::Bytes(bytes)),
),
TruncationPolicy::Tokens(tokens) => {
let (truncated, _) = truncate_with_token_budget(
content,
tokens,
TruncationSource::Policy(TruncationPolicy::Tokens(tokens)),
);
truncated
}
}
@@ -157,18 +162,24 @@ pub(crate) fn truncate_function_output_items_with_policy(
/// preserving the beginning and the end. Returns the possibly truncated string
/// and `Some(original_token_count)` if truncation occurred; otherwise returns
/// the original string and `None`.
fn truncate_with_token_budget(s: &str, policy: TruncationPolicy) -> (String, Option<u64>) {
fn truncate_with_token_budget(
s: &str,
max_tokens: usize,
source: TruncationSource,
) -> (String, Option<u64>) {
if s.is_empty() {
return (String::new(), None);
}
let max_tokens = policy.token_budget();
let byte_len = s.len();
if max_tokens > 0 && byte_len <= approx_bytes_for_tokens(max_tokens) {
return (s.to_string(), None);
if max_tokens > 0 {
let small_threshold = approx_bytes_for_tokens(max_tokens / 4);
if small_threshold > 0 && byte_len <= small_threshold {
return (s.to_string(), None);
}
}
let truncated = truncate_with_byte_estimate(s, policy);
let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source);
let approx_total_usize = approx_token_count(s);
let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX);
if truncated == s {
@@ -181,20 +192,14 @@ fn truncate_with_token_budget(s: &str, policy: TruncationPolicy) -> (String, Opt
/// Truncate a string using a byte budget derived from the token budget, without
/// performing any real tokenization. This keeps the logic purely byte-based and
/// uses a bytes placeholder in the truncated output.
fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String {
if s.is_empty() {
return String::new();
}
let total_chars = s.chars().count();
let max_bytes = policy.byte_budget();
if max_bytes == 0 {
// No budget to show content; just report that everything was truncated.
let marker = format_truncation_marker(
policy,
removed_units_for_source(policy, s.len(), total_chars),
);
let marker = format_truncation_marker(source, removed_units_for_source(source, s.len()));
return marker;
}
@@ -203,63 +208,127 @@ fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
}
let total_bytes = s.len();
let removed_bytes = total_bytes.saturating_sub(max_bytes);
let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes));
let marker_len = marker.len();
let (left_budget, right_budget) = split_budget(max_bytes);
let (removed_chars, left, right) = split_string(s, left_budget, right_budget);
let marker = format_truncation_marker(
policy,
removed_units_for_source(policy, total_bytes.saturating_sub(max_bytes), removed_chars),
);
assemble_truncated_output(left, right, &marker)
}
fn split_string(s: &str, beginning_bytes: usize, end_bytes: usize) -> (usize, &str, &str) {
if s.is_empty() {
return (0, "", "");
}
let len = s.len();
let tail_start_target = len.saturating_sub(end_bytes);
let mut prefix_end = 0usize;
let mut suffix_start = len;
let mut removed_chars = 0usize;
let mut suffix_started = false;
for (idx, ch) in s.char_indices() {
let char_end = idx + ch.len_utf8();
if char_end <= beginning_bytes {
prefix_end = char_end;
continue;
}
if idx >= tail_start_target {
if !suffix_started {
suffix_start = idx;
suffix_started = true;
}
continue;
}
removed_chars = removed_chars.saturating_add(1);
if marker_len >= max_bytes {
let truncated_marker = truncate_on_boundary(&marker, max_bytes);
return truncated_marker.to_string();
}
let keep_budget = max_bytes - marker_len;
let (left_budget, right_budget) = split_budget(keep_budget);
let prefix_end = pick_prefix_end(s, left_budget);
let mut suffix_start = pick_suffix_start(s, right_budget);
if suffix_start < prefix_end {
suffix_start = prefix_end;
}
let before = &s[..prefix_end];
let after = &s[suffix_start..];
let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker);
(removed_chars, before, after)
if out.len() > max_bytes {
let boundary = truncate_on_boundary(&out, max_bytes);
out.truncate(boundary.len());
}
out
}
fn format_truncation_marker(policy: TruncationPolicy, removed_count: u64) -> String {
match policy {
TruncationPolicy::Tokens(_) => format!("{removed_count} tokens truncated…"),
TruncationPolicy::Bytes(_) => format!("{removed_count} chars truncated…"),
fn truncate_formatted_exec_output(
content: &str,
total_lines: usize,
limit_bytes: usize,
limit_lines: usize,
) -> String {
error_on_double_truncation(content);
let head_lines: usize = limit_lines / 2;
let tail_lines: usize = limit_lines - head_lines; // 128
let head_bytes: usize = limit_bytes / 2;
let segments: Vec<&str> = content.split_inclusive('\n').collect();
let head_take = head_lines.min(segments.len());
let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
let omitted = segments.len().saturating_sub(head_take + tail_take);
let head_slice_end: usize = segments
.iter()
.take(head_take)
.map(|segment| segment.len())
.sum();
let tail_slice_start: usize = if tail_take == 0 {
content.len()
} else {
content.len()
- segments
.iter()
.rev()
.take(tail_take)
.map(|segment| segment.len())
.sum::<usize>()
};
let head_slice = &content[..head_slice_end];
let tail_slice = &content[tail_slice_start..];
let truncated_by_bytes = content.len() > limit_bytes;
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
let marker = if omitted > 0 {
let marker_text = format_truncation_marker(
TruncationSource::LineOmission { total_lines },
u64::try_from(omitted).unwrap_or(u64::MAX),
);
Some(format!("\n{marker_text}\n\n"))
} else if truncated_by_bytes {
let removed_bytes =
u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX);
let marker_text =
format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes);
Some(format!("\n{marker_text}\n\n"))
} else {
None
};
let marker_len = marker.as_ref().map_or(0, String::len);
let base_head_budget = head_bytes.min(limit_bytes);
let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
let mut result = String::with_capacity(limit_bytes.min(content.len()));
result.push_str(head_part);
if let Some(marker_text) = marker.as_ref() {
result.push_str(marker_text);
}
let remaining = limit_bytes.saturating_sub(result.len());
if remaining == 0 {
return result;
}
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
result.push_str(tail_part);
result
}
#[derive(Clone, Copy)]
pub enum TruncationSource {
Policy(TruncationPolicy),
LineOmission { total_lines: usize },
ByteLimit { limit_bytes: usize },
}
fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String {
match source {
TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
format!("[…{removed_count} tokens truncated…]")
}
TruncationSource::Policy(TruncationPolicy::Bytes(_)) => {
format!("[…{removed_count} bytes truncated…]")
}
TruncationSource::LineOmission { total_lines } => {
format!("[... omitted {removed_count} of {total_lines} lines ...]")
}
TruncationSource::ByteLimit { limit_bytes } => {
format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]")
}
}
}
@@ -268,14 +337,12 @@ fn split_budget(budget: usize) -> (usize, usize) {
(left, budget - left)
}
fn removed_units_for_source(
policy: TruncationPolicy,
removed_bytes: usize,
removed_chars: usize,
) -> u64 {
match policy {
TruncationPolicy::Tokens(_) => approx_tokens_from_byte_count(removed_bytes),
TruncationPolicy::Bytes(_) => u64::try_from(removed_chars).unwrap_or(u64::MAX),
fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 {
match source {
TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
approx_tokens_from_byte_count(removed_bytes)
}
_ => u64::try_from(removed_bytes).unwrap_or(u64::MAX),
}
}
@@ -283,6 +350,7 @@ fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String
let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1);
out.push_str(prefix);
out.push_str(marker);
out.push('\n');
out.push_str(suffix);
out
}
@@ -302,166 +370,238 @@ fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
/ (APPROX_BYTES_PER_TOKEN as u64)
}
fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
if input.len() <= max_len {
return input;
}
let mut end = max_len;
while end > 0 && !input.is_char_boundary(end) {
end -= 1;
}
&input[..end]
}
fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
if let Some(head) = s.get(..left_budget)
&& let Some(i) = head.rfind('\n')
{
return i + 1;
}
truncate_on_boundary(s, left_budget).len()
}
fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
let start_tail = s.len().saturating_sub(right_budget);
if let Some(tail) = s.get(start_tail..)
&& let Some(i) = tail.find('\n')
{
return start_tail + i + 1;
}
let mut idx = start_tail.min(s.len());
while idx < s.len() && !s.is_char_boundary(idx) {
idx += 1;
}
idx
}
fn error_on_double_truncation(content: &str) {
if content.contains("Total output lines:") && content.contains("omitted") {
tracing::error!(
"FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
);
}
}
#[cfg(test)]
mod tests {
use crate::config::OPENAI_DEFAULT_MODEL;
use crate::model_family::derive_default_model_family;
use crate::model_family::find_family_for_model;
use super::TruncationPolicy;
use super::TruncationSource;
use super::approx_token_count;
use super::formatted_truncate_text;
use super::split_string;
use super::truncate_function_output_items_with_policy;
use super::truncate_text;
use super::truncate_with_line_bytes_budget;
use super::truncate_with_token_budget;
use codex_protocol::models::FunctionCallOutputContentItem;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
#[test]
fn split_string_works() {
assert_eq!(split_string("hello world", 5, 5), (1, "hello", "world"));
assert_eq!(split_string("abc", 0, 0), (3, "", ""));
const MODEL_FORMAT_MAX_LINES: usize = 256;
fn model_format_max_bytes() -> usize {
find_family_for_model(OPENAI_DEFAULT_MODEL)
.unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL))
.truncation_policy
.byte_budget()
}
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
let head_lines = MODEL_FORMAT_MAX_LINES / 2;
let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
let head_take = head_lines.min(total_lines);
let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
let omitted = total_lines.saturating_sub(head_take + tail_take);
let escaped_line = regex_lite::escape(line);
if omitted == 0 {
return format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$",
max_bytes = model_format_max_bytes(),
);
}
format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
)
}
#[test]
fn split_string_handles_empty_string() {
assert_eq!(split_string("", 4, 4), (0, "", ""));
}
#[test]
fn split_string_only_keeps_prefix_when_tail_budget_is_zero() {
assert_eq!(split_string("abcdef", 3, 0), (3, "abc", ""));
}
#[test]
fn split_string_only_keeps_suffix_when_prefix_budget_is_zero() {
assert_eq!(split_string("abcdef", 0, 3), (3, "", "def"));
}
#[test]
fn split_string_handles_overlapping_budgets_without_removal() {
assert_eq!(split_string("abcdef", 4, 4), (0, "abcd", "ef"));
}
#[test]
fn split_string_respects_utf8_boundaries() {
assert_eq!(split_string("😀abc😀", 5, 5), (1, "😀a", "c😀"));
assert_eq!(split_string("😀😀😀😀😀", 1, 1), (5, "", ""));
assert_eq!(split_string("😀😀😀😀😀", 7, 7), (3, "😀", "😀"));
assert_eq!(split_string("😀😀😀😀😀", 8, 8), (1, "😀😀", "😀😀"));
}
#[test]
fn truncate_bytes_less_than_placeholder_returns_placeholder() {
let content = "example output";
assert_eq!(
"Total output lines: 1\n\n…13 chars truncated…t",
formatted_truncate_text(content, TruncationPolicy::Bytes(1)),
);
}
#[test]
fn truncate_tokens_less_than_placeholder_returns_placeholder() {
let content = "example output";
assert_eq!(
"Total output lines: 1\n\nex…3 tokens truncated…ut",
formatted_truncate_text(content, TruncationPolicy::Tokens(1)),
);
}
#[test]
fn truncate_tokens_under_limit_returns_original() {
let content = "example output";
assert_eq!(
content,
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
);
}
#[test]
fn truncate_bytes_under_limit_returns_original() {
let content = "example output";
assert_eq!(
content,
formatted_truncate_text(content, TruncationPolicy::Bytes(20)),
);
}
#[test]
fn truncate_tokens_over_limit_returns_truncated() {
let content = "this is an example of a long output that should be truncated";
assert_eq!(
"Total output lines: 1\n\nthis is an…10 tokens truncated… truncated",
formatted_truncate_text(content, TruncationPolicy::Tokens(5)),
);
}
#[test]
fn truncate_bytes_over_limit_returns_truncated() {
let content = "this is an example of a long output that should be truncated";
assert_eq!(
"Total output lines: 1\n\nthis is an exam…30 chars truncated…ld be truncated",
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
);
}
#[test]
fn truncate_bytes_reports_original_line_count_when_truncated() {
let content =
"this is an example of a long output that should be truncated\nalso some other line";
assert_eq!(
"Total output lines: 2\n\nthis is an exam…51 chars truncated…some other line",
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
);
}
#[test]
fn truncate_tokens_reports_original_line_count_when_truncated() {
let content =
"this is an example of a long output that should be truncated\nalso some other line";
assert_eq!(
"Total output lines: 2\n\nthis is an example o…11 tokens truncated…also some other line",
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
);
}
#[test]
fn truncate_with_token_budget_returns_original_when_under_limit() {
fn truncate_middle_returns_original_when_under_limit() {
let s = "short output";
let limit = 100;
let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(limit));
let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit));
let (out, original) = truncate_with_token_budget(s, limit, source);
assert_eq!(out, s);
assert_eq!(original, None);
}
#[test]
fn truncate_with_token_budget_reports_truncation_at_zero_limit() {
fn truncate_middle_reports_truncation_at_zero_limit() {
let s = "abcdef";
let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(0));
assert_eq!(out, "…2 tokens truncated…");
assert_eq!(original, Some(2));
let source = TruncationSource::Policy(TruncationPolicy::Tokens(0));
let (out, original) = truncate_with_token_budget(s, 0, source);
assert_eq!(out, "[…2 tokens truncated…]");
assert_eq!(original, Some(approx_token_count(s) as u64));
}
#[test]
fn truncate_middle_tokens_handles_utf8_content() {
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
let (out, tokens) = truncate_with_token_budget(s, TruncationPolicy::Tokens(8));
assert_eq!(out, "😀😀😀😀…8 tokens truncated… line with text\n");
assert_eq!(tokens, Some(16));
fn truncate_middle_enforces_token_budget() {
let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa";
let max_tokens = 12;
let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
let (out, original) = truncate_with_token_budget(s, max_tokens, source);
assert!(out.contains("tokens truncated"));
assert_eq!(original, Some(approx_token_count(s) as u64));
assert!(out.len() < s.len(), "truncated output should be shorter");
}
#[test]
fn truncate_middle_bytes_handles_utf8_content() {
fn truncate_middle_handles_utf8_content() {
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
let out = truncate_text(s, TruncationPolicy::Bytes(20));
assert_eq!(out, "😀😀…21 chars truncated…with text\n");
let max_tokens = 8;
let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
let (out, tokens) = truncate_with_token_budget(s, max_tokens, source);
assert!(out.contains("tokens truncated"));
assert!(!out.contains('\u{fffd}'));
assert_eq!(tokens, Some(approx_token_count(s) as u64));
assert!(out.len() < s.len(), "UTF-8 content should be shortened");
}
#[test]
fn format_exec_output_truncates_large_error() {
let line = "very long execution error line that should trigger truncation\n";
let large_error = line.repeat(2_500); // way beyond both byte and line limits
let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes());
let total_lines = large_error.lines().count();
let pattern = truncated_message_pattern(line, total_lines);
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
panic!("failed to compile regex {pattern}: {err}");
});
let captures = regex
.captures(&truncated)
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {truncated}"));
let body = captures
.name("body")
.expect("missing body capture")
.as_str();
assert!(
body.len() <= model_format_max_bytes(),
"body exceeds byte limit: {} bytes",
body.len()
);
assert_ne!(truncated, large_error);
}
#[test]
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
let max_bytes = model_format_max_bytes();
let long_line = "a".repeat(max_bytes + 50);
let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes);
assert_ne!(truncated, long_line);
let removed_bytes = long_line.len().saturating_sub(max_bytes);
let marker_line =
format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]");
assert!(
truncated.contains(&marker_line),
"missing byte truncation marker: {truncated}"
);
assert!(
!truncated.contains("omitted"),
"line omission marker should not appear when no lines were dropped: {truncated}"
);
}
#[test]
fn format_exec_output_returns_original_when_within_limits() {
let content = "example output\n".repeat(10);
assert_eq!(
truncate_with_line_bytes_budget(&content, model_format_max_bytes()),
content
);
}
#[test]
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}\n"))
.collect();
let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
assert!(
truncated.contains(&expected_marker),
"missing omitted marker: {truncated}"
);
assert!(
truncated.contains("line-0\n"),
"expected head line to remain: {truncated}"
);
let last_line = format!("line-{}\n", total_lines - 1);
assert!(
truncated.contains(&last_line),
"expected tail line to remain: {truncated}"
);
}
#[test]
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
let long_line = "x".repeat(256);
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}-{long_line}\n"))
.collect();
let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
assert!(
truncated.contains("[... omitted 42 of 298 lines ...]"),
"expected omitted marker when line count exceeds limit: {truncated}"
);
assert!(
!truncated.contains("byte limit"),
"line omission marker should take precedence over byte marker: {truncated}"
);
}
#[test]

View File

@@ -2,20 +2,20 @@
use std::collections::VecDeque;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::task::JoinHandle;
use tokio::time::Duration;
use tokio_util::sync::CancellationToken;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StreamOutput;
use crate::exec::is_likely_sandbox_denied;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use crate::truncate::truncate_text;
use codex_utils_pty::ExecCommandSession;
use codex_utils_pty::SpawnedPty;
@@ -67,18 +67,13 @@ impl OutputBufferState {
}
pub(crate) type OutputBuffer = Arc<Mutex<OutputBufferState>>;
pub(crate) struct OutputHandles {
pub(crate) output_buffer: OutputBuffer,
pub(crate) output_notify: Arc<Notify>,
pub(crate) cancellation_token: CancellationToken,
}
pub(crate) type OutputHandles = (OutputBuffer, Arc<Notify>);
#[derive(Debug)]
pub(crate) struct UnifiedExecSession {
session: ExecCommandSession,
output_buffer: OutputBuffer,
output_notify: Arc<Notify>,
cancellation_token: CancellationToken,
output_task: JoinHandle<()>,
sandbox_type: SandboxType,
}
@@ -91,11 +86,9 @@ impl UnifiedExecSession {
) -> Self {
let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
let output_notify = Arc::new(Notify::new());
let cancellation_token = CancellationToken::new();
let mut receiver = initial_output_rx;
let buffer_clone = Arc::clone(&output_buffer);
let notify_clone = Arc::clone(&output_notify);
let cancellation_token_clone = cancellation_token.clone();
let output_task = tokio::spawn(async move {
loop {
match receiver.recv().await {
@@ -106,10 +99,7 @@ impl UnifiedExecSession {
notify_clone.notify_waiters();
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
Err(tokio::sync::broadcast::error::RecvError::Closed) => {
cancellation_token_clone.cancel();
break;
}
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
}
}
});
@@ -118,7 +108,6 @@ impl UnifiedExecSession {
session,
output_buffer,
output_notify,
cancellation_token,
output_task,
sandbox_type,
}
@@ -129,11 +118,10 @@ impl UnifiedExecSession {
}
pub(super) fn output_handles(&self) -> OutputHandles {
OutputHandles {
output_buffer: Arc::clone(&self.output_buffer),
output_notify: Arc::clone(&self.output_notify),
cancellation_token: self.cancellation_token.clone(),
}
(
Arc::clone(&self.output_buffer),
Arc::clone(&self.output_notify),
)
}
pub(super) fn has_exited(&self) -> bool {
@@ -179,7 +167,7 @@ impl UnifiedExecSession {
};
if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
let snippet = formatted_truncate_text(
let snippet = truncate_text(
&aggregated_text,
TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS),
);
@@ -211,34 +199,20 @@ impl UnifiedExecSession {
};
if exit_ready {
managed.signal_exit();
managed.check_for_sandbox_denial().await?;
return Ok(managed);
}
tokio::pin!(exit_rx);
if tokio::time::timeout(Duration::from_millis(50), &mut exit_rx)
.await
.is_ok()
{
managed.signal_exit();
managed.check_for_sandbox_denial().await?;
return Ok(managed);
}
tokio::spawn({
let cancellation_token = managed.cancellation_token.clone();
async move {
let _ = exit_rx.await;
cancellation_token.cancel();
}
});
Ok(managed)
}
fn signal_exit(&self) {
self.cancellation_token.cancel();
}
}
impl Drop for UnifiedExecSession {

View File

@@ -5,19 +5,16 @@ use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::exec::StreamOutput;
use crate::exec_env::create_env;
use crate::exec_policy::create_approval_requirement_for_command;
use crate::protocol::BackgroundEventEvent;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxPermissions;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventFailure;
@@ -28,7 +25,7 @@ use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::formatted_truncate_text;
use crate::truncate::truncate_text;
use super::ExecCommandRequest;
use super::SessionEntry;
@@ -41,20 +38,8 @@ use super::clamp_yield_time;
use super::generate_chunk_id;
use super::resolve_max_tokens;
use super::session::OutputBuffer;
use super::session::OutputHandles;
use super::session::UnifiedExecSession;
struct PreparedSessionHandles {
writer_tx: mpsc::Sender<Vec<u8>>,
output_buffer: OutputBuffer,
output_notify: Arc<Notify>,
cancellation_token: CancellationToken,
session_ref: Arc<Session>,
turn_ref: Arc<TurnContext>,
command: Vec<String>,
cwd: PathBuf,
}
impl UnifiedExecSessionManager {
pub(crate) async fn exec_command(
&self,
@@ -80,23 +65,14 @@ impl UnifiedExecSessionManager {
let yield_time_ms = clamp_yield_time(request.yield_time_ms);
let start = Instant::now();
let OutputHandles {
output_buffer,
output_notify,
cancellation_token,
} = session.output_handles();
let (output_buffer, output_notify) = session.output_handles();
let deadline = start + Duration::from_millis(yield_time_ms);
let collected = Self::collect_output_until_deadline(
&output_buffer,
&output_notify,
&cancellation_token,
deadline,
)
.await;
let collected =
Self::collect_output_until_deadline(&output_buffer, &output_notify, deadline).await;
let wall_time = Instant::now().saturating_duration_since(start);
let text = String::from_utf8_lossy(&collected).to_string();
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let chunk_id = generate_chunk_id();
let has_exited = session.has_exited();
let stored_id = self
@@ -151,16 +127,15 @@ impl UnifiedExecSessionManager {
) -> Result<UnifiedExecResponse, UnifiedExecError> {
let session_id = request.session_id;
let PreparedSessionHandles {
let (
writer_tx,
output_buffer,
output_notify,
cancellation_token,
session_ref,
turn_ref,
command: session_command,
cwd: session_cwd,
} = self.prepare_session_handles(session_id).await?;
session_command,
session_cwd,
) = self.prepare_session_handles(session_id).await?;
let interaction_emitter = ToolEmitter::unified_exec(
&session_command,
@@ -199,17 +174,12 @@ impl UnifiedExecSessionManager {
let yield_time_ms = clamp_yield_time(request.yield_time_ms);
let start = Instant::now();
let deadline = start + Duration::from_millis(yield_time_ms);
let collected = Self::collect_output_until_deadline(
&output_buffer,
&output_notify,
&cancellation_token,
deadline,
)
.await;
let collected =
Self::collect_output_until_deadline(&output_buffer, &output_notify, deadline).await;
let wall_time = Instant::now().saturating_duration_since(start);
let text = String::from_utf8_lossy(&collected).to_string();
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let original_token_count = approx_token_count(&text);
let chunk_id = generate_chunk_id();
@@ -293,27 +263,44 @@ impl UnifiedExecSessionManager {
async fn prepare_session_handles(
&self,
session_id: i32,
) -> Result<PreparedSessionHandles, UnifiedExecError> {
) -> Result<
(
mpsc::Sender<Vec<u8>>,
OutputBuffer,
Arc<Notify>,
Arc<Session>,
Arc<TurnContext>,
Vec<String>,
PathBuf,
),
UnifiedExecError,
> {
let sessions = self.sessions.lock().await;
let entry = sessions
.get(&session_id)
.ok_or(UnifiedExecError::UnknownSessionId { session_id })?;
let OutputHandles {
output_buffer,
output_notify,
cancellation_token,
} = entry.session.output_handles();
let (output_buffer, output_notify, writer_tx, session, turn, command, cwd) =
if let Some(entry) = sessions.get(&session_id) {
let (buffer, notify) = entry.session.output_handles();
(
buffer,
notify,
entry.session.writer_sender(),
Arc::clone(&entry.session_ref),
Arc::clone(&entry.turn_ref),
entry.command.clone(),
entry.cwd.clone(),
)
} else {
return Err(UnifiedExecError::UnknownSessionId { session_id });
};
Ok(PreparedSessionHandles {
writer_tx: entry.session.writer_sender(),
Ok((
writer_tx,
output_buffer,
output_notify,
cancellation_token,
session_ref: Arc::clone(&entry.session_ref),
turn_ref: Arc::clone(&entry.turn_ref),
command: entry.command.clone(),
cwd: entry.cwd.clone(),
})
session,
turn,
command,
cwd,
))
}
async fn send_input(
@@ -462,13 +449,6 @@ impl UnifiedExecSessionManager {
create_env(&context.turn.shell_environment_policy),
with_escalated_permissions,
justification,
create_approval_requirement_for_command(
&context.turn.exec_policy,
command,
context.turn.approval_policy,
&context.turn.sandbox_policy,
SandboxPermissions::from(with_escalated_permissions.unwrap_or(false)),
),
);
let tool_ctx = ToolCtx {
session: context.session.as_ref(),
@@ -491,13 +471,9 @@ impl UnifiedExecSessionManager {
pub(super) async fn collect_output_until_deadline(
output_buffer: &OutputBuffer,
output_notify: &Arc<Notify>,
cancellation_token: &CancellationToken,
deadline: Instant,
) -> Vec<u8> {
const POST_EXIT_OUTPUT_GRACE: Duration = Duration::from_millis(25);
let mut collected: Vec<u8> = Vec::with_capacity(4096);
let mut exit_signal_received = cancellation_token.is_cancelled();
loop {
let drained_chunks;
let mut wait_for_output = None;
@@ -510,27 +486,15 @@ impl UnifiedExecSessionManager {
}
if drained_chunks.is_empty() {
exit_signal_received |= cancellation_token.is_cancelled();
let remaining = deadline.saturating_duration_since(Instant::now());
if remaining == Duration::ZERO {
break;
}
let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
if exit_signal_received {
let grace = remaining.min(POST_EXIT_OUTPUT_GRACE);
if tokio::time::timeout(grace, notified).await.is_err() {
break;
}
continue;
}
tokio::pin!(notified);
let exit_notified = cancellation_token.cancelled();
tokio::pin!(exit_notified);
tokio::select! {
_ = &mut notified => {}
_ = &mut exit_notified => exit_signal_received = true,
_ = tokio::time::sleep(remaining) => break,
}
continue;
@@ -540,7 +504,6 @@ impl UnifiedExecSessionManager {
collected.extend_from_slice(&chunk);
}
exit_signal_received |= cancellation_token.is_cancelled();
if Instant::now() >= deadline {
break;
}

View File

@@ -3,7 +3,6 @@ use std::time::Duration;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::tools::format_exec_output_str;
@@ -21,11 +20,7 @@ fn format_duration_line(duration: Duration) -> String {
format!("Duration: {duration_seconds:.4} seconds")
}
fn format_user_shell_command_body(
command: &str,
exec_output: &ExecToolCallOutput,
turn_context: &TurnContext,
) -> String {
fn format_user_shell_command_body(command: &str, exec_output: &ExecToolCallOutput) -> String {
let mut sections = Vec::new();
sections.push("<command>".to_string());
sections.push(command.to_string());
@@ -34,33 +29,25 @@ fn format_user_shell_command_body(
sections.push(format!("Exit code: {}", exec_output.exit_code));
sections.push(format_duration_line(exec_output.duration));
sections.push("Output:".to_string());
sections.push(format_exec_output_str(
exec_output,
turn_context.truncation_policy,
));
sections.push(format_exec_output_str(exec_output));
sections.push("</result>".to_string());
sections.join("\n")
}
pub fn format_user_shell_command_record(
command: &str,
exec_output: &ExecToolCallOutput,
turn_context: &TurnContext,
) -> String {
let body = format_user_shell_command_body(command, exec_output, turn_context);
pub fn format_user_shell_command_record(command: &str, exec_output: &ExecToolCallOutput) -> String {
let body = format_user_shell_command_body(command, exec_output);
format!("{USER_SHELL_COMMAND_OPEN}\n{body}\n{USER_SHELL_COMMAND_CLOSE}")
}
pub fn user_shell_command_record_item(
command: &str,
exec_output: &ExecToolCallOutput,
turn_context: &TurnContext,
) -> ResponseItem {
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: format_user_shell_command_record(command, exec_output, turn_context),
text: format_user_shell_command_record(command, exec_output),
}],
}
}
@@ -68,7 +55,6 @@ pub fn user_shell_command_record_item(
#[cfg(test)]
mod tests {
use super::*;
use crate::codex::make_session_and_context;
use crate::exec::StreamOutput;
use pretty_assertions::assert_eq;
@@ -90,8 +76,7 @@ mod tests {
duration: Duration::from_secs(1),
timed_out: false,
};
let (_, turn_context) = make_session_and_context();
let item = user_shell_command_record_item("echo hi", &exec_output, &turn_context);
let item = user_shell_command_record_item("echo hi", &exec_output);
let ResponseItem::Message { content, .. } = item else {
panic!("expected message");
};
@@ -114,8 +99,7 @@ mod tests {
duration: Duration::from_millis(120),
timed_out: false,
};
let (_, turn_context) = make_session_and_context();
let record = format_user_shell_command_record("false", &exec_output, &turn_context);
let record = format_user_shell_command_record("false", &exec_output);
assert_eq!(
record,
"<user_shell_command>\n<command>\nfalse\n</command>\n<result>\nExit code: 42\nDuration: 0.1200 seconds\nOutput:\ncombined output wins\n</result>\n</user_shell_command>"

View File

@@ -1,4 +1,3 @@
## Exploration and reading files
- **Think first.** Before any tool call, decide ALL files/resources you will need.
@@ -11,3 +10,5 @@
* Always maximize parallelism. Never read files one-by-one unless logically unavoidable.
* This concern every read/list/search operations including, but not only, `cat`, `rg`, `sed`, `ls`, `git show`, `nl`, `wc`, ...
* Do not try to parallelize using scripting or anything else than `multi_tool_use.parallel`.
## Editing constraints

View File

@@ -460,13 +460,6 @@ pub fn ev_apply_patch_function_call(call_id: &str, patch: &str) -> Value {
})
}
pub fn ev_shell_command_call(call_id: &str, command: &str) -> Value {
let args = serde_json::json!({ "command": command });
let arguments = serde_json::to_string(&args).expect("serialize shell arguments");
ev_function_call(call_id, "shell_command", &arguments)
}
pub fn ev_apply_patch_shell_call(call_id: &str, patch: &str) -> Value {
let args = serde_json::json!({ "command": ["apply_patch", patch] });
let arguments = serde_json::to_string(&args).expect("serialize apply_patch arguments");

View File

@@ -1121,8 +1121,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"used_percent": 40.0,
"window_minutes": 60,
"resets_at": 1704074400
},
"credits": null
}
}
})
);
@@ -1156,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"reasoning_output_tokens": 0,
"total_tokens": 123
},
// Default model is gpt-5.1-codex-max in tests → 95% usable context window
// Default model is gpt-5.1-codex in tests → 95% usable context window
"model_context_window": 258400
},
"rate_limits": {
@@ -1169,8 +1168,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"used_percent": 40.0,
"window_minutes": 60,
"resets_at": 1704074400
},
"credits": null
}
}
})
);
@@ -1240,8 +1238,7 @@ async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
"used_percent": 87.5,
"window_minutes": 60,
"resets_at": null
},
"credits": null
}
});
let submission_id = codex

View File

@@ -13,13 +13,10 @@ use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -128,72 +125,6 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_runs_automatically() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = TestCodexHarness::with_builder(
test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
config.features.enable(Feature::RemoteCompaction);
}),
)
.await?;
let codex = harness.test().codex.clone();
mount_sse_once(
harness.server(),
sse(vec![
responses::ev_shell_command_call("m1", "echo 'hi'"),
responses::ev_completed_with_tokens("resp-1", 100000000), // over token limit
]),
)
.await;
let responses_mock = mount_sse_once(
harness.server(),
responses::sse(vec![
responses::ev_assistant_message("m2", "AFTER_COMPACT_REPLY"),
responses::ev_completed("resp-2"),
]),
)
.await;
let compacted_history = vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "REMOTE_COMPACTED_SUMMARY".to_string(),
}],
}];
let compact_mock = responses::mount_compact_json_once(
harness.server(),
serde_json::json!({ "output": compacted_history.clone() }),
)
.await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello remote compact".into(),
}],
})
.await?;
let message = wait_for_event_match(&codex, |ev| match ev {
EventMsg::AgentMessage(ev) => Some(ev.message.clone()),
_ => None,
})
.await;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
assert_eq!(message, "Compact task completed");
assert_eq!(compact_mock.requests().len(), 1);
let follow_up_body = responses_mock.single_request().body_json().to_string();
assert!(follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()> {
skip_if_no_network!(Ok(()));

View File

@@ -1,101 +0,0 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::json;
use std::fs;
#[tokio::test]
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
let mut builder = test_codex().with_config(|config| {
let policy_path = config.codex_home.join("policy").join("policy.codexpolicy");
fs::create_dir_all(
policy_path
.parent()
.expect("policy directory must have a parent"),
)
.expect("create policy directory");
fs::write(
&policy_path,
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
)
.expect("write policy file");
});
let server = start_mock_server().await;
let test = builder.build(&server).await?;
let call_id = "shell-forbidden";
let args = json!({
"command": ["echo", "blocked"],
"timeout_ms": 1_000,
});
mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run shell command".into(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::ExecCommandEnd(_))
})
.await
else {
unreachable!()
};
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TaskComplete(_))
})
.await;
assert!(
end.aggregated_output
.contains("execpolicy forbids this command"),
"unexpected output: {}",
end.aggregated_output
);
Ok(())
}

View File

@@ -28,7 +28,6 @@ mod compact_remote;
mod compact_resume_fork;
mod deprecation_notice;
mod exec;
mod exec_policy;
mod fork_conversation;
mod grep_files;
mod items;
@@ -45,7 +44,6 @@ mod resume;
mod review;
mod rmcp_client;
mod rollout_list_find;
mod saved_sessions;
mod seatbelt;
mod shell_serialization;
mod stream_error_allows_next_turn;

View File

@@ -102,6 +102,19 @@ async fn model_selects_expected_tools() {
"codex-mini-latest should expose the local shell tool",
);
let o3_tools = collect_tool_identifiers_for_model("o3").await;
assert_eq!(
o3_tools,
vec![
"shell".to_string(),
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string()
],
"o3 should expose the generic shell tool",
);
let gpt5_codex_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
assert_eq!(
gpt5_codex_tools,
@@ -130,19 +143,6 @@ async fn model_selects_expected_tools() {
"gpt-5.1-codex should expose the apply_patch tool",
);
let gpt5_tools = collect_tool_identifiers_for_model("gpt-5").await;
assert_eq!(
gpt5_tools,
vec![
"shell".to_string(),
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
],
"gpt-5 should expose the apply_patch tool",
);
let gpt51_tools = collect_tool_identifiers_for_model("gpt-5.1").await;
assert_eq!(
gpt51_tools,

View File

@@ -1,9 +1,9 @@
#![allow(clippy::unwrap_used)]
use codex_core::config::OPENAI_DEFAULT_MODEL;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
@@ -19,6 +19,7 @@ use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use std::collections::HashMap;
use tempfile::TempDir;
fn text_user_input(text: String) -> serde_json::Value {
@@ -155,15 +156,61 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let expected_tools_names = vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
];
// our internal implementation is responsible for keeping tools in sync
// with the OpenAI schema, so we just verify the tool presence here
let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
(
"gpt-5.1",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"view_image",
],
),
(
"gpt-5.1",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
],
),
(
"gpt-5.1-codex",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
],
),
(
"gpt-5.1-codex",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
],
),
]);
let expected_tools_names = tools_by_model
.get(OPENAI_DEFAULT_MODEL)
.unwrap_or_else(|| panic!("expected tools to be defined for model {OPENAI_DEFAULT_MODEL}"))
.as_slice();
let body0 = req1.single_request().body_json();
let expected_instructions = if expected_tools_names.contains(&"apply_patch") {
@@ -180,14 +227,14 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
body0["instructions"],
serde_json::json!(expected_instructions),
);
assert_tool_names(&body0, &expected_tools_names);
assert_tool_names(&body0, expected_tools_names);
let body1 = req2.single_request().body_json();
assert_eq!(
body1["instructions"],
serde_json::json!(expected_instructions),
);
assert_tool_names(&body1, &expected_tools_names);
assert_tool_names(&body1, expected_tools_names);
Ok(())
}
@@ -373,89 +420,6 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn override_before_first_turn_emits_environment_context() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let req = mount_sse_once(&server, sse_completed("resp-1")).await;
let TestCodex { codex, .. } = test_codex().build(&server).await?;
codex
.submit(Op::OverrideTurnContext {
cwd: None,
approval_policy: Some(AskForApproval::Never),
sandbox_policy: None,
model: None,
effort: None,
summary: None,
})
.await?;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "first message".into(),
}],
})
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let body = req.single_request().body_json();
let input = body["input"]
.as_array()
.expect("input array must be present");
assert!(
!input.is_empty(),
"expected at least environment context and user message"
);
let env_msg = &input[1];
let env_text = env_msg["content"][0]["text"]
.as_str()
.expect("environment context text");
assert!(
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
"second entry should be environment context, got: {env_text}"
);
assert!(
env_text.contains("<approval_policy>never</approval_policy>"),
"environment context should reflect overridden approval policy: {env_text}"
);
let env_count = input
.iter()
.filter(|msg| {
msg["content"]
.as_array()
.and_then(|content| {
content.iter().find(|item| {
item["type"].as_str() == Some("input_text")
&& item["text"]
.as_str()
.map(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
.unwrap_or(false)
})
})
.is_some()
})
.count();
assert_eq!(
env_count, 2,
"environment context should appear exactly twice, found {env_count}"
);
let user_msg = &input[2];
let user_text = user_msg["content"][0]["text"]
.as_str()
.expect("user message text");
assert_eq!(user_text, "first message");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));

View File

@@ -1,457 +0,0 @@
#![allow(clippy::expect_used)]
use anyhow::Result;
use codex_core::AuthManager;
use codex_core::CodexAuth;
use codex_core::CodexConversation;
use codex_core::ConversationManager;
use codex_core::SavedSessionEntry;
use codex_core::build_saved_session_entry;
use codex_core::config::Config;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_core::protocol::SaveSessionResponseEvent;
use codex_core::protocol::SessionSource;
use codex_core::resolve_saved_session;
use codex_core::upsert_saved_session;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::Path;
use std::sync::Arc;
fn completion_body(idx: usize, message: &str) -> String {
let resp_id = format!("resp-{idx}");
let msg_id = format!("msg-{idx}");
sse(vec![
ev_response_created(&resp_id),
ev_assistant_message(&msg_id, message),
ev_completed(&resp_id),
])
}
fn rollout_lines(path: &Path) -> Vec<RolloutLine> {
let text = std::fs::read_to_string(path).expect("read rollout");
text.lines()
.filter_map(|line| {
if line.trim().is_empty() {
return None;
}
let value: serde_json::Value = serde_json::from_str(line).expect("rollout line json");
Some(serde_json::from_value::<RolloutLine>(value).expect("rollout line"))
})
.collect()
}
fn rollout_items_without_meta(path: &Path) -> Vec<RolloutItem> {
rollout_lines(path)
.into_iter()
.filter_map(|line| match line.item {
RolloutItem::SessionMeta(_) => None,
other => Some(other),
})
.collect()
}
fn session_meta_count(path: &Path) -> usize {
rollout_lines(path)
.iter()
.filter(|line| matches!(line.item, RolloutItem::SessionMeta(_)))
.count()
}
async fn submit_text(codex: &Arc<CodexConversation>, text: &str) -> Result<()> {
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: text.to_string(),
}],
})
.await?;
let _ = wait_for_event(codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
Ok(())
}
async fn save_session(
name: &str,
codex: &Arc<CodexConversation>,
config: &Config,
) -> Result<SavedSessionEntry> {
codex.flush_rollout().await?;
codex.set_session_name(Some(name.to_string())).await?;
let entry =
build_saved_session_entry(name.to_string(), codex.rollout_path(), codex.model().await)
.await?;
upsert_saved_session(&config.codex_home, entry.clone()).await?;
Ok(entry)
}
async fn save_session_via_op(
codex: &Arc<CodexConversation>,
name: &str,
) -> Result<SaveSessionResponseEvent> {
codex
.submit(Op::SaveSession {
name: name.to_string(),
})
.await?;
let response: SaveSessionResponseEvent = wait_for_event_match(codex, |ev| match ev {
EventMsg::SaveSessionResponse(resp) => Some(resp.clone()),
_ => None,
})
.await;
Ok(response)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn save_and_resume_by_name() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(&server, vec![completion_body(1, "initial")]).await;
let mut builder = test_codex();
let initial = builder.build(&server).await?;
submit_text(&initial.codex, "first turn").await?;
let name = "alpha";
let entry = save_session(name, &initial.codex, &initial.config).await?;
let resolved = resolve_saved_session(&initial.config.codex_home, name)
.await?
.expect("saved session");
assert_eq!(entry, resolved);
assert_eq!(session_meta_count(&entry.rollout_path), 1);
let saved_items = rollout_items_without_meta(&entry.rollout_path);
let resumed = builder
.resume(&server, initial.home.clone(), entry.rollout_path.clone())
.await?;
assert_eq!(resumed.session_configured.session_id, entry.conversation_id);
let resumed_items = rollout_items_without_meta(&resumed.session_configured.rollout_path);
assert_eq!(
serde_json::to_value(saved_items)?,
serde_json::to_value(resumed_items)?
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn save_session_op_persists_and_emits_response() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(&server, vec![completion_body(1, "initial")]).await;
let mut builder = test_codex();
let initial = builder.build(&server).await?;
submit_text(&initial.codex, "first turn").await?;
let name = "via-op";
let response = save_session_via_op(&initial.codex, name).await?;
assert_eq!(response.name, name);
assert_eq!(
response.conversation_id,
initial.session_configured.session_id
);
assert!(response.rollout_path.exists());
let resolved = resolve_saved_session(&initial.config.codex_home, name)
.await?
.expect("saved session");
assert_eq!(resolved.rollout_path, response.rollout_path);
assert_eq!(resolved.conversation_id, response.conversation_id);
assert_eq!(session_meta_count(&resolved.rollout_path), 1);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn fork_from_identifier_after_save_op() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(
&server,
vec![
completion_body(1, "seed"),
completion_body(2, "fork-extra-1"),
completion_body(3, "fork-extra-2"),
],
)
.await;
let mut builder = test_codex();
let initial = builder.build(&server).await?;
submit_text(&initial.codex, "seeded").await?;
let name = "forkable-op";
let response = save_session_via_op(&initial.codex, name).await?;
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec);
let forked = conversation_manager
.fork_from_identifier(initial.config.clone(), name, auth_manager)
.await?;
assert_ne!(
forked.session_configured.session_id,
response.conversation_id
);
// Record the baseline rollout for the saved session.
let base_items = rollout_items_without_meta(&response.rollout_path);
// Send additional turns to the forked conversation and flush.
submit_text(&forked.conversation, "fork one").await?;
submit_text(&forked.conversation, "fork two").await?;
forked.conversation.flush_rollout().await?;
// Re-read both rollouts: source should remain unchanged.
let base_after = rollout_items_without_meta(&response.rollout_path);
assert_eq!(
serde_json::to_value(&base_items)?,
serde_json::to_value(&base_after)?
);
// Forked rollout should extend the baseline.
let fork_items = rollout_items_without_meta(&forked.conversation.rollout_path());
assert!(
fork_items.len() > base_items.len(),
"expected forked rollout to contain additional items"
);
let fork_prefix: Vec<_> = fork_items.iter().take(base_items.len()).cloned().collect();
assert_eq!(
serde_json::to_value(&base_items)?,
serde_json::to_value(&fork_prefix)?,
"forked rollout should extend the baseline history"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn save_and_fork_by_name() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(&server, vec![completion_body(1, "base")]).await;
let mut builder = test_codex();
let initial = builder.build(&server).await?;
submit_text(&initial.codex, "original").await?;
let entry = save_session("forkable", &initial.codex, &initial.config).await?;
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec);
let forked = conversation_manager
.fork_from_rollout(
initial.config.clone(),
entry.rollout_path.clone(),
auth_manager,
)
.await?;
assert_ne!(forked.session_configured.session_id, entry.conversation_id);
assert_ne!(forked.conversation.rollout_path(), entry.rollout_path);
assert_eq!(session_meta_count(&forked.conversation.rollout_path()), 1);
let base_items = rollout_items_without_meta(&entry.rollout_path);
let fork_items = rollout_items_without_meta(&forked.conversation.rollout_path());
assert_eq!(
serde_json::to_value(base_items)?,
serde_json::to_value(fork_items)?
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn forked_messages_do_not_touch_original() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(
&server,
vec![
completion_body(1, "base"),
completion_body(2, "fork-1"),
completion_body(3, "fork-2"),
],
)
.await;
let mut builder = test_codex();
let initial = builder.build(&server).await?;
submit_text(&initial.codex, "first").await?;
let entry = save_session("branch", &initial.codex, &initial.config).await?;
let baseline_items = rollout_items_without_meta(&entry.rollout_path);
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec);
let forked = conversation_manager
.fork_from_rollout(
initial.config.clone(),
entry.rollout_path.clone(),
auth_manager.clone(),
)
.await?;
submit_text(&forked.conversation, "fork message one").await?;
submit_text(&forked.conversation, "fork message two").await?;
let resumed = builder
.resume(&server, initial.home.clone(), entry.rollout_path.clone())
.await?;
let resumed_items = rollout_items_without_meta(&resumed.session_configured.rollout_path);
assert_eq!(
serde_json::to_value(baseline_items.clone())?,
serde_json::to_value(resumed_items)?
);
assert_eq!(
serde_json::to_value(baseline_items)?,
serde_json::to_value(rollout_items_without_meta(&entry.rollout_path))?
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn resumed_messages_are_present_in_new_fork() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(
&server,
vec![
completion_body(1, "original"),
completion_body(2, "fork-extra"),
completion_body(3, "resumed-extra"),
],
)
.await;
let mut builder = test_codex();
let initial = builder.build(&server).await?;
submit_text(&initial.codex, "start").await?;
let entry = save_session("seed", &initial.codex, &initial.config).await?;
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy"));
let conversation_manager = ConversationManager::new(auth_manager.clone(), SessionSource::Exec);
let forked = conversation_manager
.fork_from_rollout(
initial.config.clone(),
entry.rollout_path.clone(),
auth_manager.clone(),
)
.await?;
submit_text(&forked.conversation, "fork only").await?;
let resumed = builder
.resume(&server, initial.home.clone(), entry.rollout_path.clone())
.await?;
submit_text(&resumed.codex, "resumed addition").await?;
resumed.codex.flush_rollout().await?;
let updated_base_items = rollout_items_without_meta(&entry.rollout_path);
let fork_again = conversation_manager
.fork_from_rollout(
initial.config.clone(),
entry.rollout_path.clone(),
auth_manager,
)
.await?;
let fork_again_items = rollout_items_without_meta(&fork_again.conversation.rollout_path());
assert_eq!(
serde_json::to_value(updated_base_items)?,
serde_json::to_value(fork_again_items)?
);
assert_eq!(
session_meta_count(&fork_again.conversation.rollout_path()),
1
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn duplicate_name_overwrites_entry() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(
&server,
vec![completion_body(1, "one"), completion_body(2, "two")],
)
.await;
let mut builder = test_codex();
let first = builder.build(&server).await?;
submit_text(&first.codex, "first session").await?;
let name = "shared";
let entry_one = save_session(name, &first.codex, &first.config).await?;
let second = builder.build(&server).await?;
submit_text(&second.codex, "second session").await?;
let entry_two = save_session(name, &second.codex, &second.config).await?;
let resolved = resolve_saved_session(&second.config.codex_home, name)
.await?
.expect("latest entry present");
assert_eq!(resolved, entry_two);
assert_ne!(resolved.conversation_id, entry_one.conversation_id);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn same_session_multiple_names() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
mount_sse_sequence(&server, vec![completion_body(1, "hello")]).await;
let mut builder = test_codex();
let session = builder.build(&server).await?;
submit_text(&session.codex, "save twice").await?;
let entry_first = save_session("first", &session.codex, &session.config).await?;
let entry_second = save_session("second", &session.codex, &session.config).await?;
let resolved_first = resolve_saved_session(&session.config.codex_home, "first")
.await?
.expect("first entry");
let resolved_second = resolve_saved_session(&session.config.codex_home, "second")
.await?
.expect("second entry");
assert_eq!(entry_first.conversation_id, entry_second.conversation_id);
assert_eq!(
resolved_first.conversation_id,
resolved_second.conversation_id
);
assert_eq!(resolved_first.rollout_path, resolved_second.rollout_path);
let names: serde_json::Value = json!([entry_first.name, entry_second.name]);
assert_eq!(names, json!(["first", "second"]));
Ok(())
}

View File

@@ -101,6 +101,7 @@ fn shell_responses(
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_stays_json_without_freeform_apply_patch(
output_type: ShellModelOutput,
@@ -212,6 +213,7 @@ freeform shell
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_preserves_fixture_json_without_serialization(
output_type: ShellModelOutput,
@@ -407,6 +409,7 @@ $"#;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -416,7 +419,6 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.tool_output_token_limit = Some(200);
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
@@ -457,7 +459,10 @@ Output:
4
5
6
.*…46 tokens truncated….*
.*
\[\.{3} omitted \d+ of 400 lines \.{3}\]
.*
396
397
398
@@ -755,7 +760,7 @@ Output:
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_output_is_freeform() -> Result<()> {
async fn shell_command_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -807,114 +812,6 @@ shell command
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_output_is_not_truncated_under_10k_bytes() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_model("gpt-5.1")
.with_config(move |config| {
config.features.enable(Feature::ShellCommandTool);
});
let test = builder.build(&server).await?;
let call_id = "shell-command";
let args = json!({
"command": "perl -e 'print \"1\" x 10000'",
"timeout_ms": 1000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "shell_command done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
"run the shell_command script in the user's shell",
SandboxPolicy::DangerFullAccess,
)
.await?;
let req = mock
.last_request()
.expect("shell_command output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("shell_command output string");
let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
1{10000}$";
assert_regex_match(expected_pattern, output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_model("gpt-5.1")
.with_config(move |config| {
config.features.enable(Feature::ShellCommandTool);
});
let test = builder.build(&server).await?;
let call_id = "shell-command";
let args = json!({
"command": "perl -e 'print \"1\" x 10001'",
"timeout_ms": 1000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "shell_command done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
"run the shell_command script in the user's shell",
SandboxPolicy::DangerFullAccess,
)
.await?;
let req = mock
.last_request()
.expect("shell_command output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("shell_command output string");
let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
1*…1 chars truncated…1*$";
assert_regex_match(expected_pattern, output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn local_shell_call_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));

View File

@@ -98,160 +98,6 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
Ok(())
}
// Verifies that a standard tool call (shell) exceeding the model formatting
// limits is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
config.tool_output_token_limit = Some(100_000);
});
let fixture = builder.build(&server).await?;
let call_id = "shell-too-large";
let args = if cfg!(windows) {
serde_json::json!({
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 100000"],
"timeout_ms": 5_000,
})
};
// First response: model tells us to run the tool; second: complete the turn.
mount_sse_once(
&server,
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once(
&server,
sse(vec![
responses::ev_assistant_message("msg-1", "done"),
responses::ev_completed("resp-2"),
]),
)
.await;
fixture
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
.await?;
// Inspect what we sent back to the model; it should contain a truncated
// function_call_output for the shell call.
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for shell call")?;
let output = output.replace("\r\n", "\n");
// Expect plain text (not JSON) containing the entire shell output.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated shell output to be plain text"
);
assert_eq!(output.len(), 400097, "we should be almost 100k tokens");
assert!(
!output.contains("tokens truncated"),
"shell output should not contain tokens truncated marker: {output}"
);
Ok(())
}
// Verifies that a standard tool call (shell) exceeding the model formatting
// limits is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_model("gpt-5.1");
let fixture = builder.build(&server).await?;
let call_id = "shell-too-large";
let args = if cfg!(windows) {
serde_json::json!({
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 100000"],
"timeout_ms": 5_000,
})
};
// First response: model tells us to run the tool; second: complete the turn.
mount_sse_once(
&server,
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once(
&server,
sse(vec![
responses::ev_assistant_message("msg-1", "done"),
responses::ev_completed("resp-2"),
]),
)
.await;
fixture
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
.await?;
// Inspect what we sent back to the model; it should contain a truncated
// function_call_output for the shell call.
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for shell call")?;
let output = output.replace("\r\n", "\n");
// Expect plain text (not JSON) containing the entire shell output.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated shell output to be plain text"
);
assert_eq!(output.len(), 9976); // ~10k characters
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\nOutput:\n.*?…\d+ chars truncated….*$"#;
assert_regex_match(truncated_pattern, &output);
Ok(())
}
// Verifies that a standard tool call (shell) exceeding the model formatting
// limits is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -274,13 +120,13 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
"for ($i=1; $i -le 400; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 100000"],
"command": ["/bin/sh", "-c", "seq 1 400"],
"timeout_ms": 5_000,
})
};
@@ -316,14 +162,14 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
.context("function_call_output present for shell call")?;
let output = output.replace("\r\n", "\n");
// Expect plain text (not JSON) containing the entire shell output.
// Expect plain text (not JSON) with truncation markers and line elision.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated shell output to be plain text"
);
let truncated_pattern = r#"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Total output lines: 100000
Wall time: .* seconds
Total output lines: 400
Output:
1
2
@@ -331,9 +177,15 @@ Output:
4
5
6
.*…137224 tokens truncated.*
99999
100000
.*
\[\.{3} omitted 144 of 400 lines \.{3}\]
.*
396
397
398
399
400
$"#;
assert_regex_match(truncated_pattern, &output);
@@ -359,13 +211,13 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
"for ($i=1; $i -le 2000; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 10000"],
"command": ["/bin/sh", "-c", "seq 1 2000"],
"timeout_ms": 5_000,
})
};
@@ -397,11 +249,11 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
.function_call_output_text(call_id)
.context("function_call_output present for shell call")?;
let truncation_markers = output.matches("tokens truncated").count();
let total_line_headers = output.matches("Total output lines:").count();
assert_eq!(
truncation_markers, 1,
"shell output should carry only one truncation marker: {output}"
total_line_headers, 1,
"shell output should carry only one truncation header: {output}"
);
Ok(())
@@ -469,7 +321,6 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
disabled_tools: None,
},
);
config.tool_output_token_limit = Some(500);
});
let fixture = builder.build(&server).await?;
@@ -486,6 +337,12 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
.function_call_output_text(call_id)
.context("function_call_output present for rmcp call")?;
// Expect plain text with token-based truncation marker; the original JSON body
// is truncated in the middle of the echo string.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated MCP output to be plain text"
);
assert!(
!output.contains("Total output lines:"),
"MCP output should not include line-based truncation header: {output}"
@@ -493,7 +350,6 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
assert_regex_match(truncated_pattern, &output);
assert!(output.len() < 2500, "{}", output.len());
Ok(())
}
@@ -645,9 +501,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5\\n.*?…\d+ tokens truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
assert_regex_match(pattern, &output);
assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output);
Ok(())
}
@@ -698,16 +552,14 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5.*?…\d+ chars truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
assert_regex_match(pattern, &output);
assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output);
Ok(())
}
// Shell tool output should remain intact when the config opts into a large token budget.
// Overriding config with a large token budget should avoid truncation.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
async fn large_budget_avoids_truncation() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -724,7 +576,6 @@ async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
"command": ["/bin/sh", "-c", "seq 1 1000"],
"timeout_ms": 5_000,
});
let expected_body: String = (1..=1000).map(|i| format!("{i}\n")).collect();
mount_sse_once(
&server,
@@ -756,10 +607,6 @@ async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
assert!(
output.ends_with(&expected_body),
"expected entire shell output when budget increased: {output}"
);
assert!(
!output.contains("truncated"),
"output should remain untruncated with ample budget"
@@ -767,101 +614,3 @@ async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
Ok(())
}
// MCP server output should also remain intact when the config increases the token limit.
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn mcp_tool_call_output_not_truncated_with_custom_limit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let call_id = "rmcp-untruncated";
let server_name = "rmcp";
let tool_name = format!("mcp__{server_name}__echo");
let large_msg = "a".repeat(80_000);
let args_json = serde_json::json!({ "message": large_msg });
mount_sse_once(
&server,
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once(
&server,
sse(vec![
responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
responses::ev_completed("resp-2"),
]),
)
.await;
let rmcp_test_server_bin = CargoBuild::new()
.package("codex-rmcp-client")
.bin("test_stdio_server")
.run()?
.path()
.to_string_lossy()
.into_owned();
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::RmcpClient);
config.tool_output_token_limit = Some(50_000);
config.mcp_servers.insert(
server_name.to_string(),
codex_core::config::types::McpServerConfig {
transport: codex_core::config::types::McpServerTransportConfig::Stdio {
command: rmcp_test_server_bin,
args: Vec::new(),
env: None,
env_vars: Vec::new(),
cwd: None,
},
enabled: true,
startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
},
);
});
let fixture = builder.build(&server).await?;
fixture
.submit_turn_with_policy(
"call the rmcp echo tool with a very large message",
SandboxPolicy::ReadOnly,
)
.await?;
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for rmcp call")?;
let parsed: Value = serde_json::from_str(&output)?;
assert_eq!(
output.len(),
80031,
"parsed MCP output should retain its serialized length"
);
let expected_echo = format!("ECHOING: {large_msg}");
let echo_str = parsed["echo"]
.as_str()
.context("echo field should be a string in rmcp echo output")?;
assert_eq!(
echo_str.len(),
expected_echo.len(),
"echo length should match"
);
assert_eq!(echo_str, expected_echo);
assert!(
!output.contains("truncated"),
"output should not include truncation markers when limit is raised: {output}"
);
Ok(())
}

View File

@@ -904,98 +904,6 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_respects_early_exit_notifications() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.features.enable(Feature::UnifiedExec);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "uexec-early-exit";
let args = serde_json::json!({
"cmd": "sleep 0.05",
"yield_time_ms": 31415,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "watch early exit timing".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let outputs = collect_tool_outputs(&bodies)?;
let output = outputs
.get(call_id)
.expect("missing early exit unified_exec output");
assert!(
output.session_id.is_none(),
"short-lived process should not keep a session alive"
);
assert_eq!(
output.exit_code,
Some(0),
"short-lived process should exit successfully"
);
let wall_time = output.wall_time_seconds;
assert!(
wall_time < 0.75,
"wall_time should reflect early exit rather than the full yield time; got {wall_time}"
);
assert!(
output.output.is_empty(),
"sleep command should not emit output, got {:?}",
output.output
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -1678,7 +1586,7 @@ PY
let large_output = outputs.get(call_id).expect("missing large output summary");
let output_text = large_output.output.replace("\r\n", "\n");
let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*\d+ tokens truncated….*(token token \n){5,}$";
let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}$"#;
assert_regex_match(truncated_pattern, &output_text);
let original_tokens = large_output

View File

@@ -207,16 +207,10 @@ async fn user_shell_command_history_is_persisted_and_shared_with_model() -> anyh
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[cfg(not(target_os = "windows"))] // TODO: unignore on windows
async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<()> {
let server = responses::start_mock_server().await;
let builder = core_test_support::test_codex::test_codex();
let test = builder
.with_config(|config| {
config.tool_output_token_limit = Some(100);
})
.build(&server)
.await?;
let mut builder = core_test_support::test_codex::test_codex();
let test = builder.build(&server).await?;
#[cfg(windows)]
let command = r#"for ($i=1; $i -le 400; $i++) { Write-Output $i }"#.to_string();
@@ -255,10 +249,10 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(
.expect("command message recorded in request");
let command_message = command_message.replace("\r\n", "\n");
let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
let head = (1..=128).map(|i| format!("{i}\n")).collect::<String>();
let tail = (273..=400).map(|i| format!("{i}\n")).collect::<String>();
let truncated_body =
format!("Total output lines: 400\n\n{head}70…273 tokens truncated…351\n{tail}");
format!("Total output lines: 400\n\n{head}\n[... omitted 144 of 400 lines ...]\n\n{tail}");
let escaped_command = escape(&command);
let escaped_truncated_body = escape(&truncated_body);
let expected_pattern = format!(
@@ -276,7 +270,6 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.tool_output_token_limit = Some(100);
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");

View File

@@ -4,23 +4,14 @@ name = "codex-exec-server"
version = { workspace = true }
[[bin]]
name = "codex-execve-wrapper"
path = "src/bin/main_execve_wrapper.rs"
[[bin]]
name = "codex-exec-mcp-server"
path = "src/bin/main_mcp_server.rs"
[lib]
name = "codex_exec_server"
path = "src/lib.rs"
name = "codex-exec-server"
path = "src/main.rs"
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
async-trait = { workspace = true }
clap = { workspace = true, features = ["derive"] }
codex-core = { workspace = true }
libc = { workspace = true }
@@ -40,7 +31,6 @@ rmcp = { workspace = true, default-features = false, features = [
] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
shlex = { workspace = true }
socket2 = { workspace = true }
tokio = { workspace = true, features = [
"io-std",

View File

@@ -1,8 +0,0 @@
#[cfg(not(unix))]
fn main() {
eprintln!("codex-execve-wrapper is only implemented for UNIX");
std::process::exit(1);
}
#[cfg(unix)]
pub use codex_exec_server::main_execve_wrapper as main;

View File

@@ -1,8 +0,0 @@
#[cfg(not(unix))]
fn main() {
eprintln!("codex-exec-mcp-server is only implemented for UNIX");
std::process::exit(1);
}
#[cfg(unix)]
pub use codex_exec_server::main_mcp_server as main;

View File

@@ -1,8 +0,0 @@
#[cfg(unix)]
mod posix;
#[cfg(unix)]
pub use posix::main_execve_wrapper;
#[cfg(unix)]
pub use posix::main_mcp_server;

View File

@@ -0,0 +1,11 @@
#[cfg(target_os = "windows")]
fn main() {
eprintln!("codex-exec-server is not implemented on Windows targets");
std::process::exit(1);
}
#[cfg(not(target_os = "windows"))]
mod posix;
#[cfg(not(target_os = "windows"))]
pub use posix::main;

View File

@@ -56,114 +56,109 @@
//! o<-----x
//!
use std::path::Path;
use std::path::PathBuf;
use clap::Parser;
use clap::Subcommand;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::{self};
use crate::posix::mcp_escalation_policy::ExecPolicyOutcome;
use crate::posix::escalate_protocol::EscalateAction;
use crate::posix::escalate_server::EscalateServer;
mod escalate_client;
mod escalate_protocol;
mod escalate_server;
mod escalation_policy;
mod mcp;
mod mcp_escalation_policy;
mod socket;
/// Default value of --execve option relative to the current executable.
/// Note this must match the name of the binary as specified in Cargo.toml.
const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper";
#[derive(Parser)]
struct McpServerCli {
/// Executable to delegate execve(2) calls to in Bash.
#[arg(long = "execve")]
execve_wrapper: Option<PathBuf>,
/// Path to Bash that has been patched to support execve() wrapping.
#[arg(long = "bash")]
bash_path: Option<PathBuf>,
}
#[tokio::main]
pub async fn main_mcp_server() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.with_writer(std::io::stderr)
.with_ansi(false)
.init();
let cli = McpServerCli::parse();
let execve_wrapper = match cli.execve_wrapper {
Some(path) => path,
None => {
let cwd = std::env::current_exe()?;
cwd.parent()
.map(|p| p.join(CODEX_EXECVE_WRAPPER_EXE_NAME))
.ok_or_else(|| {
anyhow::anyhow!("failed to determine execve wrapper path from current exe")
})?
}
};
let bash_path = match cli.bash_path {
Some(path) => path,
None => mcp::get_bash_path()?,
};
tracing::info!("Starting MCP server");
let service = mcp::serve(bash_path, execve_wrapper, dummy_exec_policy)
.await
.inspect_err(|e| {
tracing::error!("serving error: {:?}", e);
})?;
service.waiting().await?;
Ok(())
fn dummy_exec_policy(file: &Path, argv: &[String], _workdir: &Path) -> EscalateAction {
// TODO: execpolicy
if file == Path::new("/opt/homebrew/bin/gh")
&& let [_, arg1, arg2, ..] = argv
&& arg1 == "issue"
&& arg2 == "list"
{
return EscalateAction::Escalate;
}
EscalateAction::Run
}
#[derive(Parser)]
pub struct ExecveWrapperCli {
#[command(version)]
pub struct Cli {
#[command(subcommand)]
subcommand: Option<Commands>,
}
#[derive(Subcommand)]
enum Commands {
Escalate(EscalateArgs),
ShellExec(ShellExecArgs),
}
/// Invoked from within the sandbox to (potentially) escalate permissions.
#[derive(Parser, Debug)]
struct EscalateArgs {
file: String,
#[arg(trailing_var_arg = true)]
argv: Vec<String>,
}
impl EscalateArgs {
/// This is the escalate client. It talks to the escalate server to determine whether to exec()
/// the command directly or to proxy to the escalate server.
async fn run(self) -> anyhow::Result<i32> {
let EscalateArgs { file, argv } = self;
escalate_client::run(file, argv).await
}
}
/// Debugging command to emulate an MCP "shell" tool call.
#[derive(Parser, Debug)]
struct ShellExecArgs {
command: String,
}
#[tokio::main]
pub async fn main_execve_wrapper() -> anyhow::Result<()> {
pub async fn main() -> anyhow::Result<()> {
let cli = Cli::parse();
tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.with_writer(std::io::stderr)
.with_ansi(false)
.init();
let ExecveWrapperCli { file, argv } = ExecveWrapperCli::parse();
let exit_code = escalate_client::run(file, argv).await?;
std::process::exit(exit_code);
}
// TODO: replace with execpolicy
fn dummy_exec_policy(file: &Path, argv: &[String], _workdir: &Path) -> ExecPolicyOutcome {
if file.ends_with("rm") {
ExecPolicyOutcome::Forbidden
} else if file.ends_with("git") {
ExecPolicyOutcome::Prompt {
run_with_escalated_permissions: false,
match cli.subcommand {
Some(Commands::Escalate(args)) => {
std::process::exit(args.run().await?);
}
} else if file == Path::new("/opt/homebrew/bin/gh")
&& let [_, arg1, arg2, ..] = argv
&& arg1 == "issue"
&& arg2 == "list"
{
ExecPolicyOutcome::Allow {
run_with_escalated_permissions: true,
Some(Commands::ShellExec(args)) => {
let bash_path = mcp::get_bash_path()?;
let escalate_server = EscalateServer::new(bash_path, dummy_exec_policy);
let result = escalate_server
.exec(
args.command.clone(),
std::env::vars().collect(),
std::env::current_dir()?,
None,
)
.await?;
println!("{result:?}");
std::process::exit(result.exit_code);
}
} else {
ExecPolicyOutcome::Allow {
run_with_escalated_permissions: false,
None => {
let bash_path = mcp::get_bash_path()?;
tracing::info!("Starting MCP server");
let service = mcp::serve(bash_path, dummy_exec_policy)
.await
.inspect_err(|e| {
tracing::error!("serving error: {:?}", e);
})?;
service.waiting().await?;
Ok(())
}
}
}

View File

@@ -98,12 +98,5 @@ pub(crate) async fn run(file: String, argv: Vec<String>) -> anyhow::Result<i32>
Err(err.into())
}
EscalateAction::Deny { reason } => {
match reason {
Some(reason) => eprintln!("Execution denied: {reason}"),
None => eprintln!("Execution denied"),
}
Ok(1)
}
}
}

View File

@@ -34,8 +34,6 @@ pub(super) enum EscalateAction {
Run,
/// The command should be escalated to the server for execution.
Escalate,
/// The command should not be executed.
Deny { reason: Option<String> },
}
/// The client sends this to the server to forward its open FDs.

View File

@@ -1,8 +1,8 @@
use std::collections::HashMap;
use std::os::fd::AsRawFd;
use std::path::Path;
use std::path::PathBuf;
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Context as _;
@@ -21,26 +21,25 @@ use crate::posix::escalate_protocol::EscalateRequest;
use crate::posix::escalate_protocol::EscalateResponse;
use crate::posix::escalate_protocol::SuperExecMessage;
use crate::posix::escalate_protocol::SuperExecResult;
use crate::posix::escalation_policy::EscalationPolicy;
use crate::posix::socket::AsyncDatagramSocket;
use crate::posix::socket::AsyncSocket;
/// This is the policy which decides how to handle an exec() call.
///
/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
/// `argv` is the argv, including the program name (`argv[0]`).
/// `workdir` is the absolute, canonical path to the working directory in which to execute the
/// command.
pub(crate) type ExecPolicy = fn(file: &Path, argv: &[String], workdir: &Path) -> EscalateAction;
pub(crate) struct EscalateServer {
bash_path: PathBuf,
execve_wrapper: PathBuf,
policy: Arc<dyn EscalationPolicy>,
policy: ExecPolicy,
}
impl EscalateServer {
pub fn new<P>(bash_path: PathBuf, execve_wrapper: PathBuf, policy: P) -> Self
where
P: EscalationPolicy + Send + Sync + 'static,
{
Self {
bash_path,
execve_wrapper,
policy: Arc::new(policy),
}
pub fn new(bash_path: PathBuf, policy: ExecPolicy) -> Self {
Self { bash_path, policy }
}
pub async fn exec(
@@ -54,7 +53,7 @@ impl EscalateServer {
let client_socket = escalate_client.into_inner();
client_socket.set_cloexec(false)?;
let escalate_task = tokio::spawn(escalate_task(escalate_server, self.policy.clone()));
let escalate_task = tokio::spawn(escalate_task(escalate_server, self.policy));
let mut env = env.clone();
env.insert(
ESCALATE_SOCKET_ENV_VAR.to_string(),
@@ -62,15 +61,8 @@ impl EscalateServer {
);
env.insert(
BASH_EXEC_WRAPPER_ENV_VAR.to_string(),
self.execve_wrapper.to_string_lossy().to_string(),
format!("{} escalate", std::env::current_exe()?.to_string_lossy()),
);
// TODO: use the sandbox policy and cwd from the calling client.
// Note that sandbox_cwd is ignored for ReadOnly, but needs to be legit
// for `SandboxPolicy::WorkspaceWrite`.
let sandbox_policy = SandboxPolicy::ReadOnly;
let sandbox_cwd = PathBuf::from("/__NONEXISTENT__");
let result = process_exec_tool_call(
codex_core::exec::ExecParams {
command: vec![
@@ -86,8 +78,9 @@ impl EscalateServer {
arg0: None,
},
get_platform_sandbox().unwrap_or(SandboxType::None),
&sandbox_policy,
&sandbox_cwd,
// TODO: use the sandbox policy and cwd from the calling client
&SandboxPolicy::ReadOnly,
&PathBuf::from("/__NONEXISTENT__"), // This is ignored for ReadOnly
&None,
None,
)
@@ -103,10 +96,7 @@ impl EscalateServer {
}
}
async fn escalate_task(
socket: AsyncDatagramSocket,
policy: Arc<dyn EscalationPolicy>,
) -> anyhow::Result<()> {
async fn escalate_task(socket: AsyncDatagramSocket, policy: ExecPolicy) -> anyhow::Result<()> {
loop {
let (_, mut fds) = socket.receive_with_fds().await?;
if fds.len() != 1 {
@@ -114,7 +104,6 @@ async fn escalate_task(
continue;
}
let stream_socket = AsyncSocket::from_fd(fds.remove(0))?;
let policy = policy.clone();
tokio::spawn(async move {
if let Err(err) = handle_escalate_session_with_policy(stream_socket, policy).await {
tracing::error!("escalate session failed: {err:?}");
@@ -133,7 +122,7 @@ pub(crate) struct ExecResult {
async fn handle_escalate_session_with_policy(
socket: AsyncSocket,
policy: Arc<dyn EscalationPolicy>,
policy: ExecPolicy,
) -> anyhow::Result<()> {
let EscalateRequest {
file,
@@ -143,12 +132,8 @@ async fn handle_escalate_session_with_policy(
} = socket.receive::<EscalateRequest>().await?;
let file = PathBuf::from(&file).absolutize()?.into_owned();
let workdir = PathBuf::from(&workdir).absolutize()?.into_owned();
let action = policy
.determine_action(file.as_path(), &argv, &workdir)
.await?;
let action = policy(file.as_path(), &argv, &workdir);
tracing::debug!("decided {action:?} for {file:?} {argv:?} {workdir:?}");
match action {
EscalateAction::Run => {
socket
@@ -210,13 +195,6 @@ async fn handle_escalate_session_with_policy(
})
.await?;
}
EscalateAction::Deny { reason } => {
socket
.send(EscalateResponse {
action: EscalateAction::Deny { reason },
})
.await?;
}
}
Ok(())
}
@@ -226,33 +204,14 @@ mod tests {
use super::*;
use pretty_assertions::assert_eq;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
struct DeterministicEscalationPolicy {
action: EscalateAction,
}
#[async_trait::async_trait]
impl EscalationPolicy for DeterministicEscalationPolicy {
async fn determine_action(
&self,
_file: &Path,
_argv: &[String],
_workdir: &Path,
) -> Result<EscalateAction, rmcp::ErrorData> {
Ok(self.action.clone())
}
}
#[tokio::test]
async fn handle_escalate_session_respects_run_in_sandbox_decision() -> anyhow::Result<()> {
let (server, client) = AsyncSocket::pair()?;
let server_task = tokio::spawn(handle_escalate_session_with_policy(
server,
Arc::new(DeterministicEscalationPolicy {
action: EscalateAction::Run,
}),
|_file, _argv, _workdir| EscalateAction::Run,
));
client
@@ -279,9 +238,7 @@ mod tests {
let (server, client) = AsyncSocket::pair()?;
let server_task = tokio::spawn(handle_escalate_session_with_policy(
server,
Arc::new(DeterministicEscalationPolicy {
action: EscalateAction::Escalate,
}),
|_file, _argv, _workdir| EscalateAction::Escalate,
));
client

View File

@@ -1,14 +0,0 @@
use std::path::Path;
use crate::posix::escalate_protocol::EscalateAction;
/// Decides what action to take in response to an execve request from a client.
#[async_trait::async_trait]
pub(crate) trait EscalationPolicy: Send + Sync {
async fn determine_action(
&self,
file: &Path,
argv: &[String],
workdir: &Path,
) -> Result<EscalateAction, rmcp::ErrorData>;
}

View File

@@ -18,10 +18,9 @@ use rmcp::tool_handler;
use rmcp::tool_router;
use rmcp::transport::stdio;
use crate::posix::escalate_server;
use crate::posix::escalate_server::EscalateServer;
use crate::posix::escalate_server::{self};
use crate::posix::mcp_escalation_policy::ExecPolicy;
use crate::posix::mcp_escalation_policy::McpEscalationPolicy;
use crate::posix::escalate_server::ExecPolicy;
/// Path to our patched bash.
const CODEX_BASH_PATH_ENV_VAR: &str = "CODEX_BASH_PATH";
@@ -65,17 +64,15 @@ impl From<escalate_server::ExecResult> for ExecResult {
pub struct ExecTool {
tool_router: ToolRouter<ExecTool>,
bash_path: PathBuf,
execve_wrapper: PathBuf,
policy: ExecPolicy,
}
#[tool_router]
impl ExecTool {
pub fn new(bash_path: PathBuf, execve_wrapper: PathBuf, policy: ExecPolicy) -> Self {
pub fn new(bash_path: PathBuf, policy: ExecPolicy) -> Self {
Self {
tool_router: Self::tool_router(),
bash_path,
execve_wrapper,
policy,
}
}
@@ -84,14 +81,10 @@ impl ExecTool {
#[tool]
async fn shell(
&self,
context: RequestContext<RoleServer>,
_context: RequestContext<RoleServer>,
Parameters(params): Parameters<ExecParams>,
) -> Result<CallToolResult, McpError> {
let escalate_server = EscalateServer::new(
self.bash_path.clone(),
self.execve_wrapper.clone(),
McpEscalationPolicy::new(self.policy, context),
);
let escalate_server = EscalateServer::new(self.bash_path.clone(), self.policy);
let result = escalate_server
.exec(
params.command,
@@ -106,6 +99,27 @@ impl ExecTool {
ExecResult::from(result),
)?]))
}
#[allow(dead_code)]
async fn prompt(
&self,
command: String,
workdir: String,
context: RequestContext<RoleServer>,
) -> Result<CreateElicitationResult, McpError> {
context
.peer
.create_elicitation(CreateElicitationRequestParam {
message: format!("Allow Codex to run `{command:?}` in `{workdir:?}`?"),
#[allow(clippy::expect_used)]
requested_schema: ElicitationSchema::builder()
.property("dummy", PrimitiveSchema::String(StringSchema::new()))
.build()
.expect("failed to build elicitation schema"),
})
.await
.map_err(|e| McpError::internal_error(e.to_string(), None))
}
}
#[tool_handler]
@@ -133,9 +147,8 @@ impl ServerHandler for ExecTool {
pub(crate) async fn serve(
bash_path: PathBuf,
execve_wrapper: PathBuf,
policy: ExecPolicy,
) -> Result<RunningService<RoleServer, ExecTool>, rmcp::service::ServerInitializeError> {
let tool = ExecTool::new(bash_path, execve_wrapper, policy);
let tool = ExecTool::new(bash_path, policy);
tool.serve(stdio()).await
}

View File

@@ -1,127 +0,0 @@
use std::path::Path;
use rmcp::ErrorData as McpError;
use rmcp::RoleServer;
use rmcp::model::CreateElicitationRequestParam;
use rmcp::model::CreateElicitationResult;
use rmcp::model::ElicitationAction;
use rmcp::model::ElicitationSchema;
use rmcp::service::RequestContext;
use crate::posix::escalate_protocol::EscalateAction;
use crate::posix::escalation_policy::EscalationPolicy;
/// This is the policy which decides how to handle an exec() call.
///
/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
/// `argv` is the argv, including the program name (`argv[0]`).
/// `workdir` is the absolute, canonical path to the working directory in which to execute the
/// command.
pub(crate) type ExecPolicy = fn(file: &Path, argv: &[String], workdir: &Path) -> ExecPolicyOutcome;
pub(crate) enum ExecPolicyOutcome {
Allow {
run_with_escalated_permissions: bool,
},
Prompt {
run_with_escalated_permissions: bool,
},
Forbidden,
}
/// ExecPolicy with access to the MCP RequestContext so that it can leverage
/// elicitations.
pub(crate) struct McpEscalationPolicy {
policy: ExecPolicy,
context: RequestContext<RoleServer>,
}
impl McpEscalationPolicy {
pub(crate) fn new(policy: ExecPolicy, context: RequestContext<RoleServer>) -> Self {
Self { policy, context }
}
async fn prompt(
&self,
file: &Path,
argv: &[String],
workdir: &Path,
context: RequestContext<RoleServer>,
) -> Result<CreateElicitationResult, McpError> {
let args = shlex::try_join(argv.iter().skip(1).map(String::as_str)).unwrap_or_default();
let command = if args.is_empty() {
file.display().to_string()
} else {
format!("{} {}", file.display(), args)
};
context
.peer
.create_elicitation(CreateElicitationRequestParam {
message: format!("Allow agent to run `{command}` in `{}`?", workdir.display()),
requested_schema: ElicitationSchema::builder()
.title("Execution Permission Request")
.optional_string_with("reason", |schema| {
schema.description("Optional reason for allowing or denying execution")
})
.build()
.map_err(|e| {
McpError::internal_error(
format!("failed to build elicitation schema: {e}"),
None,
)
})?,
})
.await
.map_err(|e| McpError::internal_error(e.to_string(), None))
}
}
#[async_trait::async_trait]
impl EscalationPolicy for McpEscalationPolicy {
async fn determine_action(
&self,
file: &Path,
argv: &[String],
workdir: &Path,
) -> Result<EscalateAction, rmcp::ErrorData> {
let outcome = (self.policy)(file, argv, workdir);
let action = match outcome {
ExecPolicyOutcome::Allow {
run_with_escalated_permissions,
} => {
if run_with_escalated_permissions {
EscalateAction::Escalate
} else {
EscalateAction::Run
}
}
ExecPolicyOutcome::Prompt {
run_with_escalated_permissions,
} => {
let result = self
.prompt(file, argv, workdir, self.context.clone())
.await?;
// TODO: Extract reason from `result.content`.
match result.action {
ElicitationAction::Accept => {
if run_with_escalated_permissions {
EscalateAction::Escalate
} else {
EscalateAction::Run
}
}
ElicitationAction::Decline => EscalateAction::Deny {
reason: Some("User declined execution".to_string()),
},
ElicitationAction::Cancel => EscalateAction::Deny {
reason: Some("User cancelled execution".to_string()),
},
}
}
ExecPolicyOutcome::Forbidden => EscalateAction::Deny {
reason: Some("Execution forbidden by policy".to_string()),
},
};
Ok(action)
}
}

View File

@@ -346,7 +346,6 @@ impl EventProcessor for EventProcessorWithHumanOutput {
call_id,
auto_approved,
changes,
..
}) => {
// Store metadata so we can calculate duration later when we
// receive the corresponding PatchApplyEnd event.
@@ -567,7 +566,6 @@ impl EventProcessor for EventProcessorWithHumanOutput {
| EventMsg::ReasoningContentDelta(_)
| EventMsg::ReasoningRawContentDelta(_)
| EventMsg::UndoCompleted(_)
| EventMsg::SaveSessionResponse(_)
| EventMsg::UndoStarted(_) => {}
}
CodexStatus::Running

View File

@@ -166,7 +166,6 @@ pub struct FileUpdateChange {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum PatchApplyStatus {
InProgress,
Completed,
Failed,
}

View File

@@ -822,7 +822,6 @@ fn patch_apply_success_produces_item_completed_patchapply() {
"p1",
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id: "call-1".to_string(),
turn_id: "turn-1".to_string(),
auto_approved: true,
changes: changes.clone(),
}),
@@ -835,11 +834,9 @@ fn patch_apply_success_produces_item_completed_patchapply() {
"p2",
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: "call-1".to_string(),
turn_id: "turn-1".to_string(),
stdout: "applied 3 changes".to_string(),
stderr: String::new(),
success: true,
changes: changes.clone(),
}),
);
let out_end = ep.collect_thread_events(&end);
@@ -894,7 +891,6 @@ fn patch_apply_failure_produces_item_completed_patchapply_failed() {
"p1",
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id: "call-2".to_string(),
turn_id: "turn-2".to_string(),
auto_approved: false,
changes: changes.clone(),
}),
@@ -906,11 +902,9 @@ fn patch_apply_failure_produces_item_completed_patchapply_failed() {
"p2",
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
call_id: "call-2".to_string(),
turn_id: "turn-2".to_string(),
stdout: String::new(),
stderr: "failed to apply".to_string(),
success: false,
changes: changes.clone(),
}),
);
let out_end = ep.collect_thread_events(&end);

View File

@@ -1,34 +0,0 @@
[package]
edition = "2024"
name = "codex-execpolicy-legacy"
description = "Legacy exec policy engine for validating proposed exec calls."
version = { workspace = true }
[[bin]]
name = "codex-execpolicy-legacy"
path = "src/main.rs"
[lib]
name = "codex_execpolicy_legacy"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
allocative = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
derive_more = { workspace = true, features = ["display"] }
env_logger = { workspace = true }
log = { workspace = true }
multimap = { workspace = true }
path-absolutize = { workspace = true }
regex-lite = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
serde_with = { workspace = true, features = ["macros"] }
starlark = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }

Some files were not shown because too many files have changed in this diff Show More