Mirror of https://github.com/openai/codex.git (synced 2026-02-06 08:53:41 +00:00)
Compare commits
4 Commits
shell-tool
...
jif/git-fi
| Author | SHA1 | Date |
|---|---|---|
| | 3212efaf89 | |
| | 9fac7eb7d3 | |
| | d2ae57b182 | |
| | 32801fe902 | |
33 README.md
@@ -69,38 +69,7 @@ Codex can access MCP servers. To configure them, refer to the [config docs](./do

Codex CLI supports a rich set of configuration options, with preferences stored in `~/.codex/config.toml`. For full configuration options, see [Configuration](./docs/config.md).

### Execpolicy Quickstart

Codex can enforce your own rules-based execution policy before it runs shell commands.

1. Create a policy directory: `mkdir -p ~/.codex/policy`.
2. Create one or more `.codexpolicy` files in that folder. Codex automatically loads every `.codexpolicy` file in there on startup.
3. Write `prefix_rule` entries to describe the commands you want to allow, prompt, or block:

```starlark
prefix_rule(
    pattern = ["git", ["push", "fetch"]],
    decision = "prompt",  # allow | prompt | forbidden
    match = [["git", "push", "origin", "main"]],  # examples that must match
    not_match = [["git", "status"]],  # examples that must not match
)
```

- `pattern` is a list of shell tokens, evaluated from left to right; wrap tokens in a nested list to express alternatives (e.g., match both `push` and `fetch`).
- `decision` sets the severity; Codex picks the strictest decision when multiple rules match (forbidden > prompt > allow).
- `match` and `not_match` act as optional unit tests. Codex validates them when it loads your policy, so you get feedback if an example has unexpected behavior.

In this example rule, if Codex wants to run commands with the prefix `git push` or `git fetch`, it will first ask for user approval.
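The same building blocks can express hard blocks. Below is a minimal illustrative sketch (our own example, not from the shipped docs) that assumes the same `prefix_rule` syntax and forbids recursive deletes outright:

```starlark
prefix_rule(
    # "forbidden" is the strictest decision, so this rule wins even if a
    # more permissive rule also matches the same command.
    pattern = ["rm", ["-rf", "-fr"]],
    decision = "forbidden",
    match = [["rm", "-rf", "/tmp/scratch"]],  # example that must match
    not_match = [["rm", "notes.txt"]],        # example that must not match
)
```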

Use the `codex execpolicy check` subcommand to preview decisions before you save a rule (see the [`codex-execpolicy` README](./codex-rs/execpolicy/README.md) for syntax details):

```shell
codex execpolicy check --policy ~/.codex/policy/default.codexpolicy git push origin main
```

Pass multiple `--policy` flags to test how several files combine, and use `--pretty` for formatted JSON output. See the [`codex-rs/execpolicy` README](./codex-rs/execpolicy/README.md) for a more detailed walkthrough of the available syntax.
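For example, assuming two hypothetical policy files, you could preview how they combine for a single command and get formatted JSON back:

```shell
# Both files are loaded; the strictest matching decision wins.
codex execpolicy check \
  --policy ~/.codex/policy/default.codexpolicy \
  --policy ~/.codex/policy/git.codexpolicy \
  --pretty \
  git fetch origin
```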

Note: `execpolicy` commands are still in preview. The API may have breaking changes in the future.

---

### Docs & FAQ

@@ -7,7 +7,3 @@ slow-timeout = { period = "15s", terminate-after = 2 }
# Do not add new tests here
filter = 'test(rmcp_client) | test(humanlike_typing_1000_chars_appears_live_no_placeholder)'
slow-timeout = { period = "1m", terminate-after = 4 }

[[profile.default.overrides]]
filter = 'test(approval_matrix_covers_all_modes)'
slow-timeout = { period = "30s", terminate-after = 2 }

101 codex-rs/Cargo.lock (generated)
@@ -260,7 +260,7 @@ dependencies = [
 "memchr",
 "proc-macro2",
 "quote",
 "rustc-hash",
 "rustc-hash 2.1.1",
 "serde",
 "serde_derive",
 "syn 2.0.104",
@@ -726,17 +726,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"

[[package]]
name = "chardetng"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
dependencies = [
 "cfg-if",
 "encoding_rs",
 "memchr",
]

[[package]]
name = "chrono"
version = "0.4.42"
@@ -860,6 +849,7 @@ dependencies = [
 "codex-login",
 "codex-protocol",
 "codex-utils-json-to-toml",
 "codex-windows-sandbox",
 "core_test_support",
 "mcp-types",
 "opentelemetry-appender-tracing",
@@ -890,7 +880,6 @@ dependencies = [
 "serde",
 "serde_json",
 "strum_macros 0.27.2",
 "thiserror 2.0.17",
 "ts-rs",
 "uuid",
]
@@ -1001,7 +990,6 @@ dependencies = [
 "codex-common",
 "codex-core",
 "codex-exec",
 "codex-execpolicy",
 "codex-login",
 "codex-mcp-server",
 "codex-process-hardening",
@@ -1093,13 +1081,11 @@ dependencies = [
 "async-trait",
 "base64",
 "bytes",
 "chardetng",
 "chrono",
 "codex-app-server-protocol",
 "codex-apply-patch",
 "codex-arg0",
 "codex-async-utils",
 "codex-execpolicy",
 "codex-file-search",
 "codex-git",
 "codex-keyring-store",
@@ -1109,13 +1095,13 @@ dependencies = [
 "codex-utils-pty",
 "codex-utils-readiness",
 "codex-utils-string",
 "codex-utils-tokenizer",
 "codex-windows-sandbox",
 "core-foundation 0.9.4",
 "core_test_support",
 "ctor 0.5.0",
 "dirs",
 "dunce",
 "encoding_rs",
 "env-flags",
 "escargot",
 "eventsource-stream",
@@ -1202,7 +1188,6 @@ name = "codex-exec-server"
version = "0.0.0"
dependencies = [
 "anyhow",
 "async-trait",
 "clap",
 "codex-core",
 "libc",
@@ -1211,11 +1196,9 @@ dependencies = [
 "rmcp",
 "serde",
 "serde_json",
 "shlex",
 "socket2 0.6.0",
 "tempfile",
 "tokio",
 "tokio-util",
 "tracing",
 "tracing-subscriber",
]
@@ -1223,21 +1206,6 @@ dependencies = [
[[package]]
name = "codex-execpolicy"
version = "0.0.0"
dependencies = [
 "anyhow",
 "clap",
 "multimap",
 "pretty_assertions",
 "serde",
 "serde_json",
 "shlex",
 "starlark",
 "thiserror 2.0.17",
]

[[package]]
name = "codex-execpolicy-legacy"
version = "0.0.0"
dependencies = [
 "allocative",
 "anyhow",
@@ -1255,6 +1223,21 @@ dependencies = [
 "tempfile",
]

[[package]]
name = "codex-execpolicy2"
version = "0.0.0"
dependencies = [
 "anyhow",
 "clap",
 "multimap",
 "pretty_assertions",
 "serde",
 "serde_json",
 "shlex",
 "starlark",
 "thiserror 2.0.17",
]

[[package]]
name = "codex-feedback"
version = "0.0.0"
@@ -1629,6 +1612,18 @@ dependencies = [
name = "codex-utils-string"
version = "0.0.0"

[[package]]
name = "codex-utils-tokenizer"
version = "0.0.0"
dependencies = [
 "anyhow",
 "codex-utils-cache",
 "pretty_assertions",
 "thiserror 2.0.17",
 "tiktoken-rs",
 "tokio",
]

[[package]]
name = "codex-windows-sandbox"
version = "0.1.0"
@@ -2450,6 +2445,17 @@ dependencies = [
 "once_cell",
]

[[package]]
name = "fancy-regex"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
 "bit-set",
 "regex-automata",
 "regex-syntax 0.8.5",
]

[[package]]
name = "fastrand"
version = "2.3.0"
@@ -4774,7 +4780,7 @@ dependencies = [
 "pin-project-lite",
 "quinn-proto",
 "quinn-udp",
 "rustc-hash",
 "rustc-hash 2.1.1",
 "rustls",
 "socket2 0.6.0",
 "thiserror 2.0.17",
@@ -4794,7 +4800,7 @@ dependencies = [
 "lru-slab",
 "rand 0.9.2",
 "ring",
 "rustc-hash",
 "rustc-hash 2.1.1",
 "rustls",
 "rustls-pki-types",
 "slab",
@@ -5139,6 +5145,12 @@ version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"

[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"

[[package]]
name = "rustc-hash"
version = "2.1.1"
@@ -6359,6 +6371,21 @@ dependencies = [
 "zune-jpeg",
]

[[package]]
name = "tiktoken-rs"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
dependencies = [
 "anyhow",
 "base64",
 "bstr",
 "fancy-regex",
 "lazy_static",
 "regex",
 "rustc-hash 1.1.0",
]

[[package]]
name = "time"
version = "0.3.44"

@@ -18,7 +18,7 @@ members = [
    "exec",
    "exec-server",
    "execpolicy",
    "execpolicy-legacy",
    "execpolicy2",
    "keyring-store",
    "file-search",
    "linux-sandbox",
@@ -41,6 +41,7 @@ members = [
    "utils/pty",
    "utils/readiness",
    "utils/string",
    "utils/tokenizer",
]
resolver = "2"

@@ -66,7 +67,6 @@ codex-chatgpt = { path = "chatgpt" }
codex-common = { path = "common" }
codex-core = { path = "core" }
codex-exec = { path = "exec" }
codex-execpolicy = { path = "execpolicy" }
codex-feedback = { path = "feedback" }
codex-file-search = { path = "file-search" }
codex-git = { path = "utils/git" }
@@ -89,6 +89,7 @@ codex-utils-json-to-toml = { path = "utils/json-to-toml" }
codex-utils-pty = { path = "utils/pty" }
codex-utils-readiness = { path = "utils/readiness" }
codex-utils-string = { path = "utils/string" }
codex-utils-tokenizer = { path = "utils/tokenizer" }
codex-windows-sandbox = { path = "windows-sandbox-rs" }
core_test_support = { path = "core/tests/common" }
mcp-types = { path = "mcp-types" }
@@ -109,7 +110,6 @@ axum = { version = "0.8", default-features = false }
base64 = "0.22.1"
bytes = "1.10.1"
chrono = "0.4.42"
chardetng = "0.1.17"
clap = "4"
clap_complete = "4"
color-eyre = "0.6.3"
@@ -122,7 +122,6 @@ dotenvy = "0.15.7"
dunce = "1.0.4"
env-flags = "0.1.1"
env_logger = "0.11.5"
encoding_rs = "0.8.35"
escargot = "0.5"
eventsource-stream = "0.2.3"
futures = { version = "0.3", default-features = false }
@@ -169,6 +168,7 @@ reqwest = "0.12"
rmcp = { version = "0.8.5", default-features = false }
schemars = "0.8.22"
seccompiler = "0.5.0"
sentry = "0.34.0"
serde = "1"
serde_json = "1"
serde_with = "3.14"
@@ -187,6 +187,7 @@ tempfile = "3.23.0"
test-log = "0.2.18"
textwrap = "0.16.2"
thiserror = "2.0.17"
tiktoken-rs = "0.9"
time = "0.3"
tiny_http = "0.12"
tokio = "1"
@@ -264,6 +265,7 @@ ignored = [
    "icu_provider",
    "openssl-sys",
    "codex-utils-readiness",
    "codex-utils-tokenizer",
]

[profile.release]

@@ -19,7 +19,6 @@ schemars = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
strum_macros = { workspace = true }
thiserror = { workspace = true }
ts-rs = { workspace = true }
uuid = { workspace = true, features = ["serde", "v7"] }

@@ -7,6 +7,5 @@ pub use export::generate_ts;
pub use export::generate_types;
pub use jsonrpc_lite::*;
pub use protocol::common::*;
pub use protocol::thread_history::*;
pub use protocol::v1::*;
pub use protocol::v2::*;

@@ -378,7 +378,7 @@ macro_rules! server_notification_definitions {
impl TryFrom<JSONRPCNotification> for ServerNotification {
    type Error = serde_json::Error;

    fn try_from(value: JSONRPCNotification) -> Result<Self, serde_json::Error> {
    fn try_from(value: JSONRPCNotification) -> Result<Self, Self::Error> {
        serde_json::from_value(serde_json::to_value(value)?)
    }
}
@@ -438,13 +438,6 @@ server_request_definitions! {
        response: v2::CommandExecutionRequestApprovalResponse,
    },

    /// Sent when approval is requested for a specific file change.
    /// This request is used for Turns started via turn/start.
    FileChangeRequestApproval => "item/fileChange/requestApproval" {
        params: v2::FileChangeRequestApprovalParams,
        response: v2::FileChangeRequestApprovalResponse,
    },

    /// DEPRECATED APIs below
    /// Request to approve a patch.
    /// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
@@ -487,7 +480,6 @@ pub struct FuzzyFileSearchResponse {

server_notification_definitions! {
    /// NEW NOTIFICATIONS
    Error => "error" (v2::ErrorNotification),
    ThreadStarted => "thread/started" (v2::ThreadStartedNotification),
    TurnStarted => "turn/started" (v2::TurnStartedNotification),
    TurnCompleted => "turn/completed" (v2::TurnCompletedNotification),
@@ -539,7 +531,7 @@ mod tests {
    let request = ClientRequest::NewConversation {
        request_id: RequestId::Integer(42),
        params: v1::NewConversationParams {
            model: Some("gpt-5.1-codex-max".to_string()),
            model: Some("arcticfox".to_string()),
            model_provider: None,
            profile: None,
            cwd: None,
@@ -557,7 +549,7 @@ mod tests {
    "method": "newConversation",
    "id": 42,
    "params": {
        "model": "gpt-5.1-codex-max",
        "model": "arcticfox",
        "modelProvider": null,
        "profile": null,
        "cwd": null,

@@ -2,6 +2,5 @@
// Exposes protocol pieces used by `lib.rs` via `pub use protocol::common::*;`.

pub mod common;
pub mod thread_history;
pub mod v1;
pub mod v2;

@@ -1,409 +0,0 @@
use crate::protocol::v2::ThreadItem;
use crate::protocol::v2::Turn;
use crate::protocol::v2::TurnStatus;
use crate::protocol::v2::UserInput;
use codex_protocol::protocol::AgentReasoningEvent;
use codex_protocol::protocol::AgentReasoningRawContentEvent;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::TurnAbortedEvent;
use codex_protocol::protocol::UserMessageEvent;

/// Convert persisted [`EventMsg`] entries into a sequence of [`Turn`] values.
///
/// The purpose of this is to convert the EventMsgs persisted in a rollout file
/// into a sequence of Turns and ThreadItems, which allows the client to render
/// the historical messages when resuming a thread.
pub fn build_turns_from_event_msgs(events: &[EventMsg]) -> Vec<Turn> {
    let mut builder = ThreadHistoryBuilder::new();
    for event in events {
        builder.handle_event(event);
    }
    builder.finish()
}

struct ThreadHistoryBuilder {
    turns: Vec<Turn>,
    current_turn: Option<PendingTurn>,
    next_turn_index: i64,
    next_item_index: i64,
}

impl ThreadHistoryBuilder {
    fn new() -> Self {
        Self {
            turns: Vec::new(),
            current_turn: None,
            next_turn_index: 1,
            next_item_index: 1,
        }
    }

    fn finish(mut self) -> Vec<Turn> {
        self.finish_current_turn();
        self.turns
    }

    /// This function should handle all EventMsg variants that can be persisted in a rollout file.
    /// See `should_persist_event_msg` in `codex-rs/core/rollout/policy.rs`.
    fn handle_event(&mut self, event: &EventMsg) {
        match event {
            EventMsg::UserMessage(payload) => self.handle_user_message(payload),
            EventMsg::AgentMessage(payload) => self.handle_agent_message(payload.message.clone()),
            EventMsg::AgentReasoning(payload) => self.handle_agent_reasoning(payload),
            EventMsg::AgentReasoningRawContent(payload) => {
                self.handle_agent_reasoning_raw_content(payload)
            }
            EventMsg::TokenCount(_) => {}
            EventMsg::EnteredReviewMode(_) => {}
            EventMsg::ExitedReviewMode(_) => {}
            EventMsg::UndoCompleted(_) => {}
            EventMsg::TurnAborted(payload) => self.handle_turn_aborted(payload),
            _ => {}
        }
    }

    fn handle_user_message(&mut self, payload: &UserMessageEvent) {
        self.finish_current_turn();
        let mut turn = self.new_turn();
        let id = self.next_item_id();
        let content = self.build_user_inputs(payload);
        turn.items.push(ThreadItem::UserMessage { id, content });
        self.current_turn = Some(turn);
    }

    fn handle_agent_message(&mut self, text: String) {
        if text.is_empty() {
            return;
        }

        let id = self.next_item_id();
        self.ensure_turn()
            .items
            .push(ThreadItem::AgentMessage { id, text });
    }

    fn handle_agent_reasoning(&mut self, payload: &AgentReasoningEvent) {
        if payload.text.is_empty() {
            return;
        }

        // If the last item is a reasoning item, add the new text to the summary.
        if let Some(ThreadItem::Reasoning { summary, .. }) = self.ensure_turn().items.last_mut() {
            summary.push(payload.text.clone());
            return;
        }

        // Otherwise, create a new reasoning item.
        let id = self.next_item_id();
        self.ensure_turn().items.push(ThreadItem::Reasoning {
            id,
            summary: vec![payload.text.clone()],
            content: Vec::new(),
        });
    }

    fn handle_agent_reasoning_raw_content(&mut self, payload: &AgentReasoningRawContentEvent) {
        if payload.text.is_empty() {
            return;
        }

        // If the last item is a reasoning item, add the new text to the content.
        if let Some(ThreadItem::Reasoning { content, .. }) = self.ensure_turn().items.last_mut() {
            content.push(payload.text.clone());
            return;
        }

        // Otherwise, create a new reasoning item.
        let id = self.next_item_id();
        self.ensure_turn().items.push(ThreadItem::Reasoning {
            id,
            summary: Vec::new(),
            content: vec![payload.text.clone()],
        });
    }

    fn handle_turn_aborted(&mut self, _payload: &TurnAbortedEvent) {
        let Some(turn) = self.current_turn.as_mut() else {
            return;
        };
        turn.status = TurnStatus::Interrupted;
    }

    fn finish_current_turn(&mut self) {
        if let Some(turn) = self.current_turn.take() {
            if turn.items.is_empty() {
                return;
            }
            self.turns.push(turn.into());
        }
    }

    fn new_turn(&mut self) -> PendingTurn {
        PendingTurn {
            id: self.next_turn_id(),
            items: Vec::new(),
            status: TurnStatus::Completed,
        }
    }

    fn ensure_turn(&mut self) -> &mut PendingTurn {
        if self.current_turn.is_none() {
            let turn = self.new_turn();
            return self.current_turn.insert(turn);
        }

        if let Some(turn) = self.current_turn.as_mut() {
            return turn;
        }

        unreachable!("current turn must exist after initialization");
    }

    fn next_turn_id(&mut self) -> String {
        let id = format!("turn-{}", self.next_turn_index);
        self.next_turn_index += 1;
        id
    }

    fn next_item_id(&mut self) -> String {
        let id = format!("item-{}", self.next_item_index);
        self.next_item_index += 1;
        id
    }

    fn build_user_inputs(&self, payload: &UserMessageEvent) -> Vec<UserInput> {
        let mut content = Vec::new();
        if !payload.message.trim().is_empty() {
            content.push(UserInput::Text {
                text: payload.message.clone(),
            });
        }
        if let Some(images) = &payload.images {
            for image in images {
                content.push(UserInput::Image { url: image.clone() });
            }
        }
        content
    }
}

struct PendingTurn {
    id: String,
    items: Vec<ThreadItem>,
    status: TurnStatus,
}

impl From<PendingTurn> for Turn {
    fn from(value: PendingTurn) -> Self {
        Self {
            id: value.id,
            items: value.items,
            status: value.status,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use codex_protocol::protocol::AgentMessageEvent;
    use codex_protocol::protocol::AgentReasoningEvent;
    use codex_protocol::protocol::AgentReasoningRawContentEvent;
    use codex_protocol::protocol::TurnAbortReason;
    use codex_protocol::protocol::TurnAbortedEvent;
    use codex_protocol::protocol::UserMessageEvent;
    use pretty_assertions::assert_eq;

    #[test]
    fn builds_multiple_turns_with_reasoning_items() {
        let events = vec![
            EventMsg::UserMessage(UserMessageEvent {
                message: "First turn".into(),
                images: Some(vec!["https://example.com/one.png".into()]),
            }),
            EventMsg::AgentMessage(AgentMessageEvent {
                message: "Hi there".into(),
            }),
            EventMsg::AgentReasoning(AgentReasoningEvent {
                text: "thinking".into(),
            }),
            EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent {
                text: "full reasoning".into(),
            }),
            EventMsg::UserMessage(UserMessageEvent {
                message: "Second turn".into(),
                images: None,
            }),
            EventMsg::AgentMessage(AgentMessageEvent {
                message: "Reply two".into(),
            }),
        ];

        let turns = build_turns_from_event_msgs(&events);
        assert_eq!(turns.len(), 2);

        let first = &turns[0];
        assert_eq!(first.id, "turn-1");
        assert_eq!(first.status, TurnStatus::Completed);
        assert_eq!(first.items.len(), 3);
        assert_eq!(
            first.items[0],
            ThreadItem::UserMessage {
                id: "item-1".into(),
                content: vec![
                    UserInput::Text {
                        text: "First turn".into(),
                    },
                    UserInput::Image {
                        url: "https://example.com/one.png".into(),
                    }
                ],
            }
        );
        assert_eq!(
            first.items[1],
            ThreadItem::AgentMessage {
                id: "item-2".into(),
                text: "Hi there".into(),
            }
        );
        assert_eq!(
            first.items[2],
            ThreadItem::Reasoning {
                id: "item-3".into(),
                summary: vec!["thinking".into()],
                content: vec!["full reasoning".into()],
            }
        );

        let second = &turns[1];
        assert_eq!(second.id, "turn-2");
        assert_eq!(second.items.len(), 2);
        assert_eq!(
            second.items[0],
            ThreadItem::UserMessage {
                id: "item-4".into(),
                content: vec![UserInput::Text {
                    text: "Second turn".into()
                }],
            }
        );
        assert_eq!(
            second.items[1],
            ThreadItem::AgentMessage {
                id: "item-5".into(),
                text: "Reply two".into(),
            }
        );
    }

    #[test]
    fn splits_reasoning_when_interleaved() {
        let events = vec![
            EventMsg::UserMessage(UserMessageEvent {
                message: "Turn start".into(),
                images: None,
            }),
            EventMsg::AgentReasoning(AgentReasoningEvent {
                text: "first summary".into(),
            }),
            EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent {
                text: "first content".into(),
            }),
            EventMsg::AgentMessage(AgentMessageEvent {
                message: "interlude".into(),
            }),
            EventMsg::AgentReasoning(AgentReasoningEvent {
                text: "second summary".into(),
            }),
        ];

        let turns = build_turns_from_event_msgs(&events);
        assert_eq!(turns.len(), 1);
        let turn = &turns[0];
        assert_eq!(turn.items.len(), 4);

        assert_eq!(
            turn.items[1],
            ThreadItem::Reasoning {
                id: "item-2".into(),
                summary: vec!["first summary".into()],
                content: vec!["first content".into()],
            }
        );
        assert_eq!(
            turn.items[3],
            ThreadItem::Reasoning {
                id: "item-4".into(),
                summary: vec!["second summary".into()],
                content: Vec::new(),
            }
        );
    }

    #[test]
    fn marks_turn_as_interrupted_when_aborted() {
        let events = vec![
            EventMsg::UserMessage(UserMessageEvent {
                message: "Please do the thing".into(),
                images: None,
            }),
            EventMsg::AgentMessage(AgentMessageEvent {
                message: "Working...".into(),
            }),
            EventMsg::TurnAborted(TurnAbortedEvent {
                reason: TurnAbortReason::Replaced,
            }),
            EventMsg::UserMessage(UserMessageEvent {
                message: "Let's try again".into(),
                images: None,
            }),
            EventMsg::AgentMessage(AgentMessageEvent {
                message: "Second attempt complete.".into(),
            }),
        ];

        let turns = build_turns_from_event_msgs(&events);
        assert_eq!(turns.len(), 2);

        let first_turn = &turns[0];
        assert_eq!(first_turn.status, TurnStatus::Interrupted);
        assert_eq!(first_turn.items.len(), 2);
        assert_eq!(
            first_turn.items[0],
            ThreadItem::UserMessage {
                id: "item-1".into(),
                content: vec![UserInput::Text {
                    text: "Please do the thing".into()
                }],
            }
        );
        assert_eq!(
            first_turn.items[1],
            ThreadItem::AgentMessage {
                id: "item-2".into(),
                text: "Working...".into(),
            }
        );

        let second_turn = &turns[1];
        assert_eq!(second_turn.status, TurnStatus::Completed);
        assert_eq!(second_turn.items.len(), 2);
        assert_eq!(
            second_turn.items[0],
            ThreadItem::UserMessage {
                id: "item-3".into(),
                content: vec![UserInput::Text {
                    text: "Let's try again".into()
                }],
            }
        );
        assert_eq!(
            second_turn.items[1],
            ThreadItem::AgentMessage {
                id: "item-4".into(),
                text: "Second attempt complete.".into(),
            }
        );
    }
}

@@ -11,8 +11,6 @@ use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
use codex_protocol::items::TurnItem as CoreTurnItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
use codex_protocol::protocol::CodexErrorInfo as CoreCodexErrorInfo;
use codex_protocol::protocol::CreditsSnapshot as CoreCreditsSnapshot;
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
use codex_protocol::user_input::UserInput as CoreUserInput;
@@ -21,7 +19,6 @@ use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
use thiserror::Error;
use ts_rs::TS;

// Macro to declare a camelCased API v2 enum mirroring a core enum which
@@ -49,72 +46,6 @@ macro_rules! v2_enum_from_core {
    };
}

/// This translation layer makes sure that we expose codex error codes in camel case.
///
/// When an upstream HTTP status is available (for example, from the Responses API or a provider),
/// it is forwarded in `httpStatusCode` on the relevant `codexErrorInfo` variant.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum CodexErrorInfo {
    ContextWindowExceeded,
    UsageLimitExceeded,
    HttpConnectionFailed {
        #[serde(rename = "httpStatusCode")]
        #[ts(rename = "httpStatusCode")]
        http_status_code: Option<u16>,
    },
    /// Failed to connect to the response SSE stream.
    ResponseStreamConnectionFailed {
        #[serde(rename = "httpStatusCode")]
        #[ts(rename = "httpStatusCode")]
        http_status_code: Option<u16>,
    },
    InternalServerError,
    Unauthorized,
    BadRequest,
    SandboxError,
    /// The response SSE stream disconnected in the middle of a turn before completion.
    ResponseStreamDisconnected {
        #[serde(rename = "httpStatusCode")]
        #[ts(rename = "httpStatusCode")]
        http_status_code: Option<u16>,
    },
    /// Reached the retry limit for responses.
    ResponseTooManyFailedAttempts {
        #[serde(rename = "httpStatusCode")]
        #[ts(rename = "httpStatusCode")]
        http_status_code: Option<u16>,
    },
    Other,
}

impl From<CoreCodexErrorInfo> for CodexErrorInfo {
    fn from(value: CoreCodexErrorInfo) -> Self {
        match value {
            CoreCodexErrorInfo::ContextWindowExceeded => CodexErrorInfo::ContextWindowExceeded,
            CoreCodexErrorInfo::UsageLimitExceeded => CodexErrorInfo::UsageLimitExceeded,
            CoreCodexErrorInfo::HttpConnectionFailed { http_status_code } => {
                CodexErrorInfo::HttpConnectionFailed { http_status_code }
            }
            CoreCodexErrorInfo::ResponseStreamConnectionFailed { http_status_code } => {
                CodexErrorInfo::ResponseStreamConnectionFailed { http_status_code }
            }
            CoreCodexErrorInfo::InternalServerError => CodexErrorInfo::InternalServerError,
            CoreCodexErrorInfo::Unauthorized => CodexErrorInfo::Unauthorized,
            CoreCodexErrorInfo::BadRequest => CodexErrorInfo::BadRequest,
            CoreCodexErrorInfo::SandboxError => CodexErrorInfo::SandboxError,
            CoreCodexErrorInfo::ResponseStreamDisconnected { http_status_code } => {
                CodexErrorInfo::ResponseStreamDisconnected { http_status_code }
            }
            CoreCodexErrorInfo::ResponseTooManyFailedAttempts { http_status_code } => {
                CodexErrorInfo::ResponseTooManyFailedAttempts { http_status_code }
            }
            CoreCodexErrorInfo::Other => CodexErrorInfo::Other,
        }
    }
}

v2_enum_from_core!(
    pub enum AskForApproval from codex_protocol::protocol::AskForApproval {
        UnlessTrusted, OnFailure, OnRequest, Never
@@ -586,10 +517,6 @@ pub struct Thread {
    pub created_at: i64,
    /// [UNSTABLE] Path to the thread on disk.
    pub path: PathBuf,
    /// Only populated on a `thread/resume` response.
    /// For all other responses and notifications returning a Thread,
    /// the turns field will be an empty list.
    pub turns: Vec<Turn>,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -604,28 +531,18 @@ pub struct AccountUpdatedNotification {
#[ts(export_to = "v2/")]
pub struct Turn {
    pub id: String,
    /// Only populated on a `thread/resume` response.
    /// For all other responses and notifications returning a Turn,
    /// the items field will be an empty list.
    /// This is currently only populated for resumed threads.
    /// TODO: properly populate items for all turns.
    pub items: Vec<ThreadItem>,
    #[serde(flatten)]
    pub status: TurnStatus,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS, Error)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
#[error("{message}")]
pub struct TurnError {
    pub message: String,
    pub codex_error_info: Option<CodexErrorInfo>,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct ErrorNotification {
    pub error: TurnError,
pub struct TurnError {
    pub message: String,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -871,23 +788,20 @@ pub struct FileUpdateChange {
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "camelCase")]
#[ts(tag = "type")]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum PatchChangeKind {
    Add,
    Delete,
    Update { move_path: Option<PathBuf> },
    Update,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum PatchApplyStatus {
    InProgress,
    Completed,
    Failed,
    Declined,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -1062,26 +976,6 @@ pub struct CommandExecutionRequestApprovalResponse {
    pub accept_settings: Option<CommandExecutionRequestAcceptSettings>,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct FileChangeRequestApprovalParams {
    pub thread_id: String,
    pub turn_id: String,
    pub item_id: String,
    /// Optional explanatory reason (e.g. request for extra write access).
    pub reason: Option<String>,
    /// [UNSTABLE] When set, the agent is asking the user to allow writes under this root
    /// for the remainder of the session (unclear if this is honored today).
    pub grant_root: Option<PathBuf>,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[ts(export_to = "v2/")]
pub struct FileChangeRequestApprovalResponse {
    pub decision: ApprovalDecision,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -1095,7 +989,6 @@ pub struct AccountRateLimitsUpdatedNotification {
pub struct RateLimitSnapshot {
    pub primary: Option<RateLimitWindow>,
    pub secondary: Option<RateLimitWindow>,
    pub credits: Option<CreditsSnapshot>,
}

impl From<CoreRateLimitSnapshot> for RateLimitSnapshot {
@@ -1103,7 +996,6 @@ impl From<CoreRateLimitSnapshot> for RateLimitSnapshot {
        Self {
            primary: value.primary.map(RateLimitWindow::from),
            secondary: value.secondary.map(RateLimitWindow::from),
            credits: value.credits.map(CreditsSnapshot::from),
        }
    }
}
@@ -1127,25 +1019,6 @@ impl From<CoreRateLimitWindow> for RateLimitWindow {
    }
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct CreditsSnapshot {
    pub has_credits: bool,
    pub unlimited: bool,
    pub balance: Option<String>,
}

impl From<CoreCreditsSnapshot> for CreditsSnapshot {
    fn from(value: CoreCreditsSnapshot) -> Self {
        Self {
            has_credits: value.has_credits,
            unlimited: value.unlimited,
            balance: value.balance,
        }
    }
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -1168,7 +1041,6 @@ mod tests {
    use codex_protocol::items::WebSearchItem;
    use codex_protocol::user_input::UserInput as CoreUserInput;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use std::path::PathBuf;

    #[test]
@@ -1254,20 +1126,4 @@ mod tests {
            }
        );
    }

    #[test]
    fn codex_error_info_serializes_http_status_code_in_camel_case() {
        let value = CodexErrorInfo::ResponseTooManyFailedAttempts {
            http_status_code: Some(401),
        };

        assert_eq!(
            serde_json::to_value(value).unwrap(),
            json!({
                "responseTooManyFailedAttempts": {
                    "httpStatusCode": 401
                }
            })
        );
    }
}

@@ -24,8 +24,6 @@ use codex_app_server_protocol::ClientRequest;
use codex_app_server_protocol::CommandExecutionRequestAcceptSettings;
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::FileChangeRequestApprovalParams;
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
use codex_app_server_protocol::GetAccountRateLimitsResponse;
use codex_app_server_protocol::InitializeParams;
use codex_app_server_protocol::InitializeResponse;
@@ -679,9 +677,6 @@ impl CodexClient {
            ServerRequest::CommandExecutionRequestApproval { request_id, params } => {
                self.handle_command_execution_request_approval(request_id, params)?;
            }
            ServerRequest::FileChangeRequestApproval { request_id, params } => {
                self.approve_file_change_request(request_id, params)?;
            }
            other => {
                bail!("received unsupported server request: {other:?}");
            }
@@ -722,37 +717,6 @@ impl CodexClient {
        Ok(())
    }

    fn approve_file_change_request(
        &mut self,
        request_id: RequestId,
        params: FileChangeRequestApprovalParams,
    ) -> Result<()> {
        let FileChangeRequestApprovalParams {
            thread_id,
            turn_id,
            item_id,
            reason,
            grant_root,
        } = params;

        println!(
            "\n< fileChange approval requested for thread {thread_id}, turn {turn_id}, item {item_id}"
        );
        if let Some(reason) = reason.as_deref() {
            println!("< reason: {reason}");
        }
        if let Some(grant_root) = grant_root.as_deref() {
            println!("< grant root: {}", grant_root.display());
        }

        let response = FileChangeRequestApprovalResponse {
            decision: ApprovalDecision::Accept,
        };
        self.send_server_request_response(request_id, &response)?;
        println!("< approved fileChange request for item {item_id}");
        Ok(())
    }

    fn send_server_request_response<T>(&mut self, request_id: RequestId, response: &T) -> Result<()>
    where
        T: Serialize,

@@ -40,6 +40,7 @@ tracing = { workspace = true, features = ["log"] }
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
opentelemetry-appender-tracing = { workspace = true }
uuid = { workspace = true, features = ["serde", "v7"] }
codex-windows-sandbox.workspace = true

[dev-dependencies]
app_test_support = { workspace = true }

@@ -339,29 +339,6 @@ Event notifications are the server-initiated event stream for thread lifecycles,

The app-server streams JSON-RPC notifications while a turn is running. Each turn starts with `turn/started` (initial `turn`) and ends with `turn/completed` (final `turn` plus token `usage`); clients subscribe to the events they care about and render each item incrementally as updates arrive. The per-item lifecycle is always: `item/started` → zero or more item-specific deltas → `item/completed`.

- `turn/started` — `{ turn }` with the turn id, empty `items`, and `status: "inProgress"`.
- `turn/completed` — `{ turn }` where `turn.status` is `completed`, `interrupted`, or `failed`; failures carry `{ error: { message, codexErrorInfo? } }`.

Today both notifications carry an empty `items` array even when item events were streamed; rely on `item/*` notifications for the canonical item list until this is fixed.

#### Errors

The `error` event is emitted whenever the server hits an error mid-turn (for example, upstream model errors or quota limits). It carries the same `{ error: { message, codexErrorInfo? } }` payload as `turn.status: "failed"` and may precede that terminal notification.

`codexErrorInfo` maps to the `CodexErrorInfo` enum. Common values:

- `ContextWindowExceeded`
- `UsageLimitExceeded`
- `HttpConnectionFailed { httpStatusCode? }`: upstream HTTP failures, including 4xx/5xx
- `ResponseStreamConnectionFailed { httpStatusCode? }`: failure to connect to the response SSE stream
- `ResponseStreamDisconnected { httpStatusCode? }`: the response SSE stream disconnected in the middle of a turn before completion
- `ResponseTooManyFailedAttempts { httpStatusCode? }`
- `BadRequest`
- `Unauthorized`
- `SandboxError`
- `InternalServerError`
- `Other`: all unclassified errors

When an upstream HTTP status is available (for example, from the Responses API or a provider), it is forwarded in `httpStatusCode` on the relevant `codexErrorInfo` variant.
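
As a concrete sketch, a retry-limit failure might arrive as the following notification (field values illustrative; the camelCase nesting matches the serialization test in `v2.rs` shown earlier in this diff):

```json
{
  "jsonrpc": "2.0",
  "method": "error",
  "params": {
    "error": {
      "message": "Too many failed attempts while streaming the response.",
      "codexErrorInfo": {
        "responseTooManyFailedAttempts": { "httpStatusCode": 401 }
      }
    }
  }
}
```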

#### Thread items

`ThreadItem` is the tagged union carried in turn responses and `item/*` notifications. Currently we support events for the following items:

@@ -8,26 +8,19 @@ use codex_app_server_protocol::AgentMessageDeltaNotification;
use codex_app_server_protocol::ApplyPatchApprovalParams;
use codex_app_server_protocol::ApplyPatchApprovalResponse;
use codex_app_server_protocol::ApprovalDecision;
use codex_app_server_protocol::CodexErrorInfo as V2CodexErrorInfo;
use codex_app_server_protocol::CommandAction as V2ParsedCommand;
use codex_app_server_protocol::CommandExecutionOutputDeltaNotification;
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::ErrorNotification;
use codex_app_server_protocol::ExecCommandApprovalParams;
use codex_app_server_protocol::ExecCommandApprovalResponse;
use codex_app_server_protocol::FileChangeRequestApprovalParams;
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
use codex_app_server_protocol::FileUpdateChange;
use codex_app_server_protocol::InterruptConversationResponse;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::McpToolCallError;
use codex_app_server_protocol::McpToolCallResult;
use codex_app_server_protocol::McpToolCallStatus;
use codex_app_server_protocol::PatchApplyStatus;
use codex_app_server_protocol::PatchChangeKind as V2PatchChangeKind;
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
use codex_app_server_protocol::ReasoningTextDeltaNotification;
@@ -47,7 +40,6 @@ use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecApprovalRequestEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::FileChange as CoreFileChange;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::Op;
@@ -55,9 +47,7 @@ use codex_core::protocol::ReviewDecision;
use codex_core::review_format::format_review_findings_block;
use codex_protocol::ConversationId;
use codex_protocol::protocol::ReviewOutputEvent;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::oneshot;
use tracing::error;
@@ -80,74 +70,24 @@ pub(crate) async fn apply_bespoke_event_handling(
        }
        EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
            call_id,
            turn_id,
            changes,
            reason,
            grant_root,
        }) => match api_version {
            ApiVersion::V1 => {
                let params = ApplyPatchApprovalParams {
                    conversation_id,
                    call_id,
                    file_changes: changes.clone(),
                    reason,
                    grant_root,
                };
                let rx = outgoing
                    .send_request(ServerRequestPayload::ApplyPatchApproval(params))
                    .await;
                tokio::spawn(async move {
                    on_patch_approval_response(event_id, rx, conversation).await;
                });
            }
            ApiVersion::V2 => {
                // Until we migrate the core to be aware of a first class FileChangeItem
                // and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
                let item_id = call_id.clone();
                let patch_changes = convert_patch_changes(&changes);

                let first_start = {
                    let mut map = turn_summary_store.lock().await;
                    let summary = map.entry(conversation_id).or_default();
                    summary.file_change_started.insert(item_id.clone())
                };
                if first_start {
                    let item = ThreadItem::FileChange {
                        id: item_id.clone(),
                        changes: patch_changes.clone(),
                        status: PatchApplyStatus::InProgress,
                    };
                    let notification = ItemStartedNotification { item };
                    outgoing
                        .send_server_notification(ServerNotification::ItemStarted(notification))
                        .await;
                }

                let params = FileChangeRequestApprovalParams {
                    thread_id: conversation_id.to_string(),
                    turn_id: turn_id.clone(),
                    item_id: item_id.clone(),
                    reason,
                    grant_root,
                };
                let rx = outgoing
                    .send_request(ServerRequestPayload::FileChangeRequestApproval(params))
                    .await;
                tokio::spawn(async move {
                    on_file_change_request_approval_response(
                        event_id,
                        conversation_id,
                        item_id,
                        patch_changes,
                        rx,
                        conversation,
                        outgoing,
                        turn_summary_store,
                    )
                    .await;
                });
            }
        },
        }) => {
            let params = ApplyPatchApprovalParams {
                conversation_id,
                call_id,
                file_changes: changes,
                reason,
                grant_root,
            };
            let rx = outgoing
                .send_request(ServerRequestPayload::ApplyPatchApproval(params))
                .await;
            tokio::spawn(async move {
                on_patch_approval_response(event_id, rx, conversation).await;
            });
        }
        EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
            call_id,
            turn_id,
@@ -262,29 +202,7 @@ pub(crate) async fn apply_bespoke_event_handling(
            }
        }
        EventMsg::Error(ev) => {
            let turn_error = TurnError {
                message: ev.message,
                codex_error_info: ev.codex_error_info.map(V2CodexErrorInfo::from),
            };
            handle_error(conversation_id, turn_error.clone(), &turn_summary_store).await;
            outgoing
                .send_server_notification(ServerNotification::Error(ErrorNotification {
                    error: turn_error,
                }))
                .await;
        }
        EventMsg::StreamError(ev) => {
            // We don't need to update the turn summary store for stream errors as they are intermediate error states for retries,
            // but we notify the client.
            let turn_error = TurnError {
                message: ev.message,
                codex_error_info: ev.codex_error_info.map(V2CodexErrorInfo::from),
            };
            outgoing
                .send_server_notification(ServerNotification::Error(ErrorNotification {
                    error: turn_error,
                }))
                .await;
            handle_error(conversation_id, ev.message, &turn_summary_store).await;
        }
        EventMsg::EnteredReviewMode(review_request) => {
            let notification = ItemStartedNotification {
@@ -326,49 +244,6 @@ pub(crate) async fn apply_bespoke_event_handling(
                .send_server_notification(ServerNotification::ItemCompleted(notification))
                .await;
        }
        EventMsg::PatchApplyBegin(patch_begin_event) => {
            // Until we migrate the core to be aware of a first class FileChangeItem
            // and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
            let item_id = patch_begin_event.call_id.clone();

            let first_start = {
                let mut map = turn_summary_store.lock().await;
                let summary = map.entry(conversation_id).or_default();
                summary.file_change_started.insert(item_id.clone())
            };
            if first_start {
                let item = ThreadItem::FileChange {
                    id: item_id.clone(),
                    changes: convert_patch_changes(&patch_begin_event.changes),
                    status: PatchApplyStatus::InProgress,
                };
                let notification = ItemStartedNotification { item };
                outgoing
                    .send_server_notification(ServerNotification::ItemStarted(notification))
                    .await;
            }
        }
        EventMsg::PatchApplyEnd(patch_end_event) => {
            // Until we migrate the core to be aware of a first class FileChangeItem
            // and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
            let item_id = patch_end_event.call_id.clone();

            let status = if patch_end_event.success {
                PatchApplyStatus::Completed
            } else {
                PatchApplyStatus::Failed
            };
            let changes = convert_patch_changes(&patch_end_event.changes);
            complete_file_change_item(
                conversation_id,
                item_id,
                changes,
                status,
                outgoing.as_ref(),
                &turn_summary_store,
            )
            .await;
        }
        EventMsg::ExecCommandBegin(exec_command_begin_event) => {
            let item = ThreadItem::CommandExecution {
                id: exec_command_begin_event.call_id.clone(),
@@ -490,32 +365,6 @@ async fn emit_turn_completed_with_status(
        .await;
}

async fn complete_file_change_item(
    conversation_id: ConversationId,
    item_id: String,
    changes: Vec<FileUpdateChange>,
    status: PatchApplyStatus,
    outgoing: &OutgoingMessageSender,
    turn_summary_store: &TurnSummaryStore,
) {
    {
        let mut map = turn_summary_store.lock().await;
        if let Some(summary) = map.get_mut(&conversation_id) {
            summary.file_change_started.remove(&item_id);
        }
    }

    let item = ThreadItem::FileChange {
        id: item_id,
        changes,
        status,
    };
    let notification = ItemCompletedNotification { item };
    outgoing
        .send_server_notification(ServerNotification::ItemCompleted(notification))
        .await;
}

async fn find_and_remove_turn_summary(
    conversation_id: ConversationId,
    turn_summary_store: &TurnSummaryStore,
@@ -532,8 +381,10 @@ async fn handle_turn_complete(
) {
    let turn_summary = find_and_remove_turn_summary(conversation_id, turn_summary_store).await;

    let status = if let Some(error) = turn_summary.last_error {
        TurnStatus::Failed { error }
    let status = if let Some(message) = turn_summary.last_error_message {
        TurnStatus::Failed {
            error: TurnError { message },
        }
    } else {
        TurnStatus::Completed
    };
@@ -554,11 +405,11 @@ async fn handle_turn_interrupted(

async fn handle_error(
    conversation_id: ConversationId,
    error: TurnError,
    message: String,
    turn_summary_store: &TurnSummaryStore,
) {
    let mut map = turn_summary_store.lock().await;
    map.entry(conversation_id).or_default().last_error = Some(error);
    map.entry(conversation_id).or_default().last_error_message = Some(message);
}

async fn on_patch_approval_response(
@@ -661,110 +512,6 @@ fn render_review_output_text(output: &ReviewOutputEvent) -> String {
    }
}

fn convert_patch_changes(changes: &HashMap<PathBuf, CoreFileChange>) -> Vec<FileUpdateChange> {
    let mut converted: Vec<FileUpdateChange> = changes
        .iter()
        .map(|(path, change)| FileUpdateChange {
            path: path.to_string_lossy().into_owned(),
            kind: map_patch_change_kind(change),
            diff: format_file_change_diff(change),
        })
        .collect();
    converted.sort_by(|a, b| a.path.cmp(&b.path));
    converted
}

fn map_patch_change_kind(change: &CoreFileChange) -> V2PatchChangeKind {
    match change {
        CoreFileChange::Add { .. } => V2PatchChangeKind::Add,
        CoreFileChange::Delete { .. } => V2PatchChangeKind::Delete,
        CoreFileChange::Update { move_path, .. } => V2PatchChangeKind::Update {
            move_path: move_path.clone(),
        },
    }
}

fn format_file_change_diff(change: &CoreFileChange) -> String {
    match change {
        CoreFileChange::Add { content } => content.clone(),
        CoreFileChange::Delete { content } => content.clone(),
        CoreFileChange::Update {
            unified_diff,
            move_path,
        } => {
            if let Some(path) = move_path {
                format!("{unified_diff}\n\nMoved to: {}", path.display())
            } else {
                unified_diff.clone()
            }
        }
    }
}

#[allow(clippy::too_many_arguments)]
async fn on_file_change_request_approval_response(
    event_id: String,
    conversation_id: ConversationId,
    item_id: String,
    changes: Vec<FileUpdateChange>,
    receiver: oneshot::Receiver<JsonValue>,
    codex: Arc<CodexConversation>,
    outgoing: Arc<OutgoingMessageSender>,
    turn_summary_store: TurnSummaryStore,
) {
    let response = receiver.await;
    let (decision, completion_status) = match response {
        Ok(value) => {
            let response = serde_json::from_value::<FileChangeRequestApprovalResponse>(value)
                .unwrap_or_else(|err| {
                    error!("failed to deserialize FileChangeRequestApprovalResponse: {err}");
                    FileChangeRequestApprovalResponse {
                        decision: ApprovalDecision::Decline,
                    }
                });

            let (decision, completion_status) = match response.decision {
                ApprovalDecision::Accept => (ReviewDecision::Approved, None),
                ApprovalDecision::Decline => {
                    (ReviewDecision::Denied, Some(PatchApplyStatus::Declined))
                }
                ApprovalDecision::Cancel => {
                    (ReviewDecision::Abort, Some(PatchApplyStatus::Declined))
                }
            };
            // Allow EventMsg::PatchApplyEnd to emit ItemCompleted for accepted patches.
            // Only short-circuit on declines/cancels/failures.
            (decision, completion_status)
        }
        Err(err) => {
            error!("request failed: {err:?}");
            (ReviewDecision::Denied, Some(PatchApplyStatus::Failed))
        }
    };

    if let Some(status) = completion_status {
        complete_file_change_item(
            conversation_id,
            item_id,
            changes,
            status,
            outgoing.as_ref(),
            &turn_summary_store,
        )
        .await;
    }

    if let Err(err) = codex
        .submit(Op::PatchApproval {
            id: event_id,
            decision,
        })
        .await
    {
        error!("failed to submit PatchApproval: {err}");
    }
}

async fn on_command_execution_request_approval_response(
    event_id: String,
    receiver: oneshot::Receiver<JsonValue>,
@@ -895,24 +642,10 @@ mod tests {
    let conversation_id = ConversationId::new();
    let turn_summary_store = new_turn_summary_store();

    handle_error(
        conversation_id,
        TurnError {
            message: "boom".to_string(),
            codex_error_info: Some(V2CodexErrorInfo::InternalServerError),
        },
        &turn_summary_store,
    )
    .await;
    handle_error(conversation_id, "boom".to_string(), &turn_summary_store).await;

    let turn_summary = find_and_remove_turn_summary(conversation_id, &turn_summary_store).await;
    assert_eq!(
        turn_summary.last_error,
        Some(TurnError {
            message: "boom".to_string(),
            codex_error_info: Some(V2CodexErrorInfo::InternalServerError),
        })
    );
    assert_eq!(turn_summary.last_error_message, Some("boom".to_string()));
    Ok(())
}

@@ -952,15 +685,7 @@ mod tests {
    let conversation_id = ConversationId::new();
    let event_id = "interrupt1".to_string();
    let turn_summary_store = new_turn_summary_store();
    handle_error(
        conversation_id,
        TurnError {
            message: "oops".to_string(),
            codex_error_info: None,
        },
        &turn_summary_store,
    )
    .await;
    handle_error(conversation_id, "oops".to_string(), &turn_summary_store).await;
    let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY);
    let outgoing = Arc::new(OutgoingMessageSender::new(tx));

@@ -992,15 +717,7 @@ mod tests {
    let conversation_id = ConversationId::new();
    let event_id = "complete_err1".to_string();
    let turn_summary_store = new_turn_summary_store();
    handle_error(
        conversation_id,
        TurnError {
            message: "bad".to_string(),
            codex_error_info: Some(V2CodexErrorInfo::Other),
        },
        &turn_summary_store,
    )
    .await;
    handle_error(conversation_id, "bad".to_string(), &turn_summary_store).await;
    let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY);
    let outgoing = Arc::new(OutgoingMessageSender::new(tx));

@@ -1024,7 +741,6 @@ mod tests {
    TurnStatus::Failed {
        error: TurnError {
            message: "bad".to_string(),
            codex_error_info: Some(V2CodexErrorInfo::Other),
        }
    }
);
@@ -1075,15 +791,7 @@ mod tests {

    // Turn 1 on conversation A
    let a_turn1 = "a_turn1".to_string();
    handle_error(
        conversation_a,
        TurnError {
            message: "a1".to_string(),
            codex_error_info: Some(V2CodexErrorInfo::BadRequest),
        },
        &turn_summary_store,
    )
    .await;
    handle_error(conversation_a, "a1".to_string(), &turn_summary_store).await;
    handle_turn_complete(
        conversation_a,
        a_turn1.clone(),
@@ -1094,15 +802,7 @@ mod tests {

    // Turn 1 on conversation B
    let b_turn1 = "b_turn1".to_string();
    handle_error(
        conversation_b,
        TurnError {
            message: "b1".to_string(),
            codex_error_info: None,
        },
        &turn_summary_store,
    )
    .await;
    handle_error(conversation_b, "b1".to_string(), &turn_summary_store).await;
    handle_turn_complete(
        conversation_b,
        b_turn1.clone(),
@@ -1134,7 +834,6 @@ mod tests {
    TurnStatus::Failed {
        error: TurnError {
            message: "a1".to_string(),
            codex_error_info: Some(V2CodexErrorInfo::BadRequest),
        }
    }
);
@@ -1155,7 +854,6 @@ mod tests {
    TurnStatus::Failed {
        error: TurnError {
            message: "b1".to_string(),
            codex_error_info: None,
        }
    }
);

@@ -83,7 +83,6 @@ use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::ThreadStartedNotification;
use codex_app_server_protocol::Turn;
use codex_app_server_protocol::TurnError;
use codex_app_server_protocol::TurnInterruptParams;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStartResponse;
@@ -92,7 +91,6 @@ use codex_app_server_protocol::TurnStatus;
|
||||
use codex_app_server_protocol::UserInfoResponse;
|
||||
use codex_app_server_protocol::UserInput as V2UserInput;
|
||||
use codex_app_server_protocol::UserSavedConfig;
|
||||
use codex_app_server_protocol::build_turns_from_event_msgs;
|
||||
use codex_backend_client::Client as BackendClient;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::CodexConversation;
|
||||
@@ -139,7 +137,6 @@ use codex_protocol::protocol::USER_MESSAGE_BEGIN;
|
||||
use codex_protocol::user_input::UserInput as CoreInputItem;
|
||||
use codex_utils_json_to_toml::json_to_toml;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::ffi::OsStr;
|
||||
use std::io::Error as IoError;
|
||||
use std::path::Path;
|
||||
@@ -162,8 +159,7 @@ pub(crate) type PendingInterrupts = Arc<Mutex<HashMap<ConversationId, PendingInt
|
||||
/// Per-conversation accumulation of the latest states e.g. error message while a turn runs.
|
||||
#[derive(Default, Clone)]
|
||||
pub(crate) struct TurnSummary {
|
||||
pub(crate) file_change_started: HashSet<String>,
|
||||
pub(crate) last_error: Option<TurnError>,
|
||||
pub(crate) last_error_message: Option<String>,
|
||||
}
|
||||
|
||||
pub(crate) type TurnSummaryStore = Arc<Mutex<HashMap<ConversationId, TurnSummary>>>;
|
||||
@@ -1170,13 +1166,11 @@ impl CodexMessageProcessor {
|
||||
let exec_params = ExecParams {
|
||||
command: params.command,
|
||||
cwd,
|
||||
expiration: timeout_ms.into(),
|
||||
timeout_ms,
|
||||
env,
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let effective_policy = params
|
||||
@@ -1244,7 +1238,7 @@ impl CodexMessageProcessor {
|
||||
let overrides = ConfigOverrides {
|
||||
model,
|
||||
config_profile: profile,
|
||||
cwd: cwd.clone().map(PathBuf::from),
|
||||
cwd: cwd.map(PathBuf::from),
|
||||
approval_policy,
|
||||
sandbox_mode,
|
||||
model_provider,
|
||||
@@ -1259,7 +1253,7 @@ impl CodexMessageProcessor {
|
||||
// Persist windows sandbox feature.
|
||||
// TODO: persist default config in general.
|
||||
let mut cli_overrides = cli_overrides.unwrap_or_default();
|
||||
if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) {
|
||||
if cfg!(target_os = "windows") && self.config.features.enabled(Feature::WindowsSandbox) {
|
||||
cli_overrides.insert(
|
||||
"features.enable_experimental_windows_sandbox".to_string(),
|
||||
serde_json::json!(true),
|
||||
@@ -1658,11 +1652,6 @@ impl CodexMessageProcessor {
|
||||
session_configured,
|
||||
..
|
||||
}) => {
|
||||
let SessionConfiguredEvent {
|
||||
rollout_path,
|
||||
initial_messages,
|
||||
..
|
||||
} = session_configured;
|
||||
// Auto-attach a conversation listener when resuming a thread.
|
||||
if let Err(err) = self
|
||||
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
|
||||
@@ -1675,8 +1664,8 @@ impl CodexMessageProcessor {
|
||||
);
|
||||
}
|
||||
|
||||
let mut thread = match read_summary_from_rollout(
|
||||
rollout_path.as_path(),
|
||||
let thread = match read_summary_from_rollout(
|
||||
session_configured.rollout_path.as_path(),
|
||||
fallback_model_provider.as_str(),
|
||||
)
|
||||
.await
|
||||
@@ -1687,17 +1676,13 @@ impl CodexMessageProcessor {
|
||||
request_id,
|
||||
format!(
|
||||
"failed to load rollout `{}` for conversation {conversation_id}: {err}",
|
||||
rollout_path.display()
|
||||
session_configured.rollout_path.display()
|
||||
),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
thread.turns = initial_messages
|
||||
.as_deref()
|
||||
.map_or_else(Vec::new, build_turns_from_event_msgs);
|
||||
|
||||
let response = ThreadResumeResponse {
|
||||
thread,
|
||||
model: session_configured.model,
|
||||
@@ -1707,7 +1692,6 @@ impl CodexMessageProcessor {
|
||||
sandbox: session_configured.sandbox_policy.into(),
|
||||
reasoning_effort: session_configured.reasoning_effort,
|
||||
};
|
||||
|
||||
self.outgoing.send_response(request_id, response).await;
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -1958,15 +1942,6 @@ impl CodexMessageProcessor {
|
||||
include_apply_patch_tool,
|
||||
} = overrides;
|
||||
|
||||
// Persist windows sandbox feature.
|
||||
let mut cli_overrides = cli_overrides.unwrap_or_default();
|
||||
if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) {
|
||||
cli_overrides.insert(
|
||||
"features.enable_experimental_windows_sandbox".to_string(),
|
||||
serde_json::json!(true),
|
||||
);
|
||||
}
|
||||
|
||||
let overrides = ConfigOverrides {
|
||||
model,
|
||||
config_profile: profile,
|
||||
@@ -1982,7 +1957,7 @@ impl CodexMessageProcessor {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
derive_config_from_params(overrides, Some(cli_overrides)).await
|
||||
derive_config_from_params(overrides, cli_overrides).await
|
||||
}
|
||||
None => Ok(self.config.as_ref().clone()),
|
||||
};
|
||||
@@ -3008,7 +2983,6 @@ fn summary_to_thread(summary: ConversationSummary) -> Thread {
|
||||
model_provider,
|
||||
created_at: created_at.map(|dt| dt.timestamp()).unwrap_or(0),
|
||||
path,
|
||||
turns: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -11,11 +11,14 @@ use codex_app_server_protocol::JSONRPCErrorError;
|
||||
use codex_app_server_protocol::JSONRPCNotification;
|
||||
use codex_app_server_protocol::JSONRPCRequest;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::ServerNotification;
|
||||
use codex_app_server_protocol::WindowsWorldWritableWarningNotification;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::default_client::USER_AGENT_SUFFIX;
|
||||
use codex_core::default_client::get_codex_user_agent;
|
||||
use codex_core::features::Feature;
|
||||
use codex_feedback::CodexFeedback;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use std::sync::Arc;
|
||||
@@ -23,6 +26,7 @@ use std::sync::Arc;
|
||||
pub(crate) struct MessageProcessor {
|
||||
outgoing: Arc<OutgoingMessageSender>,
|
||||
codex_message_processor: CodexMessageProcessor,
|
||||
config: Arc<Config>,
|
||||
initialized: bool,
|
||||
}
|
||||
|
||||
@@ -50,13 +54,14 @@ impl MessageProcessor {
|
||||
conversation_manager,
|
||||
outgoing.clone(),
|
||||
codex_linux_sandbox_exe,
|
||||
config,
|
||||
config.clone(),
|
||||
feedback,
|
||||
);
|
||||
|
||||
Self {
|
||||
outgoing,
|
||||
codex_message_processor,
|
||||
config,
|
||||
initialized: false,
|
||||
}
|
||||
}
|
||||
@@ -117,6 +122,7 @@ impl MessageProcessor {
|
||||
self.outgoing.send_response(request_id, response).await;
|
||||
|
||||
self.initialized = true;
|
||||
self.handle_windows_world_writable_warning().await;
|
||||
|
||||
return;
|
||||
}
|
||||
@@ -156,4 +162,47 @@ impl MessageProcessor {
|
||||
pub(crate) fn process_error(&mut self, err: JSONRPCError) {
|
||||
tracing::error!("<- error: {:?}", err);
|
||||
}
|
||||
|
||||
/// On Windows, when using the experimental sandbox, we need to warn the user about world-writable directories.
|
||||
async fn handle_windows_world_writable_warning(&self) {
|
||||
if !cfg!(windows) {
|
||||
return;
|
||||
}
|
||||
|
||||
if !self.config.features.enabled(Feature::WindowsSandbox) {
|
||||
return;
|
||||
}
|
||||
|
||||
if !matches!(
|
||||
self.config.sandbox_policy,
|
||||
codex_protocol::protocol::SandboxPolicy::WorkspaceWrite { .. }
|
||||
| codex_protocol::protocol::SandboxPolicy::ReadOnly
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
if self
|
||||
.config
|
||||
.notices
|
||||
.hide_world_writable_warning
|
||||
.unwrap_or(false)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// This function is stubbed out to return None on non-Windows platforms
|
||||
if let Some((sample_paths, extra_count, failed_scan)) =
|
||||
codex_windows_sandbox::world_writable_warning_details(self.config.codex_home.as_path())
|
||||
{
|
||||
self.outgoing
|
||||
.send_server_notification(ServerNotification::WindowsWorldWritableWarning(
|
||||
WindowsWorldWritableWarningNotification {
|
||||
sample_paths,
|
||||
extra_count,
|
||||
failed_scan,
|
||||
},
|
||||
))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -229,7 +229,6 @@ mod tests {
|
||||
resets_at: Some(123),
|
||||
}),
|
||||
secondary: None,
|
||||
credits: None,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -244,8 +243,7 @@ mod tests {
|
||||
"windowDurationMins": 15,
|
||||
"resetsAt": 123
|
||||
},
|
||||
"secondary": null,
|
||||
"credits": null
|
||||
"secondary": null
|
||||
}
|
||||
},
|
||||
}),
|
||||
|
||||
@@ -27,7 +27,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
|
||||
std::fs::write(
|
||||
config_toml,
|
||||
r#"
|
||||
model = "gpt-5.1-codex-max"
|
||||
model = "arcticfox"
|
||||
approval_policy = "on-request"
|
||||
sandbox_mode = "workspace-write"
|
||||
model_reasoning_summary = "detailed"
|
||||
@@ -87,7 +87,7 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
|
||||
}),
|
||||
forced_chatgpt_workspace_id: Some("12345678-0000-0000-0000-000000000000".into()),
|
||||
forced_login_method: Some(ForcedLoginMethod::Chatgpt),
|
||||
model: Some("gpt-5.1-codex-max".into()),
|
||||
model: Some("arcticfox".into()),
|
||||
model_reasoning_effort: Some(ReasoningEffort::High),
|
||||
model_reasoning_summary: Some(ReasoningSummary::Detailed),
|
||||
model_verbosity: Some(Verbosity::Medium),
|
||||
|
||||
@@ -57,7 +57,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
|
||||
std::fs::write(
|
||||
config_toml,
|
||||
r#"
|
||||
model = "gpt-5.1-codex-max"
|
||||
model = "arcticfox"
|
||||
model_reasoning_effort = "medium"
|
||||
"#,
|
||||
)
|
||||
|
||||
@@ -46,9 +46,9 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
|
||||
|
||||
let expected_models = vec![
|
||||
Model {
|
||||
id: "gpt-5.1-codex-max".to_string(),
|
||||
model: "gpt-5.1-codex-max".to_string(),
|
||||
display_name: "gpt-5.1-codex-max".to_string(),
|
||||
id: "arcticfox".to_string(),
|
||||
model: "arcticfox".to_string(),
|
||||
display_name: "arcticfox".to_string(),
|
||||
description: "Latest Codex-optimized flagship for deep and fast reasoning.".to_string(),
|
||||
supported_reasoning_efforts: vec![
|
||||
ReasoningEffortOption {
|
||||
@@ -174,7 +174,7 @@ async fn list_models_pagination_works() -> Result<()> {
|
||||
} = to_response::<ModelListResponse>(first_response)?;
|
||||
|
||||
assert_eq!(first_items.len(), 1);
|
||||
assert_eq!(first_items[0].id, "gpt-5.1-codex-max");
|
||||
assert_eq!(first_items[0].id, "arcticfox");
|
||||
let next_cursor = first_cursor.ok_or_else(|| anyhow!("cursor for second page"))?;
|
||||
|
||||
let second_request = mcp
|
||||
|
||||
@@ -152,7 +152,6 @@ async fn get_account_rate_limits_returns_snapshot() -> Result<()> {
|
||||
window_duration_mins: Some(1440),
|
||||
resets_at: Some(secondary_reset_timestamp),
|
||||
}),
|
||||
credits: None,
|
||||
},
|
||||
};
|
||||
assert_eq!(received, expected);
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
use anyhow::Result;
|
||||
use app_test_support::McpProcess;
|
||||
use app_test_support::create_fake_rollout;
|
||||
use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::ThreadItem;
|
||||
use codex_app_server_protocol::ThreadResumeParams;
|
||||
use codex_app_server_protocol::ThreadResumeResponse;
|
||||
use codex_app_server_protocol::ThreadStartParams;
|
||||
use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_app_server_protocol::TurnStatus;
|
||||
use codex_app_server_protocol::UserInput;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use tempfile::TempDir;
|
||||
@@ -31,7 +27,7 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
|
||||
// Start a thread.
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5.1-codex-max".to_string()),
|
||||
model: Some("arcticfox".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
@@ -62,65 +58,6 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_resume_returns_rollout_history() -> Result<()> {
|
||||
let server = create_mock_chat_completions_server(vec![]).await;
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
|
||||
let preview = "Saved user message";
|
||||
let conversation_id = create_fake_rollout(
|
||||
codex_home.path(),
|
||||
"2025-01-05T12-00-00",
|
||||
"2025-01-05T12:00:00Z",
|
||||
preview,
|
||||
Some("mock_provider"),
|
||||
)?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let resume_id = mcp
|
||||
.send_thread_resume_request(ThreadResumeParams {
|
||||
thread_id: conversation_id.clone(),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let resume_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(resume_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadResumeResponse { thread, .. } = to_response::<ThreadResumeResponse>(resume_resp)?;
|
||||
|
||||
assert_eq!(thread.id, conversation_id);
|
||||
assert_eq!(thread.preview, preview);
|
||||
assert_eq!(thread.model_provider, "mock_provider");
|
||||
assert!(thread.path.is_absolute());
|
||||
|
||||
assert_eq!(
|
||||
thread.turns.len(),
|
||||
1,
|
||||
"expected rollouts to include one turn"
|
||||
);
|
||||
let turn = &thread.turns[0];
|
||||
assert_eq!(turn.status, TurnStatus::Completed);
|
||||
assert_eq!(turn.items.len(), 1, "expected user message item");
|
||||
match &turn.items[0] {
|
||||
ThreadItem::UserMessage { content, .. } => {
|
||||
assert_eq!(
|
||||
content,
|
||||
&vec![UserInput::Text {
|
||||
text: preview.to_string()
|
||||
}]
|
||||
);
|
||||
}
|
||||
other => panic!("expected user message item, got {other:?}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
|
||||
let server = create_mock_chat_completions_server(vec![]).await;
|
||||
@@ -132,7 +69,7 @@ async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
|
||||
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5.1-codex-max".to_string()),
|
||||
model: Some("arcticfox".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
@@ -177,7 +114,7 @@ async fn thread_resume_supports_history_and_overrides() -> Result<()> {
|
||||
// Start a thread.
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5.1-codex-max".to_string()),
|
||||
model: Some("arcticfox".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -1,20 +1,14 @@
|
||||
use anyhow::Result;
|
||||
use app_test_support::McpProcess;
|
||||
use app_test_support::create_apply_patch_sse_response;
|
||||
use app_test_support::create_final_assistant_message_sse_response;
|
||||
use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::create_mock_chat_completions_server_unchecked;
|
||||
use app_test_support::create_shell_sse_response;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::ApprovalDecision;
|
||||
use codex_app_server_protocol::CommandExecutionStatus;
|
||||
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
|
||||
use codex_app_server_protocol::ItemCompletedNotification;
|
||||
use codex_app_server_protocol::ItemStartedNotification;
|
||||
use codex_app_server_protocol::JSONRPCNotification;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::PatchApplyStatus;
|
||||
use codex_app_server_protocol::PatchChangeKind;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::ServerRequest;
|
||||
use codex_app_server_protocol::ThreadItem;
|
||||
@@ -477,300 +471,6 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_file_change_approval_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let codex_home = tmp.path().join("codex_home");
|
||||
std::fs::create_dir(&codex_home)?;
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
|
||||
let patch = r#"*** Begin Patch
|
||||
*** Add File: README.md
|
||||
+new line
|
||||
*** End Patch
|
||||
"#;
|
||||
let responses = vec![
|
||||
create_apply_patch_sse_response(patch, "patch-call")?,
|
||||
create_final_assistant_message_sse_response("patch applied")?,
|
||||
];
|
||||
let server = create_mock_chat_completions_server(responses).await;
|
||||
create_config_toml(&codex_home, &server.uri(), "untrusted")?;
|
||||
|
||||
let mut mcp = McpProcess::new(&codex_home).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_req = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("mock-model".to_string()),
|
||||
cwd: Some(workspace.to_string_lossy().into_owned()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let start_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(start_req)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
let turn_req = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id.clone(),
|
||||
input: vec![V2UserInput::Text {
|
||||
text: "apply patch".into(),
|
||||
}],
|
||||
cwd: Some(workspace.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let turn_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
|
||||
)
|
||||
.await??;
|
||||
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
|
||||
|
||||
let started_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let started_notif = mcp
|
||||
.read_stream_until_notification_message("item/started")
|
||||
.await?;
|
||||
let started: ItemStartedNotification =
|
||||
serde_json::from_value(started_notif.params.clone().expect("item/started params"))?;
|
||||
if let ThreadItem::FileChange { .. } = started.item {
|
||||
return Ok::<ThreadItem, anyhow::Error>(started.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let ThreadItem::FileChange {
|
||||
ref id,
|
||||
status,
|
||||
ref changes,
|
||||
} = started_file_change
|
||||
else {
|
||||
unreachable!("loop ensures we break on file change items");
|
||||
};
|
||||
assert_eq!(id, "patch-call");
|
||||
assert_eq!(status, PatchApplyStatus::InProgress);
|
||||
let started_changes = changes.clone();
|
||||
|
||||
let server_req = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
let ServerRequest::FileChangeRequestApproval { request_id, params } = server_req else {
|
||||
panic!("expected FileChangeRequestApproval request")
|
||||
};
|
||||
assert_eq!(params.item_id, "patch-call");
|
||||
assert_eq!(params.thread_id, thread.id);
|
||||
assert_eq!(params.turn_id, turn.id);
|
||||
let expected_readme_path = workspace.join("README.md");
|
||||
let expected_readme_path = expected_readme_path.to_string_lossy().into_owned();
|
||||
pretty_assertions::assert_eq!(
|
||||
started_changes,
|
||||
vec![codex_app_server_protocol::FileUpdateChange {
|
||||
path: expected_readme_path.clone(),
|
||||
kind: PatchChangeKind::Add,
|
||||
diff: "new line\n".to_string(),
|
||||
}]
|
||||
);
|
||||
|
||||
mcp.send_response(
|
||||
request_id,
|
||||
serde_json::to_value(FileChangeRequestApprovalResponse {
|
||||
decision: ApprovalDecision::Accept,
|
||||
})?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let completed_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let completed_notif = mcp
|
||||
.read_stream_until_notification_message("item/completed")
|
||||
.await?;
|
||||
let completed: ItemCompletedNotification = serde_json::from_value(
|
||||
completed_notif
|
||||
.params
|
||||
.clone()
|
||||
.expect("item/completed params"),
|
||||
)?;
|
||||
if let ThreadItem::FileChange { .. } = completed.item {
|
||||
return Ok::<ThreadItem, anyhow::Error>(completed.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let ThreadItem::FileChange { ref id, status, .. } = completed_file_change else {
|
||||
unreachable!("loop ensures we break on file change items");
|
||||
};
|
||||
assert_eq!(id, "patch-call");
|
||||
assert_eq!(status, PatchApplyStatus::Completed);
|
||||
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/task_complete"),
|
||||
)
|
||||
.await??;
|
||||
|
||||
let readme_contents = std::fs::read_to_string(expected_readme_path)?;
|
||||
assert_eq!(readme_contents, "new line\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_file_change_approval_decline_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let codex_home = tmp.path().join("codex_home");
|
||||
std::fs::create_dir(&codex_home)?;
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
|
||||
let patch = r#"*** Begin Patch
|
||||
*** Add File: README.md
|
||||
+new line
|
||||
*** End Patch
|
||||
"#;
|
||||
let responses = vec![
|
||||
create_apply_patch_sse_response(patch, "patch-call")?,
|
||||
create_final_assistant_message_sse_response("patch declined")?,
|
||||
];
|
||||
let server = create_mock_chat_completions_server(responses).await;
|
||||
create_config_toml(&codex_home, &server.uri(), "untrusted")?;
|
||||
|
||||
let mut mcp = McpProcess::new(&codex_home).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_req = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("mock-model".to_string()),
|
||||
cwd: Some(workspace.to_string_lossy().into_owned()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let start_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(start_req)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
let turn_req = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id.clone(),
|
||||
input: vec![V2UserInput::Text {
|
||||
text: "apply patch".into(),
|
||||
}],
|
||||
cwd: Some(workspace.clone()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let turn_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
|
||||
)
|
||||
.await??;
|
||||
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
|
||||
|
||||
let started_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let started_notif = mcp
|
||||
.read_stream_until_notification_message("item/started")
|
||||
.await?;
|
||||
let started: ItemStartedNotification =
|
||||
serde_json::from_value(started_notif.params.clone().expect("item/started params"))?;
|
||||
if let ThreadItem::FileChange { .. } = started.item {
|
||||
return Ok::<ThreadItem, anyhow::Error>(started.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let ThreadItem::FileChange {
|
||||
ref id,
|
||||
status,
|
||||
ref changes,
|
||||
} = started_file_change
|
||||
else {
|
||||
unreachable!("loop ensures we break on file change items");
|
||||
};
|
||||
assert_eq!(id, "patch-call");
|
||||
assert_eq!(status, PatchApplyStatus::InProgress);
|
||||
let started_changes = changes.clone();
|
||||
|
||||
let server_req = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
let ServerRequest::FileChangeRequestApproval { request_id, params } = server_req else {
|
||||
panic!("expected FileChangeRequestApproval request")
|
||||
};
|
||||
assert_eq!(params.item_id, "patch-call");
|
||||
assert_eq!(params.thread_id, thread.id);
|
||||
assert_eq!(params.turn_id, turn.id);
|
||||
let expected_readme_path = workspace.join("README.md");
|
||||
let expected_readme_path_str = expected_readme_path.to_string_lossy().into_owned();
|
||||
pretty_assertions::assert_eq!(
|
||||
started_changes,
|
||||
vec![codex_app_server_protocol::FileUpdateChange {
|
||||
path: expected_readme_path_str.clone(),
|
||||
kind: PatchChangeKind::Add,
|
||||
diff: "new line\n".to_string(),
|
||||
}]
|
||||
);
|
||||
|
||||
mcp.send_response(
|
||||
request_id,
|
||||
serde_json::to_value(FileChangeRequestApprovalResponse {
|
||||
decision: ApprovalDecision::Decline,
|
||||
})?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let completed_file_change = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let completed_notif = mcp
|
||||
.read_stream_until_notification_message("item/completed")
|
||||
.await?;
|
||||
let completed: ItemCompletedNotification = serde_json::from_value(
|
||||
completed_notif
|
||||
.params
|
||||
.clone()
|
||||
.expect("item/completed params"),
|
||||
)?;
|
||||
if let ThreadItem::FileChange { .. } = completed.item {
|
||||
return Ok::<ThreadItem, anyhow::Error>(completed.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let ThreadItem::FileChange { ref id, status, .. } = completed_file_change else {
|
||||
unreachable!("loop ensures we break on file change items");
|
||||
};
|
||||
assert_eq!(id, "patch-call");
|
||||
assert_eq!(status, PatchApplyStatus::Declined);
|
||||
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/task_complete"),
|
||||
)
|
||||
.await??;
|
||||
|
||||
assert!(
|
||||
!expected_readme_path.exists(),
|
||||
"declined patch should not be applied"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Helper to create a config.toml pointing at the mock model server.
|
||||
fn create_config_toml(
|
||||
codex_home: &Path,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use crate::types::CodeTaskDetailsResponse;
|
||||
use crate::types::CreditStatusDetails;
|
||||
use crate::types::PaginatedListTaskListItem;
|
||||
use crate::types::RateLimitStatusPayload;
|
||||
use crate::types::RateLimitWindowSnapshot;
|
||||
@@ -7,7 +6,6 @@ use crate::types::TurnAttemptsSiblingTurnsResponse;
|
||||
use anyhow::Result;
|
||||
use codex_core::auth::CodexAuth;
|
||||
use codex_core::default_client::get_codex_user_agent;
|
||||
use codex_protocol::protocol::CreditsSnapshot;
|
||||
use codex_protocol::protocol::RateLimitSnapshot;
|
||||
use codex_protocol::protocol::RateLimitWindow;
|
||||
use reqwest::header::AUTHORIZATION;
|
||||
@@ -274,23 +272,19 @@ impl Client {
|
||||
|
||||
// rate limit helpers
|
||||
fn rate_limit_snapshot_from_payload(payload: RateLimitStatusPayload) -> RateLimitSnapshot {
|
||||
let rate_limit_details = payload
|
||||
let Some(details) = payload
|
||||
.rate_limit
|
||||
.and_then(|inner| inner.map(|boxed| *boxed));
|
||||
|
||||
let (primary, secondary) = if let Some(details) = rate_limit_details {
|
||||
(
|
||||
Self::map_rate_limit_window(details.primary_window),
|
||||
Self::map_rate_limit_window(details.secondary_window),
|
||||
)
|
||||
} else {
|
||||
(None, None)
|
||||
.and_then(|inner| inner.map(|boxed| *boxed))
|
||||
else {
|
||||
return RateLimitSnapshot {
|
||||
primary: None,
|
||||
secondary: None,
|
||||
};
|
||||
};
|
||||
|
||||
RateLimitSnapshot {
|
||||
primary,
|
||||
secondary,
|
||||
credits: Self::map_credits(payload.credits),
|
||||
primary: Self::map_rate_limit_window(details.primary_window),
|
||||
secondary: Self::map_rate_limit_window(details.secondary_window),
|
||||
}
|
||||
}
|
||||
|
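The hunk above replaces an `if let` over a pre-flattened `Option` with a `let`-`else` early return. A minimal standalone sketch of the same pattern, assuming an illustrative doubly optional boxed payload (the `Option<Option<Box<_>>>` shape mirrors optional, nullable fields in generated OpenAPI models; all names below are stand-ins, not the crate's types):

```rust
// Sketch only: `Details` and `Snapshot` stand in for the generated
// OpenAPI model and the protocol type; the real shapes differ.
#[derive(Default, Debug, PartialEq)]
struct Snapshot {
    primary: Option<i64>,
    secondary: Option<i64>,
}

struct Details {
    primary_window: Option<i64>,
    secondary_window: Option<i64>,
}

fn snapshot_from(payload: Option<Option<Box<Details>>>) -> Snapshot {
    // `and_then` collapses the outer Option; `map(|boxed| *boxed)` unboxes.
    // If either layer is None, the let-else arm returns an empty snapshot.
    let Some(details) = payload.and_then(|inner| inner.map(|boxed| *boxed)) else {
        return Snapshot::default();
    };
    Snapshot {
        primary: details.primary_window,
        secondary: details.secondary_window,
    }
}

fn main() {
    assert_eq!(snapshot_from(None), Snapshot::default());
    let payload = Some(Some(Box::new(Details {
        primary_window: Some(60),
        secondary_window: None,
    })));
    assert_eq!(
        snapshot_from(payload),
        Snapshot {
            primary: Some(60),
            secondary: None,
        }
    );
}
```

The `let`-`else` form keeps the happy path unindented and makes the "no details at all" case an explicit early return instead of a tuple of `None`s threaded through the rest of the function.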

@@ -312,19 +306,6 @@ impl Client {
})
}

fn map_credits(credits: Option<Option<Box<CreditStatusDetails>>>) -> Option<CreditsSnapshot> {
let details = match credits {
Some(Some(details)) => *details,
_ => return None,
};

Some(CreditsSnapshot {
has_credits: details.has_credits,
unlimited: details.unlimited,
balance: details.balance.and_then(|inner| inner),
})
}

fn window_minutes_from_seconds(seconds: i32) -> Option<i64> {
if seconds <= 0 {
return None;

@@ -1,4 +1,3 @@
pub use codex_backend_openapi_models::models::CreditStatusDetails;
pub use codex_backend_openapi_models::models::PaginatedListTaskListItem;
pub use codex_backend_openapi_models::models::PlanType;
pub use codex_backend_openapi_models::models::RateLimitStatusDetails;

@@ -26,7 +26,6 @@ codex-cloud-tasks = { path = "../cloud-tasks" }
codex-common = { workspace = true, features = ["cli"] }
codex-core = { workspace = true }
codex-exec = { workspace = true }
codex-execpolicy = { workspace = true }
codex-login = { workspace = true }
codex-mcp-server = { workspace = true }
codex-process-hardening = { workspace = true }

@@ -18,7 +18,6 @@ use codex_cli::login::run_logout;
use codex_cloud_tasks::Cli as CloudTasksCli;
use codex_common::CliConfigOverrides;
use codex_exec::Cli as ExecCli;
use codex_execpolicy::ExecPolicyCheckCommand;
use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
use codex_tui::AppExitInfo;
use codex_tui::Cli as TuiCli;
@@ -94,10 +93,6 @@ enum Subcommand {
#[clap(visible_alias = "debug")]
Sandbox(SandboxArgs),

/// Execpolicy tooling.
#[clap(hide = true)]
Execpolicy(ExecpolicyCommand),

/// Apply the latest diff produced by Codex agent as a `git apply` to your local working tree.
#[clap(visible_alias = "a")]
Apply(ApplyCommand),
@@ -167,19 +162,6 @@ enum SandboxCommand {
Windows(WindowsCommand),
}

#[derive(Debug, Parser)]
struct ExecpolicyCommand {
#[command(subcommand)]
sub: ExecpolicySubcommand,
}

#[derive(Debug, clap::Subcommand)]
enum ExecpolicySubcommand {
/// Check execpolicy files against a command.
#[clap(name = "check")]
Check(ExecPolicyCheckCommand),
}

#[derive(Debug, Parser)]
struct LoginCommand {
#[clap(skip)]
@@ -345,10 +327,6 @@ fn run_update_action(action: UpdateAction) -> anyhow::Result<()> {
Ok(())
}

fn run_execpolicycheck(cmd: ExecPolicyCheckCommand) -> anyhow::Result<()> {
cmd.run()
}

#[derive(Debug, Default, Parser, Clone)]
struct FeatureToggles {
/// Enable a feature (repeatable). Equivalent to `-c features.<name>=true`.
@@ -571,9 +549,6 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
.await?;
}
},
Some(Subcommand::Execpolicy(ExecpolicyCommand { sub })) => match sub {
ExecpolicySubcommand::Check(cmd) => run_execpolicycheck(cmd)?,
},
Some(Subcommand::Apply(mut apply_cli)) => {
prepend_config_flags(
&mut apply_cli.config_overrides,

@@ -79,7 +79,6 @@ pub struct GetArgs {
}

#[derive(Debug, clap::Parser)]
#[command(override_usage = "codex mcp add [OPTIONS] <NAME> (--url <URL> | -- <COMMAND>...)")]
pub struct AddArgs {
/// Name for the MCP server configuration.
pub name: String,

@@ -1,58 +0,0 @@
use std::fs;

use assert_cmd::Command;
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;

#[test]
fn execpolicy_check_matches_expected_json() -> Result<(), Box<dyn std::error::Error>> {
let codex_home = TempDir::new()?;
let policy_path = codex_home.path().join("policy.codexpolicy");
fs::write(
&policy_path,
r#"
prefix_rule(
pattern = ["git", "push"],
decision = "forbidden",
)
"#,
)?;

let output = Command::cargo_bin("codex")?
.env("CODEX_HOME", codex_home.path())
.args([
"execpolicy",
"check",
"--policy",
policy_path
.to_str()
.expect("policy path should be valid UTF-8"),
"git",
"push",
"origin",
"main",
])
.output()?;

assert!(output.status.success());
let result: serde_json::Value = serde_json::from_slice(&output.stdout)?;
assert_eq!(
result,
json!({
"match": {
"decision": "forbidden",
"matchedRules": [
{
"prefixRuleMatch": {
"matchedPrefix": ["git", "push"],
"decision": "forbidden"
}
}
]
}
})
);

Ok(())
}
@@ -15,12 +15,13 @@ pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, Stri
if config.model_provider.wire_api == WireApi::Responses
&& config.model_family.supports_reasoning_summaries
{
let reasoning_effort = config
.model_reasoning_effort
.or(config.model_family.default_reasoning_effort)
.map(|effort| effort.to_string())
.unwrap_or_else(|| "none".to_string());
entries.push(("reasoning effort", reasoning_effort));
entries.push((
"reasoning effort",
config
.model_reasoning_effort
.map(|effort| effort.to_string())
.unwrap_or_else(|| "none".to_string()),
));
entries.push((
"reasoning summaries",
config.model_reasoning_summary.to_string(),
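The behavioral difference in the hunk above is easy to miss: one side falls back to `config.model_family.default_reasoning_effort` before printing "none", while the other reads only the explicitly configured value. A small hedged sketch of the difference (illustrative stand-in types, not the crate's config structs):

```rust
// `configured` and `family_default` stand in for
// `config.model_reasoning_effort` and the model-family default.
fn effort_label(configured: Option<&str>, family_default: Option<&str>) -> String {
    configured
        .or(family_default) // drop this fallback to get the other side's behavior
        .map(str::to_string)
        .unwrap_or_else(|| "none".to_string())
}

fn main() {
    // With the fallback, an unset effort reports the family default.
    assert_eq!(effort_label(None, Some("medium")), "medium");
    // With no value anywhere, both variants print "none".
    assert_eq!(effort_label(None, None), "none");
    // An explicit setting always wins in either variant.
    assert_eq!(effort_label(Some("high"), Some("medium")), "high");
}
```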

@@ -5,8 +5,7 @@ use codex_core::protocol_config_types::ReasoningEffort;
use once_cell::sync::Lazy;

pub const HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG: &str = "hide_gpt5_1_migration_prompt";
pub const HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG: &str =
"hide_gpt-5.1-codex-max_migration_prompt";
pub const HIDE_ARCTICFOX_MIGRATION_PROMPT_CONFIG: &str = "hide_arcticfox_migration_prompt";

/// A reasoning effort option that can be surfaced for a model.
#[derive(Debug, Clone, Copy)]
@@ -50,9 +49,9 @@ pub struct ModelPreset {
static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
vec![
ModelPreset {
id: "gpt-5.1-codex-max",
model: "gpt-5.1-codex-max",
display_name: "gpt-5.1-codex-max",
id: "arcticfox",
model: "arcticfox",
display_name: "arcticfox",
description: "Latest Codex-optimized flagship for deep and fast reasoning.",
default_reasoning_effort: ReasoningEffort::Medium,
supported_reasoning_efforts: &[
@@ -99,9 +98,9 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
id: "arcticfox",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
migration_config_key: HIDE_ARCTICFOX_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: true,
},
@@ -122,11 +121,7 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
},
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
}),
upgrade: None,
show_in_picker: true,
},
ModelPreset {
@@ -150,11 +145,7 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
},
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
}),
upgrade: None,
show_in_picker: true,
},
// Deprecated models.
@@ -180,9 +171,9 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
id: "arcticfox",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
migration_config_key: HIDE_ARCTICFOX_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: false,
},
@@ -236,9 +227,12 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
],
is_default: false,
upgrade: Some(ModelUpgrade {
id: "gpt-5.1-codex-max",
reasoning_effort_mapping: None,
migration_config_key: HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG,
id: "gpt-5.1",
reasoning_effort_mapping: Some(HashMap::from([(
ReasoningEffort::Minimal,
ReasoningEffort::Low,
)])),
migration_config_key: HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG,
}),
show_in_picker: false,
},
@@ -249,7 +243,7 @@ pub fn builtin_model_presets(auth_mode: Option<AuthMode>) -> Vec<ModelPreset> {
PRESETS
.iter()
.filter(|preset| match auth_mode {
Some(AuthMode::ApiKey) => preset.show_in_picker && preset.id != "gpt-5.1-codex-max",
Some(AuthMode::ApiKey) => preset.show_in_picker && preset.id != "arcticfox",
_ => preset.show_in_picker,
})
.cloned()
@@ -272,12 +266,8 @@ mod tests {
}

#[test]
fn gpt_5_1_codex_max_hidden_for_api_key_auth() {
fn arcticfox_hidden_for_api_key_auth() {
let presets = builtin_model_presets(Some(AuthMode::ApiKey));
assert!(
presets
.iter()
.all(|preset| preset.id != "gpt-5.1-codex-max")
);
assert!(presets.iter().all(|preset| preset.id != "arcticfox"));
}
}

@@ -19,11 +19,9 @@ async-trait = { workspace = true }
base64 = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
chardetng = { workspace = true }
codex-app-server-protocol = { workspace = true }
codex-apply-patch = { workspace = true }
codex-async-utils = { workspace = true }
codex-execpolicy = { workspace = true }
codex-file-search = { workspace = true }
codex-git = { workspace = true }
codex-keyring-store = { workspace = true }
@@ -33,11 +31,11 @@ codex-rmcp-client = { workspace = true }
codex-utils-pty = { workspace = true }
codex-utils-readiness = { workspace = true }
codex-utils-string = { workspace = true }
codex-utils-tokenizer = { workspace = true }
codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
dirs = { workspace = true }
dunce = { workspace = true }
env-flags = { workspace = true }
encoding_rs = { workspace = true }
eventsource-stream = { workspace = true }
futures = { workspace = true }
http = { workspace = true }

@@ -100,7 +100,7 @@ pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
if !matches!(flag.as_str(), "-lc" | "-c")
|| !matches!(
detect_shell_type(&PathBuf::from(shell)),
Some(ShellType::Zsh) | Some(ShellType::Bash) | Some(ShellType::Sh)
Some(ShellType::Zsh) | Some(ShellType::Bash)
)
{
return None;
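For context, a self-contained sketch of the wrapper detection that this hunk narrows (after the change, `sh` no longer counts as a recognized wrapper shell). `ShellType`, `detect_shell_type`, and the function name here are simplified stand-ins for the crate's own helpers:

```rust
use std::path::{Path, PathBuf};

#[derive(Debug, PartialEq)]
enum ShellType {
    Bash,
    Zsh,
    Sh,
}

// Simplified stand-in: classify a shell binary by its file name.
fn detect_shell_type(path: &Path) -> Option<ShellType> {
    match path.file_name()?.to_str()? {
        "bash" => Some(ShellType::Bash),
        "zsh" => Some(ShellType::Zsh),
        "sh" => Some(ShellType::Sh),
        _ => None,
    }
}

// Accept only `bash`/`zsh` invoked as `<shell> -c <script>` or `<shell> -lc <script>`.
fn extract_wrapped_command(command: &[String]) -> Option<(&str, &str)> {
    let [shell, flag, script] = command else {
        return None;
    };
    if !matches!(flag.as_str(), "-lc" | "-c")
        || !matches!(
            detect_shell_type(&PathBuf::from(shell)),
            Some(ShellType::Zsh) | Some(ShellType::Bash)
        )
    {
        return None;
    }
    Some((shell.as_str(), script.as_str()))
}

fn main() {
    let bash = vec!["bash".to_string(), "-lc".to_string(), "ls -la".to_string()];
    assert_eq!(extract_wrapped_command(&bash), Some(("bash", "ls -la")));
    // After this change, a plain `sh -c ...` invocation is no longer unwrapped.
    let sh = vec!["sh".to_string(), "-c".to_string(), "ls".to_string()];
    assert_eq!(extract_wrapped_command(&sh), None);
}
```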

@@ -56,7 +56,6 @@ use crate::model_family::ModelFamily;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::openai_model_info::get_model_info;
use crate::protocol::CreditsSnapshot;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::RateLimitWindow;
use crate::protocol::TokenUsage;
@@ -727,13 +726,7 @@ fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
"x-codex-secondary-reset-at",
);

let credits = parse_credits_snapshot(headers);

Some(RateLimitSnapshot {
primary,
secondary,
credits,
})
Some(RateLimitSnapshot { primary, secondary })
}

fn parse_rate_limit_window(
@@ -760,20 +753,6 @@ fn parse_rate_limit_window(
})
}

fn parse_credits_snapshot(headers: &HeaderMap) -> Option<CreditsSnapshot> {
let has_credits = parse_header_bool(headers, "x-codex-credits-has-credits")?;
let unlimited = parse_header_bool(headers, "x-codex-credits-unlimited")?;
let balance = parse_header_str(headers, "x-codex-credits-balance")
.map(str::trim)
.filter(|value| !value.is_empty())
.map(std::string::ToString::to_string);
Some(CreditsSnapshot {
has_credits,
unlimited,
balance,
})
}

fn parse_header_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
parse_header_str(headers, name)?
.parse::<f64>()
@@ -785,17 +764,6 @@ fn parse_header_i64(headers: &HeaderMap, name: &str) -> Option<i64> {
parse_header_str(headers, name)?.parse::<i64>().ok()
}

fn parse_header_bool(headers: &HeaderMap, name: &str) -> Option<bool> {
let raw = parse_header_str(headers, name)?;
if raw.eq_ignore_ascii_case("true") || raw == "1" {
Some(true)
} else if raw.eq_ignore_ascii_case("false") || raw == "0" {
Some(false)
} else {
None
}
}

fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
headers.get(name)?.to_str().ok()
}
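The removed `parse_header_bool` is worth keeping as a standalone snippet: it parses "true"/"false" case-insensitively plus "1"/"0", and treats anything else (or a missing or non-UTF-8 header) as absent rather than false. A runnable sketch using the `http` crate's `HeaderMap`; the header name below is illustrative:

```rust
use http::HeaderMap;

fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
    // Missing headers and non-UTF-8 values both become None.
    headers.get(name)?.to_str().ok()
}

fn parse_header_bool(headers: &HeaderMap, name: &str) -> Option<bool> {
    let raw = parse_header_str(headers, name)?;
    if raw.eq_ignore_ascii_case("true") || raw == "1" {
        Some(true)
    } else if raw.eq_ignore_ascii_case("false") || raw == "0" {
        Some(false)
    } else {
        None // unparseable values are ignored rather than treated as false
    }
}

fn main() {
    let mut headers = HeaderMap::new();
    headers.insert("x-example-flag", "TRUE".parse().unwrap());
    assert_eq!(parse_header_bool(&headers, "x-example-flag"), Some(true));
    assert_eq!(parse_header_bool(&headers, "x-missing"), None);
}
```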

@@ -422,7 +422,7 @@ mod tests {
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "gpt-5.1-codex-max",
slug: "arcticfox",
expects_apply_patch_instructions: false,
},
];

@@ -79,6 +79,7 @@ use crate::protocol::ApplyPatchApprovalRequestEvent;
use crate::protocol::AskForApproval;
use crate::protocol::BackgroundEventEvent;
use crate::protocol::DeprecationNoticeEvent;
use crate::protocol::ErrorEvent;
use crate::protocol::Event;
use crate::protocol::EventMsg;
use crate::protocol::ExecApprovalRequestEvent;
@@ -120,7 +121,6 @@ use crate::user_instructions::UserInstructions;
use crate::user_notification::UserNotification;
use crate::util::backoff;
use codex_async_utils::OrCancelExt;
use codex_execpolicy::Policy as ExecPolicy;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
@@ -128,11 +128,11 @@ use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::CodexErrorInfo;
use codex_protocol::protocol::InitialHistory;
use codex_protocol::user_input::UserInput;
use codex_utils_readiness::Readiness;
use codex_utils_readiness::ReadinessFlag;
use codex_utils_tokenizer::warm_model_cache;

/// The high-level interface to the Codex system.
/// It operates as a queue pair where you send submissions and receive events.
@@ -166,10 +166,6 @@ impl Codex {

let user_instructions = get_user_instructions(&config).await;

let exec_policy = crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
.await
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy: {err}")))?;

let config = Arc::new(config);

let session_configuration = SessionConfiguration {
@@ -186,7 +182,6 @@ impl Codex {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: config.features.clone(),
exec_policy,
session_source,
};

@@ -284,7 +279,6 @@ pub(crate) struct TurnContext {
pub(crate) final_output_json_schema: Option<Value>,
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
pub(crate) exec_policy: Arc<ExecPolicy>,
pub(crate) truncation_policy: TruncationPolicy,
}

@@ -341,8 +335,6 @@ pub(crate) struct SessionConfiguration {

/// Set of feature flags for this session
features: Features,
/// Execpolicy policy, applied only when enabled by feature flag.
exec_policy: Arc<ExecPolicy>,

// TODO(pakrym): Remove config from here
original_config_do_not_use: Arc<Config>,
@@ -443,7 +435,6 @@ impl Session {
final_output_json_schema: None,
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy: session_configuration.exec_policy.clone(),
truncation_policy: TruncationPolicy::new(&per_turn_config),
}
}
@@ -492,7 +483,7 @@ impl Session {
// - load history metadata
let rollout_fut = RolloutRecorder::new(&config, rollout_params);

let default_shell = shell::default_user_shell();
let default_shell_fut = shell::default_user_shell();
let history_meta_fut = crate::message_history::history_metadata(&config);
let auth_statuses_fut = compute_auth_statuses(
config.mcp_servers.iter(),
@@ -500,8 +491,12 @@ impl Session {
);

// Join all independent futures.
let (rollout_recorder, (history_log_id, history_entry_count), auth_statuses) =
tokio::join!(rollout_fut, history_meta_fut, auth_statuses_fut);
let (rollout_recorder, default_shell, (history_log_id, history_entry_count), auth_statuses) = tokio::join!(
rollout_fut,
default_shell_fut,
history_meta_fut,
auth_statuses_fut
);
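The refactor above folds shell detection into the existing `tokio::join!`, so the independent startup tasks overlap instead of running one after another. A minimal runnable sketch of the idea, with sleeps standing in for the real rollout, shell, and history futures (the surrounding hunk continues after the sketch):

```rust
// Requires the tokio crate with the "macros", "rt", and "time" features
// (or simply "full"). The bodies are placeholders for real startup work.
use std::time::Duration;

async fn load_rollout() -> &'static str {
    tokio::time::sleep(Duration::from_millis(30)).await;
    "rollout"
}

async fn detect_shell() -> &'static str {
    tokio::time::sleep(Duration::from_millis(30)).await;
    "shell"
}

async fn history_metadata() -> (u64, usize) {
    tokio::time::sleep(Duration::from_millis(30)).await;
    (7, 42)
}

#[tokio::main]
async fn main() {
    // tokio::join! polls all three futures concurrently on the same task,
    // so the total wait is roughly 30ms here instead of roughly 90ms
    // if each future were awaited in sequence.
    let (rollout, shell, (log_id, entries)) =
        tokio::join!(load_rollout(), detect_shell(), history_metadata());
    println!("{rollout} {shell} {log_id} {entries}");
}
```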

let rollout_recorder = rollout_recorder.map_err(|e| {
error!("failed to initialize rollout recorder: {e:#}");
@@ -554,6 +549,9 @@ impl Session {
// Create the mutable state for the Session.
let state = SessionState::new(session_configuration.clone());

// Warm the tokenizer cache for the session model without blocking startup.
warm_model_cache(&session_configuration.model);

let services = SessionServices {
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
mcp_startup_cancellation_token: CancellationToken::new(),
@@ -911,7 +909,6 @@ impl Session {

let event = EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
changes,
reason,
grant_root,
@@ -1049,7 +1046,7 @@ impl Session {
Some(turn_context.cwd.clone()),
Some(turn_context.approval_policy),
Some(turn_context.sandbox_policy.clone()),
self.user_shell().clone(),
Some(self.user_shell().clone()),
)));
items
}
@@ -1189,14 +1186,9 @@ impl Session {
&self,
turn_context: &TurnContext,
message: impl Into<String>,
codex_error: CodexErr,
) {
let codex_error_info = CodexErrorInfo::ResponseStreamDisconnected {
http_status_code: codex_error.http_status_code_value(),
};
let event = EventMsg::StreamError(StreamErrorEvent {
message: message.into(),
codex_error_info: Some(codex_error_info),
});
self.send_event(turn_context, event).await;
}
@@ -1442,7 +1434,6 @@ mod handlers {
use crate::tasks::UndoTask;
use crate::tasks::UserShellCommandTask;
use codex_protocol::custom_prompts::CustomPrompt;
use codex_protocol::protocol::CodexErrorInfo;
use codex_protocol::protocol::ErrorEvent;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
@@ -1689,7 +1680,6 @@ mod handlers {
id: sub_id.clone(),
msg: EventMsg::Error(ErrorEvent {
message: "Failed to shutdown rollout recorder".to_string(),
codex_error_info: Some(CodexErrorInfo::Other),
}),
};
sess.send_event_raw(event).await;
@@ -1795,7 +1785,6 @@ async fn spawn_review_thread(
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy: parent_turn_context.exec_policy.clone(),
truncation_policy: TruncationPolicy::new(&per_turn_config),
};

@@ -1944,7 +1933,9 @@ pub(crate) async fn run_task(
}
Err(e) => {
info!("Turn error: {e:#}");
let event = EventMsg::Error(e.to_error_event(None));
let event = EventMsg::Error(ErrorEvent {
message: e.to_string(),
});
sess.send_event(&turn_context, event).await;
// let the user continue the conversation
break;
@@ -2069,7 +2060,6 @@ async fn run_turn(
sess.notify_stream_error(
&turn_context,
format!("Reconnecting... {retries}/{max_retries}"),
e,
)
.await;

@@ -2388,7 +2378,6 @@ mod tests {
use crate::config::ConfigOverrides;
use crate::config::ConfigToml;
use crate::exec::ExecToolCallOutput;
use crate::shell::default_user_shell;
use crate::tools::format_exec_output_str;

use crate::protocol::CompactedItem;
@@ -2616,7 +2605,6 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy: Arc::new(ExecPolicy::empty()),
session_source: SessionSource::Exec,
};

@@ -2628,7 +2616,7 @@ mod tests {
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
user_shell: default_user_shell(),
user_shell: shell::Shell::Unknown,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
auth_manager: Arc::clone(&auth_manager),
otel_event_manager: otel_event_manager.clone(),
@@ -2694,7 +2682,6 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy: Arc::new(ExecPolicy::empty()),
session_source: SessionSource::Exec,
};

@@ -2706,7 +2693,7 @@ mod tests {
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
user_shell: default_user_shell(),
user_shell: shell::Shell::Unknown,
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
auth_manager: Arc::clone(&auth_manager),
otel_event_manager: otel_event_manager.clone(),
@@ -3051,7 +3038,6 @@ mod tests {
let session = Arc::new(session);
let mut turn_context = Arc::new(turn_context_raw);

let timeout_ms = 1000;
let params = ExecParams {
command: if cfg!(windows) {
vec![
@@ -3067,25 +3053,16 @@ mod tests {
]
},
cwd: turn_context.cwd.clone(),
expiration: timeout_ms.into(),
timeout_ms: Some(1000),
env: HashMap::new(),
with_escalated_permissions: Some(true),
justification: Some("test".to_string()),
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};

let params2 = ExecParams {
with_escalated_permissions: Some(false),
command: params.command.clone(),
cwd: params.cwd.clone(),
expiration: timeout_ms.into(),
env: HashMap::new(),
justification: params.justification.clone(),
arg0: None,
max_output_tokens: None,
max_output_chars: None,
..params.clone()
};

let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
@@ -3105,7 +3082,7 @@ mod tests {
arguments: serde_json::json!({
"command": params.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params.expiration.timeout_ms(),
"timeout_ms": params.timeout_ms,
"with_escalated_permissions": params.with_escalated_permissions,
"justification": params.justification.clone(),
})
@@ -3142,7 +3119,7 @@ mod tests {
arguments: serde_json::json!({
"command": params2.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params2.expiration.timeout_ms(),
"timeout_ms": params2.timeout_ms,
"with_escalated_permissions": params2.with_escalated_permissions,
"justification": params2.justification.clone(),
})

@@ -1,8 +1,6 @@
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;

use crate::sandboxing::SandboxPermissions;

use crate::bash::parse_shell_lc_plain_commands;
use crate::is_safe_command::is_known_safe_command;

@@ -10,7 +8,7 @@ pub fn requires_initial_appoval(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
command: &[String],
sandbox_permissions: SandboxPermissions,
with_escalated_permissions: bool,
) -> bool {
if is_known_safe_command(command) {
return false;
@@ -26,7 +24,8 @@ pub fn requires_initial_appoval(
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
// non‑escalated, non‑dangerous commands — let the sandbox enforce
// restrictions (e.g., block network/write) without a user prompt.
if sandbox_permissions.requires_escalated_permissions() {
let wants_escalation: bool = with_escalated_permissions;
if wants_escalation {
return true;
}
command_might_be_dangerous(command)
|
||||
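For orientation, the gate in the hunk above reduces to a small decision function: known-safe commands never prompt, escalation requests always prompt, and everything else falls back to a danger heuristic. A minimal standalone sketch of that flow; the two predicates here are stubs standing in for the crate's real helpers, and the stub bodies are invented for the demo:

```rust
/// Stub: the real `is_known_safe_command` lives elsewhere in the crate.
fn is_known_safe_command(command: &[String]) -> bool {
    matches!(command.first().map(String::as_str), Some("ls" | "cat" | "pwd"))
}

/// Stub: the real heuristic is considerably richer than this.
fn command_might_be_dangerous(command: &[String]) -> bool {
    command.first().map(String::as_str) == Some("rm")
}

fn needs_initial_approval(command: &[String], wants_escalation: bool) -> bool {
    if is_known_safe_command(command) {
        return false; // trusted prefix: run without prompting
    }
    if wants_escalation {
        return true; // escalated permissions always require a prompt
    }
    // Otherwise let the sandbox enforce restrictions and only prompt for
    // commands the heuristic flags as dangerous.
    command_might_be_dangerous(command)
}

fn main() {
    assert!(!needs_initial_approval(&["ls".into()], false));
    assert!(needs_initial_approval(&["rm".into()], false));
    assert!(needs_initial_approval(&["cargo".into()], true));
}
```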
@@ -10,6 +10,7 @@ use crate::error::Result as CodexResult;
use crate::features::Feature;
use crate::protocol::AgentMessageEvent;
use crate::protocol::CompactedItem;
use crate::protocol::ErrorEvent;
use crate::protocol::EventMsg;
use crate::protocol::TaskStartedEvent;
use crate::protocol::TurnContextItem;
@@ -127,7 +128,9 @@ async fn run_compact_task_inner(
                    continue;
                }
                sess.set_total_tokens_full(turn_context.as_ref()).await;
                let event = EventMsg::Error(e.to_error_event(None));
                let event = EventMsg::Error(ErrorEvent {
                    message: e.to_string(),
                });
                sess.send_event(&turn_context, event).await;
                return;
            }
@@ -138,13 +141,14 @@ async fn run_compact_task_inner(
                    sess.notify_stream_error(
                        turn_context.as_ref(),
                        format!("Reconnecting... {retries}/{max_retries}"),
                        e,
                    )
                    .await;
                    tokio::time::sleep(delay).await;
                    continue;
                } else {
                    let event = EventMsg::Error(e.to_error_event(None));
                    let event = EventMsg::Error(ErrorEvent {
                        message: e.to_string(),
                    });
                    sess.send_event(&turn_context, event).await;
                    return;
                }

@@ -6,6 +6,7 @@ use crate::codex::TurnContext;
use crate::error::Result as CodexResult;
use crate::protocol::AgentMessageEvent;
use crate::protocol::CompactedItem;
use crate::protocol::ErrorEvent;
use crate::protocol::EventMsg;
use crate::protocol::RolloutItem;
use crate::protocol::TaskStartedEvent;
@@ -29,9 +30,9 @@ pub(crate) async fn run_remote_compact_task(sess: Arc<Session>, turn_context: Ar

async fn run_remote_compact_task_inner(sess: &Arc<Session>, turn_context: &Arc<TurnContext>) {
    if let Err(err) = run_remote_compact_task_inner_impl(sess, turn_context).await {
        let event = EventMsg::Error(
            err.to_error_event(Some("Error running remote compact task".to_string())),
        );
        let event = EventMsg::Error(ErrorEvent {
            message: format!("Error running remote compact task: {err}"),
        });
        sess.send_event(turn_context, event).await;
    }
}

@@ -4,6 +4,7 @@ use crate::config::types::Notice;
use anyhow::Context;
use codex_protocol::config_types::ReasoningEffort;
use codex_protocol::config_types::TrustLevel;
use codex_utils_tokenizer::warm_model_cache;
use std::collections::BTreeMap;
use std::path::Path;
use std::path::PathBuf;
@@ -230,6 +231,9 @@ impl ConfigDocument {
    fn apply(&mut self, edit: &ConfigEdit) -> anyhow::Result<bool> {
        match edit {
            ConfigEdit::SetModel { model, effort } => Ok({
                if let Some(model) = &model {
                    warm_model_cache(model)
                }
                let mut mutated = false;
                mutated |= self.write_profile_value(
                    &["model"],
@@ -842,7 +846,7 @@ hide_gpt5_1_migration_prompt = true
    }

    #[test]
    fn blocking_set_hide_gpt_5_1_codex_max_migration_prompt_preserves_table() {
    fn blocking_set_hide_arcticfox_migration_prompt_preserves_table() {
        let tmp = tempdir().expect("tmpdir");
        let codex_home = tmp.path();
        std::fs::write(
@@ -856,7 +860,7 @@ existing = "value"
            codex_home,
            None,
            &[ConfigEdit::SetNoticeHideModelMigrationPrompt(
                "hide_gpt-5.1-codex-max_migration_prompt".to_string(),
                "hide_arcticfox_migration_prompt".to_string(),
                true,
            )],
        )
@@ -866,7 +870,7 @@ existing = "value"
            std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
        let expected = r#"[notice]
existing = "value"
"hide_gpt-5.1-codex-max_migration_prompt" = true
hide_arcticfox_migration_prompt = true
"#;
        assert_eq!(contents, expected);
    }

@@ -61,9 +61,12 @@ pub mod edit;
pub mod profile;
pub mod types;

pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex";
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex";
#[cfg(target_os = "windows")]
pub const OPENAI_DEFAULT_MODEL: &str = "arcticfox";
#[cfg(not(target_os = "windows"))]
pub const OPENAI_DEFAULT_MODEL: &str = "arcticfox";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "arcticfox";
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "arcticfox";

/// Maximum number of bytes of the documentation that will be embedded. Larger
/// files are *silently truncated* to this size so we do not take up too much of
@@ -78,7 +81,7 @@ pub struct Config {
    /// Optional override of model selection.
    pub model: String,

    /// Model used specifically for review sessions. Defaults to "gpt-5.1-codex-max".
    /// Model used specifically for review sessions. Defaults to "arcticfox".
    pub review_model: String,

    pub model_family: ModelFamily,
@@ -160,9 +163,6 @@ pub struct Config {
    /// and turn completions when not focused.
    pub tui_notifications: Notifications,

    /// Enable ASCII animations and shimmer effects in the TUI.
    pub animations: bool,

    /// The directory that should be treated as the current working directory
    /// for the session. All relative paths inside the business-logic layer are
    /// resolved against this path.
@@ -1256,7 +1256,6 @@ impl Config {
                .as_ref()
                .map(|t| t.notifications.clone())
                .unwrap_or_default(),
            animations: cfg.tui.as_ref().map(|t| t.animations).unwrap_or(true),
            otel: {
                let t: OtelConfigToml = cfg.otel.unwrap_or_default();
                let log_user_prompt = t.log_user_prompt.unwrap_or(false);
@@ -3007,7 +3006,6 @@ model_verbosity = "high"
            notices: Default::default(),
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            animations: true,
            otel: OtelConfig::default(),
        },
        o3_profile_config
@@ -3080,7 +3078,6 @@ model_verbosity = "high"
        notices: Default::default(),
        disable_paste_burst: false,
        tui_notifications: Default::default(),
        animations: true,
        otel: OtelConfig::default(),
    };

@@ -3168,7 +3165,6 @@ model_verbosity = "high"
        notices: Default::default(),
        disable_paste_burst: false,
        tui_notifications: Default::default(),
        animations: true,
        otel: OtelConfig::default(),
    };

@@ -3242,7 +3238,6 @@ model_verbosity = "high"
        notices: Default::default(),
        disable_paste_burst: false,
        tui_notifications: Default::default(),
        animations: true,
        otel: OtelConfig::default(),
    };

@@ -363,15 +363,6 @@ pub struct Tui {
    /// Defaults to `true`.
    #[serde(default)]
    pub notifications: Notifications,

    /// Enable animations (welcome screen, shimmer effects, spinners).
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub animations: bool,
}

const fn default_true() -> bool {
    true
}

/// Settings for notices we display to users via the tui and app-server clients
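The `default_true` helper above is the standard serde idiom for a flag that should be on whenever the key is absent from the TOML. A self-contained illustration of that behavior; the struct name is invented for the demo, and it assumes `serde` (with the derive feature) and `toml` as dependencies:

```rust
use serde::Deserialize;

const fn default_true() -> bool {
    true
}

#[derive(Deserialize, Debug)]
struct TuiDemo {
    // Missing key -> serde calls `default_true` instead of erroring.
    #[serde(default = "default_true")]
    animations: bool,
}

fn main() {
    // Key absent: the default kicks in.
    let empty: TuiDemo = toml::from_str("").expect("parse");
    assert!(empty.animations);

    // Explicit value still wins over the default.
    let off: TuiDemo = toml::from_str("animations = false").expect("parse");
    assert!(!off.animations);
    println!("{empty:?} {off:?}");
}
```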
@@ -387,9 +378,8 @@ pub struct Notice {
    pub hide_rate_limit_model_nudge: Option<bool>,
    /// Tracks whether the user has seen the model migration prompt
    pub hide_gpt5_1_migration_prompt: Option<bool>,
    /// Tracks whether the user has seen the gpt-5.1-codex-max migration prompt
    #[serde(rename = "hide_gpt-5.1-codex-max_migration_prompt")]
    pub hide_gpt_5_1_codex_max_migration_prompt: Option<bool>,
    /// Tracks whether the user has seen the arcticfox migration prompt
    pub hide_arcticfox_migration_prompt: Option<bool>,
}

impl Notice {

@@ -1,14 +1,13 @@
use crate::codex::TurnContext;
use crate::context_manager::normalize;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::truncate::truncate_text;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::ShellToolCallParams;
use codex_protocol::protocol::TokenUsage;
use codex_protocol::protocol::TokenUsageInfo;
use codex_utils_tokenizer::Tokenizer;
use std::ops::Deref;

/// Transcript of conversation history
@@ -75,21 +74,26 @@ impl ContextManager {
        history
    }

    // Estimate token usage using byte-based heuristics from the truncation helpers.
    // This is a coarse lower bound, not a tokenizer-accurate count.
    // Estimate the number of tokens in the history. Return None if no tokenizer
    // is available. This does not consider the reasoning traces.
    // /!\ The value is a lower bound estimate and does not represent the exact
    // context length.
    pub(crate) fn estimate_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
        let model = turn_context.client.get_model();
        let tokenizer = Tokenizer::for_model(model.as_str()).ok()?;
        let model_family = turn_context.client.get_model_family();
        let base_tokens =
            i64::try_from(approx_token_count(model_family.base_instructions.as_str()))
                .unwrap_or(i64::MAX);

        let items_tokens = self.items.iter().fold(0i64, |acc, item| {
            let serialized = serde_json::to_string(item).unwrap_or_default();
            let item_tokens = i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX);
            acc.saturating_add(item_tokens)
        });

        Some(base_tokens.saturating_add(items_tokens))
        Some(
            self.items
                .iter()
                .map(|item| {
                    serde_json::to_string(&item)
                        .map(|item| tokenizer.count(&item))
                        .unwrap_or_default()
                })
                .sum::<i64>()
                + tokenizer.count(model_family.base_instructions.as_str()),
        )
    }

    pub(crate) fn remove_first_item(&mut self) {
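The two competing bodies of `estimate_token_count` above trade accuracy for availability: one sums a byte-based heuristic (`approx_token_count`) over the serialized items as a coarse lower bound, the other asks a real tokenizer. A minimal sketch of the heuristic flavor, assuming the common ~4-bytes-per-token rule; the crate's actual `approx_token_count` implementation is not shown in this diff and may differ:

```rust
/// Rough lower-bound token estimate. The 4-bytes-per-token divisor is an
/// assumption for illustration, not the crate's real heuristic.
fn approx_token_count(text: &str) -> usize {
    text.len() / 4
}

fn estimate_tokens(base_instructions: &str, serialized_items: &[String]) -> i64 {
    let base = i64::try_from(approx_token_count(base_instructions)).unwrap_or(i64::MAX);
    serialized_items
        .iter()
        .map(|s| i64::try_from(approx_token_count(s)).unwrap_or(i64::MAX))
        // saturating_add keeps a pathological history from overflowing.
        .fold(base, |acc, n| acc.saturating_add(n))
}

fn main() {
    let items = vec!["{\"role\":\"user\"}".to_string(); 3];
    println!("~{} tokens", estimate_tokens("You are a helpful agent.", &items));
}
```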
@@ -131,47 +135,6 @@ impl ContextManager {
        normalize::remove_orphan_outputs(&mut self.items);
    }

    fn get_shell_truncation_policy(&self, call_id: &str) -> Option<TruncationPolicy> {
        let call = self.get_call_for_call_id(call_id)?;
        match call {
            ResponseItem::FunctionCall { arguments, .. } => {
                let shell_tool_call_params =
                    serde_json::from_str::<ShellToolCallParams>(&arguments).ok()?;
                Self::create_truncation_policy(
                    shell_tool_call_params.max_output_tokens,
                    shell_tool_call_params.max_output_chars,
                )
            }
            _ => None,
        }
    }

    fn create_truncation_policy(
        max_output_tokens: Option<usize>,
        max_output_chars: Option<usize>,
    ) -> Option<TruncationPolicy> {
        if let Some(max_output_tokens) = max_output_tokens {
            Some(TruncationPolicy::Tokens(max_output_tokens))
        } else {
            max_output_chars.map(TruncationPolicy::Bytes)
        }
    }

    fn get_call_for_call_id(&self, call_id: &str) -> Option<ResponseItem> {
        self.items.iter().find_map(|item| match item {
            ResponseItem::FunctionCall {
                call_id: existing, ..
            } => {
                if existing == call_id {
                    Some(item.clone())
                } else {
                    None
                }
            }
            _ => None,
        })
    }

    /// Returns a clone of the contents in the transcript.
    fn contents(&self) -> Vec<ResponseItem> {
        self.items.clone()
@@ -185,12 +148,13 @@ impl ContextManager {
        let policy_with_serialization_budget = policy.mul(1.2);
        match item {
            ResponseItem::FunctionCallOutput { call_id, output } => {
                let truncation_policy_override = self.get_shell_truncation_policy(call_id);
                let truncation_policy =
                    truncation_policy_override.unwrap_or(policy_with_serialization_budget);
                let truncated = truncate_text(output.content.as_str(), truncation_policy);
                let truncated =
                    truncate_text(output.content.as_str(), policy_with_serialization_budget);
                let truncated_items = output.content_items.as_ref().map(|items| {
                    truncate_function_output_items_with_policy(items, truncation_policy)
                    truncate_function_output_items_with_policy(
                        items,
                        policy_with_serialization_budget,
                    )
                });
                ResponseItem::FunctionCallOutput {
                    call_id: call_id.clone(),

@@ -6,7 +6,6 @@ use crate::codex::TurnContext;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use crate::shell::Shell;
use crate::shell::default_user_shell;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
@@ -29,7 +28,7 @@ pub(crate) struct EnvironmentContext {
    pub sandbox_mode: Option<SandboxMode>,
    pub network_access: Option<NetworkAccess>,
    pub writable_roots: Option<Vec<PathBuf>>,
    pub shell: Shell,
    pub shell: Option<Shell>,
}

impl EnvironmentContext {
@@ -37,7 +36,7 @@ impl EnvironmentContext {
        cwd: Option<PathBuf>,
        approval_policy: Option<AskForApproval>,
        sandbox_policy: Option<SandboxPolicy>,
        shell: Shell,
        shell: Option<Shell>,
    ) -> Self {
        Self {
            cwd,
@@ -111,7 +110,7 @@ impl EnvironmentContext {
        } else {
            None
        };
        EnvironmentContext::new(cwd, approval_policy, sandbox_policy, default_user_shell())
        EnvironmentContext::new(cwd, approval_policy, sandbox_policy, None)
    }
}

@@ -122,7 +121,7 @@ impl From<&TurnContext> for EnvironmentContext {
            Some(turn_context.approval_policy),
            Some(turn_context.sandbox_policy.clone()),
            // Shell is not configurable from turn to turn
            default_user_shell(),
            None,
        )
    }
}
@@ -170,9 +169,11 @@ impl EnvironmentContext {
            }
            lines.push("  </writable_roots>".to_string());
        }

        let shell_name = self.shell.name();
        lines.push(format!("  <shell>{shell_name}</shell>"));
        if let Some(shell) = self.shell
            && let Some(shell_name) = shell.name()
        {
            lines.push(format!("  <shell>{shell_name}</shell>"));
        }
        lines.push(ENVIRONMENT_CONTEXT_CLOSE_TAG.to_string());
        lines.join("\n")
    }
@@ -192,18 +193,12 @@ impl From<EnvironmentContext> for ResponseItem {

#[cfg(test)]
mod tests {
    use crate::shell::ShellType;
    use crate::shell::BashShell;
    use crate::shell::ZshShell;

    use super::*;
    use pretty_assertions::assert_eq;

    fn fake_shell() -> Shell {
        Shell {
            shell_type: ShellType::Bash,
            shell_path: PathBuf::from("/bin/bash"),
        }
    }

    fn workspace_write_policy(writable_roots: Vec<&str>, network_access: bool) -> SandboxPolicy {
        SandboxPolicy::WorkspaceWrite {
            writable_roots: writable_roots.into_iter().map(PathBuf::from).collect(),
@@ -219,7 +214,7 @@ mod tests {
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(workspace_write_policy(vec!["/repo", "/tmp"], false)),
            fake_shell(),
            None,
        );

        let expected = r#"<environment_context>
@@ -231,7 +226,6 @@ mod tests {
    <root>/repo</root>
    <root>/tmp</root>
  </writable_roots>
  <shell>bash</shell>
</environment_context>"#;

        assert_eq!(context.serialize_to_xml(), expected);
@@ -243,14 +237,13 @@ mod tests {
            None,
            Some(AskForApproval::Never),
            Some(SandboxPolicy::ReadOnly),
            fake_shell(),
            None,
        );

        let expected = r#"<environment_context>
  <approval_policy>never</approval_policy>
  <sandbox_mode>read-only</sandbox_mode>
  <network_access>restricted</network_access>
  <shell>bash</shell>
</environment_context>"#;

        assert_eq!(context.serialize_to_xml(), expected);
@@ -262,14 +255,13 @@ mod tests {
            None,
            Some(AskForApproval::OnFailure),
            Some(SandboxPolicy::DangerFullAccess),
            fake_shell(),
            None,
        );

        let expected = r#"<environment_context>
  <approval_policy>on-failure</approval_policy>
  <sandbox_mode>danger-full-access</sandbox_mode>
  <network_access>enabled</network_access>
  <shell>bash</shell>
</environment_context>"#;

        assert_eq!(context.serialize_to_xml(), expected);
@@ -282,13 +274,13 @@ mod tests {
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(workspace_write_policy(vec!["/repo"], false)),
            fake_shell(),
            None,
        );
        let context2 = EnvironmentContext::new(
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::Never),
            Some(workspace_write_policy(vec!["/repo"], true)),
            fake_shell(),
            None,
        );
        assert!(!context1.equals_except_shell(&context2));
    }
@@ -299,13 +291,13 @@ mod tests {
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(SandboxPolicy::new_read_only_policy()),
            fake_shell(),
            None,
        );
        let context2 = EnvironmentContext::new(
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(SandboxPolicy::new_workspace_write_policy()),
            fake_shell(),
            None,
        );

        assert!(!context1.equals_except_shell(&context2));
@@ -317,13 +309,13 @@ mod tests {
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(workspace_write_policy(vec!["/repo", "/tmp", "/var"], false)),
            fake_shell(),
            None,
        );
        let context2 = EnvironmentContext::new(
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(workspace_write_policy(vec!["/repo", "/tmp"], true)),
            fake_shell(),
            None,
        );

        assert!(!context1.equals_except_shell(&context2));
@@ -335,19 +327,17 @@ mod tests {
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(workspace_write_policy(vec!["/repo"], false)),
            Shell {
                shell_type: ShellType::Bash,
            Some(Shell::Bash(BashShell {
                shell_path: "/bin/bash".into(),
            },
            })),
        );
        let context2 = EnvironmentContext::new(
            Some(PathBuf::from("/repo")),
            Some(AskForApproval::OnRequest),
            Some(workspace_write_policy(vec!["/repo"], false)),
            Shell {
                shell_type: ShellType::Zsh,
            Some(Shell::Zsh(ZshShell {
                shell_path: "/bin/zsh".into(),
            },
            })),
        );

        assert!(context1.equals_except_shell(&context2));
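The tests above pin down the contract of `equals_except_shell`: any difference in approval policy, sandbox policy, or writable roots breaks equality, while the shell field alone is ignored. The method body itself is not part of this diff; the following is a hypothetical reconstruction over a stand-in struct, consistent with the tests but not the author's code:

```rust
// Hypothetical reconstruction for illustration only; field set is simplified.
#[derive(Clone)]
struct Ctx {
    cwd: Option<String>,
    approval_policy: Option<String>,
    sandbox_policy: Option<String>,
    shell: Option<String>,
}

fn equals_except_shell(a: &Ctx, b: &Ctx) -> bool {
    // Every field participates in the comparison except `shell`.
    a.cwd == b.cwd
        && a.approval_policy == b.approval_policy
        && a.sandbox_policy == b.sandbox_policy
}

fn main() {
    let bash = Ctx {
        cwd: Some("/repo".into()),
        approval_policy: Some("on-request".into()),
        sandbox_policy: Some("workspace-write".into()),
        shell: Some("bash".into()),
    };
    let zsh = Ctx { shell: Some("zsh".into()), ..bash.clone() };
    assert!(equals_except_shell(&bash, &zsh)); // shell difference ignored
}
```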
@@ -10,8 +10,6 @@ use chrono::Local;
use chrono::Utc;
use codex_async_utils::CancelErr;
use codex_protocol::ConversationId;
use codex_protocol::protocol::CodexErrorInfo;
use codex_protocol::protocol::ErrorEvent;
use codex_protocol::protocol::RateLimitSnapshot;
use reqwest::StatusCode;
use serde_json;
@@ -432,57 +430,6 @@ impl CodexErr {
    pub fn downcast_ref<T: std::any::Any>(&self) -> Option<&T> {
        (self as &dyn std::any::Any).downcast_ref::<T>()
    }

    /// Translate core error to client-facing protocol error.
    pub fn to_codex_protocol_error(&self) -> CodexErrorInfo {
        match self {
            CodexErr::ContextWindowExceeded => CodexErrorInfo::ContextWindowExceeded,
            CodexErr::UsageLimitReached(_)
            | CodexErr::QuotaExceeded
            | CodexErr::UsageNotIncluded => CodexErrorInfo::UsageLimitExceeded,
            CodexErr::RetryLimit(_) => CodexErrorInfo::ResponseTooManyFailedAttempts {
                http_status_code: self.http_status_code_value(),
            },
            CodexErr::ConnectionFailed(_) => CodexErrorInfo::HttpConnectionFailed {
                http_status_code: self.http_status_code_value(),
            },
            CodexErr::ResponseStreamFailed(_) => CodexErrorInfo::ResponseStreamConnectionFailed {
                http_status_code: self.http_status_code_value(),
            },
            CodexErr::RefreshTokenFailed(_) => CodexErrorInfo::Unauthorized,
            CodexErr::SessionConfiguredNotFirstEvent
            | CodexErr::InternalServerError
            | CodexErr::InternalAgentDied => CodexErrorInfo::InternalServerError,
            CodexErr::UnsupportedOperation(_) | CodexErr::ConversationNotFound(_) => {
                CodexErrorInfo::BadRequest
            }
            CodexErr::Sandbox(_) => CodexErrorInfo::SandboxError,
            _ => CodexErrorInfo::Other,
        }
    }

    pub fn to_error_event(&self, message_prefix: Option<String>) -> ErrorEvent {
        let error_message = self.to_string();
        let message: String = match message_prefix {
            Some(prefix) => format!("{prefix}: {error_message}"),
            None => error_message,
        };
        ErrorEvent {
            message,
            codex_error_info: Some(self.to_codex_protocol_error()),
        }
    }

    pub fn http_status_code_value(&self) -> Option<u16> {
        let http_status_code = match self {
            CodexErr::RetryLimit(err) => Some(err.status),
            CodexErr::UnexpectedStatus(err) => Some(err.status),
            CodexErr::ConnectionFailed(err) => err.source.status(),
            CodexErr::ResponseStreamFailed(err) => err.source.status(),
            _ => None,
        };
        http_status_code.as_ref().map(StatusCode::as_u16)
    }
}

pub fn get_error_message_ui(e: &CodexErr) -> String {
@@ -531,10 +478,6 @@ mod tests {
    use chrono::Utc;
    use codex_protocol::protocol::RateLimitWindow;
    use pretty_assertions::assert_eq;
    use reqwest::Response;
    use reqwest::ResponseBuilderExt;
    use reqwest::StatusCode;
    use reqwest::Url;

    fn rate_limit_snapshot() -> RateLimitSnapshot {
        let primary_reset_at = Utc
@@ -556,7 +499,6 @@ mod tests {
                window_minutes: Some(120),
                resets_at: Some(secondary_reset_at),
            }),
            credits: None,
        }
    }

@@ -630,33 +572,6 @@ mod tests {
        assert_eq!(get_error_message_ui(&err), "stdout only");
    }

    #[test]
    fn to_error_event_handles_response_stream_failed() {
        let response = http::Response::builder()
            .status(StatusCode::TOO_MANY_REQUESTS)
            .url(Url::parse("http://example.com").unwrap())
            .body("")
            .unwrap();
        let source = Response::from(response).error_for_status_ref().unwrap_err();
        let err = CodexErr::ResponseStreamFailed(ResponseStreamFailed {
            source,
            request_id: Some("req-123".to_string()),
        });

        let event = err.to_error_event(Some("prefix".to_string()));

        assert_eq!(
            event.message,
            "prefix: Error while reading the server response: HTTP status client error (429 Too Many Requests) for url (http://example.com/), request id: req-123"
        );
        assert_eq!(
            event.codex_error_info,
            Some(CodexErrorInfo::ResponseStreamConnectionFailed {
                http_status_code: Some(429)
            })
        );
    }

    #[test]
    fn sandbox_denied_reports_exit_code_when_no_output_available() {
        let output = ExecToolCallOutput {

@@ -14,7 +14,6 @@ use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt;
use tokio::io::BufReader;
use tokio::process::Child;
use tokio_util::sync::CancellationToken;

use crate::error::CodexErr;
use crate::error::Result;
@@ -29,9 +28,8 @@ use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxManager;
use crate::spawn::StdioPolicy;
use crate::spawn::spawn_child_async;
use crate::text_encoding::bytes_to_string_smart;

pub const DEFAULT_EXEC_COMMAND_TIMEOUT_MS: u64 = 10_000;
const DEFAULT_TIMEOUT_MS: u64 = 10_000;

// Hardcode these since it does not seem worth including the libc crate just
// for these.
@@ -48,61 +46,20 @@ const AGGREGATE_BUFFER_INITIAL_CAPACITY: usize = 8 * 1024; // 8 KiB
/// Aggregation still collects full output; only the live event stream is capped.
pub(crate) const MAX_EXEC_OUTPUT_DELTAS_PER_CALL: usize = 10_000;

#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct ExecParams {
    pub command: Vec<String>,
    pub cwd: PathBuf,
    pub expiration: ExecExpiration,
    pub timeout_ms: Option<u64>,
    pub env: HashMap<String, String>,
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
    pub arg0: Option<String>,
    pub max_output_tokens: Option<usize>,
    pub max_output_chars: Option<usize>,
}

/// Mechanism to terminate an exec invocation before it finishes naturally.
#[derive(Debug)]
pub enum ExecExpiration {
    Timeout(Duration),
    DefaultTimeout,
    Cancellation(CancellationToken),
}

impl From<Option<u64>> for ExecExpiration {
    fn from(timeout_ms: Option<u64>) -> Self {
        timeout_ms.map_or(ExecExpiration::DefaultTimeout, |timeout_ms| {
            ExecExpiration::Timeout(Duration::from_millis(timeout_ms))
        })
    }
}

impl From<u64> for ExecExpiration {
    fn from(timeout_ms: u64) -> Self {
        ExecExpiration::Timeout(Duration::from_millis(timeout_ms))
    }
}

impl ExecExpiration {
    async fn wait(self) {
        match self {
            ExecExpiration::Timeout(duration) => tokio::time::sleep(duration).await,
            ExecExpiration::DefaultTimeout => {
                tokio::time::sleep(Duration::from_millis(DEFAULT_EXEC_COMMAND_TIMEOUT_MS)).await
            }
            ExecExpiration::Cancellation(cancel) => {
                cancel.cancelled().await;
            }
        }
    }

    /// If ExecExpiration is a timeout, returns the timeout in milliseconds.
    pub(crate) fn timeout_ms(&self) -> Option<u64> {
        match self {
            ExecExpiration::Timeout(duration) => Some(duration.as_millis() as u64),
            ExecExpiration::DefaultTimeout => Some(DEFAULT_EXEC_COMMAND_TIMEOUT_MS),
            ExecExpiration::Cancellation(_) => None,
        }
    }
impl ExecParams {
    pub fn timeout_duration(&self) -> Duration {
        Duration::from_millis(self.timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS))
    }
}
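One side of this hunk models "when does an exec die" as an `ExecExpiration` enum (explicit timeout, default timeout, or cancellation token) with `From` conversions so call sites can keep passing plain millisecond values, while the other side keeps a bare `Option<u64>`. A self-contained sketch of the enum flavor, trimmed to essentials; it assumes `tokio` (full features) and `tokio-util` as dependencies:

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

const DEFAULT_TIMEOUT_MS: u64 = 10_000;

/// Condensed version of the expiration type shown in the hunk above.
enum Expiration {
    Timeout(Duration),
    DefaultTimeout,
    Cancellation(CancellationToken),
}

impl From<Option<u64>> for Expiration {
    fn from(ms: Option<u64>) -> Self {
        ms.map_or(Expiration::DefaultTimeout, |ms| {
            Expiration::Timeout(Duration::from_millis(ms))
        })
    }
}

impl Expiration {
    /// Resolves once the invocation should be terminated, whatever the cause.
    async fn wait(self) {
        match self {
            Expiration::Timeout(d) => tokio::time::sleep(d).await,
            Expiration::DefaultTimeout => {
                tokio::time::sleep(Duration::from_millis(DEFAULT_TIMEOUT_MS)).await
            }
            Expiration::Cancellation(token) => token.cancelled().await,
        }
    }
}

#[tokio::main]
async fn main() {
    // `500.into()`-style conversion, as the tests later in this diff use it.
    let expiration: Expiration = Some(500).into();
    expiration.wait().await;
    println!("expired");
}
```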
@@ -138,13 +95,11 @@ pub async fn process_exec_tool_call(
    let ExecParams {
        command,
        cwd,
        expiration,
        timeout_ms,
        env,
        with_escalated_permissions,
        justification,
        arg0: _,
        max_output_tokens,
        max_output_chars,
    } = params;

    let (program, args) = command.split_first().ok_or_else(|| {
@@ -159,17 +114,15 @@ pub async fn process_exec_tool_call(
        args: args.to_vec(),
        cwd,
        env,
        expiration,
        timeout_ms,
        with_escalated_permissions,
        justification,
        max_output_tokens,
        max_output_chars,
    };

    let manager = SandboxManager::new();
    let exec_env = manager
        .transform(
            spec,
            &spec,
            sandbox_policy,
            sandbox_type,
            sandbox_cwd,
@@ -178,7 +131,7 @@ pub async fn process_exec_tool_call(
        .map_err(CodexErr::from)?;

    // Route through the sandboxing module for a single, unified execution path.
    crate::sandboxing::execute_env(exec_env, sandbox_policy, stdout_stream).await
    crate::sandboxing::execute_env(&exec_env, sandbox_policy, stdout_stream).await
}

pub(crate) async fn execute_exec_env(
@@ -190,25 +143,21 @@ pub(crate) async fn execute_exec_env(
        command,
        cwd,
        env,
        expiration,
        timeout_ms,
        sandbox,
        with_escalated_permissions,
        justification,
        arg0,
        max_output_tokens,
        max_output_chars,
    } = env;

    let params = ExecParams {
        command,
        cwd,
        expiration,
        timeout_ms,
        env,
        with_escalated_permissions,
        justification,
        arg0,
        max_output_tokens,
        max_output_chars,
    };

    let start = Instant::now();
@@ -229,12 +178,9 @@ async fn exec_windows_sandbox(
        command,
        cwd,
        env,
        expiration,
        timeout_ms,
        ..
    } = params;
    // TODO(iceweasel-oai): run_windows_sandbox_capture should support all
    // variants of ExecExpiration, not just timeout.
    let timeout_ms = expiration.timeout_ms();

    let policy_str = serde_json::to_string(sandbox_policy).map_err(|err| {
        CodexErr::Io(io::Error::other(format!(
@@ -468,7 +414,7 @@ impl StreamOutput<String> {
impl StreamOutput<Vec<u8>> {
    pub fn from_utf8_lossy(&self) -> StreamOutput<String> {
        StreamOutput {
            text: bytes_to_string_smart(&self.text),
            text: String::from_utf8_lossy(&self.text).to_string(),
            truncated_after_lines: self.truncated_after_lines,
        }
    }
@@ -502,12 +448,12 @@ async fn exec(
    {
        return exec_windows_sandbox(params, sandbox_policy).await;
    }
    let timeout = params.timeout_duration();
    let ExecParams {
        command,
        cwd,
        env,
        arg0,
        expiration,
        ..
    } = params;

@@ -528,14 +474,14 @@ async fn exec(
        env,
    )
    .await?;
    consume_truncated_output(child, expiration, stdout_stream).await
    consume_truncated_output(child, timeout, stdout_stream).await
}

/// Consumes the output of a child process, truncating it so it is suitable for
/// use as the output of a `shell` tool call. Also enforces specified timeout.
async fn consume_truncated_output(
    mut child: Child,
    expiration: ExecExpiration,
    timeout: Duration,
    stdout_stream: Option<StdoutStream>,
) -> Result<RawExecToolCallOutput> {
    // Both stdout and stderr were configured with `Stdio::piped()`
@@ -569,14 +515,20 @@ async fn consume_truncated_output(
    ));

    let (exit_status, timed_out) = tokio::select! {
        status_result = child.wait() => {
            let exit_status = status_result?;
            (exit_status, false)
        }
        _ = expiration.wait() => {
            kill_child_process_group(&mut child)?;
            child.start_kill()?;
            (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
        result = tokio::time::timeout(timeout, child.wait()) => {
            match result {
                Ok(status_result) => {
                    let exit_status = status_result?;
                    (exit_status, false)
                }
                Err(_) => {
                    // timeout
                    kill_child_process_group(&mut child)?;
                    child.start_kill()?;
                    // Debatable whether `child.wait().await` should be called here.
                    (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
                }
            }
        }
        _ = tokio::signal::ctrl_c() => {
            kill_child_process_group(&mut child)?;
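Both versions of `consume_truncated_output` race the child against its deadline inside `tokio::select!`; they differ only in whether the deadline arm is `expiration.wait()` or `tokio::time::timeout(..)`. A stripped-down illustration of the same race, with a sleeping future standing in for `child.wait()` so it runs without spawning a real process:

```rust
use std::time::Duration;

#[tokio::main]
async fn main() {
    // Stand-in for `child.wait()`: a "process" that takes 2s to exit.
    let child_wait = tokio::time::sleep(Duration::from_secs(2));
    // Stand-in for the expiration/timeout arm.
    let deadline = tokio::time::sleep(Duration::from_millis(100));

    let timed_out = tokio::select! {
        _ = child_wait => false, // the process exited first
        _ = deadline => {
            // The real code kills the process group here and synthesizes an
            // exit status before reporting `timed_out = true`.
            true
        }
    };
    assert!(timed_out);
    println!("timed_out = {timed_out}");
}
```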
@@ -828,15 +780,6 @@ mod tests {
    #[cfg(unix)]
    #[tokio::test]
    async fn kill_child_process_group_kills_grandchildren_on_timeout() -> Result<()> {
        // On Linux/macOS, /bin/bash is typically present; on FreeBSD/OpenBSD,
        // prefer /bin/sh to avoid NotFound errors.
        #[cfg(any(target_os = "freebsd", target_os = "openbsd"))]
        let command = vec![
            "/bin/sh".to_string(),
            "-c".to_string(),
            "sleep 60 & echo $!; sleep 60".to_string(),
        ];
        #[cfg(all(unix, not(any(target_os = "freebsd", target_os = "openbsd"))))]
        let command = vec![
            "/bin/bash".to_string(),
            "-c".to_string(),
@@ -846,13 +789,11 @@ mod tests {
        let params = ExecParams {
            command,
            cwd: std::env::current_dir()?,
            expiration: 500.into(),
            timeout_ms: Some(500),
            env,
            with_escalated_permissions: None,
            justification: None,
            arg0: None,
            max_output_tokens: None,
            max_output_chars: None,
        };

        let output = exec(params, SandboxType::None, &SandboxPolicy::ReadOnly, None).await?;
@@ -882,64 +823,4 @@ mod tests {
        assert!(killed, "grandchild process with pid {pid} is still alive");
        Ok(())
    }

    #[tokio::test]
    async fn process_exec_tool_call_respects_cancellation_token() -> Result<()> {
        let command = long_running_command();
        let cwd = std::env::current_dir()?;
        let env: HashMap<String, String> = std::env::vars().collect();
        let cancel_token = CancellationToken::new();
        let cancel_tx = cancel_token.clone();
        let params = ExecParams {
            command,
            cwd: cwd.clone(),
            expiration: ExecExpiration::Cancellation(cancel_token),
            env,
            with_escalated_permissions: None,
            justification: None,
            arg0: None,
            max_output_tokens: None,
            max_output_chars: None,
        };
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(1_000)).await;
            cancel_tx.cancel();
        });
        let result = process_exec_tool_call(
            params,
            SandboxType::None,
            &SandboxPolicy::DangerFullAccess,
            cwd.as_path(),
            &None,
            None,
        )
        .await;
        let output = match result {
            Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
            other => panic!("expected timeout error, got {other:?}"),
        };
        assert!(output.timed_out);
        assert_eq!(output.exit_code, EXEC_TIMEOUT_EXIT_CODE);
        Ok(())
    }

    #[cfg(unix)]
    fn long_running_command() -> Vec<String> {
        vec![
            "/bin/sh".to_string(),
            "-c".to_string(),
            "sleep 30".to_string(),
        ]
    }

    #[cfg(windows)]
    fn long_running_command() -> Vec<String> {
        vec![
            "powershell.exe".to_string(),
            "-NonInteractive".to_string(),
            "-NoLogo".to_string(),
            "-Command".to_string(),
            "Start-Sleep -Seconds 30".to_string(),
        ]
    }
}

@@ -1,365 +0,0 @@
use std::io::ErrorKind;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use codex_execpolicy::Decision;
use codex_execpolicy::Evaluation;
use codex_execpolicy::Policy;
use codex_execpolicy::PolicyParser;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use thiserror::Error;
use tokio::fs;

use crate::bash::parse_shell_lc_plain_commands;
use crate::features::Feature;
use crate::features::Features;
use crate::sandboxing::SandboxPermissions;
use crate::tools::sandboxing::ApprovalRequirement;

const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
const POLICY_DIR_NAME: &str = "policy";
const POLICY_EXTENSION: &str = "codexpolicy";

#[derive(Debug, Error)]
pub enum ExecPolicyError {
    #[error("failed to read execpolicy files from {dir}: {source}")]
    ReadDir {
        dir: PathBuf,
        source: std::io::Error,
    },

    #[error("failed to read execpolicy file {path}: {source}")]
    ReadFile {
        path: PathBuf,
        source: std::io::Error,
    },

    #[error("failed to parse execpolicy file {path}: {source}")]
    ParsePolicy {
        path: String,
        source: codex_execpolicy::Error,
    },
}

pub(crate) async fn exec_policy_for(
    features: &Features,
    codex_home: &Path,
) -> Result<Arc<Policy>, ExecPolicyError> {
    if !features.enabled(Feature::ExecPolicy) {
        return Ok(Arc::new(Policy::empty()));
    }

    let policy_dir = codex_home.join(POLICY_DIR_NAME);
    let policy_paths = collect_policy_files(&policy_dir).await?;

    let mut parser = PolicyParser::new();
    for policy_path in &policy_paths {
        let contents =
            fs::read_to_string(policy_path)
                .await
                .map_err(|source| ExecPolicyError::ReadFile {
                    path: policy_path.clone(),
                    source,
                })?;
        let identifier = policy_path.to_string_lossy().to_string();
        parser
            .parse(&identifier, &contents)
            .map_err(|source| ExecPolicyError::ParsePolicy {
                path: identifier,
                source,
            })?;
    }

    let policy = Arc::new(parser.build());
    tracing::debug!(
        "loaded execpolicy from {} files in {}",
        policy_paths.len(),
        policy_dir.display()
    );

    Ok(policy)
}

fn evaluate_with_policy(
    policy: &Policy,
    command: &[String],
    approval_policy: AskForApproval,
) -> Option<ApprovalRequirement> {
    let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
    let evaluation = policy.check_multiple(commands.iter());

    match evaluation {
        Evaluation::Match { decision, .. } => match decision {
            Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
                reason: FORBIDDEN_REASON.to_string(),
            }),
            Decision::Prompt => {
                let reason = PROMPT_REASON.to_string();
                if matches!(approval_policy, AskForApproval::Never) {
                    Some(ApprovalRequirement::Forbidden { reason })
                } else {
                    Some(ApprovalRequirement::NeedsApproval {
                        reason: Some(reason),
                    })
                }
            }
            Decision::Allow => Some(ApprovalRequirement::Skip),
        },
        Evaluation::NoMatch { .. } => None,
    }
}

pub(crate) fn create_approval_requirement_for_command(
    policy: &Policy,
    command: &[String],
    approval_policy: AskForApproval,
    sandbox_policy: &SandboxPolicy,
    sandbox_permissions: SandboxPermissions,
) -> ApprovalRequirement {
    if let Some(requirement) = evaluate_with_policy(policy, command, approval_policy) {
        return requirement;
    }

    if requires_initial_appoval(
        approval_policy,
        sandbox_policy,
        command,
        sandbox_permissions,
    ) {
        ApprovalRequirement::NeedsApproval { reason: None }
    } else {
        ApprovalRequirement::Skip
    }
}

async fn collect_policy_files(dir: &Path) -> Result<Vec<PathBuf>, ExecPolicyError> {
    let mut read_dir = match fs::read_dir(dir).await {
        Ok(read_dir) => read_dir,
        Err(err) if err.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
        Err(source) => {
            return Err(ExecPolicyError::ReadDir {
                dir: dir.to_path_buf(),
                source,
            });
        }
    };

    let mut policy_paths = Vec::new();
    while let Some(entry) =
        read_dir
            .next_entry()
            .await
            .map_err(|source| ExecPolicyError::ReadDir {
                dir: dir.to_path_buf(),
                source,
            })?
    {
        let path = entry.path();
        let file_type = entry
            .file_type()
            .await
            .map_err(|source| ExecPolicyError::ReadDir {
                dir: dir.to_path_buf(),
                source,
            })?;

        if path
            .extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext == POLICY_EXTENSION)
            && file_type.is_file()
        {
            policy_paths.push(path);
        }
    }

    policy_paths.sort();

    Ok(policy_paths)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::features::Feature;
    use crate::features::Features;
    use codex_protocol::protocol::AskForApproval;
    use codex_protocol::protocol::SandboxPolicy;
    use pretty_assertions::assert_eq;
    use std::fs;
    use tempfile::tempdir;

    #[tokio::test]
    async fn returns_empty_policy_when_feature_disabled() {
        let mut features = Features::with_defaults();
        features.disable(Feature::ExecPolicy);
        let temp_dir = tempdir().expect("create temp dir");

        let policy = exec_policy_for(&features, temp_dir.path())
            .await
            .expect("policy result");

        let commands = [vec!["rm".to_string()]];
        assert!(matches!(
            policy.check_multiple(commands.iter()),
            Evaluation::NoMatch { .. }
        ));
        assert!(!temp_dir.path().join(POLICY_DIR_NAME).exists());
    }

    #[tokio::test]
    async fn collect_policy_files_returns_empty_when_dir_missing() {
        let temp_dir = tempdir().expect("create temp dir");

        let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
        let files = collect_policy_files(&policy_dir)
            .await
            .expect("collect policy files");

        assert!(files.is_empty());
    }

    #[tokio::test]
    async fn loads_policies_from_policy_subdirectory() {
        let temp_dir = tempdir().expect("create temp dir");
        let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
        fs::create_dir_all(&policy_dir).expect("create policy dir");
        fs::write(
            policy_dir.join("deny.codexpolicy"),
            r#"prefix_rule(pattern=["rm"], decision="forbidden")"#,
        )
        .expect("write policy file");

        let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
            .await
            .expect("policy result");
        let command = [vec!["rm".to_string()]];
        assert!(matches!(
            policy.check_multiple(command.iter()),
            Evaluation::Match { .. }
        ));
    }

    #[tokio::test]
    async fn ignores_policies_outside_policy_dir() {
        let temp_dir = tempdir().expect("create temp dir");
        fs::write(
            temp_dir.path().join("root.codexpolicy"),
            r#"prefix_rule(pattern=["ls"], decision="prompt")"#,
        )
        .expect("write policy file");

        let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
            .await
            .expect("policy result");
        let command = [vec!["ls".to_string()]];
        assert!(matches!(
            policy.check_multiple(command.iter()),
            Evaluation::NoMatch { .. }
        ));
    }

    #[test]
    fn evaluates_bash_lc_inner_commands() {
        let policy_src = r#"
prefix_rule(pattern=["rm"], decision="forbidden")
"#;
        let mut parser = PolicyParser::new();
        parser
            .parse("test.codexpolicy", policy_src)
            .expect("parse policy");
        let policy = parser.build();

        let forbidden_script = vec![
            "bash".to_string(),
            "-lc".to_string(),
            "rm -rf /tmp".to_string(),
        ];

        let requirement =
            evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
                .expect("expected match for forbidden command");

        assert_eq!(
            requirement,
            ApprovalRequirement::Forbidden {
                reason: FORBIDDEN_REASON.to_string()
            }
        );
    }

    #[test]
    fn approval_requirement_prefers_execpolicy_match() {
        let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
        let mut parser = PolicyParser::new();
        parser
            .parse("test.codexpolicy", policy_src)
            .expect("parse policy");
        let policy = parser.build();
        let command = vec!["rm".to_string()];

        let requirement = create_approval_requirement_for_command(
            &policy,
            &command,
            AskForApproval::OnRequest,
            &SandboxPolicy::DangerFullAccess,
            SandboxPermissions::UseDefault,
        );

        assert_eq!(
            requirement,
            ApprovalRequirement::NeedsApproval {
                reason: Some(PROMPT_REASON.to_string())
            }
        );
    }

    #[test]
    fn approval_requirement_respects_approval_policy() {
        let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
        let mut parser = PolicyParser::new();
        parser
            .parse("test.codexpolicy", policy_src)
            .expect("parse policy");
        let policy = parser.build();
        let command = vec!["rm".to_string()];

        let requirement = create_approval_requirement_for_command(
            &policy,
            &command,
            AskForApproval::Never,
            &SandboxPolicy::DangerFullAccess,
            SandboxPermissions::UseDefault,
        );

        assert_eq!(
            requirement,
            ApprovalRequirement::Forbidden {
                reason: PROMPT_REASON.to_string()
            }
        );
    }

    #[test]
    fn approval_requirement_falls_back_to_heuristics() {
        let command = vec!["python".to_string()];

        let empty_policy = Policy::empty();
        let requirement = create_approval_requirement_for_command(
            &empty_policy,
            &command,
            AskForApproval::UnlessTrusted,
            &SandboxPolicy::ReadOnly,
            SandboxPermissions::UseDefault,
        );

        assert_eq!(
            requirement,
            ApprovalRequirement::NeedsApproval { reason: None }
        );
    }
}
@@ -31,6 +31,9 @@ pub enum Feature {
    GhostCommit,
    /// Use the single unified PTY-backed exec tool.
    UnifiedExec,
    /// Use the shell command tool that takes `command` as a single string of
    /// shell instead of an array of args passed to `execvp(3)`.
    ShellCommandTool,
    /// Enable experimental RMCP features such as OAuth login.
    RmcpClient,
    /// Include the freeform apply_patch tool.
@@ -39,8 +42,6 @@ pub enum Feature {
    ViewImageTool,
    /// Allow the model to request web searches.
    WebSearchRequest,
    /// Gate the execpolicy enforcement for shell/unified exec.
    ExecPolicy,
    /// Enable the model-based risk assessments for sandboxed commands.
    SandboxCommandAssessment,
    /// Enable Windows sandbox (restricted token) on Windows.
@@ -272,6 +273,12 @@ pub const FEATURES: &[FeatureSpec] = &[
        stage: Stage::Experimental,
        default_enabled: false,
    },
    FeatureSpec {
        id: Feature::ShellCommandTool,
        key: "shell_command_tool",
        stage: Stage::Experimental,
        default_enabled: false,
    },
    FeatureSpec {
        id: Feature::RmcpClient,
        key: "rmcp_client",
@@ -290,12 +297,6 @@ pub const FEATURES: &[FeatureSpec] = &[
        stage: Stage::Stable,
        default_enabled: false,
    },
    FeatureSpec {
        id: Feature::ExecPolicy,
        key: "exec_policy",
        stage: Stage::Experimental,
        default_enabled: true,
    },
    FeatureSpec {
        id: Feature::SandboxCommandAssessment,
        key: "experimental_sandbox_command_assessment",
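The doc comment on `ShellCommandTool` above is the crux of the feature: the legacy shell tool takes `command` as an argv array handed to `execvp(3)`, while the shell-command flavor takes one string interpreted by a shell. A sketch of the two argument shapes; the exact JSON schema the tools use is not spelled out in this diff, so these payloads are illustrative only:

```rust
use serde_json::json;

fn main() {
    // Argv-array shape: no shell involved, the args reach execvp(3) verbatim,
    // so there is no word splitting, globbing, or pipe handling.
    let classic = json!({ "command": ["git", "status", "--short"] });

    // Single-string shape: the string is run by a shell, so quoting, pipes,
    // and globs behave like an interactive command line.
    let shell_command = json!({ "command": "git status --short | head -n 5" });

    println!("{classic}\n{shell_command}");
}
```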
@@ -25,7 +25,6 @@ mod environment_context;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
pub mod exec_env;
|
||||
mod exec_policy;
|
||||
pub mod features;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
@@ -39,7 +38,6 @@ pub mod parse_command;
|
||||
pub mod powershell;
|
||||
mod response_processing;
|
||||
pub mod sandboxing;
|
||||
mod text_encoding;
|
||||
pub mod token_data;
|
||||
mod truncate;
|
||||
mod unified_exec;
|
||||
|
||||
@@ -12,7 +12,7 @@ const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
|
||||
|
||||
const GPT_5_CODEX_INSTRUCTIONS: &str = include_str!("../gpt_5_codex_prompt.md");
|
||||
const GPT_5_1_INSTRUCTIONS: &str = include_str!("../gpt_5_1_prompt.md");
|
||||
const GPT_5_1_CODEX_MAX_INSTRUCTIONS: &str = include_str!("../gpt-5.1-codex-max_prompt.md");
|
||||
const ARCTICFOX_INSTRUCTIONS: &str = include_str!("../arcticfox_prompt.md");
|
||||
|
||||
/// A model family is a group of models that share certain characteristics.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
@@ -76,7 +76,6 @@ macro_rules! model_family {
|
||||
(
|
||||
$slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)?
|
||||
) => {{
|
||||
let truncation_policy = TruncationPolicy::Bytes(10_000);
|
||||
// defaults
|
||||
#[allow(unused_mut)]
|
||||
let mut mf = ModelFamily {
|
||||
@@ -91,10 +90,10 @@ macro_rules! model_family {
|
||||
experimental_supported_tools: Vec::new(),
|
||||
effective_context_window_percent: 95,
|
||||
support_verbosity: false,
|
||||
shell_type: ConfigShellToolType::Default(truncation_policy),
|
||||
shell_type: ConfigShellToolType::Default,
|
||||
default_verbosity: None,
|
||||
default_reasoning_effort: None,
|
||||
truncation_policy,
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
};
|
||||
|
||||
// apply overrides
|
||||
@@ -139,7 +138,6 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
} else if slug.starts_with("gpt-3.5") {
|
||||
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("test-gpt-5") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -152,13 +150,13 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
"test_sync_tool".to_string(),
|
||||
],
|
||||
supports_parallel_tool_calls: true,
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
support_verbosity: true,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
)
|
||||
|
||||
// Internal models.
|
||||
} else if slug.starts_with("codex-exp-") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -170,44 +168,53 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
"list_dir".to_string(),
|
||||
"read_file".to_string(),
|
||||
],
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
supports_parallel_tool_calls: true,
|
||||
support_verbosity: true,
|
||||
truncation_policy: truncation_policy,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
)
|
||||
|
||||
// Production models.
|
||||
} else if slug.starts_with("gpt-5.1-codex-max") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
} else if slug.starts_with("arcticfox") {
|
||||
model_family!(
|
||||
slug, slug,
|
||||
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
            base_instructions: ARCTICFOX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            shell_type: ConfigShellToolType::ShellCommand,
            supports_parallel_tool_calls: true,
            support_verbosity: false,
            truncation_policy: truncation_policy,
            truncation_policy: TruncationPolicy::Tokens(10_000),
        )

    // Production models.
    } else if slug.starts_with("gpt-5-codex")
        || slug.starts_with("gpt-5.1-codex")
        || slug.starts_with("codex-")
    {
        let truncation_policy = TruncationPolicy::Tokens(10_000);
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            shell_type: ConfigShellToolType::ShellCommand,
            supports_parallel_tool_calls: true,
            support_verbosity: false,
            truncation_policy: truncation_policy,
            truncation_policy: TruncationPolicy::Tokens(10_000),
        )
    } else if slug.starts_with("arcticfox") {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: BASE_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand,
            supports_parallel_tool_calls: true,
            support_verbosity: false,
            truncation_policy: TruncationPolicy::Tokens(10_000),
        )
    } else if slug.starts_with("gpt-5.1") {
        let truncation_policy = TruncationPolicy::Tokens(10_000);
        model_family!(
            slug, "gpt-5.1",
            supports_reasoning_summaries: true,
@@ -217,7 +224,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
            base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
            default_reasoning_effort: Some(ReasoningEffort::Medium),
            truncation_policy: TruncationPolicy::Bytes(10_000),
            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            shell_type: ConfigShellToolType::ShellCommand,
            supports_parallel_tool_calls: true,
        )
    } else if slug.starts_with("gpt-5") {
@@ -225,7 +232,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
            slug, "gpt-5",
            supports_reasoning_summaries: true,
            needs_special_apply_patch_instructions: true,
            shell_type: ConfigShellToolType::Default(TruncationPolicy::Bytes(10_000)),
            shell_type: ConfigShellToolType::Default,
            support_verbosity: true,
            truncation_policy: TruncationPolicy::Bytes(10_000),
        )
@@ -235,7 +242,6 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
}

pub fn derive_default_model_family(model: &str) -> ModelFamily {
    let truncation_policy = TruncationPolicy::Bytes(10_000);
    ModelFamily {
        slug: model.to_string(),
        family: model.to_string(),
@@ -248,9 +254,9 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
        experimental_supported_tools: Vec::new(),
        effective_context_window_percent: 95,
        support_verbosity: false,
        shell_type: ConfigShellToolType::Default(truncation_policy),
        shell_type: ConfigShellToolType::Default,
        default_verbosity: None,
        default_reasoning_effort: None,
        truncation_policy,
        truncation_policy: TruncationPolicy::Bytes(10_000),
    }
}
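The paired lines above read as before/after: the shell tool type used to carry its own `TruncationPolicy`, and these commits hoist that into a standalone `truncation_policy` field on the model family. A minimal sketch of the shape change, with the variant and policy names taken from the hunks and everything else assumed:

```rust
// Sketch only; the real definitions live elsewhere in codex-rs and may differ.
enum TruncationPolicy {
    Tokens(usize),
    Bytes(usize),
}

// Before: each shell tool variant carried its truncation policy inline.
enum ConfigShellToolTypeBefore {
    ShellCommand(TruncationPolicy),
    Default(TruncationPolicy),
}

// After: the variants are payload-free, and the model family exposes a
// separate `truncation_policy` field (e.g., TruncationPolicy::Tokens(10_000)).
enum ConfigShellToolTypeAfter {
    ShellCommand,
    Default,
}
```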
@@ -72,7 +72,7 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {

        _ if slug.starts_with("gpt-5-codex")
            || slug.starts_with("gpt-5.1-codex")
            || slug.starts_with("gpt-5.1-codex-max") =>
            || slug.starts_with("arcticfox") =>
        {
            Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
        }

@@ -8,7 +8,6 @@ ready‑to‑spawn environment.

pub mod assessment;

use crate::exec::ExecExpiration;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
@@ -27,53 +26,27 @@ use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SandboxPermissions {
    UseDefault,
    RequireEscalated,
}

impl SandboxPermissions {
    pub fn requires_escalated_permissions(self) -> bool {
        matches!(self, SandboxPermissions::RequireEscalated)
    }
}

impl From<bool> for SandboxPermissions {
    fn from(with_escalated_permissions: bool) -> Self {
        if with_escalated_permissions {
            SandboxPermissions::RequireEscalated
        } else {
            SandboxPermissions::UseDefault
        }
    }
}

#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct CommandSpec {
    pub program: String,
    pub args: Vec<String>,
    pub cwd: PathBuf,
    pub env: HashMap<String, String>,
    pub expiration: ExecExpiration,
    pub timeout_ms: Option<u64>,
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
    pub max_output_tokens: Option<usize>,
    pub max_output_chars: Option<usize>,
}

#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct ExecEnv {
    pub command: Vec<String>,
    pub cwd: PathBuf,
    pub env: HashMap<String, String>,
    pub expiration: ExecExpiration,
    pub timeout_ms: Option<u64>,
    pub sandbox: SandboxType,
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
    pub arg0: Option<String>,
    pub max_output_tokens: Option<usize>,
    pub max_output_chars: Option<usize>,
}

pub enum SandboxPreference {
@@ -120,13 +93,13 @@ impl SandboxManager {

    pub(crate) fn transform(
        &self,
        mut spec: CommandSpec,
        spec: &CommandSpec,
        policy: &SandboxPolicy,
        sandbox: SandboxType,
        sandbox_policy_cwd: &Path,
        codex_linux_sandbox_exe: Option<&PathBuf>,
    ) -> Result<ExecEnv, SandboxTransformError> {
        let mut env = spec.env;
        let mut env = spec.env.clone();
        if !policy.has_full_network_access() {
            env.insert(
                CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(),
@@ -135,8 +108,8 @@ impl SandboxManager {
        }

        let mut command = Vec::with_capacity(1 + spec.args.len());
        command.push(spec.program);
        command.append(&mut spec.args);
        command.push(spec.program.clone());
        command.extend(spec.args.iter().cloned());

        let (command, sandbox_env, arg0_override) = match sandbox {
            SandboxType::None => (command, HashMap::new(), None),
@@ -181,15 +154,13 @@ impl SandboxManager {

        Ok(ExecEnv {
            command,
            cwd: spec.cwd,
            cwd: spec.cwd.clone(),
            env,
            expiration: spec.expiration,
            timeout_ms: spec.timeout_ms,
            sandbox,
            with_escalated_permissions: spec.with_escalated_permissions,
            justification: spec.justification,
            justification: spec.justification.clone(),
            arg0: arg0_override,
            max_output_tokens: spec.max_output_tokens,
            max_output_chars: spec.max_output_chars,
        })
    }

@@ -199,9 +170,9 @@ impl SandboxManager {
}

pub async fn execute_env(
    env: ExecEnv,
    env: &ExecEnv,
    policy: &SandboxPolicy,
    stdout_stream: Option<StdoutStream>,
) -> crate::error::Result<ExecToolCallOutput> {
    execute_exec_env(env, policy, stdout_stream).await
    execute_exec_env(env.clone(), policy, stdout_stream).await
}
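With `transform` now borrowing the spec (and `CommandSpec`/`ExecEnv` deriving `Clone`), the same spec can be lowered into an `ExecEnv` more than once. A hedged sketch of a caller, where `manager`, `policy`, `sandbox`, and `cwd` are assumed to be in scope:

```rust
// Sketch, not part of the commits: the borrow means `spec` survives the first
// transform, e.g. to rebuild an ExecEnv for a retry without the sandbox.
let sandboxed = manager.transform(&spec, &policy, sandbox, cwd, None)?;
let output = execute_env(&sandboxed, &policy, None).await?;
// `spec` is still usable here.
let retry = manager.transform(&spec, &policy, SandboxType::None, cwd, None)?;
```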
@@ -7,41 +7,61 @@ pub enum ShellType {
    Zsh,
    Bash,
    PowerShell,
    Sh,
    Cmd,
}

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Shell {
    pub(crate) shell_type: ShellType,
pub struct ZshShell {
    pub(crate) shell_path: PathBuf,
}

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct BashShell {
    pub(crate) shell_path: PathBuf,
}

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct PowerShellConfig {
    pub(crate) shell_path: PathBuf, // Executable name or path, e.g. "pwsh" or "powershell.exe".
}

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub enum Shell {
    Zsh(ZshShell),
    Bash(BashShell),
    PowerShell(PowerShellConfig),
    Unknown,
}

impl Shell {
    pub fn name(&self) -> &'static str {
        match self.shell_type {
            ShellType::Zsh => "zsh",
            ShellType::Bash => "bash",
            ShellType::PowerShell => "powershell",
            ShellType::Sh => "sh",
            ShellType::Cmd => "cmd",
    pub fn name(&self) -> Option<String> {
        match self {
            Shell::Zsh(ZshShell { shell_path, .. }) | Shell::Bash(BashShell { shell_path, .. }) => {
                std::path::Path::new(shell_path)
                    .file_name()
                    .map(|s| s.to_string_lossy().to_string())
            }
            Shell::PowerShell(ps) => ps
                .shell_path
                .file_stem()
                .map(|s| s.to_string_lossy().to_string()),
            Shell::Unknown => None,
        }
    }

    /// Takes a string of shell and returns the full list of command args to
    /// use with `exec()` to run the shell command.
    pub fn derive_exec_args(&self, command: &str, use_login_shell: bool) -> Vec<String> {
        match self.shell_type {
            ShellType::Zsh | ShellType::Bash | ShellType::Sh => {
        match self {
            Shell::Zsh(ZshShell { shell_path, .. }) | Shell::Bash(BashShell { shell_path, .. }) => {
                let arg = if use_login_shell { "-lc" } else { "-c" };
                vec![
                    self.shell_path.to_string_lossy().to_string(),
                    shell_path.to_string_lossy().to_string(),
                    arg.to_string(),
                    command.to_string(),
                ]
            }
            ShellType::PowerShell => {
                let mut args = vec![self.shell_path.to_string_lossy().to_string()];
            Shell::PowerShell(ps) => {
                let mut args = vec![ps.shell_path.to_string_lossy().to_string()];
                if !use_login_shell {
                    args.push("-NoProfile".to_string());
                }
@@ -50,12 +70,7 @@ impl Shell {
                args.push(command.to_string());
                args
            }
            ShellType::Cmd => {
                let mut args = vec![self.shell_path.to_string_lossy().to_string()];
                args.push("/c".to_string());
                args.push(command.to_string());
                args
            }
            Shell::Unknown => shlex::split(command).unwrap_or_else(|| vec![command.to_string()]),
        }
    }
}
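For orientation, a minimal sketch (not part of the commits above) of how the refactored API behaves, using only names visible in this hunk:

```rust
use std::path::PathBuf;

// Assumes the `Shell`/`BashShell` types and the shlex fallback shown above;
// `BashShell` is crate-private, so this only compiles inside the crate.
let bash = Shell::Bash(BashShell { shell_path: PathBuf::from("/bin/bash") });
assert_eq!(
    bash.derive_exec_args("echo hi", true), // login shell -> "-lc"
    vec!["/bin/bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
);
// `Shell::Unknown` no longer wraps the command in a shell; it tokenizes it.
assert_eq!(
    Shell::Unknown.derive_exec_args("echo hi", false),
    vec!["echo".to_string(), "hi".to_string()],
);
```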
@@ -128,34 +143,19 @@ fn get_shell_path(
    None
}

fn get_zsh_shell(path: Option<&PathBuf>) -> Option<Shell> {
fn get_zsh_shell(path: Option<&PathBuf>) -> Option<ZshShell> {
    let shell_path = get_shell_path(ShellType::Zsh, path, "zsh", vec!["/bin/zsh"]);

    shell_path.map(|shell_path| Shell {
        shell_type: ShellType::Zsh,
        shell_path,
    })
    shell_path.map(|shell_path| ZshShell { shell_path })
}

fn get_bash_shell(path: Option<&PathBuf>) -> Option<Shell> {
fn get_bash_shell(path: Option<&PathBuf>) -> Option<BashShell> {
    let shell_path = get_shell_path(ShellType::Bash, path, "bash", vec!["/bin/bash"]);

    shell_path.map(|shell_path| Shell {
        shell_type: ShellType::Bash,
        shell_path,
    })
    shell_path.map(|shell_path| BashShell { shell_path })
}

fn get_sh_shell(path: Option<&PathBuf>) -> Option<Shell> {
    let shell_path = get_shell_path(ShellType::Sh, path, "sh", vec!["/bin/sh"]);

    shell_path.map(|shell_path| Shell {
        shell_type: ShellType::Sh,
        shell_path,
    })
}

fn get_powershell_shell(path: Option<&PathBuf>) -> Option<Shell> {
fn get_powershell_shell(path: Option<&PathBuf>) -> Option<PowerShellConfig> {
    let shell_path = get_shell_path(
        ShellType::PowerShell,
        path,
@@ -164,56 +164,26 @@ fn get_powershell_shell(path: Option<&PathBuf>) -> Option<Shell> {
    )
    .or_else(|| get_shell_path(ShellType::PowerShell, path, "powershell", vec![]));

    shell_path.map(|shell_path| Shell {
        shell_type: ShellType::PowerShell,
        shell_path,
    })
}

fn get_cmd_shell(path: Option<&PathBuf>) -> Option<Shell> {
    let shell_path = get_shell_path(ShellType::Cmd, path, "cmd", vec![]);

    shell_path.map(|shell_path| Shell {
        shell_type: ShellType::Cmd,
        shell_path,
    })
}

fn ultimate_fallback_shell() -> Shell {
    if cfg!(windows) {
        Shell {
            shell_type: ShellType::Cmd,
            shell_path: PathBuf::from("cmd.exe"),
        }
    } else {
        Shell {
            shell_type: ShellType::Sh,
            shell_path: PathBuf::from("/bin/sh"),
        }
    }
    shell_path.map(|shell_path| PowerShellConfig { shell_path })
}

pub fn get_shell_by_model_provided_path(shell_path: &PathBuf) -> Shell {
    detect_shell_type(shell_path)
        .and_then(|shell_type| get_shell(shell_type, Some(shell_path)))
        .unwrap_or(ultimate_fallback_shell())
        .unwrap_or(Shell::Unknown)
}

pub fn get_shell(shell_type: ShellType, path: Option<&PathBuf>) -> Option<Shell> {
    match shell_type {
        ShellType::Zsh => get_zsh_shell(path),
        ShellType::Bash => get_bash_shell(path),
        ShellType::PowerShell => get_powershell_shell(path),
        ShellType::Sh => get_sh_shell(path),
        ShellType::Cmd => get_cmd_shell(path),
        ShellType::Zsh => get_zsh_shell(path).map(Shell::Zsh),
        ShellType::Bash => get_bash_shell(path).map(Shell::Bash),
        ShellType::PowerShell => get_powershell_shell(path).map(Shell::PowerShell),
    }
}

pub fn detect_shell_type(shell_path: &PathBuf) -> Option<ShellType> {
    match shell_path.as_os_str().to_str() {
        Some("zsh") => Some(ShellType::Zsh),
        Some("sh") => Some(ShellType::Sh),
        Some("cmd") => Some(ShellType::Cmd),
        Some("bash") => Some(ShellType::Bash),
        Some("pwsh") => Some(ShellType::PowerShell),
        Some("powershell") => Some(ShellType::PowerShell),
@@ -230,29 +200,14 @@ pub fn detect_shell_type(shell_path: &PathBuf) -> Option<ShellType> {
    }
}

pub fn default_user_shell() -> Shell {
    default_user_shell_from_path(get_user_shell_path())
}

fn default_user_shell_from_path(user_shell_path: Option<PathBuf>) -> Shell {
pub async fn default_user_shell() -> Shell {
    if cfg!(windows) {
        get_shell(ShellType::PowerShell, None).unwrap_or(ultimate_fallback_shell())
        get_shell(ShellType::PowerShell, None).unwrap_or(Shell::Unknown)
    } else {
        let user_default_shell = user_shell_path
        get_user_shell_path()
            .and_then(|shell| detect_shell_type(&shell))
            .and_then(|shell_type| get_shell(shell_type, None));

        let shell_with_fallback = if cfg!(target_os = "macos") {
            user_default_shell
                .or_else(|| get_shell(ShellType::Zsh, None))
                .or_else(|| get_shell(ShellType::Bash, None))
        } else {
            user_default_shell
                .or_else(|| get_shell(ShellType::Bash, None))
                .or_else(|| get_shell(ShellType::Zsh, None))
        };

        shell_with_fallback.unwrap_or(ultimate_fallback_shell())
            .and_then(|shell_type| get_shell(shell_type, None))
            .unwrap_or(Shell::Unknown)
    }
}
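A hypothetical caller of the refactored detection API, sketched with only names from the hunks above (not part of the commits):

```rust
// `default_user_shell` is now async and falls back to `Shell::Unknown`
// instead of a hard-coded /bin/sh or cmd.exe shell.
let shell = default_user_shell().await;
match shell.name() {
    Some(name) => eprintln!("detected shell: {name}"),
    None => eprintln!("no known shell; commands will be tokenized with shlex"),
}
```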
@@ -308,19 +263,6 @@ mod detect_shell_type_tests {
            detect_shell_type(&PathBuf::from("/usr/local/bin/pwsh")),
            Some(ShellType::PowerShell)
        );
        assert_eq!(
            detect_shell_type(&PathBuf::from("/bin/sh")),
            Some(ShellType::Sh)
        );
        assert_eq!(detect_shell_type(&PathBuf::from("sh")), Some(ShellType::Sh));
        assert_eq!(
            detect_shell_type(&PathBuf::from("cmd")),
            Some(ShellType::Cmd)
        );
        assert_eq!(
            detect_shell_type(&PathBuf::from("cmd.exe")),
            Some(ShellType::Cmd)
        );
    }
}

@@ -336,17 +278,10 @@ mod tests {
    fn detects_zsh() {
        let zsh_shell = get_shell(ShellType::Zsh, None).unwrap();

        let shell_path = zsh_shell.shell_path;

        assert_eq!(shell_path, PathBuf::from("/bin/zsh"));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn fish_fallback_to_zsh() {
        let zsh_shell = default_user_shell_from_path(Some(PathBuf::from("/bin/fish")));

        let shell_path = zsh_shell.shell_path;
        let ZshShell { shell_path } = match zsh_shell {
            Shell::Zsh(zsh_shell) => zsh_shell,
            _ => panic!("expected zsh shell"),
        };

        assert_eq!(shell_path, PathBuf::from("/bin/zsh"));
    }
@@ -354,60 +289,18 @@ mod tests {
    #[test]
    fn detects_bash() {
        let bash_shell = get_shell(ShellType::Bash, None).unwrap();
        let shell_path = bash_shell.shell_path;
        let BashShell { shell_path } = match bash_shell {
            Shell::Bash(bash_shell) => bash_shell,
            _ => panic!("expected bash shell"),
        };

        assert!(
            shell_path == PathBuf::from("/bin/bash")
                || shell_path == PathBuf::from("/usr/bin/bash")
                || shell_path == PathBuf::from("/usr/local/bin/bash"),
                || shell_path == PathBuf::from("/usr/bin/bash"),
            "shell path: {shell_path:?}",
        );
    }

    #[test]
    fn detects_sh() {
        let sh_shell = get_shell(ShellType::Sh, None).unwrap();
        let shell_path = sh_shell.shell_path;
        assert!(
            shell_path == PathBuf::from("/bin/sh") || shell_path == PathBuf::from("/usr/bin/sh"),
            "shell path: {shell_path:?}",
        );
    }

    #[test]
    fn can_run_on_shell_test() {
        let cmd = "echo \"Works\"";
        if cfg!(windows) {
            assert!(shell_works(
                get_shell(ShellType::PowerShell, None),
                "Out-String 'Works'",
                true,
            ));
            assert!(shell_works(get_shell(ShellType::Cmd, None), cmd, true,));
            assert!(shell_works(Some(ultimate_fallback_shell()), cmd, true));
        } else {
            assert!(shell_works(Some(ultimate_fallback_shell()), cmd, true));
            assert!(shell_works(get_shell(ShellType::Zsh, None), cmd, false));
            assert!(shell_works(get_shell(ShellType::Bash, None), cmd, true));
            assert!(shell_works(get_shell(ShellType::Sh, None), cmd, true));
        }
    }

    fn shell_works(shell: Option<Shell>, command: &str, required: bool) -> bool {
        if let Some(shell) = shell {
            let args = shell.derive_exec_args(command, false);
            let output = Command::new(args[0].clone())
                .args(&args[1..])
                .output()
                .unwrap();
            assert!(output.status.success());
            assert!(String::from_utf8_lossy(&output.stdout).contains("Works"));
            true
        } else {
            !required
        }
    }

    #[tokio::test]
    async fn test_current_shell_detects_zsh() {
        let shell = Command::new("sh")
@@ -419,11 +312,10 @@ mod tests {
        let shell_path = String::from_utf8_lossy(&shell.stdout).trim().to_string();
        if shell_path.ends_with("/zsh") {
            assert_eq!(
                default_user_shell(),
                Shell {
                    shell_type: ShellType::Zsh,
                default_user_shell().await,
                Shell::Zsh(ZshShell {
                    shell_path: PathBuf::from(shell_path),
                }
                })
            );
        }
    }
@@ -434,8 +326,11 @@ mod tests {
            return;
        }

        let powershell_shell = default_user_shell();
        let shell_path = powershell_shell.shell_path;
        let powershell_shell = default_user_shell().await;
        let PowerShellConfig { shell_path } = match powershell_shell {
            Shell::PowerShell(powershell_shell) => powershell_shell,
            _ => panic!("expected powershell shell"),
        };

        assert!(shell_path.ends_with("pwsh.exe") || shell_path.ends_with("powershell.exe"));
    }
@@ -447,7 +342,10 @@ mod tests {
        }

        let powershell_shell = get_shell(ShellType::PowerShell, None).unwrap();
        let shell_path = powershell_shell.shell_path;
        let PowerShellConfig { shell_path } = match powershell_shell {
            Shell::PowerShell(powershell_shell) => powershell_shell,
            _ => panic!("expected powershell shell"),
        };

        assert!(shell_path.ends_with("pwsh.exe") || shell_path.ends_with("powershell.exe"));
    }

@@ -31,8 +31,6 @@ use crate::user_shell_command::user_shell_command_record_item;
use super::SessionTask;
use super::SessionTaskContext;

const USER_SHELL_TIMEOUT_MS: u64 = 60 * 60 * 1000; // 1 hour

#[derive(Clone)]
pub(crate) struct UserShellCommandTask {
    command: String,
@@ -95,15 +93,11 @@ impl SessionTask for UserShellCommandTask {
            command: command.clone(),
            cwd: cwd.clone(),
            env: create_env(&turn_context.shell_environment_policy),
            // TODO(zhao-oai): Now that we have ExecExpiration::Cancellation, we
            // should use that instead of an "arbitrarily large" timeout here.
            expiration: USER_SHELL_TIMEOUT_MS.into(),
            timeout_ms: None,
            sandbox: SandboxType::None,
            with_escalated_permissions: None,
            justification: None,
            arg0: None,
            max_output_tokens: None,
            max_output_chars: None,
        };

        let stdout_stream = Some(StdoutStream {
@@ -1,461 +0,0 @@
|
||||
//! Text encoding detection and conversion utilities for shell output.
|
||||
//!
|
||||
//! Windows users frequently run into code pages such as CP1251 or CP866 when invoking commands
|
||||
//! through VS Code. Those bytes show up as invalid UTF-8 and used to be replaced with the standard
|
||||
//! Unicode replacement character. We now lean on `chardetng` and `encoding_rs` so we can
|
||||
//! automatically detect and decode the vast majority of legacy encodings before falling back to
|
||||
//! lossy UTF-8 decoding.
|
||||
|
||||
use chardetng::EncodingDetector;
|
||||
use encoding_rs::Encoding;
|
||||
use encoding_rs::IBM866;
|
||||
use encoding_rs::WINDOWS_1252;
|
||||
|
||||
/// Attempts to convert arbitrary bytes to UTF-8 with best-effort encoding detection.
|
||||
pub fn bytes_to_string_smart(bytes: &[u8]) -> String {
|
||||
if bytes.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
if let Ok(utf8_str) = std::str::from_utf8(bytes) {
|
||||
return utf8_str.to_owned();
|
||||
}
|
||||
|
||||
let encoding = detect_encoding(bytes);
|
||||
decode_bytes(bytes, encoding)
|
||||
}
|
||||
|
||||
// Windows-1252 reassigns a handful of 0x80-0x9F slots to smart punctuation (curly quotes, dashes,
|
||||
// ™). CP866 uses those *same byte values* for uppercase Cyrillic letters. When chardetng sees shell
|
||||
// snippets that mix these bytes with ASCII it sometimes guesses IBM866, so “smart quotes” render as
|
||||
// Cyrillic garbage (“УФЦ”) in VS Code. However, CP866 uppercase tokens are perfectly valid output
|
||||
// (e.g., `ПРИ test`) so we cannot flip every 0x80-0x9F byte to Windows-1252 either. The compromise
|
||||
// is to only coerce IBM866 to Windows-1252 when (a) the high bytes are exclusively the punctuation
|
||||
// values listed below and (b) we spot adjacent ASCII. This targets the real failure case without
|
||||
// clobbering legitimate Cyrillic text. If another code page has a similar collision, introduce a
|
||||
// dedicated allowlist (like this one) plus unit tests that capture the actual shell output we want
|
||||
// to preserve. Windows-1252 byte values for smart punctuation.
|
||||
const WINDOWS_1252_PUNCT_BYTES: [u8; 8] = [
|
||||
0x91, // ‘ (left single quotation mark)
|
||||
0x92, // ’ (right single quotation mark)
|
||||
0x93, // “ (left double quotation mark)
|
||||
0x94, // ” (right double quotation mark)
|
||||
0x95, // • (bullet)
|
||||
0x96, // – (en dash)
|
||||
0x97, // — (em dash)
|
||||
0x99, // ™ (trade mark sign)
|
||||
];
|
||||
|
||||
fn detect_encoding(bytes: &[u8]) -> &'static Encoding {
|
||||
let mut detector = EncodingDetector::new();
|
||||
detector.feed(bytes, true);
|
||||
let (encoding, _is_confident) = detector.guess_assess(None, true);
|
||||
|
||||
// chardetng occasionally reports IBM866 for short strings that only contain Windows-1252 “smart
|
||||
// punctuation” bytes (0x80-0x9F) because that range maps to Cyrillic letters in IBM866. When
|
||||
// those bytes show up alongside an ASCII word (typical shell output: `"“`test), we know the
|
||||
// intent was likely CP1252 quotes/dashes. Prefer WINDOWS_1252 in that specific situation so we
|
||||
// render the characters users expect instead of Cyrillic junk. References:
|
||||
// - Windows-1252 reserving 0x80-0x9F for curly quotes/dashes:
|
||||
// https://en.wikipedia.org/wiki/Windows-1252
|
||||
// - CP866 mapping 0x93/0x94/0x96 to Cyrillic letters, so the same bytes show up as “УФЦ” when
|
||||
// mis-decoded: https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT
|
||||
if encoding == IBM866 && looks_like_windows_1252_punctuation(bytes) {
|
||||
return WINDOWS_1252;
|
||||
}
|
||||
|
||||
encoding
|
||||
}
|
||||
|
||||
fn decode_bytes(bytes: &[u8], encoding: &'static Encoding) -> String {
|
||||
let (decoded, _, had_errors) = encoding.decode(bytes);
|
||||
|
||||
if had_errors {
|
||||
return String::from_utf8_lossy(bytes).into_owned();
|
||||
}
|
||||
|
||||
decoded.into_owned()
|
||||
}
|
||||
|
||||
/// Detect whether the byte stream looks like Windows-1252 “smart punctuation” wrapped around
|
||||
/// otherwise-ASCII text.
|
||||
///
|
||||
/// Context: IBM866 and Windows-1252 share the 0x80-0x9F slot range. In IBM866 these bytes decode to
|
||||
/// Cyrillic letters, whereas Windows-1252 maps them to curly quotes and dashes. chardetng can guess
|
||||
/// IBM866 for short snippets that only contain those bytes, which turns shell output such as
|
||||
/// `“test”` into unreadable Cyrillic. To avoid that, we treat inputs comprising a handful of bytes
|
||||
/// from the problematic range plus ASCII letters as CP1252 punctuation. We deliberately do *not*
|
||||
/// cap how many of those punctuation bytes we accept: VS Code frequently prints several quoted
|
||||
/// phrases (e.g., `"foo" – "bar"`), and truncating the count would once again mis-decode those as
|
||||
/// Cyrillic. If we discover additional encodings with overlapping byte ranges, prefer adding
|
||||
/// encoding-specific byte allowlists like `WINDOWS_1252_PUNCT` and tests that exercise real-world
|
||||
/// shell snippets.
|
||||
fn looks_like_windows_1252_punctuation(bytes: &[u8]) -> bool {
|
||||
let mut saw_extended_punctuation = false;
|
||||
let mut saw_ascii_word = false;
|
||||
|
||||
for &byte in bytes {
|
||||
if byte >= 0xA0 {
|
||||
return false;
|
||||
}
|
||||
if (0x80..=0x9F).contains(&byte) {
|
||||
if !is_windows_1252_punct(byte) {
|
||||
return false;
|
||||
}
|
||||
saw_extended_punctuation = true;
|
||||
}
|
||||
if byte.is_ascii_alphabetic() {
|
||||
saw_ascii_word = true;
|
||||
}
|
||||
}
|
||||
|
||||
saw_extended_punctuation && saw_ascii_word
|
||||
}
|
||||
|
||||
fn is_windows_1252_punct(byte: u8) -> bool {
|
||||
WINDOWS_1252_PUNCT_BYTES.contains(&byte)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use encoding_rs::BIG5;
|
||||
use encoding_rs::EUC_KR;
|
||||
use encoding_rs::GBK;
|
||||
use encoding_rs::ISO_8859_2;
|
||||
use encoding_rs::ISO_8859_3;
|
||||
use encoding_rs::ISO_8859_4;
|
||||
use encoding_rs::ISO_8859_5;
|
||||
use encoding_rs::ISO_8859_6;
|
||||
use encoding_rs::ISO_8859_7;
|
||||
use encoding_rs::ISO_8859_8;
|
||||
use encoding_rs::ISO_8859_10;
|
||||
use encoding_rs::ISO_8859_13;
|
||||
use encoding_rs::SHIFT_JIS;
|
||||
use encoding_rs::WINDOWS_874;
|
||||
use encoding_rs::WINDOWS_1250;
|
||||
use encoding_rs::WINDOWS_1251;
|
||||
use encoding_rs::WINDOWS_1253;
|
||||
use encoding_rs::WINDOWS_1254;
|
||||
use encoding_rs::WINDOWS_1255;
|
||||
use encoding_rs::WINDOWS_1256;
|
||||
use encoding_rs::WINDOWS_1257;
|
||||
use encoding_rs::WINDOWS_1258;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn test_utf8_passthrough() {
|
||||
// Fast path: when UTF-8 is valid we should avoid copies and return as-is.
|
||||
let utf8_text = "Hello, мир! 世界";
|
||||
let bytes = utf8_text.as_bytes();
|
||||
assert_eq!(bytes_to_string_smart(bytes), utf8_text);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cp1251_russian_text() {
|
||||
// Cyrillic text emitted by PowerShell/WSL in CP1251 should decode cleanly.
|
||||
let bytes = b"\xEF\xF0\xE8\xEC\xE5\xF0"; // "пример" encoded with Windows-1251
|
||||
assert_eq!(bytes_to_string_smart(bytes), "пример");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cp1251_privet_word() {
|
||||
// Regression: CP1251 words like "Привет" must not be mis-identified as Windows-1252.
|
||||
let bytes = b"\xCF\xF0\xE8\xE2\xE5\xF2"; // "Привет" encoded with Windows-1251
|
||||
assert_eq!(bytes_to_string_smart(bytes), "Привет");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_koi8_r_privet_word() {
|
||||
// KOI8-R output should decode to the original Cyrillic as well.
|
||||
let bytes = b"\xF0\xD2\xC9\xD7\xC5\xD4"; // "Привет" encoded with KOI8-R
|
||||
assert_eq!(bytes_to_string_smart(bytes), "Привет");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cp866_russian_text() {
|
||||
// Legacy consoles (cmd.exe) commonly emit CP866 bytes for Cyrillic content.
|
||||
let bytes = b"\xAF\xE0\xA8\xAC\xA5\xE0"; // "пример" encoded with CP866
|
||||
assert_eq!(bytes_to_string_smart(bytes), "пример");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cp866_uppercase_text() {
|
||||
// Ensure the IBM866 heuristic still returns IBM866 for uppercase-only words.
|
||||
let bytes = b"\x8F\x90\x88"; // "ПРИ" encoded with CP866 uppercase letters
|
||||
assert_eq!(bytes_to_string_smart(bytes), "ПРИ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cp866_uppercase_followed_by_ascii() {
|
||||
// Regression test: uppercase CP866 tokens next to ASCII text should not be treated as
|
||||
// CP1252.
|
||||
let bytes = b"\x8F\x90\x88 test"; // "ПРИ test" encoded with CP866 uppercase letters followed by ASCII
|
||||
assert_eq!(bytes_to_string_smart(bytes), "ПРИ test");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1252_quotes() {
|
||||
// Smart detection should map Windows-1252 punctuation into proper Unicode.
|
||||
let bytes = b"\x93\x94test";
|
||||
assert_eq!(bytes_to_string_smart(bytes), "\u{201C}\u{201D}test");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1252_multiple_quotes() {
|
||||
// Longer snippets of punctuation (e.g., “foo” – “bar”) should still flip to CP1252.
|
||||
let bytes = b"\x93foo\x94 \x96 \x93bar\x94";
|
||||
assert_eq!(
|
||||
bytes_to_string_smart(bytes),
|
||||
"\u{201C}foo\u{201D} \u{2013} \u{201C}bar\u{201D}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1252_privet_gibberish_is_preserved() {
|
||||
// Windows-1252 cannot encode Cyrillic; if the input literally contains "ПÑ..." we should not "fix" it.
|
||||
let bytes = "Привет".as_bytes();
|
||||
assert_eq!(bytes_to_string_smart(bytes), "Привет");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_1_latin_text() {
|
||||
// ISO-8859-1 (code page 28591) is the Latin segment used by LatArCyrHeb.
|
||||
// encoding_rs unifies ISO-8859-1 with Windows-1252, so reuse that constant here.
|
||||
let (encoded, _, had_errors) = WINDOWS_1252.encode("Hello");
|
||||
assert!(!had_errors, "failed to encode Latin sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Hello");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_2_central_european_text() {
|
||||
// ISO-8859-2 (code page 28592) covers additional Central European glyphs.
|
||||
let (encoded, _, had_errors) = ISO_8859_2.encode("Příliš žluťoučký kůň");
|
||||
assert!(!had_errors, "failed to encode ISO-8859-2 sample");
|
||||
assert_eq!(
|
||||
bytes_to_string_smart(encoded.as_ref()),
|
||||
"Příliš žluťoučký kůň"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_3_south_europe_text() {
|
||||
// ISO-8859-3 (code page 28593) adds support for Maltese/Esperanto letters.
|
||||
// chardetng rarely distinguishes ISO-8859-3 from neighboring Latin code pages, so we rely on
|
||||
// an ASCII-only sample to ensure round-tripping still succeeds.
|
||||
let (encoded, _, had_errors) = ISO_8859_3.encode("Esperanto and Maltese");
|
||||
assert!(!had_errors, "failed to encode ISO-8859-3 sample");
|
||||
assert_eq!(
|
||||
bytes_to_string_smart(encoded.as_ref()),
|
||||
"Esperanto and Maltese"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_4_baltic_text() {
|
||||
// ISO-8859-4 (code page 28594) targets the Baltic/Nordic repertoire.
|
||||
let sample = "Šis ir rakstzīmju kodēšanas tests. Dažās valodās, kurās tiek \
|
||||
izmantotas latīņu valodas burti, lēmuma pieņemšanai mums ir nepieciešams \
|
||||
vairāk ieguldījuma.";
|
||||
let (encoded, _, had_errors) = ISO_8859_4.encode(sample);
|
||||
assert!(!had_errors, "failed to encode ISO-8859-4 sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), sample);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_5_cyrillic_text() {
|
||||
// ISO-8859-5 (code page 28595) covers the Cyrillic portion.
|
||||
let (encoded, _, had_errors) = ISO_8859_5.encode("Привет");
|
||||
assert!(!had_errors, "failed to encode Cyrillic sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Привет");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_6_arabic_text() {
|
||||
// ISO-8859-6 (code page 28596) covers the Arabic glyphs.
|
||||
let (encoded, _, had_errors) = ISO_8859_6.encode("مرحبا");
|
||||
assert!(!had_errors, "failed to encode Arabic sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "مرحبا");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_7_greek_text() {
|
||||
// ISO-8859-7 (code page 28597) is used for Greek locales.
|
||||
let (encoded, _, had_errors) = ISO_8859_7.encode("Καλημέρα");
|
||||
assert!(!had_errors, "failed to encode ISO-8859-7 sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Καλημέρα");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_8_hebrew_text() {
|
||||
// ISO-8859-8 (code page 28598) covers the Hebrew glyphs.
|
||||
let (encoded, _, had_errors) = ISO_8859_8.encode("שלום");
|
||||
assert!(!had_errors, "failed to encode Hebrew sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "שלום");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_9_turkish_text() {
|
||||
// ISO-8859-9 (code page 28599) mirrors Latin-1 but inserts Turkish letters.
|
||||
// encoding_rs exposes the equivalent Windows-1254 mapping.
|
||||
let (encoded, _, had_errors) = WINDOWS_1254.encode("İstanbul");
|
||||
assert!(!had_errors, "failed to encode ISO-8859-9 sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "İstanbul");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_10_nordic_text() {
|
||||
// ISO-8859-10 (code page 28600) adds additional Nordic letters.
|
||||
let sample = "Þetta er prófun fyrir Ægir og Øystein.";
|
||||
let (encoded, _, had_errors) = ISO_8859_10.encode(sample);
|
||||
assert!(!had_errors, "failed to encode ISO-8859-10 sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), sample);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_11_thai_text() {
|
||||
// ISO-8859-11 (code page 28601) mirrors TIS-620 / Windows-874 for Thai.
|
||||
let sample = "ภาษาไทยสำหรับการทดสอบ ISO-8859-11";
|
||||
// encoding_rs exposes the equivalent Windows-874 encoding, so use that constant.
|
||||
let (encoded, _, had_errors) = WINDOWS_874.encode(sample);
|
||||
assert!(!had_errors, "failed to encode ISO-8859-11 sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), sample);
|
||||
}
|
||||
|
||||
// ISO-8859-12 was never standardized, and encodings 14–16 cannot be distinguished reliably
|
||||
// without the heuristics we removed (chardetng generally reports neighboring Latin pages), so
|
||||
// we intentionally omit coverage for those slots until the detector can identify them.
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_13_baltic_text() {
|
||||
// ISO-8859-13 (code page 28603) is common across Baltic languages.
|
||||
let (encoded, _, had_errors) = ISO_8859_13.encode("Sveiki");
|
||||
assert!(!had_errors, "failed to encode ISO-8859-13 sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Sveiki");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1250_central_european_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1250.encode("Příliš žluťoučký kůň");
|
||||
assert!(!had_errors, "failed to encode Central European sample");
|
||||
assert_eq!(
|
||||
bytes_to_string_smart(encoded.as_ref()),
|
||||
"Příliš žluťoučký kůň"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1251_encoded_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1251.encode("Привет из Windows-1251");
|
||||
assert!(!had_errors, "failed to encode Windows-1251 sample");
|
||||
assert_eq!(
|
||||
bytes_to_string_smart(encoded.as_ref()),
|
||||
"Привет из Windows-1251"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1253_greek_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1253.encode("Γειά σου");
|
||||
assert!(!had_errors, "failed to encode Greek sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Γειά σου");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1254_turkish_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1254.encode("İstanbul");
|
||||
assert!(!had_errors, "failed to encode Turkish sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "İstanbul");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1255_hebrew_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1255.encode("שלום");
|
||||
assert!(!had_errors, "failed to encode Windows-1255 Hebrew sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "שלום");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1256_arabic_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1256.encode("مرحبا");
|
||||
assert!(!had_errors, "failed to encode Windows-1256 Arabic sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "مرحبا");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1257_baltic_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1257.encode("Pērkons");
|
||||
assert!(!had_errors, "failed to encode Baltic sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Pērkons");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_1258_vietnamese_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_1258.encode("Xin chào");
|
||||
assert!(!had_errors, "failed to encode Vietnamese sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Xin chào");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_874_thai_text() {
|
||||
let (encoded, _, had_errors) = WINDOWS_874.encode("สวัสดีครับ นี่คือการทดสอบภาษาไทย");
|
||||
assert!(!had_errors, "failed to encode Thai sample");
|
||||
assert_eq!(
|
||||
bytes_to_string_smart(encoded.as_ref()),
|
||||
"สวัสดีครับ นี่คือการทดสอบภาษาไทย"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_932_shift_jis_text() {
|
||||
let (encoded, _, had_errors) = SHIFT_JIS.encode("こんにちは");
|
||||
assert!(!had_errors, "failed to encode Shift-JIS sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "こんにちは");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_936_gbk_text() {
|
||||
let (encoded, _, had_errors) = GBK.encode("你好,世界,这是一个测试");
|
||||
assert!(!had_errors, "failed to encode GBK sample");
|
||||
assert_eq!(
|
||||
bytes_to_string_smart(encoded.as_ref()),
|
||||
"你好,世界,这是一个测试"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_949_korean_text() {
|
||||
let (encoded, _, had_errors) = EUC_KR.encode("안녕하세요");
|
||||
assert!(!had_errors, "failed to encode Korean sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "안녕하세요");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_950_big5_text() {
|
||||
let (encoded, _, had_errors) = BIG5.encode("繁體");
|
||||
assert!(!had_errors, "failed to encode Big5 sample");
|
||||
assert_eq!(bytes_to_string_smart(encoded.as_ref()), "繁體");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_latin1_cafe() {
|
||||
// Latin-1 bytes remain common in Western-European locales; decode them directly.
|
||||
let bytes = b"caf\xE9"; // codespell:ignore caf
|
||||
assert_eq!(bytes_to_string_smart(bytes), "café");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_preserves_ansi_sequences() {
|
||||
// ANSI escape sequences should survive regardless of the detected encoding.
|
||||
let bytes = b"\x1b[31mred\x1b[0m";
|
||||
assert_eq!(bytes_to_string_smart(bytes), "\x1b[31mred\x1b[0m");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fallback_to_lossy() {
|
||||
// Completely invalid sequences fall back to the old lossy behavior.
|
||||
let invalid_bytes = [0xFF, 0xFE, 0xFD];
|
||||
let result = bytes_to_string_smart(&invalid_bytes);
|
||||
assert_eq!(result, String::from_utf8_lossy(&invalid_bytes));
|
||||
}
|
||||
}
|
||||
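For reference, a minimal sketch (not part of the commits; it assumes `encoding_rs` is available) of the byte collision this deleted module guarded against: the same three bytes decode to smart punctuation under Windows-1252 and to Cyrillic letters under IBM866.

```rust
use encoding_rs::{IBM866, WINDOWS_1252};

fn main() {
    // 0x93/0x94/0x96 are curly quotes and an en dash in CP1252, УФЦ in CP866.
    let bytes = [0x93, 0x94, 0x96];
    let (cp1252, _, _) = WINDOWS_1252.decode(&bytes);
    let (cp866, _, _) = IBM866.decode(&bytes);
    assert_eq!(cp1252, "\u{201C}\u{201D}\u{2013}"); // “ ” –
    assert_eq!(cp866, "УФЦ");
}
```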
@@ -15,8 +15,6 @@ use crate::protocol::PatchApplyEndEvent;
use crate::protocol::TurnDiffEvent;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::sandboxing::ToolError;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use codex_protocol::parse_command::ParsedCommand;
use std::collections::HashMap;
use std::path::Path;
@@ -31,7 +29,6 @@ pub(crate) struct ToolEventCtx<'a> {
    pub turn: &'a TurnContext,
    pub call_id: &'a str,
    pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
    pub override_truncation_policy: Option<&'a TruncationPolicy>,
}

impl<'a> ToolEventCtx<'a> {
@@ -40,14 +37,12 @@ impl<'a> ToolEventCtx<'a> {
        turn: &'a TurnContext,
        call_id: &'a str,
        turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
        override_truncation_policy: Option<&'a TruncationPolicy>,
    ) -> Self {
        Self {
            session,
            turn,
            call_id,
            turn_diff_tracker,
            override_truncation_policy,
        }
    }
}
@@ -184,17 +179,15 @@ impl ToolEmitter {
                    ctx.turn,
                    EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
                        call_id: ctx.call_id.to_string(),
                        turn_id: ctx.turn.sub_id.clone(),
                        auto_approved: *auto_approved,
                        changes: changes.clone(),
                    }),
                )
                .await;
            }
            (Self::ApplyPatch { changes, .. }, ToolEventStage::Success(output)) => {
            (Self::ApplyPatch { .. }, ToolEventStage::Success(output)) => {
                emit_patch_end(
                    ctx,
                    changes.clone(),
                    output.stdout.text.clone(),
                    output.stderr.text.clone(),
                    output.exit_code == 0,
@@ -202,12 +195,11 @@ impl ToolEmitter {
                .await;
            }
            (
                Self::ApplyPatch { changes, .. },
                Self::ApplyPatch { .. },
                ToolEventStage::Failure(ToolEventFailure::Output(output)),
            ) => {
                emit_patch_end(
                    ctx,
                    changes.clone(),
                    output.stdout.text.clone(),
                    output.stderr.text.clone(),
                    output.exit_code == 0,
@@ -215,17 +207,10 @@ impl ToolEmitter {
                .await;
            }
            (
                Self::ApplyPatch { changes, .. },
                Self::ApplyPatch { .. },
                ToolEventStage::Failure(ToolEventFailure::Message(message)),
            ) => {
                emit_patch_end(
                    ctx,
                    changes.clone(),
                    String::new(),
                    (*message).to_string(),
                    false,
                )
                .await;
                emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
            }
            (
                Self::UnifiedExec {
@@ -260,13 +245,13 @@ impl ToolEmitter {
    fn format_exec_output_for_model(
        &self,
        output: &ExecToolCallOutput,
        truncation_policy: &TruncationPolicy,
        ctx: ToolEventCtx<'_>,
    ) -> String {
        match self {
            Self::Shell { freeform: true, .. } => {
                super::format_exec_output_for_model_freeform(output, *truncation_policy)
                super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
            }
            _ => super::format_exec_output_for_model_structured(output, *truncation_policy),
            _ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
        }
    }

@@ -275,12 +260,9 @@ impl ToolEmitter {
        ctx: ToolEventCtx<'_>,
        out: Result<ExecToolCallOutput, ToolError>,
    ) -> Result<String, FunctionCallError> {
        let truncation_policy = ctx
            .override_truncation_policy
            .unwrap_or(&ctx.turn.truncation_policy);
        let (event, result) = match out {
            Ok(output) => {
                let content = self.format_exec_output_for_model(&output, truncation_policy);
                let content = self.format_exec_output_for_model(&output, ctx);
                let exit_code = output.exit_code;
                let event = ToolEventStage::Success(output);
                let result = if exit_code == 0 {
@@ -292,26 +274,24 @@ impl ToolEmitter {
            }
            Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
            | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
                let response = self.format_exec_output_for_model(&output, truncation_policy);
                let response = self.format_exec_output_for_model(&output, ctx);
                let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
                let result = Err(FunctionCallError::RespondToModel(response));
                (event, result)
            }
            Err(ToolError::Codex(err)) => {
                let formatted_error = formatted_truncate_text(&err.to_string(), *truncation_policy);
                let message = format!("execution error: {formatted_error}");
                let event = ToolEventStage::Failure(ToolEventFailure::Message(message));
                let result = Err(FunctionCallError::RespondToModel(formatted_error));
                let message = format!("execution error: {err:?}");
                let event = ToolEventStage::Failure(ToolEventFailure::Message(message.clone()));
                let result = Err(FunctionCallError::RespondToModel(message));
                (event, result)
            }
            Err(ToolError::Rejected(msg)) => {
                let formatted_msg = formatted_truncate_text(&msg, *truncation_policy);
                // Normalize common rejection messages for exec tools so tests and
                // users see a clear, consistent phrase.
                let normalized = if formatted_msg == "rejected by user" {
                let normalized = if msg == "rejected by user" {
                    "exec command rejected by user".to_string()
                } else {
                    formatted_msg
                    msg
                };
                let event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
                let result = Err(FunctionCallError::RespondToModel(normalized));
@@ -429,23 +409,15 @@ async fn emit_exec_end(
        .await;
}

async fn emit_patch_end(
    ctx: ToolEventCtx<'_>,
    changes: HashMap<PathBuf, FileChange>,
    stdout: String,
    stderr: String,
    success: bool,
) {
async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) {
    ctx.session
        .send_event(
            ctx.turn,
            EventMsg::PatchApplyEnd(PatchApplyEndEvent {
                call_id: ctx.call_id.to_string(),
                turn_id: ctx.turn.sub_id.clone(),
                stdout,
                stderr,
                success,
                changes,
            }),
        )
        .await;

@@ -100,7 +100,6 @@ impl ToolHandler for ApplyPatchHandler {
            turn.as_ref(),
            &call_id,
            Some(&tracker),
            None,
        );
        emitter.begin(event_ctx).await;

@@ -128,7 +127,6 @@ impl ToolHandler for ApplyPatchHandler {
            turn.as_ref(),
            &call_id,
            Some(&tracker),
            None,
        );
        let content = emitter.finish(event_ctx, out).await?;
        Ok(ToolOutput::Function {

@@ -9,11 +9,9 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::codex::TurnContext;
use crate::exec::ExecParams;
use crate::exec_env::create_env;
use crate::exec_policy::create_approval_requirement_for_command;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::SandboxPermissions;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -27,7 +25,6 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
use crate::tools::runtimes::shell::ShellRequest;
use crate::tools::runtimes::shell::ShellRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::TruncationPolicy;

pub struct ShellHandler;

@@ -38,13 +35,11 @@ impl ShellHandler {
        ExecParams {
            command: params.command,
            cwd: turn_context.resolve_path(params.workdir.clone()),
            expiration: params.timeout_ms.into(),
            timeout_ms: params.timeout_ms,
            env: create_env(&turn_context.shell_environment_policy),
            with_escalated_permissions: params.with_escalated_permissions,
            justification: params.justification,
            arg0: None,
            max_output_tokens: params.max_output_tokens,
            max_output_chars: params.max_output_chars,
        }
    }
}
@@ -62,13 +57,11 @@ impl ShellCommandHandler {
        ExecParams {
            command,
            cwd: turn_context.resolve_path(params.workdir.clone()),
            expiration: params.timeout_ms.into(),
            timeout_ms: params.timeout_ms,
            env: create_env(&turn_context.shell_environment_policy),
            with_escalated_permissions: params.with_escalated_permissions,
            justification: params.justification,
            arg0: None,
            max_output_tokens: params.max_output_tokens,
            max_output_chars: params.max_output_chars,
        }
    }
}
@@ -214,9 +207,6 @@ impl ShellHandler {
            )));
        }

        let override_truncation_policy =
            create_truncation_policy(exec_params.max_output_tokens, exec_params.max_output_chars);

        // Intercept apply_patch if present.
        match codex_apply_patch::maybe_parse_apply_patch_verified(
            &exec_params.command,
@@ -245,14 +235,13 @@ impl ShellHandler {
                    turn.as_ref(),
                    &call_id,
                    Some(&tracker),
                    override_truncation_policy.as_ref(),
                );
                emitter.begin(event_ctx).await;

                let req = ApplyPatchRequest {
                    patch: apply.action.patch.clone(),
                    cwd: apply.action.cwd.clone(),
                    timeout_ms: exec_params.expiration.timeout_ms(),
                    timeout_ms: exec_params.timeout_ms,
                    user_explicitly_approved: apply.user_explicitly_approved_this_action,
                    codex_exe: turn.codex_linux_sandbox_exe.clone(),
                };
@@ -272,7 +261,6 @@ impl ShellHandler {
                    turn.as_ref(),
                    &call_id,
                    Some(&tracker),
                    override_truncation_policy.as_ref(),
                );
                let content = emitter.finish(event_ctx, out).await?;
                return Ok(ToolOutput::Function {
@@ -304,31 +292,16 @@ impl ShellHandler {
            source,
            freeform,
        );
        let event_ctx = ToolEventCtx::new(
            session.as_ref(),
            turn.as_ref(),
            &call_id,
            None,
            override_truncation_policy.as_ref(),
        );
        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
        emitter.begin(event_ctx).await;

        let req = ShellRequest {
            command: exec_params.command.clone(),
            cwd: exec_params.cwd.clone(),
            timeout_ms: exec_params.expiration.timeout_ms(),
            timeout_ms: exec_params.timeout_ms,
            env: exec_params.env.clone(),
            with_escalated_permissions: exec_params.with_escalated_permissions,
            justification: exec_params.justification.clone(),
            max_output_tokens: exec_params.max_output_tokens,
            max_output_chars: exec_params.max_output_chars,
            approval_requirement: create_approval_requirement_for_command(
                &turn.exec_policy,
                &exec_params.command,
                turn.approval_policy,
                &turn.sandbox_policy,
                SandboxPermissions::from(exec_params.with_escalated_permissions.unwrap_or(false)),
            ),
        };
        let mut orchestrator = ToolOrchestrator::new();
        let mut runtime = ShellRuntime::new();
@@ -341,13 +314,7 @@ impl ShellHandler {
        let out = orchestrator
            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
            .await;
        let event_ctx = ToolEventCtx::new(
            session.as_ref(),
            turn.as_ref(),
            &call_id,
            None,
            override_truncation_policy.as_ref(),
        );
        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
        let content = emitter.finish(event_ctx, out).await?;
        Ok(ToolOutput::Function {
            content,
@@ -357,45 +324,34 @@ impl ShellHandler {
    }
}

fn create_truncation_policy(
    max_output_tokens: Option<usize>,
    max_output_chars: Option<usize>,
) -> Option<TruncationPolicy> {
    if let Some(max_output_tokens) = max_output_tokens {
        Some(TruncationPolicy::Tokens(max_output_tokens))
    } else {
        max_output_chars.map(TruncationPolicy::Bytes)
    }
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use crate::is_safe_command::is_known_safe_command;
    use crate::shell::BashShell;
    use crate::shell::PowerShellConfig;
    use crate::shell::Shell;
    use crate::shell::ShellType;
    use crate::shell::ZshShell;

    /// The logic for is_known_safe_command() has heuristics for known shells,
    /// so we must ensure the commands generated by [ShellCommandHandler] can be
    /// recognized as safe if the `command` is safe.
    #[test]
    fn commands_generated_by_shell_command_handler_can_be_matched_by_is_known_safe_command() {
        let bash_shell = Shell {
            shell_type: ShellType::Bash,
        let bash_shell = Shell::Bash(BashShell {
            shell_path: PathBuf::from("/bin/bash"),
        };
        });
        assert_safe(&bash_shell, "ls -la");

        let zsh_shell = Shell {
            shell_type: ShellType::Zsh,
        let zsh_shell = Shell::Zsh(ZshShell {
            shell_path: PathBuf::from("/bin/zsh"),
        };
        });
        assert_safe(&zsh_shell, "ls -la");

        let powershell = Shell {
            shell_type: ShellType::PowerShell,
        let powershell = Shell::PowerShell(PowerShellConfig {
            shell_path: PathBuf::from("pwsh.exe"),
        };
        });
        assert_safe(&powershell, "ls -Name");
    }

@@ -162,7 +162,6 @@ impl ToolHandler for UnifiedExecHandler {
            context.turn.as_ref(),
            &context.call_id,
            None,
            None,
        );
        let emitter = ToolEmitter::unified_exec(
            &command,

@@ -11,13 +11,11 @@ use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::default_approval_requirement;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;

@@ -51,52 +49,40 @@ impl ToolOrchestrator {
        let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;

        // 1) Approval
        let needs_initial_approval =
            tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
        let mut already_approved = false;

        let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
            default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
        });
        match requirement {
            ApprovalRequirement::Skip => {
                otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
            }
            ApprovalRequirement::Forbidden { reason } => {
                return Err(ToolError::Rejected(reason));
            }
            ApprovalRequirement::NeedsApproval { reason } => {
                let mut risk = None;
        if needs_initial_approval {
            let mut risk = None;

            if let Some(metadata) = req.sandbox_retry_data() {
                risk = tool_ctx
                    .session
                    .assess_sandbox_command(
                        turn_ctx,
                        &tool_ctx.call_id,
                        &metadata.command,
                        None,
                    )
                    .await;
            if let Some(metadata) = req.sandbox_retry_data() {
                risk = tool_ctx
                    .session
                    .assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
                    .await;
            }

            let approval_ctx = ApprovalCtx {
                session: tool_ctx.session,
                turn: turn_ctx,
                call_id: &tool_ctx.call_id,
                retry_reason: None,
                risk,
            };
            let decision = tool.start_approval_async(req, approval_ctx).await;

            otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());

            match decision {
                ReviewDecision::Denied | ReviewDecision::Abort => {
                    return Err(ToolError::Rejected("rejected by user".to_string()));
                }

                let approval_ctx = ApprovalCtx {
                    session: tool_ctx.session,
                    turn: turn_ctx,
                    call_id: &tool_ctx.call_id,
                    retry_reason: reason,
                    risk,
                };
                let decision = tool.start_approval_async(req, approval_ctx).await;

                otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());

                match decision {
                    ReviewDecision::Denied | ReviewDecision::Abort => {
                        return Err(ToolError::Rejected("rejected by user".to_string()));
                    }
                    ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
                }
                already_approved = true;
            ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
            }
            already_approved = true;
        } else {
            otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
        }

        // 2) First attempt under the selected sandbox.
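The new control flow branches on a precomputed requirement rather than a boolean. Its shape, as implied by the match arms above (a sketch; whether `reason` is optional in `NeedsApproval` is an assumption, and the real definition lives in `tools::sandboxing`):

```rust
// Sketch only, derived from the match arms in the orchestrator hunk.
enum ApprovalRequirement {
    // Run without asking; recorded as an approval sourced from config.
    Skip,
    // Refuse outright, surfacing `reason` as the rejection message.
    Forbidden { reason: String },
    // Ask the user, threading `reason` through as the approval context's
    // retry reason.
    NeedsApproval { reason: Option<String> },
}
```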
@@ -116,8 +116,6 @@ impl ToolRouter {
|
||||
timeout_ms: exec.timeout_ms,
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
Ok(Some(ToolCall {
|
||||
tool_name: "local_shell".to_string(),
|
||||
|
||||

@@ -67,13 +67,11 @@ impl ApplyPatchRuntime {
program,
args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
cwd: req.cwd.clone(),
expiration: req.timeout_ms.into(),
timeout_ms: req.timeout_ms,
// Run apply_patch with a minimal environment for determinism and to avoid leaks.
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
max_output_tokens: None,
max_output_chars: None,
})
}

@@ -155,9 +153,9 @@ impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
) -> Result<ExecToolCallOutput, ToolError> {
let spec = Self::build_command_spec(req)?;
let env = attempt
.env_for(spec)
.env_for(&spec)
.map_err(|err| ToolError::Codex(err.into()))?;
let out = execute_env(env, attempt.policy, Self::stdout_stream(ctx))
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
.await
.map_err(ToolError::Codex)?;
Ok(out)
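A recurring change in these runtime hunks is `env_for(spec)` becoming `env_for(&spec)`. A minimal sketch of why the borrow matters, with simplified stand-in types (nothing here is from the diff itself): a spec that is only borrowed stays available for a sandbox retry instead of being consumed by the first attempt.

```rust
#[derive(Clone)]
struct CommandSpec {
    program: String,
    args: Vec<String>,
}

struct ExecEnv {
    command: Vec<String>,
}

// Borrow the spec rather than taking ownership, so callers can reuse it.
fn env_for(spec: &CommandSpec) -> ExecEnv {
    let mut command = vec![spec.program.clone()];
    command.extend(spec.args.iter().cloned());
    ExecEnv { command }
}

fn main() {
    let spec = CommandSpec { program: "echo".into(), args: vec!["hi".into()] };
    let first = env_for(&spec); // first sandboxed attempt
    let retry = env_for(&spec); // spec is still available for a retry
    assert_eq!(first.command, retry.command);
}
```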

@@ -4,7 +4,6 @@ Module: runtimes
Concrete ToolRuntime implementations for specific tools. Each runtime stays
small and focused and reuses the orchestrator for approvals + sandbox + retry.
*/
use crate::exec::ExecExpiration;
use crate::sandboxing::CommandSpec;
use crate::tools::sandboxing::ToolError;
use std::collections::HashMap;
@@ -16,16 +15,13 @@ pub mod unified_exec;

/// Shared helper to construct a CommandSpec from a tokenized command line.
/// Validates that at least a program is present.
#[allow(clippy::too_many_arguments)]
pub(crate) fn build_command_spec(
command: &[String],
cwd: &Path,
env: &HashMap<String, String>,
expiration: ExecExpiration,
timeout_ms: Option<u64>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
max_output_tokens: Option<usize>,
max_output_chars: Option<usize>,
) -> Result<CommandSpec, ToolError> {
let (program, args) = command
.split_first()
@@ -35,10 +31,8 @@ pub(crate) fn build_command_spec(
args: args.to_vec(),
cwd: cwd.to_path_buf(),
env: env.clone(),
expiration,
timeout_ms,
with_escalated_permissions,
justification,
max_output_tokens,
max_output_chars,
})
}

@@ -4,12 +4,13 @@ Runtime: shell
Executes shell requests under the orchestrator: asks for approval when needed,
builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use crate::exec::ExecToolCallOutput;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -19,6 +20,7 @@ use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::path::PathBuf;
@@ -31,9 +33,6 @@ pub struct ShellRequest {
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub max_output_tokens: Option<usize>,
pub max_output_chars: Option<usize>,
pub approval_requirement: ApprovalRequirement,
}

impl ProvidesSandboxRetryData for ShellRequest {
@@ -115,8 +114,18 @@ impl Approvable<ShellRequest> for ShellRuntime {
})
}

fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
fn wants_initial_approval(
&self,
req: &ShellRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
}

fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
@@ -135,16 +144,14 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
&req.command,
&req.cwd,
&req.env,
req.timeout_ms.into(),
req.timeout_ms,
req.with_escalated_permissions,
req.justification.clone(),
req.max_output_tokens,
req.max_output_chars,
)?;
let env = attempt
.env_for(spec)
.env_for(&spec)
.map_err(|err| ToolError::Codex(err.into()))?;
let out = execute_env(env, attempt.policy, Self::stdout_stream(ctx))
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
.await
.map_err(ToolError::Codex)?;
Ok(out)

@@ -1,3 +1,4 @@
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
/*
Runtime: unified exec

@@ -6,11 +7,9 @@ the session manager to spawn PTYs once an ExecEnv is prepared.
*/
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::exec::ExecExpiration;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -23,7 +22,9 @@ use crate::tools::sandboxing::with_cached_approval;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecSession;
use crate::unified_exec::UnifiedExecSessionManager;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
@@ -35,9 +36,6 @@ pub struct UnifiedExecRequest {
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub max_output_tokens: Option<usize>,
pub max_output_chars: Option<usize>,
pub approval_requirement: ApprovalRequirement,
}

impl ProvidesSandboxRetryData for UnifiedExecRequest {
@@ -67,7 +65,6 @@ impl UnifiedExecRequest {
env: HashMap<String, String>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
approval_requirement: ApprovalRequirement,
) -> Self {
Self {
command,
@@ -75,9 +72,6 @@ impl UnifiedExecRequest {
env,
with_escalated_permissions,
justification,
max_output_tokens: None,
max_output_chars: None,
approval_requirement,
}
}
}
@@ -135,8 +129,18 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
})
}

fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
fn wants_initial_approval(
&self,
req: &UnifiedExecRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
}

fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
@@ -155,15 +159,13 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
&req.command,
&req.cwd,
&req.env,
ExecExpiration::DefaultTimeout,
None,
req.with_escalated_permissions,
req.justification.clone(),
req.max_output_tokens,
req.max_output_chars,
)
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
let exec_env = attempt
.env_for(spec)
.env_for(&spec)
.map_err(|err| ToolError::Codex(err.into()))?;
self.manager
.open_session_with_exec_env(&exec_env)

@@ -86,37 +86,6 @@ pub(crate) struct ApprovalCtx<'a> {
pub risk: Option<SandboxCommandAssessment>,
}

// Specifies what tool orchestrator should do with a given tool call.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum ApprovalRequirement {
/// No approval required for this tool call
Skip,
/// Approval required for this tool call
NeedsApproval { reason: Option<String> },
/// Execution forbidden for this tool call
Forbidden { reason: String },
}

/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
pub(crate) fn default_approval_requirement(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> ApprovalRequirement {
let needs_approval = match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
};

if needs_approval {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}

pub(crate) trait Approvable<Req> {
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;

@@ -137,11 +106,22 @@ pub(crate) trait Approvable<Req> {
matches!(policy, AskForApproval::Never)
}

/// Override the default approval requirement. Return `Some(_)` to specify
/// a custom requirement, or `None` to fall back to
/// policy-based default.
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
None
/// Decide whether an initial user approval should be requested before the
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
fn wants_initial_approval(
&self,
_req: &Req,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
}
}

/// Decide we can request an approval for no-sandbox execution.
@@ -216,7 +196,7 @@ pub(crate) struct SandboxAttempt<'a> {
impl<'a> SandboxAttempt<'a> {
pub fn env_for(
&self,
spec: CommandSpec,
spec: &CommandSpec,
) -> Result<crate::sandboxing::ExecEnv, SandboxTransformError> {
self.manager.transform(
spec,
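For reference, the policy table in the doc comment above (`Never`/`OnFailure` → don't ask, `OnRequest` → ask unless the sandbox is `DangerFullAccess`, `UnlessTrusted` → always ask) can be exercised with a small sketch. This uses simplified local enums rather than the real `codex_protocol` types:

```rust
#[derive(Debug, Clone, Copy)]
enum AskForApproval { Never, OnFailure, OnRequest, UnlessTrusted }

#[derive(Debug, Clone, Copy)]
enum SandboxPolicy { ReadOnly, WorkspaceWrite, DangerFullAccess }

// The same mapping the default implementation encodes.
fn needs_approval(policy: AskForApproval, sandbox: SandboxPolicy) -> bool {
    match policy {
        // Never / OnFailure: run without asking up front.
        AskForApproval::Never | AskForApproval::OnFailure => false,
        // OnRequest: ask unless the sandbox already allows everything.
        AskForApproval::OnRequest => !matches!(sandbox, SandboxPolicy::DangerFullAccess),
        // UnlessTrusted: always ask first.
        AskForApproval::UnlessTrusted => true,
    }
}

fn main() {
    assert!(!needs_approval(AskForApproval::Never, SandboxPolicy::ReadOnly));
    assert!(!needs_approval(AskForApproval::OnFailure, SandboxPolicy::WorkspaceWrite));
    assert!(!needs_approval(AskForApproval::OnRequest, SandboxPolicy::DangerFullAccess));
    assert!(needs_approval(AskForApproval::OnRequest, SandboxPolicy::WorkspaceWrite));
    assert!(needs_approval(AskForApproval::UnlessTrusted, SandboxPolicy::DangerFullAccess));
}
```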

@@ -8,7 +8,6 @@ use crate::tools::handlers::apply_patch::ApplyPatchToolType;
use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
use crate::tools::registry::ToolRegistryBuilder;
use crate::truncate::TruncationPolicy;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
@@ -18,7 +17,7 @@ use std::collections::HashMap;

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ConfigShellToolType {
Default(TruncationPolicy),
Default,
Local,
UnifiedExec,
/// Do not include a shell tool by default. Useful when using Codex
@@ -27,7 +26,7 @@ pub enum ConfigShellToolType {
/// to customize agent behavior.
Disabled,
/// Takes a command as a single string to be run in the user's default shell.
ShellCommand(TruncationPolicy),
ShellCommand,
}

#[derive(Debug, Clone)]
@@ -58,6 +57,8 @@ impl ToolsConfig {
ConfigShellToolType::Disabled
} else if features.enabled(Feature::UnifiedExec) {
ConfigShellToolType::UnifiedExec
} else if features.enabled(Feature::ShellCommandTool) {
ConfigShellToolType::ShellCommand
} else {
model_family.shell_type.clone()
};
@@ -265,7 +266,7 @@ fn create_write_stdin_tool() -> ToolSpec {
})
}

fn create_shell_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
fn create_shell_tool() -> ToolSpec {
let mut properties = BTreeMap::new();
properties.insert(
"command".to_string(),
@@ -299,24 +300,6 @@ fn create_shell_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
},
);
match truncation_policy {
TruncationPolicy::Tokens(_) => {
properties.insert(
"max_output_tokens".to_string(),
JsonSchema::Number {
description: Some("Maximum number of tokens to return from stdout/stderr. Excess tokens will be truncated".to_string()),
},
);
}
TruncationPolicy::Bytes(_) => {
properties.insert(
"max_output_chars".to_string(),
JsonSchema::Number {
description: Some("Maximum number of characters to return from stdout/stderr. Excess characters will be truncated".to_string()),
},
);
}
}

let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
@@ -347,7 +330,7 @@ Examples of valid command strings:
})
}

fn create_shell_command_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
fn create_shell_command_tool() -> ToolSpec {
let mut properties = BTreeMap::new();
properties.insert(
"command".to_string(),
@@ -381,30 +364,6 @@ fn create_shell_command_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
},
);
match truncation_policy {
TruncationPolicy::Tokens(_) => {
properties.insert(
"max_output_tokens".to_string(),
JsonSchema::Number {
description: Some(
"Maximum number of tokens to return. Excess output will be truncated."
.to_string(),
),
},
);
}
TruncationPolicy::Bytes(_) => {
properties.insert(
"max_output_chars".to_string(),
JsonSchema::Number {
description: Some(
"Maximum number of tokens to return. Excess output will be truncated."
.to_string(),
),
},
);
}
}

let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output.
@@ -1042,8 +1001,8 @@ pub(crate) fn build_specs(
let shell_command_handler = Arc::new(ShellCommandHandler);

match &config.shell_type {
ConfigShellToolType::Default(truncation_policy) => {
builder.push_spec(create_shell_tool(*truncation_policy));
ConfigShellToolType::Default => {
builder.push_spec(create_shell_tool());
}
ConfigShellToolType::Local => {
builder.push_spec(ToolSpec::LocalShell {});
@@ -1057,8 +1016,8 @@ pub(crate) fn build_specs(
ConfigShellToolType::Disabled => {
// Do nothing.
}
ConfigShellToolType::ShellCommand(truncation_policy) => {
builder.push_spec(create_shell_command_tool(*truncation_policy));
ConfigShellToolType::ShellCommand => {
builder.push_spec(create_shell_command_tool());
}
}

@@ -1201,11 +1160,11 @@ mod tests {

fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> {
match config.shell_type {
ConfigShellToolType::Default(_) => Some("shell"),
ConfigShellToolType::Default => Some("shell"),
ConfigShellToolType::Local => Some("local_shell"),
ConfigShellToolType::UnifiedExec => None,
ConfigShellToolType::Disabled => None,
ConfigShellToolType::ShellCommand(_) => Some("shell_command"),
ConfigShellToolType::ShellCommand => Some("shell_command"),
}
}

@@ -1509,6 +1468,22 @@ mod tests {
assert_contains_tool_names(&tools, &subset);
}

#[test]
fn test_build_specs_shell_command_present() {
assert_model_tools(
"codex-mini-latest",
Features::with_defaults().enable(Feature::ShellCommandTool),
&[
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"view_image",
],
);
}

#[test]
#[ignore]
fn test_parallel_support_flags() {
@@ -1951,7 +1926,7 @@ mod tests {

#[test]
fn test_shell_tool() {
let tool = super::create_shell_tool(TruncationPolicy::Bytes(10_000));
let tool = super::create_shell_tool();
let ToolSpec::Function(ResponsesApiTool {
description, name, ..
}) = &tool
@@ -1981,7 +1956,7 @@ Examples of valid command strings:

#[test]
fn test_shell_command_tool() {
let tool = super::create_shell_command_tool(TruncationPolicy::Tokens(10_000));
let tool = super::create_shell_command_tool();
let ToolSpec::Function(ResponsesApiTool {
description, name, ..
}) = &tool

@@ -185,7 +185,6 @@ fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
if s.is_empty() {
return String::new();
}

let total_chars = s.chars().count();
let max_bytes = policy.byte_budget();

@@ -205,55 +204,24 @@ fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
let total_bytes = s.len();

let (left_budget, right_budget) = split_budget(max_bytes);
let prefix_end = pick_prefix_end(s, left_budget);
let mut suffix_start = pick_suffix_start(s, right_budget);
if suffix_start < prefix_end {
suffix_start = prefix_end;
}

let (removed_chars, left, right) = split_string(s, left_budget, right_budget);
let left_chars = s[..prefix_end].chars().count();
let right_chars = s[suffix_start..].chars().count();
let removed_chars = total_chars
.saturating_sub(left_chars)
.saturating_sub(right_chars);

let marker = format_truncation_marker(
policy,
removed_units_for_source(policy, total_bytes.saturating_sub(max_bytes), removed_chars),
);

assemble_truncated_output(left, right, &marker)
}

fn split_string(s: &str, beginning_bytes: usize, end_bytes: usize) -> (usize, &str, &str) {
if s.is_empty() {
return (0, "", "");
}

let len = s.len();
let tail_start_target = len.saturating_sub(end_bytes);
let mut prefix_end = 0usize;
let mut suffix_start = len;
let mut removed_chars = 0usize;
let mut suffix_started = false;

for (idx, ch) in s.char_indices() {
let char_end = idx + ch.len_utf8();
if char_end <= beginning_bytes {
prefix_end = char_end;
continue;
}

if idx >= tail_start_target {
if !suffix_started {
suffix_start = idx;
suffix_started = true;
}
continue;
}

removed_chars = removed_chars.saturating_add(1);
}

if suffix_start < prefix_end {
suffix_start = prefix_end;
}

let before = &s[..prefix_end];
let after = &s[suffix_start..];

(removed_chars, before, after)
assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker)
}

fn format_truncation_marker(policy: TruncationPolicy, removed_count: u64) -> String {
@@ -302,54 +270,53 @@ fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
/ (APPROX_BYTES_PER_TOKEN as u64)
}

fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
if input.len() <= max_len {
return input;
}
let mut end = max_len;
while end > 0 && !input.is_char_boundary(end) {
end -= 1;
}
&input[..end]
}

fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
if let Some(head) = s.get(..left_budget)
&& let Some(i) = head.rfind('\n')
{
return i + 1;
}
truncate_on_boundary(s, left_budget).len()
}

fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
let start_tail = s.len().saturating_sub(right_budget);
if let Some(tail) = s.get(start_tail..)
&& let Some(i) = tail.find('\n')
{
return start_tail + i + 1;
}

let mut idx = start_tail.min(s.len());
while idx < s.len() && !s.is_char_boundary(idx) {
idx += 1;
}
idx
}

#[cfg(test)]
mod tests {

use super::TruncationPolicy;
use super::approx_token_count;
use super::formatted_truncate_text;
use super::split_string;
use super::truncate_function_output_items_with_policy;
use super::truncate_text;
use super::truncate_with_token_budget;
use codex_protocol::models::FunctionCallOutputContentItem;
use pretty_assertions::assert_eq;

#[test]
fn split_string_works() {
assert_eq!(split_string("hello world", 5, 5), (1, "hello", "world"));
assert_eq!(split_string("abc", 0, 0), (3, "", ""));
}

#[test]
fn split_string_handles_empty_string() {
assert_eq!(split_string("", 4, 4), (0, "", ""));
}

#[test]
fn split_string_only_keeps_prefix_when_tail_budget_is_zero() {
assert_eq!(split_string("abcdef", 3, 0), (3, "abc", ""));
}

#[test]
fn split_string_only_keeps_suffix_when_prefix_budget_is_zero() {
assert_eq!(split_string("abcdef", 0, 3), (3, "", "def"));
}

#[test]
fn split_string_handles_overlapping_budgets_without_removal() {
assert_eq!(split_string("abcdef", 4, 4), (0, "abcd", "ef"));
}

#[test]
fn split_string_respects_utf8_boundaries() {
assert_eq!(split_string("😀abc😀", 5, 5), (1, "😀a", "c😀"));

assert_eq!(split_string("😀😀😀😀😀", 1, 1), (5, "", ""));
assert_eq!(split_string("😀😀😀😀😀", 7, 7), (3, "😀", "😀"));
assert_eq!(split_string("😀😀😀😀😀", 8, 8), (1, "😀😀", "😀😀"));
}

#[test]
fn truncate_bytes_less_than_placeholder_returns_placeholder() {
let content = "example output";
@@ -453,7 +420,7 @@ mod tests {
fn truncate_middle_tokens_handles_utf8_content() {
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
let (out, tokens) = truncate_with_token_budget(s, TruncationPolicy::Tokens(8));
assert_eq!(out, "😀😀😀😀…8 tokens truncated… line with text\n");
assert_eq!(out, "😀😀😀😀…8 tokens truncated…");
assert_eq!(tokens, Some(16));
}

@@ -461,7 +428,7 @@ mod tests {
fn truncate_middle_bytes_handles_utf8_content() {
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
let out = truncate_text(s, TruncationPolicy::Bytes(20));
assert_eq!(out, "😀😀…21 chars truncated…with text\n");
assert_eq!(out, "😀😀…31 chars truncated…");
}

#[test]
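The truncation hunks above fold `split_string` into `truncate_with_byte_estimate` and make the prefix/suffix cuts prefer newline boundaries. A minimal sketch of the underlying middle-truncation idea, under simplified assumptions (even budget split, char-boundary snapping only, and a plain `chars truncated` marker — not the real marker format):

```rust
// Keep a prefix and a suffix within byte budgets, snap both cut points to
// UTF-8 char boundaries, and join them with a marker for the removed middle.
fn truncate_middle(s: &str, max_bytes: usize) -> String {
    if s.len() <= max_bytes {
        return s.to_string();
    }
    let left_budget = max_bytes / 2;
    let right_budget = max_bytes - left_budget;

    // Walk the prefix cut left until it lands on a char boundary.
    let mut prefix_end = left_budget.min(s.len());
    while prefix_end > 0 && !s.is_char_boundary(prefix_end) {
        prefix_end -= 1;
    }
    // Walk the suffix cut right until it lands on a char boundary.
    let mut suffix_start = s.len().saturating_sub(right_budget);
    while suffix_start < s.len() && !s.is_char_boundary(suffix_start) {
        suffix_start += 1;
    }
    if suffix_start < prefix_end {
        suffix_start = prefix_end;
    }

    let removed = s[prefix_end..suffix_start].chars().count();
    format!("{}…{} chars truncated…{}", &s[..prefix_end], removed, &s[suffix_start..])
}

fn main() {
    // Ten 4-byte emoji with an 8-byte budget: keep one on each side.
    println!("{}", truncate_middle("😀😀😀😀😀😀😀😀😀😀", 8));
}
```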

@@ -2,13 +2,13 @@

use std::collections::VecDeque;
use std::sync::Arc;

use tokio::sync::Mutex;
use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::task::JoinHandle;
use tokio::time::Duration;
use tokio_util::sync::CancellationToken;

use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
@@ -67,18 +67,13 @@ impl OutputBufferState {
}

pub(crate) type OutputBuffer = Arc<Mutex<OutputBufferState>>;
pub(crate) struct OutputHandles {
pub(crate) output_buffer: OutputBuffer,
pub(crate) output_notify: Arc<Notify>,
pub(crate) cancellation_token: CancellationToken,
}
pub(crate) type OutputHandles = (OutputBuffer, Arc<Notify>);

#[derive(Debug)]
pub(crate) struct UnifiedExecSession {
session: ExecCommandSession,
output_buffer: OutputBuffer,
output_notify: Arc<Notify>,
cancellation_token: CancellationToken,
output_task: JoinHandle<()>,
sandbox_type: SandboxType,
}
@@ -91,11 +86,9 @@ impl UnifiedExecSession {
) -> Self {
let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
let output_notify = Arc::new(Notify::new());
let cancellation_token = CancellationToken::new();
let mut receiver = initial_output_rx;
let buffer_clone = Arc::clone(&output_buffer);
let notify_clone = Arc::clone(&output_notify);
let cancellation_token_clone = cancellation_token.clone();
let output_task = tokio::spawn(async move {
loop {
match receiver.recv().await {
@@ -106,10 +99,7 @@ impl UnifiedExecSession {
notify_clone.notify_waiters();
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
Err(tokio::sync::broadcast::error::RecvError::Closed) => {
cancellation_token_clone.cancel();
break;
}
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
}
}
});
@@ -118,7 +108,6 @@ impl UnifiedExecSession {
session,
output_buffer,
output_notify,
cancellation_token,
output_task,
sandbox_type,
}
@@ -129,11 +118,10 @@ impl UnifiedExecSession {
}

pub(super) fn output_handles(&self) -> OutputHandles {
OutputHandles {
output_buffer: Arc::clone(&self.output_buffer),
output_notify: Arc::clone(&self.output_notify),
cancellation_token: self.cancellation_token.clone(),
}
(
Arc::clone(&self.output_buffer),
Arc::clone(&self.output_notify),
)
}

pub(super) fn has_exited(&self) -> bool {
@@ -211,34 +199,20 @@ impl UnifiedExecSession {
};

if exit_ready {
managed.signal_exit();
managed.check_for_sandbox_denial().await?;
return Ok(managed);
}

tokio::pin!(exit_rx);
if tokio::time::timeout(Duration::from_millis(50), &mut exit_rx)
.await
.is_ok()
{
managed.signal_exit();
managed.check_for_sandbox_denial().await?;
return Ok(managed);
}

tokio::spawn({
let cancellation_token = managed.cancellation_token.clone();
async move {
let _ = exit_rx.await;
cancellation_token.cancel();
}
});

Ok(managed)
}

fn signal_exit(&self) {
self.cancellation_token.cancel();
}
}

impl Drop for UnifiedExecSession {

@@ -5,19 +5,16 @@ use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;

use crate::codex::Session;
use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::exec::StreamOutput;
use crate::exec_env::create_env;
use crate::exec_policy::create_approval_requirement_for_command;
use crate::protocol::BackgroundEventEvent;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxPermissions;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventFailure;
@@ -41,20 +38,8 @@ use super::clamp_yield_time;
use super::generate_chunk_id;
use super::resolve_max_tokens;
use super::session::OutputBuffer;
use super::session::OutputHandles;
use super::session::UnifiedExecSession;

struct PreparedSessionHandles {
writer_tx: mpsc::Sender<Vec<u8>>,
output_buffer: OutputBuffer,
output_notify: Arc<Notify>,
cancellation_token: CancellationToken,
session_ref: Arc<Session>,
turn_ref: Arc<TurnContext>,
command: Vec<String>,
cwd: PathBuf,
}

impl UnifiedExecSessionManager {
pub(crate) async fn exec_command(
&self,
@@ -80,19 +65,10 @@ impl UnifiedExecSessionManager {
let yield_time_ms = clamp_yield_time(request.yield_time_ms);

let start = Instant::now();
let OutputHandles {
output_buffer,
output_notify,
cancellation_token,
} = session.output_handles();
let (output_buffer, output_notify) = session.output_handles();
let deadline = start + Duration::from_millis(yield_time_ms);
let collected = Self::collect_output_until_deadline(
&output_buffer,
&output_notify,
&cancellation_token,
deadline,
)
.await;
let collected =
Self::collect_output_until_deadline(&output_buffer, &output_notify, deadline).await;
let wall_time = Instant::now().saturating_duration_since(start);

let text = String::from_utf8_lossy(&collected).to_string();
@@ -151,16 +127,15 @@ impl UnifiedExecSessionManager {
) -> Result<UnifiedExecResponse, UnifiedExecError> {
let session_id = request.session_id;

let PreparedSessionHandles {
let (
writer_tx,
output_buffer,
output_notify,
cancellation_token,
session_ref,
turn_ref,
command: session_command,
cwd: session_cwd,
} = self.prepare_session_handles(session_id).await?;
session_command,
session_cwd,
) = self.prepare_session_handles(session_id).await?;

let interaction_emitter = ToolEmitter::unified_exec(
&session_command,
@@ -174,7 +149,6 @@ impl UnifiedExecSessionManager {
turn_ref.as_ref(),
request.call_id,
None,
None,
)
};
interaction_emitter
@@ -200,13 +174,8 @@ impl UnifiedExecSessionManager {
let yield_time_ms = clamp_yield_time(request.yield_time_ms);
let start = Instant::now();
let deadline = start + Duration::from_millis(yield_time_ms);
let collected = Self::collect_output_until_deadline(
&output_buffer,
&output_notify,
&cancellation_token,
deadline,
)
.await;
let collected =
Self::collect_output_until_deadline(&output_buffer, &output_notify, deadline).await;
let wall_time = Instant::now().saturating_duration_since(start);

let text = String::from_utf8_lossy(&collected).to_string();
@@ -294,27 +263,44 @@ impl UnifiedExecSessionManager {
async fn prepare_session_handles(
&self,
session_id: i32,
) -> Result<PreparedSessionHandles, UnifiedExecError> {
) -> Result<
(
mpsc::Sender<Vec<u8>>,
OutputBuffer,
Arc<Notify>,
Arc<Session>,
Arc<TurnContext>,
Vec<String>,
PathBuf,
),
UnifiedExecError,
> {
let sessions = self.sessions.lock().await;
let entry = sessions
.get(&session_id)
.ok_or(UnifiedExecError::UnknownSessionId { session_id })?;
let OutputHandles {
output_buffer,
output_notify,
cancellation_token,
} = entry.session.output_handles();
let (output_buffer, output_notify, writer_tx, session, turn, command, cwd) =
if let Some(entry) = sessions.get(&session_id) {
let (buffer, notify) = entry.session.output_handles();
(
buffer,
notify,
entry.session.writer_sender(),
Arc::clone(&entry.session_ref),
Arc::clone(&entry.turn_ref),
entry.command.clone(),
entry.cwd.clone(),
)
} else {
return Err(UnifiedExecError::UnknownSessionId { session_id });
};

Ok(PreparedSessionHandles {
writer_tx: entry.session.writer_sender(),
Ok((
writer_tx,
output_buffer,
output_notify,
cancellation_token,
session_ref: Arc::clone(&entry.session_ref),
turn_ref: Arc::clone(&entry.turn_ref),
command: entry.command.clone(),
cwd: entry.cwd.clone(),
})
session,
turn,
command,
cwd,
))
}

async fn send_input(
@@ -370,7 +356,6 @@ impl UnifiedExecSessionManager {
entry.turn_ref.as_ref(),
&entry.call_id,
None,
None,
);
let emitter = ToolEmitter::unified_exec(
&entry.command,
@@ -404,7 +389,6 @@ impl UnifiedExecSessionManager {
context.turn.as_ref(),
&context.call_id,
None,
None,
);
let emitter =
ToolEmitter::unified_exec(command, cwd, ExecCommandSource::UnifiedExecStartup, None);
@@ -465,13 +449,6 @@ impl UnifiedExecSessionManager {
create_env(&context.turn.shell_environment_policy),
with_escalated_permissions,
justification,
create_approval_requirement_for_command(
&context.turn.exec_policy,
command,
context.turn.approval_policy,
&context.turn.sandbox_policy,
SandboxPermissions::from(with_escalated_permissions.unwrap_or(false)),
),
);
let tool_ctx = ToolCtx {
session: context.session.as_ref(),
@@ -494,13 +471,9 @@ impl UnifiedExecSessionManager {
pub(super) async fn collect_output_until_deadline(
output_buffer: &OutputBuffer,
output_notify: &Arc<Notify>,
cancellation_token: &CancellationToken,
deadline: Instant,
) -> Vec<u8> {
const POST_EXIT_OUTPUT_GRACE: Duration = Duration::from_millis(25);

let mut collected: Vec<u8> = Vec::with_capacity(4096);
let mut exit_signal_received = cancellation_token.is_cancelled();
loop {
let drained_chunks;
let mut wait_for_output = None;
@@ -513,27 +486,15 @@ impl UnifiedExecSessionManager {
}

if drained_chunks.is_empty() {
exit_signal_received |= cancellation_token.is_cancelled();
let remaining = deadline.saturating_duration_since(Instant::now());
if remaining == Duration::ZERO {
break;
}

let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
if exit_signal_received {
let grace = remaining.min(POST_EXIT_OUTPUT_GRACE);
if tokio::time::timeout(grace, notified).await.is_err() {
break;
}
continue;
}

tokio::pin!(notified);
let exit_notified = cancellation_token.cancelled();
tokio::pin!(exit_notified);
tokio::select! {
_ = &mut notified => {}
_ = &mut exit_notified => exit_signal_received = true,
_ = tokio::time::sleep(remaining) => break,
}
continue;
@@ -543,7 +504,6 @@ impl UnifiedExecSessionManager {
collected.extend_from_slice(&chunk);
}

exit_signal_received |= cancellation_token.is_cancelled();
if Instant::now() >= deadline {
break;
}
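The manager hunks above thread the session's `CancellationToken` into `collect_output_until_deadline` so the loop can stop early when the process exits instead of sleeping out the full yield window. A hedged sketch of that wait loop, using `tokio`/`tokio-util` directly and omitting the real buffer type and post-exit grace-period details:

```rust
use std::sync::{Arc, Mutex};
use tokio::sync::Notify;
use tokio::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;

// Drain whatever is buffered; when nothing is pending, wait for the first of:
// new output (notify), session exit (cancellation), or the deadline.
async fn collect_until_deadline(
    buffer: Arc<Mutex<Vec<Vec<u8>>>>,
    notify: Arc<Notify>,
    cancel: CancellationToken,
    deadline: Instant,
) -> Vec<u8> {
    let mut collected = Vec::new();
    loop {
        let drained: Vec<Vec<u8>> = std::mem::take(&mut *buffer.lock().unwrap());
        for chunk in &drained {
            collected.extend_from_slice(chunk);
        }
        let remaining = deadline.saturating_duration_since(Instant::now());
        if remaining == Duration::ZERO {
            break;
        }
        if !drained.is_empty() {
            continue; // more output may already be queued; drain again
        }
        tokio::select! {
            _ = notify.notified() => {}                 // new output arrived
            _ = cancel.cancelled() => break,            // session exited
            _ = tokio::time::sleep(remaining) => break, // yield window elapsed
        }
    }
    collected
}
```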

@@ -1121,8 +1121,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"used_percent": 40.0,
"window_minutes": 60,
"resets_at": 1704074400
},
"credits": null
}
}
})
);
@@ -1156,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"reasoning_output_tokens": 0,
"total_tokens": 123
},
// Default model is gpt-5.1-codex-max in tests → 95% usable context window
// Default model is arcticfox in tests → 95% usable context window
"model_context_window": 258400
},
"rate_limits": {
@@ -1169,8 +1168,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"used_percent": 40.0,
"window_minutes": 60,
"resets_at": 1704074400
},
"credits": null
}
}
})
);
@@ -1240,8 +1238,7 @@ async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
"used_percent": 87.5,
"window_minutes": 60,
"resets_at": null
},
"credits": null
}
});

let submission_id = codex

@@ -384,7 +384,7 @@ async fn manual_compact_uses_custom_prompt() {
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn manual_compact_emits_api_and_local_token_usage_events() {
async fn manual_compact_emits_estimated_token_usage_event() {
skip_if_no_network!();

let server = start_mock_server().await;

@@ -32,13 +32,11 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
let params = ExecParams {
command: cmd.iter().map(ToString::to_string).collect(),
cwd: tmp.path().to_path_buf(),
expiration: 1000.into(),
timeout_ms: Some(1000),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};

let policy = SandboxPolicy::new_read_only_policy();

@@ -1,101 +0,0 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]

use anyhow::Result;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::json;
use std::fs;

#[tokio::test]
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
let mut builder = test_codex().with_config(|config| {
let policy_path = config.codex_home.join("policy").join("policy.codexpolicy");
fs::create_dir_all(
policy_path
.parent()
.expect("policy directory must have a parent"),
)
.expect("create policy directory");
fs::write(
&policy_path,
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
)
.expect("write policy file");
});
let server = start_mock_server().await;
let test = builder.build(&server).await?;

let call_id = "shell-forbidden";
let args = json!({
"command": ["echo", "blocked"],
"timeout_ms": 1_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;

let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run shell command".into(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;

let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::ExecCommandEnd(_))
})
.await
else {
unreachable!()
};
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TaskComplete(_))
})
.await;

assert!(
end.aggregated_output
.contains("execpolicy forbids this command"),
"unexpected output: {}",
end.aggregated_output
);

Ok(())
}
|
||||
mod compact_resume_fork;
|
||||
mod deprecation_notice;
|
||||
mod exec;
|
||||
mod exec_policy;
|
||||
mod fork_conversation;
|
||||
mod grep_files;
|
||||
mod items;
|
||||
@@ -49,7 +48,6 @@ mod seatbelt;
|
||||
mod shell_serialization;
|
||||
mod stream_error_allows_next_turn;
|
||||
mod stream_no_completed;
|
||||
mod text_encoding_fix;
|
||||
mod tool_harness;
|
||||
mod tool_parallelism;
|
||||
mod tools;
|
||||
|
||||
@@ -1,10 +1,21 @@
|
||||
#![allow(clippy::unwrap_used)]
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::load_sse_fixture_with_id;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
fn sse_completed(id: &str) -> String {
|
||||
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
|
||||
@@ -28,17 +39,46 @@ fn tool_identifiers(body: &serde_json::Value) -> Vec<String> {
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
|
||||
let server = start_mock_server().await;
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let sse = sse_completed(model);
|
||||
let resp_mock = responses::mount_sse_once(&server, sse).await;
|
||||
|
||||
let mut builder = test_codex().with_model(model);
|
||||
let test = builder
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("create test Codex conversation");
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
test.submit_turn("hello tools").await.expect("submit turn");
|
||||
let cwd = TempDir::new().unwrap();
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
config.model_provider = model_provider;
|
||||
config.model = model.to_string();
|
||||
config.model_family =
|
||||
find_family_for_model(model).unwrap_or_else(|| panic!("unknown model family for {model}"));
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.features.disable(Feature::ViewImageTool);
|
||||
config.features.disable(Feature::WebSearchRequest);
|
||||
config.features.disable(Feature::UnifiedExec);
|
||||
|
||||
let conversation_manager =
|
||||
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.expect("create new conversation")
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello tools".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let body = resp_mock.single_request().body_json();
|
||||
tool_identifiers(&body)
|
||||
@@ -57,8 +97,7 @@ async fn model_selects_expected_tools() {
|
||||
"list_mcp_resources".to_string(),
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"view_image".to_string()
|
||||
"update_plan".to_string()
|
||||
],
|
||||
"codex-mini-latest should expose the local shell tool",
|
||||
);
|
||||
@@ -72,8 +111,7 @@ async fn model_selects_expected_tools() {
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"apply_patch".to_string(),
|
||||
"view_image".to_string()
|
||||
"apply_patch".to_string()
|
||||
],
|
||||
"gpt-5-codex should expose the apply_patch tool",
|
||||
);
|
||||
@@ -87,8 +125,7 @@ async fn model_selects_expected_tools() {
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"apply_patch".to_string(),
|
||||
"view_image".to_string()
|
||||
"apply_patch".to_string()
|
||||
],
|
||||
"gpt-5.1-codex should expose the apply_patch tool",
|
||||
);
|
||||
@@ -102,7 +139,6 @@ async fn model_selects_expected_tools() {
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"view_image".to_string()
|
||||
],
|
||||
"gpt-5 should expose the apply_patch tool",
|
||||
);
|
||||
@@ -116,8 +152,7 @@ async fn model_selects_expected_tools() {
|
||||
"list_mcp_resource_templates".to_string(),
|
||||
"read_mcp_resource".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"apply_patch".to_string(),
|
||||
"view_image".to_string()
|
||||
"apply_patch".to_string()
|
||||
],
|
||||
"gpt-5.1 should expose the apply_patch tool",
|
||||
);
|
||||
|
||||
@@ -30,15 +30,18 @@ fn text_user_input(text: String) -> serde_json::Value {
|
||||
}
|
||||
|
||||
fn default_env_context_str(cwd: &str, shell: &Shell) -> String {
|
||||
let shell_name = shell.name();
|
||||
format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{cwd}</cwd>
|
||||
<cwd>{}</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
<sandbox_mode>read-only</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
<shell>{shell_name}</shell>
|
||||
</environment_context>"#
|
||||
{}</environment_context>"#,
|
||||
cwd,
|
||||
match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -224,7 +227,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let shell = default_user_shell();
|
||||
let shell = default_user_shell().await;
|
||||
let cwd_str = config.cwd.to_string_lossy();
|
||||
let expected_env_text = default_env_context_str(&cwd_str, &shell);
|
||||
let expected_ui_text = format!(
|
||||
@@ -342,7 +345,6 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
|
||||
// After overriding the turn context, the environment context should be emitted again
|
||||
// reflecting the new approval policy and sandbox settings. Omit cwd because it did
|
||||
// not change.
|
||||
let shell = default_user_shell();
|
||||
let expected_env_text_2 = format!(
|
||||
r#"<environment_context>
|
||||
<approval_policy>never</approval_policy>
|
||||
@@ -351,10 +353,8 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
|
||||
<writable_roots>
|
||||
<root>{}</root>
|
||||
</writable_roots>
|
||||
<shell>{}</shell>
|
||||
</environment_context>"#,
|
||||
writable.path().display(),
|
||||
shell.name()
|
||||
writable.path().to_string_lossy(),
|
||||
);
|
||||
let expected_env_msg_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
@@ -522,8 +522,6 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||
});
|
||||
let shell = default_user_shell();
|
||||
|
||||
let expected_env_text_2 = format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
@@ -533,11 +531,9 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
|
||||
<writable_roots>
|
||||
<root>{}</root>
|
||||
</writable_roots>
|
||||
<shell>{}</shell>
|
||||
</environment_context>"#,
|
||||
new_cwd.path().display(),
|
||||
writable.path().display(),
|
||||
shell.name(),
|
||||
new_cwd.path().to_string_lossy(),
|
||||
writable.path().to_string_lossy(),
|
||||
);
|
||||
let expected_env_msg_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
@@ -614,7 +610,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
|
||||
let body1 = req1.single_request().body_json();
|
||||
let body2 = req2.single_request().body_json();
|
||||
|
||||
let shell = default_user_shell();
|
||||
let shell = default_user_shell().await;
|
||||
let default_cwd_lossy = default_cwd.to_string_lossy();
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {default_cwd_lossy}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
|
||||
@@ -701,7 +697,7 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
|
||||
let body1 = req1.single_request().body_json();
|
||||
let body2 = req2.single_request().body_json();
|
||||
|
||||
let shell = default_user_shell();
|
||||
let shell = default_user_shell().await;
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
|
||||
default_cwd.to_string_lossy()
|
||||
@@ -721,15 +717,14 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
|
||||
]);
|
||||
assert_eq!(body1["input"], expected_input_1);
|
||||
|
||||
let shell_name = shell.name();
|
||||
let expected_env_msg_2 = text_user_input(format!(
|
||||
let expected_env_msg_2 = text_user_input(
|
||||
r#"<environment_context>
|
||||
<approval_policy>never</approval_policy>
|
||||
<sandbox_mode>danger-full-access</sandbox_mode>
|
||||
<network_access>enabled</network_access>
|
||||
<shell>{shell_name}</shell>
|
||||
</environment_context>"#
|
||||
));
|
||||
.to_string(),
|
||||
);
|
||||
let expected_user_message_2 = text_user_input("hello 2".to_string());
|
||||
let expected_input_2 = serde_json::Value::Array(vec![
|
||||
expected_ui_msg,
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
@@ -41,20 +40,6 @@ const FIXTURE_JSON: &str = r#"{
|
||||
}
|
||||
"#;
|
||||
|
||||
fn configure_shell_command_model(output_type: ShellModelOutput, config: &mut Config) {
|
||||
if !matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(shell_command_family) = find_family_for_model("test-gpt-5-codex") {
|
||||
if config.model_family.shell_type == shell_command_family.shell_type {
|
||||
return;
|
||||
}
|
||||
config.model = shell_command_family.slug.clone();
|
||||
config.model_family = shell_command_family;
|
||||
}
|
||||
}
|
||||
|
||||
fn shell_responses(
|
||||
call_id: &str,
|
||||
command: Vec<&str>,
|
||||
@@ -127,7 +112,10 @@ async fn shell_output_stays_json_without_freeform_apply_patch(
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
configure_shell_command_model(output_type, config);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -182,7 +170,10 @@ async fn shell_output_is_structured_with_freeform_apply_patch(
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
configure_shell_command_model(output_type, config);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -232,7 +223,10 @@ async fn shell_output_preserves_fixture_json_without_serialization(
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
configure_shell_command_model(output_type, config);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -299,7 +293,10 @@ async fn shell_output_structures_fixture_with_serialization(
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
configure_shell_command_model(output_type, config);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -361,12 +358,15 @@ async fn shell_output_for_freeform_tool_records_duration(
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
configure_shell_command_model(output_type, config);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-structured";
|
||||
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "sleep 1"], output_type)?;
|
||||
let responses = shell_responses(call_id, vec!["/bin/bash", "-c", "sleep 1"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -417,7 +417,10 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
config.tool_output_token_limit = Some(200);
|
||||
configure_shell_command_model(output_type, config);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -454,7 +457,7 @@ Output:
|
||||
4
|
||||
5
|
||||
6
|
||||
.*…46 tokens truncated….*
|
||||
.*…45 tokens truncated….*
|
||||
396
|
||||
397
|
||||
398
|
||||
@@ -719,7 +722,9 @@ async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutp
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
config.include_apply_patch_tool = true;
|
||||
configure_shell_command_model(output_type, config);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -755,7 +760,7 @@ async fn shell_command_output_is_freeform() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
configure_shell_command_model(ShellModelOutput::ShellCommand, config);
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -807,7 +812,11 @@ async fn shell_command_output_is_not_truncated_under_10k_bytes() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.1")
|
||||
.with_config(move |config| {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-command";
|
||||
@@ -857,7 +866,11 @@ async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.1")
|
||||
.with_config(move |config| {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-command";
|
||||
|
||||
@@ -1,77 +0,0 @@
//! Integration test for the text encoding fix for issue #6178.
//!
//! These tests simulate VSCode's shell preview on Windows/WSL where the output
//! may be encoded with a legacy code page before it reaches Codex.

use codex_core::exec::StreamOutput;
use pretty_assertions::assert_eq;

#[test]
fn test_utf8_shell_output() {
    // Baseline: UTF-8 output should bypass the detector and remain unchanged.
    assert_eq!(decode_shell_output("пример".as_bytes()), "пример");
}

#[test]
fn test_cp1251_shell_output() {
    // VS Code shells on Windows frequently surface CP1251 bytes for Cyrillic text.
    assert_eq!(decode_shell_output(b"\xEF\xF0\xE8\xEC\xE5\xF0"), "пример");
}

#[test]
fn test_cp866_shell_output() {
    // Native cmd.exe still defaults to CP866; make sure we recognize that too.
    assert_eq!(decode_shell_output(b"\xAF\xE0\xA8\xAC\xA5\xE0"), "пример");
}

#[test]
fn test_windows_1252_smart_decoding() {
    // Smart detection should turn fancy quotes/dashes into the proper Unicode glyphs.
    assert_eq!(
        decode_shell_output(b"\x93\x94 test \x96 dash"),
        "\u{201C}\u{201D} test \u{2013} dash"
    );
}

#[test]
fn test_smart_decoding_improves_over_lossy_utf8() {
    // Regression guard: String::from_utf8_lossy() alone used to emit replacement chars here.
    let bytes = b"\x93\x94 test \x96 dash";
    assert!(
        String::from_utf8_lossy(bytes).contains('\u{FFFD}'),
        "lossy UTF-8 should inject replacement chars"
    );
    assert_eq!(
        decode_shell_output(bytes),
        "\u{201C}\u{201D} test \u{2013} dash",
        "smart decoding should keep curly quotes intact"
    );
}

#[test]
fn test_mixed_ascii_and_legacy_encoding() {
    // Commands tend to mix ASCII status text with Latin-1 bytes (e.g. café).
    assert_eq!(decode_shell_output(b"Output: caf\xE9"), "Output: café"); // codespell:ignore caf
}

#[test]
fn test_pure_latin1_shell_output() {
    // Latin-1 by itself should still decode correctly (regression coverage for the older tests).
    assert_eq!(decode_shell_output(b"caf\xE9"), "café"); // codespell:ignore caf
}

#[test]
fn test_invalid_bytes_still_fall_back_to_lossy() {
    // If detection fails, we still want the user to see replacement characters.
    let bytes = b"\xFF\xFE\xFD";
    assert_eq!(decode_shell_output(bytes), String::from_utf8_lossy(bytes));
}

fn decode_shell_output(bytes: &[u8]) -> String {
    StreamOutput {
        text: bytes.to_vec(),
        truncated_after_lines: None,
    }
    .from_utf8_lossy()
    .text
}
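The deleted tests above exercise a decoder that falls back to charset detection whenever the bytes are not valid UTF-8. A minimal sketch of that approach, assuming the `chardetng` and `encoding_rs` crates (both the crate choice and the fallback rules here are assumptions, not necessarily what Codex ships):

```rust
// Sketch only: decode shell output that may arrive in a legacy code page.
// Assumes the `chardetng` crate (which returns `encoding_rs` encodings);
// the real Codex decoder may differ in crate choice and fallback rules.
use chardetng::EncodingDetector;

fn decode_legacy(bytes: &[u8]) -> String {
    // Fast path: valid UTF-8 passes through untouched.
    if let Ok(s) = std::str::from_utf8(bytes) {
        return s.to_string();
    }
    // Otherwise guess the encoding from byte statistics
    // (CP1251, CP866, windows-1252, ...) and decode with that guess.
    let mut detector = EncodingDetector::new();
    detector.feed(bytes, true);
    let encoding = detector.guess(None, true);
    let (decoded, _, had_errors) = encoding.decode(bytes);
    if had_errors {
        // Detection failed: fall back to lossy UTF-8 with U+FFFD markers.
        String::from_utf8_lossy(bytes).into_owned()
    } else {
        decoded.into_owned()
    }
}
```

Trying strict UTF-8 first keeps the common path cheap; detection only runs on the rare legacy-code-page output, which is exactly the split the tests above assert.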
@@ -167,7 +167,7 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
        "expected truncated shell output to be plain text"
    );

    assert_eq!(output.len(), 400097, "we should be almost 100k tokens");
    assert_eq!(output.len(), 400094, "we should be almost 100k tokens");

    assert!(
        !output.contains("tokens truncated"),
@@ -244,16 +244,11 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
        "expected truncated shell output to be plain text"
    );

    let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 100000\nOutput:\n.*?…\d+ chars truncated….*$"#;
    assert_eq!(output.len(), 9976); // ~10k characters
    let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\n.*?…578898 chars truncated….*$"#;

    assert_regex_match(truncated_pattern, &output);

    let len = output.len();
    assert!(
        (9_900..=10_000).contains(&len),
        "expected ~10k chars after truncation, got {len}"
    );

    Ok(())
}

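The expected strings in these tests pin down the truncated-output shape: an `Exit code` / `Wall time` / `Total output lines` preamble, then the head and tail of the output around a `…N chars truncated…` marker. A rough sketch of producing such a summary; the function name, signature, and the even head/tail split are illustrative, not Codex's actual implementation:

```rust
// Hypothetical sketch of the middle-out truncation format asserted above.
// `limit` and the helper name are illustrative, not Codex's real API.
fn summarize_output(exit_code: i32, wall_time_secs: u64, output: &str, limit: usize) -> String {
    let total_lines = output.lines().count();
    let total_chars = output.chars().count();
    let body = if total_chars <= limit {
        output.to_string()
    } else {
        // Keep the head and tail; report how many chars were dropped in between.
        let keep = limit / 2;
        let head: String = output.chars().take(keep).collect();
        let tail: String = output.chars().skip(total_chars - keep).collect();
        format!("{head}…{} chars truncated…{tail}", total_chars - 2 * keep)
    };
    format!(
        "Exit code: {exit_code}\nWall time: {wall_time_secs} seconds\nTotal output lines: {total_lines}\n{body}"
    )
}
```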
@@ -904,98 +904,6 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
    Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_respects_early_exit_notifications() -> Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));

    let server = start_mock_server().await;

    let mut builder = test_codex().with_config(|config| {
        config.features.enable(Feature::UnifiedExec);
    });
    let TestCodex {
        codex,
        cwd,
        session_configured,
        ..
    } = builder.build(&server).await?;

    let call_id = "uexec-early-exit";
    let args = serde_json::json!({
        "cmd": "sleep 0.05",
        "yield_time_ms": 31415,
    });

    let responses = vec![
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    ];
    mount_sse_sequence(&server, responses).await;

    let session_model = session_configured.model.clone();

    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "watch early exit timing".into(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: session_model,
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;

    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

    let requests = server.received_requests().await.expect("recorded requests");
    assert!(!requests.is_empty(), "expected at least one POST request");

    let bodies = requests
        .iter()
        .map(|req| req.body_json::<Value>().expect("request json"))
        .collect::<Vec<_>>();

    let outputs = collect_tool_outputs(&bodies)?;
    let output = outputs
        .get(call_id)
        .expect("missing early exit unified_exec output");

    assert!(
        output.session_id.is_none(),
        "short-lived process should not keep a session alive"
    );
    assert_eq!(
        output.exit_code,
        Some(0),
        "short-lived process should exit successfully"
    );

    let wall_time = output.wall_time_seconds;
    assert!(
        wall_time < 0.75,
        "wall_time should reflect early exit rather than the full yield time; got {wall_time}"
    );
    assert!(
        output.output.is_empty(),
        "sleep command should not emit output, got {:?}",
        output.output
    );

    Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
    skip_if_no_network!(Ok(()));
@@ -1622,8 +1530,8 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> {
    } = builder.build(&server).await?;

    let script = r#"python3 - <<'PY'
import sys
sys.stdout.write("token token \n" * 5000)
for i in range(10000):
    print("token token ")
PY
"#;

@@ -1678,7 +1586,7 @@ PY
    let large_output = outputs.get(call_id).expect("missing large output summary");

    let output_text = large_output.output.replace("\r\n", "\n");
    let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated….*(token token \n){5,}$";
    let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated…(token token \n){5,}$";
    assert_regex_match(truncated_pattern, &output_text);

    let original_tokens = large_output

@@ -257,8 +257,7 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(

    let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
    let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
    let truncated_body =
        format!("Total output lines: 400\n\n{head}70…273 tokens truncated…351\n{tail}");
    let truncated_body = format!("Total output lines: 400\n\n{head}…273 tokens truncated…{tail}");
    let escaped_command = escape(&command);
    let escaped_truncated_body = escape(&truncated_body);
    let expected_pattern = format!(

@@ -4,23 +4,14 @@ name = "codex-exec-server"
version = { workspace = true }

[[bin]]
name = "codex-execve-wrapper"
path = "src/bin/main_execve_wrapper.rs"

[[bin]]
name = "codex-exec-mcp-server"
path = "src/bin/main_mcp_server.rs"

[lib]
name = "codex_exec_server"
path = "src/lib.rs"
name = "codex-exec-server"
path = "src/main.rs"

[lints]
workspace = true

[dependencies]
anyhow = { workspace = true }
async-trait = { workspace = true }
clap = { workspace = true, features = ["derive"] }
codex-core = { workspace = true }
libc = { workspace = true }
@@ -40,7 +31,6 @@ rmcp = { workspace = true, default-features = false, features = [
] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
shlex = { workspace = true }
socket2 = { workspace = true }
tokio = { workspace = true, features = [
    "io-std",
@@ -49,7 +39,6 @@ tokio = { workspace = true, features = [
    "rt-multi-thread",
    "signal",
] }
tokio-util = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }

@@ -1,8 +0,0 @@
#[cfg(not(unix))]
fn main() {
    eprintln!("codex-execve-wrapper is only implemented for UNIX");
    std::process::exit(1);
}

#[cfg(unix)]
pub use codex_exec_server::main_execve_wrapper as main;
@@ -1,8 +0,0 @@
#[cfg(not(unix))]
fn main() {
    eprintln!("codex-exec-mcp-server is only implemented for UNIX");
    std::process::exit(1);
}

#[cfg(unix)]
pub use codex_exec_server::main_mcp_server as main;
@@ -1,8 +0,0 @@
#[cfg(unix)]
mod posix;

#[cfg(unix)]
pub use posix::main_execve_wrapper;

#[cfg(unix)]
pub use posix::main_mcp_server;
11
codex-rs/exec-server/src/main.rs
Normal file
@@ -0,0 +1,11 @@
#[cfg(target_os = "windows")]
fn main() {
    eprintln!("codex-exec-server is not implemented on Windows targets");
    std::process::exit(1);
}

#[cfg(not(target_os = "windows"))]
mod posix;

#[cfg(not(target_os = "windows"))]
pub use posix::main;
@@ -56,115 +56,109 @@
//! o<-----x
//!
use std::path::Path;
use std::path::PathBuf;

use clap::Parser;
use clap::Subcommand;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::{self};

use crate::posix::mcp_escalation_policy::ExecPolicyOutcome;
use crate::posix::escalate_protocol::EscalateAction;
use crate::posix::escalate_server::EscalateServer;

mod escalate_client;
mod escalate_protocol;
mod escalate_server;
mod escalation_policy;
mod mcp;
mod mcp_escalation_policy;
mod socket;
mod stopwatch;

/// Default value of --execve option relative to the current executable.
/// Note this must match the name of the binary as specified in Cargo.toml.
const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper";

#[derive(Parser)]
struct McpServerCli {
    /// Executable to delegate execve(2) calls to in Bash.
    #[arg(long = "execve")]
    execve_wrapper: Option<PathBuf>,

    /// Path to Bash that has been patched to support execve() wrapping.
    #[arg(long = "bash")]
    bash_path: Option<PathBuf>,
}

#[tokio::main]
pub async fn main_mcp_server() -> anyhow::Result<()> {
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .with_writer(std::io::stderr)
        .with_ansi(false)
        .init();

    let cli = McpServerCli::parse();
    let execve_wrapper = match cli.execve_wrapper {
        Some(path) => path,
        None => {
            let cwd = std::env::current_exe()?;
            cwd.parent()
                .map(|p| p.join(CODEX_EXECVE_WRAPPER_EXE_NAME))
                .ok_or_else(|| {
                    anyhow::anyhow!("failed to determine execve wrapper path from current exe")
                })?
        }
    };
    let bash_path = match cli.bash_path {
        Some(path) => path,
        None => mcp::get_bash_path()?,
    };

    tracing::info!("Starting MCP server");
    let service = mcp::serve(bash_path, execve_wrapper, dummy_exec_policy)
        .await
        .inspect_err(|e| {
            tracing::error!("serving error: {:?}", e);
        })?;

    service.waiting().await?;
    Ok(())
fn dummy_exec_policy(file: &Path, argv: &[String], _workdir: &Path) -> EscalateAction {
    // TODO: execpolicy
    if file == Path::new("/opt/homebrew/bin/gh")
        && let [_, arg1, arg2, ..] = argv
        && arg1 == "issue"
        && arg2 == "list"
    {
        return EscalateAction::Escalate;
    }
    EscalateAction::Run
}

#[derive(Parser)]
pub struct ExecveWrapperCli {
#[command(version)]
pub struct Cli {
    #[command(subcommand)]
    subcommand: Option<Commands>,
}

#[derive(Subcommand)]
enum Commands {
    Escalate(EscalateArgs),
    ShellExec(ShellExecArgs),
}

/// Invoked from within the sandbox to (potentially) escalate permissions.
#[derive(Parser, Debug)]
struct EscalateArgs {
    file: String,

    #[arg(trailing_var_arg = true)]
    argv: Vec<String>,
}

impl EscalateArgs {
    /// This is the escalate client. It talks to the escalate server to determine whether to exec()
    /// the command directly or to proxy to the escalate server.
    async fn run(self) -> anyhow::Result<i32> {
        let EscalateArgs { file, argv } = self;
        escalate_client::run(file, argv).await
    }
}

/// Debugging command to emulate an MCP "shell" tool call.
#[derive(Parser, Debug)]
struct ShellExecArgs {
    command: String,
}

#[tokio::main]
pub async fn main_execve_wrapper() -> anyhow::Result<()> {
pub async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .with_writer(std::io::stderr)
        .with_ansi(false)
        .init();

    let ExecveWrapperCli { file, argv } = ExecveWrapperCli::parse();
    let exit_code = escalate_client::run(file, argv).await?;
    std::process::exit(exit_code);
}

// TODO: replace with execpolicy

fn dummy_exec_policy(file: &Path, argv: &[String], _workdir: &Path) -> ExecPolicyOutcome {
    if file.ends_with("rm") {
        ExecPolicyOutcome::Forbidden
    } else if file.ends_with("git") {
        ExecPolicyOutcome::Prompt {
            run_with_escalated_permissions: false,
    match cli.subcommand {
        Some(Commands::Escalate(args)) => {
            std::process::exit(args.run().await?);
        }
    } else if file == Path::new("/opt/homebrew/bin/gh")
        && let [_, arg1, arg2, ..] = argv
        && arg1 == "issue"
        && arg2 == "list"
    {
        ExecPolicyOutcome::Allow {
            run_with_escalated_permissions: true,
        Some(Commands::ShellExec(args)) => {
            let bash_path = mcp::get_bash_path()?;
            let escalate_server = EscalateServer::new(bash_path, dummy_exec_policy);
            let result = escalate_server
                .exec(
                    args.command.clone(),
                    std::env::vars().collect(),
                    std::env::current_dir()?,
                    None,
                )
                .await?;
            println!("{result:?}");
            std::process::exit(result.exit_code);
        }
    } else {
        ExecPolicyOutcome::Allow {
            run_with_escalated_permissions: false,
        None => {
            let bash_path = mcp::get_bash_path()?;

            tracing::info!("Starting MCP server");
            let service = mcp::serve(bash_path, dummy_exec_policy)
                .await
                .inspect_err(|e| {
                    tracing::error!("serving error: {:?}", e);
                })?;

            service.waiting().await?;
            Ok(())
        }
    }
}

@@ -98,12 +98,5 @@ pub(crate) async fn run(file: String, argv: Vec<String>) -> anyhow::Result<i32>

            Err(err.into())
        }
        EscalateAction::Deny { reason } => {
            match reason {
                Some(reason) => eprintln!("Execution denied: {reason}"),
                None => eprintln!("Execution denied"),
            }
            Ok(1)
        }
    }
}

@@ -34,8 +34,6 @@ pub(super) enum EscalateAction {
    Run,
    /// The command should be escalated to the server for execution.
    Escalate,
    /// The command should not be executed.
    Deny { reason: Option<String> },
}

/// The client sends this to the server to forward its open FDs.

@@ -1,8 +1,8 @@
use std::collections::HashMap;
use std::os::fd::AsRawFd;
use std::path::Path;
use std::path::PathBuf;
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;

use anyhow::Context as _;
@@ -13,7 +13,6 @@ use codex_core::exec::process_exec_tool_call;
use codex_core::get_platform_sandbox;
use codex_core::protocol::SandboxPolicy;
use tokio::process::Command;
use tokio_util::sync::CancellationToken;

use crate::posix::escalate_protocol::BASH_EXEC_WRAPPER_ENV_VAR;
use crate::posix::escalate_protocol::ESCALATE_SOCKET_ENV_VAR;
@@ -22,27 +21,25 @@ use crate::posix::escalate_protocol::EscalateRequest;
use crate::posix::escalate_protocol::EscalateResponse;
use crate::posix::escalate_protocol::SuperExecMessage;
use crate::posix::escalate_protocol::SuperExecResult;
use crate::posix::escalation_policy::EscalationPolicy;
use crate::posix::socket::AsyncDatagramSocket;
use crate::posix::socket::AsyncSocket;
use codex_core::exec::ExecExpiration;

/// This is the policy which decides how to handle an exec() call.
///
/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
/// `argv` is the argv, including the program name (`argv[0]`).
/// `workdir` is the absolute, canonical path to the working directory in which to execute the
/// command.
pub(crate) type ExecPolicy = fn(file: &Path, argv: &[String], workdir: &Path) -> EscalateAction;

pub(crate) struct EscalateServer {
    bash_path: PathBuf,
    execve_wrapper: PathBuf,
    policy: Arc<dyn EscalationPolicy>,
    policy: ExecPolicy,
}

impl EscalateServer {
    pub fn new<P>(bash_path: PathBuf, execve_wrapper: PathBuf, policy: P) -> Self
    where
        P: EscalationPolicy + Send + Sync + 'static,
    {
        Self {
            bash_path,
            execve_wrapper,
            policy: Arc::new(policy),
        }
    pub fn new(bash_path: PathBuf, policy: ExecPolicy) -> Self {
        Self { bash_path, policy }
    }

    pub async fn exec(
@@ -50,13 +47,13 @@ impl EscalateServer {
        command: String,
        env: HashMap<String, String>,
        workdir: PathBuf,
        cancel_rx: CancellationToken,
        timeout_ms: Option<u64>,
    ) -> anyhow::Result<ExecResult> {
        let (escalate_server, escalate_client) = AsyncDatagramSocket::pair()?;
        let client_socket = escalate_client.into_inner();
        client_socket.set_cloexec(false)?;

        let escalate_task = tokio::spawn(escalate_task(escalate_server, self.policy.clone()));
        let escalate_task = tokio::spawn(escalate_task(escalate_server, self.policy));
        let mut env = env.clone();
        env.insert(
            ESCALATE_SOCKET_ENV_VAR.to_string(),
@@ -64,15 +61,8 @@ impl EscalateServer {
        );
        env.insert(
            BASH_EXEC_WRAPPER_ENV_VAR.to_string(),
            self.execve_wrapper.to_string_lossy().to_string(),
            format!("{} escalate", std::env::current_exe()?.to_string_lossy()),
        );

        // TODO: use the sandbox policy and cwd from the calling client.
        // Note that sandbox_cwd is ignored for ReadOnly, but needs to be legit
        // for `SandboxPolicy::WorkspaceWrite`.
        let sandbox_policy = SandboxPolicy::ReadOnly;
        let sandbox_cwd = PathBuf::from("/__NONEXISTENT__");

        let result = process_exec_tool_call(
            codex_core::exec::ExecParams {
                command: vec![
@@ -81,17 +71,16 @@ impl EscalateServer {
                    command,
                ],
                cwd: PathBuf::from(&workdir),
                expiration: ExecExpiration::Cancellation(cancel_rx),
                timeout_ms,
                env,
                with_escalated_permissions: None,
                justification: None,
                arg0: None,
                max_output_tokens: None,
                max_output_chars: None,
            },
            get_platform_sandbox().unwrap_or(SandboxType::None),
            &sandbox_policy,
            &sandbox_cwd,
            // TODO: use the sandbox policy and cwd from the calling client
            &SandboxPolicy::ReadOnly,
            &PathBuf::from("/__NONEXISTENT__"), // This is ignored for ReadOnly
            &None,
            None,
        )
@@ -107,10 +96,7 @@ impl EscalateServer {
    }
}

async fn escalate_task(
    socket: AsyncDatagramSocket,
    policy: Arc<dyn EscalationPolicy>,
) -> anyhow::Result<()> {
async fn escalate_task(socket: AsyncDatagramSocket, policy: ExecPolicy) -> anyhow::Result<()> {
    loop {
        let (_, mut fds) = socket.receive_with_fds().await?;
        if fds.len() != 1 {
@@ -118,7 +104,6 @@ async fn escalate_task(
            continue;
        }
        let stream_socket = AsyncSocket::from_fd(fds.remove(0))?;
        let policy = policy.clone();
        tokio::spawn(async move {
            if let Err(err) = handle_escalate_session_with_policy(stream_socket, policy).await {
                tracing::error!("escalate session failed: {err:?}");
@@ -137,7 +122,7 @@ pub(crate) struct ExecResult {

async fn handle_escalate_session_with_policy(
    socket: AsyncSocket,
    policy: Arc<dyn EscalationPolicy>,
    policy: ExecPolicy,
) -> anyhow::Result<()> {
    let EscalateRequest {
        file,
@@ -147,12 +132,8 @@ async fn handle_escalate_session_with_policy(
    } = socket.receive::<EscalateRequest>().await?;
    let file = PathBuf::from(&file).absolutize()?.into_owned();
    let workdir = PathBuf::from(&workdir).absolutize()?.into_owned();
    let action = policy
        .determine_action(file.as_path(), &argv, &workdir)
        .await?;

    let action = policy(file.as_path(), &argv, &workdir);
    tracing::debug!("decided {action:?} for {file:?} {argv:?} {workdir:?}");

    match action {
        EscalateAction::Run => {
            socket
@@ -214,13 +195,6 @@ async fn handle_escalate_session_with_policy(
                })
                .await?;
        }
        EscalateAction::Deny { reason } => {
            socket
                .send(EscalateResponse {
                    action: EscalateAction::Deny { reason },
                })
                .await?;
        }
    }
    Ok(())
}
@@ -230,33 +204,14 @@ mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::collections::HashMap;
    use std::path::Path;
    use std::path::PathBuf;

    struct DeterministicEscalationPolicy {
        action: EscalateAction,
    }

    #[async_trait::async_trait]
    impl EscalationPolicy for DeterministicEscalationPolicy {
        async fn determine_action(
            &self,
            _file: &Path,
            _argv: &[String],
            _workdir: &Path,
        ) -> Result<EscalateAction, rmcp::ErrorData> {
            Ok(self.action.clone())
        }
    }

    #[tokio::test]
    async fn handle_escalate_session_respects_run_in_sandbox_decision() -> anyhow::Result<()> {
        let (server, client) = AsyncSocket::pair()?;
        let server_task = tokio::spawn(handle_escalate_session_with_policy(
            server,
            Arc::new(DeterministicEscalationPolicy {
                action: EscalateAction::Run,
            }),
            |_file, _argv, _workdir| EscalateAction::Run,
        ));

        client
@@ -283,9 +238,7 @@ mod tests {
        let (server, client) = AsyncSocket::pair()?;
        let server_task = tokio::spawn(handle_escalate_session_with_policy(
            server,
            Arc::new(DeterministicEscalationPolicy {
                action: EscalateAction::Escalate,
            }),
            |_file, _argv, _workdir| EscalateAction::Escalate,
        ));

        client

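As these hunks show, the escalation decision collapses from an async `EscalationPolicy` trait object into the plain `ExecPolicy` function pointer defined above. A minimal sketch of a custom policy written against that signature; the rule itself is illustrative, and `EscalateAction` is the enum from `escalate_protocol` shown earlier in this diff:

```rust
use std::path::Path;

// Illustrative policy matching the `ExecPolicy` fn-pointer alias above:
// escalate `git push ...` out of the sandbox, run everything else sandboxed.
// (The rule is an example, not a policy the codebase actually ships.)
fn example_policy(file: &Path, argv: &[String], _workdir: &Path) -> EscalateAction {
    // `Path::ends_with` compares whole path components, so this matches
    // `/usr/bin/git` but not `/usr/bin/notgit`.
    if file.ends_with("git") && argv.get(1).is_some_and(|arg| arg == "push") {
        return EscalateAction::Escalate;
    }
    EscalateAction::Run
}
```

Because `ExecPolicy` is a plain `fn` pointer, such a policy can be passed straight to `EscalateServer::new(bash_path, example_policy)` or, as the tests above do, supplied inline as a non-capturing closure.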
@@ -1,14 +0,0 @@
use std::path::Path;

use crate::posix::escalate_protocol::EscalateAction;

/// Decides what action to take in response to an execve request from a client.
#[async_trait::async_trait]
pub(crate) trait EscalationPolicy: Send + Sync {
    async fn determine_action(
        &self,
        file: &Path,
        argv: &[String],
        workdir: &Path,
    ) -> Result<EscalateAction, rmcp::ErrorData>;
}
@@ -18,11 +18,9 @@ use rmcp::tool_handler;
use rmcp::tool_router;
use rmcp::transport::stdio;

use crate::posix::escalate_server;
use crate::posix::escalate_server::EscalateServer;
use crate::posix::escalate_server::{self};
use crate::posix::mcp_escalation_policy::ExecPolicy;
use crate::posix::mcp_escalation_policy::McpEscalationPolicy;
use crate::posix::stopwatch::Stopwatch;
use crate::posix::escalate_server::ExecPolicy;

/// Path to our patched bash.
const CODEX_BASH_PATH_ENV_VAR: &str = "CODEX_BASH_PATH";
@@ -66,17 +64,15 @@ impl From<escalate_server::ExecResult> for ExecResult {
pub struct ExecTool {
    tool_router: ToolRouter<ExecTool>,
    bash_path: PathBuf,
    execve_wrapper: PathBuf,
    policy: ExecPolicy,
}

#[tool_router]
impl ExecTool {
    pub fn new(bash_path: PathBuf, execve_wrapper: PathBuf, policy: ExecPolicy) -> Self {
    pub fn new(bash_path: PathBuf, policy: ExecPolicy) -> Self {
        Self {
            tool_router: Self::tool_router(),
            bash_path,
            execve_wrapper,
            policy,
        }
    }
@@ -85,28 +81,17 @@ impl ExecTool {
    #[tool]
    async fn shell(
        &self,
        context: RequestContext<RoleServer>,
        _context: RequestContext<RoleServer>,
        Parameters(params): Parameters<ExecParams>,
    ) -> Result<CallToolResult, McpError> {
        let effective_timeout = Duration::from_millis(
            params
                .timeout_ms
                .unwrap_or(codex_core::exec::DEFAULT_EXEC_COMMAND_TIMEOUT_MS),
        );
        let stopwatch = Stopwatch::new(effective_timeout);
        let cancel_token = stopwatch.cancellation_token();
        let escalate_server = EscalateServer::new(
            self.bash_path.clone(),
            self.execve_wrapper.clone(),
            McpEscalationPolicy::new(self.policy, context, stopwatch.clone()),
        );
        let escalate_server = EscalateServer::new(self.bash_path.clone(), self.policy);
        let result = escalate_server
            .exec(
                params.command,
                // TODO: use ShellEnvironmentPolicy
                std::env::vars().collect(),
                PathBuf::from(&params.workdir),
                cancel_token,
                params.timeout_ms,
            )
            .await
            .map_err(|e| McpError::internal_error(e.to_string(), None))?;
@@ -114,6 +99,27 @@ impl ExecTool {
            ExecResult::from(result),
        )?]))
    }

    #[allow(dead_code)]
    async fn prompt(
        &self,
        command: String,
        workdir: String,
        context: RequestContext<RoleServer>,
    ) -> Result<CreateElicitationResult, McpError> {
        context
            .peer
            .create_elicitation(CreateElicitationRequestParam {
                message: format!("Allow Codex to run `{command:?}` in `{workdir:?}`?"),
                #[allow(clippy::expect_used)]
                requested_schema: ElicitationSchema::builder()
                    .property("dummy", PrimitiveSchema::String(StringSchema::new()))
                    .build()
                    .expect("failed to build elicitation schema"),
            })
            .await
            .map_err(|e| McpError::internal_error(e.to_string(), None))
    }
}

#[tool_handler]
@@ -141,9 +147,8 @@ impl ServerHandler for ExecTool {

pub(crate) async fn serve(
    bash_path: PathBuf,
    execve_wrapper: PathBuf,
    policy: ExecPolicy,
) -> Result<RunningService<RoleServer, ExecTool>, rmcp::service::ServerInitializeError> {
    let tool = ExecTool::new(bash_path, execve_wrapper, policy);
    let tool = ExecTool::new(bash_path, policy);
    tool.serve(stdio()).await
}

@@ -1,146 +0,0 @@
use std::path::Path;

use rmcp::ErrorData as McpError;
use rmcp::RoleServer;
use rmcp::model::CreateElicitationRequestParam;
use rmcp::model::CreateElicitationResult;
use rmcp::model::ElicitationAction;
use rmcp::model::ElicitationSchema;
use rmcp::service::RequestContext;

use crate::posix::escalate_protocol::EscalateAction;
use crate::posix::escalation_policy::EscalationPolicy;
use crate::posix::stopwatch::Stopwatch;

/// This is the policy which decides how to handle an exec() call.
///
/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
/// `argv` is the argv, including the program name (`argv[0]`).
/// `workdir` is the absolute, canonical path to the working directory in which to execute the
/// command.
pub(crate) type ExecPolicy = fn(file: &Path, argv: &[String], workdir: &Path) -> ExecPolicyOutcome;

pub(crate) enum ExecPolicyOutcome {
    Allow {
        run_with_escalated_permissions: bool,
    },
    Prompt {
        run_with_escalated_permissions: bool,
    },
    Forbidden,
}

/// ExecPolicy with access to the MCP RequestContext so that it can leverage
/// elicitations.
pub(crate) struct McpEscalationPolicy {
    policy: ExecPolicy,
    context: RequestContext<RoleServer>,
    stopwatch: Stopwatch,
}

impl McpEscalationPolicy {
    pub(crate) fn new(
        policy: ExecPolicy,
        context: RequestContext<RoleServer>,
        stopwatch: Stopwatch,
    ) -> Self {
        Self {
            policy,
            context,
            stopwatch,
        }
    }

    async fn prompt(
        &self,
        file: &Path,
        argv: &[String],
        workdir: &Path,
        context: RequestContext<RoleServer>,
    ) -> Result<CreateElicitationResult, McpError> {
        let args = shlex::try_join(argv.iter().skip(1).map(String::as_str)).unwrap_or_default();
        let command = if args.is_empty() {
            file.display().to_string()
        } else {
            format!("{} {}", file.display(), args)
        };
        self.stopwatch
            .pause_for(async {
                context
                    .peer
                    .create_elicitation(CreateElicitationRequestParam {
                        message: format!(
                            "Allow agent to run `{command}` in `{}`?",
                            workdir.display()
                        ),
                        requested_schema: ElicitationSchema::builder()
                            .title("Execution Permission Request")
                            .optional_string_with("reason", |schema| {
                                schema.description(
                                    "Optional reason for allowing or denying execution",
                                )
                            })
                            .build()
                            .map_err(|e| {
                                McpError::internal_error(
                                    format!("failed to build elicitation schema: {e}"),
                                    None,
                                )
                            })?,
                    })
                    .await
                    .map_err(|e| McpError::internal_error(e.to_string(), None))
            })
            .await
    }
}

#[async_trait::async_trait]
impl EscalationPolicy for McpEscalationPolicy {
    async fn determine_action(
        &self,
        file: &Path,
        argv: &[String],
        workdir: &Path,
    ) -> Result<EscalateAction, rmcp::ErrorData> {
        let outcome = (self.policy)(file, argv, workdir);
        let action = match outcome {
            ExecPolicyOutcome::Allow {
                run_with_escalated_permissions,
            } => {
                if run_with_escalated_permissions {
                    EscalateAction::Escalate
                } else {
                    EscalateAction::Run
                }
            }
            ExecPolicyOutcome::Prompt {
                run_with_escalated_permissions,
            } => {
                let result = self
                    .prompt(file, argv, workdir, self.context.clone())
                    .await?;
                // TODO: Extract reason from `result.content`.
                match result.action {
                    ElicitationAction::Accept => {
                        if run_with_escalated_permissions {
                            EscalateAction::Escalate
                        } else {
                            EscalateAction::Run
                        }
                    }
                    ElicitationAction::Decline => EscalateAction::Deny {
                        reason: Some("User declined execution".to_string()),
                    },
                    ElicitationAction::Cancel => EscalateAction::Deny {
                        reason: Some("User cancelled execution".to_string()),
                    },
                }
            }
            ExecPolicyOutcome::Forbidden => EscalateAction::Deny {
                reason: Some("Execution forbidden by policy".to_string()),
            },
        };
        Ok(action)
    }
}
@@ -1,211 +0,0 @@
use std::future::Future;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;

use tokio::sync::Mutex;
use tokio::sync::Notify;
use tokio_util::sync::CancellationToken;

#[derive(Clone, Debug)]
pub(crate) struct Stopwatch {
    limit: Duration,
    inner: Arc<Mutex<StopwatchState>>,
    notify: Arc<Notify>,
}

#[derive(Debug)]
struct StopwatchState {
    elapsed: Duration,
    running_since: Option<Instant>,
    active_pauses: u32,
}

impl Stopwatch {
    pub(crate) fn new(limit: Duration) -> Self {
        Self {
            inner: Arc::new(Mutex::new(StopwatchState {
                elapsed: Duration::ZERO,
                running_since: Some(Instant::now()),
                active_pauses: 0,
            })),
            notify: Arc::new(Notify::new()),
            limit,
        }
    }

    pub(crate) fn cancellation_token(&self) -> CancellationToken {
        let limit = self.limit;
        let token = CancellationToken::new();
        let cancel = token.clone();
        let inner = Arc::clone(&self.inner);
        let notify = Arc::clone(&self.notify);
        tokio::spawn(async move {
            loop {
                let (remaining, running) = {
                    let guard = inner.lock().await;
                    let elapsed = guard.elapsed
                        + guard
                            .running_since
                            .map(|since| since.elapsed())
                            .unwrap_or_default();
                    if elapsed >= limit {
                        break;
                    }
                    (limit - elapsed, guard.running_since.is_some())
                };

                if !running {
                    notify.notified().await;
                    continue;
                }

                let sleep = tokio::time::sleep(remaining);
                tokio::pin!(sleep);
                tokio::select! {
                    _ = &mut sleep => {
                        break;
                    }
                    _ = notify.notified() => {
                        continue;
                    }
                }
            }
            cancel.cancel();
        });
        token
    }

    /// Runs `fut`, pausing the stopwatch while the future is pending. The clock
    /// resumes automatically when the future completes. Nested/overlapping
    /// calls are reference-counted so the stopwatch only resumes when every
    /// pause is lifted.
    pub(crate) async fn pause_for<F, T>(&self, fut: F) -> T
    where
        F: Future<Output = T>,
    {
        self.pause().await;
        let result = fut.await;
        self.resume().await;
        result
    }

    async fn pause(&self) {
        let mut guard = self.inner.lock().await;
        guard.active_pauses += 1;
        if guard.active_pauses == 1
            && let Some(since) = guard.running_since.take()
        {
            guard.elapsed += since.elapsed();
            self.notify.notify_waiters();
        }
    }

    async fn resume(&self) {
        let mut guard = self.inner.lock().await;
        if guard.active_pauses == 0 {
            return;
        }
        guard.active_pauses -= 1;
        if guard.active_pauses == 0 && guard.running_since.is_none() {
            guard.running_since = Some(Instant::now());
            self.notify.notify_waiters();
        }
    }
}

#[cfg(test)]
mod tests {
    use super::Stopwatch;
    use tokio::time::Duration;
    use tokio::time::Instant;
    use tokio::time::sleep;
    use tokio::time::timeout;

    #[tokio::test]
    async fn cancellation_receiver_fires_after_limit() {
        let stopwatch = Stopwatch::new(Duration::from_millis(50));
        let token = stopwatch.cancellation_token();
        let start = Instant::now();
        token.cancelled().await;
        assert!(start.elapsed() >= Duration::from_millis(50));
    }

    #[tokio::test]
    async fn pause_prevents_timeout_until_resumed() {
        let stopwatch = Stopwatch::new(Duration::from_millis(50));
        let token = stopwatch.cancellation_token();

        let pause_handle = tokio::spawn({
            let stopwatch = stopwatch.clone();
            async move {
                stopwatch
                    .pause_for(async {
                        sleep(Duration::from_millis(100)).await;
                    })
                    .await;
            }
        });

        assert!(
            timeout(Duration::from_millis(30), token.cancelled())
                .await
                .is_err()
        );

        pause_handle.await.expect("pause task should finish");

        token.cancelled().await;
    }

    #[tokio::test]
    async fn overlapping_pauses_only_resume_once() {
        let stopwatch = Stopwatch::new(Duration::from_millis(50));
        let token = stopwatch.cancellation_token();

        // First pause.
        let pause1 = {
            let stopwatch = stopwatch.clone();
            tokio::spawn(async move {
                stopwatch
                    .pause_for(async {
                        sleep(Duration::from_millis(80)).await;
                    })
                    .await;
            })
        };

        // Overlapping pause that ends sooner.
        let pause2 = {
            let stopwatch = stopwatch.clone();
            tokio::spawn(async move {
                stopwatch
                    .pause_for(async {
                        sleep(Duration::from_millis(30)).await;
                    })
                    .await;
            })
        };

        // While both pauses are active, the cancellation should not fire.
        assert!(
            timeout(Duration::from_millis(40), token.cancelled())
                .await
                .is_err()
        );

        pause2.await.expect("short pause should complete");

        // Still paused because the long pause is active.
        assert!(
            timeout(Duration::from_millis(30), token.cancelled())
                .await
                .is_err()
        );

        pause1.await.expect("long pause should complete");

        // Now the stopwatch should resume and hit the limit shortly after.
        token.cancelled().await;
    }
}
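The deleted `Stopwatch` existed to keep a command's timeout honest while a user-approval elicitation was pending: the cancellation token enforces the limit, and `pause_for` stops the clock around the prompt so user think-time is not billed to the command. A short usage sketch against the API above; it assumes the `Stopwatch` type from the deleted module and a `tokio` runtime, and is illustrative rather than code from this diff:

```rust
// Illustrative wiring of the (now removed) Stopwatch: race the timeout
// token against some work whose waiting should not count toward the limit.
async fn run_with_pausable_timeout(stopwatch: Stopwatch) {
    let cancel = stopwatch.cancellation_token();
    tokio::select! {
        _ = cancel.cancelled() => {
            // The limit elapsed, excluding any paused intervals.
        }
        _ = stopwatch.pause_for(async {
            // e.g. await the user's approve/deny answer here;
            // the clock is stopped for the duration of this future.
        }) => {
            // The awaited work finished before the limit was hit.
        }
    }
}
```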
@@ -101,7 +101,7 @@ pub struct ResumeArgs {
    pub session_id: Option<String>,

    /// Resume the most recent recorded session (newest) without specifying an id.
    #[arg(long = "last", default_value_t = false)]
    #[arg(long = "last", default_value_t = false, conflicts_with = "session_id")]
    pub last: bool,

    /// Prompt to send after resuming the session. If `-` is used, read from stdin.

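The added `conflicts_with = "session_id"` makes clap reject `--last` combined with a positional session id at parse time, instead of quietly accepting both. A standalone sketch of the same pattern, using a hypothetical reduced struct rather than the actual `ResumeArgs`:

```rust
use clap::Parser;

// Hypothetical reduction of ResumeArgs showing the conflicts_with behavior.
#[derive(Parser, Debug)]
struct Resume {
    /// Session id to resume.
    session_id: Option<String>,

    /// Resume the newest session; cannot be combined with a session id.
    #[arg(long = "last", default_value_t = false, conflicts_with = "session_id")]
    last: bool,
}

fn main() {
    // `prog --last abc123` now fails with a clear clap error instead of
    // parsing `abc123` as a session id alongside --last.
    let args = Resume::parse();
    println!("{args:?}");
}
```

This parse-time rejection is also what lets the `run_main` hunk further below drop its workaround that reinterpreted a stray `session_id` as the prompt.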
@@ -161,7 +161,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
    fn process_event(&mut self, event: Event) -> CodexStatus {
        let Event { id: _, msg } = event;
        match msg {
            EventMsg::Error(ErrorEvent { message, .. }) => {
            EventMsg::Error(ErrorEvent { message }) => {
                let prefix = "ERROR:".style(self.red);
                ts_msg!(self, "{prefix} {message}");
            }
@@ -221,7 +221,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
            EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
                ts_msg!(self, "{}", message.style(self.dimmed));
            }
            EventMsg::StreamError(StreamErrorEvent { message, .. }) => {
            EventMsg::StreamError(StreamErrorEvent { message }) => {
                ts_msg!(self, "{}", message.style(self.dimmed));
            }
            EventMsg::TaskStarted(_) => {
@@ -346,7 +346,6 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                call_id,
                auto_approved,
                changes,
                ..
            }) => {
                // Store metadata so we can calculate duration later when we
                // receive the corresponding PatchApplyEnd event.

@@ -166,7 +166,6 @@ pub struct FileUpdateChange {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum PatchApplyStatus {
    InProgress,
    Completed,
    Failed,
}

@@ -82,21 +82,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
    let prompt_arg = match &command {
        // Allow prompt before the subcommand by falling back to the parent-level prompt
        // when the Resume subcommand did not provide its own prompt.
        Some(ExecCommand::Resume(args)) => {
            let resume_prompt = args
                .prompt
                .clone()
                // When using `resume --last <PROMPT>`, clap still parses the first positional
                // as `session_id`. Reinterpret it as the prompt so the flag works with JSON mode.
                .or_else(|| {
                    if args.last {
                        args.session_id.clone()
                    } else {
                        None
                    }
                });
            resume_prompt.or(prompt)
        }
        Some(ExecCommand::Resume(args)) => args.prompt.clone().or(prompt),
        None => prompt,
    };

Some files were not shown because too many files have changed in this diff.