mirror of
https://github.com/openai/codex.git
synced 2026-02-04 16:03:46 +00:00
Compare commits
34 Commits
add-contex
...
dev/zhao/t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e35cbb7acf | ||
|
|
2c87ca8c68 | ||
|
|
3aa9c0886f | ||
|
|
2b7378ac77 | ||
|
|
ddcc60a085 | ||
|
|
8465f1f2f4 | ||
|
|
7ab45487dd | ||
|
|
cecbd5b021 | ||
|
|
4000e26303 | ||
|
|
e032d338f2 | ||
|
|
8bebe86a47 | ||
|
|
ab2e7499f8 | ||
|
|
daf77b8452 | ||
|
|
03a6e853c0 | ||
|
|
837bc98a1d | ||
|
|
842a1b7fe7 | ||
|
|
03ffe4d595 | ||
|
|
ae2a084fae | ||
|
|
a941ae7632 | ||
|
|
2c665fb1dd | ||
|
|
98a90a3bb2 | ||
|
|
7c8d333980 | ||
|
|
497fb4a19c | ||
|
|
5860481bc4 | ||
|
|
a52cf4d2b4 | ||
|
|
e70c52a3af | ||
|
|
de1768d3ba | ||
|
|
702238f004 | ||
|
|
fa5f6e76c9 | ||
|
|
f828cd2897 | ||
|
|
326c1e0a7e | ||
|
|
3f1c4b9add | ||
|
|
0b28e72b66 | ||
|
|
94dfb211af |
2
.github/codex/home/config.toml
vendored
2
.github/codex/home/config.toml
vendored
@@ -1,3 +1,3 @@
|
||||
model = "gpt-5"
|
||||
model = "gpt-5.1"
|
||||
|
||||
# Consider setting [mcp_servers] here!
|
||||
|
||||
4
.github/workflows/cla.yml
vendored
4
.github/workflows/cla.yml
vendored
@@ -46,4 +46,6 @@ jobs:
|
||||
path-to-document: https://github.com/openai/codex/blob/main/docs/CLA.md
|
||||
path-to-signatures: signatures/cla.json
|
||||
branch: cla-signatures
|
||||
allowlist: dependabot[bot]
|
||||
allowlist: |
|
||||
codex
|
||||
dependabot[bot]
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Close inactive PRs from contributors
|
||||
uses: actions/github-script@v7
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
|
||||
2
.github/workflows/issue-deduplicator.yml
vendored
2
.github/workflows/issue-deduplicator.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
with:
|
||||
openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
|
||||
allow-users: "*"
|
||||
model: gpt-5
|
||||
model: gpt-5.1
|
||||
prompt: |
|
||||
You are an assistant that triages new GitHub issues by identifying potential duplicates.
|
||||
|
||||
|
||||
12
.github/workflows/rust-ci.yml
vendored
12
.github/workflows/rust-ci.yml
vendored
@@ -97,7 +97,6 @@ jobs:
|
||||
env:
|
||||
# Speed up repeated builds across CI runs by caching compiled objects (non-Windows).
|
||||
USE_SCCACHE: ${{ startsWith(matrix.runner, 'windows') && 'false' || 'true' }}
|
||||
RUSTC_WRAPPER: ${{ startsWith(matrix.runner, 'windows') && '' || 'sccache' }}
|
||||
CARGO_INCREMENTAL: "0"
|
||||
SCCACHE_CACHE_SIZE: 10G
|
||||
|
||||
@@ -191,6 +190,11 @@ jobs:
|
||||
echo "Using sccache local disk + actions/cache fallback"
|
||||
fi
|
||||
|
||||
- name: Enable sccache wrapper
|
||||
if: ${{ env.USE_SCCACHE == 'true' }}
|
||||
shell: bash
|
||||
run: echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Restore sccache cache (fallback)
|
||||
if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
|
||||
id: cache_sccache_restore
|
||||
@@ -331,7 +335,6 @@ jobs:
|
||||
env:
|
||||
# Speed up repeated builds across CI runs by caching compiled objects (non-Windows).
|
||||
USE_SCCACHE: ${{ startsWith(matrix.runner, 'windows') && 'false' || 'true' }}
|
||||
RUSTC_WRAPPER: ${{ startsWith(matrix.runner, 'windows') && '' || 'sccache' }}
|
||||
CARGO_INCREMENTAL: "0"
|
||||
SCCACHE_CACHE_SIZE: 10G
|
||||
|
||||
@@ -395,6 +398,11 @@ jobs:
|
||||
echo "Using sccache local disk + actions/cache fallback"
|
||||
fi
|
||||
|
||||
- name: Enable sccache wrapper
|
||||
if: ${{ env.USE_SCCACHE == 'true' }}
|
||||
shell: bash
|
||||
run: echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Restore sccache cache (fallback)
|
||||
if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
|
||||
id: cache_sccache_restore
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -64,6 +64,9 @@ apply_patch/
|
||||
# coverage
|
||||
coverage/
|
||||
|
||||
# personal files
|
||||
personal/
|
||||
|
||||
# os
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
37
codex-rs/Cargo.lock
generated
37
codex-rs/Cargo.lock
generated
@@ -874,7 +874,6 @@ dependencies = [
|
||||
"clap",
|
||||
"codex-protocol",
|
||||
"mcp-types",
|
||||
"paste",
|
||||
"pretty_assertions",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
@@ -1061,6 +1060,8 @@ dependencies = [
|
||||
"clap",
|
||||
"codex-app-server-protocol",
|
||||
"codex-core",
|
||||
"codex-lmstudio",
|
||||
"codex-ollama",
|
||||
"codex-protocol",
|
||||
"once_cell",
|
||||
"serde",
|
||||
@@ -1084,6 +1085,7 @@ dependencies = [
|
||||
"codex-apply-patch",
|
||||
"codex-arg0",
|
||||
"codex-async-utils",
|
||||
"codex-execpolicy2",
|
||||
"codex-file-search",
|
||||
"codex-git",
|
||||
"codex-keyring-store",
|
||||
@@ -1159,7 +1161,6 @@ dependencies = [
|
||||
"codex-arg0",
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-ollama",
|
||||
"codex-protocol",
|
||||
"core_test_support",
|
||||
"libc",
|
||||
@@ -1202,6 +1203,21 @@ dependencies = [
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-execpolicy2"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"multimap",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"starlark",
|
||||
"thiserror 2.0.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-feedback"
|
||||
version = "0.0.0"
|
||||
@@ -1263,6 +1279,19 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-lmstudio"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"codex-core",
|
||||
"reqwest",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"which",
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-login"
|
||||
version = "0.0.0"
|
||||
@@ -1427,6 +1456,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"urlencoding",
|
||||
"webbrowser",
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1454,12 +1484,12 @@ dependencies = [
|
||||
"codex-ansi-escape",
|
||||
"codex-app-server-protocol",
|
||||
"codex-arg0",
|
||||
"codex-backend-client",
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-feedback",
|
||||
"codex-file-search",
|
||||
"codex-login",
|
||||
"codex-ollama",
|
||||
"codex-protocol",
|
||||
"codex-windows-sandbox",
|
||||
"color-eyre",
|
||||
@@ -1482,6 +1512,7 @@ dependencies = [
|
||||
"ratatui",
|
||||
"ratatui-macros",
|
||||
"regex-lite",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serial_test",
|
||||
|
||||
@@ -17,9 +17,11 @@ members = [
|
||||
"core",
|
||||
"exec",
|
||||
"execpolicy",
|
||||
"execpolicy2",
|
||||
"keyring-store",
|
||||
"file-search",
|
||||
"linux-sandbox",
|
||||
"lmstudio",
|
||||
"login",
|
||||
"mcp-server",
|
||||
"mcp-types",
|
||||
@@ -64,11 +66,13 @@ codex-chatgpt = { path = "chatgpt" }
|
||||
codex-common = { path = "common" }
|
||||
codex-core = { path = "core" }
|
||||
codex-exec = { path = "exec" }
|
||||
codex-execpolicy2 = { path = "execpolicy2" }
|
||||
codex-feedback = { path = "feedback" }
|
||||
codex-file-search = { path = "file-search" }
|
||||
codex-git = { path = "utils/git" }
|
||||
codex-keyring-store = { path = "keyring-store" }
|
||||
codex-linux-sandbox = { path = "linux-sandbox" }
|
||||
codex-lmstudio = { path = "lmstudio" }
|
||||
codex-login = { path = "login" }
|
||||
codex-mcp-server = { path = "mcp-server" }
|
||||
codex-ollama = { path = "ollama" }
|
||||
@@ -130,7 +134,7 @@ image = { version = "^0.25.8", default-features = false }
|
||||
indexmap = "2.12.0"
|
||||
insta = "1.43.2"
|
||||
itertools = "0.14.0"
|
||||
keyring = "3.6"
|
||||
keyring = { version = "3.6", default-features = false }
|
||||
landlock = "0.4.1"
|
||||
lazy_static = "1"
|
||||
libc = "0.2.175"
|
||||
@@ -150,7 +154,6 @@ opentelemetry-semantic-conventions = "0.30.0"
|
||||
opentelemetry_sdk = "0.30.0"
|
||||
os_info = "3.12.0"
|
||||
owo-colors = "4.2.0"
|
||||
paste = "1.0.15"
|
||||
path-absolutize = "3.1.1"
|
||||
pathdiff = "0.2"
|
||||
portable-pty = "0.9.0"
|
||||
|
||||
@@ -15,7 +15,6 @@ anyhow = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
codex-protocol = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
paste = { workspace = true }
|
||||
schemars = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::JSONRPCNotification;
|
||||
use crate::JSONRPCRequest;
|
||||
@@ -9,12 +7,6 @@ use crate::export::GeneratedSchema;
|
||||
use crate::export::write_json_schema;
|
||||
use crate::protocol::v1;
|
||||
use crate::protocol::v2;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use paste::paste;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -277,34 +269,36 @@ macro_rules! server_request_definitions {
|
||||
(
|
||||
$(
|
||||
$(#[$variant_meta:meta])*
|
||||
$variant:ident
|
||||
$variant:ident $(=> $wire:literal)? {
|
||||
params: $params:ty,
|
||||
response: $response:ty,
|
||||
}
|
||||
),* $(,)?
|
||||
) => {
|
||||
paste! {
|
||||
/// Request initiated from the server and sent to the client.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "method", rename_all = "camelCase")]
|
||||
pub enum ServerRequest {
|
||||
$(
|
||||
$(#[$variant_meta])*
|
||||
$variant {
|
||||
#[serde(rename = "id")]
|
||||
request_id: RequestId,
|
||||
params: [<$variant Params>],
|
||||
},
|
||||
)*
|
||||
}
|
||||
/// Request initiated from the server and sent to the client.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "method", rename_all = "camelCase")]
|
||||
pub enum ServerRequest {
|
||||
$(
|
||||
$(#[$variant_meta])*
|
||||
$(#[serde(rename = $wire)] #[ts(rename = $wire)])?
|
||||
$variant {
|
||||
#[serde(rename = "id")]
|
||||
request_id: RequestId,
|
||||
params: $params,
|
||||
},
|
||||
)*
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, JsonSchema)]
|
||||
pub enum ServerRequestPayload {
|
||||
$( $variant([<$variant Params>]), )*
|
||||
}
|
||||
#[derive(Debug, Clone, PartialEq, JsonSchema)]
|
||||
pub enum ServerRequestPayload {
|
||||
$( $variant($params), )*
|
||||
}
|
||||
|
||||
impl ServerRequestPayload {
|
||||
pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
|
||||
match self {
|
||||
$(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
|
||||
}
|
||||
impl ServerRequestPayload {
|
||||
pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
|
||||
match self {
|
||||
$(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -312,9 +306,9 @@ macro_rules! server_request_definitions {
|
||||
pub fn export_server_responses(
|
||||
out_dir: &::std::path::Path,
|
||||
) -> ::std::result::Result<(), ::ts_rs::ExportError> {
|
||||
paste! {
|
||||
$(<[<$variant Response>] as ::ts_rs::TS>::export_all_to(out_dir)?;)*
|
||||
}
|
||||
$(
|
||||
<$response as ::ts_rs::TS>::export_all_to(out_dir)?;
|
||||
)*
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -323,9 +317,12 @@ macro_rules! server_request_definitions {
|
||||
out_dir: &Path,
|
||||
) -> ::anyhow::Result<Vec<GeneratedSchema>> {
|
||||
let mut schemas = Vec::new();
|
||||
paste! {
|
||||
$(schemas.push(crate::export::write_json_schema::<[<$variant Response>]>(out_dir, stringify!([<$variant Response>]))?);)*
|
||||
}
|
||||
$(
|
||||
schemas.push(crate::export::write_json_schema::<$response>(
|
||||
out_dir,
|
||||
concat!(stringify!($variant), "Response"),
|
||||
)?);
|
||||
)*
|
||||
Ok(schemas)
|
||||
}
|
||||
|
||||
@@ -334,9 +331,12 @@ macro_rules! server_request_definitions {
|
||||
out_dir: &Path,
|
||||
) -> ::anyhow::Result<Vec<GeneratedSchema>> {
|
||||
let mut schemas = Vec::new();
|
||||
paste! {
|
||||
$(schemas.push(crate::export::write_json_schema::<[<$variant Params>]>(out_dir, stringify!([<$variant Params>]))?);)*
|
||||
}
|
||||
$(
|
||||
schemas.push(crate::export::write_json_schema::<$params>(
|
||||
out_dir,
|
||||
concat!(stringify!($variant), "Params"),
|
||||
)?);
|
||||
)*
|
||||
Ok(schemas)
|
||||
}
|
||||
};
|
||||
@@ -426,49 +426,27 @@ impl TryFrom<JSONRPCRequest> for ServerRequest {
|
||||
}
|
||||
|
||||
server_request_definitions! {
|
||||
/// NEW APIs
|
||||
/// Sent when approval is requested for a specific command execution.
|
||||
/// This request is used for Turns started via turn/start.
|
||||
CommandExecutionRequestApproval => "item/commandExecution/requestApproval" {
|
||||
params: v2::CommandExecutionRequestApprovalParams,
|
||||
response: v2::CommandExecutionRequestApprovalResponse,
|
||||
},
|
||||
|
||||
/// DEPRECATED APIs below
|
||||
/// Request to approve a patch.
|
||||
ApplyPatchApproval,
|
||||
/// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
|
||||
ApplyPatchApproval {
|
||||
params: v1::ApplyPatchApprovalParams,
|
||||
response: v1::ApplyPatchApprovalResponse,
|
||||
},
|
||||
/// Request to exec a command.
|
||||
ExecCommandApproval,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ApplyPatchApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
|
||||
/// and [codex_core::protocol::PatchApplyEndEvent].
|
||||
pub call_id: String,
|
||||
pub file_changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root
|
||||
/// for the remainder of the session (unclear if this is honored today).
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ExecCommandApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
|
||||
/// and [codex_core::protocol::ExecCommandEndEvent].
|
||||
pub call_id: String,
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ExecCommandApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ApplyPatchApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
/// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
|
||||
ExecCommandApproval {
|
||||
params: v1::ExecCommandApprovalParams,
|
||||
response: v1::ExecCommandApprovalResponse,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
@@ -533,17 +511,20 @@ client_notification_definitions! {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use anyhow::Result;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::account::PlanType;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[test]
|
||||
fn serialize_new_conversation() -> Result<()> {
|
||||
let request = ClientRequest::NewConversation {
|
||||
request_id: RequestId::Integer(42),
|
||||
params: v1::NewConversationParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
model_provider: None,
|
||||
profile: None,
|
||||
cwd: None,
|
||||
@@ -561,7 +542,7 @@ mod tests {
|
||||
"method": "newConversation",
|
||||
"id": 42,
|
||||
"params": {
|
||||
"model": "gpt-5-codex",
|
||||
"model": "gpt-5.1-codex",
|
||||
"modelProvider": null,
|
||||
"profile": null,
|
||||
"cwd": null,
|
||||
@@ -616,7 +597,7 @@ mod tests {
|
||||
#[test]
|
||||
fn serialize_server_request() -> Result<()> {
|
||||
let conversation_id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?;
|
||||
let params = ExecCommandApprovalParams {
|
||||
let params = v1::ExecCommandApprovalParams {
|
||||
conversation_id,
|
||||
call_id: "call-42".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
|
||||
@@ -8,8 +8,12 @@ use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::config_types::SandboxMode;
|
||||
use codex_protocol::config_types::Verbosity;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
@@ -191,6 +195,46 @@ pub struct GitDiffToRemoteResponse {
|
||||
pub diff: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ApplyPatchApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
|
||||
/// and [codex_core::protocol::PatchApplyEndEvent].
|
||||
pub call_id: String,
|
||||
pub file_changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root
|
||||
/// for the remainder of the session (unclear if this is honored today).
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ApplyPatchApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ExecCommandApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
|
||||
/// and [codex_core::protocol::ExecCommandEndEvent].
|
||||
pub call_id: String,
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ExecCommandApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct CancelLoginChatGptParams {
|
||||
|
||||
@@ -4,11 +4,13 @@ use std::path::PathBuf;
|
||||
use crate::protocol::common::AuthMode;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::account::PlanType;
|
||||
use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment;
|
||||
use codex_protocol::config_types::ReasoningEffort;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
|
||||
use codex_protocol::items::TurnItem as CoreTurnItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
|
||||
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
|
||||
use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
|
||||
use codex_protocol::user_input::UserInput as CoreUserInput;
|
||||
@@ -20,7 +22,7 @@ use serde_json::Value as JsonValue;
|
||||
use ts_rs::TS;
|
||||
|
||||
// Macro to declare a camelCased API v2 enum mirroring a core enum which
|
||||
// tends to use kebab-case.
|
||||
// tends to use either snake_case or kebab-case.
|
||||
macro_rules! v2_enum_from_core {
|
||||
(
|
||||
pub enum $Name:ident from $Src:path { $( $Variant:ident ),+ $(,)? }
|
||||
@@ -56,6 +58,23 @@ v2_enum_from_core!(
|
||||
}
|
||||
);
|
||||
|
||||
v2_enum_from_core!(
|
||||
pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel {
|
||||
Low,
|
||||
Medium,
|
||||
High
|
||||
}
|
||||
);
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum ApprovalDecision {
|
||||
Accept,
|
||||
Decline,
|
||||
Cancel,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "camelCase")]
|
||||
#[ts(tag = "type")]
|
||||
@@ -63,6 +82,8 @@ v2_enum_from_core!(
|
||||
pub enum SandboxPolicy {
|
||||
DangerFullAccess,
|
||||
ReadOnly,
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
WorkspaceWrite {
|
||||
#[serde(default)]
|
||||
writable_roots: Vec<PathBuf>,
|
||||
@@ -119,6 +140,98 @@ impl From<codex_protocol::protocol::SandboxPolicy> for SandboxPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct SandboxCommandAssessment {
|
||||
pub description: String,
|
||||
pub risk_level: CommandRiskLevel,
|
||||
}
|
||||
|
||||
impl SandboxCommandAssessment {
|
||||
pub fn into_core(self) -> CoreSandboxCommandAssessment {
|
||||
CoreSandboxCommandAssessment {
|
||||
description: self.description,
|
||||
risk_level: self.risk_level.to_core(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CoreSandboxCommandAssessment> for SandboxCommandAssessment {
|
||||
fn from(value: CoreSandboxCommandAssessment) -> Self {
|
||||
Self {
|
||||
description: value.description,
|
||||
risk_level: CommandRiskLevel::from(value.risk_level),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "camelCase")]
|
||||
#[ts(tag = "type")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum CommandAction {
|
||||
Read {
|
||||
command: String,
|
||||
name: String,
|
||||
path: PathBuf,
|
||||
},
|
||||
ListFiles {
|
||||
command: String,
|
||||
path: Option<String>,
|
||||
},
|
||||
Search {
|
||||
command: String,
|
||||
query: Option<String>,
|
||||
path: Option<String>,
|
||||
},
|
||||
Unknown {
|
||||
command: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl CommandAction {
|
||||
pub fn into_core(self) -> CoreParsedCommand {
|
||||
match self {
|
||||
CommandAction::Read {
|
||||
command: cmd,
|
||||
name,
|
||||
path,
|
||||
} => CoreParsedCommand::Read { cmd, name, path },
|
||||
CommandAction::ListFiles { command: cmd, path } => {
|
||||
CoreParsedCommand::ListFiles { cmd, path }
|
||||
}
|
||||
CommandAction::Search {
|
||||
command: cmd,
|
||||
query,
|
||||
path,
|
||||
} => CoreParsedCommand::Search { cmd, query, path },
|
||||
CommandAction::Unknown { command: cmd } => CoreParsedCommand::Unknown { cmd },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CoreParsedCommand> for CommandAction {
|
||||
fn from(value: CoreParsedCommand) -> Self {
|
||||
match value {
|
||||
CoreParsedCommand::Read { cmd, name, path } => CommandAction::Read {
|
||||
command: cmd,
|
||||
name,
|
||||
path,
|
||||
},
|
||||
CoreParsedCommand::ListFiles { cmd, path } => {
|
||||
CommandAction::ListFiles { command: cmd, path }
|
||||
}
|
||||
CoreParsedCommand::Search { cmd, query, path } => CommandAction::Search {
|
||||
command: cmd,
|
||||
query,
|
||||
path,
|
||||
},
|
||||
CoreParsedCommand::Unknown { cmd } => CommandAction::Unknown { command: cmd },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "camelCase")]
|
||||
#[ts(tag = "type")]
|
||||
@@ -279,7 +392,7 @@ pub struct ThreadStartParams {
|
||||
pub cwd: Option<String>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub sandbox: Option<SandboxMode>,
|
||||
pub config: Option<HashMap<String, serde_json::Value>>,
|
||||
pub config: Option<HashMap<String, JsonValue>>,
|
||||
pub base_instructions: Option<String>,
|
||||
pub developer_instructions: Option<String>,
|
||||
}
|
||||
@@ -506,14 +619,14 @@ impl From<CoreUserInput> for UserInput {
|
||||
#[ts(tag = "type")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum ThreadItem {
|
||||
UserMessage {
|
||||
id: String,
|
||||
content: Vec<UserInput>,
|
||||
},
|
||||
AgentMessage {
|
||||
id: String,
|
||||
text: String,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
UserMessage { id: String, content: Vec<UserInput> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
AgentMessage { id: String, text: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
Reasoning {
|
||||
id: String,
|
||||
#[serde(default)]
|
||||
@@ -521,19 +634,35 @@ pub enum ThreadItem {
|
||||
#[serde(default)]
|
||||
content: Vec<String>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
CommandExecution {
|
||||
id: String,
|
||||
/// The command to be executed.
|
||||
command: String,
|
||||
aggregated_output: String,
|
||||
exit_code: Option<i32>,
|
||||
/// The command's working directory.
|
||||
cwd: PathBuf,
|
||||
status: CommandExecutionStatus,
|
||||
/// A best-effort parsing of the command to understand the action(s) it will perform.
|
||||
/// This returns a list of CommandAction objects because a single shell command may
|
||||
/// be composed of many commands piped together.
|
||||
command_actions: Vec<CommandAction>,
|
||||
/// The command's output, aggregated from stdout and stderr.
|
||||
aggregated_output: Option<String>,
|
||||
/// The command's exit code.
|
||||
exit_code: Option<i32>,
|
||||
/// The duration of the command execution in milliseconds.
|
||||
duration_ms: Option<i64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
FileChange {
|
||||
id: String,
|
||||
changes: Vec<FileUpdateChange>,
|
||||
status: PatchApplyStatus,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
McpToolCall {
|
||||
id: String,
|
||||
server: String,
|
||||
@@ -543,22 +672,18 @@ pub enum ThreadItem {
|
||||
result: Option<McpToolCallResult>,
|
||||
error: Option<McpToolCallError>,
|
||||
},
|
||||
WebSearch {
|
||||
id: String,
|
||||
query: String,
|
||||
},
|
||||
TodoList {
|
||||
id: String,
|
||||
items: Vec<TodoItem>,
|
||||
},
|
||||
ImageView {
|
||||
id: String,
|
||||
path: String,
|
||||
},
|
||||
CodeReview {
|
||||
id: String,
|
||||
review: String,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
WebSearch { id: String, query: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
TodoList { id: String, items: Vec<TodoItem> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
ImageView { id: String, path: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
CodeReview { id: String, review: String },
|
||||
}
|
||||
|
||||
impl From<CoreTurnItem> for ThreadItem {
|
||||
@@ -758,6 +883,39 @@ pub struct McpToolCallProgressNotification {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct CommandExecutionRequestApprovalParams {
|
||||
pub thread_id: String,
|
||||
pub turn_id: String,
|
||||
pub item_id: String,
|
||||
/// Optional explanatory reason (e.g. request for network access).
|
||||
pub reason: Option<String>,
|
||||
/// Optional model-provided risk assessment describing the blocked command.
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct CommandExecutionRequestAcceptSettings {
|
||||
/// If true, automatically approve this command for the duration of the session.
|
||||
#[serde(default)]
|
||||
pub for_session: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct CommandExecutionRequestApprovalResponse {
|
||||
pub decision: ApprovalDecision,
|
||||
/// Optional approval settings for when the decision is `accept`.
|
||||
/// Ignored if the decision is `decline` or `cancel`.
|
||||
#[serde(default)]
|
||||
pub accept_settings: Option<CommandExecutionRequestAcceptSettings>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
|
||||
@@ -2,6 +2,16 @@
|
||||
|
||||
`codex app-server` is the interface Codex uses to power rich interfaces such as the [Codex VS Code extension](https://marketplace.visualstudio.com/items?itemName=openai.chatgpt). The message schema is currently unstable, but those who wish to build experimental UIs on top of Codex may find it valuable.
|
||||
|
||||
## Table of Contents
|
||||
- [Protocol](#protocol)
|
||||
- [Message Schema](#message-schema)
|
||||
- [Lifecycle Overview](#lifecycle-overview)
|
||||
- [Initialization](#initialization)
|
||||
- [Core primitives](#core-primitives)
|
||||
- [Thread & turn endpoints](#thread--turn-endpoints)
|
||||
- [Auth endpoints](#auth-endpoints)
|
||||
- [Events (work-in-progress)](#v2-streaming-events-work-in-progress)
|
||||
|
||||
## Protocol
|
||||
|
||||
Similar to [MCP](https://modelcontextprotocol.io/), `codex app-server` supports bidirectional communication, streaming JSONL over stdio. The protocol is JSON-RPC 2.0, though the `"jsonrpc":"2.0"` header is omitted.
|
||||
@@ -15,6 +25,14 @@ codex app-server generate-ts --out DIR
|
||||
codex app-server generate-json-schema --out DIR
|
||||
```
|
||||
|
||||
## Lifecycle Overview
|
||||
|
||||
- Initialize once: Immediately after launching the codex app-server process, send an `initialize` request with your client metadata, then emit an `initialized` notification. Any other request before this handshake gets rejected.
|
||||
- Start (or resume) a thread: Call `thread/start` to open a fresh conversation. The response returns the thread object and you’ll also get a `thread/started` notification. If you’re continuing an existing conversation, call `thread/resume` with its ID instead.
|
||||
- Begin a turn: To send user input, call `turn/start` with the target `threadId` and the user's input. Optional fields let you override model, cwd, sandbox policy, etc. This immediately returns the new turn object and triggers a `turn/started` notification.
|
||||
- Stream events: After `turn/start`, keep reading JSON-RPC notifications on stdout. You’ll see `item/started`, `item/completed`, deltas like `item/agentMessage/delta`, tool progress, etc. These represent streaming model output plus any side effects (commands, tool calls, reasoning notes).
|
||||
- Finish the turn: When the model is done (or the turn is interrupted via making the `turn/interrupt` call), the server sends `turn/completed` with the final turn state and token usage.
|
||||
|
||||
## Initialization
|
||||
|
||||
Clients must send a single `initialize` request before invoking any other method, then acknowledge with an `initialized` notification. The server returns the user agent string it will present to upstream services; subsequent requests issued before initialization receive a `"Not initialized"` error, and repeated `initialize` calls receive an `"Already initialized"` error.
|
||||
@@ -56,7 +74,7 @@ Start a fresh thread when you need a new Codex conversation.
|
||||
{ "method": "thread/start", "id": 10, "params": {
|
||||
// Optionally set config settings. If not specified, will use the user's
|
||||
// current config settings.
|
||||
"model": "gpt-5-codex",
|
||||
"model": "gpt-5.1-codex",
|
||||
"cwd": "/Users/me/project",
|
||||
"approvalPolicy": "never",
|
||||
"sandbox": "workspaceWrite",
|
||||
@@ -137,7 +155,7 @@ You can optionally specify config overrides on the new turn. If specified, these
|
||||
"writableRoots": ["/Users/me/project"],
|
||||
"networkAccess": true
|
||||
},
|
||||
"model": "gpt-5-codex",
|
||||
"model": "gpt-5.1-codex",
|
||||
"effort": "medium",
|
||||
"summary": "concise"
|
||||
} }
|
||||
@@ -258,3 +276,33 @@ Field notes:
|
||||
- `codex app-server generate-ts --out <dir>` emits v2 types under `v2/`.
|
||||
- `codex app-server generate-json-schema --out <dir>` outputs `codex_app_server_protocol.schemas.json`.
|
||||
- See [“Authentication and authorization” in the config docs](../../docs/config.md#authentication-and-authorization) for configuration knobs.
|
||||
|
||||
|
||||
## Events (work-in-progress)
|
||||
|
||||
Event notifications are the server-initiated event stream for thread lifecycles, turn lifecycles, and the items within them. After you start or resume a thread, keep reading stdout for `thread/started`, `turn/*`, and `item/*` notifications.
|
||||
|
||||
### Turn events
|
||||
|
||||
The app-server streams JSON-RPC notifications while a turn is running. Each turn starts with `turn/started` (initial `turn`) and ends with `turn/completed` (final `turn` plus token `usage`), and clients subscribe to the events they care about, rendering each item incrementally as updates arrive. The per-item lifecycle is always: `item/started` → zero or more item-specific deltas → `item/completed`.
|
||||
|
||||
#### Thread items
|
||||
|
||||
`ThreadItem` is the tagged union carried in turn responses and `item/*` notifications. Currently we support events for the following items:
|
||||
- `userMessage` — `{id, content}` where `content` is a list of user inputs (`text`, `image`, or `localImage`).
|
||||
- `agentMessage` — `{id, text}` containing the accumulated agent reply.
|
||||
- `reasoning` — `{id, summary, content}` where `summary` holds streamed reasoning summaries (applicable for most OpenAI models) and `content` holds raw reasoning blocks (applicable for e.g. open source models).
|
||||
- `mcpToolCall` — `{id, server, tool, status, arguments, result?, error?}` describing MCP calls; `status` is `inProgress`, `completed`, or `failed`.
|
||||
- `webSearch` — `{id, query}` for a web search request issued by the agent.
|
||||
|
||||
All items emit two shared lifecycle events:
|
||||
- `item/started` — emits the full `item` when a new unit of work begins so the UI can render it immediately; the `item.id` in this payload matches the `itemId` used by deltas.
|
||||
- `item/completed` — sends the final `item` once that work finishes (e.g., after a tool call or message completes); treat this as the authoritative state.
|
||||
|
||||
There are additional item-specific events:
|
||||
#### agentMessage
|
||||
- `item/agentMessage/delta` — appends streamed text for the agent message; concatenate `delta` values for the same `itemId` in order to reconstruct the full reply.
|
||||
#### reasoning
|
||||
- `item/reasoning/summaryTextDelta` — streams readable reasoning summaries; `summaryIndex` increments when a new summary section opens.
|
||||
- `item/reasoning/summaryPartAdded` — marks the boundary between reasoning summary sections for an `itemId`; subsequent `summaryTextDelta` entries share the same `summaryIndex`.
|
||||
- `item/reasoning/textDelta` — streams raw reasoning text (only applicable for e.g. open source models); use `contentIndex` to group deltas that belong together before showing them in the UI.
|
||||
|
||||
@@ -5,6 +5,12 @@ use codex_app_server_protocol::AccountRateLimitsUpdatedNotification;
|
||||
use codex_app_server_protocol::AgentMessageDeltaNotification;
|
||||
use codex_app_server_protocol::ApplyPatchApprovalParams;
|
||||
use codex_app_server_protocol::ApplyPatchApprovalResponse;
|
||||
use codex_app_server_protocol::ApprovalDecision;
|
||||
use codex_app_server_protocol::CommandAction as V2ParsedCommand;
|
||||
use codex_app_server_protocol::CommandExecutionOutputDeltaNotification;
|
||||
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
|
||||
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
|
||||
use codex_app_server_protocol::CommandExecutionStatus;
|
||||
use codex_app_server_protocol::ExecCommandApprovalParams;
|
||||
use codex_app_server_protocol::ExecCommandApprovalResponse;
|
||||
use codex_app_server_protocol::InterruptConversationResponse;
|
||||
@@ -16,25 +22,29 @@ use codex_app_server_protocol::McpToolCallStatus;
|
||||
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
|
||||
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
|
||||
use codex_app_server_protocol::ReasoningTextDeltaNotification;
|
||||
use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment;
|
||||
use codex_app_server_protocol::ServerNotification;
|
||||
use codex_app_server_protocol::ServerRequestPayload;
|
||||
use codex_app_server_protocol::ThreadItem;
|
||||
use codex_app_server_protocol::TurnInterruptResponse;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::parse_command::shlex_join;
|
||||
use codex_core::protocol::ApplyPatchApprovalRequestEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecApprovalRequestEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_protocol::ConversationId;
|
||||
use std::convert::TryFrom;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::oneshot;
|
||||
use tracing::error;
|
||||
|
||||
type JsonRpcResult = serde_json::Value;
|
||||
type JsonValue = serde_json::Value;
|
||||
|
||||
pub(crate) async fn apply_bespoke_event_handling(
|
||||
event: Event,
|
||||
@@ -42,6 +52,7 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
conversation: Arc<CodexConversation>,
|
||||
outgoing: Arc<OutgoingMessageSender>,
|
||||
pending_interrupts: PendingInterrupts,
|
||||
api_version: ApiVersion,
|
||||
) {
|
||||
let Event { id: event_id, msg } = event;
|
||||
match msg {
|
||||
@@ -61,11 +72,57 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::ApplyPatchApproval(params))
|
||||
.await;
|
||||
// TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
|
||||
tokio::spawn(async move {
|
||||
on_patch_approval_response(event_id, rx, conversation).await;
|
||||
});
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
|
||||
call_id,
|
||||
turn_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => match api_version {
|
||||
ApiVersion::V1 => {
|
||||
let params = ExecCommandApprovalParams {
|
||||
conversation_id,
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
};
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::ExecCommandApproval(params))
|
||||
.await;
|
||||
tokio::spawn(async move {
|
||||
on_exec_approval_response(event_id, rx, conversation).await;
|
||||
});
|
||||
}
|
||||
ApiVersion::V2 => {
|
||||
let params = CommandExecutionRequestApprovalParams {
|
||||
thread_id: conversation_id.to_string(),
|
||||
turn_id: turn_id.clone(),
|
||||
// Until we migrate the core to be aware of a first class CommandExecutionItem
|
||||
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
|
||||
item_id: call_id.clone(),
|
||||
reason,
|
||||
risk: risk.map(V2SandboxCommandAssessment::from),
|
||||
};
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::CommandExecutionRequestApproval(
|
||||
params,
|
||||
))
|
||||
.await;
|
||||
tokio::spawn(async move {
|
||||
on_command_execution_request_approval_response(event_id, rx, conversation)
|
||||
.await;
|
||||
});
|
||||
}
|
||||
},
|
||||
// TODO(celia): properly construct McpToolCall TurnItem in core.
|
||||
EventMsg::McpToolCallBegin(begin_event) => {
|
||||
let notification = construct_mcp_tool_call_notification(begin_event).await;
|
||||
@@ -121,32 +178,6 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => {
|
||||
let params = ExecCommandApprovalParams {
|
||||
conversation_id,
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
};
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::ExecCommandApproval(params))
|
||||
.await;
|
||||
|
||||
// TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
|
||||
tokio::spawn(async move {
|
||||
on_exec_approval_response(event_id, rx, conversation).await;
|
||||
});
|
||||
}
|
||||
EventMsg::TokenCount(token_count_event) => {
|
||||
if let Some(rate_limits) = token_count_event.rate_limits {
|
||||
outgoing
|
||||
@@ -172,6 +203,79 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
.send_server_notification(ServerNotification::ItemCompleted(notification))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecCommandBegin(exec_command_begin_event) => {
|
||||
let item = ThreadItem::CommandExecution {
|
||||
id: exec_command_begin_event.call_id.clone(),
|
||||
command: shlex_join(&exec_command_begin_event.command),
|
||||
cwd: exec_command_begin_event.cwd,
|
||||
status: CommandExecutionStatus::InProgress,
|
||||
command_actions: exec_command_begin_event
|
||||
.parsed_cmd
|
||||
.into_iter()
|
||||
.map(V2ParsedCommand::from)
|
||||
.collect(),
|
||||
aggregated_output: None,
|
||||
exit_code: None,
|
||||
duration_ms: None,
|
||||
};
|
||||
let notification = ItemStartedNotification { item };
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::ItemStarted(notification))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecCommandOutputDelta(exec_command_output_delta_event) => {
|
||||
let notification = CommandExecutionOutputDeltaNotification {
|
||||
item_id: exec_command_output_delta_event.call_id.clone(),
|
||||
delta: String::from_utf8_lossy(&exec_command_output_delta_event.chunk).to_string(),
|
||||
};
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::CommandExecutionOutputDelta(
|
||||
notification,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecCommandEnd(exec_command_end_event) => {
|
||||
let ExecCommandEndEvent {
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
aggregated_output,
|
||||
exit_code,
|
||||
duration,
|
||||
..
|
||||
} = exec_command_end_event;
|
||||
|
||||
let status = if exit_code == 0 {
|
||||
CommandExecutionStatus::Completed
|
||||
} else {
|
||||
CommandExecutionStatus::Failed
|
||||
};
|
||||
|
||||
let aggregated_output = if aggregated_output.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(aggregated_output)
|
||||
};
|
||||
|
||||
let duration_ms = i64::try_from(duration.as_millis()).unwrap_or(i64::MAX);
|
||||
|
||||
let item = ThreadItem::CommandExecution {
|
||||
id: call_id,
|
||||
command: shlex_join(&command),
|
||||
cwd,
|
||||
status,
|
||||
command_actions: parsed_cmd.into_iter().map(V2ParsedCommand::from).collect(),
|
||||
aggregated_output,
|
||||
exit_code: Some(exit_code),
|
||||
duration_ms: Some(duration_ms),
|
||||
};
|
||||
|
||||
let notification = ItemCompletedNotification { item };
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::ItemCompleted(notification))
|
||||
.await;
|
||||
}
|
||||
// If this is a TurnAborted, reply to any pending interrupt requests.
|
||||
EventMsg::TurnAborted(turn_aborted_event) => {
|
||||
let pending = {
|
||||
@@ -202,7 +306,7 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
|
||||
async fn on_patch_approval_response(
|
||||
event_id: String,
|
||||
receiver: oneshot::Receiver<JsonRpcResult>,
|
||||
receiver: oneshot::Receiver<JsonValue>,
|
||||
codex: Arc<CodexConversation>,
|
||||
) {
|
||||
let response = receiver.await;
|
||||
@@ -244,7 +348,7 @@ async fn on_patch_approval_response(
|
||||
|
||||
async fn on_exec_approval_response(
|
||||
event_id: String,
|
||||
receiver: oneshot::Receiver<JsonRpcResult>,
|
||||
receiver: oneshot::Receiver<JsonValue>,
|
||||
conversation: Arc<CodexConversation>,
|
||||
) {
|
||||
let response = receiver.await;
|
||||
@@ -278,6 +382,53 @@ async fn on_exec_approval_response(
|
||||
}
|
||||
}
|
||||
|
||||
async fn on_command_execution_request_approval_response(
|
||||
event_id: String,
|
||||
receiver: oneshot::Receiver<JsonValue>,
|
||||
conversation: Arc<CodexConversation>,
|
||||
) {
|
||||
let response = receiver.await;
|
||||
let value = match response {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
error!("request failed: {err:?}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(value)
|
||||
.unwrap_or_else(|err| {
|
||||
error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}");
|
||||
CommandExecutionRequestApprovalResponse {
|
||||
decision: ApprovalDecision::Decline,
|
||||
accept_settings: None,
|
||||
}
|
||||
});
|
||||
|
||||
let CommandExecutionRequestApprovalResponse {
|
||||
decision,
|
||||
accept_settings,
|
||||
} = response;
|
||||
|
||||
let decision = match (decision, accept_settings) {
|
||||
(ApprovalDecision::Accept, Some(settings)) if settings.for_session => {
|
||||
ReviewDecision::ApprovedForSession
|
||||
}
|
||||
(ApprovalDecision::Accept, _) => ReviewDecision::Approved,
|
||||
(ApprovalDecision::Decline, _) => ReviewDecision::Denied,
|
||||
(ApprovalDecision::Cancel, _) => ReviewDecision::Abort,
|
||||
};
|
||||
if let Err(err) = conversation
|
||||
.submit(Op::ExecApproval {
|
||||
id: event_id,
|
||||
decision,
|
||||
})
|
||||
.await
|
||||
{
|
||||
error!("failed to submit ExecApproval: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
/// similar to handle_mcp_tool_call_begin in exec
|
||||
async fn construct_mcp_tool_call_notification(
|
||||
begin_event: McpToolCallBeginEvent,
|
||||
@@ -287,10 +438,7 @@ async fn construct_mcp_tool_call_notification(
|
||||
server: begin_event.invocation.server,
|
||||
tool: begin_event.invocation.tool,
|
||||
status: McpToolCallStatus::InProgress,
|
||||
arguments: begin_event
|
||||
.invocation
|
||||
.arguments
|
||||
.unwrap_or(JsonRpcResult::Null),
|
||||
arguments: begin_event.invocation.arguments.unwrap_or(JsonValue::Null),
|
||||
result: None,
|
||||
error: None,
|
||||
};
|
||||
@@ -328,10 +476,7 @@ async fn construct_mcp_tool_call_end_notification(
|
||||
server: end_event.invocation.server,
|
||||
tool: end_event.invocation.tool,
|
||||
status,
|
||||
arguments: end_event
|
||||
.invocation
|
||||
.arguments
|
||||
.unwrap_or(JsonRpcResult::Null),
|
||||
arguments: end_event.invocation.arguments.unwrap_or(JsonValue::Null),
|
||||
result,
|
||||
error,
|
||||
};
|
||||
|
||||
@@ -1245,7 +1245,7 @@ impl CodexMessageProcessor {
|
||||
// Auto-attach a conversation listener when starting a thread.
|
||||
// Use the same behavior as the v1 API with experimental_raw_events=false.
|
||||
if let Err(err) = self
|
||||
.attach_conversation_listener(conversation_id, false)
|
||||
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
@@ -1523,7 +1523,7 @@ impl CodexMessageProcessor {
|
||||
}) => {
|
||||
// Auto-attach a conversation listener when resuming a thread.
|
||||
if let Err(err) = self
|
||||
.attach_conversation_listener(conversation_id, false)
|
||||
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
@@ -2376,7 +2376,7 @@ impl CodexMessageProcessor {
|
||||
experimental_raw_events,
|
||||
} = params;
|
||||
match self
|
||||
.attach_conversation_listener(conversation_id, experimental_raw_events)
|
||||
.attach_conversation_listener(conversation_id, experimental_raw_events, ApiVersion::V1)
|
||||
.await
|
||||
{
|
||||
Ok(subscription_id) => {
|
||||
@@ -2417,6 +2417,7 @@ impl CodexMessageProcessor {
|
||||
&mut self,
|
||||
conversation_id: ConversationId,
|
||||
experimental_raw_events: bool,
|
||||
api_version: ApiVersion,
|
||||
) -> Result<Uuid, JSONRPCErrorError> {
|
||||
let conversation = match self
|
||||
.conversation_manager
|
||||
@@ -2440,6 +2441,7 @@ impl CodexMessageProcessor {
|
||||
|
||||
let outgoing_for_task = self.outgoing.clone();
|
||||
let pending_interrupts = self.pending_interrupts.clone();
|
||||
let api_version_for_task = api_version;
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
@@ -2495,6 +2497,7 @@ impl CodexMessageProcessor {
|
||||
conversation.clone(),
|
||||
outgoing_for_task.clone(),
|
||||
pending_interrupts.clone(),
|
||||
api_version_for_task,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
|
||||
std::fs::write(
|
||||
config_toml,
|
||||
r#"
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
approval_policy = "on-request"
|
||||
sandbox_mode = "workspace-write"
|
||||
model_reasoning_summary = "detailed"
|
||||
@@ -87,7 +87,7 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
|
||||
}),
|
||||
forced_chatgpt_workspace_id: Some("12345678-0000-0000-0000-000000000000".into()),
|
||||
forced_login_method: Some(ForcedLoginMethod::Chatgpt),
|
||||
model: Some("gpt-5-codex".into()),
|
||||
model: Some("gpt-5.1-codex".into()),
|
||||
model_reasoning_effort: Some(ReasoningEffort::High),
|
||||
model_reasoning_summary: Some(ReasoningSummary::Detailed),
|
||||
model_verbosity: Some(Verbosity::Medium),
|
||||
|
||||
@@ -57,7 +57,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
|
||||
std::fs::write(
|
||||
config_toml,
|
||||
r#"
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "medium"
|
||||
"#,
|
||||
)
|
||||
|
||||
@@ -27,7 +27,7 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
|
||||
// Start a thread.
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
@@ -68,7 +68,7 @@ async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
|
||||
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
@@ -112,7 +112,7 @@ async fn thread_resume_supports_history_and_overrides() -> Result<()> {
|
||||
// Start a thread.
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -29,7 +29,7 @@ async fn thread_start_creates_thread_and_emits_started() -> Result<()> {
|
||||
// Start a v2 thread with an explicit model override.
|
||||
let req_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5".to_string()),
|
||||
model: Some("gpt-5.1".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -5,10 +5,13 @@ use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::create_mock_chat_completions_server_unchecked;
|
||||
use app_test_support::create_shell_sse_response;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::CommandExecutionStatus;
|
||||
use codex_app_server_protocol::ItemStartedNotification;
|
||||
use codex_app_server_protocol::JSONRPCNotification;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::ServerRequest;
|
||||
use codex_app_server_protocol::ThreadItem;
|
||||
use codex_app_server_protocol::ThreadStartParams;
|
||||
use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_app_server_protocol::TurnStartParams;
|
||||
@@ -17,9 +20,6 @@ use codex_app_server_protocol::TurnStartedNotification;
|
||||
use codex_app_server_protocol::UserInput as V2UserInput;
|
||||
use codex_core::protocol_config_types::ReasoningEffort;
|
||||
use codex_core::protocol_config_types::ReasoningSummary;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::Event;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::Path;
|
||||
@@ -235,7 +235,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
.await??;
|
||||
let ThreadStartResponse { thread } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
// turn/start — expect ExecCommandApproval request from server
|
||||
// turn/start — expect CommandExecutionRequestApproval request from server
|
||||
let first_turn_id = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id.clone(),
|
||||
@@ -258,16 +258,10 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
mcp.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
let ServerRequest::ExecCommandApproval { request_id, params } = server_req else {
|
||||
panic!("expected ExecCommandApproval request");
|
||||
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else {
|
||||
panic!("expected CommandExecutionRequestApproval request");
|
||||
};
|
||||
assert_eq!(params.call_id, "call1");
|
||||
assert_eq!(
|
||||
params.parsed_cmd,
|
||||
vec![ParsedCommand::Unknown {
|
||||
cmd: "python3 -c 'print(42)'".to_string()
|
||||
}]
|
||||
);
|
||||
assert_eq!(params.item_id, "call1");
|
||||
|
||||
// Approve and wait for task completion
|
||||
mcp.send_response(
|
||||
@@ -302,7 +296,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
)
|
||||
.await??;
|
||||
|
||||
// Ensure we do NOT receive an ExecCommandApproval request before task completes
|
||||
// Ensure we do NOT receive a CommandExecutionRequestApproval request before task completes
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/task_complete"),
|
||||
@@ -314,8 +308,6 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
// When returning Result from a test, pass an Ok(()) to the skip macro
|
||||
// so the early return type matches. The no-arg form returns unit.
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
@@ -424,29 +416,35 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
)
|
||||
.await??;
|
||||
|
||||
let exec_begin_notification = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
|
||||
)
|
||||
let command_exec_item = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let item_started_notification = mcp
|
||||
.read_stream_until_notification_message("item/started")
|
||||
.await?;
|
||||
let params = item_started_notification
|
||||
.params
|
||||
.clone()
|
||||
.expect("item/started params");
|
||||
let item_started: ItemStartedNotification =
|
||||
serde_json::from_value(params).expect("deserialize item/started notification");
|
||||
if matches!(item_started.item, ThreadItem::CommandExecution { .. }) {
|
||||
return Ok::<ThreadItem, anyhow::Error>(item_started.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let params = exec_begin_notification
|
||||
.params
|
||||
.clone()
|
||||
.expect("exec_command_begin params");
|
||||
let event: Event = serde_json::from_value(params).expect("deserialize exec begin event");
|
||||
let exec_begin = match event.msg {
|
||||
EventMsg::ExecCommandBegin(exec_begin) => exec_begin,
|
||||
other => panic!("expected ExecCommandBegin event, got {other:?}"),
|
||||
let ThreadItem::CommandExecution {
|
||||
cwd,
|
||||
command,
|
||||
status,
|
||||
..
|
||||
} = command_exec_item
|
||||
else {
|
||||
unreachable!("loop ensures we break on command execution items");
|
||||
};
|
||||
assert_eq!(exec_begin.cwd, second_cwd);
|
||||
assert_eq!(
|
||||
exec_begin.command,
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo second turn".to_string()
|
||||
]
|
||||
);
|
||||
assert_eq!(cwd, second_cwd);
|
||||
assert_eq!(command, "bash -lc 'echo second turn'");
|
||||
assert_eq!(status, CommandExecutionStatus::InProgress);
|
||||
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
|
||||
@@ -155,11 +155,11 @@ async fn run_command_under_sandbox(
|
||||
run_windows_sandbox_capture(
|
||||
policy_str,
|
||||
&sandbox_cwd,
|
||||
base_dir.as_path(),
|
||||
command_vec,
|
||||
&cwd_clone,
|
||||
env_map,
|
||||
None,
|
||||
Some(base_dir.as_path()),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -761,9 +761,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn resume_model_flag_applies_when_no_root_flags() {
|
||||
let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5-test"].as_ref());
|
||||
let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5.1-test"].as_ref());
|
||||
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
|
||||
assert!(interactive.resume_picker);
|
||||
assert!(!interactive.resume_last);
|
||||
assert_eq!(interactive.resume_session_id, None);
|
||||
@@ -808,7 +808,7 @@ mod tests {
|
||||
"--ask-for-approval",
|
||||
"on-request",
|
||||
"-m",
|
||||
"gpt-5-test",
|
||||
"gpt-5.1-test",
|
||||
"-p",
|
||||
"my-profile",
|
||||
"-C",
|
||||
@@ -819,7 +819,7 @@ mod tests {
|
||||
.as_ref(),
|
||||
);
|
||||
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
|
||||
assert!(interactive.oss);
|
||||
assert_eq!(interactive.config_profile.as_deref(), Some("my-profile"));
|
||||
assert_matches!(
|
||||
|
||||
@@ -10,6 +10,8 @@ workspace = true
|
||||
clap = { workspace = true, features = ["derive", "wrap_help"], optional = true }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-lmstudio = { workspace = true }
|
||||
codex-ollama = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
serde = { workspace = true, optional = true }
|
||||
|
||||
@@ -37,3 +37,5 @@ pub mod model_presets;
|
||||
// Shared approval presets (AskForApproval + Sandbox) used by TUI and MCP server
|
||||
// Not to be confused with AskForApproval, which we should probably rename to EscalationPolicy.
|
||||
pub mod approval_presets;
|
||||
// Shared OSS provider utilities used by TUI and exec
|
||||
pub mod oss;
|
||||
|
||||
60
codex-rs/common/src/oss.rs
Normal file
60
codex-rs/common/src/oss.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! OSS provider utilities shared between TUI and exec.
|
||||
|
||||
use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
use codex_core::OLLAMA_OSS_PROVIDER_ID;
|
||||
use codex_core::config::Config;
|
||||
|
||||
/// Returns the default model for a given OSS provider.
|
||||
pub fn get_default_model_for_oss_provider(provider_id: &str) -> Option<&'static str> {
|
||||
match provider_id {
|
||||
LMSTUDIO_OSS_PROVIDER_ID => Some(codex_lmstudio::DEFAULT_OSS_MODEL),
|
||||
OLLAMA_OSS_PROVIDER_ID => Some(codex_ollama::DEFAULT_OSS_MODEL),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensures the specified OSS provider is ready (models downloaded, service reachable).
|
||||
pub async fn ensure_oss_provider_ready(
|
||||
provider_id: &str,
|
||||
config: &Config,
|
||||
) -> Result<(), std::io::Error> {
|
||||
match provider_id {
|
||||
LMSTUDIO_OSS_PROVIDER_ID => {
|
||||
codex_lmstudio::ensure_oss_ready(config)
|
||||
.await
|
||||
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
|
||||
}
|
||||
OLLAMA_OSS_PROVIDER_ID => {
|
||||
codex_ollama::ensure_oss_ready(config)
|
||||
.await
|
||||
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
|
||||
}
|
||||
_ => {
|
||||
// Unknown provider, skip setup
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_get_default_model_for_provider_lmstudio() {
|
||||
let result = get_default_model_for_oss_provider(LMSTUDIO_OSS_PROVIDER_ID);
|
||||
assert_eq!(result, Some(codex_lmstudio::DEFAULT_OSS_MODEL));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_default_model_for_provider_ollama() {
|
||||
let result = get_default_model_for_oss_provider(OLLAMA_OSS_PROVIDER_ID);
|
||||
assert_eq!(result, Some(codex_ollama::DEFAULT_OSS_MODEL));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_default_model_for_provider_unknown() {
|
||||
let result = get_default_model_for_oss_provider("unknown-provider");
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
}
|
||||
@@ -22,6 +22,7 @@ chrono = { workspace = true, features = ["serde"] }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-apply-patch = { workspace = true }
|
||||
codex-async-utils = { workspace = true }
|
||||
codex-execpolicy2 = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-git = { workspace = true }
|
||||
codex-keyring-store = { workspace = true }
|
||||
@@ -40,12 +41,7 @@ eventsource-stream = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
http = { workspace = true }
|
||||
indexmap = { workspace = true }
|
||||
keyring = { workspace = true, features = [
|
||||
"apple-native",
|
||||
"crypto-rust",
|
||||
"linux-native-async-persistent",
|
||||
"windows-native",
|
||||
] }
|
||||
keyring = { workspace = true, features = ["crypto-rust"] }
|
||||
libc = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
os_info = { workspace = true }
|
||||
@@ -90,9 +86,11 @@ wildmatch = { workspace = true }
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
landlock = { workspace = true }
|
||||
seccompiler = { workspace = true }
|
||||
keyring = { workspace = true, features = ["linux-native-async-persistent"] }
|
||||
|
||||
[target.'cfg(target_os = "macos")'.dependencies]
|
||||
core-foundation = "0.9"
|
||||
keyring = { workspace = true, features = ["apple-native"] }
|
||||
|
||||
# Build OpenSSL from source for musl builds.
|
||||
[target.x86_64-unknown-linux-musl.dependencies]
|
||||
@@ -102,6 +100,12 @@ openssl-sys = { workspace = true, features = ["vendored"] }
|
||||
[target.aarch64-unknown-linux-musl.dependencies]
|
||||
openssl-sys = { workspace = true, features = ["vendored"] }
|
||||
|
||||
[target.'cfg(target_os = "windows")'.dependencies]
|
||||
keyring = { workspace = true, features = ["windows-native"] }
|
||||
|
||||
[target.'cfg(any(target_os = "freebsd", target_os = "openbsd"))'.dependencies]
|
||||
keyring = { workspace = true, features = ["sync-secret-service"] }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = { workspace = true }
|
||||
assert_matches = { workspace = true }
|
||||
|
||||
@@ -318,8 +318,6 @@ For casual greetings, acknowledgements, or other one-off conversational messages
|
||||
|
||||
When using the shell, you must adhere to the following guidelines:
|
||||
|
||||
- The arguments to `shell` will be passed to execvp().
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
|
||||
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
|
||||
- Read files in chunks with a max chunk size of 250 lines. Do not use python scripts to attempt to output larger chunks of a file. Command line output will be truncated after 10 kilobytes or 256 lines of output, regardless of the command used.
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL
|
||||
|
||||
## General
|
||||
|
||||
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
|
||||
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
|
||||
|
||||
## Editing constraints
|
||||
|
||||
@@ -673,7 +673,9 @@ async fn process_chat_sse<S>(
|
||||
}
|
||||
|
||||
// Emit end-of-turn when finish_reason signals completion.
|
||||
if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
|
||||
if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str())
|
||||
&& !finish_reason.is_empty()
|
||||
{
|
||||
match finish_reason {
|
||||
"tool_calls" if fn_call_state.active => {
|
||||
// First, flush the terminal raw reasoning so UIs can finalize
|
||||
|
||||
@@ -1225,7 +1225,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn error_when_error_event() {
|
||||
let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_689bcf18d7f08194bf3440ba62fe05d803fee0cdac429894","object":"response","created_at":1755041560,"status":"failed","background":false,"error":{"code":"rate_limit_exceeded","message":"Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."}, "usage":null,"user":null,"metadata":{}}}"#;
|
||||
let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_689bcf18d7f08194bf3440ba62fe05d803fee0cdac429894","object":"response","created_at":1755041560,"status":"failed","background":false,"error":{"code":"rate_limit_exceeded","message":"Rate limit reached for gpt-5.1 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."}, "usage":null,"user":null,"metadata":{}}}"#;
|
||||
|
||||
let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
|
||||
let provider = ModelProviderInfo {
|
||||
@@ -1254,7 +1254,7 @@ mod tests {
|
||||
Err(CodexErr::Stream(msg, delay)) => {
|
||||
assert_eq!(
|
||||
msg,
|
||||
"Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."
|
||||
"Rate limit reached for gpt-5.1 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."
|
||||
);
|
||||
assert_eq!(*delay, Some(Duration::from_secs_f64(11.054)));
|
||||
}
|
||||
@@ -1473,7 +1473,7 @@ mod tests {
|
||||
fn test_try_parse_retry_after() {
|
||||
let err = Error {
|
||||
r#type: None,
|
||||
message: Some("Rate limit reached for gpt-5 in organization org- on tokens per min (TPM): Limit 1, Used 1, Requested 19304. Please try again in 28ms. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
|
||||
message: Some("Rate limit reached for gpt-5.1 in organization org- on tokens per min (TPM): Limit 1, Used 1, Requested 19304. Please try again in 28ms. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
|
||||
code: Some("rate_limit_exceeded".to_string()),
|
||||
plan_type: None,
|
||||
resets_at: None
|
||||
@@ -1487,7 +1487,7 @@ mod tests {
|
||||
fn test_try_parse_retry_after_no_delay() {
|
||||
let err = Error {
|
||||
r#type: None,
|
||||
message: Some("Rate limit reached for gpt-5 in organization <ORG> on tokens per min (TPM): Limit 30000, Used 6899, Requested 24050. Please try again in 1.898s. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
|
||||
message: Some("Rate limit reached for gpt-5.1 in organization <ORG> on tokens per min (TPM): Limit 30000, Used 6899, Requested 24050. Please try again in 1.898s. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
|
||||
code: Some("rate_limit_exceeded".to_string()),
|
||||
plan_type: None,
|
||||
resets_at: None
|
||||
|
||||
@@ -136,7 +136,7 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
|
||||
}
|
||||
|
||||
fn is_shell_tool_name(name: &str) -> bool {
|
||||
matches!(name, "shell" | "container.exec")
|
||||
matches!(name, "shell" | "container.exec" | "shell_command")
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@@ -427,7 +427,7 @@ mod tests {
|
||||
expects_apply_patch_instructions: false,
|
||||
},
|
||||
InstructionsTestCase {
|
||||
slug: "gpt-5-codex",
|
||||
slug: "gpt-5.1-codex",
|
||||
expects_apply_patch_instructions: false,
|
||||
},
|
||||
InstructionsTestCase {
|
||||
@@ -457,7 +457,7 @@ mod tests {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5",
|
||||
model: "gpt-5.1",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
@@ -498,7 +498,7 @@ mod tests {
|
||||
create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
|
||||
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5",
|
||||
model: "gpt-5.1",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
@@ -534,7 +534,7 @@ mod tests {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5",
|
||||
model: "gpt-5.1",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
|
||||
@@ -9,8 +9,6 @@ use crate::client_common::REVIEW_PROMPT;
|
||||
use crate::compact;
|
||||
use crate::features::Feature;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::parse_turn_item;
|
||||
use crate::response_processing::process_items;
|
||||
@@ -45,6 +43,7 @@ use mcp_types::ReadResourceResult;
|
||||
use serde_json;
|
||||
use serde_json::Value;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::debug;
|
||||
@@ -57,7 +56,6 @@ use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::config::types::McpServerTransportConfig;
|
||||
use crate::config::types::ShellEnvironmentPolicy;
|
||||
use crate::context_manager::ContextManager;
|
||||
use crate::environment_context::EnvironmentContext;
|
||||
@@ -122,6 +120,7 @@ use crate::user_instructions::UserInstructions;
|
||||
use crate::user_notification::UserNotification;
|
||||
use crate::util::backoff;
|
||||
use codex_async_utils::OrCancelExt;
|
||||
use codex_execpolicy2::Policy as ExecPolicyV2;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
@@ -167,6 +166,10 @@ impl Codex {
|
||||
|
||||
let user_instructions = get_user_instructions(&config).await;
|
||||
|
||||
let exec_policy_v2 =
|
||||
crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
|
||||
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy2: {err}")))?;
|
||||
|
||||
let config = Arc::new(config);
|
||||
|
||||
let session_configuration = SessionConfiguration {
|
||||
@@ -183,6 +186,7 @@ impl Codex {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: config.features.clone(),
|
||||
exec_policy_v2,
|
||||
session_source,
|
||||
};
|
||||
|
||||
@@ -280,6 +284,7 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
|
||||
pub(crate) exec_policy_v2: Option<Arc<ExecPolicyV2>>,
|
||||
}
|
||||
|
||||
impl TurnContext {
|
||||
@@ -336,6 +341,8 @@ pub(crate) struct SessionConfiguration {
|
||||
|
||||
/// Set of feature flags for this session
|
||||
features: Features,
|
||||
/// Optional execpolicy2 policy, applied only when enabled by feature flag.
|
||||
exec_policy_v2: Option<Arc<ExecPolicyV2>>,
|
||||
|
||||
// TODO(pakrym): Remove config from here
|
||||
original_config_do_not_use: Arc<Config>,
|
||||
@@ -436,6 +443,7 @@ impl Session {
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy_v2: session_configuration.exec_policy_v2.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -476,21 +484,13 @@ impl Session {
|
||||
),
|
||||
};
|
||||
|
||||
// Error messages to dispatch after SessionConfigured is sent.
|
||||
let mut post_session_configured_events = Vec::<Event>::new();
|
||||
|
||||
// Kick off independent async setup tasks in parallel to reduce startup latency.
|
||||
//
|
||||
// - initialize RolloutRecorder with new or resumed session info
|
||||
// - spin up MCP connection manager
|
||||
// - perform default shell discovery
|
||||
// - load history metadata
|
||||
let rollout_fut = RolloutRecorder::new(&config, rollout_params);
|
||||
|
||||
let mcp_fut = McpConnectionManager::new(
|
||||
config.mcp_servers.clone(),
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
);
|
||||
let default_shell_fut = shell::default_user_shell();
|
||||
let history_meta_fut = crate::message_history::history_metadata(&config);
|
||||
let auth_statuses_fut = compute_auth_statuses(
|
||||
@@ -499,15 +499,8 @@ impl Session {
|
||||
);
|
||||
|
||||
// Join all independent futures.
|
||||
let (
|
||||
rollout_recorder,
|
||||
mcp_res,
|
||||
default_shell,
|
||||
(history_log_id, history_entry_count),
|
||||
auth_statuses,
|
||||
) = tokio::join!(
|
||||
let (rollout_recorder, default_shell, (history_log_id, history_entry_count), auth_statuses) = tokio::join!(
|
||||
rollout_fut,
|
||||
mcp_fut,
|
||||
default_shell_fut,
|
||||
history_meta_fut,
|
||||
auth_statuses_fut
|
||||
@@ -519,34 +512,7 @@ impl Session {
|
||||
})?;
|
||||
let rollout_path = rollout_recorder.rollout_path.clone();
|
||||
|
||||
// Handle MCP manager result and record any startup failures.
|
||||
let (mcp_connection_manager, failed_clients) = match mcp_res {
|
||||
Ok((mgr, failures)) => (mgr, failures),
|
||||
Err(e) => {
|
||||
let message = format!("Failed to create MCP connection manager: {e:#}");
|
||||
error!("{message}");
|
||||
post_session_configured_events.push(Event {
|
||||
id: INITIAL_SUBMIT_ID.to_owned(),
|
||||
msg: EventMsg::Error(ErrorEvent { message }),
|
||||
});
|
||||
(McpConnectionManager::default(), Default::default())
|
||||
}
|
||||
};
|
||||
|
||||
// Surface individual client start-up failures to the user.
|
||||
if !failed_clients.is_empty() {
|
||||
for (server_name, err) in failed_clients {
|
||||
let auth_entry = auth_statuses.get(&server_name);
|
||||
let display_message = mcp_init_error_display(&server_name, auth_entry, &err);
|
||||
warn!("MCP client for `{server_name}` failed to start: {err:#}");
|
||||
post_session_configured_events.push(Event {
|
||||
id: INITIAL_SUBMIT_ID.to_owned(),
|
||||
msg: EventMsg::Error(ErrorEvent {
|
||||
message: display_message,
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
let mut post_session_configured_events = Vec::<Event>::new();
|
||||
|
||||
for (alias, feature) in session_configuration.features.legacy_feature_usages() {
|
||||
let canonical = feature.key();
|
||||
@@ -595,7 +561,8 @@ impl Session {
|
||||
warm_model_cache(&session_configuration.model);
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager,
|
||||
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
|
||||
mcp_startup_cancellation_token: CancellationToken::new(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(config.notify.clone()),
|
||||
rollout: Mutex::new(Some(rollout_recorder)),
|
||||
@@ -635,6 +602,18 @@ impl Session {
|
||||
for event in events {
|
||||
sess.send_event_raw(event).await;
|
||||
}
|
||||
sess.services
|
||||
.mcp_connection_manager
|
||||
.write()
|
||||
.await
|
||||
.initialize(
|
||||
config.mcp_servers.clone(),
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
auth_statuses.clone(),
|
||||
tx_event.clone(),
|
||||
sess.services.mcp_startup_cancellation_token.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted.
|
||||
sess.record_initial_history(initial_history).await;
|
||||
@@ -894,6 +873,7 @@ impl Session {
|
||||
let parsed_cmd = parse_command(&command);
|
||||
let event = EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
|
||||
call_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
@@ -1258,6 +1238,8 @@ impl Session {
|
||||
) -> anyhow::Result<ListResourcesResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_resources(server, params)
|
||||
.await
|
||||
}
|
||||
@@ -1269,6 +1251,8 @@ impl Session {
|
||||
) -> anyhow::Result<ListResourceTemplatesResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_resource_templates(server, params)
|
||||
.await
|
||||
}
|
||||
@@ -1280,6 +1264,8 @@ impl Session {
|
||||
) -> anyhow::Result<ReadResourceResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.read_resource(server, params)
|
||||
.await
|
||||
}
|
||||
@@ -1292,19 +1278,29 @@ impl Session {
|
||||
) -> anyhow::Result<CallToolResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.call_tool(server, tool, arguments)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
|
||||
pub(crate) async fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.parse_tool_name(tool_name)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn interrupt_task(self: &Arc<Self>) {
|
||||
info!("interrupt received: abort current task, if any");
|
||||
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
let has_active_turn = { self.active_turn.lock().await.is_some() };
|
||||
if has_active_turn {
|
||||
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
} else {
|
||||
self.cancel_mcp_startup().await;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn notifier(&self) -> &UserNotifier {
|
||||
@@ -1318,6 +1314,10 @@ impl Session {
|
||||
fn show_raw_agent_reasoning(&self) -> bool {
|
||||
self.services.show_raw_agent_reasoning
|
||||
}
|
||||
|
||||
async fn cancel_mcp_startup(&self) {
|
||||
self.services.mcp_startup_cancellation_token.cancel();
|
||||
}
|
||||
}
|
||||
|
||||
async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiver<Submission>) {
|
||||
@@ -1575,17 +1575,15 @@ mod handlers {
|
||||
}
|
||||
|
||||
pub async fn list_mcp_tools(sess: &Session, config: &Arc<Config>, sub_id: String) {
|
||||
// This is a cheap lookup from the connection manager's cache.
|
||||
let tools = sess.services.mcp_connection_manager.list_all_tools();
|
||||
let (auth_status_entries, resources, resource_templates) = tokio::join!(
|
||||
let mcp_connection_manager = sess.services.mcp_connection_manager.read().await;
|
||||
let (tools, auth_status_entries, resources, resource_templates) = tokio::join!(
|
||||
mcp_connection_manager.list_all_tools(),
|
||||
compute_auth_statuses(
|
||||
config.mcp_servers.iter(),
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
),
|
||||
sess.services.mcp_connection_manager.list_all_resources(),
|
||||
sess.services
|
||||
.mcp_connection_manager
|
||||
.list_all_resource_templates()
|
||||
mcp_connection_manager.list_all_resources(),
|
||||
mcp_connection_manager.list_all_resource_templates(),
|
||||
);
|
||||
let auth_statuses = auth_status_entries
|
||||
.iter()
|
||||
@@ -1594,7 +1592,10 @@ mod handlers {
|
||||
let event = Event {
|
||||
id: sub_id,
|
||||
msg: EventMsg::McpListToolsResponse(crate::protocol::McpListToolsResponseEvent {
|
||||
tools,
|
||||
tools: tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
resources,
|
||||
resource_templates,
|
||||
auth_statuses,
|
||||
@@ -1767,6 +1768,7 @@ async fn spawn_review_thread(
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy_v2: parent_turn_context.exec_policy_v2.clone(),
|
||||
};
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
@@ -1819,7 +1821,6 @@ pub(crate) async fn run_task(
|
||||
// Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
|
||||
// many turns, from the perspective of the user, it is a single turn.
|
||||
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let mut auto_compact_recently_attempted = false;
|
||||
|
||||
loop {
|
||||
// Note that pending_input would be something like a message the user
|
||||
@@ -1874,27 +1875,12 @@ pub(crate) async fn run_task(
|
||||
let (responses, items_to_record_in_conversation_history) =
|
||||
process_items(processed_items, &sess, &turn_context).await;
|
||||
|
||||
// as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
|
||||
if token_limit_reached {
|
||||
if auto_compact_recently_attempted {
|
||||
let limit_str = limit.to_string();
|
||||
let current_tokens = total_usage_tokens
|
||||
.map(|tokens| tokens.to_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let event = EventMsg::Error(ErrorEvent {
|
||||
message: format!(
|
||||
"Conversation is still above the token limit after automatic summarization (limit {limit_str}, current {current_tokens}). Please start a new session or trim your input."
|
||||
),
|
||||
});
|
||||
sess.send_event(&turn_context, event).await;
|
||||
break;
|
||||
}
|
||||
auto_compact_recently_attempted = true;
|
||||
compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto_compact_recently_attempted = false;
|
||||
|
||||
if responses.is_empty() {
|
||||
last_agent_message = get_last_assistant_message_from_turn(
|
||||
&items_to_record_in_conversation_history,
|
||||
@@ -1940,10 +1926,22 @@ async fn run_turn(
|
||||
input: Vec<ResponseItem>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<TurnRunResult> {
|
||||
let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
|
||||
let mcp_tools = sess
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_tools()
|
||||
.or_cancel(&cancellation_token)
|
||||
.await?;
|
||||
let router = Arc::new(ToolRouter::from_config(
|
||||
&turn_context.tools_config,
|
||||
Some(mcp_tools),
|
||||
Some(
|
||||
mcp_tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
),
|
||||
));
|
||||
|
||||
let model_supports_parallel = turn_context
|
||||
@@ -2112,7 +2110,7 @@ async fn try_run_turn(
|
||||
ResponseEvent::Created => {}
|
||||
ResponseEvent::OutputItemDone(item) => {
|
||||
let previously_active_item = active_item.take();
|
||||
match ToolRouter::build_tool_call(sess.as_ref(), item.clone()) {
|
||||
match ToolRouter::build_tool_call(sess.as_ref(), item.clone()).await {
|
||||
Ok(Some(call)) => {
|
||||
let payload_preview = call.payload.log_payload().into_owned();
|
||||
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
|
||||
@@ -2323,59 +2321,6 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
|
||||
})
|
||||
}
|
||||
|
||||
fn mcp_init_error_display(
|
||||
server_name: &str,
|
||||
entry: Option<&McpAuthStatusEntry>,
|
||||
err: &anyhow::Error,
|
||||
) -> String {
|
||||
if let Some(McpServerTransportConfig::StreamableHttp {
|
||||
url,
|
||||
bearer_token_env_var,
|
||||
http_headers,
|
||||
..
|
||||
}) = &entry.map(|entry| &entry.config.transport)
|
||||
&& url == "https://api.githubcopilot.com/mcp/"
|
||||
&& bearer_token_env_var.is_none()
|
||||
&& http_headers.as_ref().map(HashMap::is_empty).unwrap_or(true)
|
||||
{
|
||||
// GitHub only supports OAUth for first party MCP clients.
|
||||
// That means that the user has to specify a personal access token either via bearer_token_env_var or http_headers.
|
||||
// https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
|
||||
format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
)
|
||||
} else if is_mcp_client_auth_required_error(err) {
|
||||
format!(
|
||||
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
|
||||
)
|
||||
} else if is_mcp_client_startup_timeout_error(err) {
|
||||
let startup_timeout_secs = match entry {
|
||||
Some(entry) => match entry.config.startup_timeout_sec {
|
||||
Some(timeout) => timeout,
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
},
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
}
|
||||
.as_secs();
|
||||
format!(
|
||||
"MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
|
||||
)
|
||||
} else {
|
||||
format!("MCP client for `{server_name}` failed to start: {err:#}")
|
||||
}
|
||||
}
|
||||
|
||||
fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
|
||||
// StreamableHttpError::AuthRequired from the MCP SDK.
|
||||
error.to_string().contains("Auth required")
|
||||
}
|
||||
|
||||
fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
|
||||
let error_message = error.to_string();
|
||||
error_message.contains("request timed out")
|
||||
|| error_message.contains("timed out handshaking with MCP server")
|
||||
}
|
||||
|
||||
use crate::features::Features;
|
||||
#[cfg(test)]
|
||||
pub(crate) use tests::make_session_and_context;
|
||||
@@ -2385,10 +2330,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::config::ConfigOverrides;
|
||||
use crate::config::ConfigToml;
|
||||
use crate::config::types::McpServerConfig;
|
||||
use crate::config::types::McpServerTransportConfig;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::tools::format_exec_output_str;
|
||||
|
||||
use crate::protocol::CompactedItem;
|
||||
@@ -2408,7 +2350,6 @@ mod tests {
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::McpAuthStatus;
|
||||
use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
|
||||
@@ -2616,13 +2557,15 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy_v2: None,
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
let state = SessionState::new(session_configuration.clone());
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
|
||||
mcp_startup_cancellation_token: CancellationToken::new(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
@@ -2692,13 +2635,15 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy_v2: None,
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
let state = SessionState::new(session_configuration.clone());
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
|
||||
mcp_startup_cancellation_token: CancellationToken::new(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
@@ -2879,9 +2824,23 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn fatal_tool_error_stops_turn_and_reports_error() {
|
||||
let (session, turn_context, _rx) = make_session_and_context_with_rx();
|
||||
let tools = {
|
||||
session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_tools()
|
||||
.await
|
||||
};
|
||||
let router = ToolRouter::from_config(
|
||||
&turn_context.tools_config,
|
||||
Some(session.services.mcp_connection_manager.list_all_tools()),
|
||||
Some(
|
||||
tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
),
|
||||
);
|
||||
let item = ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
@@ -2892,6 +2851,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let call = ToolRouter::build_tool_call(session.as_ref(), item.clone())
|
||||
.await
|
||||
.expect("build tool call")
|
||||
.expect("tool call present");
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
@@ -3141,7 +3101,6 @@ mod tests {
|
||||
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
|
||||
assert!(exec_output.output.contains("hi"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
use crate::protocol::AskForApproval;
|
||||
@@ -3183,89 +3142,4 @@ mod tests {
|
||||
|
||||
pretty_assertions::assert_eq!(output, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_prompts_for_github_pat() {
|
||||
let server_name = "github";
|
||||
let entry = McpAuthStatusEntry {
|
||||
config: McpServerConfig {
|
||||
transport: McpServerTransportConfig::StreamableHttp {
|
||||
url: "https://api.githubcopilot.com/mcp/".to_string(),
|
||||
bearer_token_env_var: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: None,
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
auth_status: McpAuthStatus::Unsupported,
|
||||
};
|
||||
let err = anyhow::anyhow!("OAuth is unsupported");
|
||||
|
||||
let display = mcp_init_error_display(server_name, Some(&entry), &err);
|
||||
|
||||
let expected = format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
);
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_prompts_for_login_when_auth_required() {
|
||||
let server_name = "example";
|
||||
let err = anyhow::anyhow!("Auth required for server");
|
||||
|
||||
let display = mcp_init_error_display(server_name, None, &err);
|
||||
|
||||
let expected = format!(
|
||||
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
|
||||
);
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_reports_generic_errors() {
|
||||
let server_name = "custom";
|
||||
let entry = McpAuthStatusEntry {
|
||||
config: McpServerConfig {
|
||||
transport: McpServerTransportConfig::StreamableHttp {
|
||||
url: "https://example.com".to_string(),
|
||||
bearer_token_env_var: Some("TOKEN".to_string()),
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: None,
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
auth_status: McpAuthStatus::Unsupported,
|
||||
};
|
||||
let err = anyhow::anyhow!("boom");
|
||||
|
||||
let display = mcp_init_error_display(server_name, Some(&entry), &err);
|
||||
|
||||
let expected = format!("MCP client for `{server_name}` failed to start: {err:#}");
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_includes_startup_timeout_hint() {
|
||||
let server_name = "slow";
|
||||
let err = anyhow::anyhow!("request timed out");
|
||||
|
||||
let display = mcp_init_error_display(server_name, None, &err);
|
||||
|
||||
assert_eq!(
|
||||
"MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX",
|
||||
display
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ use futures::prelude::*;
|
||||
use tracing::error;
|
||||
|
||||
pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt.md");
|
||||
pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
|
||||
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
|
||||
|
||||
pub(crate) async fn run_inline_auto_compact_task(
|
||||
@@ -140,7 +141,9 @@ async fn run_compact_task_inner(
|
||||
}
|
||||
|
||||
let history_snapshot = sess.clone_history().await.get_history();
|
||||
let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
|
||||
let summary_suffix =
|
||||
get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
|
||||
let summary_text = format!("{SUMMARY_PREFIX}\n{summary_suffix}");
|
||||
let user_messages = collect_user_messages(&history_snapshot);
|
||||
|
||||
let initial_context = sess.build_initial_context(turn_context.as_ref());
|
||||
@@ -201,12 +204,22 @@ pub(crate) fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter_map(|item| match crate::event_mapping::parse_turn_item(item) {
|
||||
Some(TurnItem::UserMessage(user)) => Some(user.message()),
|
||||
Some(TurnItem::UserMessage(user)) => {
|
||||
if is_summary_message(&user.message()) {
|
||||
None
|
||||
} else {
|
||||
Some(user.message())
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn is_summary_message(message: &str) -> bool {
|
||||
message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str())
|
||||
}
|
||||
|
||||
pub(crate) fn build_compacted_history(
|
||||
initial_context: Vec<ResponseItem>,
|
||||
user_messages: &[String],
|
||||
|
||||
@@ -584,7 +584,7 @@ mod tests {
|
||||
codex_home,
|
||||
None,
|
||||
&[ConfigEdit::SetModel {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
effort: Some(ReasoningEffort::High),
|
||||
}],
|
||||
)
|
||||
@@ -592,7 +592,7 @@ mod tests {
|
||||
|
||||
let contents =
|
||||
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
let expected = r#"model = "gpt-5-codex"
|
||||
let expected = r#"model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "high"
|
||||
"#;
|
||||
assert_eq!(contents, expected);
|
||||
@@ -722,7 +722,7 @@ model = "o5-preview"
|
||||
std::fs::write(
|
||||
codex_home.join(CONFIG_TOML_FILE),
|
||||
r#"[profiles."team a"]
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
"#,
|
||||
)
|
||||
.expect("seed");
|
||||
@@ -972,14 +972,14 @@ B = \"2\"
|
||||
let codex_home = tmp.path().to_path_buf();
|
||||
|
||||
ConfigEditsBuilder::new(&codex_home)
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
|
||||
.apply()
|
||||
.await
|
||||
.expect("persist");
|
||||
|
||||
let contents =
|
||||
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
let expected = r#"model = "gpt-5-codex"
|
||||
let expected = r#"model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "high"
|
||||
"#;
|
||||
assert_eq!(contents, expected);
|
||||
@@ -1001,11 +1001,11 @@ model_reasoning_effort = "low"
|
||||
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
assert_eq!(contents, initial_expected);
|
||||
|
||||
let updated_expected = r#"model = "gpt-5-codex"
|
||||
let updated_expected = r#"model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "high"
|
||||
"#;
|
||||
ConfigEditsBuilder::new(codex_home)
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
|
||||
.apply_blocking()
|
||||
.expect("persist update");
|
||||
contents = std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
|
||||
@@ -25,7 +25,9 @@ use crate::git_info::resolve_root_git_project_for_trust;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::model_family::derive_default_model_family;
|
||||
use crate::model_family::find_family_for_model;
|
||||
use crate::model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::OLLAMA_OSS_PROVIDER_ID;
|
||||
use crate::model_provider_info::built_in_model_providers;
|
||||
use crate::openai_model_info::get_model_info;
|
||||
use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME;
|
||||
@@ -60,11 +62,11 @@ pub mod profile;
|
||||
pub mod types;
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1";
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
|
||||
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
|
||||
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex";
|
||||
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex";
|
||||
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex";
|
||||
|
||||
/// Maximum number of bytes of the documentation that will be embedded. Larger
|
||||
/// files are *silently truncated* to this size so we do not take up too much of
|
||||
@@ -79,7 +81,7 @@ pub struct Config {
|
||||
/// Optional override of model selection.
|
||||
pub model: String,
|
||||
|
||||
/// Model used specifically for review sessions. Defaults to "gpt-5-codex".
|
||||
/// Model used specifically for review sessions. Defaults to "gpt-5.1-codex".
|
||||
pub review_model: String,
|
||||
|
||||
pub model_family: ModelFamily,
|
||||
@@ -466,6 +468,48 @@ pub fn set_project_trust_level(
|
||||
.apply_blocking()
|
||||
}
|
||||
|
||||
/// Save the default OSS provider preference to config.toml
|
||||
pub fn set_default_oss_provider(codex_home: &Path, provider: &str) -> std::io::Result<()> {
|
||||
// Validate that the provider is one of the known OSS providers
|
||||
match provider {
|
||||
LMSTUDIO_OSS_PROVIDER_ID | OLLAMA_OSS_PROVIDER_ID => {
|
||||
// Valid provider, continue
|
||||
}
|
||||
_ => {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"Invalid OSS provider '{provider}'. Must be one of: {LMSTUDIO_OSS_PROVIDER_ID}, {OLLAMA_OSS_PROVIDER_ID}"
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
||||
|
||||
// Read existing config or create empty string if file doesn't exist
|
||||
let content = match std::fs::read_to_string(&config_path) {
|
||||
Ok(content) => content,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
// Parse as DocumentMut for editing while preserving structure
|
||||
let mut doc = content.parse::<DocumentMut>().map_err(|e| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("failed to parse config.toml: {e}"),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Set the default_oss_provider at root level
|
||||
use toml_edit::value;
|
||||
doc["oss_provider"] = value(provider);
|
||||
|
||||
// Write the modified document back
|
||||
std::fs::write(&config_path, doc.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Apply a single dotted-path override onto a TOML value.
|
||||
fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
|
||||
use toml::value::Table;
|
||||
@@ -663,6 +707,8 @@ pub struct ConfigToml {
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
/// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama".
|
||||
pub oss_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl From<ConfigToml> for UserSavedConfig {
|
||||
@@ -851,6 +897,34 @@ pub struct ConfigOverrides {
|
||||
pub additional_writable_roots: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
/// Resolves the OSS provider from CLI override, profile config, or global config.
|
||||
/// Returns `None` if no provider is configured at any level.
|
||||
pub fn resolve_oss_provider(
|
||||
explicit_provider: Option<&str>,
|
||||
config_toml: &ConfigToml,
|
||||
config_profile: Option<String>,
|
||||
) -> Option<String> {
|
||||
if let Some(provider) = explicit_provider {
|
||||
// Explicit provider specified (e.g., via --local-provider)
|
||||
Some(provider.to_string())
|
||||
} else {
|
||||
// Check profile config first, then global config
|
||||
let profile = config_toml.get_config_profile(config_profile).ok();
|
||||
if let Some(profile) = &profile {
|
||||
// Check if profile has an oss provider
|
||||
if let Some(profile_oss_provider) = &profile.oss_provider {
|
||||
Some(profile_oss_provider.clone())
|
||||
}
|
||||
// If not then check if the toml has an oss provider
|
||||
else {
|
||||
config_toml.oss_provider.clone()
|
||||
}
|
||||
} else {
|
||||
config_toml.oss_provider.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Meant to be used exclusively for tests: `load_with_overrides()` should
|
||||
/// be used in all other cases.
|
||||
@@ -2542,7 +2616,7 @@ url = "https://example.com/mcp"
|
||||
let codex_home = TempDir::new()?;
|
||||
|
||||
ConfigEditsBuilder::new(codex_home.path())
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
|
||||
.apply()
|
||||
.await?;
|
||||
|
||||
@@ -2550,7 +2624,7 @@ url = "https://example.com/mcp"
|
||||
tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
|
||||
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
||||
|
||||
assert_eq!(parsed.model.as_deref(), Some("gpt-5-codex"));
|
||||
assert_eq!(parsed.model.as_deref(), Some("gpt-5.1-codex"));
|
||||
assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
|
||||
|
||||
Ok(())
|
||||
@@ -2564,7 +2638,7 @@ url = "https://example.com/mcp"
|
||||
tokio::fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "medium"
|
||||
|
||||
[profiles.dev]
|
||||
@@ -2600,7 +2674,7 @@ model = "gpt-4.1"
|
||||
|
||||
ConfigEditsBuilder::new(codex_home.path())
|
||||
.with_profile(Some("dev"))
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::Medium))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::Medium))
|
||||
.apply()
|
||||
.await?;
|
||||
|
||||
@@ -2612,7 +2686,7 @@ model = "gpt-4.1"
|
||||
.get("dev")
|
||||
.expect("profile should be created");
|
||||
|
||||
assert_eq!(profile.model.as_deref(), Some("gpt-5-codex"));
|
||||
assert_eq!(profile.model.as_deref(), Some("gpt-5.1-codex"));
|
||||
assert_eq!(
|
||||
profile.model_reasoning_effort,
|
||||
Some(ReasoningEffort::Medium)
|
||||
@@ -2634,7 +2708,7 @@ model = "gpt-4"
|
||||
model_reasoning_effort = "medium"
|
||||
|
||||
[profiles.prod]
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
@@ -2663,7 +2737,7 @@ model = "gpt-5-codex"
|
||||
.profiles
|
||||
.get("prod")
|
||||
.and_then(|profile| profile.model.as_deref()),
|
||||
Some("gpt-5-codex"),
|
||||
Some("gpt-5.1-codex"),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -2778,7 +2852,7 @@ model_provider = "openai"
|
||||
approval_policy = "on-failure"
|
||||
|
||||
[profiles.gpt5]
|
||||
model = "gpt-5"
|
||||
model = "gpt-5.1"
|
||||
model_provider = "openai"
|
||||
approval_policy = "on-failure"
|
||||
model_reasoning_effort = "high"
|
||||
@@ -3094,9 +3168,9 @@ model_verbosity = "high"
|
||||
fixture.codex_home(),
|
||||
)?;
|
||||
let expected_gpt5_profile_config = Config {
|
||||
model: "gpt-5".to_string(),
|
||||
model: "gpt-5.1".to_string(),
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("gpt-5").expect("known model slug"),
|
||||
model_family: find_family_for_model("gpt-5.1").expect("known model slug"),
|
||||
model_context_window: Some(272_000),
|
||||
model_max_output_tokens: Some(128_000),
|
||||
model_auto_compact_token_limit: Some(244_800),
|
||||
@@ -3265,6 +3339,41 @@ trust_level = "trusted"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_default_oss_provider() -> std::io::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let codex_home = temp_dir.path();
|
||||
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
||||
|
||||
// Test setting valid provider on empty config
|
||||
set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
|
||||
let content = std::fs::read_to_string(&config_path)?;
|
||||
assert!(content.contains("oss_provider = \"ollama\""));
|
||||
|
||||
// Test updating existing config
|
||||
std::fs::write(&config_path, "model = \"gpt-4\"\n")?;
|
||||
set_default_oss_provider(codex_home, LMSTUDIO_OSS_PROVIDER_ID)?;
|
||||
let content = std::fs::read_to_string(&config_path)?;
|
||||
assert!(content.contains("oss_provider = \"lmstudio\""));
|
||||
assert!(content.contains("model = \"gpt-4\""));
|
||||
|
||||
// Test overwriting existing oss_provider
|
||||
set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
|
||||
let content = std::fs::read_to_string(&config_path)?;
|
||||
assert!(content.contains("oss_provider = \"ollama\""));
|
||||
assert!(!content.contains("oss_provider = \"lmstudio\""));
|
||||
|
||||
// Test invalid provider
|
||||
let result = set_default_oss_provider(codex_home, "invalid_provider");
|
||||
assert!(result.is_err());
|
||||
let error = result.unwrap_err();
|
||||
assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
|
||||
assert!(error.to_string().contains("Invalid OSS provider"));
|
||||
assert!(error.to_string().contains("invalid_provider"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_untrusted_project_gets_workspace_write_sandbox() -> anyhow::Result<()> {
|
||||
let config_with_untrusted = r#"
|
||||
@@ -3295,6 +3404,85 @@ trust_level = "untrusted"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_explicit_override() {
|
||||
let config_toml = ConfigToml::default();
|
||||
let result = resolve_oss_provider(Some("custom-provider"), &config_toml, None);
|
||||
assert_eq!(result, Some("custom-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_from_profile() {
|
||||
let mut profiles = std::collections::HashMap::new();
|
||||
let profile = ConfigProfile {
|
||||
oss_provider: Some("profile-provider".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
profiles.insert("test-profile".to_string(), profile);
|
||||
let config_toml = ConfigToml {
|
||||
profiles,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
|
||||
assert_eq!(result, Some("profile-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_from_global_config() {
|
||||
let config_toml = ConfigToml {
|
||||
oss_provider: Some("global-provider".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(None, &config_toml, None);
|
||||
assert_eq!(result, Some("global-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_profile_fallback_to_global() {
|
||||
let mut profiles = std::collections::HashMap::new();
|
||||
let profile = ConfigProfile::default(); // No oss_provider set
|
||||
profiles.insert("test-profile".to_string(), profile);
|
||||
let config_toml = ConfigToml {
|
||||
oss_provider: Some("global-provider".to_string()),
|
||||
profiles,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
|
||||
assert_eq!(result, Some("global-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_none_when_not_configured() {
|
||||
let config_toml = ConfigToml::default();
|
||||
let result = resolve_oss_provider(None, &config_toml, None);
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_explicit_overrides_all() {
|
||||
let mut profiles = std::collections::HashMap::new();
|
||||
let profile = ConfigProfile {
|
||||
oss_provider: Some("profile-provider".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
profiles.insert("test-profile".to_string(), profile);
|
||||
let config_toml = ConfigToml {
|
||||
oss_provider: Some("global-provider".to_string()),
|
||||
profiles,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(
|
||||
Some("explicit-provider"),
|
||||
&config_toml,
|
||||
Some("test-profile".to_string()),
|
||||
);
|
||||
assert_eq!(result, Some("explicit-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_untrusted_project_gets_unless_trusted_approval_policy() -> std::io::Result<()> {
|
||||
let codex_home = TempDir::new()?;
|
||||
|
||||
@@ -33,6 +33,7 @@ pub struct ConfigProfile {
|
||||
/// Optional feature toggles scoped to this profile.
|
||||
#[serde(default)]
|
||||
pub features: Option<crate::features::FeaturesToml>,
|
||||
pub oss_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl From<ConfigProfile> for codex_app_server_protocol::Profile {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use crate::codex::TurnContext;
|
||||
use crate::context_manager::normalize;
|
||||
use crate::context_manager::truncate;
|
||||
use crate::context_manager::truncate::format_output_for_model_body;
|
||||
use crate::context_manager::truncate::globally_truncate_function_output_items;
|
||||
use crate::truncate;
|
||||
use crate::truncate::format_output_for_model_body;
|
||||
use crate::truncate::globally_truncate_function_output_items;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use super::*;
|
||||
use crate::context_manager::MODEL_FORMAT_MAX_LINES;
|
||||
use crate::context_manager::truncate;
|
||||
use crate::truncate;
|
||||
use codex_git::GhostCommit;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
mod history;
|
||||
mod normalize;
|
||||
mod truncate;
|
||||
|
||||
pub(crate) use crate::truncate::MODEL_FORMAT_MAX_BYTES;
|
||||
pub(crate) use crate::truncate::MODEL_FORMAT_MAX_LINES;
|
||||
pub(crate) use crate::truncate::format_output_for_model_body;
|
||||
pub(crate) use history::ContextManager;
|
||||
pub(crate) use truncate::MODEL_FORMAT_MAX_BYTES;
|
||||
pub(crate) use truncate::MODEL_FORMAT_MAX_LINES;
|
||||
pub(crate) use truncate::format_output_for_model_body;
|
||||
|
||||
@@ -1,148 +0,0 @@
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use codex_utils_string::take_last_bytes_at_char_boundary;
|
||||
|
||||
use crate::util::error_or_panic;
|
||||
|
||||
// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
|
||||
pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
|
||||
pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
|
||||
|
||||
pub(crate) fn globally_truncate_function_output_items(
|
||||
items: &[FunctionCallOutputContentItem],
|
||||
) -> Vec<FunctionCallOutputContentItem> {
|
||||
let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
|
||||
let mut remaining = MODEL_FORMAT_MAX_BYTES;
|
||||
let mut omitted_text_items = 0usize;
|
||||
|
||||
for it in items {
|
||||
match it {
|
||||
FunctionCallOutputContentItem::InputText { text } => {
|
||||
if remaining == 0 {
|
||||
omitted_text_items += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let len = text.len();
|
||||
if len <= remaining {
|
||||
out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
|
||||
remaining -= len;
|
||||
} else {
|
||||
let slice = take_bytes_at_char_boundary(text, remaining);
|
||||
if !slice.is_empty() {
|
||||
out.push(FunctionCallOutputContentItem::InputText {
|
||||
text: slice.to_string(),
|
||||
});
|
||||
}
|
||||
remaining = 0;
|
||||
}
|
||||
}
|
||||
// todo(aibrahim): handle input images; resize
|
||||
FunctionCallOutputContentItem::InputImage { image_url } => {
|
||||
out.push(FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_url.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if omitted_text_items > 0 {
|
||||
out.push(FunctionCallOutputContentItem::InputText {
|
||||
text: format!("[omitted {omitted_text_items} text items ...]"),
|
||||
});
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
pub(crate) fn format_output_for_model_body(
|
||||
content: &str,
|
||||
limit_bytes: usize,
|
||||
limit_lines: usize,
|
||||
) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
|
||||
if content.len() <= limit_bytes && total_lines <= limit_lines {
|
||||
return content.to_string();
|
||||
}
|
||||
let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines);
|
||||
format!("Total output lines: {total_lines}\n\n{output}")
|
||||
}
|
||||
|
||||
fn truncate_formatted_exec_output(
|
||||
content: &str,
|
||||
total_lines: usize,
|
||||
limit_bytes: usize,
|
||||
limit_lines: usize,
|
||||
) -> String {
|
||||
debug_panic_on_double_truncation(content);
|
||||
let head_lines: usize = limit_lines / 2;
|
||||
let tail_lines: usize = limit_lines - head_lines; // 128
|
||||
let head_bytes: usize = limit_bytes / 2;
|
||||
let segments: Vec<&str> = content.split_inclusive('\n').collect();
|
||||
let head_take = head_lines.min(segments.len());
|
||||
let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
|
||||
let omitted = segments.len().saturating_sub(head_take + tail_take);
|
||||
|
||||
let head_slice_end: usize = segments
|
||||
.iter()
|
||||
.take(head_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum();
|
||||
let tail_slice_start: usize = if tail_take == 0 {
|
||||
content.len()
|
||||
} else {
|
||||
content.len()
|
||||
- segments
|
||||
.iter()
|
||||
.rev()
|
||||
.take(tail_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum::<usize>()
|
||||
};
|
||||
let head_slice = &content[..head_slice_end];
|
||||
let tail_slice = &content[tail_slice_start..];
|
||||
let truncated_by_bytes = content.len() > limit_bytes;
|
||||
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
|
||||
let marker = if omitted > 0 {
|
||||
Some(format!(
|
||||
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
|
||||
))
|
||||
} else if truncated_by_bytes {
|
||||
Some(format!(
|
||||
"\n[... output truncated to fit {limit_bytes} bytes ...]\n\n"
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let marker_len = marker.as_ref().map_or(0, String::len);
|
||||
let base_head_budget = head_bytes.min(limit_bytes);
|
||||
let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
|
||||
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
|
||||
let mut result = String::with_capacity(limit_bytes.min(content.len()));
|
||||
|
||||
result.push_str(head_part);
|
||||
if let Some(marker_text) = marker.as_ref() {
|
||||
result.push_str(marker_text);
|
||||
}
|
||||
|
||||
let remaining = limit_bytes.saturating_sub(result.len());
|
||||
if remaining == 0 {
|
||||
return result;
|
||||
}
|
||||
|
||||
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
|
||||
result.push_str(tail_part);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn debug_panic_on_double_truncation(content: &str) {
|
||||
if content.contains("Total output lines:") && content.contains("omitted") {
|
||||
error_or_panic(format!(
|
||||
"FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -189,16 +189,20 @@ async fn exec_windows_sandbox(
|
||||
};
|
||||
|
||||
let sandbox_cwd = cwd.clone();
|
||||
let logs_base_dir = find_codex_home().ok();
|
||||
let codex_home = find_codex_home().map_err(|err| {
|
||||
CodexErr::Io(io::Error::other(format!(
|
||||
"windows sandbox: failed to resolve codex_home: {err}"
|
||||
)))
|
||||
})?;
|
||||
let spawn_res = tokio::task::spawn_blocking(move || {
|
||||
run_windows_sandbox_capture(
|
||||
policy_str,
|
||||
&sandbox_cwd,
|
||||
codex_home.as_ref(),
|
||||
command,
|
||||
&cwd,
|
||||
env,
|
||||
timeout_ms,
|
||||
logs_base_dir.as_deref(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
293
codex-rs/core/src/exec_policy.rs
Normal file
293
codex-rs/core/src/exec_policy.rs
Normal file
@@ -0,0 +1,293 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use codex_execpolicy2::Decision;
|
||||
use codex_execpolicy2::Evaluation;
|
||||
use codex_execpolicy2::Policy;
|
||||
use codex_execpolicy2::PolicyParser;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::bash::parse_shell_lc_plain_commands;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
|
||||
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
|
||||
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ExecPolicyError {
|
||||
#[error("failed to read execpolicy files from {dir}: {source}")]
|
||||
ReadDir {
|
||||
dir: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to read execpolicy file {path}: {source}")]
|
||||
ReadFile {
|
||||
path: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to parse execpolicy file {path}: {source}")]
|
||||
ParsePolicy {
|
||||
path: String,
|
||||
source: codex_execpolicy2::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub(crate) fn exec_policy_for(
|
||||
features: &Features,
|
||||
codex_home: &Path,
|
||||
) -> Result<Option<Arc<Policy>>, ExecPolicyError> {
|
||||
if !features.enabled(Feature::ExecPolicyV2) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let policy_dir = codex_home.to_path_buf();
|
||||
let entries = match fs::read_dir(&policy_dir) {
|
||||
Ok(entries) => entries,
|
||||
Err(source) if source.kind() == std::io::ErrorKind::NotFound => return Ok(None),
|
||||
Err(source) => {
|
||||
return Err(ExecPolicyError::ReadDir {
|
||||
dir: policy_dir,
|
||||
source,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let mut policy_paths: Vec<PathBuf> = Vec::new();
|
||||
for entry in entries {
|
||||
let entry = entry.map_err(|source| ExecPolicyError::ReadDir {
|
||||
dir: policy_dir.clone(),
|
||||
source,
|
||||
})?;
|
||||
let path = entry.path();
|
||||
if path
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.is_some_and(|ext| ext == "codexpolicy")
|
||||
&& path.is_file()
|
||||
{
|
||||
policy_paths.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
policy_paths.sort();
|
||||
|
||||
let mut parser = PolicyParser::new();
|
||||
for policy_path in &policy_paths {
|
||||
let contents =
|
||||
fs::read_to_string(policy_path).map_err(|source| ExecPolicyError::ReadFile {
|
||||
path: policy_path.clone(),
|
||||
source,
|
||||
})?;
|
||||
let identifier = policy_path.to_string_lossy().to_string();
|
||||
parser
|
||||
.parse(&identifier, &contents)
|
||||
.map_err(|source| ExecPolicyError::ParsePolicy {
|
||||
path: identifier,
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
|
||||
let policy = Arc::new(parser.build());
|
||||
tracing::debug!(
|
||||
file_count = policy_paths.len(),
|
||||
"loaded execpolicy2 from {}",
|
||||
policy_dir.display()
|
||||
);
|
||||
|
||||
Ok(Some(policy))
|
||||
}
|
||||
|
||||
fn evaluate_with_policy(
|
||||
policy: &Policy,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
) -> Option<ApprovalRequirement> {
|
||||
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
|
||||
let evaluation = policy.check_multiple(commands.iter());
|
||||
|
||||
match evaluation {
|
||||
Evaluation::Match { decision, .. } => match decision {
|
||||
Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string(),
|
||||
}),
|
||||
Decision::Prompt => {
|
||||
let reason = PROMPT_REASON.to_string();
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
Some(ApprovalRequirement::Forbidden { reason })
|
||||
} else {
|
||||
Some(ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(reason),
|
||||
})
|
||||
}
|
||||
}
|
||||
Decision::Allow => Some(ApprovalRequirement::Skip),
|
||||
},
|
||||
Evaluation::NoMatch => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn approval_requirement_for_command(
|
||||
policy: Option<&Policy>,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
with_escalated_permissions: bool,
|
||||
) -> ApprovalRequirement {
|
||||
if let Some(policy) = policy
|
||||
&& let Some(requirement) = evaluate_with_policy(policy, command, approval_policy)
|
||||
{
|
||||
return requirement;
|
||||
}
|
||||
|
||||
if requires_initial_appoval(
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
command,
|
||||
with_escalated_permissions,
|
||||
) {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[test]
|
||||
fn returns_none_when_feature_disabled() {
|
||||
let features = Features::with_defaults();
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
|
||||
let policy = exec_policy_for(&features, temp_dir.path()).expect("policy result");
|
||||
|
||||
assert!(policy.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn returns_none_when_policy_dir_is_missing() {
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::ExecPolicyV2);
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
let missing_dir = temp_dir.path().join("missing");
|
||||
|
||||
let policy = exec_policy_for(&features, &missing_dir).expect("policy result");
|
||||
|
||||
assert!(policy.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn evaluates_bash_lc_inner_commands() {
|
||||
let policy_src = r#"
|
||||
prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
|
||||
let forbidden_script = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"rm -rf /tmp".to_string(),
|
||||
];
|
||||
|
||||
let requirement =
|
||||
evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
|
||||
.expect("expected match for forbidden command");
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_prefers_execpolicy_match() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
Some(&policy),
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(PROMPT_REASON.to_string())
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_respects_approval_policy() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
Some(&policy),
|
||||
&command,
|
||||
AskForApproval::Never,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: PROMPT_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_falls_back_to_heuristics() {
|
||||
let command = vec!["python".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
None,
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -40,12 +40,16 @@ pub enum Feature {
|
||||
ViewImageTool,
|
||||
/// Allow the model to request web searches.
|
||||
WebSearchRequest,
|
||||
/// Gate the execpolicy2 enforcement for shell/unified exec.
|
||||
ExecPolicyV2,
|
||||
/// Enable the model-based risk assessments for sandboxed commands.
|
||||
SandboxCommandAssessment,
|
||||
/// Create a ghost commit at each turn.
|
||||
GhostCommit,
|
||||
/// Enable Windows sandbox (restricted token) on Windows.
|
||||
WindowsSandbox,
|
||||
/// Enable the default shell tool.
|
||||
ShellTool,
|
||||
}
|
||||
|
||||
impl Feature {
|
||||
@@ -283,6 +287,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ExecPolicyV2,
|
||||
key: "exec_policy_v2",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SandboxCommandAssessment,
|
||||
key: "experimental_sandbox_command_assessment",
|
||||
@@ -301,4 +311,10 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ShellTool,
|
||||
key: "shell_tool",
|
||||
stage: Stage::Stable,
|
||||
default_enabled: true,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -24,6 +24,7 @@ mod environment_context;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
pub mod exec_env;
|
||||
mod exec_policy;
|
||||
pub mod features;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
@@ -40,8 +41,11 @@ pub mod token_data;
|
||||
mod truncate;
|
||||
mod unified_exec;
|
||||
mod user_instructions;
|
||||
pub use model_provider_info::BUILT_IN_OSS_MODEL_PROVIDER_ID;
|
||||
pub use model_provider_info::DEFAULT_LMSTUDIO_PORT;
|
||||
pub use model_provider_info::DEFAULT_OLLAMA_PORT;
|
||||
pub use model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
pub use model_provider_info::ModelProviderInfo;
|
||||
pub use model_provider_info::OLLAMA_OSS_PROVIDER_ID;
|
||||
pub use model_provider_info::WireApi;
|
||||
pub use model_provider_info::built_in_model_providers;
|
||||
pub use model_provider_info::create_oss_provider_with_base_url;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -132,7 +132,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("gpt-3.5") {
|
||||
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("test-gpt-5-codex") {
|
||||
} else if slug.starts_with("test-gpt-5") {
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
|
||||
@@ -258,9 +258,11 @@ impl ModelProviderInfo {
|
||||
}
|
||||
}
|
||||
|
||||
const DEFAULT_OLLAMA_PORT: u32 = 11434;
|
||||
pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;
|
||||
pub const DEFAULT_OLLAMA_PORT: u16 = 11434;
|
||||
|
||||
pub const BUILT_IN_OSS_MODEL_PROVIDER_ID: &str = "oss";
|
||||
pub const LMSTUDIO_OSS_PROVIDER_ID: &str = "lmstudio";
|
||||
pub const OLLAMA_OSS_PROVIDER_ID: &str = "ollama";
|
||||
|
||||
/// Built-in default provider list.
|
||||
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
@@ -311,14 +313,21 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
requires_openai_auth: true,
|
||||
},
|
||||
),
|
||||
(BUILT_IN_OSS_MODEL_PROVIDER_ID, create_oss_provider()),
|
||||
(
|
||||
OLLAMA_OSS_PROVIDER_ID,
|
||||
create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Chat),
|
||||
),
|
||||
(
|
||||
LMSTUDIO_OSS_PROVIDER_ID,
|
||||
create_oss_provider(DEFAULT_LMSTUDIO_PORT, WireApi::Responses),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn create_oss_provider() -> ModelProviderInfo {
|
||||
pub fn create_oss_provider(default_provider_port: u16, wire_api: WireApi) -> ModelProviderInfo {
|
||||
// These CODEX_OSS_ environment variables are experimental: we may
|
||||
// switch to reading values from config.toml instead.
|
||||
let codex_oss_base_url = match std::env::var("CODEX_OSS_BASE_URL")
|
||||
@@ -331,22 +340,21 @@ pub fn create_oss_provider() -> ModelProviderInfo {
|
||||
port = std::env::var("CODEX_OSS_PORT")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty())
|
||||
.and_then(|v| v.parse::<u32>().ok())
|
||||
.unwrap_or(DEFAULT_OLLAMA_PORT)
|
||||
.and_then(|v| v.parse::<u16>().ok())
|
||||
.unwrap_or(default_provider_port)
|
||||
),
|
||||
};
|
||||
|
||||
create_oss_provider_with_base_url(&codex_oss_base_url)
|
||||
create_oss_provider_with_base_url(&codex_oss_base_url, wire_api)
|
||||
}
|
||||
|
||||
pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
|
||||
pub fn create_oss_provider_with_base_url(base_url: &str, wire_api: WireApi) -> ModelProviderInfo {
|
||||
ModelProviderInfo {
|
||||
name: "gpt-oss".into(),
|
||||
base_url: Some(base_url.into()),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Chat,
|
||||
wire_api,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
|
||||
@@ -6,7 +6,7 @@ use shlex::split as shlex_split;
|
||||
use shlex::try_join as shlex_try_join;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn shlex_join(tokens: &[String]) -> String {
|
||||
pub fn shlex_join(tokens: &[String]) -> String {
|
||||
shlex_try_join(tokens.iter().map(String::as_str))
|
||||
.unwrap_or_else(|_| "<command included NUL byte>".to_string())
|
||||
}
|
||||
|
||||
@@ -72,6 +72,8 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
|
||||
| EventMsg::GetHistoryEntryResponse(_)
|
||||
| EventMsg::UndoStarted(_)
|
||||
| EventMsg::McpListToolsResponse(_)
|
||||
| EventMsg::McpStartupUpdate(_)
|
||||
| EventMsg::McpStartupComplete(_)
|
||||
| EventMsg::ListCustomPromptsResponse(_)
|
||||
| EventMsg::PlanUpdate(_)
|
||||
| EventMsg::ShutdownComplete
|
||||
|
||||
@@ -8,9 +8,12 @@ use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::user_notification::UserNotifier;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
pub(crate) struct SessionServices {
|
||||
pub(crate) mcp_connection_manager: McpConnectionManager,
|
||||
pub(crate) mcp_connection_manager: Arc<RwLock<McpConnectionManager>>,
|
||||
pub(crate) mcp_startup_cancellation_token: CancellationToken,
|
||||
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
|
||||
pub(crate) notifier: UserNotifier,
|
||||
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
|
||||
|
||||
@@ -65,22 +65,24 @@ impl SessionTask for UserShellCommandTask {
|
||||
// allows commands that use shell features (pipes, &&, redirects, etc.).
|
||||
// We do not source rc files or otherwise reformat the script.
|
||||
let use_login_shell = true;
|
||||
let shell_invocation = session
|
||||
let command = session
|
||||
.user_shell()
|
||||
.derive_exec_args(&self.command, use_login_shell);
|
||||
|
||||
let call_id = Uuid::new_v4().to_string();
|
||||
let raw_command = self.command.clone();
|
||||
let cwd = turn_context.cwd.clone();
|
||||
|
||||
let parsed_cmd = parse_command(&shell_invocation);
|
||||
let parsed_cmd = parse_command(&command);
|
||||
session
|
||||
.send_event(
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: call_id.clone(),
|
||||
command: shell_invocation.clone(),
|
||||
cwd: turn_context.cwd.clone(),
|
||||
parsed_cmd,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
}),
|
||||
@@ -88,8 +90,8 @@ impl SessionTask for UserShellCommandTask {
|
||||
.await;
|
||||
|
||||
let exec_env = ExecEnv {
|
||||
command: shell_invocation,
|
||||
cwd: turn_context.cwd.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
timeout_ms: None,
|
||||
sandbox: SandboxType::None,
|
||||
@@ -129,6 +131,12 @@ impl SessionTask for UserShellCommandTask {
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: aborted_message.clone(),
|
||||
aggregated_output: aborted_message.clone(),
|
||||
@@ -145,6 +153,12 @@ impl SessionTask for UserShellCommandTask {
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: call_id.clone(),
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
stdout: output.stdout.text.clone(),
|
||||
stderr: output.stderr.text.clone(),
|
||||
aggregated_output: output.aggregated_output.text.clone(),
|
||||
@@ -176,6 +190,12 @@ impl SessionTask for UserShellCommandTask {
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
stdout: exec_output.stdout.text.clone(),
|
||||
stderr: exec_output.stderr.text.clone(),
|
||||
aggregated_output: exec_output.aggregated_output.text.clone(),
|
||||
|
||||
@@ -15,6 +15,7 @@ use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
@@ -61,6 +62,7 @@ pub(crate) async fn emit_exec_command_begin(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
command: &[String],
|
||||
cwd: &Path,
|
||||
parsed_cmd: &[ParsedCommand],
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<String>,
|
||||
) {
|
||||
@@ -69,9 +71,10 @@ pub(crate) async fn emit_exec_command_begin(
|
||||
ctx.turn,
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
turn_id: ctx.turn.sub_id.clone(),
|
||||
command: command.to_vec(),
|
||||
cwd: cwd.to_path_buf(),
|
||||
parsed_cmd: parse_command(command),
|
||||
parsed_cmd: parsed_cmd.to_vec(),
|
||||
source,
|
||||
interaction_input,
|
||||
}),
|
||||
@@ -84,6 +87,7 @@ pub(crate) enum ToolEmitter {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
source: ExecCommandSource,
|
||||
parsed_cmd: Vec<ParsedCommand>,
|
||||
},
|
||||
ApplyPatch {
|
||||
changes: HashMap<PathBuf, FileChange>,
|
||||
@@ -94,15 +98,18 @@ pub(crate) enum ToolEmitter {
|
||||
cwd: PathBuf,
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<String>,
|
||||
parsed_cmd: Vec<ParsedCommand>,
|
||||
},
|
||||
}
|
||||
|
||||
impl ToolEmitter {
|
||||
pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
|
||||
let parsed_cmd = parse_command(&command);
|
||||
Self::Shell {
|
||||
command,
|
||||
cwd,
|
||||
source,
|
||||
parsed_cmd,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,11 +126,13 @@ impl ToolEmitter {
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<String>,
|
||||
) -> Self {
|
||||
let parsed_cmd = parse_command(command);
|
||||
Self::UnifiedExec {
|
||||
command: command.to_vec(),
|
||||
cwd,
|
||||
source,
|
||||
interaction_input,
|
||||
parsed_cmd,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,44 +143,14 @@ impl ToolEmitter {
|
||||
command,
|
||||
cwd,
|
||||
source,
|
||||
parsed_cmd,
|
||||
},
|
||||
ToolEventStage::Begin,
|
||||
stage,
|
||||
) => {
|
||||
emit_exec_command_begin(ctx, command, cwd.as_path(), *source, None).await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
emit_exec_stage(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
message.clone(),
|
||||
ExecCommandInput::new(command, cwd.as_path(), parsed_cmd, *source, None),
|
||||
stage,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
@@ -231,57 +210,20 @@ impl ToolEmitter {
|
||||
cwd,
|
||||
source,
|
||||
interaction_input,
|
||||
parsed_cmd,
|
||||
},
|
||||
ToolEventStage::Begin,
|
||||
stage,
|
||||
) => {
|
||||
emit_exec_command_begin(
|
||||
emit_exec_stage(
|
||||
ctx,
|
||||
command,
|
||||
cwd.as_path(),
|
||||
*source,
|
||||
interaction_input.clone(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::UnifiedExec { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::UnifiedExec { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Output(output)),
|
||||
) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::UnifiedExec { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)),
|
||||
) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
message.clone(),
|
||||
ExecCommandInput::new(
|
||||
command,
|
||||
cwd.as_path(),
|
||||
parsed_cmd,
|
||||
*source,
|
||||
interaction_input.as_deref(),
|
||||
),
|
||||
stage,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
@@ -340,26 +282,107 @@ impl ToolEmitter {
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
struct ExecCommandInput<'a> {
|
||||
command: &'a [String],
|
||||
cwd: &'a Path,
|
||||
parsed_cmd: &'a [ParsedCommand],
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl<'a> ExecCommandInput<'a> {
|
||||
fn new(
|
||||
command: &'a [String],
|
||||
cwd: &'a Path,
|
||||
parsed_cmd: &'a [ParsedCommand],
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<&'a str>,
|
||||
) -> Self {
|
||||
Self {
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source,
|
||||
interaction_input,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ExecCommandResult {
|
||||
stdout: String,
|
||||
stderr: String,
|
||||
aggregated_output: String,
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
formatted_output: String,
|
||||
}
|
||||
|
||||
async fn emit_exec_stage(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
exec_input: ExecCommandInput<'_>,
|
||||
stage: ToolEventStage,
|
||||
) {
|
||||
match stage {
|
||||
ToolEventStage::Begin => {
|
||||
emit_exec_command_begin(
|
||||
ctx,
|
||||
exec_input.command,
|
||||
exec_input.cwd,
|
||||
exec_input.parsed_cmd,
|
||||
exec_input.source,
|
||||
exec_input.interaction_input.map(str::to_owned),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ToolEventStage::Success(output)
|
||||
| ToolEventStage::Failure(ToolEventFailure::Output(output)) => {
|
||||
let exec_result = ExecCommandResult {
|
||||
stdout: output.stdout.text.clone(),
|
||||
stderr: output.stderr.text.clone(),
|
||||
aggregated_output: output.aggregated_output.text.clone(),
|
||||
exit_code: output.exit_code,
|
||||
duration: output.duration,
|
||||
formatted_output: format_exec_output_str(&output),
|
||||
};
|
||||
emit_exec_end(ctx, exec_input, exec_result).await;
|
||||
}
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)) => {
|
||||
let text = message.to_string();
|
||||
let exec_result = ExecCommandResult {
|
||||
stdout: String::new(),
|
||||
stderr: text.clone(),
|
||||
aggregated_output: text.clone(),
|
||||
exit_code: -1,
|
||||
duration: Duration::ZERO,
|
||||
formatted_output: text,
|
||||
};
|
||||
emit_exec_end(ctx, exec_input, exec_result).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
exec_input: ExecCommandInput<'_>,
|
||||
exec_result: ExecCommandResult,
|
||||
) {
|
||||
ctx.session
|
||||
.send_event(
|
||||
ctx.turn,
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
exit_code,
|
||||
duration,
|
||||
formatted_output,
|
||||
turn_id: ctx.turn.sub_id.clone(),
|
||||
command: exec_input.command.to_vec(),
|
||||
cwd: exec_input.cwd.to_path_buf(),
|
||||
parsed_cmd: exec_input.parsed_cmd.to_vec(),
|
||||
source: exec_input.source,
|
||||
interaction_input: exec_input.interaction_input.map(str::to_owned),
|
||||
stdout: exec_result.stdout,
|
||||
stderr: exec_result.stderr,
|
||||
aggregated_output: exec_result.aggregated_output,
|
||||
exit_code: exec_result.exit_code,
|
||||
duration: exec_result.duration,
|
||||
formatted_output: exec_result.formatted_output,
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -287,6 +287,8 @@ async fn handle_list_resources(
|
||||
let resources = session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_resources()
|
||||
.await;
|
||||
Ok(ListResourcesPayload::from_all_servers(resources))
|
||||
@@ -396,6 +398,8 @@ async fn handle_list_resource_templates(
|
||||
let templates = session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_resource_templates()
|
||||
.await;
|
||||
Ok(ListResourceTemplatesPayload::from_all_servers(templates))
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::approval_requirement_for_command;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
@@ -24,6 +25,7 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
||||
pub struct ShellHandler;
|
||||
@@ -303,6 +305,17 @@ impl ShellHandler {
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
approval_requirement: if is_user_shell_command {
|
||||
ApprovalRequirement::Skip
|
||||
} else {
|
||||
approval_requirement_for_command(
|
||||
turn.exec_policy_v2.as_deref(),
|
||||
&exec_params.command,
|
||||
turn.approval_policy,
|
||||
&turn.sandbox_policy,
|
||||
exec_params.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
},
|
||||
};
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
|
||||
@@ -11,11 +11,13 @@ use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::default_approval_requirement;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
|
||||
@@ -49,40 +51,52 @@ impl ToolOrchestrator {
|
||||
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
|
||||
|
||||
// 1) Approval
|
||||
let needs_initial_approval =
|
||||
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
|
||||
.await;
|
||||
let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
|
||||
default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
|
||||
});
|
||||
match requirement {
|
||||
ApprovalRequirement::Skip => {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
ApprovalRequirement::Forbidden { reason } => {
|
||||
return Err(ToolError::Rejected(reason));
|
||||
}
|
||||
ApprovalRequirement::NeedsApproval { reason } => {
|
||||
let mut risk = None;
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: reason,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
already_approved = true;
|
||||
}
|
||||
already_approved = true;
|
||||
} else {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
|
||||
// 2) First attempt under the selected sandbox.
|
||||
|
||||
@@ -112,7 +112,7 @@ impl ToolCallRuntime {
|
||||
|
||||
fn abort_message(call: &ToolCall, secs: f32) -> String {
|
||||
match call.tool_name.as_str() {
|
||||
"shell" | "container.exec" | "local_shell" | "unified_exec" => {
|
||||
"shell" | "container.exec" | "local_shell" | "shell_command" | "unified_exec" => {
|
||||
format!("Wall time: {secs:.1} seconds\naborted by user")
|
||||
}
|
||||
_ => format!("aborted by user after {secs:.1}s"),
|
||||
|
||||
@@ -54,7 +54,7 @@ impl ToolRouter {
|
||||
.any(|config| config.spec.name() == tool_name)
|
||||
}
|
||||
|
||||
pub fn build_tool_call(
|
||||
pub async fn build_tool_call(
|
||||
session: &Session,
|
||||
item: ResponseItem,
|
||||
) -> Result<Option<ToolCall>, FunctionCallError> {
|
||||
@@ -65,7 +65,7 @@ impl ToolRouter {
|
||||
call_id,
|
||||
..
|
||||
} => {
|
||||
if let Some((server, tool)) = session.parse_mcp_tool_name(&name) {
|
||||
if let Some((server, tool)) = session.parse_mcp_tool_name(&name).await {
|
||||
Ok(Some(ToolCall {
|
||||
tool_name: name,
|
||||
call_id,
|
||||
|
||||
@@ -4,13 +4,12 @@ Runtime: shell
|
||||
Executes shell requests under the orchestrator: asks for approval when needed,
|
||||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -20,7 +19,6 @@ use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::path::PathBuf;
|
||||
@@ -33,6 +31,7 @@ pub struct ShellRequest {
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
@@ -114,18 +113,8 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &ShellRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
/*
|
||||
Runtime: unified exec
|
||||
|
||||
@@ -10,6 +9,7 @@ use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -22,9 +22,7 @@ use crate::tools::sandboxing::with_cached_approval;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecSession;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
@@ -36,6 +34,7 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
@@ -65,6 +64,7 @@ impl UnifiedExecRequest {
|
||||
env: HashMap<String, String>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
approval_requirement: ApprovalRequirement,
|
||||
) -> Self {
|
||||
Self {
|
||||
command,
|
||||
@@ -72,6 +72,7 @@ impl UnifiedExecRequest {
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
approval_requirement,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -129,18 +130,8 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &UnifiedExecRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
|
||||
|
||||
@@ -86,6 +86,34 @@ pub(crate) struct ApprovalCtx<'a> {
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum ApprovalRequirement {
|
||||
Skip,
|
||||
NeedsApproval { reason: Option<String> },
|
||||
Forbidden { reason: String },
|
||||
}
|
||||
|
||||
/// Reflects the orchestrator's behavior (pre-refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
pub(crate) fn default_approval_requirement(
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> ApprovalRequirement {
|
||||
let needs_approval = match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
};
|
||||
|
||||
if needs_approval {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
|
||||
|
||||
@@ -106,22 +134,11 @@ pub(crate) trait Approvable<Req> {
|
||||
matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
/// Decide whether an initial user approval should be requested before the
|
||||
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
_req: &Req,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
}
|
||||
/// Override the default approval requirement. Return `Some(_)` to specify
|
||||
/// a custom requirement, or `None` to fall back to
|
||||
/// policy-based default.
|
||||
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide we can request an approval for no-sandbox execution.
|
||||
|
||||
@@ -20,6 +20,11 @@ pub enum ConfigShellToolType {
|
||||
Default,
|
||||
Local,
|
||||
UnifiedExec,
|
||||
/// Do not include a shell tool by default. Useful when using Codex
|
||||
/// with tools provided exclusively provided by MCP servers. Often used
|
||||
/// with `--config base_instructions=CUSTOM_INSTRUCTIONS`
|
||||
/// to customize agent behavior.
|
||||
Disabled,
|
||||
/// Takes a command as a single string to be run in the user's default shell.
|
||||
ShellCommand,
|
||||
}
|
||||
@@ -48,7 +53,9 @@ impl ToolsConfig {
|
||||
let include_web_search_request = features.enabled(Feature::WebSearchRequest);
|
||||
let include_view_image_tool = features.enabled(Feature::ViewImageTool);
|
||||
|
||||
let shell_type = if features.enabled(Feature::UnifiedExec) {
|
||||
let shell_type = if !features.enabled(Feature::ShellTool) {
|
||||
ConfigShellToolType::Disabled
|
||||
} else if features.enabled(Feature::UnifiedExec) {
|
||||
ConfigShellToolType::UnifiedExec
|
||||
} else if features.enabled(Feature::ShellCommandTool) {
|
||||
ConfigShellToolType::ShellCommand
|
||||
@@ -294,9 +301,26 @@ fn create_shell_tool() -> ToolSpec {
|
||||
},
|
||||
);
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
|
||||
- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
|
||||
- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
|
||||
- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
|
||||
- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
|
||||
- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
|
||||
}.to_string();
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "shell".to_string(),
|
||||
description: "Runs a shell command and returns its output.".to_string(),
|
||||
description,
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
@@ -341,9 +365,25 @@ fn create_shell_command_tool() -> ToolSpec {
|
||||
},
|
||||
);
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output.
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): "Get-ChildItem -Force"
|
||||
- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
|
||||
- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
|
||||
- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
|
||||
- setting an env var: "$env:FOO='bar'; echo $env:FOO"
|
||||
- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#
|
||||
}.to_string();
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "shell_command".to_string(),
|
||||
description: "Runs a shell command string and returns its output.".to_string(),
|
||||
description,
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
@@ -973,16 +1013,21 @@ pub(crate) fn build_specs(
|
||||
builder.register_handler("exec_command", unified_exec_handler.clone());
|
||||
builder.register_handler("write_stdin", unified_exec_handler);
|
||||
}
|
||||
ConfigShellToolType::Disabled => {
|
||||
// Do nothing.
|
||||
}
|
||||
ConfigShellToolType::ShellCommand => {
|
||||
builder.push_spec(create_shell_command_tool());
|
||||
}
|
||||
}
|
||||
|
||||
// Always register shell aliases so older prompts remain compatible.
|
||||
builder.register_handler("shell", shell_handler.clone());
|
||||
builder.register_handler("container.exec", shell_handler.clone());
|
||||
builder.register_handler("local_shell", shell_handler);
|
||||
builder.register_handler("shell_command", shell_command_handler);
|
||||
if config.shell_type != ConfigShellToolType::Disabled {
|
||||
// Always register shell aliases so older prompts remain compatible.
|
||||
builder.register_handler("shell", shell_handler.clone());
|
||||
builder.register_handler("container.exec", shell_handler.clone());
|
||||
builder.register_handler("local_shell", shell_handler);
|
||||
builder.register_handler("shell_command", shell_command_handler);
|
||||
}
|
||||
|
||||
builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true);
|
||||
builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true);
|
||||
@@ -1118,6 +1163,7 @@ mod tests {
|
||||
ConfigShellToolType::Default => Some("shell"),
|
||||
ConfigShellToolType::Local => Some("local_shell"),
|
||||
ConfigShellToolType::UnifiedExec => None,
|
||||
ConfigShellToolType::Disabled => None,
|
||||
ConfigShellToolType::ShellCommand => Some("shell_command"),
|
||||
}
|
||||
}
|
||||
@@ -1873,8 +1919,23 @@ mod tests {
|
||||
};
|
||||
assert_eq!(name, "shell");
|
||||
|
||||
let expected = "Runs a shell command and returns its output.";
|
||||
assert_eq!(description, expected);
|
||||
let expected = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
|
||||
- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
|
||||
- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
|
||||
- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
|
||||
- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
|
||||
- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
|
||||
}.to_string();
|
||||
assert_eq!(description, &expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1888,8 +1949,22 @@ mod tests {
|
||||
};
|
||||
assert_eq!(name, "shell_command");
|
||||
|
||||
let expected = "Runs a shell command string and returns its output.";
|
||||
assert_eq!(description, expected);
|
||||
let expected = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output.
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): "Get-ChildItem -Force"
|
||||
- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
|
||||
- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
|
||||
- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
|
||||
- setting an env var: "$env:FOO='bar'; echo $env:FOO"
|
||||
- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#.to_string()
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#.to_string()
|
||||
};
|
||||
assert_eq!(description, &expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,8 +1,194 @@
|
||||
//! Utilities for truncating large chunks of output while preserving a prefix
|
||||
//! and suffix on UTF-8 boundaries.
|
||||
//! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation
|
||||
//! used across the core crate.
|
||||
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use codex_utils_string::take_last_bytes_at_char_boundary;
|
||||
use codex_utils_tokenizer::Tokenizer;
|
||||
|
||||
/// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
|
||||
pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
|
||||
pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
|
||||
|
||||
/// Globally truncate function output items to fit within `MODEL_FORMAT_MAX_BYTES`
|
||||
/// by preserving as many text/image items as possible and appending a summary
|
||||
/// for any omitted text items.
|
||||
pub(crate) fn globally_truncate_function_output_items(
|
||||
items: &[FunctionCallOutputContentItem],
|
||||
) -> Vec<FunctionCallOutputContentItem> {
|
||||
let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
|
||||
let mut remaining = MODEL_FORMAT_MAX_BYTES;
|
||||
let mut omitted_text_items = 0usize;
|
||||
|
||||
for it in items {
|
||||
match it {
|
||||
FunctionCallOutputContentItem::InputText { text } => {
|
||||
if remaining == 0 {
|
||||
omitted_text_items += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let len = text.len();
|
||||
if len <= remaining {
|
||||
out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
|
||||
remaining -= len;
|
||||
} else {
|
||||
let slice = take_bytes_at_char_boundary(text, remaining);
|
||||
if !slice.is_empty() {
|
||||
out.push(FunctionCallOutputContentItem::InputText {
|
||||
text: slice.to_string(),
|
||||
});
|
||||
}
|
||||
remaining = 0;
|
||||
}
|
||||
}
|
||||
// todo(aibrahim): handle input images; resize
|
||||
FunctionCallOutputContentItem::InputImage { image_url } => {
|
||||
out.push(FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_url.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if omitted_text_items > 0 {
|
||||
out.push(FunctionCallOutputContentItem::InputText {
|
||||
text: format!("[omitted {omitted_text_items} text items ...]"),
|
||||
});
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Format a block of exec/tool output for model consumption, truncating by
|
||||
/// lines and bytes while preserving head and tail segments.
|
||||
pub(crate) fn format_output_for_model_body(
|
||||
content: &str,
|
||||
limit_bytes: usize,
|
||||
limit_lines: usize,
|
||||
) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
|
||||
if content.len() <= limit_bytes && total_lines <= limit_lines {
|
||||
return content.to_string();
|
||||
}
|
||||
let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines);
|
||||
format!("Total output lines: {total_lines}\n\n{output}")
|
||||
}
|
||||
|
||||
fn truncate_formatted_exec_output(
|
||||
content: &str,
|
||||
total_lines: usize,
|
||||
limit_bytes: usize,
|
||||
limit_lines: usize,
|
||||
) -> String {
|
||||
error_on_double_truncation(content);
|
||||
let head_lines: usize = limit_lines / 2;
|
||||
let tail_lines: usize = limit_lines - head_lines; // 128
|
||||
let head_bytes: usize = limit_bytes / 2;
|
||||
let segments: Vec<&str> = content.split_inclusive('\n').collect();
|
||||
let head_take = head_lines.min(segments.len());
|
||||
let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
|
||||
let omitted = segments.len().saturating_sub(head_take + tail_take);
|
||||
|
||||
let head_slice_end: usize = segments
|
||||
.iter()
|
||||
.take(head_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum();
|
||||
let tail_slice_start: usize = if tail_take == 0 {
|
||||
content.len()
|
||||
} else {
|
||||
content.len()
|
||||
- segments
|
||||
.iter()
|
||||
.rev()
|
||||
.take(tail_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum::<usize>()
|
||||
};
|
||||
let head_slice = &content[..head_slice_end];
|
||||
let tail_slice = &content[tail_slice_start..];
|
||||
let truncated_by_bytes = content.len() > limit_bytes;
|
||||
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
|
||||
let marker = if omitted > 0 {
|
||||
Some(format!(
|
||||
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
|
||||
))
|
||||
} else if truncated_by_bytes {
|
||||
Some(format!(
|
||||
"\n[... output truncated to fit {limit_bytes} bytes ...]\n\n"
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let marker_len = marker.as_ref().map_or(0, String::len);
|
||||
let base_head_budget = head_bytes.min(limit_bytes);
|
||||
let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
|
||||
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
|
||||
let mut result = String::with_capacity(limit_bytes.min(content.len()));
|
||||
|
||||
result.push_str(head_part);
|
||||
if let Some(marker_text) = marker.as_ref() {
|
||||
result.push_str(marker_text);
|
||||
}
|
||||
|
||||
let remaining = limit_bytes.saturating_sub(result.len());
|
||||
if remaining == 0 {
|
||||
return result;
|
||||
}
|
||||
|
||||
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
|
||||
result.push_str(tail_part);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn error_on_double_truncation(content: &str) {
|
||||
if content.contains("Total output lines:") && content.contains("omitted") {
|
||||
tracing::error!(
|
||||
"FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Truncate an output string to a maximum number of “tokens”, where tokens are
|
||||
/// approximated as individual `char`s. Preserves a prefix and suffix with an
|
||||
/// elision marker describing how many tokens were omitted.
|
||||
pub(crate) fn truncate_output_to_tokens(
|
||||
output: &str,
|
||||
max_tokens: usize,
|
||||
) -> (String, Option<usize>) {
|
||||
if max_tokens == 0 {
|
||||
let total_tokens = output.chars().count();
|
||||
let message = format!("…{total_tokens} tokens truncated…");
|
||||
return (message, Some(total_tokens));
|
||||
}
|
||||
|
||||
let tokens: Vec<char> = output.chars().collect();
|
||||
let total_tokens = tokens.len();
|
||||
if total_tokens <= max_tokens {
|
||||
return (output.to_string(), None);
|
||||
}
|
||||
|
||||
let half = max_tokens / 2;
|
||||
if half == 0 {
|
||||
let truncated = total_tokens.saturating_sub(max_tokens);
|
||||
let message = format!("…{truncated} tokens truncated…");
|
||||
return (message, Some(total_tokens));
|
||||
}
|
||||
|
||||
let truncated = total_tokens.saturating_sub(half * 2);
|
||||
let mut truncated_output = String::new();
|
||||
truncated_output.extend(&tokens[..half]);
|
||||
truncated_output.push_str(&format!("…{truncated} tokens truncated…"));
|
||||
truncated_output.extend(&tokens[total_tokens - half..]);
|
||||
(truncated_output, Some(total_tokens))
|
||||
}
|
||||
|
||||
/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
|
||||
/// preserving the beginning and the end. Returns the possibly truncated
|
||||
/// string and `Some(original_token_count)` (counted with the local tokenizer;
|
||||
@@ -136,8 +322,33 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::MODEL_FORMAT_MAX_BYTES;
|
||||
use super::MODEL_FORMAT_MAX_LINES;
|
||||
use super::format_output_for_model_body;
|
||||
use super::globally_truncate_function_output_items;
|
||||
use super::truncate_middle;
|
||||
use super::truncate_output_to_tokens;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_utils_tokenizer::Tokenizer;
|
||||
use pretty_assertions::assert_eq;
|
||||
use regex_lite::Regex;
|
||||
|
||||
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
|
||||
let head_lines = MODEL_FORMAT_MAX_LINES / 2;
|
||||
let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
|
||||
let head_take = head_lines.min(total_lines);
|
||||
let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
|
||||
let omitted = total_lines.saturating_sub(head_take + tail_take);
|
||||
let escaped_line = regex_lite::escape(line);
|
||||
if omitted == 0 {
|
||||
return format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
|
||||
);
|
||||
}
|
||||
format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_no_newlines_fallback() {
|
||||
@@ -197,4 +408,208 @@ mod tests {
|
||||
assert!(out.ends_with("017\n018\n019\n020\n"));
|
||||
assert_eq!(total, Some(tok.count(&s) as u64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_output_to_tokens_returns_original_when_under_limit() {
|
||||
let s = "short output";
|
||||
let (truncated, original) = truncate_output_to_tokens(s, 100);
|
||||
assert_eq!(truncated, s);
|
||||
assert_eq!(original, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_output_to_tokens_reports_truncation_at_zero_limit() {
|
||||
let s = "abcdef";
|
||||
let (truncated, original) = truncate_output_to_tokens(s, 0);
|
||||
assert!(truncated.contains("tokens truncated"));
|
||||
assert_eq!(original, Some(s.chars().count()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_output_to_tokens_preserves_prefix_and_suffix() {
|
||||
let s = "abcdefghijklmnopqrstuvwxyz";
|
||||
let max_tokens = 10;
|
||||
let (truncated, original) = truncate_output_to_tokens(s, max_tokens);
|
||||
assert!(truncated.starts_with("abcde"));
|
||||
assert!(truncated.ends_with("vwxyz"));
|
||||
assert_eq!(original, Some(s.chars().count()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_truncates_large_error() {
|
||||
let line = "very long execution error line that should trigger truncation\n";
|
||||
let large_error = line.repeat(2_500); // way beyond both byte and line limits
|
||||
|
||||
let truncated = format_output_for_model_body(
|
||||
&large_error,
|
||||
MODEL_FORMAT_MAX_BYTES,
|
||||
MODEL_FORMAT_MAX_LINES,
|
||||
);
|
||||
|
||||
let total_lines = large_error.lines().count();
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
|
||||
panic!("failed to compile regex {pattern}: {err}");
|
||||
});
|
||||
let captures = regex
|
||||
.captures(&truncated)
|
||||
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {truncated}"));
|
||||
let body = captures
|
||||
.name("body")
|
||||
.expect("missing body capture")
|
||||
.as_str();
|
||||
assert!(
|
||||
body.len() <= MODEL_FORMAT_MAX_BYTES,
|
||||
"body exceeds byte limit: {} bytes",
|
||||
body.len()
|
||||
);
|
||||
assert_ne!(truncated, large_error);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
|
||||
let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
|
||||
let truncated = format_output_for_model_body(
|
||||
&long_line,
|
||||
MODEL_FORMAT_MAX_BYTES,
|
||||
MODEL_FORMAT_MAX_LINES,
|
||||
);
|
||||
|
||||
assert_ne!(truncated, long_line);
|
||||
let marker_line =
|
||||
format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
|
||||
assert!(
|
||||
truncated.contains(&marker_line),
|
||||
"missing byte truncation marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("omitted"),
|
||||
"line omission marker should not appear when no lines were dropped: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_returns_original_when_within_limits() {
|
||||
let content = "example output\n".repeat(10);
|
||||
|
||||
assert_eq!(
|
||||
format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES),
|
||||
content
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated =
|
||||
format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES);
|
||||
|
||||
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
|
||||
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
|
||||
|
||||
assert!(
|
||||
truncated.contains(&expected_marker),
|
||||
"missing omitted marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
truncated.contains("line-0\n"),
|
||||
"expected head line to remain: {truncated}"
|
||||
);
|
||||
|
||||
let last_line = format!("line-{}\n", total_lines - 1);
|
||||
assert!(
|
||||
truncated.contains(&last_line),
|
||||
"expected tail line to remain: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
|
||||
let long_line = "x".repeat(256);
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}-{long_line}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated =
|
||||
format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES);
|
||||
|
||||
assert!(
|
||||
truncated.contains("[... omitted 42 of 298 lines ...]"),
|
||||
"expected omitted marker when line count exceeds limit: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("output truncated to fit"),
|
||||
"line omission marker should take precedence over byte marker: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
|
||||
// Arrange: several text items, none exceeding per-item limit, but total exceeds budget.
|
||||
let budget = MODEL_FORMAT_MAX_BYTES;
|
||||
let t1_len = (budget / 2).saturating_sub(10);
|
||||
let t2_len = (budget / 2).saturating_sub(10);
|
||||
let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len);
|
||||
let t3_len = 50; // gets truncated to remaining_after_t1_t2
|
||||
let t4_len = 5; // omitted
|
||||
let t5_len = 7; // omitted
|
||||
|
||||
let t1 = "a".repeat(t1_len);
|
||||
let t2 = "b".repeat(t2_len);
|
||||
let t3 = "c".repeat(t3_len);
|
||||
let t4 = "d".repeat(t4_len);
|
||||
let t5 = "e".repeat(t5_len);
|
||||
|
||||
let items = vec![
|
||||
FunctionCallOutputContentItem::InputText { text: t1 },
|
||||
FunctionCallOutputContentItem::InputText { text: t2 },
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:mid".to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText { text: t3 },
|
||||
FunctionCallOutputContentItem::InputText { text: t4 },
|
||||
FunctionCallOutputContentItem::InputText { text: t5 },
|
||||
];
|
||||
|
||||
let output = globally_truncate_function_output_items(&items);
|
||||
|
||||
// Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted.
|
||||
assert_eq!(output.len(), 5);
|
||||
|
||||
let first_text = match &output[0] {
|
||||
FunctionCallOutputContentItem::InputText { text } => text,
|
||||
other => panic!("unexpected first item: {other:?}"),
|
||||
};
|
||||
assert_eq!(first_text.len(), t1_len);
|
||||
|
||||
let second_text = match &output[1] {
|
||||
FunctionCallOutputContentItem::InputText { text } => text,
|
||||
other => panic!("unexpected second item: {other:?}"),
|
||||
};
|
||||
assert_eq!(second_text.len(), t2_len);
|
||||
|
||||
assert_eq!(
|
||||
output[2],
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "img:mid".to_string()
|
||||
}
|
||||
);
|
||||
|
||||
let fourth_text = match &output[3] {
|
||||
FunctionCallOutputContentItem::InputText { text } => text,
|
||||
other => panic!("unexpected fourth item: {other:?}"),
|
||||
};
|
||||
assert_eq!(fourth_text.len(), remaining_after_t1_t2);
|
||||
|
||||
let summary_text = match &output[4] {
|
||||
FunctionCallOutputContentItem::InputText { text } => text,
|
||||
other => panic!("unexpected summary item: {other:?}"),
|
||||
};
|
||||
assert!(summary_text.contains("omitted 2 text items"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,37 +124,6 @@ pub(crate) fn generate_chunk_id() -> String {
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn truncate_output_to_tokens(
|
||||
output: &str,
|
||||
max_tokens: usize,
|
||||
) -> (String, Option<usize>) {
|
||||
if max_tokens == 0 {
|
||||
let total_tokens = output.chars().count();
|
||||
let message = format!("…{total_tokens} tokens truncated…");
|
||||
return (message, Some(total_tokens));
|
||||
}
|
||||
|
||||
let tokens: Vec<char> = output.chars().collect();
|
||||
let total_tokens = tokens.len();
|
||||
if total_tokens <= max_tokens {
|
||||
return (output.to_string(), None);
|
||||
}
|
||||
|
||||
let half = max_tokens / 2;
|
||||
if half == 0 {
|
||||
let truncated = total_tokens.saturating_sub(max_tokens);
|
||||
let message = format!("…{truncated} tokens truncated…");
|
||||
return (message, Some(total_tokens));
|
||||
}
|
||||
|
||||
let truncated = total_tokens.saturating_sub(half * 2);
|
||||
let mut truncated_output = String::new();
|
||||
truncated_output.extend(&tokens[..half]);
|
||||
truncated_output.push_str(&format!("…{truncated} tokens truncated…"));
|
||||
truncated_output.extend(&tokens[total_tokens - half..]);
|
||||
(truncated_output, Some(total_tokens))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(unix)]
|
||||
mod tests {
|
||||
|
||||
@@ -11,6 +11,7 @@ use crate::codex::TurnContext;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::approval_requirement_for_command;
|
||||
use crate::protocol::BackgroundEventEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
@@ -36,7 +37,7 @@ use super::generate_chunk_id;
|
||||
use super::resolve_max_tokens;
|
||||
use super::session::OutputBuffer;
|
||||
use super::session::UnifiedExecSession;
|
||||
use super::truncate_output_to_tokens;
|
||||
use crate::truncate::truncate_output_to_tokens;
|
||||
|
||||
impl UnifiedExecSessionManager {
|
||||
pub(crate) async fn exec_command(
|
||||
@@ -444,6 +445,13 @@ impl UnifiedExecSessionManager {
|
||||
create_env(&context.turn.shell_environment_policy),
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
approval_requirement_for_command(
|
||||
context.turn.exec_policy_v2.as_deref(),
|
||||
command,
|
||||
context.turn.approval_policy,
|
||||
&context.turn.sandbox_policy,
|
||||
with_escalated_permissions.unwrap_or(false),
|
||||
),
|
||||
);
|
||||
let tool_ctx = ToolCtx {
|
||||
session: context.session.as_ref(),
|
||||
|
||||
1
codex-rs/core/templates/compact/summary_prefix.md
Normal file
1
codex-rs/core/templates/compact/summary_prefix.md
Normal file
@@ -0,0 +1 @@
|
||||
Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
|
||||
@@ -38,6 +38,15 @@ pub enum ApplyPatchModelOutput {
|
||||
ShellViaHeredoc,
|
||||
}
|
||||
|
||||
/// A collection of different ways the model can output an apply_patch call
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum ShellModelOutput {
|
||||
Shell,
|
||||
ShellCommand,
|
||||
LocalShell,
|
||||
// UnifiedExec has its own set of tests
|
||||
}
|
||||
|
||||
pub struct TestCodexBuilder {
|
||||
config_mutators: Vec<Box<ConfigMutator>>,
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
use test_case::test_case;
|
||||
|
||||
async fn apply_patch_harness() -> Result<TestCodexHarness> {
|
||||
pub async fn apply_patch_harness() -> Result<TestCodexHarness> {
|
||||
apply_patch_harness_with(|_| {}).await
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ async fn apply_patch_harness_with(
|
||||
.await
|
||||
}
|
||||
|
||||
async fn mount_apply_patch(
|
||||
pub async fn mount_apply_patch(
|
||||
harness: &TestCodexHarness,
|
||||
call_id: &str,
|
||||
patch: &str,
|
||||
@@ -87,8 +87,8 @@ async fn apply_patch_cli_multiple_operations_integration(
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -671,8 +671,8 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -717,8 +717,8 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
})
|
||||
.await?;
|
||||
let test = harness.test();
|
||||
|
||||
@@ -240,6 +240,10 @@ enum Expectation {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
},
|
||||
FileCreatedNoExitCode {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
},
|
||||
PatchApplied {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
@@ -251,12 +255,18 @@ enum Expectation {
|
||||
NetworkSuccess {
|
||||
body_contains: &'static str,
|
||||
},
|
||||
NetworkSuccessNoExitCode {
|
||||
body_contains: &'static str,
|
||||
},
|
||||
NetworkFailure {
|
||||
expect_tag: &'static str,
|
||||
},
|
||||
CommandSuccess {
|
||||
stdout_contains: &'static str,
|
||||
},
|
||||
CommandSuccessNoExitCode {
|
||||
stdout_contains: &'static str,
|
||||
},
|
||||
CommandFailure {
|
||||
output_contains: &'static str,
|
||||
},
|
||||
@@ -270,8 +280,7 @@ impl Expectation {
|
||||
assert_eq!(
|
||||
result.exit_code,
|
||||
Some(0),
|
||||
"expected successful exit for {:?}",
|
||||
path
|
||||
"expected successful exit for {path:?}"
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(content),
|
||||
@@ -285,6 +294,21 @@ impl Expectation {
|
||||
);
|
||||
let _ = fs::remove_file(path);
|
||||
}
|
||||
Expectation::FileCreatedNoExitCode { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
|
||||
assert!(
|
||||
result.stdout.contains(content),
|
||||
"stdout missing {content:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
let file_contents = fs::read_to_string(&path)?;
|
||||
assert!(
|
||||
file_contents.contains(content),
|
||||
"file contents missing {content:?}: {file_contents}"
|
||||
);
|
||||
let _ = fs::remove_file(path);
|
||||
}
|
||||
Expectation::PatchApplied { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
match result.exit_code {
|
||||
@@ -360,6 +384,23 @@ impl Expectation {
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::NetworkSuccessNoExitCode { body_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
"expected no exit code for successful network call: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains("OK:"),
|
||||
"stdout missing OK prefix: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(body_contains),
|
||||
"stdout missing body text {body_contains:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::NetworkFailure { expect_tag } => {
|
||||
assert_ne!(
|
||||
result.exit_code,
|
||||
@@ -391,6 +432,18 @@ impl Expectation {
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
"expected no exit code for trusted command: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(stdout_contains),
|
||||
"trusted command stdout missing {stdout_contains:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::CommandFailure { output_contains } => {
|
||||
assert_ne!(
|
||||
result.exit_code,
|
||||
@@ -588,13 +641,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
|
||||
content: "danger-on-request",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
||||
content: "danger-on-request",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
||||
content: "danger-on-request",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_request_allows_network",
|
||||
approval_policy: OnRequest,
|
||||
@@ -605,12 +675,28 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccess {
|
||||
body_contains: "danger-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::FetchUrl {
|
||||
endpoint: "/dfa/network",
|
||||
response_body: "danger-network-ok",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccessNoExitCode {
|
||||
body_contains: "danger-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_unless_trusted_runs_without_prompt",
|
||||
approval_policy: UnlessTrusted,
|
||||
@@ -620,12 +706,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-unless",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-unless"],
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccessNoExitCode {
|
||||
stdout_contains: "trusted-unless",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_failure_allows_outside_write",
|
||||
approval_policy: OnFailure,
|
||||
@@ -636,13 +737,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
|
||||
content: "danger-on-failure",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
|
||||
approval_policy: OnFailure,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
||||
content: "danger-on-failure",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
||||
content: "danger-on-failure",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_unless_trusted_requests_approval",
|
||||
approval_policy: UnlessTrusted,
|
||||
@@ -653,7 +771,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -663,6 +781,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "danger-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
||||
content: "danger-unless-trusted",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
||||
content: "danger-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_never_allows_outside_write",
|
||||
approval_policy: Never,
|
||||
@@ -673,13 +811,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
|
||||
content: "danger-never",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
|
||||
approval_policy: Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
||||
content: "danger-never",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
||||
content: "danger-never",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_requires_approval",
|
||||
approval_policy: OnRequest,
|
||||
@@ -690,7 +845,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -700,6 +855,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "read-only-approval",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
||||
content: "read-only-approval",
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
||||
content: "read-only-approval",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_on_request_read_only_runs_without_prompt",
|
||||
approval_policy: OnRequest,
|
||||
@@ -709,12 +884,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-read-only",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-read-only"],
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccessNoExitCode {
|
||||
stdout_contains: "trusted-read-only",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_blocks_network",
|
||||
approval_policy: OnRequest,
|
||||
@@ -760,7 +950,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
@@ -770,6 +960,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "read-only-on-failure",
|
||||
},
|
||||
},
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
|
||||
approval_policy: OnFailure,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
||||
content: "read-only-on-failure",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
||||
content: "read-only-on-failure",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_network_escalates_when_approved",
|
||||
approval_policy: OnRequest,
|
||||
@@ -780,7 +991,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -789,6 +1000,25 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
body_contains: "read-only-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::FetchUrl {
|
||||
endpoint: "/ro/network-approved",
|
||||
response_body: "read-only-network-ok",
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::NetworkSuccessNoExitCode {
|
||||
body_contains: "read-only-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "apply_patch_shell_requires_patch_approval",
|
||||
approval_policy: UnlessTrusted,
|
||||
@@ -819,7 +1049,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::PatchApplied {
|
||||
target: TargetPath::Workspace("apply_patch_function.txt"),
|
||||
@@ -836,7 +1066,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![Feature::ApplyPatchFreeform],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::PatchApplied {
|
||||
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
|
||||
@@ -853,7 +1083,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -873,7 +1103,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Denied,
|
||||
expected_reason: None,
|
||||
@@ -913,7 +1143,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -933,7 +1163,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileNotCreated {
|
||||
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
|
||||
@@ -952,7 +1182,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -962,6 +1192,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "read-only-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
||||
content: "read-only-unless-trusted",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
||||
content: "read-only-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_never_reports_sandbox_failure",
|
||||
approval_policy: Never,
|
||||
@@ -992,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-never",
|
||||
@@ -1008,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::Workspace("ww_on_request.txt"),
|
||||
@@ -1039,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1059,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccess {
|
||||
body_contains: "workspace-network-ok",
|
||||
@@ -1076,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
@@ -1096,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1137,7 +1387,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "hello unified exec",
|
||||
@@ -1155,7 +1405,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
||||
@@ -1208,7 +1458,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.approval_policy = approval_policy;
|
||||
config.sandbox_policy = sandbox_policy.clone();
|
||||
let model = model_override.unwrap_or("gpt-5");
|
||||
let model = model_override.unwrap_or("gpt-5.1");
|
||||
config.model = model.to_string();
|
||||
config.model_family =
|
||||
find_family_for_model(model).expect("model should map to a known family");
|
||||
|
||||
@@ -769,7 +769,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
|
||||
|
||||
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_model("gpt-5-codex")
|
||||
.with_model("gpt-5.1-codex")
|
||||
.with_config(|config| {
|
||||
config.model_verbosity = Some(Verbosity::High);
|
||||
})
|
||||
@@ -807,7 +807,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
|
||||
|
||||
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_model("gpt-5")
|
||||
.with_model("gpt-5.1")
|
||||
.with_config(|config| {
|
||||
config.model_verbosity = Some(Verbosity::High);
|
||||
})
|
||||
@@ -1155,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 123
|
||||
},
|
||||
// Default model is gpt-5-codex in tests → 95% usable context window
|
||||
// Default model is gpt-5.1-codex in tests → 95% usable context window
|
||||
"model_context_window": 258400
|
||||
},
|
||||
"rate_limits": {
|
||||
@@ -1304,8 +1304,9 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("known gpt-5 model family");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1").expect("known gpt-5.1 model family");
|
||||
config.model_context_window = Some(272_000);
|
||||
})
|
||||
.build(&server)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
#![allow(clippy::expect_used)]
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::compact::SUMMARIZATION_PROMPT;
|
||||
use codex_core::compact::SUMMARY_PREFIX;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
@@ -12,7 +14,10 @@ use codex_core::protocol::RolloutLine;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses::ev_local_shell_call;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use std::collections::VecDeque;
|
||||
@@ -38,7 +43,6 @@ const THIRD_USER_MSG: &str = "next turn";
|
||||
const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY";
|
||||
const FIRST_AUTO_MSG: &str = "token limit start";
|
||||
const SECOND_AUTO_MSG: &str = "token limit push";
|
||||
const STILL_TOO_BIG_REPLY: &str = "STILL_TOO_BIG";
|
||||
const MULTI_AUTO_MSG: &str = "multi auto";
|
||||
const SECOND_LARGE_REPLY: &str = "SECOND_LARGE_REPLY";
|
||||
const FIRST_AUTO_SUMMARY: &str = "FIRST_AUTO_SUMMARY";
|
||||
@@ -50,10 +54,6 @@ const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
|
||||
const DUMMY_CALL_ID: &str = "call-multi-auto";
|
||||
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
|
||||
const POST_AUTO_USER_MSG: &str = "post auto follow-up";
|
||||
const COMPACT_PROMPT_MARKER: &str =
|
||||
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";
|
||||
pub(super) const TEST_COMPACT_PROMPT: &str =
|
||||
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.\nTest-only compact prompt.";
|
||||
|
||||
pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.";
|
||||
|
||||
@@ -61,6 +61,10 @@ fn auto_summary(summary: &str) -> String {
|
||||
summary.to_string()
|
||||
}
|
||||
|
||||
fn summary_with_prefix(summary: &str) -> String {
|
||||
format!("{SUMMARY_PREFIX}\n{summary}")
|
||||
}
|
||||
|
||||
fn drop_call_id(value: &mut serde_json::Value) {
|
||||
match value {
|
||||
serde_json::Value::Object(obj) => {
|
||||
@@ -79,7 +83,18 @@ fn drop_call_id(value: &mut serde_json::Value) {
|
||||
}
|
||||
|
||||
fn set_test_compact_prompt(config: &mut Config) {
|
||||
config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
|
||||
config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
|
||||
}
|
||||
|
||||
fn body_contains_text(body: &str, text: &str) -> bool {
|
||||
body.contains(&json_fragment(text))
|
||||
}
|
||||
|
||||
fn json_fragment(text: &str) -> String {
|
||||
serde_json::to_string(text)
|
||||
.expect("serialize text to JSON")
|
||||
.trim_matches('"')
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
@@ -107,13 +122,13 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
// Mount three expectations, one per request, matched by body content.
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"hello world\"") && !body.contains(COMPACT_PROMPT_MARKER)
|
||||
body.contains("\"text\":\"hello world\"") && !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
let first_request_mock = mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
let second_request_mock = mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
@@ -197,7 +212,7 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
|
||||
let text2 = last2["content"][0]["text"].as_str().unwrap();
|
||||
assert_eq!(
|
||||
text2, TEST_COMPACT_PROMPT,
|
||||
text2, SUMMARIZATION_PROMPT,
|
||||
"expected summarize trigger, got `{text2}`"
|
||||
);
|
||||
|
||||
@@ -210,6 +225,7 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
);
|
||||
|
||||
let mut messages: Vec<(String, String)> = Vec::new();
|
||||
let expected_summary_message = summary_with_prefix(SUMMARY_TEXT);
|
||||
|
||||
for item in input3 {
|
||||
if let Some("message") = item.get("type").and_then(|v| v.as_str()) {
|
||||
@@ -248,13 +264,13 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
assert!(
|
||||
messages
|
||||
.iter()
|
||||
.any(|(r, t)| r == "user" && t == SUMMARY_TEXT),
|
||||
.any(|(r, t)| r == "user" && t == &expected_summary_message),
|
||||
"third request should include the summary message"
|
||||
);
|
||||
assert!(
|
||||
!messages
|
||||
.iter()
|
||||
.any(|(_, text)| text.contains(TEST_COMPACT_PROMPT)),
|
||||
.any(|(_, text)| text.contains(SUMMARIZATION_PROMPT)),
|
||||
"third request should not include the summarize trigger"
|
||||
);
|
||||
|
||||
@@ -285,7 +301,7 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
api_turn_count += 1;
|
||||
}
|
||||
RolloutItem::Compacted(ci) => {
|
||||
if ci.message == SUMMARY_TEXT {
|
||||
if ci.message == expected_summary_message {
|
||||
saw_compacted_summary = true;
|
||||
}
|
||||
}
|
||||
@@ -358,7 +374,7 @@ async fn manual_compact_uses_custom_prompt() {
|
||||
if text == custom_prompt {
|
||||
found_custom_prompt = true;
|
||||
}
|
||||
if text == TEST_COMPACT_PROMPT {
|
||||
if text == SUMMARIZATION_PROMPT {
|
||||
found_default_prompt = true;
|
||||
}
|
||||
}
|
||||
@@ -433,6 +449,514 @@ async fn manual_compact_emits_estimated_token_usage_event() {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let codex = test_codex()
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
// user message
|
||||
let user_message = "create an app";
|
||||
|
||||
// Prepare the mock responses from the model
|
||||
|
||||
// summary texts from model
|
||||
let first_summary_text = "The task is to create an app. I started to create a react app.";
|
||||
let second_summary_text = "The task is to create an app. I started to create a react app. then I realized that I need to create a node app.";
|
||||
let third_summary_text = "The task is to create an app. I started to create a react app. then I realized that I need to create a node app. then I realized that I need to create a python app.";
|
||||
// summary texts with prefix
|
||||
let prefixed_first_summary = summary_with_prefix(first_summary_text);
|
||||
let prefixed_second_summary = summary_with_prefix(second_summary_text);
|
||||
let prefixed_third_summary = summary_with_prefix(third_summary_text);
|
||||
// token used count after long work
|
||||
let token_count_used = 270_000;
|
||||
// token used count after compaction
|
||||
let token_count_used_after_compaction = 80000;
|
||||
|
||||
// mock responses from the model
|
||||
|
||||
// first chunk of work
|
||||
let model_reasoning_response_1_sse = sse(vec![
|
||||
ev_reasoning_item("m1", &["I will create a react app"], &[]),
|
||||
ev_local_shell_call("r1-shell", "completed", vec!["echo", "make-react"]),
|
||||
ev_completed_with_tokens("r1", token_count_used),
|
||||
]);
|
||||
|
||||
// first compaction response
|
||||
let model_compact_response_1_sse = sse(vec![
|
||||
ev_assistant_message("m2", first_summary_text),
|
||||
ev_completed_with_tokens("r2", token_count_used_after_compaction),
|
||||
]);
|
||||
|
||||
// second chunk of work
|
||||
let model_reasoning_response_2_sse = sse(vec![
|
||||
ev_reasoning_item("m3", &["I will create a node app"], &[]),
|
||||
ev_local_shell_call("r3-shell", "completed", vec!["echo", "make-node"]),
|
||||
ev_completed_with_tokens("r3", token_count_used),
|
||||
]);
|
||||
|
||||
// second compaction response
|
||||
let model_compact_response_2_sse = sse(vec![
|
||||
ev_assistant_message("m4", second_summary_text),
|
||||
ev_completed_with_tokens("r4", token_count_used_after_compaction),
|
||||
]);
|
||||
|
||||
// third chunk of work
|
||||
let model_reasoning_response_3_sse = sse(vec![
|
||||
ev_reasoning_item("m6", &["I will create a python app"], &[]),
|
||||
ev_local_shell_call("r6-shell", "completed", vec!["echo", "make-python"]),
|
||||
ev_completed_with_tokens("r6", token_count_used),
|
||||
]);
|
||||
|
||||
// third compaction response
|
||||
let model_compact_response_3_sse = sse(vec![
|
||||
ev_assistant_message("m7", third_summary_text),
|
||||
ev_completed_with_tokens("r7", token_count_used_after_compaction),
|
||||
]);
|
||||
|
||||
// final response
|
||||
let model_final_response_sse = sse(vec![
|
||||
ev_assistant_message(
|
||||
"m8",
|
||||
"The task is to create an app. I started to create a react app. then I realized that I need to create a node app. then I realized that I need to create a python app.",
|
||||
),
|
||||
ev_completed_with_tokens("r8", token_count_used_after_compaction + 1000),
|
||||
]);
|
||||
|
||||
// mount the mock responses from the model
|
||||
let bodies = vec![
|
||||
model_reasoning_response_1_sse,
|
||||
model_compact_response_1_sse,
|
||||
model_reasoning_response_2_sse,
|
||||
model_compact_response_2_sse,
|
||||
model_reasoning_response_3_sse,
|
||||
model_compact_response_3_sse,
|
||||
model_final_response_sse,
|
||||
];
|
||||
mount_sse_sequence(&server, bodies).await;
|
||||
|
||||
// Start the conversation with the user message
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: user_message.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.expect("submit user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// collect the requests payloads from the model
|
||||
let requests_payloads = server.received_requests().await.unwrap();
|
||||
|
||||
let body = requests_payloads[0]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
|
||||
|
||||
// test 1: after compaction, we should have one environment message, one user message, and one user message with summary prefix
|
||||
let compaction_indices = [2, 4, 6];
|
||||
let expected_summaries = [
|
||||
prefixed_first_summary.as_str(),
|
||||
prefixed_second_summary.as_str(),
|
||||
prefixed_third_summary.as_str(),
|
||||
];
|
||||
for (i, expected_summary) in compaction_indices.into_iter().zip(expected_summaries) {
|
||||
let body = requests_payloads.clone()[i]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
assert_eq!(input.len(), 3);
|
||||
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
|
||||
let user_message_received = input[1]["content"][0]["text"].as_str().unwrap();
|
||||
let summary_message = input[2]["content"][0]["text"].as_str().unwrap();
|
||||
assert_eq!(environment_message, environment_message);
|
||||
assert_eq!(user_message_received, user_message);
|
||||
assert_eq!(
|
||||
summary_message, expected_summary,
|
||||
"compaction request at index {i} should include the prefixed summary"
|
||||
);
|
||||
}
|
||||
|
||||
// test 2: the expected requests inputs should be as follows:
|
||||
let expected_requests_inputs = json!([
|
||||
[
|
||||
// 0: first request of the user message.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
|
||||
// 1: first automatic compaction request.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": null,
|
||||
"encrypted_content": null,
|
||||
"summary": [
|
||||
{
|
||||
"text": "I will create a react app",
|
||||
"type": "summary_text"
|
||||
}
|
||||
],
|
||||
"type": "reasoning"
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"command": [
|
||||
"echo",
|
||||
"make-react"
|
||||
],
|
||||
"env": null,
|
||||
"timeout_ms": null,
|
||||
"type": "exec",
|
||||
"user": null,
|
||||
"working_directory": null
|
||||
},
|
||||
"call_id": "r1-shell",
|
||||
"status": "completed",
|
||||
"type": "local_shell_call"
|
||||
},
|
||||
{
|
||||
"call_id": "r1-shell",
|
||||
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
|
||||
"type": "function_call_output"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": SUMMARIZATION_PROMPT,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
|
||||
// 2: request after first automatic compaction.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": prefixed_first_summary.clone(),
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
|
||||
// 3: request for second automatic compaction.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": prefixed_first_summary.clone(),
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": null,
|
||||
"encrypted_content": null,
|
||||
"summary": [
|
||||
{
|
||||
"text": "I will create a node app",
|
||||
"type": "summary_text"
|
||||
}
|
||||
],
|
||||
"type": "reasoning"
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"command": [
|
||||
"echo",
|
||||
"make-node"
|
||||
],
|
||||
"env": null,
|
||||
"timeout_ms": null,
|
||||
"type": "exec",
|
||||
"user": null,
|
||||
"working_directory": null
|
||||
},
|
||||
"call_id": "r3-shell",
|
||||
"status": "completed",
|
||||
"type": "local_shell_call"
|
||||
},
|
||||
{
|
||||
"call_id": "r3-shell",
|
||||
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
|
||||
"type": "function_call_output"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": SUMMARIZATION_PROMPT,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
// 4: request after second automatic compaction.
|
||||
[
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": prefixed_second_summary.clone(),
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
|
||||
// 5: request for third automatic compaction.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": prefixed_second_summary.clone(),
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": null,
|
||||
"encrypted_content": null,
|
||||
"summary": [
|
||||
{
|
||||
"text": "I will create a python app",
|
||||
"type": "summary_text"
|
||||
}
|
||||
],
|
||||
"type": "reasoning"
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"command": [
|
||||
"echo",
|
||||
"make-python"
|
||||
],
|
||||
"env": null,
|
||||
"timeout_ms": null,
|
||||
"type": "exec",
|
||||
"user": null,
|
||||
"working_directory": null
|
||||
},
|
||||
"call_id": "r6-shell",
|
||||
"status": "completed",
|
||||
"type": "local_shell_call"
|
||||
},
|
||||
{
|
||||
"call_id": "r6-shell",
|
||||
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
|
||||
"type": "function_call_output"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": SUMMARIZATION_PROMPT,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
|
||||
{
|
||||
// 6: request after third automatic compaction.
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": prefixed_third_summary.clone(),
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
]);
|
||||
|
||||
// ignore local shell calls output because it differs from OS to another and it's out of the scope of this test.
|
||||
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
|
||||
values
|
||||
.iter()
|
||||
.filter(|value| {
|
||||
value
|
||||
.get("type")
|
||||
.and_then(|ty| ty.as_str())
|
||||
.is_none_or(|ty| ty != "function_call_output")
|
||||
})
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
for (i, request) in requests_payloads.iter().enumerate() {
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let expected_input = expected_requests_inputs[i].as_array().unwrap();
|
||||
assert_eq!(normalize_inputs(input), normalize_inputs(expected_input));
|
||||
}
|
||||
|
||||
// test 3: the number of requests should be 7
|
||||
assert_eq!(requests_payloads.len(), 7);
|
||||
}
|
||||
|
||||
// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
|
||||
#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
|
||||
#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
|
||||
@@ -460,12 +984,13 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
ev_assistant_message("m4", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r4", 120),
|
||||
]);
|
||||
let prefixed_auto_summary = AUTO_SUMMARY_TEXT;
|
||||
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains(SECOND_AUTO_MSG)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
@@ -473,27 +998,28 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(SECOND_AUTO_MSG)
|
||||
&& body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
let resume_matcher = |req: &wiremock::Request| {
|
||||
let resume_marker = prefixed_auto_summary;
|
||||
let resume_matcher = move |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(AUTO_SUMMARY_TEXT)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
body.contains(resume_marker)
|
||||
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
&& !body.contains(POST_AUTO_USER_MSG)
|
||||
};
|
||||
mount_sse_once_match(&server, resume_matcher, sse_resume).await;
|
||||
|
||||
let fourth_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
|
||||
body.contains(POST_AUTO_USER_MSG) && !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(&server, fourth_matcher, sse4).await;
|
||||
|
||||
@@ -555,9 +1081,10 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
requests.len()
|
||||
);
|
||||
let is_auto_compact = |req: &wiremock::Request| {
|
||||
std::str::from_utf8(&req.body)
|
||||
.unwrap_or("")
|
||||
.contains(COMPACT_PROMPT_MARKER)
|
||||
body_contains_text(
|
||||
std::str::from_utf8(&req.body).unwrap_or(""),
|
||||
SUMMARIZATION_PROMPT,
|
||||
)
|
||||
};
|
||||
let auto_compact_count = requests.iter().filter(|req| is_auto_compact(req)).count();
|
||||
assert_eq!(
|
||||
@@ -574,13 +1101,14 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
"auto compact should add a third request"
|
||||
);
|
||||
|
||||
let resume_summary_marker = prefixed_auto_summary;
|
||||
let resume_index = requests
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find_map(|(idx, req)| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
(body.contains(AUTO_SUMMARY_TEXT)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
(body.contains(resume_summary_marker)
|
||||
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
&& !body.contains(POST_AUTO_USER_MSG))
|
||||
.then_some(idx)
|
||||
})
|
||||
@@ -592,7 +1120,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
.rev()
|
||||
.find_map(|(idx, req)| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
(body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER))
|
||||
(body.contains(POST_AUTO_USER_MSG) && !body_contains_text(body, SUMMARIZATION_PROMPT))
|
||||
.then_some(idx)
|
||||
})
|
||||
.expect("follow-up request missing");
|
||||
@@ -639,7 +1167,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
.and_then(|text| text.as_str())
|
||||
.unwrap_or_default();
|
||||
assert_eq!(
|
||||
last_text, TEST_COMPACT_PROMPT,
|
||||
last_text, SUMMARIZATION_PROMPT,
|
||||
"auto compact should send the summarization prompt as a user message",
|
||||
);
|
||||
|
||||
@@ -654,7 +1182,8 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
== Some(AUTO_SUMMARY_TEXT)
|
||||
.map(|text| text.contains(prefixed_auto_summary))
|
||||
.unwrap_or(false)
|
||||
}),
|
||||
"resume request should include compacted history"
|
||||
);
|
||||
@@ -689,7 +1218,9 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
"auto compact follow-up request should include the new user message"
|
||||
);
|
||||
assert!(
|
||||
user_texts.iter().any(|text| text == AUTO_SUMMARY_TEXT),
|
||||
user_texts
|
||||
.iter()
|
||||
.any(|text| text.contains(prefixed_auto_summary)),
|
||||
"auto compact follow-up request should include the summary message"
|
||||
);
|
||||
}
|
||||
@@ -720,7 +1251,7 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains(SECOND_AUTO_MSG)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
@@ -728,13 +1259,13 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(SECOND_AUTO_MSG)
|
||||
&& body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
@@ -809,112 +1340,6 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auto_compact_stops_after_failed_attempt() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let sse1 = sse(vec![
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed_with_tokens("r1", 500),
|
||||
]);
|
||||
|
||||
let summary_payload = auto_summary(SUMMARY_TEXT);
|
||||
let sse2 = sse(vec![
|
||||
ev_assistant_message("m2", &summary_payload),
|
||||
ev_completed_with_tokens("r2", 50),
|
||||
]);
|
||||
|
||||
let sse3 = sse(vec![
|
||||
ev_assistant_message("m3", STILL_TOO_BIG_REPLY),
|
||||
ev_completed_with_tokens("r3", 500),
|
||||
]);
|
||||
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(FIRST_AUTO_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1.clone()).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2.clone()).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
!body.contains(COMPACT_PROMPT_MARKER) && body.contains(SUMMARY_TEXT)
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3.clone()).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let codex = conversation_manager
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.unwrap()
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: FIRST_AUTO_MSG.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
|
||||
let EventMsg::Error(ErrorEvent { message }) = error_event else {
|
||||
panic!("expected error event");
|
||||
};
|
||||
assert!(
|
||||
message.contains("limit"),
|
||||
"error message should include limit information: {message}"
|
||||
);
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
3,
|
||||
"auto compact should attempt at most one summarization before erroring"
|
||||
);
|
||||
|
||||
let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
|
||||
let input = last_body
|
||||
.get("input")
|
||||
.and_then(|v| v.as_array())
|
||||
.unwrap_or_else(|| panic!("unexpected request format: {last_body}"));
|
||||
let contains_prompt = input.iter().any(|item| {
|
||||
item.get("type").and_then(|v| v.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|v| v.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|text| text.as_str())
|
||||
.map(|text| text == TEST_COMPACT_PROMPT)
|
||||
.unwrap_or(false)
|
||||
});
|
||||
assert!(
|
||||
!contains_prompt,
|
||||
"third request should be the follow-up turn, not another summarization",
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn manual_compact_retries_after_context_window_error() {
|
||||
skip_if_no_network!();
|
||||
@@ -1013,7 +1438,7 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|text| text.as_str()),
|
||||
Some(TEST_COMPACT_PROMPT),
|
||||
Some(SUMMARIZATION_PROMPT),
|
||||
"compact attempt should include summarization prompt"
|
||||
);
|
||||
assert_eq!(
|
||||
@@ -1024,7 +1449,7 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|text| text.as_str()),
|
||||
Some(TEST_COMPACT_PROMPT),
|
||||
Some(SUMMARIZATION_PROMPT),
|
||||
"retry attempt should include summarization prompt"
|
||||
);
|
||||
assert_eq!(
|
||||
@@ -1053,6 +1478,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
let final_user_message = "post compact follow-up";
|
||||
let first_summary = "FIRST_MANUAL_SUMMARY";
|
||||
let second_summary = "SECOND_MANUAL_SUMMARY";
|
||||
let expected_second_summary = summary_with_prefix(second_summary);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
@@ -1170,13 +1596,13 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
"first turn request missing first user message"
|
||||
);
|
||||
assert!(
|
||||
!contains_user_text(&first_turn_input, TEST_COMPACT_PROMPT),
|
||||
!contains_user_text(&first_turn_input, SUMMARIZATION_PROMPT),
|
||||
"first turn request should not include summarization prompt"
|
||||
);
|
||||
|
||||
let first_compact_input = requests[1].input();
|
||||
assert!(
|
||||
contains_user_text(&first_compact_input, TEST_COMPACT_PROMPT),
|
||||
contains_user_text(&first_compact_input, SUMMARIZATION_PROMPT),
|
||||
"first compact request should include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
@@ -1196,7 +1622,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
|
||||
let second_compact_input = requests[3].input();
|
||||
assert!(
|
||||
contains_user_text(&second_compact_input, TEST_COMPACT_PROMPT),
|
||||
contains_user_text(&second_compact_input, SUMMARIZATION_PROMPT),
|
||||
"second compact request should include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
@@ -1230,14 +1656,6 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
}),
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": first_summary,
|
||||
"type": "input_text",
|
||||
})],
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
}),
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": second_user_message,
|
||||
@@ -1248,7 +1666,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
}),
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": second_summary,
|
||||
"text": expected_second_summary,
|
||||
"type": "input_text",
|
||||
})],
|
||||
"role": "user",
|
||||
@@ -1368,7 +1786,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
"first request should contain the user input"
|
||||
);
|
||||
assert!(
|
||||
request_bodies[1].contains(COMPACT_PROMPT_MARKER),
|
||||
body_contains_text(&request_bodies[1], SUMMARIZATION_PROMPT),
|
||||
"first auto compact request should include the summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
@@ -1376,7 +1794,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
"function call output should be sent before the second auto compact"
|
||||
);
|
||||
assert!(
|
||||
request_bodies[4].contains(COMPACT_PROMPT_MARKER),
|
||||
body_contains_text(&request_bodies[4], SUMMARIZATION_PROMPT),
|
||||
"second auto compact request should include the summarization prompt"
|
||||
);
|
||||
}
|
||||
@@ -1472,7 +1890,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
|
||||
|
||||
let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
|
||||
assert!(
|
||||
auto_compact_body.contains(COMPACT_PROMPT_MARKER),
|
||||
body_contains_text(&auto_compact_body, SUMMARIZATION_PROMPT),
|
||||
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -10,15 +10,14 @@
|
||||
use super::compact::COMPACT_WARNING_MESSAGE;
|
||||
use super::compact::FIRST_REPLY;
|
||||
use super::compact::SUMMARY_TEXT;
|
||||
use super::compact::TEST_COMPACT_PROMPT;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::compact::SUMMARIZATION_PROMPT;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::OPENAI_DEFAULT_MODEL;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
@@ -38,13 +37,22 @@ use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
const AFTER_SECOND_RESUME: &str = "AFTER_SECOND_RESUME";
|
||||
const COMPACT_PROMPT_MARKER: &str =
|
||||
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";
|
||||
|
||||
fn network_disabled() -> bool {
|
||||
std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
|
||||
}
|
||||
|
||||
fn body_contains_text(body: &str, text: &str) -> bool {
|
||||
body.contains(&json_fragment(text))
|
||||
}
|
||||
|
||||
fn json_fragment(text: &str) -> String {
|
||||
serde_json::to_string(text)
|
||||
.expect("serialize text to JSON")
|
||||
.trim_matches('"')
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn filter_out_ghost_snapshot_entries(items: &[Value]) -> Vec<Value> {
|
||||
items
|
||||
.iter()
|
||||
@@ -82,7 +90,8 @@ fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(Value::as_str)
|
||||
== Some(summary_text)
|
||||
.map(|text| text.contains(summary_text))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
})
|
||||
.cloned()
|
||||
@@ -101,9 +110,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
// 1. Arrange mocked SSE responses for the initial compact/resume/fork flow.
|
||||
let server = MockServer::start().await;
|
||||
mount_initial_flow(&server).await;
|
||||
|
||||
let expected_model = "gpt-5.1-codex";
|
||||
// 2. Start a new conversation and drive it through the compact/resume/fork steps.
|
||||
let (_home, config, manager, base) = start_test_conversation(&server).await;
|
||||
let (_home, config, manager, base) =
|
||||
start_test_conversation(&server, Some(expected_model)).await;
|
||||
|
||||
user_turn(&base, "hello world").await;
|
||||
compact_conversation(&base).await;
|
||||
@@ -179,7 +189,6 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let expected_model = OPENAI_DEFAULT_MODEL;
|
||||
let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
|
||||
let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
|
||||
let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
|
||||
@@ -283,7 +292,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": TEST_COMPACT_PROMPT
|
||||
"text": SUMMARIZATION_PROMPT
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -548,7 +557,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
|
||||
mount_second_compact_flow(&server).await;
|
||||
|
||||
// 2. Drive the conversation through compact -> resume -> fork -> compact -> resume.
|
||||
let (_home, config, manager, base) = start_test_conversation(&server).await;
|
||||
let (_home, config, manager, base) = start_test_conversation(&server, None).await;
|
||||
|
||||
user_turn(&base, "hello world").await;
|
||||
compact_conversation(&base).await;
|
||||
@@ -741,7 +750,7 @@ async fn mount_initial_flow(server: &MockServer) {
|
||||
let match_first = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"hello world\"")
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
&& !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
|
||||
&& !body.contains("\"text\":\"AFTER_COMPACT\"")
|
||||
&& !body.contains("\"text\":\"AFTER_RESUME\"")
|
||||
@@ -751,7 +760,7 @@ async fn mount_initial_flow(server: &MockServer) {
|
||||
|
||||
let match_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
body_contains_text(body, SUMMARIZATION_PROMPT)
|
||||
};
|
||||
mount_sse_once_match(server, match_compact, sse2).await;
|
||||
|
||||
@@ -785,7 +794,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
|
||||
|
||||
let match_second_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(COMPACT_PROMPT_MARKER) && body.contains("AFTER_FORK")
|
||||
body_contains_text(body, SUMMARIZATION_PROMPT) && body.contains("AFTER_FORK")
|
||||
};
|
||||
mount_sse_once_match(server, match_second_compact, sse6).await;
|
||||
|
||||
@@ -798,6 +807,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
|
||||
|
||||
async fn start_test_conversation(
|
||||
server: &MockServer,
|
||||
model: Option<&str>,
|
||||
) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
@@ -806,8 +816,10 @@ async fn start_test_conversation(
|
||||
let home = TempDir::new().expect("create temp dir");
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
|
||||
|
||||
config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
|
||||
if let Some(model) = model {
|
||||
config.model = model.to_string();
|
||||
}
|
||||
let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation { conversation, .. } = manager
|
||||
.new_conversation(config.clone())
|
||||
|
||||
97
codex-rs/core/tests/suite/execpolicy2.rs
Normal file
97
codex-rs/core/tests/suite/execpolicy2.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn execpolicy2_blocks_shell_invocation() -> Result<()> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ExecPolicyV2);
|
||||
let policy_path = config.codex_home.join("policy.codexpolicy");
|
||||
fs::write(
|
||||
&policy_path,
|
||||
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-forbidden";
|
||||
let args = json!({
|
||||
"command": ["echo", "blocked"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let session_model = test.session_configured.model.clone();
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "run shell command".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd_path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::ExecCommandEnd(_))
|
||||
})
|
||||
.await
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
end.aggregated_output
|
||||
.contains("execpolicy forbids this command"),
|
||||
"unexpected output: {}",
|
||||
end.aggregated_output
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -11,7 +11,7 @@ use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::process::Command as StdCommand;
|
||||
|
||||
const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
|
||||
const MODEL_WITH_TOOL: &str = "test-gpt-5.1-codex";
|
||||
|
||||
fn ripgrep_available() -> bool {
|
||||
StdCommand::new("rg")
|
||||
|
||||
@@ -31,12 +31,12 @@ const SCHEMA: &str = r#"
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
|
||||
codex_returns_json_result("gpt-5".to_string()).await
|
||||
codex_returns_json_result("gpt-5.1".to_string()).await
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
|
||||
codex_returns_json_result("gpt-5-codex".to_string()).await
|
||||
codex_returns_json_result("gpt-5.1-codex".to_string()).await
|
||||
}
|
||||
|
||||
async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {
|
||||
|
||||
@@ -27,6 +27,7 @@ mod compact;
|
||||
mod compact_resume_fork;
|
||||
mod deprecation_notice;
|
||||
mod exec;
|
||||
mod execpolicy2;
|
||||
mod fork_conversation;
|
||||
mod grep_files;
|
||||
mod items;
|
||||
|
||||
@@ -160,7 +160,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
|
||||
// with the OpenAI schema, so we just verify the tool presence here
|
||||
let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
|
||||
(
|
||||
"gpt-5",
|
||||
"gpt-5.1",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
@@ -183,7 +183,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
|
||||
],
|
||||
),
|
||||
(
|
||||
"gpt-5-codex",
|
||||
"gpt-5.1-codex",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
|
||||
@@ -364,7 +364,7 @@ async fn review_uses_custom_review_model_from_config() {
|
||||
// Choose a review model different from the main model; ensure it is used.
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
|
||||
cfg.model = "gpt-4.1".to_string();
|
||||
cfg.review_model = "gpt-5".to_string();
|
||||
cfg.review_model = "gpt-5.1".to_string();
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -394,7 +394,7 @@ async fn review_uses_custom_review_model_from_config() {
|
||||
// Assert the request body model equals the configured review model
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
|
||||
assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_custom_tool_call;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_local_shell_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
@@ -16,12 +15,18 @@ use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::ApplyPatchModelOutput;
|
||||
use core_test_support::test_codex::ShellModelOutput;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use pretty_assertions::assert_eq;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
use test_case::test_case;
|
||||
|
||||
use crate::suite::apply_patch_cli::apply_patch_harness;
|
||||
use crate::suite::apply_patch_cli::mount_apply_patch;
|
||||
|
||||
const FIXTURE_JSON: &str = r#"{
|
||||
"description": "This is an example JSON file.",
|
||||
@@ -35,34 +40,88 @@ const FIXTURE_JSON: &str = r#"{
|
||||
}
|
||||
"#;
|
||||
|
||||
fn shell_responses(
|
||||
call_id: &str,
|
||||
command: Vec<&str>,
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<Vec<String>> {
|
||||
match output_type {
|
||||
ShellModelOutput::ShellCommand => {
|
||||
let command = shlex::try_join(command)?;
|
||||
let parameters = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 2_000,
|
||||
});
|
||||
Ok(vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
call_id,
|
||||
"shell_command",
|
||||
&serde_json::to_string(¶meters)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
])
|
||||
}
|
||||
ShellModelOutput::Shell => {
|
||||
let parameters = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 2_000,
|
||||
});
|
||||
Ok(vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(¶meters)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
])
|
||||
}
|
||||
ShellModelOutput::LocalShell => Ok(vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_local_shell_call(call_id, "completed", command),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
]),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_stays_json_without_freeform_apply_patch(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-json";
|
||||
let args = json!({
|
||||
"command": ["/bin/echo", "shell json"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/echo", "shell json"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -80,7 +139,6 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
|
||||
let mut parsed: Value = serde_json::from_str(output)?;
|
||||
if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
|
||||
// duration_seconds is non-deterministic; remove it for deep equality
|
||||
let _ = metadata.remove("duration_seconds");
|
||||
}
|
||||
|
||||
@@ -102,31 +160,26 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_is_structured_with_freeform_apply_patch(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-structured";
|
||||
let args = json!({
|
||||
"command": ["/bin/echo", "freeform shell"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -159,14 +212,23 @@ freeform shell
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_preserves_fixture_json_without_serialization(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -175,21 +237,11 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
|
||||
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "shell-json-fixture";
|
||||
let args = json!({
|
||||
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(
|
||||
call_id,
|
||||
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
|
||||
output_type,
|
||||
)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -232,12 +284,21 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_structures_fixture_with_serialization(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -246,21 +307,11 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "shell-structured-fixture";
|
||||
let args = json!({
|
||||
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(
|
||||
call_id,
|
||||
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
|
||||
output_type,
|
||||
)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -298,40 +349,26 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_for_freeform_tool_records_duration(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
|
||||
|
||||
#[cfg(windows)]
|
||||
let sleep_cmd = "timeout 1";
|
||||
|
||||
let call_id = "shell-structured";
|
||||
let args = json!({
|
||||
"command": sleep_cmd,
|
||||
"timeout_ms": 2_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/bash", "-c", "sleep 1"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -371,33 +408,26 @@ $"#;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_reserializes_truncated_content() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-truncated";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 400"],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "seq 1 400"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -445,14 +475,16 @@ $"#;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_output_is_structured(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-structured";
|
||||
let file_name = "structured.txt";
|
||||
@@ -463,33 +495,17 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
|
||||
*** End Patch
|
||||
"#
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
mount_apply_patch(&harness, call_id, &patch, "done", output_type).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
@@ -499,53 +515,39 @@ Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_call_creates_file(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-add-file";
|
||||
let file_name = "custom_tool_apply_patch.txt";
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to create a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to create a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
@@ -555,9 +557,9 @@ Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
let new_file_path = test.cwd.path().join(file_name);
|
||||
let new_file_path = harness.path(file_name);
|
||||
let created_contents = fs::read_to_string(&new_file_path)?;
|
||||
assert_eq!(
|
||||
created_contents, "custom tool content\n",
|
||||
@@ -568,49 +570,42 @@ A {file_name}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_call_updates_existing_file(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-update-file";
|
||||
let file_name = "custom_tool_apply_patch_existing.txt";
|
||||
let file_path = test.cwd.path().join(file_name);
|
||||
let file_path = harness.path(file_name);
|
||||
fs::write(&file_path, "before\n")?;
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch update done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to update a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
mount_apply_patch(
|
||||
&harness,
|
||||
call_id,
|
||||
&patch,
|
||||
"apply_patch update done",
|
||||
output_type,
|
||||
)
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to update a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
@@ -620,7 +615,7 @@ Success. Updated the following files:
|
||||
M {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
let updated_contents = fs::read_to_string(file_path)?;
|
||||
assert_eq!(updated_contents, "after\n", "expected updated file content");
|
||||
@@ -629,99 +624,83 @@ M {file_name}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_call_reports_failure_output(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-failure";
|
||||
let missing_file = "missing_custom_tool_apply_patch.txt";
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch failure done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"attempt a failing apply_patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
mount_apply_patch(
|
||||
&harness,
|
||||
call_id,
|
||||
&patch,
|
||||
"apply_patch failure done",
|
||||
output_type,
|
||||
)
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"attempt a failing apply_patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_output = format!(
|
||||
"apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
|
||||
test.cwd.path().to_string_lossy()
|
||||
harness.cwd().to_string_lossy()
|
||||
);
|
||||
assert_eq!(output, expected_output);
|
||||
assert_eq!(output, expected_output.as_str());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_function_call_output_is_structured() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_function_call_output_is_structured(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-function";
|
||||
let file_name = "function_apply_patch.txt";
|
||||
let patch =
|
||||
format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n");
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_apply_patch_function_call(call_id, &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch function done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via function-call apply_patch",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
mount_apply_patch(
|
||||
&harness,
|
||||
call_id,
|
||||
&patch,
|
||||
"apply_patch function done",
|
||||
output_type,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch function output request recorded");
|
||||
let output_item = req.function_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
.await;
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via function-call apply_patch",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
@@ -730,40 +709,32 @@ Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
config.include_apply_patch_tool = true;
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-nonzero-exit";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "exit 42"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "shell failure handled"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -788,15 +759,68 @@ Output:
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_command_output_is_structured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-command";
|
||||
let args = json!({
|
||||
"command": "echo shell command",
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "shell_command done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"run the shell_command script in the user's shell",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("shell_command output request recorded");
|
||||
let output_item = req.function_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("shell_command output string");
|
||||
|
||||
let expected_pattern = r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
shell command
|
||||
?$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn local_shell_call_output_is_structured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -57,9 +57,9 @@ async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "test-gpt-5-codex".to_string();
|
||||
config.model = "test-gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("test-gpt-5-codex").expect("test-gpt-5-codex model family");
|
||||
find_family_for_model("test-gpt-5.1-codex").expect("test-gpt-5.1-codex model family");
|
||||
});
|
||||
builder.build(server).await
|
||||
}
|
||||
|
||||
@@ -197,9 +197,9 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
@@ -425,8 +425,8 @@ async fn shell_timeout_handles_background_grandchild_stdout() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
|
||||
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -41,9 +41,9 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
// Use the test model that wires function tools like grep_files
|
||||
config.model = "test-gpt-5-codex".to_string();
|
||||
config.model = "test-gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
|
||||
find_family_for_model("test-gpt-5.1-codex").expect("model family for test model");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -105,9 +105,9 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
@@ -197,9 +197,9 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
let call_id = "shell-single-truncation";
|
||||
|
||||
@@ -30,8 +30,8 @@ use pretty_assertions::assert_eq;
|
||||
async fn undo_harness() -> Result<TestCodexHarness> {
|
||||
TestCodexHarness::with_config(|config: &mut Config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
config.features.enable(Feature::GhostCommit);
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -159,7 +159,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
@@ -236,7 +236,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
@@ -288,28 +288,22 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
let begin_event = wait_for_event_match(&codex, |msg| match msg {
|
||||
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
begin_event.cwd, workdir,
|
||||
"exec_command cwd should reflect the requested workdir override"
|
||||
);
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
let output = outputs
|
||||
.get(call_id)
|
||||
.expect("missing exec_command workdir output");
|
||||
let output_text = output.output.trim();
|
||||
let output_canonical = std::fs::canonicalize(output_text)?;
|
||||
let expected_canonical = std::fs::canonicalize(&workdir)?;
|
||||
assert_eq!(
|
||||
output_canonical, expected_canonical,
|
||||
"pwd should reflect the requested workdir override"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -270,9 +270,9 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ Start a new session with optional overrides:
|
||||
|
||||
Request `newConversation` params (subset):
|
||||
|
||||
- `model`: string model id (e.g. "o3", "gpt-5", "gpt-5-codex")
|
||||
- `model`: string model id (e.g. "o3", "gpt-5.1", "gpt-5.1-codex")
|
||||
- `profile`: optional named profile
|
||||
- `cwd`: optional working directory
|
||||
- `approvalPolicy`: `untrusted` | `on-request` | `on-failure` | `never`
|
||||
@@ -119,13 +119,13 @@ For the complete request/response shapes and flow examples, see the [“Auth end
|
||||
## Example: start and send a message
|
||||
|
||||
```json
|
||||
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5", "approvalPolicy": "on-request" } }
|
||||
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5.1", "approvalPolicy": "on-request" } }
|
||||
```
|
||||
|
||||
Server responds:
|
||||
|
||||
```json
|
||||
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5", "rolloutPath": "/path/to/rollout.jsonl" } }
|
||||
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5.1", "rolloutPath": "/path/to/rollout.jsonl" } }
|
||||
```
|
||||
|
||||
Then send input:
|
||||
|
||||
@@ -24,7 +24,6 @@ codex-common = { workspace = true, features = [
|
||||
"sandbox_summary",
|
||||
] }
|
||||
codex-core = { workspace = true }
|
||||
codex-ollama = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
opentelemetry-appender-tracing = { workspace = true }
|
||||
|
||||
@@ -18,9 +18,15 @@ pub struct Cli {
|
||||
#[arg(long, short = 'm')]
|
||||
pub model: Option<String>,
|
||||
|
||||
/// Use open-source provider.
|
||||
#[arg(long = "oss", default_value_t = false)]
|
||||
pub oss: bool,
|
||||
|
||||
/// Specify which local provider to use (lmstudio or ollama).
|
||||
/// If not specified with --oss, will use config default or show selection.
|
||||
#[arg(long = "local-provider")]
|
||||
pub oss_provider: Option<String>,
|
||||
|
||||
/// Select the sandbox policy to use when executing model-generated shell
|
||||
/// commands.
|
||||
#[arg(long = "sandbox", short = 's', value_enum)]
|
||||
|
||||
@@ -182,6 +182,42 @@ impl EventProcessor for EventProcessorWithHumanOutput {
|
||||
ts_msg!(self, " {}", details.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
EventMsg::McpStartupUpdate(update) => {
|
||||
let status_text = match update.status {
|
||||
codex_core::protocol::McpStartupStatus::Starting => "starting".to_string(),
|
||||
codex_core::protocol::McpStartupStatus::Ready => "ready".to_string(),
|
||||
codex_core::protocol::McpStartupStatus::Cancelled => "cancelled".to_string(),
|
||||
codex_core::protocol::McpStartupStatus::Failed { ref error } => {
|
||||
format!("failed: {error}")
|
||||
}
|
||||
};
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {} {}",
|
||||
"mcp:".style(self.cyan),
|
||||
update.server,
|
||||
status_text
|
||||
);
|
||||
}
|
||||
EventMsg::McpStartupComplete(summary) => {
|
||||
let mut parts = Vec::new();
|
||||
if !summary.ready.is_empty() {
|
||||
parts.push(format!("ready: {}", summary.ready.join(", ")));
|
||||
}
|
||||
if !summary.failed.is_empty() {
|
||||
let servers: Vec<_> = summary.failed.iter().map(|f| f.server.clone()).collect();
|
||||
parts.push(format!("failed: {}", servers.join(", ")));
|
||||
}
|
||||
if !summary.cancelled.is_empty() {
|
||||
parts.push(format!("cancelled: {}", summary.cancelled.join(", ")));
|
||||
}
|
||||
let joined = if parts.is_empty() {
|
||||
"no servers".to_string()
|
||||
} else {
|
||||
parts.join("; ")
|
||||
};
|
||||
ts_msg!(self, "{} {}", "mcp startup:".style(self.cyan), joined);
|
||||
}
|
||||
EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
|
||||
ts_msg!(self, "{}", message.style(self.dimmed));
|
||||
}
|
||||
|
||||
@@ -11,20 +11,25 @@ pub mod event_processor_with_jsonl_output;
|
||||
pub mod exec_events;
|
||||
|
||||
pub use cli::Cli;
|
||||
use codex_common::oss::ensure_oss_provider_ready;
|
||||
use codex_common::oss::get_default_model_for_oss_provider;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::OLLAMA_OSS_PROVIDER_ID;
|
||||
use codex_core::auth::enforce_login_restrictions;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config::find_codex_home;
|
||||
use codex_core::config::load_config_as_toml_with_cli_overrides;
|
||||
use codex_core::config::resolve_oss_provider;
|
||||
use codex_core::git_info::get_git_repo_root;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SessionSource;
|
||||
use codex_ollama::DEFAULT_OSS_MODEL;
|
||||
use codex_protocol::config_types::SandboxMode;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use event_processor_with_human_output::EventProcessorWithHumanOutput;
|
||||
@@ -57,6 +62,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
images,
|
||||
model: model_cli_arg,
|
||||
oss,
|
||||
oss_provider,
|
||||
config_profile,
|
||||
full_auto,
|
||||
dangerously_bypass_approvals_and_sandbox,
|
||||
@@ -146,21 +152,64 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
|
||||
};
|
||||
|
||||
// When using `--oss`, let the bootstrapper pick the model (defaulting to
|
||||
// gpt-oss:20b) and ensure it is present locally. Also, force the built‑in
|
||||
// `oss` model provider.
|
||||
let model = if let Some(model) = model_cli_arg {
|
||||
Some(model)
|
||||
} else if oss {
|
||||
Some(DEFAULT_OSS_MODEL.to_owned())
|
||||
} else {
|
||||
None // No model specified, will use the default.
|
||||
// Parse `-c` overrides from the CLI.
|
||||
let cli_kv_overrides = match config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
#[allow(clippy::print_stderr)]
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
// we load config.toml here to determine project state.
|
||||
#[allow(clippy::print_stderr)]
|
||||
let config_toml = {
|
||||
let codex_home = match find_codex_home() {
|
||||
Ok(codex_home) => codex_home,
|
||||
Err(err) => {
|
||||
eprintln!("Error finding codex home: {err}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
match load_config_as_toml_with_cli_overrides(&codex_home, cli_kv_overrides.clone()).await {
|
||||
Ok(config_toml) => config_toml,
|
||||
Err(err) => {
|
||||
eprintln!("Error loading config.toml: {err}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let model_provider = if oss {
|
||||
Some(BUILT_IN_OSS_MODEL_PROVIDER_ID.to_string())
|
||||
let resolved = resolve_oss_provider(
|
||||
oss_provider.as_deref(),
|
||||
&config_toml,
|
||||
config_profile.clone(),
|
||||
);
|
||||
|
||||
if let Some(provider) = resolved {
|
||||
Some(provider)
|
||||
} else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"No default OSS provider configured. Use --local-provider=provider or set oss_provider to either {LMSTUDIO_OSS_PROVIDER_ID} or {OLLAMA_OSS_PROVIDER_ID} in config.toml"
|
||||
));
|
||||
}
|
||||
} else {
|
||||
None // No specific model provider override.
|
||||
None // No OSS mode enabled
|
||||
};
|
||||
|
||||
// When using `--oss`, let the bootstrapper pick the model based on selected provider
|
||||
let model = if let Some(model) = model_cli_arg {
|
||||
Some(model)
|
||||
} else if oss {
|
||||
model_provider
|
||||
.as_ref()
|
||||
.and_then(|provider_id| get_default_model_for_oss_provider(provider_id))
|
||||
.map(std::borrow::ToOwned::to_owned)
|
||||
} else {
|
||||
None // No model specified, will use the default.
|
||||
};
|
||||
|
||||
// Load configuration and determine approval policy
|
||||
@@ -172,7 +221,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
approval_policy: Some(AskForApproval::Never),
|
||||
sandbox_mode,
|
||||
cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
|
||||
model_provider,
|
||||
model_provider: model_provider.clone(),
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions: None,
|
||||
developer_instructions: None,
|
||||
@@ -183,14 +232,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: add_dir,
|
||||
};
|
||||
// Parse `-c` overrides.
|
||||
let cli_kv_overrides = match config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
|
||||
|
||||
@@ -233,7 +274,18 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
};
|
||||
|
||||
if oss {
|
||||
codex_ollama::ensure_oss_ready(&config)
|
||||
// We're in the oss section, so provider_id should be Some
|
||||
// Let's handle None case gracefully though just in case
|
||||
let provider_id = match model_provider.as_ref() {
|
||||
Some(id) => id,
|
||||
None => {
|
||||
error!("OSS provider unexpectedly not set when oss flag is used");
|
||||
return Err(anyhow::anyhow!(
|
||||
"OSS provider not set but oss flag was used"
|
||||
));
|
||||
}
|
||||
};
|
||||
ensure_oss_provider_ready(provider_id, &config)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
|
||||
}
|
||||
|
||||
@@ -618,15 +618,19 @@ fn error_followed_by_task_complete_produces_turn_failed() {
|
||||
#[test]
|
||||
fn exec_command_end_success_produces_completed_command_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let command = vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()];
|
||||
let cwd = std::env::current_dir().unwrap();
|
||||
let parsed_cmd = Vec::new();
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"c1",
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: "1".to_string(),
|
||||
command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
|
||||
cwd: std::env::current_dir().unwrap(),
|
||||
parsed_cmd: Vec::new(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
}),
|
||||
@@ -652,6 +656,12 @@ fn exec_command_end_success_produces_completed_command_item() {
|
||||
"c2",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: "hi\n".to_string(),
|
||||
@@ -680,15 +690,19 @@ fn exec_command_end_success_produces_completed_command_item() {
|
||||
#[test]
|
||||
fn exec_command_end_failure_produces_failed_command_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let command = vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()];
|
||||
let cwd = std::env::current_dir().unwrap();
|
||||
let parsed_cmd = Vec::new();
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"c1",
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: "2".to_string(),
|
||||
command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
|
||||
cwd: std::env::current_dir().unwrap(),
|
||||
parsed_cmd: Vec::new(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
}),
|
||||
@@ -713,6 +727,12 @@ fn exec_command_end_failure_produces_failed_command_item() {
|
||||
"c2",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "2".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
@@ -747,6 +767,12 @@ fn exec_command_end_without_begin_is_ignored() {
|
||||
"c1",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "no-begin".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command: Vec::new(),
|
||||
cwd: PathBuf::from("."),
|
||||
parsed_cmd: Vec::new(),
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
|
||||
@@ -37,7 +37,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
|
||||
.arg("--output-schema")
|
||||
.arg(&schema_path)
|
||||
.arg("-m")
|
||||
.arg("gpt-5")
|
||||
.arg("gpt-5.1")
|
||||
.arg("tell me a joke")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
@@ -196,7 +196,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
|
||||
.arg("--sandbox")
|
||||
.arg("workspace-write")
|
||||
.arg("--model")
|
||||
.arg("gpt-5")
|
||||
.arg("gpt-5.1")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt)
|
||||
@@ -218,7 +218,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
|
||||
.arg("--sandbox")
|
||||
.arg("workspace-write")
|
||||
.arg("--model")
|
||||
.arg("gpt-5-high")
|
||||
.arg("gpt-5.1-high")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt2)
|
||||
@@ -231,7 +231,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
|
||||
|
||||
let stderr = String::from_utf8(output.stderr)?;
|
||||
assert!(
|
||||
stderr.contains("model: gpt-5-high"),
|
||||
stderr.contains("model: gpt-5.1-high"),
|
||||
"stderr missing model override: {stderr}"
|
||||
);
|
||||
if cfg!(target_os = "windows") {
|
||||
|
||||
29
codex-rs/execpolicy2/Cargo.toml
Normal file
29
codex-rs/execpolicy2/Cargo.toml
Normal file
@@ -0,0 +1,29 @@
|
||||
[package]
|
||||
name = "codex-execpolicy2"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
description = "Codex exec policy v2: prefix-based Starlark rules for command decisions."
|
||||
|
||||
[lib]
|
||||
name = "codex_execpolicy2"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "codex-execpolicy2"
|
||||
path = "src/main.rs"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
multimap = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
shlex = { workspace = true }
|
||||
starlark = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
59
codex-rs/execpolicy2/README.md
Normal file
59
codex-rs/execpolicy2/README.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# codex-execpolicy2
|
||||
|
||||
## Overview
|
||||
- Policy engine and CLI built around `prefix_rule(pattern=[...], decision?, match?, not_match?)`.
|
||||
- This release covers only the prefix-rule subset of the planned execpolicy v2 language; a richer language will follow.
|
||||
- Tokens are matched in order; any `pattern` element may be a list to denote alternatives. `decision` defaults to `allow`; valid values: `allow`, `prompt`, `forbidden`.
|
||||
- `match` / `not_match` supply example invocations that are validated at load time (think of them as unit tests); examples can be token arrays or strings (strings are tokenized with `shlex`).
|
||||
- The CLI always prints the JSON serialization of the evaluation result (whether a match or not).
|
||||
|
||||
## Policy shapes
|
||||
- Prefix rules use Starlark syntax:
|
||||
```starlark
|
||||
prefix_rule(
|
||||
pattern = ["cmd", ["alt1", "alt2"]], # ordered tokens; list entries denote alternatives
|
||||
decision = "prompt", # allow | prompt | forbidden; defaults to allow
|
||||
match = [["cmd", "alt1"], "cmd alt2"], # examples that must match this rule
|
||||
not_match = [["cmd", "oops"], "cmd alt3"], # examples that must not match this rule
|
||||
)
|
||||
```
|
||||
|
||||
## Response shapes
|
||||
- Match:
|
||||
```json
|
||||
{
|
||||
"match": {
|
||||
"decision": "allow|prompt|forbidden",
|
||||
"matchedRules": [
|
||||
{
|
||||
"prefixRuleMatch": {
|
||||
"matchedPrefix": ["<token>", "..."],
|
||||
"decision": "allow|prompt|forbidden"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- No match:
|
||||
```json
|
||||
"noMatch"
|
||||
```
|
||||
|
||||
- `matchedRules` lists every rule whose prefix matched the command; `matchedPrefix` is the exact prefix that matched.
|
||||
- The effective `decision` is the strictest severity across all matches (`forbidden` > `prompt` > `allow`).
|
||||
|
||||
## CLI
|
||||
- Provide one or more policy files (for example `src/default.codexpolicy`) to check a command:
|
||||
```bash
|
||||
cargo run -p codex-execpolicy2 -- check --policy path/to/policy.codexpolicy git status
|
||||
```
|
||||
- Pass multiple `--policy` flags to merge rules, evaluated in the order provided:
|
||||
```bash
|
||||
cargo run -p codex-execpolicy2 -- check --policy base.codexpolicy --policy overrides.codexpolicy git status
|
||||
```
|
||||
- Output is newline-delimited JSON by default; pass `--pretty` for pretty-printed JSON if desired.
|
||||
- Example outcomes:
|
||||
- Match: `{"match": { ... "decision": "allow" ... }}`
|
||||
- No match: `"noMatch"`
|
||||
77
codex-rs/execpolicy2/examples/example.codexpolicy
Normal file
77
codex-rs/execpolicy2/examples/example.codexpolicy
Normal file
@@ -0,0 +1,77 @@
|
||||
|
||||
# Example policy to illustrate syntax; not comprehensive and not recommended for actual use.
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["git", "reset", "--hard"],
|
||||
decision = "forbidden",
|
||||
match = [
|
||||
["git", "reset", "--hard"],
|
||||
],
|
||||
not_match = [
|
||||
["git", "reset", "--keep"],
|
||||
"git reset --merge",
|
||||
],
|
||||
)
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["ls"],
|
||||
match = [
|
||||
["ls"],
|
||||
["ls", "-l"],
|
||||
["ls", "-a", "."],
|
||||
],
|
||||
)
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["cat"],
|
||||
match = [
|
||||
["cat", "file.txt"],
|
||||
["cat", "-n", "README.md"],
|
||||
],
|
||||
)
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["cp"],
|
||||
decision = "prompt",
|
||||
match = [
|
||||
["cp", "foo", "bar"],
|
||||
"cp -r src dest",
|
||||
],
|
||||
)
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["head"],
|
||||
match = [
|
||||
["head", "README.md"],
|
||||
["head", "-n", "5", "CHANGELOG.md"],
|
||||
],
|
||||
not_match = [
|
||||
["hea", "-n", "1,5p", "CHANGELOG.md"],
|
||||
],
|
||||
)
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["printenv"],
|
||||
match = [
|
||||
["printenv"],
|
||||
["printenv", "PATH"],
|
||||
],
|
||||
not_match = [
|
||||
["print", "-0"],
|
||||
],
|
||||
)
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["pwd"],
|
||||
match = [
|
||||
["pwd"],
|
||||
],
|
||||
)
|
||||
|
||||
prefix_rule(
|
||||
pattern = ["which"],
|
||||
match = [
|
||||
["which", "python3"],
|
||||
["which", "-a", "python3"],
|
||||
],
|
||||
)
|
||||
27
codex-rs/execpolicy2/src/decision.rs
Normal file
27
codex-rs/execpolicy2/src/decision.rs
Normal file
@@ -0,0 +1,27 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::error::Result;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum Decision {
|
||||
/// Command may run without further approval.
|
||||
Allow,
|
||||
/// Request explicit user approval; rejected outright when running with `approval_policy="never"`.
|
||||
Prompt,
|
||||
/// Command is blocked without further consideration.
|
||||
Forbidden,
|
||||
}
|
||||
|
||||
impl Decision {
|
||||
pub fn parse(raw: &str) -> Result<Self> {
|
||||
match raw {
|
||||
"allow" => Ok(Self::Allow),
|
||||
"prompt" => Ok(Self::Prompt),
|
||||
"forbidden" => Ok(Self::Forbidden),
|
||||
other => Err(Error::InvalidDecision(other.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
26
codex-rs/execpolicy2/src/error.rs
Normal file
26
codex-rs/execpolicy2/src/error.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
use starlark::Error as StarlarkError;
|
||||
use thiserror::Error;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
#[error("invalid decision: {0}")]
|
||||
InvalidDecision(String),
|
||||
#[error("invalid pattern element: {0}")]
|
||||
InvalidPattern(String),
|
||||
#[error("invalid example: {0}")]
|
||||
InvalidExample(String),
|
||||
#[error(
|
||||
"expected every example to match at least one rule. rules: {rules:?}; unmatched examples: \
|
||||
{examples:?}"
|
||||
)]
|
||||
ExampleDidNotMatch {
|
||||
rules: Vec<String>,
|
||||
examples: Vec<String>,
|
||||
},
|
||||
#[error("expected example to not match rule `{rule}`: {example}")]
|
||||
ExampleDidMatch { rule: String, example: String },
|
||||
#[error("starlark error: {0}")]
|
||||
Starlark(StarlarkError),
|
||||
}
|
||||
15
codex-rs/execpolicy2/src/lib.rs
Normal file
15
codex-rs/execpolicy2/src/lib.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
pub mod decision;
|
||||
pub mod error;
|
||||
pub mod parser;
|
||||
pub mod policy;
|
||||
pub mod rule;
|
||||
|
||||
pub use decision::Decision;
|
||||
pub use error::Error;
|
||||
pub use error::Result;
|
||||
pub use parser::PolicyParser;
|
||||
pub use policy::Evaluation;
|
||||
pub use policy::Policy;
|
||||
pub use rule::Rule;
|
||||
pub use rule::RuleMatch;
|
||||
pub use rule::RuleRef;
|
||||
66
codex-rs/execpolicy2/src/main.rs
Normal file
66
codex-rs/execpolicy2/src/main.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use codex_execpolicy2::PolicyParser;
|
||||
|
||||
/// CLI for evaluating exec policies
|
||||
#[derive(Parser)]
|
||||
#[command(name = "codex-execpolicy2")]
|
||||
enum Cli {
|
||||
/// Evaluate a command against a policy.
|
||||
Check {
|
||||
#[arg(short, long = "policy", value_name = "PATH", required = true)]
|
||||
policies: Vec<PathBuf>,
|
||||
|
||||
/// Pretty-print the JSON output.
|
||||
#[arg(long)]
|
||||
pretty: bool,
|
||||
|
||||
/// Command tokens to check.
|
||||
#[arg(
|
||||
value_name = "COMMAND",
|
||||
required = true,
|
||||
trailing_var_arg = true,
|
||||
allow_hyphen_values = true
|
||||
)]
|
||||
command: Vec<String>,
|
||||
},
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
match cli {
|
||||
Cli::Check {
|
||||
policies,
|
||||
command,
|
||||
pretty,
|
||||
} => cmd_check(policies, command, pretty),
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_check(policy_paths: Vec<PathBuf>, args: Vec<String>, pretty: bool) -> Result<()> {
|
||||
let policy = load_policies(&policy_paths)?;
|
||||
|
||||
let eval = policy.check(&args);
|
||||
let json = if pretty {
|
||||
serde_json::to_string_pretty(&eval)?
|
||||
} else {
|
||||
serde_json::to_string(&eval)?
|
||||
};
|
||||
println!("{json}");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_policies(policy_paths: &[PathBuf]) -> Result<codex_execpolicy2::Policy> {
|
||||
let mut parser = PolicyParser::new();
|
||||
for policy_path in policy_paths {
|
||||
let policy_file_contents = fs::read_to_string(policy_path)
|
||||
.with_context(|| format!("failed to read policy at {}", policy_path.display()))?;
|
||||
let policy_identifier = policy_path.to_string_lossy().to_string();
|
||||
parser.parse(&policy_identifier, &policy_file_contents)?;
|
||||
}
|
||||
Ok(parser.build())
|
||||
}
|
||||
259
codex-rs/execpolicy2/src/parser.rs
Normal file
259
codex-rs/execpolicy2/src/parser.rs
Normal file
@@ -0,0 +1,259 @@
|
||||
use multimap::MultiMap;
|
||||
use shlex;
|
||||
use starlark::any::ProvidesStaticType;
|
||||
use starlark::environment::GlobalsBuilder;
|
||||
use starlark::environment::Module;
|
||||
use starlark::eval::Evaluator;
|
||||
use starlark::starlark_module;
|
||||
use starlark::syntax::AstModule;
|
||||
use starlark::syntax::Dialect;
|
||||
use starlark::values::Value;
|
||||
use starlark::values::list::ListRef;
|
||||
use starlark::values::list::UnpackList;
|
||||
use starlark::values::none::NoneType;
|
||||
use std::cell::RefCell;
|
||||
use std::cell::RefMut;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::decision::Decision;
|
||||
use crate::error::Error;
|
||||
use crate::error::Result;
|
||||
use crate::rule::PatternToken;
|
||||
use crate::rule::PrefixPattern;
|
||||
use crate::rule::PrefixRule;
|
||||
use crate::rule::RuleRef;
|
||||
use crate::rule::validate_match_examples;
|
||||
use crate::rule::validate_not_match_examples;
|
||||
|
||||
pub struct PolicyParser {
|
||||
builder: RefCell<PolicyBuilder>,
|
||||
}
|
||||
|
||||
impl Default for PolicyParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl PolicyParser {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
builder: RefCell::new(PolicyBuilder::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a policy, tagging parser errors with `policy_identifier` so failures include the
|
||||
/// identifier alongside line numbers.
|
||||
pub fn parse(&mut self, policy_identifier: &str, policy_file_contents: &str) -> Result<()> {
|
||||
let mut dialect = Dialect::Extended.clone();
|
||||
dialect.enable_f_strings = true;
|
||||
let ast = AstModule::parse(
|
||||
policy_identifier,
|
||||
policy_file_contents.to_string(),
|
||||
&dialect,
|
||||
)
|
||||
.map_err(Error::Starlark)?;
|
||||
let globals = GlobalsBuilder::standard().with(policy_builtins).build();
|
||||
let module = Module::new();
|
||||
{
|
||||
let mut eval = Evaluator::new(&module);
|
||||
eval.extra = Some(&self.builder);
|
||||
eval.eval_module(ast, &globals).map_err(Error::Starlark)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn build(self) -> crate::policy::Policy {
|
||||
self.builder.into_inner().build()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, ProvidesStaticType)]
|
||||
struct PolicyBuilder {
|
||||
rules_by_program: MultiMap<String, RuleRef>,
|
||||
}
|
||||
|
||||
impl PolicyBuilder {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
rules_by_program: MultiMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_rule(&mut self, rule: RuleRef) {
|
||||
self.rules_by_program
|
||||
.insert(rule.program().to_string(), rule);
|
||||
}
|
||||
|
||||
fn build(self) -> crate::policy::Policy {
|
||||
crate::policy::Policy::new(self.rules_by_program)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_pattern<'v>(pattern: UnpackList<Value<'v>>) -> Result<Vec<PatternToken>> {
|
||||
let tokens: Vec<PatternToken> = pattern
|
||||
.items
|
||||
.into_iter()
|
||||
.map(parse_pattern_token)
|
||||
.collect::<Result<_>>()?;
|
||||
if tokens.is_empty() {
|
||||
Err(Error::InvalidPattern("pattern cannot be empty".to_string()))
|
||||
} else {
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_pattern_token<'v>(value: Value<'v>) -> Result<PatternToken> {
|
||||
if let Some(s) = value.unpack_str() {
|
||||
Ok(PatternToken::Single(s.to_string()))
|
||||
} else if let Some(list) = ListRef::from_value(value) {
|
||||
let tokens: Vec<String> = list
|
||||
.content()
|
||||
.iter()
|
||||
.map(|value| {
|
||||
value
|
||||
.unpack_str()
|
||||
.ok_or_else(|| {
|
||||
Error::InvalidPattern(format!(
|
||||
"pattern alternative must be a string (got {})",
|
||||
value.get_type()
|
||||
))
|
||||
})
|
||||
.map(str::to_string)
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
match tokens.as_slice() {
|
||||
[] => Err(Error::InvalidPattern(
|
||||
"pattern alternatives cannot be empty".to_string(),
|
||||
)),
|
||||
[single] => Ok(PatternToken::Single(single.clone())),
|
||||
_ => Ok(PatternToken::Alts(tokens)),
|
||||
}
|
||||
} else {
|
||||
Err(Error::InvalidPattern(format!(
|
||||
"pattern element must be a string or list of strings (got {})",
|
||||
value.get_type()
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_examples<'v>(examples: UnpackList<Value<'v>>) -> Result<Vec<Vec<String>>> {
|
||||
examples.items.into_iter().map(parse_example).collect()
|
||||
}
|
||||
|
||||
fn parse_example<'v>(value: Value<'v>) -> Result<Vec<String>> {
|
||||
if let Some(raw) = value.unpack_str() {
|
||||
parse_string_example(raw)
|
||||
} else if let Some(list) = ListRef::from_value(value) {
|
||||
parse_list_example(list)
|
||||
} else {
|
||||
Err(Error::InvalidExample(format!(
|
||||
"example must be a string or list of strings (got {})",
|
||||
value.get_type()
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_string_example(raw: &str) -> Result<Vec<String>> {
|
||||
let tokens = shlex::split(raw).ok_or_else(|| {
|
||||
Error::InvalidExample("example string has invalid shell syntax".to_string())
|
||||
})?;
|
||||
|
||||
if tokens.is_empty() {
|
||||
Err(Error::InvalidExample(
|
||||
"example cannot be an empty string".to_string(),
|
||||
))
|
||||
} else {
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_list_example(list: &ListRef) -> Result<Vec<String>> {
|
||||
let tokens: Vec<String> = list
|
||||
.content()
|
||||
.iter()
|
||||
.map(|value| {
|
||||
value
|
||||
.unpack_str()
|
||||
.ok_or_else(|| {
|
||||
Error::InvalidExample(format!(
|
||||
"example tokens must be strings (got {})",
|
||||
value.get_type()
|
||||
))
|
||||
})
|
||||
.map(str::to_string)
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
if tokens.is_empty() {
|
||||
Err(Error::InvalidExample(
|
||||
"example cannot be an empty list".to_string(),
|
||||
))
|
||||
} else {
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
|
||||
fn policy_builder<'v, 'a>(eval: &Evaluator<'v, 'a, '_>) -> RefMut<'a, PolicyBuilder> {
|
||||
#[expect(clippy::expect_used)]
|
||||
eval.extra
|
||||
.as_ref()
|
||||
.expect("policy_builder requires Evaluator.extra to be populated")
|
||||
.downcast_ref::<RefCell<PolicyBuilder>>()
|
||||
.expect("Evaluator.extra must contain a PolicyBuilder")
|
||||
.borrow_mut()
|
||||
}
|
||||
|
||||
#[starlark_module]
|
||||
fn policy_builtins(builder: &mut GlobalsBuilder) {
|
||||
fn prefix_rule<'v>(
|
||||
pattern: UnpackList<Value<'v>>,
|
||||
decision: Option<&'v str>,
|
||||
r#match: Option<UnpackList<Value<'v>>>,
|
||||
not_match: Option<UnpackList<Value<'v>>>,
|
||||
eval: &mut Evaluator<'v, '_, '_>,
|
||||
) -> anyhow::Result<NoneType> {
|
||||
let decision = match decision {
|
||||
Some(raw) => Decision::parse(raw)?,
|
||||
None => Decision::Allow,
|
||||
};
|
||||
|
||||
let pattern_tokens = parse_pattern(pattern)?;
|
||||
|
||||
let matches: Vec<Vec<String>> =
|
||||
r#match.map(parse_examples).transpose()?.unwrap_or_default();
|
||||
let not_matches: Vec<Vec<String>> = not_match
|
||||
.map(parse_examples)
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut builder = policy_builder(eval);
|
||||
|
||||
let (first_token, remaining_tokens) = pattern_tokens
|
||||
.split_first()
|
||||
.ok_or_else(|| Error::InvalidPattern("pattern cannot be empty".to_string()))?;
|
||||
|
||||
let rest: Arc<[PatternToken]> = remaining_tokens.to_vec().into();
|
||||
|
||||
let rules: Vec<RuleRef> = first_token
|
||||
.alternatives()
|
||||
.iter()
|
||||
.map(|head| {
|
||||
Arc::new(PrefixRule {
|
||||
pattern: PrefixPattern {
|
||||
first: Arc::from(head.as_str()),
|
||||
rest: rest.clone(),
|
||||
},
|
||||
decision,
|
||||
}) as RuleRef
|
||||
})
|
||||
.collect();
|
||||
|
||||
validate_not_match_examples(&rules, ¬_matches)?;
|
||||
validate_match_examples(&rules, &matches)?;
|
||||
|
||||
rules.into_iter().for_each(|rule| builder.add_rule(rule));
|
||||
Ok(NoneType)
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user