Mirror of https://github.com/openai/codex.git, synced 2026-02-05 16:33:42 +00:00.

Compare commits: add-contex… → auto-picke… (61 commits; both branch names are truncated in the mirror)
Only the commit SHAs survived the mirror; the author, date, and message columns are empty.

61 commits, newest first:
c7e6666337, e7227ffec8, f0cc2db4c3, c03d43be6f, 44058a2d27, 90fdb118f4,
0afeea3b5e, e3809caaaf, 29ca89c414, 1fafbb0fb8, c10bcc6780, 77bc873059,
c1b7e6f9a8, 0ce1ed78e4, c36291f233, 4bada5a84d, 3de8790714, 9435d39d4c,
c025309d32, 093cd9cf04, b035c604b0, e9e644a119, f5d9939cda, 838531d3e4,
0eb2e6f9ee, c20df79a38, fc55fd7a81, 9923f76831, f3d4e210d8, 28ebe1c97a,
2b7378ac77, ddcc60a085, 8465f1f2f4, 7ab45487dd, cecbd5b021, 4000e26303,
e032d338f2, 8bebe86a47, ab2e7499f8, daf77b8452, 03a6e853c0, 837bc98a1d,
842a1b7fe7, 03ffe4d595, ae2a084fae, a941ae7632, 2c665fb1dd, 98a90a3bb2,
7c8d333980, 497fb4a19c, 5860481bc4, a52cf4d2b4, e70c52a3af, de1768d3ba,
702238f004, fa5f6e76c9, f828cd2897, 326c1e0a7e, 3f1c4b9add, 0b28e72b66,
94dfb211af
.github/codex/home/config.toml (vendored, 2 changes)

```diff
@@ -1,3 +1,3 @@
-model = "gpt-5"
+model = "gpt-5.1"
 
 # Consider setting [mcp_servers] here!
```
.github/workflows/cla.yml (vendored, 4 changes)

```diff
@@ -46,4 +46,6 @@ jobs:
           path-to-document: https://github.com/openai/codex/blob/main/docs/CLA.md
           path-to-signatures: signatures/cla.json
           branch: cla-signatures
-          allowlist: dependabot[bot]
+          allowlist: |
+            codex
+            dependabot[bot]
```
A second workflow file (its header was lost in the mirror):

```diff
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Close inactive PRs from contributors
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
        with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
```
.github/workflows/issue-deduplicator.yml (vendored, 2 changes)

```diff
@@ -46,7 +46,7 @@ jobs:
         with:
           openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
           allow-users: "*"
-          model: gpt-5
+          model: gpt-5.1
           prompt: |
             You are an assistant that triages new GitHub issues by identifying potential duplicates.
```
.github/workflows/rust-ci.yml (vendored, 12 changes)

```diff
@@ -97,7 +97,6 @@ jobs:
     env:
       # Speed up repeated builds across CI runs by caching compiled objects (non-Windows).
       USE_SCCACHE: ${{ startsWith(matrix.runner, 'windows') && 'false' || 'true' }}
-      RUSTC_WRAPPER: ${{ startsWith(matrix.runner, 'windows') && '' || 'sccache' }}
       CARGO_INCREMENTAL: "0"
       SCCACHE_CACHE_SIZE: 10G
 
@@ -191,6 +190,11 @@ jobs:
             echo "Using sccache local disk + actions/cache fallback"
           fi
 
+      - name: Enable sccache wrapper
+        if: ${{ env.USE_SCCACHE == 'true' }}
+        shell: bash
+        run: echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
+
       - name: Restore sccache cache (fallback)
         if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
         id: cache_sccache_restore
@@ -331,7 +335,6 @@ jobs:
     env:
       # Speed up repeated builds across CI runs by caching compiled objects (non-Windows).
       USE_SCCACHE: ${{ startsWith(matrix.runner, 'windows') && 'false' || 'true' }}
-      RUSTC_WRAPPER: ${{ startsWith(matrix.runner, 'windows') && '' || 'sccache' }}
       CARGO_INCREMENTAL: "0"
       SCCACHE_CACHE_SIZE: 10G
 
@@ -395,6 +398,11 @@ jobs:
             echo "Using sccache local disk + actions/cache fallback"
           fi
 
+      - name: Enable sccache wrapper
+        if: ${{ env.USE_SCCACHE == 'true' }}
+        shell: bash
+        run: echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
+
       - name: Restore sccache cache (fallback)
         if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
         id: cache_sccache_restore
```
.gitignore (vendored, 3 changes)

```diff
@@ -64,6 +64,9 @@ apply_patch/
 # coverage
 coverage/
 
+# personal files
+personal/
+
 # os
 .DS_Store
 Thumbs.db
```
codex-rs/Cargo.lock (generated, 36 changes; the +/- attribution below is reconstructed from the hunk line counts and alphabetical dependency ordering)

```diff
@@ -874,7 +874,6 @@ dependencies = [
  "clap",
  "codex-protocol",
  "mcp-types",
- "paste",
  "pretty_assertions",
  "schemars 0.8.22",
  "serde",
@@ -1061,6 +1060,8 @@ dependencies = [
  "clap",
  "codex-app-server-protocol",
  "codex-core",
+ "codex-lmstudio",
+ "codex-ollama",
  "codex-protocol",
  "once_cell",
  "serde",
@@ -1159,7 +1160,6 @@ dependencies = [
  "codex-arg0",
  "codex-common",
  "codex-core",
- "codex-ollama",
  "codex-protocol",
  "core_test_support",
  "libc",
@@ -1202,6 +1202,21 @@ dependencies = [
  "tempfile",
 ]
 
+[[package]]
+name = "codex-execpolicy2"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "multimap",
+ "pretty_assertions",
+ "serde",
+ "serde_json",
+ "shlex",
+ "starlark",
+ "thiserror 2.0.17",
+]
+
 [[package]]
 name = "codex-feedback"
 version = "0.0.0"
@@ -1263,6 +1278,19 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "codex-lmstudio"
+version = "0.0.0"
+dependencies = [
+ "codex-core",
+ "reqwest",
+ "serde_json",
+ "tokio",
+ "tracing",
+ "which",
+ "wiremock",
+]
+
 [[package]]
 name = "codex-login"
 version = "0.0.0"
@@ -1427,6 +1455,7 @@ dependencies = [
  "tracing",
  "urlencoding",
  "webbrowser",
+ "which",
 ]
 
 [[package]]
@@ -1454,12 +1483,12 @@ dependencies = [
  "codex-ansi-escape",
  "codex-app-server-protocol",
  "codex-arg0",
+ "codex-backend-client",
  "codex-common",
  "codex-core",
  "codex-feedback",
  "codex-file-search",
  "codex-login",
- "codex-ollama",
  "codex-protocol",
  "codex-windows-sandbox",
  "color-eyre",
@@ -1482,6 +1511,7 @@ dependencies = [
  "ratatui",
  "ratatui-macros",
  "regex-lite",
+ "reqwest",
  "serde",
  "serde_json",
  "serial_test",
```
The workspace Cargo.toml (file header lost in the mirror; identified by the `members = [` context):

```diff
@@ -17,9 +17,11 @@ members = [
     "core",
     "exec",
     "execpolicy",
+    "execpolicy2",
     "keyring-store",
     "file-search",
     "linux-sandbox",
+    "lmstudio",
     "login",
     "mcp-server",
     "mcp-types",
@@ -69,6 +71,7 @@ codex-file-search = { path = "file-search" }
 codex-git = { path = "utils/git" }
 codex-keyring-store = { path = "keyring-store" }
 codex-linux-sandbox = { path = "linux-sandbox" }
+codex-lmstudio = { path = "lmstudio" }
 codex-login = { path = "login" }
 codex-mcp-server = { path = "mcp-server" }
 codex-ollama = { path = "ollama" }
@@ -130,7 +133,7 @@ image = { version = "^0.25.8", default-features = false }
 indexmap = "2.12.0"
 insta = "1.43.2"
 itertools = "0.14.0"
-keyring = "3.6"
+keyring = { version = "3.6", default-features = false }
 landlock = "0.4.1"
 lazy_static = "1"
 libc = "0.2.175"
@@ -150,7 +153,6 @@ opentelemetry-semantic-conventions = "0.30.0"
 opentelemetry_sdk = "0.30.0"
 os_info = "3.12.0"
 owo-colors = "4.2.0"
-paste = "1.0.15"
 path-absolutize = "3.1.1"
 pathdiff = "0.2"
 portable-pty = "0.9.0"
```
A crate Cargo.toml (header lost in the mirror; the dependency list matches the app-server-protocol crate, which drops its `paste` dependency):

```diff
@@ -15,7 +15,6 @@ anyhow = { workspace = true }
 clap = { workspace = true, features = ["derive"] }
 codex-protocol = { workspace = true }
 mcp-types = { workspace = true }
-paste = { workspace = true }
 schemars = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
```
A Rust test module (header lost in the mirror):

```diff
@@ -708,6 +708,7 @@ mod tests {
     use uuid::Uuid;
 
     #[test]
+    #[ignore = "timing out"]
     fn generated_ts_has_no_optional_nullable_fields() -> Result<()> {
         // Assert that there are no types of the form "?: T | null" in the generated TS files.
         let output_dir = std::env::temp_dir().join(format!("codex_ts_types_{}", Uuid::now_v7()));
```
The app-server protocol request definitions (header lost in the mirror; +/- attribution reconstructed from the hunk counts):

```diff
@@ -1,6 +1,4 @@
-use std::collections::HashMap;
 use std::path::Path;
-use std::path::PathBuf;
 
 use crate::JSONRPCNotification;
 use crate::JSONRPCRequest;
@@ -9,12 +7,6 @@ use crate::export::GeneratedSchema;
 use crate::export::write_json_schema;
 use crate::protocol::v1;
 use crate::protocol::v2;
-use codex_protocol::ConversationId;
-use codex_protocol::parse_command::ParsedCommand;
-use codex_protocol::protocol::FileChange;
-use codex_protocol::protocol::ReviewDecision;
-use codex_protocol::protocol::SandboxCommandAssessment;
-use paste::paste;
 use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
@@ -277,34 +269,36 @@ macro_rules! server_request_definitions {
     (
         $(
             $(#[$variant_meta:meta])*
-            $variant:ident
+            $variant:ident $(=> $wire:literal)? {
+                params: $params:ty,
+                response: $response:ty,
+            }
         ),* $(,)?
     ) => {
-        paste! {
-            /// Request initiated from the server and sent to the client.
-            #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-            #[serde(tag = "method", rename_all = "camelCase")]
-            pub enum ServerRequest {
-                $(
-                    $(#[$variant_meta])*
-                    $variant {
-                        #[serde(rename = "id")]
-                        request_id: RequestId,
-                        params: [<$variant Params>],
-                    },
-                )*
-            }
+        /// Request initiated from the server and sent to the client.
+        #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+        #[serde(tag = "method", rename_all = "camelCase")]
+        pub enum ServerRequest {
+            $(
+                $(#[$variant_meta])*
+                $(#[serde(rename = $wire)] #[ts(rename = $wire)])?
+                $variant {
+                    #[serde(rename = "id")]
+                    request_id: RequestId,
+                    params: $params,
+                },
+            )*
+        }
 
-            #[derive(Debug, Clone, PartialEq, JsonSchema)]
-            pub enum ServerRequestPayload {
-                $( $variant([<$variant Params>]), )*
-            }
+        #[derive(Debug, Clone, PartialEq, JsonSchema)]
+        pub enum ServerRequestPayload {
+            $( $variant($params), )*
+        }
 
-            impl ServerRequestPayload {
-                pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
-                    match self {
-                        $(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
-                    }
+        impl ServerRequestPayload {
+            pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
+                match self {
+                    $(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
                 }
             }
         }
@@ -312,9 +306,9 @@ macro_rules! server_request_definitions {
         pub fn export_server_responses(
             out_dir: &::std::path::Path,
         ) -> ::std::result::Result<(), ::ts_rs::ExportError> {
-            paste! {
-                $(<[<$variant Response>] as ::ts_rs::TS>::export_all_to(out_dir)?;)*
-            }
+            $(
+                <$response as ::ts_rs::TS>::export_all_to(out_dir)?;
+            )*
             Ok(())
         }
@@ -323,9 +317,12 @@ macro_rules! server_request_definitions {
             out_dir: &Path,
         ) -> ::anyhow::Result<Vec<GeneratedSchema>> {
             let mut schemas = Vec::new();
-            paste! {
-                $(schemas.push(crate::export::write_json_schema::<[<$variant Response>]>(out_dir, stringify!([<$variant Response>]))?);)*
-            }
+            $(
+                schemas.push(crate::export::write_json_schema::<$response>(
+                    out_dir,
+                    concat!(stringify!($variant), "Response"),
+                )?);
+            )*
             Ok(schemas)
         }
@@ -334,9 +331,12 @@ macro_rules! server_request_definitions {
             out_dir: &Path,
         ) -> ::anyhow::Result<Vec<GeneratedSchema>> {
             let mut schemas = Vec::new();
-            paste! {
-                $(schemas.push(crate::export::write_json_schema::<[<$variant Params>]>(out_dir, stringify!([<$variant Params>]))?);)*
-            }
+            $(
+                schemas.push(crate::export::write_json_schema::<$params>(
+                    out_dir,
+                    concat!(stringify!($variant), "Params"),
+                )?);
+            )*
             Ok(schemas)
         }
     };
@@ -426,49 +426,27 @@ impl TryFrom<JSONRPCRequest> for ServerRequest {
 }
 
 server_request_definitions! {
+    /// NEW APIs
+    /// Sent when approval is requested for a specific command execution.
+    /// This request is used for Turns started via turn/start.
+    CommandExecutionRequestApproval => "item/commandExecution/requestApproval" {
+        params: v2::CommandExecutionRequestApprovalParams,
+        response: v2::CommandExecutionRequestApprovalResponse,
+    },
+
+    /// DEPRECATED APIs below
     /// Request to approve a patch.
-    ApplyPatchApproval,
+    /// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
+    ApplyPatchApproval {
+        params: v1::ApplyPatchApprovalParams,
+        response: v1::ApplyPatchApprovalResponse,
+    },
     /// Request to exec a command.
-    ExecCommandApproval,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-#[serde(rename_all = "camelCase")]
-pub struct ApplyPatchApprovalParams {
-    pub conversation_id: ConversationId,
-    /// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
-    /// and [codex_core::protocol::PatchApplyEndEvent].
-    pub call_id: String,
-    pub file_changes: HashMap<PathBuf, FileChange>,
-    /// Optional explanatory reason (e.g. request for extra write access).
-    pub reason: Option<String>,
-    /// When set, the agent is asking the user to allow writes under this root
-    /// for the remainder of the session (unclear if this is honored today).
-    pub grant_root: Option<PathBuf>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-#[serde(rename_all = "camelCase")]
-pub struct ExecCommandApprovalParams {
-    pub conversation_id: ConversationId,
-    /// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
-    /// and [codex_core::protocol::ExecCommandEndEvent].
-    pub call_id: String,
-    pub command: Vec<String>,
-    pub cwd: PathBuf,
-    pub reason: Option<String>,
-    pub risk: Option<SandboxCommandAssessment>,
-    pub parsed_cmd: Vec<ParsedCommand>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-pub struct ExecCommandApprovalResponse {
-    pub decision: ReviewDecision,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-pub struct ApplyPatchApprovalResponse {
-    pub decision: ReviewDecision,
+    /// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
+    ExecCommandApproval {
+        params: v1::ExecCommandApprovalParams,
+        response: v1::ExecCommandApprovalResponse,
+    },
 }
 
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -533,17 +511,20 @@ client_notification_definitions! {
 mod tests {
     use super::*;
     use anyhow::Result;
+    use codex_protocol::ConversationId;
     use codex_protocol::account::PlanType;
+    use codex_protocol::parse_command::ParsedCommand;
     use codex_protocol::protocol::AskForApproval;
     use pretty_assertions::assert_eq;
     use serde_json::json;
+    use std::path::PathBuf;
 
     #[test]
     fn serialize_new_conversation() -> Result<()> {
         let request = ClientRequest::NewConversation {
             request_id: RequestId::Integer(42),
             params: v1::NewConversationParams {
-                model: Some("gpt-5-codex".to_string()),
+                model: Some("gpt-5.1-codex".to_string()),
@@ -561,7 +542,7 @@ mod tests {
             "method": "newConversation",
             "id": 42,
             "params": {
-                "model": "gpt-5-codex",
+                "model": "gpt-5.1-codex",
                 "modelProvider": null,
                 "profile": null,
                 "cwd": null,
@@ -616,7 +597,7 @@ mod tests {
     #[test]
     fn serialize_server_request() -> Result<()> {
         let conversation_id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?;
-        let params = ExecCommandApprovalParams {
+        let params = v1::ExecCommandApprovalParams {
             conversation_id,
             call_id: "call-42".to_string(),
             command: vec!["echo".to_string(), "hello".to_string()],
```
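The macro change above swaps `paste!`-generated identifiers (`[<$variant Params>]`) for explicit `params:`/`response:` types, and lets a variant opt into an arbitrary wire name with `=> "literal"`. Here is a minimal, hypothetical sketch of that pattern, not the codex macro itself; the enum and type names are invented for illustration:

```rust
// Sketch: an optional `=> "wire/name"` literal per macro arm expands to a
// serde rename, while variants without it fall back to the container-level
// camelCase rename. Requires the `serde` and `serde_json` crates.
use serde::{Deserialize, Serialize};

macro_rules! requests {
    ( $( $variant:ident $(=> $wire:literal)? { params: $params:ty } ),* $(,)? ) => {
        #[derive(Serialize, Deserialize, Debug)]
        #[serde(tag = "method", rename_all = "camelCase")]
        pub enum Request {
            $(
                // Expands only for arms that supplied a `=> "..."` literal.
                $(#[serde(rename = $wire)])?
                $variant { id: i64, params: $params },
            )*
        }
    };
}

requests! {
    // Wire name diverges from the camelCased variant name, so rename explicitly.
    CommandApproval => "item/command/requestApproval" { params: String },
    // No `=>`: serializes as "legacyApproval" via the container rename.
    LegacyApproval { params: u32 },
}

fn main() {
    let r = Request::CommandApproval { id: 1, params: "ls".into() };
    // Prints a "method":"item/command/requestApproval" tagged object.
    println!("{}", serde_json::to_string(&r).unwrap());
}
```

Passing the types in as `$params`/`$response` also avoids the old requirement that every variant have sibling `FooParams`/`FooResponse` types in the same module, which is what allows the variants to point at `v1::` and `v2::` types here.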
protocol/v1.rs (inferred; header lost in the mirror). The approval params/response structs move here from the request-definitions module:

```diff
@@ -8,8 +8,12 @@ use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::Verbosity;
 use codex_protocol::models::ResponseItem;
+use codex_protocol::parse_command::ParsedCommand;
 use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::EventMsg;
+use codex_protocol::protocol::FileChange;
+use codex_protocol::protocol::ReviewDecision;
+use codex_protocol::protocol::SandboxCommandAssessment;
 use codex_protocol::protocol::SandboxPolicy;
 use codex_protocol::protocol::SessionSource;
 use codex_protocol::protocol::TurnAbortReason;
@@ -191,6 +195,46 @@ pub struct GitDiffToRemoteResponse {
     pub diff: String,
 }
 
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct ApplyPatchApprovalParams {
+    pub conversation_id: ConversationId,
+    /// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
+    /// and [codex_core::protocol::PatchApplyEndEvent].
+    pub call_id: String,
+    pub file_changes: HashMap<PathBuf, FileChange>,
+    /// Optional explanatory reason (e.g. request for extra write access).
+    pub reason: Option<String>,
+    /// When set, the agent is asking the user to allow writes under this root
+    /// for the remainder of the session (unclear if this is honored today).
+    pub grant_root: Option<PathBuf>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct ApplyPatchApprovalResponse {
+    pub decision: ReviewDecision,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct ExecCommandApprovalParams {
+    pub conversation_id: ConversationId,
+    /// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
+    /// and [codex_core::protocol::ExecCommandEndEvent].
+    pub call_id: String,
+    pub command: Vec<String>,
+    pub cwd: PathBuf,
+    pub reason: Option<String>,
+    pub risk: Option<SandboxCommandAssessment>,
+    pub parsed_cmd: Vec<ParsedCommand>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+pub struct ExecCommandApprovalResponse {
+    pub decision: ReviewDecision,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct CancelLoginChatGptParams {
```
protocol/v2.rs (inferred; header lost in the mirror):

```diff
@@ -4,11 +4,13 @@ use std::path::PathBuf;
 use crate::protocol::common::AuthMode;
 use codex_protocol::ConversationId;
 use codex_protocol::account::PlanType;
+use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment;
 use codex_protocol::config_types::ReasoningEffort;
 use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
 use codex_protocol::items::TurnItem as CoreTurnItem;
 use codex_protocol::models::ResponseItem;
+use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
 use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
 use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
 use codex_protocol::user_input::UserInput as CoreUserInput;
@@ -20,7 +22,7 @@ use serde_json::Value as JsonValue;
 use ts_rs::TS;
 
 // Macro to declare a camelCased API v2 enum mirroring a core enum which
-// tends to use kebab-case.
+// tends to use either snake_case or kebab-case.
 macro_rules! v2_enum_from_core {
     (
     pub enum $Name:ident from $Src:path { $( $Variant:ident ),+ $(,)? }
@@ -56,6 +58,23 @@ v2_enum_from_core!(
     }
 );
 
+v2_enum_from_core!(
+    pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel {
+        Low,
+        Medium,
+        High
+    }
+);
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub enum ApprovalDecision {
+    Accept,
+    Decline,
+    Cancel,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
 #[serde(tag = "type", rename_all = "camelCase")]
 #[ts(tag = "type")]
@@ -63,6 +82,8 @@ v2_enum_from_core!(
 pub enum SandboxPolicy {
     DangerFullAccess,
     ReadOnly,
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
     WorkspaceWrite {
         #[serde(default)]
         writable_roots: Vec<PathBuf>,
@@ -119,6 +140,98 @@ impl From<codex_protocol::protocol::SandboxPolicy> for SandboxPolicy {
     }
 }
 
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct SandboxCommandAssessment {
+    pub description: String,
+    pub risk_level: CommandRiskLevel,
+}
+
+impl SandboxCommandAssessment {
+    pub fn into_core(self) -> CoreSandboxCommandAssessment {
+        CoreSandboxCommandAssessment {
+            description: self.description,
+            risk_level: self.risk_level.to_core(),
+        }
+    }
+}
+
+impl From<CoreSandboxCommandAssessment> for SandboxCommandAssessment {
+    fn from(value: CoreSandboxCommandAssessment) -> Self {
+        Self {
+            description: value.description,
+            risk_level: CommandRiskLevel::from(value.risk_level),
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(tag = "type", rename_all = "camelCase")]
+#[ts(tag = "type")]
+#[ts(export_to = "v2/")]
+pub enum CommandAction {
+    Read {
+        command: String,
+        name: String,
+        path: PathBuf,
+    },
+    ListFiles {
+        command: String,
+        path: Option<String>,
+    },
+    Search {
+        command: String,
+        query: Option<String>,
+        path: Option<String>,
+    },
+    Unknown {
+        command: String,
+    },
+}
+
+impl CommandAction {
+    pub fn into_core(self) -> CoreParsedCommand {
+        match self {
+            CommandAction::Read {
+                command: cmd,
+                name,
+                path,
+            } => CoreParsedCommand::Read { cmd, name, path },
+            CommandAction::ListFiles { command: cmd, path } => {
+                CoreParsedCommand::ListFiles { cmd, path }
+            }
+            CommandAction::Search {
+                command: cmd,
+                query,
+                path,
+            } => CoreParsedCommand::Search { cmd, query, path },
+            CommandAction::Unknown { command: cmd } => CoreParsedCommand::Unknown { cmd },
+        }
+    }
+}
+
+impl From<CoreParsedCommand> for CommandAction {
+    fn from(value: CoreParsedCommand) -> Self {
+        match value {
+            CoreParsedCommand::Read { cmd, name, path } => CommandAction::Read {
+                command: cmd,
+                name,
+                path,
+            },
+            CoreParsedCommand::ListFiles { cmd, path } => {
+                CommandAction::ListFiles { command: cmd, path }
+            }
+            CoreParsedCommand::Search { cmd, query, path } => CommandAction::Search {
+                command: cmd,
+                query,
+                path,
+            },
+            CoreParsedCommand::Unknown { cmd } => CommandAction::Unknown { command: cmd },
+        }
+    }
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(tag = "type", rename_all = "camelCase")]
 #[ts(tag = "type")]
@@ -279,7 +392,7 @@ pub struct ThreadStartParams {
     pub cwd: Option<String>,
     pub approval_policy: Option<AskForApproval>,
     pub sandbox: Option<SandboxMode>,
-    pub config: Option<HashMap<String, serde_json::Value>>,
+    pub config: Option<HashMap<String, JsonValue>>,
     pub base_instructions: Option<String>,
     pub developer_instructions: Option<String>,
 }
@@ -506,14 +619,14 @@ impl From<CoreUserInput> for UserInput {
 #[ts(tag = "type")]
 #[ts(export_to = "v2/")]
 pub enum ThreadItem {
-    UserMessage {
-        id: String,
-        content: Vec<UserInput>,
-    },
-    AgentMessage {
-        id: String,
-        text: String,
-    },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    UserMessage { id: String, content: Vec<UserInput> },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    AgentMessage { id: String, text: String },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
     Reasoning {
         id: String,
         #[serde(default)]
@@ -521,19 +634,35 @@ pub enum ThreadItem {
         #[serde(default)]
         content: Vec<String>,
     },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
     CommandExecution {
         id: String,
+        /// The command to be executed.
         command: String,
-        aggregated_output: String,
-        exit_code: Option<i32>,
+        /// The command's working directory.
         cwd: PathBuf,
         status: CommandExecutionStatus,
+        /// A best-effort parsing of the command to understand the action(s) it will perform.
+        /// This returns a list of CommandAction objects because a single shell command may
+        /// be composed of many commands piped together.
+        command_actions: Vec<CommandAction>,
+        /// The command's output, aggregated from stdout and stderr.
+        aggregated_output: Option<String>,
+        /// The command's exit code.
+        exit_code: Option<i32>,
+        /// The duration of the command execution in milliseconds.
+        duration_ms: Option<i64>,
     },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
     FileChange {
         id: String,
         changes: Vec<FileUpdateChange>,
         status: PatchApplyStatus,
     },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
     McpToolCall {
         id: String,
         server: String,
@@ -543,22 +672,18 @@ pub enum ThreadItem {
         result: Option<McpToolCallResult>,
         error: Option<McpToolCallError>,
     },
-    WebSearch {
-        id: String,
-        query: String,
-    },
-    TodoList {
-        id: String,
-        items: Vec<TodoItem>,
-    },
-    ImageView {
-        id: String,
-        path: String,
-    },
-    CodeReview {
-        id: String,
-        review: String,
-    },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    WebSearch { id: String, query: String },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    TodoList { id: String, items: Vec<TodoItem> },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    ImageView { id: String, path: String },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    CodeReview { id: String, review: String },
 }
 
 impl From<CoreTurnItem> for ThreadItem {
@@ -758,6 +883,39 @@ pub struct McpToolCallProgressNotification {
     pub message: String,
 }
 
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct CommandExecutionRequestApprovalParams {
+    pub thread_id: String,
+    pub turn_id: String,
+    pub item_id: String,
+    /// Optional explanatory reason (e.g. request for network access).
+    pub reason: Option<String>,
+    /// Optional model-provided risk assessment describing the blocked command.
+    pub risk: Option<SandboxCommandAssessment>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct CommandExecutionRequestAcceptSettings {
+    /// If true, automatically approve this command for the duration of the session.
+    #[serde(default)]
+    pub for_session: bool,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct CommandExecutionRequestApprovalResponse {
+    pub decision: ApprovalDecision,
+    /// Optional approval settings for when the decision is `accept`.
+    /// Ignored if the decision is `decline` or `cancel`.
+    #[serde(default)]
+    pub accept_settings: Option<CommandExecutionRequestAcceptSettings>,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
```
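The v2 types above lean on one serde pattern throughout: an internally tagged enum (`tag = "type"`) whose tag value and field names are both camelCased on the wire, with the field renaming applied per variant. A minimal sketch of how those attributes combine, using hypothetical types rather than the real `ThreadItem`:

```rust
// Sketch: container-level rename_all camelCases the tag value
// ("commandExecution"); variant-level rename_all camelCases the fields
// ("aggregatedOutput", "exitCode"). Requires serde and serde_json.
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, PartialEq)]
#[serde(tag = "type", rename_all = "camelCase")]
enum Item {
    #[serde(rename_all = "camelCase")]
    CommandExecution {
        id: String,
        aggregated_output: Option<String>,
        exit_code: Option<i32>,
    },
}

fn main() {
    let item = Item::CommandExecution {
        id: "item-1".into(),
        aggregated_output: None,
        exit_code: Some(0),
    };
    // {"type":"commandExecution","id":"item-1","aggregatedOutput":null,"exitCode":0}
    println!("{}", serde_json::to_string(&item).unwrap());
}
```

This explains why the diff adds `#[serde(rename_all = "camelCase")]` to each struct-like variant individually: the container-level attribute only renames the variants themselves, not the fields inside them.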
The app-server README (header lost in the mirror):

```diff
@@ -2,6 +2,16 @@
 
 `codex app-server` is the interface Codex uses to power rich interfaces such as the [Codex VS Code extension](https://marketplace.visualstudio.com/items?itemName=openai.chatgpt). The message schema is currently unstable, but those who wish to build experimental UIs on top of Codex may find it valuable.
 
+## Table of Contents
+- [Protocol](#protocol)
+- [Message Schema](#message-schema)
+- [Lifecycle Overview](#lifecycle-overview)
+- [Initialization](#initialization)
+- [Core primitives](#core-primitives)
+- [Thread & turn endpoints](#thread--turn-endpoints)
+- [Auth endpoints](#auth-endpoints)
+- [Events (work-in-progress)](#v2-streaming-events-work-in-progress)
+
 ## Protocol
 
 Similar to [MCP](https://modelcontextprotocol.io/), `codex app-server` supports bidirectional communication, streaming JSONL over stdio. The protocol is JSON-RPC 2.0, though the `"jsonrpc":"2.0"` header is omitted.
```
````diff
@@ -15,6 +25,14 @@ codex app-server generate-ts --out DIR
 codex app-server generate-json-schema --out DIR
 ```
 
+## Lifecycle Overview
+
+- Initialize once: Immediately after launching the codex app-server process, send an `initialize` request with your client metadata, then emit an `initialized` notification. Any other request before this handshake gets rejected.
+- Start (or resume) a thread: Call `thread/start` to open a fresh conversation. The response returns the thread object, and you’ll also get a `thread/started` notification. If you’re continuing an existing conversation, call `thread/resume` with its ID instead.
+- Begin a turn: To send user input, call `turn/start` with the target `threadId` and the user's input. Optional fields let you override the model, cwd, sandbox policy, etc. This immediately returns the new turn object and triggers a `turn/started` notification.
+- Stream events: After `turn/start`, keep reading JSON-RPC notifications on stdout. You’ll see `item/started`, `item/completed`, deltas like `item/agentMessage/delta`, tool progress, etc. These represent streaming model output plus any side effects (commands, tool calls, reasoning notes).
+- Finish the turn: When the model is done (or the turn is interrupted via the `turn/interrupt` call), the server sends `turn/completed` with the final turn state and token usage.
+
 ## Initialization
 
 Clients must send a single `initialize` request before invoking any other method, then acknowledge with an `initialized` notification. The server returns the user agent string it will present to upstream services; subsequent requests issued before initialization receive a `"Not initialized"` error, and repeated `initialize` calls receive an `"Already initialized"` error.
````
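For orientation, here is a minimal sketch of a client driving that lifecycle over stdio. It is not part of the diff: the handshake and method names follow the README text above, but the exact `initialize` params shape and the `input` field on `turn/start` are assumptions, and the hardcoded `THREAD_ID` is a placeholder.

```rust
// Hypothetical client sketch: spawn `codex app-server`, run the handshake,
// start a thread and a turn, then read newline-delimited JSON notifications
// until the turn completes. Error handling is deliberately minimal.
use std::io::{BufRead, BufReader, Write};
use std::process::{Command, Stdio};

fn main() -> std::io::Result<()> {
    let mut child = Command::new("codex")
        .args(["app-server"])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;
    let mut stdin = child.stdin.take().expect("stdin");
    let stdout = BufReader::new(child.stdout.take().expect("stdout"));

    // 1. Handshake: one `initialize` request, then an `initialized` notification.
    //    The clientInfo shape is an assumption, not taken from the diff.
    writeln!(stdin, r#"{{"id":1,"method":"initialize","params":{{"clientInfo":{{"name":"demo"}}}}}}"#)?;
    writeln!(stdin, r#"{{"method":"initialized"}}"#)?;

    // 2. Open a thread, then start a turn. A real client would read the
    //    thread id back from the thread/start response instead of hardcoding it.
    writeln!(stdin, r#"{{"id":2,"method":"thread/start","params":{{}}}}"#)?;
    writeln!(stdin, r#"{{"id":3,"method":"turn/start","params":{{"threadId":"THREAD_ID","input":[{{"type":"text","text":"hello"}}]}}}}"#)?;

    // 3. Stream notifications until the turn finishes.
    for line in stdout.lines() {
        let line = line?;
        println!("<- {line}");
        if line.contains("\"turn/completed\"") {
            break;
        }
    }
    Ok(())
}
```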
```diff
@@ -56,7 +74,7 @@ Start a fresh thread when you need a new Codex conversation.
 { "method": "thread/start", "id": 10, "params": {
   // Optionally set config settings. If not specified, will use the user's
   // current config settings.
-  "model": "gpt-5-codex",
+  "model": "gpt-5.1-codex",
   "cwd": "/Users/me/project",
   "approvalPolicy": "never",
   "sandbox": "workspaceWrite",
@@ -137,7 +155,7 @@ You can optionally specify config overrides on the new turn. If specified, these
     "writableRoots": ["/Users/me/project"],
     "networkAccess": true
   },
-  "model": "gpt-5-codex",
+  "model": "gpt-5.1-codex",
   "effort": "medium",
   "summary": "concise"
 } }
```
```diff
@@ -258,3 +276,33 @@ Field notes:
 - `codex app-server generate-ts --out <dir>` emits v2 types under `v2/`.
 - `codex app-server generate-json-schema --out <dir>` outputs `codex_app_server_protocol.schemas.json`.
 - See [“Authentication and authorization” in the config docs](../../docs/config.md#authentication-and-authorization) for configuration knobs.
+
+## Events (work-in-progress)
+
+Event notifications are the server-initiated event stream for thread lifecycles, turn lifecycles, and the items within them. After you start or resume a thread, keep reading stdout for `thread/started`, `turn/*`, and `item/*` notifications.
+
+### Turn events
+
+The app-server streams JSON-RPC notifications while a turn is running. Each turn starts with `turn/started` (initial `turn`) and ends with `turn/completed` (final `turn` plus token `usage`), and clients subscribe to the events they care about, rendering each item incrementally as updates arrive. The per-item lifecycle is always: `item/started` → zero or more item-specific deltas → `item/completed`.
+
+#### Thread items
+
+`ThreadItem` is the tagged union carried in turn responses and `item/*` notifications. Currently we support events for the following items:
+- `userMessage` — `{id, content}` where `content` is a list of user inputs (`text`, `image`, or `localImage`).
+- `agentMessage` — `{id, text}` containing the accumulated agent reply.
+- `reasoning` — `{id, summary, content}` where `summary` holds streamed reasoning summaries (applicable to most OpenAI models) and `content` holds raw reasoning blocks (applicable to e.g. open-source models).
+- `mcpToolCall` — `{id, server, tool, status, arguments, result?, error?}` describing MCP calls; `status` is `inProgress`, `completed`, or `failed`.
+- `webSearch` — `{id, query}` for a web search request issued by the agent.
+
+All items emit two shared lifecycle events:
+- `item/started` — emits the full `item` when a new unit of work begins so the UI can render it immediately; the `item.id` in this payload matches the `itemId` used by deltas.
+- `item/completed` — sends the final `item` once that work finishes (e.g., after a tool call or message completes); treat this as the authoritative state.
+
+There are additional item-specific events:
+
+#### agentMessage
+- `item/agentMessage/delta` — appends streamed text for the agent message; concatenate `delta` values for the same `itemId` in order to reconstruct the full reply.
+
+#### reasoning
+- `item/reasoning/summaryTextDelta` — streams readable reasoning summaries; `summaryIndex` increments when a new summary section opens.
+- `item/reasoning/summaryPartAdded` — marks the boundary between reasoning summary sections for an `itemId`; subsequent `summaryTextDelta` entries share the same `summaryIndex`.
+- `item/reasoning/textDelta` — streams raw reasoning text (only applicable to e.g. open-source models); use `contentIndex` to group deltas that belong together before showing them in the UI.
```
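A hedged sketch of the delta handling those docs describe, not taken from the diff: accumulate `item/agentMessage/delta` payloads per `itemId` until `item/completed` arrives, then prefer the authoritative final item.

```rust
// Sketch: accumulate agentMessage deltas keyed by itemId. The field names
// (itemId, delta) follow the README above; the accumulator type is invented.
use std::collections::HashMap;

#[derive(Default)]
struct AgentMessageAccumulator {
    partial: HashMap<String, String>, // itemId -> text accumulated so far
}

impl AgentMessageAccumulator {
    /// Call for each `item/agentMessage/delta` notification, in arrival order.
    fn on_delta(&mut self, item_id: &str, delta: &str) {
        self.partial.entry(item_id.to_string()).or_default().push_str(delta);
    }

    /// Call on `item/completed`. The final item is authoritative, so the
    /// accumulated text is only a fallback if the payload omits the text.
    fn on_completed(&mut self, item_id: &str, final_text: Option<String>) -> String {
        let accumulated = self.partial.remove(item_id).unwrap_or_default();
        final_text.unwrap_or(accumulated)
    }
}

fn main() {
    let mut acc = AgentMessageAccumulator::default();
    acc.on_delta("item-1", "Hel");
    acc.on_delta("item-1", "lo");
    assert_eq!(acc.on_completed("item-1", None), "Hello");
}
```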
The app-server bespoke event handling module (header lost in the mirror):

```diff
@@ -5,6 +5,12 @@ use codex_app_server_protocol::AccountRateLimitsUpdatedNotification;
 use codex_app_server_protocol::AgentMessageDeltaNotification;
 use codex_app_server_protocol::ApplyPatchApprovalParams;
 use codex_app_server_protocol::ApplyPatchApprovalResponse;
+use codex_app_server_protocol::ApprovalDecision;
+use codex_app_server_protocol::CommandAction as V2ParsedCommand;
+use codex_app_server_protocol::CommandExecutionOutputDeltaNotification;
+use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
+use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
+use codex_app_server_protocol::CommandExecutionStatus;
 use codex_app_server_protocol::ExecCommandApprovalParams;
 use codex_app_server_protocol::ExecCommandApprovalResponse;
 use codex_app_server_protocol::InterruptConversationResponse;
@@ -16,25 +22,29 @@ use codex_app_server_protocol::McpToolCallStatus;
 use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
 use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
 use codex_app_server_protocol::ReasoningTextDeltaNotification;
+use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment;
 use codex_app_server_protocol::ServerNotification;
 use codex_app_server_protocol::ServerRequestPayload;
+use codex_app_server_protocol::ThreadItem;
 use codex_app_server_protocol::TurnInterruptResponse;
 use codex_core::CodexConversation;
+use codex_core::parse_command::shlex_join;
 use codex_core::protocol::ApplyPatchApprovalRequestEvent;
 use codex_core::protocol::Event;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecApprovalRequestEvent;
+use codex_core::protocol::ExecCommandEndEvent;
 use codex_core::protocol::McpToolCallBeginEvent;
 use codex_core::protocol::McpToolCallEndEvent;
 use codex_core::protocol::Op;
 use codex_core::protocol::ReviewDecision;
 use codex_protocol::ConversationId;
+use std::convert::TryFrom;
 use std::sync::Arc;
 use tokio::sync::oneshot;
 use tracing::error;
 
-type JsonRpcResult = serde_json::Value;
+type JsonValue = serde_json::Value;
 
@@ -42,6 +52,7 @@ pub(crate) async fn apply_bespoke_event_handling(
     conversation: Arc<CodexConversation>,
     outgoing: Arc<OutgoingMessageSender>,
     pending_interrupts: PendingInterrupts,
+    api_version: ApiVersion,
 ) {
     let Event { id: event_id, msg } = event;
     match msg {
@@ -61,11 +72,57 @@ pub(crate) async fn apply_bespoke_event_handling(
             let rx = outgoing
                 .send_request(ServerRequestPayload::ApplyPatchApproval(params))
                 .await;
             // TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
             tokio::spawn(async move {
                 on_patch_approval_response(event_id, rx, conversation).await;
             });
         }
+        EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
+            call_id,
+            turn_id,
+            command,
+            cwd,
+            reason,
+            risk,
+            parsed_cmd,
+        }) => match api_version {
+            ApiVersion::V1 => {
+                let params = ExecCommandApprovalParams {
+                    conversation_id,
+                    call_id,
+                    command,
+                    cwd,
+                    reason,
+                    risk,
+                    parsed_cmd,
+                };
+                let rx = outgoing
+                    .send_request(ServerRequestPayload::ExecCommandApproval(params))
+                    .await;
+                tokio::spawn(async move {
+                    on_exec_approval_response(event_id, rx, conversation).await;
+                });
+            }
+            ApiVersion::V2 => {
+                let params = CommandExecutionRequestApprovalParams {
+                    thread_id: conversation_id.to_string(),
+                    turn_id: turn_id.clone(),
+                    // Until we migrate the core to be aware of a first class CommandExecutionItem
+                    // and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
+                    item_id: call_id.clone(),
+                    reason,
+                    risk: risk.map(V2SandboxCommandAssessment::from),
+                };
+                let rx = outgoing
+                    .send_request(ServerRequestPayload::CommandExecutionRequestApproval(
+                        params,
+                    ))
+                    .await;
+                tokio::spawn(async move {
+                    on_command_execution_request_approval_response(event_id, rx, conversation)
+                        .await;
+                });
+            }
+        },
+        // TODO(celia): properly construct McpToolCall TurnItem in core.
         EventMsg::McpToolCallBegin(begin_event) => {
             let notification = construct_mcp_tool_call_notification(begin_event).await;
@@ -121,32 +178,6 @@ pub(crate) async fn apply_bespoke_event_handling(
                 ))
                 .await;
         }
-        EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
-            call_id,
-            command,
-            cwd,
-            reason,
-            risk,
-            parsed_cmd,
-        }) => {
-            let params = ExecCommandApprovalParams {
-                conversation_id,
-                call_id,
-                command,
-                cwd,
-                reason,
-                risk,
-                parsed_cmd,
-            };
-            let rx = outgoing
-                .send_request(ServerRequestPayload::ExecCommandApproval(params))
-                .await;
-
-            // TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
-            tokio::spawn(async move {
-                on_exec_approval_response(event_id, rx, conversation).await;
-            });
-        }
         EventMsg::TokenCount(token_count_event) => {
             if let Some(rate_limits) = token_count_event.rate_limits {
                 outgoing
@@ -172,6 +203,79 @@ pub(crate) async fn apply_bespoke_event_handling(
                 .send_server_notification(ServerNotification::ItemCompleted(notification))
                 .await;
         }
+        EventMsg::ExecCommandBegin(exec_command_begin_event) => {
+            let item = ThreadItem::CommandExecution {
+                id: exec_command_begin_event.call_id.clone(),
+                command: shlex_join(&exec_command_begin_event.command),
+                cwd: exec_command_begin_event.cwd,
+                status: CommandExecutionStatus::InProgress,
+                command_actions: exec_command_begin_event
+                    .parsed_cmd
+                    .into_iter()
+                    .map(V2ParsedCommand::from)
+                    .collect(),
+                aggregated_output: None,
+                exit_code: None,
+                duration_ms: None,
+            };
+            let notification = ItemStartedNotification { item };
+            outgoing
+                .send_server_notification(ServerNotification::ItemStarted(notification))
+                .await;
+        }
+        EventMsg::ExecCommandOutputDelta(exec_command_output_delta_event) => {
+            let notification = CommandExecutionOutputDeltaNotification {
+                item_id: exec_command_output_delta_event.call_id.clone(),
+                delta: String::from_utf8_lossy(&exec_command_output_delta_event.chunk).to_string(),
+            };
+            outgoing
+                .send_server_notification(ServerNotification::CommandExecutionOutputDelta(
+                    notification,
+                ))
+                .await;
+        }
+        EventMsg::ExecCommandEnd(exec_command_end_event) => {
+            let ExecCommandEndEvent {
+                call_id,
+                command,
+                cwd,
+                parsed_cmd,
+                aggregated_output,
+                exit_code,
+                duration,
+                ..
+            } = exec_command_end_event;
+
+            let status = if exit_code == 0 {
+                CommandExecutionStatus::Completed
+            } else {
+                CommandExecutionStatus::Failed
+            };
+
+            let aggregated_output = if aggregated_output.is_empty() {
+                None
+            } else {
+                Some(aggregated_output)
+            };
+
+            let duration_ms = i64::try_from(duration.as_millis()).unwrap_or(i64::MAX);
+
+            let item = ThreadItem::CommandExecution {
+                id: call_id,
+                command: shlex_join(&command),
+                cwd,
+                status,
+                command_actions: parsed_cmd.into_iter().map(V2ParsedCommand::from).collect(),
+                aggregated_output,
+                exit_code: Some(exit_code),
+                duration_ms: Some(duration_ms),
+            };
+
+            let notification = ItemCompletedNotification { item };
+            outgoing
+                .send_server_notification(ServerNotification::ItemCompleted(notification))
+                .await;
+        }
         // If this is a TurnAborted, reply to any pending interrupt requests.
         EventMsg::TurnAborted(turn_aborted_event) => {
             let pending = {
@@ -202,7 +306,7 @@ pub(crate) async fn apply_bespoke_event_handling(
 
 async fn on_patch_approval_response(
     event_id: String,
-    receiver: oneshot::Receiver<JsonRpcResult>,
+    receiver: oneshot::Receiver<JsonValue>,
     codex: Arc<CodexConversation>,
 ) {
     let response = receiver.await;
@@ -244,7 +348,7 @@ async fn on_patch_approval_response(
 
 async fn on_exec_approval_response(
     event_id: String,
-    receiver: oneshot::Receiver<JsonRpcResult>,
+    receiver: oneshot::Receiver<JsonValue>,
     conversation: Arc<CodexConversation>,
 ) {
     let response = receiver.await;
@@ -278,6 +382,53 @@ async fn on_exec_approval_response(
     }
 }
 
+async fn on_command_execution_request_approval_response(
+    event_id: String,
+    receiver: oneshot::Receiver<JsonValue>,
+    conversation: Arc<CodexConversation>,
+) {
+    let response = receiver.await;
+    let value = match response {
+        Ok(value) => value,
+        Err(err) => {
+            error!("request failed: {err:?}");
+            return;
+        }
+    };
+
+    let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(value)
+        .unwrap_or_else(|err| {
+            error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}");
+            CommandExecutionRequestApprovalResponse {
+                decision: ApprovalDecision::Decline,
+                accept_settings: None,
+            }
+        });
+
+    let CommandExecutionRequestApprovalResponse {
+        decision,
+        accept_settings,
+    } = response;
+
+    let decision = match (decision, accept_settings) {
+        (ApprovalDecision::Accept, Some(settings)) if settings.for_session => {
+            ReviewDecision::ApprovedForSession
+        }
+        (ApprovalDecision::Accept, _) => ReviewDecision::Approved,
+        (ApprovalDecision::Decline, _) => ReviewDecision::Denied,
+        (ApprovalDecision::Cancel, _) => ReviewDecision::Abort,
+    };
+    if let Err(err) = conversation
+        .submit(Op::ExecApproval {
+            id: event_id,
+            decision,
+        })
+        .await
+    {
+        error!("failed to submit ExecApproval: {err}");
+    }
+}
+
 /// similar to handle_mcp_tool_call_begin in exec
 async fn construct_mcp_tool_call_notification(
     begin_event: McpToolCallBeginEvent,
@@ -287,10 +438,7 @@ async fn construct_mcp_tool_call_notification(
         server: begin_event.invocation.server,
         tool: begin_event.invocation.tool,
         status: McpToolCallStatus::InProgress,
-        arguments: begin_event
-            .invocation
-            .arguments
-            .unwrap_or(JsonRpcResult::Null),
+        arguments: begin_event.invocation.arguments.unwrap_or(JsonValue::Null),
         result: None,
         error: None,
     };
@@ -328,10 +476,7 @@ async fn construct_mcp_tool_call_end_notification(
         server: end_event.invocation.server,
         tool: end_event.invocation.tool,
         status,
-        arguments: end_event
-            .invocation
-            .arguments
-            .unwrap_or(JsonRpcResult::Null),
+        arguments: end_event.invocation.arguments.unwrap_or(JsonValue::Null),
         result,
         error,
     };
```
The app-server message processor (header lost in the mirror):

```diff
@@ -158,8 +158,8 @@ struct ActiveLogin {
     login_id: Uuid,
 }
 
-impl ActiveLogin {
-    fn drop(&self) {
+impl Drop for ActiveLogin {
+    fn drop(&mut self) {
         self.shutdown_handle.shutdown();
     }
 }
@@ -417,7 +417,7 @@ impl CodexMessageProcessor {
         {
             let mut guard = self.active_login.lock().await;
             if let Some(active) = guard.take() {
-                active.drop();
+                drop(active);
             }
         }
@@ -525,7 +525,7 @@ impl CodexMessageProcessor {
         {
             let mut guard = self.active_login.lock().await;
             if let Some(existing) = guard.take() {
-                existing.drop();
+                drop(existing);
             }
             *guard = Some(ActiveLogin {
                 shutdown_handle: shutdown_handle.clone(),
@@ -615,7 +615,7 @@ impl CodexMessageProcessor {
         {
             let mut guard = self.active_login.lock().await;
             if let Some(existing) = guard.take() {
-                existing.drop();
+                drop(existing);
             }
             *guard = Some(ActiveLogin {
                 shutdown_handle: shutdown_handle.clone(),
@@ -704,7 +704,7 @@ impl CodexMessageProcessor {
         let mut guard = self.active_login.lock().await;
         if guard.as_ref().map(|l| l.login_id) == Some(login_id) {
             if let Some(active) = guard.take() {
-                active.drop();
+                drop(active);
             }
             Ok(())
         } else {
@@ -758,7 +758,7 @@ impl CodexMessageProcessor {
         {
             let mut guard = self.active_login.lock().await;
             if let Some(active) = guard.take() {
-                active.drop();
+                drop(active);
            }
         }
```
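The change above is a classic Rust fix: the old inherent `fn drop(&self)` merely shadowed the name `drop` and only ran when called explicitly, so any `ActiveLogin` that went out of scope without that call leaked its login server. Implementing the `Drop` trait moves the shutdown into the type's drop glue, and the call sites shrink to the standard `drop(value)`. A standalone sketch of the pattern, with a hypothetical `ShutdownHandle`:

```rust
// Sketch of the RAII pattern the diff adopts: cleanup in Drop runs on every
// exit path (explicit drop, scope exit, unwinding), not just where someone
// remembered to call a method.
struct ShutdownHandle;

impl ShutdownHandle {
    fn shutdown(&self) {
        println!("shutting down login server");
    }
}

struct ActiveLogin {
    shutdown_handle: ShutdownHandle,
}

impl Drop for ActiveLogin {
    fn drop(&mut self) {
        self.shutdown_handle.shutdown();
    }
}

fn main() {
    let active = ActiveLogin { shutdown_handle: ShutdownHandle };
    drop(active); // prints; an early return would have printed too
}
```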
```diff
@@ -1245,7 +1245,7 @@ impl CodexMessageProcessor {
         // Auto-attach a conversation listener when starting a thread.
         // Use the same behavior as the v1 API with experimental_raw_events=false.
         if let Err(err) = self
-            .attach_conversation_listener(conversation_id, false)
+            .attach_conversation_listener(conversation_id, false, ApiVersion::V2)
             .await
         {
             tracing::warn!(
@@ -1523,7 +1523,7 @@ impl CodexMessageProcessor {
         }) => {
             // Auto-attach a conversation listener when resuming a thread.
             if let Err(err) = self
-                .attach_conversation_listener(conversation_id, false)
+                .attach_conversation_listener(conversation_id, false, ApiVersion::V2)
                 .await
             {
                 tracing::warn!(
@@ -2376,7 +2376,7 @@ impl CodexMessageProcessor {
             experimental_raw_events,
         } = params;
         match self
-            .attach_conversation_listener(conversation_id, experimental_raw_events)
+            .attach_conversation_listener(conversation_id, experimental_raw_events, ApiVersion::V1)
             .await
         {
             Ok(subscription_id) => {
@@ -2417,6 +2417,7 @@ impl CodexMessageProcessor {
         &mut self,
         conversation_id: ConversationId,
         experimental_raw_events: bool,
+        api_version: ApiVersion,
     ) -> Result<Uuid, JSONRPCErrorError> {
         let conversation = match self
             .conversation_manager
@@ -2440,6 +2441,7 @@ impl CodexMessageProcessor {
 
         let outgoing_for_task = self.outgoing.clone();
         let pending_interrupts = self.pending_interrupts.clone();
+        let api_version_for_task = api_version;
         tokio::spawn(async move {
             loop {
                 tokio::select! {
@@ -2495,6 +2497,7 @@ impl CodexMessageProcessor {
                             conversation.clone(),
                             outgoing_for_task.clone(),
                             pending_interrupts.clone(),
+                            api_version_for_task,
                         )
                         .await;
                 }
```
The fuzzy file search module (header lost in the mirror); an early return is added for the empty-roots case:

```diff
@@ -19,6 +19,10 @@ pub(crate) async fn run_fuzzy_file_search(
     roots: Vec<String>,
     cancellation_flag: Arc<AtomicBool>,
 ) -> Vec<FuzzyFileSearchResult> {
+    if roots.is_empty() {
+        return Vec::new();
+    }
+
     #[expect(clippy::expect_used)]
     let limit_per_root =
         NonZero::new(LIMIT_PER_ROOT).expect("LIMIT_PER_ROOT should be a valid non-zero usize");
```
The server entry point (header lost in the mirror); the outgoing channel becomes bounded:

```diff
@@ -47,7 +47,7 @@ pub async fn run_main(
 ) -> IoResult<()> {
     // Set up channels.
     let (incoming_tx, mut incoming_rx) = mpsc::channel::<JSONRPCMessage>(CHANNEL_CAPACITY);
-    let (outgoing_tx, mut outgoing_rx) = mpsc::unbounded_channel::<OutgoingMessage>();
+    let (outgoing_tx, mut outgoing_rx) = mpsc::channel::<OutgoingMessage>(CHANNEL_CAPACITY);
 
     // Task: read from stdin, push to `incoming_tx`.
     let stdin_reader_handle = tokio::spawn({
```
@@ -19,12 +19,12 @@ use crate::error_code::INTERNAL_ERROR_CODE;
|
||||
/// Sends messages to the client and manages request callbacks.
|
||||
pub(crate) struct OutgoingMessageSender {
|
||||
next_request_id: AtomicI64,
|
||||
sender: mpsc::UnboundedSender<OutgoingMessage>,
|
||||
sender: mpsc::Sender<OutgoingMessage>,
|
||||
request_id_to_callback: Mutex<HashMap<RequestId, oneshot::Sender<Result>>>,
|
||||
}
|
||||
|
||||
impl OutgoingMessageSender {
|
||||
pub(crate) fn new(sender: mpsc::UnboundedSender<OutgoingMessage>) -> Self {
|
||||
pub(crate) fn new(sender: mpsc::Sender<OutgoingMessage>) -> Self {
|
||||
Self {
|
||||
next_request_id: AtomicI64::new(0),
|
||||
sender,
|
||||
@@ -45,8 +45,12 @@ impl OutgoingMessageSender {
|
||||
}
|
||||
|
||||
let outgoing_message =
|
||||
OutgoingMessage::Request(request.request_with_id(outgoing_message_id));
|
||||
let _ = self.sender.send(outgoing_message);
|
||||
OutgoingMessage::Request(request.request_with_id(outgoing_message_id.clone()));
|
||||
if let Err(err) = self.sender.send(outgoing_message).await {
|
||||
warn!("failed to send request {outgoing_message_id:?} to client: {err:?}");
|
||||
let mut request_id_to_callback = self.request_id_to_callback.lock().await;
|
||||
request_id_to_callback.remove(&outgoing_message_id);
|
||||
}
|
||||
rx_approve
|
||||
}
|
||||
|
||||
@@ -72,7 +76,9 @@ impl OutgoingMessageSender {
|
||||
match serde_json::to_value(response) {
|
||||
Ok(result) => {
|
||||
let outgoing_message = OutgoingMessage::Response(OutgoingResponse { id, result });
|
||||
let _ = self.sender.send(outgoing_message);
|
||||
if let Err(err) = self.sender.send(outgoing_message).await {
|
||||
warn!("failed to send response to client: {err:?}");
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
self.send_error(
|
||||
@@ -89,21 +95,29 @@ impl OutgoingMessageSender {
|
||||
}
|
||||
|
||||
pub(crate) async fn send_server_notification(&self, notification: ServerNotification) {
|
||||
let _ = self
|
||||
if let Err(err) = self
|
||||
.sender
|
||||
.send(OutgoingMessage::AppServerNotification(notification));
|
||||
.send(OutgoingMessage::AppServerNotification(notification))
|
||||
.await
|
||||
{
|
||||
warn!("failed to send server notification to client: {err:?}");
|
||||
}
|
||||
}
|
||||
|
||||
/// All notifications should be migrated to [`ServerNotification`] and
|
||||
/// [`OutgoingMessage::Notification`] should be removed.
|
||||
pub(crate) async fn send_notification(&self, notification: OutgoingNotification) {
|
||||
let outgoing_message = OutgoingMessage::Notification(notification);
|
||||
let _ = self.sender.send(outgoing_message);
|
||||
if let Err(err) = self.sender.send(outgoing_message).await {
|
||||
warn!("failed to send notification to client: {err:?}");
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn send_error(&self, id: RequestId, error: JSONRPCErrorError) {
|
||||
let outgoing_message = OutgoingMessage::Error(OutgoingError { id, error });
|
||||
let _ = self.sender.send(outgoing_message);
|
||||
if let Err(err) = self.sender.send(outgoing_message).await {
|
||||
warn!("failed to send error to client: {err:?}");
|
||||
}
|
||||
}
|
||||
}
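The switch from `unbounded_channel` to a bounded `mpsc::channel` makes every send point awaitable and fallible, which is why each `let _ = send(...)` above becomes an `if let Err` with a `warn!`. A minimal sketch of the pattern, with the payload type and capacity as stand-ins for the real ones:

```rust
use tokio::sync::mpsc;

// Stand-in for the real OutgoingMessage enum in the app server.
#[derive(Debug)]
struct OutgoingMessage(String);

#[tokio::main]
async fn main() {
    // Bounded: `send` awaits when the buffer is full, so a slow stdout writer
    // applies backpressure instead of growing an unbounded queue.
    let (tx, mut rx) = mpsc::channel::<OutgoingMessage>(64);

    let writer = tokio::spawn(async move {
        while let Some(msg) = rx.recv().await {
            println!("-> {msg:?}");
        }
    });

    // `send` only fails once the receiver is dropped; log instead of panicking,
    // mirroring the warn!-and-cleanup pattern in the patch above.
    if let Err(err) = tx.send(OutgoingMessage("notification".into())).await {
        eprintln!("failed to send outgoing message: {err:?}");
    }

    drop(tx);
    writer.await.unwrap();
}
```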
@@ -27,7 +27,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
    std::fs::write(
        config_toml,
        r#"
model = "gpt-5-codex"
model = "gpt-5.1-codex"
approval_policy = "on-request"
sandbox_mode = "workspace-write"
model_reasoning_summary = "detailed"
@@ -87,7 +87,7 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
        }),
        forced_chatgpt_workspace_id: Some("12345678-0000-0000-0000-000000000000".into()),
        forced_login_method: Some(ForcedLoginMethod::Chatgpt),
        model: Some("gpt-5-codex".into()),
        model: Some("gpt-5.1-codex".into()),
        model_reasoning_effort: Some(ReasoningEffort::High),
        model_reasoning_summary: Some(ReasoningSummary::Detailed),
        model_verbosity: Some(Verbosity::Medium),

@@ -57,7 +57,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
    std::fs::write(
        config_toml,
        r#"
model = "gpt-5-codex"
model = "gpt-5.1-codex"
model_reasoning_effort = "medium"
"#,
    )

@@ -45,6 +45,29 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
    } = to_response::<ModelListResponse>(response)?;

    let expected_models = vec![
        Model {
            id: "codex-auto".to_string(),
            model: "codex-auto".to_string(),
            display_name: "codex-auto".to_string(),
            description: "Automatically chooses the best Codex model configuration for your task."
                .to_string(),
            supported_reasoning_efforts: vec![
                ReasoningEffortOption {
                    reasoning_effort: ReasoningEffort::Low,
                    description: "Works faster".to_string(),
                },
                ReasoningEffortOption {
                    reasoning_effort: ReasoningEffort::Medium,
                    description: "Balances speed with intelligence".to_string(),
                },
                ReasoningEffortOption {
                    reasoning_effort: ReasoningEffort::High,
                    description: "Works longer for harder tasks".to_string(),
                },
            ],
            default_reasoning_effort: ReasoningEffort::Medium,
            is_default: true,
        },
        Model {
            id: "gpt-5.1-codex".to_string(),
            model: "gpt-5.1-codex".to_string(),
@@ -66,7 +89,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
                },
            ],
            default_reasoning_effort: ReasoningEffort::Medium,
            is_default: true,
            is_default: false,
        },
        Model {
            id: "gpt-5.1-codex-mini".to_string(),
@@ -147,7 +170,7 @@ async fn list_models_pagination_works() -> Result<()> {
    } = to_response::<ModelListResponse>(first_response)?;

    assert_eq!(first_items.len(), 1);
    assert_eq!(first_items[0].id, "gpt-5.1-codex");
    assert_eq!(first_items[0].id, "codex-auto");
    let next_cursor = first_cursor.ok_or_else(|| anyhow!("cursor for second page"))?;

    let second_request = mcp
@@ -169,7 +192,7 @@ async fn list_models_pagination_works() -> Result<()> {
    } = to_response::<ModelListResponse>(second_response)?;

    assert_eq!(second_items.len(), 1);
    assert_eq!(second_items[0].id, "gpt-5.1-codex-mini");
    assert_eq!(second_items[0].id, "gpt-5.1-codex");
    let third_cursor = second_cursor.ok_or_else(|| anyhow!("cursor for third page"))?;

    let third_request = mcp
@@ -191,8 +214,30 @@ async fn list_models_pagination_works() -> Result<()> {
    } = to_response::<ModelListResponse>(third_response)?;

    assert_eq!(third_items.len(), 1);
    assert_eq!(third_items[0].id, "gpt-5.1");
    assert!(third_cursor.is_none());
    assert_eq!(third_items[0].id, "gpt-5.1-codex-mini");
    let fourth_cursor = third_cursor.ok_or_else(|| anyhow!("cursor for fourth page"))?;

    let fourth_request = mcp
        .send_list_models_request(ModelListParams {
            limit: Some(1),
            cursor: Some(fourth_cursor.clone()),
        })
        .await?;

    let fourth_response: JSONRPCResponse = timeout(
        DEFAULT_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(fourth_request)),
    )
    .await??;

    let ModelListResponse {
        data: fourth_items,
        next_cursor: fourth_cursor,
    } = to_response::<ModelListResponse>(fourth_response)?;

    assert_eq!(fourth_items.len(), 1);
    assert_eq!(fourth_items[0].id, "gpt-5.1");
    assert!(fourth_cursor.is_none());
    Ok(())
}
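The reworked pagination test walks four single-item pages and stops when `next_cursor` comes back `None`. A hedged sketch of the cursor loop a client implies; the types and fetch function here are illustrative stand-ins, not the real app-server protocol API:

```rust
struct Page {
    data: Vec<String>,
    next_cursor: Option<usize>,
}

// Illustrative stand-in for a list_models call taking `limit` and `cursor`.
fn fetch_page(models: &[&str], cursor: Option<usize>, limit: usize) -> Page {
    let start = cursor.unwrap_or(0);
    let end = (start + limit).min(models.len());
    Page {
        data: models[start..end].iter().map(|m| m.to_string()).collect(),
        next_cursor: (end < models.len()).then_some(end),
    }
}

fn main() {
    let models = ["codex-auto", "gpt-5.1-codex", "gpt-5.1-codex-mini", "gpt-5.1"];
    let mut all = Vec::new();
    let mut cursor = None;
    loop {
        let page = fetch_page(&models, cursor, 1);
        all.extend(page.data);
        match page.next_cursor {
            Some(next) => cursor = Some(next), // more pages, as in pages one through three
            None => break, // terminal page: the fourth page's cursor is None
        }
    }
    assert_eq!(all.len(), 4);
}
```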
@@ -27,7 +27,7 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
    // Start a thread.
    let start_id = mcp
        .send_thread_start_request(ThreadStartParams {
            model: Some("gpt-5-codex".to_string()),
            model: Some("gpt-5.1-codex".to_string()),
            ..Default::default()
        })
        .await?;
@@ -68,7 +68,7 @@ async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {

    let start_id = mcp
        .send_thread_start_request(ThreadStartParams {
            model: Some("gpt-5-codex".to_string()),
            model: Some("gpt-5.1-codex".to_string()),
            ..Default::default()
        })
        .await?;
@@ -112,7 +112,7 @@ async fn thread_resume_supports_history_and_overrides() -> Result<()> {
    // Start a thread.
    let start_id = mcp
        .send_thread_start_request(ThreadStartParams {
            model: Some("gpt-5-codex".to_string()),
            model: Some("gpt-5.1-codex".to_string()),
            ..Default::default()
        })
        .await?;

@@ -29,7 +29,7 @@ async fn thread_start_creates_thread_and_emits_started() -> Result<()> {
    // Start a v2 thread with an explicit model override.
    let req_id = mcp
        .send_thread_start_request(ThreadStartParams {
            model: Some("gpt-5".to_string()),
            model: Some("gpt-5.1".to_string()),
            ..Default::default()
        })
        .await?;

@@ -5,10 +5,13 @@ use app_test_support::create_mock_chat_completions_server;
use app_test_support::create_mock_chat_completions_server_unchecked;
use app_test_support::create_shell_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::JSONRPCNotification;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ServerRequest;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
@@ -17,9 +20,6 @@ use codex_app_server_protocol::TurnStartedNotification;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_core::protocol_config_types::ReasoningEffort;
use codex_core::protocol_config_types::ReasoningSummary;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use core_test_support::skip_if_no_network;
use pretty_assertions::assert_eq;
use std::path::Path;
@@ -235,7 +235,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
    .await??;
    let ThreadStartResponse { thread } = to_response::<ThreadStartResponse>(start_resp)?;

    // turn/start — expect ExecCommandApproval request from server
    // turn/start — expect CommandExecutionRequestApproval request from server
    let first_turn_id = mcp
        .send_turn_start_request(TurnStartParams {
            thread_id: thread.id.clone(),
@@ -258,16 +258,10 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
        mcp.read_stream_until_request_message(),
    )
    .await??;
    let ServerRequest::ExecCommandApproval { request_id, params } = server_req else {
        panic!("expected ExecCommandApproval request");
    let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else {
        panic!("expected CommandExecutionRequestApproval request");
    };
    assert_eq!(params.call_id, "call1");
    assert_eq!(
        params.parsed_cmd,
        vec![ParsedCommand::Unknown {
            cmd: "python3 -c 'print(42)'".to_string()
        }]
    );
    assert_eq!(params.item_id, "call1");

    // Approve and wait for task completion
    mcp.send_response(
@@ -302,7 +296,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
    )
    .await??;

    // Ensure we do NOT receive an ExecCommandApproval request before task completes
    // Ensure we do NOT receive a CommandExecutionRequestApproval request before task completes
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
@@ -314,8 +308,6 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {

#[tokio::test]
async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
    // When returning Result from a test, pass an Ok(()) to the skip macro
    // so the early return type matches. The no-arg form returns unit.
    skip_if_no_network!(Ok(()));

    let tmp = TempDir::new()?;
@@ -424,29 +416,35 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
    )
    .await??;

    let exec_begin_notification = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
    )
    let command_exec_item = timeout(DEFAULT_READ_TIMEOUT, async {
        loop {
            let item_started_notification = mcp
                .read_stream_until_notification_message("item/started")
                .await?;
            let params = item_started_notification
                .params
                .clone()
                .expect("item/started params");
            let item_started: ItemStartedNotification =
                serde_json::from_value(params).expect("deserialize item/started notification");
            if matches!(item_started.item, ThreadItem::CommandExecution { .. }) {
                return Ok::<ThreadItem, anyhow::Error>(item_started.item);
            }
        }
    })
    .await??;
    let params = exec_begin_notification
        .params
        .clone()
        .expect("exec_command_begin params");
    let event: Event = serde_json::from_value(params).expect("deserialize exec begin event");
    let exec_begin = match event.msg {
        EventMsg::ExecCommandBegin(exec_begin) => exec_begin,
        other => panic!("expected ExecCommandBegin event, got {other:?}"),
    let ThreadItem::CommandExecution {
        cwd,
        command,
        status,
        ..
    } = command_exec_item
    else {
        unreachable!("loop ensures we break on command execution items");
    };
    assert_eq!(exec_begin.cwd, second_cwd);
    assert_eq!(
        exec_begin.command,
        vec![
            "bash".to_string(),
            "-lc".to_string(),
            "echo second turn".to_string()
        ]
    );
    assert_eq!(cwd, second_cwd);
    assert_eq!(command, "bash -lc 'echo second turn'");
    assert_eq!(status, CommandExecutionStatus::InProgress);

    timeout(
        DEFAULT_READ_TIMEOUT,

@@ -155,11 +155,11 @@ async fn run_command_under_sandbox(
        run_windows_sandbox_capture(
            policy_str,
            &sandbox_cwd,
            base_dir.as_path(),
            command_vec,
            &cwd_clone,
            env_map,
            None,
            Some(base_dir.as_path()),
        )
    })
    .await;

@@ -761,9 +761,9 @@ mod tests {

    #[test]
    fn resume_model_flag_applies_when_no_root_flags() {
        let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5-test"].as_ref());
        let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5.1-test"].as_ref());

        assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
        assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
        assert!(interactive.resume_picker);
        assert!(!interactive.resume_last);
        assert_eq!(interactive.resume_session_id, None);
@@ -808,7 +808,7 @@ mod tests {
                "--ask-for-approval",
                "on-request",
                "-m",
                "gpt-5-test",
                "gpt-5.1-test",
                "-p",
                "my-profile",
                "-C",
@@ -819,7 +819,7 @@ mod tests {
            .as_ref(),
        );

        assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
        assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
        assert!(interactive.oss);
        assert_eq!(interactive.config_profile.as_deref(), Some("my-profile"));
        assert_matches!(

@@ -10,6 +10,8 @@ workspace = true
clap = { workspace = true, features = ["derive", "wrap_help"], optional = true }
codex-app-server-protocol = { workspace = true }
codex-core = { workspace = true }
codex-lmstudio = { workspace = true }
codex-ollama = { workspace = true }
codex-protocol = { workspace = true }
once_cell = { workspace = true }
serde = { workspace = true, optional = true }

@@ -24,21 +24,21 @@ pub fn builtin_approval_presets() -> Vec<ApprovalPreset> {
        ApprovalPreset {
            id: "read-only",
            label: "Read Only",
            description: "Codex can read files and answer questions. Codex requires approval to make edits, run commands, or access network.",
            description: "Requires approval to edit files and run commands.",
            approval: AskForApproval::OnRequest,
            sandbox: SandboxPolicy::ReadOnly,
        },
        ApprovalPreset {
            id: "auto",
            label: "Auto",
            description: "Codex can read files, make edits, and run commands in the workspace. Codex requires approval to work outside the workspace or access network.",
            label: "Agent",
            description: "Read and edit files, and run commands.",
            approval: AskForApproval::OnRequest,
            sandbox: SandboxPolicy::new_workspace_write_policy(),
        },
        ApprovalPreset {
            id: "full-access",
            label: "Full Access",
            description: "Codex can read files, make edits, and run commands with network access, without approval. Exercise caution.",
            label: "Agent (full access)",
            description: "Codex can edit files outside this workspace and run commands with network access. Exercise caution when using.",
            approval: AskForApproval::Never,
            sandbox: SandboxPolicy::DangerFullAccess,
        },
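With the relabeled presets, callers still select by the stable `id` rather than the display label, so renames like "Auto" to "Agent" do not break lookups. A minimal sketch under that assumption, with the struct trimmed to the two fields it needs:

```rust
// Simplified mirror of the preset shape above; the real type also carries
// description, approval, and sandbox fields.
struct ApprovalPreset {
    id: &'static str,
    label: &'static str,
}

fn find_preset<'a>(presets: &'a [ApprovalPreset], id: &str) -> Option<&'a ApprovalPreset> {
    presets.iter().find(|preset| preset.id == id)
}

fn main() {
    let presets = [
        ApprovalPreset { id: "read-only", label: "Read Only" },
        ApprovalPreset { id: "auto", label: "Agent" },
        ApprovalPreset { id: "full-access", label: "Agent (full access)" },
    ];
    // The id "auto" survives the label change from "Auto" to "Agent".
    assert_eq!(find_preset(&presets, "auto").map(|p| p.label), Some("Agent"));
}
```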
@@ -1,6 +1,7 @@
use codex_core::WireApi;
use codex_core::config::Config;

use crate::model_presets::reasoning_effort_label_for_model;
use crate::sandbox_summary::summarize_sandbox_policy;

/// Build a list of key/value pairs summarizing the effective configuration.
@@ -19,7 +20,7 @@ pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, Stri
        "reasoning effort",
        config
            .model_reasoning_effort
            .map(|effort| effort.to_string())
            .map(|effort| reasoning_effort_label_for_model(&config.model, effort))
            .unwrap_or_else(|| "none".to_string()),
    ));
    entries.push((

@@ -37,3 +37,5 @@ pub mod model_presets;
// Shared approval presets (AskForApproval + Sandbox) used by TUI and MCP server
// Not to be confused with AskForApproval, which we should probably rename to EscalationPolicy.
pub mod approval_presets;
// Shared OSS provider utilities used by TUI and exec
pub mod oss;

@@ -11,6 +11,16 @@ pub struct ReasoningEffortPreset {
    pub effort: ReasoningEffort,
    /// Short human description shown next to the effort in UIs.
    pub description: &'static str,
    /// Optional friendly label shown in featured pickers.
    pub label: Option<&'static str>,
}

impl ReasoningEffortPreset {
    pub fn label(&self) -> String {
        self.label
            .map(ToString::to_string)
            .unwrap_or_else(|| self.effort.to_string())
    }
}

#[derive(Debug, Clone)]
@@ -42,6 +52,32 @@ pub struct ModelPreset {

static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
    vec![
        ModelPreset {
            id: "codex-auto",
            model: "codex-auto",
            display_name: "codex-auto",
            description: "Automatically chooses the best Codex model configuration for your task.",
            default_reasoning_effort: ReasoningEffort::Medium,
            supported_reasoning_efforts: &[
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Low,
                    description: "Works faster",
                    label: Some("Fast"),
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Medium,
                    description: "Balances speed with intelligence",
                    label: Some("Balanced"),
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::High,
                    description: "Works longer for harder tasks",
                    label: Some("Thorough"),
                },
            ],
            is_default: true,
            upgrade: None,
        },
        ModelPreset {
            id: "gpt-5.1-codex",
            model: "gpt-5.1-codex",
@@ -52,17 +88,20 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Low,
                    description: "Fastest responses with limited reasoning",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Medium,
                    description: "Dynamically adjusts reasoning based on the task",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::High,
                    description: "Maximizes reasoning depth for complex or ambiguous problems",
                    label: None,
                },
            ],
            is_default: true,
            is_default: false,
            upgrade: None,
        },
        ModelPreset {
@@ -75,10 +114,12 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Medium,
                    description: "Dynamically adjusts reasoning based on the task",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::High,
                    description: "Maximizes reasoning depth for complex or ambiguous problems",
                    label: None,
                },
            ],
            is_default: false,
@@ -94,14 +135,17 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Low,
                    description: "Balances speed with some reasoning; useful for straightforward queries and short explanations",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Medium,
                    description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::High,
                    description: "Maximizes reasoning depth for complex or ambiguous problems",
                    label: None,
                },
            ],
            is_default: false,
@@ -118,14 +162,17 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Low,
                    description: "Fastest responses with limited reasoning",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Medium,
                    description: "Dynamically adjusts reasoning based on the task",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::High,
                    description: "Maximizes reasoning depth for complex or ambiguous problems",
                    label: None,
                },
            ],
            is_default: false,
@@ -144,10 +191,12 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Medium,
                    description: "Dynamically adjusts reasoning based on the task",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::High,
                    description: "Maximizes reasoning depth for complex or ambiguous problems",
                    label: None,
                },
            ],
            is_default: false,
@@ -166,18 +215,22 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Minimal,
                    description: "Fastest responses with little reasoning",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Low,
                    description: "Balances speed with some reasoning; useful for straightforward queries and short explanations",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::Medium,
                    description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks",
                    label: None,
                },
                ReasoningEffortPreset {
                    effort: ReasoningEffort::High,
                    description: "Maximizes reasoning depth for complex or ambiguous problems",
                    label: None,
                },
            ],
            is_default: false,
@@ -192,11 +245,15 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
    ]
});

pub fn builtin_model_presets(_auth_mode: Option<AuthMode>) -> Vec<ModelPreset> {
    // leave auth mode for later use
pub fn builtin_model_presets(auth_mode: Option<AuthMode>) -> Vec<ModelPreset> {
    PRESETS
        .iter()
        .filter(|preset| preset.upgrade.is_none())
        .filter(|preset| match auth_mode {
            // `codex-auto` is only available when using ChatGPT-style auth.
            Some(AuthMode::ApiKey) => preset.id != "codex-auto",
            _ => true,
        })
        .cloned()
        .collect()
}
@@ -205,6 +262,25 @@ pub fn all_model_presets() -> &'static Vec<ModelPreset> {
    &PRESETS
}

impl ModelPreset {
    pub fn reasoning_effort_label(&self, effort: ReasoningEffort) -> String {
        self.supported_reasoning_efforts
            .iter()
            .find(|option| option.effort == effort)
            .map(ReasoningEffortPreset::label)
            .unwrap_or_else(|| effort.to_string())
    }
}

/// Return the display label for a reasoning effort on a given model, falling back to the effort
/// name when no label is provided.
pub fn reasoning_effort_label_for_model(model: &str, effort: ReasoningEffort) -> String {
    all_model_presets()
        .iter()
        .find(|preset| preset.model == model)
        .map(|preset| preset.reasoning_effort_label(effort))
        .unwrap_or_else(|| effort.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;
@@ -214,4 +290,31 @@ mod tests {
        let default_models = PRESETS.iter().filter(|preset| preset.is_default).count();
        assert!(default_models == 1);
    }

    #[test]
    fn codex_auto_is_included_for_non_api_auth() {
        let presets_no_auth = builtin_model_presets(None);
        assert!(
            presets_no_auth
                .iter()
                .any(|preset| preset.id == "codex-auto")
        );

        let presets_chatgpt = builtin_model_presets(Some(AuthMode::ChatGPT));
        assert!(
            presets_chatgpt
                .iter()
                .any(|preset| preset.id == "codex-auto")
        );
    }

    #[test]
    fn codex_auto_is_excluded_for_api_key_auth() {
        let presets_api_key = builtin_model_presets(Some(AuthMode::ApiKey));
        assert!(
            !presets_api_key
                .iter()
                .any(|preset| preset.id == "codex-auto")
        );
    }
}
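The label fallback chain above resolves in two steps: the preset's friendly label if one is set, otherwise the effort's `Display` form. A usage sketch; the exact `Display` output of `ReasoningEffort` is not shown above, so the fallback assertions compare against `to_string()` rather than a literal:

```rust
// Usage sketch for the helpers above. "codex-auto" carries friendly labels,
// while gpt-5.1 efforts fall back to ReasoningEffort's Display output.
let fast = reasoning_effort_label_for_model("codex-auto", ReasoningEffort::Low);
assert_eq!(fast, "Fast");

let fallback = reasoning_effort_label_for_model("gpt-5.1", ReasoningEffort::Low);
assert_eq!(fallback, ReasoningEffort::Low.to_string());

// Unknown models also degrade gracefully to the effort name, which is what
// the config summary in config_summary.rs relies on.
let unknown = reasoning_effort_label_for_model("some-custom-model", ReasoningEffort::High);
assert_eq!(unknown, ReasoningEffort::High.to_string());
```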
60
codex-rs/common/src/oss.rs
Normal file
@@ -0,0 +1,60 @@
//! OSS provider utilities shared between TUI and exec.

use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
use codex_core::OLLAMA_OSS_PROVIDER_ID;
use codex_core::config::Config;

/// Returns the default model for a given OSS provider.
pub fn get_default_model_for_oss_provider(provider_id: &str) -> Option<&'static str> {
    match provider_id {
        LMSTUDIO_OSS_PROVIDER_ID => Some(codex_lmstudio::DEFAULT_OSS_MODEL),
        OLLAMA_OSS_PROVIDER_ID => Some(codex_ollama::DEFAULT_OSS_MODEL),
        _ => None,
    }
}

/// Ensures the specified OSS provider is ready (models downloaded, service reachable).
pub async fn ensure_oss_provider_ready(
    provider_id: &str,
    config: &Config,
) -> Result<(), std::io::Error> {
    match provider_id {
        LMSTUDIO_OSS_PROVIDER_ID => {
            codex_lmstudio::ensure_oss_ready(config)
                .await
                .map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
        }
        OLLAMA_OSS_PROVIDER_ID => {
            codex_ollama::ensure_oss_ready(config)
                .await
                .map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
        }
        _ => {
            // Unknown provider, skip setup
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_default_model_for_provider_lmstudio() {
        let result = get_default_model_for_oss_provider(LMSTUDIO_OSS_PROVIDER_ID);
        assert_eq!(result, Some(codex_lmstudio::DEFAULT_OSS_MODEL));
    }

    #[test]
    fn test_get_default_model_for_provider_ollama() {
        let result = get_default_model_for_oss_provider(OLLAMA_OSS_PROVIDER_ID);
        assert_eq!(result, Some(codex_ollama::DEFAULT_OSS_MODEL));
    }

    #[test]
    fn test_get_default_model_for_provider_unknown() {
        let result = get_default_model_for_oss_provider("unknown-provider");
        assert_eq!(result, None);
    }
}
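A sketch of how a front end might combine the two new helpers when an OSS provider is requested; the wrapper function is hypothetical and `Config` construction is elided:

```rust
use codex_core::config::Config;

// Hypothetical front-end helper: resolve the provider's default model, then
// make sure the provider is actually usable before starting a session.
pub async fn prepare_oss_session(provider_id: &str, config: &Config) -> std::io::Result<String> {
    let model = get_default_model_for_oss_provider(provider_id)
        .ok_or_else(|| std::io::Error::other(format!("unknown OSS provider: {provider_id}")))?;
    ensure_oss_provider_ready(provider_id, config).await?;
    Ok(model.to_string())
}
```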
@@ -40,12 +40,7 @@ eventsource-stream = { workspace = true }
futures = { workspace = true }
http = { workspace = true }
indexmap = { workspace = true }
keyring = { workspace = true, features = [
    "apple-native",
    "crypto-rust",
    "linux-native-async-persistent",
    "windows-native",
] }
keyring = { workspace = true, features = ["crypto-rust"] }
libc = { workspace = true }
mcp-types = { workspace = true }
os_info = { workspace = true }
@@ -90,9 +85,11 @@ wildmatch = { workspace = true }
[target.'cfg(target_os = "linux")'.dependencies]
landlock = { workspace = true }
seccompiler = { workspace = true }
keyring = { workspace = true, features = ["linux-native-async-persistent"] }

[target.'cfg(target_os = "macos")'.dependencies]
core-foundation = "0.9"
keyring = { workspace = true, features = ["apple-native"] }

# Build OpenSSL from source for musl builds.
[target.x86_64-unknown-linux-musl.dependencies]
@@ -102,6 +99,12 @@ openssl-sys = { workspace = true, features = ["vendored"] }
[target.aarch64-unknown-linux-musl.dependencies]
openssl-sys = { workspace = true, features = ["vendored"] }

[target.'cfg(target_os = "windows")'.dependencies]
keyring = { workspace = true, features = ["windows-native"] }

[target.'cfg(any(target_os = "freebsd", target_os = "openbsd"))'.dependencies]
keyring = { workspace = true, features = ["sync-secret-service"] }

[dev-dependencies]
assert_cmd = { workspace = true }
assert_matches = { workspace = true }
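The manifest change moves each native `keyring` backend behind its target's dependency table, so only one platform backend compiles per OS instead of all of them everywhere. Call sites stay platform-neutral; a minimal sketch of the unchanged usage side (service and entry names are illustrative):

```rust
use keyring::Entry;

// The same code path resolves to Keychain on macOS, Secret Service on Linux,
// and Credential Manager on Windows, chosen at compile time by the per-target
// feature flags in the Cargo.toml above.
fn store_token(token: &str) -> keyring::Result<()> {
    let entry = Entry::new("codex", "oauth-credentials")?;
    entry.set_password(token)
}

fn load_token() -> keyring::Result<String> {
    Entry::new("codex", "oauth-credentials")?.get_password()
}
```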
@@ -318,8 +318,6 @@ For casual greetings, acknowledgements, or other one-off conversational messages

When using the shell, you must adhere to the following guidelines:

- The arguments to `shell` will be passed to execvp().
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
- Read files in chunks with a max chunk size of 250 lines. Do not use python scripts to attempt to output larger chunks of a file. Command line output will be truncated after 10 kilobytes or 256 lines of output, regardless of the command used.

@@ -2,8 +2,6 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL

## General

- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)

## Editing constraints

@@ -81,6 +81,7 @@ pub(crate) async fn stream_chat_completions(
            ResponseItem::CustomToolCallOutput { .. } => {}
            ResponseItem::WebSearchCall { .. } => {}
            ResponseItem::GhostSnapshot { .. } => {}
            ResponseItem::CompactionSummary { .. } => {}
        }
    }

@@ -320,7 +321,8 @@ pub(crate) async fn stream_chat_completions(
            }
            ResponseItem::Reasoning { .. }
            | ResponseItem::WebSearchCall { .. }
            | ResponseItem::Other => {
            | ResponseItem::Other
            | ResponseItem::CompactionSummary { .. } => {
                // Omit these items from the conversation history.
                continue;
            }
@@ -673,7 +675,9 @@ async fn process_chat_sse<S>(
        }

        // Emit end-of-turn when finish_reason signals completion.
        if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
        if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str())
            && !finish_reason.is_empty()
        {
            match finish_reason {
                "tool_calls" if fn_call_state.active => {
                    // First, flush the terminal raw reasoning so UIs can finalize

@@ -26,6 +26,7 @@ use tokio::sync::mpsc;
use tokio::time::timeout;
use tokio_util::io::ReaderStream;
use tracing::debug;
use tracing::enabled;
use tracing::trace;
use tracing::warn;

@@ -78,6 +79,18 @@ struct Error {
    resets_at: Option<i64>,
}

#[derive(Debug, Serialize)]
struct CompactHistoryRequest<'a> {
    model: &'a str,
    input: &'a [ResponseItem],
    instructions: &'a str,
}

#[derive(Debug, Deserialize)]
struct CompactHistoryResponse {
    output: Vec<ResponseItem>,
}

#[derive(Debug, Clone)]
pub struct ModelClient {
    config: Arc<Config>,
@@ -507,6 +520,70 @@ impl ModelClient {
    pub fn get_auth_manager(&self) -> Option<Arc<AuthManager>> {
        self.auth_manager.clone()
    }

    pub async fn compact_conversation_history(&self, prompt: &Prompt) -> Result<Vec<ResponseItem>> {
        if prompt.input.is_empty() {
            return Ok(Vec::new());
        }
        let auth_manager = self.auth_manager.clone();
        let auth = auth_manager.as_ref().and_then(|m| m.auth());
        let mut req_builder = self
            .provider
            .create_compact_request_builder(&self.client, &auth)
            .await?;
        if let SessionSource::SubAgent(sub) = &self.session_source {
            let subagent = if let crate::protocol::SubAgentSource::Other(label) = sub {
                label.clone()
            } else {
                serde_json::to_value(sub)
                    .ok()
                    .and_then(|v| v.as_str().map(std::string::ToString::to_string))
                    .unwrap_or_else(|| "other".to_string())
            };
            req_builder = req_builder.header("x-openai-subagent", subagent);
        }
        if let Some(auth) = auth.as_ref()
            && auth.mode == AuthMode::ChatGPT
            && let Some(account_id) = auth.get_account_id()
        {
            req_builder = req_builder.header("chatgpt-account-id", account_id);
        }
        let payload = CompactHistoryRequest {
            model: &self.config.model,
            input: &prompt.input,
            instructions: &prompt.get_full_instructions(&self.config.model_family),
        };

        if enabled!(tracing::Level::TRACE) {
            trace!(
                "POST to {}: {}",
                self.provider
                    .get_compact_url(&auth)
                    .unwrap_or("<none>".to_string()),
                serde_json::to_value(&payload).unwrap_or_default()
            );
        }

        let response = req_builder
            .json(&payload)
            .send()
            .await
            .map_err(|source| CodexErr::ConnectionFailed(ConnectionFailedError { source }))?;
        let status = response.status();
        let body = response
            .text()
            .await
            .map_err(|source| CodexErr::ConnectionFailed(ConnectionFailedError { source }))?;
        if !status.is_success() {
            return Err(CodexErr::UnexpectedStatus(UnexpectedResponseError {
                status,
                body,
                request_id: None,
            }));
        }
        let CompactHistoryResponse { output } = serde_json::from_str(&body)?;
        Ok(output)
    }
}
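A hedged sketch of how a caller might use the new endpoint when a turn hits the token limit; the surrounding task loop and error handling are elided, and mutating `prompt` assumes the caller owns it, which the `&Prompt` signature above does not itself require:

```rust
// Sketch only: swap the oversized history for the server-compacted one and retry.
let compacted = client.compact_conversation_history(&prompt).await?;
if !compacted.is_empty() {
    prompt.input = compacted; // continue the turn against the smaller history
}
```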
enum StreamAttemptError {
@@ -1225,7 +1302,7 @@ mod tests {

    #[tokio::test]
    async fn error_when_error_event() {
        let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_689bcf18d7f08194bf3440ba62fe05d803fee0cdac429894","object":"response","created_at":1755041560,"status":"failed","background":false,"error":{"code":"rate_limit_exceeded","message":"Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."}, "usage":null,"user":null,"metadata":{}}}"#;
        let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_689bcf18d7f08194bf3440ba62fe05d803fee0cdac429894","object":"response","created_at":1755041560,"status":"failed","background":false,"error":{"code":"rate_limit_exceeded","message":"Rate limit reached for gpt-5.1 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."}, "usage":null,"user":null,"metadata":{}}}"#;

        let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
        let provider = ModelProviderInfo {
@@ -1254,7 +1331,7 @@ mod tests {
            Err(CodexErr::Stream(msg, delay)) => {
                assert_eq!(
                    msg,
                    "Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."
                    "Rate limit reached for gpt-5.1 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."
                );
                assert_eq!(*delay, Some(Duration::from_secs_f64(11.054)));
            }
@@ -1473,7 +1550,7 @@ mod tests {
    fn test_try_parse_retry_after() {
        let err = Error {
            r#type: None,
            message: Some("Rate limit reached for gpt-5 in organization org- on tokens per min (TPM): Limit 1, Used 1, Requested 19304. Please try again in 28ms. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
            message: Some("Rate limit reached for gpt-5.1 in organization org- on tokens per min (TPM): Limit 1, Used 1, Requested 19304. Please try again in 28ms. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
            code: Some("rate_limit_exceeded".to_string()),
            plan_type: None,
            resets_at: None
@@ -1487,7 +1564,7 @@ mod tests {
    fn test_try_parse_retry_after_no_delay() {
        let err = Error {
            r#type: None,
            message: Some("Rate limit reached for gpt-5 in organization <ORG> on tokens per min (TPM): Limit 30000, Used 6899, Requested 24050. Please try again in 1.898s. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
            message: Some("Rate limit reached for gpt-5.1 in organization <ORG> on tokens per min (TPM): Limit 30000, Used 6899, Requested 24050. Please try again in 1.898s. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
            code: Some("rate_limit_exceeded".to_string()),
            plan_type: None,
            resets_at: None

@@ -136,7 +136,7 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
}

fn is_shell_tool_name(name: &str) -> bool {
    matches!(name, "shell" | "container.exec")
    matches!(name, "shell" | "container.exec" | "shell_command")
}

#[derive(Deserialize)]
@@ -427,7 +427,7 @@ mod tests {
            expects_apply_patch_instructions: false,
        },
        InstructionsTestCase {
            slug: "gpt-5-codex",
            slug: "gpt-5.1-codex",
            expects_apply_patch_instructions: false,
        },
        InstructionsTestCase {
@@ -457,7 +457,7 @@ mod tests {
        let input: Vec<ResponseItem> = vec![];
        let tools: Vec<serde_json::Value> = vec![];
        let req = ResponsesApiRequest {
            model: "gpt-5",
            model: "gpt-5.1",
            instructions: "i",
            input: &input,
            tools: &tools,
@@ -498,7 +498,7 @@ mod tests {
            create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");

        let req = ResponsesApiRequest {
            model: "gpt-5",
            model: "gpt-5.1",
            instructions: "i",
            input: &input,
            tools: &tools,
@@ -534,7 +534,7 @@ mod tests {
        let input: Vec<ResponseItem> = vec![];
        let tools: Vec<serde_json::Value> = vec![];
        let req = ResponsesApiRequest {
            model: "gpt-5",
            model: "gpt-5.1",
            instructions: "i",
            input: &input,
            tools: &tools,

@@ -9,12 +9,11 @@ use crate::client_common::REVIEW_PROMPT;
use crate::compact;
use crate::features::Feature;
use crate::function_tool::FunctionCallError;
use crate::mcp::auth::McpAuthStatusEntry;
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
use crate::parse_command::parse_command;
use crate::parse_turn_item;
use crate::response_processing::process_items;
use crate::terminal;
use crate::truncate::TruncationPolicy;
use crate::user_notification::UserNotifier;
use crate::util::error_or_panic;
use async_channel::Receiver;
@@ -45,6 +44,7 @@ use mcp_types::ReadResourceResult;
use serde_json;
use serde_json::Value;
use tokio::sync::Mutex;
use tokio::sync::RwLock;
use tokio::sync::oneshot;
use tokio_util::sync::CancellationToken;
use tracing::debug;
@@ -56,8 +56,8 @@ use crate::ModelProviderInfo;
use crate::client::ModelClient;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::compact::collect_user_messages;
use crate::config::Config;
use crate::config::types::McpServerTransportConfig;
use crate::config::types::ShellEnvironmentPolicy;
use crate::context_manager::ContextManager;
use crate::environment_context::EnvironmentContext;
@@ -65,10 +65,6 @@ use crate::error::CodexErr;
use crate::error::Result as CodexResult;
#[cfg(test)]
use crate::exec::StreamOutput;
// Removed: legacy executor wiring replaced by ToolOrchestrator flows.
// legacy normalize_exec_result no longer used after orchestrator migration
use crate::compact::build_compacted_history;
use crate::compact::collect_user_messages;
use crate::mcp::auth::compute_auth_statuses;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::model_family::find_family_for_model;
@@ -280,6 +276,7 @@ pub(crate) struct TurnContext {
    pub(crate) final_output_json_schema: Option<Value>,
    pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
    pub(crate) tool_call_gate: Arc<ReadinessFlag>,
    pub(crate) truncation_policy: TruncationPolicy,
}

impl TurnContext {
@@ -406,7 +403,7 @@ impl Session {
        );

        let client = ModelClient::new(
            Arc::new(per_turn_config),
            Arc::new(per_turn_config.clone()),
            auth_manager,
            otel_event_manager,
            provider,
@@ -436,6 +433,7 @@ impl Session {
            final_output_json_schema: None,
            codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
            tool_call_gate: Arc::new(ReadinessFlag::new()),
            truncation_policy: TruncationPolicy::new(&per_turn_config),
        }
    }

@@ -476,21 +474,13 @@ impl Session {
            ),
        };

        // Error messages to dispatch after SessionConfigured is sent.
        let mut post_session_configured_events = Vec::<Event>::new();

        // Kick off independent async setup tasks in parallel to reduce startup latency.
        //
        // - initialize RolloutRecorder with new or resumed session info
        // - spin up MCP connection manager
        // - perform default shell discovery
        // - load history metadata
        let rollout_fut = RolloutRecorder::new(&config, rollout_params);

        let mcp_fut = McpConnectionManager::new(
            config.mcp_servers.clone(),
            config.mcp_oauth_credentials_store_mode,
        );
        let default_shell_fut = shell::default_user_shell();
        let history_meta_fut = crate::message_history::history_metadata(&config);
        let auth_statuses_fut = compute_auth_statuses(
@@ -499,15 +489,8 @@ impl Session {
        );

        // Join all independent futures.
        let (
            rollout_recorder,
            mcp_res,
            default_shell,
            (history_log_id, history_entry_count),
            auth_statuses,
        ) = tokio::join!(
        let (rollout_recorder, default_shell, (history_log_id, history_entry_count), auth_statuses) = tokio::join!(
            rollout_fut,
            mcp_fut,
            default_shell_fut,
            history_meta_fut,
            auth_statuses_fut
@@ -519,34 +502,7 @@ impl Session {
        })?;
        let rollout_path = rollout_recorder.rollout_path.clone();

        // Handle MCP manager result and record any startup failures.
        let (mcp_connection_manager, failed_clients) = match mcp_res {
            Ok((mgr, failures)) => (mgr, failures),
            Err(e) => {
                let message = format!("Failed to create MCP connection manager: {e:#}");
                error!("{message}");
                post_session_configured_events.push(Event {
                    id: INITIAL_SUBMIT_ID.to_owned(),
                    msg: EventMsg::Error(ErrorEvent { message }),
                });
                (McpConnectionManager::default(), Default::default())
            }
        };

        // Surface individual client start-up failures to the user.
        if !failed_clients.is_empty() {
            for (server_name, err) in failed_clients {
                let auth_entry = auth_statuses.get(&server_name);
                let display_message = mcp_init_error_display(&server_name, auth_entry, &err);
                warn!("MCP client for `{server_name}` failed to start: {err:#}");
                post_session_configured_events.push(Event {
                    id: INITIAL_SUBMIT_ID.to_owned(),
                    msg: EventMsg::Error(ErrorEvent {
                        message: display_message,
                    }),
                });
            }
        }
        let mut post_session_configured_events = Vec::<Event>::new();

        for (alias, feature) in session_configuration.features.legacy_feature_usages() {
            let canonical = feature.key();
@@ -595,7 +551,8 @@ impl Session {
        warm_model_cache(&session_configuration.model);

        let services = SessionServices {
            mcp_connection_manager,
            mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
            mcp_startup_cancellation_token: CancellationToken::new(),
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::new(config.notify.clone()),
            rollout: Mutex::new(Some(rollout_recorder)),
@@ -635,6 +592,18 @@ impl Session {
        for event in events {
            sess.send_event_raw(event).await;
        }
        sess.services
            .mcp_connection_manager
            .write()
            .await
            .initialize(
                config.mcp_servers.clone(),
                config.mcp_oauth_credentials_store_mode,
                auth_statuses.clone(),
                tx_event.clone(),
                sess.services.mcp_startup_cancellation_token.clone(),
            )
            .await;

        // record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted.
        sess.record_initial_history(initial_history).await;
@@ -712,7 +681,8 @@ impl Session {
        let reconstructed_history =
            self.reconstruct_history_from_rollout(&turn_context, &rollout_items);
        if !reconstructed_history.is_empty() {
            self.record_into_history(&reconstructed_history).await;
            self.record_into_history(&reconstructed_history, &turn_context)
                .await;
        }

        // If persisting, persist all rollout items as-is (recorder filters)
@@ -894,6 +864,7 @@ impl Session {
        let parsed_cmd = parse_command(&command);
        let event = EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
            call_id,
            turn_id: turn_context.sub_id.clone(),
            command,
            cwd,
            reason,
@@ -968,7 +939,7 @@ impl Session {
        turn_context: &TurnContext,
        items: &[ResponseItem],
    ) {
        self.record_into_history(items).await;
        self.record_into_history(items, turn_context).await;
        self.persist_rollout_response_items(items).await;
        self.send_raw_response_items(turn_context, items).await;
    }
@@ -982,17 +953,25 @@ impl Session {
        for item in rollout_items {
            match item {
                RolloutItem::ResponseItem(response_item) => {
                    history.record_items(std::iter::once(response_item));
                    history.record_items(
                        std::iter::once(response_item),
                        turn_context.truncation_policy,
                    );
                }
                RolloutItem::Compacted(compacted) => {
                    let snapshot = history.get_history();
                    let user_messages = collect_user_messages(&snapshot);
                    let rebuilt = build_compacted_history(
                        self.build_initial_context(turn_context),
                        &user_messages,
                        &compacted.message,
                    );
                    history.replace(rebuilt);
                    // TODO(jif) clean
                    if let Some(replacement) = &compacted.replacement_history {
                        history.replace(replacement.clone());
                    } else {
                        let user_messages = collect_user_messages(&snapshot);
                        let rebuilt = compact::build_compacted_history(
                            self.build_initial_context(turn_context),
                            &user_messages,
                            &compacted.message,
                        );
                        history.replace(rebuilt);
                    }
                }
                _ => {}
            }
@@ -1001,9 +980,13 @@ impl Session {
    }

    /// Append ResponseItems to the in-memory conversation history only.
    pub(crate) async fn record_into_history(&self, items: &[ResponseItem]) {
    pub(crate) async fn record_into_history(
        &self,
        items: &[ResponseItem],
        turn_context: &TurnContext,
    ) {
        let mut state = self.state.lock().await;
        state.record_items(items.iter());
        state.record_items(items.iter(), turn_context.truncation_policy);
    }

    pub(crate) async fn replace_history(&self, items: Vec<ResponseItem>) {
@@ -1020,6 +1003,15 @@ impl Session {
        self.persist_rollout_items(&rollout_items).await;
    }

    pub async fn enabled(&self, feature: Feature) -> bool {
        self.state
            .lock()
            .await
            .session_configuration
            .features
            .enabled(feature)
    }

    async fn send_raw_response_items(&self, turn_context: &TurnContext, items: &[ResponseItem]) {
        for item in items {
            self.send_event(
@@ -1197,14 +1189,7 @@ impl Session {
        turn_context: Arc<TurnContext>,
        cancellation_token: CancellationToken,
    ) {
        if !self
            .state
            .lock()
            .await
            .session_configuration
            .features
            .enabled(Feature::GhostCommit)
        {
        if !self.enabled(Feature::GhostCommit).await {
            return;
        }
        let token = match turn_context.tool_call_gate.subscribe().await {
@@ -1258,6 +1243,8 @@ impl Session {
    ) -> anyhow::Result<ListResourcesResult> {
        self.services
            .mcp_connection_manager
            .read()
            .await
            .list_resources(server, params)
            .await
    }
@@ -1269,6 +1256,8 @@ impl Session {
    ) -> anyhow::Result<ListResourceTemplatesResult> {
        self.services
            .mcp_connection_manager
            .read()
            .await
            .list_resource_templates(server, params)
            .await
    }
@@ -1280,6 +1269,8 @@ impl Session {
    ) -> anyhow::Result<ReadResourceResult> {
        self.services
            .mcp_connection_manager
            .read()
            .await
            .read_resource(server, params)
            .await
    }
@@ -1292,19 +1283,29 @@ impl Session {
    ) -> anyhow::Result<CallToolResult> {
        self.services
            .mcp_connection_manager
            .read()
            .await
            .call_tool(server, tool, arguments)
            .await
    }

    pub(crate) fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
    pub(crate) async fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
        self.services
            .mcp_connection_manager
            .read()
            .await
            .parse_tool_name(tool_name)
            .await
    }

    pub async fn interrupt_task(self: &Arc<Self>) {
        info!("interrupt received: abort current task, if any");
        self.abort_all_tasks(TurnAbortReason::Interrupted).await;
        let has_active_turn = { self.active_turn.lock().await.is_some() };
        if has_active_turn {
            self.abort_all_tasks(TurnAbortReason::Interrupted).await;
        } else {
            self.cancel_mcp_startup().await;
        }
    }

    pub(crate) fn notifier(&self) -> &UserNotifier {
@@ -1318,6 +1319,10 @@ impl Session {
    fn show_raw_agent_reasoning(&self) -> bool {
        self.services.show_raw_agent_reasoning
    }

    async fn cancel_mcp_startup(&self) {
        self.services.mcp_startup_cancellation_token.cancel();
    }
}
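The interrupt path now distinguishes two cases: abort the active turn if one exists, otherwise cancel the still-running MCP startup via the session's `CancellationToken`. A minimal sketch of the token mechanics, with the startup body as a stand-in:

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

#[tokio::main]
async fn main() {
    let startup_token = CancellationToken::new();
    let child = startup_token.child_token();

    // Stand-in for the MCP startup task racing initialization against cancellation.
    let startup = tokio::spawn(async move {
        tokio::select! {
            _ = child.cancelled() => "cancelled",
            _ = tokio::time::sleep(Duration::from_secs(30)) => "initialized",
        }
    });

    // What `cancel_mcp_startup` does when an interrupt arrives with no active turn.
    startup_token.cancel();
    assert_eq!(startup.await.unwrap(), "cancelled");
}
```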
async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiver<Submission>) {
@@ -1425,7 +1430,7 @@ mod handlers {
    use codex_protocol::protocol::ReviewDecision;
    use codex_protocol::protocol::ReviewRequest;
    use codex_protocol::protocol::TurnAbortReason;
    use codex_protocol::user_input::UserInput;

    use std::sync::Arc;
    use tracing::info;
    use tracing::warn;
@@ -1575,17 +1580,15 @@ mod handlers {
    }

    pub async fn list_mcp_tools(sess: &Session, config: &Arc<Config>, sub_id: String) {
        // This is a cheap lookup from the connection manager's cache.
        let tools = sess.services.mcp_connection_manager.list_all_tools();
        let (auth_status_entries, resources, resource_templates) = tokio::join!(
        let mcp_connection_manager = sess.services.mcp_connection_manager.read().await;
        let (tools, auth_status_entries, resources, resource_templates) = tokio::join!(
            mcp_connection_manager.list_all_tools(),
            compute_auth_statuses(
                config.mcp_servers.iter(),
                config.mcp_oauth_credentials_store_mode,
            ),
            sess.services.mcp_connection_manager.list_all_resources(),
            sess.services
                .mcp_connection_manager
                .list_all_resource_templates()
            mcp_connection_manager.list_all_resources(),
            mcp_connection_manager.list_all_resource_templates(),
        );
        let auth_statuses = auth_status_entries
            .iter()
@@ -1594,7 +1597,10 @@ mod handlers {
        let event = Event {
            id: sub_id,
            msg: EventMsg::McpListToolsResponse(crate::protocol::McpListToolsResponseEvent {
                tools,
                tools: tools
                    .into_iter()
                    .map(|(name, tool)| (name, tool.tool))
                    .collect(),
                resources,
                resource_templates,
                auth_statuses,
@@ -1632,16 +1638,9 @@ mod handlers {
        let turn_context = sess
            .new_turn_with_sub_id(sub_id, SessionSettingsUpdate::default())
            .await;
        // Attempt to inject input into current task
        if let Err(items) = sess
            .inject_input(vec![UserInput::Text {
                text: turn_context.compact_prompt().to_string(),
            }])
            .await
        {
            sess.spawn_task(Arc::clone(&turn_context), items, CompactTask)
                .await;
        }

        sess.spawn_task(Arc::clone(&turn_context), vec![], CompactTask)
            .await;
    }

    pub async fn shutdown(sess: &Arc<Session>, sub_id: String) -> bool {
@@ -1767,6 +1766,7 @@ async fn spawn_review_thread(
        final_output_json_schema: None,
        codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
        tool_call_gate: Arc::new(ReadinessFlag::new()),
        truncation_policy: TruncationPolicy::new(&per_turn_config),
    };

    // Seed the child task with the review prompt as the initial user message.
@@ -1819,7 +1819,6 @@ pub(crate) async fn run_task(
    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
    // many turns, from the perspective of the user, it is a single turn.
    let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
    let mut auto_compact_recently_attempted = false;

    loop {
        // Note that pending_input would be something like a message the user
@@ -1874,27 +1873,12 @@ pub(crate) async fn run_task(
        let (responses, items_to_record_in_conversation_history) =
            process_items(processed_items, &sess, &turn_context).await;

        // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
        if token_limit_reached {
            if auto_compact_recently_attempted {
                let limit_str = limit.to_string();
                let current_tokens = total_usage_tokens
                    .map(|tokens| tokens.to_string())
                    .unwrap_or_else(|| "unknown".to_string());
                let event = EventMsg::Error(ErrorEvent {
                    message: format!(
                        "Conversation is still above the token limit after automatic summarization (limit {limit_str}, current {current_tokens}). Please start a new session or trim your input."
                    ),
                });
                sess.send_event(&turn_context, event).await;
                break;
            }
            auto_compact_recently_attempted = true;
            compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
            continue;
        }

        auto_compact_recently_attempted = false;

        if responses.is_empty() {
            last_agent_message = get_last_assistant_message_from_turn(
                &items_to_record_in_conversation_history,
@@ -1940,22 +1924,52 @@ async fn run_turn(
    input: Vec<ResponseItem>,
    cancellation_token: CancellationToken,
) -> CodexResult<TurnRunResult> {
    let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
    let mcp_tools = sess
        .services
        .mcp_connection_manager
        .read()
        .await
        .list_all_tools()
        .or_cancel(&cancellation_token)
        .await?;
    let router = Arc::new(ToolRouter::from_config(
        &turn_context.tools_config,
        Some(mcp_tools),
        Some(
            mcp_tools
                .into_iter()
                .map(|(name, tool)| (name, tool.tool))
                .collect(),
        ),
    ));

    let model_supports_parallel = turn_context
        .client
        .get_model_family()
        .supports_parallel_tool_calls;
    let parallel_tool_calls = model_supports_parallel;

    // TODO(jif) revert once testing phase is done.
    let parallel_tool_calls = model_supports_parallel
        && sess
            .state
            .lock()
            .await
            .session_configuration
            .features
            .enabled(Feature::ParallelToolCalls);
    let mut base_instructions = turn_context.base_instructions.clone();
    if parallel_tool_calls {
        static INSTRUCTIONS: &str = include_str!("../templates/parallel/instructions.md");
        static INSERTION_SPOT: &str = "## Editing constraints";
        base_instructions
            .as_mut()
            .map(|base| base.replace(INSERTION_SPOT, INSTRUCTIONS));
    }
|
||||
|
||||
let prompt = Prompt {
|
||||
input,
|
||||
tools: router.specs(),
|
||||
parallel_tool_calls,
|
||||
base_instructions_override: turn_context.base_instructions.clone(),
|
||||
base_instructions_override: base_instructions,
|
||||
output_schema: turn_context.final_output_json_schema.clone(),
|
||||
};
|
||||
|
||||
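// A standalone sketch of the marker-splicing idea above: extra instructions
// are inserted at a known heading in the base prompt. Note that
// `str::replace` returns a new String, so the caller must keep the return
// value for the splice to take effect.
fn splice_instructions(base: &str, marker: &str, replacement: &str) -> String {
    base.replace(marker, replacement)
}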
@@ -2112,7 +2126,7 @@ async fn try_run_turn(
            ResponseEvent::Created => {}
            ResponseEvent::OutputItemDone(item) => {
                let previously_active_item = active_item.take();
                match ToolRouter::build_tool_call(sess.as_ref(), item.clone()) {
                match ToolRouter::build_tool_call(sess.as_ref(), item.clone()).await {
                    Ok(Some(call)) => {
                        let payload_preview = call.payload.log_payload().into_owned();
                        tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
@@ -2323,59 +2337,6 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
    })
}

fn mcp_init_error_display(
    server_name: &str,
    entry: Option<&McpAuthStatusEntry>,
    err: &anyhow::Error,
) -> String {
    if let Some(McpServerTransportConfig::StreamableHttp {
        url,
        bearer_token_env_var,
        http_headers,
        ..
    }) = &entry.map(|entry| &entry.config.transport)
        && url == "https://api.githubcopilot.com/mcp/"
        && bearer_token_env_var.is_none()
        && http_headers.as_ref().map(HashMap::is_empty).unwrap_or(true)
    {
        // GitHub only supports OAuth for first-party MCP clients.
        // That means the user has to specify a personal access token via either bearer_token_env_var or http_headers.
        // https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
        format!(
            "GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
        )
    } else if is_mcp_client_auth_required_error(err) {
        format!(
            "The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
        )
    } else if is_mcp_client_startup_timeout_error(err) {
        let startup_timeout_secs = match entry {
            Some(entry) => match entry.config.startup_timeout_sec {
                Some(timeout) => timeout,
                None => DEFAULT_STARTUP_TIMEOUT,
            },
            None => DEFAULT_STARTUP_TIMEOUT,
        }
        .as_secs();
        format!(
            "MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
        )
    } else {
        format!("MCP client for `{server_name}` failed to start: {err:#}")
    }
}
fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
    // StreamableHttpError::AuthRequired from the MCP SDK.
    error.to_string().contains("Auth required")
}

fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
    let error_message = error.to_string();
    error_message.contains("request timed out")
        || error_message.contains("timed out handshaking with MCP server")
}

use crate::features::Features;
#[cfg(test)]
pub(crate) use tests::make_session_and_context;
@@ -2385,10 +2346,7 @@ mod tests {
    use super::*;
    use crate::config::ConfigOverrides;
    use crate::config::ConfigToml;
    use crate::config::types::McpServerConfig;
    use crate::config::types::McpServerTransportConfig;
    use crate::exec::ExecToolCallOutput;
    use crate::mcp::auth::McpAuthStatusEntry;
    use crate::tools::format_exec_output_str;

    use crate::protocol::CompactedItem;
@@ -2408,7 +2366,6 @@ mod tests {
    use codex_app_server_protocol::AuthMode;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::ResponseItem;
    use codex_protocol::protocol::McpAuthStatus;
    use std::time::Duration;
    use tokio::time::sleep;
@@ -2622,7 +2579,8 @@ mod tests {
        let state = SessionState::new(session_configuration.clone());

        let services = SessionServices {
            mcp_connection_manager: McpConnectionManager::default(),
            mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
            mcp_startup_cancellation_token: CancellationToken::new(),
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::new(None),
            rollout: Mutex::new(None),
@@ -2698,7 +2656,8 @@ mod tests {
        let state = SessionState::new(session_configuration.clone());

        let services = SessionServices {
            mcp_connection_manager: McpConnectionManager::default(),
            mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
            mcp_startup_cancellation_token: CancellationToken::new(),
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::new(None),
            rollout: Mutex::new(None),
@@ -2879,9 +2838,23 @@ mod tests {
    #[tokio::test]
    async fn fatal_tool_error_stops_turn_and_reports_error() {
        let (session, turn_context, _rx) = make_session_and_context_with_rx();
        let tools = {
            session
                .services
                .mcp_connection_manager
                .read()
                .await
                .list_all_tools()
                .await
        };
        let router = ToolRouter::from_config(
            &turn_context.tools_config,
            Some(session.services.mcp_connection_manager.list_all_tools()),
            Some(
                tools
                    .into_iter()
                    .map(|(name, tool)| (name, tool.tool))
                    .collect(),
            ),
        );
        let item = ResponseItem::CustomToolCall {
            id: None,
@@ -2892,6 +2865,7 @@ mod tests {
        };

        let call = ToolRouter::build_tool_call(session.as_ref(), item.clone())
            .await
            .expect("build tool call")
            .expect("tool call present");
        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
@@ -2924,7 +2898,7 @@ mod tests {
        for item in &initial_context {
            rollout_items.push(RolloutItem::ResponseItem(item.clone()));
        }
        live_history.record_items(initial_context.iter());
        live_history.record_items(initial_context.iter(), turn_context.truncation_policy);

        let user1 = ResponseItem::Message {
            id: None,
@@ -2933,7 +2907,7 @@ mod tests {
                text: "first user".to_string(),
            }],
        };
        live_history.record_items(std::iter::once(&user1));
        live_history.record_items(std::iter::once(&user1), turn_context.truncation_policy);
        rollout_items.push(RolloutItem::ResponseItem(user1.clone()));

        let assistant1 = ResponseItem::Message {
@@ -2943,13 +2917,13 @@ mod tests {
                text: "assistant reply one".to_string(),
            }],
        };
        live_history.record_items(std::iter::once(&assistant1));
        live_history.record_items(std::iter::once(&assistant1), turn_context.truncation_policy);
        rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));

        let summary1 = "summary one";
        let snapshot1 = live_history.get_history();
        let user_messages1 = collect_user_messages(&snapshot1);
        let rebuilt1 = build_compacted_history(
        let rebuilt1 = compact::build_compacted_history(
            session.build_initial_context(turn_context),
            &user_messages1,
            summary1,
@@ -2957,6 +2931,7 @@ mod tests {
        live_history.replace(rebuilt1);
        rollout_items.push(RolloutItem::Compacted(CompactedItem {
            message: summary1.to_string(),
            replacement_history: None,
        }));

        let user2 = ResponseItem::Message {
@@ -2966,7 +2941,7 @@ mod tests {
                text: "second user".to_string(),
            }],
        };
        live_history.record_items(std::iter::once(&user2));
        live_history.record_items(std::iter::once(&user2), turn_context.truncation_policy);
        rollout_items.push(RolloutItem::ResponseItem(user2.clone()));

        let assistant2 = ResponseItem::Message {
@@ -2976,13 +2951,13 @@ mod tests {
                text: "assistant reply two".to_string(),
            }],
        };
        live_history.record_items(std::iter::once(&assistant2));
        live_history.record_items(std::iter::once(&assistant2), turn_context.truncation_policy);
        rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));

        let summary2 = "summary two";
        let snapshot2 = live_history.get_history();
        let user_messages2 = collect_user_messages(&snapshot2);
        let rebuilt2 = build_compacted_history(
        let rebuilt2 = compact::build_compacted_history(
            session.build_initial_context(turn_context),
            &user_messages2,
            summary2,
@@ -2990,6 +2965,7 @@ mod tests {
        live_history.replace(rebuilt2);
        rollout_items.push(RolloutItem::Compacted(CompactedItem {
            message: summary2.to_string(),
            replacement_history: None,
        }));

        let user3 = ResponseItem::Message {
@@ -2999,7 +2975,7 @@ mod tests {
                text: "third user".to_string(),
            }],
        };
        live_history.record_items(std::iter::once(&user3));
        live_history.record_items(std::iter::once(&user3), turn_context.truncation_policy);
        rollout_items.push(RolloutItem::ResponseItem(user3.clone()));

        let assistant3 = ResponseItem::Message {
@@ -3009,7 +2985,7 @@ mod tests {
                text: "assistant reply three".to_string(),
            }],
        };
        live_history.record_items(std::iter::once(&assistant3));
        live_history.record_items(std::iter::once(&assistant3), turn_context.truncation_policy);
        rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));

        (rollout_items, live_history.get_history())
@@ -3141,7 +3117,6 @@ mod tests {
        pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
        assert!(exec_output.output.contains("hi"));
    }

    #[tokio::test]
    async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
        use crate::protocol::AskForApproval;
@@ -3183,89 +3158,4 @@ mod tests {

        pretty_assertions::assert_eq!(output, expected);
    }
    #[test]
    fn mcp_init_error_display_prompts_for_github_pat() {
        let server_name = "github";
        let entry = McpAuthStatusEntry {
            config: McpServerConfig {
                transport: McpServerTransportConfig::StreamableHttp {
                    url: "https://api.githubcopilot.com/mcp/".to_string(),
                    bearer_token_env_var: None,
                    http_headers: None,
                    env_http_headers: None,
                },
                enabled: true,
                startup_timeout_sec: None,
                tool_timeout_sec: None,
                enabled_tools: None,
                disabled_tools: None,
            },
            auth_status: McpAuthStatus::Unsupported,
        };
        let err = anyhow::anyhow!("OAuth is unsupported");

        let display = mcp_init_error_display(server_name, Some(&entry), &err);

        let expected = format!(
            "GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
        );

        assert_eq!(expected, display);
    }

    #[test]
    fn mcp_init_error_display_prompts_for_login_when_auth_required() {
        let server_name = "example";
        let err = anyhow::anyhow!("Auth required for server");

        let display = mcp_init_error_display(server_name, None, &err);

        let expected = format!(
            "The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
        );

        assert_eq!(expected, display);
    }

    #[test]
    fn mcp_init_error_display_reports_generic_errors() {
        let server_name = "custom";
        let entry = McpAuthStatusEntry {
            config: McpServerConfig {
                transport: McpServerTransportConfig::StreamableHttp {
                    url: "https://example.com".to_string(),
                    bearer_token_env_var: Some("TOKEN".to_string()),
                    http_headers: None,
                    env_http_headers: None,
                },
                enabled: true,
                startup_timeout_sec: None,
                tool_timeout_sec: None,
                enabled_tools: None,
                disabled_tools: None,
            },
            auth_status: McpAuthStatus::Unsupported,
        };
        let err = anyhow::anyhow!("boom");

        let display = mcp_init_error_display(server_name, Some(&entry), &err);

        let expected = format!("MCP client for `{server_name}` failed to start: {err:#}");

        assert_eq!(expected, display);
    }

    #[test]
    fn mcp_init_error_display_includes_startup_timeout_hint() {
        let server_name = "slow";
        let err = anyhow::anyhow!("request timed out");

        let display = mcp_init_error_display(server_name, None, &err);

        assert_eq!(
            "MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX",
            display
        );
    }
}
@@ -14,7 +14,9 @@ use crate::protocol::EventMsg;
use crate::protocol::TaskStartedEvent;
use crate::protocol::TurnContextItem;
use crate::protocol::WarningEvent;
use crate::truncate::truncate_middle;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::truncate_text;
use crate::util::backoff;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ContentItem;
@@ -26,6 +28,7 @@ use futures::prelude::*;
use tracing::error;

pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt.md");
pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;

pub(crate) async fn run_inline_auto_compact_task(
@@ -58,7 +61,10 @@ async fn run_compact_task_inner(
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);

    let mut history = sess.clone_history().await;
    history.record_items(&[initial_input_for_turn.into()]);
    history.record_items(
        &[initial_input_for_turn.into()],
        turn_context.truncation_policy,
    );

    let mut truncated_count = 0usize;
@@ -140,7 +146,9 @@ async fn run_compact_task_inner(
    }

    let history_snapshot = sess.clone_history().await.get_history();
    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
    let summary_suffix =
        get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
    let summary_text = format!("{SUMMARY_PREFIX}\n{summary_suffix}");
    let user_messages = collect_user_messages(&history_snapshot);

    let initial_context = sess.build_initial_context(turn_context.as_ref());
@@ -164,6 +172,7 @@ async fn run_compact_task_inner(

    let rollout_item = RolloutItem::Compacted(CompactedItem {
        message: summary_text.clone(),
        replacement_history: None,
    });
    sess.persist_rollout_items(&[rollout_item]).await;
@@ -201,12 +210,22 @@ pub(crate) fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
    items
        .iter()
        .filter_map(|item| match crate::event_mapping::parse_turn_item(item) {
            Some(TurnItem::UserMessage(user)) => Some(user.message()),
            Some(TurnItem::UserMessage(user)) => {
                if is_summary_message(&user.message()) {
                    None
                } else {
                    Some(user.message())
                }
            }
            _ => None,
        })
        .collect()
}

pub(crate) fn is_summary_message(message: &str) -> bool {
    message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str())
}

pub(crate) fn build_compacted_history(
    initial_context: Vec<ResponseItem>,
    user_messages: &[String],
@@ -216,7 +235,7 @@ pub(crate) fn build_compacted_history(
        initial_context,
        user_messages,
        summary_text,
        COMPACT_USER_MESSAGE_MAX_TOKENS * 4,
        COMPACT_USER_MESSAGE_MAX_TOKENS,
    )
}
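// Illustrative check mirroring is_summary_message above: a user message is a
// prior compaction summary exactly when it starts with SUMMARY_PREFIX plus a
// newline, so such messages are skipped when re-collecting genuine user input.
fn looks_like_summary(message: &str, summary_prefix: &str) -> bool {
    message.starts_with(&format!("{summary_prefix}\n"))
}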
@@ -224,20 +243,21 @@ fn build_compacted_history_with_limit(
    mut history: Vec<ResponseItem>,
    user_messages: &[String],
    summary_text: &str,
    max_bytes: usize,
    max_tokens: usize,
) -> Vec<ResponseItem> {
    let mut selected_messages: Vec<String> = Vec::new();
    if max_bytes > 0 {
        let mut remaining = max_bytes;
    if max_tokens > 0 {
        let mut remaining = max_tokens;
        for message in user_messages.iter().rev() {
            if remaining == 0 {
                break;
            }
            if message.len() <= remaining {
            let tokens = approx_token_count(message);
            if tokens <= remaining {
                selected_messages.push(message.clone());
                remaining = remaining.saturating_sub(message.len());
                remaining = remaining.saturating_sub(tokens);
            } else {
                let (truncated, _) = truncate_middle(message, remaining);
                let truncated = truncate_text(message, TruncationPolicy::Tokens(remaining));
                selected_messages.push(truncated);
                break;
            }
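// An illustrative, self-contained version of the budget loop above. It walks
// user messages newest-first, keeps whole messages while they fit, and
// truncates the first message that overflows. `approx_tokens` is a crude
// stand-in for the crate's approx_token_count, not the real tokenizer.
fn select_recent_messages(messages: &[String], mut budget: usize) -> Vec<String> {
    fn approx_tokens(s: &str) -> usize {
        s.split_whitespace().count() // rough stand-in for a real token count
    }
    let mut selected: Vec<String> = Vec::new();
    for message in messages.iter().rev() {
        if budget == 0 {
            break;
        }
        let tokens = approx_tokens(message);
        if tokens <= budget {
            selected.push(message.clone());
            budget -= tokens;
        } else {
            // Keep only the prefix that still fits the remaining budget.
            let kept: Vec<&str> = message.split_whitespace().take(budget).collect();
            selected.push(kept.join(" "));
            break;
        }
    }
    selected.reverse(); // restore oldest-to-newest order
    selected
}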
@@ -286,7 +306,8 @@ async fn drain_to_completed(
        };
        match event {
            Ok(ResponseEvent::OutputItemDone(item)) => {
                sess.record_into_history(std::slice::from_ref(&item)).await;
                sess.record_into_history(std::slice::from_ref(&item), turn_context)
                    .await;
            }
            Ok(ResponseEvent::RateLimits(snapshot)) => {
                sess.update_rate_limits(turn_context, snapshot).await;
@@ -304,6 +325,7 @@ async fn drain_to_completed(

#[cfg(test)]
mod tests {

    use super::*;
    use pretty_assertions::assert_eq;
@@ -395,16 +417,16 @@ mod tests {
    }

    #[test]
    fn build_compacted_history_truncates_overlong_user_messages() {
    fn build_token_limited_compacted_history_truncates_overlong_user_messages() {
        // Use a small truncation limit so the test remains fast while still validating
        // that oversized user content is truncated.
        let max_bytes = 128;
        let big = "X".repeat(max_bytes + 50);
        let max_tokens = 16;
        let big = "word ".repeat(200);
        let history = super::build_compacted_history_with_limit(
            Vec::new(),
            std::slice::from_ref(&big),
            "SUMMARY",
            max_bytes,
            max_tokens,
        );
        assert_eq!(history.len(), 2);

@@ -437,7 +459,7 @@ mod tests {
    }

    #[test]
    fn build_compacted_history_appends_summary_message() {
    fn build_token_limited_compacted_history_appends_summary_message() {
        let initial_context: Vec<ResponseItem> = Vec::new();
        let user_messages = vec!["first user message".to_string()];
        let summary_text = "summary text";
codex-rs/core/src/compact_remote.rs (new file, 100 lines)
@@ -0,0 +1,100 @@
use std::sync::Arc;

use crate::Prompt;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::error::Result as CodexResult;
use crate::protocol::AgentMessageEvent;
use crate::protocol::CompactedItem;
use crate::protocol::ErrorEvent;
use crate::protocol::EventMsg;
use crate::protocol::RolloutItem;
use crate::protocol::TaskStartedEvent;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::user_input::UserInput;

pub(crate) async fn run_remote_compact_task(
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
    input: Vec<UserInput>,
) -> Option<String> {
    let start_event = EventMsg::TaskStarted(TaskStartedEvent {
        model_context_window: turn_context.client.get_model_context_window(),
    });
    sess.send_event(&turn_context, start_event).await;

    match run_remote_compact_task_inner(&sess, &turn_context, input).await {
        Ok(()) => {
            let event = EventMsg::AgentMessage(AgentMessageEvent {
                message: "Compact task completed".to_string(),
            });
            sess.send_event(&turn_context, event).await;
        }
        Err(err) => {
            let event = EventMsg::Error(ErrorEvent {
                message: err.to_string(),
            });
            sess.send_event(&turn_context, event).await;
        }
    }

    None
}

async fn run_remote_compact_task_inner(
    sess: &Arc<Session>,
    turn_context: &Arc<TurnContext>,
    input: Vec<UserInput>,
) -> CodexResult<()> {
    let mut history = sess.clone_history().await;
    if !input.is_empty() {
        let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
        history.record_items(
            &[initial_input_for_turn.into()],
            turn_context.truncation_policy,
        );
    }

    let prompt = Prompt {
        input: history.get_history_for_prompt(),
        tools: vec![],
        parallel_tool_calls: false,
        base_instructions_override: turn_context.base_instructions.clone(),
        output_schema: None,
    };

    let mut new_history = turn_context
        .client
        .compact_conversation_history(&prompt)
        .await?;
    // Required to keep `/undo` available after compaction
    let ghost_snapshots: Vec<ResponseItem> = history
        .get_history()
        .iter()
        .filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. }))
        .cloned()
        .collect();

    if !ghost_snapshots.is_empty() {
        new_history.extend(ghost_snapshots);
    }
    sess.replace_history(new_history.clone()).await;

    if let Some(estimated_tokens) = sess
        .clone_history()
        .await
        .estimate_token_count(turn_context.as_ref())
    {
        sess.override_last_token_usage_estimate(turn_context.as_ref(), estimated_tokens)
            .await;
    }

    let compacted_item = CompactedItem {
        message: String::new(),
        replacement_history: Some(new_history),
    };
    sess.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)])
        .await;
    Ok(())
}
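// Hypothetical call site for the task above: trigger a server-side compaction
// with no extra user input; `session` and `ctx` are assumed to already exist.
async fn compact_remotely(session: Arc<Session>, ctx: Arc<TurnContext>) {
    let _last_message = run_remote_compact_task(session, ctx, Vec::new()).await;
}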
@@ -550,6 +550,15 @@ impl ConfigEditsBuilder {
        self
    }

    /// Enable or disable a feature flag by key under the `[features]` table.
    pub fn set_feature_enabled(mut self, key: &str, enabled: bool) -> Self {
        self.edits.push(ConfigEdit::SetPath {
            segments: vec!["features".to_string(), key.to_string()],
            value: value(enabled),
        });
        self
    }

    /// Apply edits on a blocking thread.
    pub fn apply_blocking(self) -> anyhow::Result<()> {
        apply_blocking(&self.codex_home, self.profile.as_deref(), &self.edits)
@@ -584,7 +593,7 @@ mod tests {
            codex_home,
            None,
            &[ConfigEdit::SetModel {
                model: Some("gpt-5-codex".to_string()),
                model: Some("gpt-5.1-codex".to_string()),
                effort: Some(ReasoningEffort::High),
            }],
        )
@@ -592,7 +601,7 @@ mod tests {

        let contents =
            std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
        let expected = r#"model = "gpt-5-codex"
        let expected = r#"model = "gpt-5.1-codex"
model_reasoning_effort = "high"
"#;
        assert_eq!(contents, expected);
@@ -722,7 +731,7 @@ model = "o5-preview"
        std::fs::write(
            codex_home.join(CONFIG_TOML_FILE),
            r#"[profiles."team a"]
model = "gpt-5-codex"
model = "gpt-5.1-codex"
"#,
        )
        .expect("seed");
@@ -972,14 +981,14 @@ B = \"2\"
        let codex_home = tmp.path().to_path_buf();

        ConfigEditsBuilder::new(&codex_home)
            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
            .apply()
            .await
            .expect("persist");

        let contents =
            std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
        let expected = r#"model = "gpt-5-codex"
        let expected = r#"model = "gpt-5.1-codex"
model_reasoning_effort = "high"
"#;
        assert_eq!(contents, expected);
@@ -1001,11 +1010,11 @@ model_reasoning_effort = "low"
            std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
        assert_eq!(contents, initial_expected);

        let updated_expected = r#"model = "gpt-5-codex"
        let updated_expected = r#"model = "gpt-5.1-codex"
model_reasoning_effort = "high"
"#;
        ConfigEditsBuilder::new(codex_home)
            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
            .apply_blocking()
            .expect("persist update");
        contents = std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
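// A minimal usage sketch of set_feature_enabled above; the
// "parallel_tool_calls" key is illustrative, not a confirmed flag name.
async fn enable_feature(codex_home: &std::path::Path) -> anyhow::Result<()> {
    ConfigEditsBuilder::new(codex_home)
        .set_feature_enabled("parallel_tool_calls", true)
        .apply()
        .await
}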
@@ -25,7 +25,9 @@ use crate::git_info::resolve_root_git_project_for_trust;
use crate::model_family::ModelFamily;
use crate::model_family::derive_default_model_family;
use crate::model_family::find_family_for_model;
use crate::model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::OLLAMA_OSS_PROVIDER_ID;
use crate::model_provider_info::built_in_model_providers;
use crate::openai_model_info::get_model_info;
use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME;
@@ -60,11 +62,11 @@ pub mod profile;
pub mod types;

#[cfg(target_os = "windows")]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
pub const OPENAI_DEFAULT_MODEL: &str = "codex-auto";
#[cfg(not(target_os = "windows"))]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
pub const OPENAI_DEFAULT_MODEL: &str = "codex-auto";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex";
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex";

/// Maximum number of bytes of the documentation that will be embedded. Larger
/// files are *silently truncated* to this size so we do not take up too much of
@@ -79,7 +81,7 @@ pub struct Config {
    /// Optional override of model selection.
    pub model: String,

    /// Model used specifically for review sessions. Defaults to "gpt-5-codex".
    /// Model used specifically for review sessions. Defaults to "gpt-5.1-codex".
    pub review_model: String,

    pub model_family: ModelFamily,
@@ -193,6 +195,9 @@ pub struct Config {
    /// Additional filenames to try when looking for project-level docs.
    pub project_doc_fallback_filenames: Vec<String>,

    /// Token budget applied when storing tool/function outputs in the context manager.
    pub tool_output_token_limit: Option<usize>,

    /// Directory containing all Codex state (defaults to `~/.codex` but can be
    /// overridden by the `CODEX_HOME` environment variable).
    pub codex_home: PathBuf,
@@ -466,6 +471,48 @@ pub fn set_project_trust_level(
        .apply_blocking()
}

/// Save the default OSS provider preference to config.toml
pub fn set_default_oss_provider(codex_home: &Path, provider: &str) -> std::io::Result<()> {
    // Validate that the provider is one of the known OSS providers
    match provider {
        LMSTUDIO_OSS_PROVIDER_ID | OLLAMA_OSS_PROVIDER_ID => {
            // Valid provider, continue
        }
        _ => {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                format!(
                    "Invalid OSS provider '{provider}'. Must be one of: {LMSTUDIO_OSS_PROVIDER_ID}, {OLLAMA_OSS_PROVIDER_ID}"
                ),
            ));
        }
    }
    let config_path = codex_home.join(CONFIG_TOML_FILE);

    // Read existing config or create empty string if file doesn't exist
    let content = match std::fs::read_to_string(&config_path) {
        Ok(content) => content,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
        Err(e) => return Err(e),
    };

    // Parse as DocumentMut for editing while preserving structure
    let mut doc = content.parse::<DocumentMut>().map_err(|e| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!("failed to parse config.toml: {e}"),
        )
    })?;

    // Set the default_oss_provider at root level
    use toml_edit::value;
    doc["oss_provider"] = value(provider);

    // Write the modified document back
    std::fs::write(&config_path, doc.to_string())?;
    Ok(())
}
/// Apply a single dotted-path override onto a TOML value.
fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
    use toml::value::Table;
@@ -592,6 +639,9 @@ pub struct ConfigToml {
    /// Ordered list of fallback filenames to look for when AGENTS.md is missing.
    pub project_doc_fallback_filenames: Option<Vec<String>>,

    /// Token budget applied when storing tool/function outputs in the context manager.
    pub tool_output_token_limit: Option<usize>,

    /// Profile to use from the `profiles` map.
    pub profile: Option<String>,

@@ -663,6 +713,8 @@ pub struct ConfigToml {
    pub experimental_use_rmcp_client: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
    pub experimental_sandbox_command_assessment: Option<bool>,
    /// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama".
    pub oss_provider: Option<String>,
}

impl From<ConfigToml> for UserSavedConfig {
@@ -851,6 +903,34 @@ pub struct ConfigOverrides {
    pub additional_writable_roots: Vec<PathBuf>,
}

/// Resolves the OSS provider from CLI override, profile config, or global config.
/// Returns `None` if no provider is configured at any level.
pub fn resolve_oss_provider(
    explicit_provider: Option<&str>,
    config_toml: &ConfigToml,
    config_profile: Option<String>,
) -> Option<String> {
    if let Some(provider) = explicit_provider {
        // Explicit provider specified (e.g., via --local-provider)
        Some(provider.to_string())
    } else {
        // Check profile config first, then global config
        let profile = config_toml.get_config_profile(config_profile).ok();
        if let Some(profile) = &profile {
            // Check if profile has an oss provider
            if let Some(profile_oss_provider) = &profile.oss_provider {
                Some(profile_oss_provider.clone())
            }
            // If not, check whether the top-level config has an oss provider
            else {
                config_toml.oss_provider.clone()
            }
        } else {
            config_toml.oss_provider.clone()
        }
    }
}
impl Config {
    /// Meant to be used exclusively for tests: `load_with_overrides()` should
    /// be used in all other cases.
@@ -1135,6 +1215,7 @@ impl Config {
                }
            })
            .collect(),
            tool_output_token_limit: cfg.tool_output_token_limit,
            codex_home,
            history,
            file_opener: cfg.file_opener.unwrap_or(UriBasedFileOpener::VsCode),
@@ -1239,6 +1320,16 @@ impl Config {
            Ok(Some(s))
        }
    }

    pub fn set_windows_sandbox_globally(&mut self, value: bool) {
        crate::safety::set_windows_sandbox_enabled(value);
        if value {
            self.features.enable(Feature::WindowsSandbox);
        } else {
            self.features.disable(Feature::WindowsSandbox);
        }
        self.forced_auto_mode_downgraded_on_windows = !value;
    }
}

fn default_model() -> String {
@@ -2542,7 +2633,7 @@ url = "https://example.com/mcp"
        let codex_home = TempDir::new()?;

        ConfigEditsBuilder::new(codex_home.path())
            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
            .apply()
            .await?;

@@ -2550,7 +2641,7 @@ url = "https://example.com/mcp"
            tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
        let parsed: ConfigToml = toml::from_str(&serialized)?;

        assert_eq!(parsed.model.as_deref(), Some("gpt-5-codex"));
        assert_eq!(parsed.model.as_deref(), Some("gpt-5.1-codex"));
        assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));

        Ok(())
@@ -2564,7 +2655,7 @@ url = "https://example.com/mcp"
        tokio::fs::write(
            &config_path,
            r#"
model = "gpt-5-codex"
model = "gpt-5.1-codex"
model_reasoning_effort = "medium"

[profiles.dev]
@@ -2600,7 +2691,7 @@ model = "gpt-4.1"

        ConfigEditsBuilder::new(codex_home.path())
            .with_profile(Some("dev"))
            .set_model(Some("gpt-5-codex"), Some(ReasoningEffort::Medium))
            .set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::Medium))
            .apply()
            .await?;

@@ -2612,7 +2703,7 @@ model = "gpt-4.1"
            .get("dev")
            .expect("profile should be created");

        assert_eq!(profile.model.as_deref(), Some("gpt-5-codex"));
        assert_eq!(profile.model.as_deref(), Some("gpt-5.1-codex"));
        assert_eq!(
            profile.model_reasoning_effort,
            Some(ReasoningEffort::Medium)
@@ -2634,7 +2725,7 @@ model = "gpt-4"
model_reasoning_effort = "medium"

[profiles.prod]
model = "gpt-5-codex"
model = "gpt-5.1-codex"
"#,
        )
        .await?;
@@ -2663,7 +2754,7 @@ model = "gpt-5-codex"
                .profiles
                .get("prod")
                .and_then(|profile| profile.model.as_deref()),
            Some("gpt-5-codex"),
            Some("gpt-5.1-codex"),
        );

        Ok(())
@@ -2778,7 +2869,7 @@ model_provider = "openai"
approval_policy = "on-failure"

[profiles.gpt5]
model = "gpt-5"
model = "gpt-5.1"
model_provider = "openai"
approval_policy = "on-failure"
model_reasoning_effort = "high"
@@ -2887,6 +2978,7 @@ model_verbosity = "high"
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            tool_output_token_limit: None,
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
@@ -2958,6 +3050,7 @@ model_verbosity = "high"
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            tool_output_token_limit: None,
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
@@ -3044,6 +3137,7 @@ model_verbosity = "high"
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            tool_output_token_limit: None,
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
@@ -3094,9 +3188,9 @@ model_verbosity = "high"
            fixture.codex_home(),
        )?;
        let expected_gpt5_profile_config = Config {
            model: "gpt-5".to_string(),
            model: "gpt-5.1".to_string(),
            review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
            model_family: find_family_for_model("gpt-5").expect("known model slug"),
            model_family: find_family_for_model("gpt-5.1").expect("known model slug"),
            model_context_window: Some(272_000),
            model_max_output_tokens: Some(128_000),
            model_auto_compact_token_limit: Some(244_800),
@@ -3116,6 +3210,7 @@ model_verbosity = "high"
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
            tool_output_token_limit: None,
            codex_home: fixture.codex_home(),
            history: History::default(),
            file_opener: UriBasedFileOpener::VsCode,
@@ -3265,6 +3360,41 @@ trust_level = "trusted"
        Ok(())
    }

    #[test]
    fn test_set_default_oss_provider() -> std::io::Result<()> {
        let temp_dir = TempDir::new()?;
        let codex_home = temp_dir.path();
        let config_path = codex_home.join(CONFIG_TOML_FILE);

        // Test setting valid provider on empty config
        set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
        let content = std::fs::read_to_string(&config_path)?;
        assert!(content.contains("oss_provider = \"ollama\""));

        // Test updating existing config
        std::fs::write(&config_path, "model = \"gpt-4\"\n")?;
        set_default_oss_provider(codex_home, LMSTUDIO_OSS_PROVIDER_ID)?;
        let content = std::fs::read_to_string(&config_path)?;
        assert!(content.contains("oss_provider = \"lmstudio\""));
        assert!(content.contains("model = \"gpt-4\""));

        // Test overwriting existing oss_provider
        set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
        let content = std::fs::read_to_string(&config_path)?;
        assert!(content.contains("oss_provider = \"ollama\""));
        assert!(!content.contains("oss_provider = \"lmstudio\""));

        // Test invalid provider
        let result = set_default_oss_provider(codex_home, "invalid_provider");
        assert!(result.is_err());
        let error = result.unwrap_err();
        assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
        assert!(error.to_string().contains("Invalid OSS provider"));
        assert!(error.to_string().contains("invalid_provider"));

        Ok(())
    }
    #[test]
    fn test_untrusted_project_gets_workspace_write_sandbox() -> anyhow::Result<()> {
        let config_with_untrusted = r#"
@@ -3295,6 +3425,85 @@ trust_level = "untrusted"
        Ok(())
    }

    #[test]
    fn test_resolve_oss_provider_explicit_override() {
        let config_toml = ConfigToml::default();
        let result = resolve_oss_provider(Some("custom-provider"), &config_toml, None);
        assert_eq!(result, Some("custom-provider".to_string()));
    }

    #[test]
    fn test_resolve_oss_provider_from_profile() {
        let mut profiles = std::collections::HashMap::new();
        let profile = ConfigProfile {
            oss_provider: Some("profile-provider".to_string()),
            ..Default::default()
        };
        profiles.insert("test-profile".to_string(), profile);
        let config_toml = ConfigToml {
            profiles,
            ..Default::default()
        };

        let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
        assert_eq!(result, Some("profile-provider".to_string()));
    }

    #[test]
    fn test_resolve_oss_provider_from_global_config() {
        let config_toml = ConfigToml {
            oss_provider: Some("global-provider".to_string()),
            ..Default::default()
        };

        let result = resolve_oss_provider(None, &config_toml, None);
        assert_eq!(result, Some("global-provider".to_string()));
    }

    #[test]
    fn test_resolve_oss_provider_profile_fallback_to_global() {
        let mut profiles = std::collections::HashMap::new();
        let profile = ConfigProfile::default(); // No oss_provider set
        profiles.insert("test-profile".to_string(), profile);
        let config_toml = ConfigToml {
            oss_provider: Some("global-provider".to_string()),
            profiles,
            ..Default::default()
        };

        let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
        assert_eq!(result, Some("global-provider".to_string()));
    }

    #[test]
    fn test_resolve_oss_provider_none_when_not_configured() {
        let config_toml = ConfigToml::default();
        let result = resolve_oss_provider(None, &config_toml, None);
        assert_eq!(result, None);
    }

    #[test]
    fn test_resolve_oss_provider_explicit_overrides_all() {
        let mut profiles = std::collections::HashMap::new();
        let profile = ConfigProfile {
            oss_provider: Some("profile-provider".to_string()),
            ..Default::default()
        };
        profiles.insert("test-profile".to_string(), profile);
        let config_toml = ConfigToml {
            oss_provider: Some("global-provider".to_string()),
            profiles,
            ..Default::default()
        };

        let result = resolve_oss_provider(
            Some("explicit-provider"),
            &config_toml,
            Some("test-profile".to_string()),
        );
        assert_eq!(result, Some("explicit-provider".to_string()));
    }

    #[test]
    fn test_untrusted_project_gets_unless_trusted_approval_policy() -> std::io::Result<()> {
        let codex_home = TempDir::new()?;
@@ -33,6 +33,7 @@ pub struct ConfigProfile {
    /// Optional feature toggles scoped to this profile.
    #[serde(default)]
    pub features: Option<crate::features::FeaturesToml>,
    pub oss_provider: Option<String>,
}

impl From<ConfigProfile> for codex_app_server_protocol::Profile {

@@ -1,8 +1,8 @@
use crate::codex::TurnContext;
use crate::context_manager::normalize;
use crate::context_manager::truncate;
use crate::context_manager::truncate::format_output_for_model_body;
use crate::context_manager::truncate::globally_truncate_function_output_items;
use crate::truncate::TruncationPolicy;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::truncate::truncate_text;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::TokenUsage;
@@ -10,12 +10,6 @@ use codex_protocol::protocol::TokenUsageInfo;
use codex_utils_tokenizer::Tokenizer;
use std::ops::Deref;

const CONTEXT_WINDOW_HARD_LIMIT_FACTOR: f64 = 1.1;
const CONTEXT_WINDOW_HARD_LIMIT_BYTES: usize =
    (truncate::MODEL_FORMAT_MAX_BYTES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize;
const CONTEXT_WINDOW_HARD_LIMIT_LINES: usize =
    (truncate::MODEL_FORMAT_MAX_LINES as f64 * CONTEXT_WINDOW_HARD_LIMIT_FACTOR) as usize;

/// Transcript of conversation history
#[derive(Debug, Clone, Default)]
pub(crate) struct ContextManager {
@@ -50,7 +44,7 @@ impl ContextManager {
    }

    /// `items` is ordered from oldest to newest.
    pub(crate) fn record_items<I>(&mut self, items: I)
    pub(crate) fn record_items<I>(&mut self, items: I, policy: TruncationPolicy)
    where
        I: IntoIterator,
        I::Item: std::ops::Deref<Target = ResponseItem>,
@@ -62,7 +56,7 @@ impl ContextManager {
                continue;
            }

            let processed = Self::process_item(&item);
            let processed = self.process_item(item_ref, policy);
            self.items.push(processed);
        }
    }
@@ -150,18 +144,14 @@ impl ContextManager {
        items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. }));
    }

    fn process_item(item: &ResponseItem) -> ResponseItem {
    fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem {
        match item {
            ResponseItem::FunctionCallOutput { call_id, output } => {
                let truncated = format_output_for_model_body(
                    output.content.as_str(),
                    CONTEXT_WINDOW_HARD_LIMIT_BYTES,
                    CONTEXT_WINDOW_HARD_LIMIT_LINES,
                );
                let truncated = truncate_text(output.content.as_str(), policy);
                let truncated_items = output
                    .content_items
                    .as_ref()
                    .map(|items| globally_truncate_function_output_items(items));
                    .map(|items| truncate_function_output_items_with_policy(items, policy));
                ResponseItem::FunctionCallOutput {
                    call_id: call_id.clone(),
                    output: FunctionCallOutputPayload {
@@ -172,11 +162,7 @@ impl ContextManager {
                }
            }
            ResponseItem::CustomToolCallOutput { call_id, output } => {
                let truncated = format_output_for_model_body(
                    output,
                    CONTEXT_WINDOW_HARD_LIMIT_BYTES,
                    CONTEXT_WINDOW_HARD_LIMIT_LINES,
                );
                let truncated = truncate_text(output, policy);
                ResponseItem::CustomToolCallOutput {
                    call_id: call_id.clone(),
                    output: truncated,
@@ -188,6 +174,7 @@ impl ContextManager {
            | ResponseItem::FunctionCall { .. }
            | ResponseItem::WebSearchCall { .. }
            | ResponseItem::CustomToolCall { .. }
            | ResponseItem::CompactionSummary { .. }
            | ResponseItem::GhostSnapshot { .. }
            | ResponseItem::Other => item.clone(),
        }
@@ -205,7 +192,8 @@ fn is_api_message(message: &ResponseItem) -> bool {
        | ResponseItem::CustomToolCallOutput { .. }
        | ResponseItem::LocalShellCall { .. }
        | ResponseItem::Reasoning { .. }
        | ResponseItem::WebSearchCall { .. } => true,
        | ResponseItem::WebSearchCall { .. }
        | ResponseItem::CompactionSummary { .. } => true,
        ResponseItem::GhostSnapshot { .. } => false,
        ResponseItem::Other => false,
    }
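// A sketch of the new calling convention above: the caller supplies one
// TruncationPolicy per record_items call, and every tool output recorded in
// that batch is truncated against it. Tokens(10_000) mirrors the tests below.
fn record_with_policy(manager: &mut ContextManager, item: &ResponseItem) {
    manager.record_items([item], TruncationPolicy::Tokens(10_000));
}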
@@ -1,9 +1,8 @@
use super::*;
use crate::context_manager::MODEL_FORMAT_MAX_LINES;
use crate::context_manager::truncate;
use crate::truncate;
use crate::truncate::TruncationPolicy;
use codex_git::GhostCommit;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::LocalShellAction;
use codex_protocol::models::LocalShellExecAction;
@@ -13,6 +12,9 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
use pretty_assertions::assert_eq;
use regex_lite::Regex;

const EXEC_FORMAT_MAX_LINES: usize = 256;
const EXEC_FORMAT_MAX_BYTES: usize = 10_000;

fn assistant_msg(text: &str) -> ResponseItem {
    ResponseItem::Message {
        id: None,
@@ -25,7 +27,9 @@ fn assistant_msg(text: &str) -> ResponseItem {

fn create_history_with_items(items: Vec<ResponseItem>) -> ContextManager {
    let mut h = ContextManager::new();
    h.record_items(items.iter());
    // Use a generous but fixed token budget; tests only rely on truncation
    // behavior, not on a specific model's token limit.
    h.record_items(items.iter(), TruncationPolicy::Tokens(10_000));
    h
}

@@ -55,6 +59,7 @@ fn reasoning_msg(text: &str) -> ResponseItem {
#[test]
fn filters_non_api_messages() {
    let mut h = ContextManager::default();
    let policy = TruncationPolicy::Tokens(10_000);
    // System message is not API messages; Other is ignored.
    let system = ResponseItem::Message {
        id: None,
@@ -64,12 +69,12 @@ fn filters_non_api_messages() {
        }],
    };
    let reasoning = reasoning_msg("thinking...");
    h.record_items([&system, &reasoning, &ResponseItem::Other]);
    h.record_items([&system, &reasoning, &ResponseItem::Other], policy);

    // User and assistant should be retained.
    let u = user_msg("hi");
    let a = assistant_msg("hello");
    h.record_items([&u, &a]);
    h.record_items([&u, &a], policy);

    let items = h.contents();
    assert_eq!(
@@ -237,6 +242,9 @@ fn normalization_retains_local_shell_outputs() {
#[test]
fn record_items_truncates_function_call_output_content() {
    let mut history = ContextManager::new();
    // Any reasonably small token budget works; the test only cares that
    // truncation happens and the marker is present.
    let policy = TruncationPolicy::Tokens(1_000);
    let long_line = "a very long line to trigger truncation\n";
    let long_output = long_line.repeat(2_500);
    let item = ResponseItem::FunctionCallOutput {
@@ -248,15 +256,20 @@ fn record_items_truncates_function_call_output_content() {
        },
    };

    history.record_items([&item]);
    history.record_items([&item], policy);

    assert_eq!(history.items.len(), 1);
    match &history.items[0] {
        ResponseItem::FunctionCallOutput { output, .. } => {
            assert_ne!(output.content, long_output);
            assert!(
                output.content.starts_with("Total output lines:"),
                "expected truncated summary, got {}",
                output.content.contains("tokens truncated"),
                "expected token-based truncation marker, got {}",
                output.content
            );
            assert!(
                output.content.contains("tokens truncated"),
                "expected truncation marker, got {}",
                output.content
            );
        }
@@ -267,6 +280,7 @@ fn record_items_truncates_function_call_output_content() {
#[test]
fn record_items_truncates_custom_tool_call_output_content() {
    let mut history = ContextManager::new();
    let policy = TruncationPolicy::Tokens(1_000);
    let line = "custom output that is very long\n";
    let long_output = line.repeat(2_500);
    let item = ResponseItem::CustomToolCallOutput {
@@ -274,21 +288,48 @@ fn record_items_truncates_custom_tool_call_output_content() {
        output: long_output.clone(),
    };

    history.record_items([&item]);
    history.record_items([&item], policy);

    assert_eq!(history.items.len(), 1);
    match &history.items[0] {
        ResponseItem::CustomToolCallOutput { output, .. } => {
            assert_ne!(output, &long_output);
            assert!(
                output.starts_with("Total output lines:"),
                "expected truncated summary, got {output}"
                output.contains("tokens truncated"),
                "expected token-based truncation marker, got {output}"
            );
            assert!(
                output.contains("tokens truncated") || output.contains("bytes truncated"),
                "expected truncation marker, got {output}"
            );
        }
        other => panic!("unexpected history item: {other:?}"),
    }
}

#[test]
fn record_items_respects_custom_token_limit() {
    let mut history = ContextManager::new();
    let policy = TruncationPolicy::Tokens(10);
    let long_output = "tokenized content repeated many times ".repeat(200);
    let item = ResponseItem::FunctionCallOutput {
        call_id: "call-custom-limit".to_string(),
        output: FunctionCallOutputPayload {
            content: long_output,
            success: Some(true),
            ..Default::default()
        },
    };

    history.record_items([&item], policy);

    let stored = match &history.items[0] {
        ResponseItem::FunctionCallOutput { output, .. } => output,
        other => panic!("unexpected history item: {other:?}"),
    };
    assert!(stored.content.contains("tokens truncated"));
}

fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
    let pattern = truncated_message_pattern(line, total_lines);
    let regex = Regex::new(&pattern).unwrap_or_else(|err| {
@@ -302,23 +343,22 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz
        .expect("missing body capture")
        .as_str();
    assert!(
        body.len() <= truncate::MODEL_FORMAT_MAX_BYTES,
        body.len() <= EXEC_FORMAT_MAX_BYTES,
        "body exceeds byte limit: {} bytes",
        body.len()
    );
}

fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
    let head_lines = MODEL_FORMAT_MAX_LINES / 2;
    let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
    let head_lines = EXEC_FORMAT_MAX_LINES / 2;
    let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines;
    let head_take = head_lines.min(total_lines);
    let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
    let omitted = total_lines.saturating_sub(head_take + tail_take);
    let escaped_line = regex_lite::escape(line);
    if omitted == 0 {
        return format!(
            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {max_bytes} bytes \.{{3}}]\n\n.*)$",
            max_bytes = truncate::MODEL_FORMAT_MAX_BYTES,
            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$",
        );
    }
    format!(
@@ -331,11 +371,7 @@ fn format_exec_output_truncates_large_error() {
    let line = "very long execution error line that should trigger truncation\n";
    let large_error = line.repeat(2_500); // way beyond both byte and line limits

    let truncated = truncate::format_output_for_model_body(
        &large_error,
        truncate::MODEL_FORMAT_MAX_BYTES,
        truncate::MODEL_FORMAT_MAX_LINES,
    );
    let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES);

    let total_lines = large_error.lines().count();
    assert_truncated_message_matches(&truncated, line, total_lines);
@@ -344,17 +380,13 @@

#[test]
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
    let long_line = "a".repeat(truncate::MODEL_FORMAT_MAX_BYTES + 50);
    let truncated = truncate::format_output_for_model_body(
        &long_line,
        truncate::MODEL_FORMAT_MAX_BYTES,
        truncate::MODEL_FORMAT_MAX_LINES,
    );
    let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50);
    let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES);

    assert_ne!(truncated, long_line);
    let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES);
    let marker_line = format!(
        "[... output truncated to fit {} bytes ...]",
        truncate::MODEL_FORMAT_MAX_BYTES
        "[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]"
    );
    assert!(
        truncated.contains(&marker_line),
@@ -371,28 +403,20 @@ fn format_exec_output_returns_original_when_within_limits() {
    let content = "example output\n".repeat(10);

    assert_eq!(
        truncate::format_output_for_model_body(
            &content,
            truncate::MODEL_FORMAT_MAX_BYTES,
            truncate::MODEL_FORMAT_MAX_LINES
        ),
        truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES),
        content
    );
}

#[test]
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
    let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 100;
    let total_lines = EXEC_FORMAT_MAX_LINES + 100;
    let content: String = (0..total_lines)
        .map(|idx| format!("line-{idx}\n"))
        .collect();

    let truncated = truncate::format_output_for_model_body(
        &content,
        truncate::MODEL_FORMAT_MAX_BYTES,
        truncate::MODEL_FORMAT_MAX_LINES,
    );
    let omitted = total_lines - truncate::MODEL_FORMAT_MAX_LINES;
    let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
    let omitted = total_lines - EXEC_FORMAT_MAX_LINES;
    let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");

    assert!(
@@ -413,103 +437,24 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {

#[test]
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
    let total_lines = truncate::MODEL_FORMAT_MAX_LINES + 42;
    let total_lines = EXEC_FORMAT_MAX_LINES + 42;
    let long_line = "x".repeat(256);
    let content: String = (0..total_lines)
        .map(|idx| format!("line-{idx}-{long_line}\n"))
        .collect();

    let truncated = truncate::format_output_for_model_body(
        &content,
        truncate::MODEL_FORMAT_MAX_BYTES,
        truncate::MODEL_FORMAT_MAX_LINES,
    );
    let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);

    assert!(
        truncated.contains("[... omitted 42 of 298 lines ...]"),
        "expected omitted marker when line count exceeds limit: {truncated}"
    );
    assert!(
        !truncated.contains("output truncated to fit"),
        !truncated.contains("byte limit"),
        "line omission marker should take precedence over byte marker: {truncated}"
    );
}

#[test]
fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
    // Arrange: several text items, none exceeding per-item limit, but total exceeds budget.
    let budget = truncate::MODEL_FORMAT_MAX_BYTES;
    let t1_len = (budget / 2).saturating_sub(10);
    let t2_len = (budget / 2).saturating_sub(10);
    let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len);
    let t3_len = 50; // gets truncated to remaining_after_t1_t2
    let t4_len = 5; // omitted
    let t5_len = 7; // omitted

    let t1 = "a".repeat(t1_len);
    let t2 = "b".repeat(t2_len);
    let t3 = "c".repeat(t3_len);
    let t4 = "d".repeat(t4_len);
    let t5 = "e".repeat(t5_len);

    let item = ResponseItem::FunctionCallOutput {
        call_id: "call-omit".to_string(),
        output: FunctionCallOutputPayload {
            content: "irrelevant".to_string(),
            content_items: Some(vec![
                FunctionCallOutputContentItem::InputText { text: t1 },
                FunctionCallOutputContentItem::InputText { text: t2 },
                FunctionCallOutputContentItem::InputImage {
                    image_url: "img:mid".to_string(),
                },
                FunctionCallOutputContentItem::InputText { text: t3 },
                FunctionCallOutputContentItem::InputText { text: t4 },
                FunctionCallOutputContentItem::InputText { text: t5 },
            ]),
            success: Some(true),
        },
    };

    let mut history = ContextManager::new();
    history.record_items([&item]);
    assert_eq!(history.items.len(), 1);
    let json = serde_json::to_value(&history.items[0]).expect("serialize to json");
|
||||
|
||||
let output = json
|
||||
.get("output")
|
||||
.expect("output field")
|
||||
.as_array()
|
||||
.expect("array output");
|
||||
|
||||
// Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted.
|
||||
assert_eq!(output.len(), 5);
|
||||
|
||||
let first = output[0].as_object().expect("first obj");
|
||||
assert_eq!(first.get("type").unwrap(), "input_text");
|
||||
let first_text = first.get("text").unwrap().as_str().unwrap();
|
||||
assert_eq!(first_text.len(), t1_len);
|
||||
|
||||
let second = output[1].as_object().expect("second obj");
|
||||
assert_eq!(second.get("type").unwrap(), "input_text");
|
||||
let second_text = second.get("text").unwrap().as_str().unwrap();
|
||||
assert_eq!(second_text.len(), t2_len);
|
||||
|
||||
assert_eq!(
|
||||
output[2],
|
||||
serde_json::json!({"type": "input_image", "image_url": "img:mid"})
|
||||
);
|
||||
|
||||
let fourth = output[3].as_object().expect("fourth obj");
|
||||
assert_eq!(fourth.get("type").unwrap(), "input_text");
|
||||
let fourth_text = fourth.get("text").unwrap().as_str().unwrap();
|
||||
assert_eq!(fourth_text.len(), remaining_after_t1_t2);
|
||||
|
||||
let summary = output[4].as_object().expect("summary obj");
|
||||
assert_eq!(summary.get("type").unwrap(), "input_text");
|
||||
let summary_text = summary.get("text").unwrap().as_str().unwrap();
|
||||
assert!(summary_text.contains("omitted 2 text items"));
|
||||
}
|
||||
|
||||
//TODO(aibrahim): run CI in release mode.
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
|
||||
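Editor's note: the tests above all assume a head/tail split of head = limit / 2 and tail = limit - head. A minimal sketch of just that split logic (the real value of EXEC_FORMAT_MAX_LINES is not visible in this excerpt; the 8-line budget below is purely illustrative):

fn head_tail_split(total_lines: usize, limit_lines: usize) -> (usize, usize, usize) {
    // Keep the first half of the budget as head, the rest as tail,
    // and report how many middle lines are omitted.
    let head_lines = limit_lines / 2;
    let tail_lines = limit_lines - head_lines;
    let head_take = head_lines.min(total_lines);
    let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
    let omitted = total_lines.saturating_sub(head_take + tail_take);
    (head_take, tail_take, omitted)
}

fn main() {
    // With a hypothetical 8-line budget, a 20-line output keeps 4 + 4 lines
    // and omits the middle 12.
    assert_eq!(head_tail_split(20, 8), (4, 4, 12));
    // Outputs within the budget omit nothing.
    assert_eq!(head_tail_split(5, 8), (4, 1, 0));
}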
@@ -1,8 +1,5 @@
mod history;
mod normalize;
mod truncate;

pub(crate) use crate::truncate::truncate_with_line_bytes_budget;
pub(crate) use history::ContextManager;
pub(crate) use truncate::MODEL_FORMAT_MAX_BYTES;
pub(crate) use truncate::MODEL_FORMAT_MAX_LINES;
pub(crate) use truncate::format_output_for_model_body;
@@ -1,148 +0,0 @@
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_utils_string::take_bytes_at_char_boundary;
use codex_utils_string::take_last_bytes_at_char_boundary;

use crate::util::error_or_panic;

// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines

pub(crate) fn globally_truncate_function_output_items(
items: &[FunctionCallOutputContentItem],
) -> Vec<FunctionCallOutputContentItem> {
let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
let mut remaining = MODEL_FORMAT_MAX_BYTES;
let mut omitted_text_items = 0usize;

for it in items {
match it {
FunctionCallOutputContentItem::InputText { text } => {
if remaining == 0 {
omitted_text_items += 1;
continue;
}

let len = text.len();
if len <= remaining {
out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
remaining -= len;
} else {
let slice = take_bytes_at_char_boundary(text, remaining);
if !slice.is_empty() {
out.push(FunctionCallOutputContentItem::InputText {
text: slice.to_string(),
});
}
remaining = 0;
}
}
// todo(aibrahim): handle input images; resize
FunctionCallOutputContentItem::InputImage { image_url } => {
out.push(FunctionCallOutputContentItem::InputImage {
image_url: image_url.clone(),
});
}
}
}

if omitted_text_items > 0 {
out.push(FunctionCallOutputContentItem::InputText {
text: format!("[omitted {omitted_text_items} text items ...]"),
});
}

out
}

pub(crate) fn format_output_for_model_body(
content: &str,
limit_bytes: usize,
limit_lines: usize,
) -> String {
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
if content.len() <= limit_bytes && total_lines <= limit_lines {
return content.to_string();
}
let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines);
format!("Total output lines: {total_lines}\n\n{output}")
}

fn truncate_formatted_exec_output(
content: &str,
total_lines: usize,
limit_bytes: usize,
limit_lines: usize,
) -> String {
debug_panic_on_double_truncation(content);
let head_lines: usize = limit_lines / 2;
let tail_lines: usize = limit_lines - head_lines; // 128
let head_bytes: usize = limit_bytes / 2;
let segments: Vec<&str> = content.split_inclusive('\n').collect();
let head_take = head_lines.min(segments.len());
let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
let omitted = segments.len().saturating_sub(head_take + tail_take);

let head_slice_end: usize = segments
.iter()
.take(head_take)
.map(|segment| segment.len())
.sum();
let tail_slice_start: usize = if tail_take == 0 {
content.len()
} else {
content.len()
- segments
.iter()
.rev()
.take(tail_take)
.map(|segment| segment.len())
.sum::<usize>()
};
let head_slice = &content[..head_slice_end];
let tail_slice = &content[tail_slice_start..];
let truncated_by_bytes = content.len() > limit_bytes;
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
let marker = if omitted > 0 {
Some(format!(
"\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
))
} else if truncated_by_bytes {
Some(format!(
"\n[... output truncated to fit {limit_bytes} bytes ...]\n\n"
))
} else {
None
};

let marker_len = marker.as_ref().map_or(0, String::len);
let base_head_budget = head_bytes.min(limit_bytes);
let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
let mut result = String::with_capacity(limit_bytes.min(content.len()));

result.push_str(head_part);
if let Some(marker_text) = marker.as_ref() {
result.push_str(marker_text);
}

let remaining = limit_bytes.saturating_sub(result.len());
if remaining == 0 {
return result;
}

let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
result.push_str(tail_part);

result
}

fn debug_panic_on_double_truncation(content: &str) {
if content.contains("Total output lines:") && content.contains("omitted") {
error_or_panic(format!(
"FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
));
}
}
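Editor's note: the deleted module above leans on codex_utils_string::take_bytes_at_char_boundary, whose implementation is not part of this compare. A minimal stand-in with the same contract (take at most max_bytes without splitting a UTF-8 code point) could look like:

fn take_bytes_at_char_boundary(s: &str, max_bytes: usize) -> &str {
    if s.len() <= max_bytes {
        return s;
    }
    let mut end = max_bytes;
    // Walk back from the byte budget until we land on a char boundary.
    while end > 0 && !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}

fn main() {
    // "é" is two bytes in UTF-8, so a 3-byte budget keeps "aé" (1 + 2 bytes).
    assert_eq!(take_bytes_at_char_boundary("aéz", 3), "aé");
    assert_eq!(take_bytes_at_char_boundary("abc", 10), "abc");
}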
@@ -2,7 +2,8 @@ use crate::codex::ProcessedResponseItem;
use crate::exec::ExecToolCallOutput;
use crate::token_data::KnownPlan;
use crate::token_data::PlanType;
use crate::truncate::truncate_middle;
use crate::truncate::TruncationPolicy;
use crate::truncate::truncate_text;
use chrono::DateTime;
use chrono::Datelike;
use chrono::Local;
@@ -461,7 +462,10 @@ pub fn get_error_message_ui(e: &CodexErr) -> String {
_ => e.to_string(),
};

truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
truncate_text(
&message,
TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_BYTES),
)
}

#[cfg(test)]
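Editor's note: only the call site of truncate_text is visible here; the real TruncationPolicy definition is elsewhere in this compare. A hedged sketch of the shape implied by this hunk and by the model-family hunks below (which also use a Tokens variant); the token branch here is a whitespace approximation, not the crate's actual tokenizer:

// Hypothetical mirror of crate::truncate::TruncationPolicy.
#[derive(Clone, Copy)]
enum TruncationPolicy {
    Bytes(usize),
    Tokens(usize),
}

fn truncate_text(message: &str, policy: TruncationPolicy) -> String {
    match policy {
        TruncationPolicy::Bytes(max) => {
            // Byte cap that never splits a UTF-8 code point.
            let mut end = max.min(message.len());
            while end > 0 && !message.is_char_boundary(end) {
                end -= 1;
            }
            message[..end].to_string()
        }
        // Stand-in: approximate tokens as whitespace-separated words.
        TruncationPolicy::Tokens(max) => message
            .split_whitespace()
            .take(max)
            .collect::<Vec<_>>()
            .join(" "),
    }
}

fn main() {
    assert_eq!(truncate_text("hello world", TruncationPolicy::Bytes(5)), "hello");
    assert_eq!(truncate_text("a b c d", TruncationPolicy::Tokens(2)), "a b");
}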
@@ -189,16 +189,20 @@ async fn exec_windows_sandbox(
};

let sandbox_cwd = cwd.clone();
let logs_base_dir = find_codex_home().ok();
let codex_home = find_codex_home().map_err(|err| {
CodexErr::Io(io::Error::other(format!(
"windows sandbox: failed to resolve codex_home: {err}"
)))
})?;
let spawn_res = tokio::task::spawn_blocking(move || {
run_windows_sandbox_capture(
policy_str,
&sandbox_cwd,
codex_home.as_ref(),
command,
&cwd,
env,
timeout_ms,
logs_base_dir.as_deref(),
)
})
.await;
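Editor's note: the behavioral change in this hunk is that a missing codex home is now a hard error instead of being silently tolerated. In isolation the two patterns compare like this (find_codex_home below is a stand-in for the real lookup, which is not shown in this diff):

use std::io;

fn find_codex_home() -> Result<std::path::PathBuf, io::Error> {
    // Stand-in: the real function resolves the Codex home directory.
    std::env::var_os("CODEX_HOME")
        .map(std::path::PathBuf::from)
        .ok_or_else(|| io::Error::other("CODEX_HOME is not set"))
}

fn main() -> Result<(), io::Error> {
    // Old behavior: failure degrades to None and execution continues.
    let logs_base_dir = find_codex_home().ok();
    println!("logs dir: {logs_base_dir:?}");

    // New behavior: failure is surfaced to the caller with added context.
    let codex_home = find_codex_home()
        .map_err(|err| io::Error::other(format!("failed to resolve codex_home: {err}")))?;
    println!("codex home: {codex_home:?}");
    Ok(())
}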
@@ -27,6 +27,8 @@ pub enum Stage {
/// Unique features toggled via configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Feature {
/// Create a ghost commit at each turn.
GhostCommit,
/// Use the single unified PTY-backed exec tool.
UnifiedExec,
/// Use the shell command tool that takes `command` as a single string of
@@ -42,10 +44,14 @@ pub enum Feature {
WebSearchRequest,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
/// Create a ghost commit at each turn.
GhostCommit,
/// Enable Windows sandbox (restricted token) on Windows.
WindowsSandbox,
/// Remote compaction enabled (only for ChatGPT auth)
RemoteCompaction,
/// Enable the default shell tool.
ShellTool,
/// Allow model to call multiple tools in parallel (only for models supporting it).
ParallelToolCalls,
}

impl Feature {
@@ -247,6 +253,14 @@ pub struct FeatureSpec {
}

pub const FEATURES: &[FeatureSpec] = &[
// Stable features.
FeatureSpec {
id: Feature::GhostCommit,
key: "undo",
stage: Stage::Stable,
default_enabled: true,
},
// Unstable features.
FeatureSpec {
id: Feature::UnifiedExec,
key: "unified_exec",
@@ -289,16 +303,28 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::GhostCommit,
key: "ghost_commit",
stage: Stage::Experimental,
default_enabled: true,
},
FeatureSpec {
id: Feature::WindowsSandbox,
key: "enable_experimental_windows_sandbox",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::RemoteCompaction,
key: "remote_compaction",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::ParallelToolCalls,
key: "parallel",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::ShellTool,
key: "shell_tool",
stage: Stage::Stable,
default_enabled: true,
},
];
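Editor's note: given the FEATURES table above, resolving a config key to its spec is a linear scan over the slice. A reduced, self-contained sketch of that lookup (the table here is a trimmed copy for illustration, not the full list):

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Feature {
    GhostCommit,
    RemoteCompaction,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Stage {
    Stable,
    Experimental,
}

struct FeatureSpec {
    id: Feature,
    key: &'static str,
    stage: Stage,
    default_enabled: bool,
}

const FEATURES: &[FeatureSpec] = &[
    FeatureSpec { id: Feature::GhostCommit, key: "undo", stage: Stage::Stable, default_enabled: true },
    FeatureSpec { id: Feature::RemoteCompaction, key: "remote_compaction", stage: Stage::Experimental, default_enabled: false },
];

// Find the spec registered under a given config key, if any.
fn spec_for_key(key: &str) -> Option<&'static FeatureSpec> {
    FEATURES.iter().find(|spec| spec.key == key)
}

fn main() {
    let spec = spec_for_key("remote_compaction").expect("known key");
    assert_eq!(spec.id, Feature::RemoteCompaction);
    assert_eq!(spec.stage, Stage::Experimental);
    assert!(!spec.default_enabled);
}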
@@ -13,6 +13,7 @@ mod client;
mod client_common;
pub mod codex;
mod codex_conversation;
mod compact_remote;
pub use codex_conversation::CodexConversation;
mod codex_delegate;
mod command_safety;
@@ -34,14 +35,18 @@ mod mcp_tool_call;
mod message_history;
mod model_provider_info;
pub mod parse_command;
pub mod powershell;
mod response_processing;
pub mod sandboxing;
pub mod token_data;
mod truncate;
mod unified_exec;
mod user_instructions;
pub use model_provider_info::BUILT_IN_OSS_MODEL_PROVIDER_ID;
pub use model_provider_info::DEFAULT_LMSTUDIO_PORT;
pub use model_provider_info::DEFAULT_OLLAMA_PORT;
pub use model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
pub use model_provider_info::ModelProviderInfo;
pub use model_provider_info::OLLAMA_OSS_PROVIDER_ID;
pub use model_provider_info::WireApi;
pub use model_provider_info::built_in_model_providers;
pub use model_provider_info::create_oss_provider_with_base_url;
File diff suppressed because it is too large
@@ -4,6 +4,7 @@ use codex_protocol::config_types::Verbosity;
use crate::config::types::ReasoningSummaryFormat;
use crate::tools::handlers::apply_patch::ApplyPatchToolType;
use crate::tools::spec::ConfigShellToolType;
use crate::truncate::TruncationPolicy;

/// The `instructions` field in the payload sent to a model should always start
/// with this content.
@@ -66,6 +67,8 @@ pub struct ModelFamily {

/// Preferred shell tool type for this model family when features do not override it.
pub shell_type: ConfigShellToolType,

pub truncation_policy: TruncationPolicy,
}

macro_rules! model_family {
@@ -89,6 +92,7 @@ macro_rules! model_family {
shell_type: ConfigShellToolType::Default,
default_verbosity: None,
default_reasoning_effort: None,
truncation_policy: TruncationPolicy::Bytes(10_000),
};

// apply overrides
@@ -132,7 +136,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true)
} else if slug.starts_with("gpt-3.5") {
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
} else if slug.starts_with("test-gpt-5-codex") {
} else if slug.starts_with("test-gpt-5") {
model_family!(
slug, slug,
supports_reasoning_summaries: true,
@@ -145,7 +149,9 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
"test_sync_tool".to_string(),
],
supports_parallel_tool_calls: true,
shell_type: ConfigShellToolType::ShellCommand,
support_verbosity: true,
truncation_policy: TruncationPolicy::Tokens(10_000),
)

// Internal models.
@@ -161,8 +167,10 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
"list_dir".to_string(),
"read_file".to_string(),
],
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
support_verbosity: true,
truncation_policy: TruncationPolicy::Tokens(10_000),
)

// Production models.
@@ -176,7 +184,10 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
support_verbosity: false,
truncation_policy: TruncationPolicy::Tokens(10_000),
)
} else if slug.starts_with("gpt-5.1") {
model_family!(
@@ -187,13 +198,18 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
default_verbosity: Some(Verbosity::Low),
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
default_reasoning_effort: Some(ReasoningEffort::Medium),
truncation_policy: TruncationPolicy::Bytes(10_000),
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
)
} else if slug.starts_with("gpt-5") {
model_family!(
slug, "gpt-5",
supports_reasoning_summaries: true,
needs_special_apply_patch_instructions: true,
shell_type: ConfigShellToolType::ShellCommand,
support_verbosity: true,
truncation_policy: TruncationPolicy::Bytes(10_000),
)
} else {
None
@@ -216,5 +232,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
shell_type: ConfigShellToolType::Default,
default_verbosity: None,
default_reasoning_effort: None,
truncation_policy: TruncationPolicy::Bytes(10_000),
}
}
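Editor's note: the dispatch above is ordered prefix matching, so more specific slugs must be tested before their prefixes ("gpt-5.1" before "gpt-5"). A reduced sketch of that control flow:

fn family_for_slug(slug: &str) -> Option<&'static str> {
    // Order matters: "gpt-5.1-codex" also starts with "gpt-5", so the
    // longer prefix has to be checked first, as in find_family_for_model.
    if slug.starts_with("gpt-5.1") {
        Some("gpt-5.1")
    } else if slug.starts_with("gpt-5") {
        Some("gpt-5")
    } else {
        None
    }
}

fn main() {
    assert_eq!(family_for_slug("gpt-5.1-codex"), Some("gpt-5.1"));
    assert_eq!(family_for_slug("gpt-5-codex"), Some("gpt-5"));
    assert_eq!(family_for_slug("o99"), None);
}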
@@ -8,6 +8,7 @@
use crate::CodexAuth;
use crate::default_client::CodexHttpClient;
use crate::default_client::CodexRequestBuilder;
use crate::error::CodexErr;
use codex_app_server_protocol::AuthMode;
use serde::Deserialize;
use serde::Serialize;
@@ -109,21 +110,7 @@ impl ModelProviderInfo {
client: &'a CodexHttpClient,
auth: &Option<CodexAuth>,
) -> crate::error::Result<CodexRequestBuilder> {
let effective_auth = if let Some(secret_key) = &self.experimental_bearer_token {
Some(CodexAuth::from_api_key(secret_key))
} else {
match self.api_key() {
Ok(Some(key)) => Some(CodexAuth::from_api_key(&key)),
Ok(None) => auth.clone(),
Err(err) => {
if auth.is_some() {
auth.clone()
} else {
return Err(err);
}
}
}
};
let effective_auth = self.effective_auth(auth)?;

let url = self.get_full_url(&effective_auth);

@@ -136,6 +123,51 @@ impl ModelProviderInfo {
Ok(self.apply_http_headers(builder))
}

pub async fn create_compact_request_builder<'a>(
&'a self,
client: &'a CodexHttpClient,
auth: &Option<CodexAuth>,
) -> crate::error::Result<CodexRequestBuilder> {
if self.wire_api != WireApi::Responses {
return Err(CodexErr::UnsupportedOperation(
"Compaction endpoint requires Responses API providers".to_string(),
));
}
let effective_auth = self.effective_auth(auth)?;

let url = self.get_compact_url(&effective_auth).ok_or_else(|| {
CodexErr::UnsupportedOperation(
"Compaction endpoint requires Responses API providers".to_string(),
)
})?;

let mut builder = client.post(url);

if let Some(auth) = effective_auth.as_ref() {
builder = builder.bearer_auth(auth.get_token().await?);
}

Ok(self.apply_http_headers(builder))
}

fn effective_auth(&self, auth: &Option<CodexAuth>) -> crate::error::Result<Option<CodexAuth>> {
if let Some(secret_key) = &self.experimental_bearer_token {
return Ok(Some(CodexAuth::from_api_key(secret_key)));
}

match self.api_key() {
Ok(Some(key)) => Ok(Some(CodexAuth::from_api_key(&key))),
Ok(None) => Ok(auth.clone()),
Err(err) => {
if auth.is_some() {
Ok(auth.clone())
} else {
Err(err)
}
}
}
}

fn get_query_string(&self) -> String {
self.query_params
.as_ref()
@@ -173,6 +205,18 @@ impl ModelProviderInfo {
}
}

pub(crate) fn get_compact_url(&self, auth: &Option<CodexAuth>) -> Option<String> {
if self.wire_api != WireApi::Responses {
return None;
}
let full = self.get_full_url(auth);
if let Some((path, query)) = full.split_once('?') {
Some(format!("{path}/compact?{query}"))
} else {
Some(format!("{full}/compact"))
}
}
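Editor's note: get_compact_url splices "/compact" into the path while preserving any query string. The string manipulation in isolation, with hypothetical example.com URLs:

fn compact_url(full: &str) -> String {
    // Mirrors the split_once logic above: insert "/compact" before the
    // query string if one is present, otherwise just append it.
    if let Some((path, query)) = full.split_once('?') {
        format!("{path}/compact?{query}")
    } else {
        format!("{full}/compact")
    }
}

fn main() {
    assert_eq!(
        compact_url("https://api.example.com/v1/responses?api-version=1"),
        "https://api.example.com/v1/responses/compact?api-version=1"
    );
    assert_eq!(
        compact_url("https://api.example.com/v1/responses"),
        "https://api.example.com/v1/responses/compact"
    );
}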
pub(crate) fn is_azure_responses_endpoint(&self) -> bool {
if self.wire_api != WireApi::Responses {
return false;
@@ -258,9 +302,11 @@ impl ModelProviderInfo {
}
}

const DEFAULT_OLLAMA_PORT: u32 = 11434;
pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;
pub const DEFAULT_OLLAMA_PORT: u16 = 11434;

pub const BUILT_IN_OSS_MODEL_PROVIDER_ID: &str = "oss";
pub const LMSTUDIO_OSS_PROVIDER_ID: &str = "lmstudio";
pub const OLLAMA_OSS_PROVIDER_ID: &str = "ollama";

/// Built-in default provider list.
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
@@ -311,14 +357,21 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
requires_openai_auth: true,
},
),
(BUILT_IN_OSS_MODEL_PROVIDER_ID, create_oss_provider()),
(
OLLAMA_OSS_PROVIDER_ID,
create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Chat),
),
(
LMSTUDIO_OSS_PROVIDER_ID,
create_oss_provider(DEFAULT_LMSTUDIO_PORT, WireApi::Responses),
),
]
.into_iter()
.map(|(k, v)| (k.to_string(), v))
.collect()
}

pub fn create_oss_provider() -> ModelProviderInfo {
pub fn create_oss_provider(default_provider_port: u16, wire_api: WireApi) -> ModelProviderInfo {
// These CODEX_OSS_ environment variables are experimental: we may
// switch to reading values from config.toml instead.
let codex_oss_base_url = match std::env::var("CODEX_OSS_BASE_URL")
@@ -331,22 +384,21 @@ pub fn create_oss_provider() -> ModelProviderInfo {
port = std::env::var("CODEX_OSS_PORT")
.ok()
.filter(|v| !v.trim().is_empty())
.and_then(|v| v.parse::<u32>().ok())
.unwrap_or(DEFAULT_OLLAMA_PORT)
.and_then(|v| v.parse::<u16>().ok())
.unwrap_or(default_provider_port)
),
};

create_oss_provider_with_base_url(&codex_oss_base_url)
create_oss_provider_with_base_url(&codex_oss_base_url, wire_api)
}

pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
pub fn create_oss_provider_with_base_url(base_url: &str, wire_api: WireApi) -> ModelProviderInfo {
ModelProviderInfo {
name: "gpt-oss".into(),
base_url: Some(base_url.into()),
env_key: None,
env_key_instructions: None,
experimental_bearer_token: None,
wire_api: WireApi::Chat,
wire_api,
query_params: None,
http_headers: None,
env_http_headers: None,
|
||||
use crate::bash::extract_bash_command;
|
||||
use crate::bash::try_parse_shell;
|
||||
use crate::bash::try_parse_word_only_commands_sequence;
|
||||
use crate::powershell::extract_powershell_command;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use shlex::split as shlex_split;
|
||||
use shlex::try_join as shlex_try_join;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn shlex_join(tokens: &[String]) -> String {
|
||||
pub fn shlex_join(tokens: &[String]) -> String {
|
||||
shlex_try_join(tokens.iter().map(String::as_str))
|
||||
.unwrap_or_else(|_| "<command included NUL byte>".to_string())
|
||||
}
|
||||
|
||||
/// Extracts the shell and script from a command, regardless of platform
|
||||
pub fn extract_shell_command(command: &[String]) -> Option<(&str, &str)> {
|
||||
extract_bash_command(command).or_else(|| extract_powershell_command(command))
|
||||
}
|
||||
|
||||
/// DO NOT REVIEW THIS CODE BY HAND
|
||||
/// This parsing code is quite complex and not easy to hand-modify.
|
||||
/// The easiest way to iterate is to add unit tests and have Codex fix the implementation.
|
||||
@@ -877,6 +883,42 @@ mod tests {
|
||||
}],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn powershell_command_is_stripped() {
|
||||
assert_parsed(
|
||||
&vec_str(&["powershell", "-Command", "Get-ChildItem"]),
|
||||
vec![ParsedCommand::Unknown {
|
||||
cmd: "Get-ChildItem".to_string(),
|
||||
}],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pwsh_with_noprofile_and_c_alias_is_stripped() {
|
||||
assert_parsed(
|
||||
&vec_str(&["pwsh", "-NoProfile", "-c", "Write-Host hi"]),
|
||||
vec![ParsedCommand::Unknown {
|
||||
cmd: "Write-Host hi".to_string(),
|
||||
}],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn powershell_with_path_is_stripped() {
|
||||
let command = if cfg!(windows) {
|
||||
"C:\\windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe"
|
||||
} else {
|
||||
"/usr/local/bin/powershell.exe"
|
||||
};
|
||||
|
||||
assert_parsed(
|
||||
&vec_str(&[command, "-NoProfile", "-c", "Write-Host hi"]),
|
||||
vec![ParsedCommand::Unknown {
|
||||
cmd: "Write-Host hi".to_string(),
|
||||
}],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
|
||||
@@ -884,6 +926,12 @@ pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
|
||||
return commands;
|
||||
}
|
||||
|
||||
if let Some((_, script)) = extract_powershell_command(command) {
|
||||
return vec![ParsedCommand::Unknown {
|
||||
cmd: script.to_string(),
|
||||
}];
|
||||
}
|
||||
|
||||
let normalized = normalize_tokens(command);
|
||||
|
||||
let parts = if contains_connectors(&normalized) {
|
||||
@@ -1190,6 +1238,7 @@ fn parse_find_query_and_path(tail: &[String]) -> (Option<String>, Option<String>
|
||||
}
|
||||
|
||||
fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
|
||||
// Only handle bash/zsh here; PowerShell is stripped separately without bash parsing.
|
||||
let (_, script) = extract_bash_command(original)?;
|
||||
|
||||
if let Some(tree) = try_parse_shell(script)
|
||||
|
||||
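Editor's note: shlex_join is now public; it quotes tokens so the joined string round-trips through a shell, and degrades gracefully on NUL bytes. Usage sketch, assuming the shlex crate (already a dependency of this file):

use shlex::try_join as shlex_try_join;

pub fn shlex_join(tokens: &[String]) -> String {
    shlex_try_join(tokens.iter().map(String::as_str))
        .unwrap_or_else(|_| "<command included NUL byte>".to_string())
}

fn main() {
    let tokens = vec!["echo".to_string(), "hello world".to_string()];
    // Arguments containing spaces come back quoted: echo 'hello world'
    println!("{}", shlex_join(&tokens));
}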
codex-rs/core/src/powershell.rs (new file, 93 lines)
@@ -0,0 +1,93 @@
use std::path::PathBuf;

use crate::shell::ShellType;
use crate::shell::detect_shell_type;

const POWERSHELL_FLAGS: &[&str] = &["-nologo", "-noprofile", "-command", "-c"];

/// Extract the PowerShell script body from an invocation such as:
///
/// - ["pwsh", "-NoProfile", "-Command", "Get-ChildItem -Recurse | Select-String foo"]
/// - ["powershell.exe", "-Command", "Write-Host hi"]
/// - ["powershell", "-NoLogo", "-NoProfile", "-Command", "...script..."]
///
/// Returns (`shell`, `script`) when the first arg is a PowerShell executable and a
/// `-Command` (or `-c`) flag is present followed by a script string.
pub fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
if command.len() < 3 {
return None;
}

let shell = &command[0];
if detect_shell_type(&PathBuf::from(shell)) != Some(ShellType::PowerShell) {
return None;
}

// Find the first occurrence of -Command (accept common short alias -c as well)
let mut i = 1usize;
while i + 1 < command.len() {
let flag = &command[i];
// Reject unknown flags
if !POWERSHELL_FLAGS.contains(&flag.to_ascii_lowercase().as_str()) {
return None;
}
if flag.eq_ignore_ascii_case("-Command") || flag.eq_ignore_ascii_case("-c") {
let script = &command[i + 1];
return Some((shell, script.as_str()));
}
i += 1;
}
None
}

#[cfg(test)]
mod tests {
use super::extract_powershell_command;

#[test]
fn extracts_basic_powershell_command() {
let cmd = vec![
"powershell".to_string(),
"-Command".to_string(),
"Write-Host hi".to_string(),
];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Write-Host hi");
}

#[test]
fn extracts_lowercase_flags() {
let cmd = vec![
"powershell".to_string(),
"-nologo".to_string(),
"-command".to_string(),
"Write-Host hi".to_string(),
];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Write-Host hi");
}

#[test]
fn extracts_full_path_powershell_command() {
let command = if cfg!(windows) {
"C:\\windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe".to_string()
} else {
"/usr/local/bin/powershell.exe".to_string()
};
let cmd = vec![command, "-Command".to_string(), "Write-Host hi".to_string()];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Write-Host hi");
}

#[test]
fn extracts_with_noprofile_and_alias() {
let cmd = vec![
"pwsh".to_string(),
"-NoProfile".to_string(),
"-c".to_string(),
"Get-ChildItem | Select-String foo".to_string(),
];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Get-ChildItem | Select-String foo");
}
}
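Editor's note: the new helper only needs shell.rs for executable detection. A self-contained variant you can run standalone, with a simple file-stem check standing in for shell::detect_shell_type (an assumption for illustration, not the real dependency):

const POWERSHELL_FLAGS: &[&str] = &["-nologo", "-noprofile", "-command", "-c"];

// Stand-in for shell::detect_shell_type: treat "powershell"/"pwsh" stems,
// with or without a path or ".exe" suffix, as PowerShell.
fn is_powershell(exe: &str) -> bool {
    let stem = std::path::Path::new(exe)
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or(exe)
        .to_ascii_lowercase();
    stem == "powershell" || stem == "pwsh"
}

fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
    if command.len() < 3 || !is_powershell(&command[0]) {
        return None;
    }
    let mut i = 1;
    while i + 1 < command.len() {
        let flag = &command[i];
        // Reject unknown flags, exactly as the real helper does.
        if !POWERSHELL_FLAGS.contains(&flag.to_ascii_lowercase().as_str()) {
            return None;
        }
        if flag.eq_ignore_ascii_case("-Command") || flag.eq_ignore_ascii_case("-c") {
            return Some((command[0].as_str(), command[i + 1].as_str()));
        }
        i += 1;
    }
    None
}

fn main() {
    let cmd: Vec<String> = ["pwsh", "-NoProfile", "-c", "Get-ChildItem"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    assert_eq!(extract_powershell_command(&cmd).map(|(_, s)| s), Some("Get-ChildItem"));
}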
@@ -27,7 +27,8 @@ pub(crate) fn should_persist_response_item(item: &ResponseItem) -> bool {
| ResponseItem::CustomToolCall { .. }
| ResponseItem::CustomToolCallOutput { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::GhostSnapshot { .. } => true,
| ResponseItem::GhostSnapshot { .. }
| ResponseItem::CompactionSummary { .. } => true,
ResponseItem::Other => false,
}
}
@@ -72,6 +73,8 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::GetHistoryEntryResponse(_)
| EventMsg::UndoStarted(_)
| EventMsg::McpListToolsResponse(_)
| EventMsg::McpStartupUpdate(_)
| EventMsg::McpStartupComplete(_)
| EventMsg::ListCustomPromptsResponse(_)
| EventMsg::PlanUpdate(_)
| EventMsg::ShutdownComplete

@@ -814,6 +814,7 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
timestamp: format!("{ts}-compacted"),
item: RolloutItem::Compacted(CompactedItem {
message: "compacted".into(),
replacement_history: None,
}),
};
writeln!(file, "{}", serde_json::to_string(&compacted_line)?)?;
@@ -61,10 +61,7 @@ impl Shell {
]
}
Shell::PowerShell(ps) => {
let mut args = vec![
ps.shell_path.to_string_lossy().to_string(),
"-NoLogo".to_string(),
];
let mut args = vec![ps.shell_path.to_string_lossy().to_string()];
if !use_login_shell {
args.push("-NoProfile".to_string());
}
@@ -192,7 +189,6 @@ pub fn detect_shell_type(shell_path: &PathBuf) -> Option<ShellType> {
Some("powershell") => Some(ShellType::PowerShell),
_ => {
let shell_name = shell_path.file_stem();

if let Some(shell_name) = shell_name
&& shell_name != shell_path
{
@@ -251,6 +247,14 @@ mod detect_shell_type_tests {
detect_shell_type(&PathBuf::from("powershell.exe")),
Some(ShellType::PowerShell)
);
assert_eq!(
detect_shell_type(&PathBuf::from(if cfg!(windows) {
"C:\\windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe"
} else {
"/usr/local/bin/pwsh"
})),
Some(ShellType::PowerShell)
);
assert_eq!(
detect_shell_type(&PathBuf::from("pwsh.exe")),
Some(ShellType::PowerShell)
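Editor's note: the detection above keys off the executable's file stem, which is what makes full paths and ".exe" suffixes equivalent in the new tests. The normalization step in isolation:

use std::path::Path;

fn shell_stem(shell_path: &Path) -> Option<String> {
    // file_stem drops both the directory part and a trailing extension,
    // so these all normalize to "powershell" or "pwsh".
    shell_path
        .file_stem()
        .and_then(|s| s.to_str())
        .map(|s| s.to_ascii_lowercase())
}

fn main() {
    assert_eq!(shell_stem(Path::new("pwsh.exe")).as_deref(), Some("pwsh"));
    assert_eq!(
        shell_stem(Path::new("/usr/local/bin/powershell.exe")).as_deref(),
        Some("powershell")
    );
}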
@@ -8,9 +8,12 @@ use crate::unified_exec::UnifiedExecSessionManager;
use crate::user_notification::UserNotifier;
use codex_otel::otel_event_manager::OtelEventManager;
use tokio::sync::Mutex;
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;

pub(crate) struct SessionServices {
pub(crate) mcp_connection_manager: McpConnectionManager,
pub(crate) mcp_connection_manager: Arc<RwLock<McpConnectionManager>>,
pub(crate) mcp_startup_cancellation_token: CancellationToken,
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
pub(crate) notifier: UserNotifier,
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
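Editor's note: wrapping the connection manager in Arc<RwLock<...>> is what lets the MCP handler hunks later in this compare take a shared read lock (.read().await) while startup can still replace or mutate the manager under a write lock. A minimal sketch, assuming tokio with the sync, macros, and rt features (McpConnectionManager here is a trivial stand-in):

use std::sync::Arc;
use tokio::sync::RwLock;

#[derive(Default)]
struct McpConnectionManager {
    tools: Vec<String>,
}

#[tokio::main]
async fn main() {
    let manager = Arc::new(RwLock::new(McpConnectionManager::default()));

    // Startup path: exclusive write access to register tools.
    manager.write().await.tools.push("list_mcp_resources".to_string());

    // Handler path: many concurrent readers, as in handle_list_resources.
    let reader = Arc::clone(&manager);
    let count = reader.read().await.tools.len();
    assert_eq!(count, 1);
}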
@@ -7,6 +7,7 @@ use crate::context_manager::ContextManager;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::TokenUsage;
use crate::protocol::TokenUsageInfo;
use crate::truncate::TruncationPolicy;

/// Persistent, session-scoped state previously stored directly on `Session`.
pub(crate) struct SessionState {
@@ -18,20 +19,21 @@ pub(crate) struct SessionState {
impl SessionState {
/// Create a new session state mirroring previous `State::default()` semantics.
pub(crate) fn new(session_configuration: SessionConfiguration) -> Self {
let history = ContextManager::new();
Self {
session_configuration,
history: ContextManager::new(),
history,
latest_rate_limits: None,
}
}

// History helpers
pub(crate) fn record_items<I>(&mut self, items: I)
pub(crate) fn record_items<I>(&mut self, items: I, policy: TruncationPolicy)
where
I: IntoIterator,
I::Item: std::ops::Deref<Target = ResponseItem>,
{
self.history.record_items(items)
self.history.record_items(items, policy);
}

pub(crate) fn clone_history(&self) -> ContextManager {
@@ -1,15 +1,14 @@
use std::sync::Arc;

use async_trait::async_trait;
use tokio_util::sync::CancellationToken;

use crate::codex::TurnContext;
use crate::compact;
use crate::state::TaskKind;
use codex_protocol::user_input::UserInput;

use super::SessionTask;
use super::SessionTaskContext;
use crate::codex::TurnContext;
use crate::features::Feature;
use crate::state::TaskKind;
use async_trait::async_trait;
use codex_app_server_protocol::AuthMode;
use codex_protocol::user_input::UserInput;
use tokio_util::sync::CancellationToken;

#[derive(Clone, Copy, Default)]
pub(crate) struct CompactTask;
@@ -27,6 +26,17 @@ impl SessionTask for CompactTask {
input: Vec<UserInput>,
_cancellation_token: CancellationToken,
) -> Option<String> {
compact::run_compact_task(session.clone_session(), ctx, input).await
let session = session.clone_session();
if session
.services
.auth_manager
.auth()
.is_some_and(|auth| auth.mode == AuthMode::ChatGPT)
&& session.enabled(Feature::RemoteCompaction).await
{
crate::compact_remote::run_remote_compact_task(session, ctx, input).await
} else {
crate::compact::run_compact_task(session, ctx, input).await
}
}
}
@@ -65,22 +65,24 @@ impl SessionTask for UserShellCommandTask {
// allows commands that use shell features (pipes, &&, redirects, etc.).
// We do not source rc files or otherwise reformat the script.
let use_login_shell = true;
let shell_invocation = session
let command = session
.user_shell()
.derive_exec_args(&self.command, use_login_shell);

let call_id = Uuid::new_v4().to_string();
let raw_command = self.command.clone();
let cwd = turn_context.cwd.clone();

let parsed_cmd = parse_command(&shell_invocation);
let parsed_cmd = parse_command(&command);
session
.send_event(
turn_context.as_ref(),
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: call_id.clone(),
command: shell_invocation.clone(),
cwd: turn_context.cwd.clone(),
parsed_cmd,
turn_id: turn_context.sub_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::UserShell,
interaction_input: None,
}),
@@ -88,8 +90,8 @@ impl SessionTask for UserShellCommandTask {
.await;

let exec_env = ExecEnv {
command: shell_invocation,
cwd: turn_context.cwd.clone(),
command: command.clone(),
cwd: cwd.clone(),
env: create_env(&turn_context.shell_environment_policy),
timeout_ms: None,
sandbox: SandboxType::None,
@@ -129,6 +131,12 @@ impl SessionTask for UserShellCommandTask {
turn_context.as_ref(),
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::UserShell,
interaction_input: None,
stdout: String::new(),
stderr: aborted_message.clone(),
aggregated_output: aborted_message.clone(),
@@ -145,6 +153,12 @@ impl SessionTask for UserShellCommandTask {
turn_context.as_ref(),
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: call_id.clone(),
turn_id: turn_context.sub_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::UserShell,
interaction_input: None,
stdout: output.stdout.text.clone(),
stderr: output.stderr.text.clone(),
aggregated_output: output.aggregated_output.text.clone(),
@@ -176,6 +190,12 @@ impl SessionTask for UserShellCommandTask {
turn_context.as_ref(),
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
command,
cwd,
parsed_cmd,
source: ExecCommandSource::UserShell,
interaction_input: None,
stdout: exec_output.stdout.text.clone(),
stderr: exec_output.stderr.text.clone(),
aggregated_output: exec_output.aggregated_output.text.clone(),
@@ -15,6 +15,7 @@ use crate::protocol::PatchApplyEndEvent;
use crate::protocol::TurnDiffEvent;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::sandboxing::ToolError;
use codex_protocol::parse_command::ParsedCommand;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
@@ -61,6 +62,7 @@ pub(crate) async fn emit_exec_command_begin(
ctx: ToolEventCtx<'_>,
command: &[String],
cwd: &Path,
parsed_cmd: &[ParsedCommand],
source: ExecCommandSource,
interaction_input: Option<String>,
) {
@@ -69,9 +71,10 @@ pub(crate) async fn emit_exec_command_begin(
ctx.turn,
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: ctx.call_id.to_string(),
turn_id: ctx.turn.sub_id.clone(),
command: command.to_vec(),
cwd: cwd.to_path_buf(),
parsed_cmd: parse_command(command),
parsed_cmd: parsed_cmd.to_vec(),
source,
interaction_input,
}),
@@ -84,6 +87,7 @@ pub(crate) enum ToolEmitter {
command: Vec<String>,
cwd: PathBuf,
source: ExecCommandSource,
parsed_cmd: Vec<ParsedCommand>,
},
ApplyPatch {
changes: HashMap<PathBuf, FileChange>,
@@ -94,15 +98,18 @@ pub(crate) enum ToolEmitter {
cwd: PathBuf,
source: ExecCommandSource,
interaction_input: Option<String>,
parsed_cmd: Vec<ParsedCommand>,
},
}

impl ToolEmitter {
pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
let parsed_cmd = parse_command(&command);
Self::Shell {
command,
cwd,
source,
parsed_cmd,
}
}

@@ -119,11 +126,13 @@ impl ToolEmitter {
source: ExecCommandSource,
interaction_input: Option<String>,
) -> Self {
let parsed_cmd = parse_command(command);
Self::UnifiedExec {
command: command.to_vec(),
cwd,
source,
interaction_input,
parsed_cmd,
}
}

@@ -134,44 +143,14 @@ impl ToolEmitter {
command,
cwd,
source,
parsed_cmd,
},
ToolEventStage::Begin,
stage,
) => {
emit_exec_command_begin(ctx, command, cwd.as_path(), *source, None).await;
}
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
emit_exec_end(
emit_exec_stage(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
emit_exec_end(
ctx,
String::new(),
(*message).to_string(),
(*message).to_string(),
-1,
Duration::ZERO,
message.clone(),
ExecCommandInput::new(command, cwd.as_path(), parsed_cmd, *source, None),
stage,
)
.await;
}
@@ -231,57 +210,20 @@ impl ToolEmitter {
cwd,
source,
interaction_input,
parsed_cmd,
},
ToolEventStage::Begin,
stage,
) => {
emit_exec_command_begin(
emit_exec_stage(
ctx,
command,
cwd.as_path(),
*source,
interaction_input.clone(),
)
.await;
}
(Self::UnifiedExec { .. }, ToolEventStage::Success(output)) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(
Self::UnifiedExec { .. },
ToolEventStage::Failure(ToolEventFailure::Output(output)),
) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(
Self::UnifiedExec { .. },
ToolEventStage::Failure(ToolEventFailure::Message(message)),
) => {
emit_exec_end(
ctx,
String::new(),
(*message).to_string(),
(*message).to_string(),
-1,
Duration::ZERO,
message.clone(),
ExecCommandInput::new(
command,
cwd.as_path(),
parsed_cmd,
*source,
interaction_input.as_deref(),
),
stage,
)
.await;
}
@@ -340,26 +282,107 @@ impl ToolEmitter {
}
}

async fn emit_exec_end(
ctx: ToolEventCtx<'_>,
struct ExecCommandInput<'a> {
command: &'a [String],
cwd: &'a Path,
parsed_cmd: &'a [ParsedCommand],
source: ExecCommandSource,
interaction_input: Option<&'a str>,
}

impl<'a> ExecCommandInput<'a> {
fn new(
command: &'a [String],
cwd: &'a Path,
parsed_cmd: &'a [ParsedCommand],
source: ExecCommandSource,
interaction_input: Option<&'a str>,
) -> Self {
Self {
command,
cwd,
parsed_cmd,
source,
interaction_input,
}
}
}

struct ExecCommandResult {
stdout: String,
stderr: String,
aggregated_output: String,
exit_code: i32,
duration: Duration,
formatted_output: String,
}

async fn emit_exec_stage(
ctx: ToolEventCtx<'_>,
exec_input: ExecCommandInput<'_>,
stage: ToolEventStage,
) {
match stage {
ToolEventStage::Begin => {
emit_exec_command_begin(
ctx,
exec_input.command,
exec_input.cwd,
exec_input.parsed_cmd,
exec_input.source,
exec_input.interaction_input.map(str::to_owned),
)
.await;
}
ToolEventStage::Success(output)
| ToolEventStage::Failure(ToolEventFailure::Output(output)) => {
let exec_result = ExecCommandResult {
stdout: output.stdout.text.clone(),
stderr: output.stderr.text.clone(),
aggregated_output: output.aggregated_output.text.clone(),
exit_code: output.exit_code,
duration: output.duration,
formatted_output: format_exec_output_str(&output),
};
emit_exec_end(ctx, exec_input, exec_result).await;
}
ToolEventStage::Failure(ToolEventFailure::Message(message)) => {
let text = message.to_string();
let exec_result = ExecCommandResult {
stdout: String::new(),
stderr: text.clone(),
aggregated_output: text.clone(),
exit_code: -1,
duration: Duration::ZERO,
formatted_output: text,
};
emit_exec_end(ctx, exec_input, exec_result).await;
}
}
}

async fn emit_exec_end(
ctx: ToolEventCtx<'_>,
exec_input: ExecCommandInput<'_>,
exec_result: ExecCommandResult,
) {
ctx.session
.send_event(
ctx.turn,
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: ctx.call_id.to_string(),
stdout,
stderr,
aggregated_output,
exit_code,
duration,
formatted_output,
turn_id: ctx.turn.sub_id.clone(),
command: exec_input.command.to_vec(),
cwd: exec_input.cwd.to_path_buf(),
parsed_cmd: exec_input.parsed_cmd.to_vec(),
source: exec_input.source,
interaction_input: exec_input.interaction_input.map(str::to_owned),
stdout: exec_result.stdout,
stderr: exec_result.stderr,
aggregated_output: exec_result.aggregated_output,
exit_code: exec_result.exit_code,
duration: exec_result.duration,
formatted_output: exec_result.formatted_output,
}),
)
.await;
@@ -287,6 +287,8 @@ async fn handle_list_resources(
let resources = session
.services
.mcp_connection_manager
.read()
.await
.list_all_resources()
.await;
Ok(ListResourcesPayload::from_all_servers(resources))
@@ -396,6 +398,8 @@ async fn handle_list_resource_templates(
let templates = session
.services
.mcp_connection_manager
.read()
.await
.list_all_resource_templates()
.await;
Ok(ListResourceTemplatesPayload::from_all_servers(templates))
@@ -117,7 +117,6 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
false,
)
.await
}
@@ -130,7 +129,6 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
true,
)
.await
}
@@ -178,7 +176,6 @@ impl ToolHandler for ShellCommandHandler {
turn,
tracker,
call_id,
false,
)
.await
}
@@ -192,7 +189,6 @@ impl ShellHandler {
turn: Arc<TurnContext>,
tracker: crate::tools::context::SharedTurnDiffTracker,
call_id: String,
is_user_shell_command: bool,
) -> Result<ToolOutput, FunctionCallError> {
// Approval policy guard for explicit escalation in non-OnRequest modes.
if exec_params.with_escalated_permissions.unwrap_or(false)
@@ -285,12 +281,7 @@ impl ShellHandler {
}
}

// Regular shell execution path.
let source = if is_user_shell_command {
ExecCommandSource::UserShell
} else {
ExecCommandSource::Agent
};
let source = ExecCommandSource::Agent;
let emitter =
ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone(), source);
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
@@ -9,9 +9,7 @@ pub mod runtimes;
pub mod sandboxing;
pub mod spec;

use crate::context_manager::MODEL_FORMAT_MAX_BYTES;
use crate::context_manager::MODEL_FORMAT_MAX_LINES;
use crate::context_manager::format_output_for_model_body;
use crate::context_manager::truncate_with_line_bytes_budget;
use crate::exec::ExecToolCallOutput;
pub use router::ToolRouter;
use serde::Serialize;
@@ -22,6 +20,9 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
"[... telemetry preview truncated ...]";

// TODO(aibrahim): migrate shell tool to use truncate text and respect config value
const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;

/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
@@ -77,5 +78,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
};

// Truncate for model consumption before serialization.
format_output_for_model_body(&body, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES)
truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES)
}
@@ -112,7 +112,7 @@ impl ToolCallRuntime {
|
||||
|
||||
fn abort_message(call: &ToolCall, secs: f32) -> String {
|
||||
match call.tool_name.as_str() {
|
||||
"shell" | "container.exec" | "local_shell" | "unified_exec" => {
|
||||
"shell" | "container.exec" | "local_shell" | "shell_command" | "unified_exec" => {
|
||||
format!("Wall time: {secs:.1} seconds\naborted by user")
|
||||
}
|
||||
_ => format!("aborted by user after {secs:.1}s"),
|
||||
|
||||
@@ -54,7 +54,7 @@ impl ToolRouter {
|
||||
.any(|config| config.spec.name() == tool_name)
|
||||
}
|
||||
|
||||
pub fn build_tool_call(
|
||||
pub async fn build_tool_call(
|
||||
session: &Session,
|
||||
item: ResponseItem,
|
||||
) -> Result<Option<ToolCall>, FunctionCallError> {
|
||||
@@ -65,7 +65,7 @@ impl ToolRouter {
|
||||
call_id,
|
||||
..
|
||||
} => {
|
||||
if let Some((server, tool)) = session.parse_mcp_tool_name(&name) {
|
||||
if let Some((server, tool)) = session.parse_mcp_tool_name(&name).await {
|
||||
Ok(Some(ToolCall {
|
||||
tool_name: name,
|
||||
call_id,
|
||||
|
||||
@@ -20,6 +20,11 @@ pub enum ConfigShellToolType {
|
||||
Default,
|
||||
Local,
|
||||
UnifiedExec,
|
||||
/// Do not include a shell tool by default. Useful when using Codex
|
||||
/// with tools provided exclusively provided by MCP servers. Often used
|
||||
/// with `--config base_instructions=CUSTOM_INSTRUCTIONS`
|
||||
/// to customize agent behavior.
|
||||
Disabled,
|
||||
/// Takes a command as a single string to be run in the user's default shell.
|
||||
ShellCommand,
|
||||
}
|
||||
@@ -48,7 +53,9 @@ impl ToolsConfig {
|
||||
let include_web_search_request = features.enabled(Feature::WebSearchRequest);
|
||||
let include_view_image_tool = features.enabled(Feature::ViewImageTool);
|
||||
|
||||
let shell_type = if features.enabled(Feature::UnifiedExec) {
|
||||
let shell_type = if !features.enabled(Feature::ShellTool) {
|
||||
ConfigShellToolType::Disabled
|
||||
} else if features.enabled(Feature::UnifiedExec) {
|
||||
ConfigShellToolType::UnifiedExec
|
||||
} else if features.enabled(Feature::ShellCommandTool) {
|
||||
ConfigShellToolType::ShellCommand
|
||||
@@ -294,9 +301,26 @@ fn create_shell_tool() -> ToolSpec {
         },
     );

+    let description = if cfg!(windows) {
+        r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
+
+Examples of valid command strings:
+
+- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
+- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
+- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
+- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
+- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
+- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
+    } else {
+        r#"Runs a shell command and returns its output.
+- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
+- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
+    }.to_string();
+
     ToolSpec::Function(ResponsesApiTool {
         name: "shell".to_string(),
-        description: "Runs a shell command and returns its output.".to_string(),
+        description,
         strict: false,
         parameters: JsonSchema::Object {
             properties,

@@ -341,9 +365,25 @@ fn create_shell_command_tool() -> ToolSpec {
         },
     );

+    let description = if cfg!(windows) {
+        r#"Runs a Powershell command (Windows) and returns its output.
+
+Examples of valid command strings:
+
+- ls -a (show hidden): "Get-ChildItem -Force"
+- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
+- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
+- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
+- setting an env var: "$env:FOO='bar'; echo $env:FOO"
+- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#
+    } else {
+        r#"Runs a shell command and returns its output.
+- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#
+    }.to_string();
+
     ToolSpec::Function(ResponsesApiTool {
         name: "shell_command".to_string(),
-        description: "Runs a shell command string and returns its output.".to_string(),
+        description,
         strict: false,
         parameters: JsonSchema::Object {
             properties,

@@ -973,16 +1013,21 @@ pub(crate) fn build_specs(
             builder.register_handler("exec_command", unified_exec_handler.clone());
             builder.register_handler("write_stdin", unified_exec_handler);
         }
+        ConfigShellToolType::Disabled => {
+            // Do nothing.
+        }
         ConfigShellToolType::ShellCommand => {
             builder.push_spec(create_shell_command_tool());
         }
     }

-    // Always register shell aliases so older prompts remain compatible.
-    builder.register_handler("shell", shell_handler.clone());
-    builder.register_handler("container.exec", shell_handler.clone());
-    builder.register_handler("local_shell", shell_handler);
-    builder.register_handler("shell_command", shell_command_handler);
+    if config.shell_type != ConfigShellToolType::Disabled {
+        // Always register shell aliases so older prompts remain compatible.
+        builder.register_handler("shell", shell_handler.clone());
+        builder.register_handler("container.exec", shell_handler.clone());
+        builder.register_handler("local_shell", shell_handler);
+        builder.register_handler("shell_command", shell_command_handler);
+    }

     builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true);
     builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true);

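With the alias registration gated, a disabled shell tool means no handler is reachable under any of the legacy names. A simplified sketch of the gating, using a plain map in place of the real builder (one handler string stands in for the distinct shell and shell_command handlers):

```rust
use std::collections::HashMap;

// When the shell tool is disabled, none of the legacy aliases get a handler,
// so lookups fall through to "unknown tool" handling.
fn register(shell_disabled: bool) -> HashMap<&'static str, &'static str> {
    let mut handlers = HashMap::new();
    if !shell_disabled {
        for alias in ["shell", "container.exec", "local_shell", "shell_command"] {
            handlers.insert(alias, "shell_handler");
        }
    }
    handlers
}

fn main() {
    assert!(register(true).is_empty());
    assert_eq!(register(false).len(), 4);
}
```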
@@ -1118,6 +1163,7 @@ mod tests {
             ConfigShellToolType::Default => Some("shell"),
             ConfigShellToolType::Local => Some("local_shell"),
             ConfigShellToolType::UnifiedExec => None,
+            ConfigShellToolType::Disabled => None,
             ConfigShellToolType::ShellCommand => Some("shell_command"),
         }
     }

@@ -1246,7 +1292,7 @@ mod tests {
         "gpt-5-codex",
         &Features::with_defaults(),
         &[
-            "shell",
+            "shell_command",
             "list_mcp_resources",
             "list_mcp_resource_templates",
             "read_mcp_resource",
@@ -1263,7 +1309,7 @@ mod tests {
         "gpt-5.1-codex",
         &Features::with_defaults(),
         &[
-            "shell",
+            "shell_command",
             "list_mcp_resources",
             "list_mcp_resource_templates",
             "read_mcp_resource",
@@ -1338,7 +1384,7 @@ mod tests {
         "gpt-5.1-codex-mini",
         &Features::with_defaults(),
         &[
-            "shell",
+            "shell_command",
             "list_mcp_resources",
             "list_mcp_resource_templates",
             "read_mcp_resource",
@@ -1355,7 +1401,7 @@ mod tests {
         "gpt-5.1",
         &Features::with_defaults(),
         &[
-            "shell",
+            "shell_command",
             "list_mcp_resources",
             "list_mcp_resource_templates",
             "read_mcp_resource",

@@ -1873,8 +1919,23 @@ mod tests {
         };
         assert_eq!(name, "shell");

-        let expected = "Runs a shell command and returns its output.";
-        assert_eq!(description, expected);
+        let expected = if cfg!(windows) {
+            r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
+
+Examples of valid command strings:
+
+- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
+- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
+- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
+- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
+- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
+- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
+        } else {
+            r#"Runs a shell command and returns its output.
+- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
+- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
+        }.to_string();
+        assert_eq!(description, &expected);
     }

     #[test]
@@ -1888,8 +1949,22 @@ mod tests {
         };
         assert_eq!(name, "shell_command");

-        let expected = "Runs a shell command string and returns its output.";
-        assert_eq!(description, expected);
+        let expected = if cfg!(windows) {
+            r#"Runs a Powershell command (Windows) and returns its output.
+
+Examples of valid command strings:
+
+- ls -a (show hidden): "Get-ChildItem -Force"
+- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
+- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
+- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
+- setting an env var: "$env:FOO='bar'; echo $env:FOO"
+- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#.to_string()
+        } else {
+            r#"Runs a shell command and returns its output.
+- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#.to_string()
+        };
+        assert_eq!(description, &expected);
     }

     #[test]

@@ -1,200 +1,670 @@
 //! Utilities for truncating large chunks of output while preserving a prefix
-//! and suffix on UTF-8 boundaries.
+//! and suffix on UTF-8 boundaries, and helpers for line/token-based truncation
+//! used across the core crate.

-use codex_utils_tokenizer::Tokenizer;
+use codex_protocol::models::FunctionCallOutputContentItem;
+use codex_utils_string::take_bytes_at_char_boundary;
+use codex_utils_string::take_last_bytes_at_char_boundary;
+
+use crate::config::Config;
+
+const APPROX_BYTES_PER_TOKEN: usize = 4;

-/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
-/// preserving the beginning and the end. Returns the possibly truncated
-/// string and `Some(original_token_count)` (counted with the local tokenizer;
-/// falls back to a 4-bytes-per-token estimate if the tokenizer cannot load)
-/// if truncation occurred; otherwise returns the original string and `None`.
-pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>) {
-    if s.len() <= max_bytes {
-        return (s.to_string(), None);
-    }
-
-    // Build a tokenizer for counting (default to o200k_base; fall back to cl100k_base).
-    // If both fail, fall back to a 4-bytes-per-token estimate.
-    let tok = Tokenizer::try_default().ok();
-    let token_count = |text: &str| -> u64 {
-        if let Some(ref t) = tok {
-            t.count(text) as u64
-        } else {
-            (text.len() as u64).div_ceil(4)
-        }
-    };
-
-    let total_tokens = token_count(s);
-    if max_bytes == 0 {
-        return (
-            format!("…{total_tokens} tokens truncated…"),
-            Some(total_tokens),
-        );
-    }
-
-    fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
-        if input.len() <= max_len {
-            return input;
-        }
-        let mut end = max_len;
-        while end > 0 && !input.is_char_boundary(end) {
-            end -= 1;
-        }
-        &input[..end]
-    }
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TruncationPolicy {
+    Bytes(usize),
+    Tokens(usize),
+}
+
+impl TruncationPolicy {
+    pub fn new(config: &Config) -> Self {
+        let config_token_limit = config.tool_output_token_limit;
+
+        match config.model_family.truncation_policy {
+            TruncationPolicy::Bytes(family_bytes) => {
+                if let Some(token_limit) = config_token_limit {
+                    Self::Bytes(approx_bytes_for_tokens(token_limit))
+                } else {
+                    Self::Bytes(family_bytes)
+                }
+            }
+            TruncationPolicy::Tokens(family_tokens) => {
+                if let Some(token_limit) = config_token_limit {
+                    Self::Tokens(token_limit)
+                } else {
+                    Self::Tokens(family_tokens)
+                }
+            }
+        }
+    }
+
+    /// Returns a token budget derived from this policy.
+    ///
+    /// - For `Tokens`, this is the explicit token limit.
+    /// - For `Bytes`, this is an approximate token budget using the global
+    ///   bytes-per-token heuristic.
+    pub fn token_budget(&self) -> usize {
+        match self {
+            TruncationPolicy::Bytes(bytes) => {
+                usize::try_from(approx_tokens_from_byte_count(*bytes)).unwrap_or(usize::MAX)
+            }
+            TruncationPolicy::Tokens(tokens) => *tokens,
+        }
+    }
+
+    /// Returns a byte budget derived from this policy.
+    ///
+    /// - For `Bytes`, this is the explicit byte limit.
+    /// - For `Tokens`, this is an approximate byte budget using the global
+    ///   bytes-per-token heuristic.
+    pub fn byte_budget(&self) -> usize {
+        match self {
+            TruncationPolicy::Bytes(bytes) => *bytes,
+            TruncationPolicy::Tokens(tokens) => approx_bytes_for_tokens(*tokens),
+        }
+    }
+}

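Both budget conversions rely on the same 4-bytes-per-token heuristic (`APPROX_BYTES_PER_TOKEN = 4`). A small stand-alone sketch of the round trip, re-implemented locally with the same arithmetic:

```rust
const APPROX_BYTES_PER_TOKEN: usize = 4;

// Tokens -> bytes: a simple multiply.
fn byte_budget(tokens: usize) -> usize {
    tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)
}

// Bytes -> tokens: divide, rounding up so partial tokens still count.
fn token_budget(bytes: usize) -> usize {
    bytes.div_ceil(APPROX_BYTES_PER_TOKEN)
}

fn main() {
    // A Tokens(10_000) policy maps to a 40_000-byte budget, and back.
    assert_eq!(byte_budget(10_000), 40_000);
    assert_eq!(token_budget(40_000), 10_000);
    // Rounding is conservative: 10 bytes count as 3 tokens, not 2.
    assert_eq!(token_budget(10), 3);
}
```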
-    fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
-        if let Some(head) = s.get(..left_budget)
-            && let Some(i) = head.rfind('\n')
-        {
-            return i + 1;
-        }
-        truncate_on_boundary(s, left_budget).len()
-    }
-
-    fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
-        let start_tail = s.len().saturating_sub(right_budget);
-        if let Some(tail) = s.get(start_tail..)
-            && let Some(i) = tail.find('\n')
-        {
-            return start_tail + i + 1;
-        }
-
-        let mut idx = start_tail.min(s.len());
-        while idx < s.len() && !s.is_char_boundary(idx) {
-            idx += 1;
-        }
-        idx
-    }
-
-    // Iterate to stabilize marker length → keep budget → boundaries.
-    let mut guess_tokens: u64 = 1;
-    for _ in 0..4 {
-        let marker = format!("…{guess_tokens} tokens truncated…");
-        let marker_len = marker.len();
-        let keep_budget = max_bytes.saturating_sub(marker_len);
-        if keep_budget == 0 {
-            return (
-                format!("…{total_tokens} tokens truncated…"),
-                Some(total_tokens),
-            );
-        }
-
-        let left_budget = keep_budget / 2;
-        let right_budget = keep_budget - left_budget;
-        let prefix_end = pick_prefix_end(s, left_budget);
-        let mut suffix_start = pick_suffix_start(s, right_budget);
-        if suffix_start < prefix_end {
-            suffix_start = prefix_end;
-        }
-
-        // Tokens actually removed (middle slice) using the real tokenizer.
-        let removed_tokens = token_count(&s[prefix_end..suffix_start]);
-
-        // If the number of digits in the token count does not change the marker length,
-        // we can finalize output.
-        let final_marker = format!("…{removed_tokens} tokens truncated…");
-        if final_marker.len() == marker_len {
-            let kept_content_bytes = prefix_end + (s.len() - suffix_start);
-            let mut out = String::with_capacity(final_marker.len() + kept_content_bytes + 1);
-            out.push_str(&s[..prefix_end]);
-            out.push_str(&final_marker);
-            out.push('\n');
-            out.push_str(&s[suffix_start..]);
-            return (out, Some(total_tokens));
-        }
-
-        guess_tokens = removed_tokens;
-    }
-
-    // Fallback build after iterations: compute with the last guess.
-    let marker = format!("…{guess_tokens} tokens truncated…");
+/// Format a block of exec/tool output for model consumption, truncating by
+/// lines and bytes while preserving head and tail segments.
+pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String {
+    // TODO(aibrahim): to be removed
+    let lines_budget = 256;
+    // Head+tail truncation for the model: show the beginning and end with an elision.
+    // Clients still receive full streams; only this formatted summary is capped.
+    let total_lines = content.lines().count();
+    if content.len() <= bytes_budget && total_lines <= lines_budget {
+        return content.to_string();
+    }
+    let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget);
+    format!("Total output lines: {total_lines}\n\n{output}")
+}
+
+pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
+    match policy {
+        TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(
+            content,
+            bytes,
+            TruncationSource::Policy(TruncationPolicy::Bytes(bytes)),
+        ),
+        TruncationPolicy::Tokens(tokens) => {
+            let (truncated, _) = truncate_with_token_budget(
+                content,
+                tokens,
+                TruncationSource::Policy(TruncationPolicy::Tokens(tokens)),
+            );
+            truncated
+        }
+    }
+}
+
+/// Globally truncate function output items to fit within the given
+/// truncation policy's budget, preserving as many text/image items as
+/// possible and appending a summary for any omitted text items.
+pub(crate) fn truncate_function_output_items_with_policy(
+    items: &[FunctionCallOutputContentItem],
+    policy: TruncationPolicy,
+) -> Vec<FunctionCallOutputContentItem> {
+    let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
+    let mut remaining_budget = match policy {
+        TruncationPolicy::Bytes(_) => policy.byte_budget(),
+        TruncationPolicy::Tokens(_) => policy.token_budget(),
+    };
+    let mut omitted_text_items = 0usize;
+
+    for it in items {
+        match it {
+            FunctionCallOutputContentItem::InputText { text } => {
+                if remaining_budget == 0 {
+                    omitted_text_items += 1;
+                    continue;
+                }
+
+                let cost = match policy {
+                    TruncationPolicy::Bytes(_) => text.len(),
+                    TruncationPolicy::Tokens(_) => approx_token_count(text),
+                };
+
+                if cost <= remaining_budget {
+                    out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
+                    remaining_budget = remaining_budget.saturating_sub(cost);
+                } else {
+                    let snippet_policy = match policy {
+                        TruncationPolicy::Bytes(_) => TruncationPolicy::Bytes(remaining_budget),
+                        TruncationPolicy::Tokens(_) => TruncationPolicy::Tokens(remaining_budget),
+                    };
+                    let snippet = truncate_text(text, snippet_policy);
+                    if snippet.is_empty() {
+                        omitted_text_items += 1;
+                    } else {
+                        out.push(FunctionCallOutputContentItem::InputText { text: snippet });
+                    }
+                    remaining_budget = 0;
+                }
+            }
+            FunctionCallOutputContentItem::InputImage { image_url } => {
+                out.push(FunctionCallOutputContentItem::InputImage {
+                    image_url: image_url.clone(),
+                });
+            }
+        }
+    }
+
+    if omitted_text_items > 0 {
+        out.push(FunctionCallOutputContentItem::InputText {
+            text: format!("[omitted {omitted_text_items} text items ...]"),
+        });
+    }
+
+    out
+}

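A hedged sketch of how the item-level truncation behaves, written the way an in-crate test might call it (the function is `pub(crate)`, so this assumes the truncate module's test scope; the assertions follow directly from the loop above):

```rust
#[test]
fn keeps_images_and_summarizes_omitted_text() {
    let items = vec![
        FunctionCallOutputContentItem::InputText { text: "a".repeat(64) },
        FunctionCallOutputContentItem::InputImage { image_url: "img:1".into() },
        FunctionCallOutputContentItem::InputText { text: "b".repeat(64) },
    ];
    // A 64-byte budget exactly covers the first text item; the image is
    // passed through unconditionally; the trailing text is reported omitted.
    let out = truncate_function_output_items_with_policy(&items, TruncationPolicy::Bytes(64));
    assert_eq!(out.len(), 3);
    assert!(matches!(out[1], FunctionCallOutputContentItem::InputImage { .. }));
    assert!(matches!(
        &out[2],
        FunctionCallOutputContentItem::InputText { text } if text.contains("omitted 1 text items")
    ));
}
```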
-    let marker_len = marker.len();
-    let keep_budget = max_bytes.saturating_sub(marker_len);
-    if keep_budget == 0 {
-        return (
-            format!("…{total_tokens} tokens truncated…"),
-            Some(total_tokens),
-        );
-    }
-
-    let left_budget = keep_budget / 2;
-    let right_budget = keep_budget - left_budget;
-    let prefix_end = pick_prefix_end(s, left_budget);
-    let mut suffix_start = pick_suffix_start(s, right_budget);
-    if suffix_start < prefix_end {
-        suffix_start = prefix_end;
-    }
-
-    let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1);
-    out.push_str(&s[..prefix_end]);
-    out.push_str(&marker);
-    out.push('\n');
-    out.push_str(&s[suffix_start..]);
-    (out, Some(total_tokens))
-}
+/// Truncate the middle of a UTF-8 string to at most `max_tokens` tokens,
+/// preserving the beginning and the end. Returns the possibly truncated string
+/// and `Some(original_token_count)` if truncation occurred; otherwise returns
+/// the original string and `None`.
+fn truncate_with_token_budget(
+    s: &str,
+    max_tokens: usize,
+    source: TruncationSource,
+) -> (String, Option<u64>) {
+    if s.is_empty() {
+        return (String::new(), None);
+    }
+
+    let byte_len = s.len();
+    if max_tokens > 0 {
+        let small_threshold = approx_bytes_for_tokens(max_tokens / 4);
+        if small_threshold > 0 && byte_len <= small_threshold {
+            return (s.to_string(), None);
+        }
+    }
+
+    let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source);
+    let approx_total_usize = approx_token_count(s);
+    let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX);
+    if truncated == s {
+        (truncated, None)
+    } else {
+        (truncated, Some(approx_total))
+    }
+}
+
+/// Truncate a string using a byte budget derived from the token budget, without
+/// performing any real tokenization. This keeps the logic purely byte-based and
+/// uses a bytes placeholder in the truncated output.
+fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String {
+    if s.is_empty() {
+        return String::new();
+    }
+
+    if max_bytes == 0 {
+        // No budget to show content; just report that everything was truncated.
+        let marker = format_truncation_marker(source, removed_units_for_source(source, s.len()));
+        return marker;
+    }
+
+    if s.len() <= max_bytes {
+        return s.to_string();
+    }
+
+    let total_bytes = s.len();
+    let removed_bytes = total_bytes.saturating_sub(max_bytes);
+    let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes));
+    let marker_len = marker.len();
+
+    if marker_len >= max_bytes {
+        let truncated_marker = truncate_on_boundary(&marker, max_bytes);
+        return truncated_marker.to_string();
+    }
+
+    let keep_budget = max_bytes - marker_len;
+    let (left_budget, right_budget) = split_budget(keep_budget);
+    let prefix_end = pick_prefix_end(s, left_budget);
+    let mut suffix_start = pick_suffix_start(s, right_budget);
+    if suffix_start < prefix_end {
+        suffix_start = prefix_end;
+    }
+
+    let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker);
+
+    if out.len() > max_bytes {
+        let boundary = truncate_on_boundary(&out, max_bytes);
+        out.truncate(boundary.len());
+    }
+
+    out
+}
+
+fn truncate_formatted_exec_output(
+    content: &str,
+    total_lines: usize,
+    limit_bytes: usize,
+    limit_lines: usize,
+) -> String {
+    error_on_double_truncation(content);
+    let head_lines: usize = limit_lines / 2;
+    let tail_lines: usize = limit_lines - head_lines; // 128
+    let head_bytes: usize = limit_bytes / 2;
+    let segments: Vec<&str> = content.split_inclusive('\n').collect();
+    let head_take = head_lines.min(segments.len());
+    let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
+    let omitted = segments.len().saturating_sub(head_take + tail_take);
+
+    let head_slice_end: usize = segments
+        .iter()
+        .take(head_take)
+        .map(|segment| segment.len())
+        .sum();
+    let tail_slice_start: usize = if tail_take == 0 {
+        content.len()
+    } else {
+        content.len()
+            - segments
+                .iter()
+                .rev()
+                .take(tail_take)
+                .map(|segment| segment.len())
+                .sum::<usize>()
+    };
+    let head_slice = &content[..head_slice_end];
+    let tail_slice = &content[tail_slice_start..];
+    let truncated_by_bytes = content.len() > limit_bytes;
+    // this is a bit wrong. We are counting metadata lines and not just shell output lines.
+    let marker = if omitted > 0 {
+        let marker_text = format_truncation_marker(
+            TruncationSource::LineOmission { total_lines },
+            u64::try_from(omitted).unwrap_or(u64::MAX),
+        );
+        Some(format!("\n{marker_text}\n\n"))
+    } else if truncated_by_bytes {
+        let removed_bytes =
+            u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX);
+        let marker_text =
+            format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes);
+        Some(format!("\n{marker_text}\n\n"))
+    } else {
+        None
+    };
+
+    let marker_len = marker.as_ref().map_or(0, String::len);
+    let base_head_budget = head_bytes.min(limit_bytes);
+    let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
+    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
+    let mut result = String::with_capacity(limit_bytes.min(content.len()));
+
+    result.push_str(head_part);
+    if let Some(marker_text) = marker.as_ref() {
+        result.push_str(marker_text);
+    }
+
+    let remaining = limit_bytes.saturating_sub(result.len());
+    if remaining == 0 {
+        return result;
+    }
+
+    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
+    result.push_str(tail_part);
+
+    result
+}
+
+#[derive(Clone, Copy)]
+pub enum TruncationSource {
+    Policy(TruncationPolicy),
+    LineOmission { total_lines: usize },
+    ByteLimit { limit_bytes: usize },
+}
+
+fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String {
+    match source {
+        TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
+            format!("[…{removed_count} tokens truncated…]")
+        }
+        TruncationSource::Policy(TruncationPolicy::Bytes(_)) => {
+            format!("[…{removed_count} bytes truncated…]")
+        }
+        TruncationSource::LineOmission { total_lines } => {
+            format!("[... omitted {removed_count} of {total_lines} lines ...]")
+        }
+        TruncationSource::ByteLimit { limit_bytes } => {
+            format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]")
+        }
+    }
+}
+
+fn split_budget(budget: usize) -> (usize, usize) {
+    let left = budget / 2;
+    (left, budget - left)
+}
+
+fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 {
+    match source {
+        TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
+            approx_tokens_from_byte_count(removed_bytes)
+        }
+        _ => u64::try_from(removed_bytes).unwrap_or(u64::MAX),
+    }
+}
+
+fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String {
+    let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1);
+    out.push_str(prefix);
+    out.push_str(marker);
+    out.push('\n');
+    out.push_str(suffix);
+    out
+}
+
+pub(crate) fn approx_token_count(text: &str) -> usize {
+    let len = text.len();
+    len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN
+}
+
+fn approx_bytes_for_tokens(tokens: usize) -> usize {
+    tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)
+}
+
+fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
+    let bytes_u64 = bytes as u64;
+    bytes_u64.saturating_add((APPROX_BYTES_PER_TOKEN as u64).saturating_sub(1))
+        / (APPROX_BYTES_PER_TOKEN as u64)
+}
+
+fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
+    if input.len() <= max_len {
+        return input;
+    }
+    let mut end = max_len;
+    while end > 0 && !input.is_char_boundary(end) {
+        end -= 1;
+    }
+    &input[..end]
+}
+
+fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
+    if let Some(head) = s.get(..left_budget)
+        && let Some(i) = head.rfind('\n')
+    {
+        return i + 1;
+    }
+    truncate_on_boundary(s, left_budget).len()
+}
+
+fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
+    let start_tail = s.len().saturating_sub(right_budget);
+    if let Some(tail) = s.get(start_tail..)
+        && let Some(i) = tail.find('\n')
+    {
+        return start_tail + i + 1;
+    }
+
+    let mut idx = start_tail.min(s.len());
+    while idx < s.len() && !s.is_char_boundary(idx) {
+        idx += 1;
+    }
+    idx
+}
+
+fn error_on_double_truncation(content: &str) {
+    if content.contains("Total output lines:") && content.contains("omitted") {
+        tracing::error!(
+            "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
+        );
+    }
+}

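The four marker shapes produced by `format_truncation_marker` above, reproduced in a stand-alone sketch so the test regexes that follow are easier to read (note the policy markers use the `…` ellipsis character, while the line/byte-limit markers use ASCII dots):

```rust
fn main() {
    // Policy-driven truncation (token or byte budget):
    println!("[…{} tokens truncated…]", 128);
    println!("[…{} bytes truncated…]", 512);
    // Formatted exec output (line omission takes precedence over the byte limit):
    println!("[... omitted {} of {} lines ...]", 42, 298);
    println!("[... removed {} bytes to fit {} byte limit ...]", 50, 10_000);
}
```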
 #[cfg(test)]
 mod tests {
-    use super::truncate_middle;
-    use codex_utils_tokenizer::Tokenizer;
+    use super::TruncationPolicy;
+    use super::TruncationSource;
+    use super::approx_token_count;
+    use super::truncate_function_output_items_with_policy;
+    use super::truncate_with_line_bytes_budget;
+    use super::truncate_with_token_budget;
+    use crate::config::OPENAI_DEFAULT_MODEL;
+    use crate::model_family::derive_default_model_family;
+    use crate::model_family::find_family_for_model;
+    use codex_protocol::models::FunctionCallOutputContentItem;
+    use pretty_assertions::assert_eq;
+    use regex_lite::Regex;
+
+    const MODEL_FORMAT_MAX_LINES: usize = 256;
+
+    fn model_format_max_bytes() -> usize {
+        find_family_for_model(OPENAI_DEFAULT_MODEL)
+            .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL))
+            .truncation_policy
+            .byte_budget()
+    }
+
+    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
+        let head_lines = MODEL_FORMAT_MAX_LINES / 2;
+        let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
+        let head_take = head_lines.min(total_lines);
+        let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
+        let omitted = total_lines.saturating_sub(head_take + tail_take);
+        let escaped_line = regex_lite::escape(line);
+        if omitted == 0 {
+            return format!(
+                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$",
+                max_bytes = model_format_max_bytes(),
+            );
+        }
+        format!(
+            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
+        )
+    }

-    #[test]
-    fn truncate_middle_no_newlines_fallback() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ*";
-        let max_bytes = 32;
-        let (out, original) = truncate_middle(s, max_bytes);
-        assert!(out.starts_with("abc"));
-        assert!(out.contains("tokens truncated"));
-        assert!(out.ends_with("XYZ*"));
-        assert_eq!(original, Some(tok.count(s) as u64));
-    }
-
-    #[test]
-    fn truncate_middle_prefers_newline_boundaries() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        let mut s = String::new();
-        for i in 1..=20 {
-            s.push_str(&format!("{i:03}\n"));
-        }
-        assert_eq!(s.len(), 80);
-
-        let max_bytes = 64;
-        let (out, tokens) = truncate_middle(&s, max_bytes);
-        assert!(out.starts_with("001\n002\n003\n004\n"));
-        assert!(out.contains("tokens truncated"));
-        assert!(out.ends_with("017\n018\n019\n020\n"));
-        assert_eq!(tokens, Some(tok.count(&s) as u64));
-    }
+    #[test]
+    fn truncate_middle_returns_original_when_under_limit() {
+        let s = "short output";
+        let limit = 100;
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit));
+        let (out, original) = truncate_with_token_budget(s, limit, source);
+        assert_eq!(out, s);
+        assert_eq!(original, None);
+    }
+
+    #[test]
+    fn truncate_middle_reports_truncation_at_zero_limit() {
+        let s = "abcdef";
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(0));
+        let (out, original) = truncate_with_token_budget(s, 0, source);
+        assert_eq!(out, "[…2 tokens truncated…]");
+        assert_eq!(original, Some(approx_token_count(s) as u64));
+    }
+
+    #[test]
+    fn truncate_middle_enforces_token_budget() {
+        let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa";
+        let max_tokens = 12;
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
+        let (out, original) = truncate_with_token_budget(s, max_tokens, source);
+        assert!(out.contains("tokens truncated"));
+        assert_eq!(original, Some(approx_token_count(s) as u64));
+        assert!(out.len() < s.len(), "truncated output should be shorter");
+    }

     #[test]
     fn truncate_middle_handles_utf8_content() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n";
-        let max_bytes = 32;
-        let (out, tokens) = truncate_middle(s, max_bytes);
+        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
+        let max_tokens = 8;
+        let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
+        let (out, tokens) = truncate_with_token_budget(s, max_tokens, source);

         assert!(out.contains("tokens truncated"));
         assert!(!out.contains('\u{fffd}'));
-        assert_eq!(tokens, Some(tok.count(s) as u64));
+        assert_eq!(tokens, Some(approx_token_count(s) as u64));
+        assert!(out.len() < s.len(), "UTF-8 content should be shortened");
     }

-    #[test]
-    fn truncate_middle_prefers_newline_boundaries_2() {
-        let tok = Tokenizer::try_default().expect("load tokenizer");
-        // Build a multi-line string of 20 numbered lines (each "NNN\n").
-        let mut s = String::new();
-        for i in 1..=20 {
-            s.push_str(&format!("{i:03}\n"));
-        }
-        assert_eq!(s.len(), 80);
-
-        let max_bytes = 64;
-        let (out, total) = truncate_middle(&s, max_bytes);
-        assert!(out.starts_with("001\n002\n003\n004\n"));
-        assert!(out.contains("tokens truncated"));
-        assert!(out.ends_with("017\n018\n019\n020\n"));
-        assert_eq!(total, Some(tok.count(&s) as u64));
-    }
-
+    #[test]
+    fn format_exec_output_truncates_large_error() {
+        let line = "very long execution error line that should trigger truncation\n";
+        let large_error = line.repeat(2_500); // way beyond both byte and line limits
+
+        let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes());
+
+        let total_lines = large_error.lines().count();
+        let pattern = truncated_message_pattern(line, total_lines);
+        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
+            panic!("failed to compile regex {pattern}: {err}");
+        });
+        let captures = regex
+            .captures(&truncated)
+            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {truncated}"));
+        let body = captures
+            .name("body")
+            .expect("missing body capture")
+            .as_str();
+        assert!(
+            body.len() <= model_format_max_bytes(),
+            "body exceeds byte limit: {} bytes",
+            body.len()
+        );
+        assert_ne!(truncated, large_error);
+    }
+
+    #[test]
+    fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
+        let max_bytes = model_format_max_bytes();
+        let long_line = "a".repeat(max_bytes + 50);
+        let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes);
+
+        assert_ne!(truncated, long_line);
+        let removed_bytes = long_line.len().saturating_sub(max_bytes);
+        let marker_line =
+            format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]");
+        assert!(
+            truncated.contains(&marker_line),
+            "missing byte truncation marker: {truncated}"
+        );
+        assert!(
+            !truncated.contains("omitted"),
+            "line omission marker should not appear when no lines were dropped: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_returns_original_when_within_limits() {
+        let content = "example output\n".repeat(10);
+
+        assert_eq!(
+            truncate_with_line_bytes_budget(&content, model_format_max_bytes()),
+            content
+        );
+    }
+
+    #[test]
+    fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}\n"))
+            .collect();
+
+        let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
+
+        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
+        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
+
+        assert!(
+            truncated.contains(&expected_marker),
+            "missing omitted marker: {truncated}"
+        );
+        assert!(
+            truncated.contains("line-0\n"),
+            "expected head line to remain: {truncated}"
+        );
+
+        let last_line = format!("line-{}\n", total_lines - 1);
+        assert!(
+            truncated.contains(&last_line),
+            "expected tail line to remain: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
+        let long_line = "x".repeat(256);
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}-{long_line}\n"))
+            .collect();
+
+        let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
+
+        assert!(
+            truncated.contains("[... omitted 42 of 298 lines ...]"),
+            "expected omitted marker when line count exceeds limit: {truncated}"
+        );
+        assert!(
+            !truncated.contains("byte limit"),
+            "line omission marker should take precedence over byte marker: {truncated}"
+        );
+    }
+
+    #[test]
+    fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
+        let chunk = "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu nu xi omicron pi rho sigma tau upsilon phi chi psi omega.\n";
+        let chunk_tokens = approx_token_count(chunk);
+        assert!(chunk_tokens > 0, "chunk must consume tokens");
+        let limit = chunk_tokens * 3;
+        let t1 = chunk.to_string();
+        let t2 = chunk.to_string();
+        let t3 = chunk.repeat(10);
+        let t4 = chunk.to_string();
+        let t5 = chunk.to_string();
+
+        let items = vec![
+            FunctionCallOutputContentItem::InputText { text: t1.clone() },
+            FunctionCallOutputContentItem::InputText { text: t2.clone() },
+            FunctionCallOutputContentItem::InputImage {
+                image_url: "img:mid".to_string(),
+            },
+            FunctionCallOutputContentItem::InputText { text: t3 },
+            FunctionCallOutputContentItem::InputText { text: t4 },
+            FunctionCallOutputContentItem::InputText { text: t5 },
+        ];
+
+        let output =
+            truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(limit));
+
+        // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted.
+        assert_eq!(output.len(), 5);
+
+        let first_text = match &output[0] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected first item: {other:?}"),
+        };
+        assert_eq!(first_text, &t1);
+
+        let second_text = match &output[1] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected second item: {other:?}"),
+        };
+        assert_eq!(second_text, &t2);
+
+        assert_eq!(
+            output[2],
+            FunctionCallOutputContentItem::InputImage {
+                image_url: "img:mid".to_string()
+            }
+        );
+
+        let fourth_text = match &output[3] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected fourth item: {other:?}"),
+        };
+        assert!(
+            fourth_text.contains("tokens truncated"),
+            "expected marker in truncated snippet: {fourth_text}"
+        );
+
+        let summary_text = match &output[4] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected summary item: {other:?}"),
+        };
+        assert!(summary_text.contains("omitted 2 text items"));
+    }
+}

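The line-budget split these tests rely on follows directly from `truncate_formatted_exec_output`: a 256-line budget keeps 128 head lines and 128 tail lines, so a 300-line output omits 44 lines in the middle. A quick arithmetic check:

```rust
fn main() {
    let limit_lines: usize = 256;
    let head = limit_lines / 2; // 128 head lines
    let tail = limit_lines - head; // 128 tail lines
    let total: usize = 300;
    let head_take = head.min(total);
    let tail_take = tail.min(total - head_take);
    let omitted = total - head_take - tail_take;
    assert_eq!(omitted, 44);
}
```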
@@ -45,6 +45,7 @@ pub(crate) const MIN_YIELD_TIME_MS: u64 = 250;
 pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000;
 pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000;
 pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB
+pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_TOKENS: usize = UNIFIED_EXEC_OUTPUT_MAX_BYTES / 4;

 pub(crate) struct UnifiedExecContext {
     pub session: Arc<Session>,
@@ -124,37 +125,6 @@ pub(crate) fn generate_chunk_id() -> String {
         .collect()
 }

-pub(crate) fn truncate_output_to_tokens(
-    output: &str,
-    max_tokens: usize,
-) -> (String, Option<usize>) {
-    if max_tokens == 0 {
-        let total_tokens = output.chars().count();
-        let message = format!("…{total_tokens} tokens truncated…");
-        return (message, Some(total_tokens));
-    }
-
-    let tokens: Vec<char> = output.chars().collect();
-    let total_tokens = tokens.len();
-    if total_tokens <= max_tokens {
-        return (output.to_string(), None);
-    }
-
-    let half = max_tokens / 2;
-    if half == 0 {
-        let truncated = total_tokens.saturating_sub(max_tokens);
-        let message = format!("…{truncated} tokens truncated…");
-        return (message, Some(total_tokens));
-    }
-
-    let truncated = total_tokens.saturating_sub(half * 2);
-    let mut truncated_output = String::new();
-    truncated_output.extend(&tokens[..half]);
-    truncated_output.push_str(&format!("…{truncated} tokens truncated…"));
-    truncated_output.extend(&tokens[total_tokens - half..]);
-    (truncated_output, Some(total_tokens))
-}
-
 #[cfg(test)]
 #[cfg(unix)]
 mod tests {

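The removed helper counted `char`s as "tokens", while the shared `approx_token_count` divides byte length by 4 (rounding up); on plain ASCII the two estimates differ by roughly 4x. A stand-alone comparison:

```rust
fn main() {
    let s = "😀😀😀😀"; // 4 chars, 16 UTF-8 bytes
    let old_estimate = s.chars().count(); // 4 "tokens"
    let new_estimate = s.len().div_ceil(4); // 16 / 4 = 4 tokens: equal here
    assert_eq!(old_estimate, new_estimate);

    let t = "plain ascii text"; // 16 chars, 16 bytes
    assert_eq!(t.chars().count(), 16); // old: 16 "tokens"
    assert_eq!(t.len().div_ceil(4), 4); // new: 4 tokens
}
```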
@@ -14,11 +14,13 @@ use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StreamOutput;
 use crate::exec::is_likely_sandbox_denied;
-use crate::truncate::truncate_middle;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::truncate_text;
 use codex_utils_pty::ExecCommandSession;
 use codex_utils_pty::SpawnedPty;

-use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
+use super::UNIFIED_EXEC_OUTPUT_MAX_TOKENS;
 use super::UnifiedExecError;

 #[derive(Debug, Default)]
@@ -165,7 +167,10 @@ impl UnifiedExecSession {
         };

         if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
-            let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
+            let snippet = truncate_text(
+                &aggregated_text,
+                TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS),
+            );
             let message = if snippet.is_empty() {
                 format!("exit code {exit_code}")
             } else {

@@ -23,6 +23,9 @@ use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
 use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
 use crate::tools::sandboxing::ToolCtx;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::approx_token_count;
+use crate::truncate::truncate_text;

 use super::ExecCommandRequest;
 use super::SessionEntry;
@@ -36,7 +39,6 @@ use super::generate_chunk_id;
 use super::resolve_max_tokens;
 use super::session::OutputBuffer;
 use super::session::UnifiedExecSession;
-use super::truncate_output_to_tokens;

 impl UnifiedExecSessionManager {
     pub(crate) async fn exec_command(
@@ -70,7 +72,7 @@ impl UnifiedExecSessionManager {
         let wall_time = Instant::now().saturating_duration_since(start);

         let text = String::from_utf8_lossy(&collected).to_string();
-        let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
+        let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
         let chunk_id = generate_chunk_id();
         let has_exited = session.has_exited();
         let stored_id = self
@@ -85,6 +87,8 @@ impl UnifiedExecSessionManager {
         // Only include a session_id in the response if the process is still alive.
         let session_id = if has_exited { None } else { Some(stored_id) };

+        let original_token_count = approx_token_count(&text);
+
         let response = UnifiedExecResponse {
             event_call_id: context.call_id.clone(),
             chunk_id,
@@ -92,7 +96,7 @@ impl UnifiedExecSessionManager {
             output,
             session_id,
             exit_code: exit_code.flatten(),
-            original_token_count,
+            original_token_count: Some(original_token_count),
             session_command: Some(request.command.clone()),
         };

@@ -175,7 +179,8 @@ impl UnifiedExecSessionManager {
         let wall_time = Instant::now().saturating_duration_since(start);

         let text = String::from_utf8_lossy(&collected).to_string();
-        let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
+        let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
+        let original_token_count = approx_token_count(&text);
         let chunk_id = generate_chunk_id();

         let status = self.refresh_session_state(session_id).await;
@@ -199,7 +204,7 @@ impl UnifiedExecSessionManager {
             output,
             session_id,
             exit_code,
-            original_token_count,
+            original_token_count: Some(original_token_count),
             session_command: Some(session_command.clone()),
         };

codex-rs/core/templates/compact/summary_prefix.md (new file, 1 line)
@@ -0,0 +1 @@
+Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:

codex-rs/core/templates/parallel/instructions.md (new file, 14 lines)
@@ -0,0 +1,14 @@
+## Exploration and reading files
+
+- **Think first.** Before any tool call, decide ALL files/resources you will need.
+- **Batch everything.** If you need multiple files (even from different places), read them together.
+- **multi_tool_use.parallel** Use `multi_tool_use.parallel` to parallelize tool calls, and only this.
+- **Only make sequential calls if you truly cannot know the next file without seeing a result first.**
+- **Workflow:** (a) plan all needed reads → (b) issue one parallel batch → (c) analyze results → (d) repeat if new, unpredictable reads arise.
+
+**Additional notes**:
+* Always maximize parallelism. Never read files one-by-one unless logically unavoidable.
+* This concerns every read/list/search operation including, but not only, `cat`, `rg`, `sed`, `ls`, `git show`, `nl`, `wc`, ...
+* Do not try to parallelize using scripting or anything other than `multi_tool_use.parallel`.
+
+## Editing constraints

@@ -499,6 +499,14 @@ fn base_mock() -> (MockBuilder, ResponseMock) {
     (mock, response_mock)
 }

+fn compact_mock() -> (MockBuilder, ResponseMock) {
+    let response_mock = ResponseMock::new();
+    let mock = Mock::given(method("POST"))
+        .and(path_regex(".*/responses/compact$"))
+        .and(response_mock.clone());
+    (mock, response_mock)
+}
+
 pub async fn mount_sse_once_match<M>(server: &MockServer, matcher: M, body: String) -> ResponseMock
 where
     M: wiremock::Match + Send + Sync + 'static,
@@ -521,6 +529,40 @@ pub async fn mount_sse_once(server: &MockServer, body: String) -> ResponseMock {
     response_mock
 }

+pub async fn mount_compact_json_once_match<M>(
+    server: &MockServer,
+    matcher: M,
+    body: serde_json::Value,
+) -> ResponseMock
+where
+    M: wiremock::Match + Send + Sync + 'static,
+{
+    let (mock, response_mock) = compact_mock();
+    mock.and(matcher)
+        .respond_with(
+            ResponseTemplate::new(200)
+                .insert_header("content-type", "application/json")
+                .set_body_json(body.clone()),
+        )
+        .up_to_n_times(1)
+        .mount(server)
+        .await;
+    response_mock
+}
+
+pub async fn mount_compact_json_once(server: &MockServer, body: serde_json::Value) -> ResponseMock {
+    let (mock, response_mock) = compact_mock();
+    mock.respond_with(
+        ResponseTemplate::new(200)
+            .insert_header("content-type", "application/json")
+            .set_body_json(body.clone()),
+    )
+    .up_to_n_times(1)
+    .mount(server)
+    .await;
+    response_mock
+}
+
 pub async fn start_mock_server() -> MockServer {
     MockServer::builder()
         .body_print_limit(BodyPrintLimit::Limited(80_000))

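A hedged usage sketch for the new helper (assumes a tokio test context with the helper in scope; the JSON body is a placeholder, not the real compact-response schema):

```rust
#[tokio::test]
async fn compact_endpoint_is_mocked_once() {
    let server = start_mock_server().await;
    // Mount a one-shot JSON response for POST .../responses/compact.
    let _mock = mount_compact_json_once(&server, serde_json::json!({ "summary": "..." })).await;
    // Drive the code under test against `server.uri()`; a second POST to the
    // compact endpoint would not match, because of `up_to_n_times(1)`.
}
```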
@@ -38,8 +38,18 @@ pub enum ApplyPatchModelOutput {
     ShellViaHeredoc,
 }

+/// A collection of different ways the model can output an apply_patch call
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum ShellModelOutput {
+    Shell,
+    ShellCommand,
+    LocalShell,
+    // UnifiedExec has its own set of tests
+}
+
 pub struct TestCodexBuilder {
     config_mutators: Vec<Box<ConfigMutator>>,
+    auth: CodexAuth,
 }

 impl TestCodexBuilder {
@@ -51,6 +61,11 @@ impl TestCodexBuilder {
         self
     }

+    pub fn with_auth(mut self, auth: CodexAuth) -> Self {
+        self.auth = auth;
+        self
+    }
+
     pub fn with_model(self, model: &str) -> Self {
         let new_model = model.to_string();
         self.with_config(move |config| {
@@ -81,13 +96,12 @@ impl TestCodexBuilder {
     ) -> anyhow::Result<TestCodex> {
         let (config, cwd) = self.prepare_config(server, &home).await?;

-        let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
+        let auth = self.auth.clone();
+        let conversation_manager = ConversationManager::with_auth(auth.clone());

         let new_conversation = match resume_from {
             Some(path) => {
-                let auth_manager = codex_core::AuthManager::from_auth_for_testing(
-                    CodexAuth::from_api_key("dummy"),
-                );
+                let auth_manager = codex_core::AuthManager::from_auth_for_testing(auth);
                 conversation_manager
                     .resume_conversation_from_rollout(config.clone(), path, auth_manager)
                     .await?
@@ -336,5 +350,6 @@ fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {
 pub fn test_codex() -> TestCodexBuilder {
     TestCodexBuilder {
         config_mutators: vec![],
+        auth: CodexAuth::from_api_key("dummy"),
     }
 }

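A sketch of the new injection point (the build/start calls after the builder are elided, since their exact shape is not shown in this diff):

```rust
// The builder now threads the injected auth into both the conversation
// manager and, on resume, the AuthManager, instead of the hard-coded
// "dummy" API key.
fn configure() -> TestCodexBuilder {
    test_codex()
        .with_auth(CodexAuth::from_api_key("sk-test"))
        .with_model("gpt-5.1")
}
```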
@@ -29,7 +29,7 @@ use core_test_support::wait_for_event;
 use serde_json::json;
 use test_case::test_case;

-async fn apply_patch_harness() -> Result<TestCodexHarness> {
+pub async fn apply_patch_harness() -> Result<TestCodexHarness> {
     apply_patch_harness_with(|_| {}).await
 }

@@ -43,7 +43,7 @@ async fn apply_patch_harness_with(
         .await
 }

-async fn mount_apply_patch(
+pub async fn mount_apply_patch(
     harness: &TestCodexHarness,
     call_id: &str,
     patch: &str,
@@ -87,8 +87,8 @@ async fn apply_patch_cli_multiple_operations_integration(
     skip_if_no_network!(Ok(()));

     let harness = apply_patch_harness_with(|config| {
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
     })
     .await?;

@@ -671,8 +671,8 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
     skip_if_no_network!(Ok(()));

     let harness = apply_patch_harness_with(|config| {
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
     })
     .await?;

@@ -717,8 +717,8 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
     skip_if_no_network!(Ok(()));

     let harness = apply_patch_harness_with(|config| {
-        config.model = "gpt-5".to_string();
-        config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
+        config.model = "gpt-5.1".to_string();
+        config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
     })
     .await?;
     let test = harness.test();

@@ -240,6 +240,10 @@ enum Expectation {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
},
|
||||
FileCreatedNoExitCode {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
},
|
||||
PatchApplied {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
@@ -251,12 +255,18 @@ enum Expectation {
|
||||
NetworkSuccess {
|
||||
body_contains: &'static str,
|
||||
},
|
||||
NetworkSuccessNoExitCode {
|
||||
body_contains: &'static str,
|
||||
},
|
||||
NetworkFailure {
|
||||
expect_tag: &'static str,
|
||||
},
|
||||
CommandSuccess {
|
||||
stdout_contains: &'static str,
|
||||
},
|
||||
CommandSuccessNoExitCode {
|
||||
stdout_contains: &'static str,
|
||||
},
|
||||
CommandFailure {
|
||||
output_contains: &'static str,
|
||||
},
|
||||
@@ -270,8 +280,7 @@ impl Expectation {
|
||||
assert_eq!(
|
||||
result.exit_code,
|
||||
Some(0),
|
||||
"expected successful exit for {:?}",
|
||||
path
|
||||
"expected successful exit for {path:?}"
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(content),
|
||||
@@ -285,6 +294,21 @@ impl Expectation {
|
||||
);
|
||||
let _ = fs::remove_file(path);
|
||||
}
|
||||
Expectation::FileCreatedNoExitCode { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
|
||||
assert!(
|
||||
result.stdout.contains(content),
|
||||
"stdout missing {content:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
let file_contents = fs::read_to_string(&path)?;
|
||||
assert!(
|
||||
file_contents.contains(content),
|
||||
"file contents missing {content:?}: {file_contents}"
|
||||
);
|
||||
let _ = fs::remove_file(path);
|
||||
}
|
||||
Expectation::PatchApplied { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
match result.exit_code {
|
||||
@@ -360,6 +384,23 @@ impl Expectation {
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::NetworkSuccessNoExitCode { body_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
"expected no exit code for successful network call: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains("OK:"),
|
||||
"stdout missing OK prefix: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(body_contains),
|
||||
"stdout missing body text {body_contains:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::NetworkFailure { expect_tag } => {
|
||||
assert_ne!(
|
||||
result.exit_code,
|
||||
@@ -391,6 +432,18 @@ impl Expectation {
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
"expected no exit code for trusted command: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(stdout_contains),
|
||||
"trusted command stdout missing {stdout_contains:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::CommandFailure { output_contains } => {
|
||||
assert_ne!(
|
||||
result.exit_code,
|
||||
@@ -588,13 +641,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::FileCreated {
                 target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
                 content: "danger-on-request",
             },
         },
+        ScenarioSpec {
+            name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
+            approval_policy: OnRequest,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            action: ActionKind::WriteFile {
+                target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
+                content: "danger-on-request",
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::Auto,
+            expectation: Expectation::FileCreatedNoExitCode {
+                target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
+                content: "danger-on-request",
+            },
+        },
         ScenarioSpec {
             name: "danger_full_access_on_request_allows_network",
             approval_policy: OnRequest,
@@ -605,12 +675,28 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::NetworkSuccess {
                 body_contains: "danger-network-ok",
             },
         },
+        ScenarioSpec {
+            name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
+            approval_policy: OnRequest,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            action: ActionKind::FetchUrl {
+                endpoint: "/dfa/network",
+                response_body: "danger-network-ok",
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::Auto,
+            expectation: Expectation::NetworkSuccessNoExitCode {
+                body_contains: "danger-network-ok",
+            },
+        },
         ScenarioSpec {
             name: "trusted_command_unless_trusted_runs_without_prompt",
             approval_policy: UnlessTrusted,
@@ -620,12 +706,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::CommandSuccess {
                 stdout_contains: "trusted-unless",
             },
         },
+        ScenarioSpec {
+            name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
+            approval_policy: UnlessTrusted,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            action: ActionKind::RunCommand {
+                command: &["echo", "trusted-unless"],
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::Auto,
+            expectation: Expectation::CommandSuccessNoExitCode {
+                stdout_contains: "trusted-unless",
+            },
+        },
         ScenarioSpec {
             name: "danger_full_access_on_failure_allows_outside_write",
             approval_policy: OnFailure,
@@ -636,13 +737,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::FileCreated {
                 target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
                 content: "danger-on-failure",
             },
         },
+        ScenarioSpec {
+            name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
+            approval_policy: OnFailure,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            action: ActionKind::WriteFile {
+                target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
+                content: "danger-on-failure",
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::Auto,
+            expectation: Expectation::FileCreatedNoExitCode {
+                target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
+                content: "danger-on-failure",
+            },
+        },
         ScenarioSpec {
             name: "danger_full_access_unless_trusted_requests_approval",
             approval_policy: UnlessTrusted,
@@ -653,7 +771,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -663,6 +781,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
                 content: "danger-unless-trusted",
             },
         },
+        ScenarioSpec {
+            name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
+            approval_policy: UnlessTrusted,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            action: ActionKind::WriteFile {
+                target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
+                content: "danger-unless-trusted",
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::ExecApproval {
+                decision: ReviewDecision::Approved,
+                expected_reason: None,
+            },
+            expectation: Expectation::FileCreatedNoExitCode {
+                target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
+                content: "danger-unless-trusted",
+            },
+        },
         ScenarioSpec {
             name: "danger_full_access_never_allows_outside_write",
             approval_policy: Never,
@@ -673,13 +811,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::FileCreated {
                 target: TargetPath::OutsideWorkspace("dfa_never.txt"),
                 content: "danger-never",
             },
         },
+        ScenarioSpec {
+            name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
+            approval_policy: Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            action: ActionKind::WriteFile {
+                target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
+                content: "danger-never",
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::Auto,
+            expectation: Expectation::FileCreatedNoExitCode {
+                target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
+                content: "danger-never",
+            },
+        },
         ScenarioSpec {
             name: "read_only_on_request_requires_approval",
             approval_policy: OnRequest,
@@ -690,7 +845,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: true,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -700,6 +855,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
                 content: "read-only-approval",
             },
         },
+        ScenarioSpec {
+            name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
+            approval_policy: OnRequest,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            action: ActionKind::WriteFile {
+                target: TargetPath::Workspace("ro_on_request_5_1.txt"),
+                content: "read-only-approval",
+            },
+            with_escalated_permissions: true,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::ExecApproval {
+                decision: ReviewDecision::Approved,
+                expected_reason: None,
+            },
+            expectation: Expectation::FileCreatedNoExitCode {
+                target: TargetPath::Workspace("ro_on_request_5_1.txt"),
+                content: "read-only-approval",
+            },
+        },
         ScenarioSpec {
             name: "trusted_command_on_request_read_only_runs_without_prompt",
             approval_policy: OnRequest,
@@ -709,12 +884,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::CommandSuccess {
                 stdout_contains: "trusted-read-only",
             },
         },
+        ScenarioSpec {
+            name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
+            approval_policy: OnRequest,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            action: ActionKind::RunCommand {
+                command: &["echo", "trusted-read-only"],
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::Auto,
+            expectation: Expectation::CommandSuccessNoExitCode {
+                stdout_contains: "trusted-read-only",
+            },
+        },
         ScenarioSpec {
             name: "read_only_on_request_blocks_network",
             approval_policy: OnRequest,
@@ -760,7 +950,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: Some("command failed; retry without sandbox?"),
@@ -770,6 +960,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
                 content: "read-only-on-failure",
             },
         },
+        #[cfg(not(target_os = "linux"))]
+        ScenarioSpec {
+            name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
+            approval_policy: OnFailure,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            action: ActionKind::WriteFile {
+                target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
+                content: "read-only-on-failure",
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::ExecApproval {
+                decision: ReviewDecision::Approved,
+                expected_reason: Some("command failed; retry without sandbox?"),
+            },
+            expectation: Expectation::FileCreatedNoExitCode {
+                target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
+                content: "read-only-on-failure",
+            },
+        },
         ScenarioSpec {
             name: "read_only_on_request_network_escalates_when_approved",
             approval_policy: OnRequest,
@@ -780,7 +991,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: true,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -789,6 +1000,25 @@ fn scenarios() -> Vec<ScenarioSpec> {
                 body_contains: "read-only-network-ok",
             },
         },
+        ScenarioSpec {
+            name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
+            approval_policy: OnRequest,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            action: ActionKind::FetchUrl {
+                endpoint: "/ro/network-approved",
+                response_body: "read-only-network-ok",
+            },
+            with_escalated_permissions: true,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::ExecApproval {
+                decision: ReviewDecision::Approved,
+                expected_reason: None,
+            },
+            expectation: Expectation::NetworkSuccessNoExitCode {
+                body_contains: "read-only-network-ok",
+            },
+        },
         ScenarioSpec {
             name: "apply_patch_shell_requires_patch_approval",
             approval_policy: UnlessTrusted,
@@ -819,7 +1049,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: Some("gpt-5-codex"),
+            model_override: Some("gpt-5.1-codex"),
             outcome: Outcome::Auto,
             expectation: Expectation::PatchApplied {
                 target: TargetPath::Workspace("apply_patch_function.txt"),
@@ -836,7 +1066,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![Feature::ApplyPatchFreeform],
-            model_override: Some("gpt-5-codex"),
+            model_override: Some("gpt-5.1-codex"),
             outcome: Outcome::Auto,
             expectation: Expectation::PatchApplied {
                 target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
@@ -853,7 +1083,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: Some("gpt-5-codex"),
+            model_override: Some("gpt-5.1-codex"),
             outcome: Outcome::PatchApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -873,7 +1103,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: Some("gpt-5-codex"),
+            model_override: Some("gpt-5.1-codex"),
             outcome: Outcome::PatchApproval {
                 decision: ReviewDecision::Denied,
                 expected_reason: None,
@@ -913,7 +1143,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: Some("gpt-5-codex"),
+            model_override: Some("gpt-5.1-codex"),
             outcome: Outcome::PatchApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -933,7 +1163,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: Some("gpt-5-codex"),
+            model_override: Some("gpt-5.1-codex"),
             outcome: Outcome::Auto,
             expectation: Expectation::FileNotCreated {
                 target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
@@ -952,7 +1182,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -962,6 +1192,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
                 content: "read-only-unless-trusted",
             },
         },
+        ScenarioSpec {
+            name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
+            approval_policy: UnlessTrusted,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            action: ActionKind::WriteFile {
+                target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
+                content: "read-only-unless-trusted",
+            },
+            with_escalated_permissions: false,
+            features: vec![],
+            model_override: Some("gpt-5.1"),
+            outcome: Outcome::ExecApproval {
+                decision: ReviewDecision::Approved,
+                expected_reason: None,
+            },
+            expectation: Expectation::FileCreatedNoExitCode {
+                target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
+                content: "read-only-unless-trusted",
+            },
+        },
         ScenarioSpec {
             name: "read_only_never_reports_sandbox_failure",
             approval_policy: Never,
@@ -992,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::CommandSuccess {
                 stdout_contains: "trusted-never",
@@ -1008,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::FileCreated {
                 target: TargetPath::Workspace("ww_on_request.txt"),
@@ -1039,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: true,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -1059,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::NetworkSuccess {
                 body_contains: "workspace-network-ok",
@@ -1076,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: Some("command failed; retry without sandbox?"),
@@ -1096,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: None,
@@ -1137,7 +1387,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: false,
             features: vec![Feature::UnifiedExec],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::Auto,
             expectation: Expectation::CommandSuccess {
                 stdout_contains: "hello unified exec",
@@ -1155,7 +1405,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             },
             with_escalated_permissions: true,
             features: vec![Feature::UnifiedExec],
-            model_override: None,
+            model_override: Some("gpt-5"),
             outcome: Outcome::ExecApproval {
                 decision: ReviewDecision::Approved,
                 expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
@@ -1208,7 +1458,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
     let mut builder = test_codex().with_config(move |config| {
         config.approval_policy = approval_policy;
         config.sandbox_policy = sandbox_policy.clone();
-        let model = model_override.unwrap_or("gpt-5");
+        let model = model_override.unwrap_or("gpt-5.1");
        config.model = model.to_string();
        config.model_family =
            find_family_for_model(model).expect("model should map to a known family");
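The pattern across these hunks is uniform: each legacy scenario pins `model_override: Some("gpt-5")` (it previously relied on `None`), and gains a `_gpt_5_1_no_exit` twin that runs on gpt-5.1 and expects the `NoExitCode` variant of the same outcome. A minimal sketch of the fallback change in `run_scenario` (field set reduced; the real `ScenarioSpec` carries policies, actions, features, and expectations):

    // Sketch only: field names follow the diff above, not the full struct.
    struct ScenarioSpec {
        name: &'static str,
        model_override: Option<&'static str>,
    }

    fn effective_model(spec: &ScenarioSpec) -> &'static str {
        // The default moved from "gpt-5" to "gpt-5.1", which is why legacy
        // scenarios now pin Some("gpt-5") explicitly.
        spec.model_override.unwrap_or("gpt-5.1")
    }

    fn main() {
        let legacy = ScenarioSpec {
            name: "danger_full_access_on_request_allows_outside_write",
            model_override: Some("gpt-5"),
        };
        let mirrored = ScenarioSpec {
            name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
            model_override: Some("gpt-5.1"),
        };
        assert_eq!(effective_model(&legacy), "gpt-5");
        assert_eq!(effective_model(&mirrored), "gpt-5.1");
    }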
@@ -769,7 +769,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R

     let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
     let TestCodex { codex, .. } = test_codex()
-        .with_model("gpt-5-codex")
+        .with_model("gpt-5.1-codex")
         .with_config(|config| {
             config.model_verbosity = Some(Verbosity::High);
         })
@@ -807,7 +807,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {

     let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
     let TestCodex { codex, .. } = test_codex()
-        .with_model("gpt-5")
+        .with_model("gpt-5.1")
         .with_config(|config| {
             config.model_verbosity = Some(Verbosity::High);
         })
@@ -1155,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
             "reasoning_output_tokens": 0,
             "total_tokens": 123
         },
-        // Default model is gpt-5-codex in tests → 95% usable context window
+        // Default model is gpt-5.1-codex in tests → 95% usable context window
         "model_context_window": 258400
     },
     "rate_limits": {
@@ -1304,8 +1304,9 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res

     let TestCodex { codex, .. } = test_codex()
         .with_config(|config| {
-            config.model = "gpt-5".to_string();
-            config.model_family = find_family_for_model("gpt-5").expect("known gpt-5 model family");
+            config.model = "gpt-5.1".to_string();
+            config.model_family =
+                find_family_for_model("gpt-5.1").expect("known gpt-5.1 model family");
             config.model_context_window = Some(272_000);
         })
         .build(&server)
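A minimal sketch of the kind of check the verbosity tests above presumably make against the captured request body, assuming the body is parsed into a `serde_json::Value` (the actual assertions live in the surrounding client test file, which is only partially shown here):

    use serde_json::json;

    fn main() {
        // For a model without verbosity support, "text.verbosity" should be
        // absent from the outgoing request body.
        let body_without = json!({ "model": "gpt-5.1-codex", "input": [] });
        assert!(body_without.pointer("/text/verbosity").is_none());

        // For a supported model, the configured value should be serialized.
        let body_with = json!({ "model": "gpt-5.1", "text": { "verbosity": "high" } });
        assert_eq!(
            body_with.pointer("/text/verbosity").and_then(|v| v.as_str()),
            Some("high")
        );
    }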
(File diff suppressed because it is too large.)

codex-rs/core/tests/suite/compact_remote.rs (new file, 217 lines)
@@ -0,0 +1,217 @@
#![allow(clippy::expect_used)]

use std::fs;

use anyhow::Result;
use codex_core::CodexAuth;
use codex_core::features::Feature;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_replaces_history_for_followups() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = TestCodexHarness::with_builder(
        test_codex()
            .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
            .with_config(|config| {
                config.features.enable(Feature::RemoteCompaction);
            }),
    )
    .await?;
    let codex = harness.test().codex.clone();

    let responses_mock = responses::mount_sse_sequence(
        harness.server(),
        vec![
            responses::sse(vec![
                responses::ev_assistant_message("m1", "FIRST_REMOTE_REPLY"),
                responses::ev_completed("resp-1"),
            ]),
            responses::sse(vec![
                responses::ev_assistant_message("m2", "AFTER_COMPACT_REPLY"),
                responses::ev_completed("resp-2"),
            ]),
        ],
    )
    .await;

    let compacted_history = vec![ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "REMOTE_COMPACTED_SUMMARY".to_string(),
        }],
    }];
    let compact_mock = responses::mount_compact_json_once(
        harness.server(),
        serde_json::json!({ "output": compacted_history.clone() }),
    )
    .await;

    codex
        .submit(Op::UserInput {
            items: vec![UserInput::Text {
                text: "hello remote compact".into(),
            }],
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    codex.submit(Op::Compact).await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    codex
        .submit(Op::UserInput {
            items: vec![UserInput::Text {
                text: "after compact".into(),
            }],
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    let compact_request = compact_mock.single_request();
    assert_eq!(compact_request.path(), "/v1/responses/compact");
    assert_eq!(
        compact_request.header("chatgpt-account-id").as_deref(),
        Some("account_id")
    );
    assert_eq!(
        compact_request.header("authorization").as_deref(),
        Some("Bearer Access Token")
    );
    let compact_body = compact_request.body_json();
    assert_eq!(
        compact_body.get("model").and_then(|v| v.as_str()),
        Some(harness.test().session_configured.model.as_str())
    );
    let compact_body_text = compact_body.to_string();
    assert!(
        compact_body_text.contains("hello remote compact"),
        "expected compact request to include user history"
    );
    assert!(
        compact_body_text.contains("FIRST_REMOTE_REPLY"),
        "expected compact request to include assistant history"
    );

    let follow_up_body = responses_mock
        .requests()
        .last()
        .expect("follow-up request missing")
        .body_json()
        .to_string();
    assert!(
        follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"),
        "expected follow-up request to use compacted history"
    );
    assert!(
        !follow_up_body.contains("FIRST_REMOTE_REPLY"),
        "expected follow-up request to drop pre-compaction assistant messages"
    );

    Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = TestCodexHarness::with_builder(
        test_codex()
            .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
            .with_config(|config| {
                config.features.enable(Feature::RemoteCompaction);
            }),
    )
    .await?;
    let codex = harness.test().codex.clone();
    let rollout_path = harness.test().session_configured.rollout_path.clone();

    let responses_mock = responses::mount_sse_once(
        harness.server(),
        responses::sse(vec![
            responses::ev_assistant_message("m1", "COMPACT_BASELINE_REPLY"),
            responses::ev_completed("resp-1"),
        ]),
    )
    .await;

    let compacted_history = vec![
        ResponseItem::Message {
            id: None,
            role: "user".to_string(),
            content: vec![ContentItem::InputText {
                text: "COMPACTED_USER_SUMMARY".to_string(),
            }],
        },
        ResponseItem::Message {
            id: None,
            role: "assistant".to_string(),
            content: vec![ContentItem::OutputText {
                text: "COMPACTED_ASSISTANT_NOTE".to_string(),
            }],
        },
    ];
    let compact_mock = responses::mount_compact_json_once(
        harness.server(),
        serde_json::json!({ "output": compacted_history.clone() }),
    )
    .await;

    codex
        .submit(Op::UserInput {
            items: vec![UserInput::Text {
                text: "needs compaction".into(),
            }],
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    codex.submit(Op::Compact).await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    codex.submit(Op::Shutdown).await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;

    assert_eq!(responses_mock.requests().len(), 1);
    assert_eq!(compact_mock.requests().len(), 1);

    let rollout_text = fs::read_to_string(&rollout_path)?;
    let mut saw_compacted_history = false;
    for line in rollout_text
        .lines()
        .map(str::trim)
        .filter(|l| !l.is_empty())
    {
        let Ok(entry) = serde_json::from_str::<RolloutLine>(line) else {
            continue;
        };
        if let RolloutItem::Compacted(compacted) = entry.item
            && compacted.message.is_empty()
            && compacted.replacement_history.as_ref() == Some(&compacted_history)
        {
            saw_compacted_history = true;
            break;
        }
    }

    assert!(
        saw_compacted_history,
        "expected rollout to persist remote compaction history"
    );

    Ok(())
}
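`mount_compact_json_once` is a helper in this repo's `core_test_support` crate; its implementation is not shown in this diff. As a rough sketch of what such a helper plausibly does with plain wiremock (the endpoint path is taken from the assertion above, everything else is an assumption):

    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    // Hypothetical stand-in for core_test_support's mount_compact_json_once.
    async fn mount_compact_json(server: &MockServer, body: serde_json::Value) {
        Mock::given(method("POST"))
            .and(path("/v1/responses/compact"))
            .respond_with(ResponseTemplate::new(200).set_body_json(body))
            // The tests above assert exactly one compact call is made.
            .expect(1)
            .mount(server)
            .await;
    }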
@@ -10,15 +10,14 @@
 use super::compact::COMPACT_WARNING_MESSAGE;
 use super::compact::FIRST_REPLY;
 use super::compact::SUMMARY_TEXT;
-use super::compact::TEST_COMPACT_PROMPT;
 use codex_core::CodexAuth;
 use codex_core::CodexConversation;
 use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
 use codex_core::NewConversation;
 use codex_core::built_in_model_providers;
+use codex_core::compact::SUMMARIZATION_PROMPT;
 use codex_core::config::Config;
-use codex_core::config::OPENAI_DEFAULT_MODEL;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_core::protocol::WarningEvent;
@@ -38,13 +37,22 @@ use tempfile::TempDir;
 use wiremock::MockServer;

 const AFTER_SECOND_RESUME: &str = "AFTER_SECOND_RESUME";
-const COMPACT_PROMPT_MARKER: &str =
-    "You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";

 fn network_disabled() -> bool {
     std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
 }

+fn body_contains_text(body: &str, text: &str) -> bool {
+    body.contains(&json_fragment(text))
+}
+
+fn json_fragment(text: &str) -> String {
+    serde_json::to_string(text)
+        .expect("serialize text to JSON")
+        .trim_matches('"')
+        .to_string()
+}
+
 fn filter_out_ghost_snapshot_entries(items: &[Value]) -> Vec<Value> {
     items
         .iter()
@@ -68,6 +76,14 @@ fn is_ghost_snapshot_message(item: &Value) -> bool {
         .is_some_and(|text| text.trim_start().starts_with("<ghost_snapshot>"))
 }

+fn normalize_line_endings_str(text: &str) -> String {
+    if text.contains('\r') {
+        text.replace("\r\n", "\n").replace('\r', "\n")
+    } else {
+        text.to_string()
+    }
+}
+
 fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
     request
         .get("input")
@@ -82,13 +98,44 @@ fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
                 .and_then(|arr| arr.first())
                 .and_then(|entry| entry.get("text"))
                 .and_then(Value::as_str)
-                == Some(summary_text)
+                .map(|text| text.contains(summary_text))
+                .unwrap_or(false)
         })
     })
    .cloned()
    .unwrap_or_else(|| panic!("expected summary message {summary_text}"))
}

+fn normalize_compact_prompts(requests: &mut [Value]) {
+    let normalized_summary_prompt = normalize_line_endings_str(SUMMARIZATION_PROMPT);
+    for request in requests {
+        if let Some(input) = request.get_mut("input").and_then(Value::as_array_mut) {
+            input.retain(|item| {
+                if item.get("type").and_then(Value::as_str) != Some("message")
+                    || item.get("role").and_then(Value::as_str) != Some("user")
+                {
+                    return true;
+                }
+                let content = item
+                    .get("content")
+                    .and_then(Value::as_array)
+                    .cloned()
+                    .unwrap_or_default();
+                if let Some(first) = content.first() {
+                    let text = first
+                        .get("text")
+                        .and_then(Value::as_str)
+                        .unwrap_or_default();
+                    let normalized_text = normalize_line_endings_str(text);
+                    !(text.is_empty() || normalized_text == normalized_summary_prompt)
+                } else {
+                    false
+                }
+            });
+        }
+    }
+}
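To see why `json_fragment` matters: the mock matchers inspect raw JSON request bodies, so multi-line prompt text appears there with escaped newlines and a plain substring search misses it. A small illustration (the sample values are my own, not from the suite):

    fn main() {
        let body = r#"{"text":"line one\nline two"}"#;
        // A plain substring search fails: the body holds the escaped form.
        assert!(!body.contains("line one\nline two"));
        // json_fragment escapes the needle the same way serde_json escapes the body.
        let fragment = serde_json::to_string("line one\nline two")
            .expect("serialize text to JSON")
            .trim_matches('"')
            .to_string();
        assert_eq!(fragment, r"line one\nline two");
        assert!(body.contains(&fragment));
    }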
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
/// Scenario: compact an initial conversation, resume it, fork one turn back, and
/// ensure the model-visible history matches expectations at each request.
@@ -101,9 +148,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
     // 1. Arrange mocked SSE responses for the initial compact/resume/fork flow.
     let server = MockServer::start().await;
     mount_initial_flow(&server).await;

+    let expected_model = "gpt-5.1-codex";
     // 2. Start a new conversation and drive it through the compact/resume/fork steps.
-    let (_home, config, manager, base) = start_test_conversation(&server).await;
+    let (_home, config, manager, base) =
+        start_test_conversation(&server, Some(expected_model)).await;

     user_turn(&base, "hello world").await;
     compact_conversation(&base).await;
@@ -126,7 +174,8 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
     user_turn(&forked, "AFTER_FORK").await;

     // 3. Capture the requests to the model and validate the history slices.
-    let requests = gather_request_bodies(&server).await;
+    let mut requests = gather_request_bodies(&server).await;
+    normalize_compact_prompts(&mut requests);

     // input after compact is a prefix of input after resume/fork
     let input_after_compact = json!(requests[requests.len() - 3]["input"]);
@@ -158,6 +207,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
         &fork_arr[..compact_arr.len()]
     );

+    let expected_model = requests[0]["model"]
+        .as_str()
+        .unwrap_or_default()
+        .to_string();
     let prompt = requests[0]["instructions"]
         .as_str()
         .unwrap_or_default()
@@ -179,7 +232,6 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
         .as_str()
         .unwrap_or_default()
         .to_string();
-    let expected_model = OPENAI_DEFAULT_MODEL;
     let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
     let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
     let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
@@ -283,7 +335,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
             "content": [
                 {
                     "type": "input_text",
-                    "text": TEST_COMPACT_PROMPT
+                    "text": SUMMARIZATION_PROMPT
                 }
             ]
         }
@@ -529,6 +581,9 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
         user_turn_3_after_fork
     ]);
     normalize_line_endings(&mut expected);
+    if let Some(arr) = expected.as_array_mut() {
+        normalize_compact_prompts(arr);
+    }
     assert_eq!(requests.len(), 5);
     assert_eq!(json!(requests), expected);
 }
@@ -548,7 +603,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
     mount_second_compact_flow(&server).await;

     // 2. Drive the conversation through compact -> resume -> fork -> compact -> resume.
-    let (_home, config, manager, base) = start_test_conversation(&server).await;
+    let (_home, config, manager, base) = start_test_conversation(&server, None).await;

     user_turn(&base, "hello world").await;
     compact_conversation(&base).await;
@@ -581,7 +636,8 @@ async fn compact_resume_after_second_compaction_preserves_history() {
     let resumed_again = resume_conversation(&manager, &config, forked_path).await;
     user_turn(&resumed_again, AFTER_SECOND_RESUME).await;

-    let requests = gather_request_bodies(&server).await;
+    let mut requests = gather_request_bodies(&server).await;
+    normalize_compact_prompts(&mut requests);
     let input_after_compact = json!(requests[requests.len() - 2]["input"]);
     let input_after_resume = json!(requests[requests.len() - 1]["input"]);

@@ -680,10 +736,16 @@ async fn compact_resume_after_second_compaction_preserves_history() {
         }
     ]);
     normalize_line_endings(&mut expected);
-    let last_request_after_2_compacts = json!([{
+    let mut last_request_after_2_compacts = json!([{
         "instructions": requests[requests.len() -1]["instructions"],
        "input": requests[requests.len() -1]["input"],
     }]);
+    if let Some(arr) = expected.as_array_mut() {
+        normalize_compact_prompts(arr);
+    }
+    if let Some(arr) = last_request_after_2_compacts.as_array_mut() {
+        normalize_compact_prompts(arr);
+    }
     assert_eq!(expected, last_request_after_2_compacts);
 }

@@ -741,7 +803,6 @@ async fn mount_initial_flow(server: &MockServer) {
     let match_first = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
         body.contains("\"text\":\"hello world\"")
-            && !body.contains(COMPACT_PROMPT_MARKER)
            && !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
            && !body.contains("\"text\":\"AFTER_COMPACT\"")
            && !body.contains("\"text\":\"AFTER_RESUME\"")
@@ -751,7 +812,7 @@ async fn mount_initial_flow(server: &MockServer) {

     let match_compact = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(COMPACT_PROMPT_MARKER)
+        body_contains_text(body, SUMMARIZATION_PROMPT) || body.contains(&json_fragment(FIRST_REPLY))
     };
     mount_sse_once_match(server, match_compact, sse2).await;

@@ -785,7 +846,7 @@ async fn mount_second_compact_flow(server: &MockServer) {

     let match_second_compact = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(COMPACT_PROMPT_MARKER) && body.contains("AFTER_FORK")
+        body.contains("AFTER_FORK")
     };
     mount_sse_once_match(server, match_second_compact, sse6).await;

@@ -798,6 +859,7 @@ async fn mount_second_compact_flow(server: &MockServer) {

 async fn start_test_conversation(
     server: &MockServer,
+    model: Option<&str>,
 ) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
     let model_provider = ModelProviderInfo {
         base_url: Some(format!("{}/v1", server.uri())),
@@ -806,8 +868,10 @@ async fn start_test_conversation(
     let home = TempDir::new().expect("create temp dir");
     let mut config = load_default_config_for_test(&home);
     config.model_provider = model_provider;
-    config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
+    config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
+    if let Some(model) = model {
+        config.model = model.to_string();
+    }
     let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
     let NewConversation { conversation, .. } = manager
         .new_conversation(config.clone())
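`mount_sse_once_match` is another `core_test_support` helper. wiremock itself accepts any `Fn(&Request) -> bool + Send + Sync` closure as a matcher, so a hand-rolled equivalent might look roughly like this (a sketch under that assumption; the real helper's signature may differ):

    use wiremock::{Mock, MockServer, Request, ResponseTemplate};

    // Hypothetical sketch of a mount-once-with-predicate helper.
    async fn mount_once_match<F>(server: &MockServer, matcher: F, sse_body: String)
    where
        F: Fn(&Request) -> bool + Send + Sync + 'static,
    {
        Mock::given(matcher)
            .respond_with(
                ResponseTemplate::new(200)
                    .insert_header("content-type", "text/event-stream")
                    .set_body_string(sse_body),
            )
            // Mounted "once": each mock is expected to serve exactly one request.
            .expect(1)
            .mount(server)
            .await;
    }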
@@ -11,7 +11,7 @@ use std::collections::HashSet;
 use std::path::Path;
 use std::process::Command as StdCommand;

-const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
+const MODEL_WITH_TOOL: &str = "test-gpt-5.1-codex";

 fn ripgrep_available() -> bool {
     StdCommand::new("rg")

@@ -31,12 +31,12 @@ const SCHEMA: &str = r#"

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
-    codex_returns_json_result("gpt-5".to_string()).await
+    codex_returns_json_result("gpt-5.1".to_string()).await
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
-    codex_returns_json_result("gpt-5-codex".to_string()).await
+    codex_returns_json_result("gpt-5.1-codex".to_string()).await
 }

 async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {
@@ -24,6 +24,7 @@ mod cli_stream;
 mod client;
 mod codex_delegate;
 mod compact;
+mod compact_remote;
 mod compact_resume_fork;
 mod deprecation_notice;
 mod exec;
@@ -119,7 +119,7 @@ async fn model_selects_expected_tools() {
     assert_eq!(
         gpt5_codex_tools,
         vec![
             "shell".to_string(),
             "shell_command".to_string(),
             "list_mcp_resources".to_string(),
             "list_mcp_resource_templates".to_string(),
             "read_mcp_resource".to_string(),
@@ -133,7 +133,7 @@ async fn model_selects_expected_tools() {
     assert_eq!(
         gpt51_codex_tools,
         vec![
             "shell".to_string(),
             "shell_command".to_string(),
             "list_mcp_resources".to_string(),
             "list_mcp_resource_templates".to_string(),
             "read_mcp_resource".to_string(),
@@ -147,7 +147,7 @@ async fn model_selects_expected_tools() {
     assert_eq!(
         gpt51_tools,
         vec![
             "shell".to_string(),
             "shell_command".to_string(),
             "list_mcp_resources".to_string(),
             "list_mcp_resource_templates".to_string(),
             "read_mcp_resource".to_string(),

@@ -160,9 +160,9 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
     // with the OpenAI schema, so we just verify the tool presence here
     let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
         (
-            "gpt-5",
+            "gpt-5.1",
             vec![
                 "shell",
                 "shell_command",
                 "list_mcp_resources",
                 "list_mcp_resource_templates",
                 "read_mcp_resource",
@@ -173,7 +173,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
         (
             "gpt-5.1",
             vec![
                 "shell",
                 "shell_command",
                 "list_mcp_resources",
                 "list_mcp_resource_templates",
                 "read_mcp_resource",
@@ -183,7 +183,19 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
             ],
         ),
         (
-            "gpt-5-codex",
+            "gpt-5.1-codex",
             vec![
                 "shell_command",
                 "list_mcp_resources",
                 "list_mcp_resource_templates",
                 "read_mcp_resource",
                 "update_plan",
                 "apply_patch",
                 "view_image",
             ],
         ),
         (
             "codex-auto",
             vec![
                 "shell",
                 "list_mcp_resources",
@@ -197,7 +209,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
         (
             "gpt-5.1-codex",
             vec![
                 "shell",
                 "shell_command",
                 "list_mcp_resources",
                 "list_mcp_resource_templates",
                 "read_mcp_resource",
@@ -364,7 +364,7 @@ async fn review_uses_custom_review_model_from_config() {
     // Choose a review model different from the main model; ensure it is used.
     let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
         cfg.model = "gpt-4.1".to_string();
-        cfg.review_model = "gpt-5".to_string();
+        cfg.review_model = "gpt-5.1".to_string();
     })
     .await;

@@ -394,7 +394,7 @@ async fn review_uses_custom_review_model_from_config() {
     // Assert the request body model equals the configured review model
     let request = &server.received_requests().await.unwrap()[0];
     let body = request.body_json::<serde_json::Value>().unwrap();
-    assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
+    assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");

     server.verify().await;
 }
@@ -1,14 +1,13 @@
 #![cfg(not(target_os = "windows"))]
 #![allow(clippy::expect_used)]

 use anyhow::Result;
 use codex_core::features::Feature;
 use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::SandboxPolicy;
 use core_test_support::assert_regex_match;
-use core_test_support::responses::ev_apply_patch_function_call;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
-use core_test_support::responses::ev_custom_tool_call;
 use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_local_shell_call;
 use core_test_support::responses::ev_response_created;
@@ -16,12 +15,18 @@ use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
 use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::ApplyPatchModelOutput;
+use core_test_support::test_codex::ShellModelOutput;
 use core_test_support::test_codex::test_codex;
 use pretty_assertions::assert_eq;
 use regex_lite::Regex;
 use serde_json::Value;
 use serde_json::json;
 use std::fs;
+use test_case::test_case;

+use crate::suite::apply_patch_cli::apply_patch_harness;
+use crate::suite::apply_patch_cli::mount_apply_patch;

 const FIXTURE_JSON: &str = r#"{
     "description": "This is an example JSON file.",
@@ -35,34 +40,88 @@ const FIXTURE_JSON: &str = r#"{
 }
 "#;

+fn shell_responses(
+    call_id: &str,
+    command: Vec<&str>,
+    output_type: ShellModelOutput,
+) -> Result<Vec<String>> {
+    match output_type {
+        ShellModelOutput::ShellCommand => {
+            let command = shlex::try_join(command)?;
+            let parameters = json!({
+                "command": command,
+                "timeout_ms": 2_000,
+            });
+            Ok(vec![
+                sse(vec![
+                    ev_response_created("resp-1"),
+                    ev_function_call(
+                        call_id,
+                        "shell_command",
+                        &serde_json::to_string(&parameters)?,
+                    ),
+                    ev_completed("resp-1"),
+                ]),
+                sse(vec![
+                    ev_assistant_message("msg-1", "done"),
+                    ev_completed("resp-2"),
+                ]),
+            ])
+        }
+        ShellModelOutput::Shell => {
+            let parameters = json!({
+                "command": command,
+                "timeout_ms": 2_000,
+            });
+            Ok(vec![
+                sse(vec![
+                    ev_response_created("resp-1"),
+                    ev_function_call(call_id, "shell", &serde_json::to_string(&parameters)?),
+                    ev_completed("resp-1"),
+                ]),
+                sse(vec![
+                    ev_assistant_message("msg-1", "done"),
+                    ev_completed("resp-2"),
+                ]),
+            ])
+        }
+        ShellModelOutput::LocalShell => Ok(vec![
+            sse(vec![
+                ev_response_created("resp-1"),
+                ev_local_shell_call(call_id, "completed", command),
+                ev_completed("resp-1"),
+            ]),
+            sse(vec![
+                ev_assistant_message("msg-1", "done"),
+                ev_completed("resp-2"),
+            ]),
+        ]),
+    }
+}
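The `ShellCommand` arm flattens the argv vector into a single shell string; `shlex::try_join` (from the `shlex` crate) quotes each argument as needed before joining. A quick illustration of the quoting behavior:

    fn main() -> Result<(), shlex::QuoteError> {
        // Arguments without shell metacharacters pass through unquoted.
        assert_eq!(
            shlex::try_join(vec!["/bin/echo", "shell"])?,
            "/bin/echo shell"
        );
        // Whitespace forces quoting so the string round-trips as one argument.
        assert_eq!(
            shlex::try_join(vec!["/bin/echo", "shell json"])?,
            "/bin/echo 'shell json'"
        );
        Ok(())
    }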
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_stays_json_without_freeform_apply_patch(
+    output_type: ShellModelOutput,
+) -> Result<()> {
     skip_if_no_network!(Ok(()));

     let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
         config.features.disable(Feature::ApplyPatchFreeform);
         config.model = "gpt-5".to_string();
         config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
     });
     let test = builder.build(&server).await?;

     let call_id = "shell-json";
-    let args = json!({
-        "command": ["/bin/echo", "shell json"],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/echo", "shell json"], output_type)?;
     let mock = mount_sse_sequence(&server, responses).await;

     test.submit_turn_with_policy(
@@ -80,7 +139,6 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {

     let mut parsed: Value = serde_json::from_str(output)?;
     if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
-        // duration_seconds is non-deterministic; remove it for deep equality
         let _ = metadata.remove("duration_seconds");
     }

@@ -102,31 +160,26 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_is_structured_with_freeform_apply_patch(
+    output_type: ShellModelOutput,
+) -> Result<()> {
     skip_if_no_network!(Ok(()));

     let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
         config.features.enable(Feature::ApplyPatchFreeform);
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
     });
     let test = builder.build(&server).await?;

     let call_id = "shell-structured";
-    let args = json!({
-        "command": ["/bin/echo", "freeform shell"],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?;
     let mock = mount_sse_sequence(&server, responses).await;

     test.submit_turn_with_policy(
@@ -159,14 +212,23 @@ freeform shell
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_preserves_fixture_json_without_serialization(
+    output_type: ShellModelOutput,
+) -> Result<()> {
     skip_if_no_network!(Ok(()));

     let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
         config.features.disable(Feature::ApplyPatchFreeform);
         config.model = "gpt-5".to_string();
         config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
     });
     let test = builder.build(&server).await?;

@@ -175,21 +237,11 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
     let fixture_path_str = fixture_path.to_string_lossy().to_string();

     let call_id = "shell-json-fixture";
-    let args = json!({
-        "command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(
+        call_id,
+        vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
+        output_type,
+    )?;
     let mock = mount_sse_sequence(&server, responses).await;

     test.submit_turn_with_policy(
@@ -232,12 +284,21 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_structures_fixture_with_serialization(
+    output_type: ShellModelOutput,
+) -> Result<()> {
     skip_if_no_network!(Ok(()));

     let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
         config.features.enable(Feature::ApplyPatchFreeform);
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
     });
     let test = builder.build(&server).await?;

@@ -246,21 +307,11 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
     let fixture_path_str = fixture_path.to_string_lossy().to_string();

     let call_id = "shell-structured-fixture";
-    let args = json!({
-        "command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
-        "timeout_ms": 1_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(
+        call_id,
+        vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
+        output_type,
+    )?;
     let mock = mount_sse_sequence(&server, responses).await;

     test.submit_turn_with_policy(
@@ -298,40 +349,26 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_for_freeform_tool_records_duration(
+    output_type: ShellModelOutput,
+) -> Result<()> {
     skip_if_no_network!(Ok(()));

     let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
+    let mut builder = test_codex().with_config(move |config| {
         config.include_apply_patch_tool = true;
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
     });
     let test = builder.build(&server).await?;

-    #[cfg(target_os = "linux")]
-    let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
-
-    #[cfg(target_os = "macos")]
-    let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
-
-    #[cfg(windows)]
-    let sleep_cmd = "timeout 1";
-
     let call_id = "shell-structured";
-    let args = json!({
-        "command": sleep_cmd,
-        "timeout_ms": 2_000,
-    });
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/bash", "-c", "sleep 1"], output_type)?;
     let mock = mount_sse_sequence(&server, responses).await;

     test.submit_turn_with_policy(
@@ -371,33 +408,26 @@ $"#;
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_output_reserializes_truncated_content() -> Result<()> {
+#[test_case(ShellModelOutput::Shell)]
+#[test_case(ShellModelOutput::ShellCommand)]
+#[test_case(ShellModelOutput::LocalShell)]
+async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
     skip_if_no_network!(Ok(()));

     let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.model = "gpt-5-codex".to_string();
+    let mut builder = test_codex().with_config(move |config| {
+        config.model = "gpt-5.1-codex".to_string();
         config.model_family =
-            find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
+            find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
+        if matches!(output_type, ShellModelOutput::ShellCommand) {
+            config.features.enable(Feature::ShellCommandTool);
+        }
+        let _ = output_type;
     });
     let test = builder.build(&server).await?;

     let call_id = "shell-truncated";
-    let args = json!({
-        "command": ["/bin/sh", "-c", "seq 1 400"],
-        "timeout_ms": 5_000,
-    });
-    let responses = vec![
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
+    let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "seq 1 400"], output_type)?;
     let mock = mount_sse_sequence(&server, responses).await;

     test.submit_turn_with_policy(
@@ -445,14 +475,16 @@ $"#;
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_custom_tool_output_is_structured(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
     skip_if_no_network!(Ok(()));

-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
-    });
-    let test = builder.build(&server).await?;
+    let harness = apply_patch_harness().await?;

     let call_id = "apply-patch-structured";
     let file_name = "structured.txt";
@@ -463,33 +495,17 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
 *** End Patch
 "#
     );
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_custom_tool_call(call_id, "apply_patch", &patch),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
+    mount_apply_patch(&harness, call_id, &patch, "done", output_type).await;

-    test.submit_turn_with_policy(
-        "apply the patch via custom tool",
-        SandboxPolicy::DangerFullAccess,
-    )
-    .await?;
+    harness
+        .test()
+        .submit_turn_with_policy(
+            "apply the patch via custom tool",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;

-    let req = mock
-        .last_request()
-        .expect("apply_patch output request recorded");
-    let output_item = req.custom_tool_call_output(call_id);
-    let output = output_item
-        .get("output")
-        .and_then(Value::as_str)
-        .expect("apply_patch output string");
+    let output = harness.apply_patch_output(call_id, output_type).await;

     let expected_pattern = format!(
         r"(?s)^Exit code: 0
@@ -499,53 +515,39 @@ Success. Updated the following files:
 A {file_name}
 ?$"
     );
-    assert_regex_match(&expected_pattern, output);
+    assert_regex_match(&expected_pattern, output.as_str());

     Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_custom_tool_call_creates_file(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
     skip_if_no_network!(Ok(()));

-    let server = start_mock_server().await;
-    let mut builder = test_codex().with_config(|config| {
-        config.include_apply_patch_tool = true;
-    });
-    let test = builder.build(&server).await?;
+    let harness = apply_patch_harness().await?;

     let call_id = "apply-patch-add-file";
     let file_name = "custom_tool_apply_patch.txt";
     let patch = format!(
         "*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
     );
-    let responses = vec![
-        sse(vec![
-            json!({"type": "response.created", "response": {"id": "resp-1"}}),
-            ev_custom_tool_call(call_id, "apply_patch", &patch),
-            ev_completed("resp-1"),
-        ]),
-        sse(vec![
-            ev_assistant_message("msg-1", "apply_patch done"),
-            ev_completed("resp-2"),
-        ]),
-    ];
-    let mock = mount_sse_sequence(&server, responses).await;
+    mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await;

-    test.submit_turn_with_policy(
-        "apply the patch via custom tool to create a file",
-        SandboxPolicy::DangerFullAccess,
-    )
-    .await?;
+    harness
+        .test()
+        .submit_turn_with_policy(
+            "apply the patch via custom tool to create a file",
+            SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
@@ -555,9 +557,9 @@ Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
let new_file_path = test.cwd.path().join(file_name);
|
||||
let new_file_path = harness.path(file_name);
|
||||
let created_contents = fs::read_to_string(&new_file_path)?;
|
||||
assert_eq!(
|
||||
created_contents, "custom tool content\n",
|
||||
@@ -568,49 +570,42 @@ A {file_name}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_custom_tool_call_updates_existing_file(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;

let call_id = "apply-patch-update-file";
let file_name = "custom_tool_apply_patch_existing.txt";
let file_path = test.cwd.path().join(file_name);
let file_path = harness.path(file_name);
fs::write(&file_path, "before\n")?;
let patch = format!(
"*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
);
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_custom_tool_call(call_id, "apply_patch", &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "apply_patch update done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;

test.submit_turn_with_policy(
"apply the patch via custom tool to update a file",
SandboxPolicy::DangerFullAccess,
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch update done",
output_type,
)
.await?;
.await;

let req = mock
.last_request()
.expect("apply_patch output request recorded");
let output_item = req.custom_tool_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
harness
.test()
.submit_turn_with_policy(
"apply the patch via custom tool to update a file",
SandboxPolicy::DangerFullAccess,
)
.await?;

let output = harness.apply_patch_output(call_id, output_type).await;

let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -620,7 +615,7 @@ Success. Updated the following files:
M {file_name}
?$"
);
assert_regex_match(&expected_pattern, output);
assert_regex_match(&expected_pattern, output.as_str());

let updated_contents = fs::read_to_string(file_path)?;
assert_eq!(updated_contents, "after\n", "expected updated file content");
@@ -629,99 +624,83 @@ M {file_name}
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_custom_tool_call_reports_failure_output(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;

let call_id = "apply-patch-failure";
let missing_file = "missing_custom_tool_apply_patch.txt";
let patch = format!(
"*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
);
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_custom_tool_call(call_id, "apply_patch", &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "apply_patch failure done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;

test.submit_turn_with_policy(
"attempt a failing apply_patch via custom tool",
SandboxPolicy::DangerFullAccess,
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch failure done",
output_type,
)
.await?;
.await;

let req = mock
.last_request()
.expect("apply_patch output request recorded");
let output_item = req.custom_tool_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
harness
.test()
.submit_turn_with_policy(
"attempt a failing apply_patch via custom tool",
SandboxPolicy::DangerFullAccess,
)
.await?;

let output = harness.apply_patch_output(call_id, output_type).await;

let expected_output = format!(
"apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
test.cwd.path().to_string_lossy()
harness.cwd().to_string_lossy()
);
assert_eq!(output, expected_output);
assert_eq!(output, expected_output.as_str());

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_function_call_output_is_structured() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_function_call_output_is_structured(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;

let call_id = "apply-patch-function";
let file_name = "function_apply_patch.txt";
let patch =
format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n");
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_apply_patch_function_call(call_id, &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "apply_patch function done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;

test.submit_turn_with_policy(
"apply the patch via function-call apply_patch",
SandboxPolicy::DangerFullAccess,
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch function done",
output_type,
)
.await?;

let req = mock
.last_request()
.expect("apply_patch function output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
.await;
harness
.test()
.submit_turn_with_policy(
"apply the patch via function-call apply_patch",
SandboxPolicy::DangerFullAccess,
)
.await?;

let output = harness.apply_patch_output(call_id, output_type).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
@@ -730,40 +709,32 @@ Success. Updated the following files:
A {file_name}
?$"
);
assert_regex_match(&expected_pattern, output);
assert_regex_match(&expected_pattern, output.as_str());

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
let mut builder = test_codex().with_config(move |config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
});
let test = builder.build(&server).await?;

let call_id = "shell-nonzero-exit";
let args = json!({
"command": ["/bin/sh", "-c", "exit 42"],
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "shell failure handled"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;

test.submit_turn_with_policy(
@@ -788,15 +759,68 @@ Output:
Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::ShellCommandTool);
});
let test = builder.build(&server).await?;

let call_id = "shell-command";
let args = json!({
"command": "echo shell command",
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "shell_command done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;

test.submit_turn_with_policy(
"run the shell_command script in the user's shell",
SandboxPolicy::DangerFullAccess,
)
.await?;

let req = mock
.last_request()
.expect("shell_command output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("shell_command output string");

let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
shell command
?$";
assert_regex_match(expected_pattern, output);

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn local_shell_call_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;

@@ -57,9 +57,9 @@ async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<
#[allow(clippy::expect_used)]
async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
let mut builder = test_codex().with_config(|config| {
config.model = "test-gpt-5-codex".to_string();
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5-codex").expect("test-gpt-5-codex model family");
find_family_for_model("test-gpt-5.1-codex").expect("test-gpt-5.1-codex model family");
});
builder.build(server).await
}

@@ -197,9 +197,9 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> {

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex model family");
});
let fixture = builder.build(&server).await?;

@@ -425,8 +425,8 @@ async fn shell_timeout_handles_background_grandchild_stdout() -> Result<()> {

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
});
let test = builder.build(&server).await?;

@@ -27,7 +27,6 @@ use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use escargot::CargoBuild;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use std::collections::HashMap;
@@ -41,14 +40,14 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
// Use the test model that wires function tools like grep_files
config.model = "test-gpt-5-codex".to_string();
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
find_family_for_model("test-gpt-5.1-codex").expect("model family for test model");
});
let test = builder.build(&server).await?;

// Construct a very long, non-existent path to force a RespondToModel error with a large message
let long_path = "a".repeat(20_000);
let long_path = "long path text should trigger truncation".repeat(8_000);
let call_id = "grep-huge-error";
let args = json!({
"pattern": "alpha",
@@ -80,12 +79,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {

tracing::debug!(output = %output, "truncated function error output");

// Expect plaintext with byte-truncation marker and no omitted-lines marker
// Expect plaintext with token-based truncation marker and no omitted-lines marker
assert!(
serde_json::from_str::<serde_json::Value>(&output).is_err(),
"expected error output to be plain text",
);
let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]\s*$"#;
assert!(
!output.contains("Total output lines:"),
"error output should not include line-based truncation header: {output}",
);
let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$";
assert_regex_match(truncated_pattern, &output);
assert!(
!output.contains("omitted"),
@@ -105,9 +108,9 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {

// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;

@@ -197,9 +200,9 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
let server = start_mock_server().await;

let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;
let call_id = "shell-single-truncation";
@@ -269,7 +272,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
let tool_name = format!("mcp__{server_name}__echo");

// Build a very large message to exceed 10KiB once serialized.
let large_msg = "long-message-with-newlines-".repeat(600);
let large_msg = "long-message-with-newlines-".repeat(6000);
let args_json = serde_json::json!({ "message": large_msg });

mount_sse_once(
@@ -334,22 +337,19 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
.function_call_output_text(call_id)
.context("function_call_output present for rmcp call")?;

// Expect plain text with byte-based truncation marker.
// Expect plain text with token-based truncation marker; the original JSON body
// is truncated in the middle of the echo string.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated MCP output to be plain text"
);
assert!(
output.starts_with("Total output lines: 1\n\n{"),
"expected total line header and JSON head, got: {output}"
!output.contains("Total output lines:"),
"MCP output should not include line-based truncation header: {output}"
);

let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]")
.expect("compile regex");
assert!(
byte_marker.is_match(&output),
"expected byte truncation marker, got: {output}"
);
let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
assert_regex_match(truncated_pattern, &output);

Ok(())
}
@@ -453,3 +453,164 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {

Ok(())
}

// Token-based policy should report token counts even when truncation is byte-estimated.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn token_policy_marker_reports_tokens() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string(); // token policy
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
config.tool_output_token_limit = Some(50); // small budget to force truncation
});
let fixture = builder.build(&server).await?;

let call_id = "shell-token-marker";
let args = json!({
"command": ["/bin/sh", "-c", "seq 1 150"],
"timeout_ms": 5_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
let done_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;

fixture
.submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
.await?;

let output = done_mock
.single_request()
.function_call_output_text(call_id)
.context("shell output present")?;

assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output);

Ok(())
}

// Byte-based policy should report bytes removed.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn byte_policy_marker_reports_bytes() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1".to_string(); // byte policy
config.model_family = find_family_for_model("gpt-5.1").expect("model family for gpt-5.1");
config.tool_output_token_limit = Some(50); // ~200 byte cap
});
let fixture = builder.build(&server).await?;

let call_id = "shell-byte-marker";
let args = json!({
"command": ["/bin/sh", "-c", "seq 1 150"],
"timeout_ms": 5_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
let done_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;

fixture
.submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
.await?;

let output = done_mock
.single_request()
.function_call_output_text(call_id)
.context("shell output present")?;

assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output);

Ok(())
}

// Overriding config with a large token budget should avoid truncation.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn large_budget_avoids_truncation() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
config.tool_output_token_limit = Some(50_000); // ample budget
});
let fixture = builder.build(&server).await?;

let call_id = "shell-no-trunc";
let args = json!({
"command": ["/bin/sh", "-c", "seq 1 1000"],
"timeout_ms": 5_000,
});

mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
let done_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;

fixture
.submit_turn_with_policy(
"run big output without truncation",
SandboxPolicy::DangerFullAccess,
)
.await?;

let output = done_mock
.single_request()
.function_call_output_text(call_id)
.context("shell output present")?;

assert!(
!output.contains("truncated"),
"output should remain untruncated with ample budget"
);

Ok(())
}

@@ -30,8 +30,8 @@ use pretty_assertions::assert_eq;
async fn undo_harness() -> Result<TestCodexHarness> {
TestCodexHarness::with_config(|config: &mut Config| {
config.include_apply_patch_tool = true;
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
config.features.enable(Feature::GhostCommit);
})
.await

@@ -26,9 +26,11 @@ use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_event_with_timeout;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use tokio::time::Duration;

fn extract_output_text(item: &Value) -> Option<&str> {
item.get("output").and_then(|value| match value {
@@ -159,7 +161,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {

let server = start_mock_server().await;

let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
@@ -236,7 +238,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {

let server = start_mock_server().await;

let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
@@ -288,28 +290,22 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
})
.await?;

let begin_event = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
_ => None,
})
.await;

assert_eq!(
begin_event.cwd, workdir,
"exec_command cwd should reflect the requested workdir override"
);

wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");

let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();

let outputs = collect_tool_outputs(&bodies)?;
let output = outputs
.get(call_id)
.expect("missing exec_command workdir output");
let output_text = output.output.trim();
let output_canonical = std::fs::canonicalize(output_text)?;
let expected_canonical = std::fs::canonicalize(&workdir)?;
assert_eq!(
output_canonical, expected_canonical,
"pwd should reflect the requested workdir override"
);

Ok(())
}

@@ -820,7 +816,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {

let call_id = "uexec-metadata";
let args = serde_json::json!({
"cmd": "printf 'abcdefghijklmnopqrstuvwxyz'",
"cmd": "printf 'token one token two token three token four token five token six token seven'",
"yield_time_ms": 500,
"max_output_tokens": 6,
});
@@ -1301,7 +1297,7 @@ async fn unified_exec_streams_after_lagged_output() -> Result<()> {
import sys
import time

chunk = b'x' * (1 << 20)
chunk = b'long content here to trigger truncation' * (1 << 10)
for _ in range(4):
sys.stdout.buffer.write(chunk)
sys.stdout.flush()
@@ -1371,8 +1367,13 @@ PY
summary: ReasoningSummary::Auto,
})
.await?;

wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
// This is a worst case scenario for the truncate logic.
wait_for_event_with_timeout(
&codex,
|event| matches!(event, EventMsg::TaskComplete(_)),
Duration::from_secs(10),
)
.await;

let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
@@ -1529,14 +1530,15 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> {
} = builder.build(&server).await?;

let script = r#"python3 - <<'PY'
for i in range(300):
print(f"line-{i}")
for i in range(10000):
print("token token ")
PY
"#;

let call_id = "uexec-large-output";
let args = serde_json::json!({
"cmd": script,
"max_output_tokens": 100,
"yield_time_ms": 500,
});

@@ -1583,15 +1585,14 @@ PY
let outputs = collect_tool_outputs(&bodies)?;
let large_output = outputs.get(call_id).expect("missing large output summary");

assert_regex_match(
concat!(
r"(?s)",
r"line-0.*?",
r"\[\.{3} omitted \d+ of \d+ lines \.{3}\].*?",
r"line-299",
),
&large_output.output,
);
let output_text = large_output.output.replace("\r\n", "\n");
let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}$"#;
assert_regex_match(truncated_pattern, &output_text);

let original_tokens = large_output
.original_token_count
.expect("missing original_token_count for large output summary");
assert!(original_tokens > 0);

Ok(())
}

@@ -270,7 +270,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;

let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
});

@@ -55,7 +55,7 @@ Start a new session with optional overrides:

Request `newConversation` params (subset; a combined example follows the list):

- `model`: string model id (e.g. "o3", "gpt-5", "gpt-5-codex")
- `model`: string model id (e.g. "o3", "gpt-5.1", "gpt-5.1-codex")
- `profile`: optional named profile
- `cwd`: optional working directory
- `approvalPolicy`: `untrusted` | `on-request` | `on-failure` | `never`
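
Putting these together, a `newConversation` request that sets every field listed above might look like the following (the values here are illustrative only, not taken from the diff):

```json
{ "jsonrpc": "2.0", "id": 0, "method": "newConversation", "params": { "model": "gpt-5.1", "profile": "default", "cwd": "/home/user/project", "approvalPolicy": "on-request" } }
```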
@@ -119,13 +119,13 @@ For the complete request/response shapes and flow examples, see the [“Auth end
## Example: start and send a message

```json
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5", "approvalPolicy": "on-request" } }
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5.1", "approvalPolicy": "on-request" } }
```

Server responds:

```json
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5", "rolloutPath": "/path/to/rollout.jsonl" } }
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5.1", "rolloutPath": "/path/to/rollout.jsonl" } }
```

Then send input:
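
A minimal sketch of that follow-up request, assuming the input method is named `sendUserMessage` and takes the `conversationId` returned above plus a list of text items (the method name and item shape are assumptions, not shown in this excerpt):

```json
{ "jsonrpc": "2.0", "id": 2, "method": "sendUserMessage", "params": { "conversationId": "c7b0…", "items": [ { "type": "text", "text": "Hello Codex" } ] } }
```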