mirror of
https://github.com/openai/codex.git
synced 2026-02-04 07:53:43 +00:00
Compare commits
33 Commits
jif/fix-ui
...
centralize
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
377b251d59 | ||
|
|
acd62cc610 | ||
|
|
0e30347746 | ||
|
|
a4be4d78b9 | ||
|
|
00c1de0c56 | ||
|
|
190e7eb104 | ||
|
|
061862a0e2 | ||
|
|
c72b2ad766 | ||
|
|
80783a7bb9 | ||
|
|
ed77d2d977 | ||
|
|
abccd3e367 | ||
|
|
0f4fd33ddd | ||
|
|
e258f0f044 | ||
|
|
a6b9471548 | ||
|
|
3059373e06 | ||
|
|
0b4527146e | ||
|
|
6745b12427 | ||
|
|
f59978ed3d | ||
|
|
3ab6028e80 | ||
|
|
892eaff46d | ||
|
|
8e291a1706 | ||
|
|
aee321f62b | ||
|
|
ed32da04d7 | ||
|
|
8ae3949072 | ||
|
|
273819aaae | ||
|
|
4cd6b01494 | ||
|
|
dd59b16a17 | ||
|
|
bac7acaa7c | ||
|
|
3c90728a29 | ||
|
|
34c5a9eaa9 | ||
|
|
f522aafb7f | ||
|
|
fd0673e457 | ||
|
|
00b1e130b3 |
@@ -33,6 +33,8 @@ Then simply run `codex` to get started:
|
||||
codex
|
||||
```
|
||||
|
||||
If you're running into upgrade issues with Homebrew, see the [FAQ entry on brew upgrade codex](./docs/faq.md#brew-update-codex-isnt-upgrading-me).
|
||||
|
||||
<details>
|
||||
<summary>You can also go to the <a href="https://github.com/openai/codex/releases/latest">latest GitHub Release</a> and download the appropriate binary for your platform.</summary>
|
||||
|
||||
|
||||
71
codex-rs/Cargo.lock
generated
71
codex-rs/Cargo.lock
generated
@@ -1061,12 +1061,12 @@ dependencies = [
|
||||
"codex-apply-patch",
|
||||
"codex-async-utils",
|
||||
"codex-file-search",
|
||||
"codex-mcp-client",
|
||||
"codex-otel",
|
||||
"codex-protocol",
|
||||
"codex-rmcp-client",
|
||||
"codex-utils-pty",
|
||||
"codex-utils-string",
|
||||
"codex-utils-tokenizer",
|
||||
"core-foundation 0.9.4",
|
||||
"core_test_support",
|
||||
"dirs",
|
||||
@@ -1075,6 +1075,7 @@ dependencies = [
|
||||
"escargot",
|
||||
"eventsource-stream",
|
||||
"futures",
|
||||
"http",
|
||||
"indexmap 2.10.0",
|
||||
"landlock",
|
||||
"libc",
|
||||
@@ -1249,19 +1250,6 @@ dependencies = [
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-mcp-client"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"mcp-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-mcp-server"
|
||||
version = "0.0.0"
|
||||
@@ -1517,6 +1505,16 @@ dependencies = [
|
||||
name = "codex-utils-string"
|
||||
version = "0.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "codex-utils-tokenizer"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"pretty_assertions",
|
||||
"thiserror 2.0.16",
|
||||
"tiktoken-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "color-eyre"
|
||||
version = "0.6.5"
|
||||
@@ -1637,6 +1635,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
"codex-core",
|
||||
"codex-protocol",
|
||||
"notify",
|
||||
"regex-lite",
|
||||
"serde_json",
|
||||
@@ -2314,6 +2313,17 @@ dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex-automata",
|
||||
"regex-syntax 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.3.0"
|
||||
@@ -4631,7 +4641,7 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
"quinn-proto",
|
||||
"quinn-udp",
|
||||
"rustc-hash",
|
||||
"rustc-hash 2.1.1",
|
||||
"rustls",
|
||||
"socket2 0.6.0",
|
||||
"thiserror 2.0.16",
|
||||
@@ -4651,7 +4661,7 @@ dependencies = [
|
||||
"lru-slab",
|
||||
"rand 0.9.2",
|
||||
"ring",
|
||||
"rustc-hash",
|
||||
"rustc-hash 2.1.1",
|
||||
"rustls",
|
||||
"rustls-pki-types",
|
||||
"slab",
|
||||
@@ -4945,9 +4955,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rmcp"
|
||||
version = "0.8.2"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e35d31f89beb59c83bc31363426da25b323ce0c2e5b53c7bf29867d16ee7898"
|
||||
checksum = "1fdad1258f7259fdc0f2dfc266939c82c3b5d1fd72bcde274d600cdc27e60243"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"bytes",
|
||||
@@ -4979,9 +4989,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rmcp-macros"
|
||||
version = "0.8.2"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d88518b38110c439a03f0f4eee40e5105d648a530711cb87f98991e3f324a664"
|
||||
checksum = "ede0589a208cc7ce81d1be68aa7e74b917fcd03c81528408bab0457e187dcd9b"
|
||||
dependencies = [
|
||||
"darling 0.21.3",
|
||||
"proc-macro2",
|
||||
@@ -4996,6 +5006,12 @@ version = "0.1.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.1.1"
|
||||
@@ -6176,6 +6192,21 @@ dependencies = [
|
||||
"zune-jpeg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tiktoken-rs"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "25563eeba904d770acf527e8b370fe9a5547bacd20ff84a0b6c3bc41288e5625"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
"bstr",
|
||||
"fancy-regex",
|
||||
"lazy_static",
|
||||
"regex",
|
||||
"rustc-hash 1.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.44"
|
||||
|
||||
@@ -20,7 +20,6 @@ members = [
|
||||
"git-tooling",
|
||||
"linux-sandbox",
|
||||
"login",
|
||||
"mcp-client",
|
||||
"mcp-server",
|
||||
"mcp-types",
|
||||
"ollama",
|
||||
@@ -37,6 +36,7 @@ members = [
|
||||
"utils/readiness",
|
||||
"utils/pty",
|
||||
"utils/string",
|
||||
"utils/tokenizer",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
@@ -67,7 +67,6 @@ codex-file-search = { path = "file-search" }
|
||||
codex-git-tooling = { path = "git-tooling" }
|
||||
codex-linux-sandbox = { path = "linux-sandbox" }
|
||||
codex-login = { path = "login" }
|
||||
codex-mcp-client = { path = "mcp-client" }
|
||||
codex-mcp-server = { path = "mcp-server" }
|
||||
codex-ollama = { path = "ollama" }
|
||||
codex-otel = { path = "otel" }
|
||||
@@ -82,6 +81,7 @@ codex-utils-json-to-toml = { path = "utils/json-to-toml" }
|
||||
codex-utils-pty = { path = "utils/pty" }
|
||||
codex-utils-readiness = { path = "utils/readiness" }
|
||||
codex-utils-string = { path = "utils/string" }
|
||||
codex-utils-tokenizer = { path = "utils/tokenizer" }
|
||||
core_test_support = { path = "core/tests/common" }
|
||||
mcp-types = { path = "mcp-types" }
|
||||
mcp_test_support = { path = "mcp-server/tests/common" }
|
||||
@@ -116,6 +116,7 @@ env_logger = "0.11.5"
|
||||
escargot = "0.5"
|
||||
eventsource-stream = "0.2.3"
|
||||
futures = { version = "0.3", default-features = false }
|
||||
http = "1.3.1"
|
||||
icu_decimal = "2.0.0"
|
||||
icu_locale_core = "2.0.0"
|
||||
ignore = "0.4.23"
|
||||
@@ -153,7 +154,7 @@ ratatui = "0.29.0"
|
||||
ratatui-macros = "0.6.0"
|
||||
regex-lite = "0.1.7"
|
||||
reqwest = "0.12"
|
||||
rmcp = { version = "0.8.2", default-features = false }
|
||||
rmcp = { version = "0.8.3", default-features = false }
|
||||
schemars = "0.8.22"
|
||||
seccompiler = "0.5.0"
|
||||
sentry = "0.34.0"
|
||||
@@ -246,7 +247,7 @@ unwrap_used = "deny"
|
||||
# cargo-shear cannot see the platform-specific openssl-sys usage, so we
|
||||
# silence the false positive here instead of deleting a real dependency.
|
||||
[workspace.metadata.cargo-shear]
|
||||
ignored = ["openssl-sys", "codex-utils-readiness"]
|
||||
ignored = ["openssl-sys", "codex-utils-readiness", "codex-utils-tokenizer"]
|
||||
|
||||
[profile.release]
|
||||
lto = "fat"
|
||||
|
||||
@@ -23,6 +23,7 @@ use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use ts_rs::ExportError;
|
||||
use ts_rs::TS;
|
||||
|
||||
const HEADER: &str = "// GENERATED CODE! DO NOT MODIFY BY HAND!\n\n";
|
||||
@@ -104,6 +105,19 @@ macro_rules! for_each_schema_type {
|
||||
};
|
||||
}
|
||||
|
||||
fn export_ts_with_context<F>(label: &str, export: F) -> Result<()>
|
||||
where
|
||||
F: FnOnce() -> std::result::Result<(), ExportError>,
|
||||
{
|
||||
match export() {
|
||||
Ok(()) => Ok(()),
|
||||
Err(ExportError::CannotBeExported(ty)) => Err(anyhow!(
|
||||
"failed to export {label}: dependency {ty} cannot be exported"
|
||||
)),
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
|
||||
generate_ts(out_dir, prettier)?;
|
||||
generate_json(out_dir)?;
|
||||
@@ -113,13 +127,17 @@ pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
|
||||
pub fn generate_ts(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
|
||||
ensure_dir(out_dir)?;
|
||||
|
||||
ClientRequest::export_all_to(out_dir)?;
|
||||
export_client_responses(out_dir)?;
|
||||
ClientNotification::export_all_to(out_dir)?;
|
||||
export_ts_with_context("ClientRequest", || ClientRequest::export_all_to(out_dir))?;
|
||||
export_ts_with_context("client responses", || export_client_responses(out_dir))?;
|
||||
export_ts_with_context("ClientNotification", || {
|
||||
ClientNotification::export_all_to(out_dir)
|
||||
})?;
|
||||
|
||||
ServerRequest::export_all_to(out_dir)?;
|
||||
export_server_responses(out_dir)?;
|
||||
ServerNotification::export_all_to(out_dir)?;
|
||||
export_ts_with_context("ServerRequest", || ServerRequest::export_all_to(out_dir))?;
|
||||
export_ts_with_context("server responses", || export_server_responses(out_dir))?;
|
||||
export_ts_with_context("ServerNotification", || {
|
||||
ServerNotification::export_all_to(out_dir)
|
||||
})?;
|
||||
|
||||
generate_index_ts(out_dir)?;
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ use crate::JSONRPCNotification;
|
||||
use crate::JSONRPCRequest;
|
||||
use crate::RequestId;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::account::Account;
|
||||
use codex_protocol::config_types::ForcedLoginMethod;
|
||||
use codex_protocol::config_types::ReasoningEffort;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
@@ -93,6 +94,43 @@ macro_rules! client_request_definitions {
|
||||
}
|
||||
|
||||
client_request_definitions! {
|
||||
/// NEW APIs
|
||||
#[serde(rename = "model/list")]
|
||||
#[ts(rename = "model/list")]
|
||||
ListModels {
|
||||
params: ListModelsParams,
|
||||
response: ListModelsResponse,
|
||||
},
|
||||
|
||||
#[serde(rename = "account/login")]
|
||||
#[ts(rename = "account/login")]
|
||||
LoginAccount {
|
||||
params: LoginAccountParams,
|
||||
response: LoginAccountResponse,
|
||||
},
|
||||
|
||||
#[serde(rename = "account/logout")]
|
||||
#[ts(rename = "account/logout")]
|
||||
LogoutAccount {
|
||||
params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
|
||||
response: LogoutAccountResponse,
|
||||
},
|
||||
|
||||
#[serde(rename = "account/rateLimits/read")]
|
||||
#[ts(rename = "account/rateLimits/read")]
|
||||
GetAccountRateLimits {
|
||||
params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
|
||||
response: GetAccountRateLimitsResponse,
|
||||
},
|
||||
|
||||
#[serde(rename = "account/read")]
|
||||
#[ts(rename = "account/read")]
|
||||
GetAccount {
|
||||
params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
|
||||
response: GetAccountResponse,
|
||||
},
|
||||
|
||||
/// DEPRECATED APIs below
|
||||
Initialize {
|
||||
params: InitializeParams,
|
||||
response: InitializeResponse,
|
||||
@@ -106,13 +144,6 @@ client_request_definitions! {
|
||||
params: ListConversationsParams,
|
||||
response: ListConversationsResponse,
|
||||
},
|
||||
#[serde(rename = "model/list")]
|
||||
#[ts(rename = "model/list")]
|
||||
/// List available Codex models along with display metadata.
|
||||
ListModels {
|
||||
params: ListModelsParams,
|
||||
response: ListModelsResponse,
|
||||
},
|
||||
/// Resume a recorded Codex conversation from a rollout file.
|
||||
ResumeConversation {
|
||||
params: ResumeConversationParams,
|
||||
@@ -191,12 +222,6 @@ client_request_definitions! {
|
||||
params: ExecOneOffCommandParams,
|
||||
response: ExecOneOffCommandResponse,
|
||||
},
|
||||
#[serde(rename = "account/rateLimits/read")]
|
||||
#[ts(rename = "account/rateLimits/read")]
|
||||
GetAccountRateLimits {
|
||||
params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
|
||||
response: GetAccountRateLimitsResponse,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
|
||||
@@ -352,6 +377,38 @@ pub struct ListModelsResponse {
|
||||
pub next_cursor: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "type")]
|
||||
#[ts(tag = "type")]
|
||||
pub enum LoginAccountParams {
|
||||
#[serde(rename = "apiKey")]
|
||||
#[ts(rename = "apiKey")]
|
||||
ApiKey {
|
||||
#[serde(rename = "apiKey")]
|
||||
#[ts(rename = "apiKey")]
|
||||
api_key: String,
|
||||
},
|
||||
#[serde(rename = "chatgpt")]
|
||||
#[ts(rename = "chatgpt")]
|
||||
ChatGpt,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct LoginAccountResponse {
|
||||
/// Only set if the login method is ChatGPT.
|
||||
#[schemars(with = "String")]
|
||||
pub login_id: Option<Uuid>,
|
||||
|
||||
/// URL the client should open in a browser to initiate the OAuth flow.
|
||||
/// Only set if the login method is ChatGPT.
|
||||
pub auth_url: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct LogoutAccountResponse {}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResumeConversationParams {
|
||||
@@ -477,6 +534,12 @@ pub struct GetAccountRateLimitsResponse {
|
||||
pub rate_limits: RateLimitSnapshot,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(transparent)]
|
||||
#[ts(export)]
|
||||
#[ts(type = "Account | null")]
|
||||
pub struct GetAccountResponse(#[ts(type = "Account | null")] pub Option<Account>);
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GetAuthStatusResponse {
|
||||
@@ -875,6 +938,13 @@ pub struct AuthStatusChangeNotification {
|
||||
#[serde(tag = "method", content = "params", rename_all = "camelCase")]
|
||||
#[strum(serialize_all = "camelCase")]
|
||||
pub enum ServerNotification {
|
||||
/// NEW NOTIFICATIONS
|
||||
#[serde(rename = "account/rateLimits/updated")]
|
||||
#[ts(rename = "account/rateLimits/updated")]
|
||||
#[strum(serialize = "account/rateLimits/updated")]
|
||||
AccountRateLimitsUpdated(RateLimitSnapshot),
|
||||
|
||||
/// DEPRECATED NOTIFICATIONS below
|
||||
/// Authentication status changed
|
||||
AuthStatusChange(AuthStatusChangeNotification),
|
||||
|
||||
@@ -888,6 +958,7 @@ pub enum ServerNotification {
|
||||
impl ServerNotification {
|
||||
pub fn to_params(self) -> Result<serde_json::Value, serde_json::Error> {
|
||||
match self {
|
||||
ServerNotification::AccountRateLimitsUpdated(params) => serde_json::to_value(params),
|
||||
ServerNotification::AuthStatusChange(params) => serde_json::to_value(params),
|
||||
ServerNotification::LoginChatGptComplete(params) => serde_json::to_value(params),
|
||||
ServerNotification::SessionConfigured(params) => serde_json::to_value(params),
|
||||
@@ -1043,16 +1114,89 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serialize_account_login_api_key() -> Result<()> {
|
||||
let request = ClientRequest::LoginAccount {
|
||||
request_id: RequestId::Integer(2),
|
||||
params: LoginAccountParams::ApiKey {
|
||||
api_key: "secret".to_string(),
|
||||
},
|
||||
};
|
||||
assert_eq!(
|
||||
json!({
|
||||
"method": "account/login",
|
||||
"id": 2,
|
||||
"params": {
|
||||
"type": "apiKey",
|
||||
"apiKey": "secret"
|
||||
}
|
||||
}),
|
||||
serde_json::to_value(&request)?,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serialize_account_login_chatgpt() -> Result<()> {
|
||||
let request = ClientRequest::LoginAccount {
|
||||
request_id: RequestId::Integer(3),
|
||||
params: LoginAccountParams::ChatGpt,
|
||||
};
|
||||
assert_eq!(
|
||||
json!({
|
||||
"method": "account/login",
|
||||
"id": 3,
|
||||
"params": {
|
||||
"type": "chatgpt"
|
||||
}
|
||||
}),
|
||||
serde_json::to_value(&request)?,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serialize_account_logout() -> Result<()> {
|
||||
let request = ClientRequest::LogoutAccount {
|
||||
request_id: RequestId::Integer(4),
|
||||
params: None,
|
||||
};
|
||||
assert_eq!(
|
||||
json!({
|
||||
"method": "account/logout",
|
||||
"id": 4,
|
||||
}),
|
||||
serde_json::to_value(&request)?,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serialize_get_account() -> Result<()> {
|
||||
let request = ClientRequest::GetAccount {
|
||||
request_id: RequestId::Integer(5),
|
||||
params: None,
|
||||
};
|
||||
assert_eq!(
|
||||
json!({
|
||||
"method": "account/read",
|
||||
"id": 5,
|
||||
}),
|
||||
serde_json::to_value(&request)?,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serialize_list_models() -> Result<()> {
|
||||
let request = ClientRequest::ListModels {
|
||||
request_id: RequestId::Integer(2),
|
||||
request_id: RequestId::Integer(6),
|
||||
params: ListModelsParams::default(),
|
||||
};
|
||||
assert_eq!(
|
||||
json!({
|
||||
"method": "model/list",
|
||||
"id": 2,
|
||||
"id": 6,
|
||||
"params": {}
|
||||
}),
|
||||
serde_json::to_value(&request)?,
|
||||
|
||||
@@ -90,9 +90,8 @@ use codex_login::ShutdownHandle;
|
||||
use codex_login::run_login_server;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::config_types::ForcedLoginMethod;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::InputMessageKind;
|
||||
use codex_protocol::protocol::RateLimitSnapshot;
|
||||
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
|
||||
use codex_protocol::user_input::UserInput as CoreInputItem;
|
||||
@@ -177,6 +176,27 @@ impl CodexMessageProcessor {
|
||||
ClientRequest::ListModels { request_id, params } => {
|
||||
self.list_models(request_id, params).await;
|
||||
}
|
||||
ClientRequest::LoginAccount {
|
||||
request_id,
|
||||
params: _,
|
||||
} => {
|
||||
self.send_unimplemented_error(request_id, "account/login")
|
||||
.await;
|
||||
}
|
||||
ClientRequest::LogoutAccount {
|
||||
request_id,
|
||||
params: _,
|
||||
} => {
|
||||
self.send_unimplemented_error(request_id, "account/logout")
|
||||
.await;
|
||||
}
|
||||
ClientRequest::GetAccount {
|
||||
request_id,
|
||||
params: _,
|
||||
} => {
|
||||
self.send_unimplemented_error(request_id, "account/read")
|
||||
.await;
|
||||
}
|
||||
ClientRequest::ResumeConversation { request_id, params } => {
|
||||
self.handle_resume_conversation(request_id, params).await;
|
||||
}
|
||||
@@ -258,6 +278,15 @@ impl CodexMessageProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_unimplemented_error(&self, request_id: RequestId, method: &str) {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INTERNAL_ERROR_CODE,
|
||||
message: format!("{method} is not implemented yet"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
}
|
||||
|
||||
async fn login_api_key(&mut self, request_id: RequestId, params: LoginApiKeyParams) {
|
||||
if matches!(
|
||||
self.config.forced_login_method,
|
||||
@@ -940,18 +969,9 @@ impl CodexMessageProcessor {
|
||||
},
|
||||
))
|
||||
.await;
|
||||
let initial_messages = session_configured.initial_messages.map(|msgs| {
|
||||
msgs.into_iter()
|
||||
.filter(|event| {
|
||||
// Don't send non-plain user messages (like user instructions
|
||||
// or environment context) back so they don't get rendered.
|
||||
if let EventMsg::UserMessage(user_message) = event {
|
||||
return matches!(user_message.kind, Some(InputMessageKind::Plain));
|
||||
}
|
||||
true
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
let initial_messages = session_configured
|
||||
.initial_messages
|
||||
.map(|msgs| msgs.into_iter().collect());
|
||||
|
||||
// Reply with conversation id + model and initial messages (when present)
|
||||
let response = codex_app_server_protocol::ResumeConversationResponse {
|
||||
@@ -1446,6 +1466,15 @@ async fn apply_bespoke_event_handling(
|
||||
on_exec_approval_response(event_id, rx, conversation).await;
|
||||
});
|
||||
}
|
||||
EventMsg::TokenCount(token_count_event) => {
|
||||
if let Some(rate_limits) = token_count_event.rate_limits {
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::AccountRateLimitsUpdated(
|
||||
rate_limits,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
// If this is a TurnAborted, reply to any pending interrupt requests.
|
||||
EventMsg::TurnAborted(turn_aborted_event) => {
|
||||
let pending = {
|
||||
@@ -1596,18 +1625,8 @@ fn extract_conversation_summary(
|
||||
let preview = head
|
||||
.iter()
|
||||
.filter_map(|value| serde_json::from_value::<ResponseItem>(value.clone()).ok())
|
||||
.find_map(|item| match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
content.into_iter().find_map(|content| match content {
|
||||
ContentItem::InputText { text } => {
|
||||
match InputMessageKind::from(("user", &text)) {
|
||||
InputMessageKind::Plain => Some(text),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
.find_map(|item| match codex_core::parse_turn_item(&item) {
|
||||
Some(TurnItem::UserMessage(user)) => Some(user.message()),
|
||||
_ => None,
|
||||
})?;
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ pub(crate) async fn run_fuzzy_file_search(
|
||||
threads,
|
||||
cancel_flag,
|
||||
COMPUTE_INDICES,
|
||||
true,
|
||||
) {
|
||||
Ok(res) => Ok((root, res)),
|
||||
Err(err) => Err((root, err)),
|
||||
|
||||
@@ -142,6 +142,8 @@ pub(crate) struct OutgoingError {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use codex_app_server_protocol::LoginChatGptCompleteNotification;
|
||||
use codex_protocol::protocol::RateLimitSnapshot;
|
||||
use codex_protocol::protocol::RateLimitWindow;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use uuid::Uuid;
|
||||
@@ -171,4 +173,34 @@ mod tests {
|
||||
"ensure the strum macros serialize the method field correctly"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_account_rate_limits_notification_serialization() {
|
||||
let notification = ServerNotification::AccountRateLimitsUpdated(RateLimitSnapshot {
|
||||
primary: Some(RateLimitWindow {
|
||||
used_percent: 25.0,
|
||||
window_minutes: Some(15),
|
||||
resets_at: Some(123),
|
||||
}),
|
||||
secondary: None,
|
||||
});
|
||||
|
||||
let jsonrpc_notification = OutgoingMessage::AppServerNotification(notification);
|
||||
assert_eq!(
|
||||
json!({
|
||||
"method": "account/rateLimits/updated",
|
||||
"params": {
|
||||
"primary": {
|
||||
"used_percent": 25.0,
|
||||
"window_minutes": 15,
|
||||
"resets_at": 123,
|
||||
},
|
||||
"secondary": null,
|
||||
},
|
||||
}),
|
||||
serde_json::to_value(jsonrpc_notification)
|
||||
.expect("ensure the notification serializes correctly"),
|
||||
"ensure the notification serializes correctly"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,7 +30,6 @@ use codex_protocol::config_types::SandboxMode;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::Event;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::InputMessageKind;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::env;
|
||||
use tempfile::TempDir;
|
||||
@@ -528,43 +527,6 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
|
||||
.expect("sendUserTurn 2 timeout")
|
||||
.expect("sendUserTurn 2 resp");
|
||||
|
||||
let mut env_message: Option<String> = None;
|
||||
let second_cwd_str = second_cwd.to_string_lossy().into_owned();
|
||||
for _ in 0..10 {
|
||||
let notification = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/user_message"),
|
||||
)
|
||||
.await
|
||||
.expect("user_message timeout")
|
||||
.expect("user_message notification");
|
||||
let params = notification
|
||||
.params
|
||||
.clone()
|
||||
.expect("user_message should include params");
|
||||
let event: Event = serde_json::from_value(params).expect("deserialize user_message event");
|
||||
if let EventMsg::UserMessage(user) = event.msg
|
||||
&& matches!(user.kind, Some(InputMessageKind::EnvironmentContext))
|
||||
&& user.message.contains(&second_cwd_str)
|
||||
{
|
||||
env_message = Some(user.message);
|
||||
break;
|
||||
}
|
||||
}
|
||||
let env_message = env_message.expect("expected environment context update");
|
||||
assert!(
|
||||
env_message.contains("<sandbox_mode>danger-full-access</sandbox_mode>"),
|
||||
"env context should reflect new sandbox mode: {env_message}"
|
||||
);
|
||||
assert!(
|
||||
env_message.contains("<network_access>enabled</network_access>"),
|
||||
"env context should enable network access for danger-full-access policy: {env_message}"
|
||||
);
|
||||
assert!(
|
||||
env_message.contains(&second_cwd_str),
|
||||
"env context should include updated cwd: {env_message}"
|
||||
);
|
||||
|
||||
let exec_begin_notification = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
|
||||
|
||||
@@ -1 +1,3 @@
|
||||
mod cli;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod tool;
|
||||
|
||||
257
codex-rs/apply-patch/tests/suite/tool.rs
Normal file
257
codex-rs/apply-patch/tests/suite/tool.rs
Normal file
@@ -0,0 +1,257 @@
|
||||
use assert_cmd::Command;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use tempfile::tempdir;
|
||||
|
||||
fn run_apply_patch_in_dir(dir: &Path, patch: &str) -> anyhow::Result<assert_cmd::assert::Assert> {
|
||||
let mut cmd = Command::cargo_bin("apply_patch")?;
|
||||
cmd.current_dir(dir);
|
||||
Ok(cmd.arg(patch).assert())
|
||||
}
|
||||
|
||||
fn apply_patch_command(dir: &Path) -> anyhow::Result<Command> {
|
||||
let mut cmd = Command::cargo_bin("apply_patch")?;
|
||||
cmd.current_dir(dir);
|
||||
Ok(cmd)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_applies_multiple_operations() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let modify_path = tmp.path().join("modify.txt");
|
||||
let delete_path = tmp.path().join("delete.txt");
|
||||
|
||||
fs::write(&modify_path, "line1\nline2\n")?;
|
||||
fs::write(&delete_path, "obsolete\n")?;
|
||||
|
||||
let patch = "*** Begin Patch\n*** Add File: nested/new.txt\n+created\n*** Delete File: delete.txt\n*** Update File: modify.txt\n@@\n-line2\n+changed\n*** End Patch";
|
||||
|
||||
run_apply_patch_in_dir(tmp.path(), patch)?.success().stdout(
|
||||
"Success. Updated the following files:\nA nested/new.txt\nM modify.txt\nD delete.txt\n",
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
fs::read_to_string(tmp.path().join("nested/new.txt"))?,
|
||||
"created\n"
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&modify_path)?, "line1\nchanged\n");
|
||||
assert!(!delete_path.exists());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_applies_multiple_chunks() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let target_path = tmp.path().join("multi.txt");
|
||||
fs::write(&target_path, "line1\nline2\nline3\nline4\n")?;
|
||||
|
||||
let patch = "*** Begin Patch\n*** Update File: multi.txt\n@@\n-line2\n+changed2\n@@\n-line4\n+changed4\n*** End Patch";
|
||||
|
||||
run_apply_patch_in_dir(tmp.path(), patch)?
|
||||
.success()
|
||||
.stdout("Success. Updated the following files:\nM multi.txt\n");
|
||||
|
||||
assert_eq!(
|
||||
fs::read_to_string(&target_path)?,
|
||||
"line1\nchanged2\nline3\nchanged4\n"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_moves_file_to_new_directory() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let original_path = tmp.path().join("old/name.txt");
|
||||
let new_path = tmp.path().join("renamed/dir/name.txt");
|
||||
fs::create_dir_all(original_path.parent().expect("parent should exist"))?;
|
||||
fs::write(&original_path, "old content\n")?;
|
||||
|
||||
let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-old content\n+new content\n*** End Patch";
|
||||
|
||||
run_apply_patch_in_dir(tmp.path(), patch)?
|
||||
.success()
|
||||
.stdout("Success. Updated the following files:\nM renamed/dir/name.txt\n");
|
||||
|
||||
assert!(!original_path.exists());
|
||||
assert_eq!(fs::read_to_string(&new_path)?, "new content\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_rejects_empty_patch() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr("No files were modified.\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_reports_missing_context() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let target_path = tmp.path().join("modify.txt");
|
||||
fs::write(&target_path, "line1\nline2\n")?;
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** Update File: modify.txt\n@@\n-missing\n+changed\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr("Failed to find expected lines in modify.txt:\nmissing\n");
|
||||
assert_eq!(fs::read_to_string(&target_path)?, "line1\nline2\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_rejects_missing_file_delete() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** Delete File: missing.txt\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr("Failed to delete file missing.txt\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_rejects_empty_update_hunk() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** Update File: foo.txt\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr("Invalid patch hunk on line 2: Update file hunk for path 'foo.txt' is empty\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_requires_existing_file_for_update() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr(
|
||||
"Failed to read file to update missing.txt: No such file or directory (os error 2)\n",
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_move_overwrites_existing_destination() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let original_path = tmp.path().join("old/name.txt");
|
||||
let destination = tmp.path().join("renamed/dir/name.txt");
|
||||
fs::create_dir_all(original_path.parent().expect("parent should exist"))?;
|
||||
fs::create_dir_all(destination.parent().expect("parent should exist"))?;
|
||||
fs::write(&original_path, "from\n")?;
|
||||
fs::write(&destination, "existing\n")?;
|
||||
|
||||
run_apply_patch_in_dir(
|
||||
tmp.path(),
|
||||
"*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-from\n+new\n*** End Patch",
|
||||
)?
|
||||
.success()
|
||||
.stdout("Success. Updated the following files:\nM renamed/dir/name.txt\n");
|
||||
|
||||
assert!(!original_path.exists());
|
||||
assert_eq!(fs::read_to_string(&destination)?, "new\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_add_overwrites_existing_file() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let path = tmp.path().join("duplicate.txt");
|
||||
fs::write(&path, "old content\n")?;
|
||||
|
||||
run_apply_patch_in_dir(
|
||||
tmp.path(),
|
||||
"*** Begin Patch\n*** Add File: duplicate.txt\n+new content\n*** End Patch",
|
||||
)?
|
||||
.success()
|
||||
.stdout("Success. Updated the following files:\nA duplicate.txt\n");
|
||||
|
||||
assert_eq!(fs::read_to_string(&path)?, "new content\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_delete_directory_fails() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
fs::create_dir(tmp.path().join("dir"))?;
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** Delete File: dir\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr("Failed to delete file dir\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_rejects_invalid_hunk_header() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** Frobnicate File: foo\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr("Invalid patch hunk on line 2: '*** Frobnicate File: foo' is not a valid hunk header. Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_updates_file_appends_trailing_newline() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let target_path = tmp.path().join("no_newline.txt");
|
||||
fs::write(&target_path, "no newline at end")?;
|
||||
|
||||
run_apply_patch_in_dir(
|
||||
tmp.path(),
|
||||
"*** Begin Patch\n*** Update File: no_newline.txt\n@@\n-no newline at end\n+first line\n+second line\n*** End Patch",
|
||||
)?
|
||||
.success()
|
||||
.stdout("Success. Updated the following files:\nM no_newline.txt\n");
|
||||
|
||||
let contents = fs::read_to_string(&target_path)?;
|
||||
assert!(contents.ends_with('\n'));
|
||||
assert_eq!(contents, "first line\nsecond line\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_patch_cli_failure_after_partial_success_leaves_changes() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let new_file = tmp.path().join("created.txt");
|
||||
|
||||
apply_patch_command(tmp.path())?
|
||||
.arg("*** Begin Patch\n*** Add File: created.txt\n+hello\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch")
|
||||
.assert()
|
||||
.failure()
|
||||
.stdout("")
|
||||
.stderr("Failed to read file to update missing.txt: No such file or directory (os error 2)\n");
|
||||
|
||||
assert_eq!(fs::read_to_string(&new_file)?, "hello\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -150,6 +150,10 @@ pub struct RemoveArgs {
|
||||
pub struct LoginArgs {
|
||||
/// Name of the MCP server to authenticate with oauth.
|
||||
pub name: String,
|
||||
|
||||
/// Comma-separated list of OAuth scopes to request.
|
||||
#[arg(long, value_delimiter = ',', value_name = "SCOPE,SCOPE")]
|
||||
pub scopes: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::Parser)]
|
||||
@@ -279,6 +283,7 @@ async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Re
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
http_headers.clone(),
|
||||
env_http_headers.clone(),
|
||||
&Vec::new(),
|
||||
)
|
||||
.await?;
|
||||
println!("Successfully logged in.");
|
||||
@@ -327,7 +332,7 @@ async fn run_login(config_overrides: &CliConfigOverrides, login_args: LoginArgs)
|
||||
);
|
||||
}
|
||||
|
||||
let LoginArgs { name } = login_args;
|
||||
let LoginArgs { name, scopes } = login_args;
|
||||
|
||||
let Some(server) = config.mcp_servers.get(&name) else {
|
||||
bail!("No MCP server named '{name}' found.");
|
||||
@@ -349,6 +354,7 @@ async fn run_login(config_overrides: &CliConfigOverrides, login_args: LoginArgs)
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
http_headers,
|
||||
env_http_headers,
|
||||
&scopes,
|
||||
)
|
||||
.await?;
|
||||
println!("Successfully logged in to MCP server '{name}'.");
|
||||
|
||||
@@ -22,18 +22,19 @@ chrono = { workspace = true, features = ["serde"] }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-apply-patch = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-mcp-client = { workspace = true }
|
||||
codex-otel = { workspace = true, features = ["otel"] }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-rmcp-client = { workspace = true }
|
||||
codex-async-utils = { workspace = true }
|
||||
codex-utils-string = { workspace = true }
|
||||
codex-utils-pty = { workspace = true }
|
||||
codex-utils-tokenizer = { workspace = true }
|
||||
dirs = { workspace = true }
|
||||
dunce = { workspace = true }
|
||||
env-flags = { workspace = true }
|
||||
eventsource-stream = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
http = { workspace = true }
|
||||
indexmap = { workspace = true }
|
||||
libc = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
|
||||
@@ -21,6 +21,7 @@ use codex_app_server_protocol::AuthMode;
|
||||
use codex_protocol::config_types::ForcedLoginMethod;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::token_data::PlanType;
|
||||
use crate::token_data::TokenData;
|
||||
use crate::token_data::parse_id_token;
|
||||
@@ -32,7 +33,7 @@ pub struct CodexAuth {
|
||||
pub(crate) api_key: Option<String>,
|
||||
pub(crate) auth_dot_json: Arc<Mutex<Option<AuthDotJson>>>,
|
||||
pub(crate) auth_file: PathBuf,
|
||||
pub(crate) client: reqwest::Client,
|
||||
pub(crate) client: CodexHttpClient,
|
||||
}
|
||||
|
||||
impl PartialEq for CodexAuth {
|
||||
@@ -43,6 +44,8 @@ impl PartialEq for CodexAuth {
|
||||
|
||||
impl CodexAuth {
|
||||
pub async fn refresh_token(&self) -> Result<String, std::io::Error> {
|
||||
tracing::info!("Refreshing token");
|
||||
|
||||
let token_data = self
|
||||
.get_current_token_data()
|
||||
.ok_or(std::io::Error::other("Token data is not available."))?;
|
||||
@@ -180,7 +183,7 @@ impl CodexAuth {
|
||||
}
|
||||
}
|
||||
|
||||
fn from_api_key_with_client(api_key: &str, client: reqwest::Client) -> Self {
|
||||
fn from_api_key_with_client(api_key: &str, client: CodexHttpClient) -> Self {
|
||||
Self {
|
||||
api_key: Some(api_key.to_owned()),
|
||||
mode: AuthMode::ApiKey,
|
||||
@@ -400,7 +403,7 @@ async fn update_tokens(
|
||||
|
||||
async fn try_refresh_token(
|
||||
refresh_token: String,
|
||||
client: &reqwest::Client,
|
||||
client: &CodexHttpClient,
|
||||
) -> std::io::Result<RefreshResponse> {
|
||||
let refresh_request = RefreshRequest {
|
||||
client_id: CLIENT_ID,
|
||||
@@ -916,7 +919,10 @@ impl AuthManager {
|
||||
self.reload();
|
||||
Ok(Some(token))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to refresh token: {}", e);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ use crate::ModelProviderInfo;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::ConnectionFailedError;
|
||||
use crate::error::ResponseStreamFailed;
|
||||
@@ -36,7 +37,7 @@ use tracing::trace;
|
||||
pub(crate) async fn stream_chat_completions(
|
||||
prompt: &Prompt,
|
||||
model_family: &ModelFamily,
|
||||
client: &reqwest::Client,
|
||||
client: &CodexHttpClient,
|
||||
provider: &ModelProviderInfo,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<ResponseStream> {
|
||||
@@ -104,10 +105,10 @@ pub(crate) async fn stream_chat_completions(
|
||||
} = item
|
||||
{
|
||||
let mut text = String::new();
|
||||
for c in items {
|
||||
match c {
|
||||
ReasoningItemContent::ReasoningText { text: t }
|
||||
| ReasoningItemContent::Text { text: t } => text.push_str(t),
|
||||
for entry in items {
|
||||
match entry {
|
||||
ReasoningItemContent::ReasoningText { text: segment }
|
||||
| ReasoningItemContent::Text { text: segment } => text.push_str(segment),
|
||||
}
|
||||
}
|
||||
if text.trim().is_empty() {
|
||||
|
||||
@@ -39,6 +39,7 @@ use crate::client_common::ResponsesApiRequest;
|
||||
use crate::client_common::create_reasoning_param_for_request;
|
||||
use crate::client_common::create_text_param_for_request;
|
||||
use crate::config::Config;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::default_client::create_client;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::ConnectionFailedError;
|
||||
@@ -81,7 +82,7 @@ pub struct ModelClient {
|
||||
config: Arc<Config>,
|
||||
auth_manager: Option<Arc<AuthManager>>,
|
||||
otel_event_manager: OtelEventManager,
|
||||
client: reqwest::Client,
|
||||
client: CodexHttpClient,
|
||||
provider: ModelProviderInfo,
|
||||
conversation_id: ConversationId,
|
||||
effort: Option<ReasoningEffortConfig>,
|
||||
@@ -300,6 +301,7 @@ impl ModelClient {
|
||||
"POST to {}: {:?}",
|
||||
self.provider.get_full_url(&auth),
|
||||
serde_json::to_string(payload_json)
|
||||
.unwrap_or("<unable to serialize payload>".to_string())
|
||||
);
|
||||
|
||||
let mut req_builder = self
|
||||
@@ -335,13 +337,6 @@ impl ModelClient {
|
||||
.headers()
|
||||
.get("cf-ray")
|
||||
.map(|v| v.to_str().unwrap_or_default().to_string());
|
||||
|
||||
debug!(
|
||||
"Response status: {}, cf-ray: {:?}, version: {:?}",
|
||||
resp.status(),
|
||||
request_id,
|
||||
resp.version()
|
||||
);
|
||||
}
|
||||
|
||||
match res {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::path::PathBuf;
|
||||
@@ -7,12 +6,13 @@ use std::sync::atomic::AtomicU64;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::client_common::REVIEW_PROMPT;
|
||||
use crate::event_mapping::map_response_item_to_event_messages;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::parse_turn_item;
|
||||
use crate::response_processing::process_items;
|
||||
use crate::review_format::format_review_findings_block;
|
||||
use crate::state::ItemCollector;
|
||||
use crate::terminal;
|
||||
use crate::user_notification::UserNotifier;
|
||||
use async_channel::Receiver;
|
||||
@@ -20,9 +20,10 @@ use async_channel::Sender;
|
||||
use codex_apply_patch::ApplyPatchAction;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::items::UserMessageItem;
|
||||
use codex_protocol::protocol::ConversationPathResponseEvent;
|
||||
use codex_protocol::protocol::ExitedReviewModeEvent;
|
||||
use codex_protocol::protocol::ItemCompletedEvent;
|
||||
use codex_protocol::protocol::ItemStartedEvent;
|
||||
use codex_protocol::protocol::ReviewRequest;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
@@ -58,7 +59,7 @@ use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::config_types::McpServerTransportConfig;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::context_manager::ContextManager;
|
||||
use crate::environment_context::EnvironmentContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result as CodexResult;
|
||||
@@ -270,7 +271,6 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) is_review_mode: bool,
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub(crate) item_collector: ItemCollector,
|
||||
}
|
||||
|
||||
impl TurnContext {
|
||||
@@ -359,7 +359,6 @@ impl Session {
|
||||
provider: ModelProviderInfo,
|
||||
session_configuration: &SessionConfiguration,
|
||||
conversation_id: ConversationId,
|
||||
tx_event: Sender<Event>,
|
||||
sub_id: String,
|
||||
) -> TurnContext {
|
||||
let config = session_configuration.original_config_do_not_use.clone();
|
||||
@@ -394,8 +393,6 @@ impl Session {
|
||||
features: &config.features,
|
||||
});
|
||||
|
||||
let item_collector = ItemCollector::new(tx_event, conversation_id, sub_id.clone());
|
||||
|
||||
TurnContext {
|
||||
sub_id,
|
||||
client,
|
||||
@@ -409,7 +406,6 @@ impl Session {
|
||||
is_review_mode: false,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
item_collector,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -463,9 +459,6 @@ impl Session {
|
||||
|
||||
let mcp_fut = McpConnectionManager::new(
|
||||
config.mcp_servers.clone(),
|
||||
config
|
||||
.features
|
||||
.enabled(crate::features::Feature::RmcpClient),
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
);
|
||||
let default_shell_fut = shell::default_user_shell();
|
||||
@@ -665,7 +658,6 @@ impl Session {
|
||||
session_configuration.provider.clone(),
|
||||
&session_configuration,
|
||||
self.conversation_id,
|
||||
self.get_tx_event(),
|
||||
sub_id,
|
||||
);
|
||||
if let Some(final_schema) = updates.final_output_json_schema {
|
||||
@@ -710,6 +702,59 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) {
|
||||
self.send_event(
|
||||
turn_context,
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
thread_id: self.conversation_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
item: item.clone(),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn emit_turn_item_completed(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
item: TurnItem,
|
||||
emit_raw_agent_reasoning: bool,
|
||||
) {
|
||||
self.send_event(
|
||||
turn_context,
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
thread_id: self.conversation_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
item: item.clone(),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
self.emit_turn_item_legacy_events(turn_context, &item, emit_raw_agent_reasoning)
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn emit_turn_item_started_completed(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
item: TurnItem,
|
||||
emit_raw_agent_reasoning: bool,
|
||||
) {
|
||||
self.emit_turn_item_started(turn_context, &item).await;
|
||||
self.emit_turn_item_completed(turn_context, item, emit_raw_agent_reasoning)
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn emit_turn_item_legacy_events(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
item: &TurnItem,
|
||||
emit_raw_agent_reasoning: bool,
|
||||
) {
|
||||
for event in item.as_legacy_events(emit_raw_agent_reasoning) {
|
||||
self.send_event(turn_context, event).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit an exec approval request event and await the user's decision.
|
||||
///
|
||||
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
|
||||
@@ -812,7 +857,7 @@ impl Session {
|
||||
|
||||
/// Records input items: always append to conversation history and
|
||||
/// persist these response items to rollout.
|
||||
async fn record_conversation_items(&self, items: &[ResponseItem]) {
|
||||
pub(crate) async fn record_conversation_items(&self, items: &[ResponseItem]) {
|
||||
self.record_into_history(items).await;
|
||||
self.persist_rollout_response_items(items).await;
|
||||
}
|
||||
@@ -822,14 +867,14 @@ impl Session {
|
||||
turn_context: &TurnContext,
|
||||
rollout_items: &[RolloutItem],
|
||||
) -> Vec<ResponseItem> {
|
||||
let mut history = ConversationHistory::new();
|
||||
let mut history = ContextManager::new();
|
||||
for item in rollout_items {
|
||||
match item {
|
||||
RolloutItem::ResponseItem(response_item) => {
|
||||
history.record_items(std::iter::once(response_item));
|
||||
}
|
||||
RolloutItem::Compacted(compacted) => {
|
||||
let snapshot = history.contents();
|
||||
let snapshot = history.get_history();
|
||||
let user_messages = collect_user_messages(&snapshot);
|
||||
let rebuilt = build_compacted_history(
|
||||
self.build_initial_context(turn_context),
|
||||
@@ -841,7 +886,7 @@ impl Session {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
history.contents()
|
||||
history.get_history()
|
||||
}
|
||||
|
||||
/// Append ResponseItems to the in-memory conversation history only.
|
||||
@@ -890,11 +935,17 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
// todo (aibrahim): get rid of this method. we shouldn't deal with vec[resposne_item] and rather use ConversationHistory.
|
||||
pub(crate) async fn history_snapshot(&self) -> Vec<ResponseItem> {
|
||||
let state = self.state.lock().await;
|
||||
let mut state = self.state.lock().await;
|
||||
state.history_snapshot()
|
||||
}
|
||||
|
||||
pub(crate) async fn clone_history(&self) -> ContextManager {
|
||||
let state = self.state.lock().await;
|
||||
state.clone_history()
|
||||
}
|
||||
|
||||
async fn update_token_usage_info(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
@@ -946,24 +997,22 @@ impl Session {
|
||||
|
||||
/// Record a user input item to conversation history and also persist a
|
||||
/// corresponding UserMessage EventMsg to rollout.
|
||||
async fn record_input_and_rollout_usermsg(&self, response_input: &ResponseInputItem) {
|
||||
async fn record_input_and_rollout_usermsg(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
response_input: &ResponseInputItem,
|
||||
) {
|
||||
let response_item: ResponseItem = response_input.clone().into();
|
||||
// Add to conversation history and persist response item to rollout
|
||||
self.record_conversation_items(std::slice::from_ref(&response_item))
|
||||
.await;
|
||||
|
||||
// Derive user message events and persist only UserMessage to rollout
|
||||
let msgs =
|
||||
map_response_item_to_event_messages(&response_item, self.show_raw_agent_reasoning());
|
||||
let user_msgs: Vec<RolloutItem> = msgs
|
||||
.into_iter()
|
||||
.filter_map(|m| match m {
|
||||
EventMsg::UserMessage(ev) => Some(RolloutItem::EventMsg(EventMsg::UserMessage(ev))),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
if !user_msgs.is_empty() {
|
||||
self.persist_rollout_items(&user_msgs).await;
|
||||
let turn_item = parse_turn_item(&response_item);
|
||||
|
||||
if let Some(item @ TurnItem::UserMessage(_)) = turn_item {
|
||||
self.emit_turn_item_started_completed(turn_context, item, false)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -988,16 +1037,6 @@ impl Session {
|
||||
self.send_event(turn_context, event).await;
|
||||
}
|
||||
|
||||
/// Build the full turn input by concatenating the current conversation
|
||||
/// history with additional items for this turn.
|
||||
pub async fn turn_input_with_history(&self, extra: Vec<ResponseItem>) -> Vec<ResponseItem> {
|
||||
let history = {
|
||||
let state = self.state.lock().await;
|
||||
state.history_snapshot()
|
||||
};
|
||||
[history, extra].concat()
|
||||
}
|
||||
|
||||
/// Returns the input if there was no task running to inject into
|
||||
pub async fn inject_input(&self, input: Vec<UserInput>) -> Result<(), Vec<UserInput>> {
|
||||
let mut active = self.active_turn.lock().await;
|
||||
@@ -1158,19 +1197,8 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
|
||||
{
|
||||
sess.record_conversation_items(std::slice::from_ref(&env_item))
|
||||
.await;
|
||||
for msg in map_response_item_to_event_messages(
|
||||
&env_item,
|
||||
sess.show_raw_agent_reasoning(),
|
||||
) {
|
||||
sess.send_event(¤t_context, msg).await;
|
||||
}
|
||||
}
|
||||
|
||||
current_context
|
||||
.item_collector
|
||||
.started_completed(TurnItem::UserMessage(UserMessageItem::new(&items)))
|
||||
.await;
|
||||
|
||||
sess.spawn_task(Arc::clone(¤t_context), items, RegularTask)
|
||||
.await;
|
||||
previous_context = Some(current_context);
|
||||
@@ -1444,11 +1472,6 @@ async fn spawn_review_thread(
|
||||
is_review_mode: true,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
item_collector: ItemCollector::new(
|
||||
sess.get_tx_event(),
|
||||
sess.conversation_id,
|
||||
sub_id.to_string(),
|
||||
),
|
||||
};
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
@@ -1500,13 +1523,15 @@ pub(crate) async fn run_task(
|
||||
// model sees a fresh conversation without the parent session's history.
|
||||
// For normal turns, continue recording to the session history as before.
|
||||
let is_review_mode = turn_context.is_review_mode;
|
||||
let mut review_thread_history: Vec<ResponseItem> = Vec::new();
|
||||
|
||||
let mut review_thread_history: ContextManager = ContextManager::new();
|
||||
if is_review_mode {
|
||||
// Seed review threads with environment context so the model knows the working directory.
|
||||
review_thread_history.extend(sess.build_initial_context(turn_context.as_ref()));
|
||||
review_thread_history.push(initial_input_for_turn.into());
|
||||
review_thread_history
|
||||
.record_items(sess.build_initial_context(turn_context.as_ref()).iter());
|
||||
review_thread_history.record_items(std::iter::once(&initial_input_for_turn.into()));
|
||||
} else {
|
||||
sess.record_input_and_rollout_usermsg(&initial_input_for_turn)
|
||||
sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -1539,12 +1564,12 @@ pub(crate) async fn run_task(
|
||||
// represents an append-only log without duplicates.
|
||||
let turn_input: Vec<ResponseItem> = if is_review_mode {
|
||||
if !pending_input.is_empty() {
|
||||
review_thread_history.extend(pending_input);
|
||||
review_thread_history.record_items(&pending_input);
|
||||
}
|
||||
review_thread_history.clone()
|
||||
review_thread_history.get_history()
|
||||
} else {
|
||||
sess.record_conversation_items(&pending_input).await;
|
||||
sess.turn_input_with_history(pending_input).await
|
||||
sess.history_snapshot().await
|
||||
};
|
||||
|
||||
let turn_input_messages: Vec<String> = turn_input
|
||||
@@ -1585,109 +1610,13 @@ pub(crate) async fn run_task(
|
||||
let token_limit_reached = total_usage_tokens
|
||||
.map(|tokens| tokens >= limit)
|
||||
.unwrap_or(false);
|
||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||
let mut responses = Vec::<ResponseInputItem>::new();
|
||||
for processed_response_item in processed_items {
|
||||
let ProcessedResponseItem { item, response } = processed_response_item;
|
||||
match (&item, &response) {
|
||||
(ResponseItem::Message { role, .. }, None) if role == "assistant" => {
|
||||
// If the model returned a message, we need to record it.
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
}
|
||||
(
|
||||
ResponseItem::LocalShellCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::CustomToolCall { .. },
|
||||
Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
let output = match result {
|
||||
Ok(call_tool_result) => {
|
||||
convert_call_tool_result_to_function_call_output_payload(
|
||||
call_tool_result,
|
||||
)
|
||||
}
|
||||
Err(err) => FunctionCallOutputPayload {
|
||||
content: err.clone(),
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
items_to_record_in_conversation_history.push(
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output,
|
||||
},
|
||||
);
|
||||
}
|
||||
(
|
||||
ResponseItem::Reasoning {
|
||||
id,
|
||||
summary,
|
||||
content,
|
||||
encrypted_content,
|
||||
},
|
||||
None,
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
|
||||
id: id.clone(),
|
||||
summary: summary.clone(),
|
||||
content: content.clone(),
|
||||
encrypted_content: encrypted_content.clone(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
warn!("Unexpected response item: {item:?} with response: {response:?}");
|
||||
}
|
||||
};
|
||||
if let Some(response) = response {
|
||||
responses.push(response);
|
||||
}
|
||||
}
|
||||
|
||||
// Only attempt to take the lock if there is something to record.
|
||||
if !items_to_record_in_conversation_history.is_empty() {
|
||||
if is_review_mode {
|
||||
review_thread_history
|
||||
.extend(items_to_record_in_conversation_history.clone());
|
||||
} else {
|
||||
sess.record_conversation_items(&items_to_record_in_conversation_history)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
let (responses, items_to_record_in_conversation_history) = process_items(
|
||||
processed_items,
|
||||
is_review_mode,
|
||||
&mut review_thread_history,
|
||||
&sess,
|
||||
)
|
||||
.await;
|
||||
|
||||
if token_limit_reached {
|
||||
if auto_compact_recently_attempted {
|
||||
@@ -1726,7 +1655,16 @@ pub(crate) async fn run_task(
|
||||
}
|
||||
continue;
|
||||
}
|
||||
Err(CodexErr::TurnAborted) => {
|
||||
Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
}) => {
|
||||
let _ = process_items(
|
||||
processed_items,
|
||||
is_review_mode,
|
||||
&mut review_thread_history,
|
||||
&sess,
|
||||
)
|
||||
.await;
|
||||
// Aborted turn is reported via a different event.
|
||||
break;
|
||||
}
|
||||
@@ -1827,7 +1765,13 @@ async fn run_turn(
|
||||
.await
|
||||
{
|
||||
Ok(output) => return Ok(output),
|
||||
Err(CodexErr::TurnAborted) => return Err(CodexErr::TurnAborted),
|
||||
Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
}) => {
|
||||
return Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
});
|
||||
}
|
||||
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
|
||||
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
|
||||
Err(e @ CodexErr::Fatal(_)) => return Err(e),
|
||||
@@ -1880,9 +1824,9 @@ async fn run_turn(
|
||||
/// "handled" such that it produces a `ResponseInputItem` that needs to be
|
||||
/// sent back to the model on the next turn.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ProcessedResponseItem {
|
||||
pub(crate) item: ResponseItem,
|
||||
pub(crate) response: Option<ResponseInputItem>,
|
||||
pub struct ProcessedResponseItem {
|
||||
pub item: ResponseItem,
|
||||
pub response: Option<ResponseInputItem>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -1901,61 +1845,6 @@ async fn try_run_turn(
|
||||
task_kind: TaskKind,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<TurnRunResult> {
|
||||
// call_ids that are part of this response.
|
||||
let completed_call_ids = prompt
|
||||
.input
|
||||
.iter()
|
||||
.filter_map(|ri| match ri {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => Some(call_id),
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
ResponseItem::CustomToolCallOutput { call_id, .. } => Some(call_id),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// call_ids that were pending but are not part of this response.
|
||||
// This usually happens because the user interrupted the model before we responded to one of its tool calls
|
||||
// and then the user sent a follow-up message.
|
||||
let missing_calls = {
|
||||
prompt
|
||||
.input
|
||||
.iter()
|
||||
.filter_map(|ri| match ri {
|
||||
ResponseItem::FunctionCall { call_id, .. } => Some(call_id),
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => Some(call_id),
|
||||
ResponseItem::CustomToolCall { call_id, .. } => Some(call_id),
|
||||
_ => None,
|
||||
})
|
||||
.filter_map(|call_id| {
|
||||
if completed_call_ids.contains(&call_id) {
|
||||
None
|
||||
} else {
|
||||
Some(call_id.clone())
|
||||
}
|
||||
})
|
||||
.map(|call_id| ResponseItem::CustomToolCallOutput {
|
||||
call_id,
|
||||
output: "aborted".to_string(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let prompt: Cow<Prompt> = if missing_calls.is_empty() {
|
||||
Cow::Borrowed(prompt)
|
||||
} else {
|
||||
// Add the synthetic aborted missing calls to the beginning of the input to ensure all call ids have responses.
|
||||
let input = [missing_calls, prompt.input.clone()].concat();
|
||||
Cow::Owned(Prompt {
|
||||
input,
|
||||
..prompt.clone()
|
||||
})
|
||||
};
|
||||
|
||||
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
|
||||
cwd: turn_context.cwd.clone(),
|
||||
approval_policy: turn_context.approval_policy,
|
||||
@@ -1964,11 +1853,12 @@ async fn try_run_turn(
|
||||
effort: turn_context.client.get_reasoning_effort(),
|
||||
summary: turn_context.client.get_reasoning_summary(),
|
||||
});
|
||||
|
||||
sess.persist_rollout_items(&[rollout_item]).await;
|
||||
let mut stream = turn_context
|
||||
.client
|
||||
.clone()
|
||||
.stream_with_task_kind(prompt.as_ref(), task_kind)
|
||||
.stream_with_task_kind(prompt, task_kind)
|
||||
.or_cancel(&cancellation_token)
|
||||
.await??;
|
||||
|
||||
@@ -1985,7 +1875,15 @@ async fn try_run_turn(
|
||||
// Poll the next item from the model stream. We must inspect *both* Ok and Err
|
||||
// cases so that transient stream failures (e.g., dropped SSE connection before
|
||||
// `response.completed`) bubble up and trigger the caller's retry logic.
|
||||
let event = stream.next().or_cancel(&cancellation_token).await?;
|
||||
let event = match stream.next().or_cancel(&cancellation_token).await {
|
||||
Ok(event) => event,
|
||||
Err(codex_async_utils::CancelErr::Cancelled) => {
|
||||
let processed_items = output.try_collect().await?;
|
||||
return Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let event = match event {
|
||||
Some(res) => res?,
|
||||
@@ -2009,7 +1907,8 @@ async fn try_run_turn(
|
||||
let payload_preview = call.payload.log_payload().into_owned();
|
||||
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
|
||||
|
||||
let response = tool_runtime.handle_tool_call(call);
|
||||
let response =
|
||||
tool_runtime.handle_tool_call(call, cancellation_token.child_token());
|
||||
|
||||
output.push_back(
|
||||
async move {
|
||||
@@ -2023,9 +1922,10 @@ async fn try_run_turn(
|
||||
}
|
||||
Ok(None) => {
|
||||
let response = handle_non_tool_response_item(
|
||||
Arc::clone(&sess),
|
||||
sess.as_ref(),
|
||||
Arc::clone(&turn_context),
|
||||
item.clone(),
|
||||
sess.show_raw_agent_reasoning(),
|
||||
)
|
||||
.await?;
|
||||
add_completed(ProcessedResponseItem { item, response });
|
||||
@@ -2090,12 +1990,7 @@ async fn try_run_turn(
|
||||
} => {
|
||||
sess.update_token_usage_info(turn_context.as_ref(), token_usage.as_ref())
|
||||
.await;
|
||||
|
||||
let processed_items = output
|
||||
.try_collect()
|
||||
.or_cancel(&cancellation_token)
|
||||
.await??;
|
||||
|
||||
let processed_items = output.try_collect().await?;
|
||||
let unified_diff = {
|
||||
let mut tracker = turn_diff_tracker.lock().await;
|
||||
tracker.get_unified_diff()
|
||||
@@ -2144,9 +2039,10 @@ async fn try_run_turn(
|
||||
}
|
||||
|
||||
async fn handle_non_tool_response_item(
|
||||
sess: Arc<Session>,
|
||||
sess: &Session,
|
||||
turn_context: Arc<TurnContext>,
|
||||
item: ResponseItem,
|
||||
show_raw_agent_reasoning: bool,
|
||||
) -> CodexResult<Option<ResponseInputItem>> {
|
||||
debug!(?item, "Output item");
|
||||
|
||||
@@ -2154,15 +2050,20 @@ async fn handle_non_tool_response_item(
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. } => {
|
||||
let msgs = match &item {
|
||||
let turn_item = match &item {
|
||||
ResponseItem::Message { .. } if turn_context.is_review_mode => {
|
||||
trace!("suppressing assistant Message in review mode");
|
||||
Vec::new()
|
||||
None
|
||||
}
|
||||
_ => map_response_item_to_event_messages(&item, sess.show_raw_agent_reasoning()),
|
||||
_ => parse_turn_item(&item),
|
||||
};
|
||||
for msg in msgs {
|
||||
sess.send_event(&turn_context, msg).await;
|
||||
if let Some(turn_item) = turn_item {
|
||||
sess.emit_turn_item_started_completed(
|
||||
turn_context.as_ref(),
|
||||
turn_item,
|
||||
show_raw_agent_reasoning,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => {
|
||||
@@ -2193,7 +2094,7 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
|
||||
}
|
||||
})
|
||||
}
|
||||
fn convert_call_tool_result_to_function_call_output_payload(
|
||||
pub(crate) fn convert_call_tool_result_to_function_call_output_payload(
|
||||
call_tool_result: &CallToolResult,
|
||||
) -> FunctionCallOutputPayload {
|
||||
let CallToolResult {
|
||||
@@ -2299,12 +2200,24 @@ fn mcp_init_error_display(
|
||||
// That means that the user has to specify a personal access token either via bearer_token_env_var or http_headers.
|
||||
// https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
|
||||
format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding `bearer_token_env_var = CODEX_GITHUB_PAT` in the `mcp_servers.{server_name}` section of your config.toml"
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
)
|
||||
} else if is_mcp_client_auth_required_error(err) {
|
||||
format!(
|
||||
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
|
||||
)
|
||||
} else if is_mcp_client_startup_timeout_error(err) {
|
||||
let startup_timeout_secs = match entry {
|
||||
Some(entry) => match entry.config.startup_timeout_sec {
|
||||
Some(timeout) => timeout,
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
},
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
}
|
||||
.as_secs();
|
||||
format!(
|
||||
"MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
|
||||
)
|
||||
} else {
|
||||
format!("MCP client for `{server_name}` failed to start: {err:#}")
|
||||
}
|
||||
@@ -2315,6 +2228,12 @@ fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
|
||||
error.to_string().contains("Auth required")
|
||||
}
|
||||
|
||||
fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
|
||||
let error_message = error.to_string();
|
||||
error_message.contains("request timed out")
|
||||
|| error_message.contains("timed out handshaking with MCP server")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) use tests::make_session_and_context;
|
||||
|
||||
@@ -2340,7 +2259,11 @@ mod tests {
|
||||
use crate::tools::MODEL_FORMAT_MAX_LINES;
|
||||
use crate::tools::MODEL_FORMAT_TAIL_LINES;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::handle_container_exec_with_params;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::ShellHandler;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use codex_protocol::models::ContentItem;
|
||||
@@ -2649,7 +2572,6 @@ mod tests {
|
||||
session_configuration.provider.clone(),
|
||||
&session_configuration,
|
||||
conversation_id,
|
||||
tx_event.clone(),
|
||||
"turn_id".to_string(),
|
||||
);
|
||||
|
||||
@@ -2718,7 +2640,6 @@ mod tests {
|
||||
session_configuration.provider.clone(),
|
||||
&session_configuration,
|
||||
conversation_id,
|
||||
tx_event.clone(),
|
||||
"turn_id".to_string(),
|
||||
));
|
||||
|
||||
@@ -2922,7 +2843,7 @@ mod tests {
|
||||
turn_context: &TurnContext,
|
||||
) -> (Vec<RolloutItem>, Vec<ResponseItem>) {
|
||||
let mut rollout_items = Vec::new();
|
||||
let mut live_history = ConversationHistory::new();
|
||||
let mut live_history = ContextManager::new();
|
||||
|
||||
let initial_context = session.build_initial_context(turn_context);
|
||||
for item in &initial_context {
|
||||
@@ -2951,7 +2872,7 @@ mod tests {
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));
|
||||
|
||||
let summary1 = "summary one";
|
||||
let snapshot1 = live_history.contents();
|
||||
let snapshot1 = live_history.get_history();
|
||||
let user_messages1 = collect_user_messages(&snapshot1);
|
||||
let rebuilt1 = build_compacted_history(
|
||||
session.build_initial_context(turn_context),
|
||||
@@ -2984,7 +2905,7 @@ mod tests {
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));
|
||||
|
||||
let summary2 = "summary two";
|
||||
let snapshot2 = live_history.contents();
|
||||
let snapshot2 = live_history.get_history();
|
||||
let user_messages2 = collect_user_messages(&snapshot2);
|
||||
let rebuilt2 = build_compacted_history(
|
||||
session.build_initial_context(turn_context),
|
||||
@@ -3016,7 +2937,7 @@ mod tests {
|
||||
live_history.record_items(std::iter::once(&assistant3));
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));
|
||||
|
||||
(rollout_items, live_history.contents())
|
||||
(rollout_items, live_history.get_history())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -3065,15 +2986,26 @@ mod tests {
|
||||
let tool_name = "shell";
|
||||
let call_id = "test-call".to_string();
|
||||
|
||||
let resp = handle_container_exec_with_params(
|
||||
tool_name,
|
||||
params,
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn_context),
|
||||
Arc::clone(&turn_diff_tracker),
|
||||
call_id,
|
||||
)
|
||||
.await;
|
||||
let handler = ShellHandler;
|
||||
let resp = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
tracker: Arc::clone(&turn_diff_tracker),
|
||||
call_id,
|
||||
tool_name: tool_name.to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": params.command.clone(),
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": params.timeout_ms,
|
||||
"with_escalated_permissions": params.with_escalated_permissions,
|
||||
"justification": params.justification.clone(),
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let Err(FunctionCallError::RespondToModel(output)) = resp else {
|
||||
panic!("expected error result");
|
||||
@@ -3092,17 +3024,30 @@ mod tests {
|
||||
.expect("unique turn context Arc")
|
||||
.sandbox_policy = SandboxPolicy::DangerFullAccess;
|
||||
|
||||
let resp2 = handle_container_exec_with_params(
|
||||
tool_name,
|
||||
params2,
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn_context),
|
||||
Arc::clone(&turn_diff_tracker),
|
||||
"test-call-2".to_string(),
|
||||
)
|
||||
.await;
|
||||
let resp2 = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
tracker: Arc::clone(&turn_diff_tracker),
|
||||
call_id: "test-call-2".to_string(),
|
||||
tool_name: tool_name.to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": params2.command.clone(),
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": params2.timeout_ms,
|
||||
"with_escalated_permissions": params2.with_escalated_permissions,
|
||||
"justification": params2.justification.clone(),
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let output = resp2.expect("expected Ok result");
|
||||
let output = match resp2.expect("expected Ok result") {
|
||||
ToolOutput::Function { content, .. } => content,
|
||||
_ => panic!("unexpected tool output"),
|
||||
};
|
||||
|
||||
#[derive(Deserialize, PartialEq, Eq, Debug)]
|
||||
struct ResponseExecMetadata {
|
||||
@@ -3146,7 +3091,7 @@ mod tests {
|
||||
let display = mcp_init_error_display(server_name, Some(&entry), &err);
|
||||
|
||||
let expected = format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding `bearer_token_env_var = CODEX_GITHUB_PAT` in the `mcp_servers.{server_name}` section of your config.toml"
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
);
|
||||
|
||||
assert_eq!(expected, display);
|
||||
@@ -3193,4 +3138,17 @@ mod tests {
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_includes_startup_timeout_hint() {
|
||||
let server_name = "slow";
|
||||
let err = anyhow::anyhow!("request timed out");
|
||||
|
||||
let display = mcp_init_error_display(server_name, None, &err);
|
||||
|
||||
assert_eq!(
|
||||
"MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX",
|
||||
display
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,19 +11,20 @@ use crate::protocol::AgentMessageEvent;
|
||||
use crate::protocol::CompactedItem;
|
||||
use crate::protocol::ErrorEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::InputMessageKind;
|
||||
use crate::protocol::TaskStartedEvent;
|
||||
use crate::protocol::TurnContextItem;
|
||||
use crate::state::TaskKind;
|
||||
use crate::truncate::truncate_middle;
|
||||
use crate::util::backoff;
|
||||
use askama::Template;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use futures::prelude::*;
|
||||
use tracing::error;
|
||||
|
||||
pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
|
||||
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
|
||||
@@ -64,9 +65,10 @@ async fn run_compact_task_inner(
|
||||
input: Vec<UserInput>,
|
||||
) {
|
||||
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
|
||||
let mut turn_input = sess
|
||||
.turn_input_with_history(vec![initial_input_for_turn.clone().into()])
|
||||
.await;
|
||||
|
||||
let mut history = sess.clone_history().await;
|
||||
history.record_items(&[initial_input_for_turn.into()]);
|
||||
|
||||
let mut truncated_count = 0usize;
|
||||
|
||||
let max_retries = turn_context.client.get_provider().stream_max_retries();
|
||||
@@ -83,6 +85,7 @@ async fn run_compact_task_inner(
|
||||
sess.persist_rollout_items(&[rollout_item]).await;
|
||||
|
||||
loop {
|
||||
let turn_input = history.get_history();
|
||||
let prompt = Prompt {
|
||||
input: turn_input.clone(),
|
||||
..Default::default()
|
||||
@@ -107,7 +110,11 @@ async fn run_compact_task_inner(
|
||||
}
|
||||
Err(e @ CodexErr::ContextWindowExceeded) => {
|
||||
if turn_input.len() > 1 {
|
||||
turn_input.remove(0);
|
||||
// Trim from the beginning to preserve cache (prefix-based) and keep recent messages intact.
|
||||
error!(
|
||||
"Context window exceeded while compacting; removing oldest history item. Error: {e}"
|
||||
);
|
||||
history.remove_first_item();
|
||||
truncated_count += 1;
|
||||
retries = 0;
|
||||
continue;
|
||||
@@ -181,23 +188,13 @@ pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
|
||||
pub(crate) fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter_map(|item| match item {
|
||||
ResponseItem::Message { role, content, .. } if role == "user" => {
|
||||
content_items_to_text(content)
|
||||
}
|
||||
.filter_map(|item| match crate::event_mapping::parse_turn_item(item) {
|
||||
Some(TurnItem::UserMessage(user)) => Some(user.message()),
|
||||
_ => None,
|
||||
})
|
||||
.filter(|text| !is_session_prefix_message(text))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn is_session_prefix_message(text: &str) -> bool {
|
||||
matches!(
|
||||
InputMessageKind::from(("user", text)),
|
||||
InputMessageKind::UserInstructions | InputMessageKind::EnvironmentContext
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn build_compacted_history(
|
||||
initial_context: Vec<ResponseItem>,
|
||||
user_messages: &[String],
|
||||
@@ -319,21 +316,16 @@ mod tests {
|
||||
ResponseItem::Message {
|
||||
id: Some("user".to_string()),
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentItem::InputText {
|
||||
text: "first".to_string(),
|
||||
},
|
||||
ContentItem::OutputText {
|
||||
text: "second".to_string(),
|
||||
},
|
||||
],
|
||||
content: vec![ContentItem::InputText {
|
||||
text: "first".to_string(),
|
||||
}],
|
||||
},
|
||||
ResponseItem::Other,
|
||||
];
|
||||
|
||||
let collected = collect_user_messages(&items);
|
||||
|
||||
assert_eq!(vec!["first\nsecond".to_string()], collected);
|
||||
assert_eq!(vec!["first".to_string()], collected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1220,7 +1220,7 @@ impl Config {
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut approval_policy = approval_policy_override
|
||||
let approval_policy = approval_policy_override
|
||||
.or(config_profile.approval_policy)
|
||||
.or(cfg.approval_policy)
|
||||
.unwrap_or_else(|| {
|
||||
@@ -1328,10 +1328,6 @@ impl Config {
|
||||
.or(cfg.review_model)
|
||||
.unwrap_or_else(default_review_model);
|
||||
|
||||
if features.enabled(Feature::ApproveAll) {
|
||||
approval_policy = AskForApproval::OnRequest;
|
||||
}
|
||||
|
||||
let config = Self {
|
||||
model,
|
||||
review_model,
|
||||
@@ -1711,26 +1707,6 @@ trust_level = "trusted"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approve_all_feature_forces_on_request_policy() -> std::io::Result<()> {
|
||||
let cfg = r#"
|
||||
[features]
|
||||
approve_all = true
|
||||
"#;
|
||||
let parsed = toml::from_str::<ConfigToml>(cfg)
|
||||
.expect("TOML deserialization should succeed for approve_all feature");
|
||||
let temp_dir = TempDir::new()?;
|
||||
let config = Config::load_from_base_config_with_overrides(
|
||||
parsed,
|
||||
ConfigOverrides::default(),
|
||||
temp_dir.path().to_path_buf(),
|
||||
)?;
|
||||
|
||||
assert!(config.features.enabled(Feature::ApproveAll));
|
||||
assert_eq!(config.approval_policy, AskForApproval::OnRequest);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn config_defaults_to_auto_oauth_store_mode() -> std::io::Result<()> {
|
||||
let codex_home = TempDir::new()?;
|
||||
|
||||
970
codex-rs/core/src/context_manager/manager.rs
Normal file
970
codex-rs/core/src/context_manager/manager.rs
Normal file
@@ -0,0 +1,970 @@
|
||||
use crate::context_manager::truncation::truncate_context_output;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use codex_protocol::protocol::TokenUsageInfo;
|
||||
use tracing::error;
|
||||
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct ContextManager {
|
||||
/// The oldest items are at the beginning of the vector.
|
||||
items: Vec<ResponseItem>,
|
||||
token_info: Option<TokenUsageInfo>,
|
||||
}
|
||||
|
||||
impl ContextManager {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
items: Vec::new(),
|
||||
token_info: TokenUsageInfo::new_or_append(&None, &None, None),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
|
||||
self.token_info.clone()
|
||||
}
|
||||
|
||||
pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
|
||||
match &mut self.token_info {
|
||||
Some(info) => info.fill_to_context_window(context_window),
|
||||
None => {
|
||||
self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `items` is ordered from oldest to newest.
|
||||
pub(crate) fn record_items<I>(&mut self, items: I)
|
||||
where
|
||||
I: IntoIterator,
|
||||
I::Item: std::ops::Deref<Target = ResponseItem>,
|
||||
{
|
||||
for item in items {
|
||||
if !is_api_message(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let processed = Self::process_item(&item);
|
||||
self.items.push(processed);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_history(&mut self) -> Vec<ResponseItem> {
|
||||
self.normalize_history();
|
||||
self.contents()
|
||||
}
|
||||
|
||||
pub(crate) fn remove_first_item(&mut self) {
|
||||
if !self.items.is_empty() {
|
||||
// Remove the oldest item (front of the list). Items are ordered from
|
||||
// oldest → newest, so index 0 is the first entry recorded.
|
||||
let removed = self.items.remove(0);
|
||||
// If the removed item participates in a call/output pair, also remove
|
||||
// its corresponding counterpart to keep the invariants intact without
|
||||
// running a full normalization pass.
|
||||
self.remove_corresponding_for(&removed);
|
||||
}
|
||||
}
|
||||
|
||||
/// This function enforces a couple of invariants on the in-memory history:
|
||||
/// 1. every call (function/custom) has a corresponding output entry
|
||||
/// 2. every output has a corresponding call entry
|
||||
fn normalize_history(&mut self) {
|
||||
// all function/tool calls must have a corresponding output
|
||||
self.ensure_call_outputs_present();
|
||||
|
||||
// all outputs must have a corresponding function/tool call
|
||||
self.remove_orphan_outputs();
|
||||
}
|
||||
|
||||
fn process_item(item: &ResponseItem) -> ResponseItem {
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
let truncated_content = truncate_context_output(output.content.as_str());
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: truncated_content,
|
||||
success: output.success,
|
||||
},
|
||||
}
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, output } => {
|
||||
let truncated = truncate_context_output(output);
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: truncated,
|
||||
}
|
||||
}
|
||||
_ => item.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a clone of the contents in the transcript.
|
||||
fn contents(&self) -> Vec<ResponseItem> {
|
||||
self.items.clone()
|
||||
}
|
||||
|
||||
fn ensure_call_outputs_present(&mut self) {
|
||||
// Collect synthetic outputs to insert immediately after their calls.
|
||||
// Store the insertion position (index of call) alongside the item so
|
||||
// we can insert in reverse order and avoid index shifting.
|
||||
let mut missing_outputs_to_insert: Vec<(usize, ResponseItem)> = Vec::new();
|
||||
|
||||
for (idx, item) in self.items.iter().enumerate() {
|
||||
match item {
|
||||
ResponseItem::FunctionCall { call_id, .. } => {
|
||||
let has_output = self.items.iter().any(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_output {
|
||||
error_or_panic(format!(
|
||||
"Function call output is missing for call id: {call_id}"
|
||||
));
|
||||
missing_outputs_to_insert.push((
|
||||
idx,
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: truncate_context_output("aborted"),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
ResponseItem::CustomToolCall { call_id, .. } => {
|
||||
let has_output = self.items.iter().any(|i| match i {
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_output {
|
||||
error_or_panic(format!(
|
||||
"Custom tool call output is missing for call id: {call_id}"
|
||||
));
|
||||
missing_outputs_to_insert.push((
|
||||
idx,
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: truncate_context_output("aborted"),
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
// LocalShellCall is represented in upstream streams by a FunctionCallOutput
|
||||
ResponseItem::LocalShellCall { call_id, .. } => {
|
||||
if let Some(call_id) = call_id.as_ref() {
|
||||
let has_output = self.items.iter().any(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_output {
|
||||
error_or_panic(format!(
|
||||
"Local shell call output is missing for call id: {call_id}"
|
||||
));
|
||||
missing_outputs_to_insert.push((
|
||||
idx,
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: truncate_context_output("aborted"),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::Other
|
||||
| ResponseItem::Message { .. } => {
|
||||
// nothing to do for these variants
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !missing_outputs_to_insert.is_empty() {
|
||||
// Insert from the end to avoid shifting subsequent indices.
|
||||
missing_outputs_to_insert.sort_by_key(|(i, _)| *i);
|
||||
for (idx, item) in missing_outputs_to_insert.into_iter().rev() {
|
||||
let insert_pos = idx + 1; // place immediately after the call
|
||||
if insert_pos <= self.items.len() {
|
||||
self.items.insert(insert_pos, item);
|
||||
} else {
|
||||
self.items.push(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_orphan_outputs(&mut self) {
|
||||
// Work on a snapshot to avoid borrowing `self.items` while mutating it.
|
||||
let snapshot = self.items.clone();
|
||||
let mut orphan_output_call_ids: std::collections::HashSet<String> =
|
||||
std::collections::HashSet::new();
|
||||
|
||||
for item in &snapshot {
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => {
|
||||
let has_call = snapshot.iter().any(|i| match i {
|
||||
ResponseItem::FunctionCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(existing),
|
||||
..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_call {
|
||||
error_or_panic(format!("Function call is missing for call id: {call_id}"));
|
||||
orphan_output_call_ids.insert(call_id.clone());
|
||||
}
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, .. } => {
|
||||
let has_call = snapshot.iter().any(|i| match i {
|
||||
ResponseItem::CustomToolCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
|
||||
if !has_call {
|
||||
error_or_panic(format!(
|
||||
"Custom tool call is missing for call id: {call_id}"
|
||||
));
|
||||
orphan_output_call_ids.insert(call_id.clone());
|
||||
}
|
||||
}
|
||||
ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::Other
|
||||
| ResponseItem::Message { .. } => {
|
||||
// nothing to do for these variants
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !orphan_output_call_ids.is_empty() {
|
||||
let ids = orphan_output_call_ids;
|
||||
self.items.retain(|i| match i {
|
||||
ResponseItem::FunctionCallOutput { call_id, .. }
|
||||
| ResponseItem::CustomToolCallOutput { call_id, .. } => !ids.contains(call_id),
|
||||
_ => true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
|
||||
self.items = items
|
||||
.into_iter()
|
||||
.map(|item| Self::process_item(&item))
|
||||
.collect();
|
||||
}
|
||||
|
||||
/// Removes the corresponding paired item for the provided `item`, if any.
|
||||
///
|
||||
/// Pairs:
|
||||
/// - FunctionCall <-> FunctionCallOutput
|
||||
/// - CustomToolCall <-> CustomToolCallOutput
|
||||
/// - LocalShellCall(call_id: Some) <-> FunctionCallOutput
|
||||
fn remove_corresponding_for(&mut self, item: &ResponseItem) {
|
||||
match item {
|
||||
ResponseItem::FunctionCall { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::CustomToolCall { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(call_id),
|
||||
..
|
||||
} => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::FunctionCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
ResponseItem::LocalShellCall {
|
||||
call_id: Some(existing),
|
||||
..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, .. } => {
|
||||
self.remove_first_matching(|i| match i {
|
||||
ResponseItem::CustomToolCall {
|
||||
call_id: existing, ..
|
||||
} => existing == call_id,
|
||||
_ => false,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the first item matching the predicate.
|
||||
fn remove_first_matching<F>(&mut self, predicate: F)
|
||||
where
|
||||
F: FnMut(&ResponseItem) -> bool,
|
||||
{
|
||||
if let Some(pos) = self.items.iter().position(predicate) {
|
||||
self.items.remove(pos);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn update_token_info(
|
||||
&mut self,
|
||||
usage: &TokenUsage,
|
||||
model_context_window: Option<i64>,
|
||||
) {
|
||||
self.token_info = TokenUsageInfo::new_or_append(
|
||||
&self.token_info,
|
||||
&Some(usage.clone()),
|
||||
model_context_window,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn error_or_panic(message: String) {
|
||||
if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") {
|
||||
panic!("{message}");
|
||||
} else {
|
||||
error!("{message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Anything that is not a system message or "reasoning" message is considered
|
||||
/// an API message.
|
||||
fn is_api_message(message: &ResponseItem) -> bool {
|
||||
match message {
|
||||
ResponseItem::Message { role, .. } => role.as_str() != "system",
|
||||
ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. } => true,
|
||||
ResponseItem::Other => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::context_manager::truncation::TELEMETRY_PREVIEW_MAX_BYTES;
|
||||
use crate::context_manager::truncation::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::LocalShellAction;
|
||||
use codex_protocol::models::LocalShellExecAction;
|
||||
use codex_protocol::models::LocalShellStatus;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn assistant_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn create_history_with_items(items: Vec<ResponseItem>) -> ContextManager {
|
||||
let mut h = ContextManager::new();
|
||||
h.record_items(items.iter());
|
||||
h
|
||||
}
|
||||
|
||||
fn user_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filters_non_api_messages() {
|
||||
let mut h = ContextManager::default();
|
||||
// System message is not an API message; Other is ignored.
|
||||
let system = ResponseItem::Message {
|
||||
id: None,
|
||||
role: "system".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "ignored".to_string(),
|
||||
}],
|
||||
};
|
||||
h.record_items([&system, &ResponseItem::Other]);
|
||||
|
||||
// User and assistant should be retained.
|
||||
let u = user_msg("hi");
|
||||
let a = assistant_msg("hello");
|
||||
h.record_items([&u, &a]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hi".to_string()
|
||||
}]
|
||||
},
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hello".to_string()
|
||||
}]
|
||||
}
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn record_items_truncates_function_call_output() {
|
||||
let mut h = ContextManager::new();
|
||||
let long_content = "a".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 32);
|
||||
let item = ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-long".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: long_content.clone(),
|
||||
success: Some(true),
|
||||
},
|
||||
};
|
||||
|
||||
h.record_items([&item]);
|
||||
|
||||
let stored = h.contents();
|
||||
let ResponseItem::FunctionCallOutput { output, .. } = &stored[0] else {
|
||||
panic!("expected FunctionCallOutput variant");
|
||||
};
|
||||
assert!(
|
||||
output
|
||||
.content
|
||||
.ends_with(TELEMETRY_PREVIEW_TRUNCATION_NOTICE),
|
||||
"truncated content should end with notice"
|
||||
);
|
||||
assert!(
|
||||
output.content.len() < long_content.len(),
|
||||
"content should shrink after truncation"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn record_items_truncates_custom_tool_output() {
|
||||
let mut h = ContextManager::new();
|
||||
let long_content = "b".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 64);
|
||||
let item = ResponseItem::CustomToolCallOutput {
|
||||
call_id: "custom-long".to_string(),
|
||||
output: long_content.clone(),
|
||||
};
|
||||
|
||||
h.record_items([&item]);
|
||||
|
||||
let stored = h.contents();
|
||||
let ResponseItem::CustomToolCallOutput { output, .. } = &stored[0] else {
|
||||
panic!("expected CustomToolCallOutput variant");
|
||||
};
|
||||
assert!(
|
||||
output.ends_with(TELEMETRY_PREVIEW_TRUNCATION_NOTICE),
|
||||
"truncated output should end with notice"
|
||||
);
|
||||
assert!(
|
||||
output.len() < long_content.len(),
|
||||
"output should shrink after truncation"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_removes_matching_output_for_function_call() {
|
||||
let items = vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_removes_matching_call_for_output() {
|
||||
let items = vec![
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-2".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-2".to_string(),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_handles_local_shell_pair() {
|
||||
let items = vec![
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("call-3".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-3".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_handles_custom_tool_pair() {
|
||||
let items = vec![
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-1".to_string(),
|
||||
name: "my_tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "tool-1".to_string(),
|
||||
output: "ok".to_string(),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.remove_first_item();
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
//TODO(aibrahim): run CI in release mode.
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_adds_missing_output_for_function_call() {
|
||||
let items = vec![ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-x".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-x".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-x".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_adds_missing_output_for_custom_tool_call() {
|
||||
let items = vec![ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-x".to_string(),
|
||||
name: "custom".to_string(),
|
||||
input: "{}".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-x".to_string(),
|
||||
name: "custom".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "tool-x".to_string(),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_adds_missing_output_for_local_shell_call_with_id() {
|
||||
let items = vec![ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("shell-1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("shell-1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "shell-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_removes_orphan_function_call_output() {
|
||||
let items = vec![ResponseItem::FunctionCallOutput {
|
||||
call_id: "orphan-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_removes_orphan_custom_tool_call_output() {
|
||||
let items = vec![ResponseItem::CustomToolCallOutput {
|
||||
call_id: "orphan-2".to_string(),
|
||||
output: "ok".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(h.contents(), vec![]);
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[test]
|
||||
fn normalize_mixed_inserts_and_removals() {
|
||||
let items = vec![
|
||||
// Will get an inserted output
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "f1".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "c1".to_string(),
|
||||
},
|
||||
// Orphan output that should be removed
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "c2".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
// Will get an inserted custom tool output
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "t1".to_string(),
|
||||
name: "tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
// Local shell call also gets an inserted function call output
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("s1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
|
||||
h.normalize_history();
|
||||
|
||||
assert_eq!(
|
||||
h.contents(),
|
||||
vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "f1".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "c1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "c1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "t1".to_string(),
|
||||
name: "tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "t1".to_string(),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("s1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "s1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
// In debug builds we panic on normalization errors instead of silently fixing them.
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_adds_missing_output_for_function_call_panics_in_debug() {
|
||||
let items = vec![ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "do_it".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-x".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_adds_missing_output_for_custom_tool_call_panics_in_debug() {
|
||||
let items = vec![ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "tool-x".to_string(),
|
||||
name: "custom".to_string(),
|
||||
input: "{}".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_adds_missing_output_for_local_shell_call_with_id_panics_in_debug() {
|
||||
let items = vec![ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("shell-1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_removes_orphan_function_call_output_panics_in_debug() {
|
||||
let items = vec![ResponseItem::FunctionCallOutput {
|
||||
call_id: "orphan-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_removes_orphan_custom_tool_call_output_panics_in_debug() {
|
||||
let items = vec![ResponseItem::CustomToolCallOutput {
|
||||
call_id: "orphan-2".to_string(),
|
||||
output: "ok".to_string(),
|
||||
}];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn normalize_mixed_inserts_and_removals_panics_in_debug() {
|
||||
let items = vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "f1".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "c1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "c2".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: None,
|
||||
call_id: "t1".to_string(),
|
||||
name: "tool".to_string(),
|
||||
input: "{}".to_string(),
|
||||
},
|
||||
ResponseItem::LocalShellCall {
|
||||
id: None,
|
||||
call_id: Some("s1".to_string()),
|
||||
status: LocalShellStatus::Completed,
|
||||
action: LocalShellAction::Exec(LocalShellExecAction {
|
||||
command: vec!["echo".to_string()],
|
||||
timeout_ms: None,
|
||||
working_directory: None,
|
||||
env: None,
|
||||
user: None,
|
||||
}),
|
||||
},
|
||||
];
|
||||
let mut h = create_history_with_items(items);
|
||||
h.normalize_history();
|
||||
}
|
||||
}
|
||||
3
codex-rs/core/src/context_manager/mod.rs
Normal file
3
codex-rs/core/src/context_manager/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
mod manager;
|
||||
pub(crate) use manager::ContextManager;
|
||||
pub mod truncation;
|
||||
159
codex-rs/core/src/context_manager/truncation.rs
Normal file
159
codex-rs/core/src/context_manager/truncation.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct TruncationConfig {
|
||||
pub max_bytes: usize,
|
||||
pub max_lines: usize,
|
||||
pub truncation_notice: &'static str,
|
||||
}
|
||||
|
||||
// Telemetry preview limits: keep log events smaller than model budgets.
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
|
||||
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
|
||||
"[... telemetry preview truncated ...]";
|
||||
|
||||
pub(crate) const CONTEXT_OUTPUT_TRUNCATION: TruncationConfig = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
|
||||
pub(crate) fn truncate_with_config(content: &str, config: TruncationConfig) -> String {
|
||||
let TruncationConfig {
|
||||
max_bytes,
|
||||
max_lines,
|
||||
truncation_notice,
|
||||
} = config;
|
||||
|
||||
let truncated_slice = take_bytes_at_char_boundary(content, max_bytes);
|
||||
let truncated_by_bytes = truncated_slice.len() < content.len();
|
||||
|
||||
let mut preview = String::new();
|
||||
let mut lines_iter = truncated_slice.lines();
|
||||
for idx in 0..max_lines {
|
||||
match lines_iter.next() {
|
||||
Some(line) => {
|
||||
if idx > 0 {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(line);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
let truncated_by_lines = lines_iter.next().is_some();
|
||||
|
||||
if !truncated_by_bytes && !truncated_by_lines {
|
||||
return content.to_string();
|
||||
}
|
||||
|
||||
if preview.len() < truncated_slice.len()
|
||||
&& truncated_slice
|
||||
.as_bytes()
|
||||
.get(preview.len())
|
||||
.is_some_and(|byte| *byte == b'\n')
|
||||
{
|
||||
preview.push('\n');
|
||||
}
|
||||
|
||||
if !preview.is_empty() && !preview.ends_with('\n') {
|
||||
preview.push('\n');
|
||||
}
|
||||
|
||||
preview.push_str(truncation_notice);
|
||||
preview
|
||||
}
|
||||
|
||||
pub(crate) fn truncate_context_output(content: &str) -> String {
|
||||
truncate_with_config(content, CONTEXT_OUTPUT_TRUNCATION)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn truncate_with_config_returns_original_within_limits() {
|
||||
let content = "short output";
|
||||
let config = TruncationConfig {
|
||||
max_bytes: 64,
|
||||
max_lines: 5,
|
||||
truncation_notice: "[notice]",
|
||||
};
|
||||
assert_eq!(truncate_with_config(content, config), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_with_config_truncates_by_bytes() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: 16,
|
||||
max_lines: 10,
|
||||
truncation_notice: "[notice]",
|
||||
};
|
||||
let content = "abcdefghijklmnopqrstuvwxyz";
|
||||
let truncated = truncate_with_config(content, config);
|
||||
assert!(truncated.contains("[notice]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_with_config_truncates_by_lines() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: 1024,
|
||||
max_lines: 2,
|
||||
truncation_notice: "[notice]",
|
||||
};
|
||||
let content = "l1\nl2\nl3\nl4";
|
||||
let truncated = truncate_with_config(content, config);
|
||||
assert!(truncated.lines().count() <= 3);
|
||||
assert!(truncated.contains("[notice]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_returns_original_within_limits() {
|
||||
let content = "short output";
|
||||
let config = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
assert_eq!(truncate_with_config(content, config), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_bytes() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
|
||||
let preview = truncate_with_config(&content, config);
|
||||
|
||||
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
assert!(
|
||||
preview.len()
|
||||
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_lines() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
|
||||
.map(|idx| format!("line {idx}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let preview = truncate_with_config(&content, config);
|
||||
let lines: Vec<&str> = preview.lines().collect();
|
||||
|
||||
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
|
||||
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
}
|
||||
}
|
||||
@@ -1,120 +0,0 @@
|
||||
use codex_protocol::models::ResponseItem;
|
||||
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct ConversationHistory {
|
||||
/// The oldest items are at the beginning of the vector.
|
||||
items: Vec<ResponseItem>,
|
||||
}
|
||||
|
||||
impl ConversationHistory {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self { items: Vec::new() }
|
||||
}
|
||||
|
||||
/// Returns a clone of the contents in the transcript.
|
||||
pub(crate) fn contents(&self) -> Vec<ResponseItem> {
|
||||
self.items.clone()
|
||||
}
|
||||
|
||||
/// `items` is ordered from oldest to newest.
|
||||
pub(crate) fn record_items<I>(&mut self, items: I)
|
||||
where
|
||||
I: IntoIterator,
|
||||
I::Item: std::ops::Deref<Target = ResponseItem>,
|
||||
{
|
||||
for item in items {
|
||||
if !is_api_message(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.items.push(item.clone());
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
|
||||
self.items = items;
|
||||
}
|
||||
}
|
||||
|
||||
/// Anything that is not a system message or "reasoning" message is considered
|
||||
/// an API message.
|
||||
fn is_api_message(message: &ResponseItem) -> bool {
|
||||
match message {
|
||||
ResponseItem::Message { role, .. } => role.as_str() != "system",
|
||||
ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. } => true,
|
||||
ResponseItem::Other => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use codex_protocol::models::ContentItem;
|
||||
|
||||
fn assistant_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn user_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filters_non_api_messages() {
|
||||
let mut h = ConversationHistory::default();
|
||||
// System message is not an API message; Other is ignored.
|
||||
let system = ResponseItem::Message {
|
||||
id: None,
|
||||
role: "system".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "ignored".to_string(),
|
||||
}],
|
||||
};
|
||||
h.record_items([&system, &ResponseItem::Other]);
|
||||
|
||||
// User and assistant should be retained.
|
||||
let u = user_msg("hi");
|
||||
let a = assistant_msg("hello");
|
||||
h.record_items([&u, &a]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hi".to_string()
|
||||
}]
|
||||
},
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "hello".to_string()
|
||||
}]
|
||||
}
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -3,8 +3,6 @@ use crate::CodexAuth;
|
||||
use crate::codex::Codex;
|
||||
use crate::codex::CodexSpawnOk;
|
||||
use crate::codex::INITIAL_SUBMIT_ID;
|
||||
use crate::codex::compact::content_items_to_text;
|
||||
use crate::codex::compact::is_session_prefix_message;
|
||||
use crate::codex_conversation::CodexConversation;
|
||||
use crate::config::Config;
|
||||
use crate::error::CodexErr;
|
||||
@@ -14,6 +12,7 @@ use crate::protocol::EventMsg;
|
||||
use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::rollout::RolloutRecorder;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::InitialHistory;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
@@ -182,9 +181,11 @@ fn truncate_before_nth_user_message(history: InitialHistory, n: usize) -> Initia
|
||||
// Find indices of user message inputs in rollout order.
|
||||
let mut user_positions: Vec<usize> = Vec::new();
|
||||
for (idx, item) in items.iter().enumerate() {
|
||||
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = item
|
||||
&& role == "user"
|
||||
&& content_items_to_text(content).is_some_and(|text| !is_session_prefix_message(&text))
|
||||
if let RolloutItem::ResponseItem(item @ ResponseItem::Message { .. }) = item
|
||||
&& matches!(
|
||||
crate::event_mapping::parse_turn_item(item),
|
||||
Some(TurnItem::UserMessage(_))
|
||||
)
|
||||
{
|
||||
user_positions.push(idx);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use http::Error as HttpError;
|
||||
use reqwest::IntoUrl;
|
||||
use reqwest::Method;
|
||||
use reqwest::Response;
|
||||
use reqwest::header::HeaderName;
|
||||
use reqwest::header::HeaderValue;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::OnceLock;
|
||||
@@ -22,6 +30,130 @@ use std::sync::OnceLock;
|
||||
pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));
|
||||
pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs";
|
||||
pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CodexHttpClient {
|
||||
inner: reqwest::Client,
|
||||
}
|
||||
|
||||
impl CodexHttpClient {
|
||||
fn new(inner: reqwest::Client) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub fn get<U>(&self, url: U) -> CodexRequestBuilder
|
||||
where
|
||||
U: IntoUrl,
|
||||
{
|
||||
self.request(Method::GET, url)
|
||||
}
|
||||
|
||||
pub fn post<U>(&self, url: U) -> CodexRequestBuilder
|
||||
where
|
||||
U: IntoUrl,
|
||||
{
|
||||
self.request(Method::POST, url)
|
||||
}
|
||||
|
||||
pub fn request<U>(&self, method: Method, url: U) -> CodexRequestBuilder
|
||||
where
|
||||
U: IntoUrl,
|
||||
{
|
||||
let url_str = url.as_str().to_string();
|
||||
CodexRequestBuilder::new(self.inner.request(method.clone(), url), method, url_str)
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use = "requests are not sent unless `send` is awaited"]
|
||||
#[derive(Debug)]
|
||||
pub struct CodexRequestBuilder {
|
||||
builder: reqwest::RequestBuilder,
|
||||
method: Method,
|
||||
url: String,
|
||||
}
|
||||
|
||||
impl CodexRequestBuilder {
|
||||
fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self {
|
||||
Self {
|
||||
builder,
|
||||
method,
|
||||
url,
|
||||
}
|
||||
}
|
||||
|
||||
fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self {
|
||||
Self {
|
||||
builder: f(self.builder),
|
||||
method: self.method,
|
||||
url: self.url,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn header<K, V>(self, key: K, value: V) -> Self
|
||||
where
|
||||
HeaderName: TryFrom<K>,
|
||||
<HeaderName as TryFrom<K>>::Error: Into<HttpError>,
|
||||
HeaderValue: TryFrom<V>,
|
||||
<HeaderValue as TryFrom<V>>::Error: Into<HttpError>,
|
||||
{
|
||||
self.map(|builder| builder.header(key, value))
|
||||
}
|
||||
|
||||
pub fn bearer_auth<T>(self, token: T) -> Self
|
||||
where
|
||||
T: Display,
|
||||
{
|
||||
self.map(|builder| builder.bearer_auth(token))
|
||||
}
|
||||
|
||||
pub fn json<T>(self, value: &T) -> Self
|
||||
where
|
||||
T: ?Sized + Serialize,
|
||||
{
|
||||
self.map(|builder| builder.json(value))
|
||||
}
|
||||
|
||||
pub async fn send(self) -> Result<Response, reqwest::Error> {
|
||||
match self.builder.send().await {
|
||||
Ok(response) => {
|
||||
let request_ids = Self::extract_request_ids(&response);
|
||||
tracing::debug!(
|
||||
method = %self.method,
|
||||
url = %self.url,
|
||||
status = %response.status(),
|
||||
request_ids = ?request_ids,
|
||||
version = ?response.version(),
|
||||
"Request completed"
|
||||
);
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
Err(error) => {
|
||||
let status = error.status();
|
||||
tracing::debug!(
|
||||
method = %self.method,
|
||||
url = %self.url,
|
||||
status = status.map(|s| s.as_u16()),
|
||||
error = %error,
|
||||
"Request failed"
|
||||
);
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_request_ids(response: &Response) -> HashMap<String, String> {
|
||||
["cf-ray", "x-request-id", "x-oai-request-id"]
|
||||
.iter()
|
||||
.filter_map(|&name| {
|
||||
let header_name = HeaderName::from_static(name);
|
||||
let value = response.headers().get(header_name)?;
|
||||
let value = value.to_str().ok()?.to_owned();
|
||||
Some((name.to_owned(), value))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Originator {
|
||||
pub value: String,
|
||||
@@ -124,8 +256,8 @@ fn sanitize_user_agent(candidate: String, fallback: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a reqwest client with default `originator` and `User-Agent` headers set.
|
||||
pub fn create_client() -> reqwest::Client {
|
||||
/// Create an HTTP client with default `originator` and `User-Agent` headers set.
|
||||
pub fn create_client() -> CodexHttpClient {
|
||||
use reqwest::header::HeaderMap;
|
||||
|
||||
let mut headers = HeaderMap::new();
|
||||
@@ -140,7 +272,8 @@ pub fn create_client() -> reqwest::Client {
|
||||
builder = builder.no_proxy();
|
||||
}
|
||||
|
||||
builder.build().unwrap_or_else(|_| reqwest::Client::new())
|
||||
let inner = builder.build().unwrap_or_else(|_| reqwest::Client::new());
|
||||
CodexHttpClient::new(inner)
|
||||
}
|
||||
|
||||
fn is_sandboxed() -> bool {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::codex::ProcessedResponseItem;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::token_data::KnownPlan;
|
||||
use crate::token_data::PlanType;
|
||||
@@ -53,8 +54,11 @@ pub enum SandboxErr {
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum CodexErr {
|
||||
// todo(aibrahim): git rid of this error carrying the dangling artifacts
|
||||
#[error("turn aborted")]
|
||||
TurnAborted,
|
||||
TurnAborted {
|
||||
dangling_artifacts: Vec<ProcessedResponseItem>,
|
||||
},
|
||||
|
||||
/// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP
|
||||
/// handshake has succeeded but **before** it finished emitting `response.completed`.
|
||||
@@ -158,7 +162,9 @@ pub enum CodexErr {
|
||||
|
||||
impl From<CancelErr> for CodexErr {
|
||||
fn from(_: CancelErr) -> Self {
|
||||
CodexErr::TurnAborted
|
||||
CodexErr::TurnAborted {
|
||||
dangling_artifacts: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,139 +1,131 @@
|
||||
use crate::protocol::AgentMessageEvent;
|
||||
use crate::protocol::AgentReasoningEvent;
|
||||
use crate::protocol::AgentReasoningRawContentEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::InputMessageKind;
|
||||
use crate::protocol::UserMessageEvent;
|
||||
use crate::protocol::WebSearchEndEvent;
|
||||
use codex_protocol::items::AgentMessageContent;
|
||||
use codex_protocol::items::AgentMessageItem;
|
||||
use codex_protocol::items::ReasoningItem;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::items::UserMessageItem;
|
||||
use codex_protocol::items::WebSearchItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use codex_protocol::models::ReasoningItemReasoningSummary;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::models::WebSearchAction;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use tracing::warn;
|
||||
|
||||
/// Convert a `ResponseItem` into zero or more `EventMsg` values that the UI can render.
|
||||
///
|
||||
/// When `show_raw_agent_reasoning` is false, raw reasoning content events are omitted.
|
||||
pub(crate) fn map_response_item_to_event_messages(
|
||||
item: &ResponseItem,
|
||||
show_raw_agent_reasoning: bool,
|
||||
) -> Vec<EventMsg> {
|
||||
fn is_session_prefix(text: &str) -> bool {
|
||||
let trimmed = text.trim_start();
|
||||
let lowered = trimmed.to_ascii_lowercase();
|
||||
lowered.starts_with("<environment_context>") || lowered.starts_with("<user_instructions>")
|
||||
}
|
||||
|
||||
fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
|
||||
let mut content: Vec<UserInput> = Vec::new();
|
||||
|
||||
for content_item in message.iter() {
|
||||
match content_item {
|
||||
ContentItem::InputText { text } => {
|
||||
if is_session_prefix(text) {
|
||||
return None;
|
||||
}
|
||||
content.push(UserInput::Text { text: text.clone() });
|
||||
}
|
||||
ContentItem::InputImage { image_url } => {
|
||||
content.push(UserInput::Image {
|
||||
image_url: image_url.clone(),
|
||||
});
|
||||
}
|
||||
ContentItem::OutputText { text } => {
|
||||
if is_session_prefix(text) {
|
||||
return None;
|
||||
}
|
||||
warn!("Output text in user message: {}", text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(UserMessageItem::new(&content))
|
||||
}
|
||||
|
||||
fn parse_agent_message(message: &[ContentItem]) -> AgentMessageItem {
|
||||
let mut content: Vec<AgentMessageContent> = Vec::new();
|
||||
for content_item in message.iter() {
|
||||
match content_item {
|
||||
ContentItem::OutputText { text } => {
|
||||
content.push(AgentMessageContent::Text { text: text.clone() });
|
||||
}
|
||||
_ => {
|
||||
warn!(
|
||||
"Unexpected content item in agent message: {:?}",
|
||||
content_item
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
AgentMessageItem::new(&content)
|
||||
}
|
||||
|
||||
pub fn parse_turn_item(item: &ResponseItem) -> Option<TurnItem> {
|
||||
match item {
|
||||
ResponseItem::Message { role, content, .. } => {
|
||||
// Do not surface system messages as user events.
|
||||
if role == "system" {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut events: Vec<EventMsg> = Vec::new();
|
||||
let mut message_parts: Vec<String> = Vec::new();
|
||||
let mut images: Vec<String> = Vec::new();
|
||||
let mut kind: Option<InputMessageKind> = None;
|
||||
|
||||
for content_item in content.iter() {
|
||||
match content_item {
|
||||
ContentItem::InputText { text } => {
|
||||
if kind.is_none() {
|
||||
let trimmed = text.trim_start();
|
||||
kind = if trimmed.starts_with("<environment_context>") {
|
||||
Some(InputMessageKind::EnvironmentContext)
|
||||
} else if trimmed.starts_with("<user_instructions>") {
|
||||
Some(InputMessageKind::UserInstructions)
|
||||
} else {
|
||||
Some(InputMessageKind::Plain)
|
||||
};
|
||||
}
|
||||
message_parts.push(text.clone());
|
||||
}
|
||||
ContentItem::InputImage { image_url } => {
|
||||
images.push(image_url.clone());
|
||||
}
|
||||
ContentItem::OutputText { text } => {
|
||||
events.push(EventMsg::AgentMessage(AgentMessageEvent {
|
||||
message: text.clone(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !message_parts.is_empty() || !images.is_empty() {
|
||||
let message = if message_parts.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
message_parts.join("")
|
||||
};
|
||||
let images = if images.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(images)
|
||||
};
|
||||
|
||||
events.push(EventMsg::UserMessage(UserMessageEvent {
|
||||
message,
|
||||
kind,
|
||||
images,
|
||||
}));
|
||||
}
|
||||
|
||||
events
|
||||
}
|
||||
|
||||
ResponseItem::Reasoning {
|
||||
summary, content, ..
|
||||
} => {
|
||||
let mut events = Vec::new();
|
||||
for ReasoningItemReasoningSummary::SummaryText { text } in summary {
|
||||
events.push(EventMsg::AgentReasoning(AgentReasoningEvent {
|
||||
text: text.clone(),
|
||||
}));
|
||||
}
|
||||
if let Some(items) = content.as_ref().filter(|_| show_raw_agent_reasoning) {
|
||||
for c in items {
|
||||
let text = match c {
|
||||
ReasoningItemContent::ReasoningText { text }
|
||||
| ReasoningItemContent::Text { text } => text,
|
||||
};
|
||||
events.push(EventMsg::AgentReasoningRawContent(
|
||||
AgentReasoningRawContentEvent { text: text.clone() },
|
||||
));
|
||||
}
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
ResponseItem::WebSearchCall { id, action, .. } => match action {
|
||||
WebSearchAction::Search { query } => {
|
||||
let call_id = id.clone().unwrap_or_else(|| "".to_string());
|
||||
vec![EventMsg::WebSearchEnd(WebSearchEndEvent {
|
||||
call_id,
|
||||
query: query.clone(),
|
||||
})]
|
||||
}
|
||||
WebSearchAction::Other => Vec::new(),
|
||||
ResponseItem::Message { role, content, .. } => match role.as_str() {
|
||||
"user" => parse_user_message(content).map(TurnItem::UserMessage),
|
||||
"assistant" => Some(TurnItem::AgentMessage(parse_agent_message(content))),
|
||||
"system" => None,
|
||||
_ => None,
|
||||
},
|
||||
|
||||
// Variants that require side effects are handled by higher layers and do not emit events here.
|
||||
ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::Other => Vec::new(),
|
||||
ResponseItem::Reasoning {
|
||||
id,
|
||||
summary,
|
||||
content,
|
||||
..
|
||||
} => {
|
||||
let summary_text = summary
|
||||
.iter()
|
||||
.map(|entry| match entry {
|
||||
ReasoningItemReasoningSummary::SummaryText { text } => text.clone(),
|
||||
})
|
||||
.collect();
|
||||
let raw_content = content
|
||||
.clone()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|entry| match entry {
|
||||
ReasoningItemContent::ReasoningText { text }
|
||||
| ReasoningItemContent::Text { text } => text,
|
||||
})
|
||||
.collect();
|
||||
Some(TurnItem::Reasoning(ReasoningItem {
|
||||
id: id.clone(),
|
||||
summary_text,
|
||||
raw_content,
|
||||
}))
|
||||
}
|
||||
ResponseItem::WebSearchCall {
|
||||
id,
|
||||
action: WebSearchAction::Search { query },
|
||||
..
|
||||
} => Some(TurnItem::WebSearch(WebSearchItem {
|
||||
id: id.clone().unwrap_or_default(),
|
||||
query: query.clone(),
|
||||
})),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::map_response_item_to_event_messages;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::InputMessageKind;
|
||||
use assert_matches::assert_matches;
|
||||
use super::parse_turn_item;
|
||||
use codex_protocol::items::AgentMessageContent;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use codex_protocol::models::ReasoningItemReasoningSummary;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::models::WebSearchAction;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn maps_user_message_with_text_and_two_images() {
|
||||
fn parses_user_message_with_text_and_two_images() {
|
||||
let img1 = "https://example.com/one.png".to_string();
|
||||
let img2 = "https://example.com/two.jpg".to_string();
|
||||
|
||||
@@ -153,16 +145,128 @@ mod tests {
|
||||
],
|
||||
};
|
||||
|
||||
let events = map_response_item_to_event_messages(&item, false);
|
||||
assert_eq!(events.len(), 1, "expected a single user message event");
|
||||
let turn_item = parse_turn_item(&item).expect("expected user message turn item");
|
||||
|
||||
match &events[0] {
|
||||
EventMsg::UserMessage(user) => {
|
||||
assert_eq!(user.message, "Hello world");
|
||||
assert_matches!(user.kind, Some(InputMessageKind::Plain));
|
||||
assert_eq!(user.images, Some(vec![img1, img2]));
|
||||
match turn_item {
|
||||
TurnItem::UserMessage(user) => {
|
||||
let expected_content = vec![
|
||||
UserInput::Text {
|
||||
text: "Hello world".to_string(),
|
||||
},
|
||||
UserInput::Image { image_url: img1 },
|
||||
UserInput::Image { image_url: img2 },
|
||||
];
|
||||
assert_eq!(user.content, expected_content);
|
||||
}
|
||||
other => panic!("expected UserMessage, got {other:?}"),
|
||||
other => panic!("expected TurnItem::UserMessage, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_agent_message() {
|
||||
let item = ResponseItem::Message {
|
||||
id: Some("msg-1".to_string()),
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: "Hello from Codex".to_string(),
|
||||
}],
|
||||
};
|
||||
|
||||
let turn_item = parse_turn_item(&item).expect("expected agent message turn item");
|
||||
|
||||
match turn_item {
|
||||
TurnItem::AgentMessage(message) => {
|
||||
let Some(AgentMessageContent::Text { text }) = message.content.first() else {
|
||||
panic!("expected agent message text content");
|
||||
};
|
||||
assert_eq!(text, "Hello from Codex");
|
||||
}
|
||||
other => panic!("expected TurnItem::AgentMessage, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_reasoning_summary_and_raw_content() {
|
||||
let item = ResponseItem::Reasoning {
|
||||
id: "reasoning_1".to_string(),
|
||||
summary: vec![
|
||||
ReasoningItemReasoningSummary::SummaryText {
|
||||
text: "Step 1".to_string(),
|
||||
},
|
||||
ReasoningItemReasoningSummary::SummaryText {
|
||||
text: "Step 2".to_string(),
|
||||
},
|
||||
],
|
||||
content: Some(vec![ReasoningItemContent::ReasoningText {
|
||||
text: "raw details".to_string(),
|
||||
}]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
|
||||
let turn_item = parse_turn_item(&item).expect("expected reasoning turn item");
|
||||
|
||||
match turn_item {
|
||||
TurnItem::Reasoning(reasoning) => {
|
||||
assert_eq!(
|
||||
reasoning.summary_text,
|
||||
vec!["Step 1".to_string(), "Step 2".to_string()]
|
||||
);
|
||||
assert_eq!(reasoning.raw_content, vec!["raw details".to_string()]);
|
||||
}
|
||||
other => panic!("expected TurnItem::Reasoning, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_reasoning_including_raw_content() {
|
||||
let item = ResponseItem::Reasoning {
|
||||
id: "reasoning_2".to_string(),
|
||||
summary: vec![ReasoningItemReasoningSummary::SummaryText {
|
||||
text: "Summarized step".to_string(),
|
||||
}],
|
||||
content: Some(vec![
|
||||
ReasoningItemContent::ReasoningText {
|
||||
text: "raw step".to_string(),
|
||||
},
|
||||
ReasoningItemContent::Text {
|
||||
text: "final thought".to_string(),
|
||||
},
|
||||
]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
|
||||
let turn_item = parse_turn_item(&item).expect("expected reasoning turn item");
|
||||
|
||||
match turn_item {
|
||||
TurnItem::Reasoning(reasoning) => {
|
||||
assert_eq!(reasoning.summary_text, vec!["Summarized step".to_string()]);
|
||||
assert_eq!(
|
||||
reasoning.raw_content,
|
||||
vec!["raw step".to_string(), "final thought".to_string()]
|
||||
);
|
||||
}
|
||||
other => panic!("expected TurnItem::Reasoning, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_web_search_call() {
|
||||
let item = ResponseItem::WebSearchCall {
|
||||
id: Some("ws_1".to_string()),
|
||||
status: Some("completed".to_string()),
|
||||
action: WebSearchAction::Search {
|
||||
query: "weather".to_string(),
|
||||
},
|
||||
};
|
||||
|
||||
let turn_item = parse_turn_item(&item).expect("expected web search turn item");
|
||||
|
||||
match turn_item {
|
||||
TurnItem::WebSearch(search) => {
|
||||
assert_eq!(search.id, "ws_1");
|
||||
assert_eq!(search.query, "weather");
|
||||
}
|
||||
other => panic!("expected TurnItem::WebSearch, got {other:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ pub enum Feature {
|
||||
UnifiedExec,
|
||||
/// Use the streamable exec-command/write-stdin tool pair.
|
||||
StreamableShell,
|
||||
/// Use the official Rust MCP client (rmcp).
|
||||
/// Enable experimental RMCP features such as OAuth login.
|
||||
RmcpClient,
|
||||
/// Include the freeform apply_patch tool.
|
||||
ApplyPatchFreeform,
|
||||
@@ -39,8 +39,6 @@ pub enum Feature {
|
||||
ViewImageTool,
|
||||
/// Allow the model to request web searches.
|
||||
WebSearchRequest,
|
||||
/// Automatically approve all approval requests from the harness.
|
||||
ApproveAll,
|
||||
}
|
||||
|
||||
impl Feature {
|
||||
@@ -238,10 +236,4 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ApproveAll,
|
||||
key: "approve_all",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -20,7 +20,7 @@ pub mod config_edit;
|
||||
pub mod config_loader;
|
||||
pub mod config_profile;
|
||||
pub mod config_types;
|
||||
mod conversation_history;
|
||||
mod context_manager;
|
||||
pub mod custom_prompts;
|
||||
mod environment_context;
|
||||
pub mod error;
|
||||
@@ -36,6 +36,7 @@ mod mcp_tool_call;
|
||||
mod message_history;
|
||||
mod model_provider_info;
|
||||
pub mod parse_command;
|
||||
mod response_processing;
|
||||
pub mod sandboxing;
|
||||
pub mod token_data;
|
||||
mod truncate;
|
||||
@@ -98,11 +99,10 @@ pub use client_common::REVIEW_PROMPT;
|
||||
pub use client_common::ResponseEvent;
|
||||
pub use client_common::ResponseStream;
|
||||
pub use codex::compact::content_items_to_text;
|
||||
pub use codex::compact::is_session_prefix_message;
|
||||
pub use codex_protocol::models::ContentItem;
|
||||
pub use codex_protocol::models::LocalShellAction;
|
||||
pub use codex_protocol::models::LocalShellExecAction;
|
||||
pub use codex_protocol::models::LocalShellStatus;
|
||||
pub use codex_protocol::models::ReasoningItemContent;
|
||||
pub use codex_protocol::models::ResponseItem;
|
||||
pub use event_mapping::parse_turn_item;
|
||||
pub mod otel_init;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Connection manager for Model Context Protocol (MCP) servers.
|
||||
//!
|
||||
//! The [`McpConnectionManager`] owns one [`codex_mcp_client::McpClient`] per
|
||||
//! The [`McpConnectionManager`] owns one [`codex_rmcp_client::RmcpClient`] per
|
||||
//! configured server (keyed by the *server name*). It offers convenience
|
||||
//! helpers to query the available tools across *all* servers and returns them
|
||||
//! in a single aggregated map using the fully-qualified tool name
|
||||
@@ -10,14 +10,12 @@ use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::env;
|
||||
use std::ffi::OsString;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use codex_mcp_client::McpClient;
|
||||
use codex_rmcp_client::OAuthCredentialsStoreMode;
|
||||
use codex_rmcp_client::RmcpClient;
|
||||
use mcp_types::ClientCapabilities;
|
||||
@@ -51,7 +49,7 @@ const MCP_TOOL_NAME_DELIMITER: &str = "__";
|
||||
const MAX_TOOL_NAME_LENGTH: usize = 64;
|
||||
|
||||
/// Default timeout for initializing MCP server & initially listing tools.
|
||||
const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
pub const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Default timeout for individual tool calls.
|
||||
const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
@@ -99,134 +97,12 @@ struct ToolInfo {
|
||||
}
|
||||
|
||||
struct ManagedClient {
|
||||
client: McpClientAdapter,
|
||||
client: Arc<RmcpClient>,
|
||||
startup_timeout: Duration,
|
||||
tool_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum McpClientAdapter {
|
||||
Legacy(Arc<McpClient>),
|
||||
Rmcp(Arc<RmcpClient>),
|
||||
}
|
||||
|
||||
impl McpClientAdapter {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn new_stdio_client(
|
||||
use_rmcp_client: bool,
|
||||
program: OsString,
|
||||
args: Vec<OsString>,
|
||||
env: Option<HashMap<String, String>>,
|
||||
env_vars: Vec<String>,
|
||||
cwd: Option<PathBuf>,
|
||||
params: mcp_types::InitializeRequestParams,
|
||||
startup_timeout: Duration,
|
||||
) -> Result<Self> {
|
||||
if use_rmcp_client {
|
||||
let client =
|
||||
Arc::new(RmcpClient::new_stdio_client(program, args, env, &env_vars, cwd).await?);
|
||||
client.initialize(params, Some(startup_timeout)).await?;
|
||||
Ok(McpClientAdapter::Rmcp(client))
|
||||
} else {
|
||||
let client =
|
||||
Arc::new(McpClient::new_stdio_client(program, args, env, &env_vars, cwd).await?);
|
||||
client.initialize(params, Some(startup_timeout)).await?;
|
||||
Ok(McpClientAdapter::Legacy(client))
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn new_streamable_http_client(
|
||||
server_name: String,
|
||||
url: String,
|
||||
bearer_token: Option<String>,
|
||||
http_headers: Option<HashMap<String, String>>,
|
||||
env_http_headers: Option<HashMap<String, String>>,
|
||||
params: mcp_types::InitializeRequestParams,
|
||||
startup_timeout: Duration,
|
||||
store_mode: OAuthCredentialsStoreMode,
|
||||
) -> Result<Self> {
|
||||
let client = Arc::new(
|
||||
RmcpClient::new_streamable_http_client(
|
||||
&server_name,
|
||||
&url,
|
||||
bearer_token,
|
||||
http_headers,
|
||||
env_http_headers,
|
||||
store_mode,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
client.initialize(params, Some(startup_timeout)).await?;
|
||||
Ok(McpClientAdapter::Rmcp(client))
|
||||
}
|
||||
|
||||
async fn list_tools(
|
||||
&self,
|
||||
params: Option<mcp_types::ListToolsRequestParams>,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<mcp_types::ListToolsResult> {
|
||||
match self {
|
||||
McpClientAdapter::Legacy(client) => client.list_tools(params, timeout).await,
|
||||
McpClientAdapter::Rmcp(client) => client.list_tools(params, timeout).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_resources(
|
||||
&self,
|
||||
params: Option<mcp_types::ListResourcesRequestParams>,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<mcp_types::ListResourcesResult> {
|
||||
match self {
|
||||
McpClientAdapter::Legacy(_) => Ok(ListResourcesResult {
|
||||
next_cursor: None,
|
||||
resources: Vec::new(),
|
||||
}),
|
||||
McpClientAdapter::Rmcp(client) => client.list_resources(params, timeout).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_resource(
|
||||
&self,
|
||||
params: mcp_types::ReadResourceRequestParams,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<mcp_types::ReadResourceResult> {
|
||||
match self {
|
||||
McpClientAdapter::Legacy(_) => Err(anyhow!(
|
||||
"resources/read is not supported by legacy MCP clients"
|
||||
)),
|
||||
McpClientAdapter::Rmcp(client) => client.read_resource(params, timeout).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_resource_templates(
|
||||
&self,
|
||||
params: Option<mcp_types::ListResourceTemplatesRequestParams>,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<mcp_types::ListResourceTemplatesResult> {
|
||||
match self {
|
||||
McpClientAdapter::Legacy(_) => Ok(ListResourceTemplatesResult {
|
||||
next_cursor: None,
|
||||
resource_templates: Vec::new(),
|
||||
}),
|
||||
McpClientAdapter::Rmcp(client) => client.list_resource_templates(params, timeout).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn call_tool(
|
||||
&self,
|
||||
name: String,
|
||||
arguments: Option<serde_json::Value>,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<mcp_types::CallToolResult> {
|
||||
match self {
|
||||
McpClientAdapter::Legacy(client) => client.call_tool(name, arguments, timeout).await,
|
||||
McpClientAdapter::Rmcp(client) => client.call_tool(name, arguments, timeout).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A thin wrapper around a set of running [`McpClient`] instances.
|
||||
/// A thin wrapper around a set of running [`RmcpClient`] instances.
|
||||
#[derive(Default)]
|
||||
pub(crate) struct McpConnectionManager {
|
||||
/// Server-name -> client instance.
|
||||
@@ -243,7 +119,7 @@ pub(crate) struct McpConnectionManager {
|
||||
}
|
||||
|
||||
impl McpConnectionManager {
|
||||
/// Spawn a [`McpClient`] for each configured server.
|
||||
/// Spawn a [`RmcpClient`] for each configured server.
|
||||
///
|
||||
/// * `mcp_servers` – Map loaded from the user configuration where *keys*
|
||||
/// are human-readable server identifiers and *values* are the spawn
|
||||
@@ -253,7 +129,6 @@ impl McpConnectionManager {
|
||||
/// user should be informed about these errors.
|
||||
pub async fn new(
|
||||
mcp_servers: HashMap<String, McpServerConfig>,
|
||||
use_rmcp_client: bool,
|
||||
store_mode: OAuthCredentialsStoreMode,
|
||||
) -> Result<(Self, ClientStartErrors)> {
|
||||
// Early exit if no servers are configured.
|
||||
@@ -316,7 +191,8 @@ impl McpConnectionManager {
|
||||
protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
|
||||
};
|
||||
|
||||
let client = match transport {
|
||||
let resolved_bearer_token = resolved_bearer_token.unwrap_or_default();
|
||||
let client_result = match transport {
|
||||
McpServerTransportConfig::Stdio {
|
||||
command,
|
||||
args,
|
||||
@@ -326,17 +202,18 @@ impl McpConnectionManager {
|
||||
} => {
|
||||
let command_os: OsString = command.into();
|
||||
let args_os: Vec<OsString> = args.into_iter().map(Into::into).collect();
|
||||
McpClientAdapter::new_stdio_client(
|
||||
use_rmcp_client,
|
||||
command_os,
|
||||
args_os,
|
||||
env,
|
||||
env_vars,
|
||||
cwd,
|
||||
params,
|
||||
startup_timeout,
|
||||
)
|
||||
.await
|
||||
match RmcpClient::new_stdio_client(command_os, args_os, env, &env_vars, cwd)
|
||||
.await
|
||||
{
|
||||
Ok(client) => {
|
||||
let client = Arc::new(client);
|
||||
client
|
||||
.initialize(params.clone(), Some(startup_timeout))
|
||||
.await
|
||||
.map(|_| client)
|
||||
}
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
McpServerTransportConfig::StreamableHttp {
|
||||
url,
|
||||
@@ -344,22 +221,32 @@ impl McpConnectionManager {
|
||||
env_http_headers,
|
||||
..
|
||||
} => {
|
||||
McpClientAdapter::new_streamable_http_client(
|
||||
server_name.clone(),
|
||||
url,
|
||||
resolved_bearer_token.unwrap_or_default(),
|
||||
match RmcpClient::new_streamable_http_client(
|
||||
&server_name,
|
||||
&url,
|
||||
resolved_bearer_token.clone(),
|
||||
http_headers,
|
||||
env_http_headers,
|
||||
params,
|
||||
startup_timeout,
|
||||
store_mode,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(client) => {
|
||||
let client = Arc::new(client);
|
||||
client
|
||||
.initialize(params.clone(), Some(startup_timeout))
|
||||
.await
|
||||
.map(|_| client)
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
}
|
||||
.map(|c| (c, startup_timeout));
|
||||
};
|
||||
|
||||
((server_name, tool_timeout), client)
|
||||
(
|
||||
(server_name, tool_timeout),
|
||||
client_result.map(|client| (client, startup_timeout)),
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
//! key. These override or extend the defaults at runtime.
|
||||
|
||||
use crate::CodexAuth;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::default_client::CodexRequestBuilder;
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -95,7 +97,7 @@ pub struct ModelProviderInfo {
|
||||
|
||||
impl ModelProviderInfo {
|
||||
/// Construct a `POST` RequestBuilder for the given URL using the provided
|
||||
/// reqwest Client applying:
|
||||
/// [`CodexHttpClient`] applying:
|
||||
/// • provider-specific headers (static + env based)
|
||||
/// • Bearer auth header when an API key is available.
|
||||
/// • Auth token for OAuth.
|
||||
@@ -104,9 +106,9 @@ impl ModelProviderInfo {
|
||||
/// one produced by [`ModelProviderInfo::api_key`].
|
||||
pub async fn create_request_builder<'a>(
|
||||
&'a self,
|
||||
client: &'a reqwest::Client,
|
||||
client: &'a CodexHttpClient,
|
||||
auth: &Option<CodexAuth>,
|
||||
) -> crate::error::Result<reqwest::RequestBuilder> {
|
||||
) -> crate::error::Result<CodexRequestBuilder> {
|
||||
let effective_auth = if let Some(secret_key) = &self.experimental_bearer_token {
|
||||
Some(CodexAuth::from_api_key(secret_key))
|
||||
} else {
|
||||
@@ -187,9 +189,9 @@ impl ModelProviderInfo {
|
||||
}
|
||||
|
||||
/// Apply provider-specific HTTP headers (both static and environment-based)
|
||||
/// onto an existing `reqwest::RequestBuilder` and return the updated
|
||||
/// onto an existing [`CodexRequestBuilder`] and return the updated
|
||||
/// builder.
|
||||
fn apply_http_headers(&self, mut builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
|
||||
fn apply_http_headers(&self, mut builder: CodexRequestBuilder) -> CodexRequestBuilder {
|
||||
if let Some(extra) = &self.http_headers {
|
||||
for (k, v) in extra {
|
||||
builder = builder.header(k, v);
|
||||
|
||||
112
codex-rs/core/src/response_processing.rs
Normal file
112
codex-rs/core/src/response_processing.rs
Normal file
@@ -0,0 +1,112 @@
|
||||
use crate::codex::Session;
|
||||
use crate::context_manager::ContextManager;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use tracing::warn;
|
||||
|
||||
/// Process streamed `ResponseItem`s from the model into the pair of:
|
||||
/// - items we should record in conversation history; and
|
||||
/// - `ResponseInputItem`s to send back to the model on the next turn.
|
||||
pub(crate) async fn process_items(
|
||||
processed_items: Vec<crate::codex::ProcessedResponseItem>,
|
||||
is_review_mode: bool,
|
||||
review_thread_history: &mut ContextManager,
|
||||
sess: &Session,
|
||||
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
|
||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||
let mut responses = Vec::<ResponseInputItem>::new();
|
||||
for processed_response_item in processed_items {
|
||||
let crate::codex::ProcessedResponseItem { item, response } = processed_response_item;
|
||||
match (&item, &response) {
|
||||
(ResponseItem::Message { role, .. }, None) if role == "assistant" => {
|
||||
// If the model returned a message, we need to record it.
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
}
|
||||
(
|
||||
ResponseItem::LocalShellCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::CustomToolCall { .. },
|
||||
Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
items_to_record_in_conversation_history.push(ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: output.clone(),
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::FunctionCall { .. },
|
||||
Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(item);
|
||||
let output = match result {
|
||||
Ok(call_tool_result) => {
|
||||
crate::codex::convert_call_tool_result_to_function_call_output_payload(
|
||||
call_tool_result,
|
||||
)
|
||||
}
|
||||
Err(err) => FunctionCallOutputPayload {
|
||||
content: err.clone(),
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output,
|
||||
});
|
||||
}
|
||||
(
|
||||
ResponseItem::Reasoning {
|
||||
id,
|
||||
summary,
|
||||
content,
|
||||
encrypted_content,
|
||||
},
|
||||
None,
|
||||
) => {
|
||||
items_to_record_in_conversation_history.push(ResponseItem::Reasoning {
|
||||
id: id.clone(),
|
||||
summary: summary.clone(),
|
||||
content: content.clone(),
|
||||
encrypted_content: encrypted_content.clone(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
warn!("Unexpected response item: {item:?} with response: {response:?}");
|
||||
}
|
||||
};
|
||||
if let Some(response) = response {
|
||||
responses.push(response);
|
||||
}
|
||||
}
|
||||
|
||||
// Only attempt to take the lock if there is something to record.
|
||||
if !items_to_record_in_conversation_history.is_empty() {
|
||||
if is_review_mode {
|
||||
review_thread_history.record_items(items_to_record_in_conversation_history.iter());
|
||||
} else {
|
||||
sess.record_conversation_items(&items_to_record_in_conversation_history)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
(responses, items_to_record_in_conversation_history)
|
||||
}
|
||||
@@ -1,12 +1,11 @@
|
||||
use std::cmp::Reverse;
|
||||
use std::io::{self};
|
||||
use std::num::NonZero;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_file_search as file_search;
|
||||
use std::num::NonZero;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
use time::OffsetDateTime;
|
||||
use time::PrimitiveDateTime;
|
||||
use time::format_description::FormatItem;
|
||||
@@ -15,6 +14,7 @@ use uuid::Uuid;
|
||||
|
||||
use super::SESSIONS_SUBDIR;
|
||||
use crate::protocol::EventMsg;
|
||||
use codex_file_search as file_search;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
@@ -515,6 +515,7 @@ pub async fn find_conversation_path_by_id_str(
|
||||
threads,
|
||||
cancel,
|
||||
compute_indices,
|
||||
false,
|
||||
)
|
||||
.map_err(|e| io::Error::other(format!("file search failed: {e}")))?;
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::CompactedItem;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::InputMessageKind;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::SessionMeta;
|
||||
@@ -543,7 +542,6 @@ async fn test_tail_includes_last_response_items() -> Result<()> {
|
||||
timestamp: ts.to_string(),
|
||||
item: RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
|
||||
message: "hello".into(),
|
||||
kind: Some(InputMessageKind::Plain),
|
||||
images: None,
|
||||
})),
|
||||
};
|
||||
@@ -627,7 +625,6 @@ async fn test_tail_handles_short_sessions() -> Result<()> {
|
||||
timestamp: ts.to_string(),
|
||||
item: RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
|
||||
message: "hi".into(),
|
||||
kind: Some(InputMessageKind::Plain),
|
||||
images: None,
|
||||
})),
|
||||
};
|
||||
@@ -712,7 +709,6 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
|
||||
timestamp: ts.to_string(),
|
||||
item: RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
|
||||
message: "hello".into(),
|
||||
kind: Some(InputMessageKind::Plain),
|
||||
images: None,
|
||||
})),
|
||||
};
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
use async_channel::Sender;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::protocol::Event;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ItemCompletedEvent;
|
||||
use codex_protocol::protocol::ItemStartedEvent;
|
||||
use tracing::error;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ItemCollector {
|
||||
thread_id: ConversationId,
|
||||
turn_id: String,
|
||||
tx_event: Sender<Event>,
|
||||
}
|
||||
|
||||
impl ItemCollector {
|
||||
pub fn new(
|
||||
tx_event: Sender<Event>,
|
||||
thread_id: ConversationId,
|
||||
turn_id: String,
|
||||
) -> ItemCollector {
|
||||
ItemCollector {
|
||||
tx_event,
|
||||
thread_id,
|
||||
turn_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn started(&self, item: TurnItem) {
|
||||
let err = self
|
||||
.tx_event
|
||||
.send(Event {
|
||||
id: self.turn_id.clone(),
|
||||
msg: EventMsg::ItemStarted(ItemStartedEvent {
|
||||
thread_id: self.thread_id,
|
||||
turn_id: self.turn_id.clone(),
|
||||
item,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
if let Err(e) = err {
|
||||
error!("failed to send item started event: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn completed(&self, item: TurnItem) {
|
||||
let err = self
|
||||
.tx_event
|
||||
.send(Event {
|
||||
id: self.turn_id.clone(),
|
||||
msg: EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
thread_id: self.thread_id,
|
||||
turn_id: self.turn_id.clone(),
|
||||
item,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
if let Err(e) = err {
|
||||
error!("failed to send item completed event: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn started_completed(&self, item: TurnItem) {
|
||||
self.started(item.clone()).await;
|
||||
self.completed(item).await;
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,7 @@
|
||||
mod item_collector;
|
||||
mod service;
|
||||
mod session;
|
||||
mod turn;
|
||||
|
||||
pub(crate) use item_collector::ItemCollector;
|
||||
pub(crate) use service::SessionServices;
|
||||
pub(crate) use session::SessionState;
|
||||
pub(crate) use turn::ActiveTurn;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use codex_protocol::models::ResponseItem;
|
||||
|
||||
use crate::codex::SessionConfiguration;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::context_manager::ContextManager;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::protocol::TokenUsageInfo;
|
||||
@@ -11,8 +11,7 @@ use crate::protocol::TokenUsageInfo;
|
||||
/// Persistent, session-scoped state previously stored directly on `Session`.
|
||||
pub(crate) struct SessionState {
|
||||
pub(crate) session_configuration: SessionConfiguration,
|
||||
pub(crate) history: ConversationHistory,
|
||||
pub(crate) token_info: Option<TokenUsageInfo>,
|
||||
pub(crate) history: ContextManager,
|
||||
pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
|
||||
}
|
||||
|
||||
@@ -21,8 +20,7 @@ impl SessionState {
|
||||
pub(crate) fn new(session_configuration: SessionConfiguration) -> Self {
|
||||
Self {
|
||||
session_configuration,
|
||||
history: ConversationHistory::new(),
|
||||
token_info: None,
|
||||
history: ContextManager::new(),
|
||||
latest_rate_limits: None,
|
||||
}
|
||||
}
|
||||
@@ -36,8 +34,12 @@ impl SessionState {
|
||||
self.history.record_items(items)
|
||||
}
|
||||
|
||||
pub(crate) fn history_snapshot(&self) -> Vec<ResponseItem> {
|
||||
self.history.contents()
|
||||
pub(crate) fn history_snapshot(&mut self) -> Vec<ResponseItem> {
|
||||
self.history.get_history()
|
||||
}
|
||||
|
||||
pub(crate) fn clone_history(&self) -> ContextManager {
|
||||
self.history.clone()
|
||||
}
|
||||
|
||||
pub(crate) fn replace_history(&mut self, items: Vec<ResponseItem>) {
|
||||
@@ -50,11 +52,11 @@ impl SessionState {
|
||||
usage: &TokenUsage,
|
||||
model_context_window: Option<i64>,
|
||||
) {
|
||||
self.token_info = TokenUsageInfo::new_or_append(
|
||||
&self.token_info,
|
||||
&Some(usage.clone()),
|
||||
model_context_window,
|
||||
);
|
||||
self.history.update_token_info(usage, model_context_window);
|
||||
}
|
||||
|
||||
pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
|
||||
self.history.token_info()
|
||||
}
|
||||
|
||||
pub(crate) fn set_rate_limits(&mut self, snapshot: RateLimitSnapshot) {
|
||||
@@ -64,17 +66,10 @@ impl SessionState {
|
||||
pub(crate) fn token_info_and_rate_limits(
|
||||
&self,
|
||||
) -> (Option<TokenUsageInfo>, Option<RateLimitSnapshot>) {
|
||||
(self.token_info.clone(), self.latest_rate_limits.clone())
|
||||
(self.token_info(), self.latest_rate_limits.clone())
|
||||
}
|
||||
|
||||
pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
|
||||
match &mut self.token_info {
|
||||
Some(info) => info.fill_to_context_window(context_window),
|
||||
None => {
|
||||
self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
|
||||
}
|
||||
}
|
||||
self.history.set_token_usage_full(context_window);
|
||||
}
|
||||
|
||||
// Pending input/approval moved to TurnState.
|
||||
}
|
||||
|
||||
@@ -1,15 +1,11 @@
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use mcp_types::CallToolResult;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
@@ -76,7 +72,7 @@ pub enum ToolOutput {
|
||||
impl ToolOutput {
|
||||
pub fn log_preview(&self) -> String {
|
||||
match self {
|
||||
ToolOutput::Function { content, .. } => telemetry_preview(content),
|
||||
ToolOutput::Function { content, .. } => content.clone(),
|
||||
ToolOutput::Mcp { result } => format!("{result:?}"),
|
||||
}
|
||||
}
|
||||
@@ -111,46 +107,6 @@ impl ToolOutput {
|
||||
}
|
||||
}
|
||||
|
||||
fn telemetry_preview(content: &str) -> String {
|
||||
let truncated_slice = take_bytes_at_char_boundary(content, TELEMETRY_PREVIEW_MAX_BYTES);
|
||||
let truncated_by_bytes = truncated_slice.len() < content.len();
|
||||
|
||||
let mut preview = String::new();
|
||||
let mut lines_iter = truncated_slice.lines();
|
||||
for idx in 0..TELEMETRY_PREVIEW_MAX_LINES {
|
||||
match lines_iter.next() {
|
||||
Some(line) => {
|
||||
if idx > 0 {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(line);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
let truncated_by_lines = lines_iter.next().is_some();
|
||||
|
||||
if !truncated_by_bytes && !truncated_by_lines {
|
||||
return content.to_string();
|
||||
}
|
||||
|
||||
if preview.len() < truncated_slice.len()
|
||||
&& truncated_slice
|
||||
.as_bytes()
|
||||
.get(preview.len())
|
||||
.is_some_and(|byte| *byte == b'\n')
|
||||
{
|
||||
preview.push('\n');
|
||||
}
|
||||
|
||||
if !preview.is_empty() && !preview.ends_with('\n') {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(TELEMETRY_PREVIEW_TRUNCATION_NOTICE);
|
||||
|
||||
preview
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -196,38 +152,6 @@ mod tests {
|
||||
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_returns_original_within_limits() {
|
||||
let content = "short output";
|
||||
assert_eq!(telemetry_preview(content), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_bytes() {
|
||||
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
|
||||
let preview = telemetry_preview(&content);
|
||||
|
||||
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
assert!(
|
||||
preview.len()
|
||||
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_lines() {
|
||||
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
|
||||
.map(|idx| format!("line {idx}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let preview = telemetry_preview(&content);
|
||||
let lines: Vec<&str> = preview.lines().collect();
|
||||
|
||||
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
|
||||
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandBeginEvent;
|
||||
@@ -10,6 +13,7 @@ use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
@@ -76,6 +80,13 @@ pub(crate) enum ToolEmitter {
|
||||
changes: HashMap<PathBuf, FileChange>,
|
||||
auto_approved: bool,
|
||||
},
|
||||
UnifiedExec {
|
||||
command: String,
|
||||
cwd: PathBuf,
|
||||
// True for `exec_command` and false for `write_stdin`.
|
||||
#[allow(dead_code)]
|
||||
is_startup_command: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl ToolEmitter {
|
||||
@@ -90,6 +101,14 @@ impl ToolEmitter {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unified_exec(command: String, cwd: PathBuf, is_startup_command: bool) -> Self {
|
||||
Self::UnifiedExec {
|
||||
command,
|
||||
cwd,
|
||||
is_startup_command,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) {
|
||||
match (self, stage) {
|
||||
(Self::Shell { command, cwd }, ToolEventStage::Begin) => {
|
||||
@@ -181,8 +200,103 @@ impl ToolEmitter {
|
||||
) => {
|
||||
emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
|
||||
}
|
||||
(Self::UnifiedExec { command, cwd, .. }, ToolEventStage::Begin) => {
|
||||
emit_exec_command_begin(ctx, &[command.to_string()], cwd.as_path()).await;
|
||||
}
|
||||
(Self::UnifiedExec { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::UnifiedExec { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Output(output)),
|
||||
) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::UnifiedExec { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)),
|
||||
) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
format_exec_output(&message),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn begin(&self, ctx: ToolEventCtx<'_>) {
|
||||
self.emit(ctx, ToolEventStage::Begin).await;
|
||||
}
|
||||
|
||||
pub async fn finish(
|
||||
&self,
|
||||
ctx: ToolEventCtx<'_>,
|
||||
out: Result<ExecToolCallOutput, ToolError>,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let event;
|
||||
let result = match out {
|
||||
Ok(output) => {
|
||||
let content = super::format_exec_output_for_model(&output);
|
||||
let exit_code = output.exit_code;
|
||||
event = ToolEventStage::Success(output);
|
||||
if exit_code == 0 {
|
||||
Ok(content)
|
||||
} else {
|
||||
Err(FunctionCallError::RespondToModel(content))
|
||||
}
|
||||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = super::format_exec_output_for_model(&output);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let message = format!("execution error: {err:?}");
|
||||
let response = super::format_exec_output(&message);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
|
||||
// Normalize common rejection messages for exec tools so tests and
|
||||
// users see a clear, consistent phrase.
|
||||
let normalized = if msg == "rejected by user" {
|
||||
"exec command rejected by user".to_string()
|
||||
} else {
|
||||
msg
|
||||
};
|
||||
let response = super::format_exec_output(&normalized);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
};
|
||||
self.emit(ctx, event).await;
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
|
||||
@@ -1,19 +1,24 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::apply_patch;
|
||||
use crate::apply_patch::InternalApplyPatchInvocation;
|
||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::client_common::tools::FreeformTool;
|
||||
use crate::client_common::tools::FreeformToolFormat;
|
||||
use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handle_container_exec_with_params;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::orchestrator::ToolOrchestrator;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::spec::ApplyPatchToolArgs;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use async_trait::async_trait;
|
||||
@@ -64,30 +69,85 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
}
|
||||
};
|
||||
|
||||
let exec_params = ExecParams {
|
||||
command: vec!["apply_patch".to_string(), patch_input.clone()],
|
||||
cwd: turn.cwd.clone(),
|
||||
timeout_ms: None,
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
};
|
||||
// Re-parse and verify the patch so we can compute changes and approval.
|
||||
// Avoid building temporary ExecParams/command vectors; derive directly from inputs.
|
||||
let cwd = turn.cwd.clone();
|
||||
let command = vec!["apply_patch".to_string(), patch_input.clone()];
|
||||
match codex_apply_patch::maybe_parse_apply_patch_verified(&command, &cwd) {
|
||||
codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
|
||||
match apply_patch::apply_patch(session.as_ref(), turn.as_ref(), &call_id, changes)
|
||||
.await
|
||||
{
|
||||
InternalApplyPatchInvocation::Output(item) => {
|
||||
let content = item?;
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
InternalApplyPatchInvocation::DelegateToExec(apply) => {
|
||||
let emitter = ToolEmitter::apply_patch(
|
||||
convert_apply_patch_to_protocol(&apply.action),
|
||||
!apply.user_explicitly_approved_this_action,
|
||||
);
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let content = handle_container_exec_with_params(
|
||||
tool_name.as_str(),
|
||||
exec_params,
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn),
|
||||
Arc::clone(&tracker),
|
||||
call_id.clone(),
|
||||
)
|
||||
.await?;
|
||||
let req = ApplyPatchRequest {
|
||||
patch: apply.action.patch.clone(),
|
||||
cwd: apply.action.cwd.clone(),
|
||||
timeout_ms: None,
|
||||
user_explicitly_approved: apply.user_explicitly_approved_this_action,
|
||||
codex_exe: turn.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ApplyPatchRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: session.as_ref(),
|
||||
turn: turn.as_ref(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
let out = orchestrator
|
||||
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
|
||||
.await;
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
codex_apply_patch::MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
|
||||
Err(FunctionCallError::RespondToModel(format!(
|
||||
"apply_patch verification failed: {parse_error}"
|
||||
)))
|
||||
}
|
||||
codex_apply_patch::MaybeApplyPatchVerified::ShellParseError(error) => {
|
||||
tracing::trace!("Failed to parse apply_patch input, {error:?}");
|
||||
Err(FunctionCallError::RespondToModel(
|
||||
"apply_patch handler received invalid patch input".to_string(),
|
||||
))
|
||||
}
|
||||
codex_apply_patch::MaybeApplyPatchVerified::NotApplyPatch => {
|
||||
Err(FunctionCallError::RespondToModel(
|
||||
"apply_patch handler received non-apply_patch input".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,9 @@ use async_trait::async_trait;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::apply_patch;
|
||||
use crate::apply_patch::InternalApplyPatchInvocation;
|
||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_env::create_env;
|
||||
@@ -9,9 +12,16 @@ use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handle_container_exec_with_params;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::orchestrator::ToolOrchestrator;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
||||
pub struct ShellHandler;
|
||||
|
||||
@@ -61,35 +71,27 @@ impl ToolHandler for ShellHandler {
|
||||
))
|
||||
})?;
|
||||
let exec_params = Self::to_exec_params(params, turn.as_ref());
|
||||
let content = handle_container_exec_with_params(
|
||||
Self::run_exec_like(
|
||||
tool_name.as_str(),
|
||||
exec_params,
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn),
|
||||
Arc::clone(&tracker),
|
||||
call_id.clone(),
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
)
|
||||
.await?;
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
.await
|
||||
}
|
||||
ToolPayload::LocalShell { params } => {
|
||||
let exec_params = Self::to_exec_params(params, turn.as_ref());
|
||||
let content = handle_container_exec_with_params(
|
||||
Self::run_exec_like(
|
||||
tool_name.as_str(),
|
||||
exec_params,
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn),
|
||||
Arc::clone(&tracker),
|
||||
call_id.clone(),
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
)
|
||||
.await?;
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
.await
|
||||
}
|
||||
_ => Err(FunctionCallError::RespondToModel(format!(
|
||||
"unsupported payload for shell handler: {tool_name}"
|
||||
@@ -97,3 +99,134 @@ impl ToolHandler for ShellHandler {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ShellHandler {
|
||||
async fn run_exec_like(
|
||||
tool_name: &str,
|
||||
exec_params: ExecParams,
|
||||
session: Arc<crate::codex::Session>,
|
||||
turn: Arc<TurnContext>,
|
||||
tracker: crate::tools::context::SharedTurnDiffTracker,
|
||||
call_id: String,
|
||||
) -> Result<ToolOutput, FunctionCallError> {
|
||||
// Approval policy guard for explicit escalation in non-OnRequest modes.
|
||||
if exec_params.with_escalated_permissions.unwrap_or(false)
|
||||
&& !matches!(
|
||||
turn.approval_policy,
|
||||
codex_protocol::protocol::AskForApproval::OnRequest
|
||||
)
|
||||
{
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
|
||||
policy = turn.approval_policy
|
||||
)));
|
||||
}
|
||||
|
||||
// Intercept apply_patch if present.
|
||||
match codex_apply_patch::maybe_parse_apply_patch_verified(
|
||||
&exec_params.command,
|
||||
&exec_params.cwd,
|
||||
) {
|
||||
codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
|
||||
match apply_patch::apply_patch(session.as_ref(), turn.as_ref(), &call_id, changes)
|
||||
.await
|
||||
{
|
||||
InternalApplyPatchInvocation::Output(item) => {
|
||||
// Programmatic apply_patch path; return its result.
|
||||
let content = item?;
|
||||
return Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
});
|
||||
}
|
||||
InternalApplyPatchInvocation::DelegateToExec(apply) => {
|
||||
let emitter = ToolEmitter::apply_patch(
|
||||
convert_apply_patch_to_protocol(&apply.action),
|
||||
!apply.user_explicitly_approved_this_action,
|
||||
);
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let req = ApplyPatchRequest {
|
||||
patch: apply.action.patch.clone(),
|
||||
cwd: apply.action.cwd.clone(),
|
||||
timeout_ms: exec_params.timeout_ms,
|
||||
user_explicitly_approved: apply.user_explicitly_approved_this_action,
|
||||
codex_exe: turn.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ApplyPatchRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: session.as_ref(),
|
||||
turn: turn.as_ref(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
let out = orchestrator
|
||||
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
|
||||
.await;
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
return Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
codex_apply_patch::MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"apply_patch verification failed: {parse_error}"
|
||||
)));
|
||||
}
|
||||
codex_apply_patch::MaybeApplyPatchVerified::ShellParseError(error) => {
|
||||
tracing::trace!("Failed to parse shell command, {error:?}");
|
||||
// Fall through to regular shell execution.
|
||||
}
|
||||
codex_apply_patch::MaybeApplyPatchVerified::NotApplyPatch => {
|
||||
// Fall through to regular shell execution.
|
||||
}
|
||||
}
|
||||
|
||||
// Regular shell execution path.
|
||||
let emitter = ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone());
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let req = ShellRequest {
|
||||
command: exec_params.command.clone(),
|
||||
cwd: exec_params.cwd.clone(),
|
||||
timeout_ms: exec_params.timeout_ms,
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
};
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: session.as_ref(),
|
||||
turn: turn.as_ref(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
let out = orchestrator
|
||||
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
|
||||
.await;
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +1,71 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandOutputDeltaEvent;
|
||||
use crate::protocol::ExecOutputStream;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventStage;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::unified_exec::UnifiedExecRequest;
|
||||
use crate::unified_exec::ExecCommandRequest;
|
||||
use crate::unified_exec::UnifiedExecContext;
|
||||
use crate::unified_exec::UnifiedExecResponse;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::unified_exec::WriteStdinRequest;
|
||||
|
||||
pub struct UnifiedExecHandler;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct UnifiedExecArgs {
|
||||
input: Vec<String>,
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ExecCommandArgs {
|
||||
cmd: String,
|
||||
#[serde(default = "default_shell")]
|
||||
shell: String,
|
||||
#[serde(default = "default_login")]
|
||||
login: bool,
|
||||
#[serde(default)]
|
||||
session_id: Option<String>,
|
||||
yield_time_ms: Option<u64>,
|
||||
#[serde(default)]
|
||||
timeout_ms: Option<u64>,
|
||||
max_output_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct WriteStdinArgs {
|
||||
session_id: i32,
|
||||
#[serde(default)]
|
||||
chars: String,
|
||||
#[serde(default)]
|
||||
yield_time_ms: Option<u64>,
|
||||
#[serde(default)]
|
||||
max_output_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
fn default_shell() -> String {
|
||||
"/bin/bash".to_string()
|
||||
}
|
||||
|
||||
fn default_login() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for UnifiedExecHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::UnifiedExec
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
fn matches_kind(&self, payload: &ToolPayload) -> bool {
|
||||
matches!(
|
||||
payload,
|
||||
ToolPayload::UnifiedExec { .. } | ToolPayload::Function { .. }
|
||||
ToolPayload::Function { .. } | ToolPayload::UnifiedExec { .. }
|
||||
)
|
||||
}
|
||||
|
||||
@@ -38,19 +74,14 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
session,
|
||||
turn,
|
||||
call_id,
|
||||
tool_name: _tool_name,
|
||||
tool_name,
|
||||
payload,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let args = match payload {
|
||||
ToolPayload::UnifiedExec { arguments } | ToolPayload::Function { arguments } => {
|
||||
serde_json::from_str::<UnifiedExecArgs>(&arguments).map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {err:?}"
|
||||
))
|
||||
})?
|
||||
}
|
||||
let arguments = match payload {
|
||||
ToolPayload::Function { arguments } => arguments,
|
||||
ToolPayload::UnifiedExec { arguments } => arguments,
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"unified_exec handler received unsupported payload".to_string(),
|
||||
@@ -58,58 +89,81 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
}
|
||||
};
|
||||
|
||||
let UnifiedExecArgs {
|
||||
input,
|
||||
session_id,
|
||||
timeout_ms,
|
||||
} = args;
|
||||
let manager: &UnifiedExecSessionManager = &session.services.unified_exec_manager;
|
||||
let context = UnifiedExecContext::new(session.clone(), turn.clone(), call_id.clone());
|
||||
|
||||
let parsed_session_id = if let Some(session_id) = session_id {
|
||||
match session_id.parse::<i32>() {
|
||||
Ok(parsed) => Some(parsed),
|
||||
Err(output) => {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"invalid session_id: {session_id} due to error {output:?}"
|
||||
)));
|
||||
}
|
||||
let response = match tool_name.as_str() {
|
||||
"exec_command" => {
|
||||
let args: ExecCommandArgs = serde_json::from_str(&arguments).map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse exec_command arguments: {err:?}"
|
||||
))
|
||||
})?;
|
||||
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
context.session.as_ref(),
|
||||
context.turn.as_ref(),
|
||||
&context.call_id,
|
||||
None,
|
||||
);
|
||||
let emitter =
|
||||
ToolEmitter::unified_exec(args.cmd.clone(), context.turn.cwd.clone(), true);
|
||||
emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
|
||||
manager
|
||||
.exec_command(
|
||||
ExecCommandRequest {
|
||||
command: &args.cmd,
|
||||
shell: &args.shell,
|
||||
login: args.login,
|
||||
yield_time_ms: args.yield_time_ms,
|
||||
max_output_tokens: args.max_output_tokens,
|
||||
},
|
||||
&context,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("exec_command failed: {err:?}"))
|
||||
})?
|
||||
}
|
||||
"write_stdin" => {
|
||||
let args: WriteStdinArgs = serde_json::from_str(&arguments).map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse write_stdin arguments: {err:?}"
|
||||
))
|
||||
})?;
|
||||
manager
|
||||
.write_stdin(WriteStdinRequest {
|
||||
session_id: args.session_id,
|
||||
input: &args.chars,
|
||||
yield_time_ms: args.yield_time_ms,
|
||||
max_output_tokens: args.max_output_tokens,
|
||||
})
|
||||
.await
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("write_stdin failed: {err:?}"))
|
||||
})?
|
||||
}
|
||||
other => {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"unsupported unified exec function {other}"
|
||||
)));
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let request = UnifiedExecRequest {
|
||||
input_chunks: &input,
|
||||
timeout_ms,
|
||||
};
|
||||
|
||||
let value = session
|
||||
.services
|
||||
.unified_exec_manager
|
||||
.handle_request(
|
||||
request,
|
||||
crate::unified_exec::UnifiedExecContext {
|
||||
session: &session,
|
||||
turn: turn.as_ref(),
|
||||
call_id: &call_id,
|
||||
session_id: parsed_session_id,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))
|
||||
})?;
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
struct SerializedUnifiedExecResult {
|
||||
session_id: Option<String>,
|
||||
output: String,
|
||||
// Emit a delta event with the chunk of output we just produced, if any.
|
||||
if !response.output.is_empty() {
|
||||
let delta = ExecCommandOutputDeltaEvent {
|
||||
call_id: response.event_call_id.clone(),
|
||||
stream: ExecOutputStream::Stdout,
|
||||
chunk: response.output.as_bytes().to_vec(),
|
||||
};
|
||||
session
|
||||
.send_event(turn.as_ref(), EventMsg::ExecCommandOutputDelta(delta))
|
||||
.await;
|
||||
}
|
||||
|
||||
let content = serde_json::to_string(&SerializedUnifiedExecResult {
|
||||
session_id: value.session_id.map(|id| id.to_string()),
|
||||
output: value.output,
|
||||
})
|
||||
.map_err(|err| {
|
||||
let content = serialize_response(&response).map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to serialize unified exec output: {err:?}"
|
||||
))
|
||||
@@ -121,3 +175,33 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SerializedUnifiedExecResponse<'a> {
|
||||
chunk_id: &'a str,
|
||||
wall_time_seconds: f64,
|
||||
output: &'a str,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
session_id: Option<i32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
exit_code: Option<i32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
original_token_count: Option<usize>,
|
||||
}
|
||||
|
||||
fn serialize_response(response: &UnifiedExecResponse) -> Result<String, serde_json::Error> {
|
||||
let payload = SerializedUnifiedExecResponse {
|
||||
chunk_id: &response.chunk_id,
|
||||
wall_time_seconds: duration_to_seconds(response.wall_time),
|
||||
output: &response.output,
|
||||
session_id: response.session_id,
|
||||
exit_code: response.exit_code,
|
||||
original_token_count: response.original_token_count,
|
||||
};
|
||||
|
||||
serde_json::to_string(&payload)
|
||||
}
|
||||
|
||||
fn duration_to_seconds(duration: Duration) -> f64 {
|
||||
duration.as_secs_f64()
|
||||
}
|
||||
|
||||
@@ -9,37 +9,11 @@ pub mod runtimes;
|
||||
pub mod sandboxing;
|
||||
pub mod spec;
|
||||
|
||||
use crate::apply_patch;
|
||||
use crate::apply_patch::InternalApplyPatchInvocation;
|
||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventFailure;
|
||||
use crate::tools::events::ToolEventStage;
|
||||
use crate::tools::orchestrator::ToolOrchestrator;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use codex_apply_patch::MaybeApplyPatchVerified;
|
||||
use codex_apply_patch::maybe_parse_apply_patch_verified;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use codex_utils_string::take_last_bytes_at_char_boundary;
|
||||
pub use router::ToolRouter;
|
||||
use serde::Serialize;
|
||||
use std::sync::Arc;
|
||||
use tracing::trace;
|
||||
|
||||
// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
|
||||
pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
|
||||
@@ -48,192 +22,6 @@ pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
|
||||
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
|
||||
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
|
||||
|
||||
// Telemetry preview limits: keep log events smaller than model budgets.
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
|
||||
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
|
||||
"[... telemetry preview truncated ...]";
|
||||
|
||||
// TODO(jif) break this down
|
||||
pub(crate) async fn handle_container_exec_with_params(
|
||||
tool_name: &str,
|
||||
params: ExecParams,
|
||||
sess: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
turn_diff_tracker: SharedTurnDiffTracker,
|
||||
call_id: String,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let _otel_event_manager = turn_context.client.get_otel_event_manager();
|
||||
|
||||
if params.with_escalated_permissions.unwrap_or(false)
|
||||
&& !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
|
||||
{
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
|
||||
policy = turn_context.approval_policy
|
||||
)));
|
||||
}
|
||||
|
||||
// check if this was a patch, and apply it if so
|
||||
let apply_patch_exec = match maybe_parse_apply_patch_verified(¶ms.command, ¶ms.cwd) {
|
||||
MaybeApplyPatchVerified::Body(changes) => {
|
||||
match apply_patch::apply_patch(sess.as_ref(), turn_context.as_ref(), &call_id, changes)
|
||||
.await
|
||||
{
|
||||
InternalApplyPatchInvocation::Output(item) => return item,
|
||||
InternalApplyPatchInvocation::DelegateToExec(apply_patch_exec) => {
|
||||
Some(apply_patch_exec)
|
||||
}
|
||||
}
|
||||
}
|
||||
MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
|
||||
// It looks like an invocation of `apply_patch`, but we
|
||||
// could not resolve it into a patch that would apply
|
||||
// cleanly. Return to model for resample.
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"apply_patch verification failed: {parse_error}"
|
||||
)));
|
||||
}
|
||||
MaybeApplyPatchVerified::ShellParseError(error) => {
|
||||
trace!("Failed to parse shell command, {error:?}");
|
||||
None
|
||||
}
|
||||
MaybeApplyPatchVerified::NotApplyPatch => None,
|
||||
};
|
||||
|
||||
let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
|
||||
Some(exec) => (
|
||||
ToolEmitter::apply_patch(
|
||||
convert_apply_patch_to_protocol(&exec.action),
|
||||
!exec.user_explicitly_approved_this_action,
|
||||
),
|
||||
Some(&turn_diff_tracker),
|
||||
),
|
||||
None => (
|
||||
ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
|
||||
None,
|
||||
),
|
||||
};
|
||||
|
||||
let event_ctx = ToolEventCtx::new(sess.as_ref(), turn_context.as_ref(), &call_id, diff_opt);
|
||||
event_emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
|
||||
// Build runtime contexts only when needed (shell/apply_patch below).
|
||||
|
||||
if let Some(exec) = apply_patch_exec {
|
||||
// Route apply_patch execution through the new orchestrator/runtime.
|
||||
let req = ApplyPatchRequest {
|
||||
patch: exec.action.patch.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
user_explicitly_approved: exec.user_explicitly_approved_this_action,
|
||||
codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ApplyPatchRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
turn: turn_context.as_ref(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
} else {
|
||||
// Route shell execution through the new orchestrator/runtime.
|
||||
let req = ShellRequest {
|
||||
command: params.command.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
env: params.env.clone(),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
turn: turn_context.as_ref(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_exec_outcome(
|
||||
event_emitter: &ToolEmitter,
|
||||
event_ctx: ToolEventCtx<'_>,
|
||||
out: Result<ExecToolCallOutput, ToolError>,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let event;
|
||||
let result = match out {
|
||||
Ok(output) => {
|
||||
let content = format_exec_output_for_model(&output);
|
||||
let exit_code = output.exit_code;
|
||||
event = ToolEventStage::Success(output);
|
||||
if exit_code == 0 {
|
||||
Ok(content)
|
||||
} else {
|
||||
Err(FunctionCallError::RespondToModel(content))
|
||||
}
|
||||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = format_exec_output_for_model(&output);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let message = format!("execution error: {err:?}");
|
||||
let response = format_exec_output(&message);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
|
||||
// Normalize common rejection messages for exec tools so tests and
|
||||
// users see a clear, consistent phrase.
|
||||
let normalized = if msg == "rejected by user" {
|
||||
"exec command rejected by user".to_string()
|
||||
} else {
|
||||
msg
|
||||
};
|
||||
let response = format_exec_output(&normalized);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
};
|
||||
event_emitter.emit(event_ctx, event).await;
|
||||
result
|
||||
}
|
||||
|
||||
/// Format the combined exec output for sending back to the model.
|
||||
/// Includes exit code and duration metadata; truncates large bodies safely.
|
||||
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
|
||||
@@ -363,6 +151,7 @@ fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use regex_lite::Regex;
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
|
||||
@@ -98,9 +98,9 @@ impl ToolOrchestrator {
|
||||
"sandbox denied and no retry".to_string(),
|
||||
));
|
||||
}
|
||||
// Under `Never`, do not retry without sandbox; surface a concise message
|
||||
// Under `Never` or `OnRequest`, do not retry without sandbox; surface a concise message
|
||||
// derived from the actual output (platform-agnostic).
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
if !tool.wants_no_sandbox_approval(approval_policy) {
|
||||
let msg = build_never_denied_message_from_output(output.as_ref());
|
||||
return Err(ToolError::SandboxDenied(msg));
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ use std::sync::Arc;
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::either::Either;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tokio_util::task::AbortOnDropHandle;
|
||||
|
||||
use crate::codex::Session;
|
||||
@@ -9,8 +10,10 @@ use crate::codex::TurnContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::router::ToolCall;
|
||||
use crate::tools::router::ToolRouter;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
|
||||
pub(crate) struct ToolCallRuntime {
|
||||
@@ -40,6 +43,7 @@ impl ToolCallRuntime {
|
||||
pub(crate) fn handle_tool_call(
|
||||
&self,
|
||||
call: ToolCall,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
|
||||
let supports_parallel = self.router.tool_supports_parallel(&call.tool_name);
|
||||
|
||||
@@ -48,18 +52,24 @@ impl ToolCallRuntime {
|
||||
let turn = Arc::clone(&self.turn_context);
|
||||
let tracker = Arc::clone(&self.tracker);
|
||||
let lock = Arc::clone(&self.parallel_execution);
|
||||
let aborted_response = Self::aborted_response(&call);
|
||||
|
||||
let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
|
||||
AbortOnDropHandle::new(tokio::spawn(async move {
|
||||
let _guard = if supports_parallel {
|
||||
Either::Left(lock.read().await)
|
||||
} else {
|
||||
Either::Right(lock.write().await)
|
||||
};
|
||||
tokio::select! {
|
||||
_ = cancellation_token.cancelled() => Ok(aborted_response),
|
||||
res = async {
|
||||
let _guard = if supports_parallel {
|
||||
Either::Left(lock.read().await)
|
||||
} else {
|
||||
Either::Right(lock.write().await)
|
||||
};
|
||||
|
||||
router
|
||||
.dispatch_tool_call(session, turn, tracker, call)
|
||||
.await
|
||||
router
|
||||
.dispatch_tool_call(session, turn, tracker, call)
|
||||
.await
|
||||
} => res,
|
||||
}
|
||||
}));
|
||||
|
||||
async move {
|
||||
@@ -74,3 +84,25 @@ impl ToolCallRuntime {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolCallRuntime {
|
||||
fn aborted_response(call: &ToolCall) -> ResponseInputItem {
|
||||
match &call.payload {
|
||||
ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
|
||||
call_id: call.call_id.clone(),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput {
|
||||
call_id: call.call_id.clone(),
|
||||
result: Err("aborted".to_string()),
|
||||
},
|
||||
_ => ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call.call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ use crate::tools::context::ToolPayload;
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum ToolKind {
|
||||
Function,
|
||||
UnifiedExec,
|
||||
Mcp,
|
||||
}
|
||||
|
||||
@@ -27,7 +26,6 @@ pub trait ToolHandler: Send + Sync {
|
||||
matches!(
|
||||
(self.kind(), payload),
|
||||
(ToolKind::Function, ToolPayload::Function { .. })
|
||||
| (ToolKind::UnifiedExec, ToolPayload::UnifiedExec { .. })
|
||||
| (ToolKind::Mcp, ToolPayload::Mcp { .. })
|
||||
)
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
@@ -127,6 +128,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
.await
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_no_sandbox_approval(&self, policy: AskForApproval) -> bool {
|
||||
!matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
|
||||
|
||||
@@ -121,6 +121,11 @@ pub(crate) trait Approvable<Req> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Decide we can request an approval for no-sandbox execution.
|
||||
fn wants_no_sandbox_approval(&self, policy: AskForApproval) -> bool {
|
||||
!matches!(policy, AskForApproval::Never | AskForApproval::OnRequest)
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a Req,
|
||||
|
||||
@@ -136,48 +136,99 @@ impl From<JsonSchema> for AdditionalProperties {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_unified_exec_tool() -> ToolSpec {
|
||||
fn create_exec_command_tool() -> ToolSpec {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"input".to_string(),
|
||||
JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::String { description: None }),
|
||||
description: Some(
|
||||
"When no session_id is provided, treat the array as the command and arguments \
|
||||
to launch. When session_id is set, concatenate the strings (in order) and write \
|
||||
them to the session's stdin."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"session_id".to_string(),
|
||||
"cmd".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Shell command to execute.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"shell".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Shell binary to launch. Defaults to /bin/bash.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"login".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some(
|
||||
"Identifier for an existing interactive session. If omitted, a new command \
|
||||
is spawned."
|
||||
.to_string(),
|
||||
"Whether to run the shell with -l/-i semantics. Defaults to true.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"timeout_ms".to_string(),
|
||||
"yield_time_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"Maximum time in milliseconds to wait for output after writing the input."
|
||||
.to_string(),
|
||||
"How long to wait (in milliseconds) for output before yielding.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"Maximum number of tokens to return. Excess output will be truncated.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "unified_exec".to_string(),
|
||||
name: "exec_command".to_string(),
|
||||
description:
|
||||
"Runs a command in a PTY. Provide a session_id to reuse an existing interactive session.".to_string(),
|
||||
"Runs a command in a PTY, returning output or a session ID for ongoing interaction."
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["input".to_string()]),
|
||||
required: Some(vec!["cmd".to_string()]),
|
||||
additional_properties: Some(false.into()),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn create_write_stdin_tool() -> ToolSpec {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"session_id".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("Identifier of the running unified exec session.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"chars".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Bytes to write to stdin (may be empty to poll).".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"yield_time_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"How long to wait (in milliseconds) for output before yielding.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"Maximum number of tokens to return. Excess output will be truncated.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "write_stdin".to_string(),
|
||||
description:
|
||||
"Writes characters to an existing unified exec session and returns recent output."
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["session_id".to_string()]),
|
||||
additional_properties: Some(false.into()),
|
||||
},
|
||||
})
|
||||
@@ -839,19 +890,20 @@ pub(crate) fn build_specs(
|
||||
|| matches!(config.shell_type, ConfigShellToolType::Streamable);
|
||||
|
||||
if use_unified_exec {
|
||||
builder.push_spec(create_unified_exec_tool());
|
||||
builder.register_handler("unified_exec", unified_exec_handler);
|
||||
} else {
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::Default => {
|
||||
builder.push_spec(create_shell_tool());
|
||||
}
|
||||
ConfigShellToolType::Local => {
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
}
|
||||
ConfigShellToolType::Streamable => {
|
||||
// Already handled by use_unified_exec.
|
||||
}
|
||||
builder.push_spec(create_exec_command_tool());
|
||||
builder.push_spec(create_write_stdin_tool());
|
||||
builder.register_handler("exec_command", unified_exec_handler.clone());
|
||||
builder.register_handler("write_stdin", unified_exec_handler);
|
||||
}
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::Default => {
|
||||
builder.push_spec(create_shell_tool());
|
||||
}
|
||||
ConfigShellToolType::Local => {
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
}
|
||||
ConfigShellToolType::Streamable => {
|
||||
// Already handled by use_unified_exec.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -967,25 +1019,36 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_eq_tool_names(tools: &[ConfiguredToolSpec], expected_names: &[&str]) {
|
||||
let tool_names = tools
|
||||
.iter()
|
||||
.map(|tool| tool_name(&tool.spec))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
assert_eq!(
|
||||
tool_names.len(),
|
||||
expected_names.len(),
|
||||
"tool_name mismatch, {tool_names:?}, {expected_names:?}",
|
||||
// Avoid order-based assertions; compare via set containment instead.
|
||||
fn assert_contains_tool_names(tools: &[ConfiguredToolSpec], expected_subset: &[&str]) {
|
||||
use std::collections::HashSet;
|
||||
let mut names = HashSet::new();
|
||||
let mut duplicates = Vec::new();
|
||||
for name in tools.iter().map(|t| tool_name(&t.spec)) {
|
||||
if !names.insert(name) {
|
||||
duplicates.push(name);
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
duplicates.is_empty(),
|
||||
"duplicate tool entries detected: {duplicates:?}"
|
||||
);
|
||||
for (name, expected_name) in tool_names.iter().zip(expected_names.iter()) {
|
||||
assert_eq!(
|
||||
name, expected_name,
|
||||
"tool_name mismatch, {name:?}, {expected_name:?}"
|
||||
for expected in expected_subset {
|
||||
assert!(
|
||||
names.contains(expected),
|
||||
"expected tool {expected} to be present; had: {names:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> {
|
||||
match config.shell_type {
|
||||
ConfigShellToolType::Default => Some("shell"),
|
||||
ConfigShellToolType::Local => Some("local_shell"),
|
||||
ConfigShellToolType::Streamable => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn find_tool<'a>(
|
||||
tools: &'a [ConfiguredToolSpec],
|
||||
expected_name: &str,
|
||||
@@ -996,8 +1059,105 @@ mod tests {
|
||||
.unwrap_or_else(|| panic!("expected tool {expected_name}"))
|
||||
}
|
||||
|
||||
fn strip_descriptions_schema(schema: &mut JsonSchema) {
|
||||
match schema {
|
||||
JsonSchema::Boolean { description }
|
||||
| JsonSchema::String { description }
|
||||
| JsonSchema::Number { description } => {
|
||||
*description = None;
|
||||
}
|
||||
JsonSchema::Array { items, description } => {
|
||||
strip_descriptions_schema(items);
|
||||
*description = None;
|
||||
}
|
||||
JsonSchema::Object {
|
||||
properties,
|
||||
required: _,
|
||||
additional_properties,
|
||||
} => {
|
||||
for v in properties.values_mut() {
|
||||
strip_descriptions_schema(v);
|
||||
}
|
||||
if let Some(AdditionalProperties::Schema(s)) = additional_properties {
|
||||
strip_descriptions_schema(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn strip_descriptions_tool(spec: &mut ToolSpec) {
|
||||
match spec {
|
||||
ToolSpec::Function(ResponsesApiTool { parameters, .. }) => {
|
||||
strip_descriptions_schema(parameters);
|
||||
}
|
||||
ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch {} => {}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_specs() {
|
||||
fn test_full_toolset_specs_for_gpt5_codex() {
|
||||
let model_family = find_family_for_model("gpt-5-codex")
|
||||
.expect("gpt-5-codex should be a valid model family");
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::UnifiedExec);
|
||||
features.enable(Feature::WebSearchRequest);
|
||||
features.enable(Feature::ViewImageTool);
|
||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_family: &model_family,
|
||||
features: &features,
|
||||
});
|
||||
let (tools, _) = build_specs(&config, None).build();
|
||||
|
||||
// Build actual map name -> spec
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashSet;
|
||||
let mut actual: BTreeMap<String, ToolSpec> = BTreeMap::new();
|
||||
let mut duplicate_names = Vec::new();
|
||||
for t in &tools {
|
||||
let name = tool_name(&t.spec).to_string();
|
||||
if actual.insert(name.clone(), t.spec.clone()).is_some() {
|
||||
duplicate_names.push(name);
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
duplicate_names.is_empty(),
|
||||
"duplicate tool entries detected: {duplicate_names:?}"
|
||||
);
|
||||
|
||||
// Build expected from the same helpers used by the builder.
|
||||
let mut expected: BTreeMap<String, ToolSpec> = BTreeMap::new();
|
||||
for spec in [
|
||||
create_exec_command_tool(),
|
||||
create_write_stdin_tool(),
|
||||
create_shell_tool(),
|
||||
create_list_mcp_resources_tool(),
|
||||
create_list_mcp_resource_templates_tool(),
|
||||
create_read_mcp_resource_tool(),
|
||||
PLAN_TOOL.clone(),
|
||||
create_apply_patch_freeform_tool(),
|
||||
ToolSpec::WebSearch {},
|
||||
create_view_image_tool(),
|
||||
] {
|
||||
expected.insert(tool_name(&spec).to_string(), spec);
|
||||
}
|
||||
|
||||
// Exact name set match — this is the only test allowed to fail when tools change.
|
||||
let actual_names: HashSet<_> = actual.keys().cloned().collect();
|
||||
let expected_names: HashSet<_> = expected.keys().cloned().collect();
|
||||
assert_eq!(actual_names, expected_names, "tool name set mismatch");
|
||||
|
||||
// Compare specs ignoring human-readable descriptions.
|
||||
for name in expected.keys() {
|
||||
let mut a = actual.get(name).expect("present").clone();
|
||||
let mut e = expected.get(name).expect("present").clone();
|
||||
strip_descriptions_tool(&mut a);
|
||||
strip_descriptions_tool(&mut e);
|
||||
assert_eq!(a, e, "spec mismatch for {name}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_specs_contains_expected_basics() {
|
||||
let model_family = find_family_for_model("codex-mini-latest")
|
||||
.expect("codex-mini-latest should be a valid model family");
|
||||
let mut features = Features::with_defaults();
|
||||
@@ -1008,23 +1168,25 @@ mod tests {
|
||||
features: &features,
|
||||
});
|
||||
let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
let tool_names = tools.iter().map(|t| t.spec.name()).collect::<Vec<_>>();
|
||||
assert_eq!(
|
||||
&tool_names,
|
||||
&[
|
||||
"unified_exec",
|
||||
"exec_command",
|
||||
"write_stdin",
|
||||
"local_shell",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"web_search",
|
||||
"view_image",
|
||||
],
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_specs_default_shell() {
|
||||
fn test_build_specs_default_shell_present() {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::WebSearchRequest);
|
||||
@@ -1035,18 +1197,12 @@ mod tests {
|
||||
});
|
||||
let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"web_search",
|
||||
"view_image",
|
||||
],
|
||||
);
|
||||
// Only check the shell variant and a couple of core tools.
|
||||
let mut subset = vec!["exec_command", "write_stdin", "update_plan"];
|
||||
if let Some(shell_tool) = shell_tool_name(&config) {
|
||||
subset.push(shell_tool);
|
||||
}
|
||||
assert_contains_tool_names(&tools, &subset);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1063,7 +1219,8 @@ mod tests {
|
||||
});
|
||||
let (tools, _) = build_specs(&config, None).build();
|
||||
|
||||
assert!(!find_tool(&tools, "unified_exec").supports_parallel_tool_calls);
|
||||
assert!(!find_tool(&tools, "exec_command").supports_parallel_tool_calls);
|
||||
assert!(!find_tool(&tools, "write_stdin").supports_parallel_tool_calls);
|
||||
assert!(find_tool(&tools, "grep_files").supports_parallel_tool_calls);
|
||||
assert!(find_tool(&tools, "list_dir").supports_parallel_tool_calls);
|
||||
assert!(find_tool(&tools, "read_file").supports_parallel_tool_calls);
|
||||
@@ -1100,7 +1257,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_specs_mcp_tools() {
|
||||
fn test_build_specs_mcp_tools_converted() {
|
||||
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::UnifiedExec);
|
||||
@@ -1148,20 +1305,6 @@ mod tests {
|
||||
)
|
||||
.build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"test_server/do_something_cool",
|
||||
],
|
||||
);
|
||||
|
||||
let tool = find_tool(&tools, "test_server/do_something_cool");
|
||||
assert_eq!(
|
||||
&tool.spec,
|
||||
@@ -1267,21 +1410,19 @@ mod tests {
|
||||
]);
|
||||
|
||||
let (tools, _) = build_specs(&config, Some(tools_map)).build();
|
||||
// Expect unified_exec first, followed by MCP tools sorted by fully-qualified name.
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"view_image",
|
||||
"test_server/cool",
|
||||
"test_server/do",
|
||||
"test_server/something",
|
||||
],
|
||||
);
|
||||
|
||||
// Only assert that the MCP tools themselves are sorted by fully-qualified name.
|
||||
let mcp_names: Vec<_> = tools
|
||||
.iter()
|
||||
.map(|t| tool_name(&t.spec).to_string())
|
||||
.filter(|n| n.starts_with("test_server/"))
|
||||
.collect();
|
||||
let expected = vec![
|
||||
"test_server/cool".to_string(),
|
||||
"test_server/do".to_string(),
|
||||
"test_server/something".to_string(),
|
||||
];
|
||||
assert_eq!(mcp_names, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1320,23 +1461,9 @@ mod tests {
|
||||
)
|
||||
.build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"dash/search",
|
||||
],
|
||||
);
|
||||
|
||||
let tool = find_tool(&tools, "dash/search");
|
||||
assert_eq!(
|
||||
tools[8].spec,
|
||||
tool.spec,
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "dash/search".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -1389,22 +1516,9 @@ mod tests {
|
||||
)
|
||||
.build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"dash/paginate",
|
||||
],
|
||||
);
|
||||
let tool = find_tool(&tools, "dash/paginate");
|
||||
assert_eq!(
|
||||
tools[8].spec,
|
||||
tool.spec,
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "dash/paginate".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -1456,22 +1570,9 @@ mod tests {
|
||||
)
|
||||
.build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"dash/tags",
|
||||
],
|
||||
);
|
||||
let tool = find_tool(&tools, "dash/tags");
|
||||
assert_eq!(
|
||||
tools[8].spec,
|
||||
tool.spec,
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "dash/tags".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -1525,22 +1626,9 @@ mod tests {
|
||||
)
|
||||
.build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"dash/value",
|
||||
],
|
||||
);
|
||||
let tool = find_tool(&tools, "dash/value");
|
||||
assert_eq!(
|
||||
tools[8].spec,
|
||||
tool.spec,
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "dash/value".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -1631,23 +1719,9 @@ mod tests {
|
||||
)
|
||||
.build();
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&[
|
||||
"unified_exec",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"apply_patch",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"test_server/do_something_cool",
|
||||
],
|
||||
);
|
||||
|
||||
let tool = find_tool(&tools, "test_server/do_something_cool");
|
||||
assert_eq!(
|
||||
tools[8].spec,
|
||||
tool.spec,
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "test_server/do_something_cool".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
|
||||
@@ -1,18 +1,35 @@
|
||||
//! Utilities for truncating large chunks of output while preserving a prefix
|
||||
//! and suffix on UTF-8 boundaries.
|
||||
|
||||
use codex_utils_tokenizer::Tokenizer;
|
||||
|
||||
/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
|
||||
/// preserving the beginning and the end. Returns the possibly truncated
|
||||
/// string and `Some(original_token_count)` (estimated at 4 bytes/token)
|
||||
/// string and `Some(original_token_count)` (counted with the local tokenizer;
|
||||
/// falls back to a 4-bytes-per-token estimate if the tokenizer cannot load)
|
||||
/// if truncation occurred; otherwise returns the original string and `None`.
|
||||
pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>) {
|
||||
if s.len() <= max_bytes {
|
||||
return (s.to_string(), None);
|
||||
}
|
||||
|
||||
let est_tokens = (s.len() as u64).div_ceil(4);
|
||||
// Build a tokenizer for counting (default to o200k_base; fall back to cl100k_base).
|
||||
// If both fail, fall back to a 4-bytes-per-token estimate.
|
||||
let tok = Tokenizer::try_default().ok();
|
||||
let token_count = |text: &str| -> u64 {
|
||||
if let Some(ref t) = tok {
|
||||
t.count(text) as u64
|
||||
} else {
|
||||
(text.len() as u64).div_ceil(4)
|
||||
}
|
||||
};
|
||||
|
||||
let total_tokens = token_count(s);
|
||||
if max_bytes == 0 {
|
||||
return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
|
||||
return (
|
||||
format!("…{total_tokens} tokens truncated…"),
|
||||
Some(total_tokens),
|
||||
);
|
||||
}
|
||||
|
||||
fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
|
||||
@@ -50,13 +67,17 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>
|
||||
idx
|
||||
}
|
||||
|
||||
let mut guess_tokens = est_tokens;
|
||||
// Iterate to stabilize marker length → keep budget → boundaries.
|
||||
let mut guess_tokens: u64 = 1;
|
||||
for _ in 0..4 {
|
||||
let marker = format!("…{guess_tokens} tokens truncated…");
|
||||
let marker_len = marker.len();
|
||||
let keep_budget = max_bytes.saturating_sub(marker_len);
|
||||
if keep_budget == 0 {
|
||||
return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
|
||||
return (
|
||||
format!("…{total_tokens} tokens truncated…"),
|
||||
Some(total_tokens),
|
||||
);
|
||||
}
|
||||
|
||||
let left_budget = keep_budget / 2;
|
||||
@@ -67,59 +88,72 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>
|
||||
suffix_start = prefix_end;
|
||||
}
|
||||
|
||||
let kept_content_bytes = prefix_end + (s.len() - suffix_start);
|
||||
let truncated_content_bytes = s.len().saturating_sub(kept_content_bytes);
|
||||
let new_tokens = (truncated_content_bytes as u64).div_ceil(4);
|
||||
// Tokens actually removed (middle slice) using the real tokenizer.
|
||||
let removed_tokens = token_count(&s[prefix_end..suffix_start]);
|
||||
|
||||
if new_tokens == guess_tokens {
|
||||
let mut out = String::with_capacity(marker_len + kept_content_bytes + 1);
|
||||
// If the number of digits in the token count does not change the marker length,
|
||||
// we can finalize output.
|
||||
let final_marker = format!("…{removed_tokens} tokens truncated…");
|
||||
if final_marker.len() == marker_len {
|
||||
let kept_content_bytes = prefix_end + (s.len() - suffix_start);
|
||||
let mut out = String::with_capacity(final_marker.len() + kept_content_bytes + 1);
|
||||
out.push_str(&s[..prefix_end]);
|
||||
out.push_str(&marker);
|
||||
out.push_str(&final_marker);
|
||||
out.push('\n');
|
||||
out.push_str(&s[suffix_start..]);
|
||||
return (out, Some(est_tokens));
|
||||
return (out, Some(total_tokens));
|
||||
}
|
||||
|
||||
guess_tokens = new_tokens;
|
||||
guess_tokens = removed_tokens;
|
||||
}
|
||||
|
||||
// Fallback build after iterations: compute with the last guess.
|
||||
let marker = format!("…{guess_tokens} tokens truncated…");
|
||||
let marker_len = marker.len();
|
||||
let keep_budget = max_bytes.saturating_sub(marker_len);
|
||||
if keep_budget == 0 {
|
||||
return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
|
||||
return (
|
||||
format!("…{total_tokens} tokens truncated…"),
|
||||
Some(total_tokens),
|
||||
);
|
||||
}
|
||||
|
||||
let left_budget = keep_budget / 2;
|
||||
let right_budget = keep_budget - left_budget;
|
||||
let prefix_end = pick_prefix_end(s, left_budget);
|
||||
let suffix_start = pick_suffix_start(s, right_budget);
|
||||
let mut suffix_start = pick_suffix_start(s, right_budget);
|
||||
if suffix_start < prefix_end {
|
||||
suffix_start = prefix_end;
|
||||
}
|
||||
|
||||
let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1);
|
||||
out.push_str(&s[..prefix_end]);
|
||||
out.push_str(&marker);
|
||||
out.push('\n');
|
||||
out.push_str(&s[suffix_start..]);
|
||||
(out, Some(est_tokens))
|
||||
(out, Some(total_tokens))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::truncate_middle;
|
||||
use codex_utils_tokenizer::Tokenizer;
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_no_newlines_fallback() {
|
||||
let tok = Tokenizer::try_default().expect("load tokenizer");
|
||||
let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ*";
|
||||
let max_bytes = 32;
|
||||
let (out, original) = truncate_middle(s, max_bytes);
|
||||
assert!(out.starts_with("abc"));
|
||||
assert!(out.contains("tokens truncated"));
|
||||
assert!(out.ends_with("XYZ*"));
|
||||
assert_eq!(original, Some((s.len() as u64).div_ceil(4)));
|
||||
assert_eq!(original, Some(tok.count(s) as u64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_prefers_newline_boundaries() {
|
||||
let tok = Tokenizer::try_default().expect("load tokenizer");
|
||||
let mut s = String::new();
|
||||
for i in 1..=20 {
|
||||
s.push_str(&format!("{i:03}\n"));
|
||||
@@ -131,50 +165,36 @@ mod tests {
|
||||
assert!(out.starts_with("001\n002\n003\n004\n"));
|
||||
assert!(out.contains("tokens truncated"));
|
||||
assert!(out.ends_with("017\n018\n019\n020\n"));
|
||||
assert_eq!(tokens, Some(20));
|
||||
assert_eq!(tokens, Some(tok.count(&s) as u64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_handles_utf8_content() {
|
||||
let tok = Tokenizer::try_default().expect("load tokenizer");
|
||||
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n";
|
||||
let max_bytes = 32;
|
||||
let (out, tokens) = truncate_middle(s, max_bytes);
|
||||
|
||||
assert!(out.contains("tokens truncated"));
|
||||
assert!(!out.contains('\u{fffd}'));
|
||||
assert_eq!(tokens, Some((s.len() as u64).div_ceil(4)));
|
||||
assert_eq!(tokens, Some(tok.count(s) as u64));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_prefers_newline_boundaries_2() {
|
||||
let tok = Tokenizer::try_default().expect("load tokenizer");
|
||||
// Build a multi-line string of 20 numbered lines (each "NNN\n").
|
||||
let mut s = String::new();
|
||||
for i in 1..=20 {
|
||||
s.push_str(&format!("{i:03}\n"));
|
||||
}
|
||||
// Total length: 20 lines * 4 bytes per line = 80 bytes.
|
||||
assert_eq!(s.len(), 80);
|
||||
|
||||
// Choose a cap that forces truncation while leaving room for
|
||||
// a few lines on each side after accounting for the marker.
|
||||
let max_bytes = 64;
|
||||
// Expect exact output: first 4 lines, marker, last 4 lines, and correct token estimate (80/4 = 20).
|
||||
assert_eq!(
|
||||
truncate_middle(&s, max_bytes),
|
||||
(
|
||||
r#"001
|
||||
002
|
||||
003
|
||||
004
|
||||
…12 tokens truncated…
|
||||
017
|
||||
018
|
||||
019
|
||||
020
|
||||
"#
|
||||
.to_string(),
|
||||
Some(20)
|
||||
)
|
||||
);
|
||||
let (out, total) = truncate_middle(&s, max_bytes);
|
||||
assert!(out.starts_with("001\n002\n003\n004\n"));
|
||||
assert!(out.contains("tokens truncated"));
|
||||
assert!(out.ends_with("017\n018\n019\n020\n"));
|
||||
assert_eq!(total, Some(tok.count(&s) as u64));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,8 +22,13 @@
|
||||
//! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicI32;
|
||||
use std::time::Duration;
|
||||
|
||||
use rand::Rng;
|
||||
use rand::rng;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::codex::Session;
|
||||
@@ -36,45 +41,132 @@ mod session_manager;
|
||||
pub(crate) use errors::UnifiedExecError;
|
||||
pub(crate) use session::UnifiedExecSession;
|
||||
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 1_000;
|
||||
const MAX_TIMEOUT_MS: u64 = 60_000;
|
||||
const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 128 * 1024; // 128 KiB
|
||||
pub(crate) const DEFAULT_YIELD_TIME_MS: u64 = 10_000;
|
||||
pub(crate) const MIN_YIELD_TIME_MS: u64 = 250;
|
||||
pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000;
|
||||
pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000;
|
||||
pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB
|
||||
|
||||
pub(crate) struct UnifiedExecContext<'a> {
|
||||
pub session: &'a Session,
|
||||
pub turn: &'a TurnContext,
|
||||
pub call_id: &'a str,
|
||||
pub session_id: Option<i32>,
|
||||
pub(crate) struct UnifiedExecContext {
|
||||
pub session: Arc<Session>,
|
||||
pub turn: Arc<TurnContext>,
|
||||
pub call_id: String,
|
||||
}
|
||||
|
||||
impl UnifiedExecContext {
|
||||
pub fn new(session: Arc<Session>, turn: Arc<TurnContext>, call_id: String) -> Self {
|
||||
Self {
|
||||
session,
|
||||
turn,
|
||||
call_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct UnifiedExecRequest<'a> {
|
||||
pub input_chunks: &'a [String],
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub(crate) struct ExecCommandRequest<'a> {
|
||||
pub command: &'a str,
|
||||
pub shell: &'a str,
|
||||
pub login: bool,
|
||||
pub yield_time_ms: Option<u64>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct WriteStdinRequest<'a> {
|
||||
pub session_id: i32,
|
||||
pub input: &'a str,
|
||||
pub yield_time_ms: Option<u64>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub(crate) struct UnifiedExecResult {
|
||||
pub session_id: Option<i32>,
|
||||
pub(crate) struct UnifiedExecResponse {
|
||||
pub event_call_id: String,
|
||||
pub chunk_id: String,
|
||||
pub wall_time: Duration,
|
||||
pub output: String,
|
||||
pub session_id: Option<i32>,
|
||||
pub exit_code: Option<i32>,
|
||||
pub original_token_count: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Default)]
|
||||
pub(crate) struct UnifiedExecSessionManager {
|
||||
next_session_id: AtomicI32,
|
||||
sessions: Mutex<HashMap<i32, session::UnifiedExecSession>>,
|
||||
sessions: Mutex<HashMap<i32, SessionEntry>>,
|
||||
}
|
||||
|
||||
struct SessionEntry {
|
||||
session: session::UnifiedExecSession,
|
||||
session_ref: Arc<Session>,
|
||||
turn_ref: Arc<TurnContext>,
|
||||
call_id: String,
|
||||
command: String,
|
||||
cwd: PathBuf,
|
||||
started_at: tokio::time::Instant,
|
||||
}
|
||||
|
||||
pub(crate) fn clamp_yield_time(yield_time_ms: Option<u64>) -> u64 {
|
||||
match yield_time_ms {
|
||||
Some(value) => value.clamp(MIN_YIELD_TIME_MS, MAX_YIELD_TIME_MS),
|
||||
None => DEFAULT_YIELD_TIME_MS,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn resolve_max_tokens(max_tokens: Option<usize>) -> usize {
|
||||
max_tokens.unwrap_or(DEFAULT_MAX_OUTPUT_TOKENS)
|
||||
}
|
||||
|
||||
pub(crate) fn generate_chunk_id() -> String {
|
||||
let mut rng = rng();
|
||||
(0..6)
|
||||
.map(|_| format!("{:x}", rng.random_range(0..16)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn truncate_output_to_tokens(
|
||||
output: &str,
|
||||
max_tokens: usize,
|
||||
) -> (String, Option<usize>) {
|
||||
if max_tokens == 0 {
|
||||
let total_tokens = output.chars().count();
|
||||
let message = format!("…{total_tokens} tokens truncated…");
|
||||
return (message, Some(total_tokens));
|
||||
}
|
||||
|
||||
let tokens: Vec<char> = output.chars().collect();
|
||||
let total_tokens = tokens.len();
|
||||
if total_tokens <= max_tokens {
|
||||
return (output.to_string(), None);
|
||||
}
|
||||
|
||||
let half = max_tokens / 2;
|
||||
if half == 0 {
|
||||
let truncated = total_tokens.saturating_sub(max_tokens);
|
||||
let message = format!("…{truncated} tokens truncated…");
|
||||
return (message, Some(total_tokens));
|
||||
}
|
||||
|
||||
let truncated = total_tokens.saturating_sub(half * 2);
|
||||
let mut truncated_output = String::new();
|
||||
truncated_output.extend(&tokens[..half]);
|
||||
truncated_output.push_str(&format!("…{truncated} tokens truncated…"));
|
||||
truncated_output.extend(&tokens[total_tokens - half..]);
|
||||
(truncated_output, Some(total_tokens))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(unix)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::unified_exec::ExecCommandRequest;
|
||||
use crate::unified_exec::WriteStdinRequest;
|
||||
use core_test_support::skip_if_sandbox;
|
||||
use std::sync::Arc;
|
||||
use tokio::time::Duration;
|
||||
@@ -88,34 +180,49 @@ mod tests {
|
||||
(Arc::new(session), Arc::new(turn))
|
||||
}
|
||||
|
||||
async fn run_unified_exec_request(
|
||||
async fn exec_command(
|
||||
session: &Arc<Session>,
|
||||
turn: &Arc<TurnContext>,
|
||||
session_id: Option<i32>,
|
||||
input: Vec<String>,
|
||||
timeout_ms: Option<u64>,
|
||||
) -> Result<UnifiedExecResult, UnifiedExecError> {
|
||||
let request_input = input;
|
||||
let request = UnifiedExecRequest {
|
||||
input_chunks: &request_input,
|
||||
timeout_ms,
|
||||
};
|
||||
cmd: &str,
|
||||
yield_time_ms: Option<u64>,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
let context =
|
||||
UnifiedExecContext::new(Arc::clone(session), Arc::clone(turn), "call".to_string());
|
||||
|
||||
session
|
||||
.services
|
||||
.unified_exec_manager
|
||||
.handle_request(
|
||||
request,
|
||||
UnifiedExecContext {
|
||||
session,
|
||||
turn: turn.as_ref(),
|
||||
call_id: "call",
|
||||
session_id,
|
||||
.exec_command(
|
||||
ExecCommandRequest {
|
||||
command: cmd,
|
||||
shell: "/bin/bash",
|
||||
login: true,
|
||||
yield_time_ms,
|
||||
max_output_tokens: None,
|
||||
},
|
||||
&context,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn write_stdin(
|
||||
session: &Arc<Session>,
|
||||
session_id: i32,
|
||||
input: &str,
|
||||
yield_time_ms: Option<u64>,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
session
|
||||
.services
|
||||
.unified_exec_manager
|
||||
.write_stdin(WriteStdinRequest {
|
||||
session_id,
|
||||
input,
|
||||
yield_time_ms,
|
||||
max_output_tokens: None,
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn push_chunk_trims_only_excess_bytes() {
|
||||
let mut buffer = OutputBufferState::default();
|
||||
@@ -140,37 +247,28 @@ mod tests {
|
||||
|
||||
let (session, turn) = test_session_and_turn();
|
||||
|
||||
let open_shell = run_unified_exec_request(
|
||||
&session,
|
||||
&turn,
|
||||
None,
|
||||
vec!["bash".to_string(), "-i".to_string()],
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
let open_shell = exec_command(&session, &turn, "bash -i", Some(2_500)).await?;
|
||||
let session_id = open_shell.session_id.expect("expected session_id");
|
||||
|
||||
run_unified_exec_request(
|
||||
write_stdin(
|
||||
&session,
|
||||
&turn,
|
||||
Some(session_id),
|
||||
vec![
|
||||
"export".to_string(),
|
||||
"CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
|
||||
],
|
||||
session_id,
|
||||
"export CODEX_INTERACTIVE_SHELL_VAR=codex\n",
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let out_2 = run_unified_exec_request(
|
||||
let out_2 = write_stdin(
|
||||
&session,
|
||||
&turn,
|
||||
Some(session_id),
|
||||
vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
|
||||
session_id,
|
||||
"echo $CODEX_INTERACTIVE_SHELL_VAR\n",
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
assert!(out_2.output.contains("codex"));
|
||||
assert!(
|
||||
out_2.output.contains("codex"),
|
||||
"expected environment variable output"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -181,47 +279,44 @@ mod tests {
|
||||
|
||||
let (session, turn) = test_session_and_turn();
|
||||
|
||||
let shell_a = run_unified_exec_request(
|
||||
&session,
|
||||
&turn,
|
||||
None,
|
||||
vec!["/bin/bash".to_string(), "-i".to_string()],
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
let shell_a = exec_command(&session, &turn, "bash -i", Some(2_500)).await?;
|
||||
let session_a = shell_a.session_id.expect("expected session id");
|
||||
|
||||
run_unified_exec_request(
|
||||
write_stdin(
|
||||
&session,
|
||||
&turn,
|
||||
Some(session_a),
|
||||
vec!["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
|
||||
session_a,
|
||||
"export CODEX_INTERACTIVE_SHELL_VAR=codex\n",
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let out_2 = run_unified_exec_request(
|
||||
let out_2 = exec_command(
|
||||
&session,
|
||||
&turn,
|
||||
None,
|
||||
vec![
|
||||
"echo".to_string(),
|
||||
"$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
|
||||
],
|
||||
"echo $CODEX_INTERACTIVE_SHELL_VAR",
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
assert!(!out_2.output.contains("codex"));
|
||||
assert!(
|
||||
out_2.session_id.is_none(),
|
||||
"short command should not retain a session"
|
||||
);
|
||||
assert!(
|
||||
!out_2.output.contains("codex"),
|
||||
"short command should run in a fresh shell"
|
||||
);
|
||||
|
||||
let out_3 = run_unified_exec_request(
|
||||
let out_3 = write_stdin(
|
||||
&session,
|
||||
&turn,
|
||||
Some(session_a),
|
||||
vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
|
||||
session_a,
|
||||
"echo $CODEX_INTERACTIVE_SHELL_VAR\n",
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
assert!(out_3.output.contains("codex"));
|
||||
assert!(
|
||||
out_3.output.contains("codex"),
|
||||
"session should preserve state"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -232,45 +327,37 @@ mod tests {
|
||||
|
||||
let (session, turn) = test_session_and_turn();
|
||||
|
||||
let open_shell = run_unified_exec_request(
|
||||
&session,
|
||||
&turn,
|
||||
None,
|
||||
vec!["bash".to_string(), "-i".to_string()],
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
let open_shell = exec_command(&session, &turn, "bash -i", Some(2_500)).await?;
|
||||
let session_id = open_shell.session_id.expect("expected session id");
|
||||
|
||||
run_unified_exec_request(
|
||||
write_stdin(
|
||||
&session,
|
||||
&turn,
|
||||
Some(session_id),
|
||||
vec![
|
||||
"export".to_string(),
|
||||
"CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
|
||||
],
|
||||
session_id,
|
||||
"export CODEX_INTERACTIVE_SHELL_VAR=codex\n",
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let out_2 = run_unified_exec_request(
|
||||
let out_2 = write_stdin(
|
||||
&session,
|
||||
&turn,
|
||||
Some(session_id),
|
||||
vec!["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
|
||||
session_id,
|
||||
"sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n",
|
||||
Some(10),
|
||||
)
|
||||
.await?;
|
||||
assert!(!out_2.output.contains("codex"));
|
||||
assert!(
|
||||
!out_2.output.contains("codex"),
|
||||
"timeout too short should yield incomplete output"
|
||||
);
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(7)).await;
|
||||
|
||||
let out_3 =
|
||||
run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
|
||||
.await?;
|
||||
let out_3 = write_stdin(&session, session_id, "", Some(100)).await?;
|
||||
|
||||
assert!(out_3.output.contains("codex"));
|
||||
assert!(
|
||||
out_3.output.contains("codex"),
|
||||
"subsequent poll should retrieve output"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -280,18 +367,9 @@ mod tests {
|
||||
async fn requests_with_large_timeout_are_capped() -> anyhow::Result<()> {
|
||||
let (session, turn) = test_session_and_turn();
|
||||
|
||||
let result = run_unified_exec_request(
|
||||
&session,
|
||||
&turn,
|
||||
None,
|
||||
vec!["echo".to_string(), "codex".to_string()],
|
||||
Some(120_000),
|
||||
)
|
||||
.await?;
|
||||
let result = exec_command(&session, &turn, "echo codex", Some(120_000)).await?;
|
||||
|
||||
assert!(result.output.starts_with(
|
||||
"Warning: requested timeout 120000ms exceeds maximum of 60000ms; clamping to 60000ms.\n"
|
||||
));
|
||||
assert!(result.session_id.is_none());
|
||||
assert!(result.output.contains("codex"));
|
||||
|
||||
Ok(())
|
||||
@@ -301,16 +379,12 @@ mod tests {
|
||||
#[ignore] // Ignored while we have a better way to test this.
|
||||
async fn completed_commands_do_not_persist_sessions() -> anyhow::Result<()> {
|
||||
let (session, turn) = test_session_and_turn();
|
||||
let result = run_unified_exec_request(
|
||||
&session,
|
||||
&turn,
|
||||
None,
|
||||
vec!["/bin/echo".to_string(), "codex".to_string()],
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
let result = exec_command(&session, &turn, "echo codex", Some(2_500)).await?;
|
||||
|
||||
assert!(result.session_id.is_none());
|
||||
assert!(
|
||||
result.session_id.is_none(),
|
||||
"completed command should not retain session"
|
||||
);
|
||||
assert!(result.output.contains("codex"));
|
||||
|
||||
assert!(
|
||||
@@ -332,31 +406,16 @@ mod tests {
|
||||
|
||||
let (session, turn) = test_session_and_turn();
|
||||
|
||||
let open_shell = run_unified_exec_request(
|
||||
&session,
|
||||
&turn,
|
||||
None,
|
||||
vec!["/bin/bash".to_string(), "-i".to_string()],
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
let open_shell = exec_command(&session, &turn, "bash -i", Some(2_500)).await?;
|
||||
let session_id = open_shell.session_id.expect("expected session id");
|
||||
|
||||
run_unified_exec_request(
|
||||
&session,
|
||||
&turn,
|
||||
Some(session_id),
|
||||
vec!["exit\n".to_string()],
|
||||
Some(2_500),
|
||||
)
|
||||
.await?;
|
||||
write_stdin(&session, session_id, "exit\n", Some(2_500)).await?;
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
|
||||
let err =
|
||||
run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
|
||||
.await
|
||||
.expect_err("expected unknown session error");
|
||||
let err = write_stdin(&session, session_id, "", Some(100))
|
||||
.await
|
||||
.expect_err("expected unknown session error");
|
||||
|
||||
match err {
|
||||
UnifiedExecError::UnknownSessionId { session_id: err_id } => {
|
||||
|
||||
@@ -5,6 +5,8 @@ use tokio::sync::mpsc;
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::sandboxing::ExecEnv;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
@@ -14,79 +16,272 @@ use crate::tools::orchestrator::ToolOrchestrator;
|
||||
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
|
||||
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::truncate::truncate_middle;
|
||||
|
||||
use super::DEFAULT_TIMEOUT_MS;
|
||||
use super::MAX_TIMEOUT_MS;
|
||||
use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
|
||||
use super::ExecCommandRequest;
|
||||
use super::MIN_YIELD_TIME_MS;
|
||||
use super::SessionEntry;
|
||||
use super::UnifiedExecContext;
|
||||
use super::UnifiedExecError;
|
||||
use super::UnifiedExecRequest;
|
||||
use super::UnifiedExecResult;
|
||||
use super::UnifiedExecResponse;
|
||||
use super::UnifiedExecSessionManager;
|
||||
use super::WriteStdinRequest;
|
||||
use super::clamp_yield_time;
|
||||
use super::generate_chunk_id;
|
||||
use super::resolve_max_tokens;
|
||||
use super::session::OutputBuffer;
|
||||
use super::session::UnifiedExecSession;
|
||||
|
||||
pub(super) struct SessionAcquisition {
|
||||
pub(super) session_id: i32,
|
||||
pub(super) writer_tx: mpsc::Sender<Vec<u8>>,
|
||||
pub(super) output_buffer: OutputBuffer,
|
||||
pub(super) output_notify: Arc<Notify>,
|
||||
pub(super) new_session: Option<UnifiedExecSession>,
|
||||
pub(super) reuse_requested: bool,
|
||||
}
|
||||
use super::truncate_output_to_tokens;
|
||||
|
||||
impl UnifiedExecSessionManager {
|
||||
pub(super) async fn acquire_session(
|
||||
pub(crate) async fn exec_command(
|
||||
&self,
|
||||
request: &UnifiedExecRequest<'_>,
|
||||
context: &UnifiedExecContext<'_>,
|
||||
) -> Result<SessionAcquisition, UnifiedExecError> {
|
||||
if let Some(existing_id) = context.session_id {
|
||||
let mut sessions = self.sessions.lock().await;
|
||||
match sessions.get(&existing_id) {
|
||||
Some(session) => {
|
||||
if session.has_exited() {
|
||||
sessions.remove(&existing_id);
|
||||
return Err(UnifiedExecError::UnknownSessionId {
|
||||
session_id: existing_id,
|
||||
});
|
||||
}
|
||||
let (buffer, notify) = session.output_handles();
|
||||
let writer_tx = session.writer_sender();
|
||||
Ok(SessionAcquisition {
|
||||
session_id: existing_id,
|
||||
writer_tx,
|
||||
output_buffer: buffer,
|
||||
output_notify: notify,
|
||||
new_session: None,
|
||||
reuse_requested: true,
|
||||
})
|
||||
}
|
||||
None => Err(UnifiedExecError::UnknownSessionId {
|
||||
session_id: existing_id,
|
||||
}),
|
||||
request: ExecCommandRequest<'_>,
|
||||
context: &UnifiedExecContext,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
let shell_flag = if request.login { "-lc" } else { "-c" };
|
||||
let command = vec![
|
||||
request.shell.to_string(),
|
||||
shell_flag.to_string(),
|
||||
request.command.to_string(),
|
||||
];
|
||||
|
||||
let session = self.open_session_with_sandbox(command, context).await?;
|
||||
|
||||
let max_tokens = resolve_max_tokens(request.max_output_tokens);
|
||||
let yield_time_ms =
|
||||
clamp_yield_time(Some(request.yield_time_ms.unwrap_or(MIN_YIELD_TIME_MS)));
|
||||
|
||||
let start = Instant::now();
|
||||
let (output_buffer, output_notify) = session.output_handles();
|
||||
let deadline = start + Duration::from_millis(yield_time_ms);
|
||||
let collected =
|
||||
Self::collect_output_until_deadline(&output_buffer, &output_notify, deadline).await;
|
||||
let wall_time = Instant::now().saturating_duration_since(start);
|
||||
|
||||
let text = String::from_utf8_lossy(&collected).to_string();
|
||||
let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
|
||||
let chunk_id = generate_chunk_id();
|
||||
let exit_code = session.exit_code();
|
||||
let session_id = if session.has_exited() {
|
||||
None
|
||||
} else {
|
||||
Some(
|
||||
self.store_session(session, context, request.command, start)
|
||||
.await,
|
||||
)
|
||||
};
|
||||
|
||||
let response = UnifiedExecResponse {
|
||||
event_call_id: context.call_id.clone(),
|
||||
chunk_id,
|
||||
wall_time,
|
||||
output,
|
||||
session_id,
|
||||
exit_code,
|
||||
original_token_count,
|
||||
};
|
||||
|
||||
// If the command completed during this call, emit an ExecCommandEnd via the emitter.
|
||||
if response.session_id.is_none() {
|
||||
let exit = response.exit_code.unwrap_or(-1);
|
||||
Self::emit_exec_end_from_context(
|
||||
context,
|
||||
request.command.to_string(),
|
||||
response.output.clone(),
|
||||
exit,
|
||||
response.wall_time,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub(crate) async fn write_stdin(
|
||||
&self,
|
||||
request: WriteStdinRequest<'_>,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
let session_id = request.session_id;
|
||||
|
||||
let (writer_tx, output_buffer, output_notify) =
|
||||
self.prepare_session_handles(session_id).await?;
|
||||
|
||||
if !request.input.is_empty() {
|
||||
Self::send_input(&writer_tx, request.input.as_bytes()).await?;
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
|
||||
let max_tokens = resolve_max_tokens(request.max_output_tokens);
|
||||
let yield_time_ms = clamp_yield_time(request.yield_time_ms);
|
||||
let start = Instant::now();
|
||||
let deadline = start + Duration::from_millis(yield_time_ms);
|
||||
let collected =
|
||||
Self::collect_output_until_deadline(&output_buffer, &output_notify, deadline).await;
|
||||
let wall_time = Instant::now().saturating_duration_since(start);
|
||||
|
||||
let text = String::from_utf8_lossy(&collected).to_string();
|
||||
let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
|
||||
let chunk_id = generate_chunk_id();
|
||||
|
||||
let status = self.refresh_session_state(session_id).await;
|
||||
let (session_id, exit_code, completion_entry, event_call_id) = match status {
|
||||
SessionStatus::Alive { exit_code, call_id } => {
|
||||
(Some(session_id), exit_code, None, call_id)
|
||||
}
|
||||
SessionStatus::Exited { exit_code, entry } => {
|
||||
let call_id = entry.call_id.clone();
|
||||
(None, exit_code, Some(*entry), call_id)
|
||||
}
|
||||
SessionStatus::Unknown => {
|
||||
return Err(UnifiedExecError::UnknownSessionId { session_id });
|
||||
}
|
||||
};
|
||||
|
||||
let response = UnifiedExecResponse {
|
||||
event_call_id,
|
||||
chunk_id,
|
||||
wall_time,
|
||||
output,
|
||||
session_id,
|
||||
exit_code,
|
||||
original_token_count,
|
||||
};
|
||||
|
||||
if let (Some(exit), Some(entry)) = (response.exit_code, completion_entry) {
|
||||
let total_duration = Instant::now().saturating_duration_since(entry.started_at);
|
||||
Self::emit_exec_end_from_entry(entry, response.output.clone(), exit, total_duration)
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn refresh_session_state(&self, session_id: i32) -> SessionStatus {
|
||||
let mut sessions = self.sessions.lock().await;
|
||||
let Some(entry) = sessions.get(&session_id) else {
|
||||
return SessionStatus::Unknown;
|
||||
};
|
||||
|
||||
let exit_code = entry.session.exit_code();
|
||||
|
||||
if entry.session.has_exited() {
|
||||
let Some(entry) = sessions.remove(&session_id) else {
|
||||
return SessionStatus::Unknown;
|
||||
};
|
||||
SessionStatus::Exited {
|
||||
exit_code,
|
||||
entry: Box::new(entry),
|
||||
}
|
||||
} else {
|
||||
let new_id = self
|
||||
.next_session_id
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
let managed_session = self
|
||||
.open_session_with_sandbox(request.input_chunks.to_vec(), context)
|
||||
.await?;
|
||||
let (buffer, notify) = managed_session.output_handles();
|
||||
let writer_tx = managed_session.writer_sender();
|
||||
Ok(SessionAcquisition {
|
||||
session_id: new_id,
|
||||
writer_tx,
|
||||
output_buffer: buffer,
|
||||
output_notify: notify,
|
||||
new_session: Some(managed_session),
|
||||
reuse_requested: false,
|
||||
})
|
||||
SessionStatus::Alive {
|
||||
exit_code,
|
||||
call_id: entry.call_id.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn prepare_session_handles(
|
||||
&self,
|
||||
session_id: i32,
|
||||
) -> Result<(mpsc::Sender<Vec<u8>>, OutputBuffer, Arc<Notify>), UnifiedExecError> {
|
||||
let sessions = self.sessions.lock().await;
|
||||
let (output_buffer, output_notify, writer_tx) =
|
||||
if let Some(entry) = sessions.get(&session_id) {
|
||||
let (buffer, notify) = entry.session.output_handles();
|
||||
(buffer, notify, entry.session.writer_sender())
|
||||
} else {
|
||||
return Err(UnifiedExecError::UnknownSessionId { session_id });
|
||||
};
|
||||
|
||||
Ok((writer_tx, output_buffer, output_notify))
|
||||
}
|
||||
|
||||
async fn send_input(
|
||||
writer_tx: &mpsc::Sender<Vec<u8>>,
|
||||
data: &[u8],
|
||||
) -> Result<(), UnifiedExecError> {
|
||||
writer_tx
|
||||
.send(data.to_vec())
|
||||
.await
|
||||
.map_err(|_| UnifiedExecError::WriteToStdin)
|
||||
}
|
||||
|
||||
async fn store_session(
|
||||
&self,
|
||||
session: UnifiedExecSession,
|
||||
context: &UnifiedExecContext,
|
||||
command: &str,
|
||||
started_at: Instant,
|
||||
) -> i32 {
|
||||
let session_id = self
|
||||
.next_session_id
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
let entry = SessionEntry {
|
||||
session,
|
||||
session_ref: Arc::clone(&context.session),
|
||||
turn_ref: Arc::clone(&context.turn),
|
||||
call_id: context.call_id.clone(),
|
||||
command: command.to_string(),
|
||||
cwd: context.turn.cwd.clone(),
|
||||
started_at,
|
||||
};
|
||||
self.sessions.lock().await.insert(session_id, entry);
|
||||
session_id
|
||||
}
|
||||
|
||||
async fn emit_exec_end_from_entry(
|
||||
entry: SessionEntry,
|
||||
aggregated_output: String,
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
) {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code,
|
||||
stdout: StreamOutput::new(aggregated_output.clone()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(aggregated_output),
|
||||
duration,
|
||||
timed_out: false,
|
||||
};
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
entry.session_ref.as_ref(),
|
||||
entry.turn_ref.as_ref(),
|
||||
&entry.call_id,
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(entry.command, entry.cwd, true);
|
||||
emitter
|
||||
.emit(event_ctx, ToolEventStage::Success(output))
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn emit_exec_end_from_context(
|
||||
context: &UnifiedExecContext,
|
||||
command: String,
|
||||
aggregated_output: String,
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
) {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code,
|
||||
stdout: StreamOutput::new(aggregated_output.clone()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(aggregated_output),
|
||||
duration,
|
||||
timed_out: false,
|
||||
};
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
context.session.as_ref(),
|
||||
context.turn.as_ref(),
|
||||
&context.call_id,
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(command, context.turn.cwd.clone(), true);
|
||||
emitter
|
||||
.emit(event_ctx, ToolEventStage::Success(output))
|
||||
.await;
|
||||
}
|
||||
|
||||
pub(crate) async fn open_session_with_exec_env(
|
||||
&self,
|
||||
env: &ExecEnv,
|
||||
@@ -105,7 +300,7 @@ impl UnifiedExecSessionManager {
|
||||
pub(super) async fn open_session_with_sandbox(
|
||||
&self,
|
||||
command: Vec<String>,
|
||||
context: &UnifiedExecContext<'_>,
|
||||
context: &UnifiedExecContext,
|
||||
) -> Result<UnifiedExecSession, UnifiedExecError> {
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = UnifiedExecRuntime::new(self);
|
||||
@@ -115,17 +310,17 @@ impl UnifiedExecSessionManager {
|
||||
create_env(&context.turn.shell_environment_policy),
|
||||
);
|
||||
let tool_ctx = ToolCtx {
|
||||
session: context.session,
|
||||
turn: context.turn,
|
||||
call_id: context.call_id.to_string(),
|
||||
tool_name: "unified_exec".to_string(),
|
||||
session: context.session.as_ref(),
|
||||
turn: context.turn.as_ref(),
|
||||
call_id: context.call_id.clone(),
|
||||
tool_name: "exec_command".to_string(),
|
||||
};
|
||||
orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
context.turn,
|
||||
context.turn.as_ref(),
|
||||
context.turn.approval_policy,
|
||||
)
|
||||
.await
|
||||
@@ -175,124 +370,16 @@ impl UnifiedExecSessionManager {
|
||||
|
||||
collected
|
||||
}
|
||||
|
||||
pub(super) async fn should_store_session(&self, acquisition: &SessionAcquisition) -> bool {
|
||||
if let Some(session) = acquisition.new_session.as_ref() {
|
||||
!session.has_exited()
|
||||
} else if acquisition.reuse_requested {
|
||||
let mut sessions = self.sessions.lock().await;
|
||||
if let Some(existing) = sessions.get(&acquisition.session_id) {
|
||||
if existing.has_exited() {
|
||||
sessions.remove(&acquisition.session_id);
|
||||
false
|
||||
} else {
|
||||
true
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn send_input_chunks(
|
||||
writer_tx: &mpsc::Sender<Vec<u8>>,
|
||||
chunks: &[String],
|
||||
) -> Result<(), UnifiedExecError> {
|
||||
let mut trailing_whitespace = true;
|
||||
for chunk in chunks {
|
||||
if chunk.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let leading_whitespace = chunk
|
||||
.chars()
|
||||
.next()
|
||||
.map(char::is_whitespace)
|
||||
.unwrap_or(true);
|
||||
|
||||
if !trailing_whitespace
|
||||
&& !leading_whitespace
|
||||
&& writer_tx.send(vec![b' ']).await.is_err()
|
||||
{
|
||||
return Err(UnifiedExecError::WriteToStdin);
|
||||
}
|
||||
|
||||
if writer_tx.send(chunk.as_bytes().to_vec()).await.is_err() {
|
||||
return Err(UnifiedExecError::WriteToStdin);
|
||||
}
|
||||
|
||||
trailing_whitespace = chunk
|
||||
.chars()
|
||||
.next_back()
|
||||
.map(char::is_whitespace)
|
||||
.unwrap_or(trailing_whitespace);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn handle_request(
|
||||
&self,
|
||||
request: UnifiedExecRequest<'_>,
|
||||
context: UnifiedExecContext<'_>,
|
||||
) -> Result<UnifiedExecResult, UnifiedExecError> {
|
||||
let (timeout_ms, timeout_warning) = match request.timeout_ms {
|
||||
Some(requested) if requested > MAX_TIMEOUT_MS => (
|
||||
MAX_TIMEOUT_MS,
|
||||
Some(format!(
|
||||
"Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
|
||||
)),
|
||||
),
|
||||
Some(requested) => (requested, None),
|
||||
None => (DEFAULT_TIMEOUT_MS, None),
|
||||
};
|
||||
|
||||
if !request.input_chunks.is_empty() {
|
||||
let event_ctx = ToolEventCtx::new(context.session, context.turn, context.call_id, None);
|
||||
let emitter =
|
||||
ToolEmitter::shell(request.input_chunks.to_vec(), context.turn.cwd.clone());
|
||||
emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
}
|
||||
|
||||
let mut acquisition = self.acquire_session(&request, &context).await?;
|
||||
|
||||
if acquisition.reuse_requested {
|
||||
Self::send_input_chunks(&acquisition.writer_tx, request.input_chunks).await?;
|
||||
}
|
||||
|
||||
let deadline = Instant::now() + Duration::from_millis(timeout_ms);
|
||||
let collected = Self::collect_output_until_deadline(
|
||||
&acquisition.output_buffer,
|
||||
&acquisition.output_notify,
|
||||
deadline,
|
||||
)
|
||||
.await;
|
||||
|
||||
let (output, _maybe_tokens) = truncate_middle(
|
||||
&String::from_utf8_lossy(&collected),
|
||||
UNIFIED_EXEC_OUTPUT_MAX_BYTES,
|
||||
);
|
||||
let output = if let Some(warning) = timeout_warning {
|
||||
format!("{warning}{output}")
|
||||
} else {
|
||||
output
|
||||
};
|
||||
|
||||
let should_store_session = self.should_store_session(&acquisition).await;
|
||||
let session_id = if should_store_session {
|
||||
if let Some(session) = acquisition.new_session.take() {
|
||||
self.sessions
|
||||
.lock()
|
||||
.await
|
||||
.insert(acquisition.session_id, session);
|
||||
}
|
||||
Some(acquisition.session_id)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(UnifiedExecResult { session_id, output })
|
||||
}
|
||||
}
|
||||
|
||||
enum SessionStatus {
|
||||
Alive {
|
||||
exit_code: Option<i32>,
|
||||
call_id: String,
|
||||
},
|
||||
Exited {
|
||||
exit_code: Option<i32>,
|
||||
entry: Box<SessionEntry>,
|
||||
},
|
||||
Unknown,
|
||||
}
|
||||
|
||||
@@ -8,12 +8,12 @@ use codex_core::LocalShellStatus;
|
||||
use codex_core::ModelClient;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::Prompt;
|
||||
use codex_core::ReasoningItemContent;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::WireApi;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use futures::StreamExt;
|
||||
use serde_json::Value;
|
||||
|
||||
@@ -13,6 +13,7 @@ use codex_core::WireApi;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use futures::StreamExt;
|
||||
use tempfile::TempDir;
|
||||
@@ -143,8 +144,8 @@ fn assert_reasoning(item: &ResponseItem, expected: &str) {
|
||||
let mut combined = String::new();
|
||||
for part in parts {
|
||||
match part {
|
||||
codex_core::ReasoningItemContent::ReasoningText { text }
|
||||
| codex_core::ReasoningItemContent::Text { text } => combined.push_str(text),
|
||||
ReasoningItemContent::ReasoningText { text }
|
||||
| ReasoningItemContent::Text { text } => combined.push_str(text),
|
||||
}
|
||||
}
|
||||
assert_eq!(combined, expected);
|
||||
|
||||
@@ -10,6 +10,7 @@ path = "lib.rs"
|
||||
anyhow = { workspace = true }
|
||||
assert_cmd = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
notify = { workspace = true }
|
||||
regex-lite = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
@@ -35,6 +35,22 @@ impl ResponseMock {
|
||||
pub fn requests(&self) -> Vec<ResponsesRequest> {
|
||||
self.requests.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
/// Returns true if any captured request contains a `function_call` with the
|
||||
/// provided `call_id`.
|
||||
pub fn saw_function_call(&self, call_id: &str) -> bool {
|
||||
self.requests()
|
||||
.iter()
|
||||
.any(|req| req.has_function_call(call_id))
|
||||
}
|
||||
|
||||
/// Returns the `output` string for a matching `function_call_output` with
|
||||
/// the provided `call_id`, searching across all captured requests.
|
||||
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
|
||||
self.requests()
|
||||
.iter()
|
||||
.find_map(|req| req.function_call_output_text(call_id))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -70,6 +86,28 @@ impl ResponsesRequest {
|
||||
.unwrap_or_else(|| panic!("function call output {call_id} item not found in request"))
|
||||
}
|
||||
|
||||
/// Returns true if this request's `input` contains a `function_call` with
|
||||
/// the specified `call_id`.
|
||||
pub fn has_function_call(&self, call_id: &str) -> bool {
|
||||
self.input().iter().any(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})
|
||||
}
|
||||
|
||||
/// If present, returns the `output` string of the `function_call_output`
|
||||
/// entry matching `call_id` in this request's `input`.
|
||||
pub fn function_call_output_text(&self, call_id: &str) -> Option<String> {
|
||||
let binding = self.input();
|
||||
let item = binding.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
})?;
|
||||
item.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
pub fn header(&self, name: &str) -> Option<String> {
|
||||
self.0
|
||||
.headers
|
||||
@@ -97,6 +135,10 @@ impl Match for ResponseMock {
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(ResponsesRequest(request.clone()));
|
||||
|
||||
// Enforce invariant checks on every request body captured by the mock.
|
||||
// Panic on orphan tool outputs or calls to catch regressions early.
|
||||
validate_request_body_invariants(request);
|
||||
true
|
||||
}
|
||||
}
|
||||
@@ -167,6 +209,56 @@ pub fn ev_assistant_message(id: &str, text: &str) -> Value {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_reasoning_item(id: &str, summary: &[&str], raw_content: &[&str]) -> Value {
|
||||
let summary_entries: Vec<Value> = summary
|
||||
.iter()
|
||||
.map(|text| serde_json::json!({"type": "summary_text", "text": text}))
|
||||
.collect();
|
||||
|
||||
let mut event = serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "reasoning",
|
||||
"id": id,
|
||||
"summary": summary_entries,
|
||||
}
|
||||
});
|
||||
|
||||
if !raw_content.is_empty() {
|
||||
let content_entries: Vec<Value> = raw_content
|
||||
.iter()
|
||||
.map(|text| serde_json::json!({"type": "reasoning_text", "text": text}))
|
||||
.collect();
|
||||
event["item"]["content"] = Value::Array(content_entries);
|
||||
}
|
||||
|
||||
event
|
||||
}
|
||||
|
||||
pub fn ev_web_search_call_added(id: &str, status: &str, query: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.added",
|
||||
"item": {
|
||||
"type": "web_search_call",
|
||||
"id": id,
|
||||
"status": status,
|
||||
"action": {"type": "search", "query": query}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_web_search_call_done(id: &str, status: &str, query: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "web_search_call",
|
||||
"id": id,
|
||||
"status": status,
|
||||
"action": {"type": "search", "query": query}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.done",
|
||||
@@ -336,3 +428,90 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> Res
|
||||
|
||||
response_mock
|
||||
}
|
||||
|
||||
/// Validate invariants on the request body sent to `/v1/responses`.
|
||||
///
|
||||
/// - No `function_call_output`/`custom_tool_call_output` with missing/empty `call_id`.
|
||||
/// - Every `function_call_output` must match a prior `function_call` or
|
||||
/// `local_shell_call` with the same `call_id` in the same `input`.
|
||||
/// - Every `custom_tool_call_output` must match a prior `custom_tool_call`.
|
||||
/// - Additionally, enforce symmetry: every `function_call`/`custom_tool_call`
|
||||
/// in the `input` must have a matching output entry.
|
||||
fn validate_request_body_invariants(request: &wiremock::Request) {
|
||||
let Ok(body): Result<Value, _> = request.body_json() else {
|
||||
return;
|
||||
};
|
||||
let Some(items) = body.get("input").and_then(Value::as_array) else {
|
||||
panic!("input array not found in request");
|
||||
};
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
fn get_call_id(item: &Value) -> Option<&str> {
|
||||
item.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.filter(|id| !id.is_empty())
|
||||
}
|
||||
|
||||
fn gather_ids(items: &[Value], kind: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.filter_map(get_call_id)
|
||||
.map(str::to_string)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn gather_output_ids(items: &[Value], kind: &str, missing_msg: &str) -> HashSet<String> {
|
||||
items
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(Value::as_str) == Some(kind))
|
||||
.map(|item| {
|
||||
let Some(id) = get_call_id(item) else {
|
||||
panic!("{missing_msg}");
|
||||
};
|
||||
id.to_string()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
let function_calls = gather_ids(items, "function_call");
|
||||
let custom_tool_calls = gather_ids(items, "custom_tool_call");
|
||||
let local_shell_calls = gather_ids(items, "local_shell_call");
|
||||
let function_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"function_call_output",
|
||||
"orphan function_call_output with empty call_id should be dropped",
|
||||
);
|
||||
let custom_tool_call_outputs = gather_output_ids(
|
||||
items,
|
||||
"custom_tool_call_output",
|
||||
"orphan custom_tool_call_output with empty call_id should be dropped",
|
||||
);
|
||||
|
||||
for cid in &function_call_outputs {
|
||||
assert!(
|
||||
function_calls.contains(cid) || local_shell_calls.contains(cid),
|
||||
"function_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_call_outputs {
|
||||
assert!(
|
||||
custom_tool_calls.contains(cid),
|
||||
"custom_tool_call_output without matching call in input: {cid}",
|
||||
);
|
||||
}
|
||||
|
||||
for cid in &function_calls {
|
||||
assert!(
|
||||
function_call_outputs.contains(cid),
|
||||
"Function call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
for cid in &custom_tool_calls {
|
||||
assert!(
|
||||
custom_tool_call_outputs.contains(cid),
|
||||
"Custom tool call output is missing for call id: {cid}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,30 @@
|
||||
use std::mem::swap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::protocol::SessionConfiguredEvent;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use serde_json::Value;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
use crate::load_default_config_for_test;
|
||||
use crate::responses::start_mock_server;
|
||||
use crate::wait_for_event;
|
||||
|
||||
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
|
||||
|
||||
@@ -96,6 +109,12 @@ impl TestCodexBuilder {
|
||||
mutator(&mut config);
|
||||
}
|
||||
|
||||
if config.include_apply_patch_tool {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
} else {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
}
|
||||
|
||||
Ok((config, cwd))
|
||||
}
|
||||
}
|
||||
@@ -107,6 +126,139 @@ pub struct TestCodex {
|
||||
pub session_configured: SessionConfiguredEvent,
|
||||
}
|
||||
|
||||
impl TestCodex {
|
||||
pub fn cwd_path(&self) -> &Path {
|
||||
self.cwd.path()
|
||||
}
|
||||
|
||||
pub fn workspace_path(&self, rel: impl AsRef<Path>) -> PathBuf {
|
||||
self.cwd_path().join(rel)
|
||||
}
|
||||
|
||||
pub async fn submit_turn(&self, prompt: &str) -> Result<()> {
|
||||
self.submit_turn_with_policy(prompt, SandboxPolicy::DangerFullAccess)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn submit_turn_with_policy(
|
||||
&self,
|
||||
prompt: &str,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
) -> Result<()> {
|
||||
let session_model = self.session_configured.model.clone();
|
||||
self.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: self.cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&self.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TestCodexHarness {
|
||||
server: MockServer,
|
||||
test: TestCodex,
|
||||
}
|
||||
|
||||
impl TestCodexHarness {
|
||||
pub async fn new() -> Result<Self> {
|
||||
Self::with_builder(test_codex()).await
|
||||
}
|
||||
|
||||
pub async fn with_config(mutator: impl FnOnce(&mut Config) + Send + 'static) -> Result<Self> {
|
||||
Self::with_builder(test_codex().with_config(mutator)).await
|
||||
}
|
||||
|
||||
pub async fn with_builder(mut builder: TestCodexBuilder) -> Result<Self> {
|
||||
let server = start_mock_server().await;
|
||||
let test = builder.build(&server).await?;
|
||||
Ok(Self { server, test })
|
||||
}
|
||||
|
||||
pub fn server(&self) -> &MockServer {
|
||||
&self.server
|
||||
}
|
||||
|
||||
pub fn test(&self) -> &TestCodex {
|
||||
&self.test
|
||||
}
|
||||
|
||||
pub fn cwd(&self) -> &Path {
|
||||
self.test.cwd_path()
|
||||
}
|
||||
|
||||
pub fn path(&self, rel: impl AsRef<Path>) -> PathBuf {
|
||||
self.test.workspace_path(rel)
|
||||
}
|
||||
|
||||
pub async fn submit(&self, prompt: &str) -> Result<()> {
|
||||
self.test.submit_turn(prompt).await
|
||||
}
|
||||
|
||||
pub async fn submit_with_policy(
|
||||
&self,
|
||||
prompt: &str,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
) -> Result<()> {
|
||||
self.test
|
||||
.submit_turn_with_policy(prompt, sandbox_policy)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn request_bodies(&self) -> Vec<Value> {
|
||||
self.server
|
||||
.received_requests()
|
||||
.await
|
||||
.expect("requests")
|
||||
.into_iter()
|
||||
.map(|req| serde_json::from_slice(&req.body).expect("request body json"))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub async fn function_call_output_value(&self, call_id: &str) -> Value {
|
||||
let bodies = self.request_bodies().await;
|
||||
function_call_output(&bodies, call_id).clone()
|
||||
}
|
||||
|
||||
pub async fn function_call_stdout(&self, call_id: &str) -> String {
|
||||
self.function_call_output_value(call_id)
|
||||
.await
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("output string")
|
||||
.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {
|
||||
for body in bodies {
|
||||
if let Some(items) = body.get("input").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if item.get("type").and_then(Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(Value::as_str) == Some(call_id)
|
||||
{
|
||||
return item;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
panic!("function_call_output {call_id} not found");
|
||||
}
|
||||
|
||||
pub fn test_codex() -> TestCodexBuilder {
|
||||
TestCodexBuilder {
|
||||
config_mutators: vec![],
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::protocol::EventMsg;
|
||||
@@ -5,7 +6,9 @@ use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
@@ -67,3 +70,98 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// After an interrupt we expect the next request to the model to include both
|
||||
/// the original tool call and an `"aborted"` `function_call_output`. This test
|
||||
/// exercises the follow-up flow: it sends another user turn, inspects the mock
|
||||
/// responses server, and ensures the model receives the synthesized abort.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn interrupt_tool_records_history_entries() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"sleep 60".to_string(),
|
||||
];
|
||||
let call_id = "call-history";
|
||||
|
||||
let args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 60_000
|
||||
})
|
||||
.to_string();
|
||||
let first_body = sse(vec![
|
||||
ev_response_created("resp-history"),
|
||||
ev_function_call(call_id, "shell", &args),
|
||||
ev_completed("resp-history"),
|
||||
]);
|
||||
let follow_up_body = sse(vec![
|
||||
ev_response_created("resp-followup"),
|
||||
ev_completed("resp-followup"),
|
||||
]);
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let response_mock = mount_sse_sequence(&server, vec![first_body, follow_up_body]).await;
|
||||
|
||||
let fixture = test_codex().build(&server).await.unwrap();
|
||||
let codex = Arc::clone(&fixture.codex);
|
||||
|
||||
let wait_timeout = Duration::from_millis(100);
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "start history recording".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| matches!(ev, EventMsg::ExecCommandBegin(_)),
|
||||
wait_timeout,
|
||||
)
|
||||
.await;
|
||||
|
||||
codex.submit(Op::Interrupt).await.unwrap();
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| matches!(ev, EventMsg::TurnAborted(_)),
|
||||
wait_timeout,
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "follow up".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| matches!(ev, EventMsg::TaskComplete(_)),
|
||||
wait_timeout,
|
||||
)
|
||||
.await;
|
||||
|
||||
let requests = response_mock.requests();
|
||||
assert!(
|
||||
requests.len() == 2,
|
||||
"expected two calls to the responses API, got {}",
|
||||
requests.len()
|
||||
);
|
||||
|
||||
assert!(
|
||||
response_mock.saw_function_call(call_id),
|
||||
"function call not recorded in responses payload"
|
||||
);
|
||||
assert_eq!(
|
||||
response_mock.function_call_output_text(call_id).as_deref(),
|
||||
Some("aborted"),
|
||||
"aborted function call output not recorded in responses payload"
|
||||
);
|
||||
}
|
||||
|
||||
1052
codex-rs/core/tests/suite/apply_patch_cli.rs
Normal file
1052
codex-rs/core/tests/suite/apply_patch_cli.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,6 @@ use codex_core::ModelClient;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::Prompt;
|
||||
use codex_core::ReasoningItemContent;
|
||||
use codex_core::ResponseEvent;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::WireApi;
|
||||
@@ -21,6 +20,7 @@ use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SessionSource;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use codex_protocol::models::ReasoningItemReasoningSummary;
|
||||
use codex_protocol::models::WebSearchAction;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
|
||||
@@ -1,17 +1,15 @@
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ContentItem;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::content_items_to_text;
|
||||
use codex_core::is_session_prefix_message;
|
||||
use codex_core::parse_turn_item;
|
||||
use codex_core::protocol::ConversationPathResponseEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
use codex_core::protocol::RolloutLine;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -115,19 +113,12 @@ async fn fork_conversation_twice_drops_to_first_message() {
|
||||
let find_user_input_positions = |items: &[RolloutItem]| -> Vec<usize> {
|
||||
let mut pos = Vec::new();
|
||||
for (i, it) in items.iter().enumerate() {
|
||||
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = it
|
||||
&& role == "user"
|
||||
&& content_items_to_text(content)
|
||||
.is_some_and(|text| !is_session_prefix_message(&text))
|
||||
if let RolloutItem::ResponseItem(response_item) = it
|
||||
&& let Some(TurnItem::UserMessage(_)) = parse_turn_item(response_item)
|
||||
{
|
||||
// Consider any user message as an input boundary; recorder stores both EventMsg and ResponseItem.
|
||||
// We specifically look for input items, which are represented as ContentItem::InputText.
|
||||
if content
|
||||
.iter()
|
||||
.any(|c| matches!(c, ContentItem::InputText { .. }))
|
||||
{
|
||||
pos.push(i);
|
||||
}
|
||||
pos.push(i);
|
||||
}
|
||||
}
|
||||
pos
|
||||
|
||||
@@ -2,12 +2,18 @@
|
||||
|
||||
use anyhow::Ok;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ItemCompletedEvent;
|
||||
use codex_core::protocol::ItemStartedEvent;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::ev_web_search_call_added;
|
||||
use core_test_support::responses::ev_web_search_call_done;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -26,7 +32,7 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let first_response = sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
@@ -36,21 +42,23 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(e) => Some(e.clone()),
|
||||
let started_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::UserMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(e) => Some(e.clone()),
|
||||
let completed_item = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::UserMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let TurnItem::UserMessage(started_item) = started.item;
|
||||
let TurnItem::UserMessage(completed_item) = completed.item;
|
||||
|
||||
assert_eq!(started_item.id, completed_item.id);
|
||||
assert_eq!(
|
||||
started_item.content,
|
||||
@@ -66,3 +74,163 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn assistant_message_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "all done"),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please summarize results".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::AgentMessage(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
let Some(codex_protocol::items::AgentMessageContent::Text { text }) = completed.content.first()
|
||||
else {
|
||||
panic!("expected agent message text content");
|
||||
};
|
||||
assert_eq!(text, "all done");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn reasoning_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let reasoning_item = ev_reasoning_item(
|
||||
"reasoning-1",
|
||||
&["Consider inputs", "Compute output"],
|
||||
&["Detailed reasoning trace"],
|
||||
);
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
reasoning_item,
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "explain your reasoning".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::Reasoning(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
assert_eq!(
|
||||
completed.summary_text,
|
||||
vec!["Consider inputs".to_string(), "Compute output".to_string()]
|
||||
);
|
||||
assert_eq!(
|
||||
completed.raw_content,
|
||||
vec!["Detailed reasoning trace".to_string()]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn web_search_item_is_emitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex().build(&server).await?;
|
||||
|
||||
let web_search_added =
|
||||
ev_web_search_call_added("web-search-1", "in_progress", "weather seattle");
|
||||
let web_search_done = ev_web_search_call_done("web-search-1", "completed", "weather seattle");
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
web_search_added,
|
||||
web_search_done,
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "find the weather".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let started = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemStarted(ItemStartedEvent {
|
||||
item: TurnItem::WebSearch(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let completed = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ItemCompleted(ItemCompletedEvent {
|
||||
item: TurnItem::WebSearch(item),
|
||||
..
|
||||
}) => Some(item.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(started.id, completed.id);
|
||||
assert_eq!(completed.query, "weather seattle");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod abort_tasks;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod apply_patch_cli;
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod approvals;
|
||||
mod cli_stream;
|
||||
mod client;
|
||||
|
||||
@@ -4,6 +4,7 @@ use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_reasoning_item;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::sse;
|
||||
@@ -62,3 +63,59 @@ async fn resume_includes_initial_messages_from_rollout_events() -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resume_includes_initial_messages_from_reasoning_events() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.show_raw_agent_reasoning = true;
|
||||
});
|
||||
let initial = builder.build(&server).await?;
|
||||
let codex = Arc::clone(&initial.codex);
|
||||
let home = initial.home.clone();
|
||||
let rollout_path = initial.session_configured.rollout_path.clone();
|
||||
|
||||
let initial_sse = sse(vec![
|
||||
ev_response_created("resp-initial"),
|
||||
ev_reasoning_item("reason-1", &["Summarized step"], &["raw detail"]),
|
||||
ev_assistant_message("msg-1", "Completed reasoning turn"),
|
||||
ev_completed("resp-initial"),
|
||||
]);
|
||||
mount_sse_once_match(&server, any(), initial_sse).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "Record reasoning messages".into(),
|
||||
}],
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let resumed = builder.resume(&server, home, rollout_path).await?;
|
||||
let initial_messages = resumed
|
||||
.session_configured
|
||||
.initial_messages
|
||||
.expect("expected initial messages to be present for resumed session");
|
||||
match initial_messages.as_slice() {
|
||||
[
|
||||
EventMsg::UserMessage(first_user),
|
||||
EventMsg::TokenCount(_),
|
||||
EventMsg::AgentReasoning(reasoning),
|
||||
EventMsg::AgentReasoningRawContent(raw),
|
||||
EventMsg::AgentMessage(assistant_message),
|
||||
EventMsg::TokenCount(_),
|
||||
] => {
|
||||
assert_eq!(first_user.message, "Record reasoning messages");
|
||||
assert_eq!(reasoning.text, "Summarized step");
|
||||
assert_eq!(raw.text, "raw detail");
|
||||
assert_eq!(assistant_message.message, "Completed reasoning turn");
|
||||
}
|
||||
other => panic!("unexpected initial messages after resume: {other:#?}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_core::find_conversation_path_by_id_str;
|
||||
@@ -8,8 +9,8 @@ use uuid::Uuid;
|
||||
|
||||
/// Create sessions/YYYY/MM/DD and write a minimal rollout file containing the
|
||||
/// provided conversation id in the SessionMeta line. Returns the absolute path.
|
||||
fn write_minimal_rollout_with_id(codex_home: &TempDir, id: Uuid) -> PathBuf {
|
||||
let sessions = codex_home.path().join("sessions/2024/01/01");
|
||||
fn write_minimal_rollout_with_id(codex_home: &Path, id: Uuid) -> PathBuf {
|
||||
let sessions = codex_home.join("sessions/2024/01/01");
|
||||
std::fs::create_dir_all(&sessions).unwrap();
|
||||
|
||||
let file = sessions.join(format!("rollout-2024-01-01T00-00-00-{id}.jsonl"));
|
||||
@@ -40,7 +41,7 @@ fn write_minimal_rollout_with_id(codex_home: &TempDir, id: Uuid) -> PathBuf {
|
||||
async fn find_locates_rollout_file_by_id() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let id = Uuid::new_v4();
|
||||
let expected = write_minimal_rollout_with_id(&home, id);
|
||||
let expected = write_minimal_rollout_with_id(home.path(), id);
|
||||
|
||||
let found = find_conversation_path_by_id_str(home.path(), &id.to_string())
|
||||
.await
|
||||
@@ -48,3 +49,33 @@ async fn find_locates_rollout_file_by_id() {
|
||||
|
||||
assert_eq!(found.unwrap(), expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn find_handles_gitignore_covering_codex_home_directory() {
|
||||
let repo = TempDir::new().unwrap();
|
||||
let codex_home = repo.path().join(".codex");
|
||||
std::fs::create_dir_all(&codex_home).unwrap();
|
||||
std::fs::write(repo.path().join(".gitignore"), ".codex/**\n").unwrap();
|
||||
let id = Uuid::new_v4();
|
||||
let expected = write_minimal_rollout_with_id(&codex_home, id);
|
||||
|
||||
let found = find_conversation_path_by_id_str(&codex_home, &id.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(found, Some(expected));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn find_ignores_granular_gitignore_rules() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let id = Uuid::new_v4();
|
||||
let expected = write_minimal_rollout_with_id(home.path(), id);
|
||||
std::fs::write(home.path().join("sessions/.gitignore"), "*.jsonl\n").unwrap();
|
||||
|
||||
let found = find_conversation_path_by_id_str(home.path(), &id.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(found, Some(expected));
|
||||
}
|
||||
|
||||
@@ -227,62 +227,6 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn local_shell_missing_ids_maps_to_function_output_error() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let local_shell_event = json!({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "local_shell_call",
|
||||
"status": "completed",
|
||||
"action": {
|
||||
"type": "exec",
|
||||
"command": ["/bin/echo", "hi"],
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
local_shell_event,
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"check shell output",
|
||||
AskForApproval::Never,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let item = second_mock.single_request().function_call_output("");
|
||||
assert_eq!(item.get("call_id").and_then(Value::as_str), Some(""));
|
||||
assert_eq!(
|
||||
item.get("output").and_then(Value::as_str),
|
||||
Some("LocalShellCall without call_id or id"),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn collect_tools(use_unified_exec: bool) -> Result<Vec<String>> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
@@ -320,14 +264,22 @@ async fn unified_exec_spec_toggle_end_to_end() -> Result<()> {
|
||||
|
||||
let tools_disabled = collect_tools(false).await?;
|
||||
assert!(
|
||||
!tools_disabled.iter().any(|name| name == "unified_exec"),
|
||||
"tools list should not include unified_exec when disabled: {tools_disabled:?}"
|
||||
!tools_disabled.iter().any(|name| name == "exec_command"),
|
||||
"tools list should not include exec_command when disabled: {tools_disabled:?}"
|
||||
);
|
||||
assert!(
|
||||
!tools_disabled.iter().any(|name| name == "write_stdin"),
|
||||
"tools list should not include write_stdin when disabled: {tools_disabled:?}"
|
||||
);
|
||||
|
||||
let tools_enabled = collect_tools(true).await?;
|
||||
assert!(
|
||||
tools_enabled.iter().any(|name| name == "unified_exec"),
|
||||
"tools list should include unified_exec when enabled: {tools_enabled:?}"
|
||||
tools_enabled.iter().any(|name| name == "exec_command"),
|
||||
"tools list should include exec_command when enabled: {tools_enabled:?}"
|
||||
);
|
||||
assert!(
|
||||
tools_enabled.iter().any(|name| name == "write_stdin"),
|
||||
"tools list should include write_stdin when enabled: {tools_enabled:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -4,7 +4,6 @@ use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::parse_command::parse_command;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
@@ -81,16 +80,15 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "uexec-begin-event";
|
||||
let command = vec!["/bin/echo".to_string(), "hello unified exec".to_string()];
|
||||
let args = json!({
|
||||
"input": command.clone(),
|
||||
"timeout_ms": 250,
|
||||
"cmd": "/bin/echo hello unified exec".to_string(),
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "unified_exec", &serde_json::to_string(&args)?),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
@@ -124,15 +122,273 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(begin_event.command, command);
|
||||
assert_eq!(
|
||||
begin_event.command,
|
||||
vec!["/bin/echo hello unified exec".to_string()]
|
||||
);
|
||||
assert_eq!(begin_event.cwd, cwd.path());
|
||||
assert_eq!(begin_event.parsed_cmd, parse_command(&command));
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_emits_exec_command_end_event() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "uexec-end-event";
|
||||
let args = json!({
|
||||
"cmd": "/bin/echo END-EVENT".to_string(),
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
let poll_call_id = "uexec-end-event-poll";
|
||||
let poll_args = json!({
|
||||
"chars": "",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
poll_call_id,
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&poll_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_assistant_message("msg-1", "finished"),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "emit end event".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let end_event = wait_for_event_match(&codex, |msg| match msg {
|
||||
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(end_event.exit_code, 0);
|
||||
assert!(
|
||||
end_event.aggregated_output.contains("END-EVENT"),
|
||||
"expected aggregated output to contain marker"
|
||||
);
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_emits_output_delta_for_exec_command() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "uexec-delta-1";
|
||||
let args = json!({
|
||||
"cmd": "printf 'HELLO-UEXEC'",
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "finished"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "emit delta".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let delta = wait_for_event_match(&codex, |msg| match msg {
|
||||
EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let text = String::from_utf8_lossy(&delta.chunk).to_string();
|
||||
assert!(
|
||||
text.contains("HELLO-UEXEC"),
|
||||
"delta chunk missing expected text: {text:?}"
|
||||
);
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_emits_output_delta_for_write_stdin() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let open_call_id = "uexec-open";
|
||||
let open_args = json!({
|
||||
"cmd": "/bin/bash -i",
|
||||
"yield_time_ms": 200,
|
||||
});
|
||||
|
||||
let stdin_call_id = "uexec-stdin-delta";
|
||||
let stdin_args = json!({
|
||||
"chars": "echo WSTDIN-MARK\\n",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 800,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
open_call_id,
|
||||
"exec_command",
|
||||
&serde_json::to_string(&open_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
stdin_call_id,
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&stdin_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "stdin delta".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Expect a delta event corresponding to the write_stdin call.
|
||||
let delta = wait_for_event_match(&codex, |msg| match msg {
|
||||
EventMsg::ExecCommandOutputDelta(ev) if ev.call_id == open_call_id => {
|
||||
let text = String::from_utf8_lossy(&ev.chunk);
|
||||
if text.contains("WSTDIN-MARK") {
|
||||
Some(ev.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
let text = String::from_utf8_lossy(&delta.chunk).to_string();
|
||||
assert!(
|
||||
text.contains("WSTDIN-MARK"),
|
||||
"stdin delta chunk missing expected text: {text:?}"
|
||||
);
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
|
||||
use tokio::time::Duration;
|
||||
@@ -154,14 +410,9 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let open_call_id = "uexec-open-session";
|
||||
let open_command = vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
"echo ready".to_string(),
|
||||
];
|
||||
let open_args = json!({
|
||||
"input": open_command.clone(),
|
||||
"timeout_ms": 200,
|
||||
"cmd": "/bin/sh -c echo ready".to_string(),
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
|
||||
let poll_call_id = "uexec-poll-empty";
|
||||
@@ -176,7 +427,7 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
open_call_id,
|
||||
"unified_exec",
|
||||
"exec_command",
|
||||
&serde_json::to_string(&open_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
@@ -185,7 +436,7 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
poll_call_id,
|
||||
"unified_exec",
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&poll_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
@@ -231,11 +482,400 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
|
||||
"expected only the initial command to emit begin event"
|
||||
);
|
||||
assert_eq!(begin_events[0].call_id, open_call_id);
|
||||
assert_eq!(begin_events[0].command, open_command);
|
||||
assert_eq!(begin_events[0].command[0], "/bin/sh -c echo ready");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "uexec-metadata";
|
||||
let args = serde_json::json!({
|
||||
"cmd": "printf 'abcdefghijklmnopqrstuvwxyz'",
|
||||
"yield_time_ms": 500,
|
||||
"max_output_tokens": 6,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "run metadata test".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
let metadata = outputs
|
||||
.get(call_id)
|
||||
.expect("missing exec_command metadata output");
|
||||
|
||||
let chunk_id = metadata
|
||||
.get("chunk_id")
|
||||
.and_then(Value::as_str)
|
||||
.expect("missing chunk_id");
|
||||
assert_eq!(chunk_id.len(), 6, "chunk id should be 6 hex characters");
|
||||
assert!(
|
||||
chunk_id.chars().all(|c| c.is_ascii_hexdigit()),
|
||||
"chunk id should be hexadecimal: {chunk_id}"
|
||||
);
|
||||
|
||||
let wall_time = metadata
|
||||
.get("wall_time_seconds")
|
||||
.and_then(Value::as_f64)
|
||||
.unwrap_or_default();
|
||||
assert!(
|
||||
wall_time >= 0.0,
|
||||
"wall_time_seconds should be non-negative, got {wall_time}"
|
||||
);
|
||||
|
||||
assert!(
|
||||
metadata.get("session_id").is_none(),
|
||||
"exec_command for a completed process should not include session_id"
|
||||
);
|
||||
|
||||
let exit_code = metadata
|
||||
.get("exit_code")
|
||||
.and_then(Value::as_i64)
|
||||
.expect("expected exit_code");
|
||||
assert_eq!(exit_code, 0, "expected successful exit");
|
||||
|
||||
let output_text = metadata
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("missing output text");
|
||||
assert!(
|
||||
output_text.contains("tokens truncated"),
|
||||
"expected truncation notice in output: {output_text:?}"
|
||||
);
|
||||
|
||||
let original_tokens = metadata
|
||||
.get("original_token_count")
|
||||
.and_then(Value::as_u64)
|
||||
.expect("missing original_token_count");
|
||||
assert!(
|
||||
original_tokens as usize > 6,
|
||||
"original token count should exceed max_output_tokens"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let start_call_id = "uexec-cat-start";
|
||||
let send_call_id = "uexec-cat-send";
|
||||
let exit_call_id = "uexec-cat-exit";
|
||||
|
||||
let start_args = serde_json::json!({
|
||||
"cmd": "/bin/cat",
|
||||
"yield_time_ms": 500,
|
||||
});
|
||||
let send_args = serde_json::json!({
|
||||
"chars": "hello unified exec\n",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 500,
|
||||
});
|
||||
let exit_args = serde_json::json!({
|
||||
"chars": "\u{0004}",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 500,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
start_call_id,
|
||||
"exec_command",
|
||||
&serde_json::to_string(&start_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
send_call_id,
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&send_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_function_call(
|
||||
exit_call_id,
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&exit_args)?,
|
||||
),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "all done"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "test write_stdin exit behavior".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
|
||||
let start_output = outputs
|
||||
.get(start_call_id)
|
||||
.expect("missing start output for exec_command");
|
||||
let session_id = start_output
|
||||
.get("session_id")
|
||||
.and_then(Value::as_i64)
|
||||
.expect("expected session id from exec_command");
|
||||
assert!(
|
||||
session_id >= 0,
|
||||
"session_id should be non-negative, got {session_id}"
|
||||
);
|
||||
assert!(
|
||||
start_output.get("exit_code").is_none(),
|
||||
"initial exec_command should not include exit_code while session is running"
|
||||
);
|
||||
|
||||
let send_output = outputs
|
||||
.get(send_call_id)
|
||||
.expect("missing write_stdin echo output");
|
||||
let echoed = send_output
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default();
|
||||
assert!(
|
||||
echoed.contains("hello unified exec"),
|
||||
"expected echoed output from cat, got {echoed:?}"
|
||||
);
|
||||
let echoed_session = send_output
|
||||
.get("session_id")
|
||||
.and_then(Value::as_i64)
|
||||
.expect("write_stdin should return session id while process is running");
|
||||
assert_eq!(
|
||||
echoed_session, session_id,
|
||||
"write_stdin should reuse existing session id"
|
||||
);
|
||||
assert!(
|
||||
send_output.get("exit_code").is_none(),
|
||||
"write_stdin should not include exit_code while process is running"
|
||||
);
|
||||
|
||||
let exit_output = outputs
|
||||
.get(exit_call_id)
|
||||
.expect("missing exit metadata output");
|
||||
assert!(
|
||||
exit_output.get("session_id").is_none(),
|
||||
"session_id should be omitted once the process exits"
|
||||
);
|
||||
let exit_code = exit_output
|
||||
.get("exit_code")
|
||||
.and_then(Value::as_i64)
|
||||
.expect("expected exit_code after sending EOF");
|
||||
assert_eq!(exit_code, 0, "cat should exit cleanly after EOF");
|
||||
|
||||
let exit_chunk = exit_output
|
||||
.get("chunk_id")
|
||||
.and_then(Value::as_str)
|
||||
.expect("missing chunk id for exit output");
|
||||
assert!(
|
||||
exit_chunk.chars().all(|c| c.is_ascii_hexdigit()),
|
||||
"chunk id should be hexadecimal: {exit_chunk}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_emits_end_event_when_session_dies_via_stdin() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let start_call_id = "uexec-end-on-exit-start";
|
||||
let start_args = serde_json::json!({
|
||||
"cmd": "/bin/cat",
|
||||
"yield_time_ms": 200,
|
||||
});
|
||||
|
||||
let echo_call_id = "uexec-end-on-exit-echo";
|
||||
let echo_args = serde_json::json!({
|
||||
"chars": "bye-END\n",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 300,
|
||||
});
|
||||
|
||||
let exit_call_id = "uexec-end-on-exit";
|
||||
let exit_args = serde_json::json!({
|
||||
"chars": "\u{0004}",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 500,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
start_call_id,
|
||||
"exec_command",
|
||||
&serde_json::to_string(&start_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
echo_call_id,
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&echo_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_function_call(
|
||||
exit_call_id,
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&exit_args)?,
|
||||
),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-4"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "end on exit".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
// We expect the ExecCommandEnd event to match the initial exec_command call_id.
|
||||
let end_event = wait_for_event_match(&codex, |msg| match msg {
|
||||
EventMsg::ExecCommandEnd(ev) if ev.call_id == start_call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(end_event.exit_code, 0);
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -255,15 +895,15 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
|
||||
let first_call_id = "uexec-start";
|
||||
let first_args = serde_json::json!({
|
||||
"input": ["/bin/cat"],
|
||||
"timeout_ms": 200,
|
||||
"cmd": "/bin/cat",
|
||||
"yield_time_ms": 200,
|
||||
});
|
||||
|
||||
let second_call_id = "uexec-stdin";
|
||||
let second_args = serde_json::json!({
|
||||
"input": ["hello unified exec\n"],
|
||||
"session_id": "0",
|
||||
"timeout_ms": 500,
|
||||
"chars": "hello unified exec\n",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 500,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
@@ -271,7 +911,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
first_call_id,
|
||||
"unified_exec",
|
||||
"exec_command",
|
||||
&serde_json::to_string(&first_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
@@ -280,7 +920,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
second_call_id,
|
||||
"unified_exec",
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&second_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
@@ -324,9 +964,9 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
let start_output = outputs
|
||||
.get(first_call_id)
|
||||
.expect("missing first unified_exec output");
|
||||
let session_id = start_output["session_id"].as_str().unwrap_or_default();
|
||||
let session_id = start_output["session_id"].as_i64().unwrap_or_default();
|
||||
assert!(
|
||||
!session_id.is_empty(),
|
||||
session_id >= 0,
|
||||
"expected session id in first unified_exec response"
|
||||
);
|
||||
assert!(
|
||||
@@ -340,7 +980,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
|
||||
.get(second_call_id)
|
||||
.expect("missing reused unified_exec output");
|
||||
assert_eq!(
|
||||
reuse_output["session_id"].as_str().unwrap_or_default(),
|
||||
reuse_output["session_id"].as_i64().unwrap_or_default(),
|
||||
session_id
|
||||
);
|
||||
let echoed = reuse_output["output"].as_str().unwrap_or_default();
|
||||
@@ -391,15 +1031,15 @@ PY
|
||||
|
||||
let first_call_id = "uexec-lag-start";
|
||||
let first_args = serde_json::json!({
|
||||
"input": ["/bin/sh", "-c", script],
|
||||
"timeout_ms": 25,
|
||||
"cmd": script,
|
||||
"yield_time_ms": 25,
|
||||
});
|
||||
|
||||
let second_call_id = "uexec-lag-poll";
|
||||
let second_args = serde_json::json!({
|
||||
"input": Vec::<String>::new(),
|
||||
"session_id": "0",
|
||||
"timeout_ms": 2_000,
|
||||
"chars": "",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 2_000,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
@@ -407,7 +1047,7 @@ PY
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
first_call_id,
|
||||
"unified_exec",
|
||||
"exec_command",
|
||||
&serde_json::to_string(&first_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
@@ -416,7 +1056,7 @@ PY
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
second_call_id,
|
||||
"unified_exec",
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&second_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
@@ -460,9 +1100,9 @@ PY
|
||||
let start_output = outputs
|
||||
.get(first_call_id)
|
||||
.expect("missing initial unified_exec output");
|
||||
let session_id = start_output["session_id"].as_str().unwrap_or_default();
|
||||
let session_id = start_output["session_id"].as_i64().unwrap_or_default();
|
||||
assert!(
|
||||
!session_id.is_empty(),
|
||||
session_id >= 0,
|
||||
"expected session id from initial unified_exec response"
|
||||
);
|
||||
|
||||
@@ -497,15 +1137,15 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
|
||||
let first_call_id = "uexec-timeout";
|
||||
let first_args = serde_json::json!({
|
||||
"input": ["/bin/sh", "-c", "sleep 0.1; echo ready"],
|
||||
"timeout_ms": 10,
|
||||
"cmd": "sleep 0.5; echo ready",
|
||||
"yield_time_ms": 10,
|
||||
});
|
||||
|
||||
let second_call_id = "uexec-poll";
|
||||
let second_args = serde_json::json!({
|
||||
"input": Vec::<String>::new(),
|
||||
"session_id": "0",
|
||||
"timeout_ms": 800,
|
||||
"chars": "",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 800,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
@@ -513,7 +1153,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
first_call_id,
|
||||
"unified_exec",
|
||||
"exec_command",
|
||||
&serde_json::to_string(&first_args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
@@ -522,7 +1162,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
ev_response_created("resp-2"),
|
||||
ev_function_call(
|
||||
second_call_id,
|
||||
"unified_exec",
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&second_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
@@ -569,7 +1209,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
|
||||
let first_output = outputs.get(first_call_id).expect("missing timeout output");
|
||||
assert_eq!(first_output["session_id"], "0");
|
||||
assert_eq!(first_output["session_id"], 0);
|
||||
assert!(
|
||||
first_output["output"]
|
||||
.as_str()
|
||||
|
||||
@@ -18,7 +18,6 @@ use codex_core::NewConversation;
|
||||
use codex_core::auth::enforce_login_restrictions;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::git_info::get_git_repo_root;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::Event;
|
||||
@@ -192,7 +191,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
};
|
||||
|
||||
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
|
||||
let approve_all_enabled = config.features.enabled(Feature::ApproveAll);
|
||||
|
||||
if let Err(err) = enforce_login_restrictions(&config).await {
|
||||
eprintln!("{err}");
|
||||
@@ -366,34 +364,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
if matches!(event.msg, EventMsg::Error(_)) {
|
||||
error_seen = true;
|
||||
}
|
||||
// Auto-approve requests when the approve_all feature is enabled.
|
||||
if approve_all_enabled {
|
||||
match &event.msg {
|
||||
EventMsg::ExecApprovalRequest(_) => {
|
||||
if let Err(e) = conversation
|
||||
.submit(Op::ExecApproval {
|
||||
id: event.id.clone(),
|
||||
decision: codex_core::protocol::ReviewDecision::Approved,
|
||||
})
|
||||
.await
|
||||
{
|
||||
error!("failed to auto-approve exec: {e}");
|
||||
}
|
||||
}
|
||||
EventMsg::ApplyPatchApprovalRequest(_) => {
|
||||
if let Err(e) = conversation
|
||||
.submit(Op::PatchApproval {
|
||||
id: event.id.clone(),
|
||||
decision: codex_core::protocol::ReviewDecision::Approved,
|
||||
})
|
||||
.await
|
||||
{
|
||||
error!("failed to auto-approve patch: {e}");
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let shutdown: CodexStatus = event_processor.process_event(event);
|
||||
match shutdown {
|
||||
CodexStatus::Running => continue,
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
|
||||
async fn run_exec_with_args(args: &[&str]) -> Result<String> {
|
||||
let test = test_codex_exec();
|
||||
|
||||
let call_id = "exec-approve";
|
||||
let exec_args = json!({
|
||||
"command": [
|
||||
if cfg!(windows) { "cmd.exe" } else { "/bin/sh" },
|
||||
if cfg!(windows) { "/C" } else { "-lc" },
|
||||
"echo approve-all-ok",
|
||||
],
|
||||
"timeout_ms": 1500,
|
||||
"with_escalated_permissions": true
|
||||
});
|
||||
|
||||
let response_streams = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&exec_args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mock = mount_sse_sequence(&server, response_streams).await;
|
||||
|
||||
test.cmd_with_server(&server).args(args).assert().success();
|
||||
|
||||
let requests = mock.requests();
|
||||
assert!(requests.len() >= 2, "expected at least two responses POSTs");
|
||||
let item = requests[1].function_call_output(call_id);
|
||||
let output_str = item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("function_call_output.output should be a string");
|
||||
|
||||
Ok(output_str.to_string())
|
||||
}
|
||||
|
||||
/// Setting `features.approve_all=true` should switch to auto-approvals.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn approve_all_auto_accepts_exec() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let output = run_exec_with_args(&[
|
||||
"--skip-git-repo-check",
|
||||
"-c",
|
||||
"features.approve_all=true",
|
||||
"train",
|
||||
])
|
||||
.await?;
|
||||
assert!(
|
||||
output.contains("Exit code: 0"),
|
||||
"expected Exit code: 0 in output: {output}"
|
||||
);
|
||||
assert!(
|
||||
output.contains("approve-all-ok"),
|
||||
"expected command output in response: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
mod apply_patch;
|
||||
mod approve_all;
|
||||
mod auth_env;
|
||||
mod originator;
|
||||
mod output_schema;
|
||||
|
||||
@@ -12,7 +12,7 @@ use anyhow::anyhow;
|
||||
use codex_protocol::ConversationId;
|
||||
use tracing_subscriber::fmt::writer::MakeWriter;
|
||||
|
||||
const DEFAULT_MAX_BYTES: usize = 2 * 1024 * 1024; // 2 MiB
|
||||
const DEFAULT_MAX_BYTES: usize = 4 * 1024 * 1024; // 4 MiB
|
||||
const SENTRY_DSN: &str =
|
||||
"https://ae32ed50620d7a7792c1ce5df38b3e3e@o33249.ingest.us.sentry.io/4510195390611458";
|
||||
const UPLOAD_TIMEOUT_SECS: u64 = 10;
|
||||
|
||||
@@ -105,6 +105,7 @@ pub async fn run_main<T: Reporter>(
|
||||
threads,
|
||||
cancel_flag,
|
||||
compute_indices,
|
||||
true,
|
||||
)?;
|
||||
let match_count = matches.len();
|
||||
let matches_truncated = total_match_count > match_count;
|
||||
@@ -121,6 +122,7 @@ pub async fn run_main<T: Reporter>(
|
||||
|
||||
/// The worker threads will periodically check `cancel_flag` to see if they
|
||||
/// should stop processing files.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn run(
|
||||
pattern_text: &str,
|
||||
limit: NonZero<usize>,
|
||||
@@ -129,6 +131,7 @@ pub fn run(
|
||||
threads: NonZero<usize>,
|
||||
cancel_flag: Arc<AtomicBool>,
|
||||
compute_indices: bool,
|
||||
respect_gitignore: bool,
|
||||
) -> anyhow::Result<FileSearchResults> {
|
||||
let pattern = create_pattern(pattern_text);
|
||||
// Create one BestMatchesList per worker thread so that each worker can
|
||||
@@ -157,6 +160,14 @@ pub fn run(
|
||||
.hidden(false)
|
||||
// Don't require git to be present to apply to apply git-related ignore rules.
|
||||
.require_git(false);
|
||||
if !respect_gitignore {
|
||||
walk_builder
|
||||
.git_ignore(false)
|
||||
.git_global(false)
|
||||
.git_exclude(false)
|
||||
.ignore(false)
|
||||
.parents(false);
|
||||
}
|
||||
|
||||
if !exclude.is_empty() {
|
||||
let mut override_builder = OverrideBuilder::new(search_directory);
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
[package]
|
||||
name = "codex-mcp-client"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
tracing-subscriber = { workspace = true, features = ["fmt", "env-filter"] }
|
||||
tokio = { workspace = true, features = [
|
||||
"io-util",
|
||||
"macros",
|
||||
"process",
|
||||
"rt-multi-thread",
|
||||
"sync",
|
||||
"time",
|
||||
] }
|
||||
@@ -1,3 +0,0 @@
|
||||
mod mcp_client;
|
||||
|
||||
pub use mcp_client::McpClient;
|
||||
@@ -1,88 +0,0 @@
|
||||
//! Simple command-line utility to exercise `McpClient`.
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! ```bash
|
||||
//! cargo run -p codex-mcp-client -- `codex-mcp-server`
|
||||
//! ```
|
||||
//!
|
||||
//! Any additional arguments after the first one are forwarded to the spawned
|
||||
//! program. The utility connects, issues a `tools/list` request and prints the
|
||||
//! server's response as pretty JSON.
|
||||
|
||||
use std::ffi::OsString;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use codex_mcp_client::McpClient;
|
||||
use mcp_types::ClientCapabilities;
|
||||
use mcp_types::Implementation;
|
||||
use mcp_types::InitializeRequestParams;
|
||||
use mcp_types::ListToolsRequestParams;
|
||||
use mcp_types::MCP_SCHEMA_VERSION;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let default_level = "debug";
|
||||
let _ = tracing_subscriber::fmt()
|
||||
// Fallback to the `default_level` log filter if the environment
|
||||
// variable is not set _or_ contains an invalid value
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env()
|
||||
.or_else(|_| EnvFilter::try_new(default_level))
|
||||
.unwrap_or_else(|_| EnvFilter::new(default_level)),
|
||||
)
|
||||
.with_writer(std::io::stderr)
|
||||
.try_init();
|
||||
|
||||
// Collect command-line arguments excluding the program name itself.
|
||||
let mut args: Vec<OsString> = std::env::args_os().skip(1).collect();
|
||||
|
||||
if args.is_empty() || args[0] == "--help" || args[0] == "-h" {
|
||||
eprintln!("Usage: mcp-client <program> [args..]\n\nExample: mcp-client codex-mcp-server");
|
||||
std::process::exit(1);
|
||||
}
|
||||
let original_args = args.clone();
|
||||
|
||||
// Spawn the subprocess and connect the client.
|
||||
let program = args.remove(0);
|
||||
let env = None;
|
||||
let client = McpClient::new_stdio_client(program, args, env, &[], None)
|
||||
.await
|
||||
.with_context(|| format!("failed to spawn subprocess: {original_args:?}"))?;
|
||||
|
||||
let params = InitializeRequestParams {
|
||||
capabilities: ClientCapabilities {
|
||||
experimental: None,
|
||||
roots: None,
|
||||
sampling: None,
|
||||
elicitation: None,
|
||||
},
|
||||
client_info: Implementation {
|
||||
name: "codex-mcp-client".to_owned(),
|
||||
version: env!("CARGO_PKG_VERSION").to_owned(),
|
||||
title: Some("Codex".to_string()),
|
||||
// This field is used by Codex when it is an MCP server: it should
|
||||
// not be used when Codex is an MCP client.
|
||||
user_agent: None,
|
||||
},
|
||||
protocol_version: MCP_SCHEMA_VERSION.to_owned(),
|
||||
};
|
||||
let timeout = Some(Duration::from_secs(10));
|
||||
let response = client.initialize(params, timeout).await?;
|
||||
eprintln!("initialize response: {response:?}");
|
||||
|
||||
// Issue `tools/list` request (no params).
|
||||
let timeout = None;
|
||||
let tools = client
|
||||
.list_tools(None::<ListToolsRequestParams>, timeout)
|
||||
.await
|
||||
.context("tools/list request failed")?;
|
||||
|
||||
// Print the result in a human readable form.
|
||||
println!("{}", serde_json::to_string_pretty(&tools)?);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,509 +0,0 @@
|
||||
//! A minimal async client for the Model Context Protocol (MCP).
|
||||
//!
|
||||
//! The client is intentionally lightweight – it is only capable of:
|
||||
//! 1. Spawning a subprocess that launches a conforming MCP server that
|
||||
//! communicates over stdio.
|
||||
//! 2. Sending MCP requests and pairing them with their corresponding
|
||||
//! responses.
|
||||
//! 3. Offering a convenience helper for the common `tools/list` request.
|
||||
//!
|
||||
//! The crate hides all JSON‐RPC framing details behind a typed API. Users
|
||||
//! interact with the [`ModelContextProtocolRequest`] trait from `mcp-types` to
|
||||
//! issue requests and receive strongly-typed results.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicI64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use mcp_types::CallToolRequest;
|
||||
use mcp_types::CallToolRequestParams;
|
||||
use mcp_types::InitializeRequest;
|
||||
use mcp_types::InitializeRequestParams;
|
||||
use mcp_types::InitializedNotification;
|
||||
use mcp_types::JSONRPC_VERSION;
|
||||
use mcp_types::JSONRPCMessage;
|
||||
use mcp_types::JSONRPCNotification;
|
||||
use mcp_types::JSONRPCRequest;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
use mcp_types::ListToolsRequest;
|
||||
use mcp_types::ListToolsRequestParams;
|
||||
use mcp_types::ListToolsResult;
|
||||
use mcp_types::ModelContextProtocolNotification;
|
||||
use mcp_types::ModelContextProtocolRequest;
|
||||
use mcp_types::RequestId;
|
||||
use serde::Serialize;
|
||||
use serde::de::DeserializeOwned;
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::process::Command;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::time;
|
||||
use tracing::debug;
|
||||
use tracing::error;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
|
||||
/// Capacity of the bounded channels used for transporting messages between the
|
||||
/// client API and the IO tasks.
|
||||
const CHANNEL_CAPACITY: usize = 128;
|
||||
|
||||
/// Internal representation of a pending request sender.
|
||||
type PendingSender = oneshot::Sender<JSONRPCMessage>;
|
||||
|
||||
/// A running MCP client instance.
|
||||
pub struct McpClient {
|
||||
/// Retain this child process until the client is dropped. The Tokio runtime
|
||||
/// will make a "best effort" to reap the process after it exits, but it is
|
||||
/// not a guarantee. See the `kill_on_drop` documentation for details.
|
||||
#[allow(dead_code)]
|
||||
child: tokio::process::Child,
|
||||
|
||||
/// Channel for sending JSON-RPC messages *to* the background writer task.
|
||||
outgoing_tx: mpsc::Sender<JSONRPCMessage>,
|
||||
|
||||
/// Map of `request.id -> oneshot::Sender` used to dispatch responses back
|
||||
/// to the originating caller.
|
||||
pending: Arc<Mutex<HashMap<i64, PendingSender>>>,
|
||||
|
||||
/// Monotonically increasing counter used to generate request IDs.
|
||||
id_counter: AtomicI64,
|
||||
}
|
||||
|
||||
impl McpClient {
|
||||
/// Spawn the given command and establish an MCP session over its STDIO.
|
||||
/// Caller is responsible for sending the `initialize` request. See
|
||||
/// [`initialize`](Self::initialize) for details.
|
||||
pub async fn new_stdio_client(
|
||||
program: OsString,
|
||||
args: Vec<OsString>,
|
||||
env: Option<HashMap<String, String>>,
|
||||
env_vars: &[String],
|
||||
cwd: Option<PathBuf>,
|
||||
) -> std::io::Result<Self> {
|
||||
let mut command = Command::new(program);
|
||||
command
|
||||
.args(args)
|
||||
.env_clear()
|
||||
.envs(create_env_for_mcp_server(env, env_vars))
|
||||
.stdin(std::process::Stdio::piped())
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::null())
|
||||
// As noted in the `kill_on_drop` documentation, the Tokio runtime makes
|
||||
// a "best effort" to reap-after-exit to avoid zombie processes, but it
|
||||
// is not a guarantee.
|
||||
.kill_on_drop(true);
|
||||
if let Some(cwd) = cwd {
|
||||
command.current_dir(cwd);
|
||||
}
|
||||
|
||||
let mut child = command.spawn()?;
|
||||
|
||||
let stdin = child
|
||||
.stdin
|
||||
.take()
|
||||
.ok_or_else(|| std::io::Error::other("failed to capture child stdin"))?;
|
||||
let stdout = child
|
||||
.stdout
|
||||
.take()
|
||||
.ok_or_else(|| std::io::Error::other("failed to capture child stdout"))?;
|
||||
|
||||
let (outgoing_tx, mut outgoing_rx) = mpsc::channel::<JSONRPCMessage>(CHANNEL_CAPACITY);
|
||||
let pending: Arc<Mutex<HashMap<i64, PendingSender>>> = Arc::new(Mutex::new(HashMap::new()));
|
||||
|
||||
// Spawn writer task. It listens on the `outgoing_rx` channel and
|
||||
// writes messages to the child's STDIN.
|
||||
let writer_handle = {
|
||||
let mut stdin = stdin;
|
||||
tokio::spawn(async move {
|
||||
while let Some(msg) = outgoing_rx.recv().await {
|
||||
match serde_json::to_string(&msg) {
|
||||
Ok(json) => {
|
||||
debug!("MCP message to server: {json}");
|
||||
if stdin.write_all(json.as_bytes()).await.is_err() {
|
||||
error!("failed to write message to child stdin");
|
||||
break;
|
||||
}
|
||||
if stdin.write_all(b"\n").await.is_err() {
|
||||
error!("failed to write newline to child stdin");
|
||||
break;
|
||||
}
|
||||
// No explicit flush needed on a pipe; write_all is sufficient.
|
||||
}
|
||||
Err(e) => error!("failed to serialize JSONRPCMessage: {e}"),
|
||||
}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
// Spawn reader task. It reads line-delimited JSON from the child's
|
||||
// STDOUT and dispatches responses to the pending map.
|
||||
let reader_handle = {
|
||||
let pending = pending.clone();
|
||||
let mut lines = BufReader::new(stdout).lines();
|
||||
|
||||
tokio::spawn(async move {
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
debug!("MCP message from server: {line}");
|
||||
match serde_json::from_str::<JSONRPCMessage>(&line) {
|
||||
Ok(JSONRPCMessage::Response(resp)) => {
|
||||
Self::dispatch_response(resp, &pending).await;
|
||||
}
|
||||
Ok(JSONRPCMessage::Error(err)) => {
|
||||
Self::dispatch_error(err, &pending).await;
|
||||
}
|
||||
Ok(JSONRPCMessage::Notification(JSONRPCNotification { .. })) => {
|
||||
// For now we only log server-initiated notifications.
|
||||
info!("<- notification: {}", line);
|
||||
}
|
||||
Ok(other) => {
|
||||
// Batch responses and requests are currently not
|
||||
// expected from the server – log and ignore.
|
||||
info!("<- unhandled message: {:?}", other);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("failed to deserialize JSONRPCMessage: {e}; line = {}", line)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
// We intentionally *detach* the tasks. They will keep running in the
|
||||
// background as long as their respective resources (channels/stdin/
|
||||
// stdout) are alive. Dropping `McpClient` cancels the tasks due to
|
||||
// dropped resources.
|
||||
let _ = (writer_handle, reader_handle);
|
||||
|
||||
Ok(Self {
|
||||
child,
|
||||
outgoing_tx,
|
||||
pending,
|
||||
id_counter: AtomicI64::new(1),
|
||||
})
|
||||
}
|
||||
|
||||
/// Send an arbitrary MCP request and await the typed result.
|
||||
///
|
||||
/// If `timeout` is `None` the call waits indefinitely. If `Some(duration)`
|
||||
/// is supplied and no response is received within the given period, a
|
||||
/// timeout error is returned.
|
||||
pub async fn send_request<R>(
|
||||
&self,
|
||||
params: R::Params,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<R::Result>
|
||||
where
|
||||
R: ModelContextProtocolRequest,
|
||||
R::Params: Serialize,
|
||||
R::Result: DeserializeOwned,
|
||||
{
|
||||
// Create a new unique ID.
|
||||
let id = self.id_counter.fetch_add(1, Ordering::SeqCst);
|
||||
let request_id = RequestId::Integer(id);
|
||||
|
||||
// Serialize params -> JSON. For many request types `Params` is
|
||||
// `Option<T>` and `None` should be encoded as *absence* of the field.
|
||||
let params_json = serde_json::to_value(¶ms)?;
|
||||
let params_field = if params_json.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(params_json)
|
||||
};
|
||||
|
||||
let jsonrpc_request = JSONRPCRequest {
|
||||
id: request_id.clone(),
|
||||
jsonrpc: JSONRPC_VERSION.to_string(),
|
||||
method: R::METHOD.to_string(),
|
||||
params: params_field,
|
||||
};
|
||||
|
||||
let message = JSONRPCMessage::Request(jsonrpc_request);
|
||||
|
||||
// oneshot channel for the response.
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
// Register in pending map *before* sending the message so a race where
|
||||
// the response arrives immediately cannot be lost.
|
||||
{
|
||||
let mut guard = self.pending.lock().await;
|
||||
guard.insert(id, tx);
|
||||
}
|
||||
|
||||
// Send to writer task.
|
||||
if self.outgoing_tx.send(message).await.is_err() {
|
||||
return Err(anyhow!(
|
||||
"failed to send message to writer task - channel closed"
|
||||
));
|
||||
}
|
||||
|
||||
// Await the response, optionally bounded by a timeout.
|
||||
let msg = match timeout {
|
||||
Some(duration) => {
|
||||
match time::timeout(duration, rx).await {
|
||||
Ok(Ok(msg)) => msg,
|
||||
Ok(Err(_)) => {
|
||||
// Channel closed without a reply – remove the pending entry.
|
||||
let mut guard = self.pending.lock().await;
|
||||
guard.remove(&id);
|
||||
return Err(anyhow!(
|
||||
"response channel closed before a reply was received"
|
||||
));
|
||||
}
|
||||
Err(_) => {
|
||||
// Timed out. Remove the pending entry so we don't leak.
|
||||
let mut guard = self.pending.lock().await;
|
||||
guard.remove(&id);
|
||||
return Err(anyhow!("request timed out"));
|
||||
}
|
||||
}
|
||||
}
|
||||
None => rx
|
||||
.await
|
||||
.map_err(|_| anyhow!("response channel closed before a reply was received"))?,
|
||||
};
|
||||
|
||||
match msg {
|
||||
JSONRPCMessage::Response(JSONRPCResponse { result, .. }) => {
|
||||
let typed: R::Result = serde_json::from_value(result)?;
|
||||
Ok(typed)
|
||||
}
|
||||
JSONRPCMessage::Error(err) => Err(anyhow!(format!(
|
||||
"server returned JSON-RPC error: code = {}, message = {}",
|
||||
err.error.code, err.error.message
|
||||
))),
|
||||
other => Err(anyhow!(format!(
|
||||
"unexpected message variant received in reply path: {other:?}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn send_notification<N>(&self, params: N::Params) -> Result<()>
|
||||
where
|
||||
N: ModelContextProtocolNotification,
|
||||
N::Params: Serialize,
|
||||
{
|
||||
// Serialize params -> JSON. For many request types `Params` is
|
||||
// `Option<T>` and `None` should be encoded as *absence* of the field.
|
||||
let params_json = serde_json::to_value(¶ms)?;
|
||||
let params_field = if params_json.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(params_json)
|
||||
};
|
||||
|
||||
let method = N::METHOD.to_string();
|
||||
let jsonrpc_notification = JSONRPCNotification {
|
||||
jsonrpc: JSONRPC_VERSION.to_string(),
|
||||
method: method.clone(),
|
||||
params: params_field,
|
||||
};
|
||||
|
||||
let notification = JSONRPCMessage::Notification(jsonrpc_notification);
|
||||
self.outgoing_tx
|
||||
.send(notification)
|
||||
.await
|
||||
.with_context(|| format!("failed to send notification `{method}` to writer task"))
|
||||
}
|
||||
|
||||
/// Negotiates the initialization with the MCP server. Sends an `initialize`
|
||||
/// request with the specified `initialize_params` and then the
|
||||
/// `notifications/initialized` notification once the response has been
|
||||
/// received. Returns the response to the `initialize` request.
|
||||
pub async fn initialize(
|
||||
&self,
|
||||
initialize_params: InitializeRequestParams,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<mcp_types::InitializeResult> {
|
||||
let response = self
|
||||
.send_request::<InitializeRequest>(initialize_params, timeout)
|
||||
.await?;
|
||||
self.send_notification::<InitializedNotification>(None)
|
||||
.await?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Convenience wrapper around `tools/list`.
|
||||
pub async fn list_tools(
|
||||
&self,
|
||||
params: Option<ListToolsRequestParams>,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<ListToolsResult> {
|
||||
self.send_request::<ListToolsRequest>(params, timeout).await
|
||||
}
|
||||
|
||||
/// Convenience wrapper around `tools/call`.
|
||||
pub async fn call_tool(
|
||||
&self,
|
||||
name: String,
|
||||
arguments: Option<serde_json::Value>,
|
||||
timeout: Option<Duration>,
|
||||
) -> Result<mcp_types::CallToolResult> {
|
||||
let params = CallToolRequestParams { name, arguments };
|
||||
debug!("MCP tool call: {params:?}");
|
||||
self.send_request::<CallToolRequest>(params, timeout).await
|
||||
}
|
||||
|
||||
/// Internal helper: route a JSON-RPC *response* object to the pending map.
|
||||
async fn dispatch_response(
|
||||
resp: JSONRPCResponse,
|
||||
pending: &Arc<Mutex<HashMap<i64, PendingSender>>>,
|
||||
) {
|
||||
let id = match resp.id {
|
||||
RequestId::Integer(i) => i,
|
||||
RequestId::String(_) => {
|
||||
// We only ever generate integer IDs. Receiving a string here
|
||||
// means we will not find a matching entry in `pending`.
|
||||
error!("response with string ID - no matching pending request");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let tx_opt = {
|
||||
let mut guard = pending.lock().await;
|
||||
guard.remove(&id)
|
||||
};
|
||||
if let Some(tx) = tx_opt {
|
||||
// Ignore send errors – the receiver might have been dropped.
|
||||
let _ = tx.send(JSONRPCMessage::Response(resp));
|
||||
} else {
|
||||
warn!(id, "no pending request found for response");
|
||||
}
|
||||
}
|
||||
|
||||
/// Internal helper: route a JSON-RPC *error* object to the pending map.
|
||||
async fn dispatch_error(
|
||||
err: mcp_types::JSONRPCError,
|
||||
pending: &Arc<Mutex<HashMap<i64, PendingSender>>>,
|
||||
) {
|
||||
let id = match err.id {
|
||||
RequestId::Integer(i) => i,
|
||||
RequestId::String(_) => return, // see comment above
|
||||
};
|
||||
|
||||
let tx_opt = {
|
||||
let mut guard = pending.lock().await;
|
||||
guard.remove(&id)
|
||||
};
|
||||
if let Some(tx) = tx_opt {
|
||||
let _ = tx.send(JSONRPCMessage::Error(err));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for McpClient {
|
||||
fn drop(&mut self) {
|
||||
// Even though we have already tagged this process with
|
||||
// `kill_on_drop(true)` above, this extra check has the benefit of
|
||||
// forcing the process to be reaped immediately if it has already exited
|
||||
// instead of waiting for the Tokio runtime to reap it later.
|
||||
let _ = self.child.try_wait();
|
||||
}
|
||||
}
|
||||
|
||||
/// Environment variables that are always included when spawning a new MCP
|
||||
/// server.
|
||||
#[rustfmt::skip]
|
||||
#[cfg(unix)]
|
||||
const DEFAULT_ENV_VARS: &[&str] = &[
|
||||
// https://modelcontextprotocol.io/docs/tools/debugging#environment-variables
|
||||
// states:
|
||||
//
|
||||
// > MCP servers inherit only a subset of environment variables automatically,
|
||||
// > like `USER`, `HOME`, and `PATH`.
|
||||
//
|
||||
// But it does not fully enumerate the list. Empirically, when spawning a
|
||||
// an MCP server via Claude Desktop on macOS, it reports the following
|
||||
// environment variables:
|
||||
"HOME",
|
||||
"LOGNAME",
|
||||
"PATH",
|
||||
"SHELL",
|
||||
"USER",
|
||||
"__CF_USER_TEXT_ENCODING",
|
||||
|
||||
// Additional environment variables Codex chooses to include by default:
|
||||
"LANG",
|
||||
"LC_ALL",
|
||||
"TERM",
|
||||
"TMPDIR",
|
||||
"TZ",
|
||||
];
|
||||
|
||||
#[cfg(windows)]
|
||||
const DEFAULT_ENV_VARS: &[&str] = &[
|
||||
// TODO: More research is necessary to curate this list.
|
||||
"PATH",
|
||||
"PATHEXT",
|
||||
"USERNAME",
|
||||
"USERDOMAIN",
|
||||
"USERPROFILE",
|
||||
"TEMP",
|
||||
"TMP",
|
||||
];
|
||||
|
||||
/// `extra_env` comes from the config for an entry in `mcp_servers` in
|
||||
/// `config.toml`.
|
||||
fn create_env_for_mcp_server(
|
||||
extra_env: Option<HashMap<String, String>>,
|
||||
env_vars: &[String],
|
||||
) -> HashMap<String, String> {
|
||||
DEFAULT_ENV_VARS
|
||||
.iter()
|
||||
.copied()
|
||||
.chain(env_vars.iter().map(String::as_str))
|
||||
.filter_map(|var| {
|
||||
std::env::var(var)
|
||||
.ok()
|
||||
.map(|value| (var.to_string(), value))
|
||||
})
|
||||
.chain(extra_env.unwrap_or_default())
|
||||
.collect::<HashMap<_, _>>()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn set_env_var(key: &str, value: &str) {
|
||||
unsafe {
|
||||
std::env::set_var(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_env_var(key: &str) {
|
||||
unsafe {
|
||||
std::env::remove_var(key);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_env_for_mcp_server() {
|
||||
let env_var = "USER";
|
||||
let env_var_existing_value = std::env::var(env_var).unwrap_or_default();
|
||||
let env_var_new_value = format!("{env_var_existing_value}-extra");
|
||||
let extra_env = HashMap::from([(env_var.to_owned(), env_var_new_value.clone())]);
|
||||
let mcp_server_env = create_env_for_mcp_server(Some(extra_env), &[]);
|
||||
assert!(mcp_server_env.contains_key("PATH"));
|
||||
assert_eq!(Some(&env_var_new_value), mcp_server_env.get(env_var));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_env_for_mcp_server_includes_extra_whitelisted_vars() {
|
||||
let custom_var = "CUSTOM_TEST_VAR";
|
||||
let value = "value".to_string();
|
||||
set_env_var(custom_var, &value);
|
||||
let mcp_server_env = create_env_for_mcp_server(None, &[custom_var.to_string()]);
|
||||
assert_eq!(Some(&value), mcp_server_env.get(custom_var));
|
||||
remove_env_var(custom_var);
|
||||
}
|
||||
}
|
||||
35
codex-rs/protocol/src/account.rs
Normal file
35
codex-rs/protocol/src/account.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use ts_rs::TS;
|
||||
|
||||
#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, TS, Default)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
#[ts(rename_all = "lowercase")]
|
||||
pub enum PlanType {
|
||||
#[default]
|
||||
Free,
|
||||
Plus,
|
||||
Pro,
|
||||
Team,
|
||||
Business,
|
||||
Enterprise,
|
||||
Edu,
|
||||
#[serde(other)]
|
||||
Unknown,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema, TS)]
|
||||
#[serde(tag = "type")]
|
||||
#[ts(tag = "type")]
|
||||
pub enum Account {
|
||||
ApiKey {
|
||||
api_key: String,
|
||||
},
|
||||
#[serde(rename = "chatgpt")]
|
||||
#[ts(rename = "chatgpt")]
|
||||
ChatGpt {
|
||||
email: Option<String>,
|
||||
plan_type: PlanType,
|
||||
},
|
||||
}
|
||||
@@ -1,3 +1,9 @@
|
||||
use crate::protocol::AgentMessageEvent;
|
||||
use crate::protocol::AgentReasoningEvent;
|
||||
use crate::protocol::AgentReasoningRawContentEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::UserMessageEvent;
|
||||
use crate::protocol::WebSearchEndEvent;
|
||||
use crate::user_input::UserInput;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
@@ -7,6 +13,9 @@ use ts_rs::TS;
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
||||
pub enum TurnItem {
|
||||
UserMessage(UserMessageItem),
|
||||
AgentMessage(AgentMessageItem),
|
||||
Reasoning(ReasoningItem),
|
||||
WebSearch(WebSearchItem),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
||||
@@ -15,6 +24,31 @@ pub struct UserMessageItem {
|
||||
pub content: Vec<UserInput>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
||||
pub enum AgentMessageContent {
|
||||
Text { text: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
||||
pub struct AgentMessageItem {
|
||||
pub id: String,
|
||||
pub content: Vec<AgentMessageContent>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
||||
pub struct ReasoningItem {
|
||||
pub id: String,
|
||||
pub summary_text: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub raw_content: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
||||
pub struct WebSearchItem {
|
||||
pub id: String,
|
||||
pub query: String,
|
||||
}
|
||||
|
||||
impl UserMessageItem {
|
||||
pub fn new(content: &[UserInput]) -> Self {
|
||||
Self {
|
||||
@@ -22,12 +56,104 @@ impl UserMessageItem {
|
||||
content: content.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_legacy_event(&self) -> EventMsg {
|
||||
EventMsg::UserMessage(UserMessageEvent {
|
||||
message: self.message(),
|
||||
images: Some(self.image_urls()),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn message(&self) -> String {
|
||||
self.content
|
||||
.iter()
|
||||
.map(|c| match c {
|
||||
UserInput::Text { text } => text.clone(),
|
||||
_ => String::new(),
|
||||
})
|
||||
.collect::<Vec<String>>()
|
||||
.join("")
|
||||
}
|
||||
|
||||
pub fn image_urls(&self) -> Vec<String> {
|
||||
self.content
|
||||
.iter()
|
||||
.filter_map(|c| match c {
|
||||
UserInput::Image { image_url } => Some(image_url.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl AgentMessageItem {
|
||||
pub fn new(content: &[AgentMessageContent]) -> Self {
|
||||
Self {
|
||||
id: uuid::Uuid::new_v4().to_string(),
|
||||
content: content.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_legacy_events(&self) -> Vec<EventMsg> {
|
||||
self.content
|
||||
.iter()
|
||||
.map(|c| match c {
|
||||
AgentMessageContent::Text { text } => EventMsg::AgentMessage(AgentMessageEvent {
|
||||
message: text.clone(),
|
||||
}),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl ReasoningItem {
|
||||
pub fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg> {
|
||||
let mut events = Vec::new();
|
||||
for summary in &self.summary_text {
|
||||
events.push(EventMsg::AgentReasoning(AgentReasoningEvent {
|
||||
text: summary.clone(),
|
||||
}));
|
||||
}
|
||||
|
||||
if show_raw_agent_reasoning {
|
||||
for entry in &self.raw_content {
|
||||
events.push(EventMsg::AgentReasoningRawContent(
|
||||
AgentReasoningRawContentEvent {
|
||||
text: entry.clone(),
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
events
|
||||
}
|
||||
}
|
||||
|
||||
impl WebSearchItem {
|
||||
pub fn as_legacy_event(&self) -> EventMsg {
|
||||
EventMsg::WebSearchEnd(WebSearchEndEvent {
|
||||
call_id: self.id.clone(),
|
||||
query: self.query.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TurnItem {
|
||||
pub fn id(&self) -> String {
|
||||
match self {
|
||||
TurnItem::UserMessage(item) => item.id.clone(),
|
||||
TurnItem::AgentMessage(item) => item.id.clone(),
|
||||
TurnItem::Reasoning(item) => item.id.clone(),
|
||||
TurnItem::WebSearch(item) => item.id.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg> {
|
||||
match self {
|
||||
TurnItem::UserMessage(item) => vec![item.as_legacy_event()],
|
||||
TurnItem::AgentMessage(item) => item.as_legacy_events(),
|
||||
TurnItem::WebSearch(item) => vec![item.as_legacy_event()],
|
||||
TurnItem::Reasoning(item) => item.as_legacy_events(show_raw_agent_reasoning),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod account;
|
||||
mod conversation_id;
|
||||
pub use conversation_id::ConversationId;
|
||||
pub mod config_types;
|
||||
|
||||
@@ -770,69 +770,13 @@ pub struct AgentMessageEvent {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum InputMessageKind {
|
||||
/// Plain user text (default)
|
||||
Plain,
|
||||
/// XML-wrapped user instructions (<user_instructions>...)
|
||||
UserInstructions,
|
||||
/// XML-wrapped environment context (<environment_context>...)
|
||||
EnvironmentContext,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct UserMessageEvent {
|
||||
pub message: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub kind: Option<InputMessageKind>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub images: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl<T, U> From<(T, U)> for InputMessageKind
|
||||
where
|
||||
T: AsRef<str>,
|
||||
U: AsRef<str>,
|
||||
{
|
||||
fn from(value: (T, U)) -> Self {
|
||||
let (_role, message) = value;
|
||||
let message = message.as_ref();
|
||||
let trimmed = message.trim();
|
||||
if starts_with_ignore_ascii_case(trimmed, ENVIRONMENT_CONTEXT_OPEN_TAG)
|
||||
&& ends_with_ignore_ascii_case(trimmed, ENVIRONMENT_CONTEXT_CLOSE_TAG)
|
||||
{
|
||||
InputMessageKind::EnvironmentContext
|
||||
} else if starts_with_ignore_ascii_case(trimmed, USER_INSTRUCTIONS_OPEN_TAG)
|
||||
&& ends_with_ignore_ascii_case(trimmed, USER_INSTRUCTIONS_CLOSE_TAG)
|
||||
{
|
||||
InputMessageKind::UserInstructions
|
||||
} else {
|
||||
InputMessageKind::Plain
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn starts_with_ignore_ascii_case(text: &str, prefix: &str) -> bool {
|
||||
let text_bytes = text.as_bytes();
|
||||
let prefix_bytes = prefix.as_bytes();
|
||||
text_bytes.len() >= prefix_bytes.len()
|
||||
&& text_bytes
|
||||
.iter()
|
||||
.zip(prefix_bytes.iter())
|
||||
.all(|(a, b)| a.eq_ignore_ascii_case(b))
|
||||
}
|
||||
|
||||
fn ends_with_ignore_ascii_case(text: &str, suffix: &str) -> bool {
|
||||
let text_bytes = text.as_bytes();
|
||||
let suffix_bytes = suffix.as_bytes();
|
||||
text_bytes.len() >= suffix_bytes.len()
|
||||
&& text_bytes[text_bytes.len() - suffix_bytes.len()..]
|
||||
.iter()
|
||||
.zip(suffix_bytes.iter())
|
||||
.all(|(a, b)| a.eq_ignore_ascii_case(b))
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct AgentMessageDeltaEvent {
|
||||
pub delta: String,
|
||||
|
||||
@@ -37,6 +37,7 @@ pub async fn perform_oauth_login(
|
||||
store_mode: OAuthCredentialsStoreMode,
|
||||
http_headers: Option<HashMap<String, String>>,
|
||||
env_http_headers: Option<HashMap<String, String>>,
|
||||
scopes: &[String],
|
||||
) -> Result<()> {
|
||||
let server = Arc::new(Server::http("127.0.0.1:0").map_err(|err| anyhow!(err))?);
|
||||
let guard = CallbackServerGuard {
|
||||
@@ -61,8 +62,9 @@ pub async fn perform_oauth_login(
|
||||
let http_client = apply_default_headers(ClientBuilder::new(), &default_headers).build()?;
|
||||
|
||||
let mut oauth_state = OAuthState::new(server_url, Some(http_client)).await?;
|
||||
let scope_refs: Vec<&str> = scopes.iter().map(String::as_str).collect();
|
||||
oauth_state
|
||||
.start_authorization(&[], &redirect_uri, Some("Codex"))
|
||||
.start_authorization(&scope_refs, &redirect_uri, Some("Codex"))
|
||||
.await?;
|
||||
let auth_url = oauth_state.get_authorization_url().await?;
|
||||
|
||||
|
||||
@@ -2069,6 +2069,35 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ascii_prefix_survives_non_ascii_followup() {
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
use crossterm::event::KeyModifiers;
|
||||
|
||||
let (tx, _rx) = unbounded_channel::<AppEvent>();
|
||||
let sender = AppEventSender::new(tx);
|
||||
let mut composer = ChatComposer::new(
|
||||
true,
|
||||
sender,
|
||||
false,
|
||||
"Ask Codex to do anything".to_string(),
|
||||
false,
|
||||
);
|
||||
|
||||
let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('1'), KeyModifiers::NONE));
|
||||
assert!(composer.is_in_paste_burst());
|
||||
|
||||
let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char('あ'), KeyModifiers::NONE));
|
||||
|
||||
let (result, _) =
|
||||
composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE));
|
||||
match result {
|
||||
InputResult::Submitted(text) => assert_eq!(text, "1あ"),
|
||||
_ => panic!("expected Submitted"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handle_paste_small_inserts_text() {
|
||||
use crossterm::event::KeyCode;
|
||||
|
||||
@@ -198,12 +198,15 @@ impl PasteBurst {
|
||||
|
||||
/// Before applying modified/non-char input: flush buffered burst immediately.
|
||||
pub fn flush_before_modified_input(&mut self) -> Option<String> {
|
||||
if self.is_active() {
|
||||
self.active = false;
|
||||
Some(std::mem::take(&mut self.buffer))
|
||||
} else {
|
||||
None
|
||||
if !self.is_active() {
|
||||
return None;
|
||||
}
|
||||
self.active = false;
|
||||
let mut out = std::mem::take(&mut self.buffer);
|
||||
if let Some((ch, _at)) = self.pending_first_char.take() {
|
||||
out.push(ch);
|
||||
}
|
||||
Some(out)
|
||||
}
|
||||
|
||||
/// Clear only the timing window and any pending first-char.
|
||||
|
||||
@@ -23,7 +23,6 @@ use codex_core::protocol::ExecApprovalRequestEvent;
|
||||
use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::ExitedReviewModeEvent;
|
||||
use codex_core::protocol::InputMessageKind;
|
||||
use codex_core::protocol::ListCustomPromptsResponseEvent;
|
||||
use codex_core::protocol::McpListToolsResponseEvent;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
@@ -746,9 +745,8 @@ impl ChatWidget {
|
||||
&ev.call_id,
|
||||
CommandOutput {
|
||||
exit_code: ev.exit_code,
|
||||
stdout: ev.stdout.clone(),
|
||||
stderr: ev.stderr.clone(),
|
||||
formatted_output: ev.formatted_output.clone(),
|
||||
aggregated_output: ev.aggregated_output.clone(),
|
||||
},
|
||||
ev.duration,
|
||||
);
|
||||
@@ -1553,17 +1551,9 @@ impl ChatWidget {
|
||||
}
|
||||
|
||||
fn on_user_message_event(&mut self, event: UserMessageEvent) {
|
||||
match event.kind {
|
||||
Some(InputMessageKind::EnvironmentContext)
|
||||
| Some(InputMessageKind::UserInstructions) => {
|
||||
// Skip XML‑wrapped context blocks in the transcript.
|
||||
}
|
||||
Some(InputMessageKind::Plain) | None => {
|
||||
let message = event.message.trim();
|
||||
if !message.is_empty() {
|
||||
self.add_to_history(history_cell::new_user_prompt(message.to_string()));
|
||||
}
|
||||
}
|
||||
let message = event.message.trim();
|
||||
if !message.is_empty() {
|
||||
self.add_to_history(history_cell::new_user_prompt(message.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1641,6 +1631,7 @@ impl ChatWidget {
|
||||
context_usage,
|
||||
&self.conversation_id,
|
||||
self.rate_limit_snapshot.as_ref(),
|
||||
Local::now(),
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ use codex_core::protocol::ExecCommandBeginEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::ExitedReviewModeEvent;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::InputMessageKind;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::PatchApplyBeginEvent;
|
||||
use codex_core::protocol::PatchApplyEndEvent;
|
||||
@@ -72,18 +71,26 @@ fn upgrade_event_payload_for_tests(mut payload: serde_json::Value) -> serde_json
|
||||
&& let Some(m) = msg.as_object_mut()
|
||||
{
|
||||
let ty = m.get("type").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if ty == "exec_command_end" && !m.contains_key("formatted_output") {
|
||||
if ty == "exec_command_end" {
|
||||
let stdout = m.get("stdout").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let stderr = m.get("stderr").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let formatted = if stderr.is_empty() {
|
||||
let aggregated = if stderr.is_empty() {
|
||||
stdout.to_string()
|
||||
} else {
|
||||
format!("{stdout}{stderr}")
|
||||
};
|
||||
m.insert(
|
||||
"formatted_output".to_string(),
|
||||
serde_json::Value::String(formatted),
|
||||
);
|
||||
if !m.contains_key("formatted_output") {
|
||||
m.insert(
|
||||
"formatted_output".to_string(),
|
||||
serde_json::Value::String(aggregated.clone()),
|
||||
);
|
||||
}
|
||||
if !m.contains_key("aggregated_output") {
|
||||
m.insert(
|
||||
"aggregated_output".to_string(),
|
||||
serde_json::Value::String(aggregated),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
payload
|
||||
@@ -104,7 +111,6 @@ fn resumed_initial_messages_render_history() {
|
||||
initial_messages: Some(vec![
|
||||
EventMsg::UserMessage(UserMessageEvent {
|
||||
message: "hello from user".to_string(),
|
||||
kind: Some(InputMessageKind::Plain),
|
||||
images: None,
|
||||
}),
|
||||
EventMsg::AgentMessage(AgentMessageEvent {
|
||||
|
||||
@@ -3,11 +3,12 @@ use std::time::Instant;
|
||||
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub(crate) struct CommandOutput {
|
||||
pub(crate) exit_code: i32,
|
||||
pub(crate) stdout: String,
|
||||
pub(crate) stderr: String,
|
||||
/// The aggregated stderr + stdout interleaved.
|
||||
pub(crate) aggregated_output: String,
|
||||
/// The formatted output of the command, as seen by the model.
|
||||
pub(crate) formatted_output: String,
|
||||
}
|
||||
|
||||
@@ -82,9 +83,8 @@ impl ExecCell {
|
||||
call.duration = Some(elapsed);
|
||||
call.output = Some(CommandOutput {
|
||||
exit_code: 1,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,6 @@ use unicode_width::UnicodeWidthStr;
|
||||
pub(crate) const TOOL_CALL_MAX_LINES: usize = 5;
|
||||
|
||||
pub(crate) struct OutputLinesParams {
|
||||
pub(crate) only_err: bool,
|
||||
pub(crate) include_angle_pipe: bool,
|
||||
pub(crate) include_prefix: bool,
|
||||
}
|
||||
@@ -59,22 +58,12 @@ pub(crate) fn output_lines(
|
||||
params: OutputLinesParams,
|
||||
) -> OutputLines {
|
||||
let OutputLinesParams {
|
||||
only_err,
|
||||
include_angle_pipe,
|
||||
include_prefix,
|
||||
} = params;
|
||||
let CommandOutput {
|
||||
exit_code,
|
||||
stdout,
|
||||
stderr,
|
||||
..
|
||||
aggregated_output, ..
|
||||
} = match output {
|
||||
Some(output) if only_err && output.exit_code == 0 => {
|
||||
return OutputLines {
|
||||
lines: Vec::new(),
|
||||
omitted: None,
|
||||
};
|
||||
}
|
||||
Some(output) => output,
|
||||
None => {
|
||||
return OutputLines {
|
||||
@@ -84,7 +73,7 @@ pub(crate) fn output_lines(
|
||||
}
|
||||
};
|
||||
|
||||
let src = if *exit_code == 0 { stdout } else { stderr };
|
||||
let src = aggregated_output;
|
||||
let lines: Vec<&str> = src.lines().collect();
|
||||
let total = lines.len();
|
||||
let limit = TOOL_CALL_MAX_LINES;
|
||||
@@ -398,7 +387,6 @@ impl ExecCell {
|
||||
let raw_output = output_lines(
|
||||
Some(output),
|
||||
OutputLinesParams {
|
||||
only_err: false,
|
||||
include_angle_pipe: false,
|
||||
include_prefix: false,
|
||||
},
|
||||
|
||||
@@ -172,6 +172,7 @@ impl FileSearchManager {
|
||||
NUM_FILE_SEARCH_THREADS,
|
||||
cancellation_token.clone(),
|
||||
compute_indices,
|
||||
true,
|
||||
)
|
||||
.map(|res| res.matches)
|
||||
.unwrap_or_default();
|
||||
|
||||
@@ -1293,12 +1293,10 @@ pub(crate) fn new_patch_apply_failure(stderr: String) -> PlainHistoryCell {
|
||||
let output = output_lines(
|
||||
Some(&CommandOutput {
|
||||
exit_code: 1,
|
||||
stdout: String::new(),
|
||||
stderr,
|
||||
formatted_output: String::new(),
|
||||
aggregated_output: stderr,
|
||||
}),
|
||||
OutputLinesParams {
|
||||
only_err: true,
|
||||
include_angle_pipe: true,
|
||||
include_prefix: true,
|
||||
},
|
||||
@@ -1739,16 +1737,7 @@ mod tests {
|
||||
duration: None,
|
||||
});
|
||||
// Mark call complete so markers are ✓
|
||||
cell.complete_call(
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
|
||||
|
||||
let lines = cell.display_lines(80);
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
@@ -1770,16 +1759,7 @@ mod tests {
|
||||
duration: None,
|
||||
});
|
||||
// Call 1: Search only
|
||||
cell.complete_call(
|
||||
"c1",
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call("c1", CommandOutput::default(), Duration::from_millis(1));
|
||||
// Call 2: Read A
|
||||
cell = cell
|
||||
.with_added_call(
|
||||
@@ -1792,16 +1772,7 @@ mod tests {
|
||||
}],
|
||||
)
|
||||
.unwrap();
|
||||
cell.complete_call(
|
||||
"c2",
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call("c2", CommandOutput::default(), Duration::from_millis(1));
|
||||
// Call 3: Read B
|
||||
cell = cell
|
||||
.with_added_call(
|
||||
@@ -1814,16 +1785,7 @@ mod tests {
|
||||
}],
|
||||
)
|
||||
.unwrap();
|
||||
cell.complete_call(
|
||||
"c3",
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call("c3", CommandOutput::default(), Duration::from_millis(1));
|
||||
|
||||
let lines = cell.display_lines(80);
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
@@ -1856,16 +1818,7 @@ mod tests {
|
||||
start_time: Some(Instant::now()),
|
||||
duration: None,
|
||||
});
|
||||
cell.complete_call(
|
||||
"c1",
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call("c1", CommandOutput::default(), Duration::from_millis(1));
|
||||
let lines = cell.display_lines(80);
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
insta::assert_snapshot!(rendered);
|
||||
@@ -1885,16 +1838,7 @@ mod tests {
|
||||
duration: None,
|
||||
});
|
||||
// Mark call complete so it renders as "Ran"
|
||||
cell.complete_call(
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
|
||||
|
||||
// Small width to force wrapping on both lines
|
||||
let width: u16 = 28;
|
||||
@@ -1914,16 +1858,7 @@ mod tests {
|
||||
start_time: Some(Instant::now()),
|
||||
duration: None,
|
||||
});
|
||||
cell.complete_call(
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
|
||||
// Wide enough that it fits inline
|
||||
let lines = cell.display_lines(80);
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
@@ -1942,16 +1877,7 @@ mod tests {
|
||||
start_time: Some(Instant::now()),
|
||||
duration: None,
|
||||
});
|
||||
cell.complete_call(
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
|
||||
let lines = cell.display_lines(24);
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
insta::assert_snapshot!(rendered);
|
||||
@@ -1969,16 +1895,7 @@ mod tests {
|
||||
start_time: Some(Instant::now()),
|
||||
duration: None,
|
||||
});
|
||||
cell.complete_call(
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
|
||||
let lines = cell.display_lines(80);
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
insta::assert_snapshot!(rendered);
|
||||
@@ -1997,16 +1914,7 @@ mod tests {
|
||||
start_time: Some(Instant::now()),
|
||||
duration: None,
|
||||
});
|
||||
cell.complete_call(
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
formatted_output: String::new(),
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
cell.complete_call(&call_id, CommandOutput::default(), Duration::from_millis(1));
|
||||
let lines = cell.display_lines(28);
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
insta::assert_snapshot!(rendered);
|
||||
@@ -2033,9 +1941,8 @@ mod tests {
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 1,
|
||||
stdout: String::new(),
|
||||
stderr,
|
||||
formatted_output: String::new(),
|
||||
aggregated_output: stderr,
|
||||
},
|
||||
Duration::from_millis(1),
|
||||
);
|
||||
@@ -2077,9 +1984,8 @@ mod tests {
|
||||
&call_id,
|
||||
CommandOutput {
|
||||
exit_code: 1,
|
||||
stdout: String::new(),
|
||||
stderr,
|
||||
formatted_output: String::new(),
|
||||
aggregated_output: stderr,
|
||||
},
|
||||
Duration::from_millis(5),
|
||||
);
|
||||
|
||||
@@ -6,6 +6,9 @@ use ratatui::style::Style;
|
||||
use ratatui::style::Stylize;
|
||||
use ratatui::text::Span;
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
const ALT_PREFIX: &str = "⌥ + ";
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
const ALT_PREFIX: &str = "alt + ";
|
||||
const CTRL_PREFIX: &str = "ctrl + ";
|
||||
const SHIFT_PREFIX: &str = "shift + ";
|
||||
|
||||
@@ -724,8 +724,7 @@ mod tests {
|
||||
"exec-1",
|
||||
CommandOutput {
|
||||
exit_code: 0,
|
||||
stdout: "src\nREADME.md\n".into(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: "src\nREADME.md\n".into(),
|
||||
formatted_output: "src\nREADME.md\n".into(),
|
||||
},
|
||||
Duration::from_millis(420),
|
||||
|
||||
@@ -10,6 +10,7 @@ use codex_core::ConversationsPage;
|
||||
use codex_core::Cursor;
|
||||
use codex_core::INTERACTIVE_SESSION_SOURCES;
|
||||
use codex_core::RolloutRecorder;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use color_eyre::eyre::Result;
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
@@ -30,10 +31,7 @@ use crate::text_formatting::truncate_text;
|
||||
use crate::tui::FrameRequester;
|
||||
use crate::tui::Tui;
|
||||
use crate::tui::TuiEvent;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::InputMessageKind;
|
||||
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
|
||||
|
||||
const PAGE_SIZE: usize = 25;
|
||||
const LOAD_NEAR_THRESHOLD: usize = 5;
|
||||
@@ -616,37 +614,8 @@ fn extract_timestamp(value: &serde_json::Value) -> Option<DateTime<Utc>> {
|
||||
fn preview_from_head(head: &[serde_json::Value]) -> Option<String> {
|
||||
head.iter()
|
||||
.filter_map(|value| serde_json::from_value::<ResponseItem>(value.clone()).ok())
|
||||
.find_map(|item| match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
// Find the actual user message (as opposed to user instructions or ide context)
|
||||
let preview = content
|
||||
.into_iter()
|
||||
.filter_map(|content| match content {
|
||||
ContentItem::InputText { text }
|
||||
if matches!(
|
||||
InputMessageKind::from(("user", text.as_str())),
|
||||
InputMessageKind::Plain
|
||||
) =>
|
||||
{
|
||||
// Strip ide context.
|
||||
let text = match text.find(USER_MESSAGE_BEGIN) {
|
||||
Some(idx) => {
|
||||
text[idx + USER_MESSAGE_BEGIN.len()..].trim().to_string()
|
||||
}
|
||||
None => text,
|
||||
};
|
||||
Some(text)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<String>();
|
||||
|
||||
if preview.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(preview)
|
||||
}
|
||||
}
|
||||
.find_map(|item| match codex_core::parse_turn_item(&item) {
|
||||
Some(TurnItem::UserMessage(user)) => Some(user.message()),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
@@ -999,6 +968,19 @@ mod tests {
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "input_text", "text": "<user_instructions>hi</user_instructions>" },
|
||||
]
|
||||
}),
|
||||
json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "input_text", "text": "<environment_context>...</environment_context>" },
|
||||
]
|
||||
}),
|
||||
json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "input_text", "text": "real question" },
|
||||
{ "type": "input_image", "image_url": "ignored" }
|
||||
]
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
---
|
||||
source: tui/src/status_indicator_widget.rs
|
||||
assertion_line: 289
|
||||
expression: terminal.backend()
|
||||
---
|
||||
"• Working (0s • esc to interrupt) "
|
||||
" "
|
||||
" ↳ first "
|
||||
" ↳ second "
|
||||
" ⌥ + ↑ edit "
|
||||
" "
|
||||
" "
|
||||
" "
|
||||
@@ -3,6 +3,8 @@ use crate::history_cell::HistoryCell;
|
||||
use crate::history_cell::PlainHistoryCell;
|
||||
use crate::history_cell::with_border_with_inner_width;
|
||||
use crate::version::CODEX_CLI_VERSION;
|
||||
use chrono::DateTime;
|
||||
use chrono::Local;
|
||||
use codex_common::create_config_summary_entries;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
@@ -25,6 +27,7 @@ use super::helpers::format_directory_display;
|
||||
use super::helpers::format_tokens_compact;
|
||||
use super::rate_limits::RateLimitSnapshotDisplay;
|
||||
use super::rate_limits::StatusRateLimitData;
|
||||
use super::rate_limits::StatusRateLimitRow;
|
||||
use super::rate_limits::compose_rate_limit_data;
|
||||
use super::rate_limits::format_status_limit_summary;
|
||||
use super::rate_limits::render_status_limit_progress_bar;
|
||||
@@ -64,9 +67,17 @@ pub(crate) fn new_status_output(
|
||||
context_usage: Option<&TokenUsage>,
|
||||
session_id: &Option<ConversationId>,
|
||||
rate_limits: Option<&RateLimitSnapshotDisplay>,
|
||||
now: DateTime<Local>,
|
||||
) -> CompositeHistoryCell {
|
||||
let command = PlainHistoryCell::new(vec!["/status".magenta().into()]);
|
||||
let card = StatusHistoryCell::new(config, total_usage, context_usage, session_id, rate_limits);
|
||||
let card = StatusHistoryCell::new(
|
||||
config,
|
||||
total_usage,
|
||||
context_usage,
|
||||
session_id,
|
||||
rate_limits,
|
||||
now,
|
||||
);
|
||||
|
||||
CompositeHistoryCell::new(vec![Box::new(command), Box::new(card)])
|
||||
}
|
||||
@@ -78,6 +89,7 @@ impl StatusHistoryCell {
|
||||
context_usage: Option<&TokenUsage>,
|
||||
session_id: &Option<ConversationId>,
|
||||
rate_limits: Option<&RateLimitSnapshotDisplay>,
|
||||
now: DateTime<Local>,
|
||||
) -> Self {
|
||||
let config_entries = create_config_summary_entries(config);
|
||||
let (model_name, model_details) = compose_model_display(config, &config_entries);
|
||||
@@ -108,7 +120,7 @@ impl StatusHistoryCell {
|
||||
output: total_usage.output_tokens,
|
||||
context_window,
|
||||
};
|
||||
let rate_limits = compose_rate_limit_data(rate_limits);
|
||||
let rate_limits = compose_rate_limit_data(rate_limits, now);
|
||||
|
||||
Self {
|
||||
model_name,
|
||||
@@ -171,47 +183,66 @@ impl StatusHistoryCell {
|
||||
];
|
||||
}
|
||||
|
||||
let mut lines = Vec::with_capacity(rows_data.len() * 2);
|
||||
|
||||
for row in rows_data {
|
||||
let value_spans = vec![
|
||||
Span::from(render_status_limit_progress_bar(row.percent_used)),
|
||||
Span::from(" "),
|
||||
Span::from(format_status_limit_summary(row.percent_used)),
|
||||
];
|
||||
let base_spans = formatter.full_spans(row.label.as_str(), value_spans);
|
||||
let base_line = Line::from(base_spans.clone());
|
||||
|
||||
if let Some(resets_at) = row.resets_at.as_ref() {
|
||||
let resets_span = Span::from(format!("(resets {resets_at})")).dim();
|
||||
let mut inline_spans = base_spans.clone();
|
||||
inline_spans.push(Span::from(" ").dim());
|
||||
inline_spans.push(resets_span.clone());
|
||||
|
||||
if line_display_width(&Line::from(inline_spans.clone()))
|
||||
<= available_inner_width
|
||||
{
|
||||
lines.push(Line::from(inline_spans));
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
lines.push(formatter.continuation(vec![resets_span]));
|
||||
}
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
}
|
||||
}
|
||||
|
||||
self.rate_limit_row_lines(rows_data, available_inner_width, formatter)
|
||||
}
|
||||
StatusRateLimitData::Stale(rows_data) => {
|
||||
let mut lines =
|
||||
self.rate_limit_row_lines(rows_data, available_inner_width, formatter);
|
||||
lines.push(formatter.line(
|
||||
"Warning",
|
||||
vec![Span::from("limits may be stale - start new turn to refresh.").dim()],
|
||||
));
|
||||
lines
|
||||
}
|
||||
StatusRateLimitData::Missing => {
|
||||
vec![formatter.line(
|
||||
"Limits",
|
||||
vec![Span::from("send a message to load usage data").dim()],
|
||||
vec![
|
||||
Span::from("visit ").dim(),
|
||||
"chatgpt.com/codex/settings/usage".cyan().underlined(),
|
||||
],
|
||||
)]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn rate_limit_row_lines(
|
||||
&self,
|
||||
rows: &[StatusRateLimitRow],
|
||||
available_inner_width: usize,
|
||||
formatter: &FieldFormatter,
|
||||
) -> Vec<Line<'static>> {
|
||||
let mut lines = Vec::with_capacity(rows.len().saturating_mul(2));
|
||||
|
||||
for row in rows {
|
||||
let value_spans = vec![
|
||||
Span::from(render_status_limit_progress_bar(row.percent_used)),
|
||||
Span::from(" "),
|
||||
Span::from(format_status_limit_summary(row.percent_used)),
|
||||
];
|
||||
let base_spans = formatter.full_spans(row.label.as_str(), value_spans);
|
||||
let base_line = Line::from(base_spans.clone());
|
||||
|
||||
if let Some(resets_at) = row.resets_at.as_ref() {
|
||||
let resets_span = Span::from(format!("(resets {resets_at})")).dim();
|
||||
let mut inline_spans = base_spans.clone();
|
||||
inline_spans.push(Span::from(" ").dim());
|
||||
inline_spans.push(resets_span.clone());
|
||||
|
||||
if line_display_width(&Line::from(inline_spans.clone())) <= available_inner_width {
|
||||
lines.push(Line::from(inline_spans));
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
lines.push(formatter.continuation(vec![resets_span]));
|
||||
}
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
}
|
||||
}
|
||||
|
||||
lines
|
||||
}
|
||||
|
||||
fn collect_rate_limit_labels(&self, seen: &mut BTreeSet<String>, labels: &mut Vec<String>) {
|
||||
match &self.rate_limits {
|
||||
StatusRateLimitData::Available(rows) => {
|
||||
@@ -223,6 +254,12 @@ impl StatusHistoryCell {
|
||||
}
|
||||
}
|
||||
}
|
||||
StatusRateLimitData::Stale(rows) => {
|
||||
for row in rows {
|
||||
push_label(labels, seen, row.label.as_str());
|
||||
}
|
||||
push_label(labels, seen, "Warning");
|
||||
}
|
||||
StatusRateLimitData::Missing => push_label(labels, seen, "Limits"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ use crate::chatwidget::get_limits_duration;
|
||||
|
||||
use super::helpers::format_reset_timestamp;
|
||||
use chrono::DateTime;
|
||||
use chrono::Duration as ChronoDuration;
|
||||
use chrono::Local;
|
||||
use chrono::Utc;
|
||||
use codex_core::protocol::RateLimitSnapshot;
|
||||
@@ -21,9 +22,12 @@ pub(crate) struct StatusRateLimitRow {
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) enum StatusRateLimitData {
|
||||
Available(Vec<StatusRateLimitRow>),
|
||||
Stale(Vec<StatusRateLimitRow>),
|
||||
Missing,
|
||||
}
|
||||
|
||||
pub(crate) const RATE_LIMIT_STALE_THRESHOLD_MINUTES: i64 = 15;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct RateLimitWindowDisplay {
|
||||
pub used_percent: f64,
|
||||
@@ -49,6 +53,7 @@ impl RateLimitWindowDisplay {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct RateLimitSnapshotDisplay {
|
||||
pub captured_at: DateTime<Local>,
|
||||
pub primary: Option<RateLimitWindowDisplay>,
|
||||
pub secondary: Option<RateLimitWindowDisplay>,
|
||||
}
|
||||
@@ -58,6 +63,7 @@ pub(crate) fn rate_limit_snapshot_display(
|
||||
captured_at: DateTime<Local>,
|
||||
) -> RateLimitSnapshotDisplay {
|
||||
RateLimitSnapshotDisplay {
|
||||
captured_at,
|
||||
primary: snapshot
|
||||
.primary
|
||||
.as_ref()
|
||||
@@ -71,6 +77,7 @@ pub(crate) fn rate_limit_snapshot_display(
|
||||
|
||||
pub(crate) fn compose_rate_limit_data(
|
||||
snapshot: Option<&RateLimitSnapshotDisplay>,
|
||||
now: DateTime<Local>,
|
||||
) -> StatusRateLimitData {
|
||||
match snapshot {
|
||||
Some(snapshot) => {
|
||||
@@ -102,8 +109,13 @@ pub(crate) fn compose_rate_limit_data(
|
||||
});
|
||||
}
|
||||
|
||||
let is_stale = now.signed_duration_since(snapshot.captured_at)
|
||||
> ChronoDuration::minutes(RATE_LIMIT_STALE_THRESHOLD_MINUTES);
|
||||
|
||||
if rows.is_empty() {
|
||||
StatusRateLimitData::Available(vec![])
|
||||
} else if is_stale {
|
||||
StatusRateLimitData::Stale(rows)
|
||||
} else {
|
||||
StatusRateLimitData::Available(rows)
|
||||
}
|
||||
|
||||
@@ -15,5 +15,5 @@ expression: sanitized
|
||||
│ │
|
||||
│ Token usage: 750 total (500 input + 250 output) │
|
||||
│ Context window: 100% left (750 used / 272K) │
|
||||
│ Limits: send a message to load usage data │
|
||||
│ Limits: visit chatgpt.com/codex/settings/usage │
|
||||
╰─────────────────────────────────────────────────────────────────╯
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
source: tui/src/status/tests.rs
|
||||
expression: sanitized
|
||||
---
|
||||
/status
|
||||
|
||||
╭─────────────────────────────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning none, summaries auto) │
|
||||
│ Directory: [[workspace]] │
|
||||
│ Approval: on-request │
|
||||
│ Sandbox: read-only │
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + 900 output) │
|
||||
│ Context window: 100% left (2.1K used / 272K) │
|
||||
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
|
||||
│ Weekly limit: [████████░░░░░░░░░░░░] 40% used (resets 03:34) │
|
||||
│ Warning: limits may be stale - start new turn to refresh. │
|
||||
╰─────────────────────────────────────────────────────────────────────╯
|
||||
@@ -111,7 +111,14 @@ fn status_snapshot_includes_reasoning_details() {
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -152,7 +159,14 @@ fn status_snapshot_includes_monthly_limit() {
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -178,7 +192,12 @@ fn status_card_token_usage_excludes_cached_tokens() {
|
||||
total_tokens: 2_100,
|
||||
};
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None);
|
||||
let now = chrono::Local
|
||||
.with_ymd_and_hms(2024, 1, 1, 0, 0, 0)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
|
||||
let rendered = render_lines(&composite.display_lines(120));
|
||||
|
||||
assert!(
|
||||
@@ -219,7 +238,14 @@ fn status_snapshot_truncates_in_narrow_terminal() {
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(46));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -246,7 +272,12 @@ fn status_snapshot_shows_missing_limits_message() {
|
||||
total_tokens: 750,
|
||||
};
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None);
|
||||
let now = chrono::Local
|
||||
.with_ymd_and_hms(2024, 2, 3, 4, 5, 6)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -282,7 +313,66 @@ fn status_snapshot_shows_empty_limits_message() {
|
||||
.expect("timestamp");
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
*line = line.replace('\\', "/");
|
||||
}
|
||||
}
|
||||
let sanitized = sanitize_directory(rendered_lines).join("\n");
|
||||
assert_snapshot!(sanitized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn status_snapshot_shows_stale_limits_message() {
|
||||
let temp_home = TempDir::new().expect("temp home");
|
||||
let mut config = test_config(&temp_home);
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 1_200,
|
||||
cached_input_tokens: 200,
|
||||
output_tokens: 900,
|
||||
reasoning_output_tokens: 150,
|
||||
total_tokens: 2_250,
|
||||
};
|
||||
|
||||
let captured_at = chrono::Local
|
||||
.with_ymd_and_hms(2024, 1, 2, 3, 4, 5)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
let snapshot = RateLimitSnapshot {
|
||||
primary: Some(RateLimitWindow {
|
||||
used_percent: 72.5,
|
||||
window_minutes: Some(300),
|
||||
resets_at: Some(reset_at_from(&captured_at, 600)),
|
||||
}),
|
||||
secondary: Some(RateLimitWindow {
|
||||
used_percent: 40.0,
|
||||
window_minutes: Some(10_080),
|
||||
resets_at: Some(reset_at_from(&captured_at, 1_800)),
|
||||
}),
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
let now = captured_at + ChronoDuration::minutes(20);
|
||||
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
now,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -314,7 +404,12 @@ fn status_context_window_uses_last_usage() {
|
||||
total_tokens: 13_679,
|
||||
};
|
||||
|
||||
let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None);
|
||||
let now = chrono::Local
|
||||
.with_ymd_and_hms(2024, 6, 1, 12, 0, 0)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None, now);
|
||||
let rendered_lines = render_lines(&composite.display_lines(80));
|
||||
let context_line = rendered_lines
|
||||
.into_iter()
|
||||
|
||||
@@ -284,6 +284,11 @@ mod tests {
|
||||
terminal
|
||||
.draw(|f| w.render_ref(f.area(), f.buffer_mut()))
|
||||
.expect("draw");
|
||||
#[cfg(target_os = "macos")]
|
||||
insta::with_settings!({ snapshot_suffix => "macos" }, {
|
||||
insta::assert_snapshot!(terminal.backend());
|
||||
});
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
insta::assert_snapshot!(terminal.backend());
|
||||
}
|
||||
|
||||
|
||||
15
codex-rs/utils/tokenizer/Cargo.toml
Normal file
15
codex-rs/utils/tokenizer/Cargo.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
name = "codex-utils-tokenizer"
|
||||
version.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tiktoken-rs = "0.7"
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user