mirror of
https://github.com/openai/codex.git
synced 2026-05-11 06:42:30 +00:00
Compare commits
20 Commits
jif/client
...
dev/zhao/t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e35cbb7acf | ||
|
|
2c87ca8c68 | ||
|
|
3aa9c0886f | ||
|
|
2b7378ac77 | ||
|
|
ddcc60a085 | ||
|
|
8465f1f2f4 | ||
|
|
7ab45487dd | ||
|
|
cecbd5b021 | ||
|
|
4000e26303 | ||
|
|
e032d338f2 | ||
|
|
8bebe86a47 | ||
|
|
ab2e7499f8 | ||
|
|
daf77b8452 | ||
|
|
03a6e853c0 | ||
|
|
837bc98a1d | ||
|
|
842a1b7fe7 | ||
|
|
03ffe4d595 | ||
|
|
ae2a084fae | ||
|
|
a941ae7632 | ||
|
|
2c665fb1dd |
2
.github/codex/home/config.toml
vendored
2
.github/codex/home/config.toml
vendored
@@ -1,3 +1,3 @@
|
||||
model = "gpt-5"
|
||||
model = "gpt-5.1"
|
||||
|
||||
# Consider setting [mcp_servers] here!
|
||||
|
||||
2
.github/workflows/issue-deduplicator.yml
vendored
2
.github/workflows/issue-deduplicator.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
with:
|
||||
openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
|
||||
allow-users: "*"
|
||||
model: gpt-5
|
||||
model: gpt-5.1
|
||||
prompt: |
|
||||
You are an assistant that triages new GitHub issues by identifying potential duplicates.
|
||||
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -64,6 +64,9 @@ apply_patch/
|
||||
# coverage
|
||||
coverage/
|
||||
|
||||
# personal files
|
||||
personal/
|
||||
|
||||
# os
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
36
codex-rs/Cargo.lock
generated
36
codex-rs/Cargo.lock
generated
@@ -874,7 +874,6 @@ dependencies = [
|
||||
"clap",
|
||||
"codex-protocol",
|
||||
"mcp-types",
|
||||
"paste",
|
||||
"pretty_assertions",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
@@ -1061,6 +1060,8 @@ dependencies = [
|
||||
"clap",
|
||||
"codex-app-server-protocol",
|
||||
"codex-core",
|
||||
"codex-lmstudio",
|
||||
"codex-ollama",
|
||||
"codex-protocol",
|
||||
"once_cell",
|
||||
"serde",
|
||||
@@ -1084,6 +1085,7 @@ dependencies = [
|
||||
"codex-apply-patch",
|
||||
"codex-arg0",
|
||||
"codex-async-utils",
|
||||
"codex-execpolicy2",
|
||||
"codex-file-search",
|
||||
"codex-git",
|
||||
"codex-keyring-store",
|
||||
@@ -1159,7 +1161,6 @@ dependencies = [
|
||||
"codex-arg0",
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-ollama",
|
||||
"codex-protocol",
|
||||
"core_test_support",
|
||||
"libc",
|
||||
@@ -1202,6 +1203,21 @@ dependencies = [
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-execpolicy2"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"multimap",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"starlark",
|
||||
"thiserror 2.0.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-feedback"
|
||||
version = "0.0.0"
|
||||
@@ -1263,6 +1279,19 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-lmstudio"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"codex-core",
|
||||
"reqwest",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"which",
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-login"
|
||||
version = "0.0.0"
|
||||
@@ -1455,12 +1484,12 @@ dependencies = [
|
||||
"codex-ansi-escape",
|
||||
"codex-app-server-protocol",
|
||||
"codex-arg0",
|
||||
"codex-backend-client",
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-feedback",
|
||||
"codex-file-search",
|
||||
"codex-login",
|
||||
"codex-ollama",
|
||||
"codex-protocol",
|
||||
"codex-windows-sandbox",
|
||||
"color-eyre",
|
||||
@@ -1483,6 +1512,7 @@ dependencies = [
|
||||
"ratatui",
|
||||
"ratatui-macros",
|
||||
"regex-lite",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serial_test",
|
||||
|
||||
@@ -17,9 +17,11 @@ members = [
|
||||
"core",
|
||||
"exec",
|
||||
"execpolicy",
|
||||
"execpolicy2",
|
||||
"keyring-store",
|
||||
"file-search",
|
||||
"linux-sandbox",
|
||||
"lmstudio",
|
||||
"login",
|
||||
"mcp-server",
|
||||
"mcp-types",
|
||||
@@ -64,11 +66,13 @@ codex-chatgpt = { path = "chatgpt" }
|
||||
codex-common = { path = "common" }
|
||||
codex-core = { path = "core" }
|
||||
codex-exec = { path = "exec" }
|
||||
codex-execpolicy2 = { path = "execpolicy2" }
|
||||
codex-feedback = { path = "feedback" }
|
||||
codex-file-search = { path = "file-search" }
|
||||
codex-git = { path = "utils/git" }
|
||||
codex-keyring-store = { path = "keyring-store" }
|
||||
codex-linux-sandbox = { path = "linux-sandbox" }
|
||||
codex-lmstudio = { path = "lmstudio" }
|
||||
codex-login = { path = "login" }
|
||||
codex-mcp-server = { path = "mcp-server" }
|
||||
codex-ollama = { path = "ollama" }
|
||||
@@ -150,7 +154,6 @@ opentelemetry-semantic-conventions = "0.30.0"
|
||||
opentelemetry_sdk = "0.30.0"
|
||||
os_info = "3.12.0"
|
||||
owo-colors = "4.2.0"
|
||||
paste = "1.0.15"
|
||||
path-absolutize = "3.1.1"
|
||||
pathdiff = "0.2"
|
||||
portable-pty = "0.9.0"
|
||||
|
||||
@@ -15,7 +15,6 @@ anyhow = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
codex-protocol = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
paste = { workspace = true }
|
||||
schemars = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::JSONRPCNotification;
|
||||
use crate::JSONRPCRequest;
|
||||
@@ -9,12 +7,6 @@ use crate::export::GeneratedSchema;
|
||||
use crate::export::write_json_schema;
|
||||
use crate::protocol::v1;
|
||||
use crate::protocol::v2;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use paste::paste;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -277,34 +269,36 @@ macro_rules! server_request_definitions {
|
||||
(
|
||||
$(
|
||||
$(#[$variant_meta:meta])*
|
||||
$variant:ident
|
||||
$variant:ident $(=> $wire:literal)? {
|
||||
params: $params:ty,
|
||||
response: $response:ty,
|
||||
}
|
||||
),* $(,)?
|
||||
) => {
|
||||
paste! {
|
||||
/// Request initiated from the server and sent to the client.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "method", rename_all = "camelCase")]
|
||||
pub enum ServerRequest {
|
||||
$(
|
||||
$(#[$variant_meta])*
|
||||
$variant {
|
||||
#[serde(rename = "id")]
|
||||
request_id: RequestId,
|
||||
params: [<$variant Params>],
|
||||
},
|
||||
)*
|
||||
}
|
||||
/// Request initiated from the server and sent to the client.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "method", rename_all = "camelCase")]
|
||||
pub enum ServerRequest {
|
||||
$(
|
||||
$(#[$variant_meta])*
|
||||
$(#[serde(rename = $wire)] #[ts(rename = $wire)])?
|
||||
$variant {
|
||||
#[serde(rename = "id")]
|
||||
request_id: RequestId,
|
||||
params: $params,
|
||||
},
|
||||
)*
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, JsonSchema)]
|
||||
pub enum ServerRequestPayload {
|
||||
$( $variant([<$variant Params>]), )*
|
||||
}
|
||||
#[derive(Debug, Clone, PartialEq, JsonSchema)]
|
||||
pub enum ServerRequestPayload {
|
||||
$( $variant($params), )*
|
||||
}
|
||||
|
||||
impl ServerRequestPayload {
|
||||
pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
|
||||
match self {
|
||||
$(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
|
||||
}
|
||||
impl ServerRequestPayload {
|
||||
pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
|
||||
match self {
|
||||
$(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -312,9 +306,9 @@ macro_rules! server_request_definitions {
|
||||
pub fn export_server_responses(
|
||||
out_dir: &::std::path::Path,
|
||||
) -> ::std::result::Result<(), ::ts_rs::ExportError> {
|
||||
paste! {
|
||||
$(<[<$variant Response>] as ::ts_rs::TS>::export_all_to(out_dir)?;)*
|
||||
}
|
||||
$(
|
||||
<$response as ::ts_rs::TS>::export_all_to(out_dir)?;
|
||||
)*
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -323,9 +317,12 @@ macro_rules! server_request_definitions {
|
||||
out_dir: &Path,
|
||||
) -> ::anyhow::Result<Vec<GeneratedSchema>> {
|
||||
let mut schemas = Vec::new();
|
||||
paste! {
|
||||
$(schemas.push(crate::export::write_json_schema::<[<$variant Response>]>(out_dir, stringify!([<$variant Response>]))?);)*
|
||||
}
|
||||
$(
|
||||
schemas.push(crate::export::write_json_schema::<$response>(
|
||||
out_dir,
|
||||
concat!(stringify!($variant), "Response"),
|
||||
)?);
|
||||
)*
|
||||
Ok(schemas)
|
||||
}
|
||||
|
||||
@@ -334,9 +331,12 @@ macro_rules! server_request_definitions {
|
||||
out_dir: &Path,
|
||||
) -> ::anyhow::Result<Vec<GeneratedSchema>> {
|
||||
let mut schemas = Vec::new();
|
||||
paste! {
|
||||
$(schemas.push(crate::export::write_json_schema::<[<$variant Params>]>(out_dir, stringify!([<$variant Params>]))?);)*
|
||||
}
|
||||
$(
|
||||
schemas.push(crate::export::write_json_schema::<$params>(
|
||||
out_dir,
|
||||
concat!(stringify!($variant), "Params"),
|
||||
)?);
|
||||
)*
|
||||
Ok(schemas)
|
||||
}
|
||||
};
|
||||
@@ -426,49 +426,27 @@ impl TryFrom<JSONRPCRequest> for ServerRequest {
|
||||
}
|
||||
|
||||
server_request_definitions! {
|
||||
/// NEW APIs
|
||||
/// Sent when approval is requested for a specific command execution.
|
||||
/// This request is used for Turns started via turn/start.
|
||||
CommandExecutionRequestApproval => "item/commandExecution/requestApproval" {
|
||||
params: v2::CommandExecutionRequestApprovalParams,
|
||||
response: v2::CommandExecutionRequestApprovalResponse,
|
||||
},
|
||||
|
||||
/// DEPRECATED APIs below
|
||||
/// Request to approve a patch.
|
||||
ApplyPatchApproval,
|
||||
/// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
|
||||
ApplyPatchApproval {
|
||||
params: v1::ApplyPatchApprovalParams,
|
||||
response: v1::ApplyPatchApprovalResponse,
|
||||
},
|
||||
/// Request to exec a command.
|
||||
ExecCommandApproval,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ApplyPatchApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
|
||||
/// and [codex_core::protocol::PatchApplyEndEvent].
|
||||
pub call_id: String,
|
||||
pub file_changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root
|
||||
/// for the remainder of the session (unclear if this is honored today).
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ExecCommandApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
|
||||
/// and [codex_core::protocol::ExecCommandEndEvent].
|
||||
pub call_id: String,
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ExecCommandApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ApplyPatchApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
/// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
|
||||
ExecCommandApproval {
|
||||
params: v1::ExecCommandApprovalParams,
|
||||
response: v1::ExecCommandApprovalResponse,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
@@ -533,17 +511,20 @@ client_notification_definitions! {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use anyhow::Result;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::account::PlanType;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[test]
|
||||
fn serialize_new_conversation() -> Result<()> {
|
||||
let request = ClientRequest::NewConversation {
|
||||
request_id: RequestId::Integer(42),
|
||||
params: v1::NewConversationParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
model_provider: None,
|
||||
profile: None,
|
||||
cwd: None,
|
||||
@@ -561,7 +542,7 @@ mod tests {
|
||||
"method": "newConversation",
|
||||
"id": 42,
|
||||
"params": {
|
||||
"model": "gpt-5-codex",
|
||||
"model": "gpt-5.1-codex",
|
||||
"modelProvider": null,
|
||||
"profile": null,
|
||||
"cwd": null,
|
||||
@@ -616,7 +597,7 @@ mod tests {
|
||||
#[test]
|
||||
fn serialize_server_request() -> Result<()> {
|
||||
let conversation_id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?;
|
||||
let params = ExecCommandApprovalParams {
|
||||
let params = v1::ExecCommandApprovalParams {
|
||||
conversation_id,
|
||||
call_id: "call-42".to_string(),
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
|
||||
@@ -8,8 +8,12 @@ use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::config_types::SandboxMode;
|
||||
use codex_protocol::config_types::Verbosity;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
@@ -191,6 +195,46 @@ pub struct GitDiffToRemoteResponse {
|
||||
pub diff: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ApplyPatchApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
|
||||
/// and [codex_core::protocol::PatchApplyEndEvent].
|
||||
pub call_id: String,
|
||||
pub file_changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root
|
||||
/// for the remainder of the session (unclear if this is honored today).
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ApplyPatchApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ExecCommandApprovalParams {
|
||||
pub conversation_id: ConversationId,
|
||||
/// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
|
||||
/// and [codex_core::protocol::ExecCommandEndEvent].
|
||||
pub call_id: String,
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ExecCommandApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct CancelLoginChatGptParams {
|
||||
|
||||
@@ -4,11 +4,13 @@ use std::path::PathBuf;
|
||||
use crate::protocol::common::AuthMode;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::account::PlanType;
|
||||
use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment;
|
||||
use codex_protocol::config_types::ReasoningEffort;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
|
||||
use codex_protocol::items::TurnItem as CoreTurnItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
|
||||
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
|
||||
use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
|
||||
use codex_protocol::user_input::UserInput as CoreUserInput;
|
||||
@@ -20,7 +22,7 @@ use serde_json::Value as JsonValue;
|
||||
use ts_rs::TS;
|
||||
|
||||
// Macro to declare a camelCased API v2 enum mirroring a core enum which
|
||||
// tends to use kebab-case.
|
||||
// tends to use either snake_case or kebab-case.
|
||||
macro_rules! v2_enum_from_core {
|
||||
(
|
||||
pub enum $Name:ident from $Src:path { $( $Variant:ident ),+ $(,)? }
|
||||
@@ -56,6 +58,23 @@ v2_enum_from_core!(
|
||||
}
|
||||
);
|
||||
|
||||
v2_enum_from_core!(
|
||||
pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel {
|
||||
Low,
|
||||
Medium,
|
||||
High
|
||||
}
|
||||
);
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum ApprovalDecision {
|
||||
Accept,
|
||||
Decline,
|
||||
Cancel,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "camelCase")]
|
||||
#[ts(tag = "type")]
|
||||
@@ -63,6 +82,8 @@ v2_enum_from_core!(
|
||||
pub enum SandboxPolicy {
|
||||
DangerFullAccess,
|
||||
ReadOnly,
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
WorkspaceWrite {
|
||||
#[serde(default)]
|
||||
writable_roots: Vec<PathBuf>,
|
||||
@@ -119,6 +140,98 @@ impl From<codex_protocol::protocol::SandboxPolicy> for SandboxPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct SandboxCommandAssessment {
|
||||
pub description: String,
|
||||
pub risk_level: CommandRiskLevel,
|
||||
}
|
||||
|
||||
impl SandboxCommandAssessment {
|
||||
pub fn into_core(self) -> CoreSandboxCommandAssessment {
|
||||
CoreSandboxCommandAssessment {
|
||||
description: self.description,
|
||||
risk_level: self.risk_level.to_core(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CoreSandboxCommandAssessment> for SandboxCommandAssessment {
|
||||
fn from(value: CoreSandboxCommandAssessment) -> Self {
|
||||
Self {
|
||||
description: value.description,
|
||||
risk_level: CommandRiskLevel::from(value.risk_level),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "camelCase")]
|
||||
#[ts(tag = "type")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum CommandAction {
|
||||
Read {
|
||||
command: String,
|
||||
name: String,
|
||||
path: PathBuf,
|
||||
},
|
||||
ListFiles {
|
||||
command: String,
|
||||
path: Option<String>,
|
||||
},
|
||||
Search {
|
||||
command: String,
|
||||
query: Option<String>,
|
||||
path: Option<String>,
|
||||
},
|
||||
Unknown {
|
||||
command: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl CommandAction {
|
||||
pub fn into_core(self) -> CoreParsedCommand {
|
||||
match self {
|
||||
CommandAction::Read {
|
||||
command: cmd,
|
||||
name,
|
||||
path,
|
||||
} => CoreParsedCommand::Read { cmd, name, path },
|
||||
CommandAction::ListFiles { command: cmd, path } => {
|
||||
CoreParsedCommand::ListFiles { cmd, path }
|
||||
}
|
||||
CommandAction::Search {
|
||||
command: cmd,
|
||||
query,
|
||||
path,
|
||||
} => CoreParsedCommand::Search { cmd, query, path },
|
||||
CommandAction::Unknown { command: cmd } => CoreParsedCommand::Unknown { cmd },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CoreParsedCommand> for CommandAction {
|
||||
fn from(value: CoreParsedCommand) -> Self {
|
||||
match value {
|
||||
CoreParsedCommand::Read { cmd, name, path } => CommandAction::Read {
|
||||
command: cmd,
|
||||
name,
|
||||
path,
|
||||
},
|
||||
CoreParsedCommand::ListFiles { cmd, path } => {
|
||||
CommandAction::ListFiles { command: cmd, path }
|
||||
}
|
||||
CoreParsedCommand::Search { cmd, query, path } => CommandAction::Search {
|
||||
command: cmd,
|
||||
query,
|
||||
path,
|
||||
},
|
||||
CoreParsedCommand::Unknown { cmd } => CommandAction::Unknown { command: cmd },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(tag = "type", rename_all = "camelCase")]
|
||||
#[ts(tag = "type")]
|
||||
@@ -279,7 +392,7 @@ pub struct ThreadStartParams {
|
||||
pub cwd: Option<String>,
|
||||
pub approval_policy: Option<AskForApproval>,
|
||||
pub sandbox: Option<SandboxMode>,
|
||||
pub config: Option<HashMap<String, serde_json::Value>>,
|
||||
pub config: Option<HashMap<String, JsonValue>>,
|
||||
pub base_instructions: Option<String>,
|
||||
pub developer_instructions: Option<String>,
|
||||
}
|
||||
@@ -506,14 +619,14 @@ impl From<CoreUserInput> for UserInput {
|
||||
#[ts(tag = "type")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum ThreadItem {
|
||||
UserMessage {
|
||||
id: String,
|
||||
content: Vec<UserInput>,
|
||||
},
|
||||
AgentMessage {
|
||||
id: String,
|
||||
text: String,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
UserMessage { id: String, content: Vec<UserInput> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
AgentMessage { id: String, text: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
Reasoning {
|
||||
id: String,
|
||||
#[serde(default)]
|
||||
@@ -521,19 +634,35 @@ pub enum ThreadItem {
|
||||
#[serde(default)]
|
||||
content: Vec<String>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
CommandExecution {
|
||||
id: String,
|
||||
/// The command to be executed.
|
||||
command: String,
|
||||
aggregated_output: String,
|
||||
exit_code: Option<i32>,
|
||||
/// The command's working directory.
|
||||
cwd: PathBuf,
|
||||
status: CommandExecutionStatus,
|
||||
/// A best-effort parsing of the command to understand the action(s) it will perform.
|
||||
/// This returns a list of CommandAction objects because a single shell command may
|
||||
/// be composed of many commands piped together.
|
||||
command_actions: Vec<CommandAction>,
|
||||
/// The command's output, aggregated from stdout and stderr.
|
||||
aggregated_output: Option<String>,
|
||||
/// The command's exit code.
|
||||
exit_code: Option<i32>,
|
||||
/// The duration of the command execution in milliseconds.
|
||||
duration_ms: Option<i64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
FileChange {
|
||||
id: String,
|
||||
changes: Vec<FileUpdateChange>,
|
||||
status: PatchApplyStatus,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
McpToolCall {
|
||||
id: String,
|
||||
server: String,
|
||||
@@ -543,22 +672,18 @@ pub enum ThreadItem {
|
||||
result: Option<McpToolCallResult>,
|
||||
error: Option<McpToolCallError>,
|
||||
},
|
||||
WebSearch {
|
||||
id: String,
|
||||
query: String,
|
||||
},
|
||||
TodoList {
|
||||
id: String,
|
||||
items: Vec<TodoItem>,
|
||||
},
|
||||
ImageView {
|
||||
id: String,
|
||||
path: String,
|
||||
},
|
||||
CodeReview {
|
||||
id: String,
|
||||
review: String,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
WebSearch { id: String, query: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
TodoList { id: String, items: Vec<TodoItem> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
ImageView { id: String, path: String },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(rename_all = "camelCase")]
|
||||
CodeReview { id: String, review: String },
|
||||
}
|
||||
|
||||
impl From<CoreTurnItem> for ThreadItem {
|
||||
@@ -758,6 +883,39 @@ pub struct McpToolCallProgressNotification {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct CommandExecutionRequestApprovalParams {
|
||||
pub thread_id: String,
|
||||
pub turn_id: String,
|
||||
pub item_id: String,
|
||||
/// Optional explanatory reason (e.g. request for network access).
|
||||
pub reason: Option<String>,
|
||||
/// Optional model-provided risk assessment describing the blocked command.
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct CommandExecutionRequestAcceptSettings {
|
||||
/// If true, automatically approve this command for the duration of the session.
|
||||
#[serde(default)]
|
||||
pub for_session: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct CommandExecutionRequestApprovalResponse {
|
||||
pub decision: ApprovalDecision,
|
||||
/// Optional approval settings for when the decision is `accept`.
|
||||
/// Ignored if the decision is `decline` or `cancel`.
|
||||
#[serde(default)]
|
||||
pub accept_settings: Option<CommandExecutionRequestAcceptSettings>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
|
||||
@@ -2,6 +2,16 @@
|
||||
|
||||
`codex app-server` is the interface Codex uses to power rich interfaces such as the [Codex VS Code extension](https://marketplace.visualstudio.com/items?itemName=openai.chatgpt). The message schema is currently unstable, but those who wish to build experimental UIs on top of Codex may find it valuable.
|
||||
|
||||
## Table of Contents
|
||||
- [Protocol](#protocol)
|
||||
- [Message Schema](#message-schema)
|
||||
- [Lifecycle Overview](#lifecycle-overview)
|
||||
- [Initialization](#initialization)
|
||||
- [Core primitives](#core-primitives)
|
||||
- [Thread & turn endpoints](#thread--turn-endpoints)
|
||||
- [Auth endpoints](#auth-endpoints)
|
||||
- [Events (work-in-progress)](#v2-streaming-events-work-in-progress)
|
||||
|
||||
## Protocol
|
||||
|
||||
Similar to [MCP](https://modelcontextprotocol.io/), `codex app-server` supports bidirectional communication, streaming JSONL over stdio. The protocol is JSON-RPC 2.0, though the `"jsonrpc":"2.0"` header is omitted.
|
||||
@@ -15,6 +25,14 @@ codex app-server generate-ts --out DIR
|
||||
codex app-server generate-json-schema --out DIR
|
||||
```
|
||||
|
||||
## Lifecycle Overview
|
||||
|
||||
- Initialize once: Immediately after launching the codex app-server process, send an `initialize` request with your client metadata, then emit an `initialized` notification. Any other request before this handshake gets rejected.
|
||||
- Start (or resume) a thread: Call `thread/start` to open a fresh conversation. The response returns the thread object and you’ll also get a `thread/started` notification. If you’re continuing an existing conversation, call `thread/resume` with its ID instead.
|
||||
- Begin a turn: To send user input, call `turn/start` with the target `threadId` and the user's input. Optional fields let you override model, cwd, sandbox policy, etc. This immediately returns the new turn object and triggers a `turn/started` notification.
|
||||
- Stream events: After `turn/start`, keep reading JSON-RPC notifications on stdout. You’ll see `item/started`, `item/completed`, deltas like `item/agentMessage/delta`, tool progress, etc. These represent streaming model output plus any side effects (commands, tool calls, reasoning notes).
|
||||
- Finish the turn: When the model is done (or the turn is interrupted via making the `turn/interrupt` call), the server sends `turn/completed` with the final turn state and token usage.
|
||||
|
||||
## Initialization
|
||||
|
||||
Clients must send a single `initialize` request before invoking any other method, then acknowledge with an `initialized` notification. The server returns the user agent string it will present to upstream services; subsequent requests issued before initialization receive a `"Not initialized"` error, and repeated `initialize` calls receive an `"Already initialized"` error.
|
||||
@@ -56,7 +74,7 @@ Start a fresh thread when you need a new Codex conversation.
|
||||
{ "method": "thread/start", "id": 10, "params": {
|
||||
// Optionally set config settings. If not specified, will use the user's
|
||||
// current config settings.
|
||||
"model": "gpt-5-codex",
|
||||
"model": "gpt-5.1-codex",
|
||||
"cwd": "/Users/me/project",
|
||||
"approvalPolicy": "never",
|
||||
"sandbox": "workspaceWrite",
|
||||
@@ -137,7 +155,7 @@ You can optionally specify config overrides on the new turn. If specified, these
|
||||
"writableRoots": ["/Users/me/project"],
|
||||
"networkAccess": true
|
||||
},
|
||||
"model": "gpt-5-codex",
|
||||
"model": "gpt-5.1-codex",
|
||||
"effort": "medium",
|
||||
"summary": "concise"
|
||||
} }
|
||||
@@ -258,3 +276,33 @@ Field notes:
|
||||
- `codex app-server generate-ts --out <dir>` emits v2 types under `v2/`.
|
||||
- `codex app-server generate-json-schema --out <dir>` outputs `codex_app_server_protocol.schemas.json`.
|
||||
- See [“Authentication and authorization” in the config docs](../../docs/config.md#authentication-and-authorization) for configuration knobs.
|
||||
|
||||
|
||||
## Events (work-in-progress)
|
||||
|
||||
Event notifications are the server-initiated event stream for thread lifecycles, turn lifecycles, and the items within them. After you start or resume a thread, keep reading stdout for `thread/started`, `turn/*`, and `item/*` notifications.
|
||||
|
||||
### Turn events
|
||||
|
||||
The app-server streams JSON-RPC notifications while a turn is running. Each turn starts with `turn/started` (initial `turn`) and ends with `turn/completed` (final `turn` plus token `usage`), and clients subscribe to the events they care about, rendering each item incrementally as updates arrive. The per-item lifecycle is always: `item/started` → zero or more item-specific deltas → `item/completed`.
|
||||
|
||||
#### Thread items
|
||||
|
||||
`ThreadItem` is the tagged union carried in turn responses and `item/*` notifications. Currently we support events for the following items:
|
||||
- `userMessage` — `{id, content}` where `content` is a list of user inputs (`text`, `image`, or `localImage`).
|
||||
- `agentMessage` — `{id, text}` containing the accumulated agent reply.
|
||||
- `reasoning` — `{id, summary, content}` where `summary` holds streamed reasoning summaries (applicable for most OpenAI models) and `content` holds raw reasoning blocks (applicable for e.g. open source models).
|
||||
- `mcpToolCall` — `{id, server, tool, status, arguments, result?, error?}` describing MCP calls; `status` is `inProgress`, `completed`, or `failed`.
|
||||
- `webSearch` — `{id, query}` for a web search request issued by the agent.
|
||||
|
||||
All items emit two shared lifecycle events:
|
||||
- `item/started` — emits the full `item` when a new unit of work begins so the UI can render it immediately; the `item.id` in this payload matches the `itemId` used by deltas.
|
||||
- `item/completed` — sends the final `item` once that work finishes (e.g., after a tool call or message completes); treat this as the authoritative state.
|
||||
|
||||
There are additional item-specific events:
|
||||
#### agentMessage
|
||||
- `item/agentMessage/delta` — appends streamed text for the agent message; concatenate `delta` values for the same `itemId` in order to reconstruct the full reply.
|
||||
#### reasoning
|
||||
- `item/reasoning/summaryTextDelta` — streams readable reasoning summaries; `summaryIndex` increments when a new summary section opens.
|
||||
- `item/reasoning/summaryPartAdded` — marks the boundary between reasoning summary sections for an `itemId`; subsequent `summaryTextDelta` entries share the same `summaryIndex`.
|
||||
- `item/reasoning/textDelta` — streams raw reasoning text (only applicable for e.g. open source models); use `contentIndex` to group deltas that belong together before showing them in the UI.
|
||||
|
||||
@@ -5,6 +5,12 @@ use codex_app_server_protocol::AccountRateLimitsUpdatedNotification;
|
||||
use codex_app_server_protocol::AgentMessageDeltaNotification;
|
||||
use codex_app_server_protocol::ApplyPatchApprovalParams;
|
||||
use codex_app_server_protocol::ApplyPatchApprovalResponse;
|
||||
use codex_app_server_protocol::ApprovalDecision;
|
||||
use codex_app_server_protocol::CommandAction as V2ParsedCommand;
|
||||
use codex_app_server_protocol::CommandExecutionOutputDeltaNotification;
|
||||
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
|
||||
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
|
||||
use codex_app_server_protocol::CommandExecutionStatus;
|
||||
use codex_app_server_protocol::ExecCommandApprovalParams;
|
||||
use codex_app_server_protocol::ExecCommandApprovalResponse;
|
||||
use codex_app_server_protocol::InterruptConversationResponse;
|
||||
@@ -16,25 +22,29 @@ use codex_app_server_protocol::McpToolCallStatus;
|
||||
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
|
||||
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
|
||||
use codex_app_server_protocol::ReasoningTextDeltaNotification;
|
||||
use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment;
|
||||
use codex_app_server_protocol::ServerNotification;
|
||||
use codex_app_server_protocol::ServerRequestPayload;
|
||||
use codex_app_server_protocol::ThreadItem;
|
||||
use codex_app_server_protocol::TurnInterruptResponse;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::parse_command::shlex_join;
|
||||
use codex_core::protocol::ApplyPatchApprovalRequestEvent;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecApprovalRequestEvent;
|
||||
use codex_core::protocol::ExecCommandEndEvent;
|
||||
use codex_core::protocol::McpToolCallBeginEvent;
|
||||
use codex_core::protocol::McpToolCallEndEvent;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_protocol::ConversationId;
|
||||
use std::convert::TryFrom;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::oneshot;
|
||||
use tracing::error;
|
||||
|
||||
type JsonRpcResult = serde_json::Value;
|
||||
type JsonValue = serde_json::Value;
|
||||
|
||||
pub(crate) async fn apply_bespoke_event_handling(
|
||||
event: Event,
|
||||
@@ -42,6 +52,7 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
conversation: Arc<CodexConversation>,
|
||||
outgoing: Arc<OutgoingMessageSender>,
|
||||
pending_interrupts: PendingInterrupts,
|
||||
api_version: ApiVersion,
|
||||
) {
|
||||
let Event { id: event_id, msg } = event;
|
||||
match msg {
|
||||
@@ -61,11 +72,57 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::ApplyPatchApproval(params))
|
||||
.await;
|
||||
// TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
|
||||
tokio::spawn(async move {
|
||||
on_patch_approval_response(event_id, rx, conversation).await;
|
||||
});
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
|
||||
call_id,
|
||||
turn_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => match api_version {
|
||||
ApiVersion::V1 => {
|
||||
let params = ExecCommandApprovalParams {
|
||||
conversation_id,
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
};
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::ExecCommandApproval(params))
|
||||
.await;
|
||||
tokio::spawn(async move {
|
||||
on_exec_approval_response(event_id, rx, conversation).await;
|
||||
});
|
||||
}
|
||||
ApiVersion::V2 => {
|
||||
let params = CommandExecutionRequestApprovalParams {
|
||||
thread_id: conversation_id.to_string(),
|
||||
turn_id: turn_id.clone(),
|
||||
// Until we migrate the core to be aware of a first class CommandExecutionItem
|
||||
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
|
||||
item_id: call_id.clone(),
|
||||
reason,
|
||||
risk: risk.map(V2SandboxCommandAssessment::from),
|
||||
};
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::CommandExecutionRequestApproval(
|
||||
params,
|
||||
))
|
||||
.await;
|
||||
tokio::spawn(async move {
|
||||
on_command_execution_request_approval_response(event_id, rx, conversation)
|
||||
.await;
|
||||
});
|
||||
}
|
||||
},
|
||||
// TODO(celia): properly construct McpToolCall TurnItem in core.
|
||||
EventMsg::McpToolCallBegin(begin_event) => {
|
||||
let notification = construct_mcp_tool_call_notification(begin_event).await;
|
||||
@@ -121,32 +178,6 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => {
|
||||
let params = ExecCommandApprovalParams {
|
||||
conversation_id,
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
};
|
||||
let rx = outgoing
|
||||
.send_request(ServerRequestPayload::ExecCommandApproval(params))
|
||||
.await;
|
||||
|
||||
// TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
|
||||
tokio::spawn(async move {
|
||||
on_exec_approval_response(event_id, rx, conversation).await;
|
||||
});
|
||||
}
|
||||
EventMsg::TokenCount(token_count_event) => {
|
||||
if let Some(rate_limits) = token_count_event.rate_limits {
|
||||
outgoing
|
||||
@@ -172,6 +203,79 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
.send_server_notification(ServerNotification::ItemCompleted(notification))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecCommandBegin(exec_command_begin_event) => {
|
||||
let item = ThreadItem::CommandExecution {
|
||||
id: exec_command_begin_event.call_id.clone(),
|
||||
command: shlex_join(&exec_command_begin_event.command),
|
||||
cwd: exec_command_begin_event.cwd,
|
||||
status: CommandExecutionStatus::InProgress,
|
||||
command_actions: exec_command_begin_event
|
||||
.parsed_cmd
|
||||
.into_iter()
|
||||
.map(V2ParsedCommand::from)
|
||||
.collect(),
|
||||
aggregated_output: None,
|
||||
exit_code: None,
|
||||
duration_ms: None,
|
||||
};
|
||||
let notification = ItemStartedNotification { item };
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::ItemStarted(notification))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecCommandOutputDelta(exec_command_output_delta_event) => {
|
||||
let notification = CommandExecutionOutputDeltaNotification {
|
||||
item_id: exec_command_output_delta_event.call_id.clone(),
|
||||
delta: String::from_utf8_lossy(&exec_command_output_delta_event.chunk).to_string(),
|
||||
};
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::CommandExecutionOutputDelta(
|
||||
notification,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
EventMsg::ExecCommandEnd(exec_command_end_event) => {
|
||||
let ExecCommandEndEvent {
|
||||
call_id,
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
aggregated_output,
|
||||
exit_code,
|
||||
duration,
|
||||
..
|
||||
} = exec_command_end_event;
|
||||
|
||||
let status = if exit_code == 0 {
|
||||
CommandExecutionStatus::Completed
|
||||
} else {
|
||||
CommandExecutionStatus::Failed
|
||||
};
|
||||
|
||||
let aggregated_output = if aggregated_output.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(aggregated_output)
|
||||
};
|
||||
|
||||
let duration_ms = i64::try_from(duration.as_millis()).unwrap_or(i64::MAX);
|
||||
|
||||
let item = ThreadItem::CommandExecution {
|
||||
id: call_id,
|
||||
command: shlex_join(&command),
|
||||
cwd,
|
||||
status,
|
||||
command_actions: parsed_cmd.into_iter().map(V2ParsedCommand::from).collect(),
|
||||
aggregated_output,
|
||||
exit_code: Some(exit_code),
|
||||
duration_ms: Some(duration_ms),
|
||||
};
|
||||
|
||||
let notification = ItemCompletedNotification { item };
|
||||
outgoing
|
||||
.send_server_notification(ServerNotification::ItemCompleted(notification))
|
||||
.await;
|
||||
}
|
||||
// If this is a TurnAborted, reply to any pending interrupt requests.
|
||||
EventMsg::TurnAborted(turn_aborted_event) => {
|
||||
let pending = {
|
||||
@@ -202,7 +306,7 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
|
||||
async fn on_patch_approval_response(
|
||||
event_id: String,
|
||||
receiver: oneshot::Receiver<JsonRpcResult>,
|
||||
receiver: oneshot::Receiver<JsonValue>,
|
||||
codex: Arc<CodexConversation>,
|
||||
) {
|
||||
let response = receiver.await;
|
||||
@@ -244,7 +348,7 @@ async fn on_patch_approval_response(
|
||||
|
||||
async fn on_exec_approval_response(
|
||||
event_id: String,
|
||||
receiver: oneshot::Receiver<JsonRpcResult>,
|
||||
receiver: oneshot::Receiver<JsonValue>,
|
||||
conversation: Arc<CodexConversation>,
|
||||
) {
|
||||
let response = receiver.await;
|
||||
@@ -278,6 +382,53 @@ async fn on_exec_approval_response(
|
||||
}
|
||||
}
|
||||
|
||||
async fn on_command_execution_request_approval_response(
|
||||
event_id: String,
|
||||
receiver: oneshot::Receiver<JsonValue>,
|
||||
conversation: Arc<CodexConversation>,
|
||||
) {
|
||||
let response = receiver.await;
|
||||
let value = match response {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
error!("request failed: {err:?}");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(value)
|
||||
.unwrap_or_else(|err| {
|
||||
error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}");
|
||||
CommandExecutionRequestApprovalResponse {
|
||||
decision: ApprovalDecision::Decline,
|
||||
accept_settings: None,
|
||||
}
|
||||
});
|
||||
|
||||
let CommandExecutionRequestApprovalResponse {
|
||||
decision,
|
||||
accept_settings,
|
||||
} = response;
|
||||
|
||||
let decision = match (decision, accept_settings) {
|
||||
(ApprovalDecision::Accept, Some(settings)) if settings.for_session => {
|
||||
ReviewDecision::ApprovedForSession
|
||||
}
|
||||
(ApprovalDecision::Accept, _) => ReviewDecision::Approved,
|
||||
(ApprovalDecision::Decline, _) => ReviewDecision::Denied,
|
||||
(ApprovalDecision::Cancel, _) => ReviewDecision::Abort,
|
||||
};
|
||||
if let Err(err) = conversation
|
||||
.submit(Op::ExecApproval {
|
||||
id: event_id,
|
||||
decision,
|
||||
})
|
||||
.await
|
||||
{
|
||||
error!("failed to submit ExecApproval: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
/// similar to handle_mcp_tool_call_begin in exec
|
||||
async fn construct_mcp_tool_call_notification(
|
||||
begin_event: McpToolCallBeginEvent,
|
||||
@@ -287,10 +438,7 @@ async fn construct_mcp_tool_call_notification(
|
||||
server: begin_event.invocation.server,
|
||||
tool: begin_event.invocation.tool,
|
||||
status: McpToolCallStatus::InProgress,
|
||||
arguments: begin_event
|
||||
.invocation
|
||||
.arguments
|
||||
.unwrap_or(JsonRpcResult::Null),
|
||||
arguments: begin_event.invocation.arguments.unwrap_or(JsonValue::Null),
|
||||
result: None,
|
||||
error: None,
|
||||
};
|
||||
@@ -328,10 +476,7 @@ async fn construct_mcp_tool_call_end_notification(
|
||||
server: end_event.invocation.server,
|
||||
tool: end_event.invocation.tool,
|
||||
status,
|
||||
arguments: end_event
|
||||
.invocation
|
||||
.arguments
|
||||
.unwrap_or(JsonRpcResult::Null),
|
||||
arguments: end_event.invocation.arguments.unwrap_or(JsonValue::Null),
|
||||
result,
|
||||
error,
|
||||
};
|
||||
|
||||
@@ -101,12 +101,12 @@ use codex_core::RolloutRecorder;
|
||||
use codex_core::SessionMeta;
|
||||
use codex_core::auth::CLIENT_ID;
|
||||
use codex_core::auth::login_with_api_key;
|
||||
use codex_core::client::http::get_codex_user_agent;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config::ConfigToml;
|
||||
use codex_core::config::edit::ConfigEditsBuilder;
|
||||
use codex_core::config_loader::load_config_as_toml;
|
||||
use codex_core::default_client::get_codex_user_agent;
|
||||
use codex_core::exec::ExecParams;
|
||||
use codex_core::exec_env::create_env;
|
||||
use codex_core::find_conversation_path_by_id_str;
|
||||
@@ -1245,7 +1245,7 @@ impl CodexMessageProcessor {
|
||||
// Auto-attach a conversation listener when starting a thread.
|
||||
// Use the same behavior as the v1 API with experimental_raw_events=false.
|
||||
if let Err(err) = self
|
||||
.attach_conversation_listener(conversation_id, false)
|
||||
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
@@ -1523,7 +1523,7 @@ impl CodexMessageProcessor {
|
||||
}) => {
|
||||
// Auto-attach a conversation listener when resuming a thread.
|
||||
if let Err(err) = self
|
||||
.attach_conversation_listener(conversation_id, false)
|
||||
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
@@ -2376,7 +2376,7 @@ impl CodexMessageProcessor {
|
||||
experimental_raw_events,
|
||||
} = params;
|
||||
match self
|
||||
.attach_conversation_listener(conversation_id, experimental_raw_events)
|
||||
.attach_conversation_listener(conversation_id, experimental_raw_events, ApiVersion::V1)
|
||||
.await
|
||||
{
|
||||
Ok(subscription_id) => {
|
||||
@@ -2417,6 +2417,7 @@ impl CodexMessageProcessor {
|
||||
&mut self,
|
||||
conversation_id: ConversationId,
|
||||
experimental_raw_events: bool,
|
||||
api_version: ApiVersion,
|
||||
) -> Result<Uuid, JSONRPCErrorError> {
|
||||
let conversation = match self
|
||||
.conversation_manager
|
||||
@@ -2440,6 +2441,7 @@ impl CodexMessageProcessor {
|
||||
|
||||
let outgoing_for_task = self.outgoing.clone();
|
||||
let pending_interrupts = self.pending_interrupts.clone();
|
||||
let api_version_for_task = api_version;
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
@@ -2495,6 +2497,7 @@ impl CodexMessageProcessor {
|
||||
conversation.clone(),
|
||||
outgoing_for_task.clone(),
|
||||
pending_interrupts.clone(),
|
||||
api_version_for_task,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -14,9 +14,9 @@ use codex_app_server_protocol::JSONRPCRequest;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::client::http::USER_AGENT_SUFFIX;
|
||||
use codex_core::client::http::get_codex_user_agent;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::default_client::USER_AGENT_SUFFIX;
|
||||
use codex_core::default_client::get_codex_user_agent;
|
||||
use codex_feedback::CodexFeedback;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -27,7 +27,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
|
||||
std::fs::write(
|
||||
config_toml,
|
||||
r#"
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
approval_policy = "on-request"
|
||||
sandbox_mode = "workspace-write"
|
||||
model_reasoning_summary = "detailed"
|
||||
@@ -87,7 +87,7 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
|
||||
}),
|
||||
forced_chatgpt_workspace_id: Some("12345678-0000-0000-0000-000000000000".into()),
|
||||
forced_login_method: Some(ForcedLoginMethod::Chatgpt),
|
||||
model: Some("gpt-5-codex".into()),
|
||||
model: Some("gpt-5.1-codex".into()),
|
||||
model_reasoning_effort: Some(ReasoningEffort::High),
|
||||
model_reasoning_summary: Some(ReasoningSummary::Detailed),
|
||||
model_verbosity: Some(Verbosity::Medium),
|
||||
|
||||
@@ -57,7 +57,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
|
||||
std::fs::write(
|
||||
config_toml,
|
||||
r#"
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "medium"
|
||||
"#,
|
||||
)
|
||||
|
||||
@@ -27,7 +27,7 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
|
||||
// Start a thread.
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
@@ -68,7 +68,7 @@ async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
|
||||
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
@@ -112,7 +112,7 @@ async fn thread_resume_supports_history_and_overrides() -> Result<()> {
|
||||
// Start a thread.
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -29,7 +29,7 @@ async fn thread_start_creates_thread_and_emits_started() -> Result<()> {
|
||||
// Start a v2 thread with an explicit model override.
|
||||
let req_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("gpt-5".to_string()),
|
||||
model: Some("gpt-5.1".to_string()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -5,10 +5,13 @@ use app_test_support::create_mock_chat_completions_server;
|
||||
use app_test_support::create_mock_chat_completions_server_unchecked;
|
||||
use app_test_support::create_shell_sse_response;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::CommandExecutionStatus;
|
||||
use codex_app_server_protocol::ItemStartedNotification;
|
||||
use codex_app_server_protocol::JSONRPCNotification;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::ServerRequest;
|
||||
use codex_app_server_protocol::ThreadItem;
|
||||
use codex_app_server_protocol::ThreadStartParams;
|
||||
use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_app_server_protocol::TurnStartParams;
|
||||
@@ -17,9 +20,6 @@ use codex_app_server_protocol::TurnStartedNotification;
|
||||
use codex_app_server_protocol::UserInput as V2UserInput;
|
||||
use codex_core::protocol_config_types::ReasoningEffort;
|
||||
use codex_core::protocol_config_types::ReasoningSummary;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use codex_protocol::protocol::Event;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::Path;
|
||||
@@ -235,7 +235,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
.await??;
|
||||
let ThreadStartResponse { thread } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
// turn/start — expect ExecCommandApproval request from server
|
||||
// turn/start — expect CommandExecutionRequestApproval request from server
|
||||
let first_turn_id = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id.clone(),
|
||||
@@ -258,16 +258,10 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
mcp.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
let ServerRequest::ExecCommandApproval { request_id, params } = server_req else {
|
||||
panic!("expected ExecCommandApproval request");
|
||||
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else {
|
||||
panic!("expected CommandExecutionRequestApproval request");
|
||||
};
|
||||
assert_eq!(params.call_id, "call1");
|
||||
assert_eq!(
|
||||
params.parsed_cmd,
|
||||
vec![ParsedCommand::Unknown {
|
||||
cmd: "python3 -c 'print(42)'".to_string()
|
||||
}]
|
||||
);
|
||||
assert_eq!(params.item_id, "call1");
|
||||
|
||||
// Approve and wait for task completion
|
||||
mcp.send_response(
|
||||
@@ -302,7 +296,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
)
|
||||
.await??;
|
||||
|
||||
// Ensure we do NOT receive an ExecCommandApproval request before task completes
|
||||
// Ensure we do NOT receive a CommandExecutionRequestApproval request before task completes
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/task_complete"),
|
||||
@@ -314,8 +308,6 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
// When returning Result from a test, pass an Ok(()) to the skip macro
|
||||
// so the early return type matches. The no-arg form returns unit.
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
@@ -424,29 +416,35 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
|
||||
)
|
||||
.await??;
|
||||
|
||||
let exec_begin_notification = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
|
||||
)
|
||||
let command_exec_item = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let item_started_notification = mcp
|
||||
.read_stream_until_notification_message("item/started")
|
||||
.await?;
|
||||
let params = item_started_notification
|
||||
.params
|
||||
.clone()
|
||||
.expect("item/started params");
|
||||
let item_started: ItemStartedNotification =
|
||||
serde_json::from_value(params).expect("deserialize item/started notification");
|
||||
if matches!(item_started.item, ThreadItem::CommandExecution { .. }) {
|
||||
return Ok::<ThreadItem, anyhow::Error>(item_started.item);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let params = exec_begin_notification
|
||||
.params
|
||||
.clone()
|
||||
.expect("exec_command_begin params");
|
||||
let event: Event = serde_json::from_value(params).expect("deserialize exec begin event");
|
||||
let exec_begin = match event.msg {
|
||||
EventMsg::ExecCommandBegin(exec_begin) => exec_begin,
|
||||
other => panic!("expected ExecCommandBegin event, got {other:?}"),
|
||||
let ThreadItem::CommandExecution {
|
||||
cwd,
|
||||
command,
|
||||
status,
|
||||
..
|
||||
} = command_exec_item
|
||||
else {
|
||||
unreachable!("loop ensures we break on command execution items");
|
||||
};
|
||||
assert_eq!(exec_begin.cwd, second_cwd);
|
||||
assert_eq!(
|
||||
exec_begin.command,
|
||||
vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo second turn".to_string()
|
||||
]
|
||||
);
|
||||
assert_eq!(cwd, second_cwd);
|
||||
assert_eq!(command, "bash -lc 'echo second turn'");
|
||||
assert_eq!(status, CommandExecutionStatus::InProgress);
|
||||
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
|
||||
@@ -5,7 +5,7 @@ use crate::types::RateLimitWindowSnapshot;
|
||||
use crate::types::TurnAttemptsSiblingTurnsResponse;
|
||||
use anyhow::Result;
|
||||
use codex_core::auth::CodexAuth;
|
||||
use codex_core::client::http::get_codex_user_agent;
|
||||
use codex_core::default_client::get_codex_user_agent;
|
||||
use codex_protocol::protocol::RateLimitSnapshot;
|
||||
use codex_protocol::protocol::RateLimitWindow;
|
||||
use reqwest::header::AUTHORIZATION;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use codex_core::client::http::create_client;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::default_client::create_client;
|
||||
|
||||
use crate::chatgpt_token::get_chatgpt_token_data;
|
||||
use crate::chatgpt_token::init_chatgpt_token_from_auth;
|
||||
|
||||
@@ -155,11 +155,11 @@ async fn run_command_under_sandbox(
|
||||
run_windows_sandbox_capture(
|
||||
policy_str,
|
||||
&sandbox_cwd,
|
||||
base_dir.as_path(),
|
||||
command_vec,
|
||||
&cwd_clone,
|
||||
env_map,
|
||||
None,
|
||||
Some(base_dir.as_path()),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -761,9 +761,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn resume_model_flag_applies_when_no_root_flags() {
|
||||
let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5-test"].as_ref());
|
||||
let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5.1-test"].as_ref());
|
||||
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
|
||||
assert!(interactive.resume_picker);
|
||||
assert!(!interactive.resume_last);
|
||||
assert_eq!(interactive.resume_session_id, None);
|
||||
@@ -808,7 +808,7 @@ mod tests {
|
||||
"--ask-for-approval",
|
||||
"on-request",
|
||||
"-m",
|
||||
"gpt-5-test",
|
||||
"gpt-5.1-test",
|
||||
"-p",
|
||||
"my-profile",
|
||||
"-C",
|
||||
@@ -819,7 +819,7 @@ mod tests {
|
||||
.as_ref(),
|
||||
);
|
||||
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
|
||||
assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
|
||||
assert!(interactive.oss);
|
||||
assert_eq!(interactive.config_profile.as_deref(), Some("my-profile"));
|
||||
assert_matches!(
|
||||
|
||||
@@ -48,7 +48,7 @@ async fn init_backend(user_agent_suffix: &str) -> anyhow::Result<BackendContext>
|
||||
});
|
||||
}
|
||||
|
||||
let ua = codex_core::client::http::get_codex_user_agent();
|
||||
let ua = codex_core::default_client::get_codex_user_agent();
|
||||
let mut http = codex_cloud_tasks_client::HttpClient::new(base_url.clone())?.with_user_agent(ua);
|
||||
let style = if base_url.contains("/backend-api") {
|
||||
"wham"
|
||||
@@ -384,7 +384,7 @@ pub async fn run_main(cli: Cli, _codex_linux_sandbox_exe: Option<PathBuf>) -> an
|
||||
append_error_log(format!(
|
||||
"startup: wham_force_internal={} ua={}",
|
||||
force_internal,
|
||||
codex_core::client::http::get_codex_user_agent()
|
||||
codex_core::default_client::get_codex_user_agent()
|
||||
));
|
||||
// Non-blocking initial load so the in-box spinner can animate
|
||||
app.status = "Loading tasks…".to_string();
|
||||
|
||||
@@ -7,7 +7,7 @@ use codex_core::config::ConfigOverrides;
|
||||
use codex_login::AuthManager;
|
||||
|
||||
pub fn set_user_agent_suffix(suffix: &str) {
|
||||
if let Ok(mut guard) = codex_core::client::http::USER_AGENT_SUFFIX.lock() {
|
||||
if let Ok(mut guard) = codex_core::default_client::USER_AGENT_SUFFIX.lock() {
|
||||
guard.replace(suffix.to_string());
|
||||
}
|
||||
}
|
||||
@@ -79,7 +79,7 @@ pub async fn build_chatgpt_headers() -> HeaderMap {
|
||||
use reqwest::header::USER_AGENT;
|
||||
|
||||
set_user_agent_suffix("codex_cloud_tasks_tui");
|
||||
let ua = codex_core::client::http::get_codex_user_agent();
|
||||
let ua = codex_core::default_client::get_codex_user_agent();
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
USER_AGENT,
|
||||
|
||||
@@ -10,6 +10,8 @@ workspace = true
|
||||
clap = { workspace = true, features = ["derive", "wrap_help"], optional = true }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-lmstudio = { workspace = true }
|
||||
codex-ollama = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
serde = { workspace = true, optional = true }
|
||||
|
||||
@@ -37,3 +37,5 @@ pub mod model_presets;
|
||||
// Shared approval presets (AskForApproval + Sandbox) used by TUI and MCP server
|
||||
// Not to be confused with AskForApproval, which we should probably rename to EscalationPolicy.
|
||||
pub mod approval_presets;
|
||||
// Shared OSS provider utilities used by TUI and exec
|
||||
pub mod oss;
|
||||
|
||||
60
codex-rs/common/src/oss.rs
Normal file
60
codex-rs/common/src/oss.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! OSS provider utilities shared between TUI and exec.
|
||||
|
||||
use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
use codex_core::OLLAMA_OSS_PROVIDER_ID;
|
||||
use codex_core::config::Config;
|
||||
|
||||
/// Returns the default model for a given OSS provider.
|
||||
pub fn get_default_model_for_oss_provider(provider_id: &str) -> Option<&'static str> {
|
||||
match provider_id {
|
||||
LMSTUDIO_OSS_PROVIDER_ID => Some(codex_lmstudio::DEFAULT_OSS_MODEL),
|
||||
OLLAMA_OSS_PROVIDER_ID => Some(codex_ollama::DEFAULT_OSS_MODEL),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensures the specified OSS provider is ready (models downloaded, service reachable).
|
||||
pub async fn ensure_oss_provider_ready(
|
||||
provider_id: &str,
|
||||
config: &Config,
|
||||
) -> Result<(), std::io::Error> {
|
||||
match provider_id {
|
||||
LMSTUDIO_OSS_PROVIDER_ID => {
|
||||
codex_lmstudio::ensure_oss_ready(config)
|
||||
.await
|
||||
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
|
||||
}
|
||||
OLLAMA_OSS_PROVIDER_ID => {
|
||||
codex_ollama::ensure_oss_ready(config)
|
||||
.await
|
||||
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
|
||||
}
|
||||
_ => {
|
||||
// Unknown provider, skip setup
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_get_default_model_for_provider_lmstudio() {
|
||||
let result = get_default_model_for_oss_provider(LMSTUDIO_OSS_PROVIDER_ID);
|
||||
assert_eq!(result, Some(codex_lmstudio::DEFAULT_OSS_MODEL));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_default_model_for_provider_ollama() {
|
||||
let result = get_default_model_for_oss_provider(OLLAMA_OSS_PROVIDER_ID);
|
||||
assert_eq!(result, Some(codex_ollama::DEFAULT_OSS_MODEL));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_default_model_for_provider_unknown() {
|
||||
let result = get_default_model_for_oss_provider("unknown-provider");
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
}
|
||||
@@ -22,6 +22,7 @@ chrono = { workspace = true, features = ["serde"] }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-apply-patch = { workspace = true }
|
||||
codex-async-utils = { workspace = true }
|
||||
codex-execpolicy2 = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-git = { workspace = true }
|
||||
codex-keyring-store = { workspace = true }
|
||||
|
||||
@@ -318,8 +318,6 @@ For casual greetings, acknowledgements, or other one-off conversational messages
|
||||
|
||||
When using the shell, you must adhere to the following guidelines:
|
||||
|
||||
- The arguments to `shell` will be passed to execvp().
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
|
||||
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
|
||||
- Read files in chunks with a max chunk size of 250 lines. Do not use python scripts to attempt to output larger chunks of a file. Command line output will be truncated after 10 kilobytes or 256 lines of output, regardless of the command used.
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL
|
||||
|
||||
## General
|
||||
|
||||
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
|
||||
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
|
||||
|
||||
## Editing constraints
|
||||
|
||||
@@ -23,6 +23,7 @@ pub use crate::auth::storage::AuthDotJson;
|
||||
use crate::auth::storage::AuthStorageBackend;
|
||||
use crate::auth::storage::create_auth_storage;
|
||||
use crate::config::Config;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::error::RefreshTokenFailedError;
|
||||
use crate::error::RefreshTokenFailedReason;
|
||||
use crate::token_data::KnownPlan as InternalKnownPlan;
|
||||
@@ -271,7 +272,7 @@ impl CodexAuth {
|
||||
mode: AuthMode::ChatGPT,
|
||||
storage: create_auth_storage(PathBuf::new(), AuthCredentialsStoreMode::File),
|
||||
auth_dot_json,
|
||||
client: crate::client::http::create_client(),
|
||||
client: crate::default_client::create_client(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,7 +287,7 @@ impl CodexAuth {
|
||||
}
|
||||
|
||||
pub fn from_api_key(api_key: &str) -> Self {
|
||||
Self::from_api_key_with_client(api_key, crate::client::http::create_client())
|
||||
Self::from_api_key_with_client(api_key, crate::default_client::create_client())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -446,7 +447,7 @@ fn load_auth(
|
||||
auth_credentials_store_mode: AuthCredentialsStoreMode,
|
||||
) -> std::io::Result<Option<CodexAuth>> {
|
||||
if enable_codex_api_key_env && let Some(api_key) = read_codex_api_key_from_env() {
|
||||
let client = crate::client::http::create_client();
|
||||
let client = crate::default_client::create_client();
|
||||
return Ok(Some(CodexAuth::from_api_key_with_client(
|
||||
api_key.as_str(),
|
||||
client,
|
||||
@@ -455,7 +456,7 @@ fn load_auth(
|
||||
|
||||
let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
|
||||
|
||||
let client = crate::client::http::create_client();
|
||||
let client = crate::default_client::create_client();
|
||||
let auth_dot_json = match storage.load()? {
|
||||
Some(auth) => auth,
|
||||
None => return Ok(None),
|
||||
@@ -631,7 +632,6 @@ fn refresh_token_endpoint() -> String {
|
||||
.unwrap_or_else(|_| REFRESH_TOKEN_URL.to_string())
|
||||
}
|
||||
|
||||
use crate::client::http::CodexHttpClient;
|
||||
use std::sync::RwLock;
|
||||
|
||||
/// Internal cached auth state.
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::ModelProviderInfo;
|
||||
use crate::client::ResponseEvent;
|
||||
use crate::client::ResponseStream;
|
||||
use crate::client::http::CodexHttpClient;
|
||||
use crate::client::retry::RetryableStreamError;
|
||||
use crate::client::retry::retry_stream;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::ConnectionFailedError;
|
||||
use crate::error::ResponseStreamFailed;
|
||||
use crate::error::Result;
|
||||
use crate::error::RetryLimitReachedError;
|
||||
use crate::error::UnexpectedResponseError;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::tools::spec::create_tools_json_for_chat_completions_api;
|
||||
@@ -22,12 +21,19 @@ use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use eventsource_stream::Eventsource;
|
||||
use futures::Stream;
|
||||
use futures::StreamExt;
|
||||
use futures::TryStreamExt;
|
||||
use reqwest::StatusCode;
|
||||
use serde_json::json;
|
||||
use std::pin::Pin;
|
||||
use std::task::Context;
|
||||
use std::task::Poll;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::timeout;
|
||||
use tracing::debug;
|
||||
use tracing::trace;
|
||||
|
||||
/// Implementation for the classic Chat Completions API.
|
||||
@@ -49,7 +55,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
let mut messages = Vec::<serde_json::Value>::new();
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(model_family);
|
||||
messages.push(json!({ "role": "system", "content": full_instructions }));
|
||||
messages.push(json!({"role": "system", "content": full_instructions}));
|
||||
|
||||
let input = prompt.get_formatted_input();
|
||||
|
||||
@@ -122,7 +128,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
{
|
||||
reasoning_by_anchor_index
|
||||
.entry(idx - 1)
|
||||
.and_modify(|v| v.push_str(text.as_str()))
|
||||
.and_modify(|v| v.push_str(&text))
|
||||
.or_insert(text.clone());
|
||||
attached = true;
|
||||
}
|
||||
@@ -133,13 +139,13 @@ pub(crate) async fn stream_chat_completions(
|
||||
ResponseItem::FunctionCall { .. } | ResponseItem::LocalShellCall { .. } => {
|
||||
reasoning_by_anchor_index
|
||||
.entry(idx + 1)
|
||||
.and_modify(|v| v.push_str(text.as_str()))
|
||||
.and_modify(|v| v.push_str(&text))
|
||||
.or_insert(text.clone());
|
||||
}
|
||||
ResponseItem::Message { role, .. } if role == "assistant" => {
|
||||
reasoning_by_anchor_index
|
||||
.entry(idx + 1)
|
||||
.and_modify(|v| v.push_str(text.as_str()))
|
||||
.and_modify(|v| v.push_str(&text))
|
||||
.or_insert(text.clone());
|
||||
}
|
||||
_ => {}
|
||||
@@ -197,18 +203,13 @@ pub(crate) async fn stream_chat_completions(
|
||||
json!(text)
|
||||
};
|
||||
|
||||
let mut msg = json!({
|
||||
"role": role,
|
||||
"content": content_value
|
||||
});
|
||||
|
||||
let mut msg = json!({"role": role, "content": content_value});
|
||||
if role == "assistant"
|
||||
&& let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
|
||||
&& let Some(obj) = msg.as_object_mut()
|
||||
{
|
||||
obj.insert("reasoning".to_string(), json!(reasoning));
|
||||
}
|
||||
|
||||
messages.push(msg);
|
||||
}
|
||||
ResponseItem::FunctionCall {
|
||||
@@ -227,24 +228,22 @@ pub(crate) async fn stream_chat_completions(
|
||||
"name": name,
|
||||
"arguments": arguments,
|
||||
}
|
||||
}],
|
||||
}]
|
||||
});
|
||||
|
||||
if let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
|
||||
&& let Some(obj) = msg.as_object_mut()
|
||||
{
|
||||
obj.insert("reasoning".to_string(), json!(reasoning));
|
||||
}
|
||||
|
||||
messages.push(msg);
|
||||
}
|
||||
|
||||
ResponseItem::LocalShellCall {
|
||||
id,
|
||||
call_id: _,
|
||||
status,
|
||||
action,
|
||||
} => {
|
||||
// Confirm with API team.
|
||||
let mut msg = json!({
|
||||
"role": "assistant",
|
||||
"content": null,
|
||||
@@ -255,16 +254,13 @@ pub(crate) async fn stream_chat_completions(
|
||||
"action": action,
|
||||
}]
|
||||
});
|
||||
|
||||
if let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
|
||||
&& let Some(obj) = msg.as_object_mut()
|
||||
{
|
||||
obj.insert("reasoning".to_string(), json!(reasoning));
|
||||
}
|
||||
|
||||
messages.push(msg);
|
||||
}
|
||||
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
// Prefer structured content items when available (e.g., images)
|
||||
// otherwise fall back to the legacy plain-string content.
|
||||
@@ -291,7 +287,6 @@ pub(crate) async fn stream_chat_completions(
|
||||
"content": content_value,
|
||||
}));
|
||||
}
|
||||
|
||||
ResponseItem::CustomToolCall {
|
||||
id,
|
||||
call_id: _,
|
||||
@@ -309,7 +304,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
"name": name,
|
||||
"input": input,
|
||||
}
|
||||
}],
|
||||
}]
|
||||
}));
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, output } => {
|
||||
@@ -319,148 +314,117 @@ pub(crate) async fn stream_chat_completions(
|
||||
"content": output,
|
||||
}));
|
||||
}
|
||||
|
||||
ResponseItem::Reasoning { .. } => {
|
||||
// Omit from conversation history; reasoning is attached to anchors above.
|
||||
continue;
|
||||
}
|
||||
|
||||
ResponseItem::WebSearchCall { .. } | ResponseItem::Other => {
|
||||
continue;
|
||||
}
|
||||
|
||||
ResponseItem::GhostSnapshot { .. } => {
|
||||
// Ghost snapshots annotate history but are not sent to the model.
|
||||
continue;
|
||||
}
|
||||
ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::Other => {
|
||||
// Omit these items from the conversation history.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
|
||||
|
||||
let mut body = json!({
|
||||
let payload = json!({
|
||||
"model": model_family.slug,
|
||||
"messages": messages,
|
||||
"stream": true,
|
||||
"stream_options": {
|
||||
"include_usage": true,
|
||||
},
|
||||
"tools": tools_json,
|
||||
});
|
||||
|
||||
if !tools_json.is_empty() {
|
||||
body["tools"] = json!(tools_json);
|
||||
body["tool_choice"] = json!("auto");
|
||||
}
|
||||
|
||||
if let SessionSource::SubAgent(sub) = session_source {
|
||||
let subagent = crate::client::types::subagent_label(sub);
|
||||
body["metadata"] = json!({
|
||||
"x-openai-subagent": subagent,
|
||||
});
|
||||
}
|
||||
|
||||
let max_attempts = provider.request_max_retries();
|
||||
retry_stream(max_attempts, |attempt| {
|
||||
let body = body.clone();
|
||||
async move {
|
||||
stream_single_chat_completion(attempt, client, provider, otel_event_manager, body)
|
||||
.await
|
||||
.map_err(ChatStreamError::Retryable)
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn stream_single_chat_completion(
|
||||
attempt: u64,
|
||||
client: &CodexHttpClient,
|
||||
provider: &ModelProviderInfo,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
body: serde_json::Value,
|
||||
) -> Result<ResponseStream> {
|
||||
trace!(
|
||||
debug!(
|
||||
"POST to {}: {}",
|
||||
provider.get_full_url(&None),
|
||||
body.to_string()
|
||||
payload.to_string()
|
||||
);
|
||||
|
||||
let mut req_builder = provider.create_request_builder(client, &None).await?;
|
||||
req_builder = req_builder
|
||||
.header(reqwest::header::ACCEPT, "text/event-stream")
|
||||
.json(&body);
|
||||
let mut attempt = 0;
|
||||
let max_retries = provider.request_max_retries();
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
let res = otel_event_manager
|
||||
.log_request(attempt, || req_builder.send())
|
||||
.await;
|
||||
let mut req_builder = provider.create_request_builder(client, &None).await?;
|
||||
|
||||
let mut request_id = None;
|
||||
if let Ok(resp) = &res {
|
||||
request_id = resp
|
||||
.headers()
|
||||
.get("cf-ray")
|
||||
.map(|v| v.to_str().unwrap_or_default().to_string());
|
||||
}
|
||||
|
||||
match res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
|
||||
// spawn task to process SSE
|
||||
let stream = resp.bytes_stream().map_err(move |e| {
|
||||
CodexErr::ResponseStreamFailed(ResponseStreamFailed {
|
||||
source: e,
|
||||
request_id: request_id.clone(),
|
||||
})
|
||||
});
|
||||
tokio::spawn(process_chat_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
provider.stream_idle_timeout(),
|
||||
otel_event_manager.clone(),
|
||||
));
|
||||
|
||||
Ok(ResponseStream { rx_event })
|
||||
// Include subagent header only for subagent sessions.
|
||||
if let SessionSource::SubAgent(sub) = session_source.clone() {
|
||||
let subagent = if let SubAgentSource::Other(label) = sub {
|
||||
label
|
||||
} else {
|
||||
serde_json::to_value(&sub)
|
||||
.ok()
|
||||
.and_then(|v| v.as_str().map(std::string::ToString::to_string))
|
||||
.unwrap_or_else(|| "other".to_string())
|
||||
};
|
||||
req_builder = req_builder.header("x-openai-subagent", subagent);
|
||||
}
|
||||
Ok(res) => {
|
||||
let status = res.status();
|
||||
|
||||
if !(status == StatusCode::TOO_MANY_REQUESTS
|
||||
|| status == StatusCode::UNAUTHORIZED
|
||||
|| status.is_server_error())
|
||||
{
|
||||
// Surface the error body to callers. Use `unwrap_or_default` per Clippy.
|
||||
let body = res.text().await.unwrap_or_default();
|
||||
return Err(CodexErr::UnexpectedStatus(UnexpectedResponseError {
|
||||
status,
|
||||
body,
|
||||
request_id: None,
|
||||
}));
|
||||
let res = otel_event_manager
|
||||
.log_request(attempt, || {
|
||||
req_builder
|
||||
.header(reqwest::header::ACCEPT, "text/event-stream")
|
||||
.json(&payload)
|
||||
.send()
|
||||
})
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
|
||||
let stream = resp.bytes_stream().map_err(|e| {
|
||||
CodexErr::ResponseStreamFailed(ResponseStreamFailed {
|
||||
source: e,
|
||||
request_id: None,
|
||||
})
|
||||
});
|
||||
tokio::spawn(process_chat_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
provider.stream_idle_timeout(),
|
||||
otel_event_manager.clone(),
|
||||
));
|
||||
return Ok(ResponseStream { rx_event });
|
||||
}
|
||||
Ok(res) => {
|
||||
let status = res.status();
|
||||
if !(status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()) {
|
||||
let body = (res.text().await).unwrap_or_default();
|
||||
return Err(CodexErr::UnexpectedStatus(UnexpectedResponseError {
|
||||
status,
|
||||
body,
|
||||
request_id: None,
|
||||
}));
|
||||
}
|
||||
|
||||
Err(CodexErr::UnexpectedStatus(UnexpectedResponseError {
|
||||
status,
|
||||
body: String::new(),
|
||||
request_id,
|
||||
}))
|
||||
}
|
||||
Err(e) => Err(CodexErr::ConnectionFailed(ConnectionFailedError {
|
||||
source: e,
|
||||
})),
|
||||
}
|
||||
}
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::RetryLimit(RetryLimitReachedError {
|
||||
status,
|
||||
request_id: None,
|
||||
}));
|
||||
}
|
||||
|
||||
enum ChatStreamError {
|
||||
Retryable(CodexErr),
|
||||
}
|
||||
let retry_after_secs = res
|
||||
.headers()
|
||||
.get(reqwest::header::RETRY_AFTER)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.parse::<u64>().ok());
|
||||
|
||||
impl RetryableStreamError for ChatStreamError {
|
||||
fn delay(&self, attempt: u64) -> Option<Duration> {
|
||||
Some(backoff(attempt))
|
||||
}
|
||||
|
||||
fn into_error(self) -> CodexErr {
|
||||
match self {
|
||||
ChatStreamError::Retryable(e) => e,
|
||||
let delay = retry_after_secs
|
||||
.map(|s| Duration::from_millis(s * 1_000))
|
||||
.unwrap_or_else(|| backoff(attempt));
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::ConnectionFailed(ConnectionFailedError {
|
||||
source: e,
|
||||
}));
|
||||
}
|
||||
let delay = backoff(attempt);
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -524,7 +488,9 @@ async fn append_reasoning_text(
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Lightweight SSE processor for the Chat Completions streaming format. The
|
||||
/// output is mapped onto Codex's internal [`ResponseEvent`] so that the rest
|
||||
/// of the pipeline can stay agnostic of the underlying wire format.
|
||||
async fn process_chat_sse<S>(
|
||||
stream: S,
|
||||
tx_event: mpsc::Sender<Result<ResponseEvent>>,
|
||||
@@ -553,15 +519,21 @@ async fn process_chat_sse<S>(
|
||||
let mut reasoning_item: Option<ResponseItem> = None;
|
||||
|
||||
loop {
|
||||
let sse = match crate::client::sse::next_sse_event(
|
||||
&mut stream,
|
||||
idle_timeout,
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
{
|
||||
crate::client::sse::SseNext::Event(ev) => ev,
|
||||
crate::client::sse::SseNext::Eof => {
|
||||
let start = std::time::Instant::now();
|
||||
let response = timeout(idle_timeout, stream.next()).await;
|
||||
let duration = start.elapsed();
|
||||
otel_event_manager.log_sse_event(&response, duration);
|
||||
|
||||
let sse = match response {
|
||||
Ok(Some(Ok(ev))) => ev,
|
||||
Ok(Some(Err(e))) => {
|
||||
let _ = tx_event
|
||||
.send(Err(CodexErr::Stream(e.to_string(), None)))
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
Ok(None) => {
|
||||
// Stream closed gracefully – emit Completed with dummy id.
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::Completed {
|
||||
response_id: String::new(),
|
||||
@@ -570,11 +542,7 @@ async fn process_chat_sse<S>(
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
crate::client::sse::SseNext::StreamError(message) => {
|
||||
let _ = tx_event.send(Err(CodexErr::Stream(message, None))).await;
|
||||
return;
|
||||
}
|
||||
crate::client::sse::SseNext::Timeout => {
|
||||
Err(_) => {
|
||||
let _ = tx_event
|
||||
.send(Err(CodexErr::Stream(
|
||||
"idle timeout waiting for SSE".into(),
|
||||
@@ -756,3 +724,256 @@ async fn process_chat_sse<S>(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Optional client-side aggregation helper
|
||||
///
|
||||
/// Stream adapter that merges the incremental `OutputItemDone` chunks coming from
|
||||
/// [`process_chat_sse`] into a *running* assistant message, **suppressing the
|
||||
/// per-token deltas**. The stream stays silent while the model is thinking
|
||||
/// and only emits two events per turn:
|
||||
///
|
||||
/// 1. `ResponseEvent::OutputItemDone` with the *complete* assistant message
|
||||
/// (fully concatenated).
|
||||
/// 2. The original `ResponseEvent::Completed` right after it.
|
||||
///
|
||||
/// This mirrors the behaviour the TypeScript CLI exposes to its higher layers.
|
||||
///
|
||||
/// The adapter is intentionally *lossless*: callers who do **not** opt in via
|
||||
/// [`AggregateStreamExt::aggregate()`] keep receiving the original unmodified
|
||||
/// events.
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
enum AggregateMode {
|
||||
AggregatedOnly,
|
||||
Streaming,
|
||||
}
|
||||
pub(crate) struct AggregatedChatStream<S> {
|
||||
inner: S,
|
||||
cumulative: String,
|
||||
cumulative_reasoning: String,
|
||||
pending: std::collections::VecDeque<ResponseEvent>,
|
||||
mode: AggregateMode,
|
||||
}
|
||||
|
||||
impl<S> Stream for AggregatedChatStream<S>
|
||||
where
|
||||
S: Stream<Item = Result<ResponseEvent>> + Unpin,
|
||||
{
|
||||
type Item = Result<ResponseEvent>;
|
||||
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let this = self.get_mut();
|
||||
|
||||
// First, flush any buffered events from the previous call.
|
||||
if let Some(ev) = this.pending.pop_front() {
|
||||
return Poll::Ready(Some(Ok(ev)));
|
||||
}
|
||||
|
||||
loop {
|
||||
match Pin::new(&mut this.inner).poll_next(cx) {
|
||||
Poll::Pending => return Poll::Pending,
|
||||
Poll::Ready(None) => return Poll::Ready(None),
|
||||
Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))) => {
|
||||
// If this is an incremental assistant message chunk, accumulate but
|
||||
// do NOT emit yet. Forward any other item (e.g. FunctionCall) right
|
||||
// away so downstream consumers see it.
|
||||
|
||||
let is_assistant_message = matches!(
|
||||
&item,
|
||||
codex_protocol::models::ResponseItem::Message { role, .. } if role == "assistant"
|
||||
);
|
||||
|
||||
if is_assistant_message {
|
||||
match this.mode {
|
||||
AggregateMode::AggregatedOnly => {
|
||||
// Only use the final assistant message if we have not
|
||||
// seen any deltas; otherwise, deltas already built the
|
||||
// cumulative text and this would duplicate it.
|
||||
if this.cumulative.is_empty()
|
||||
&& let codex_protocol::models::ResponseItem::Message {
|
||||
content,
|
||||
..
|
||||
} = &item
|
||||
&& let Some(text) = content.iter().find_map(|c| match c {
|
||||
codex_protocol::models::ContentItem::OutputText {
|
||||
text,
|
||||
} => Some(text),
|
||||
_ => None,
|
||||
})
|
||||
{
|
||||
this.cumulative.push_str(text);
|
||||
}
|
||||
// Swallow assistant message here; emit on Completed.
|
||||
continue;
|
||||
}
|
||||
AggregateMode::Streaming => {
|
||||
// In streaming mode, if we have not seen any deltas, forward
|
||||
// the final assistant message directly. If deltas were seen,
|
||||
// suppress the final message to avoid duplication.
|
||||
if this.cumulative.is_empty() {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
|
||||
item,
|
||||
))));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not an assistant message – forward immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
}))) => {
|
||||
// Build any aggregated items in the correct order: Reasoning first, then Message.
|
||||
let mut emitted_any = false;
|
||||
|
||||
if !this.cumulative_reasoning.is_empty()
|
||||
&& matches!(this.mode, AggregateMode::AggregatedOnly)
|
||||
{
|
||||
let aggregated_reasoning =
|
||||
codex_protocol::models::ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
summary: Vec::new(),
|
||||
content: Some(vec![
|
||||
codex_protocol::models::ReasoningItemContent::ReasoningText {
|
||||
text: std::mem::take(&mut this.cumulative_reasoning),
|
||||
},
|
||||
]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
this.pending
|
||||
.push_back(ResponseEvent::OutputItemDone(aggregated_reasoning));
|
||||
emitted_any = true;
|
||||
}
|
||||
|
||||
// Always emit the final aggregated assistant message when any
|
||||
// content deltas have been observed. In AggregatedOnly mode this
|
||||
// is the sole assistant output; in Streaming mode this finalizes
|
||||
// the streamed deltas into a terminal OutputItemDone so callers
|
||||
// can persist/render the message once per turn.
|
||||
if !this.cumulative.is_empty() {
|
||||
let aggregated_message = codex_protocol::models::ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![codex_protocol::models::ContentItem::OutputText {
|
||||
text: std::mem::take(&mut this.cumulative),
|
||||
}],
|
||||
};
|
||||
this.pending
|
||||
.push_back(ResponseEvent::OutputItemDone(aggregated_message));
|
||||
emitted_any = true;
|
||||
}
|
||||
|
||||
// Always emit Completed last when anything was aggregated.
|
||||
if emitted_any {
|
||||
this.pending.push_back(ResponseEvent::Completed {
|
||||
response_id: response_id.clone(),
|
||||
token_usage: token_usage.clone(),
|
||||
});
|
||||
// Return the first pending event now.
|
||||
if let Some(ev) = this.pending.pop_front() {
|
||||
return Poll::Ready(Some(Ok(ev)));
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing aggregated – forward Completed directly.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
})));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
|
||||
// These events are exclusive to the Responses API and
|
||||
// will never appear in a Chat Completions stream.
|
||||
continue;
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
|
||||
// Always accumulate deltas so we can emit a final OutputItemDone at Completed.
|
||||
this.cumulative.push_str(&delta);
|
||||
if matches!(this.mode, AggregateMode::Streaming) {
|
||||
// In streaming mode, also forward the delta immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
|
||||
delta,
|
||||
content_index,
|
||||
}))) => {
|
||||
// Always accumulate reasoning deltas so we can emit a final Reasoning item at Completed.
|
||||
this.cumulative_reasoning.push_str(&delta);
|
||||
if matches!(this.mode, AggregateMode::Streaming) {
|
||||
// In streaming mode, also forward the delta immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
|
||||
delta,
|
||||
content_index,
|
||||
})));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta { .. }))) => {
|
||||
continue;
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryPartAdded { .. }))) => {
|
||||
continue;
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extension trait that activates aggregation on any stream of [`ResponseEvent`].
|
||||
pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Sized {
|
||||
/// Returns a new stream that emits **only** the final assistant message
|
||||
/// per turn instead of every incremental delta. The produced
|
||||
/// `ResponseEvent` sequence for a typical text turn looks like:
|
||||
///
|
||||
/// ```ignore
|
||||
/// OutputItemDone(<full message>)
|
||||
/// Completed
|
||||
/// ```
|
||||
///
|
||||
/// No other `OutputItemDone` events will be seen by the caller.
|
||||
///
|
||||
/// Usage:
|
||||
///
|
||||
/// ```ignore
|
||||
/// let agg_stream = client.stream(&prompt).await?.aggregate();
|
||||
/// while let Some(event) = agg_stream.next().await {
|
||||
/// // event now contains cumulative text
|
||||
/// }
|
||||
/// ```
|
||||
fn aggregate(self) -> AggregatedChatStream<Self> {
|
||||
AggregatedChatStream::new(self, AggregateMode::AggregatedOnly)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> AggregateStreamExt for T where T: Stream<Item = Result<ResponseEvent>> + Sized {}
|
||||
|
||||
impl<S> AggregatedChatStream<S> {
|
||||
fn new(inner: S, mode: AggregateMode) -> Self {
|
||||
AggregatedChatStream {
|
||||
inner,
|
||||
cumulative: String::new(),
|
||||
cumulative_reasoning: String::new(),
|
||||
pending: std::collections::VecDeque::new(),
|
||||
mode,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn streaming_mode(inner: S) -> Self {
|
||||
Self::new(inner, AggregateMode::Streaming)
|
||||
}
|
||||
}
|
||||
1540
codex-rs/core/src/client.rs
Normal file
1540
codex-rs/core/src/client.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,235 +0,0 @@
|
||||
use std::pin::Pin;
|
||||
use std::task::Context;
|
||||
use std::task::Poll;
|
||||
|
||||
use crate::client::ResponseEvent;
|
||||
use crate::error::Result;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ReasoningItemContent;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use futures::Stream;
|
||||
|
||||
/// Optional client-side aggregation helper
|
||||
///
|
||||
/// Stream adapter that merges the incremental `OutputItemDone` chunks coming from
|
||||
/// the chat SSE decoder into a *running* assistant message, **suppressing the
|
||||
/// per-token deltas**. The stream stays silent while the model is thinking and
|
||||
/// only emits two events per turn:
|
||||
///
|
||||
/// 1. `ResponseEvent::OutputItemDone` with the *complete* assistant message
|
||||
/// (fully concatenated).
|
||||
/// 2. The original `ResponseEvent::Completed` right after it.
|
||||
///
|
||||
/// The adapter is intentionally *lossless*: callers who do **not** opt in via
|
||||
/// [`AggregateStreamExt::aggregate()`] keep receiving the original unmodified
|
||||
/// events.
|
||||
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||
enum AggregateMode {
|
||||
AggregatedOnly,
|
||||
Streaming,
|
||||
}
|
||||
|
||||
pub(crate) struct AggregatedChatStream<S> {
|
||||
inner: S,
|
||||
cumulative: String,
|
||||
cumulative_reasoning: String,
|
||||
pending: std::collections::VecDeque<ResponseEvent>,
|
||||
mode: AggregateMode,
|
||||
}
|
||||
|
||||
impl<S> Stream for AggregatedChatStream<S>
|
||||
where
|
||||
S: Stream<Item = Result<ResponseEvent>> + Unpin,
|
||||
{
|
||||
type Item = Result<ResponseEvent>;
|
||||
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let this = self.get_mut();
|
||||
|
||||
// First, flush any buffered events from the previous call.
|
||||
if let Some(ev) = this.pending.pop_front() {
|
||||
return Poll::Ready(Some(Ok(ev)));
|
||||
}
|
||||
|
||||
loop {
|
||||
match Pin::new(&mut this.inner).poll_next(cx) {
|
||||
Poll::Pending => return Poll::Pending,
|
||||
Poll::Ready(None) => return Poll::Ready(None),
|
||||
Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))) => {
|
||||
// If this is an incremental assistant message chunk, accumulate but
|
||||
// do NOT emit yet. Forward any other item (e.g. FunctionCall) right
|
||||
// away so downstream consumers see it.
|
||||
|
||||
let is_assistant_message = matches!(
|
||||
&item,
|
||||
ResponseItem::Message { role, .. } if role == "assistant"
|
||||
);
|
||||
|
||||
if is_assistant_message {
|
||||
match this.mode {
|
||||
AggregateMode::AggregatedOnly => {
|
||||
// Only use the final assistant message if we have not
|
||||
// seen any deltas; otherwise, deltas already built the
|
||||
// cumulative text and this would duplicate it.
|
||||
if this.cumulative.is_empty()
|
||||
&& let ResponseItem::Message { content, .. } = &item
|
||||
&& let Some(text) = content.iter().find_map(|c| match c {
|
||||
ContentItem::OutputText { text } => Some(text),
|
||||
_ => None,
|
||||
})
|
||||
{
|
||||
this.cumulative.push_str(text);
|
||||
}
|
||||
// Swallow assistant message here; emit on Completed.
|
||||
continue;
|
||||
}
|
||||
AggregateMode::Streaming => {
|
||||
// In streaming mode, if we have not seen any deltas, forward
|
||||
// the final assistant message directly. If deltas were seen,
|
||||
// suppress the final message to avoid duplication.
|
||||
if this.cumulative.is_empty() {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
|
||||
item,
|
||||
))));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not an assistant message – forward immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
}))) => {
|
||||
// Build any aggregated items in the correct order: Reasoning first, then Message.
|
||||
let mut emitted_any = false;
|
||||
|
||||
if !this.cumulative_reasoning.is_empty()
|
||||
&& matches!(this.mode, AggregateMode::AggregatedOnly)
|
||||
{
|
||||
let aggregated_reasoning = ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
summary: Vec::new(),
|
||||
content: Some(vec![ReasoningItemContent::ReasoningText {
|
||||
text: std::mem::take(&mut this.cumulative_reasoning),
|
||||
}]),
|
||||
encrypted_content: None,
|
||||
};
|
||||
this.pending
|
||||
.push_back(ResponseEvent::OutputItemDone(aggregated_reasoning));
|
||||
emitted_any = true;
|
||||
}
|
||||
|
||||
// Always emit the final aggregated assistant message when any
|
||||
// content deltas have been observed. In AggregatedOnly mode this
|
||||
// is the sole assistant output; in Streaming mode this finalizes
|
||||
// the streamed deltas into a terminal OutputItemDone so callers
|
||||
// can persist/render the message once per turn.
|
||||
if !this.cumulative.is_empty() {
|
||||
let aggregated_message = ResponseItem::Message {
|
||||
id: None,
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentItem::OutputText {
|
||||
text: std::mem::take(&mut this.cumulative),
|
||||
}],
|
||||
};
|
||||
this.pending
|
||||
.push_back(ResponseEvent::OutputItemDone(aggregated_message));
|
||||
emitted_any = true;
|
||||
}
|
||||
|
||||
// Always emit Completed last when anything was aggregated.
|
||||
if emitted_any {
|
||||
this.pending.push_back(ResponseEvent::Completed {
|
||||
response_id: response_id.clone(),
|
||||
token_usage: token_usage.clone(),
|
||||
});
|
||||
if let Some(ev) = this.pending.pop_front() {
|
||||
return Poll::Ready(Some(Ok(ev)));
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing aggregated – forward Completed directly.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
|
||||
response_id,
|
||||
token_usage,
|
||||
})));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
|
||||
// These events are exclusive to the Responses API and
|
||||
// will never appear in a Chat Completions stream.
|
||||
continue;
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
|
||||
// Always accumulate deltas so we can emit a final OutputItemDone at Completed.
|
||||
this.cumulative.push_str(&delta);
|
||||
if matches!(this.mode, AggregateMode::Streaming) {
|
||||
// In streaming mode, also forward the delta immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
|
||||
}
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
|
||||
delta,
|
||||
content_index,
|
||||
}))) => {
|
||||
// Always accumulate reasoning deltas so we can emit a final Reasoning item at Completed.
|
||||
this.cumulative_reasoning.push_str(&delta);
|
||||
if matches!(this.mode, AggregateMode::Streaming) {
|
||||
// In streaming mode, also forward the delta immediately.
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
|
||||
delta,
|
||||
content_index,
|
||||
})));
|
||||
}
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta { .. }))) => {}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryPartAdded { .. }))) => {}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extension trait that activates aggregation on any stream of [`ResponseEvent`].
|
||||
pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Sized {
|
||||
/// Returns a new stream that emits **only** the final assistant message
|
||||
/// per turn instead of every incremental delta. The produced
|
||||
/// `ResponseEvent` sequence for a typical text turn looks like:
|
||||
///
|
||||
/// ```ignore
|
||||
/// OutputItemDone(<full message>)
|
||||
/// Completed
|
||||
/// ```
|
||||
///
|
||||
/// No other `OutputItemDone` events will be seen by the caller.
|
||||
fn aggregate(self) -> AggregatedChatStream<Self> {
|
||||
AggregatedChatStream::new(self, AggregateMode::AggregatedOnly)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> AggregateStreamExt for T where T: Stream<Item = Result<ResponseEvent>> + Sized {}
|
||||
|
||||
impl<S> AggregatedChatStream<S> {
|
||||
fn new(inner: S, mode: AggregateMode) -> Self {
|
||||
AggregatedChatStream {
|
||||
inner,
|
||||
cumulative: String::new(),
|
||||
cumulative_reasoning: String::new(),
|
||||
pending: std::collections::VecDeque::new(),
|
||||
mode,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn streaming_mode(inner: S) -> Self {
|
||||
Self::new(inner, AggregateMode::Streaming)
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
mod aggregation;
|
||||
mod chat_completions;
|
||||
pub mod http;
|
||||
mod rate_limits;
|
||||
mod responses;
|
||||
mod retry;
|
||||
mod sse;
|
||||
pub mod types;
|
||||
|
||||
pub(crate) use aggregation::AggregateStreamExt;
|
||||
pub(crate) use aggregation::AggregatedChatStream;
|
||||
pub(crate) use chat_completions::stream_chat_completions;
|
||||
pub use responses::ModelClient;
|
||||
pub(crate) use types::FreeformTool;
|
||||
pub(crate) use types::FreeformToolFormat;
|
||||
pub(crate) use types::Reasoning;
|
||||
pub use types::ResponseEvent;
|
||||
pub use types::ResponseStream;
|
||||
pub(crate) use types::ResponsesApiRequest;
|
||||
pub(crate) use types::ResponsesApiTool;
|
||||
pub(crate) use types::ToolSpec;
|
||||
pub(crate) use types::create_text_param_for_request;
|
||||
@@ -1,86 +0,0 @@
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::RateLimitWindow;
|
||||
use chrono::Utc;
|
||||
use reqwest::header::HeaderMap;
|
||||
|
||||
/// Prefer Codex-specific aggregate rate limit headers if present; fall back
|
||||
/// to raw OpenAI-style request headers otherwise.
|
||||
pub(crate) fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
|
||||
parse_codex_rate_limits(headers).or_else(|| parse_openai_rate_limits(headers))
|
||||
}
|
||||
|
||||
fn parse_codex_rate_limits(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
|
||||
fn parse_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
|
||||
headers
|
||||
.get(name)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.parse::<f64>().ok())
|
||||
}
|
||||
|
||||
fn parse_i64(headers: &HeaderMap, name: &str) -> Option<i64> {
|
||||
headers
|
||||
.get(name)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.parse::<i64>().ok())
|
||||
}
|
||||
|
||||
let primary_used = parse_f64(headers, "x-codex-primary-used-percent");
|
||||
let secondary_used = parse_f64(headers, "x-codex-secondary-used-percent");
|
||||
|
||||
if primary_used.is_none() && secondary_used.is_none() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let primary = primary_used.map(|used_percent| RateLimitWindow {
|
||||
used_percent,
|
||||
window_minutes: parse_i64(headers, "x-codex-primary-window-minutes"),
|
||||
resets_at: parse_i64(headers, "x-codex-primary-reset-at"),
|
||||
});
|
||||
|
||||
let secondary = secondary_used.map(|used_percent| RateLimitWindow {
|
||||
used_percent,
|
||||
window_minutes: parse_i64(headers, "x-codex-secondary-window-minutes"),
|
||||
resets_at: parse_i64(headers, "x-codex-secondary-reset-at"),
|
||||
});
|
||||
|
||||
Some(RateLimitSnapshot { primary, secondary })
|
||||
}
|
||||
|
||||
fn parse_openai_rate_limits(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
|
||||
let limit = headers.get("x-ratelimit-limit-requests")?;
|
||||
let remaining = headers.get("x-ratelimit-remaining-requests")?;
|
||||
let reset_ms = headers.get("x-ratelimit-reset-requests")?;
|
||||
|
||||
let limit = limit.to_str().ok()?.parse::<f64>().ok()?;
|
||||
let remaining = remaining.to_str().ok()?.parse::<f64>().ok()?;
|
||||
let reset_ms = reset_ms.to_str().ok()?.parse::<i64>().ok()?;
|
||||
|
||||
if limit <= 0.0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let used = (limit - remaining).max(0.0);
|
||||
let used_percent = (used / limit) * 100.0;
|
||||
|
||||
let window_minutes = if reset_ms <= 0 {
|
||||
None
|
||||
} else {
|
||||
let seconds = reset_ms / 1000;
|
||||
Some((seconds + 59) / 60)
|
||||
};
|
||||
|
||||
let resets_at = if reset_ms > 0 {
|
||||
Some(Utc::now().timestamp() + reset_ms / 1000)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Some(RateLimitSnapshot {
|
||||
primary: Some(RateLimitWindow {
|
||||
used_percent,
|
||||
window_minutes,
|
||||
resets_at,
|
||||
}),
|
||||
secondary: None,
|
||||
})
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,140 +0,0 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
|
||||
/// Common interface for classifying stream start errors as retryable or fatal.
|
||||
pub(crate) trait RetryableStreamError {
|
||||
/// Returns a delay for the next retry attempt, or `None` if the error
|
||||
/// should be treated as fatal and not retried.
|
||||
fn delay(&self, attempt: u64) -> Option<Duration>;
|
||||
|
||||
/// Converts this error into the final `CodexErr` that should be surfaced
|
||||
/// to callers when retries are exhausted or the error is fatal.
|
||||
fn into_error(self) -> CodexErr;
|
||||
}
|
||||
|
||||
/// Helper to retry a streaming operation with provider-configured backoff.
|
||||
///
|
||||
/// The caller supplies an `attempt_fn` that is invoked once per attempt with
|
||||
/// the current attempt index in `[0, max_attempts]`. On success, the value is
|
||||
/// returned immediately. On error, the error's [`RetryableStreamError`]
|
||||
/// implementation decides whether to retry (with an optional delay) or to
|
||||
/// surface a final error.
|
||||
pub(crate) async fn retry_stream<F, Fut, T, E>(max_attempts: u64, mut attempt_fn: F) -> Result<T>
|
||||
where
|
||||
F: FnMut(u64) -> Fut,
|
||||
Fut: std::future::Future<Output = std::result::Result<T, E>>,
|
||||
E: RetryableStreamError,
|
||||
{
|
||||
for attempt in 0..=max_attempts {
|
||||
match attempt_fn(attempt).await {
|
||||
Ok(value) => return Ok(value),
|
||||
Err(err) => {
|
||||
let delay = err.delay(attempt);
|
||||
|
||||
// Fatal error or final attempt: surface to caller.
|
||||
if attempt == max_attempts || delay.is_none() {
|
||||
return Err(err.into_error());
|
||||
}
|
||||
|
||||
if let Some(duration) = delay {
|
||||
tokio::time::sleep(duration).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unreachable!("retry_stream should always return");
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct TestError {
|
||||
fatal: bool,
|
||||
}
|
||||
|
||||
impl RetryableStreamError for TestError {
|
||||
fn delay(&self, attempt: u64) -> Option<Duration> {
|
||||
if self.fatal {
|
||||
None
|
||||
} else {
|
||||
Some(Duration::from_millis(attempt * 10))
|
||||
}
|
||||
}
|
||||
|
||||
fn into_error(self) -> CodexErr {
|
||||
if self.fatal {
|
||||
CodexErr::InternalServerError
|
||||
} else {
|
||||
CodexErr::Io(std::io::Error::other("retryable"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn retries_until_success_before_max_attempts() {
|
||||
let max_attempts = 3;
|
||||
|
||||
let result: Result<&str> = retry_stream(max_attempts, |attempt| async move {
|
||||
if attempt < 2 {
|
||||
Err(TestError { fatal: false })
|
||||
} else {
|
||||
Ok("ok")
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(result.unwrap(), "ok");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn stops_on_fatal_error_without_retrying() {
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
let calls = Arc::new(AtomicUsize::new(0));
|
||||
let calls_ref = calls.clone();
|
||||
|
||||
let result: Result<()> = retry_stream(5, move |_attempt| {
|
||||
let calls_ref = calls_ref.clone();
|
||||
async move {
|
||||
calls_ref.fetch_add(1, Ordering::SeqCst);
|
||||
Err(TestError { fatal: true })
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(result.is_err());
|
||||
assert_eq!(calls.load(Ordering::SeqCst), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn stops_after_max_attempts_for_retryable_errors() {
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
let calls = Arc::new(AtomicUsize::new(0));
|
||||
let calls_ref = calls.clone();
|
||||
|
||||
let max_attempts = 2;
|
||||
|
||||
let result: Result<()> = retry_stream(max_attempts, move |_attempt| {
|
||||
let calls_ref = calls_ref.clone();
|
||||
async move {
|
||||
calls_ref.fetch_add(1, Ordering::SeqCst);
|
||||
Err(TestError { fatal: false })
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(result.is_err());
|
||||
assert_eq!(calls.load(Ordering::SeqCst), (max_attempts + 1) as usize);
|
||||
}
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use eventsource_stream::Event;
|
||||
use eventsource_stream::EventStreamError as StreamError;
|
||||
use futures::Stream;
|
||||
use futures::StreamExt;
|
||||
use tokio::time::timeout;
|
||||
|
||||
/// Result of polling the next SSE event with timeout and logging applied.
|
||||
pub(crate) enum SseNext {
|
||||
Event(Event),
|
||||
Eof,
|
||||
StreamError(String),
|
||||
Timeout,
|
||||
}
|
||||
|
||||
/// Read the next SSE event from `stream`, applying an idle timeout and recording
|
||||
/// telemetry via `otel_event_manager`.
|
||||
///
|
||||
/// This helper centralizes the boilerplate for:
|
||||
/// - `tokio::time::timeout`
|
||||
/// - calling `log_sse_event`
|
||||
/// - mapping the different outcomes into a small enum that callers can
|
||||
/// interpret according to their own protocol semantics.
|
||||
pub(crate) async fn next_sse_event<S, E>(
|
||||
stream: &mut S,
|
||||
idle_timeout: Duration,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> SseNext
|
||||
where
|
||||
S: Stream<Item = Result<Event, StreamError<E>>> + Unpin,
|
||||
E: std::fmt::Display,
|
||||
{
|
||||
let start = tokio::time::Instant::now();
|
||||
let next_event = timeout(idle_timeout, stream.next()).await;
|
||||
let duration = start.elapsed();
|
||||
otel_event_manager.log_sse_event(&next_event, duration);
|
||||
|
||||
match next_event {
|
||||
Ok(Some(Ok(ev))) => SseNext::Event(ev),
|
||||
Ok(Some(Err(e))) => SseNext::StreamError(e.to_string()),
|
||||
Ok(None) => SseNext::Eof,
|
||||
Err(_) => SseNext::Timeout,
|
||||
}
|
||||
}
|
||||
@@ -1,327 +0,0 @@
|
||||
use crate::error::Result;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::SubAgentSource;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use codex_protocol::config_types::Verbosity as VerbosityConfig;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use futures::Stream;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::pin::Pin;
|
||||
use std::task::Context;
|
||||
use std::task::Poll;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ResponseEvent {
|
||||
Created,
|
||||
OutputItemDone(ResponseItem),
|
||||
OutputItemAdded(ResponseItem),
|
||||
Completed {
|
||||
response_id: String,
|
||||
token_usage: Option<TokenUsage>,
|
||||
},
|
||||
OutputTextDelta(String),
|
||||
ReasoningSummaryDelta {
|
||||
delta: String,
|
||||
summary_index: i64,
|
||||
},
|
||||
ReasoningContentDelta {
|
||||
delta: String,
|
||||
content_index: i64,
|
||||
},
|
||||
ReasoningSummaryPartAdded {
|
||||
summary_index: i64,
|
||||
},
|
||||
RateLimits(RateLimitSnapshot),
|
||||
}
|
||||
|
||||
pub(crate) fn subagent_label(sub: &SubAgentSource) -> String {
|
||||
if let SubAgentSource::Other(label) = sub {
|
||||
label.clone()
|
||||
} else {
|
||||
serde_json::to_value(sub)
|
||||
.ok()
|
||||
.and_then(|v| v.as_str().map(std::string::ToString::to_string))
|
||||
.unwrap_or_else(|| "other".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct Reasoning {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) effort: Option<ReasoningEffortConfig>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) summary: Option<ReasoningSummaryConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(crate) enum TextFormatType {
|
||||
#[default]
|
||||
JsonSchema,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
pub(crate) struct TextFormat {
|
||||
pub(crate) r#type: TextFormatType,
|
||||
pub(crate) strict: bool,
|
||||
pub(crate) schema: Value,
|
||||
pub(crate) name: String,
|
||||
}
|
||||
|
||||
/// Controls under the `text` field in the Responses API for GPT-5.
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
pub(crate) struct TextControls {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) verbosity: Option<OpenAiVerbosity>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) format: Option<TextFormat>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub(crate) enum OpenAiVerbosity {
|
||||
Low,
|
||||
#[default]
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
impl From<VerbosityConfig> for OpenAiVerbosity {
|
||||
fn from(v: VerbosityConfig) -> Self {
|
||||
match v {
|
||||
VerbosityConfig::Low => OpenAiVerbosity::Low,
|
||||
VerbosityConfig::Medium => OpenAiVerbosity::Medium,
|
||||
VerbosityConfig::High => OpenAiVerbosity::High,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Request object that is serialized as JSON and POST'ed when using the
|
||||
/// Responses API.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct ResponsesApiRequest<'a> {
|
||||
pub(crate) model: &'a str,
|
||||
pub(crate) instructions: &'a str,
|
||||
// TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
|
||||
// we code defensively to avoid this case, but perhaps we should use a
|
||||
// separate enum for serialization.
|
||||
pub(crate) input: &'a Vec<ResponseItem>,
|
||||
pub(crate) tools: &'a [serde_json::Value],
|
||||
pub(crate) tool_choice: &'static str,
|
||||
pub(crate) parallel_tool_calls: bool,
|
||||
pub(crate) reasoning: Option<Reasoning>,
|
||||
pub(crate) store: bool,
|
||||
pub(crate) stream: bool,
|
||||
pub(crate) include: Vec<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) prompt_cache_key: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) text: Option<TextControls>,
|
||||
}
|
||||
|
||||
pub(crate) mod tools {
|
||||
use super::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
|
||||
/// Responses API.
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
#[serde(tag = "type")]
|
||||
pub(crate) enum ToolSpec {
|
||||
#[serde(rename = "function")]
|
||||
Function(ResponsesApiTool),
|
||||
#[serde(rename = "local_shell")]
|
||||
LocalShell {},
|
||||
// TODO: Understand why we get an error on web_search although the API docs say it's supported.
|
||||
// https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses#:~:text=%7B%20type%3A%20%22web_search%22%20%7D%2C
|
||||
#[serde(rename = "web_search")]
|
||||
WebSearch {},
|
||||
#[serde(rename = "custom")]
|
||||
Freeform(FreeformTool),
|
||||
}
|
||||
|
||||
impl ToolSpec {
|
||||
pub(crate) fn name(&self) -> &str {
|
||||
match self {
|
||||
ToolSpec::Function(tool) => tool.name.as_str(),
|
||||
ToolSpec::LocalShell {} => "local_shell",
|
||||
ToolSpec::WebSearch {} => "web_search",
|
||||
ToolSpec::Freeform(tool) => tool.name.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct FreeformTool {
|
||||
pub(crate) name: String,
|
||||
pub(crate) description: String,
|
||||
pub(crate) format: FreeformToolFormat,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct FreeformToolFormat {
|
||||
pub(crate) r#type: String,
|
||||
pub(crate) syntax: String,
|
||||
pub(crate) definition: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
pub struct ResponsesApiTool {
|
||||
pub(crate) name: String,
|
||||
pub(crate) description: String,
|
||||
/// TODO: Validation. When strict is set to true, the JSON schema,
|
||||
/// `required` and `additional_properties` must be present. All fields in
|
||||
/// `properties` must be present in `required`.
|
||||
pub(crate) strict: bool,
|
||||
pub(crate) parameters: JsonSchema,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) use tools::FreeformTool;
|
||||
pub(crate) use tools::FreeformToolFormat;
|
||||
pub(crate) use tools::ResponsesApiTool;
|
||||
pub(crate) use tools::ToolSpec;
|
||||
|
||||
pub(crate) fn create_text_param_for_request(
|
||||
verbosity: Option<VerbosityConfig>,
|
||||
output_schema: &Option<Value>,
|
||||
) -> Option<TextControls> {
|
||||
if verbosity.is_none() && output_schema.is_none() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(TextControls {
|
||||
verbosity: verbosity.map(std::convert::Into::into),
|
||||
format: output_schema.as_ref().map(|schema| TextFormat {
|
||||
r#type: TextFormatType::JsonSchema,
|
||||
strict: true,
|
||||
schema: schema.clone(),
|
||||
name: "codex_output_schema".to_string(),
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
pub struct ResponseStream {
|
||||
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
|
||||
}
|
||||
|
||||
impl Stream for ResponseStream {
|
||||
type Item = Result<ResponseEvent>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
self.rx_event.poll_recv(cx)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn serializes_text_verbosity_when_set() {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning: None,
|
||||
store: false,
|
||||
stream: true,
|
||||
include: vec![],
|
||||
prompt_cache_key: None,
|
||||
text: Some(TextControls {
|
||||
verbosity: Some(OpenAiVerbosity::Low),
|
||||
format: None,
|
||||
}),
|
||||
};
|
||||
|
||||
let v = serde_json::to_value(&req).expect("json");
|
||||
assert_eq!(
|
||||
v.get("text")
|
||||
.and_then(|t| t.get("verbosity"))
|
||||
.and_then(|s| s.as_str()),
|
||||
Some("low")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serializes_text_schema_with_strict_format() {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"answer": {"type": "string"}
|
||||
},
|
||||
"required": ["answer"],
|
||||
});
|
||||
let text_controls =
|
||||
create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
|
||||
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning: None,
|
||||
store: false,
|
||||
stream: true,
|
||||
include: vec![],
|
||||
prompt_cache_key: None,
|
||||
text: Some(text_controls),
|
||||
};
|
||||
|
||||
let v = serde_json::to_value(&req).expect("json");
|
||||
let text = v.get("text").expect("text field");
|
||||
assert!(text.get("verbosity").is_none());
|
||||
let format = text.get("format").expect("format field");
|
||||
|
||||
assert_eq!(
|
||||
format.get("name"),
|
||||
Some(&serde_json::Value::String("codex_output_schema".into()))
|
||||
);
|
||||
assert_eq!(
|
||||
format.get("type"),
|
||||
Some(&serde_json::Value::String("json_schema".into()))
|
||||
);
|
||||
assert_eq!(format.get("strict"), Some(&serde_json::Value::Bool(true)));
|
||||
assert_eq!(format.get("schema"), Some(&schema));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn omits_text_when_not_set() {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning: None,
|
||||
store: false,
|
||||
stream: true,
|
||||
include: vec![],
|
||||
prompt_cache_key: None,
|
||||
text: None,
|
||||
};
|
||||
|
||||
let v = serde_json::to_value(&req).expect("json");
|
||||
assert!(v.get("text").is_none());
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,24 @@
|
||||
use crate::client::ToolSpec;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::error::Result;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::TokenUsage;
|
||||
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
|
||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use codex_protocol::config_types::Verbosity as VerbosityConfig;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use futures::Stream;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::ops::Deref;
|
||||
use std::pin::Pin;
|
||||
use std::task::Context;
|
||||
use std::task::Poll;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// Review thread system prompt. Edit `core/src/review_prompt.md` to customize.
|
||||
pub const REVIEW_PROMPT: &str = include_str!("../review_prompt.md");
|
||||
@@ -181,6 +193,194 @@ fn strip_total_output_header(output: &str) -> Option<&str> {
|
||||
Some(remainder)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ResponseEvent {
|
||||
Created,
|
||||
OutputItemDone(ResponseItem),
|
||||
OutputItemAdded(ResponseItem),
|
||||
Completed {
|
||||
response_id: String,
|
||||
token_usage: Option<TokenUsage>,
|
||||
},
|
||||
OutputTextDelta(String),
|
||||
ReasoningSummaryDelta {
|
||||
delta: String,
|
||||
summary_index: i64,
|
||||
},
|
||||
ReasoningContentDelta {
|
||||
delta: String,
|
||||
content_index: i64,
|
||||
},
|
||||
ReasoningSummaryPartAdded {
|
||||
summary_index: i64,
|
||||
},
|
||||
RateLimits(RateLimitSnapshot),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct Reasoning {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) effort: Option<ReasoningEffortConfig>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) summary: Option<ReasoningSummaryConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub(crate) enum TextFormatType {
|
||||
#[default]
|
||||
JsonSchema,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
pub(crate) struct TextFormat {
|
||||
pub(crate) r#type: TextFormatType,
|
||||
pub(crate) strict: bool,
|
||||
pub(crate) schema: Value,
|
||||
pub(crate) name: String,
|
||||
}
|
||||
|
||||
/// Controls under the `text` field in the Responses API for GPT-5.
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
pub(crate) struct TextControls {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) verbosity: Option<OpenAiVerbosity>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) format: Option<TextFormat>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Default, Clone)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub(crate) enum OpenAiVerbosity {
|
||||
Low,
|
||||
#[default]
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
impl From<VerbosityConfig> for OpenAiVerbosity {
|
||||
fn from(v: VerbosityConfig) -> Self {
|
||||
match v {
|
||||
VerbosityConfig::Low => OpenAiVerbosity::Low,
|
||||
VerbosityConfig::Medium => OpenAiVerbosity::Medium,
|
||||
VerbosityConfig::High => OpenAiVerbosity::High,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Request object that is serialized as JSON and POST'ed when using the
|
||||
/// Responses API.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct ResponsesApiRequest<'a> {
|
||||
pub(crate) model: &'a str,
|
||||
pub(crate) instructions: &'a str,
|
||||
// TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
|
||||
// we code defensively to avoid this case, but perhaps we should use a
|
||||
// separate enum for serialization.
|
||||
pub(crate) input: &'a Vec<ResponseItem>,
|
||||
pub(crate) tools: &'a [serde_json::Value],
|
||||
pub(crate) tool_choice: &'static str,
|
||||
pub(crate) parallel_tool_calls: bool,
|
||||
pub(crate) reasoning: Option<Reasoning>,
|
||||
pub(crate) store: bool,
|
||||
pub(crate) stream: bool,
|
||||
pub(crate) include: Vec<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) prompt_cache_key: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) text: Option<TextControls>,
|
||||
}
|
||||
|
||||
pub(crate) mod tools {
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
|
||||
/// Responses API.
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
#[serde(tag = "type")]
|
||||
pub(crate) enum ToolSpec {
|
||||
#[serde(rename = "function")]
|
||||
Function(ResponsesApiTool),
|
||||
#[serde(rename = "local_shell")]
|
||||
LocalShell {},
|
||||
// TODO: Understand why we get an error on web_search although the API docs say it's supported.
|
||||
// https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses#:~:text=%7B%20type%3A%20%22web_search%22%20%7D%2C
|
||||
#[serde(rename = "web_search")]
|
||||
WebSearch {},
|
||||
#[serde(rename = "custom")]
|
||||
Freeform(FreeformTool),
|
||||
}
|
||||
|
||||
impl ToolSpec {
|
||||
pub(crate) fn name(&self) -> &str {
|
||||
match self {
|
||||
ToolSpec::Function(tool) => tool.name.as_str(),
|
||||
ToolSpec::LocalShell {} => "local_shell",
|
||||
ToolSpec::WebSearch {} => "web_search",
|
||||
ToolSpec::Freeform(tool) => tool.name.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct FreeformTool {
|
||||
pub(crate) name: String,
|
||||
pub(crate) description: String,
|
||||
pub(crate) format: FreeformToolFormat,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct FreeformToolFormat {
|
||||
pub(crate) r#type: String,
|
||||
pub(crate) syntax: String,
|
||||
pub(crate) definition: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, PartialEq)]
|
||||
pub struct ResponsesApiTool {
|
||||
pub(crate) name: String,
|
||||
pub(crate) description: String,
|
||||
/// TODO: Validation. When strict is set to true, the JSON schema,
|
||||
/// `required` and `additional_properties` must be present. All fields in
|
||||
/// `properties` must be present in `required`.
|
||||
pub(crate) strict: bool,
|
||||
pub(crate) parameters: JsonSchema,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_text_param_for_request(
|
||||
verbosity: Option<VerbosityConfig>,
|
||||
output_schema: &Option<Value>,
|
||||
) -> Option<TextControls> {
|
||||
if verbosity.is_none() && output_schema.is_none() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(TextControls {
|
||||
verbosity: verbosity.map(std::convert::Into::into),
|
||||
format: output_schema.as_ref().map(|schema| TextFormat {
|
||||
r#type: TextFormatType::JsonSchema,
|
||||
strict: true,
|
||||
schema: schema.clone(),
|
||||
name: "codex_output_schema".to_string(),
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
pub struct ResponseStream {
|
||||
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
|
||||
}
|
||||
|
||||
impl Stream for ResponseStream {
|
||||
type Item = Result<ResponseEvent>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
self.rx_event.poll_recv(cx)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::model_family::find_family_for_model;
|
||||
@@ -227,7 +427,7 @@ mod tests {
|
||||
expects_apply_patch_instructions: false,
|
||||
},
|
||||
InstructionsTestCase {
|
||||
slug: "gpt-5-codex",
|
||||
slug: "gpt-5.1-codex",
|
||||
expects_apply_patch_instructions: false,
|
||||
},
|
||||
InstructionsTestCase {
|
||||
@@ -251,4 +451,104 @@ mod tests {
|
||||
assert_eq!(full, expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serializes_text_verbosity_when_set() {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5.1",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning: None,
|
||||
store: false,
|
||||
stream: true,
|
||||
include: vec![],
|
||||
prompt_cache_key: None,
|
||||
text: Some(TextControls {
|
||||
verbosity: Some(OpenAiVerbosity::Low),
|
||||
format: None,
|
||||
}),
|
||||
};
|
||||
|
||||
let v = serde_json::to_value(&req).expect("json");
|
||||
assert_eq!(
|
||||
v.get("text")
|
||||
.and_then(|t| t.get("verbosity"))
|
||||
.and_then(|s| s.as_str()),
|
||||
Some("low")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serializes_text_schema_with_strict_format() {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let schema = serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"answer": {"type": "string"}
|
||||
},
|
||||
"required": ["answer"],
|
||||
});
|
||||
let text_controls =
|
||||
create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
|
||||
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5.1",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning: None,
|
||||
store: false,
|
||||
stream: true,
|
||||
include: vec![],
|
||||
prompt_cache_key: None,
|
||||
text: Some(text_controls),
|
||||
};
|
||||
|
||||
let v = serde_json::to_value(&req).expect("json");
|
||||
let text = v.get("text").expect("text field");
|
||||
assert!(text.get("verbosity").is_none());
|
||||
let format = text.get("format").expect("format field");
|
||||
|
||||
assert_eq!(
|
||||
format.get("name"),
|
||||
Some(&serde_json::Value::String("codex_output_schema".into()))
|
||||
);
|
||||
assert_eq!(
|
||||
format.get("type"),
|
||||
Some(&serde_json::Value::String("json_schema".into()))
|
||||
);
|
||||
assert_eq!(format.get("strict"), Some(&serde_json::Value::Bool(true)));
|
||||
assert_eq!(format.get("schema"), Some(&schema));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn omits_text_when_not_set() {
|
||||
let input: Vec<ResponseItem> = vec![];
|
||||
let tools: Vec<serde_json::Value> = vec![];
|
||||
let req = ResponsesApiRequest {
|
||||
model: "gpt-5.1",
|
||||
instructions: "i",
|
||||
input: &input,
|
||||
tools: &tools,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning: None,
|
||||
store: false,
|
||||
stream: true,
|
||||
include: vec![],
|
||||
prompt_cache_key: None,
|
||||
text: None,
|
||||
};
|
||||
|
||||
let v = serde_json::to_value(&req).expect("json");
|
||||
assert!(v.get("text").is_none());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,13 +5,10 @@ use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::ResponseEvent;
|
||||
use crate::client_common::REVIEW_PROMPT;
|
||||
use crate::compact;
|
||||
use crate::features::Feature;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::parse_turn_item;
|
||||
use crate::response_processing::process_items;
|
||||
@@ -46,6 +43,7 @@ use mcp_types::ReadResourceResult;
|
||||
use serde_json;
|
||||
use serde_json::Value;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::debug;
|
||||
@@ -56,8 +54,8 @@ use tracing::warn;
|
||||
use crate::ModelProviderInfo;
|
||||
use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::config::types::McpServerTransportConfig;
|
||||
use crate::config::types::ShellEnvironmentPolicy;
|
||||
use crate::context_manager::ContextManager;
|
||||
use crate::environment_context::EnvironmentContext;
|
||||
@@ -122,6 +120,7 @@ use crate::user_instructions::UserInstructions;
|
||||
use crate::user_notification::UserNotification;
|
||||
use crate::util::backoff;
|
||||
use codex_async_utils::OrCancelExt;
|
||||
use codex_execpolicy2::Policy as ExecPolicyV2;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
@@ -167,6 +166,10 @@ impl Codex {
|
||||
|
||||
let user_instructions = get_user_instructions(&config).await;
|
||||
|
||||
let exec_policy_v2 =
|
||||
crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
|
||||
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy2: {err}")))?;
|
||||
|
||||
let config = Arc::new(config);
|
||||
|
||||
let session_configuration = SessionConfiguration {
|
||||
@@ -183,6 +186,7 @@ impl Codex {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: config.features.clone(),
|
||||
exec_policy_v2,
|
||||
session_source,
|
||||
};
|
||||
|
||||
@@ -280,6 +284,7 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
|
||||
pub(crate) exec_policy_v2: Option<Arc<ExecPolicyV2>>,
|
||||
}
|
||||
|
||||
impl TurnContext {
|
||||
@@ -336,6 +341,8 @@ pub(crate) struct SessionConfiguration {
|
||||
|
||||
/// Set of feature flags for this session
|
||||
features: Features,
|
||||
/// Optional execpolicy2 policy, applied only when enabled by feature flag.
|
||||
exec_policy_v2: Option<Arc<ExecPolicyV2>>,
|
||||
|
||||
// TODO(pakrym): Remove config from here
|
||||
original_config_do_not_use: Arc<Config>,
|
||||
@@ -436,6 +443,7 @@ impl Session {
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy_v2: session_configuration.exec_policy_v2.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -476,21 +484,13 @@ impl Session {
|
||||
),
|
||||
};
|
||||
|
||||
// Error messages to dispatch after SessionConfigured is sent.
|
||||
let mut post_session_configured_events = Vec::<Event>::new();
|
||||
|
||||
// Kick off independent async setup tasks in parallel to reduce startup latency.
|
||||
//
|
||||
// - initialize RolloutRecorder with new or resumed session info
|
||||
// - spin up MCP connection manager
|
||||
// - perform default shell discovery
|
||||
// - load history metadata
|
||||
let rollout_fut = RolloutRecorder::new(&config, rollout_params);
|
||||
|
||||
let mcp_fut = McpConnectionManager::new(
|
||||
config.mcp_servers.clone(),
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
);
|
||||
let default_shell_fut = shell::default_user_shell();
|
||||
let history_meta_fut = crate::message_history::history_metadata(&config);
|
||||
let auth_statuses_fut = compute_auth_statuses(
|
||||
@@ -499,15 +499,8 @@ impl Session {
|
||||
);
|
||||
|
||||
// Join all independent futures.
|
||||
let (
|
||||
rollout_recorder,
|
||||
mcp_res,
|
||||
default_shell,
|
||||
(history_log_id, history_entry_count),
|
||||
auth_statuses,
|
||||
) = tokio::join!(
|
||||
let (rollout_recorder, default_shell, (history_log_id, history_entry_count), auth_statuses) = tokio::join!(
|
||||
rollout_fut,
|
||||
mcp_fut,
|
||||
default_shell_fut,
|
||||
history_meta_fut,
|
||||
auth_statuses_fut
|
||||
@@ -519,34 +512,7 @@ impl Session {
|
||||
})?;
|
||||
let rollout_path = rollout_recorder.rollout_path.clone();
|
||||
|
||||
// Handle MCP manager result and record any startup failures.
|
||||
let (mcp_connection_manager, failed_clients) = match mcp_res {
|
||||
Ok((mgr, failures)) => (mgr, failures),
|
||||
Err(e) => {
|
||||
let message = format!("Failed to create MCP connection manager: {e:#}");
|
||||
error!("{message}");
|
||||
post_session_configured_events.push(Event {
|
||||
id: INITIAL_SUBMIT_ID.to_owned(),
|
||||
msg: EventMsg::Error(ErrorEvent { message }),
|
||||
});
|
||||
(McpConnectionManager::default(), Default::default())
|
||||
}
|
||||
};
|
||||
|
||||
// Surface individual client start-up failures to the user.
|
||||
if !failed_clients.is_empty() {
|
||||
for (server_name, err) in failed_clients {
|
||||
let auth_entry = auth_statuses.get(&server_name);
|
||||
let display_message = mcp_init_error_display(&server_name, auth_entry, &err);
|
||||
warn!("MCP client for `{server_name}` failed to start: {err:#}");
|
||||
post_session_configured_events.push(Event {
|
||||
id: INITIAL_SUBMIT_ID.to_owned(),
|
||||
msg: EventMsg::Error(ErrorEvent {
|
||||
message: display_message,
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
let mut post_session_configured_events = Vec::<Event>::new();
|
||||
|
||||
for (alias, feature) in session_configuration.features.legacy_feature_usages() {
|
||||
let canonical = feature.key();
|
||||
@@ -595,7 +561,8 @@ impl Session {
|
||||
warm_model_cache(&session_configuration.model);
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager,
|
||||
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
|
||||
mcp_startup_cancellation_token: CancellationToken::new(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(config.notify.clone()),
|
||||
rollout: Mutex::new(Some(rollout_recorder)),
|
||||
@@ -635,6 +602,18 @@ impl Session {
|
||||
for event in events {
|
||||
sess.send_event_raw(event).await;
|
||||
}
|
||||
sess.services
|
||||
.mcp_connection_manager
|
||||
.write()
|
||||
.await
|
||||
.initialize(
|
||||
config.mcp_servers.clone(),
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
auth_statuses.clone(),
|
||||
tx_event.clone(),
|
||||
sess.services.mcp_startup_cancellation_token.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted.
|
||||
sess.record_initial_history(initial_history).await;
|
||||
@@ -894,6 +873,7 @@ impl Session {
|
||||
let parsed_cmd = parse_command(&command);
|
||||
let event = EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
|
||||
call_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
@@ -1258,6 +1238,8 @@ impl Session {
|
||||
) -> anyhow::Result<ListResourcesResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_resources(server, params)
|
||||
.await
|
||||
}
|
||||
@@ -1269,6 +1251,8 @@ impl Session {
|
||||
) -> anyhow::Result<ListResourceTemplatesResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_resource_templates(server, params)
|
||||
.await
|
||||
}
|
||||
@@ -1280,6 +1264,8 @@ impl Session {
|
||||
) -> anyhow::Result<ReadResourceResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.read_resource(server, params)
|
||||
.await
|
||||
}
|
||||
@@ -1292,19 +1278,29 @@ impl Session {
|
||||
) -> anyhow::Result<CallToolResult> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.call_tool(server, tool, arguments)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
|
||||
pub(crate) async fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
|
||||
self.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.parse_tool_name(tool_name)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn interrupt_task(self: &Arc<Self>) {
|
||||
info!("interrupt received: abort current task, if any");
|
||||
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
let has_active_turn = { self.active_turn.lock().await.is_some() };
|
||||
if has_active_turn {
|
||||
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
} else {
|
||||
self.cancel_mcp_startup().await;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn notifier(&self) -> &UserNotifier {
|
||||
@@ -1318,6 +1314,10 @@ impl Session {
|
||||
fn show_raw_agent_reasoning(&self) -> bool {
|
||||
self.services.show_raw_agent_reasoning
|
||||
}
|
||||
|
||||
async fn cancel_mcp_startup(&self) {
|
||||
self.services.mcp_startup_cancellation_token.cancel();
|
||||
}
|
||||
}
|
||||
|
||||
async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiver<Submission>) {
|
||||
@@ -1575,17 +1575,15 @@ mod handlers {
|
||||
}
|
||||
|
||||
pub async fn list_mcp_tools(sess: &Session, config: &Arc<Config>, sub_id: String) {
|
||||
// This is a cheap lookup from the connection manager's cache.
|
||||
let tools = sess.services.mcp_connection_manager.list_all_tools();
|
||||
let (auth_status_entries, resources, resource_templates) = tokio::join!(
|
||||
let mcp_connection_manager = sess.services.mcp_connection_manager.read().await;
|
||||
let (tools, auth_status_entries, resources, resource_templates) = tokio::join!(
|
||||
mcp_connection_manager.list_all_tools(),
|
||||
compute_auth_statuses(
|
||||
config.mcp_servers.iter(),
|
||||
config.mcp_oauth_credentials_store_mode,
|
||||
),
|
||||
sess.services.mcp_connection_manager.list_all_resources(),
|
||||
sess.services
|
||||
.mcp_connection_manager
|
||||
.list_all_resource_templates()
|
||||
mcp_connection_manager.list_all_resources(),
|
||||
mcp_connection_manager.list_all_resource_templates(),
|
||||
);
|
||||
let auth_statuses = auth_status_entries
|
||||
.iter()
|
||||
@@ -1594,7 +1592,10 @@ mod handlers {
|
||||
let event = Event {
|
||||
id: sub_id,
|
||||
msg: EventMsg::McpListToolsResponse(crate::protocol::McpListToolsResponseEvent {
|
||||
tools,
|
||||
tools: tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
resources,
|
||||
resource_templates,
|
||||
auth_statuses,
|
||||
@@ -1767,6 +1768,7 @@ async fn spawn_review_thread(
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy_v2: parent_turn_context.exec_policy_v2.clone(),
|
||||
};
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
@@ -1924,10 +1926,22 @@ async fn run_turn(
|
||||
input: Vec<ResponseItem>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<TurnRunResult> {
|
||||
let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
|
||||
let mcp_tools = sess
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_tools()
|
||||
.or_cancel(&cancellation_token)
|
||||
.await?;
|
||||
let router = Arc::new(ToolRouter::from_config(
|
||||
&turn_context.tools_config,
|
||||
Some(mcp_tools),
|
||||
Some(
|
||||
mcp_tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
),
|
||||
));
|
||||
|
||||
let model_supports_parallel = turn_context
|
||||
@@ -2096,7 +2110,7 @@ async fn try_run_turn(
|
||||
ResponseEvent::Created => {}
|
||||
ResponseEvent::OutputItemDone(item) => {
|
||||
let previously_active_item = active_item.take();
|
||||
match ToolRouter::build_tool_call(sess.as_ref(), item.clone()) {
|
||||
match ToolRouter::build_tool_call(sess.as_ref(), item.clone()).await {
|
||||
Ok(Some(call)) => {
|
||||
let payload_preview = call.payload.log_payload().into_owned();
|
||||
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
|
||||
@@ -2307,59 +2321,6 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
|
||||
})
|
||||
}
|
||||
|
||||
fn mcp_init_error_display(
|
||||
server_name: &str,
|
||||
entry: Option<&McpAuthStatusEntry>,
|
||||
err: &anyhow::Error,
|
||||
) -> String {
|
||||
if let Some(McpServerTransportConfig::StreamableHttp {
|
||||
url,
|
||||
bearer_token_env_var,
|
||||
http_headers,
|
||||
..
|
||||
}) = &entry.map(|entry| &entry.config.transport)
|
||||
&& url == "https://api.githubcopilot.com/mcp/"
|
||||
&& bearer_token_env_var.is_none()
|
||||
&& http_headers.as_ref().map(HashMap::is_empty).unwrap_or(true)
|
||||
{
|
||||
// GitHub only supports OAUth for first party MCP clients.
|
||||
// That means that the user has to specify a personal access token either via bearer_token_env_var or http_headers.
|
||||
// https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
|
||||
format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
)
|
||||
} else if is_mcp_client_auth_required_error(err) {
|
||||
format!(
|
||||
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
|
||||
)
|
||||
} else if is_mcp_client_startup_timeout_error(err) {
|
||||
let startup_timeout_secs = match entry {
|
||||
Some(entry) => match entry.config.startup_timeout_sec {
|
||||
Some(timeout) => timeout,
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
},
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
}
|
||||
.as_secs();
|
||||
format!(
|
||||
"MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
|
||||
)
|
||||
} else {
|
||||
format!("MCP client for `{server_name}` failed to start: {err:#}")
|
||||
}
|
||||
}
|
||||
|
||||
fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
|
||||
// StreamableHttpError::AuthRequired from the MCP SDK.
|
||||
error.to_string().contains("Auth required")
|
||||
}
|
||||
|
||||
fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
|
||||
let error_message = error.to_string();
|
||||
error_message.contains("request timed out")
|
||||
|| error_message.contains("timed out handshaking with MCP server")
|
||||
}
|
||||
|
||||
use crate::features::Features;
|
||||
#[cfg(test)]
|
||||
pub(crate) use tests::make_session_and_context;
|
||||
@@ -2369,10 +2330,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::config::ConfigOverrides;
|
||||
use crate::config::ConfigToml;
|
||||
use crate::config::types::McpServerConfig;
|
||||
use crate::config::types::McpServerTransportConfig;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::tools::format_exec_output_str;
|
||||
|
||||
use crate::protocol::CompactedItem;
|
||||
@@ -2392,7 +2350,6 @@ mod tests {
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::McpAuthStatus;
|
||||
use std::time::Duration;
|
||||
use tokio::time::sleep;
|
||||
|
||||
@@ -2600,13 +2557,15 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy_v2: None,
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
let state = SessionState::new(session_configuration.clone());
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
|
||||
mcp_startup_cancellation_token: CancellationToken::new(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
@@ -2676,13 +2635,15 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy_v2: None,
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
let state = SessionState::new(session_configuration.clone());
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
|
||||
mcp_startup_cancellation_token: CancellationToken::new(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
@@ -2863,9 +2824,23 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn fatal_tool_error_stops_turn_and_reports_error() {
|
||||
let (session, turn_context, _rx) = make_session_and_context_with_rx();
|
||||
let tools = {
|
||||
session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_tools()
|
||||
.await
|
||||
};
|
||||
let router = ToolRouter::from_config(
|
||||
&turn_context.tools_config,
|
||||
Some(session.services.mcp_connection_manager.list_all_tools()),
|
||||
Some(
|
||||
tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
),
|
||||
);
|
||||
let item = ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
@@ -2876,6 +2851,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let call = ToolRouter::build_tool_call(session.as_ref(), item.clone())
|
||||
.await
|
||||
.expect("build tool call")
|
||||
.expect("tool call present");
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
@@ -3125,7 +3101,6 @@ mod tests {
|
||||
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
|
||||
assert!(exec_output.output.contains("hi"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
use crate::protocol::AskForApproval;
|
||||
@@ -3167,89 +3142,4 @@ mod tests {
|
||||
|
||||
pretty_assertions::assert_eq!(output, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_prompts_for_github_pat() {
|
||||
let server_name = "github";
|
||||
let entry = McpAuthStatusEntry {
|
||||
config: McpServerConfig {
|
||||
transport: McpServerTransportConfig::StreamableHttp {
|
||||
url: "https://api.githubcopilot.com/mcp/".to_string(),
|
||||
bearer_token_env_var: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: None,
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
auth_status: McpAuthStatus::Unsupported,
|
||||
};
|
||||
let err = anyhow::anyhow!("OAuth is unsupported");
|
||||
|
||||
let display = mcp_init_error_display(server_name, Some(&entry), &err);
|
||||
|
||||
let expected = format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
);
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_prompts_for_login_when_auth_required() {
|
||||
let server_name = "example";
|
||||
let err = anyhow::anyhow!("Auth required for server");
|
||||
|
||||
let display = mcp_init_error_display(server_name, None, &err);
|
||||
|
||||
let expected = format!(
|
||||
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
|
||||
);
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_reports_generic_errors() {
|
||||
let server_name = "custom";
|
||||
let entry = McpAuthStatusEntry {
|
||||
config: McpServerConfig {
|
||||
transport: McpServerTransportConfig::StreamableHttp {
|
||||
url: "https://example.com".to_string(),
|
||||
bearer_token_env_var: Some("TOKEN".to_string()),
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: None,
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
auth_status: McpAuthStatus::Unsupported,
|
||||
};
|
||||
let err = anyhow::anyhow!("boom");
|
||||
|
||||
let display = mcp_init_error_display(server_name, Some(&entry), &err);
|
||||
|
||||
let expected = format!("MCP client for `{server_name}` failed to start: {err:#}");
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_includes_startup_timeout_hint() {
|
||||
let server_name = "slow";
|
||||
let err = anyhow::anyhow!("request timed out");
|
||||
|
||||
let display = mcp_init_error_display(server_name, None, &err);
|
||||
|
||||
assert_eq!(
|
||||
"MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX",
|
||||
display
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::Prompt;
|
||||
use crate::ResponseEvent;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::codex::get_last_assistant_message_from_turn;
|
||||
|
||||
@@ -584,7 +584,7 @@ mod tests {
|
||||
codex_home,
|
||||
None,
|
||||
&[ConfigEdit::SetModel {
|
||||
model: Some("gpt-5-codex".to_string()),
|
||||
model: Some("gpt-5.1-codex".to_string()),
|
||||
effort: Some(ReasoningEffort::High),
|
||||
}],
|
||||
)
|
||||
@@ -592,7 +592,7 @@ mod tests {
|
||||
|
||||
let contents =
|
||||
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
let expected = r#"model = "gpt-5-codex"
|
||||
let expected = r#"model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "high"
|
||||
"#;
|
||||
assert_eq!(contents, expected);
|
||||
@@ -722,7 +722,7 @@ model = "o5-preview"
|
||||
std::fs::write(
|
||||
codex_home.join(CONFIG_TOML_FILE),
|
||||
r#"[profiles."team a"]
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
"#,
|
||||
)
|
||||
.expect("seed");
|
||||
@@ -972,14 +972,14 @@ B = \"2\"
|
||||
let codex_home = tmp.path().to_path_buf();
|
||||
|
||||
ConfigEditsBuilder::new(&codex_home)
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
|
||||
.apply()
|
||||
.await
|
||||
.expect("persist");
|
||||
|
||||
let contents =
|
||||
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
let expected = r#"model = "gpt-5-codex"
|
||||
let expected = r#"model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "high"
|
||||
"#;
|
||||
assert_eq!(contents, expected);
|
||||
@@ -1001,11 +1001,11 @@ model_reasoning_effort = "low"
|
||||
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
assert_eq!(contents, initial_expected);
|
||||
|
||||
let updated_expected = r#"model = "gpt-5-codex"
|
||||
let updated_expected = r#"model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "high"
|
||||
"#;
|
||||
ConfigEditsBuilder::new(codex_home)
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
|
||||
.apply_blocking()
|
||||
.expect("persist update");
|
||||
contents = std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
|
||||
@@ -25,7 +25,9 @@ use crate::git_info::resolve_root_git_project_for_trust;
|
||||
use crate::model_family::ModelFamily;
|
||||
use crate::model_family::derive_default_model_family;
|
||||
use crate::model_family::find_family_for_model;
|
||||
use crate::model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::OLLAMA_OSS_PROVIDER_ID;
|
||||
use crate::model_provider_info::built_in_model_providers;
|
||||
use crate::openai_model_info::get_model_info;
|
||||
use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME;
|
||||
@@ -60,11 +62,11 @@ pub mod profile;
|
||||
pub mod types;
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1";
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
|
||||
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
|
||||
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
|
||||
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex";
|
||||
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex";
|
||||
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex";
|
||||
|
||||
/// Maximum number of bytes of the documentation that will be embedded. Larger
|
||||
/// files are *silently truncated* to this size so we do not take up too much of
|
||||
@@ -79,7 +81,7 @@ pub struct Config {
|
||||
/// Optional override of model selection.
|
||||
pub model: String,
|
||||
|
||||
/// Model used specifically for review sessions. Defaults to "gpt-5-codex".
|
||||
/// Model used specifically for review sessions. Defaults to "gpt-5.1-codex".
|
||||
pub review_model: String,
|
||||
|
||||
pub model_family: ModelFamily,
|
||||
@@ -466,6 +468,48 @@ pub fn set_project_trust_level(
|
||||
.apply_blocking()
|
||||
}
|
||||
|
||||
/// Save the default OSS provider preference to config.toml
|
||||
pub fn set_default_oss_provider(codex_home: &Path, provider: &str) -> std::io::Result<()> {
|
||||
// Validate that the provider is one of the known OSS providers
|
||||
match provider {
|
||||
LMSTUDIO_OSS_PROVIDER_ID | OLLAMA_OSS_PROVIDER_ID => {
|
||||
// Valid provider, continue
|
||||
}
|
||||
_ => {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!(
|
||||
"Invalid OSS provider '{provider}'. Must be one of: {LMSTUDIO_OSS_PROVIDER_ID}, {OLLAMA_OSS_PROVIDER_ID}"
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
||||
|
||||
// Read existing config or create empty string if file doesn't exist
|
||||
let content = match std::fs::read_to_string(&config_path) {
|
||||
Ok(content) => content,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
// Parse as DocumentMut for editing while preserving structure
|
||||
let mut doc = content.parse::<DocumentMut>().map_err(|e| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("failed to parse config.toml: {e}"),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Set the default_oss_provider at root level
|
||||
use toml_edit::value;
|
||||
doc["oss_provider"] = value(provider);
|
||||
|
||||
// Write the modified document back
|
||||
std::fs::write(&config_path, doc.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Apply a single dotted-path override onto a TOML value.
|
||||
fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
|
||||
use toml::value::Table;
|
||||
@@ -663,6 +707,8 @@ pub struct ConfigToml {
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
/// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama".
|
||||
pub oss_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl From<ConfigToml> for UserSavedConfig {
|
||||
@@ -851,6 +897,34 @@ pub struct ConfigOverrides {
|
||||
pub additional_writable_roots: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
/// Resolves the OSS provider from CLI override, profile config, or global config.
|
||||
/// Returns `None` if no provider is configured at any level.
|
||||
pub fn resolve_oss_provider(
|
||||
explicit_provider: Option<&str>,
|
||||
config_toml: &ConfigToml,
|
||||
config_profile: Option<String>,
|
||||
) -> Option<String> {
|
||||
if let Some(provider) = explicit_provider {
|
||||
// Explicit provider specified (e.g., via --local-provider)
|
||||
Some(provider.to_string())
|
||||
} else {
|
||||
// Check profile config first, then global config
|
||||
let profile = config_toml.get_config_profile(config_profile).ok();
|
||||
if let Some(profile) = &profile {
|
||||
// Check if profile has an oss provider
|
||||
if let Some(profile_oss_provider) = &profile.oss_provider {
|
||||
Some(profile_oss_provider.clone())
|
||||
}
|
||||
// If not then check if the toml has an oss provider
|
||||
else {
|
||||
config_toml.oss_provider.clone()
|
||||
}
|
||||
} else {
|
||||
config_toml.oss_provider.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Meant to be used exclusively for tests: `load_with_overrides()` should
|
||||
/// be used in all other cases.
|
||||
@@ -2542,7 +2616,7 @@ url = "https://example.com/mcp"
|
||||
let codex_home = TempDir::new()?;
|
||||
|
||||
ConfigEditsBuilder::new(codex_home.path())
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
|
||||
.apply()
|
||||
.await?;
|
||||
|
||||
@@ -2550,7 +2624,7 @@ url = "https://example.com/mcp"
|
||||
tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
|
||||
let parsed: ConfigToml = toml::from_str(&serialized)?;
|
||||
|
||||
assert_eq!(parsed.model.as_deref(), Some("gpt-5-codex"));
|
||||
assert_eq!(parsed.model.as_deref(), Some("gpt-5.1-codex"));
|
||||
assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
|
||||
|
||||
Ok(())
|
||||
@@ -2564,7 +2638,7 @@ url = "https://example.com/mcp"
|
||||
tokio::fs::write(
|
||||
&config_path,
|
||||
r#"
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
model_reasoning_effort = "medium"
|
||||
|
||||
[profiles.dev]
|
||||
@@ -2600,7 +2674,7 @@ model = "gpt-4.1"
|
||||
|
||||
ConfigEditsBuilder::new(codex_home.path())
|
||||
.with_profile(Some("dev"))
|
||||
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::Medium))
|
||||
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::Medium))
|
||||
.apply()
|
||||
.await?;
|
||||
|
||||
@@ -2612,7 +2686,7 @@ model = "gpt-4.1"
|
||||
.get("dev")
|
||||
.expect("profile should be created");
|
||||
|
||||
assert_eq!(profile.model.as_deref(), Some("gpt-5-codex"));
|
||||
assert_eq!(profile.model.as_deref(), Some("gpt-5.1-codex"));
|
||||
assert_eq!(
|
||||
profile.model_reasoning_effort,
|
||||
Some(ReasoningEffort::Medium)
|
||||
@@ -2634,7 +2708,7 @@ model = "gpt-4"
|
||||
model_reasoning_effort = "medium"
|
||||
|
||||
[profiles.prod]
|
||||
model = "gpt-5-codex"
|
||||
model = "gpt-5.1-codex"
|
||||
"#,
|
||||
)
|
||||
.await?;
|
||||
@@ -2663,7 +2737,7 @@ model = "gpt-5-codex"
|
||||
.profiles
|
||||
.get("prod")
|
||||
.and_then(|profile| profile.model.as_deref()),
|
||||
Some("gpt-5-codex"),
|
||||
Some("gpt-5.1-codex"),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -2778,7 +2852,7 @@ model_provider = "openai"
|
||||
approval_policy = "on-failure"
|
||||
|
||||
[profiles.gpt5]
|
||||
model = "gpt-5"
|
||||
model = "gpt-5.1"
|
||||
model_provider = "openai"
|
||||
approval_policy = "on-failure"
|
||||
model_reasoning_effort = "high"
|
||||
@@ -3094,9 +3168,9 @@ model_verbosity = "high"
|
||||
fixture.codex_home(),
|
||||
)?;
|
||||
let expected_gpt5_profile_config = Config {
|
||||
model: "gpt-5".to_string(),
|
||||
model: "gpt-5.1".to_string(),
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("gpt-5").expect("known model slug"),
|
||||
model_family: find_family_for_model("gpt-5.1").expect("known model slug"),
|
||||
model_context_window: Some(272_000),
|
||||
model_max_output_tokens: Some(128_000),
|
||||
model_auto_compact_token_limit: Some(244_800),
|
||||
@@ -3265,6 +3339,41 @@ trust_level = "trusted"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_default_oss_provider() -> std::io::Result<()> {
|
||||
let temp_dir = TempDir::new()?;
|
||||
let codex_home = temp_dir.path();
|
||||
let config_path = codex_home.join(CONFIG_TOML_FILE);
|
||||
|
||||
// Test setting valid provider on empty config
|
||||
set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
|
||||
let content = std::fs::read_to_string(&config_path)?;
|
||||
assert!(content.contains("oss_provider = \"ollama\""));
|
||||
|
||||
// Test updating existing config
|
||||
std::fs::write(&config_path, "model = \"gpt-4\"\n")?;
|
||||
set_default_oss_provider(codex_home, LMSTUDIO_OSS_PROVIDER_ID)?;
|
||||
let content = std::fs::read_to_string(&config_path)?;
|
||||
assert!(content.contains("oss_provider = \"lmstudio\""));
|
||||
assert!(content.contains("model = \"gpt-4\""));
|
||||
|
||||
// Test overwriting existing oss_provider
|
||||
set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
|
||||
let content = std::fs::read_to_string(&config_path)?;
|
||||
assert!(content.contains("oss_provider = \"ollama\""));
|
||||
assert!(!content.contains("oss_provider = \"lmstudio\""));
|
||||
|
||||
// Test invalid provider
|
||||
let result = set_default_oss_provider(codex_home, "invalid_provider");
|
||||
assert!(result.is_err());
|
||||
let error = result.unwrap_err();
|
||||
assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
|
||||
assert!(error.to_string().contains("Invalid OSS provider"));
|
||||
assert!(error.to_string().contains("invalid_provider"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_untrusted_project_gets_workspace_write_sandbox() -> anyhow::Result<()> {
|
||||
let config_with_untrusted = r#"
|
||||
@@ -3295,6 +3404,85 @@ trust_level = "untrusted"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_explicit_override() {
|
||||
let config_toml = ConfigToml::default();
|
||||
let result = resolve_oss_provider(Some("custom-provider"), &config_toml, None);
|
||||
assert_eq!(result, Some("custom-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_from_profile() {
|
||||
let mut profiles = std::collections::HashMap::new();
|
||||
let profile = ConfigProfile {
|
||||
oss_provider: Some("profile-provider".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
profiles.insert("test-profile".to_string(), profile);
|
||||
let config_toml = ConfigToml {
|
||||
profiles,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
|
||||
assert_eq!(result, Some("profile-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_from_global_config() {
|
||||
let config_toml = ConfigToml {
|
||||
oss_provider: Some("global-provider".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(None, &config_toml, None);
|
||||
assert_eq!(result, Some("global-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_profile_fallback_to_global() {
|
||||
let mut profiles = std::collections::HashMap::new();
|
||||
let profile = ConfigProfile::default(); // No oss_provider set
|
||||
profiles.insert("test-profile".to_string(), profile);
|
||||
let config_toml = ConfigToml {
|
||||
oss_provider: Some("global-provider".to_string()),
|
||||
profiles,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
|
||||
assert_eq!(result, Some("global-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_none_when_not_configured() {
|
||||
let config_toml = ConfigToml::default();
|
||||
let result = resolve_oss_provider(None, &config_toml, None);
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_oss_provider_explicit_overrides_all() {
|
||||
let mut profiles = std::collections::HashMap::new();
|
||||
let profile = ConfigProfile {
|
||||
oss_provider: Some("profile-provider".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
profiles.insert("test-profile".to_string(), profile);
|
||||
let config_toml = ConfigToml {
|
||||
oss_provider: Some("global-provider".to_string()),
|
||||
profiles,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = resolve_oss_provider(
|
||||
Some("explicit-provider"),
|
||||
&config_toml,
|
||||
Some("test-profile".to_string()),
|
||||
);
|
||||
assert_eq!(result, Some("explicit-provider".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_untrusted_project_gets_unless_trusted_approval_policy() -> std::io::Result<()> {
|
||||
let codex_home = TempDir::new()?;
|
||||
|
||||
@@ -33,6 +33,7 @@ pub struct ConfigProfile {
|
||||
/// Optional feature toggles scoped to this profile.
|
||||
#[serde(default)]
|
||||
pub features: Option<crate::features::FeaturesToml>,
|
||||
pub oss_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl From<ConfigProfile> for codex_app_server_protocol::Profile {
|
||||
|
||||
@@ -154,13 +154,11 @@ impl CodexRequestBuilder {
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Originator {
|
||||
pub value: String,
|
||||
pub header_value: HeaderValue,
|
||||
}
|
||||
|
||||
static ORIGINATOR: OnceLock<Originator> = OnceLock::new();
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -189,16 +189,20 @@ async fn exec_windows_sandbox(
|
||||
};
|
||||
|
||||
let sandbox_cwd = cwd.clone();
|
||||
let logs_base_dir = find_codex_home().ok();
|
||||
let codex_home = find_codex_home().map_err(|err| {
|
||||
CodexErr::Io(io::Error::other(format!(
|
||||
"windows sandbox: failed to resolve codex_home: {err}"
|
||||
)))
|
||||
})?;
|
||||
let spawn_res = tokio::task::spawn_blocking(move || {
|
||||
run_windows_sandbox_capture(
|
||||
policy_str,
|
||||
&sandbox_cwd,
|
||||
codex_home.as_ref(),
|
||||
command,
|
||||
&cwd,
|
||||
env,
|
||||
timeout_ms,
|
||||
logs_base_dir.as_deref(),
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
293
codex-rs/core/src/exec_policy.rs
Normal file
293
codex-rs/core/src/exec_policy.rs
Normal file
@@ -0,0 +1,293 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use codex_execpolicy2::Decision;
|
||||
use codex_execpolicy2::Evaluation;
|
||||
use codex_execpolicy2::Policy;
|
||||
use codex_execpolicy2::PolicyParser;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::bash::parse_shell_lc_plain_commands;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
|
||||
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
|
||||
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ExecPolicyError {
|
||||
#[error("failed to read execpolicy files from {dir}: {source}")]
|
||||
ReadDir {
|
||||
dir: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to read execpolicy file {path}: {source}")]
|
||||
ReadFile {
|
||||
path: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to parse execpolicy file {path}: {source}")]
|
||||
ParsePolicy {
|
||||
path: String,
|
||||
source: codex_execpolicy2::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub(crate) fn exec_policy_for(
|
||||
features: &Features,
|
||||
codex_home: &Path,
|
||||
) -> Result<Option<Arc<Policy>>, ExecPolicyError> {
|
||||
if !features.enabled(Feature::ExecPolicyV2) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let policy_dir = codex_home.to_path_buf();
|
||||
let entries = match fs::read_dir(&policy_dir) {
|
||||
Ok(entries) => entries,
|
||||
Err(source) if source.kind() == std::io::ErrorKind::NotFound => return Ok(None),
|
||||
Err(source) => {
|
||||
return Err(ExecPolicyError::ReadDir {
|
||||
dir: policy_dir,
|
||||
source,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let mut policy_paths: Vec<PathBuf> = Vec::new();
|
||||
for entry in entries {
|
||||
let entry = entry.map_err(|source| ExecPolicyError::ReadDir {
|
||||
dir: policy_dir.clone(),
|
||||
source,
|
||||
})?;
|
||||
let path = entry.path();
|
||||
if path
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.is_some_and(|ext| ext == "codexpolicy")
|
||||
&& path.is_file()
|
||||
{
|
||||
policy_paths.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
policy_paths.sort();
|
||||
|
||||
let mut parser = PolicyParser::new();
|
||||
for policy_path in &policy_paths {
|
||||
let contents =
|
||||
fs::read_to_string(policy_path).map_err(|source| ExecPolicyError::ReadFile {
|
||||
path: policy_path.clone(),
|
||||
source,
|
||||
})?;
|
||||
let identifier = policy_path.to_string_lossy().to_string();
|
||||
parser
|
||||
.parse(&identifier, &contents)
|
||||
.map_err(|source| ExecPolicyError::ParsePolicy {
|
||||
path: identifier,
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
|
||||
let policy = Arc::new(parser.build());
|
||||
tracing::debug!(
|
||||
file_count = policy_paths.len(),
|
||||
"loaded execpolicy2 from {}",
|
||||
policy_dir.display()
|
||||
);
|
||||
|
||||
Ok(Some(policy))
|
||||
}
|
||||
|
||||
fn evaluate_with_policy(
|
||||
policy: &Policy,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
) -> Option<ApprovalRequirement> {
|
||||
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
|
||||
let evaluation = policy.check_multiple(commands.iter());
|
||||
|
||||
match evaluation {
|
||||
Evaluation::Match { decision, .. } => match decision {
|
||||
Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string(),
|
||||
}),
|
||||
Decision::Prompt => {
|
||||
let reason = PROMPT_REASON.to_string();
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
Some(ApprovalRequirement::Forbidden { reason })
|
||||
} else {
|
||||
Some(ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(reason),
|
||||
})
|
||||
}
|
||||
}
|
||||
Decision::Allow => Some(ApprovalRequirement::Skip),
|
||||
},
|
||||
Evaluation::NoMatch => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn approval_requirement_for_command(
|
||||
policy: Option<&Policy>,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
with_escalated_permissions: bool,
|
||||
) -> ApprovalRequirement {
|
||||
if let Some(policy) = policy
|
||||
&& let Some(requirement) = evaluate_with_policy(policy, command, approval_policy)
|
||||
{
|
||||
return requirement;
|
||||
}
|
||||
|
||||
if requires_initial_appoval(
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
command,
|
||||
with_escalated_permissions,
|
||||
) {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[test]
|
||||
fn returns_none_when_feature_disabled() {
|
||||
let features = Features::with_defaults();
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
|
||||
let policy = exec_policy_for(&features, temp_dir.path()).expect("policy result");
|
||||
|
||||
assert!(policy.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn returns_none_when_policy_dir_is_missing() {
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::ExecPolicyV2);
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
let missing_dir = temp_dir.path().join("missing");
|
||||
|
||||
let policy = exec_policy_for(&features, &missing_dir).expect("policy result");
|
||||
|
||||
assert!(policy.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn evaluates_bash_lc_inner_commands() {
|
||||
let policy_src = r#"
|
||||
prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
|
||||
let forbidden_script = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"rm -rf /tmp".to_string(),
|
||||
];
|
||||
|
||||
let requirement =
|
||||
evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
|
||||
.expect("expected match for forbidden command");
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_prefers_execpolicy_match() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
Some(&policy),
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(PROMPT_REASON.to_string())
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_respects_approval_policy() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
Some(&policy),
|
||||
&command,
|
||||
AskForApproval::Never,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: PROMPT_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_falls_back_to_heuristics() {
|
||||
let command = vec!["python".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
None,
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -40,12 +40,16 @@ pub enum Feature {
|
||||
ViewImageTool,
|
||||
/// Allow the model to request web searches.
|
||||
WebSearchRequest,
|
||||
/// Gate the execpolicy2 enforcement for shell/unified exec.
|
||||
ExecPolicyV2,
|
||||
/// Enable the model-based risk assessments for sandboxed commands.
|
||||
SandboxCommandAssessment,
|
||||
/// Create a ghost commit at each turn.
|
||||
GhostCommit,
|
||||
/// Enable Windows sandbox (restricted token) on Windows.
|
||||
WindowsSandbox,
|
||||
/// Enable the default shell tool.
|
||||
ShellTool,
|
||||
}
|
||||
|
||||
impl Feature {
|
||||
@@ -283,6 +287,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ExecPolicyV2,
|
||||
key: "exec_policy_v2",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SandboxCommandAssessment,
|
||||
key: "experimental_sandbox_command_assessment",
|
||||
@@ -301,4 +311,10 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ShellTool,
|
||||
key: "shell_tool",
|
||||
stage: Stage::Stable,
|
||||
default_enabled: true,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -8,7 +8,8 @@
|
||||
mod apply_patch;
|
||||
pub mod auth;
|
||||
pub mod bash;
|
||||
pub mod client;
|
||||
mod chat_completions;
|
||||
mod client;
|
||||
mod client_common;
|
||||
pub mod codex;
|
||||
mod codex_conversation;
|
||||
@@ -23,6 +24,7 @@ mod environment_context;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
pub mod exec_env;
|
||||
mod exec_policy;
|
||||
pub mod features;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
@@ -39,8 +41,11 @@ pub mod token_data;
|
||||
mod truncate;
|
||||
mod unified_exec;
|
||||
mod user_instructions;
|
||||
pub use model_provider_info::BUILT_IN_OSS_MODEL_PROVIDER_ID;
|
||||
pub use model_provider_info::DEFAULT_LMSTUDIO_PORT;
|
||||
pub use model_provider_info::DEFAULT_OLLAMA_PORT;
|
||||
pub use model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
pub use model_provider_info::ModelProviderInfo;
|
||||
pub use model_provider_info::OLLAMA_OSS_PROVIDER_ID;
|
||||
pub use model_provider_info::WireApi;
|
||||
pub use model_provider_info::built_in_model_providers;
|
||||
pub use model_provider_info::create_oss_provider_with_base_url;
|
||||
@@ -53,6 +58,7 @@ pub use conversation_manager::NewConversation;
|
||||
// Re-export common auth types for workspace consumers
|
||||
pub use auth::AuthManager;
|
||||
pub use auth::CodexAuth;
|
||||
pub mod default_client;
|
||||
pub mod model_family;
|
||||
mod openai_model_info;
|
||||
pub mod project_doc;
|
||||
@@ -94,10 +100,10 @@ pub use codex_protocol::protocol;
|
||||
pub use codex_protocol::config_types as protocol_config_types;
|
||||
|
||||
pub use client::ModelClient;
|
||||
pub use client::ResponseEvent;
|
||||
pub use client::ResponseStream;
|
||||
pub use client_common::Prompt;
|
||||
pub use client_common::REVIEW_PROMPT;
|
||||
pub use client_common::ResponseEvent;
|
||||
pub use client_common::ResponseStream;
|
||||
pub use codex_protocol::models::ContentItem;
|
||||
pub use codex_protocol::models::LocalShellAction;
|
||||
pub use codex_protocol::models::LocalShellExecAction;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -132,7 +132,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("gpt-3.5") {
|
||||
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("test-gpt-5-codex") {
|
||||
} else if slug.starts_with("test-gpt-5") {
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
|
||||
@@ -6,15 +6,16 @@
|
||||
//! key. These override or extend the defaults at runtime.
|
||||
|
||||
use crate::CodexAuth;
|
||||
use crate::client::http::CodexHttpClient;
|
||||
use crate::client::http::CodexRequestBuilder;
|
||||
use crate::error::EnvVarError;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::default_client::CodexRequestBuilder;
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::env::VarError;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::error::EnvVarError;
|
||||
const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
|
||||
const DEFAULT_STREAM_MAX_RETRIES: u64 = 5;
|
||||
const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;
|
||||
@@ -257,9 +258,11 @@ impl ModelProviderInfo {
|
||||
}
|
||||
}
|
||||
|
||||
const DEFAULT_OLLAMA_PORT: u32 = 11434;
|
||||
pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;
|
||||
pub const DEFAULT_OLLAMA_PORT: u16 = 11434;
|
||||
|
||||
pub const BUILT_IN_OSS_MODEL_PROVIDER_ID: &str = "oss";
|
||||
pub const LMSTUDIO_OSS_PROVIDER_ID: &str = "lmstudio";
|
||||
pub const OLLAMA_OSS_PROVIDER_ID: &str = "ollama";
|
||||
|
||||
/// Built-in default provider list.
|
||||
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
@@ -310,14 +313,21 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
requires_openai_auth: true,
|
||||
},
|
||||
),
|
||||
(BUILT_IN_OSS_MODEL_PROVIDER_ID, create_oss_provider()),
|
||||
(
|
||||
OLLAMA_OSS_PROVIDER_ID,
|
||||
create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Chat),
|
||||
),
|
||||
(
|
||||
LMSTUDIO_OSS_PROVIDER_ID,
|
||||
create_oss_provider(DEFAULT_LMSTUDIO_PORT, WireApi::Responses),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn create_oss_provider() -> ModelProviderInfo {
|
||||
pub fn create_oss_provider(default_provider_port: u16, wire_api: WireApi) -> ModelProviderInfo {
|
||||
// These CODEX_OSS_ environment variables are experimental: we may
|
||||
// switch to reading values from config.toml instead.
|
||||
let codex_oss_base_url = match std::env::var("CODEX_OSS_BASE_URL")
|
||||
@@ -330,22 +340,21 @@ pub fn create_oss_provider() -> ModelProviderInfo {
|
||||
port = std::env::var("CODEX_OSS_PORT")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty())
|
||||
.and_then(|v| v.parse::<u32>().ok())
|
||||
.unwrap_or(DEFAULT_OLLAMA_PORT)
|
||||
.and_then(|v| v.parse::<u16>().ok())
|
||||
.unwrap_or(default_provider_port)
|
||||
),
|
||||
};
|
||||
|
||||
create_oss_provider_with_base_url(&codex_oss_base_url)
|
||||
create_oss_provider_with_base_url(&codex_oss_base_url, wire_api)
|
||||
}
|
||||
|
||||
pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
|
||||
pub fn create_oss_provider_with_base_url(base_url: &str, wire_api: WireApi) -> ModelProviderInfo {
|
||||
ModelProviderInfo {
|
||||
name: "gpt-oss".into(),
|
||||
base_url: Some(base_url.into()),
|
||||
env_key: None,
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Chat,
|
||||
wire_api,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::client::http::originator;
|
||||
use crate::config::Config;
|
||||
use crate::config::types::OtelExporterKind as Kind;
|
||||
use crate::config::types::OtelHttpProtocol as Protocol;
|
||||
use crate::default_client::originator;
|
||||
use codex_otel::config::OtelExporter;
|
||||
use codex_otel::config::OtelHttpProtocol;
|
||||
use codex_otel::config::OtelSettings;
|
||||
|
||||
@@ -6,7 +6,7 @@ use shlex::split as shlex_split;
|
||||
use shlex::try_join as shlex_try_join;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn shlex_join(tokens: &[String]) -> String {
|
||||
pub fn shlex_join(tokens: &[String]) -> String {
|
||||
shlex_try_join(tokens.iter().map(String::as_str))
|
||||
.unwrap_or_else(|_| "<command included NUL byte>".to_string())
|
||||
}
|
||||
|
||||
@@ -72,6 +72,8 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
|
||||
| EventMsg::GetHistoryEntryResponse(_)
|
||||
| EventMsg::UndoStarted(_)
|
||||
| EventMsg::McpListToolsResponse(_)
|
||||
| EventMsg::McpStartupUpdate(_)
|
||||
| EventMsg::McpStartupComplete(_)
|
||||
| EventMsg::ListCustomPromptsResponse(_)
|
||||
| EventMsg::PlanUpdate(_)
|
||||
| EventMsg::ShutdownComplete
|
||||
|
||||
@@ -23,8 +23,8 @@ use super::list::ConversationsPage;
|
||||
use super::list::Cursor;
|
||||
use super::list::get_conversations;
|
||||
use super::policy::is_persisted_response_item;
|
||||
use crate::client::http::originator;
|
||||
use crate::config::Config;
|
||||
use crate::default_client::originator;
|
||||
use crate::git_info::collect_git_info;
|
||||
use codex_protocol::protocol::InitialHistory;
|
||||
use codex_protocol::protocol::ResumedHistory;
|
||||
|
||||
@@ -6,9 +6,9 @@ use std::time::Instant;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::ModelProviderInfo;
|
||||
use crate::ResponseEvent;
|
||||
use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use askama::Template;
|
||||
|
||||
@@ -8,9 +8,12 @@ use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::user_notification::UserNotifier;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
pub(crate) struct SessionServices {
|
||||
pub(crate) mcp_connection_manager: McpConnectionManager,
|
||||
pub(crate) mcp_connection_manager: Arc<RwLock<McpConnectionManager>>,
|
||||
pub(crate) mcp_startup_cancellation_token: CancellationToken,
|
||||
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
|
||||
pub(crate) notifier: UserNotifier,
|
||||
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
|
||||
|
||||
@@ -65,22 +65,24 @@ impl SessionTask for UserShellCommandTask {
|
||||
// allows commands that use shell features (pipes, &&, redirects, etc.).
|
||||
// We do not source rc files or otherwise reformat the script.
|
||||
let use_login_shell = true;
|
||||
let shell_invocation = session
|
||||
let command = session
|
||||
.user_shell()
|
||||
.derive_exec_args(&self.command, use_login_shell);
|
||||
|
||||
let call_id = Uuid::new_v4().to_string();
|
||||
let raw_command = self.command.clone();
|
||||
let cwd = turn_context.cwd.clone();
|
||||
|
||||
let parsed_cmd = parse_command(&shell_invocation);
|
||||
let parsed_cmd = parse_command(&command);
|
||||
session
|
||||
.send_event(
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: call_id.clone(),
|
||||
command: shell_invocation.clone(),
|
||||
cwd: turn_context.cwd.clone(),
|
||||
parsed_cmd,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
}),
|
||||
@@ -88,8 +90,8 @@ impl SessionTask for UserShellCommandTask {
|
||||
.await;
|
||||
|
||||
let exec_env = ExecEnv {
|
||||
command: shell_invocation,
|
||||
cwd: turn_context.cwd.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
timeout_ms: None,
|
||||
sandbox: SandboxType::None,
|
||||
@@ -129,6 +131,12 @@ impl SessionTask for UserShellCommandTask {
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: aborted_message.clone(),
|
||||
aggregated_output: aborted_message.clone(),
|
||||
@@ -145,6 +153,12 @@ impl SessionTask for UserShellCommandTask {
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: call_id.clone(),
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
stdout: output.stdout.text.clone(),
|
||||
stderr: output.stderr.text.clone(),
|
||||
aggregated_output: output.aggregated_output.text.clone(),
|
||||
@@ -176,6 +190,12 @@ impl SessionTask for UserShellCommandTask {
|
||||
turn_context.as_ref(),
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id,
|
||||
turn_id: turn_context.sub_id.clone(),
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source: ExecCommandSource::UserShell,
|
||||
interaction_input: None,
|
||||
stdout: exec_output.stdout.text.clone(),
|
||||
stderr: exec_output.stderr.text.clone(),
|
||||
aggregated_output: exec_output.aggregated_output.text.clone(),
|
||||
|
||||
@@ -15,6 +15,7 @@ use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
@@ -61,6 +62,7 @@ pub(crate) async fn emit_exec_command_begin(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
command: &[String],
|
||||
cwd: &Path,
|
||||
parsed_cmd: &[ParsedCommand],
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<String>,
|
||||
) {
|
||||
@@ -69,9 +71,10 @@ pub(crate) async fn emit_exec_command_begin(
|
||||
ctx.turn,
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
turn_id: ctx.turn.sub_id.clone(),
|
||||
command: command.to_vec(),
|
||||
cwd: cwd.to_path_buf(),
|
||||
parsed_cmd: parse_command(command),
|
||||
parsed_cmd: parsed_cmd.to_vec(),
|
||||
source,
|
||||
interaction_input,
|
||||
}),
|
||||
@@ -84,6 +87,7 @@ pub(crate) enum ToolEmitter {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
source: ExecCommandSource,
|
||||
parsed_cmd: Vec<ParsedCommand>,
|
||||
},
|
||||
ApplyPatch {
|
||||
changes: HashMap<PathBuf, FileChange>,
|
||||
@@ -94,15 +98,18 @@ pub(crate) enum ToolEmitter {
|
||||
cwd: PathBuf,
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<String>,
|
||||
parsed_cmd: Vec<ParsedCommand>,
|
||||
},
|
||||
}
|
||||
|
||||
impl ToolEmitter {
|
||||
pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
|
||||
let parsed_cmd = parse_command(&command);
|
||||
Self::Shell {
|
||||
command,
|
||||
cwd,
|
||||
source,
|
||||
parsed_cmd,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,11 +126,13 @@ impl ToolEmitter {
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<String>,
|
||||
) -> Self {
|
||||
let parsed_cmd = parse_command(command);
|
||||
Self::UnifiedExec {
|
||||
command: command.to_vec(),
|
||||
cwd,
|
||||
source,
|
||||
interaction_input,
|
||||
parsed_cmd,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,44 +143,14 @@ impl ToolEmitter {
|
||||
command,
|
||||
cwd,
|
||||
source,
|
||||
parsed_cmd,
|
||||
},
|
||||
ToolEventStage::Begin,
|
||||
stage,
|
||||
) => {
|
||||
emit_exec_command_begin(ctx, command, cwd.as_path(), *source, None).await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
emit_exec_stage(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
message.clone(),
|
||||
ExecCommandInput::new(command, cwd.as_path(), parsed_cmd, *source, None),
|
||||
stage,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
@@ -231,57 +210,20 @@ impl ToolEmitter {
|
||||
cwd,
|
||||
source,
|
||||
interaction_input,
|
||||
parsed_cmd,
|
||||
},
|
||||
ToolEventStage::Begin,
|
||||
stage,
|
||||
) => {
|
||||
emit_exec_command_begin(
|
||||
emit_exec_stage(
|
||||
ctx,
|
||||
command,
|
||||
cwd.as_path(),
|
||||
*source,
|
||||
interaction_input.clone(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::UnifiedExec { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::UnifiedExec { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Output(output)),
|
||||
) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::UnifiedExec { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)),
|
||||
) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
message.clone(),
|
||||
ExecCommandInput::new(
|
||||
command,
|
||||
cwd.as_path(),
|
||||
parsed_cmd,
|
||||
*source,
|
||||
interaction_input.as_deref(),
|
||||
),
|
||||
stage,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
@@ -340,26 +282,107 @@ impl ToolEmitter {
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
struct ExecCommandInput<'a> {
|
||||
command: &'a [String],
|
||||
cwd: &'a Path,
|
||||
parsed_cmd: &'a [ParsedCommand],
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl<'a> ExecCommandInput<'a> {
|
||||
fn new(
|
||||
command: &'a [String],
|
||||
cwd: &'a Path,
|
||||
parsed_cmd: &'a [ParsedCommand],
|
||||
source: ExecCommandSource,
|
||||
interaction_input: Option<&'a str>,
|
||||
) -> Self {
|
||||
Self {
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source,
|
||||
interaction_input,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ExecCommandResult {
|
||||
stdout: String,
|
||||
stderr: String,
|
||||
aggregated_output: String,
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
formatted_output: String,
|
||||
}
|
||||
|
||||
async fn emit_exec_stage(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
exec_input: ExecCommandInput<'_>,
|
||||
stage: ToolEventStage,
|
||||
) {
|
||||
match stage {
|
||||
ToolEventStage::Begin => {
|
||||
emit_exec_command_begin(
|
||||
ctx,
|
||||
exec_input.command,
|
||||
exec_input.cwd,
|
||||
exec_input.parsed_cmd,
|
||||
exec_input.source,
|
||||
exec_input.interaction_input.map(str::to_owned),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ToolEventStage::Success(output)
|
||||
| ToolEventStage::Failure(ToolEventFailure::Output(output)) => {
|
||||
let exec_result = ExecCommandResult {
|
||||
stdout: output.stdout.text.clone(),
|
||||
stderr: output.stderr.text.clone(),
|
||||
aggregated_output: output.aggregated_output.text.clone(),
|
||||
exit_code: output.exit_code,
|
||||
duration: output.duration,
|
||||
formatted_output: format_exec_output_str(&output),
|
||||
};
|
||||
emit_exec_end(ctx, exec_input, exec_result).await;
|
||||
}
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)) => {
|
||||
let text = message.to_string();
|
||||
let exec_result = ExecCommandResult {
|
||||
stdout: String::new(),
|
||||
stderr: text.clone(),
|
||||
aggregated_output: text.clone(),
|
||||
exit_code: -1,
|
||||
duration: Duration::ZERO,
|
||||
formatted_output: text,
|
||||
};
|
||||
emit_exec_end(ctx, exec_input, exec_result).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
exec_input: ExecCommandInput<'_>,
|
||||
exec_result: ExecCommandResult,
|
||||
) {
|
||||
ctx.session
|
||||
.send_event(
|
||||
ctx.turn,
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
exit_code,
|
||||
duration,
|
||||
formatted_output,
|
||||
turn_id: ctx.turn.sub_id.clone(),
|
||||
command: exec_input.command.to_vec(),
|
||||
cwd: exec_input.cwd.to_path_buf(),
|
||||
parsed_cmd: exec_input.parsed_cmd.to_vec(),
|
||||
source: exec_input.source,
|
||||
interaction_input: exec_input.interaction_input.map(str::to_owned),
|
||||
stdout: exec_result.stdout,
|
||||
stderr: exec_result.stderr,
|
||||
aggregated_output: exec_result.aggregated_output,
|
||||
exit_code: exec_result.exit_code,
|
||||
duration: exec_result.duration,
|
||||
formatted_output: exec_result.formatted_output,
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -3,10 +3,10 @@ use std::collections::BTreeMap;
|
||||
use crate::apply_patch;
|
||||
use crate::apply_patch::InternalApplyPatchInvocation;
|
||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::client::FreeformTool;
|
||||
use crate::client::FreeformToolFormat;
|
||||
use crate::client::ResponsesApiTool;
|
||||
use crate::client::ToolSpec;
|
||||
use crate::client_common::tools::FreeformTool;
|
||||
use crate::client_common::tools::FreeformToolFormat;
|
||||
use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
|
||||
@@ -287,6 +287,8 @@ async fn handle_list_resources(
|
||||
let resources = session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_resources()
|
||||
.await;
|
||||
Ok(ListResourcesPayload::from_all_servers(resources))
|
||||
@@ -396,6 +398,8 @@ async fn handle_list_resource_templates(
|
||||
let templates = session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_resource_templates()
|
||||
.await;
|
||||
Ok(ListResourceTemplatesPayload::from_all_servers(templates))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::client::ResponsesApiTool;
|
||||
use crate::client::ToolSpec;
|
||||
use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::approval_requirement_for_command;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
@@ -24,6 +25,7 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
||||
pub struct ShellHandler;
|
||||
@@ -303,6 +305,17 @@ impl ShellHandler {
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
approval_requirement: if is_user_shell_command {
|
||||
ApprovalRequirement::Skip
|
||||
} else {
|
||||
approval_requirement_for_command(
|
||||
turn.exec_policy_v2.as_deref(),
|
||||
&exec_params.command,
|
||||
turn.approval_policy,
|
||||
&turn.sandbox_policy,
|
||||
exec_params.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
},
|
||||
};
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
|
||||
@@ -11,11 +11,13 @@ use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::default_approval_requirement;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
|
||||
@@ -49,40 +51,52 @@ impl ToolOrchestrator {
|
||||
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
|
||||
|
||||
// 1) Approval
|
||||
let needs_initial_approval =
|
||||
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
|
||||
.await;
|
||||
let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
|
||||
default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
|
||||
});
|
||||
match requirement {
|
||||
ApprovalRequirement::Skip => {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
ApprovalRequirement::Forbidden { reason } => {
|
||||
return Err(ToolError::Rejected(reason));
|
||||
}
|
||||
ApprovalRequirement::NeedsApproval { reason } => {
|
||||
let mut risk = None;
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: reason,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
already_approved = true;
|
||||
}
|
||||
already_approved = true;
|
||||
} else {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
|
||||
// 2) First attempt under the selected sandbox.
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::client::ToolSpec;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::client::ToolSpec;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
@@ -54,7 +54,7 @@ impl ToolRouter {
|
||||
.any(|config| config.spec.name() == tool_name)
|
||||
}
|
||||
|
||||
pub fn build_tool_call(
|
||||
pub async fn build_tool_call(
|
||||
session: &Session,
|
||||
item: ResponseItem,
|
||||
) -> Result<Option<ToolCall>, FunctionCallError> {
|
||||
@@ -65,7 +65,7 @@ impl ToolRouter {
|
||||
call_id,
|
||||
..
|
||||
} => {
|
||||
if let Some((server, tool)) = session.parse_mcp_tool_name(&name) {
|
||||
if let Some((server, tool)) = session.parse_mcp_tool_name(&name).await {
|
||||
Ok(Some(ToolCall {
|
||||
tool_name: name,
|
||||
call_id,
|
||||
|
||||
@@ -4,13 +4,12 @@ Runtime: shell
|
||||
Executes shell requests under the orchestrator: asks for approval when needed,
|
||||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -20,7 +19,6 @@ use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::path::PathBuf;
|
||||
@@ -33,6 +31,7 @@ pub struct ShellRequest {
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
@@ -114,18 +113,8 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &ShellRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
/*
|
||||
Runtime: unified exec
|
||||
|
||||
@@ -10,6 +9,7 @@ use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -22,9 +22,7 @@ use crate::tools::sandboxing::with_cached_approval;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecSession;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
@@ -36,6 +34,7 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
@@ -65,6 +64,7 @@ impl UnifiedExecRequest {
|
||||
env: HashMap<String, String>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
approval_requirement: ApprovalRequirement,
|
||||
) -> Self {
|
||||
Self {
|
||||
command,
|
||||
@@ -72,6 +72,7 @@ impl UnifiedExecRequest {
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
approval_requirement,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -129,18 +130,8 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &UnifiedExecRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
|
||||
|
||||
@@ -86,6 +86,34 @@ pub(crate) struct ApprovalCtx<'a> {
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum ApprovalRequirement {
|
||||
Skip,
|
||||
NeedsApproval { reason: Option<String> },
|
||||
Forbidden { reason: String },
|
||||
}
|
||||
|
||||
/// Reflects the orchestrator's behavior (pre-refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
pub(crate) fn default_approval_requirement(
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> ApprovalRequirement {
|
||||
let needs_approval = match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
};
|
||||
|
||||
if needs_approval {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
|
||||
|
||||
@@ -106,22 +134,11 @@ pub(crate) trait Approvable<Req> {
|
||||
matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
/// Decide whether an initial user approval should be requested before the
|
||||
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
_req: &Req,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
}
|
||||
/// Override the default approval requirement. Return `Some(_)` to specify
|
||||
/// a custom requirement, or `None` to fall back to
|
||||
/// policy-based default.
|
||||
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide we can request an approval for no-sandbox execution.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::client::ResponsesApiTool;
|
||||
use crate::client::ToolSpec;
|
||||
use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use crate::model_family::ModelFamily;
|
||||
@@ -20,6 +20,11 @@ pub enum ConfigShellToolType {
|
||||
Default,
|
||||
Local,
|
||||
UnifiedExec,
|
||||
/// Do not include a shell tool by default. Useful when using Codex
|
||||
/// with tools provided exclusively provided by MCP servers. Often used
|
||||
/// with `--config base_instructions=CUSTOM_INSTRUCTIONS`
|
||||
/// to customize agent behavior.
|
||||
Disabled,
|
||||
/// Takes a command as a single string to be run in the user's default shell.
|
||||
ShellCommand,
|
||||
}
|
||||
@@ -48,7 +53,9 @@ impl ToolsConfig {
|
||||
let include_web_search_request = features.enabled(Feature::WebSearchRequest);
|
||||
let include_view_image_tool = features.enabled(Feature::ViewImageTool);
|
||||
|
||||
let shell_type = if features.enabled(Feature::UnifiedExec) {
|
||||
let shell_type = if !features.enabled(Feature::ShellTool) {
|
||||
ConfigShellToolType::Disabled
|
||||
} else if features.enabled(Feature::UnifiedExec) {
|
||||
ConfigShellToolType::UnifiedExec
|
||||
} else if features.enabled(Feature::ShellCommandTool) {
|
||||
ConfigShellToolType::ShellCommand
|
||||
@@ -294,9 +301,26 @@ fn create_shell_tool() -> ToolSpec {
|
||||
},
|
||||
);
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
|
||||
- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
|
||||
- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
|
||||
- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
|
||||
- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
|
||||
- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
|
||||
}.to_string();
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "shell".to_string(),
|
||||
description: "Runs a shell command and returns its output.".to_string(),
|
||||
description,
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
@@ -341,9 +365,25 @@ fn create_shell_command_tool() -> ToolSpec {
|
||||
},
|
||||
);
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output.
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): "Get-ChildItem -Force"
|
||||
- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
|
||||
- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
|
||||
- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
|
||||
- setting an env var: "$env:FOO='bar'; echo $env:FOO"
|
||||
- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#
|
||||
}.to_string();
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "shell_command".to_string(),
|
||||
description: "Runs a shell command string and returns its output.".to_string(),
|
||||
description,
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
@@ -973,16 +1013,21 @@ pub(crate) fn build_specs(
|
||||
builder.register_handler("exec_command", unified_exec_handler.clone());
|
||||
builder.register_handler("write_stdin", unified_exec_handler);
|
||||
}
|
||||
ConfigShellToolType::Disabled => {
|
||||
// Do nothing.
|
||||
}
|
||||
ConfigShellToolType::ShellCommand => {
|
||||
builder.push_spec(create_shell_command_tool());
|
||||
}
|
||||
}
|
||||
|
||||
// Always register shell aliases so older prompts remain compatible.
|
||||
builder.register_handler("shell", shell_handler.clone());
|
||||
builder.register_handler("container.exec", shell_handler.clone());
|
||||
builder.register_handler("local_shell", shell_handler);
|
||||
builder.register_handler("shell_command", shell_command_handler);
|
||||
if config.shell_type != ConfigShellToolType::Disabled {
|
||||
// Always register shell aliases so older prompts remain compatible.
|
||||
builder.register_handler("shell", shell_handler.clone());
|
||||
builder.register_handler("container.exec", shell_handler.clone());
|
||||
builder.register_handler("local_shell", shell_handler);
|
||||
builder.register_handler("shell_command", shell_command_handler);
|
||||
}
|
||||
|
||||
builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true);
|
||||
builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true);
|
||||
@@ -1074,7 +1119,7 @@ pub(crate) fn build_specs(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::client::FreeformTool;
|
||||
use crate::client_common::tools::FreeformTool;
|
||||
use crate::model_family::find_family_for_model;
|
||||
use crate::tools::registry::ConfiguredToolSpec;
|
||||
use mcp_types::ToolInputSchema;
|
||||
@@ -1118,6 +1163,7 @@ mod tests {
|
||||
ConfigShellToolType::Default => Some("shell"),
|
||||
ConfigShellToolType::Local => Some("local_shell"),
|
||||
ConfigShellToolType::UnifiedExec => None,
|
||||
ConfigShellToolType::Disabled => None,
|
||||
ConfigShellToolType::ShellCommand => Some("shell_command"),
|
||||
}
|
||||
}
|
||||
@@ -1873,8 +1919,23 @@ mod tests {
|
||||
};
|
||||
assert_eq!(name, "shell");
|
||||
|
||||
let expected = "Runs a shell command and returns its output.";
|
||||
assert_eq!(description, expected);
|
||||
let expected = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
|
||||
- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
|
||||
- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
|
||||
- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
|
||||
- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
|
||||
- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
|
||||
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
|
||||
}.to_string();
|
||||
assert_eq!(description, &expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1888,8 +1949,22 @@ mod tests {
|
||||
};
|
||||
assert_eq!(name, "shell_command");
|
||||
|
||||
let expected = "Runs a shell command string and returns its output.";
|
||||
assert_eq!(description, expected);
|
||||
let expected = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output.
|
||||
|
||||
Examples of valid command strings:
|
||||
|
||||
- ls -a (show hidden): "Get-ChildItem -Force"
|
||||
- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
|
||||
- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
|
||||
- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
|
||||
- setting an env var: "$env:FOO='bar'; echo $env:FOO"
|
||||
- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#.to_string()
|
||||
} else {
|
||||
r#"Runs a shell command and returns its output.
|
||||
- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#.to_string()
|
||||
};
|
||||
assert_eq!(description, &expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -11,6 +11,7 @@ use crate::codex::TurnContext;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::approval_requirement_for_command;
|
||||
use crate::protocol::BackgroundEventEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
@@ -444,6 +445,13 @@ impl UnifiedExecSessionManager {
|
||||
create_env(&context.turn.shell_environment_policy),
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
approval_requirement_for_command(
|
||||
context.turn.exec_policy_v2.as_deref(),
|
||||
command,
|
||||
context.turn.approval_policy,
|
||||
&context.turn.sandbox_policy,
|
||||
with_escalated_permissions.unwrap_or(false),
|
||||
),
|
||||
);
|
||||
let tool_ctx = ToolCtx {
|
||||
session: context.session.as_ref(),
|
||||
|
||||
@@ -38,6 +38,15 @@ pub enum ApplyPatchModelOutput {
|
||||
ShellViaHeredoc,
|
||||
}
|
||||
|
||||
/// A collection of different ways the model can output an apply_patch call
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum ShellModelOutput {
|
||||
Shell,
|
||||
ShellCommand,
|
||||
LocalShell,
|
||||
// UnifiedExec has its own set of tests
|
||||
}
|
||||
|
||||
pub struct TestCodexBuilder {
|
||||
config_mutators: Vec<Box<ConfigMutator>>,
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
use test_case::test_case;
|
||||
|
||||
async fn apply_patch_harness() -> Result<TestCodexHarness> {
|
||||
pub async fn apply_patch_harness() -> Result<TestCodexHarness> {
|
||||
apply_patch_harness_with(|_| {}).await
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ async fn apply_patch_harness_with(
|
||||
.await
|
||||
}
|
||||
|
||||
async fn mount_apply_patch(
|
||||
pub async fn mount_apply_patch(
|
||||
harness: &TestCodexHarness,
|
||||
call_id: &str,
|
||||
patch: &str,
|
||||
@@ -87,8 +87,8 @@ async fn apply_patch_cli_multiple_operations_integration(
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -671,8 +671,8 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -717,8 +717,8 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
})
|
||||
.await?;
|
||||
let test = harness.test();
|
||||
|
||||
@@ -240,6 +240,10 @@ enum Expectation {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
},
|
||||
FileCreatedNoExitCode {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
},
|
||||
PatchApplied {
|
||||
target: TargetPath,
|
||||
content: &'static str,
|
||||
@@ -251,12 +255,18 @@ enum Expectation {
|
||||
NetworkSuccess {
|
||||
body_contains: &'static str,
|
||||
},
|
||||
NetworkSuccessNoExitCode {
|
||||
body_contains: &'static str,
|
||||
},
|
||||
NetworkFailure {
|
||||
expect_tag: &'static str,
|
||||
},
|
||||
CommandSuccess {
|
||||
stdout_contains: &'static str,
|
||||
},
|
||||
CommandSuccessNoExitCode {
|
||||
stdout_contains: &'static str,
|
||||
},
|
||||
CommandFailure {
|
||||
output_contains: &'static str,
|
||||
},
|
||||
@@ -270,8 +280,7 @@ impl Expectation {
|
||||
assert_eq!(
|
||||
result.exit_code,
|
||||
Some(0),
|
||||
"expected successful exit for {:?}",
|
||||
path
|
||||
"expected successful exit for {path:?}"
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(content),
|
||||
@@ -285,6 +294,21 @@ impl Expectation {
|
||||
);
|
||||
let _ = fs::remove_file(path);
|
||||
}
|
||||
Expectation::FileCreatedNoExitCode { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
|
||||
assert!(
|
||||
result.stdout.contains(content),
|
||||
"stdout missing {content:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
let file_contents = fs::read_to_string(&path)?;
|
||||
assert!(
|
||||
file_contents.contains(content),
|
||||
"file contents missing {content:?}: {file_contents}"
|
||||
);
|
||||
let _ = fs::remove_file(path);
|
||||
}
|
||||
Expectation::PatchApplied { target, content } => {
|
||||
let (path, _) = target.resolve_for_patch(test);
|
||||
match result.exit_code {
|
||||
@@ -360,6 +384,23 @@ impl Expectation {
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::NetworkSuccessNoExitCode { body_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
"expected no exit code for successful network call: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains("OK:"),
|
||||
"stdout missing OK prefix: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(body_contains),
|
||||
"stdout missing body text {body_contains:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::NetworkFailure { expect_tag } => {
|
||||
assert_ne!(
|
||||
result.exit_code,
|
||||
@@ -391,6 +432,18 @@ impl Expectation {
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
|
||||
assert_eq!(
|
||||
result.exit_code, None,
|
||||
"expected no exit code for trusted command: {}",
|
||||
result.stdout
|
||||
);
|
||||
assert!(
|
||||
result.stdout.contains(stdout_contains),
|
||||
"trusted command stdout missing {stdout_contains:?}: {}",
|
||||
result.stdout
|
||||
);
|
||||
}
|
||||
Expectation::CommandFailure { output_contains } => {
|
||||
assert_ne!(
|
||||
result.exit_code,
|
||||
@@ -588,13 +641,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
|
||||
content: "danger-on-request",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
||||
content: "danger-on-request",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
||||
content: "danger-on-request",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_request_allows_network",
|
||||
approval_policy: OnRequest,
|
||||
@@ -605,12 +675,28 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccess {
|
||||
body_contains: "danger-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::FetchUrl {
|
||||
endpoint: "/dfa/network",
|
||||
response_body: "danger-network-ok",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccessNoExitCode {
|
||||
body_contains: "danger-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_unless_trusted_runs_without_prompt",
|
||||
approval_policy: UnlessTrusted,
|
||||
@@ -620,12 +706,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-unless",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-unless"],
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccessNoExitCode {
|
||||
stdout_contains: "trusted-unless",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_failure_allows_outside_write",
|
||||
approval_policy: OnFailure,
|
||||
@@ -636,13 +737,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
|
||||
content: "danger-on-failure",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
|
||||
approval_policy: OnFailure,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
||||
content: "danger-on-failure",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
||||
content: "danger-on-failure",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_unless_trusted_requests_approval",
|
||||
approval_policy: UnlessTrusted,
|
||||
@@ -653,7 +771,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -663,6 +781,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "danger-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
||||
content: "danger-unless-trusted",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
||||
content: "danger-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_never_allows_outside_write",
|
||||
approval_policy: Never,
|
||||
@@ -673,13 +811,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
|
||||
content: "danger-never",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
|
||||
approval_policy: Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
||||
content: "danger-never",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
||||
content: "danger-never",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_requires_approval",
|
||||
approval_policy: OnRequest,
|
||||
@@ -690,7 +845,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -700,6 +855,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "read-only-approval",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
||||
content: "read-only-approval",
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
||||
content: "read-only-approval",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_on_request_read_only_runs_without_prompt",
|
||||
approval_policy: OnRequest,
|
||||
@@ -709,12 +884,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-read-only",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::RunCommand {
|
||||
command: &["echo", "trusted-read-only"],
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccessNoExitCode {
|
||||
stdout_contains: "trusted-read-only",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_blocks_network",
|
||||
approval_policy: OnRequest,
|
||||
@@ -760,7 +950,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
@@ -770,6 +960,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "read-only-on-failure",
|
||||
},
|
||||
},
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
|
||||
approval_policy: OnFailure,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
||||
content: "read-only-on-failure",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
||||
content: "read-only-on-failure",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_network_escalates_when_approved",
|
||||
approval_policy: OnRequest,
|
||||
@@ -780,7 +991,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -789,6 +1000,25 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
body_contains: "read-only-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::FetchUrl {
|
||||
endpoint: "/ro/network-approved",
|
||||
response_body: "read-only-network-ok",
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::NetworkSuccessNoExitCode {
|
||||
body_contains: "read-only-network-ok",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "apply_patch_shell_requires_patch_approval",
|
||||
approval_policy: UnlessTrusted,
|
||||
@@ -819,7 +1049,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::PatchApplied {
|
||||
target: TargetPath::Workspace("apply_patch_function.txt"),
|
||||
@@ -836,7 +1066,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![Feature::ApplyPatchFreeform],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::PatchApplied {
|
||||
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
|
||||
@@ -853,7 +1083,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -873,7 +1103,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Denied,
|
||||
expected_reason: None,
|
||||
@@ -913,7 +1143,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::PatchApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -933,7 +1163,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5-codex"),
|
||||
model_override: Some("gpt-5.1-codex"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileNotCreated {
|
||||
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
|
||||
@@ -952,7 +1182,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -962,6 +1192,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
content: "read-only-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
|
||||
approval_policy: UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::WriteFile {
|
||||
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
||||
content: "read-only-unless-trusted",
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: Some("gpt-5.1"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
},
|
||||
expectation: Expectation::FileCreatedNoExitCode {
|
||||
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
||||
content: "read-only-unless-trusted",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "read_only_never_reports_sandbox_failure",
|
||||
approval_policy: Never,
|
||||
@@ -992,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "trusted-never",
|
||||
@@ -1008,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::FileCreated {
|
||||
target: TargetPath::Workspace("ww_on_request.txt"),
|
||||
@@ -1039,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1059,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::NetworkSuccess {
|
||||
body_contains: "workspace-network-ok",
|
||||
@@ -1076,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some("command failed; retry without sandbox?"),
|
||||
@@ -1096,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: None,
|
||||
@@ -1137,7 +1387,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::Auto,
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "hello unified exec",
|
||||
@@ -1155,7 +1405,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
model_override: None,
|
||||
model_override: Some("gpt-5"),
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
||||
@@ -1208,7 +1458,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.approval_policy = approval_policy;
|
||||
config.sandbox_policy = sandbox_policy.clone();
|
||||
let model = model_override.unwrap_or("gpt-5");
|
||||
let model = model_override.unwrap_or("gpt-5.1");
|
||||
config.model = model.to_string();
|
||||
config.model_family =
|
||||
find_family_for_model(model).expect("model should map to a known family");
|
||||
|
||||
@@ -769,7 +769,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
|
||||
|
||||
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_model("gpt-5-codex")
|
||||
.with_model("gpt-5.1-codex")
|
||||
.with_config(|config| {
|
||||
config.model_verbosity = Some(Verbosity::High);
|
||||
})
|
||||
@@ -807,7 +807,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
|
||||
|
||||
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_model("gpt-5")
|
||||
.with_model("gpt-5.1")
|
||||
.with_config(|config| {
|
||||
config.model_verbosity = Some(Verbosity::High);
|
||||
})
|
||||
@@ -1155,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 123
|
||||
},
|
||||
// Default model is gpt-5-codex in tests → 95% usable context window
|
||||
// Default model is gpt-5.1-codex in tests → 95% usable context window
|
||||
"model_context_window": 258400
|
||||
},
|
||||
"rate_limits": {
|
||||
@@ -1304,8 +1304,9 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("known gpt-5 model family");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1").expect("known gpt-5.1 model family");
|
||||
config.model_context_window = Some(272_000);
|
||||
})
|
||||
.build(&server)
|
||||
|
||||
@@ -18,7 +18,6 @@ use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::compact::SUMMARIZATION_PROMPT;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::OPENAI_DEFAULT_MODEL;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
@@ -111,9 +110,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
// 1. Arrange mocked SSE responses for the initial compact/resume/fork flow.
|
||||
let server = MockServer::start().await;
|
||||
mount_initial_flow(&server).await;
|
||||
|
||||
let expected_model = "gpt-5.1-codex";
|
||||
// 2. Start a new conversation and drive it through the compact/resume/fork steps.
|
||||
let (_home, config, manager, base) = start_test_conversation(&server).await;
|
||||
let (_home, config, manager, base) =
|
||||
start_test_conversation(&server, Some(expected_model)).await;
|
||||
|
||||
user_turn(&base, "hello world").await;
|
||||
compact_conversation(&base).await;
|
||||
@@ -189,7 +189,6 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let expected_model = OPENAI_DEFAULT_MODEL;
|
||||
let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
|
||||
let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
|
||||
let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
|
||||
@@ -558,7 +557,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
|
||||
mount_second_compact_flow(&server).await;
|
||||
|
||||
// 2. Drive the conversation through compact -> resume -> fork -> compact -> resume.
|
||||
let (_home, config, manager, base) = start_test_conversation(&server).await;
|
||||
let (_home, config, manager, base) = start_test_conversation(&server, None).await;
|
||||
|
||||
user_turn(&base, "hello world").await;
|
||||
compact_conversation(&base).await;
|
||||
@@ -808,6 +807,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
|
||||
|
||||
async fn start_test_conversation(
|
||||
server: &MockServer,
|
||||
model: Option<&str>,
|
||||
) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
@@ -817,7 +817,9 @@ async fn start_test_conversation(
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
|
||||
|
||||
if let Some(model) = model {
|
||||
config.model = model.to_string();
|
||||
}
|
||||
let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation { conversation, .. } = manager
|
||||
.new_conversation(config.clone())
|
||||
|
||||
97
codex-rs/core/tests/suite/execpolicy2.rs
Normal file
97
codex-rs/core/tests/suite/execpolicy2.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn execpolicy2_blocks_shell_invocation() -> Result<()> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ExecPolicyV2);
|
||||
let policy_path = config.codex_home.join("policy.codexpolicy");
|
||||
fs::write(
|
||||
&policy_path,
|
||||
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-forbidden";
|
||||
let args = json!({
|
||||
"command": ["echo", "blocked"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let session_model = test.session_configured.model.clone();
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "run shell command".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd_path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::ExecCommandEnd(_))
|
||||
})
|
||||
.await
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
end.aggregated_output
|
||||
.contains("execpolicy forbids this command"),
|
||||
"unexpected output: {}",
|
||||
end.aggregated_output
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -11,7 +11,7 @@ use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::process::Command as StdCommand;
|
||||
|
||||
const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
|
||||
const MODEL_WITH_TOOL: &str = "test-gpt-5.1-codex";
|
||||
|
||||
fn ripgrep_available() -> bool {
|
||||
StdCommand::new("rg")
|
||||
|
||||
@@ -31,12 +31,12 @@ const SCHEMA: &str = r#"
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
|
||||
codex_returns_json_result("gpt-5".to_string()).await
|
||||
codex_returns_json_result("gpt-5.1".to_string()).await
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
|
||||
codex_returns_json_result("gpt-5-codex".to_string()).await
|
||||
codex_returns_json_result("gpt-5.1-codex".to_string()).await
|
||||
}
|
||||
|
||||
async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {
|
||||
|
||||
@@ -27,6 +27,7 @@ mod compact;
|
||||
mod compact_resume_fork;
|
||||
mod deprecation_notice;
|
||||
mod exec;
|
||||
mod execpolicy2;
|
||||
mod fork_conversation;
|
||||
mod grep_files;
|
||||
mod items;
|
||||
|
||||
@@ -160,7 +160,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
|
||||
// with the OpenAI schema, so we just verify the tool presence here
|
||||
let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
|
||||
(
|
||||
"gpt-5",
|
||||
"gpt-5.1",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
@@ -183,7 +183,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
|
||||
],
|
||||
),
|
||||
(
|
||||
"gpt-5-codex",
|
||||
"gpt-5.1-codex",
|
||||
vec![
|
||||
"shell",
|
||||
"list_mcp_resources",
|
||||
|
||||
@@ -364,7 +364,7 @@ async fn review_uses_custom_review_model_from_config() {
|
||||
// Choose a review model different from the main model; ensure it is used.
|
||||
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
|
||||
cfg.model = "gpt-4.1".to_string();
|
||||
cfg.review_model = "gpt-5".to_string();
|
||||
cfg.review_model = "gpt-5.1".to_string();
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -394,7 +394,7 @@ async fn review_uses_custom_review_model_from_config() {
|
||||
// Assert the request body model equals the configured review model
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
|
||||
assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::model_family::find_family_for_model;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_custom_tool_call;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_local_shell_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
@@ -16,12 +15,18 @@ use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::ApplyPatchModelOutput;
|
||||
use core_test_support::test_codex::ShellModelOutput;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use pretty_assertions::assert_eq;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
use test_case::test_case;
|
||||
|
||||
use crate::suite::apply_patch_cli::apply_patch_harness;
|
||||
use crate::suite::apply_patch_cli::mount_apply_patch;
|
||||
|
||||
const FIXTURE_JSON: &str = r#"{
|
||||
"description": "This is an example JSON file.",
|
||||
@@ -35,34 +40,88 @@ const FIXTURE_JSON: &str = r#"{
|
||||
}
|
||||
"#;
|
||||
|
||||
fn shell_responses(
|
||||
call_id: &str,
|
||||
command: Vec<&str>,
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<Vec<String>> {
|
||||
match output_type {
|
||||
ShellModelOutput::ShellCommand => {
|
||||
let command = shlex::try_join(command)?;
|
||||
let parameters = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 2_000,
|
||||
});
|
||||
Ok(vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(
|
||||
call_id,
|
||||
"shell_command",
|
||||
&serde_json::to_string(¶meters)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
])
|
||||
}
|
||||
ShellModelOutput::Shell => {
|
||||
let parameters = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 2_000,
|
||||
});
|
||||
Ok(vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(¶meters)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
])
|
||||
}
|
||||
ShellModelOutput::LocalShell => Ok(vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_local_shell_call(call_id, "completed", command),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
]),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_stays_json_without_freeform_apply_patch(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-json";
|
||||
let args = json!({
|
||||
"command": ["/bin/echo", "shell json"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/echo", "shell json"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -80,7 +139,6 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
|
||||
let mut parsed: Value = serde_json::from_str(output)?;
|
||||
if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
|
||||
// duration_seconds is non-deterministic; remove it for deep equality
|
||||
let _ = metadata.remove("duration_seconds");
|
||||
}
|
||||
|
||||
@@ -102,31 +160,26 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_is_structured_with_freeform_apply_patch(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-structured";
|
||||
let args = json!({
|
||||
"command": ["/bin/echo", "freeform shell"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -159,14 +212,23 @@ freeform shell
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_preserves_fixture_json_without_serialization(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.disable(Feature::ApplyPatchFreeform);
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -175,21 +237,11 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
|
||||
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "shell-json-fixture";
|
||||
let args = json!({
|
||||
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(
|
||||
call_id,
|
||||
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
|
||||
output_type,
|
||||
)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -232,12 +284,21 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_structures_fixture_with_serialization(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ApplyPatchFreeform);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -246,21 +307,11 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||
let fixture_path_str = fixture_path.to_string_lossy().to_string();
|
||||
|
||||
let call_id = "shell-structured-fixture";
|
||||
let args = json!({
|
||||
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(
|
||||
call_id,
|
||||
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
|
||||
output_type,
|
||||
)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -298,40 +349,26 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_for_freeform_tool_records_duration(
|
||||
output_type: ShellModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
|
||||
|
||||
#[cfg(windows)]
|
||||
let sleep_cmd = "timeout 1";
|
||||
|
||||
let call_id = "shell-structured";
|
||||
let args = json!({
|
||||
"command": sleep_cmd,
|
||||
"timeout_ms": 2_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/bash", "-c", "sleep 1"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -371,33 +408,26 @@ $"#;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_reserializes_truncated_content() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
let _ = output_type;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-truncated";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 400"],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "seq 1 400"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -445,14 +475,16 @@ $"#;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_output_is_structured(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-structured";
|
||||
let file_name = "structured.txt";
|
||||
@@ -463,33 +495,17 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
|
||||
*** End Patch
|
||||
"#
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
mount_apply_patch(&harness, call_id, &patch, "done", output_type).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
@@ -499,53 +515,39 @@ Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_call_creates_file(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-add-file";
|
||||
let file_name = "custom_tool_apply_patch.txt";
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to create a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to create a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
@@ -555,9 +557,9 @@ Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
let new_file_path = test.cwd.path().join(file_name);
|
||||
let new_file_path = harness.path(file_name);
|
||||
let created_contents = fs::read_to_string(&new_file_path)?;
|
||||
assert_eq!(
|
||||
created_contents, "custom tool content\n",
|
||||
@@ -568,49 +570,42 @@ A {file_name}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_call_updates_existing_file(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-update-file";
|
||||
let file_name = "custom_tool_apply_patch_existing.txt";
|
||||
let file_path = test.cwd.path().join(file_name);
|
||||
let file_path = harness.path(file_name);
|
||||
fs::write(&file_path, "before\n")?;
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch update done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to update a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
mount_apply_patch(
|
||||
&harness,
|
||||
call_id,
|
||||
&patch,
|
||||
"apply_patch update done",
|
||||
output_type,
|
||||
)
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via custom tool to update a file",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
@@ -620,7 +615,7 @@ Success. Updated the following files:
|
||||
M {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
let updated_contents = fs::read_to_string(file_path)?;
|
||||
assert_eq!(updated_contents, "after\n", "expected updated file content");
|
||||
@@ -629,99 +624,83 @@ M {file_name}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_custom_tool_call_reports_failure_output(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-failure";
|
||||
let missing_file = "missing_custom_tool_apply_patch.txt";
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
|
||||
);
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_custom_tool_call(call_id, "apply_patch", &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch failure done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"attempt a failing apply_patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
mount_apply_patch(
|
||||
&harness,
|
||||
call_id,
|
||||
&patch,
|
||||
"apply_patch failure done",
|
||||
output_type,
|
||||
)
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch output request recorded");
|
||||
let output_item = req.custom_tool_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"attempt a failing apply_patch via custom tool",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
|
||||
let expected_output = format!(
|
||||
"apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
|
||||
test.cwd.path().to_string_lossy()
|
||||
harness.cwd().to_string_lossy()
|
||||
);
|
||||
assert_eq!(output, expected_output);
|
||||
assert_eq!(output, expected_output.as_str());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_function_call_output_is_structured() -> Result<()> {
|
||||
#[test_case(ApplyPatchModelOutput::Freeform)]
|
||||
#[test_case(ApplyPatchModelOutput::Function)]
|
||||
#[test_case(ApplyPatchModelOutput::Shell)]
|
||||
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
|
||||
async fn apply_patch_function_call_output_is_structured(
|
||||
output_type: ApplyPatchModelOutput,
|
||||
) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let harness = apply_patch_harness().await?;
|
||||
|
||||
let call_id = "apply-patch-function";
|
||||
let file_name = "function_apply_patch.txt";
|
||||
let patch =
|
||||
format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n");
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_apply_patch_function_call(call_id, &patch),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "apply_patch function done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"apply the patch via function-call apply_patch",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
mount_apply_patch(
|
||||
&harness,
|
||||
call_id,
|
||||
&patch,
|
||||
"apply_patch function done",
|
||||
output_type,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock
|
||||
.last_request()
|
||||
.expect("apply_patch function output request recorded");
|
||||
let output_item = req.function_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("apply_patch output string");
|
||||
.await;
|
||||
harness
|
||||
.test()
|
||||
.submit_turn_with_policy(
|
||||
"apply the patch via function-call apply_patch",
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = harness.apply_patch_output(call_id, output_type).await;
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
@@ -730,40 +709,32 @@ Success. Updated the following files:
|
||||
A {file_name}
|
||||
?$"
|
||||
);
|
||||
assert_regex_match(&expected_pattern, output);
|
||||
assert_regex_match(&expected_pattern, output.as_str());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
config.include_apply_patch_tool = true;
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-nonzero-exit";
|
||||
let args = json!({
|
||||
"command": ["/bin/sh", "-c", "exit 42"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
json!({"type": "response.created", "response": {"id": "resp-1"}}),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "shell failure handled"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?;
|
||||
let mock = mount_sse_sequence(&server, responses).await;
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
@@ -793,7 +764,7 @@ async fn shell_command_output_is_structured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
@@ -847,9 +818,9 @@ async fn local_shell_call_output_is_structured() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -57,9 +57,9 @@ async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "test-gpt-5-codex".to_string();
|
||||
config.model = "test-gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("test-gpt-5-codex").expect("test-gpt-5-codex model family");
|
||||
find_family_for_model("test-gpt-5.1-codex").expect("test-gpt-5.1-codex model family");
|
||||
});
|
||||
builder.build(server).await
|
||||
}
|
||||
|
||||
@@ -197,9 +197,9 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
@@ -425,8 +425,8 @@ async fn shell_timeout_handles_background_grandchild_stdout() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
|
||||
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -41,9 +41,9 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
// Use the test model that wires function tools like grep_files
|
||||
config.model = "test-gpt-5-codex".to_string();
|
||||
config.model = "test-gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
|
||||
find_family_for_model("test-gpt-5.1-codex").expect("model family for test model");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -105,9 +105,9 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
@@ -197,9 +197,9 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
let call_id = "shell-single-truncation";
|
||||
|
||||
@@ -30,8 +30,8 @@ use pretty_assertions::assert_eq;
|
||||
async fn undo_harness() -> Result<TestCodexHarness> {
|
||||
TestCodexHarness::with_config(|config: &mut Config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
config.model = "gpt-5".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
|
||||
config.model = "gpt-5.1".to_string();
|
||||
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
|
||||
config.features.enable(Feature::GhostCommit);
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -159,7 +159,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
@@ -236,7 +236,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
@@ -288,28 +288,22 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
let begin_event = wait_for_event_match(&codex, |msg| match msg {
|
||||
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
begin_event.cwd, workdir,
|
||||
"exec_command cwd should reflect the requested workdir override"
|
||||
);
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
let output = outputs
|
||||
.get(call_id)
|
||||
.expect("missing exec_command workdir output");
|
||||
let output_text = output.output.trim();
|
||||
let output_canonical = std::fs::canonicalize(output_text)?;
|
||||
let expected_canonical = std::fs::canonicalize(&workdir)?;
|
||||
assert_eq!(
|
||||
output_canonical, expected_canonical,
|
||||
"pwd should reflect the requested workdir override"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -270,9 +270,9 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ Start a new session with optional overrides:
|
||||
|
||||
Request `newConversation` params (subset):
|
||||
|
||||
- `model`: string model id (e.g. "o3", "gpt-5", "gpt-5-codex")
|
||||
- `model`: string model id (e.g. "o3", "gpt-5.1", "gpt-5.1-codex")
|
||||
- `profile`: optional named profile
|
||||
- `cwd`: optional working directory
|
||||
- `approvalPolicy`: `untrusted` | `on-request` | `on-failure` | `never`
|
||||
@@ -119,13 +119,13 @@ For the complete request/response shapes and flow examples, see the [“Auth end
|
||||
## Example: start and send a message
|
||||
|
||||
```json
|
||||
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5", "approvalPolicy": "on-request" } }
|
||||
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5.1", "approvalPolicy": "on-request" } }
|
||||
```
|
||||
|
||||
Server responds:
|
||||
|
||||
```json
|
||||
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5", "rolloutPath": "/path/to/rollout.jsonl" } }
|
||||
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5.1", "rolloutPath": "/path/to/rollout.jsonl" } }
|
||||
```
|
||||
|
||||
Then send input:
|
||||
|
||||
@@ -24,7 +24,6 @@ codex-common = { workspace = true, features = [
|
||||
"sandbox_summary",
|
||||
] }
|
||||
codex-core = { workspace = true }
|
||||
codex-ollama = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
opentelemetry-appender-tracing = { workspace = true }
|
||||
|
||||
@@ -18,9 +18,15 @@ pub struct Cli {
|
||||
#[arg(long, short = 'm')]
|
||||
pub model: Option<String>,
|
||||
|
||||
/// Use open-source provider.
|
||||
#[arg(long = "oss", default_value_t = false)]
|
||||
pub oss: bool,
|
||||
|
||||
/// Specify which local provider to use (lmstudio or ollama).
|
||||
/// If not specified with --oss, will use config default or show selection.
|
||||
#[arg(long = "local-provider")]
|
||||
pub oss_provider: Option<String>,
|
||||
|
||||
/// Select the sandbox policy to use when executing model-generated shell
|
||||
/// commands.
|
||||
#[arg(long = "sandbox", short = 's', value_enum)]
|
||||
|
||||
@@ -182,6 +182,42 @@ impl EventProcessor for EventProcessorWithHumanOutput {
|
||||
ts_msg!(self, " {}", details.style(self.dimmed));
|
||||
}
|
||||
}
|
||||
EventMsg::McpStartupUpdate(update) => {
|
||||
let status_text = match update.status {
|
||||
codex_core::protocol::McpStartupStatus::Starting => "starting".to_string(),
|
||||
codex_core::protocol::McpStartupStatus::Ready => "ready".to_string(),
|
||||
codex_core::protocol::McpStartupStatus::Cancelled => "cancelled".to_string(),
|
||||
codex_core::protocol::McpStartupStatus::Failed { ref error } => {
|
||||
format!("failed: {error}")
|
||||
}
|
||||
};
|
||||
ts_msg!(
|
||||
self,
|
||||
"{} {} {}",
|
||||
"mcp:".style(self.cyan),
|
||||
update.server,
|
||||
status_text
|
||||
);
|
||||
}
|
||||
EventMsg::McpStartupComplete(summary) => {
|
||||
let mut parts = Vec::new();
|
||||
if !summary.ready.is_empty() {
|
||||
parts.push(format!("ready: {}", summary.ready.join(", ")));
|
||||
}
|
||||
if !summary.failed.is_empty() {
|
||||
let servers: Vec<_> = summary.failed.iter().map(|f| f.server.clone()).collect();
|
||||
parts.push(format!("failed: {}", servers.join(", ")));
|
||||
}
|
||||
if !summary.cancelled.is_empty() {
|
||||
parts.push(format!("cancelled: {}", summary.cancelled.join(", ")));
|
||||
}
|
||||
let joined = if parts.is_empty() {
|
||||
"no servers".to_string()
|
||||
} else {
|
||||
parts.join("; ")
|
||||
};
|
||||
ts_msg!(self, "{} {}", "mcp startup:".style(self.cyan), joined);
|
||||
}
|
||||
EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
|
||||
ts_msg!(self, "{}", message.style(self.dimmed));
|
||||
}
|
||||
|
||||
@@ -10,26 +10,26 @@ mod event_processor_with_human_output;
|
||||
pub mod event_processor_with_jsonl_output;
|
||||
pub mod exec_events;
|
||||
|
||||
use crate::cli::Command as ExecCommand;
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
pub use cli::Cli;
|
||||
use codex_common::oss::ensure_oss_provider_ready;
|
||||
use codex_common::oss::get_default_model_for_oss_provider;
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::OLLAMA_OSS_PROVIDER_ID;
|
||||
use codex_core::auth::enforce_login_restrictions;
|
||||
use codex_core::client::http::set_default_originator;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::find_conversation_path_by_id_str;
|
||||
use codex_core::config::find_codex_home;
|
||||
use codex_core::config::load_config_as_toml_with_cli_overrides;
|
||||
use codex_core::config::resolve_oss_provider;
|
||||
use codex_core::git_info::get_git_repo_root;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::Event;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SessionSource;
|
||||
use codex_ollama::DEFAULT_OSS_MODEL;
|
||||
use codex_protocol::config_types::SandboxMode;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use event_processor_with_human_output::EventProcessorWithHumanOutput;
|
||||
@@ -46,6 +46,12 @@ use tracing::info;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
use crate::cli::Command as ExecCommand;
|
||||
use crate::event_processor::CodexStatus;
|
||||
use crate::event_processor::EventProcessor;
|
||||
use codex_core::default_client::set_default_originator;
|
||||
use codex_core::find_conversation_path_by_id_str;
|
||||
|
||||
pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
|
||||
if let Err(err) = set_default_originator("codex_exec".to_string()) {
|
||||
tracing::warn!(?err, "Failed to set codex exec originator override {err:?}");
|
||||
@@ -56,6 +62,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
images,
|
||||
model: model_cli_arg,
|
||||
oss,
|
||||
oss_provider,
|
||||
config_profile,
|
||||
full_auto,
|
||||
dangerously_bypass_approvals_and_sandbox,
|
||||
@@ -145,21 +152,64 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
|
||||
};
|
||||
|
||||
// When using `--oss`, let the bootstrapper pick the model (defaulting to
|
||||
// gpt-oss:20b) and ensure it is present locally. Also, force the built‑in
|
||||
// `oss` model provider.
|
||||
let model = if let Some(model) = model_cli_arg {
|
||||
Some(model)
|
||||
} else if oss {
|
||||
Some(DEFAULT_OSS_MODEL.to_owned())
|
||||
} else {
|
||||
None // No model specified, will use the default.
|
||||
// Parse `-c` overrides from the CLI.
|
||||
let cli_kv_overrides = match config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
#[allow(clippy::print_stderr)]
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
// we load config.toml here to determine project state.
|
||||
#[allow(clippy::print_stderr)]
|
||||
let config_toml = {
|
||||
let codex_home = match find_codex_home() {
|
||||
Ok(codex_home) => codex_home,
|
||||
Err(err) => {
|
||||
eprintln!("Error finding codex home: {err}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
match load_config_as_toml_with_cli_overrides(&codex_home, cli_kv_overrides.clone()).await {
|
||||
Ok(config_toml) => config_toml,
|
||||
Err(err) => {
|
||||
eprintln!("Error loading config.toml: {err}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let model_provider = if oss {
|
||||
Some(BUILT_IN_OSS_MODEL_PROVIDER_ID.to_string())
|
||||
let resolved = resolve_oss_provider(
|
||||
oss_provider.as_deref(),
|
||||
&config_toml,
|
||||
config_profile.clone(),
|
||||
);
|
||||
|
||||
if let Some(provider) = resolved {
|
||||
Some(provider)
|
||||
} else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"No default OSS provider configured. Use --local-provider=provider or set oss_provider to either {LMSTUDIO_OSS_PROVIDER_ID} or {OLLAMA_OSS_PROVIDER_ID} in config.toml"
|
||||
));
|
||||
}
|
||||
} else {
|
||||
None // No specific model provider override.
|
||||
None // No OSS mode enabled
|
||||
};
|
||||
|
||||
// When using `--oss`, let the bootstrapper pick the model based on selected provider
|
||||
let model = if let Some(model) = model_cli_arg {
|
||||
Some(model)
|
||||
} else if oss {
|
||||
model_provider
|
||||
.as_ref()
|
||||
.and_then(|provider_id| get_default_model_for_oss_provider(provider_id))
|
||||
.map(std::borrow::ToOwned::to_owned)
|
||||
} else {
|
||||
None // No model specified, will use the default.
|
||||
};
|
||||
|
||||
// Load configuration and determine approval policy
|
||||
@@ -171,7 +221,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
approval_policy: Some(AskForApproval::Never),
|
||||
sandbox_mode,
|
||||
cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
|
||||
model_provider,
|
||||
model_provider: model_provider.clone(),
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions: None,
|
||||
developer_instructions: None,
|
||||
@@ -182,14 +232,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: add_dir,
|
||||
};
|
||||
// Parse `-c` overrides.
|
||||
let cli_kv_overrides = match config_overrides.parse_overrides() {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing -c overrides: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
|
||||
|
||||
@@ -232,7 +274,18 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
};
|
||||
|
||||
if oss {
|
||||
codex_ollama::ensure_oss_ready(&config)
|
||||
// We're in the oss section, so provider_id should be Some
|
||||
// Let's handle None case gracefully though just in case
|
||||
let provider_id = match model_provider.as_ref() {
|
||||
Some(id) => id,
|
||||
None => {
|
||||
error!("OSS provider unexpectedly not set when oss flag is used");
|
||||
return Err(anyhow::anyhow!(
|
||||
"OSS provider not set but oss flag was used"
|
||||
));
|
||||
}
|
||||
};
|
||||
ensure_oss_provider_ready(provider_id, &config)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
|
||||
}
|
||||
|
||||
@@ -618,15 +618,19 @@ fn error_followed_by_task_complete_produces_turn_failed() {
|
||||
#[test]
|
||||
fn exec_command_end_success_produces_completed_command_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let command = vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()];
|
||||
let cwd = std::env::current_dir().unwrap();
|
||||
let parsed_cmd = Vec::new();
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"c1",
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: "1".to_string(),
|
||||
command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
|
||||
cwd: std::env::current_dir().unwrap(),
|
||||
parsed_cmd: Vec::new(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
}),
|
||||
@@ -652,6 +656,12 @@ fn exec_command_end_success_produces_completed_command_item() {
|
||||
"c2",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: "hi\n".to_string(),
|
||||
@@ -680,15 +690,19 @@ fn exec_command_end_success_produces_completed_command_item() {
|
||||
#[test]
|
||||
fn exec_command_end_failure_produces_failed_command_item() {
|
||||
let mut ep = EventProcessorWithJsonOutput::new(None);
|
||||
let command = vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()];
|
||||
let cwd = std::env::current_dir().unwrap();
|
||||
let parsed_cmd = Vec::new();
|
||||
|
||||
// Begin -> no output
|
||||
let begin = event(
|
||||
"c1",
|
||||
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: "2".to_string(),
|
||||
command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
|
||||
cwd: std::env::current_dir().unwrap(),
|
||||
parsed_cmd: Vec::new(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parsed_cmd.clone(),
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
}),
|
||||
@@ -713,6 +727,12 @@ fn exec_command_end_failure_produces_failed_command_item() {
|
||||
"c2",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "2".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command,
|
||||
cwd,
|
||||
parsed_cmd,
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
@@ -747,6 +767,12 @@ fn exec_command_end_without_begin_is_ignored() {
|
||||
"c1",
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: "no-begin".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
command: Vec::new(),
|
||||
cwd: PathBuf::from("."),
|
||||
parsed_cmd: Vec::new(),
|
||||
source: ExecCommandSource::Agent,
|
||||
interaction_input: None,
|
||||
stdout: String::new(),
|
||||
stderr: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
|
||||
@@ -37,7 +37,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
|
||||
.arg("--output-schema")
|
||||
.arg(&schema_path)
|
||||
.arg("-m")
|
||||
.arg("gpt-5")
|
||||
.arg("gpt-5.1")
|
||||
.arg("tell me a joke")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
@@ -196,7 +196,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
|
||||
.arg("--sandbox")
|
||||
.arg("workspace-write")
|
||||
.arg("--model")
|
||||
.arg("gpt-5")
|
||||
.arg("gpt-5.1")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt)
|
||||
@@ -218,7 +218,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
|
||||
.arg("--sandbox")
|
||||
.arg("workspace-write")
|
||||
.arg("--model")
|
||||
.arg("gpt-5-high")
|
||||
.arg("gpt-5.1-high")
|
||||
.arg("-C")
|
||||
.arg(env!("CARGO_MANIFEST_DIR"))
|
||||
.arg(&prompt2)
|
||||
@@ -231,7 +231,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
|
||||
|
||||
let stderr = String::from_utf8(output.stderr)?;
|
||||
assert!(
|
||||
stderr.contains("model: gpt-5-high"),
|
||||
stderr.contains("model: gpt-5.1-high"),
|
||||
"stderr missing model override: {stderr}"
|
||||
);
|
||||
if cfg!(target_os = "windows") {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user