Compare commits


20 Commits

Author SHA1 Message Date
kevin zhao
e35cbb7acf execpolicy2 core integration 2025-11-17 22:12:38 -08:00
kevin zhao
2c87ca8c68 precompute approval_requirement 2025-11-17 22:12:37 -08:00
kevin zhao
3aa9c0886f execpolicy2 core integration
fix PR

undo keyring store
2025-11-17 22:12:37 -08:00
Dylan Hurd
2b7378ac77 chore(core) Add shell_serialization coverage (#6810)
## Summary
Similar to #6545, this PR updates the shell_serialization test suite to
cover the various `shell` tool invocations we have. Note that this does
not cover unified_exec, which has its own suite of tests. This should
provide some test coverage for when we eventually consolidate
serialization logic.

## Testing
- [x] These are tests
2025-11-17 19:10:56 -08:00
Ahmed Ibrahim
ddcc60a085 Update defaults to gpt-5.1 (#6652)
## Summary
- update documentation, example configs, and automation defaults to
reference gpt-5.1 / gpt-5.1-codex
- bump the CLI and core configuration defaults, model presets, and error
messaging to the new models while keeping the model-family/tool coverage
for legacy slugs
- refresh tests, fixtures, and TUI snapshots so they expect the upgraded
defaults

## Testing
- `cargo test -p codex-core config::tests::test_precedence_fixture_with_gpt5_profile`


------
[Codex Task](https://chatgpt.com/codex/tasks/task_i_6916c5b3c2b08321ace04ee38604fc6b)
2025-11-17 17:40:11 -08:00
cassirer-openai
8465f1f2f4 Demote function call payload log to debug to avoid noisy error-level stderr (#6808) 2025-11-18 01:16:11 +00:00
zhao-oai
7ab45487dd execpolicy2 extension (#6627)
- enabling the execpolicy2 parser to parse multiple policy files and build a
combined `Policy` (useful if codex detects many `.codexpolicy` files)
- adding functionality to `Policy` to allow evaluating multiple commands at
once (useful when we have chained commands)
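
A self-contained sketch of the combining-and-batch-evaluation idea, using local hypothetical types (`PrefixRule`, `Policy::from_parts`, and `evaluate_all` are illustrative stand-ins, not the crate's actual API):

```rust
// Hypothetical stand-ins for illustration; not the actual codex-execpolicy2 API.
struct PrefixRule {
    pattern: Vec<String>,
}

struct Policy {
    rules: Vec<PrefixRule>,
}

impl Policy {
    /// Combine the rules parsed from several .codexpolicy files.
    fn from_parts(parts: Vec<Vec<PrefixRule>>) -> Self {
        Policy {
            rules: parts.into_iter().flatten().collect(),
        }
    }

    /// Evaluate every command of a chained invocation in one call.
    fn evaluate_all(&self, cmds: &[Vec<String>]) -> Vec<bool> {
        cmds.iter()
            .map(|cmd| self.rules.iter().any(|r| cmd.starts_with(&r.pattern[..])))
            .collect()
    }
}

fn main() {
    // Rules from two separate policy files combined into one Policy.
    let policy = Policy::from_parts(vec![
        vec![PrefixRule { pattern: vec!["git".into()] }],
        vec![PrefixRule { pattern: vec!["cargo".into(), "build".into()] }],
    ]);
    // `git status && cargo build` splits into two commands, checked together.
    let cmds: Vec<Vec<String>> = vec![
        vec!["git".into(), "status".into()],
        vec!["cargo".into(), "build".into()],
    ];
    assert_eq!(policy.evaluate_all(&cmds), vec![true, true]);
}
```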
2025-11-17 16:44:41 -08:00
Owen Lin
cecbd5b021 [app-server] feat: add v2 command execution approval flow (#6758)
This PR adds the API V2 version of the command‑execution approval flow
for the shell tool.

This PR wires the new RPC (`item/commandExecution/requestApproval`, V2
only) and related events (`item/started`, `item/completed`, and
`item/commandExecution/delta`, which are emitted in both V1 and V2)
through the app-server
protocol. The new approval RPC is only sent when the user initiates a
turn with the new `turn/start` API so we don't break backwards
compatibility with VSCE.

The approach I took was to make as few changes to the Codex core as
possible, leveraging existing `EventMsg` core events, and translating
those in app-server. I did have to add additional fields to
`EventMsg::ExecCommandEndEvent` to capture the command's input so that
app-server can statelessly transform these events to a
`ThreadItem::CommandExecution` item for the `item/completed` event.

Once we stabilize the API and it's complete enough for our partners, we
can work on migrating the core to be aware of command execution items as
a first-class concept.

**Note**: We'll need followup work to make sure these APIs work for the
unified exec tool, but we'll wait until that's stable and landed before
doing a pass on app-server.

Example payloads below:
```
{
  "method": "item/started",
  "params": {
    "item": {
      "aggregatedOutput": null,
      "command": "/bin/zsh -lc 'touch /tmp/should-trigger-approval'",
      "cwd": "/Users/owen/repos/codex/codex-rs",
      "durationMs": null,
      "exitCode": null,
      "id": "call_lNWWsbXl1e47qNaYjFRs0dyU",
      "parsedCmd": [
        {
          "cmd": "touch /tmp/should-trigger-approval",
          "type": "unknown"
        }
      ],
      "status": "inProgress",
      "type": "commandExecution"
    }
  }
}
```

```
{
  "id": 0,
  "method": "item/commandExecution/requestApproval",
  "params": {
    "itemId": "call_lNWWsbXl1e47qNaYjFRs0dyU",
    "parsedCmd": [
      {
        "cmd": "touch /tmp/should-trigger-approval",
        "type": "unknown"
      }
    ],
    "reason": "Need to create file in /tmp which is outside workspace sandbox",
    "risk": null,
    "threadId": "019a93e8-0a52-7fe3-9808-b6bc40c0989a",
    "turnId": "1"
  }
}
```

```
{
  "id": 0,
  "result": {
    "acceptSettings": {
      "forSession": false
    },
    "decision": "accept"
  }
}
```

```
{
  "params": {
    "item": {
      "aggregatedOutput": null,
      "command": "/bin/zsh -lc 'touch /tmp/should-trigger-approval'",
      "cwd": "/Users/owen/repos/codex/codex-rs",
      "durationMs": 224,
      "exitCode": 0,
      "id": "call_lNWWsbXl1e47qNaYjFRs0dyU",
      "parsedCmd": [
        {
          "cmd": "touch /tmp/should-trigger-approval",
          "type": "unknown"
        }
      ],
      "status": "completed",
      "type": "commandExecution"
    }
  }
}
```
2025-11-18 00:23:54 +00:00
zhao-oai
4000e26303 background rate limits fetch (#6789)
Fetches rate limits asynchronously every minute.
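
A minimal sketch of the pattern, assuming a tokio runtime (`fetch_rate_limits` is a hypothetical stand-in for the real client call):

```rust
use std::time::Duration;

// Hypothetical stand-in for the real rate-limit fetch.
async fn fetch_rate_limits() {
    // ... call the backend and update the cached rate-limit snapshot ...
}

fn spawn_rate_limit_refresher() {
    // Runs in the background; never blocks the caller.
    tokio::spawn(async {
        let mut ticker = tokio::time::interval(Duration::from_secs(60));
        loop {
            ticker.tick().await; // the first tick completes immediately
            fetch_rate_limits().await;
        }
    });
}
```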
2025-11-17 16:06:26 -08:00
iceweasel-oai
e032d338f2 move cap_sid file into ~/.codex so the sandbox cannot overwrite it (#6798)
The `cap_sid` file contains the IDs of the two custom SIDs that the
Windows sandbox creates/manages to implement read-only and
workspace-write sandbox policies.

It previously lived in `<cwd>/.codex` which means that the sandbox could
write to it, which could degrade the efficacy of the sandbox. This
change moves it to `~/.codex/` (or wherever `CODEX_HOME` points to) so
that it is outside the workspace.
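
A sketch of the path resolution described above (simplified for illustration; the real helper in codex-core may differ):

```rust
use std::path::PathBuf;

// Honor CODEX_HOME if set, else fall back to ~/.codex
// (home-directory handling simplified for this sketch).
fn codex_home() -> PathBuf {
    std::env::var_os("CODEX_HOME")
        .map(PathBuf::from)
        .unwrap_or_else(|| {
            let home = std::env::var_os("HOME").expect("HOME not set");
            PathBuf::from(home).join(".codex")
        })
}

fn cap_sid_path() -> PathBuf {
    // Lives outside the workspace, so the sandbox cannot overwrite it.
    codex_home().join("cap_sid")
}
```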
2025-11-17 15:49:41 -08:00
Eric Traut
8bebe86a47 Fix TUI issues with Alt-Gr on Windows (#6799)
This PR fixes keyboard handling for the Right Alt (aka "Alt-Gr") key on
Windows. This key appears on keyboards in Central and Eastern Europe.
Codex has effectively never worked for Windows users in these regions
because the code didn't properly handle this key, which is used for
typing common symbols like `\` and `@`.

A few days ago, I merged a [community-authored
PR](https://github.com/openai/codex/pull/6720) that supplied a partial
fix for this issue. Upon closer inspection, that PR was 1) too broad (not
scoped to Windows only) and 2) incomplete (didn't fix all relevant code
paths, so paste was still broken).

This improvement is based on another [community-provided
PR](https://github.com/openai/codex/pull/3241) by @marektomas-cz. He
submitted it back in September and later closed it because it didn't
receive any attention.

This fix addresses the following bugs: #5922, #3046, #3092, #3519,
#5684, #5843.
2025-11-17 15:18:16 -08:00
Jeremy Rose
ab2e7499f8 core: add a feature to disable the shell tool (#6481)
`--disable shell_tool` disables the built-in shell tool. This is useful
for MCP-only operation.
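
For example:

```shell
# Disable the built-in shell tool; Codex then relies on configured MCP tools.
codex --disable shell_tool
```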

---------

Co-authored-by: Michael Bolin <mbolin@openai.com>
2025-11-17 22:56:19 +00:00
Dylan Hurd
daf77b8452 chore(core) Update shell instructions (#6679)
## Summary
Consolidates `shell` and `shell_command` tool instructions.
## Testing 
- [x] Updated tests, tested locally
2025-11-17 13:05:15 -08:00
Owen Lin
03a6e853c0 fix: annotate all app server v2 types with camelCase (#6791) 2025-11-17 12:02:52 -08:00
rugvedS07
837bc98a1d LM Studio OSS Support (#2312)
## Overview

Adds LM Studio OSS support. Closes #1883


### Changes
This PR enhances the behavior of the `--oss` flag to support LM Studio as a
provider. Additionally, it introduces a new flag, `--local-provider`, which
can take `lmstudio` or `ollama` as values if the user wants to explicitly
choose which one to use.

If no provider is specified, `codex --oss` will auto-select the provider
based on whichever is running.

#### Additional enhancements 
The default can be set using `oss_provider` in config like:

```
oss_provider = "lmstudio"
```

Non-interactive users will need to either pass the provider as an arg or
set it in their `config.toml`.
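
Illustrative invocations combining the flags described above:

```shell
# Auto-select whichever local provider (LM Studio or Ollama) is running:
codex --oss

# Or pin the provider explicitly:
codex --oss --local-provider lmstudio
```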

### Notes
For best performance, [set the default context
length](https://lmstudio.ai/docs/app/advanced/per-model) for gpt-oss to
the maximum your machine can support.

---------

Co-authored-by: Matt Clayton <matt@lmstudio.ai>
Co-authored-by: Eric Traut <etraut@openai.com>
2025-11-17 11:49:09 -08:00
Celia Chen
842a1b7fe7 [app-server] add events to readme (#6690)
Adds a table of contents, lifecycle overview, and events to the README.
2025-11-17 19:28:05 +00:00
Jeremy Rose
03ffe4d595 core/tui: non-blocking MCP startup (#6334)
This makes MCP startup not block TUI startup. Messages sent while MCPs
are booting will be queued.
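
A minimal sketch of the queueing idea, assuming tokio (not the actual codex-core wiring): accept input immediately, then drain the queue once MCP startup finishes.

```rust
use std::time::Duration;
use tokio::sync::mpsc;

// Stand-in for the slow MCP boot; the real startup launches MCP servers.
async fn start_mcp_servers() {
    tokio::time::sleep(Duration::from_millis(100)).await;
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::unbounded_channel::<String>();

    // The UI can accept input right away; sends never block on MCP startup.
    tx.send("first user message".to_string()).unwrap();
    drop(tx); // no more senders in this sketch

    // MCP startup runs in the background; queued messages drain afterwards.
    let worker = tokio::spawn(async move {
        start_mcp_servers().await;
        while let Some(msg) = rx.recv().await {
            println!("dispatching queued message: {msg}");
        }
    });
    worker.await.unwrap();
}
```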


https://github.com/user-attachments/assets/96e1d234-5d8f-4932-a935-a675d35c05e0


Fixes #6317

---------

Co-authored-by: pakrym-oai <pakrym@openai.com>
2025-11-17 11:26:11 -08:00
Owen Lin
ae2a084fae chore: delete chatwidget::tests::binary_size_transcript_snapshot tui test (#6759)
We're running into quite a bit of drag maintaining this test, since
every time we add fields to an EventMsg that happened to be dumped into
the `binary-size-log.jsonl` fixture, this test starts to fail. The fix
is usually to either manually update the `binary-size-log.jsonl` fixture
file, or update the `upgrade_event_payload_for_tests` function to map
the data in that file into something workable.

Eason says it's fine to delete this test, so let's just delete it.
2025-11-17 11:11:41 -08:00
zhao-oai
a941ae7632 feat: execpolicy v2 (#6467)
## Summary
- Introduces the `codex-execpolicy2` crate.
- This PR covers only the prefix-rule subset of the planned execpolicy
v2 language; a richer language will follow.

## Policy
- Policy language centers on `prefix_rule(pattern=[...], decision?,
match?, not_match?)`, where `pattern` is an ordered list of tokens; any
element may be a list to denote alternatives. `decision` defaults to
`allow`; valid values are `allow`, `prompt`, and `forbidden`. `match` /
`not_match` hold example commands that are tokenized and validated at
load time (think of these as unit tests).

## Policy shapes
- Prefix rules use Starlark syntax:
```starlark
prefix_rule(
    pattern = ["cmd", ["alt1", "alt2"]], # ordered tokens; list entries denote alternatives
    decision = "prompt",                # allow | prompt | forbidden; defaults to allow
    match = [["cmd", "alt1"]],          # examples that must match this rule (enforced at compile time)
    not_match = [["cmd", "oops"]],      # examples that must not match this rule (enforced at compile time)
)
```

## Response shapes
- Match:

```json
{
  "match": {
    "decision": "allow|prompt|forbidden",
    "matchedRules": [
      {
        "prefixRuleMatch": {
          "matchedPrefix": ["<token>", "..."],
          "decision": "allow|prompt|forbidden"
        }
      }
    ]
  }
}
```

- No match:

```json
"noMatch"
```

- `matchedRules` lists every rule whose prefix matched the command;
`matchedPrefix` is the exact prefix that matched.
- The effective `decision` is the strictest severity across all matches
(`forbidden` > `prompt` > `allow`).
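
As a concrete illustration of the severity rule, a local sketch (`Decision` is modelled inline here, not imported from the crate):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Decision {
    Allow,     // least severe
    Prompt,
    Forbidden, // most severe
}

/// The effective decision is the maximum severity across all matched rules.
fn effective_decision(matches: &[Decision]) -> Option<Decision> {
    matches.iter().copied().max()
}

fn main() {
    assert_eq!(
        effective_decision(&[Decision::Allow, Decision::Prompt]),
        Some(Decision::Prompt)
    );
    assert_eq!(effective_decision(&[]), None); // corresponds to "noMatch"
}
```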

---------

Co-authored-by: Michael Bolin <mbolin@openai.com>
2025-11-17 10:15:45 -08:00
jif-oai
2c665fb1dd nit: personal git ignore (#6787) 2025-11-17 17:45:52 +00:00
149 changed files with 7963 additions and 28310 deletions

View File

@@ -1,3 +1,3 @@
model = "gpt-5"
model = "gpt-5.1"
# Consider setting [mcp_servers] here!

View File

@@ -46,7 +46,7 @@ jobs:
with:
openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
allow-users: "*"
model: gpt-5
model: gpt-5.1
prompt: |
You are an assistant that triages new GitHub issues by identifying potential duplicates.

.gitignore vendored (3 changes)
View File

@@ -64,6 +64,9 @@ apply_patch/
# coverage
coverage/
# personal files
personal/
# os
.DS_Store
Thumbs.db

codex-rs/Cargo.lock generated (36 changes)
View File

@@ -874,7 +874,6 @@ dependencies = [
"clap",
"codex-protocol",
"mcp-types",
"paste",
"pretty_assertions",
"schemars 0.8.22",
"serde",
@@ -1061,6 +1060,8 @@ dependencies = [
"clap",
"codex-app-server-protocol",
"codex-core",
"codex-lmstudio",
"codex-ollama",
"codex-protocol",
"once_cell",
"serde",
@@ -1084,6 +1085,7 @@ dependencies = [
"codex-apply-patch",
"codex-arg0",
"codex-async-utils",
"codex-execpolicy2",
"codex-file-search",
"codex-git",
"codex-keyring-store",
@@ -1159,7 +1161,6 @@ dependencies = [
"codex-arg0",
"codex-common",
"codex-core",
"codex-ollama",
"codex-protocol",
"core_test_support",
"libc",
@@ -1202,6 +1203,21 @@ dependencies = [
"tempfile",
]
[[package]]
name = "codex-execpolicy2"
version = "0.0.0"
dependencies = [
"anyhow",
"clap",
"multimap",
"pretty_assertions",
"serde",
"serde_json",
"shlex",
"starlark",
"thiserror 2.0.17",
]
[[package]]
name = "codex-feedback"
version = "0.0.0"
@@ -1263,6 +1279,19 @@ dependencies = [
"tokio",
]
[[package]]
name = "codex-lmstudio"
version = "0.0.0"
dependencies = [
"codex-core",
"reqwest",
"serde_json",
"tokio",
"tracing",
"which",
"wiremock",
]
[[package]]
name = "codex-login"
version = "0.0.0"
@@ -1455,12 +1484,12 @@ dependencies = [
"codex-ansi-escape",
"codex-app-server-protocol",
"codex-arg0",
"codex-backend-client",
"codex-common",
"codex-core",
"codex-feedback",
"codex-file-search",
"codex-login",
"codex-ollama",
"codex-protocol",
"codex-windows-sandbox",
"color-eyre",
@@ -1483,6 +1512,7 @@ dependencies = [
"ratatui",
"ratatui-macros",
"regex-lite",
"reqwest",
"serde",
"serde_json",
"serial_test",

View File

@@ -17,9 +17,11 @@ members = [
"core",
"exec",
"execpolicy",
"execpolicy2",
"keyring-store",
"file-search",
"linux-sandbox",
"lmstudio",
"login",
"mcp-server",
"mcp-types",
@@ -64,11 +66,13 @@ codex-chatgpt = { path = "chatgpt" }
codex-common = { path = "common" }
codex-core = { path = "core" }
codex-exec = { path = "exec" }
codex-execpolicy2 = { path = "execpolicy2" }
codex-feedback = { path = "feedback" }
codex-file-search = { path = "file-search" }
codex-git = { path = "utils/git" }
codex-keyring-store = { path = "keyring-store" }
codex-linux-sandbox = { path = "linux-sandbox" }
codex-lmstudio = { path = "lmstudio" }
codex-login = { path = "login" }
codex-mcp-server = { path = "mcp-server" }
codex-ollama = { path = "ollama" }
@@ -150,7 +154,6 @@ opentelemetry-semantic-conventions = "0.30.0"
opentelemetry_sdk = "0.30.0"
os_info = "3.12.0"
owo-colors = "4.2.0"
paste = "1.0.15"
path-absolutize = "3.1.1"
pathdiff = "0.2"
portable-pty = "0.9.0"

View File

@@ -15,7 +15,6 @@ anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
codex-protocol = { workspace = true }
mcp-types = { workspace = true }
paste = { workspace = true }
schemars = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }

View File

@@ -1,6 +1,4 @@
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use crate::JSONRPCNotification;
use crate::JSONRPCRequest;
@@ -9,12 +7,6 @@ use crate::export::GeneratedSchema;
use crate::export::write_json_schema;
use crate::protocol::v1;
use crate::protocol::v2;
use codex_protocol::ConversationId;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxCommandAssessment;
use paste::paste;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
@@ -277,34 +269,36 @@ macro_rules! server_request_definitions {
(
$(
$(#[$variant_meta:meta])*
$variant:ident
$variant:ident $(=> $wire:literal)? {
params: $params:ty,
response: $response:ty,
}
),* $(,)?
) => {
paste! {
/// Request initiated from the server and sent to the client.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(tag = "method", rename_all = "camelCase")]
pub enum ServerRequest {
$(
$(#[$variant_meta])*
$variant {
#[serde(rename = "id")]
request_id: RequestId,
params: [<$variant Params>],
},
)*
}
/// Request initiated from the server and sent to the client.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(tag = "method", rename_all = "camelCase")]
pub enum ServerRequest {
$(
$(#[$variant_meta])*
$(#[serde(rename = $wire)] #[ts(rename = $wire)])?
$variant {
#[serde(rename = "id")]
request_id: RequestId,
params: $params,
},
)*
}
#[derive(Debug, Clone, PartialEq, JsonSchema)]
pub enum ServerRequestPayload {
$( $variant([<$variant Params>]), )*
}
#[derive(Debug, Clone, PartialEq, JsonSchema)]
pub enum ServerRequestPayload {
$( $variant($params), )*
}
impl ServerRequestPayload {
pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
match self {
$(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
}
impl ServerRequestPayload {
pub fn request_with_id(self, request_id: RequestId) -> ServerRequest {
match self {
$(Self::$variant(params) => ServerRequest::$variant { request_id, params },)*
}
}
}
@@ -312,9 +306,9 @@ macro_rules! server_request_definitions {
pub fn export_server_responses(
out_dir: &::std::path::Path,
) -> ::std::result::Result<(), ::ts_rs::ExportError> {
paste! {
$(<[<$variant Response>] as ::ts_rs::TS>::export_all_to(out_dir)?;)*
}
$(
<$response as ::ts_rs::TS>::export_all_to(out_dir)?;
)*
Ok(())
}
@@ -323,9 +317,12 @@ macro_rules! server_request_definitions {
out_dir: &Path,
) -> ::anyhow::Result<Vec<GeneratedSchema>> {
let mut schemas = Vec::new();
paste! {
$(schemas.push(crate::export::write_json_schema::<[<$variant Response>]>(out_dir, stringify!([<$variant Response>]))?);)*
}
$(
schemas.push(crate::export::write_json_schema::<$response>(
out_dir,
concat!(stringify!($variant), "Response"),
)?);
)*
Ok(schemas)
}
@@ -334,9 +331,12 @@ macro_rules! server_request_definitions {
out_dir: &Path,
) -> ::anyhow::Result<Vec<GeneratedSchema>> {
let mut schemas = Vec::new();
paste! {
$(schemas.push(crate::export::write_json_schema::<[<$variant Params>]>(out_dir, stringify!([<$variant Params>]))?);)*
}
$(
schemas.push(crate::export::write_json_schema::<$params>(
out_dir,
concat!(stringify!($variant), "Params"),
)?);
)*
Ok(schemas)
}
};
@@ -426,49 +426,27 @@ impl TryFrom<JSONRPCRequest> for ServerRequest {
}
server_request_definitions! {
/// NEW APIs
/// Sent when approval is requested for a specific command execution.
/// This request is used for Turns started via turn/start.
CommandExecutionRequestApproval => "item/commandExecution/requestApproval" {
params: v2::CommandExecutionRequestApprovalParams,
response: v2::CommandExecutionRequestApprovalResponse,
},
/// DEPRECATED APIs below
/// Request to approve a patch.
ApplyPatchApproval,
/// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
ApplyPatchApproval {
params: v1::ApplyPatchApprovalParams,
response: v1::ApplyPatchApprovalResponse,
},
/// Request to exec a command.
ExecCommandApproval,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
pub struct ApplyPatchApprovalParams {
pub conversation_id: ConversationId,
/// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
/// and [codex_core::protocol::PatchApplyEndEvent].
pub call_id: String,
pub file_changes: HashMap<PathBuf, FileChange>,
/// Optional explanatory reason (e.g. request for extra write access).
pub reason: Option<String>,
/// When set, the agent is asking the user to allow writes under this root
/// for the remainder of the session (unclear if this is honored today).
pub grant_root: Option<PathBuf>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
pub struct ExecCommandApprovalParams {
pub conversation_id: ConversationId,
/// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
/// and [codex_core::protocol::ExecCommandEndEvent].
pub call_id: String,
pub command: Vec<String>,
pub cwd: PathBuf,
pub reason: Option<String>,
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
pub struct ExecCommandApprovalResponse {
pub decision: ReviewDecision,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
pub struct ApplyPatchApprovalResponse {
pub decision: ReviewDecision,
/// This request is used for Turns started via the legacy APIs (i.e. SendUserTurn, SendUserMessage).
ExecCommandApproval {
params: v1::ExecCommandApprovalParams,
response: v1::ExecCommandApprovalResponse,
},
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -533,17 +511,20 @@ client_notification_definitions! {
mod tests {
use super::*;
use anyhow::Result;
use codex_protocol::ConversationId;
use codex_protocol::account::PlanType;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::AskForApproval;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf;
#[test]
fn serialize_new_conversation() -> Result<()> {
let request = ClientRequest::NewConversation {
request_id: RequestId::Integer(42),
params: v1::NewConversationParams {
model: Some("gpt-5-codex".to_string()),
model: Some("gpt-5.1-codex".to_string()),
model_provider: None,
profile: None,
cwd: None,
@@ -561,7 +542,7 @@ mod tests {
"method": "newConversation",
"id": 42,
"params": {
"model": "gpt-5-codex",
"model": "gpt-5.1-codex",
"modelProvider": null,
"profile": null,
"cwd": null,
@@ -616,7 +597,7 @@ mod tests {
#[test]
fn serialize_server_request() -> Result<()> {
let conversation_id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?;
let params = ExecCommandApprovalParams {
let params = v1::ExecCommandApprovalParams {
conversation_id,
call_id: "call-42".to_string(),
command: vec!["echo".to_string(), "hello".to_string()],

View File

@@ -8,8 +8,12 @@ use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::config_types::Verbosity;
use codex_protocol::models::ResponseItem;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxCommandAssessment;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::TurnAbortReason;
@@ -191,6 +195,46 @@ pub struct GitDiffToRemoteResponse {
pub diff: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
pub struct ApplyPatchApprovalParams {
pub conversation_id: ConversationId,
/// Use to correlate this with [codex_core::protocol::PatchApplyBeginEvent]
/// and [codex_core::protocol::PatchApplyEndEvent].
pub call_id: String,
pub file_changes: HashMap<PathBuf, FileChange>,
/// Optional explanatory reason (e.g. request for extra write access).
pub reason: Option<String>,
/// When set, the agent is asking the user to allow writes under this root
/// for the remainder of the session (unclear if this is honored today).
pub grant_root: Option<PathBuf>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
pub struct ApplyPatchApprovalResponse {
pub decision: ReviewDecision,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
pub struct ExecCommandApprovalParams {
pub conversation_id: ConversationId,
/// Use to correlate this with [codex_core::protocol::ExecCommandBeginEvent]
/// and [codex_core::protocol::ExecCommandEndEvent].
pub call_id: String,
pub command: Vec<String>,
pub cwd: PathBuf,
pub reason: Option<String>,
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
pub struct ExecCommandApprovalResponse {
pub decision: ReviewDecision,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
pub struct CancelLoginChatGptParams {

View File

@@ -4,11 +4,13 @@ use std::path::PathBuf;
use crate::protocol::common::AuthMode;
use codex_protocol::ConversationId;
use codex_protocol::account::PlanType;
use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment;
use codex_protocol::config_types::ReasoningEffort;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
use codex_protocol::items::TurnItem as CoreTurnItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
use codex_protocol::user_input::UserInput as CoreUserInput;
@@ -20,7 +22,7 @@ use serde_json::Value as JsonValue;
use ts_rs::TS;
// Macro to declare a camelCased API v2 enum mirroring a core enum which
// tends to use kebab-case.
// tends to use either snake_case or kebab-case.
macro_rules! v2_enum_from_core {
(
pub enum $Name:ident from $Src:path { $( $Variant:ident ),+ $(,)? }
@@ -56,6 +58,23 @@ v2_enum_from_core!(
}
);
v2_enum_from_core!(
pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel {
Low,
Medium,
High
}
);
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum ApprovalDecision {
Accept,
Decline,
Cancel,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "camelCase")]
#[ts(tag = "type")]
@@ -63,6 +82,8 @@ v2_enum_from_core!(
pub enum SandboxPolicy {
DangerFullAccess,
ReadOnly,
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
WorkspaceWrite {
#[serde(default)]
writable_roots: Vec<PathBuf>,
@@ -119,6 +140,98 @@ impl From<codex_protocol::protocol::SandboxPolicy> for SandboxPolicy {
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct SandboxCommandAssessment {
pub description: String,
pub risk_level: CommandRiskLevel,
}
impl SandboxCommandAssessment {
pub fn into_core(self) -> CoreSandboxCommandAssessment {
CoreSandboxCommandAssessment {
description: self.description,
risk_level: self.risk_level.to_core(),
}
}
}
impl From<CoreSandboxCommandAssessment> for SandboxCommandAssessment {
fn from(value: CoreSandboxCommandAssessment) -> Self {
Self {
description: value.description,
risk_level: CommandRiskLevel::from(value.risk_level),
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "camelCase")]
#[ts(tag = "type")]
#[ts(export_to = "v2/")]
pub enum CommandAction {
Read {
command: String,
name: String,
path: PathBuf,
},
ListFiles {
command: String,
path: Option<String>,
},
Search {
command: String,
query: Option<String>,
path: Option<String>,
},
Unknown {
command: String,
},
}
impl CommandAction {
pub fn into_core(self) -> CoreParsedCommand {
match self {
CommandAction::Read {
command: cmd,
name,
path,
} => CoreParsedCommand::Read { cmd, name, path },
CommandAction::ListFiles { command: cmd, path } => {
CoreParsedCommand::ListFiles { cmd, path }
}
CommandAction::Search {
command: cmd,
query,
path,
} => CoreParsedCommand::Search { cmd, query, path },
CommandAction::Unknown { command: cmd } => CoreParsedCommand::Unknown { cmd },
}
}
}
impl From<CoreParsedCommand> for CommandAction {
fn from(value: CoreParsedCommand) -> Self {
match value {
CoreParsedCommand::Read { cmd, name, path } => CommandAction::Read {
command: cmd,
name,
path,
},
CoreParsedCommand::ListFiles { cmd, path } => {
CommandAction::ListFiles { command: cmd, path }
}
CoreParsedCommand::Search { cmd, query, path } => CommandAction::Search {
command: cmd,
query,
path,
},
CoreParsedCommand::Unknown { cmd } => CommandAction::Unknown { command: cmd },
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "camelCase")]
#[ts(tag = "type")]
@@ -279,7 +392,7 @@ pub struct ThreadStartParams {
pub cwd: Option<String>,
pub approval_policy: Option<AskForApproval>,
pub sandbox: Option<SandboxMode>,
pub config: Option<HashMap<String, serde_json::Value>>,
pub config: Option<HashMap<String, JsonValue>>,
pub base_instructions: Option<String>,
pub developer_instructions: Option<String>,
}
@@ -506,14 +619,14 @@ impl From<CoreUserInput> for UserInput {
#[ts(tag = "type")]
#[ts(export_to = "v2/")]
pub enum ThreadItem {
UserMessage {
id: String,
content: Vec<UserInput>,
},
AgentMessage {
id: String,
text: String,
},
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
UserMessage { id: String, content: Vec<UserInput> },
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
AgentMessage { id: String, text: String },
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
Reasoning {
id: String,
#[serde(default)]
@@ -521,19 +634,35 @@ pub enum ThreadItem {
#[serde(default)]
content: Vec<String>,
},
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
CommandExecution {
id: String,
/// The command to be executed.
command: String,
aggregated_output: String,
exit_code: Option<i32>,
/// The command's working directory.
cwd: PathBuf,
status: CommandExecutionStatus,
/// A best-effort parsing of the command to understand the action(s) it will perform.
/// This returns a list of CommandAction objects because a single shell command may
/// be composed of many commands piped together.
command_actions: Vec<CommandAction>,
/// The command's output, aggregated from stdout and stderr.
aggregated_output: Option<String>,
/// The command's exit code.
exit_code: Option<i32>,
/// The duration of the command execution in milliseconds.
duration_ms: Option<i64>,
},
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
FileChange {
id: String,
changes: Vec<FileUpdateChange>,
status: PatchApplyStatus,
},
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
McpToolCall {
id: String,
server: String,
@@ -543,22 +672,18 @@ pub enum ThreadItem {
result: Option<McpToolCallResult>,
error: Option<McpToolCallError>,
},
WebSearch {
id: String,
query: String,
},
TodoList {
id: String,
items: Vec<TodoItem>,
},
ImageView {
id: String,
path: String,
},
CodeReview {
id: String,
review: String,
},
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
WebSearch { id: String, query: String },
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
TodoList { id: String, items: Vec<TodoItem> },
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
ImageView { id: String, path: String },
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
CodeReview { id: String, review: String },
}
impl From<CoreTurnItem> for ThreadItem {
@@ -758,6 +883,39 @@ pub struct McpToolCallProgressNotification {
pub message: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct CommandExecutionRequestApprovalParams {
pub thread_id: String,
pub turn_id: String,
pub item_id: String,
/// Optional explanatory reason (e.g. request for network access).
pub reason: Option<String>,
/// Optional model-provided risk assessment describing the blocked command.
pub risk: Option<SandboxCommandAssessment>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct CommandExecutionRequestAcceptSettings {
/// If true, automatically approve this command for the duration of the session.
#[serde(default)]
pub for_session: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct CommandExecutionRequestApprovalResponse {
pub decision: ApprovalDecision,
/// Optional approval settings for when the decision is `accept`.
/// Ignored if the decision is `decline` or `cancel`.
#[serde(default)]
pub accept_settings: Option<CommandExecutionRequestAcceptSettings>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]

View File

@@ -2,6 +2,16 @@
`codex app-server` is the interface Codex uses to power rich interfaces such as the [Codex VS Code extension](https://marketplace.visualstudio.com/items?itemName=openai.chatgpt). The message schema is currently unstable, but those who wish to build experimental UIs on top of Codex may find it valuable.
## Table of Contents
- [Protocol](#protocol)
- [Message Schema](#message-schema)
- [Lifecycle Overview](#lifecycle-overview)
- [Initialization](#initialization)
- [Core primitives](#core-primitives)
- [Thread & turn endpoints](#thread--turn-endpoints)
- [Auth endpoints](#auth-endpoints)
- [Events (work-in-progress)](#v2-streaming-events-work-in-progress)
## Protocol
Similar to [MCP](https://modelcontextprotocol.io/), `codex app-server` supports bidirectional communication, streaming JSONL over stdio. The protocol is JSON-RPC 2.0, though the `"jsonrpc":"2.0"` header is omitted.
@@ -15,6 +25,14 @@ codex app-server generate-ts --out DIR
codex app-server generate-json-schema --out DIR
```
## Lifecycle Overview
- Initialize once: Immediately after launching the codex app-server process, send an `initialize` request with your client metadata, then emit an `initialized` notification. Any other request before this handshake gets rejected.
- Start (or resume) a thread: Call `thread/start` to open a fresh conversation. The response returns the thread object and you'll also get a `thread/started` notification. If you're continuing an existing conversation, call `thread/resume` with its ID instead.
- Begin a turn: To send user input, call `turn/start` with the target `threadId` and the user's input. Optional fields let you override model, cwd, sandbox policy, etc. This immediately returns the new turn object and triggers a `turn/started` notification.
- Stream events: After `turn/start`, keep reading JSON-RPC notifications on stdout. You'll see `item/started`, `item/completed`, deltas like `item/agentMessage/delta`, tool progress, etc. These represent streaming model output plus any side effects (commands, tool calls, reasoning notes).
- Finish the turn: When the model is done (or the turn is interrupted via making the `turn/interrupt` call), the server sends `turn/completed` with the final turn state and token usage.
## Initialization
Clients must send a single `initialize` request before invoking any other method, then acknowledge with an `initialized` notification. The server returns the user agent string it will present to upstream services; subsequent requests issued before initialization receive a `"Not initialized"` error, and repeated `initialize` calls receive an `"Already initialized"` error.
@@ -56,7 +74,7 @@ Start a fresh thread when you need a new Codex conversation.
{ "method": "thread/start", "id": 10, "params": {
// Optionally set config settings. If not specified, will use the user's
// current config settings.
"model": "gpt-5-codex",
"model": "gpt-5.1-codex",
"cwd": "/Users/me/project",
"approvalPolicy": "never",
"sandbox": "workspaceWrite",
@@ -137,7 +155,7 @@ You can optionally specify config overrides on the new turn. If specified, these
"writableRoots": ["/Users/me/project"],
"networkAccess": true
},
"model": "gpt-5-codex",
"model": "gpt-5.1-codex",
"effort": "medium",
"summary": "concise"
} }
@@ -258,3 +276,33 @@ Field notes:
- `codex app-server generate-ts --out <dir>` emits v2 types under `v2/`.
- `codex app-server generate-json-schema --out <dir>` outputs `codex_app_server_protocol.schemas.json`.
- See [“Authentication and authorization” in the config docs](../../docs/config.md#authentication-and-authorization) for configuration knobs.
## Events (work-in-progress)
Event notifications are the server-initiated event stream for thread lifecycles, turn lifecycles, and the items within them. After you start or resume a thread, keep reading stdout for `thread/started`, `turn/*`, and `item/*` notifications.
### Turn events
The app-server streams JSON-RPC notifications while a turn is running. Each turn starts with `turn/started` (initial `turn`) and ends with `turn/completed` (final `turn` plus token `usage`), and clients subscribe to the events they care about, rendering each item incrementally as updates arrive. The per-item lifecycle is always: `item/started` → zero or more item-specific deltas → `item/completed`.
#### Thread items
`ThreadItem` is the tagged union carried in turn responses and `item/*` notifications. Currently we support events for the following items:
- `userMessage` — `{id, content}` where `content` is a list of user inputs (`text`, `image`, or `localImage`).
- `agentMessage` — `{id, text}` containing the accumulated agent reply.
- `reasoning` — `{id, summary, content}` where `summary` holds streamed reasoning summaries (applicable for most OpenAI models) and `content` holds raw reasoning blocks (applicable for e.g. open source models).
- `mcpToolCall` — `{id, server, tool, status, arguments, result?, error?}` describing MCP calls; `status` is `inProgress`, `completed`, or `failed`.
- `webSearch` — `{id, query}` for a web search request issued by the agent.
All items emit two shared lifecycle events:
- `item/started` — emits the full `item` when a new unit of work begins so the UI can render it immediately; the `item.id` in this payload matches the `itemId` used by deltas.
- `item/completed` — sends the final `item` once that work finishes (e.g., after a tool call or message completes); treat this as the authoritative state.
There are additional item-specific events:
#### agentMessage
- `item/agentMessage/delta` — appends streamed text for the agent message; concatenate `delta` values for the same `itemId` in order to reconstruct the full reply.
#### reasoning
- `item/reasoning/summaryTextDelta` — streams readable reasoning summaries; `summaryIndex` increments when a new summary section opens.
- `item/reasoning/summaryPartAdded` — marks the boundary between reasoning summary sections for an `itemId`; subsequent `summaryTextDelta` entries share the same `summaryIndex`.
- `item/reasoning/textDelta` — streams raw reasoning text (only applicable for e.g. open source models); use `contentIndex` to group deltas that belong together before showing them in the UI.

View File

@@ -5,6 +5,12 @@ use codex_app_server_protocol::AccountRateLimitsUpdatedNotification;
use codex_app_server_protocol::AgentMessageDeltaNotification;
use codex_app_server_protocol::ApplyPatchApprovalParams;
use codex_app_server_protocol::ApplyPatchApprovalResponse;
use codex_app_server_protocol::ApprovalDecision;
use codex_app_server_protocol::CommandAction as V2ParsedCommand;
use codex_app_server_protocol::CommandExecutionOutputDeltaNotification;
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::ExecCommandApprovalParams;
use codex_app_server_protocol::ExecCommandApprovalResponse;
use codex_app_server_protocol::InterruptConversationResponse;
@@ -16,25 +22,29 @@ use codex_app_server_protocol::McpToolCallStatus;
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
use codex_app_server_protocol::ReasoningTextDeltaNotification;
use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment;
use codex_app_server_protocol::ServerNotification;
use codex_app_server_protocol::ServerRequestPayload;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::TurnInterruptResponse;
use codex_core::CodexConversation;
use codex_core::parse_command::shlex_join;
use codex_core::protocol::ApplyPatchApprovalRequestEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecApprovalRequestEvent;
use codex_core::protocol::ExecCommandEndEvent;
use codex_core::protocol::McpToolCallBeginEvent;
use codex_core::protocol::McpToolCallEndEvent;
use codex_core::protocol::Op;
use codex_core::protocol::ReviewDecision;
use codex_protocol::ConversationId;
use std::convert::TryFrom;
use std::sync::Arc;
use tokio::sync::oneshot;
use tracing::error;
type JsonRpcResult = serde_json::Value;
type JsonValue = serde_json::Value;
pub(crate) async fn apply_bespoke_event_handling(
event: Event,
@@ -42,6 +52,7 @@ pub(crate) async fn apply_bespoke_event_handling(
conversation: Arc<CodexConversation>,
outgoing: Arc<OutgoingMessageSender>,
pending_interrupts: PendingInterrupts,
api_version: ApiVersion,
) {
let Event { id: event_id, msg } = event;
match msg {
@@ -61,11 +72,57 @@ pub(crate) async fn apply_bespoke_event_handling(
let rx = outgoing
.send_request(ServerRequestPayload::ApplyPatchApproval(params))
.await;
// TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
tokio::spawn(async move {
on_patch_approval_response(event_id, rx, conversation).await;
});
}
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
call_id,
turn_id,
command,
cwd,
reason,
risk,
parsed_cmd,
}) => match api_version {
ApiVersion::V1 => {
let params = ExecCommandApprovalParams {
conversation_id,
call_id,
command,
cwd,
reason,
risk,
parsed_cmd,
};
let rx = outgoing
.send_request(ServerRequestPayload::ExecCommandApproval(params))
.await;
tokio::spawn(async move {
on_exec_approval_response(event_id, rx, conversation).await;
});
}
ApiVersion::V2 => {
let params = CommandExecutionRequestApprovalParams {
thread_id: conversation_id.to_string(),
turn_id: turn_id.clone(),
// Until we migrate the core to be aware of a first class CommandExecutionItem
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
item_id: call_id.clone(),
reason,
risk: risk.map(V2SandboxCommandAssessment::from),
};
let rx = outgoing
.send_request(ServerRequestPayload::CommandExecutionRequestApproval(
params,
))
.await;
tokio::spawn(async move {
on_command_execution_request_approval_response(event_id, rx, conversation)
.await;
});
}
},
// TODO(celia): properly construct McpToolCall TurnItem in core.
EventMsg::McpToolCallBegin(begin_event) => {
let notification = construct_mcp_tool_call_notification(begin_event).await;
@@ -121,32 +178,6 @@ pub(crate) async fn apply_bespoke_event_handling(
))
.await;
}
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
call_id,
command,
cwd,
reason,
risk,
parsed_cmd,
}) => {
let params = ExecCommandApprovalParams {
conversation_id,
call_id,
command,
cwd,
reason,
risk,
parsed_cmd,
};
let rx = outgoing
.send_request(ServerRequestPayload::ExecCommandApproval(params))
.await;
// TODO(mbolin): Enforce a timeout so this task does not live indefinitely?
tokio::spawn(async move {
on_exec_approval_response(event_id, rx, conversation).await;
});
}
EventMsg::TokenCount(token_count_event) => {
if let Some(rate_limits) = token_count_event.rate_limits {
outgoing
@@ -172,6 +203,79 @@ pub(crate) async fn apply_bespoke_event_handling(
.send_server_notification(ServerNotification::ItemCompleted(notification))
.await;
}
EventMsg::ExecCommandBegin(exec_command_begin_event) => {
let item = ThreadItem::CommandExecution {
id: exec_command_begin_event.call_id.clone(),
command: shlex_join(&exec_command_begin_event.command),
cwd: exec_command_begin_event.cwd,
status: CommandExecutionStatus::InProgress,
command_actions: exec_command_begin_event
.parsed_cmd
.into_iter()
.map(V2ParsedCommand::from)
.collect(),
aggregated_output: None,
exit_code: None,
duration_ms: None,
};
let notification = ItemStartedNotification { item };
outgoing
.send_server_notification(ServerNotification::ItemStarted(notification))
.await;
}
EventMsg::ExecCommandOutputDelta(exec_command_output_delta_event) => {
let notification = CommandExecutionOutputDeltaNotification {
item_id: exec_command_output_delta_event.call_id.clone(),
delta: String::from_utf8_lossy(&exec_command_output_delta_event.chunk).to_string(),
};
outgoing
.send_server_notification(ServerNotification::CommandExecutionOutputDelta(
notification,
))
.await;
}
EventMsg::ExecCommandEnd(exec_command_end_event) => {
let ExecCommandEndEvent {
call_id,
command,
cwd,
parsed_cmd,
aggregated_output,
exit_code,
duration,
..
} = exec_command_end_event;
let status = if exit_code == 0 {
CommandExecutionStatus::Completed
} else {
CommandExecutionStatus::Failed
};
let aggregated_output = if aggregated_output.is_empty() {
None
} else {
Some(aggregated_output)
};
let duration_ms = i64::try_from(duration.as_millis()).unwrap_or(i64::MAX);
let item = ThreadItem::CommandExecution {
id: call_id,
command: shlex_join(&command),
cwd,
status,
command_actions: parsed_cmd.into_iter().map(V2ParsedCommand::from).collect(),
aggregated_output,
exit_code: Some(exit_code),
duration_ms: Some(duration_ms),
};
let notification = ItemCompletedNotification { item };
outgoing
.send_server_notification(ServerNotification::ItemCompleted(notification))
.await;
}
// If this is a TurnAborted, reply to any pending interrupt requests.
EventMsg::TurnAborted(turn_aborted_event) => {
let pending = {
@@ -202,7 +306,7 @@ pub(crate) async fn apply_bespoke_event_handling(
async fn on_patch_approval_response(
event_id: String,
receiver: oneshot::Receiver<JsonRpcResult>,
receiver: oneshot::Receiver<JsonValue>,
codex: Arc<CodexConversation>,
) {
let response = receiver.await;
@@ -244,7 +348,7 @@ async fn on_patch_approval_response(
async fn on_exec_approval_response(
event_id: String,
receiver: oneshot::Receiver<JsonRpcResult>,
receiver: oneshot::Receiver<JsonValue>,
conversation: Arc<CodexConversation>,
) {
let response = receiver.await;
@@ -278,6 +382,53 @@ async fn on_exec_approval_response(
}
}
async fn on_command_execution_request_approval_response(
event_id: String,
receiver: oneshot::Receiver<JsonValue>,
conversation: Arc<CodexConversation>,
) {
let response = receiver.await;
let value = match response {
Ok(value) => value,
Err(err) => {
error!("request failed: {err:?}");
return;
}
};
let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(value)
.unwrap_or_else(|err| {
error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}");
CommandExecutionRequestApprovalResponse {
decision: ApprovalDecision::Decline,
accept_settings: None,
}
});
let CommandExecutionRequestApprovalResponse {
decision,
accept_settings,
} = response;
let decision = match (decision, accept_settings) {
(ApprovalDecision::Accept, Some(settings)) if settings.for_session => {
ReviewDecision::ApprovedForSession
}
(ApprovalDecision::Accept, _) => ReviewDecision::Approved,
(ApprovalDecision::Decline, _) => ReviewDecision::Denied,
(ApprovalDecision::Cancel, _) => ReviewDecision::Abort,
};
if let Err(err) = conversation
.submit(Op::ExecApproval {
id: event_id,
decision,
})
.await
{
error!("failed to submit ExecApproval: {err}");
}
}
/// similar to handle_mcp_tool_call_begin in exec
async fn construct_mcp_tool_call_notification(
begin_event: McpToolCallBeginEvent,
@@ -287,10 +438,7 @@ async fn construct_mcp_tool_call_notification(
server: begin_event.invocation.server,
tool: begin_event.invocation.tool,
status: McpToolCallStatus::InProgress,
arguments: begin_event
.invocation
.arguments
.unwrap_or(JsonRpcResult::Null),
arguments: begin_event.invocation.arguments.unwrap_or(JsonValue::Null),
result: None,
error: None,
};
@@ -328,10 +476,7 @@ async fn construct_mcp_tool_call_end_notification(
server: end_event.invocation.server,
tool: end_event.invocation.tool,
status,
arguments: end_event
.invocation
.arguments
.unwrap_or(JsonRpcResult::Null),
arguments: end_event.invocation.arguments.unwrap_or(JsonValue::Null),
result,
error,
};

View File

@@ -101,12 +101,12 @@ use codex_core::RolloutRecorder;
use codex_core::SessionMeta;
use codex_core::auth::CLIENT_ID;
use codex_core::auth::login_with_api_key;
use codex_core::client::http::get_codex_user_agent;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::config::ConfigToml;
use codex_core::config::edit::ConfigEditsBuilder;
use codex_core::config_loader::load_config_as_toml;
use codex_core::default_client::get_codex_user_agent;
use codex_core::exec::ExecParams;
use codex_core::exec_env::create_env;
use codex_core::find_conversation_path_by_id_str;
@@ -1245,7 +1245,7 @@ impl CodexMessageProcessor {
// Auto-attach a conversation listener when starting a thread.
// Use the same behavior as the v1 API with experimental_raw_events=false.
if let Err(err) = self
.attach_conversation_listener(conversation_id, false)
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
.await
{
tracing::warn!(
@@ -1523,7 +1523,7 @@ impl CodexMessageProcessor {
}) => {
// Auto-attach a conversation listener when resuming a thread.
if let Err(err) = self
.attach_conversation_listener(conversation_id, false)
.attach_conversation_listener(conversation_id, false, ApiVersion::V2)
.await
{
tracing::warn!(
@@ -2376,7 +2376,7 @@ impl CodexMessageProcessor {
experimental_raw_events,
} = params;
match self
.attach_conversation_listener(conversation_id, experimental_raw_events)
.attach_conversation_listener(conversation_id, experimental_raw_events, ApiVersion::V1)
.await
{
Ok(subscription_id) => {
@@ -2417,6 +2417,7 @@ impl CodexMessageProcessor {
&mut self,
conversation_id: ConversationId,
experimental_raw_events: bool,
api_version: ApiVersion,
) -> Result<Uuid, JSONRPCErrorError> {
let conversation = match self
.conversation_manager
@@ -2440,6 +2441,7 @@ impl CodexMessageProcessor {
let outgoing_for_task = self.outgoing.clone();
let pending_interrupts = self.pending_interrupts.clone();
let api_version_for_task = api_version;
tokio::spawn(async move {
loop {
tokio::select! {
@@ -2495,6 +2497,7 @@ impl CodexMessageProcessor {
conversation.clone(),
outgoing_for_task.clone(),
pending_interrupts.clone(),
api_version_for_task,
)
.await;
}

View File

@@ -14,9 +14,9 @@ use codex_app_server_protocol::JSONRPCRequest;
use codex_app_server_protocol::JSONRPCResponse;
use codex_core::AuthManager;
use codex_core::ConversationManager;
use codex_core::client::http::USER_AGENT_SUFFIX;
use codex_core::client::http::get_codex_user_agent;
use codex_core::config::Config;
use codex_core::default_client::USER_AGENT_SUFFIX;
use codex_core::default_client::get_codex_user_agent;
use codex_feedback::CodexFeedback;
use codex_protocol::protocol::SessionSource;
use std::sync::Arc;

View File

@@ -27,7 +27,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
std::fs::write(
config_toml,
r#"
model = "gpt-5-codex"
model = "gpt-5.1-codex"
approval_policy = "on-request"
sandbox_mode = "workspace-write"
model_reasoning_summary = "detailed"
@@ -87,7 +87,7 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
}),
forced_chatgpt_workspace_id: Some("12345678-0000-0000-0000-000000000000".into()),
forced_login_method: Some(ForcedLoginMethod::Chatgpt),
model: Some("gpt-5-codex".into()),
model: Some("gpt-5.1-codex".into()),
model_reasoning_effort: Some(ReasoningEffort::High),
model_reasoning_summary: Some(ReasoningSummary::Detailed),
model_verbosity: Some(Verbosity::Medium),

View File

@@ -57,7 +57,7 @@ fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
std::fs::write(
config_toml,
r#"
model = "gpt-5-codex"
model = "gpt-5.1-codex"
model_reasoning_effort = "medium"
"#,
)

View File

@@ -27,7 +27,7 @@ async fn thread_resume_returns_original_thread() -> Result<()> {
// Start a thread.
let start_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("gpt-5-codex".to_string()),
model: Some("gpt-5.1-codex".to_string()),
..Default::default()
})
.await?;
@@ -68,7 +68,7 @@ async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
let start_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("gpt-5-codex".to_string()),
model: Some("gpt-5.1-codex".to_string()),
..Default::default()
})
.await?;
@@ -112,7 +112,7 @@ async fn thread_resume_supports_history_and_overrides() -> Result<()> {
// Start a thread.
let start_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("gpt-5-codex".to_string()),
model: Some("gpt-5.1-codex".to_string()),
..Default::default()
})
.await?;

View File

@@ -29,7 +29,7 @@ async fn thread_start_creates_thread_and_emits_started() -> Result<()> {
// Start a v2 thread with an explicit model override.
let req_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("gpt-5".to_string()),
model: Some("gpt-5.1".to_string()),
..Default::default()
})
.await?;

View File

@@ -5,10 +5,13 @@ use app_test_support::create_mock_chat_completions_server;
use app_test_support::create_mock_chat_completions_server_unchecked;
use app_test_support::create_shell_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::JSONRPCNotification;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ServerRequest;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
@@ -17,9 +20,6 @@ use codex_app_server_protocol::TurnStartedNotification;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_core::protocol_config_types::ReasoningEffort;
use codex_core::protocol_config_types::ReasoningSummary;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use core_test_support::skip_if_no_network;
use pretty_assertions::assert_eq;
use std::path::Path;
@@ -235,7 +235,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
.await??;
let ThreadStartResponse { thread } = to_response::<ThreadStartResponse>(start_resp)?;
// turn/start — expect ExecCommandApproval request from server
// turn/start — expect CommandExecutionRequestApproval request from server
let first_turn_id = mcp
.send_turn_start_request(TurnStartParams {
thread_id: thread.id.clone(),
@@ -258,16 +258,10 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
mcp.read_stream_until_request_message(),
)
.await??;
let ServerRequest::ExecCommandApproval { request_id, params } = server_req else {
panic!("expected ExecCommandApproval request");
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else {
panic!("expected CommandExecutionRequestApproval request");
};
assert_eq!(params.call_id, "call1");
assert_eq!(
params.parsed_cmd,
vec![ParsedCommand::Unknown {
cmd: "python3 -c 'print(42)'".to_string()
}]
);
assert_eq!(params.item_id, "call1");
// Approve and wait for task completion
mcp.send_response(
@@ -302,7 +296,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
)
.await??;
// Ensure we do NOT receive an ExecCommandApproval request before task completes
// Ensure we do NOT receive a CommandExecutionRequestApproval request before task completes
timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("codex/event/task_complete"),
@@ -314,8 +308,6 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
#[tokio::test]
async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
// When returning Result from a test, pass an Ok(()) to the skip macro
// so the early return type matches. The no-arg form returns unit.
skip_if_no_network!(Ok(()));
let tmp = TempDir::new()?;
@@ -424,29 +416,35 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
)
.await??;
let exec_begin_notification = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
)
let command_exec_item = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let item_started_notification = mcp
.read_stream_until_notification_message("item/started")
.await?;
let params = item_started_notification
.params
.clone()
.expect("item/started params");
let item_started: ItemStartedNotification =
serde_json::from_value(params).expect("deserialize item/started notification");
if matches!(item_started.item, ThreadItem::CommandExecution { .. }) {
return Ok::<ThreadItem, anyhow::Error>(item_started.item);
}
}
})
.await??;
let params = exec_begin_notification
.params
.clone()
.expect("exec_command_begin params");
let event: Event = serde_json::from_value(params).expect("deserialize exec begin event");
let exec_begin = match event.msg {
EventMsg::ExecCommandBegin(exec_begin) => exec_begin,
other => panic!("expected ExecCommandBegin event, got {other:?}"),
let ThreadItem::CommandExecution {
cwd,
command,
status,
..
} = command_exec_item
else {
unreachable!("loop ensures we break on command execution items");
};
assert_eq!(exec_begin.cwd, second_cwd);
assert_eq!(
exec_begin.command,
vec![
"bash".to_string(),
"-lc".to_string(),
"echo second turn".to_string()
]
);
assert_eq!(cwd, second_cwd);
assert_eq!(command, "bash -lc 'echo second turn'");
assert_eq!(status, CommandExecutionStatus::InProgress);
timeout(
DEFAULT_READ_TIMEOUT,

View File

@@ -5,7 +5,7 @@ use crate::types::RateLimitWindowSnapshot;
use crate::types::TurnAttemptsSiblingTurnsResponse;
use anyhow::Result;
use codex_core::auth::CodexAuth;
use codex_core::client::http::get_codex_user_agent;
use codex_core::default_client::get_codex_user_agent;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_protocol::protocol::RateLimitWindow;
use reqwest::header::AUTHORIZATION;

View File

@@ -1,5 +1,5 @@
use codex_core::client::http::create_client;
use codex_core::config::Config;
use codex_core::default_client::create_client;
use crate::chatgpt_token::get_chatgpt_token_data;
use crate::chatgpt_token::init_chatgpt_token_from_auth;

View File

@@ -155,11 +155,11 @@ async fn run_command_under_sandbox(
run_windows_sandbox_capture(
policy_str,
&sandbox_cwd,
base_dir.as_path(),
command_vec,
&cwd_clone,
env_map,
None,
Some(base_dir.as_path()),
)
})
.await;

View File

@@ -761,9 +761,9 @@ mod tests {
#[test]
fn resume_model_flag_applies_when_no_root_flags() {
let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5-test"].as_ref());
let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5.1-test"].as_ref());
assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
assert!(interactive.resume_picker);
assert!(!interactive.resume_last);
assert_eq!(interactive.resume_session_id, None);
@@ -808,7 +808,7 @@ mod tests {
"--ask-for-approval",
"on-request",
"-m",
"gpt-5-test",
"gpt-5.1-test",
"-p",
"my-profile",
"-C",
@@ -819,7 +819,7 @@ mod tests {
.as_ref(),
);
assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
assert_eq!(interactive.model.as_deref(), Some("gpt-5.1-test"));
assert!(interactive.oss);
assert_eq!(interactive.config_profile.as_deref(), Some("my-profile"));
assert_matches!(

View File

@@ -48,7 +48,7 @@ async fn init_backend(user_agent_suffix: &str) -> anyhow::Result<BackendContext>
});
}
let ua = codex_core::client::http::get_codex_user_agent();
let ua = codex_core::default_client::get_codex_user_agent();
let mut http = codex_cloud_tasks_client::HttpClient::new(base_url.clone())?.with_user_agent(ua);
let style = if base_url.contains("/backend-api") {
"wham"
@@ -384,7 +384,7 @@ pub async fn run_main(cli: Cli, _codex_linux_sandbox_exe: Option<PathBuf>) -> an
append_error_log(format!(
"startup: wham_force_internal={} ua={}",
force_internal,
codex_core::client::http::get_codex_user_agent()
codex_core::default_client::get_codex_user_agent()
));
// Non-blocking initial load so the in-box spinner can animate
app.status = "Loading tasks…".to_string();

View File

@@ -7,7 +7,7 @@ use codex_core::config::ConfigOverrides;
use codex_login::AuthManager;
pub fn set_user_agent_suffix(suffix: &str) {
if let Ok(mut guard) = codex_core::client::http::USER_AGENT_SUFFIX.lock() {
if let Ok(mut guard) = codex_core::default_client::USER_AGENT_SUFFIX.lock() {
guard.replace(suffix.to_string());
}
}
@@ -79,7 +79,7 @@ pub async fn build_chatgpt_headers() -> HeaderMap {
use reqwest::header::USER_AGENT;
set_user_agent_suffix("codex_cloud_tasks_tui");
let ua = codex_core::client::http::get_codex_user_agent();
let ua = codex_core::default_client::get_codex_user_agent();
let mut headers = HeaderMap::new();
headers.insert(
USER_AGENT,

View File

@@ -10,6 +10,8 @@ workspace = true
clap = { workspace = true, features = ["derive", "wrap_help"], optional = true }
codex-app-server-protocol = { workspace = true }
codex-core = { workspace = true }
codex-lmstudio = { workspace = true }
codex-ollama = { workspace = true }
codex-protocol = { workspace = true }
once_cell = { workspace = true }
serde = { workspace = true, optional = true }

View File

@@ -37,3 +37,5 @@ pub mod model_presets;
// Shared approval presets (AskForApproval + Sandbox) used by TUI and MCP server
// Not to be confused with AskForApproval, which we should probably rename to EscalationPolicy.
pub mod approval_presets;
// Shared OSS provider utilities used by TUI and exec
pub mod oss;

View File

@@ -0,0 +1,60 @@
//! OSS provider utilities shared between TUI and exec.
use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
use codex_core::OLLAMA_OSS_PROVIDER_ID;
use codex_core::config::Config;
/// Returns the default model for a given OSS provider.
pub fn get_default_model_for_oss_provider(provider_id: &str) -> Option<&'static str> {
match provider_id {
LMSTUDIO_OSS_PROVIDER_ID => Some(codex_lmstudio::DEFAULT_OSS_MODEL),
OLLAMA_OSS_PROVIDER_ID => Some(codex_ollama::DEFAULT_OSS_MODEL),
_ => None,
}
}
/// Ensures the specified OSS provider is ready (models downloaded, service reachable).
pub async fn ensure_oss_provider_ready(
provider_id: &str,
config: &Config,
) -> Result<(), std::io::Error> {
match provider_id {
LMSTUDIO_OSS_PROVIDER_ID => {
codex_lmstudio::ensure_oss_ready(config)
.await
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
}
OLLAMA_OSS_PROVIDER_ID => {
codex_ollama::ensure_oss_ready(config)
.await
.map_err(|e| std::io::Error::other(format!("OSS setup failed: {e}")))?;
}
_ => {
// Unknown provider, skip setup
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_default_model_for_provider_lmstudio() {
let result = get_default_model_for_oss_provider(LMSTUDIO_OSS_PROVIDER_ID);
assert_eq!(result, Some(codex_lmstudio::DEFAULT_OSS_MODEL));
}
#[test]
fn test_get_default_model_for_provider_ollama() {
let result = get_default_model_for_oss_provider(OLLAMA_OSS_PROVIDER_ID);
assert_eq!(result, Some(codex_ollama::DEFAULT_OSS_MODEL));
}
#[test]
fn test_get_default_model_for_provider_unknown() {
let result = get_default_model_for_oss_provider("unknown-provider");
assert_eq!(result, None);
}
}
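The two helpers above are meant to be used together by the TUI and exec front ends: resolve a default model, then make sure the provider is actually usable. A minimal sketch of such a call site (the `prepare_oss` wrapper is hypothetical; the helpers and provider-ID constants are the ones defined above):
```
use codex_core::OLLAMA_OSS_PROVIDER_ID;
use codex_core::config::Config;

// Hypothetical caller: pick the provider's default model, then ensure the
// local service is reachable and the model is downloaded before starting.
async fn prepare_oss(config: &Config) -> std::io::Result<&'static str> {
    let provider_id = OLLAMA_OSS_PROVIDER_ID;
    let model = get_default_model_for_oss_provider(provider_id)
        .ok_or_else(|| std::io::Error::other("unknown OSS provider"))?;
    ensure_oss_provider_ready(provider_id, config).await?;
    Ok(model)
}
```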

View File

@@ -22,6 +22,7 @@ chrono = { workspace = true, features = ["serde"] }
codex-app-server-protocol = { workspace = true }
codex-apply-patch = { workspace = true }
codex-async-utils = { workspace = true }
codex-execpolicy2 = { workspace = true }
codex-file-search = { workspace = true }
codex-git = { workspace = true }
codex-keyring-store = { workspace = true }

View File

@@ -318,8 +318,6 @@ For casual greetings, acknowledgements, or other one-off conversational messages
When using the shell, you must adhere to the following guidelines:
- The arguments to `shell` will be passed to execvp().
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
- Read files in chunks with a max chunk size of 250 lines. Do not use python scripts to attempt to output larger chunks of a file. Command line output will be truncated after 10 kilobytes or 256 lines of output, regardless of the command used.

View File

@@ -2,8 +2,6 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL
## General
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
## Editing constraints

View File

@@ -23,6 +23,7 @@ pub use crate::auth::storage::AuthDotJson;
use crate::auth::storage::AuthStorageBackend;
use crate::auth::storage::create_auth_storage;
use crate::config::Config;
use crate::default_client::CodexHttpClient;
use crate::error::RefreshTokenFailedError;
use crate::error::RefreshTokenFailedReason;
use crate::token_data::KnownPlan as InternalKnownPlan;
@@ -271,7 +272,7 @@ impl CodexAuth {
mode: AuthMode::ChatGPT,
storage: create_auth_storage(PathBuf::new(), AuthCredentialsStoreMode::File),
auth_dot_json,
client: crate::client::http::create_client(),
client: crate::default_client::create_client(),
}
}
@@ -286,7 +287,7 @@ impl CodexAuth {
}
pub fn from_api_key(api_key: &str) -> Self {
Self::from_api_key_with_client(api_key, crate::client::http::create_client())
Self::from_api_key_with_client(api_key, crate::default_client::create_client())
}
}
@@ -446,7 +447,7 @@ fn load_auth(
auth_credentials_store_mode: AuthCredentialsStoreMode,
) -> std::io::Result<Option<CodexAuth>> {
if enable_codex_api_key_env && let Some(api_key) = read_codex_api_key_from_env() {
let client = crate::client::http::create_client();
let client = crate::default_client::create_client();
return Ok(Some(CodexAuth::from_api_key_with_client(
api_key.as_str(),
client,
@@ -455,7 +456,7 @@ fn load_auth(
let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
let client = crate::client::http::create_client();
let client = crate::default_client::create_client();
let auth_dot_json = match storage.load()? {
Some(auth) => auth,
None => return Ok(None),
@@ -631,7 +632,6 @@ fn refresh_token_endpoint() -> String {
.unwrap_or_else(|_| REFRESH_TOKEN_URL.to_string())
}
use crate::client::http::CodexHttpClient;
use std::sync::RwLock;
/// Internal cached auth state.

View File

@@ -1,16 +1,15 @@
use std::time::Duration;
use crate::ModelProviderInfo;
use crate::client::ResponseEvent;
use crate::client::ResponseStream;
use crate::client::http::CodexHttpClient;
use crate::client::retry::RetryableStreamError;
use crate::client::retry::retry_stream;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::client_common::ResponseStream;
use crate::default_client::CodexHttpClient;
use crate::error::CodexErr;
use crate::error::ConnectionFailedError;
use crate::error::ResponseStreamFailed;
use crate::error::Result;
use crate::error::RetryLimitReachedError;
use crate::error::UnexpectedResponseError;
use crate::model_family::ModelFamily;
use crate::tools::spec::create_tools_json_for_chat_completions_api;
@@ -22,12 +21,19 @@ use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use eventsource_stream::Eventsource;
use futures::Stream;
use futures::StreamExt;
use futures::TryStreamExt;
use reqwest::StatusCode;
use serde_json::json;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use tokio::sync::mpsc;
use tokio::time::timeout;
use tracing::debug;
use tracing::trace;
/// Implementation for the classic Chat Completions API.
@@ -49,7 +55,7 @@ pub(crate) async fn stream_chat_completions(
let mut messages = Vec::<serde_json::Value>::new();
let full_instructions = prompt.get_full_instructions(model_family);
messages.push(json!({ "role": "system", "content": full_instructions }));
messages.push(json!({"role": "system", "content": full_instructions}));
let input = prompt.get_formatted_input();
@@ -122,7 +128,7 @@ pub(crate) async fn stream_chat_completions(
{
reasoning_by_anchor_index
.entry(idx - 1)
.and_modify(|v| v.push_str(text.as_str()))
.and_modify(|v| v.push_str(&text))
.or_insert(text.clone());
attached = true;
}
@@ -133,13 +139,13 @@ pub(crate) async fn stream_chat_completions(
ResponseItem::FunctionCall { .. } | ResponseItem::LocalShellCall { .. } => {
reasoning_by_anchor_index
.entry(idx + 1)
.and_modify(|v| v.push_str(text.as_str()))
.and_modify(|v| v.push_str(&text))
.or_insert(text.clone());
}
ResponseItem::Message { role, .. } if role == "assistant" => {
reasoning_by_anchor_index
.entry(idx + 1)
.and_modify(|v| v.push_str(text.as_str()))
.and_modify(|v| v.push_str(&text))
.or_insert(text.clone());
}
_ => {}
@@ -197,18 +203,13 @@ pub(crate) async fn stream_chat_completions(
json!(text)
};
let mut msg = json!({
"role": role,
"content": content_value
});
let mut msg = json!({"role": role, "content": content_value});
if role == "assistant"
&& let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
&& let Some(obj) = msg.as_object_mut()
{
obj.insert("reasoning".to_string(), json!(reasoning));
}
messages.push(msg);
}
ResponseItem::FunctionCall {
@@ -227,24 +228,22 @@ pub(crate) async fn stream_chat_completions(
"name": name,
"arguments": arguments,
}
}],
}]
});
if let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
&& let Some(obj) = msg.as_object_mut()
{
obj.insert("reasoning".to_string(), json!(reasoning));
}
messages.push(msg);
}
ResponseItem::LocalShellCall {
id,
call_id: _,
status,
action,
} => {
// Confirm with API team.
let mut msg = json!({
"role": "assistant",
"content": null,
@@ -255,16 +254,13 @@ pub(crate) async fn stream_chat_completions(
"action": action,
}]
});
if let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
&& let Some(obj) = msg.as_object_mut()
{
obj.insert("reasoning".to_string(), json!(reasoning));
}
messages.push(msg);
}
ResponseItem::FunctionCallOutput { call_id, output } => {
// Prefer structured content items when available (e.g., images)
// otherwise fall back to the legacy plain-string content.
@@ -291,7 +287,6 @@ pub(crate) async fn stream_chat_completions(
"content": content_value,
}));
}
ResponseItem::CustomToolCall {
id,
call_id: _,
@@ -309,7 +304,7 @@ pub(crate) async fn stream_chat_completions(
"name": name,
"input": input,
}
}],
}]
}));
}
ResponseItem::CustomToolCallOutput { call_id, output } => {
@@ -319,148 +314,117 @@ pub(crate) async fn stream_chat_completions(
"content": output,
}));
}
ResponseItem::Reasoning { .. } => {
// Omit from conversation history; reasoning is attached to anchors above.
continue;
}
ResponseItem::WebSearchCall { .. } | ResponseItem::Other => {
continue;
}
ResponseItem::GhostSnapshot { .. } => {
// Ghost snapshots annotate history but are not sent to the model.
continue;
}
ResponseItem::Reasoning { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::Other => {
// Omit these items from the conversation history.
continue;
}
}
}
let tools_json = create_tools_json_for_chat_completions_api(&prompt.tools)?;
let mut body = json!({
let payload = json!({
"model": model_family.slug,
"messages": messages,
"stream": true,
"stream_options": {
"include_usage": true,
},
"tools": tools_json,
});
if !tools_json.is_empty() {
body["tools"] = json!(tools_json);
body["tool_choice"] = json!("auto");
}
if let SessionSource::SubAgent(sub) = session_source {
let subagent = crate::client::types::subagent_label(sub);
body["metadata"] = json!({
"x-openai-subagent": subagent,
});
}
let max_attempts = provider.request_max_retries();
retry_stream(max_attempts, |attempt| {
let body = body.clone();
async move {
stream_single_chat_completion(attempt, client, provider, otel_event_manager, body)
.await
.map_err(ChatStreamError::Retryable)
}
})
.await
}
async fn stream_single_chat_completion(
attempt: u64,
client: &CodexHttpClient,
provider: &ModelProviderInfo,
otel_event_manager: &OtelEventManager,
body: serde_json::Value,
) -> Result<ResponseStream> {
trace!(
debug!(
"POST to {}: {}",
provider.get_full_url(&None),
body.to_string()
payload.to_string()
);
let mut req_builder = provider.create_request_builder(client, &None).await?;
req_builder = req_builder
.header(reqwest::header::ACCEPT, "text/event-stream")
.json(&body);
let mut attempt = 0;
let max_retries = provider.request_max_retries();
loop {
attempt += 1;
let res = otel_event_manager
.log_request(attempt, || req_builder.send())
.await;
let mut req_builder = provider.create_request_builder(client, &None).await?;
let mut request_id = None;
if let Ok(resp) = &res {
request_id = resp
.headers()
.get("cf-ray")
.map(|v| v.to_str().unwrap_or_default().to_string());
}
match res {
Ok(resp) if resp.status().is_success() => {
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
// spawn task to process SSE
let stream = resp.bytes_stream().map_err(move |e| {
CodexErr::ResponseStreamFailed(ResponseStreamFailed {
source: e,
request_id: request_id.clone(),
})
});
tokio::spawn(process_chat_sse(
stream,
tx_event,
provider.stream_idle_timeout(),
otel_event_manager.clone(),
));
Ok(ResponseStream { rx_event })
// Include subagent header only for subagent sessions.
if let SessionSource::SubAgent(sub) = session_source.clone() {
let subagent = if let SubAgentSource::Other(label) = sub {
label
} else {
serde_json::to_value(&sub)
.ok()
.and_then(|v| v.as_str().map(std::string::ToString::to_string))
.unwrap_or_else(|| "other".to_string())
};
req_builder = req_builder.header("x-openai-subagent", subagent);
}
Ok(res) => {
let status = res.status();
if !(status == StatusCode::TOO_MANY_REQUESTS
|| status == StatusCode::UNAUTHORIZED
|| status.is_server_error())
{
// Surface the error body to callers. Use `unwrap_or_default` per Clippy.
let body = res.text().await.unwrap_or_default();
return Err(CodexErr::UnexpectedStatus(UnexpectedResponseError {
status,
body,
request_id: None,
}));
let res = otel_event_manager
.log_request(attempt, || {
req_builder
.header(reqwest::header::ACCEPT, "text/event-stream")
.json(&payload)
.send()
})
.await;
match res {
Ok(resp) if resp.status().is_success() => {
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
let stream = resp.bytes_stream().map_err(|e| {
CodexErr::ResponseStreamFailed(ResponseStreamFailed {
source: e,
request_id: None,
})
});
tokio::spawn(process_chat_sse(
stream,
tx_event,
provider.stream_idle_timeout(),
otel_event_manager.clone(),
));
return Ok(ResponseStream { rx_event });
}
Ok(res) => {
let status = res.status();
if !(status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()) {
let body = (res.text().await).unwrap_or_default();
return Err(CodexErr::UnexpectedStatus(UnexpectedResponseError {
status,
body,
request_id: None,
}));
}
Err(CodexErr::UnexpectedStatus(UnexpectedResponseError {
status,
body: String::new(),
request_id,
}))
}
Err(e) => Err(CodexErr::ConnectionFailed(ConnectionFailedError {
source: e,
})),
}
}
if attempt > max_retries {
return Err(CodexErr::RetryLimit(RetryLimitReachedError {
status,
request_id: None,
}));
}
enum ChatStreamError {
Retryable(CodexErr),
}
let retry_after_secs = res
.headers()
.get(reqwest::header::RETRY_AFTER)
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u64>().ok());
impl RetryableStreamError for ChatStreamError {
fn delay(&self, attempt: u64) -> Option<Duration> {
Some(backoff(attempt))
}
fn into_error(self) -> CodexErr {
match self {
ChatStreamError::Retryable(e) => e,
let delay = retry_after_secs
.map(|s| Duration::from_millis(s * 1_000))
.unwrap_or_else(|| backoff(attempt));
tokio::time::sleep(delay).await;
}
Err(e) => {
if attempt > max_retries {
return Err(CodexErr::ConnectionFailed(ConnectionFailedError {
source: e,
}));
}
let delay = backoff(attempt);
tokio::time::sleep(delay).await;
}
}
}
}
@@ -524,7 +488,9 @@ async fn append_reasoning_text(
.await;
}
}
/// Lightweight SSE processor for the Chat Completions streaming format. The
/// output is mapped onto Codex's internal [`ResponseEvent`] so that the rest
/// of the pipeline can stay agnostic of the underlying wire format.
async fn process_chat_sse<S>(
stream: S,
tx_event: mpsc::Sender<Result<ResponseEvent>>,
@@ -553,15 +519,21 @@ async fn process_chat_sse<S>(
let mut reasoning_item: Option<ResponseItem> = None;
loop {
let sse = match crate::client::sse::next_sse_event(
&mut stream,
idle_timeout,
&otel_event_manager,
)
.await
{
crate::client::sse::SseNext::Event(ev) => ev,
crate::client::sse::SseNext::Eof => {
let start = std::time::Instant::now();
let response = timeout(idle_timeout, stream.next()).await;
let duration = start.elapsed();
otel_event_manager.log_sse_event(&response, duration);
let sse = match response {
Ok(Some(Ok(ev))) => ev,
Ok(Some(Err(e))) => {
let _ = tx_event
.send(Err(CodexErr::Stream(e.to_string(), None)))
.await;
return;
}
Ok(None) => {
// Stream closed gracefully; emit Completed with a dummy id.
let _ = tx_event
.send(Ok(ResponseEvent::Completed {
response_id: String::new(),
@@ -570,11 +542,7 @@ async fn process_chat_sse<S>(
.await;
return;
}
crate::client::sse::SseNext::StreamError(message) => {
let _ = tx_event.send(Err(CodexErr::Stream(message, None))).await;
return;
}
crate::client::sse::SseNext::Timeout => {
Err(_) => {
let _ = tx_event
.send(Err(CodexErr::Stream(
"idle timeout waiting for SSE".into(),
@@ -756,3 +724,256 @@ async fn process_chat_sse<S>(
}
}
}
/// Optional client-side aggregation helper
///
/// Stream adapter that merges the incremental `OutputItemDone` chunks coming from
/// [`process_chat_sse`] into a *running* assistant message, **suppressing the
/// per-token deltas**. The stream stays silent while the model is thinking
/// and only emits two events per turn:
///
/// 1. `ResponseEvent::OutputItemDone` with the *complete* assistant message
/// (fully concatenated).
/// 2. The original `ResponseEvent::Completed` right after it.
///
/// This mirrors the behaviour the TypeScript CLI exposes to its higher layers.
///
/// The adapter is intentionally *lossless*: callers who do **not** opt in via
/// [`AggregateStreamExt::aggregate()`] keep receiving the original unmodified
/// events.
#[derive(Copy, Clone, Eq, PartialEq)]
enum AggregateMode {
AggregatedOnly,
Streaming,
}
pub(crate) struct AggregatedChatStream<S> {
inner: S,
cumulative: String,
cumulative_reasoning: String,
pending: std::collections::VecDeque<ResponseEvent>,
mode: AggregateMode,
}
impl<S> Stream for AggregatedChatStream<S>
where
S: Stream<Item = Result<ResponseEvent>> + Unpin,
{
type Item = Result<ResponseEvent>;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let this = self.get_mut();
// First, flush any buffered events from the previous call.
if let Some(ev) = this.pending.pop_front() {
return Poll::Ready(Some(Ok(ev)));
}
loop {
match Pin::new(&mut this.inner).poll_next(cx) {
Poll::Pending => return Poll::Pending,
Poll::Ready(None) => return Poll::Ready(None),
Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))) => {
// If this is an incremental assistant message chunk, accumulate but
// do NOT emit yet. Forward any other item (e.g. FunctionCall) right
// away so downstream consumers see it.
let is_assistant_message = matches!(
&item,
codex_protocol::models::ResponseItem::Message { role, .. } if role == "assistant"
);
if is_assistant_message {
match this.mode {
AggregateMode::AggregatedOnly => {
// Only use the final assistant message if we have not
// seen any deltas; otherwise, deltas already built the
// cumulative text and this would duplicate it.
if this.cumulative.is_empty()
&& let codex_protocol::models::ResponseItem::Message {
content,
..
} = &item
&& let Some(text) = content.iter().find_map(|c| match c {
codex_protocol::models::ContentItem::OutputText {
text,
} => Some(text),
_ => None,
})
{
this.cumulative.push_str(text);
}
// Swallow assistant message here; emit on Completed.
continue;
}
AggregateMode::Streaming => {
// In streaming mode, if we have not seen any deltas, forward
// the final assistant message directly. If deltas were seen,
// suppress the final message to avoid duplication.
if this.cumulative.is_empty() {
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
item,
))));
} else {
continue;
}
}
}
}
// Not an assistant message; forward it immediately.
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
}
Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
}
Poll::Ready(Some(Ok(ResponseEvent::Completed {
response_id,
token_usage,
}))) => {
// Build any aggregated items in the correct order: Reasoning first, then Message.
let mut emitted_any = false;
if !this.cumulative_reasoning.is_empty()
&& matches!(this.mode, AggregateMode::AggregatedOnly)
{
let aggregated_reasoning =
codex_protocol::models::ResponseItem::Reasoning {
id: String::new(),
summary: Vec::new(),
content: Some(vec![
codex_protocol::models::ReasoningItemContent::ReasoningText {
text: std::mem::take(&mut this.cumulative_reasoning),
},
]),
encrypted_content: None,
};
this.pending
.push_back(ResponseEvent::OutputItemDone(aggregated_reasoning));
emitted_any = true;
}
// Always emit the final aggregated assistant message when any
// content deltas have been observed. In AggregatedOnly mode this
// is the sole assistant output; in Streaming mode this finalizes
// the streamed deltas into a terminal OutputItemDone so callers
// can persist/render the message once per turn.
if !this.cumulative.is_empty() {
let aggregated_message = codex_protocol::models::ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![codex_protocol::models::ContentItem::OutputText {
text: std::mem::take(&mut this.cumulative),
}],
};
this.pending
.push_back(ResponseEvent::OutputItemDone(aggregated_message));
emitted_any = true;
}
// Always emit Completed last when anything was aggregated.
if emitted_any {
this.pending.push_back(ResponseEvent::Completed {
response_id: response_id.clone(),
token_usage: token_usage.clone(),
});
// Return the first pending event now.
if let Some(ev) = this.pending.pop_front() {
return Poll::Ready(Some(Ok(ev)));
}
}
// Nothing aggregated; forward Completed directly.
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
response_id,
token_usage,
})));
}
Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
// These events are exclusive to the Responses API and
// will never appear in a Chat Completions stream.
continue;
}
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
// Always accumulate deltas so we can emit a final OutputItemDone at Completed.
this.cumulative.push_str(&delta);
if matches!(this.mode, AggregateMode::Streaming) {
// In streaming mode, also forward the delta immediately.
return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
} else {
continue;
}
}
Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
delta,
content_index,
}))) => {
// Always accumulate reasoning deltas so we can emit a final Reasoning item at Completed.
this.cumulative_reasoning.push_str(&delta);
if matches!(this.mode, AggregateMode::Streaming) {
// In streaming mode, also forward the delta immediately.
return Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
delta,
content_index,
})));
} else {
continue;
}
}
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta { .. }))) => {
continue;
}
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryPartAdded { .. }))) => {
continue;
}
Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item)))) => {
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item))));
}
}
}
}
}
/// Extension trait that activates aggregation on any stream of [`ResponseEvent`].
pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Sized {
/// Returns a new stream that emits **only** the final assistant message
/// per turn instead of every incremental delta. The produced
/// `ResponseEvent` sequence for a typical text turn looks like:
///
/// ```ignore
/// OutputItemDone(<full message>)
/// Completed
/// ```
///
/// No other `OutputItemDone` events will be seen by the caller.
///
/// Usage:
///
/// ```ignore
/// let agg_stream = client.stream(&prompt).await?.aggregate();
/// while let Some(event) = agg_stream.next().await {
/// // event now contains cumulative text
/// }
/// ```
fn aggregate(self) -> AggregatedChatStream<Self> {
AggregatedChatStream::new(self, AggregateMode::AggregatedOnly)
}
}
impl<T> AggregateStreamExt for T where T: Stream<Item = Result<ResponseEvent>> + Sized {}
impl<S> AggregatedChatStream<S> {
fn new(inner: S, mode: AggregateMode) -> Self {
AggregatedChatStream {
inner,
cumulative: String::new(),
cumulative_reasoning: String::new(),
pending: std::collections::VecDeque::new(),
mode,
}
}
pub(crate) fn streaming_mode(inner: S) -> Self {
Self::new(inner, AggregateMode::Streaming)
}
}
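For orientation, a sketch of driving the streaming variant end to end (the `drain` helper and its printing are hypothetical; `streaming_mode` and the delta-then-final-item behavior are what the adapter above implements):
```
use futures::StreamExt;

// Hypothetical consumer: live deltas are forwarded as they arrive, and one
// final aggregated OutputItemDone is emitted at Completed.
async fn drain<S>(inner: S) -> Result<()>
where
    S: futures::Stream<Item = Result<ResponseEvent>> + Unpin,
{
    let mut stream = AggregatedChatStream::streaming_mode(inner);
    while let Some(ev) = stream.next().await {
        match ev? {
            ResponseEvent::OutputTextDelta(delta) => print!("{delta}"),
            ResponseEvent::OutputItemDone(item) => println!("\nfinal: {item:?}"),
            ResponseEvent::Completed { .. } => break,
            _ => {}
        }
    }
    Ok(())
}
```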

codex-rs/core/src/client.rs (new file, 1540 lines)

File diff suppressed because it is too large

View File

@@ -1,235 +0,0 @@
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use crate::client::ResponseEvent;
use crate::error::Result;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ResponseItem;
use futures::Stream;
/// Optional client-side aggregation helper
///
/// Stream adapter that merges the incremental `OutputItemDone` chunks coming from
/// the chat SSE decoder into a *running* assistant message, **suppressing the
/// per-token deltas**. The stream stays silent while the model is thinking and
/// only emits two events per turn:
///
/// 1. `ResponseEvent::OutputItemDone` with the *complete* assistant message
/// (fully concatenated).
/// 2. The original `ResponseEvent::Completed` right after it.
///
/// The adapter is intentionally *lossless*: callers who do **not** opt in via
/// [`AggregateStreamExt::aggregate()`] keep receiving the original unmodified
/// events.
#[derive(Copy, Clone, Eq, PartialEq)]
enum AggregateMode {
AggregatedOnly,
Streaming,
}
pub(crate) struct AggregatedChatStream<S> {
inner: S,
cumulative: String,
cumulative_reasoning: String,
pending: std::collections::VecDeque<ResponseEvent>,
mode: AggregateMode,
}
impl<S> Stream for AggregatedChatStream<S>
where
S: Stream<Item = Result<ResponseEvent>> + Unpin,
{
type Item = Result<ResponseEvent>;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let this = self.get_mut();
// First, flush any buffered events from the previous call.
if let Some(ev) = this.pending.pop_front() {
return Poll::Ready(Some(Ok(ev)));
}
loop {
match Pin::new(&mut this.inner).poll_next(cx) {
Poll::Pending => return Poll::Pending,
Poll::Ready(None) => return Poll::Ready(None),
Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))) => {
// If this is an incremental assistant message chunk, accumulate but
// do NOT emit yet. Forward any other item (e.g. FunctionCall) right
// away so downstream consumers see it.
let is_assistant_message = matches!(
&item,
ResponseItem::Message { role, .. } if role == "assistant"
);
if is_assistant_message {
match this.mode {
AggregateMode::AggregatedOnly => {
// Only use the final assistant message if we have not
// seen any deltas; otherwise, deltas already built the
// cumulative text and this would duplicate it.
if this.cumulative.is_empty()
&& let ResponseItem::Message { content, .. } = &item
&& let Some(text) = content.iter().find_map(|c| match c {
ContentItem::OutputText { text } => Some(text),
_ => None,
})
{
this.cumulative.push_str(text);
}
// Swallow assistant message here; emit on Completed.
continue;
}
AggregateMode::Streaming => {
// In streaming mode, if we have not seen any deltas, forward
// the final assistant message directly. If deltas were seen,
// suppress the final message to avoid duplication.
if this.cumulative.is_empty() {
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
item,
))));
}
continue;
}
}
}
// Not an assistant message; forward it immediately.
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
}
Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
}
Poll::Ready(Some(Ok(ResponseEvent::Completed {
response_id,
token_usage,
}))) => {
// Build any aggregated items in the correct order: Reasoning first, then Message.
let mut emitted_any = false;
if !this.cumulative_reasoning.is_empty()
&& matches!(this.mode, AggregateMode::AggregatedOnly)
{
let aggregated_reasoning = ResponseItem::Reasoning {
id: String::new(),
summary: Vec::new(),
content: Some(vec![ReasoningItemContent::ReasoningText {
text: std::mem::take(&mut this.cumulative_reasoning),
}]),
encrypted_content: None,
};
this.pending
.push_back(ResponseEvent::OutputItemDone(aggregated_reasoning));
emitted_any = true;
}
// Always emit the final aggregated assistant message when any
// content deltas have been observed. In AggregatedOnly mode this
// is the sole assistant output; in Streaming mode this finalizes
// the streamed deltas into a terminal OutputItemDone so callers
// can persist/render the message once per turn.
if !this.cumulative.is_empty() {
let aggregated_message = ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: std::mem::take(&mut this.cumulative),
}],
};
this.pending
.push_back(ResponseEvent::OutputItemDone(aggregated_message));
emitted_any = true;
}
// Always emit Completed last when anything was aggregated.
if emitted_any {
this.pending.push_back(ResponseEvent::Completed {
response_id: response_id.clone(),
token_usage: token_usage.clone(),
});
if let Some(ev) = this.pending.pop_front() {
return Poll::Ready(Some(Ok(ev)));
}
}
// Nothing aggregated; forward Completed directly.
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
response_id,
token_usage,
})));
}
Poll::Ready(Some(Ok(ResponseEvent::Created))) => {
// These events are exclusive to the Responses API and
// will never appear in a Chat Completions stream.
continue;
}
Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta)))) => {
// Always accumulate deltas so we can emit a final OutputItemDone at Completed.
this.cumulative.push_str(&delta);
if matches!(this.mode, AggregateMode::Streaming) {
// In streaming mode, also forward the delta immediately.
return Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(delta))));
}
}
Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
delta,
content_index,
}))) => {
// Always accumulate reasoning deltas so we can emit a final Reasoning item at Completed.
this.cumulative_reasoning.push_str(&delta);
if matches!(this.mode, AggregateMode::Streaming) {
// In streaming mode, also forward the delta immediately.
return Poll::Ready(Some(Ok(ResponseEvent::ReasoningContentDelta {
delta,
content_index,
})));
}
}
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta { .. }))) => {}
Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryPartAdded { .. }))) => {}
Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item)))) => {
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemAdded(item))));
}
}
}
}
}
/// Extension trait that activates aggregation on any stream of [`ResponseEvent`].
pub(crate) trait AggregateStreamExt: Stream<Item = Result<ResponseEvent>> + Sized {
/// Returns a new stream that emits **only** the final assistant message
/// per turn instead of every incremental delta. The produced
/// `ResponseEvent` sequence for a typical text turn looks like:
///
/// ```ignore
/// OutputItemDone(<full message>)
/// Completed
/// ```
///
/// No other `OutputItemDone` events will be seen by the caller.
fn aggregate(self) -> AggregatedChatStream<Self> {
AggregatedChatStream::new(self, AggregateMode::AggregatedOnly)
}
}
impl<T> AggregateStreamExt for T where T: Stream<Item = Result<ResponseEvent>> + Sized {}
impl<S> AggregatedChatStream<S> {
fn new(inner: S, mode: AggregateMode) -> Self {
AggregatedChatStream {
inner,
cumulative: String::new(),
cumulative_reasoning: String::new(),
pending: std::collections::VecDeque::new(),
mode,
}
}
pub(crate) fn streaming_mode(inner: S) -> Self {
Self::new(inner, AggregateMode::Streaming)
}
}

View File

@@ -1,22 +0,0 @@
mod aggregation;
mod chat_completions;
pub mod http;
mod rate_limits;
mod responses;
mod retry;
mod sse;
pub mod types;
pub(crate) use aggregation::AggregateStreamExt;
pub(crate) use aggregation::AggregatedChatStream;
pub(crate) use chat_completions::stream_chat_completions;
pub use responses::ModelClient;
pub(crate) use types::FreeformTool;
pub(crate) use types::FreeformToolFormat;
pub(crate) use types::Reasoning;
pub use types::ResponseEvent;
pub use types::ResponseStream;
pub(crate) use types::ResponsesApiRequest;
pub(crate) use types::ResponsesApiTool;
pub(crate) use types::ToolSpec;
pub(crate) use types::create_text_param_for_request;

View File

@@ -1,86 +0,0 @@
use crate::protocol::RateLimitSnapshot;
use crate::protocol::RateLimitWindow;
use chrono::Utc;
use reqwest::header::HeaderMap;
/// Prefer Codex-specific aggregate rate limit headers if present; fall back
/// to raw OpenAI-style request headers otherwise.
pub(crate) fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
parse_codex_rate_limits(headers).or_else(|| parse_openai_rate_limits(headers))
}
fn parse_codex_rate_limits(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
fn parse_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
headers
.get(name)
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<f64>().ok())
}
fn parse_i64(headers: &HeaderMap, name: &str) -> Option<i64> {
headers
.get(name)
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<i64>().ok())
}
let primary_used = parse_f64(headers, "x-codex-primary-used-percent");
let secondary_used = parse_f64(headers, "x-codex-secondary-used-percent");
if primary_used.is_none() && secondary_used.is_none() {
return None;
}
let primary = primary_used.map(|used_percent| RateLimitWindow {
used_percent,
window_minutes: parse_i64(headers, "x-codex-primary-window-minutes"),
resets_at: parse_i64(headers, "x-codex-primary-reset-at"),
});
let secondary = secondary_used.map(|used_percent| RateLimitWindow {
used_percent,
window_minutes: parse_i64(headers, "x-codex-secondary-window-minutes"),
resets_at: parse_i64(headers, "x-codex-secondary-reset-at"),
});
Some(RateLimitSnapshot { primary, secondary })
}
fn parse_openai_rate_limits(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
let limit = headers.get("x-ratelimit-limit-requests")?;
let remaining = headers.get("x-ratelimit-remaining-requests")?;
let reset_ms = headers.get("x-ratelimit-reset-requests")?;
let limit = limit.to_str().ok()?.parse::<f64>().ok()?;
let remaining = remaining.to_str().ok()?.parse::<f64>().ok()?;
let reset_ms = reset_ms.to_str().ok()?.parse::<i64>().ok()?;
if limit <= 0.0 {
return None;
}
let used = (limit - remaining).max(0.0);
let used_percent = (used / limit) * 100.0;
let window_minutes = if reset_ms <= 0 {
None
} else {
let seconds = reset_ms / 1000;
Some((seconds + 59) / 60)
};
let resets_at = if reset_ms > 0 {
Some(Utc::now().timestamp() + reset_ms / 1000)
} else {
None
};
Some(RateLimitSnapshot {
primary: Some(RateLimitWindow {
used_percent,
window_minutes,
resets_at,
}),
secondary: None,
})
}
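The fallback arithmetic is easiest to check with concrete numbers. A worked sketch with hypothetical header values (limit 100, remaining 25, reset in 90000 ms):
```
let (limit, remaining, reset_ms): (f64, f64, i64) = (100.0, 25.0, 90_000);
let used_percent = ((limit - remaining).max(0.0) / limit) * 100.0; // 75.0
// Ceiling division rounds the 90s reset horizon up to whole minutes.
let window_minutes = (reset_ms / 1000 + 59) / 60; // (90 + 59) / 60 == 2
assert_eq!(used_percent, 75.0);
assert_eq!(window_minutes, 2);
```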

File diff suppressed because it is too large

View File

@@ -1,140 +0,0 @@
use std::time::Duration;
use crate::error::CodexErr;
use crate::error::Result;
/// Common interface for classifying stream start errors as retryable or fatal.
pub(crate) trait RetryableStreamError {
/// Returns a delay for the next retry attempt, or `None` if the error
/// should be treated as fatal and not retried.
fn delay(&self, attempt: u64) -> Option<Duration>;
/// Converts this error into the final `CodexErr` that should be surfaced
/// to callers when retries are exhausted or the error is fatal.
fn into_error(self) -> CodexErr;
}
/// Helper to retry a streaming operation with provider-configured backoff.
///
/// The caller supplies an `attempt_fn` that is invoked once per attempt with
/// the current attempt index in `[0, max_attempts]`. On success, the value is
/// returned immediately. On error, the error's [`RetryableStreamError`]
/// implementation decides whether to retry (with an optional delay) or to
/// surface a final error.
pub(crate) async fn retry_stream<F, Fut, T, E>(max_attempts: u64, mut attempt_fn: F) -> Result<T>
where
F: FnMut(u64) -> Fut,
Fut: std::future::Future<Output = std::result::Result<T, E>>,
E: RetryableStreamError,
{
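// Note: `0..=max_attempts` makes up to max_attempts + 1 calls in total (the
// initial attempt plus max_attempts retries); the tests below pin this down.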
for attempt in 0..=max_attempts {
match attempt_fn(attempt).await {
Ok(value) => return Ok(value),
Err(err) => {
let delay = err.delay(attempt);
// Fatal error or final attempt: surface to caller.
if attempt == max_attempts || delay.is_none() {
return Err(err.into_error());
}
if let Some(duration) = delay {
tokio::time::sleep(duration).await;
}
}
}
}
unreachable!("retry_stream should always return");
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[derive(Clone)]
struct TestError {
fatal: bool,
}
impl RetryableStreamError for TestError {
fn delay(&self, attempt: u64) -> Option<Duration> {
if self.fatal {
None
} else {
Some(Duration::from_millis(attempt * 10))
}
}
fn into_error(self) -> CodexErr {
if self.fatal {
CodexErr::InternalServerError
} else {
CodexErr::Io(std::io::Error::other("retryable"))
}
}
}
#[tokio::test]
async fn retries_until_success_before_max_attempts() {
let max_attempts = 3;
let result: Result<&str> = retry_stream(max_attempts, |attempt| async move {
if attempt < 2 {
Err(TestError { fatal: false })
} else {
Ok("ok")
}
})
.await;
assert_eq!(result.unwrap(), "ok");
}
#[tokio::test]
async fn stops_on_fatal_error_without_retrying() {
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
let calls = Arc::new(AtomicUsize::new(0));
let calls_ref = calls.clone();
let result: Result<()> = retry_stream(5, move |_attempt| {
let calls_ref = calls_ref.clone();
async move {
calls_ref.fetch_add(1, Ordering::SeqCst);
Err(TestError { fatal: true })
}
})
.await;
assert!(result.is_err());
assert_eq!(calls.load(Ordering::SeqCst), 1);
}
#[tokio::test]
async fn stops_after_max_attempts_for_retryable_errors() {
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
let calls = Arc::new(AtomicUsize::new(0));
let calls_ref = calls.clone();
let max_attempts = 2;
let result: Result<()> = retry_stream(max_attempts, move |_attempt| {
let calls_ref = calls_ref.clone();
async move {
calls_ref.fetch_add(1, Ordering::SeqCst);
Err(TestError { fatal: false })
}
})
.await;
assert!(result.is_err());
assert_eq!(calls.load(Ordering::SeqCst), (max_attempts + 1) as usize);
}
}
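Before its removal here, each caller encoded its own retry policy through this trait. A plausible implementor, invented purely for illustration: honor a server-supplied Retry-After hint for throttling and server errors, and treat everything else as fatal (`HttpStreamError` is hypothetical; `backoff` is the crate's existing backoff helper, import omitted):
```
struct HttpStreamError {
    status: reqwest::StatusCode,
    retry_after: Option<Duration>,
}

impl RetryableStreamError for HttpStreamError {
    fn delay(&self, attempt: u64) -> Option<Duration> {
        if self.status == reqwest::StatusCode::TOO_MANY_REQUESTS || self.status.is_server_error() {
            // Prefer the server's hint; otherwise fall back to exponential backoff.
            Some(self.retry_after.unwrap_or_else(|| backoff(attempt)))
        } else {
            None // fatal: retry_stream surfaces into_error() without retrying
        }
    }

    fn into_error(self) -> CodexErr {
        CodexErr::InternalServerError
    }
}
```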

View File

@@ -1,46 +0,0 @@
use std::time::Duration;
use codex_otel::otel_event_manager::OtelEventManager;
use eventsource_stream::Event;
use eventsource_stream::EventStreamError as StreamError;
use futures::Stream;
use futures::StreamExt;
use tokio::time::timeout;
/// Result of polling the next SSE event with timeout and logging applied.
pub(crate) enum SseNext {
Event(Event),
Eof,
StreamError(String),
Timeout,
}
/// Read the next SSE event from `stream`, applying an idle timeout and recording
/// telemetry via `otel_event_manager`.
///
/// This helper centralizes the boilerplate for:
/// - `tokio::time::timeout`
/// - calling `log_sse_event`
/// - mapping the different outcomes into a small enum that callers can
/// interpret according to their own protocol semantics.
pub(crate) async fn next_sse_event<S, E>(
stream: &mut S,
idle_timeout: Duration,
otel_event_manager: &OtelEventManager,
) -> SseNext
where
S: Stream<Item = Result<Event, StreamError<E>>> + Unpin,
E: std::fmt::Display,
{
let start = tokio::time::Instant::now();
let next_event = timeout(idle_timeout, stream.next()).await;
let duration = start.elapsed();
otel_event_manager.log_sse_event(&next_event, duration);
match next_event {
Ok(Some(Ok(ev))) => SseNext::Event(ev),
Ok(Some(Err(e))) => SseNext::StreamError(e.to_string()),
Ok(None) => SseNext::Eof,
Err(_) => SseNext::Timeout,
}
}
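A sketch of the caller-side match this enum is designed for, mirroring the loop this change inlines back into chat_completions.rs (`handle_event` is hypothetical):
```
loop {
    match next_sse_event(&mut stream, idle_timeout, &otel_event_manager).await {
        SseNext::Event(ev) => handle_event(ev), // protocol-specific handling
        SseNext::Eof => break,                  // stream closed gracefully
        SseNext::StreamError(message) => return Err(CodexErr::Stream(message, None)),
        SseNext::Timeout => {
            return Err(CodexErr::Stream("idle timeout waiting for SSE".into(), None));
        }
    }
}
```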

View File

@@ -1,327 +0,0 @@
use crate::error::Result;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::SubAgentSource;
use crate::protocol::TokenUsage;
use crate::tools::spec::JsonSchema;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::config_types::Verbosity as VerbosityConfig;
use codex_protocol::models::ResponseItem;
use futures::Stream;
use serde::Serialize;
use serde_json::Value;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use tokio::sync::mpsc;
#[derive(Debug)]
pub enum ResponseEvent {
Created,
OutputItemDone(ResponseItem),
OutputItemAdded(ResponseItem),
Completed {
response_id: String,
token_usage: Option<TokenUsage>,
},
OutputTextDelta(String),
ReasoningSummaryDelta {
delta: String,
summary_index: i64,
},
ReasoningContentDelta {
delta: String,
content_index: i64,
},
ReasoningSummaryPartAdded {
summary_index: i64,
},
RateLimits(RateLimitSnapshot),
}
pub(crate) fn subagent_label(sub: &SubAgentSource) -> String {
if let SubAgentSource::Other(label) = sub {
label.clone()
} else {
serde_json::to_value(sub)
.ok()
.and_then(|v| v.as_str().map(std::string::ToString::to_string))
.unwrap_or_else(|| "other".to_string())
}
}
#[derive(Debug, Serialize)]
pub(crate) struct Reasoning {
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) effort: Option<ReasoningEffortConfig>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) summary: Option<ReasoningSummaryConfig>,
}
#[derive(Debug, Serialize, Default, Clone)]
#[serde(rename_all = "snake_case")]
pub(crate) enum TextFormatType {
#[default]
JsonSchema,
}
#[derive(Debug, Serialize, Default, Clone)]
pub(crate) struct TextFormat {
pub(crate) r#type: TextFormatType,
pub(crate) strict: bool,
pub(crate) schema: Value,
pub(crate) name: String,
}
/// Controls under the `text` field in the Responses API for GPT-5.
#[derive(Debug, Serialize, Default, Clone)]
pub(crate) struct TextControls {
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) verbosity: Option<OpenAiVerbosity>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) format: Option<TextFormat>,
}
#[derive(Debug, Serialize, Default, Clone)]
#[serde(rename_all = "lowercase")]
pub(crate) enum OpenAiVerbosity {
Low,
#[default]
Medium,
High,
}
impl From<VerbosityConfig> for OpenAiVerbosity {
fn from(v: VerbosityConfig) -> Self {
match v {
VerbosityConfig::Low => OpenAiVerbosity::Low,
VerbosityConfig::Medium => OpenAiVerbosity::Medium,
VerbosityConfig::High => OpenAiVerbosity::High,
}
}
}
/// Request object that is serialized as JSON and POST'ed when using the
/// Responses API.
#[derive(Debug, Serialize)]
pub(crate) struct ResponsesApiRequest<'a> {
pub(crate) model: &'a str,
pub(crate) instructions: &'a str,
// TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
// we code defensively to avoid this case, but perhaps we should use a
// separate enum for serialization.
pub(crate) input: &'a Vec<ResponseItem>,
pub(crate) tools: &'a [serde_json::Value],
pub(crate) tool_choice: &'static str,
pub(crate) parallel_tool_calls: bool,
pub(crate) reasoning: Option<Reasoning>,
pub(crate) store: bool,
pub(crate) stream: bool,
pub(crate) include: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) prompt_cache_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) text: Option<TextControls>,
}
pub(crate) mod tools {
use super::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
/// Responses API.
#[derive(Debug, Clone, Serialize, PartialEq)]
#[serde(tag = "type")]
pub(crate) enum ToolSpec {
#[serde(rename = "function")]
Function(ResponsesApiTool),
#[serde(rename = "local_shell")]
LocalShell {},
// TODO: Understand why we get an error on web_search although the API docs say it's supported.
// https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses#:~:text=%7B%20type%3A%20%22web_search%22%20%7D%2C
#[serde(rename = "web_search")]
WebSearch {},
#[serde(rename = "custom")]
Freeform(FreeformTool),
}
impl ToolSpec {
pub(crate) fn name(&self) -> &str {
match self {
ToolSpec::Function(tool) => tool.name.as_str(),
ToolSpec::LocalShell {} => "local_shell",
ToolSpec::WebSearch {} => "web_search",
ToolSpec::Freeform(tool) => tool.name.as_str(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FreeformTool {
pub(crate) name: String,
pub(crate) description: String,
pub(crate) format: FreeformToolFormat,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FreeformToolFormat {
pub(crate) r#type: String,
pub(crate) syntax: String,
pub(crate) definition: String,
}
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ResponsesApiTool {
pub(crate) name: String,
pub(crate) description: String,
/// TODO: Validation. When strict is set to true, the JSON schema,
/// `required` and `additional_properties` must be present. All fields in
/// `properties` must be present in `required`.
pub(crate) strict: bool,
pub(crate) parameters: JsonSchema,
}
}
pub(crate) use tools::FreeformTool;
pub(crate) use tools::FreeformToolFormat;
pub(crate) use tools::ResponsesApiTool;
pub(crate) use tools::ToolSpec;
pub(crate) fn create_text_param_for_request(
verbosity: Option<VerbosityConfig>,
output_schema: &Option<Value>,
) -> Option<TextControls> {
if verbosity.is_none() && output_schema.is_none() {
return None;
}
Some(TextControls {
verbosity: verbosity.map(std::convert::Into::into),
format: output_schema.as_ref().map(|schema| TextFormat {
r#type: TextFormatType::JsonSchema,
strict: true,
schema: schema.clone(),
name: "codex_output_schema".to_string(),
}),
})
}
pub struct ResponseStream {
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
}
impl Stream for ResponseStream {
type Item = Result<ResponseEvent>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
self.rx_event.poll_recv(cx)
}
}
#[cfg(test)]
mod tests {
use super::*;
use codex_protocol::models::ResponseItem;
use pretty_assertions::assert_eq;
#[test]
fn serializes_text_verbosity_when_set() {
let input: Vec<ResponseItem> = vec![];
let tools: Vec<serde_json::Value> = vec![];
let req = ResponsesApiRequest {
model: "gpt-5",
instructions: "i",
input: &input,
tools: &tools,
tool_choice: "auto",
parallel_tool_calls: true,
reasoning: None,
store: false,
stream: true,
include: vec![],
prompt_cache_key: None,
text: Some(TextControls {
verbosity: Some(OpenAiVerbosity::Low),
format: None,
}),
};
let v = serde_json::to_value(&req).expect("json");
assert_eq!(
v.get("text")
.and_then(|t| t.get("verbosity"))
.and_then(|s| s.as_str()),
Some("low")
);
}
#[test]
fn serializes_text_schema_with_strict_format() {
let input: Vec<ResponseItem> = vec![];
let tools: Vec<serde_json::Value> = vec![];
let schema = serde_json::json!({
"type": "object",
"properties": {
"answer": {"type": "string"}
},
"required": ["answer"],
});
let text_controls =
create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
let req = ResponsesApiRequest {
model: "gpt-5",
instructions: "i",
input: &input,
tools: &tools,
tool_choice: "auto",
parallel_tool_calls: true,
reasoning: None,
store: false,
stream: true,
include: vec![],
prompt_cache_key: None,
text: Some(text_controls),
};
let v = serde_json::to_value(&req).expect("json");
let text = v.get("text").expect("text field");
assert!(text.get("verbosity").is_none());
let format = text.get("format").expect("format field");
assert_eq!(
format.get("name"),
Some(&serde_json::Value::String("codex_output_schema".into()))
);
assert_eq!(
format.get("type"),
Some(&serde_json::Value::String("json_schema".into()))
);
assert_eq!(format.get("strict"), Some(&serde_json::Value::Bool(true)));
assert_eq!(format.get("schema"), Some(&schema));
}
#[test]
fn omits_text_when_not_set() {
let input: Vec<ResponseItem> = vec![];
let tools: Vec<serde_json::Value> = vec![];
let req = ResponsesApiRequest {
model: "gpt-5",
instructions: "i",
input: &input,
tools: &tools,
tool_choice: "auto",
parallel_tool_calls: true,
reasoning: None,
store: false,
stream: true,
include: vec![],
prompt_cache_key: None,
text: None,
};
let v = serde_json::to_value(&req).expect("json");
assert!(v.get("text").is_none());
}
}

View File

@@ -1,12 +1,24 @@
use crate::client::ToolSpec;
use crate::client_common::tools::ToolSpec;
use crate::error::Result;
use crate::model_family::ModelFamily;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::TokenUsage;
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::config_types::Verbosity as VerbosityConfig;
use codex_protocol::models::ResponseItem;
use futures::Stream;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value;
use std::borrow::Cow;
use std::collections::HashSet;
use std::ops::Deref;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use tokio::sync::mpsc;
/// Review thread system prompt. Edit `core/src/review_prompt.md` to customize.
pub const REVIEW_PROMPT: &str = include_str!("../review_prompt.md");
@@ -181,6 +193,194 @@ fn strip_total_output_header(output: &str) -> Option<&str> {
Some(remainder)
}
#[derive(Debug)]
pub enum ResponseEvent {
Created,
OutputItemDone(ResponseItem),
OutputItemAdded(ResponseItem),
Completed {
response_id: String,
token_usage: Option<TokenUsage>,
},
OutputTextDelta(String),
ReasoningSummaryDelta {
delta: String,
summary_index: i64,
},
ReasoningContentDelta {
delta: String,
content_index: i64,
},
ReasoningSummaryPartAdded {
summary_index: i64,
},
RateLimits(RateLimitSnapshot),
}
#[derive(Debug, Serialize)]
pub(crate) struct Reasoning {
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) effort: Option<ReasoningEffortConfig>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) summary: Option<ReasoningSummaryConfig>,
}
#[derive(Debug, Serialize, Default, Clone)]
#[serde(rename_all = "snake_case")]
pub(crate) enum TextFormatType {
#[default]
JsonSchema,
}
#[derive(Debug, Serialize, Default, Clone)]
pub(crate) struct TextFormat {
pub(crate) r#type: TextFormatType,
pub(crate) strict: bool,
pub(crate) schema: Value,
pub(crate) name: String,
}
/// Controls under the `text` field in the Responses API for GPT-5.
#[derive(Debug, Serialize, Default, Clone)]
pub(crate) struct TextControls {
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) verbosity: Option<OpenAiVerbosity>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) format: Option<TextFormat>,
}
#[derive(Debug, Serialize, Default, Clone)]
#[serde(rename_all = "lowercase")]
pub(crate) enum OpenAiVerbosity {
Low,
#[default]
Medium,
High,
}
impl From<VerbosityConfig> for OpenAiVerbosity {
fn from(v: VerbosityConfig) -> Self {
match v {
VerbosityConfig::Low => OpenAiVerbosity::Low,
VerbosityConfig::Medium => OpenAiVerbosity::Medium,
VerbosityConfig::High => OpenAiVerbosity::High,
}
}
}
/// Request object that is serialized as JSON and POST'ed when using the
/// Responses API.
#[derive(Debug, Serialize)]
pub(crate) struct ResponsesApiRequest<'a> {
pub(crate) model: &'a str,
pub(crate) instructions: &'a str,
// TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
// we code defensively to avoid this case, but perhaps we should use a
// separate enum for serialization.
pub(crate) input: &'a Vec<ResponseItem>,
pub(crate) tools: &'a [serde_json::Value],
pub(crate) tool_choice: &'static str,
pub(crate) parallel_tool_calls: bool,
pub(crate) reasoning: Option<Reasoning>,
pub(crate) store: bool,
pub(crate) stream: bool,
pub(crate) include: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) prompt_cache_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) text: Option<TextControls>,
}
pub(crate) mod tools {
use crate::tools::spec::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
/// Responses API.
#[derive(Debug, Clone, Serialize, PartialEq)]
#[serde(tag = "type")]
pub(crate) enum ToolSpec {
#[serde(rename = "function")]
Function(ResponsesApiTool),
#[serde(rename = "local_shell")]
LocalShell {},
// TODO: Understand why we get an error on web_search although the API docs say it's supported.
// https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses#:~:text=%7B%20type%3A%20%22web_search%22%20%7D%2C
#[serde(rename = "web_search")]
WebSearch {},
#[serde(rename = "custom")]
Freeform(FreeformTool),
}
impl ToolSpec {
pub(crate) fn name(&self) -> &str {
match self {
ToolSpec::Function(tool) => tool.name.as_str(),
ToolSpec::LocalShell {} => "local_shell",
ToolSpec::WebSearch {} => "web_search",
ToolSpec::Freeform(tool) => tool.name.as_str(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FreeformTool {
pub(crate) name: String,
pub(crate) description: String,
pub(crate) format: FreeformToolFormat,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct FreeformToolFormat {
pub(crate) r#type: String,
pub(crate) syntax: String,
pub(crate) definition: String,
}
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ResponsesApiTool {
pub(crate) name: String,
pub(crate) description: String,
/// TODO: Validation. When `strict` is set to true, the JSON schema must
/// include `required` and `additional_properties`, and all fields in
/// `properties` must be listed in `required`.
pub(crate) strict: bool,
pub(crate) parameters: JsonSchema,
}
}
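A quick sketch of the tagged serialization, assuming serde_json: the `#[serde(tag = "type")]` attribute inlines each variant's fields next to the tag, so `LocalShell {}` becomes a bare tag object and `Function` flattens the `ResponsesApiTool` fields beside it.
let spec = ToolSpec::LocalShell {};
// Internally tagged: the variant name becomes the "type" field.
assert_eq!(
    serde_json::to_value(&spec).expect("json"),
    serde_json::json!({"type": "local_shell"})
);
// ToolSpec::Function(tool) would serialize as
// {"type":"function","name":...,"description":...,"strict":...,"parameters":...}.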
pub(crate) fn create_text_param_for_request(
verbosity: Option<VerbosityConfig>,
output_schema: &Option<Value>,
) -> Option<TextControls> {
if verbosity.is_none() && output_schema.is_none() {
return None;
}
Some(TextControls {
verbosity: verbosity.map(std::convert::Into::into),
format: output_schema.as_ref().map(|schema| TextFormat {
r#type: TextFormatType::JsonSchema,
strict: true,
schema: schema.clone(),
name: "codex_output_schema".to_string(),
}),
})
}
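A short usage sketch: a schema alone yields a strict `json_schema` format block with no verbosity, the same path the second serialization test below exercises.
let schema = serde_json::json!({"type": "object"});
let controls =
    create_text_param_for_request(None, &Some(schema)).expect("schema implies controls");
assert!(controls.verbosity.is_none());
assert_eq!(controls.format.expect("format").name, "codex_output_schema");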
pub struct ResponseStream {
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
}
impl Stream for ResponseStream {
type Item = Result<ResponseEvent>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
self.rx_event.poll_recv(cx)
}
}
#[cfg(test)]
mod tests {
use crate::model_family::find_family_for_model;
@@ -227,7 +427,7 @@ mod tests {
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
slug: "gpt-5-codex",
slug: "gpt-5.1-codex",
expects_apply_patch_instructions: false,
},
InstructionsTestCase {
@@ -251,4 +451,104 @@ mod tests {
assert_eq!(full, expected);
}
}
#[test]
fn serializes_text_verbosity_when_set() {
let input: Vec<ResponseItem> = vec![];
let tools: Vec<serde_json::Value> = vec![];
let req = ResponsesApiRequest {
model: "gpt-5.1",
instructions: "i",
input: &input,
tools: &tools,
tool_choice: "auto",
parallel_tool_calls: true,
reasoning: None,
store: false,
stream: true,
include: vec![],
prompt_cache_key: None,
text: Some(TextControls {
verbosity: Some(OpenAiVerbosity::Low),
format: None,
}),
};
let v = serde_json::to_value(&req).expect("json");
assert_eq!(
v.get("text")
.and_then(|t| t.get("verbosity"))
.and_then(|s| s.as_str()),
Some("low")
);
}
#[test]
fn serializes_text_schema_with_strict_format() {
let input: Vec<ResponseItem> = vec![];
let tools: Vec<serde_json::Value> = vec![];
let schema = serde_json::json!({
"type": "object",
"properties": {
"answer": {"type": "string"}
},
"required": ["answer"],
});
let text_controls =
create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
let req = ResponsesApiRequest {
model: "gpt-5.1",
instructions: "i",
input: &input,
tools: &tools,
tool_choice: "auto",
parallel_tool_calls: true,
reasoning: None,
store: false,
stream: true,
include: vec![],
prompt_cache_key: None,
text: Some(text_controls),
};
let v = serde_json::to_value(&req).expect("json");
let text = v.get("text").expect("text field");
assert!(text.get("verbosity").is_none());
let format = text.get("format").expect("format field");
assert_eq!(
format.get("name"),
Some(&serde_json::Value::String("codex_output_schema".into()))
);
assert_eq!(
format.get("type"),
Some(&serde_json::Value::String("json_schema".into()))
);
assert_eq!(format.get("strict"), Some(&serde_json::Value::Bool(true)));
assert_eq!(format.get("schema"), Some(&schema));
}
#[test]
fn omits_text_when_not_set() {
let input: Vec<ResponseItem> = vec![];
let tools: Vec<serde_json::Value> = vec![];
let req = ResponsesApiRequest {
model: "gpt-5.1",
instructions: "i",
input: &input,
tools: &tools,
tool_choice: "auto",
parallel_tool_calls: true,
reasoning: None,
store: false,
stream: true,
include: vec![],
prompt_cache_key: None,
text: None,
};
let v = serde_json::to_value(&req).expect("json");
assert!(v.get("text").is_none());
}
}

View File

@@ -5,13 +5,10 @@ use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use crate::AuthManager;
use crate::ResponseEvent;
use crate::client_common::REVIEW_PROMPT;
use crate::compact;
use crate::features::Feature;
use crate::function_tool::FunctionCallError;
use crate::mcp::auth::McpAuthStatusEntry;
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
use crate::parse_command::parse_command;
use crate::parse_turn_item;
use crate::response_processing::process_items;
@@ -46,6 +43,7 @@ use mcp_types::ReadResourceResult;
use serde_json;
use serde_json::Value;
use tokio::sync::Mutex;
use tokio::sync::RwLock;
use tokio::sync::oneshot;
use tokio_util::sync::CancellationToken;
use tracing::debug;
@@ -56,8 +54,8 @@ use tracing::warn;
use crate::ModelProviderInfo;
use crate::client::ModelClient;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::config::Config;
use crate::config::types::McpServerTransportConfig;
use crate::config::types::ShellEnvironmentPolicy;
use crate::context_manager::ContextManager;
use crate::environment_context::EnvironmentContext;
@@ -122,6 +120,7 @@ use crate::user_instructions::UserInstructions;
use crate::user_notification::UserNotification;
use crate::util::backoff;
use codex_async_utils::OrCancelExt;
use codex_execpolicy2::Policy as ExecPolicyV2;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
@@ -167,6 +166,10 @@ impl Codex {
let user_instructions = get_user_instructions(&config).await;
let exec_policy_v2 =
crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy2: {err}")))?;
let config = Arc::new(config);
let session_configuration = SessionConfiguration {
@@ -183,6 +186,7 @@ impl Codex {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: config.features.clone(),
exec_policy_v2,
session_source,
};
@@ -280,6 +284,7 @@ pub(crate) struct TurnContext {
pub(crate) final_output_json_schema: Option<Value>,
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
pub(crate) exec_policy_v2: Option<Arc<ExecPolicyV2>>,
}
impl TurnContext {
@@ -336,6 +341,8 @@ pub(crate) struct SessionConfiguration {
/// Set of feature flags for this session
features: Features,
/// Optional execpolicy2 policy, applied only when enabled by feature flag.
exec_policy_v2: Option<Arc<ExecPolicyV2>>,
// TODO(pakrym): Remove config from here
original_config_do_not_use: Arc<Config>,
@@ -436,6 +443,7 @@ impl Session {
final_output_json_schema: None,
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy_v2: session_configuration.exec_policy_v2.clone(),
}
}
@@ -476,21 +484,13 @@ impl Session {
),
};
// Error messages to dispatch after SessionConfigured is sent.
let mut post_session_configured_events = Vec::<Event>::new();
// Kick off independent async setup tasks in parallel to reduce startup latency.
//
// - initialize RolloutRecorder with new or resumed session info
// - spin up MCP connection manager
// - perform default shell discovery
// - load history metadata
let rollout_fut = RolloutRecorder::new(&config, rollout_params);
let mcp_fut = McpConnectionManager::new(
config.mcp_servers.clone(),
config.mcp_oauth_credentials_store_mode,
);
let default_shell_fut = shell::default_user_shell();
let history_meta_fut = crate::message_history::history_metadata(&config);
let auth_statuses_fut = compute_auth_statuses(
@@ -499,15 +499,8 @@ impl Session {
);
// Join all independent futures.
let (
rollout_recorder,
mcp_res,
default_shell,
(history_log_id, history_entry_count),
auth_statuses,
) = tokio::join!(
let (rollout_recorder, default_shell, (history_log_id, history_entry_count), auth_statuses) = tokio::join!(
rollout_fut,
mcp_fut,
default_shell_fut,
history_meta_fut,
auth_statuses_fut
@@ -519,34 +512,7 @@ impl Session {
})?;
let rollout_path = rollout_recorder.rollout_path.clone();
// Handle MCP manager result and record any startup failures.
let (mcp_connection_manager, failed_clients) = match mcp_res {
Ok((mgr, failures)) => (mgr, failures),
Err(e) => {
let message = format!("Failed to create MCP connection manager: {e:#}");
error!("{message}");
post_session_configured_events.push(Event {
id: INITIAL_SUBMIT_ID.to_owned(),
msg: EventMsg::Error(ErrorEvent { message }),
});
(McpConnectionManager::default(), Default::default())
}
};
// Surface individual client start-up failures to the user.
if !failed_clients.is_empty() {
for (server_name, err) in failed_clients {
let auth_entry = auth_statuses.get(&server_name);
let display_message = mcp_init_error_display(&server_name, auth_entry, &err);
warn!("MCP client for `{server_name}` failed to start: {err:#}");
post_session_configured_events.push(Event {
id: INITIAL_SUBMIT_ID.to_owned(),
msg: EventMsg::Error(ErrorEvent {
message: display_message,
}),
});
}
}
let mut post_session_configured_events = Vec::<Event>::new();
for (alias, feature) in session_configuration.features.legacy_feature_usages() {
let canonical = feature.key();
@@ -595,7 +561,8 @@ impl Session {
warm_model_cache(&session_configuration.model);
let services = SessionServices {
mcp_connection_manager,
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
mcp_startup_cancellation_token: CancellationToken::new(),
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(config.notify.clone()),
rollout: Mutex::new(Some(rollout_recorder)),
@@ -635,6 +602,18 @@ impl Session {
for event in events {
sess.send_event_raw(event).await;
}
sess.services
.mcp_connection_manager
.write()
.await
.initialize(
config.mcp_servers.clone(),
config.mcp_oauth_credentials_store_mode,
auth_statuses.clone(),
tx_event.clone(),
sess.services.mcp_startup_cancellation_token.clone(),
)
.await;
// record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted.
sess.record_initial_history(initial_history).await;
@@ -894,6 +873,7 @@ impl Session {
let parsed_cmd = parse_command(&command);
let event = EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
command,
cwd,
reason,
@@ -1258,6 +1238,8 @@ impl Session {
) -> anyhow::Result<ListResourcesResult> {
self.services
.mcp_connection_manager
.read()
.await
.list_resources(server, params)
.await
}
@@ -1269,6 +1251,8 @@ impl Session {
) -> anyhow::Result<ListResourceTemplatesResult> {
self.services
.mcp_connection_manager
.read()
.await
.list_resource_templates(server, params)
.await
}
@@ -1280,6 +1264,8 @@ impl Session {
) -> anyhow::Result<ReadResourceResult> {
self.services
.mcp_connection_manager
.read()
.await
.read_resource(server, params)
.await
}
@@ -1292,19 +1278,29 @@ impl Session {
) -> anyhow::Result<CallToolResult> {
self.services
.mcp_connection_manager
.read()
.await
.call_tool(server, tool, arguments)
.await
}
pub(crate) fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
pub(crate) async fn parse_mcp_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
self.services
.mcp_connection_manager
.read()
.await
.parse_tool_name(tool_name)
.await
}
pub async fn interrupt_task(self: &Arc<Self>) {
info!("interrupt received: abort current task, if any");
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
let has_active_turn = { self.active_turn.lock().await.is_some() };
if has_active_turn {
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
} else {
self.cancel_mcp_startup().await;
}
}
pub(crate) fn notifier(&self) -> &UserNotifier {
@@ -1318,6 +1314,10 @@ impl Session {
fn show_raw_agent_reasoning(&self) -> bool {
self.services.show_raw_agent_reasoning
}
async fn cancel_mcp_startup(&self) {
self.services.mcp_startup_cancellation_token.cancel();
}
}
async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiver<Submission>) {
@@ -1575,17 +1575,15 @@ mod handlers {
}
pub async fn list_mcp_tools(sess: &Session, config: &Arc<Config>, sub_id: String) {
// This is a cheap lookup from the connection manager's cache.
let tools = sess.services.mcp_connection_manager.list_all_tools();
let (auth_status_entries, resources, resource_templates) = tokio::join!(
let mcp_connection_manager = sess.services.mcp_connection_manager.read().await;
let (tools, auth_status_entries, resources, resource_templates) = tokio::join!(
mcp_connection_manager.list_all_tools(),
compute_auth_statuses(
config.mcp_servers.iter(),
config.mcp_oauth_credentials_store_mode,
),
sess.services.mcp_connection_manager.list_all_resources(),
sess.services
.mcp_connection_manager
.list_all_resource_templates()
mcp_connection_manager.list_all_resources(),
mcp_connection_manager.list_all_resource_templates(),
);
let auth_statuses = auth_status_entries
.iter()
@@ -1594,7 +1592,10 @@ mod handlers {
let event = Event {
id: sub_id,
msg: EventMsg::McpListToolsResponse(crate::protocol::McpListToolsResponseEvent {
tools,
tools: tools
.into_iter()
.map(|(name, tool)| (name, tool.tool))
.collect(),
resources,
resource_templates,
auth_statuses,
@@ -1767,6 +1768,7 @@ async fn spawn_review_thread(
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy_v2: parent_turn_context.exec_policy_v2.clone(),
};
// Seed the child task with the review prompt as the initial user message.
@@ -1924,10 +1926,22 @@ async fn run_turn(
input: Vec<ResponseItem>,
cancellation_token: CancellationToken,
) -> CodexResult<TurnRunResult> {
let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
let mcp_tools = sess
.services
.mcp_connection_manager
.read()
.await
.list_all_tools()
.or_cancel(&cancellation_token)
.await?;
let router = Arc::new(ToolRouter::from_config(
&turn_context.tools_config,
Some(mcp_tools),
Some(
mcp_tools
.into_iter()
.map(|(name, tool)| (name, tool.tool))
.collect(),
),
));
let model_supports_parallel = turn_context
@@ -2096,7 +2110,7 @@ async fn try_run_turn(
ResponseEvent::Created => {}
ResponseEvent::OutputItemDone(item) => {
let previously_active_item = active_item.take();
match ToolRouter::build_tool_call(sess.as_ref(), item.clone()) {
match ToolRouter::build_tool_call(sess.as_ref(), item.clone()).await {
Ok(Some(call)) => {
let payload_preview = call.payload.log_payload().into_owned();
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
@@ -2307,59 +2321,6 @@ pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -
})
}
fn mcp_init_error_display(
server_name: &str,
entry: Option<&McpAuthStatusEntry>,
err: &anyhow::Error,
) -> String {
if let Some(McpServerTransportConfig::StreamableHttp {
url,
bearer_token_env_var,
http_headers,
..
}) = &entry.map(|entry| &entry.config.transport)
&& url == "https://api.githubcopilot.com/mcp/"
&& bearer_token_env_var.is_none()
&& http_headers.as_ref().map(HashMap::is_empty).unwrap_or(true)
{
// GitHub only supports OAuth for first-party MCP clients.
// That means the user has to supply a personal access token via either bearer_token_env_var or http_headers.
// https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
format!(
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
)
} else if is_mcp_client_auth_required_error(err) {
format!(
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
)
} else if is_mcp_client_startup_timeout_error(err) {
let startup_timeout_secs = match entry {
Some(entry) => match entry.config.startup_timeout_sec {
Some(timeout) => timeout,
None => DEFAULT_STARTUP_TIMEOUT,
},
None => DEFAULT_STARTUP_TIMEOUT,
}
.as_secs();
format!(
"MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
)
} else {
format!("MCP client for `{server_name}` failed to start: {err:#}")
}
}
fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
// StreamableHttpError::AuthRequired from the MCP SDK.
error.to_string().contains("Auth required")
}
fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
let error_message = error.to_string();
error_message.contains("request timed out")
|| error_message.contains("timed out handshaking with MCP server")
}
use crate::features::Features;
#[cfg(test)]
pub(crate) use tests::make_session_and_context;
@@ -2369,10 +2330,7 @@ mod tests {
use super::*;
use crate::config::ConfigOverrides;
use crate::config::ConfigToml;
use crate::config::types::McpServerConfig;
use crate::config::types::McpServerTransportConfig;
use crate::exec::ExecToolCallOutput;
use crate::mcp::auth::McpAuthStatusEntry;
use crate::tools::format_exec_output_str;
use crate::protocol::CompactedItem;
@@ -2392,7 +2350,6 @@ mod tests {
use codex_app_server_protocol::AuthMode;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::McpAuthStatus;
use std::time::Duration;
use tokio::time::sleep;
@@ -2600,13 +2557,15 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy_v2: None,
session_source: SessionSource::Exec,
};
let state = SessionState::new(session_configuration.clone());
let services = SessionServices {
mcp_connection_manager: McpConnectionManager::default(),
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
mcp_startup_cancellation_token: CancellationToken::new(),
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
@@ -2676,13 +2635,15 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy_v2: None,
session_source: SessionSource::Exec,
};
let state = SessionState::new(session_configuration.clone());
let services = SessionServices {
mcp_connection_manager: McpConnectionManager::default(),
mcp_connection_manager: Arc::new(RwLock::new(McpConnectionManager::default())),
mcp_startup_cancellation_token: CancellationToken::new(),
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
@@ -2863,9 +2824,23 @@ mod tests {
#[tokio::test]
async fn fatal_tool_error_stops_turn_and_reports_error() {
let (session, turn_context, _rx) = make_session_and_context_with_rx();
let tools = {
session
.services
.mcp_connection_manager
.read()
.await
.list_all_tools()
.await
};
let router = ToolRouter::from_config(
&turn_context.tools_config,
Some(session.services.mcp_connection_manager.list_all_tools()),
Some(
tools
.into_iter()
.map(|(name, tool)| (name, tool.tool))
.collect(),
),
);
let item = ResponseItem::CustomToolCall {
id: None,
@@ -2876,6 +2851,7 @@ mod tests {
};
let call = ToolRouter::build_tool_call(session.as_ref(), item.clone())
.await
.expect("build tool call")
.expect("tool call present");
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
@@ -3125,7 +3101,6 @@ mod tests {
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
assert!(exec_output.output.contains("hi"));
}
#[tokio::test]
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
use crate::protocol::AskForApproval;
@@ -3167,89 +3142,4 @@ mod tests {
pretty_assertions::assert_eq!(output, expected);
}
#[test]
fn mcp_init_error_display_prompts_for_github_pat() {
let server_name = "github";
let entry = McpAuthStatusEntry {
config: McpServerConfig {
transport: McpServerTransportConfig::StreamableHttp {
url: "https://api.githubcopilot.com/mcp/".to_string(),
bearer_token_env_var: None,
http_headers: None,
env_http_headers: None,
},
enabled: true,
startup_timeout_sec: None,
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
},
auth_status: McpAuthStatus::Unsupported,
};
let err = anyhow::anyhow!("OAuth is unsupported");
let display = mcp_init_error_display(server_name, Some(&entry), &err);
let expected = format!(
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
);
assert_eq!(expected, display);
}
#[test]
fn mcp_init_error_display_prompts_for_login_when_auth_required() {
let server_name = "example";
let err = anyhow::anyhow!("Auth required for server");
let display = mcp_init_error_display(server_name, None, &err);
let expected = format!(
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
);
assert_eq!(expected, display);
}
#[test]
fn mcp_init_error_display_reports_generic_errors() {
let server_name = "custom";
let entry = McpAuthStatusEntry {
config: McpServerConfig {
transport: McpServerTransportConfig::StreamableHttp {
url: "https://example.com".to_string(),
bearer_token_env_var: Some("TOKEN".to_string()),
http_headers: None,
env_http_headers: None,
},
enabled: true,
startup_timeout_sec: None,
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
},
auth_status: McpAuthStatus::Unsupported,
};
let err = anyhow::anyhow!("boom");
let display = mcp_init_error_display(server_name, Some(&entry), &err);
let expected = format!("MCP client for `{server_name}` failed to start: {err:#}");
assert_eq!(expected, display);
}
#[test]
fn mcp_init_error_display_includes_startup_timeout_hint() {
let server_name = "slow";
let err = anyhow::anyhow!("request timed out");
let display = mcp_init_error_display(server_name, None, &err);
assert_eq!(
"MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX",
display
);
}
}

View File

@@ -1,7 +1,7 @@
use std::sync::Arc;
use crate::Prompt;
use crate::ResponseEvent;
use crate::client_common::ResponseEvent;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::codex::get_last_assistant_message_from_turn;

View File

@@ -584,7 +584,7 @@ mod tests {
codex_home,
None,
&[ConfigEdit::SetModel {
model: Some("gpt-5-codex".to_string()),
model: Some("gpt-5.1-codex".to_string()),
effort: Some(ReasoningEffort::High),
}],
)
@@ -592,7 +592,7 @@ mod tests {
let contents =
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
let expected = r#"model = "gpt-5-codex"
let expected = r#"model = "gpt-5.1-codex"
model_reasoning_effort = "high"
"#;
assert_eq!(contents, expected);
@@ -722,7 +722,7 @@ model = "o5-preview"
std::fs::write(
codex_home.join(CONFIG_TOML_FILE),
r#"[profiles."team a"]
model = "gpt-5-codex"
model = "gpt-5.1-codex"
"#,
)
.expect("seed");
@@ -972,14 +972,14 @@ B = \"2\"
let codex_home = tmp.path().to_path_buf();
ConfigEditsBuilder::new(&codex_home)
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
.apply()
.await
.expect("persist");
let contents =
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
let expected = r#"model = "gpt-5-codex"
let expected = r#"model = "gpt-5.1-codex"
model_reasoning_effort = "high"
"#;
assert_eq!(contents, expected);
@@ -1001,11 +1001,11 @@ model_reasoning_effort = "low"
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
assert_eq!(contents, initial_expected);
let updated_expected = r#"model = "gpt-5-codex"
let updated_expected = r#"model = "gpt-5.1-codex"
model_reasoning_effort = "high"
"#;
ConfigEditsBuilder::new(codex_home)
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
.apply_blocking()
.expect("persist update");
contents = std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");

View File

@@ -25,7 +25,9 @@ use crate::git_info::resolve_root_git_project_for_trust;
use crate::model_family::ModelFamily;
use crate::model_family::derive_default_model_family;
use crate::model_family::find_family_for_model;
use crate::model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::OLLAMA_OSS_PROVIDER_ID;
use crate::model_provider_info::built_in_model_providers;
use crate::openai_model_info::get_model_info;
use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME;
@@ -60,11 +62,11 @@ pub mod profile;
pub mod types;
#[cfg(target_os = "windows")]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1";
#[cfg(not(target_os = "windows"))]
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex";
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5.1-codex";
/// Maximum number of bytes of the documentation that will be embedded. Larger
/// files are *silently truncated* to this size so we do not take up too much of
@@ -79,7 +81,7 @@ pub struct Config {
/// Optional override of model selection.
pub model: String,
/// Model used specifically for review sessions. Defaults to "gpt-5-codex".
/// Model used specifically for review sessions. Defaults to "gpt-5.1-codex".
pub review_model: String,
pub model_family: ModelFamily,
@@ -466,6 +468,48 @@ pub fn set_project_trust_level(
.apply_blocking()
}
/// Save the default OSS provider preference to config.toml
pub fn set_default_oss_provider(codex_home: &Path, provider: &str) -> std::io::Result<()> {
// Validate that the provider is one of the known OSS providers
match provider {
LMSTUDIO_OSS_PROVIDER_ID | OLLAMA_OSS_PROVIDER_ID => {
// Valid provider, continue
}
_ => {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
format!(
"Invalid OSS provider '{provider}'. Must be one of: {LMSTUDIO_OSS_PROVIDER_ID}, {OLLAMA_OSS_PROVIDER_ID}"
),
));
}
}
let config_path = codex_home.join(CONFIG_TOML_FILE);
// Read the existing config, or start from an empty string if the file doesn't exist
let content = match std::fs::read_to_string(&config_path) {
Ok(content) => content,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
Err(e) => return Err(e),
};
// Parse as DocumentMut for editing while preserving structure
let mut doc = content.parse::<DocumentMut>().map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("failed to parse config.toml: {e}"),
)
})?;
// Set oss_provider at the root level
use toml_edit::value;
doc["oss_provider"] = value(provider);
// Write the modified document back
std::fs::write(&config_path, doc.to_string())?;
Ok(())
}
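A brief usage sketch, assuming `codex_home` is a writable `&Path`; unknown provider ids fail with `InvalidInput` before the file is touched, as the test below verifies.
set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
// config.toml now contains: oss_provider = "ollama"
let err = set_default_oss_provider(codex_home, "not-a-provider").unwrap_err();
assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput);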
/// Apply a single dotted-path override onto a TOML value.
fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) {
use toml::value::Table;
@@ -663,6 +707,8 @@ pub struct ConfigToml {
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
/// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama".
pub oss_provider: Option<String>,
}
impl From<ConfigToml> for UserSavedConfig {
@@ -851,6 +897,34 @@ pub struct ConfigOverrides {
pub additional_writable_roots: Vec<PathBuf>,
}
/// Resolves the OSS provider from CLI override, profile config, or global config.
/// Returns `None` if no provider is configured at any level.
pub fn resolve_oss_provider(
explicit_provider: Option<&str>,
config_toml: &ConfigToml,
config_profile: Option<String>,
) -> Option<String> {
if let Some(provider) = explicit_provider {
// Explicit provider specified (e.g., via --local-provider)
Some(provider.to_string())
} else {
// Check profile config first, then global config
let profile = config_toml.get_config_profile(config_profile).ok();
if let Some(profile) = &profile {
// Check if profile has an oss provider
if let Some(profile_oss_provider) = &profile.oss_provider {
Some(profile_oss_provider.clone())
}
// If not, check whether the toml has an oss provider
else {
config_toml.oss_provider.clone()
}
} else {
config_toml.oss_provider.clone()
}
}
}
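A sketch of the precedence this encodes (explicit override, then profile, then global); the profile name "dev" here is hypothetical:
let config_toml = ConfigToml {
    oss_provider: Some("ollama".to_string()),
    ..Default::default()
};
// No override and no profile: the global value wins.
assert_eq!(
    resolve_oss_provider(None, &config_toml, None),
    Some("ollama".to_string())
);
// An explicit provider beats both profile and global config.
assert_eq!(
    resolve_oss_provider(Some("lmstudio"), &config_toml, Some("dev".to_string())),
    Some("lmstudio".to_string())
);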
impl Config {
/// Meant to be used exclusively for tests: `load_with_overrides()` should
/// be used in all other cases.
@@ -2542,7 +2616,7 @@ url = "https://example.com/mcp"
let codex_home = TempDir::new()?;
ConfigEditsBuilder::new(codex_home.path())
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::High))
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::High))
.apply()
.await?;
@@ -2550,7 +2624,7 @@ url = "https://example.com/mcp"
tokio::fs::read_to_string(codex_home.path().join(CONFIG_TOML_FILE)).await?;
let parsed: ConfigToml = toml::from_str(&serialized)?;
assert_eq!(parsed.model.as_deref(), Some("gpt-5-codex"));
assert_eq!(parsed.model.as_deref(), Some("gpt-5.1-codex"));
assert_eq!(parsed.model_reasoning_effort, Some(ReasoningEffort::High));
Ok(())
@@ -2564,7 +2638,7 @@ url = "https://example.com/mcp"
tokio::fs::write(
&config_path,
r#"
model = "gpt-5-codex"
model = "gpt-5.1-codex"
model_reasoning_effort = "medium"
[profiles.dev]
@@ -2600,7 +2674,7 @@ model = "gpt-4.1"
ConfigEditsBuilder::new(codex_home.path())
.with_profile(Some("dev"))
.set_model(Some("gpt-5-codex"), Some(ReasoningEffort::Medium))
.set_model(Some("gpt-5.1-codex"), Some(ReasoningEffort::Medium))
.apply()
.await?;
@@ -2612,7 +2686,7 @@ model = "gpt-4.1"
.get("dev")
.expect("profile should be created");
assert_eq!(profile.model.as_deref(), Some("gpt-5-codex"));
assert_eq!(profile.model.as_deref(), Some("gpt-5.1-codex"));
assert_eq!(
profile.model_reasoning_effort,
Some(ReasoningEffort::Medium)
@@ -2634,7 +2708,7 @@ model = "gpt-4"
model_reasoning_effort = "medium"
[profiles.prod]
model = "gpt-5-codex"
model = "gpt-5.1-codex"
"#,
)
.await?;
@@ -2663,7 +2737,7 @@ model = "gpt-5-codex"
.profiles
.get("prod")
.and_then(|profile| profile.model.as_deref()),
Some("gpt-5-codex"),
Some("gpt-5.1-codex"),
);
Ok(())
@@ -2778,7 +2852,7 @@ model_provider = "openai"
approval_policy = "on-failure"
[profiles.gpt5]
model = "gpt-5"
model = "gpt-5.1"
model_provider = "openai"
approval_policy = "on-failure"
model_reasoning_effort = "high"
@@ -3094,9 +3168,9 @@ model_verbosity = "high"
fixture.codex_home(),
)?;
let expected_gpt5_profile_config = Config {
model: "gpt-5".to_string(),
model: "gpt-5.1".to_string(),
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
model_family: find_family_for_model("gpt-5").expect("known model slug"),
model_family: find_family_for_model("gpt-5.1").expect("known model slug"),
model_context_window: Some(272_000),
model_max_output_tokens: Some(128_000),
model_auto_compact_token_limit: Some(244_800),
@@ -3265,6 +3339,41 @@ trust_level = "trusted"
Ok(())
}
#[test]
fn test_set_default_oss_provider() -> std::io::Result<()> {
let temp_dir = TempDir::new()?;
let codex_home = temp_dir.path();
let config_path = codex_home.join(CONFIG_TOML_FILE);
// Test setting valid provider on empty config
set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
let content = std::fs::read_to_string(&config_path)?;
assert!(content.contains("oss_provider = \"ollama\""));
// Test updating existing config
std::fs::write(&config_path, "model = \"gpt-4\"\n")?;
set_default_oss_provider(codex_home, LMSTUDIO_OSS_PROVIDER_ID)?;
let content = std::fs::read_to_string(&config_path)?;
assert!(content.contains("oss_provider = \"lmstudio\""));
assert!(content.contains("model = \"gpt-4\""));
// Test overwriting existing oss_provider
set_default_oss_provider(codex_home, OLLAMA_OSS_PROVIDER_ID)?;
let content = std::fs::read_to_string(&config_path)?;
assert!(content.contains("oss_provider = \"ollama\""));
assert!(!content.contains("oss_provider = \"lmstudio\""));
// Test invalid provider
let result = set_default_oss_provider(codex_home, "invalid_provider");
assert!(result.is_err());
let error = result.unwrap_err();
assert_eq!(error.kind(), std::io::ErrorKind::InvalidInput);
assert!(error.to_string().contains("Invalid OSS provider"));
assert!(error.to_string().contains("invalid_provider"));
Ok(())
}
#[test]
fn test_untrusted_project_gets_workspace_write_sandbox() -> anyhow::Result<()> {
let config_with_untrusted = r#"
@@ -3295,6 +3404,85 @@ trust_level = "untrusted"
Ok(())
}
#[test]
fn test_resolve_oss_provider_explicit_override() {
let config_toml = ConfigToml::default();
let result = resolve_oss_provider(Some("custom-provider"), &config_toml, None);
assert_eq!(result, Some("custom-provider".to_string()));
}
#[test]
fn test_resolve_oss_provider_from_profile() {
let mut profiles = std::collections::HashMap::new();
let profile = ConfigProfile {
oss_provider: Some("profile-provider".to_string()),
..Default::default()
};
profiles.insert("test-profile".to_string(), profile);
let config_toml = ConfigToml {
profiles,
..Default::default()
};
let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
assert_eq!(result, Some("profile-provider".to_string()));
}
#[test]
fn test_resolve_oss_provider_from_global_config() {
let config_toml = ConfigToml {
oss_provider: Some("global-provider".to_string()),
..Default::default()
};
let result = resolve_oss_provider(None, &config_toml, None);
assert_eq!(result, Some("global-provider".to_string()));
}
#[test]
fn test_resolve_oss_provider_profile_fallback_to_global() {
let mut profiles = std::collections::HashMap::new();
let profile = ConfigProfile::default(); // No oss_provider set
profiles.insert("test-profile".to_string(), profile);
let config_toml = ConfigToml {
oss_provider: Some("global-provider".to_string()),
profiles,
..Default::default()
};
let result = resolve_oss_provider(None, &config_toml, Some("test-profile".to_string()));
assert_eq!(result, Some("global-provider".to_string()));
}
#[test]
fn test_resolve_oss_provider_none_when_not_configured() {
let config_toml = ConfigToml::default();
let result = resolve_oss_provider(None, &config_toml, None);
assert_eq!(result, None);
}
#[test]
fn test_resolve_oss_provider_explicit_overrides_all() {
let mut profiles = std::collections::HashMap::new();
let profile = ConfigProfile {
oss_provider: Some("profile-provider".to_string()),
..Default::default()
};
profiles.insert("test-profile".to_string(), profile);
let config_toml = ConfigToml {
oss_provider: Some("global-provider".to_string()),
profiles,
..Default::default()
};
let result = resolve_oss_provider(
Some("explicit-provider"),
&config_toml,
Some("test-profile".to_string()),
);
assert_eq!(result, Some("explicit-provider".to_string()));
}
#[test]
fn test_untrusted_project_gets_unless_trusted_approval_policy() -> std::io::Result<()> {
let codex_home = TempDir::new()?;

View File

@@ -33,6 +33,7 @@ pub struct ConfigProfile {
/// Optional feature toggles scoped to this profile.
#[serde(default)]
pub features: Option<crate::features::FeaturesToml>,
pub oss_provider: Option<String>,
}
impl From<ConfigProfile> for codex_app_server_protocol::Profile {

View File

@@ -154,13 +154,11 @@ impl CodexRequestBuilder {
.collect()
}
}
#[derive(Debug, Clone)]
pub struct Originator {
pub value: String,
pub header_value: HeaderValue,
}
static ORIGINATOR: OnceLock<Originator> = OnceLock::new();
#[derive(Debug)]

View File

@@ -189,16 +189,20 @@ async fn exec_windows_sandbox(
};
let sandbox_cwd = cwd.clone();
let logs_base_dir = find_codex_home().ok();
let codex_home = find_codex_home().map_err(|err| {
CodexErr::Io(io::Error::other(format!(
"windows sandbox: failed to resolve codex_home: {err}"
)))
})?;
let spawn_res = tokio::task::spawn_blocking(move || {
run_windows_sandbox_capture(
policy_str,
&sandbox_cwd,
codex_home.as_ref(),
command,
&cwd,
env,
timeout_ms,
logs_base_dir.as_deref(),
)
})
.await;

View File

@@ -0,0 +1,293 @@
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use codex_execpolicy2::Decision;
use codex_execpolicy2::Evaluation;
use codex_execpolicy2::Policy;
use codex_execpolicy2::PolicyParser;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use thiserror::Error;
use crate::bash::parse_shell_lc_plain_commands;
use crate::features::Feature;
use crate::features::Features;
use crate::tools::sandboxing::ApprovalRequirement;
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
#[derive(Debug, Error)]
pub enum ExecPolicyError {
#[error("failed to read execpolicy files from {dir}: {source}")]
ReadDir {
dir: PathBuf,
source: std::io::Error,
},
#[error("failed to read execpolicy file {path}: {source}")]
ReadFile {
path: PathBuf,
source: std::io::Error,
},
#[error("failed to parse execpolicy file {path}: {source}")]
ParsePolicy {
path: String,
source: codex_execpolicy2::Error,
},
}
pub(crate) fn exec_policy_for(
features: &Features,
codex_home: &Path,
) -> Result<Option<Arc<Policy>>, ExecPolicyError> {
if !features.enabled(Feature::ExecPolicyV2) {
return Ok(None);
}
let policy_dir = codex_home.to_path_buf();
let entries = match fs::read_dir(&policy_dir) {
Ok(entries) => entries,
Err(source) if source.kind() == std::io::ErrorKind::NotFound => return Ok(None),
Err(source) => {
return Err(ExecPolicyError::ReadDir {
dir: policy_dir,
source,
});
}
};
let mut policy_paths: Vec<PathBuf> = Vec::new();
for entry in entries {
let entry = entry.map_err(|source| ExecPolicyError::ReadDir {
dir: policy_dir.clone(),
source,
})?;
let path = entry.path();
if path
.extension()
.and_then(|ext| ext.to_str())
.is_some_and(|ext| ext == "codexpolicy")
&& path.is_file()
{
policy_paths.push(path);
}
}
policy_paths.sort();
let mut parser = PolicyParser::new();
for policy_path in &policy_paths {
let contents =
fs::read_to_string(policy_path).map_err(|source| ExecPolicyError::ReadFile {
path: policy_path.clone(),
source,
})?;
let identifier = policy_path.to_string_lossy().to_string();
parser
.parse(&identifier, &contents)
.map_err(|source| ExecPolicyError::ParsePolicy {
path: identifier,
source,
})?;
}
let policy = Arc::new(parser.build());
tracing::debug!(
file_count = policy_paths.len(),
"loaded execpolicy2 from {}",
policy_dir.display()
);
Ok(Some(policy))
}
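A sketch of the end-to-end flow, assuming a hypothetical `rules.codexpolicy` in `codex_home` (an assumed `&Path`) written with the `prefix_rule` syntax the tests below use:
// rules.codexpolicy:
//   prefix_rule(pattern=["rm"], decision="forbidden")
//
// With the feature flag on, every *.codexpolicy file in codex_home is
// collected (sorted by path), fed to one PolicyParser, and built into a
// single combined Policy.
let mut features = Features::with_defaults();
features.enable(Feature::ExecPolicyV2);
let policy = exec_policy_for(&features, codex_home)?.expect("policy files present");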
fn evaluate_with_policy(
policy: &Policy,
command: &[String],
approval_policy: AskForApproval,
) -> Option<ApprovalRequirement> {
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
let evaluation = policy.check_multiple(commands.iter());
match evaluation {
Evaluation::Match { decision, .. } => match decision {
Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
reason: FORBIDDEN_REASON.to_string(),
}),
Decision::Prompt => {
let reason = PROMPT_REASON.to_string();
if matches!(approval_policy, AskForApproval::Never) {
Some(ApprovalRequirement::Forbidden { reason })
} else {
Some(ApprovalRequirement::NeedsApproval {
reason: Some(reason),
})
}
}
Decision::Allow => Some(ApprovalRequirement::Skip),
},
Evaluation::NoMatch => None,
}
}
pub(crate) fn approval_requirement_for_command(
policy: Option<&Policy>,
command: &[String],
approval_policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
with_escalated_permissions: bool,
) -> ApprovalRequirement {
if let Some(policy) = policy
&& let Some(requirement) = evaluate_with_policy(policy, command, approval_policy)
{
return requirement;
}
if requires_initial_appoval(
approval_policy,
sandbox_policy,
command,
with_escalated_permissions,
) {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::features::Feature;
use crate::features::Features;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use pretty_assertions::assert_eq;
use tempfile::tempdir;
#[test]
fn returns_none_when_feature_disabled() {
let features = Features::with_defaults();
let temp_dir = tempdir().expect("create temp dir");
let policy = exec_policy_for(&features, temp_dir.path()).expect("policy result");
assert!(policy.is_none());
}
#[test]
fn returns_none_when_policy_dir_is_missing() {
let mut features = Features::with_defaults();
features.enable(Feature::ExecPolicyV2);
let temp_dir = tempdir().expect("create temp dir");
let missing_dir = temp_dir.path().join("missing");
let policy = exec_policy_for(&features, &missing_dir).expect("policy result");
assert!(policy.is_none());
}
#[test]
fn evaluates_bash_lc_inner_commands() {
let policy_src = r#"
prefix_rule(pattern=["rm"], decision="forbidden")
"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let forbidden_script = vec![
"bash".to_string(),
"-lc".to_string(),
"rm -rf /tmp".to_string(),
];
let requirement =
evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
.expect("expected match for forbidden command");
assert_eq!(
requirement,
ApprovalRequirement::Forbidden {
reason: FORBIDDEN_REASON.to_string()
}
);
}
#[test]
fn approval_requirement_prefers_execpolicy_match() {
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let command = vec!["rm".to_string()];
let requirement = approval_requirement_for_command(
Some(&policy),
&command,
AskForApproval::OnRequest,
&SandboxPolicy::DangerFullAccess,
false,
);
assert_eq!(
requirement,
ApprovalRequirement::NeedsApproval {
reason: Some(PROMPT_REASON.to_string())
}
);
}
#[test]
fn approval_requirement_respects_approval_policy() {
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let command = vec!["rm".to_string()];
let requirement = approval_requirement_for_command(
Some(&policy),
&command,
AskForApproval::Never,
&SandboxPolicy::DangerFullAccess,
false,
);
assert_eq!(
requirement,
ApprovalRequirement::Forbidden {
reason: PROMPT_REASON.to_string()
}
);
}
#[test]
fn approval_requirement_falls_back_to_heuristics() {
let command = vec!["python".to_string()];
let requirement = approval_requirement_for_command(
None,
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
false,
);
assert_eq!(
requirement,
ApprovalRequirement::NeedsApproval { reason: None }
);
}
}

View File

@@ -40,12 +40,16 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Gate the execpolicy2 enforcement for shell/unified exec.
ExecPolicyV2,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
/// Create a ghost commit at each turn.
GhostCommit,
/// Enable Windows sandbox (restricted token) on Windows.
WindowsSandbox,
/// Enable the default shell tool.
ShellTool,
}
impl Feature {
@@ -283,6 +287,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::ExecPolicyV2,
key: "exec_policy_v2",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",
@@ -301,4 +311,10 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::ShellTool,
key: "shell_tool",
stage: Stage::Stable,
default_enabled: true,
},
];
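Since `exec_policy_v2` ships experimental and default-off, a quick sketch of toggling it programmatically, using the same `Features` calls the exec_policy tests rely on:
let mut features = Features::with_defaults();
assert!(!features.enabled(Feature::ExecPolicyV2)); // default_enabled: false
features.enable(Feature::ExecPolicyV2);
assert!(features.enabled(Feature::ExecPolicyV2));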

View File

@@ -8,7 +8,8 @@
mod apply_patch;
pub mod auth;
pub mod bash;
pub mod client;
mod chat_completions;
mod client;
mod client_common;
pub mod codex;
mod codex_conversation;
@@ -23,6 +24,7 @@ mod environment_context;
pub mod error;
pub mod exec;
pub mod exec_env;
mod exec_policy;
pub mod features;
mod flags;
pub mod git_info;
@@ -39,8 +41,11 @@ pub mod token_data;
mod truncate;
mod unified_exec;
mod user_instructions;
pub use model_provider_info::BUILT_IN_OSS_MODEL_PROVIDER_ID;
pub use model_provider_info::DEFAULT_LMSTUDIO_PORT;
pub use model_provider_info::DEFAULT_OLLAMA_PORT;
pub use model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
pub use model_provider_info::ModelProviderInfo;
pub use model_provider_info::OLLAMA_OSS_PROVIDER_ID;
pub use model_provider_info::WireApi;
pub use model_provider_info::built_in_model_providers;
pub use model_provider_info::create_oss_provider_with_base_url;
@@ -53,6 +58,7 @@ pub use conversation_manager::NewConversation;
// Re-export common auth types for workspace consumers
pub use auth::AuthManager;
pub use auth::CodexAuth;
pub mod default_client;
pub mod model_family;
mod openai_model_info;
pub mod project_doc;
@@ -94,10 +100,10 @@ pub use codex_protocol::protocol;
pub use codex_protocol::config_types as protocol_config_types;
pub use client::ModelClient;
pub use client::ResponseEvent;
pub use client::ResponseStream;
pub use client_common::Prompt;
pub use client_common::REVIEW_PROMPT;
pub use client_common::ResponseEvent;
pub use client_common::ResponseStream;
pub use codex_protocol::models::ContentItem;
pub use codex_protocol::models::LocalShellAction;
pub use codex_protocol::models::LocalShellExecAction;

File diff suppressed because it is too large

View File

@@ -132,7 +132,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true)
} else if slug.starts_with("gpt-3.5") {
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
} else if slug.starts_with("test-gpt-5-codex") {
} else if slug.starts_with("test-gpt-5") {
model_family!(
slug, slug,
supports_reasoning_summaries: true,

View File

@@ -6,15 +6,16 @@
//! key. These override or extend the defaults at runtime.
use crate::CodexAuth;
use crate::client::http::CodexHttpClient;
use crate::client::http::CodexRequestBuilder;
use crate::error::EnvVarError;
use crate::default_client::CodexHttpClient;
use crate::default_client::CodexRequestBuilder;
use codex_app_server_protocol::AuthMode;
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::env::VarError;
use std::time::Duration;
use crate::error::EnvVarError;
const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
const DEFAULT_STREAM_MAX_RETRIES: u64 = 5;
const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;
@@ -257,9 +258,11 @@ impl ModelProviderInfo {
}
}
const DEFAULT_OLLAMA_PORT: u32 = 11434;
pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;
pub const DEFAULT_OLLAMA_PORT: u16 = 11434;
pub const BUILT_IN_OSS_MODEL_PROVIDER_ID: &str = "oss";
pub const LMSTUDIO_OSS_PROVIDER_ID: &str = "lmstudio";
pub const OLLAMA_OSS_PROVIDER_ID: &str = "ollama";
/// Built-in default provider list.
pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
@@ -310,14 +313,21 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
requires_openai_auth: true,
},
),
(BUILT_IN_OSS_MODEL_PROVIDER_ID, create_oss_provider()),
(
OLLAMA_OSS_PROVIDER_ID,
create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Chat),
),
(
LMSTUDIO_OSS_PROVIDER_ID,
create_oss_provider(DEFAULT_LMSTUDIO_PORT, WireApi::Responses),
),
]
.into_iter()
.map(|(k, v)| (k.to_string(), v))
.collect()
}
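A small sketch of the resulting map; both OSS entries are built by `create_oss_provider`, differing only in default port and wire API:
let providers = built_in_model_providers();
assert!(providers.contains_key(OLLAMA_OSS_PROVIDER_ID)); // Chat API, port 11434
assert!(providers.contains_key(LMSTUDIO_OSS_PROVIDER_ID)); // Responses API, port 1234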
pub fn create_oss_provider() -> ModelProviderInfo {
pub fn create_oss_provider(default_provider_port: u16, wire_api: WireApi) -> ModelProviderInfo {
// These CODEX_OSS_ environment variables are experimental: we may
// switch to reading values from config.toml instead.
let codex_oss_base_url = match std::env::var("CODEX_OSS_BASE_URL")
@@ -330,22 +340,21 @@ pub fn create_oss_provider() -> ModelProviderInfo {
port = std::env::var("CODEX_OSS_PORT")
.ok()
.filter(|v| !v.trim().is_empty())
.and_then(|v| v.parse::<u32>().ok())
.unwrap_or(DEFAULT_OLLAMA_PORT)
.and_then(|v| v.parse::<u16>().ok())
.unwrap_or(default_provider_port)
),
};
create_oss_provider_with_base_url(&codex_oss_base_url)
create_oss_provider_with_base_url(&codex_oss_base_url, wire_api)
}
pub fn create_oss_provider_with_base_url(base_url: &str) -> ModelProviderInfo {
pub fn create_oss_provider_with_base_url(base_url: &str, wire_api: WireApi) -> ModelProviderInfo {
ModelProviderInfo {
name: "gpt-oss".into(),
base_url: Some(base_url.into()),
env_key: None,
env_key_instructions: None,
experimental_bearer_token: None,
wire_api: WireApi::Chat,
wire_api,
query_params: None,
http_headers: None,
env_http_headers: None,

View File

@@ -1,7 +1,7 @@
use crate::client::http::originator;
use crate::config::Config;
use crate::config::types::OtelExporterKind as Kind;
use crate::config::types::OtelHttpProtocol as Protocol;
use crate::default_client::originator;
use codex_otel::config::OtelExporter;
use codex_otel::config::OtelHttpProtocol;
use codex_otel::config::OtelSettings;

View File

@@ -6,7 +6,7 @@ use shlex::split as shlex_split;
use shlex::try_join as shlex_try_join;
use std::path::PathBuf;
fn shlex_join(tokens: &[String]) -> String {
pub fn shlex_join(tokens: &[String]) -> String {
shlex_try_join(tokens.iter().map(String::as_str))
.unwrap_or_else(|_| "<command included NUL byte>".to_string())
}
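Now that it is public, a quick sketch of the lossy-but-safe joining callers get; tokens with shell-significant characters are quoted:
let tokens = vec!["echo".to_string(), "hi there".to_string()];
assert_eq!(shlex_join(&tokens), "echo 'hi there'");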

View File

@@ -72,6 +72,8 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::GetHistoryEntryResponse(_)
| EventMsg::UndoStarted(_)
| EventMsg::McpListToolsResponse(_)
| EventMsg::McpStartupUpdate(_)
| EventMsg::McpStartupComplete(_)
| EventMsg::ListCustomPromptsResponse(_)
| EventMsg::PlanUpdate(_)
| EventMsg::ShutdownComplete

View File

@@ -23,8 +23,8 @@ use super::list::ConversationsPage;
use super::list::Cursor;
use super::list::get_conversations;
use super::policy::is_persisted_response_item;
use crate::client::http::originator;
use crate::config::Config;
use crate::default_client::originator;
use crate::git_info::collect_git_info;
use codex_protocol::protocol::InitialHistory;
use codex_protocol::protocol::ResumedHistory;

View File

@@ -6,9 +6,9 @@ use std::time::Instant;
use crate::AuthManager;
use crate::ModelProviderInfo;
use crate::ResponseEvent;
use crate::client::ModelClient;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::config::Config;
use crate::protocol::SandboxPolicy;
use askama::Template;

View File

@@ -8,9 +8,12 @@ use crate::unified_exec::UnifiedExecSessionManager;
use crate::user_notification::UserNotifier;
use codex_otel::otel_event_manager::OtelEventManager;
use tokio::sync::Mutex;
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
pub(crate) struct SessionServices {
pub(crate) mcp_connection_manager: McpConnectionManager,
pub(crate) mcp_connection_manager: Arc<RwLock<McpConnectionManager>>,
pub(crate) mcp_startup_cancellation_token: CancellationToken,
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
pub(crate) notifier: UserNotifier,
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,

View File

@@ -65,22 +65,24 @@ impl SessionTask for UserShellCommandTask {
// allows commands that use shell features (pipes, &&, redirects, etc.).
// We do not source rc files or otherwise reformat the script.
let use_login_shell = true;
let shell_invocation = session
let command = session
.user_shell()
.derive_exec_args(&self.command, use_login_shell);
let call_id = Uuid::new_v4().to_string();
let raw_command = self.command.clone();
let cwd = turn_context.cwd.clone();
let parsed_cmd = parse_command(&shell_invocation);
let parsed_cmd = parse_command(&command);
session
.send_event(
turn_context.as_ref(),
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: call_id.clone(),
command: shell_invocation.clone(),
cwd: turn_context.cwd.clone(),
parsed_cmd,
turn_id: turn_context.sub_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::UserShell,
interaction_input: None,
}),
@@ -88,8 +90,8 @@ impl SessionTask for UserShellCommandTask {
.await;
let exec_env = ExecEnv {
command: shell_invocation,
cwd: turn_context.cwd.clone(),
command: command.clone(),
cwd: cwd.clone(),
env: create_env(&turn_context.shell_environment_policy),
timeout_ms: None,
sandbox: SandboxType::None,
@@ -129,6 +131,12 @@ impl SessionTask for UserShellCommandTask {
turn_context.as_ref(),
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::UserShell,
interaction_input: None,
stdout: String::new(),
stderr: aborted_message.clone(),
aggregated_output: aborted_message.clone(),
@@ -145,6 +153,12 @@ impl SessionTask for UserShellCommandTask {
turn_context.as_ref(),
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: call_id.clone(),
turn_id: turn_context.sub_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::UserShell,
interaction_input: None,
stdout: output.stdout.text.clone(),
stderr: output.stderr.text.clone(),
aggregated_output: output.aggregated_output.text.clone(),
@@ -176,6 +190,12 @@ impl SessionTask for UserShellCommandTask {
turn_context.as_ref(),
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id,
turn_id: turn_context.sub_id.clone(),
command,
cwd,
parsed_cmd,
source: ExecCommandSource::UserShell,
interaction_input: None,
stdout: exec_output.stdout.text.clone(),
stderr: exec_output.stderr.text.clone(),
aggregated_output: exec_output.aggregated_output.text.clone(),

View File

@@ -15,6 +15,7 @@ use crate::protocol::PatchApplyEndEvent;
use crate::protocol::TurnDiffEvent;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::sandboxing::ToolError;
use codex_protocol::parse_command::ParsedCommand;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
@@ -61,6 +62,7 @@ pub(crate) async fn emit_exec_command_begin(
ctx: ToolEventCtx<'_>,
command: &[String],
cwd: &Path,
parsed_cmd: &[ParsedCommand],
source: ExecCommandSource,
interaction_input: Option<String>,
) {
@@ -69,9 +71,10 @@ pub(crate) async fn emit_exec_command_begin(
ctx.turn,
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: ctx.call_id.to_string(),
turn_id: ctx.turn.sub_id.clone(),
command: command.to_vec(),
cwd: cwd.to_path_buf(),
parsed_cmd: parse_command(command),
parsed_cmd: parsed_cmd.to_vec(),
source,
interaction_input,
}),
@@ -84,6 +87,7 @@ pub(crate) enum ToolEmitter {
command: Vec<String>,
cwd: PathBuf,
source: ExecCommandSource,
parsed_cmd: Vec<ParsedCommand>,
},
ApplyPatch {
changes: HashMap<PathBuf, FileChange>,
@@ -94,15 +98,18 @@ pub(crate) enum ToolEmitter {
cwd: PathBuf,
source: ExecCommandSource,
interaction_input: Option<String>,
parsed_cmd: Vec<ParsedCommand>,
},
}
impl ToolEmitter {
pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
let parsed_cmd = parse_command(&command);
Self::Shell {
command,
cwd,
source,
parsed_cmd,
}
}
@@ -119,11 +126,13 @@ impl ToolEmitter {
source: ExecCommandSource,
interaction_input: Option<String>,
) -> Self {
let parsed_cmd = parse_command(command);
Self::UnifiedExec {
command: command.to_vec(),
cwd,
source,
interaction_input,
parsed_cmd,
}
}
@@ -134,44 +143,14 @@ impl ToolEmitter {
command,
cwd,
source,
parsed_cmd,
},
ToolEventStage::Begin,
stage,
) => {
emit_exec_command_begin(ctx, command, cwd.as_path(), *source, None).await;
}
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
emit_exec_end(
emit_exec_stage(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
emit_exec_end(
ctx,
String::new(),
(*message).to_string(),
(*message).to_string(),
-1,
Duration::ZERO,
message.clone(),
ExecCommandInput::new(command, cwd.as_path(), parsed_cmd, *source, None),
stage,
)
.await;
}
@@ -231,57 +210,20 @@ impl ToolEmitter {
cwd,
source,
interaction_input,
parsed_cmd,
},
ToolEventStage::Begin,
stage,
) => {
emit_exec_command_begin(
emit_exec_stage(
ctx,
command,
cwd.as_path(),
*source,
interaction_input.clone(),
)
.await;
}
(Self::UnifiedExec { .. }, ToolEventStage::Success(output)) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(
Self::UnifiedExec { .. },
ToolEventStage::Failure(ToolEventFailure::Output(output)),
) => {
emit_exec_end(
ctx,
output.stdout.text.clone(),
output.stderr.text.clone(),
output.aggregated_output.text.clone(),
output.exit_code,
output.duration,
format_exec_output_str(&output),
)
.await;
}
(
Self::UnifiedExec { .. },
ToolEventStage::Failure(ToolEventFailure::Message(message)),
) => {
emit_exec_end(
ctx,
String::new(),
(*message).to_string(),
(*message).to_string(),
-1,
Duration::ZERO,
message.clone(),
ExecCommandInput::new(
command,
cwd.as_path(),
parsed_cmd,
*source,
interaction_input.as_deref(),
),
stage,
)
.await;
}
@@ -340,26 +282,107 @@ impl ToolEmitter {
}
}
async fn emit_exec_end(
ctx: ToolEventCtx<'_>,
struct ExecCommandInput<'a> {
command: &'a [String],
cwd: &'a Path,
parsed_cmd: &'a [ParsedCommand],
source: ExecCommandSource,
interaction_input: Option<&'a str>,
}
impl<'a> ExecCommandInput<'a> {
fn new(
command: &'a [String],
cwd: &'a Path,
parsed_cmd: &'a [ParsedCommand],
source: ExecCommandSource,
interaction_input: Option<&'a str>,
) -> Self {
Self {
command,
cwd,
parsed_cmd,
source,
interaction_input,
}
}
}
struct ExecCommandResult {
stdout: String,
stderr: String,
aggregated_output: String,
exit_code: i32,
duration: Duration,
formatted_output: String,
}
async fn emit_exec_stage(
ctx: ToolEventCtx<'_>,
exec_input: ExecCommandInput<'_>,
stage: ToolEventStage,
) {
match stage {
ToolEventStage::Begin => {
emit_exec_command_begin(
ctx,
exec_input.command,
exec_input.cwd,
exec_input.parsed_cmd,
exec_input.source,
exec_input.interaction_input.map(str::to_owned),
)
.await;
}
ToolEventStage::Success(output)
| ToolEventStage::Failure(ToolEventFailure::Output(output)) => {
let exec_result = ExecCommandResult {
stdout: output.stdout.text.clone(),
stderr: output.stderr.text.clone(),
aggregated_output: output.aggregated_output.text.clone(),
exit_code: output.exit_code,
duration: output.duration,
formatted_output: format_exec_output_str(&output),
};
emit_exec_end(ctx, exec_input, exec_result).await;
}
ToolEventStage::Failure(ToolEventFailure::Message(message)) => {
let text = message.to_string();
let exec_result = ExecCommandResult {
stdout: String::new(),
stderr: text.clone(),
aggregated_output: text.clone(),
exit_code: -1,
duration: Duration::ZERO,
formatted_output: text,
};
emit_exec_end(ctx, exec_input, exec_result).await;
}
}
}
async fn emit_exec_end(
ctx: ToolEventCtx<'_>,
exec_input: ExecCommandInput<'_>,
exec_result: ExecCommandResult,
) {
ctx.session
.send_event(
ctx.turn,
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: ctx.call_id.to_string(),
stdout,
stderr,
aggregated_output,
exit_code,
duration,
formatted_output,
turn_id: ctx.turn.sub_id.clone(),
command: exec_input.command.to_vec(),
cwd: exec_input.cwd.to_path_buf(),
parsed_cmd: exec_input.parsed_cmd.to_vec(),
source: exec_input.source,
interaction_input: exec_input.interaction_input.map(str::to_owned),
stdout: exec_result.stdout,
stderr: exec_result.stderr,
aggregated_output: exec_result.aggregated_output,
exit_code: exec_result.exit_code,
duration: exec_result.duration,
formatted_output: exec_result.formatted_output,
}),
)
.await;

View File

@@ -3,10 +3,10 @@ use std::collections::BTreeMap;
use crate::apply_patch;
use crate::apply_patch::InternalApplyPatchInvocation;
use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::client::FreeformTool;
use crate::client::FreeformToolFormat;
use crate::client::ResponsesApiTool;
use crate::client::ToolSpec;
use crate::client_common::tools::FreeformTool;
use crate::client_common::tools::FreeformToolFormat;
use crate::client_common::tools::ResponsesApiTool;
use crate::client_common::tools::ToolSpec;
use crate::function_tool::FunctionCallError;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;

View File

@@ -287,6 +287,8 @@ async fn handle_list_resources(
let resources = session
.services
.mcp_connection_manager
.read()
.await
.list_all_resources()
.await;
Ok(ListResourcesPayload::from_all_servers(resources))
@@ -396,6 +398,8 @@ async fn handle_list_resource_templates(
let templates = session
.services
.mcp_connection_manager
.read()
.await
.list_all_resource_templates()
.await;
Ok(ListResourceTemplatesPayload::from_all_servers(templates))
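Both list handlers now take an async read guard before querying the connection manager. A minimal sketch of the access pattern, assuming the manager sits behind a `tokio::sync::RwLock` (the lock type itself is not shown in this diff):

use std::sync::Arc;
use tokio::sync::RwLock;

struct McpConnectionManager;

impl McpConnectionManager {
    async fn list_all_resources(&self) -> Vec<String> {
        Vec::new() // placeholder; the real manager queries each server
    }
}

async fn list_resources(manager: &Arc<RwLock<McpConnectionManager>>) -> Vec<String> {
    // The read guard is held only across the call, so writers
    // (e.g. reconnect logic) are not blocked longer than necessary.
    manager.read().await.list_all_resources().await
}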

View File

@@ -1,5 +1,5 @@
use crate::client::ResponsesApiTool;
use crate::client::ToolSpec;
use crate::client_common::tools::ResponsesApiTool;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::function_tool::FunctionCallError;

View File

@@ -9,6 +9,7 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::codex::TurnContext;
use crate::exec::ExecParams;
use crate::exec_env::create_env;
use crate::exec_policy::approval_requirement_for_command;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
@@ -24,6 +25,7 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
use crate::tools::runtimes::shell::ShellRequest;
use crate::tools::runtimes::shell::ShellRuntime;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ToolCtx;
pub struct ShellHandler;
@@ -303,6 +305,17 @@ impl ShellHandler {
env: exec_params.env.clone(),
with_escalated_permissions: exec_params.with_escalated_permissions,
justification: exec_params.justification.clone(),
approval_requirement: if is_user_shell_command {
ApprovalRequirement::Skip
} else {
approval_requirement_for_command(
turn.exec_policy_v2.as_deref(),
&exec_params.command,
turn.approval_policy,
&turn.sandbox_policy,
exec_params.with_escalated_permissions.unwrap_or(false),
)
},
};
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = ShellRuntime::new();

View File
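These hunks precompute the approval decision when the shell request is built rather than asking the runtime during orchestration. Below is a self-contained sketch of a plausible `approval_requirement_for_command`, not the actual implementation: all types are local stand-ins, and the execpolicy2 handling is inferred from the call sites here and from the integration test further down (which expects "execpolicy forbids this command"):

#[derive(Clone, Copy)]
enum AskForApproval { Never, OnFailure, OnRequest, UnlessTrusted }

enum SandboxPolicy { ReadOnly, WorkspaceWrite, DangerFullAccess }

#[derive(Clone, Debug)]
enum ApprovalRequirement {
    Skip,
    NeedsApproval { reason: Option<String> },
    Forbidden { reason: String },
}

enum PolicyDecision { Allow, Prompt, Forbidden }

struct ExecPolicy;
impl ExecPolicy {
    // Assumed: the v2 policy evaluates the full argv and yields a verdict.
    fn evaluate(&self, _command: &[String]) -> PolicyDecision {
        PolicyDecision::Allow
    }
}

fn approval_requirement_for_command(
    policy: Option<&ExecPolicy>,
    command: &[String],
    approval: AskForApproval,
    sandbox: &SandboxPolicy,
    _with_escalated_permissions: bool,
) -> ApprovalRequirement {
    // Assumption: an explicit execpolicy2 verdict wins over the defaults.
    if let Some(policy) = policy {
        match policy.evaluate(command) {
            PolicyDecision::Forbidden => {
                return ApprovalRequirement::Forbidden {
                    reason: "execpolicy forbids this command".to_string(),
                };
            }
            PolicyDecision::Prompt => {
                return ApprovalRequirement::NeedsApproval { reason: None };
            }
            PolicyDecision::Allow => {}
        }
    }
    // Fall back to the pre-refactor policy/sandbox mapping
    // (see default_approval_requirement below).
    match approval {
        AskForApproval::Never | AskForApproval::OnFailure => ApprovalRequirement::Skip,
        AskForApproval::OnRequest if matches!(sandbox, SandboxPolicy::DangerFullAccess) => {
            ApprovalRequirement::Skip
        }
        _ => ApprovalRequirement::NeedsApproval { reason: None },
    }
}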

@@ -11,11 +11,13 @@ use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::default_approval_requirement;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
@@ -49,40 +51,52 @@ impl ToolOrchestrator {
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
// 1) Approval
let needs_initial_approval =
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
let mut already_approved = false;
if needs_initial_approval {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
.await;
let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
});
match requirement {
ApprovalRequirement::Skip => {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
ApprovalRequirement::Forbidden { reason } => {
return Err(ToolError::Rejected(reason));
}
ApprovalRequirement::NeedsApproval { reason } => {
let mut risk = None;
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: None,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
None,
)
.await;
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: reason,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
}
already_approved = true;
}
already_approved = true;
} else {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
// 2) First attempt under the selected sandbox.

View File

@@ -2,7 +2,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use crate::client::ToolSpec;
use crate::client_common::tools::ToolSpec;
use crate::function_tool::FunctionCallError;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;

View File

@@ -1,7 +1,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use crate::client::ToolSpec;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::function_tool::FunctionCallError;
@@ -54,7 +54,7 @@ impl ToolRouter {
.any(|config| config.spec.name() == tool_name)
}
pub fn build_tool_call(
pub async fn build_tool_call(
session: &Session,
item: ResponseItem,
) -> Result<Option<ToolCall>, FunctionCallError> {
@@ -65,7 +65,7 @@ impl ToolRouter {
call_id,
..
} => {
if let Some((server, tool)) = session.parse_mcp_tool_name(&name) {
if let Some((server, tool)) = session.parse_mcp_tool_name(&name).await {
Ok(Some(ToolCall {
tool_name: name,
call_id,

View File

@@ -4,13 +4,12 @@ Runtime: shell
Executes shell requests under the orchestrator: asks for approval when needed,
builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use crate::exec::ExecToolCallOutput;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -20,7 +19,6 @@ use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::path::PathBuf;
@@ -33,6 +31,7 @@ pub struct ShellRequest {
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub approval_requirement: ApprovalRequirement,
}
impl ProvidesSandboxRetryData for ShellRequest {
@@ -114,18 +113,8 @@ impl Approvable<ShellRequest> for ShellRuntime {
})
}
fn wants_initial_approval(
&self,
req: &ShellRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
}
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {

View File

@@ -1,4 +1,3 @@
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
/*
Runtime: unified exec
@@ -10,6 +9,7 @@ use crate::error::SandboxErr;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -22,9 +22,7 @@ use crate::tools::sandboxing::with_cached_approval;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecSession;
use crate::unified_exec::UnifiedExecSessionManager;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
@@ -36,6 +34,7 @@ pub struct UnifiedExecRequest {
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub approval_requirement: ApprovalRequirement,
}
impl ProvidesSandboxRetryData for UnifiedExecRequest {
@@ -65,6 +64,7 @@ impl UnifiedExecRequest {
env: HashMap<String, String>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
approval_requirement: ApprovalRequirement,
) -> Self {
Self {
command,
@@ -72,6 +72,7 @@ impl UnifiedExecRequest {
env,
with_escalated_permissions,
justification,
approval_requirement,
}
}
}
@@ -129,18 +130,8 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
})
}
fn wants_initial_approval(
&self,
req: &UnifiedExecRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
}
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {

View File

@@ -86,6 +86,34 @@ pub(crate) struct ApprovalCtx<'a> {
pub risk: Option<SandboxCommandAssessment>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum ApprovalRequirement {
Skip,
NeedsApproval { reason: Option<String> },
Forbidden { reason: String },
}
/// Reflects the orchestrator's behavior (pre-refactor):
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
pub(crate) fn default_approval_requirement(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> ApprovalRequirement {
let needs_approval = match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
};
if needs_approval {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
pub(crate) trait Approvable<Req> {
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
@@ -106,22 +134,11 @@ pub(crate) trait Approvable<Req> {
matches!(policy, AskForApproval::Never)
}
/// Decide whether an initial user approval should be requested before the
/// first attempt. Defaults to the orchestrator's behavior (pre-refactor):
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
fn wants_initial_approval(
&self,
_req: &Req,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
}
/// Override the default approval requirement. Return `Some(_)` to specify
/// a custom requirement, or `None` to fall back to
/// the policy-based default.
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
None
}
/// Decide whether we can request an approval for no-sandbox execution.

View File
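For a runtime whose request already carries a precomputed decision, as `ShellRequest` and `UnifiedExecRequest` now do, the new hook reduces to returning that value. A sketch with hypothetical `MyRequest`/`MyRuntime` types:

#[derive(Clone)]
enum ApprovalRequirement {
    Skip,
    NeedsApproval { reason: Option<String> },
    Forbidden { reason: String },
}

trait Approvable<Req> {
    // Mirrors the trait default above: None means "use the policy-based default".
    fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
        None
    }
}

// Hypothetical runtime whose request already carries the decision.
struct MyRequest {
    approval_requirement: ApprovalRequirement,
}
struct MyRuntime;

impl Approvable<MyRequest> for MyRuntime {
    fn approval_requirement(&self, req: &MyRequest) -> Option<ApprovalRequirement> {
        Some(req.approval_requirement.clone())
    }
}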

@@ -1,5 +1,5 @@
use crate::client::ResponsesApiTool;
use crate::client::ToolSpec;
use crate::client_common::tools::ResponsesApiTool;
use crate::client_common::tools::ToolSpec;
use crate::features::Feature;
use crate::features::Features;
use crate::model_family::ModelFamily;
@@ -20,6 +20,11 @@ pub enum ConfigShellToolType {
Default,
Local,
UnifiedExec,
/// Do not include a shell tool by default. Useful when using Codex
/// with tools provided exclusively by MCP servers. Often used
/// with `--config base_instructions=CUSTOM_INSTRUCTIONS`
/// to customize agent behavior.
Disabled,
/// Takes a command as a single string to be run in the user's default shell.
ShellCommand,
}
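The selection logic in the next hunk checks the new variant first: if the shell tool feature is off, the other shell variants are moot. A sketch of the full precedence chain, with the final fallback arm assumed since the hunk cuts off before it:

#[derive(PartialEq)]
enum Feature { ShellTool, UnifiedExec, ShellCommandTool }

struct Features(Vec<Feature>);
impl Features {
    fn enabled(&self, f: Feature) -> bool {
        self.0.contains(&f)
    }
}

enum ConfigShellToolType { Default, UnifiedExec, ShellCommand, Disabled }

fn select_shell_type(features: &Features) -> ConfigShellToolType {
    if !features.enabled(Feature::ShellTool) {
        // New: opting out of the shell tool entirely.
        ConfigShellToolType::Disabled
    } else if features.enabled(Feature::UnifiedExec) {
        ConfigShellToolType::UnifiedExec
    } else if features.enabled(Feature::ShellCommandTool) {
        ConfigShellToolType::ShellCommand
    } else {
        // Assumed fallback; the hunk cuts off before the final arm.
        ConfigShellToolType::Default
    }
}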
@@ -48,7 +53,9 @@ impl ToolsConfig {
let include_web_search_request = features.enabled(Feature::WebSearchRequest);
let include_view_image_tool = features.enabled(Feature::ViewImageTool);
let shell_type = if features.enabled(Feature::UnifiedExec) {
let shell_type = if !features.enabled(Feature::ShellTool) {
ConfigShellToolType::Disabled
} else if features.enabled(Feature::UnifiedExec) {
ConfigShellToolType::UnifiedExec
} else if features.enabled(Feature::ShellCommandTool) {
ConfigShellToolType::ShellCommand
@@ -294,9 +301,26 @@ fn create_shell_tool() -> ToolSpec {
},
);
let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
Examples of valid command strings:
- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
} else {
r#"Runs a shell command and returns its output.
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
}.to_string();
ToolSpec::Function(ResponsesApiTool {
name: "shell".to_string(),
description: "Runs a shell command and returns its output.".to_string(),
description,
strict: false,
parameters: JsonSchema::Object {
properties,
@@ -341,9 +365,25 @@ fn create_shell_command_tool() -> ToolSpec {
},
);
let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output.
Examples of valid command strings:
- ls -a (show hidden): "Get-ChildItem -Force"
- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
- setting an env var: "$env:FOO='bar'; echo $env:FOO"
- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#
} else {
r#"Runs a shell command and returns its output.
- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#
}.to_string();
ToolSpec::Function(ResponsesApiTool {
name: "shell_command".to_string(),
description: "Runs a shell command string and returns its output.".to_string(),
description,
strict: false,
parameters: JsonSchema::Object {
properties,
@@ -973,16 +1013,21 @@ pub(crate) fn build_specs(
builder.register_handler("exec_command", unified_exec_handler.clone());
builder.register_handler("write_stdin", unified_exec_handler);
}
ConfigShellToolType::Disabled => {
// Do nothing.
}
ConfigShellToolType::ShellCommand => {
builder.push_spec(create_shell_command_tool());
}
}
// Always register shell aliases so older prompts remain compatible.
builder.register_handler("shell", shell_handler.clone());
builder.register_handler("container.exec", shell_handler.clone());
builder.register_handler("local_shell", shell_handler);
builder.register_handler("shell_command", shell_command_handler);
if config.shell_type != ConfigShellToolType::Disabled {
// Always register shell aliases so older prompts remain compatible.
builder.register_handler("shell", shell_handler.clone());
builder.register_handler("container.exec", shell_handler.clone());
builder.register_handler("local_shell", shell_handler);
builder.register_handler("shell_command", shell_command_handler);
}
builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true);
builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true);
@@ -1074,7 +1119,7 @@ pub(crate) fn build_specs(
#[cfg(test)]
mod tests {
use crate::client::FreeformTool;
use crate::client_common::tools::FreeformTool;
use crate::model_family::find_family_for_model;
use crate::tools::registry::ConfiguredToolSpec;
use mcp_types::ToolInputSchema;
@@ -1118,6 +1163,7 @@ mod tests {
ConfigShellToolType::Default => Some("shell"),
ConfigShellToolType::Local => Some("local_shell"),
ConfigShellToolType::UnifiedExec => None,
ConfigShellToolType::Disabled => None,
ConfigShellToolType::ShellCommand => Some("shell_command"),
}
}
@@ -1873,8 +1919,23 @@ mod tests {
};
assert_eq!(name, "shell");
let expected = "Runs a shell command and returns its output.";
assert_eq!(description, expected);
let expected = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
Examples of valid command strings:
- ls -a (show hidden): ["powershell.exe", "-Command", "Get-ChildItem -Force"]
- recursive find by name: ["powershell.exe", "-Command", "Get-ChildItem -Recurse -Filter *.py"]
- recursive grep: ["powershell.exe", "-Command", "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"]
- ps aux | grep python: ["powershell.exe", "-Command", "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"]
- setting an env var: ["powershell.exe", "-Command", "$env:FOO='bar'; echo $env:FOO"]
- running an inline Python script: ["powershell.exe", "-Command", "@'\\nprint('Hello, world!')\\n'@ | python -"]"#
} else {
r#"Runs a shell command and returns its output.
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary."#
}.to_string();
assert_eq!(description, &expected);
}
#[test]
@@ -1888,8 +1949,22 @@ mod tests {
};
assert_eq!(name, "shell_command");
let expected = "Runs a shell command string and returns its output.";
assert_eq!(description, expected);
let expected = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output.
Examples of valid command strings:
- ls -a (show hidden): "Get-ChildItem -Force"
- recursive find by name: "Get-ChildItem -Recurse -Filter *.py"
- recursive grep: "Get-ChildItem -Path C:\\myrepo -Recurse | Select-String -Pattern 'TODO' -CaseSensitive"
- ps aux | grep python: "Get-Process | Where-Object { $_.ProcessName -like '*python*' }"
- setting an env var: "$env:FOO='bar'; echo $env:FOO"
- running an inline Python script: "@'\\nprint('Hello, world!')\\n'@ | python -"#.to_string()
} else {
r#"Runs a shell command and returns its output.
- Always set the `workdir` param when using the shell_command function. Do not use `cd` unless absolutely necessary."#.to_string()
};
assert_eq!(description, &expected);
}
#[test]

View File

@@ -11,6 +11,7 @@ use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::exec::StreamOutput;
use crate::exec_env::create_env;
use crate::exec_policy::approval_requirement_for_command;
use crate::protocol::BackgroundEventEvent;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandSource;
@@ -444,6 +445,13 @@ impl UnifiedExecSessionManager {
create_env(&context.turn.shell_environment_policy),
with_escalated_permissions,
justification,
approval_requirement_for_command(
context.turn.exec_policy_v2.as_deref(),
command,
context.turn.approval_policy,
&context.turn.sandbox_policy,
with_escalated_permissions.unwrap_or(false),
),
);
let tool_ctx = ToolCtx {
session: context.session.as_ref(),

View File

@@ -38,6 +38,15 @@ pub enum ApplyPatchModelOutput {
ShellViaHeredoc,
}
/// A collection of different ways the model can output a shell call
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ShellModelOutput {
Shell,
ShellCommand,
LocalShell,
// UnifiedExec has its own set of tests
}
pub struct TestCodexBuilder {
config_mutators: Vec<Box<ConfigMutator>>,
}

View File

@@ -29,7 +29,7 @@ use core_test_support::wait_for_event;
use serde_json::json;
use test_case::test_case;
async fn apply_patch_harness() -> Result<TestCodexHarness> {
pub async fn apply_patch_harness() -> Result<TestCodexHarness> {
apply_patch_harness_with(|_| {}).await
}
@@ -43,7 +43,7 @@ async fn apply_patch_harness_with(
.await
}
async fn mount_apply_patch(
pub async fn mount_apply_patch(
harness: &TestCodexHarness,
call_id: &str,
patch: &str,
@@ -87,8 +87,8 @@ async fn apply_patch_cli_multiple_operations_integration(
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
@@ -671,8 +671,8 @@ async fn apply_patch_shell_heredoc_with_cd_updates_relative_workdir() -> Result<
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
@@ -717,8 +717,8 @@ async fn apply_patch_shell_failure_propagates_error_and_skips_diff() -> Result<(
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness_with(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
})
.await?;
let test = harness.test();

View File

@@ -240,6 +240,10 @@ enum Expectation {
target: TargetPath,
content: &'static str,
},
FileCreatedNoExitCode {
target: TargetPath,
content: &'static str,
},
PatchApplied {
target: TargetPath,
content: &'static str,
@@ -251,12 +255,18 @@ enum Expectation {
NetworkSuccess {
body_contains: &'static str,
},
NetworkSuccessNoExitCode {
body_contains: &'static str,
},
NetworkFailure {
expect_tag: &'static str,
},
CommandSuccess {
stdout_contains: &'static str,
},
CommandSuccessNoExitCode {
stdout_contains: &'static str,
},
CommandFailure {
output_contains: &'static str,
},
@@ -270,8 +280,7 @@ impl Expectation {
assert_eq!(
result.exit_code,
Some(0),
"expected successful exit for {:?}",
path
"expected successful exit for {path:?}"
);
assert!(
result.stdout.contains(content),
@@ -285,6 +294,21 @@ impl Expectation {
);
let _ = fs::remove_file(path);
}
Expectation::FileCreatedNoExitCode { target, content } => {
let (path, _) = target.resolve_for_patch(test);
assert_eq!(result.exit_code, None, "expected no exit code for {path:?}");
assert!(
result.stdout.contains(content),
"stdout missing {content:?}: {}",
result.stdout
);
let file_contents = fs::read_to_string(&path)?;
assert!(
file_contents.contains(content),
"file contents missing {content:?}: {file_contents}"
);
let _ = fs::remove_file(path);
}
Expectation::PatchApplied { target, content } => {
let (path, _) = target.resolve_for_patch(test);
match result.exit_code {
@@ -360,6 +384,23 @@ impl Expectation {
result.stdout
);
}
Expectation::NetworkSuccessNoExitCode { body_contains } => {
assert_eq!(
result.exit_code, None,
"expected no exit code for successful network call: {}",
result.stdout
);
assert!(
result.stdout.contains("OK:"),
"stdout missing OK prefix: {}",
result.stdout
);
assert!(
result.stdout.contains(body_contains),
"stdout missing body text {body_contains:?}: {}",
result.stdout
);
}
Expectation::NetworkFailure { expect_tag } => {
assert_ne!(
result.exit_code,
@@ -391,6 +432,18 @@ impl Expectation {
result.stdout
);
}
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
assert_eq!(
result.exit_code, None,
"expected no exit code for trusted command: {}",
result.stdout
);
assert!(
result.stdout.contains(stdout_contains),
"trusted command stdout missing {stdout_contains:?}: {}",
result.stdout
);
}
Expectation::CommandFailure { output_contains } => {
assert_ne!(
result.exit_code,
@@ -588,13 +641,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
content: "danger-on-request",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_network",
approval_policy: OnRequest,
@@ -605,12 +675,28 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "danger-network-ok",
},
},
ScenarioSpec {
name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::FetchUrl {
endpoint: "/dfa/network",
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccessNoExitCode {
body_contains: "danger-network-ok",
},
},
ScenarioSpec {
name: "trusted_command_unless_trusted_runs_without_prompt",
approval_policy: UnlessTrusted,
@@ -620,12 +706,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-unless",
},
},
ScenarioSpec {
name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::RunCommand {
command: &["echo", "trusted-unless"],
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-unless",
},
},
ScenarioSpec {
name: "danger_full_access_on_failure_allows_outside_write",
approval_policy: OnFailure,
@@ -636,13 +737,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
content: "danger-on-failure",
},
},
ScenarioSpec {
name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
approval_policy: OnFailure,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
},
ScenarioSpec {
name: "danger_full_access_unless_trusted_requests_approval",
approval_policy: UnlessTrusted,
@@ -653,7 +771,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -663,6 +781,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "danger-unless-trusted",
},
},
ScenarioSpec {
name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
},
ScenarioSpec {
name: "danger_full_access_never_allows_outside_write",
approval_policy: Never,
@@ -673,13 +811,30 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
content: "danger-never",
},
},
ScenarioSpec {
name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
approval_policy: Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::WriteFile {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
},
ScenarioSpec {
name: "read_only_on_request_requires_approval",
approval_policy: OnRequest,
@@ -690,7 +845,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -700,6 +855,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-approval",
},
},
ScenarioSpec {
name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
with_escalated_permissions: true,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
},
ScenarioSpec {
name: "trusted_command_on_request_read_only_runs_without_prompt",
approval_policy: OnRequest,
@@ -709,12 +884,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-read-only",
},
},
ScenarioSpec {
name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::RunCommand {
command: &["echo", "trusted-read-only"],
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccessNoExitCode {
stdout_contains: "trusted-read-only",
},
},
ScenarioSpec {
name: "read_only_on_request_blocks_network",
approval_policy: OnRequest,
@@ -760,7 +950,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -770,6 +960,27 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-on-failure",
},
},
#[cfg(not(target_os = "linux"))]
ScenarioSpec {
name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
approval_policy: OnFailure,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
},
ScenarioSpec {
name: "read_only_on_request_network_escalates_when_approved",
approval_policy: OnRequest,
@@ -780,7 +991,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -789,6 +1000,25 @@ fn scenarios() -> Vec<ScenarioSpec> {
body_contains: "read-only-network-ok",
},
},
ScenarioSpec {
name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::FetchUrl {
endpoint: "/ro/network-approved",
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::NetworkSuccessNoExitCode {
body_contains: "read-only-network-ok",
},
},
ScenarioSpec {
name: "apply_patch_shell_requires_patch_approval",
approval_policy: UnlessTrusted,
@@ -819,7 +1049,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::Workspace("apply_patch_function.txt"),
@@ -836,7 +1066,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![Feature::ApplyPatchFreeform],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
@@ -853,7 +1083,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -873,7 +1103,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Denied,
expected_reason: None,
@@ -913,7 +1143,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -933,7 +1163,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5-codex"),
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
expectation: Expectation::FileNotCreated {
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
@@ -952,7 +1182,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -962,6 +1192,26 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-unless-trusted",
},
},
ScenarioSpec {
name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
approval_policy: UnlessTrusted,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::WriteFile {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
},
expectation: Expectation::FileCreatedNoExitCode {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
},
ScenarioSpec {
name: "read_only_never_reports_sandbox_failure",
approval_policy: Never,
@@ -992,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "trusted-never",
@@ -1008,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
target: TargetPath::Workspace("ww_on_request.txt"),
@@ -1039,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1059,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
body_contains: "workspace-network-ok",
@@ -1076,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some("command failed; retry without sandbox?"),
@@ -1096,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: None,
@@ -1137,7 +1387,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: false,
features: vec![Feature::UnifiedExec],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "hello unified exec",
@@ -1155,7 +1405,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
with_escalated_permissions: true,
features: vec![Feature::UnifiedExec],
model_override: None,
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
@@ -1208,7 +1458,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = approval_policy;
config.sandbox_policy = sandbox_policy.clone();
let model = model_override.unwrap_or("gpt-5");
let model = model_override.unwrap_or("gpt-5.1");
config.model = model.to_string();
config.model_family =
find_family_for_model(model).expect("model should map to a known family");

View File

@@ -769,7 +769,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5-codex")
.with_model("gpt-5.1-codex")
.with_config(|config| {
config.model_verbosity = Some(Verbosity::High);
})
@@ -807,7 +807,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5")
.with_model("gpt-5.1")
.with_config(|config| {
config.model_verbosity = Some(Verbosity::High);
})
@@ -1155,7 +1155,7 @@ async fn token_count_includes_rate_limits_snapshot() {
"reasoning_output_tokens": 0,
"total_tokens": 123
},
// Default model is gpt-5-codex in tests → 95% usable context window
// Default model is gpt-5.1-codex in tests → 95% usable context window
"model_context_window": 258400
},
"rate_limits": {
@@ -1304,8 +1304,9 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("known gpt-5 model family");
config.model = "gpt-5.1".to_string();
config.model_family =
find_family_for_model("gpt-5.1").expect("known gpt-5.1 model family");
config.model_context_window = Some(272_000);
})
.build(&server)

View File

@@ -18,7 +18,6 @@ use codex_core::NewConversation;
use codex_core::built_in_model_providers;
use codex_core::compact::SUMMARIZATION_PROMPT;
use codex_core::config::Config;
use codex_core::config::OPENAI_DEFAULT_MODEL;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::WarningEvent;
@@ -111,9 +110,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
// 1. Arrange mocked SSE responses for the initial compact/resume/fork flow.
let server = MockServer::start().await;
mount_initial_flow(&server).await;
let expected_model = "gpt-5.1-codex";
// 2. Start a new conversation and drive it through the compact/resume/fork steps.
let (_home, config, manager, base) = start_test_conversation(&server).await;
let (_home, config, manager, base) =
start_test_conversation(&server, Some(expected_model)).await;
user_turn(&base, "hello world").await;
compact_conversation(&base).await;
@@ -189,7 +189,6 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
.as_str()
.unwrap_or_default()
.to_string();
let expected_model = OPENAI_DEFAULT_MODEL;
let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
@@ -558,7 +557,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
mount_second_compact_flow(&server).await;
// 2. Drive the conversation through compact -> resume -> fork -> compact -> resume.
let (_home, config, manager, base) = start_test_conversation(&server).await;
let (_home, config, manager, base) = start_test_conversation(&server, None).await;
user_turn(&base, "hello world").await;
compact_conversation(&base).await;
@@ -808,6 +807,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
async fn start_test_conversation(
server: &MockServer,
model: Option<&str>,
) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -817,7 +817,9 @@ async fn start_test_conversation(
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
if let Some(model) = model {
config.model = model.to_string();
}
let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let NewConversation { conversation, .. } = manager
.new_conversation(config.clone())

View File

@@ -0,0 +1,97 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::json;
use std::fs;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn execpolicy2_blocks_shell_invocation() -> Result<()> {
let mut builder = test_codex().with_config(|config| {
config.features.enable(Feature::ExecPolicyV2);
let policy_path = config.codex_home.join("policy.codexpolicy");
fs::write(
&policy_path,
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
)
.expect("write policy file");
});
let server = start_mock_server().await;
let test = builder.build(&server).await?;
let call_id = "shell-forbidden";
let args = json!({
"command": ["echo", "blocked"],
"timeout_ms": 1_000,
});
mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run shell command".into(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::ExecCommandEnd(_))
})
.await
else {
unreachable!()
};
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TaskComplete(_))
})
.await;
assert!(
end.aggregated_output
.contains("execpolicy forbids this command"),
"unexpected output: {}",
end.aggregated_output
);
Ok(())
}

View File

@@ -11,7 +11,7 @@ use std::collections::HashSet;
use std::path::Path;
use std::process::Command as StdCommand;
const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
const MODEL_WITH_TOOL: &str = "test-gpt-5.1-codex";
fn ripgrep_available() -> bool {
StdCommand::new("rg")

View File

@@ -31,12 +31,12 @@ const SCHEMA: &str = r#"
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
codex_returns_json_result("gpt-5".to_string()).await
codex_returns_json_result("gpt-5.1".to_string()).await
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
codex_returns_json_result("gpt-5-codex".to_string()).await
codex_returns_json_result("gpt-5.1-codex".to_string()).await
}
async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {

View File

@@ -27,6 +27,7 @@ mod compact;
mod compact_resume_fork;
mod deprecation_notice;
mod exec;
mod execpolicy2;
mod fork_conversation;
mod grep_files;
mod items;

View File

@@ -160,7 +160,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
// with the OpenAI schema, so we just verify the tool presence here
let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
(
"gpt-5",
"gpt-5.1",
vec![
"shell",
"list_mcp_resources",
@@ -183,7 +183,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
],
),
(
"gpt-5-codex",
"gpt-5.1-codex",
vec![
"shell",
"list_mcp_resources",

View File

@@ -364,7 +364,7 @@ async fn review_uses_custom_review_model_from_config() {
// Choose a review model different from the main model; ensure it is used.
let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
cfg.model = "gpt-4.1".to_string();
cfg.review_model = "gpt-5".to_string();
cfg.review_model = "gpt-5.1".to_string();
})
.await;
@@ -394,7 +394,7 @@ async fn review_uses_custom_review_model_from_config() {
// Assert the request body model equals the configured review model
let request = &server.received_requests().await.unwrap()[0];
let body = request.body_json::<serde_json::Value>().unwrap();
assert_eq!(body["model"].as_str().unwrap(), "gpt-5");
assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");
server.verify().await;
}

View File

@@ -1,14 +1,13 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used)]
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::SandboxPolicy;
use core_test_support::assert_regex_match;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_custom_tool_call;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_local_shell_call;
use core_test_support::responses::ev_response_created;
@@ -16,12 +15,18 @@ use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::ApplyPatchModelOutput;
use core_test_support::test_codex::ShellModelOutput;
use core_test_support::test_codex::test_codex;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use std::fs;
use test_case::test_case;
use crate::suite::apply_patch_cli::apply_patch_harness;
use crate::suite::apply_patch_cli::mount_apply_patch;
const FIXTURE_JSON: &str = r#"{
"description": "This is an example JSON file.",
@@ -35,34 +40,88 @@ const FIXTURE_JSON: &str = r#"{
}
"#;
fn shell_responses(
call_id: &str,
command: Vec<&str>,
output_type: ShellModelOutput,
) -> Result<Vec<String>> {
match output_type {
ShellModelOutput::ShellCommand => {
let command = shlex::try_join(command)?;
let parameters = json!({
"command": command,
"timeout_ms": 2_000,
});
Ok(vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(
call_id,
"shell_command",
&serde_json::to_string(&parameters)?,
),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
])
}
ShellModelOutput::Shell => {
let parameters = json!({
"command": command,
"timeout_ms": 2_000,
});
Ok(vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&parameters)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
])
}
ShellModelOutput::LocalShell => Ok(vec![
sse(vec![
ev_response_created("resp-1"),
ev_local_shell_call(call_id, "completed", command),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
]),
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_stays_json_without_freeform_apply_patch(
output_type: ShellModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_config(move |config| {
config.features.disable(Feature::ApplyPatchFreeform);
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
let _ = output_type;
});
let test = builder.build(&server).await?;
let call_id = "shell-json";
let args = json!({
"command": ["/bin/echo", "shell json"],
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(call_id, vec!["/bin/echo", "shell json"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -80,7 +139,6 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
let mut parsed: Value = serde_json::from_str(output)?;
if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
// duration_seconds is non-deterministic; remove it for deep equality
let _ = metadata.remove("duration_seconds");
}
@@ -102,31 +160,26 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_is_structured_with_freeform_apply_patch(
output_type: ShellModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::ApplyPatchFreeform);
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
let _ = output_type;
});
let test = builder.build(&server).await?;
let call_id = "shell-structured";
let args = json!({
"command": ["/bin/echo", "freeform shell"],
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(call_id, vec!["/bin/echo", "freeform shell"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -159,14 +212,23 @@ freeform shell
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_preserves_fixture_json_without_serialization() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_preserves_fixture_json_without_serialization(
output_type: ShellModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_config(move |config| {
config.features.disable(Feature::ApplyPatchFreeform);
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a model family");
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
let _ = output_type;
});
let test = builder.build(&server).await?;
@@ -175,21 +237,11 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
let fixture_path_str = fixture_path.to_string_lossy().to_string();
let call_id = "shell-json-fixture";
let args = json!({
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(
call_id,
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
output_type,
)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -232,12 +284,21 @@ async fn shell_output_preserves_fixture_json_without_serialization() -> Result<(
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_structures_fixture_with_serialization(
output_type: ShellModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::ApplyPatchFreeform);
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
let _ = output_type;
});
let test = builder.build(&server).await?;
@@ -246,21 +307,11 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
let fixture_path_str = fixture_path.to_string_lossy().to_string();
let call_id = "shell-structured-fixture";
let args = json!({
"command": ["/usr/bin/sed", "-n", "p", fixture_path_str],
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(
call_id,
vec!["/usr/bin/sed", "-n", "p", fixture_path_str.as_str()],
output_type,
)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -298,40 +349,26 @@ async fn shell_output_structures_fixture_with_serialization() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_for_freeform_tool_records_duration(
output_type: ShellModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_config(move |config| {
config.include_apply_patch_tool = true;
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
let _ = output_type;
});
let test = builder.build(&server).await?;
#[cfg(target_os = "linux")]
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
#[cfg(target_os = "macos")]
let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
#[cfg(windows)]
let sleep_cmd = "timeout 1";
let call_id = "shell-structured";
let args = json!({
"command": sleep_cmd,
"timeout_ms": 2_000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(call_id, vec!["/bin/bash", "-c", "sleep 1"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -371,33 +408,26 @@ $"#;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_reserializes_truncated_content() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
let mut builder = test_codex().with_config(move |config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5 is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
let _ = output_type;
});
let test = builder.build(&server).await?;
let call_id = "shell-truncated";
let args = json!({
"command": ["/bin/sh", "-c", "seq 1 400"],
"timeout_ms": 5_000,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "seq 1 400"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -445,14 +475,16 @@ $"#;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_custom_tool_output_is_structured(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;
let call_id = "apply-patch-structured";
let file_name = "structured.txt";
@@ -463,33 +495,17 @@ async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
*** End Patch
"#
);
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_custom_tool_call(call_id, "apply_patch", &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
mount_apply_patch(&harness, call_id, &patch, "done", output_type).await;
test.submit_turn_with_policy(
"apply the patch via custom tool",
SandboxPolicy::DangerFullAccess,
)
.await?;
harness
.test()
.submit_turn_with_policy(
"apply the patch via custom tool",
SandboxPolicy::DangerFullAccess,
)
.await?;
let req = mock
.last_request()
.expect("apply_patch output request recorded");
let output_item = req.custom_tool_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
let output = harness.apply_patch_output(call_id, output_type).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -499,53 +515,39 @@ Success. Updated the following files:
A {file_name}
?$"
);
assert_regex_match(&expected_pattern, output);
assert_regex_match(&expected_pattern, output.as_str());
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_custom_tool_call_creates_file(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;
let call_id = "apply-patch-add-file";
let file_name = "custom_tool_apply_patch.txt";
let patch = format!(
"*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
);
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_custom_tool_call(call_id, "apply_patch", &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "apply_patch done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
mount_apply_patch(&harness, call_id, &patch, "apply_patch done", output_type).await;
test.submit_turn_with_policy(
"apply the patch via custom tool to create a file",
SandboxPolicy::DangerFullAccess,
)
.await?;
harness
.test()
.submit_turn_with_policy(
"apply the patch via custom tool to create a file",
SandboxPolicy::DangerFullAccess,
)
.await?;
let req = mock
.last_request()
.expect("apply_patch output request recorded");
let output_item = req.custom_tool_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
let output = harness.apply_patch_output(call_id, output_type).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -555,9 +557,9 @@ Success. Updated the following files:
A {file_name}
?$"
);
assert_regex_match(&expected_pattern, output);
assert_regex_match(&expected_pattern, output.as_str());
let new_file_path = test.cwd.path().join(file_name);
let new_file_path = harness.path(file_name);
let created_contents = fs::read_to_string(&new_file_path)?;
assert_eq!(
created_contents, "custom tool content\n",
@@ -568,49 +570,42 @@ A {file_name}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_custom_tool_call_updates_existing_file(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;
let call_id = "apply-patch-update-file";
let file_name = "custom_tool_apply_patch_existing.txt";
let file_path = test.cwd.path().join(file_name);
let file_path = harness.path(file_name);
fs::write(&file_path, "before\n")?;
let patch = format!(
"*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
);
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_custom_tool_call(call_id, "apply_patch", &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "apply_patch update done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
"apply the patch via custom tool to update a file",
SandboxPolicy::DangerFullAccess,
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch update done",
output_type,
)
.await?;
.await;
let req = mock
.last_request()
.expect("apply_patch output request recorded");
let output_item = req.custom_tool_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
harness
.test()
.submit_turn_with_policy(
"apply the patch via custom tool to update a file",
SandboxPolicy::DangerFullAccess,
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
@@ -620,7 +615,7 @@ Success. Updated the following files:
M {file_name}
?$"
);
assert_regex_match(&expected_pattern, output);
assert_regex_match(&expected_pattern, output.as_str());
let updated_contents = fs::read_to_string(file_path)?;
assert_eq!(updated_contents, "after\n", "expected updated file content");
@@ -629,99 +624,83 @@ M {file_name}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_custom_tool_call_reports_failure_output(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;
let call_id = "apply-patch-failure";
let missing_file = "missing_custom_tool_apply_patch.txt";
let patch = format!(
"*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
);
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_custom_tool_call(call_id, "apply_patch", &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "apply_patch failure done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
"attempt a failing apply_patch via custom tool",
SandboxPolicy::DangerFullAccess,
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch failure done",
output_type,
)
.await?;
.await;
let req = mock
.last_request()
.expect("apply_patch output request recorded");
let output_item = req.custom_tool_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
harness
.test()
.submit_turn_with_policy(
"attempt a failing apply_patch via custom tool",
SandboxPolicy::DangerFullAccess,
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let expected_output = format!(
"apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
test.cwd.path().to_string_lossy()
harness.cwd().to_string_lossy()
);
assert_eq!(output, expected_output);
assert_eq!(output, expected_output.as_str());
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn apply_patch_function_call_output_is_structured() -> Result<()> {
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
async fn apply_patch_function_call_output_is_structured(
output_type: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;
let harness = apply_patch_harness().await?;
let call_id = "apply-patch-function";
let file_name = "function_apply_patch.txt";
let patch =
format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n");
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_apply_patch_function_call(call_id, &patch),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "apply_patch function done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
"apply the patch via function-call apply_patch",
SandboxPolicy::DangerFullAccess,
mount_apply_patch(
&harness,
call_id,
&patch,
"apply_patch function done",
output_type,
)
.await?;
let req = mock
.last_request()
.expect("apply_patch function output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("apply_patch output string");
.await;
harness
.test()
.submit_turn_with_policy(
"apply the patch via function-call apply_patch",
SandboxPolicy::DangerFullAccess,
)
.await?;
let output = harness.apply_patch_output(call_id, output_type).await;
let expected_pattern = format!(
r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
@@ -730,40 +709,32 @@ Success. Updated the following files:
A {file_name}
?$"
);
assert_regex_match(&expected_pattern, output);
assert_regex_match(&expected_pattern, output.as_str());
Ok(())
}
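
Because the wall-time value is non-deterministic, the assertion matches it with a numeric pattern rather than a literal. A self-contained illustration; the sample output string is invented, and the pattern fragment mirrors the `expected_pattern` above:

```rust
use regex_lite::Regex;

fn main() {
    // (?s) is carried over from the real pattern, where `.` must span newlines.
    let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds";
    let sample = "Exit code: 0\nWall time: 0.37 seconds\nSuccess. Updated the following files:\nA function_apply_patch.txt";
    assert!(Regex::new(pattern).expect("valid pattern").is_match(sample));
}
```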
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_is_structured_for_nonzero_exit(output_type: ShellModelOutput) -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
let mut builder = test_codex().with_config(move |config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
});
let test = builder.build(&server).await?;
let call_id = "shell-nonzero-exit";
let args = json!({
"command": ["/bin/sh", "-c", "exit 42"],
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "shell failure handled"),
ev_completed("resp-2"),
]),
];
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "exit 42"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -793,7 +764,7 @@ async fn shell_command_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::ShellCommandTool);
});
let test = builder.build(&server).await?;
@@ -847,9 +818,9 @@ async fn local_shell_call_output_is_structured() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.include_apply_patch_tool = true;
});
let test = builder.build(&server).await?;


@@ -57,9 +57,9 @@ async fn run_turn_and_measure(test: &TestCodex, prompt: &str) -> anyhow::Result<
#[allow(clippy::expect_used)]
async fn build_codex_with_test_tool(server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
let mut builder = test_codex().with_config(|config| {
config.model = "test-gpt-5-codex".to_string();
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5-codex").expect("test-gpt-5-codex model family");
find_family_for_model("test-gpt-5.1-codex").expect("test-gpt-5.1-codex model family");
});
builder.build(server).await
}


@@ -197,9 +197,9 @@ async fn sandbox_denied_shell_returns_original_output() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex model family");
});
let fixture = builder.build(&server).await?;
@@ -425,8 +425,8 @@ async fn shell_timeout_handles_background_grandchild_stdout() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is a valid model");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is a valid model");
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
});
let test = builder.build(&server).await?;


@@ -41,9 +41,9 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
// Use the test model that wires function tools like grep_files
config.model = "test-gpt-5-codex".to_string();
config.model = "test-gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("test-gpt-5-codex").expect("model family for test model");
find_family_for_model("test-gpt-5.1-codex").expect("model family for test model");
});
let test = builder.build(&server).await?;
@@ -105,9 +105,9 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;
@@ -197,9 +197,9 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;
let call_id = "shell-single-truncation";


@@ -30,8 +30,8 @@ use pretty_assertions::assert_eq;
async fn undo_harness() -> Result<TestCodexHarness> {
TestCodexHarness::with_config(|config: &mut Config| {
config.include_apply_patch_tool = true;
config.model = "gpt-5".to_string();
config.model_family = find_family_for_model("gpt-5").expect("gpt-5 is valid");
config.model = "gpt-5.1".to_string();
config.model_family = find_family_for_model("gpt-5.1").expect("gpt-5.1 is valid");
config.features.enable(Feature::GhostCommit);
})
.await


@@ -159,7 +159,7 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
@@ -236,7 +236,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
@@ -288,28 +288,22 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
})
.await?;
let begin_event = wait_for_event_match(&codex, |msg| match msg {
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
_ => None,
})
.await;
assert_eq!(
begin_event.cwd, workdir,
"exec_command cwd should reflect the requested workdir override"
);
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let outputs = collect_tool_outputs(&bodies)?;
let output = outputs
.get(call_id)
.expect("missing exec_command workdir output");
let output_text = output.output.trim();
let output_canonical = std::fs::canonicalize(output_text)?;
let expected_canonical = std::fs::canonicalize(&workdir)?;
assert_eq!(
output_canonical, expected_canonical,
"pwd should reflect the requested workdir override"
);
Ok(())
}
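
The canonicalization step above is what keeps this assertion stable: `pwd` output and the requested workdir can name the same directory through different strings. A minimal sketch of the idea; the macOS `/tmp` → `/private/tmp` symlink is the classic case:

```rust
use std::fs;
use std::io;

// Compare two paths by identity rather than by spelling.
fn same_dir(a: &str, b: &str) -> io::Result<bool> {
    Ok(fs::canonicalize(a)? == fs::canonicalize(b)?)
}

fn main() -> io::Result<()> {
    // On macOS this prints `true` even though the two strings differ,
    // because /tmp is a symlink to /private/tmp.
    println!("{}", same_dir("/tmp", "/private/tmp")?);
    Ok(())
}
```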


@@ -270,9 +270,9 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5-codex".to_string();
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;


@@ -55,7 +55,7 @@ Start a new session with optional overrides:
Request `newConversation` params (subset):
- `model`: string model id (e.g. "o3", "gpt-5", "gpt-5-codex")
- `model`: string model id (e.g. "o3", "gpt-5.1", "gpt-5.1-codex")
- `profile`: optional named profile
- `cwd`: optional working directory
- `approvalPolicy`: `untrusted` | `on-request` | `on-failure` | `never`
@@ -119,13 +119,13 @@ For the complete request/response shapes and flow examples, see the [“Auth end
## Example: start and send a message
```json
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5", "approvalPolicy": "on-request" } }
{ "jsonrpc": "2.0", "id": 1, "method": "newConversation", "params": { "model": "gpt-5.1", "approvalPolicy": "on-request" } }
```
Server responds:
```json
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5", "rolloutPath": "/path/to/rollout.jsonl" } }
{ "jsonrpc": "2.0", "id": 1, "result": { "conversationId": "c7b0…", "model": "gpt-5.1", "rolloutPath": "/path/to/rollout.jsonl" } }
```
Then send input:


@@ -24,7 +24,6 @@ codex-common = { workspace = true, features = [
"sandbox_summary",
] }
codex-core = { workspace = true }
codex-ollama = { workspace = true }
codex-protocol = { workspace = true }
mcp-types = { workspace = true }
opentelemetry-appender-tracing = { workspace = true }


@@ -18,9 +18,15 @@ pub struct Cli {
#[arg(long, short = 'm')]
pub model: Option<String>,
/// Use open-source provider.
#[arg(long = "oss", default_value_t = false)]
pub oss: bool,
/// Specify which local provider to use (lmstudio or ollama).
/// If omitted when --oss is set, the config default is used or a selection prompt is shown.
#[arg(long = "local-provider")]
pub oss_provider: Option<String>,
/// Select the sandbox policy to use when executing model-generated shell
/// commands.
#[arg(long = "sandbox", short = 's', value_enum)]


@@ -182,6 +182,42 @@ impl EventProcessor for EventProcessorWithHumanOutput {
ts_msg!(self, " {}", details.style(self.dimmed));
}
}
EventMsg::McpStartupUpdate(update) => {
let status_text = match update.status {
codex_core::protocol::McpStartupStatus::Starting => "starting".to_string(),
codex_core::protocol::McpStartupStatus::Ready => "ready".to_string(),
codex_core::protocol::McpStartupStatus::Cancelled => "cancelled".to_string(),
codex_core::protocol::McpStartupStatus::Failed { ref error } => {
format!("failed: {error}")
}
};
ts_msg!(
self,
"{} {} {}",
"mcp:".style(self.cyan),
update.server,
status_text
);
}
EventMsg::McpStartupComplete(summary) => {
let mut parts = Vec::new();
if !summary.ready.is_empty() {
parts.push(format!("ready: {}", summary.ready.join(", ")));
}
if !summary.failed.is_empty() {
let servers: Vec<_> = summary.failed.iter().map(|f| f.server.clone()).collect();
parts.push(format!("failed: {}", servers.join(", ")));
}
if !summary.cancelled.is_empty() {
parts.push(format!("cancelled: {}", summary.cancelled.join(", ")));
}
let joined = if parts.is_empty() {
"no servers".to_string()
} else {
parts.join("; ")
};
ts_msg!(self, "{} {}", "mcp startup:".style(self.cyan), joined);
}
EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
ts_msg!(self, "{}", message.style(self.dimmed));
}
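
For a concrete sense of the summary format assembled above, here is the joining logic in isolation (the server names are invented for illustration):

```rust
fn main() {
    // Hypothetical startup: two servers ready, one failed, none cancelled.
    let parts = vec!["ready: fetch, git".to_string(), "failed: db".to_string()];
    let joined = if parts.is_empty() {
        "no servers".to_string()
    } else {
        parts.join("; ")
    };
    assert_eq!(joined, "ready: fetch, git; failed: db");
    // The emitted line then reads: mcp startup: ready: fetch, git; failed: db
    println!("mcp startup: {joined}");
}
```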


@@ -10,26 +10,26 @@ mod event_processor_with_human_output;
pub mod event_processor_with_jsonl_output;
pub mod exec_events;
use crate::cli::Command as ExecCommand;
use crate::event_processor::CodexStatus;
use crate::event_processor::EventProcessor;
pub use cli::Cli;
use codex_common::oss::ensure_oss_provider_ready;
use codex_common::oss::get_default_model_for_oss_provider;
use codex_core::AuthManager;
use codex_core::BUILT_IN_OSS_MODEL_PROVIDER_ID;
use codex_core::ConversationManager;
use codex_core::LMSTUDIO_OSS_PROVIDER_ID;
use codex_core::NewConversation;
use codex_core::OLLAMA_OSS_PROVIDER_ID;
use codex_core::auth::enforce_login_restrictions;
use codex_core::client::http::set_default_originator;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::find_conversation_path_by_id_str;
use codex_core::config::find_codex_home;
use codex_core::config::load_config_as_toml_with_cli_overrides;
use codex_core::config::resolve_oss_provider;
use codex_core::git_info::get_git_repo_root;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SessionSource;
use codex_ollama::DEFAULT_OSS_MODEL;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::user_input::UserInput;
use event_processor_with_human_output::EventProcessorWithHumanOutput;
@@ -46,6 +46,12 @@ use tracing::info;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::prelude::*;
use crate::cli::Command as ExecCommand;
use crate::event_processor::CodexStatus;
use crate::event_processor::EventProcessor;
use codex_core::default_client::set_default_originator;
use codex_core::find_conversation_path_by_id_str;
pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
if let Err(err) = set_default_originator("codex_exec".to_string()) {
tracing::warn!(?err, "Failed to set codex exec originator override");
@@ -56,6 +62,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
images,
model: model_cli_arg,
oss,
oss_provider,
config_profile,
full_auto,
dangerously_bypass_approvals_and_sandbox,
@@ -145,21 +152,64 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
sandbox_mode_cli_arg.map(Into::<SandboxMode>::into)
};
// When using `--oss`, let the bootstrapper pick the model (defaulting to
// gpt-oss:20b) and ensure it is present locally. Also, force the builtin
// `oss` model provider.
let model = if let Some(model) = model_cli_arg {
Some(model)
} else if oss {
Some(DEFAULT_OSS_MODEL.to_owned())
} else {
None // No model specified, will use the default.
// Parse `-c` overrides from the CLI.
let cli_kv_overrides = match config_overrides.parse_overrides() {
Ok(v) => v,
#[allow(clippy::print_stderr)]
Err(e) => {
eprintln!("Error parsing -c overrides: {e}");
std::process::exit(1);
}
};
// Load config.toml early to determine project state.
#[allow(clippy::print_stderr)]
let config_toml = {
let codex_home = match find_codex_home() {
Ok(codex_home) => codex_home,
Err(err) => {
eprintln!("Error finding codex home: {err}");
std::process::exit(1);
}
};
match load_config_as_toml_with_cli_overrides(&codex_home, cli_kv_overrides.clone()).await {
Ok(config_toml) => config_toml,
Err(err) => {
eprintln!("Error loading config.toml: {err}");
std::process::exit(1);
}
}
};
let model_provider = if oss {
Some(BUILT_IN_OSS_MODEL_PROVIDER_ID.to_string())
let resolved = resolve_oss_provider(
oss_provider.as_deref(),
&config_toml,
config_profile.clone(),
);
if let Some(provider) = resolved {
Some(provider)
} else {
return Err(anyhow::anyhow!(
"No default OSS provider configured. Use --local-provider=provider or set oss_provider to either {LMSTUDIO_OSS_PROVIDER_ID} or {OLLAMA_OSS_PROVIDER_ID} in config.toml"
));
}
} else {
None // No specific model provider override.
None // OSS mode not enabled.
};
// When using `--oss`, let the bootstrapper pick the model based on the selected provider.
let model = if let Some(model) = model_cli_arg {
Some(model)
} else if oss {
model_provider
.as_ref()
.and_then(|provider_id| get_default_model_for_oss_provider(provider_id))
.map(std::borrow::ToOwned::to_owned)
} else {
None // No model specified, will use the default.
};
// Load configuration and determine approval policy
@@ -171,7 +221,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
approval_policy: Some(AskForApproval::Never),
sandbox_mode,
cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
model_provider,
model_provider: model_provider.clone(),
codex_linux_sandbox_exe,
base_instructions: None,
developer_instructions: None,
@@ -182,14 +232,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
experimental_sandbox_command_assessment: None,
additional_writable_roots: add_dir,
};
// Parse `-c` overrides.
let cli_kv_overrides = match config_overrides.parse_overrides() {
Ok(v) => v,
Err(e) => {
eprintln!("Error parsing -c overrides: {e}");
std::process::exit(1);
}
};
let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
@@ -232,7 +274,18 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
};
if oss {
codex_ollama::ensure_oss_ready(&config)
// The `oss` branch above guarantees a provider id was resolved,
// but handle the None case defensively anyway.
let provider_id = match model_provider.as_ref() {
Some(id) => id,
None => {
error!("OSS provider unexpectedly not set when oss flag is used");
return Err(anyhow::anyhow!(
"OSS provider not set but oss flag was used"
));
}
};
ensure_oss_provider_ready(provider_id, &config)
.await
.map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
}
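
Condensed, the new resolution order is: `--local-provider` (or the config default) picks the provider, an explicit `-m` always wins for the model, and otherwise the provider's default model is used. A self-contained model of that order; the stub helpers below only stand in for `resolve_oss_provider` and `get_default_model_for_oss_provider` from this diff and are not their real implementations:

```rust
// Stub: the real resolve_oss_provider also consults config.toml and profiles.
fn resolve_provider(cli_flag: Option<&str>, config_default: Option<&str>) -> Option<String> {
    cli_flag.or(config_default).map(str::to_owned)
}

// Stub: illustrative default only (the old code defaulted to gpt-oss:20b).
fn default_model_for(provider: &str) -> Option<&'static str> {
    match provider {
        "ollama" | "lmstudio" => Some("gpt-oss:20b"),
        _ => None,
    }
}

fn pick(
    oss: bool,
    model_cli: Option<&str>,
    cli_provider: Option<&str>,
) -> Result<(Option<String>, Option<String>), String> {
    let provider = if oss {
        Some(
            resolve_provider(cli_provider, Some("ollama"))
                .ok_or_else(|| "No default OSS provider configured".to_string())?,
        )
    } else {
        None
    };
    let model = match model_cli {
        Some(m) => Some(m.to_owned()), // explicit -m always wins
        None if oss => provider.as_deref().and_then(default_model_for).map(str::to_owned),
        None => None, // fall through to the global default model
    };
    Ok((provider, model))
}

fn main() {
    // --oss with no -m: the resolved provider supplies the model.
    let (provider, model) = pick(true, None, Some("ollama")).unwrap();
    assert_eq!(provider.as_deref(), Some("ollama"));
    assert_eq!(model.as_deref(), Some("gpt-oss:20b"));
}
```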


@@ -618,15 +618,19 @@ fn error_followed_by_task_complete_produces_turn_failed() {
#[test]
fn exec_command_end_success_produces_completed_command_item() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let command = vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()];
let cwd = std::env::current_dir().unwrap();
let parsed_cmd = Vec::new();
// Begin -> no output
let begin = event(
"c1",
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: "1".to_string(),
command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
cwd: std::env::current_dir().unwrap(),
parsed_cmd: Vec::new(),
turn_id: "turn-1".to_string(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::Agent,
interaction_input: None,
}),
@@ -652,6 +656,12 @@ fn exec_command_end_success_produces_completed_command_item() {
"c2",
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: "1".to_string(),
turn_id: "turn-1".to_string(),
command,
cwd,
parsed_cmd,
source: ExecCommandSource::Agent,
interaction_input: None,
stdout: String::new(),
stderr: String::new(),
aggregated_output: "hi\n".to_string(),
@@ -680,15 +690,19 @@ fn exec_command_end_success_produces_completed_command_item() {
#[test]
fn exec_command_end_failure_produces_failed_command_item() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let command = vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()];
let cwd = std::env::current_dir().unwrap();
let parsed_cmd = Vec::new();
// Begin -> no output
let begin = event(
"c1",
EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id: "2".to_string(),
command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
cwd: std::env::current_dir().unwrap(),
parsed_cmd: Vec::new(),
turn_id: "turn-1".to_string(),
command: command.clone(),
cwd: cwd.clone(),
parsed_cmd: parsed_cmd.clone(),
source: ExecCommandSource::Agent,
interaction_input: None,
}),
@@ -713,6 +727,12 @@ fn exec_command_end_failure_produces_failed_command_item() {
"c2",
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: "2".to_string(),
turn_id: "turn-1".to_string(),
command,
cwd,
parsed_cmd,
source: ExecCommandSource::Agent,
interaction_input: None,
stdout: String::new(),
stderr: String::new(),
aggregated_output: String::new(),
@@ -747,6 +767,12 @@ fn exec_command_end_without_begin_is_ignored() {
"c1",
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id: "no-begin".to_string(),
turn_id: "turn-1".to_string(),
command: Vec::new(),
cwd: PathBuf::from("."),
parsed_cmd: Vec::new(),
source: ExecCommandSource::Agent,
interaction_input: None,
stdout: String::new(),
stderr: String::new(),
aggregated_output: String::new(),


@@ -37,7 +37,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
.arg("--output-schema")
.arg(&schema_path)
.arg("-m")
.arg("gpt-5")
.arg("gpt-5.1")
.arg("tell me a joke")
.assert()
.success();


@@ -196,7 +196,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
.arg("--sandbox")
.arg("workspace-write")
.arg("--model")
.arg("gpt-5")
.arg("gpt-5.1")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt)
@@ -218,7 +218,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
.arg("--sandbox")
.arg("workspace-write")
.arg("--model")
.arg("gpt-5-high")
.arg("gpt-5.1-high")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt2)
@@ -231,7 +231,7 @@ fn exec_resume_preserves_cli_configuration_overrides() -> anyhow::Result<()> {
let stderr = String::from_utf8(output.stderr)?;
assert!(
stderr.contains("model: gpt-5-high"),
stderr.contains("model: gpt-5.1-high"),
"stderr missing model override: {stderr}"
);
if cfg!(target_os = "windows") {

Some files were not shown because too many files have changed in this diff.