Compare commits

...

8 Commits

Author SHA1 Message Date
owenchak-oai
14b0a97ba2 Derive required Node version from package.json 2025-06-02 19:55:57 -07:00
Michael Bolin
5a5aa89914 chore: replace regex with regex-lite, where appropriate (#1200)
As explained on https://crates.io/crates/regex-lite, `regex-lite` is a
lighter alternative to `regex` and seems to be sufficient for our
purposes.
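
The swap is mostly mechanical because `regex-lite` keeps the same core API used
here (`Regex::new`, `escape`, `Captures`, `Error`). A rough sketch of the
pattern used in the policy code in the diff below (`forbidden_substring_regex`
is a made-up helper name for illustration):

```rust
use regex_lite::Regex;

/// Build a single regex that matches any of the forbidden substrings,
/// mirroring the `Policy` construction shown in the policy.rs hunk below.
fn forbidden_substring_regex(forbidden: &[&str]) -> Result<Regex, regex_lite::Error> {
    let escaped = forbidden
        .iter()
        .map(|s| regex_lite::escape(s))
        .collect::<Vec<_>>()
        .join("|");
    Regex::new(&format!("({escaped})"))
}
```
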
2025-06-02 17:11:45 -07:00
Michael Bolin
0f3cc8f842 feat: make reasoning effort/summaries configurable (#1199)
Prior to this PR, we always set `reasoning` when making a request
using the Responses API:


d7245cbbc9/codex-rs/core/src/client.rs (L108-L111)

Though if you tried to use the Rust CLI with `--model gpt-4.1`, this
would fail with:

```shell
"Unsupported parameter: 'reasoning.effort' is not supported with this model."
```

We take a cue from the TypeScript CLI, which does a check on the model
name:


d7245cbbc9/codex-cli/src/utils/agent/agent-loop.ts (L786-L789)

This PR does a similar check, though also adds support for the following
config options:

```
model_reasoning_effort = "low" | "medium" | "high" | "none"
model_reasoning_summary = "auto" | "concise" | "detailed" | "none"
```

This way, if you have a model whose name happens to start with `"o"` (or
`"codex"`?), you can set these to `"none"` to explicitly disable
reasoning, if necessary. (That said, it seems unlikely anyone would use
the Responses API with non-OpenAI models, but we provide an escape
hatch, anyway.)
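
Condensed, the new logic is roughly the following. This is a simplified sketch
(the enum and the `reasoning_effort_for_request` helper are stand-ins); the
real implementation lives in `client_common.rs` and `config_types.rs` in the
diff below:

```rust
/// Stand-in for the `ReasoningEffort` config enum added in config_types.rs.
#[derive(Debug, Clone, Copy)]
enum ReasoningEffort {
    Low,
    Medium,
    High,
    /// Disables reasoning entirely.
    None,
}

/// Hardcoded prefix check, taking a cue from the TypeScript CLI.
fn model_supports_reasoning_summaries(model: &str) -> bool {
    model.starts_with("o") || model.starts_with("codex")
}

/// Returns the value to serialize as `reasoning.effort`, or `None` to omit the
/// `reasoning` field from the request (e.g. for `gpt-4.1`, or when the user
/// sets `model_reasoning_effort = "none"`).
fn reasoning_effort_for_request(model: &str, effort: ReasoningEffort) -> Option<&'static str> {
    if !model_supports_reasoning_summaries(model) {
        return None;
    }
    match effort {
        ReasoningEffort::Low => Some("low"),
        ReasoningEffort::Medium => Some("medium"),
        ReasoningEffort::High => Some("high"),
        ReasoningEffort::None => None,
    }
}
```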

This PR also updates both the TUI and `codex exec` to show `reasoning
effort` and `reasoning summaries` in the header.
2025-06-02 16:01:34 -07:00
Michael Bolin
d7245cbbc9 fix: chat completions API now also passes tools along (#1167)
Prior to this PR, there were two big misses in `chat_completions.rs`:

1. The loop in `stream_chat_completions()` included only items of type
`ResponseItem::Message` when building up the `"messages"` JSON for the
`POST` request to the `chat/completions` endpoint. This PR fixes that by
ensuring the other variants (`FunctionCall`, `LocalShellCall`, and
`FunctionCallOutput`) are included as well.
2. In `process_chat_sse()`, we were not recording tool calls and were
only emitting items of type
`ResponseEvent::OutputItemDone(ResponseItem::Message)` to the stream.
Now we introduce `FunctionCallState`, which is used to accumulate the
`delta`s of type `tool_calls` so we can ultimately emit a
`ResponseItem::FunctionCall` when appropriate (a condensed sketch follows).
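
Roughly, the accumulation works like this. This is a condensed sketch
(`apply_delta` is a made-up helper); the full version, including the
`finish_reason == "tool_calls"` handling, is in the `chat_completions.rs`
hunk below:

```rust
use serde_json::Value;

/// Accumulates one streamed tool call across `delta` chunks.
#[derive(Default)]
struct FunctionCallState {
    name: Option<String>,
    arguments: String,
    call_id: Option<String>,
    active: bool,
}

impl FunctionCallState {
    /// Fold a single `tool_calls` delta (already parsed from an SSE chunk)
    /// into the accumulated state.
    fn apply_delta(&mut self, tool_call: &Value) {
        self.active = true;
        if let Some(id) = tool_call.get("id").and_then(|v| v.as_str()) {
            self.call_id.get_or_insert_with(|| id.to_string());
        }
        if let Some(function) = tool_call.get("function") {
            if let Some(name) = function.get("name").and_then(|n| n.as_str()) {
                self.name.get_or_insert_with(|| name.to_string());
            }
            if let Some(fragment) = function.get("arguments").and_then(|a| a.as_str()) {
                // `arguments` arrives in fragments; keep appending until the
                // chunk whose `finish_reason` is `tool_calls` arrives.
                self.arguments.push_str(fragment);
            }
        }
    }
}
```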

While function calling now appears to work for chat completions in my
local testing, I believe there are still edge cases that are not
covered and that this codepath would benefit from a battery of
integration tests. (As part of that further cleanup, we should also work
to support streaming responses in the UI.)

The other important part of this PR is some cleanup in
`core/src/codex.rs`. In particular, it was hard to reason about how
`run_task()` was building up the list of messages to include in a
request across the various cases:

- Responses API
- Chat Completions API
- Responses API used in concert with ZDR

I like to think things are a bit cleaner now (see the sketch after this list), where:

- `zdr_transcript` (if present) contains all messages in the history of
the conversation, which includes function call outputs that have not
been sent back to the model yet
- `pending_input` includes any messages the user has submitted while the
turn is in flight that need to be injected as part of the next `POST` to
the model
- `input_for_next_turn` includes the tool call outputs that have not
been sent back to the model yet
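
Sketched with the names above (`assemble_turn_input` is a stand-in for
illustration, and `Item` stands in for `ResponseItem`; the real logic is
inline in `run_task()` in the `codex.rs` hunk below):

```rust
/// Assemble the items to send in the next POST to the model.
fn assemble_turn_input<Item: Clone>(
    zdr_transcript: Option<&[Item]>,    // full conversation history, when Chat/ZDR is in use
    mut input_for_next_turn: Vec<Item>, // tool-call outputs not yet sent back
    pending_input: Vec<Item>,           // user messages injected while the turn was in flight
) -> Vec<Item> {
    match zdr_transcript {
        // Chat/ZDR: the transcript already contains everything except the
        // just-injected user input, so send the full history plus that input.
        Some(history) => [history.to_vec(), pending_input].concat(),
        // Responses API: send only the net-new items for this turn.
        None => {
            input_for_next_turn.extend(pending_input);
            input_for_next_turn
        }
    }
}
```
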
2025-06-02 13:47:51 -07:00
Michael Bolin
e40f86b446 chore: logging cleanup (#1196)
Update what we log to make `RUST_LOG=debug` a bit easier to work with.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1196).
* #1167
* __->__ #1196
2025-06-02 13:31:33 -07:00
Michael Bolin
7896b1089d chore: update the WORKFLOW_URL in install_native_deps.sh to the latest release (#1190) 2025-05-31 10:30:50 -07:00
Michael Bolin
1410ae95ca fix: set --config hide_agent_reasoning=true in the GitHub Action (#1185)
Whoops, I had this flipped in https://github.com/openai/codex/pull/1183.
2025-05-30 23:57:05 -07:00
Michael Bolin
fccf5f3221 fix: disable agent reasoning output by default in the GitHub Action (#1183) 2025-05-30 23:49:48 -07:00
24 changed files with 573 additions and 112 deletions

View File

@@ -15,14 +15,14 @@ inputs:
codex_args:
description: "A whitespace-delimited list of arguments to pass to Codex. Due to limitations in YAML, arguments with spaces are not supported. For more complex configurations, use the `codex_home` input."
required: false
default: "--full-auto"
default: "--config hide_agent_reasoning=true --full-auto"
codex_home:
description: "Value to use as the CODEX_HOME environment variable when running Codex."
required: false
codex_release_tag:
description: "The release tag of the Codex model to run."
required: false
default: "codex-rs-d519bd8bbd1e1fd9efdc5d68cf7bebdec0dd0f28-1-rust-v0.0.2505270918"
default: "codex-rs-ca8e97fcbcb991e542b8689f2d4eab9d30c399d6-1-rust-v0.0.2505302325"
runs:
using: "composite"

View File

@@ -81,6 +81,18 @@ if (wantsNative) {
process.exit(exitCode);
}
// Ensure a compatible Node version before falling back to the JavaScript CLI.
const pkgJsonPath = path.join(__dirname, "../package.json");
const { engines } = JSON.parse(fs.readFileSync(pkgJsonPath, "utf8"));
const requiredNodeMajor = Number.parseInt(engines.node.match(/\d+/)[0], 10);
const currentMajor = Number.parseInt(process.versions.node.split(".")[0], 10);
if (currentMajor < requiredNodeMajor) {
console.error(
`Codex CLI requires Node.js ${requiredNodeMajor} or newer. Detected ${process.version}. Please upgrade your Node.js installation.`
);
process.exit(1);
}
// Fallback: execute the original JavaScript CLI.
// Resolve the path to the compiled CLI bundle

View File

@@ -65,7 +65,7 @@ mkdir -p "$BIN_DIR"
# Until we start publishing stable GitHub releases, we have to grab the binaries
# from the GitHub Action that created them. Update the URL below to point to the
# appropriate workflow run:
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/15334411824"
WORKFLOW_URL="https://github.com/openai/codex/actions/runs/15361005231"
WORKFLOW_ID="${WORKFLOW_URL##*/}"
ARTIFACTS_DIR="$(mktemp -d)"

codex-rs/Cargo.lock (generated): 13 lines changed
View File

@@ -567,7 +567,6 @@ version = "0.0.0"
dependencies = [
"anyhow",
"pretty_assertions",
"regex",
"serde_json",
"similar",
"tempfile",
@@ -635,6 +634,8 @@ dependencies = [
"seccompiler",
"serde",
"serde_json",
"strum 0.27.1",
"strum_macros 0.27.1",
"tempfile",
"thiserror 2.0.12",
"time",
@@ -680,7 +681,7 @@ dependencies = [
"log",
"multimap",
"path-absolutize",
"regex",
"regex-lite",
"serde",
"serde_json",
"serde_with",
@@ -755,7 +756,7 @@ dependencies = [
"pretty_assertions",
"ratatui",
"ratatui-image",
"regex",
"regex-lite",
"serde_json",
"shlex",
"strum 0.27.1",
@@ -3321,6 +3322,12 @@ dependencies = [
"regex-syntax 0.8.5",
]
[[package]]
name = "regex-lite"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a"
[[package]]
name = "regex-syntax"
version = "0.6.29"

View File

@@ -12,7 +12,6 @@ workspace = true
[dependencies]
anyhow = "1"
regex = "1.11.1"
serde_json = "1.0.110"
similar = "2.7.0"
thiserror = "2.0.12"

View File

@@ -142,6 +142,34 @@ Users can specify config values at multiple levels. Order of precedence is as fo
3. as an entry in `config.toml`, e.g., `model = "o3"`
4. the default value that comes with Codex CLI (i.e., Codex CLI defaults to `codex-mini-latest`)
## model_reasoning_effort
If the model name starts with `"o"` (as in `"o3"` or `"o4-mini"`) or `"codex"`, reasoning is enabled by default when using the Responses API. As explained in the [OpenAI Platform documentation](https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning), this can be set to:
- `"low"`
- `"medium"` (default)
- `"high"`
To disable reasoning, set `model_reasoning_effort` to `"none"` in your config:
```toml
model_reasoning_effort = "none" # disable reasoning
```
## model_reasoning_summary
If the model name starts with `"o"` (as in `"o3"` or `"o4-mini"`) or `"codex"`, reasoning summaries are enabled by default when using the Responses API. As explained in the [OpenAI Platform documentation](https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries), this can be set to:
- `"auto"` (default)
- `"concise"`
- `"detailed"`
To disable reasoning summaries, set `model_reasoning_summary` to `"none"` in your config:
```toml
model_reasoning_summary = "none" # disable reasoning summaries
```
## sandbox_permissions
List of permissions to grant to the sandbox that Codex uses to execute untrusted commands:

View File

@@ -31,6 +31,8 @@ rand = "0.9"
reqwest = { version = "0.12", features = ["json", "stream"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
strum = "0.27.1"
strum_macros = "0.27.1"
thiserror = "2.0.12"
time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
tokio = { version = "1", features = [

View File

@@ -28,8 +28,7 @@ use crate::models::ResponseItem;
use crate::openai_tools::create_tools_json_for_chat_completions_api;
use crate::util::backoff;
/// Implementation for the classic Chat Completions API. This is intentionally
/// minimal: we only stream back plain assistant text.
/// Implementation for the classic Chat Completions API.
pub(crate) async fn stream_chat_completions(
prompt: &Prompt,
model: &str,
@@ -43,17 +42,67 @@ pub(crate) async fn stream_chat_completions(
messages.push(json!({"role": "system", "content": full_instructions}));
for item in &prompt.input {
if let ResponseItem::Message { role, content } = item {
let mut text = String::new();
for c in content {
match c {
ContentItem::InputText { text: t } | ContentItem::OutputText { text: t } => {
text.push_str(t);
match item {
ResponseItem::Message { role, content } => {
let mut text = String::new();
for c in content {
match c {
ContentItem::InputText { text: t }
| ContentItem::OutputText { text: t } => {
text.push_str(t);
}
_ => {}
}
_ => {}
}
messages.push(json!({"role": role, "content": text}));
}
ResponseItem::FunctionCall {
name,
arguments,
call_id,
} => {
messages.push(json!({
"role": "assistant",
"content": null,
"tool_calls": [{
"id": call_id,
"type": "function",
"function": {
"name": name,
"arguments": arguments,
}
}]
}));
}
ResponseItem::LocalShellCall {
id,
call_id: _,
status,
action,
} => {
// Confirm with API team.
messages.push(json!({
"role": "assistant",
"content": null,
"tool_calls": [{
"id": id.clone().unwrap_or_else(|| "".to_string()),
"type": "local_shell_call",
"status": status,
"action": action,
}]
}));
}
ResponseItem::FunctionCallOutput { call_id, output } => {
messages.push(json!({
"role": "tool",
"tool_call_id": call_id,
"content": output.content,
}));
}
ResponseItem::Reasoning { .. } | ResponseItem::Other => {
// Omit these items from the conversation history.
continue;
}
messages.push(json!({"role": role, "content": text}));
}
}
@@ -68,9 +117,8 @@ pub(crate) async fn stream_chat_completions(
let base_url = provider.base_url.trim_end_matches('/');
let url = format!("{}/chat/completions", base_url);
debug!(url, "POST (chat)");
trace!(
"request payload: {}",
debug!(
"POST to {url}: {}",
serde_json::to_string_pretty(&payload).unwrap_or_default()
);
@@ -140,6 +188,21 @@ where
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
// State to accumulate a function call across streaming chunks.
// OpenAI may split the `arguments` string over multiple `delta` events
// until the chunk whose `finish_reason` is `tool_calls` is emitted. We
// keep collecting the pieces here and forward a single
// `ResponseItem::FunctionCall` once the call is complete.
#[derive(Default)]
struct FunctionCallState {
name: Option<String>,
arguments: String,
call_id: Option<String>,
active: bool,
}
let mut fn_call_state = FunctionCallState::default();
loop {
let sse = match timeout(idle_timeout, stream.next()).await {
Ok(Some(Ok(ev))) => ev,
@@ -179,23 +242,89 @@ where
Ok(v) => v,
Err(_) => continue,
};
trace!("chat_completions received SSE chunk: {chunk:?}");
let content_opt = chunk
.get("choices")
.and_then(|c| c.get(0))
.and_then(|c| c.get("delta"))
.and_then(|d| d.get("content"))
.and_then(|c| c.as_str());
let choice_opt = chunk.get("choices").and_then(|c| c.get(0));
if let Some(content) = content_opt {
let item = ResponseItem::Message {
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: content.to_string(),
}],
};
if let Some(choice) = choice_opt {
// Handle assistant content tokens.
if let Some(content) = choice
.get("delta")
.and_then(|d| d.get("content"))
.and_then(|c| c.as_str())
{
let item = ResponseItem::Message {
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: content.to_string(),
}],
};
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
}
// Handle streaming function / tool calls.
if let Some(tool_calls) = choice
.get("delta")
.and_then(|d| d.get("tool_calls"))
.and_then(|tc| tc.as_array())
{
if let Some(tool_call) = tool_calls.first() {
// Mark that we have an active function call in progress.
fn_call_state.active = true;
// Extract call_id if present.
if let Some(id) = tool_call.get("id").and_then(|v| v.as_str()) {
fn_call_state.call_id.get_or_insert_with(|| id.to_string());
}
// Extract function details if present.
if let Some(function) = tool_call.get("function") {
if let Some(name) = function.get("name").and_then(|n| n.as_str()) {
fn_call_state.name.get_or_insert_with(|| name.to_string());
}
if let Some(args_fragment) =
function.get("arguments").and_then(|a| a.as_str())
{
fn_call_state.arguments.push_str(args_fragment);
}
}
}
}
// Emit end-of-turn when finish_reason signals completion.
if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
match finish_reason {
"tool_calls" if fn_call_state.active => {
// Build the FunctionCall response item.
let item = ResponseItem::FunctionCall {
name: fn_call_state.name.clone().unwrap_or_else(|| "".to_string()),
arguments: fn_call_state.arguments.clone(),
call_id: fn_call_state.call_id.clone().unwrap_or_else(String::new),
};
// Emit it downstream.
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
}
"stop" => {
// Regular turn without tool-call.
}
_ => {}
}
// Emit Completed regardless of reason so the agent can advance.
let _ = tx_event
.send(Ok(ResponseEvent::Completed {
response_id: String::new(),
}))
.await;
// Prepare for potential next turn (should not happen in same stream).
// fn_call_state = FunctionCallState::default();
return; // End processing for this SSE stream.
}
}
}
}
@@ -242,9 +371,14 @@ where
Poll::Ready(None) => return Poll::Ready(None),
Poll::Ready(Some(Err(e))) => return Poll::Ready(Some(Err(e))),
Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))) => {
// Accumulate *assistant* text but do not emit yet.
if let crate::models::ResponseItem::Message { role, content } = &item {
if role == "assistant" {
// If this is an incremental assistant message chunk, accumulate but
// do NOT emit yet. Forward any other item (e.g. FunctionCall) right
// away so downstream consumers see it.
let is_assistant_delta = matches!(&item, crate::models::ResponseItem::Message { role, .. } if role == "assistant");
if is_assistant_delta {
if let crate::models::ResponseItem::Message { content, .. } = &item {
if let Some(text) = content.iter().find_map(|c| match c {
crate::models::ContentItem::OutputText { text } => Some(text),
_ => None,
@@ -252,10 +386,13 @@ where
this.cumulative.push_str(text);
}
}
// Swallow partial assistant chunk; keep polling.
continue;
}
// Swallow partial event; keep polling.
continue;
// Not an assistant message; forward immediately.
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
}
Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
if !this.cumulative.is_empty() {

View File

@@ -18,12 +18,13 @@ use tracing::warn;
use crate::chat_completions::AggregateStreamExt;
use crate::chat_completions::stream_chat_completions;
use crate::client_common::Payload;
use crate::client_common::Prompt;
use crate::client_common::Reasoning;
use crate::client_common::ResponseEvent;
use crate::client_common::ResponseStream;
use crate::client_common::Summary;
use crate::client_common::ResponsesApiRequest;
use crate::client_common::create_reasoning_param_for_request;
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::error::CodexErr;
use crate::error::EnvVarError;
use crate::error::Result;
@@ -41,14 +42,23 @@ pub struct ModelClient {
model: String,
client: reqwest::Client,
provider: ModelProviderInfo,
effort: ReasoningEffortConfig,
summary: ReasoningSummaryConfig,
}
impl ModelClient {
pub fn new(model: impl ToString, provider: ModelProviderInfo) -> Self {
pub fn new(
model: impl ToString,
provider: ModelProviderInfo,
effort: ReasoningEffortConfig,
summary: ReasoningSummaryConfig,
) -> Self {
Self {
model: model.to_string(),
client: reqwest::Client::new(),
provider,
effort,
summary,
}
}
@@ -98,17 +108,15 @@ impl ModelClient {
let full_instructions = prompt.get_full_instructions();
let tools_json = create_tools_json_for_responses_api(prompt, &self.model)?;
let payload = Payload {
let reasoning = create_reasoning_param_for_request(&self.model, self.effort, self.summary);
let payload = ResponsesApiRequest {
model: &self.model,
instructions: &full_instructions,
input: &prompt.input,
tools: &tools_json,
tool_choice: "auto",
parallel_tool_calls: false,
reasoning: Some(Reasoning {
effort: "high",
summary: Some(Summary::Auto),
}),
reasoning,
previous_response_id: prompt.prev_id.clone(),
store: prompt.store,
stream: true,
@@ -117,8 +125,7 @@ impl ModelClient {
let base_url = self.provider.base_url.clone();
let base_url = base_url.trim_end_matches('/');
let url = format!("{}/responses", base_url);
debug!(url, "POST");
trace!("request payload: {}", serde_json::to_string(&payload)?);
trace!("POST to {url}: {}", serde_json::to_string(&payload)?);
let mut attempt = 0;
loop {
@@ -303,6 +310,19 @@ where
};
};
}
"response.content_part.done"
| "response.created"
| "response.function_call_arguments.delta"
| "response.in_progress"
| "response.output_item.added"
| "response.output_text.delta"
| "response.output_text.done"
| "response.reasoning_summary_part.added"
| "response.reasoning_summary_text.delta"
| "response.reasoning_summary_text.done" => {
// Currently, we ignore these events, but we handle them
// separately to skip the logging message in the `other` case.
}
other => debug!(other, "sse event"),
}
}

View File

@@ -1,3 +1,5 @@
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::error::Result;
use crate::models::ResponseItem;
use futures::Stream;
@@ -52,25 +54,59 @@ pub enum ResponseEvent {
#[derive(Debug, Serialize)]
pub(crate) struct Reasoning {
pub(crate) effort: &'static str,
pub(crate) effort: OpenAiReasoningEffort,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) summary: Option<Summary>,
pub(crate) summary: Option<OpenAiReasoningSummary>,
}
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning
#[derive(Debug, Serialize, Default, Clone, Copy)]
#[serde(rename_all = "lowercase")]
pub(crate) enum OpenAiReasoningEffort {
Low,
#[default]
Medium,
High,
}
impl From<ReasoningEffortConfig> for Option<OpenAiReasoningEffort> {
fn from(effort: ReasoningEffortConfig) -> Self {
match effort {
ReasoningEffortConfig::Low => Some(OpenAiReasoningEffort::Low),
ReasoningEffortConfig::Medium => Some(OpenAiReasoningEffort::Medium),
ReasoningEffortConfig::High => Some(OpenAiReasoningEffort::High),
ReasoningEffortConfig::None => None,
}
}
}
/// A summary of the reasoning performed by the model. This can be useful for
/// debugging and understanding the model's reasoning process.
#[derive(Debug, Serialize)]
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries
#[derive(Debug, Serialize, Default, Clone, Copy)]
#[serde(rename_all = "lowercase")]
pub(crate) enum Summary {
pub(crate) enum OpenAiReasoningSummary {
#[default]
Auto,
#[allow(dead_code)] // Will go away once this is configurable.
Concise,
#[allow(dead_code)] // Will go away once this is configurable.
Detailed,
}
impl From<ReasoningSummaryConfig> for Option<OpenAiReasoningSummary> {
fn from(summary: ReasoningSummaryConfig) -> Self {
match summary {
ReasoningSummaryConfig::Auto => Some(OpenAiReasoningSummary::Auto),
ReasoningSummaryConfig::Concise => Some(OpenAiReasoningSummary::Concise),
ReasoningSummaryConfig::Detailed => Some(OpenAiReasoningSummary::Detailed),
ReasoningSummaryConfig::None => None,
}
}
}
/// Request object that is serialized as JSON and POST'ed when using the
/// Responses API.
#[derive(Debug, Serialize)]
pub(crate) struct Payload<'a> {
pub(crate) struct ResponsesApiRequest<'a> {
pub(crate) model: &'a str,
pub(crate) instructions: &'a str,
// TODO(mbolin): ResponseItem::Other should not be serialized. Currently,
@@ -88,6 +124,40 @@ pub(crate) struct Payload<'a> {
pub(crate) stream: bool,
}
pub(crate) fn create_reasoning_param_for_request(
model: &str,
effort: ReasoningEffortConfig,
summary: ReasoningSummaryConfig,
) -> Option<Reasoning> {
let effort: Option<OpenAiReasoningEffort> = effort.into();
let effort = effort?;
if model_supports_reasoning_summaries(model) {
Some(Reasoning {
effort,
summary: summary.into(),
})
} else {
None
}
}
pub fn model_supports_reasoning_summaries(model: &str) -> bool {
// Currently, we hardcode this rule to decide whether to enable reasoning.
// We expect reasoning to apply only to OpenAI models, but we do not want
// users to have to mess with their config to disable reasoning for models
// that do not support it, such as `gpt-4.1`.
//
// Though if a user is using Codex with non-OpenAI models that, say, happen
// to start with "o", then they can set `model_reasoning_effort = "none"` in
// config.toml to disable reasoning.
//
// Ultimately, this should also be configurable in config.toml, but we
// need to have defaults that "just work." Perhaps we could have a
// "reasoning models pattern" as part of ModelProviderInfo?
model.starts_with("o") || model.starts_with("codex")
}
pub(crate) struct ResponseStream {
pub(crate) rx_event: mpsc::Receiver<Result<ResponseEvent>>,
}

View File

@@ -20,6 +20,7 @@ use codex_apply_patch::MaybeApplyPatchVerified;
use codex_apply_patch::maybe_parse_apply_patch_verified;
use codex_apply_patch::print_summary;
use futures::prelude::*;
use mcp_types::CallToolResult;
use serde::Serialize;
use serde_json;
use tokio::sync::Notify;
@@ -107,6 +108,8 @@ impl Codex {
let configure_session = Op::ConfigureSession {
provider: config.model_provider.clone(),
model: config.model.clone(),
model_reasoning_effort: config.model_reasoning_effort,
model_reasoning_summary: config.model_reasoning_summary,
instructions,
approval_policy: config.approval_policy,
sandbox_policy: config.sandbox_policy.clone(),
@@ -295,6 +298,17 @@ impl Session {
state.approved_commands.insert(cmd);
}
/// Records items to both the rollout and the chat completions/ZDR
/// transcript, if enabled.
async fn record_conversation_items(&self, items: &[ResponseItem]) {
debug!("Recording items for conversation: {items:?}");
self.record_rollout_items(items).await;
if let Some(transcript) = self.state.lock().unwrap().zdr_transcript.as_mut() {
transcript.record_items(items);
}
}
/// Append the given items to the session's rollout transcript (if enabled)
/// and persist them to disk.
async fn record_rollout_items(&self, items: &[ResponseItem]) {
@@ -388,7 +402,7 @@ impl Session {
tool: &str,
arguments: Option<serde_json::Value>,
timeout: Option<Duration>,
) -> anyhow::Result<mcp_types::CallToolResult> {
) -> anyhow::Result<CallToolResult> {
self.mcp_connection_manager
.call_tool(server, tool, arguments, timeout)
.await
@@ -542,6 +556,8 @@ async fn submission_loop(
Op::ConfigureSession {
provider,
model,
model_reasoning_effort,
model_reasoning_summary,
instructions,
approval_policy,
sandbox_policy,
@@ -563,7 +579,12 @@ async fn submission_loop(
return;
}
let client = ModelClient::new(model.clone(), provider.clone());
let client = ModelClient::new(
model.clone(),
provider.clone(),
model_reasoning_effort,
model_reasoning_summary,
);
// abort any current running session and clone its state
let retain_zdr_transcript =
@@ -760,6 +781,19 @@ async fn submission_loop(
debug!("Agent loop exited");
}
/// Takes a user message as input and runs a loop where, at each turn, the model
/// replies with either:
///
/// - requested function calls
/// - an assistant message
///
/// While it is possible for the model to return multiple of these items in a
/// single turn, in practice, we generally see one item per turn:
///
/// - If the model requests a function call, we execute it and send the output
/// back to the model in the next turn.
/// - If the model sends only an assistant message, we record it in the
/// conversation history and consider the task complete.
async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
if input.is_empty() {
return;
@@ -772,10 +806,14 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
return;
}
let mut pending_response_input: Vec<ResponseInputItem> = vec![ResponseInputItem::from(input)];
let initial_input_for_turn = ResponseInputItem::from(input);
sess.record_conversation_items(&[initial_input_for_turn.clone().into()])
.await;
let mut input_for_next_turn: Vec<ResponseInputItem> = vec![initial_input_for_turn];
let last_agent_message: Option<String>;
loop {
let mut net_new_turn_input = pending_response_input
let mut net_new_turn_input = input_for_next_turn
.drain(..)
.map(ResponseItem::from)
.collect::<Vec<_>>();
@@ -783,11 +821,12 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
// Note that pending_input would be something like a message the user
// submitted through the UI while the model was running. Though the UI
// may support this, the model might not.
let pending_input = sess.get_pending_input().into_iter().map(ResponseItem::from);
net_new_turn_input.extend(pending_input);
// Persist only the net-new items of this turn to the rollout.
sess.record_rollout_items(&net_new_turn_input).await;
let pending_input = sess
.get_pending_input()
.into_iter()
.map(ResponseItem::from)
.collect::<Vec<ResponseItem>>();
sess.record_conversation_items(&pending_input).await;
// Construct the input that we will send to the model. When using the
// Chat completions API (or ZDR clients), the model needs the full
@@ -796,20 +835,24 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
// represents an append-only log without duplicates.
let turn_input: Vec<ResponseItem> =
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
// If we are using Chat/ZDR, we need to send the transcript with every turn.
// 1. Build up the conversation history for the next turn.
let full_transcript = [transcript.contents(), net_new_turn_input.clone()].concat();
// 2. Update the in-memory transcript so that future turns
// include these items as part of the history.
transcript.record_items(&net_new_turn_input);
// Note that `transcript.record_items()` does some filtering
// such that `full_transcript` may include items that were
// excluded from `transcript`.
full_transcript
// If we are using Chat/ZDR, we need to send the transcript with
// every turn. By induction, `transcript` already contains:
// - The `input` that kicked off this task.
// - Each `ResponseItem` that was recorded in the previous turn.
// - Each response to a `ResponseItem` (in practice, the only
// response type we seem to have is `FunctionCallOutput`).
//
// The only thing the `transcript` does not contain is the
// `pending_input` that was injected while the model was
// running. We need to add that to the conversation history
// so that the model can see it in the next turn.
[transcript.contents(), pending_input].concat()
} else {
// In practice, net_new_turn_input should contain only:
// - User messages
// - Outputs for function calls requested by the model
net_new_turn_input.extend(pending_input);
// Responses API path we can just send the new items and
// record the same.
net_new_turn_input
@@ -830,29 +873,86 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
.collect();
match run_turn(&sess, sub_id.clone(), turn_input).await {
Ok(turn_output) => {
let (items, responses): (Vec<_>, Vec<_>) = turn_output
.into_iter()
.map(|p| (p.item, p.response))
.unzip();
let responses = responses
.into_iter()
.flatten()
.collect::<Vec<ResponseInputItem>>();
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
let mut responses = Vec::<ResponseInputItem>::new();
for processed_response_item in turn_output {
let ProcessedResponseItem { item, response } = processed_response_item;
match (&item, &response) {
(ResponseItem::Message { role, .. }, None) if role == "assistant" => {
// If the model returned a message, we need to record it.
items_to_record_in_conversation_history.push(item);
}
(
ResponseItem::LocalShellCall { .. },
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
) => {
items_to_record_in_conversation_history.push(item);
items_to_record_in_conversation_history.push(
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: output.clone(),
},
);
}
(
ResponseItem::FunctionCall { .. },
Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
) => {
items_to_record_in_conversation_history.push(item);
items_to_record_in_conversation_history.push(
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: output.clone(),
},
);
}
(
ResponseItem::FunctionCall { .. },
Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
) => {
items_to_record_in_conversation_history.push(item);
let (content, success): (String, Option<bool>) = match result {
Ok(CallToolResult { content, is_error }) => {
match serde_json::to_string(content) {
Ok(content) => (content, *is_error),
Err(e) => {
warn!("Failed to serialize MCP tool call output: {e}");
(e.to_string(), Some(true))
}
}
}
Err(e) => (e.clone(), Some(true)),
};
items_to_record_in_conversation_history.push(
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload { content, success },
},
);
}
(ResponseItem::Reasoning { .. }, None) => {
// Omit from conversation history.
}
_ => {
warn!("Unexpected response item: {item:?} with response: {response:?}");
}
};
if let Some(response) = response {
responses.push(response);
}
}
// Only attempt to take the lock if there is something to record.
if !items.is_empty() {
// First persist model-generated output to the rollout file this only borrows.
sess.record_rollout_items(&items).await;
// For ZDR we also need to keep a transcript clone.
if let Some(transcript) = sess.state.lock().unwrap().zdr_transcript.as_mut() {
transcript.record_items(&items);
}
if !items_to_record_in_conversation_history.is_empty() {
sess.record_conversation_items(&items_to_record_in_conversation_history)
.await;
}
if responses.is_empty() {
debug!("Turn completed");
last_agent_message = get_last_assistant_message_from_turn(&items);
last_agent_message = get_last_assistant_message_from_turn(
&items_to_record_in_conversation_history,
);
sess.maybe_notify(UserNotification::AgentTurnComplete {
turn_id: sub_id.clone(),
input_messages: turn_input_messages,
@@ -861,7 +961,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
break;
}
pending_response_input = responses;
input_for_next_turn = responses;
}
Err(e) => {
info!("Turn error: {e:#}");

View File

@@ -1,6 +1,8 @@
use crate::config_profile::ConfigProfile;
use crate::config_types::History;
use crate::config_types::McpServerConfig;
use crate::config_types::ReasoningEffort;
use crate::config_types::ReasoningSummary;
use crate::config_types::ShellEnvironmentPolicy;
use crate::config_types::ShellEnvironmentPolicyToml;
use crate::config_types::Tui;
@@ -112,6 +114,14 @@ pub struct Config {
///
/// When this program is invoked, arg0 will be set to `codex-linux-sandbox`.
pub codex_linux_sandbox_exe: Option<PathBuf>,
/// If not "none", the value to use for `reasoning.effort` when making a
/// request using the Responses API.
pub model_reasoning_effort: ReasoningEffort,
/// If not "none", the value to use for `reasoning.summary` when making a
/// request using the Responses API.
pub model_reasoning_summary: ReasoningSummary,
}
impl Config {
@@ -281,6 +291,9 @@ pub struct ConfigToml {
/// When set to `true`, `AgentReasoning` events will be hidden from the
/// UI/output. Defaults to `false`.
pub hide_agent_reasoning: Option<bool>,
pub model_reasoning_effort: Option<ReasoningEffort>,
pub model_reasoning_summary: Option<ReasoningSummary>,
}
fn deserialize_sandbox_permissions<'de, D>(
@@ -444,6 +457,8 @@ impl Config {
codex_linux_sandbox_exe,
hide_agent_reasoning: cfg.hide_agent_reasoning.unwrap_or(false),
model_reasoning_effort: cfg.model_reasoning_effort.unwrap_or_default(),
model_reasoning_summary: cfg.model_reasoning_summary.unwrap_or_default(),
};
Ok(config)
}
@@ -786,6 +801,8 @@ disable_response_storage = true
tui: Tui::default(),
codex_linux_sandbox_exe: None,
hide_agent_reasoning: false,
model_reasoning_effort: ReasoningEffort::default(),
model_reasoning_summary: ReasoningSummary::default(),
},
o3_profile_config
);
@@ -826,6 +843,8 @@ disable_response_storage = true
tui: Tui::default(),
codex_linux_sandbox_exe: None,
hide_agent_reasoning: false,
model_reasoning_effort: ReasoningEffort::default(),
model_reasoning_summary: ReasoningSummary::default(),
};
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -881,6 +900,8 @@ disable_response_storage = true
tui: Tui::default(),
codex_linux_sandbox_exe: None,
hide_agent_reasoning: false,
model_reasoning_effort: ReasoningEffort::default(),
model_reasoning_summary: ReasoningSummary::default(),
};
assert_eq!(expected_zdr_profile_config, zdr_profile_config);

View File

@@ -4,9 +4,11 @@
// definitions that do not contain business logic.
use std::collections::HashMap;
use strum_macros::Display;
use wildmatch::WildMatchPattern;
use serde::Deserialize;
use serde::Serialize;
#[derive(Deserialize, Debug, Clone, PartialEq)]
pub struct McpServerConfig {
@@ -175,3 +177,31 @@ impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
}
}
}
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning
#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display)]
#[serde(rename_all = "lowercase")]
#[strum(serialize_all = "lowercase")]
pub enum ReasoningEffort {
Low,
#[default]
Medium,
High,
/// Option to disable reasoning.
None,
}
/// A summary of the reasoning performed by the model. This can be useful for
/// debugging and understanding the model's reasoning process.
/// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries
#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display)]
#[serde(rename_all = "lowercase")]
#[strum(serialize_all = "lowercase")]
pub enum ReasoningSummary {
#[default]
Auto,
Concise,
Detailed,
/// Option to disable reasoning summaries.
None,
}

View File

@@ -34,3 +34,5 @@ mod rollout;
mod safety;
mod user_notification;
pub mod util;
pub use client_common::model_supports_reasoning_summaries;

View File

@@ -93,7 +93,6 @@ pub(crate) fn create_tools_json_for_responses_api(
.map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
);
tracing::debug!("tools_json: {}", serde_json::to_string_pretty(&tools_json)?);
Ok(tools_json)
}

View File

@@ -12,6 +12,8 @@ use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;
use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::message_history::HistoryEntry;
use crate::model_provider_info::ModelProviderInfo;
@@ -37,6 +39,10 @@ pub enum Op {
/// If not specified, server will use its default model.
model: String,
model_reasoning_effort: ReasoningEffortConfig,
model_reasoning_summary: ReasoningSummaryConfig,
/// Model instructions
instructions: Option<String>,
/// When to escalate for approval for execution

View File

@@ -1,5 +1,7 @@
use codex_common::elapsed::format_elapsed;
use codex_core::WireApi;
use codex_core::config::Config;
use codex_core::model_supports_reasoning_summaries;
use codex_core::protocol::AgentMessageEvent;
use codex_core::protocol::BackgroundEventEvent;
use codex_core::protocol::ErrorEvent;
@@ -127,16 +129,28 @@ impl EventProcessor {
VERSION
);
let entries = vec![
let mut entries = vec![
("workdir", config.cwd.display().to_string()),
("model", config.model.clone()),
("provider", config.model_provider_id.clone()),
("approval", format!("{:?}", config.approval_policy)),
("sandbox", format!("{:?}", config.sandbox_policy)),
];
if config.model_provider.wire_api == WireApi::Responses
&& model_supports_reasoning_summaries(&config.model)
{
entries.push((
"reasoning effort",
config.model_reasoning_effort.to_string(),
));
entries.push((
"reasoning summaries",
config.model_reasoning_summary.to_string(),
));
}
for (key, value) in entries {
println!("{} {}", format!("{key}: ").style(self.bold), value);
println!("{} {}", format!("{key}:").style(self.bold), value);
}
println!("--------");

View File

@@ -24,7 +24,7 @@ env_logger = "0.11.5"
log = "0.4"
multimap = "0.10.0"
path-absolutize = "3.1.1"
regex = "1.11.1"
regex-lite = "0.1"
serde = { version = "1.0.194", features = ["derive"] }
serde_json = "1.0.110"
serde_with = { version = "3", features = ["macros"] }

View File

@@ -1,6 +1,6 @@
use multimap::MultiMap;
use regex::Error as RegexError;
use regex::Regex;
use regex_lite::Error as RegexError;
use regex_lite::Regex;
use crate::ExecCall;
use crate::Forbidden;
@@ -29,7 +29,7 @@ impl Policy {
} else {
let escaped_substrings = forbidden_substrings
.iter()
.map(|s| regex::escape(s))
.map(|s| regex_lite::escape(s))
.collect::<Vec<_>>()
.join("|");
Some(Regex::new(&format!("({escaped_substrings})"))?)

View File

@@ -7,7 +7,7 @@ use crate::arg_matcher::ArgMatcher;
use crate::opt::OptMeta;
use log::info;
use multimap::MultiMap;
use regex::Regex;
use regex_lite::Regex;
use starlark::any::ProvidesStaticType;
use starlark::environment::GlobalsBuilder;
use starlark::environment::LibraryExtension;
@@ -73,7 +73,7 @@ impl PolicyParser {
#[derive(Debug)]
pub struct ForbiddenProgramRegex {
pub regex: regex::Regex,
pub regex: regex_lite::Regex,
pub reason: String,
}
@@ -93,7 +93,7 @@ impl PolicyBuilder {
}
}
fn build(self) -> Result<Policy, regex::Error> {
fn build(self) -> Result<Policy, regex_lite::Error> {
let programs = self.programs.into_inner();
let forbidden_program_regexes = self.forbidden_program_regexes.into_inner();
let forbidden_substrings = self.forbidden_substrings.into_inner();
@@ -207,7 +207,7 @@ fn policy_builtins(builder: &mut GlobalsBuilder) {
.unwrap()
.downcast_ref::<PolicyBuilder>()
.unwrap();
let compiled_regex = regex::Regex::new(&regex)?;
let compiled_regex = regex_lite::Regex::new(&regex)?;
policy_builder.add_forbidden_program_regex(compiled_regex, reason);
Ok(NoneType)
}

View File

@@ -33,7 +33,7 @@ ratatui = { version = "0.29.0", features = [
"unstable-rendered-line-info",
] }
ratatui-image = "8.0.0"
regex = "1"
regex-lite = "0.1"
serde_json = "1"
shlex = "1.3.0"
strum = "0.27.1"

View File

@@ -1,6 +1,6 @@
#![allow(clippy::expect_used)]
use regex::Regex;
use regex_lite::Regex;
// This is defined in its own file so we can limit the scope of
// `allow(clippy::expect_used)` because we cannot scope it to the `lazy_static!`

View File

@@ -5,7 +5,9 @@ use crate::text_block::TextBlock;
use base64::Engine;
use codex_ansi_escape::ansi_escape_line;
use codex_common::elapsed::format_duration;
use codex_core::WireApi;
use codex_core::config::Config;
use codex_core::model_supports_reasoning_summaries;
use codex_core::protocol::FileChange;
use codex_core::protocol::SessionConfiguredEvent;
use image::DynamicImage;
@@ -147,13 +149,25 @@ impl HistoryCell {
]),
];
let entries = vec![
let mut entries = vec![
("workdir", config.cwd.display().to_string()),
("model", config.model.clone()),
("provider", config.model_provider_id.clone()),
("approval", format!("{:?}", config.approval_policy)),
("sandbox", format!("{:?}", config.sandbox_policy)),
];
if config.model_provider.wire_api == WireApi::Responses
&& model_supports_reasoning_summaries(&config.model)
{
entries.push((
"reasoning effort",
config.model_reasoning_effort.to_string(),
));
entries.push((
"reasoning summaries",
config.model_reasoning_summary.to_string(),
));
}
for (key, value) in entries {
lines.push(Line::from(vec![format!("{key}: ").bold(), value.into()]));
}

View File

@@ -71,7 +71,7 @@ fn rewrite_file_citations<'a>(
None => return Cow::Borrowed(src),
};
CITATION_REGEX.replace_all(src, |caps: &regex::Captures<'_>| {
CITATION_REGEX.replace_all(src, |caps: &regex_lite::Captures<'_>| {
let file = &caps[1];
let start_line = &caps[2];