diff --git a/announcement_tip.toml b/announcement_tip.toml index e9bd9ef19f..0726fe237a 100644 --- a/announcement_tip.toml +++ b/announcement_tip.toml @@ -17,7 +17,7 @@ version_regex = "^0\\.0\\.0$" to_date = "2026-05-10" [[announcements]] -content = "**BREAKING NEWS**: `gpt-5.3-codex` is out! Upgrade to `0.98.0` to try it." +content = "**BREAKING NEWS**: `gpt-5.3-codex` is out! Upgrade to `0.98.0` for a faster, smarter, more steerable agent." from_date = "2026-02-01" to_date = "2026-02-15" version_regex = "^0\\.(?:[0-9]|[1-8][0-9]|9[0-7])\\." diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index c6711fcdf8..eaa83da8d6 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -2821,6 +2821,29 @@ ], "type": "object" }, + "TurnSteerParams": { + "properties": { + "expectedTurnId": { + "description": "Required active turn id precondition. The request fails when it does not match the currently active turn.", + "type": "string" + }, + "input": { + "items": { + "$ref": "#/definitions/UserInput" + }, + "type": "array" + }, + "threadId": { + "type": "string" + } + }, + "required": [ + "expectedTurnId", + "input", + "threadId" + ], + "type": "object" + }, "UserInput": { "oneOf": [ { @@ -3546,6 +3569,30 @@ "title": "Turn/startRequest", "type": "object" }, + { + "properties": { + "id": { + "$ref": "#/definitions/RequestId" + }, + "method": { + "enum": [ + "turn/steer" + ], + "title": "Turn/steerRequestMethod", + "type": "string" + }, + "params": { + "$ref": "#/definitions/TurnSteerParams" + } + }, + "required": [ + "id", + "method", + "params" + ], + "title": "Turn/steerRequest", + "type": "object" + }, { "properties": { "id": { diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 0e20aa9ab6..447ad9a65e 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -886,6 +886,30 @@ "title": "Turn/startRequest", "type": "object" }, + { + "properties": { + "id": { + "$ref": "#/definitions/RequestId" + }, + "method": { + "enum": [ + "turn/steer" + ], + "title": "Turn/steerRequestMethod", + "type": "string" + }, + "params": { + "$ref": "#/definitions/v2/TurnSteerParams" + } + }, + "required": [ + "id", + "method", + "params" + ], + "title": "Turn/steerRequest", + "type": "object" + }, { "properties": { "id": { @@ -11255,37 +11279,52 @@ "ExperimentalFeature": { "properties": { "announcement": { - "description": "Announcement copy shown to users when the feature is introduced.", - "type": "string" + "description": "Announcement copy shown to users when the feature is introduced. Null when this feature is not in beta.", + "type": [ + "string", + "null" + ] }, "defaultEnabled": { "description": "Whether this feature is enabled by default.", "type": "boolean" }, "description": { - "description": "Short summary describing what the feature does.", - "type": "string" + "description": "Short summary describing what the feature does. Null when this feature is not in beta.", + "type": [ + "string", + "null" + ] }, "displayName": { - "description": "User-facing display name shown in the experimental features UI.", - "type": "string" + "description": "User-facing display name shown in the experimental features UI. Null when this feature is not in beta.", + "type": [ + "string", + "null" + ] }, "enabled": { "description": "Whether this feature is currently enabled in the loaded config.", "type": "boolean" }, - "flagName": { + "name": { "description": "Stable key used in config.toml and CLI flag toggles.", "type": "string" + }, + "stage": { + "allOf": [ + { + "$ref": "#/definitions/v2/ExperimentalFeatureStage" + } + ], + "description": "Lifecycle stage of this feature flag." } }, "required": [ - "announcement", "defaultEnabled", - "description", - "displayName", "enabled", - "flagName" + "name", + "stage" ], "type": "object" }, @@ -11335,6 +11374,45 @@ "title": "ExperimentalFeatureListResponse", "type": "object" }, + "ExperimentalFeatureStage": { + "oneOf": [ + { + "description": "Feature is available for user testing and feedback.", + "enum": [ + "beta" + ], + "type": "string" + }, + { + "description": "Feature is still being built and not ready for broad use.", + "enum": [ + "underDevelopment" + ], + "type": "string" + }, + { + "description": "Feature is production-ready.", + "enum": [ + "stable" + ], + "type": "string" + }, + { + "description": "Feature is deprecated and should be avoided.", + "enum": [ + "deprecated" + ], + "type": "string" + }, + { + "description": "Feature flag is retained only for backwards compatibility.", + "enum": [ + "removed" + ], + "type": "string" + } + ] + }, "FeedbackUploadParams": { "$schema": "http://json-schema.org/draft-07/schema#", "properties": { @@ -15717,6 +15795,44 @@ ], "type": "string" }, + "TurnSteerParams": { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "expectedTurnId": { + "description": "Required active turn id precondition. The request fails when it does not match the currently active turn.", + "type": "string" + }, + "input": { + "items": { + "$ref": "#/definitions/v2/UserInput" + }, + "type": "array" + }, + "threadId": { + "type": "string" + } + }, + "required": [ + "expectedTurnId", + "input", + "threadId" + ], + "title": "TurnSteerParams", + "type": "object" + }, + "TurnSteerResponse": { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "turnId": { + "type": "string" + } + }, + "required": [ + "turnId" + ], + "title": "TurnSteerResponse", + "type": "object" + }, "UserInput": { "oneOf": [ { diff --git a/codex-rs/app-server-protocol/schema/json/v2/ExperimentalFeatureListResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ExperimentalFeatureListResponse.json index b9e36760bc..25398fc0ea 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ExperimentalFeatureListResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ExperimentalFeatureListResponse.json @@ -4,39 +4,93 @@ "ExperimentalFeature": { "properties": { "announcement": { - "description": "Announcement copy shown to users when the feature is introduced.", - "type": "string" + "description": "Announcement copy shown to users when the feature is introduced. Null when this feature is not in beta.", + "type": [ + "string", + "null" + ] }, "defaultEnabled": { "description": "Whether this feature is enabled by default.", "type": "boolean" }, "description": { - "description": "Short summary describing what the feature does.", - "type": "string" + "description": "Short summary describing what the feature does. Null when this feature is not in beta.", + "type": [ + "string", + "null" + ] }, "displayName": { - "description": "User-facing display name shown in the experimental features UI.", - "type": "string" + "description": "User-facing display name shown in the experimental features UI. Null when this feature is not in beta.", + "type": [ + "string", + "null" + ] }, "enabled": { "description": "Whether this feature is currently enabled in the loaded config.", "type": "boolean" }, - "flagName": { + "name": { "description": "Stable key used in config.toml and CLI flag toggles.", "type": "string" + }, + "stage": { + "allOf": [ + { + "$ref": "#/definitions/ExperimentalFeatureStage" + } + ], + "description": "Lifecycle stage of this feature flag." } }, "required": [ - "announcement", "defaultEnabled", - "description", - "displayName", "enabled", - "flagName" + "name", + "stage" ], "type": "object" + }, + "ExperimentalFeatureStage": { + "oneOf": [ + { + "description": "Feature is available for user testing and feedback.", + "enum": [ + "beta" + ], + "type": "string" + }, + { + "description": "Feature is still being built and not ready for broad use.", + "enum": [ + "underDevelopment" + ], + "type": "string" + }, + { + "description": "Feature is production-ready.", + "enum": [ + "stable" + ], + "type": "string" + }, + { + "description": "Feature is deprecated and should be avoided.", + "enum": [ + "deprecated" + ], + "type": "string" + }, + { + "description": "Feature flag is retained only for backwards compatibility.", + "enum": [ + "removed" + ], + "type": "string" + } + ] } }, "properties": { diff --git a/codex-rs/app-server-protocol/schema/json/v2/TurnSteerParams.json b/codex-rs/app-server-protocol/schema/json/v2/TurnSteerParams.json new file mode 100644 index 0000000000..a064d9e7e3 --- /dev/null +++ b/codex-rs/app-server-protocol/schema/json/v2/TurnSteerParams.json @@ -0,0 +1,189 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "ByteRange": { + "properties": { + "end": { + "format": "uint", + "minimum": 0.0, + "type": "integer" + }, + "start": { + "format": "uint", + "minimum": 0.0, + "type": "integer" + } + }, + "required": [ + "end", + "start" + ], + "type": "object" + }, + "TextElement": { + "properties": { + "byteRange": { + "allOf": [ + { + "$ref": "#/definitions/ByteRange" + } + ], + "description": "Byte range in the parent `text` buffer that this element occupies." + }, + "placeholder": { + "description": "Optional human-readable placeholder for the element, displayed in the UI.", + "type": [ + "string", + "null" + ] + } + }, + "required": [ + "byteRange" + ], + "type": "object" + }, + "UserInput": { + "oneOf": [ + { + "properties": { + "text": { + "type": "string" + }, + "text_elements": { + "default": [], + "description": "UI-defined spans within `text` used to render or persist special elements.", + "items": { + "$ref": "#/definitions/TextElement" + }, + "type": "array" + }, + "type": { + "enum": [ + "text" + ], + "title": "TextUserInputType", + "type": "string" + } + }, + "required": [ + "text", + "type" + ], + "title": "TextUserInput", + "type": "object" + }, + { + "properties": { + "type": { + "enum": [ + "image" + ], + "title": "ImageUserInputType", + "type": "string" + }, + "url": { + "type": "string" + } + }, + "required": [ + "type", + "url" + ], + "title": "ImageUserInput", + "type": "object" + }, + { + "properties": { + "path": { + "type": "string" + }, + "type": { + "enum": [ + "localImage" + ], + "title": "LocalImageUserInputType", + "type": "string" + } + }, + "required": [ + "path", + "type" + ], + "title": "LocalImageUserInput", + "type": "object" + }, + { + "properties": { + "name": { + "type": "string" + }, + "path": { + "type": "string" + }, + "type": { + "enum": [ + "skill" + ], + "title": "SkillUserInputType", + "type": "string" + } + }, + "required": [ + "name", + "path", + "type" + ], + "title": "SkillUserInput", + "type": "object" + }, + { + "properties": { + "name": { + "type": "string" + }, + "path": { + "type": "string" + }, + "type": { + "enum": [ + "mention" + ], + "title": "MentionUserInputType", + "type": "string" + } + }, + "required": [ + "name", + "path", + "type" + ], + "title": "MentionUserInput", + "type": "object" + } + ] + } + }, + "properties": { + "expectedTurnId": { + "description": "Required active turn id precondition. The request fails when it does not match the currently active turn.", + "type": "string" + }, + "input": { + "items": { + "$ref": "#/definitions/UserInput" + }, + "type": "array" + }, + "threadId": { + "type": "string" + } + }, + "required": [ + "expectedTurnId", + "input", + "threadId" + ], + "title": "TurnSteerParams", + "type": "object" +} \ No newline at end of file diff --git a/codex-rs/app-server-protocol/schema/json/v2/TurnSteerResponse.json b/codex-rs/app-server-protocol/schema/json/v2/TurnSteerResponse.json new file mode 100644 index 0000000000..d801a3613c --- /dev/null +++ b/codex-rs/app-server-protocol/schema/json/v2/TurnSteerResponse.json @@ -0,0 +1,13 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "turnId": { + "type": "string" + } + }, + "required": [ + "turnId" + ], + "title": "TurnSteerResponse", + "type": "object" +} \ No newline at end of file diff --git a/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts b/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts index 4322623f8e..964de10156 100644 --- a/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts +++ b/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts @@ -53,6 +53,7 @@ import type { ThreadStartParams } from "./v2/ThreadStartParams"; import type { ThreadUnarchiveParams } from "./v2/ThreadUnarchiveParams"; import type { TurnInterruptParams } from "./v2/TurnInterruptParams"; import type { TurnStartParams } from "./v2/TurnStartParams"; +import type { TurnSteerParams } from "./v2/TurnSteerParams"; /** * Request from the client to the server. diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/ExperimentalFeature.ts b/codex-rs/app-server-protocol/schema/typescript/v2/ExperimentalFeature.ts index 6275148780..e17ef83138 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/ExperimentalFeature.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/ExperimentalFeature.ts @@ -1,24 +1,32 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ExperimentalFeatureStage } from "./ExperimentalFeatureStage"; export type ExperimentalFeature = { /** * Stable key used in config.toml and CLI flag toggles. */ -flagName: string, +name: string, +/** + * Lifecycle stage of this feature flag. + */ +stage: ExperimentalFeatureStage, /** * User-facing display name shown in the experimental features UI. + * Null when this feature is not in beta. */ -displayName: string, +displayName: string | null, /** * Short summary describing what the feature does. + * Null when this feature is not in beta. */ -description: string, +description: string | null, /** * Announcement copy shown to users when the feature is introduced. + * Null when this feature is not in beta. */ -announcement: string, +announcement: string | null, /** * Whether this feature is currently enabled in the loaded config. */ diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/ExperimentalFeatureStage.ts b/codex-rs/app-server-protocol/schema/typescript/v2/ExperimentalFeatureStage.ts new file mode 100644 index 0000000000..dbd206e05f --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/ExperimentalFeatureStage.ts @@ -0,0 +1,5 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +export type ExperimentalFeatureStage = "beta" | "underDevelopment" | "stable" | "deprecated" | "removed"; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/TurnSteerParams.ts b/codex-rs/app-server-protocol/schema/typescript/v2/TurnSteerParams.ts new file mode 100644 index 0000000000..2c84f195cf --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/TurnSteerParams.ts @@ -0,0 +1,11 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { UserInput } from "./UserInput"; + +export type TurnSteerParams = { threadId: string, input: Array, +/** + * Required active turn id precondition. The request fails when it does not + * match the currently active turn. + */ +expectedTurnId: string, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/TurnSteerResponse.ts b/codex-rs/app-server-protocol/schema/typescript/v2/TurnSteerResponse.ts new file mode 100644 index 0000000000..390adb4f59 --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/TurnSteerResponse.ts @@ -0,0 +1,5 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +export type TurnSteerResponse = { turnId: string, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts index 787eef34fc..deba0dd5c3 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts @@ -55,6 +55,7 @@ export type { ExecPolicyAmendment } from "./ExecPolicyAmendment"; export type { ExperimentalFeature } from "./ExperimentalFeature"; export type { ExperimentalFeatureListParams } from "./ExperimentalFeatureListParams"; export type { ExperimentalFeatureListResponse } from "./ExperimentalFeatureListResponse"; +export type { ExperimentalFeatureStage } from "./ExperimentalFeatureStage"; export type { FeedbackUploadParams } from "./FeedbackUploadParams"; export type { FeedbackUploadResponse } from "./FeedbackUploadResponse"; export type { FileChangeApprovalDecision } from "./FileChangeApprovalDecision"; @@ -181,6 +182,8 @@ export type { TurnStartParams } from "./TurnStartParams"; export type { TurnStartResponse } from "./TurnStartResponse"; export type { TurnStartedNotification } from "./TurnStartedNotification"; export type { TurnStatus } from "./TurnStatus"; +export type { TurnSteerParams } from "./TurnSteerParams"; +export type { TurnSteerResponse } from "./TurnSteerResponse"; export type { UserInput } from "./UserInput"; export type { WebSearchAction } from "./WebSearchAction"; export type { WindowsWorldWritableWarningNotification } from "./WindowsWorldWritableWarningNotification"; diff --git a/codex-rs/app-server-protocol/src/protocol/common.rs b/codex-rs/app-server-protocol/src/protocol/common.rs index e5c45672a6..3e13bdd0e3 100644 --- a/codex-rs/app-server-protocol/src/protocol/common.rs +++ b/codex-rs/app-server-protocol/src/protocol/common.rs @@ -256,6 +256,10 @@ client_request_definitions! { params: v2::TurnStartParams, response: v2::TurnStartResponse, }, + TurnSteer => "turn/steer" { + params: v2::TurnSteerParams, + response: v2::TurnSteerResponse, + }, TurnInterrupt => "turn/interrupt" { params: v2::TurnInterruptParams, response: v2::TurnInterruptResponse, diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 883ba57d48..ed6161cfea 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -1055,18 +1055,39 @@ pub struct ExperimentalFeatureListParams { pub limit: Option, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub enum ExperimentalFeatureStage { + /// Feature is available for user testing and feedback. + Beta, + /// Feature is still being built and not ready for broad use. + UnderDevelopment, + /// Feature is production-ready. + Stable, + /// Feature is deprecated and should be avoided. + Deprecated, + /// Feature flag is retained only for backwards compatibility. + Removed, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct ExperimentalFeature { /// Stable key used in config.toml and CLI flag toggles. - pub flag_name: String, + pub name: String, + /// Lifecycle stage of this feature flag. + pub stage: ExperimentalFeatureStage, /// User-facing display name shown in the experimental features UI. - pub display_name: String, + /// Null when this feature is not in beta. + pub display_name: Option, /// Short summary describing what the feature does. - pub description: String, + /// Null when this feature is not in beta. + pub description: Option, /// Announcement copy shown to users when the feature is introduced. - pub announcement: String, + /// Null when this feature is not in beta. + pub announcement: Option, /// Whether this feature is currently enabled in the loaded config. pub enabled: bool, /// Whether this feature is enabled by default. @@ -2083,6 +2104,24 @@ pub struct TurnStartResponse { pub turn: Turn, } +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct TurnSteerParams { + pub thread_id: String, + pub input: Vec, + /// Required active turn id precondition. The request fails when it does not + /// match the currently active turn. + pub expected_turn_id: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct TurnSteerResponse { + pub turn_id: String, +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index 41944bfe03..ad4d32c9d4 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -96,11 +96,12 @@ Example (from OpenAI's official VSCode extension): - `thread/backgroundTerminals/clean` — terminate all running background terminals for a thread; returns `{}` when the cleanup request is accepted. - `thread/rollback` — drop the last N turns from the agent’s in-memory context and persist a rollback marker in the rollout so future resumes see the pruned history; returns the updated `thread` (with `turns` populated) on success. - `turn/start` — add user input to a thread and begin Codex generation; responds with the initial `turn` object and streams `turn/started`, `item/*`, and `turn/completed` notifications. +- `turn/steer` — add user input to an already in-flight turn without starting a new turn; returns the active `turnId` that accepted the input. - `turn/interrupt` — request cancellation of an in-flight turn by `(thread_id, turn_id)`; success is an empty `{}` response and the turn finishes with `status: "interrupted"`. - `review/start` — kick off Codex’s automated reviewer for a thread; responds like `turn/start` and emits `item/started`/`item/completed` notifications with `enteredReviewMode` and `exitedReviewMode` items, plus a final assistant `agentMessage` containing the review. - `command/exec` — run a single command under the server sandbox without starting a thread/turn (handy for utilities and validation). - `model/list` — list available models (with reasoning effort options and optional `upgrade` model ids). -- `experimentalFeature/list` — list experimental feature flags with metadata (flag name, display name, description, announcement, enabled/default-enabled) and cursor pagination. +- `experimentalFeature/list` — list feature flags with stage metadata (`beta`, `underDevelopment`, `stable`, etc.), enabled/default-enabled state, and cursor pagination. For non-beta flags, `displayName`/`description`/`announcement` are `null`. - `collaborationMode/list` — list available collaboration mode presets (experimental, no pagination). - `skills/list` — list skills for one or more `cwd` values (optional `forceReload`). - `skills/remote/read` — list public remote skills (**under development; do not call from production clients yet**). @@ -375,6 +376,22 @@ Use `thread/backgroundTerminals/clean` to terminate all running background termi { "id": 35, "result": {} } ``` +### Example: Steer an active turn + +Use `turn/steer` to append additional user input to the currently active turn. This does not emit +`turn/started` and does not accept turn context overrides. + +```json +{ "method": "turn/steer", "id": 32, "params": { + "threadId": "thr_123", + "input": [ { "type": "text", "text": "Actually focus on failing tests first." } ], + "expectedTurnId": "turn_456" +} } +{ "id": 32, "result": { "turnId": "turn_456" } } +``` + +`expectedTurnId` is required. If there is no active turn (or `expectedTurnId` does not match the active turn), the request fails with an `invalid request` error. + ### Example: Request a code review Use `review/start` to run Codex’s reviewer on the currently checked-out project. The request takes the thread id plus a `target` describing what should be reviewed: @@ -908,6 +925,7 @@ Examples of descriptor strings: - `thread/start.mockExperimentalField` (field-level gate) ### For maintainers: Adding experimental fields and methods + Use this checklist when introducing a field/method that should only be available when the client opts into experimental APIs. At runtime, clients must send `initialize` with `capabilities.experimentalApi = true` to use experimental methods or fields. @@ -928,7 +946,7 @@ At runtime, clients must send `initialize` with `capabilities.experimentalApi = # Include experimental API fields/methods in fixtures. just write-app-server-schema --experimental ``` - + 5. Verify the protocol crate: ```bash diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index 1ac55374cb..ddaaf4a852 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -37,6 +37,7 @@ use codex_app_server_protocol::ExecOneOffCommandResponse; use codex_app_server_protocol::ExperimentalFeature as ApiExperimentalFeature; use codex_app_server_protocol::ExperimentalFeatureListParams; use codex_app_server_protocol::ExperimentalFeatureListResponse; +use codex_app_server_protocol::ExperimentalFeatureStage as ApiExperimentalFeatureStage; use codex_app_server_protocol::FeedbackUploadParams; use codex_app_server_protocol::FeedbackUploadResponse; use codex_app_server_protocol::ForkConversationParams; @@ -140,6 +141,8 @@ use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::TurnStartedNotification; use codex_app_server_protocol::TurnStatus; +use codex_app_server_protocol::TurnSteerParams; +use codex_app_server_protocol::TurnSteerResponse; use codex_app_server_protocol::UserInfoResponse; use codex_app_server_protocol::UserInput as V2UserInput; use codex_app_server_protocol::UserSavedConfig; @@ -155,6 +158,7 @@ use codex_core::InitialHistory; use codex_core::NewThread; use codex_core::RolloutRecorder; use codex_core::SessionMeta; +use codex_core::SteerInputError; use codex_core::ThreadConfigSnapshot; use codex_core::ThreadManager; use codex_core::ThreadSortKey as CoreThreadSortKey; @@ -316,7 +320,7 @@ pub(crate) struct CodexMessageProcessorArgs { pub(crate) codex_linux_sandbox_exe: Option, pub(crate) config: Arc, pub(crate) cli_overrides: Vec<(String, TomlValue)>, - pub(crate) cloud_requirements: CloudRequirementsLoader, + pub(crate) cloud_requirements: Arc>, pub(crate) feedback: CodexFeedback, } @@ -362,7 +366,7 @@ impl CodexMessageProcessor { codex_linux_sandbox_exe, config, cli_overrides, - cloud_requirements: Arc::new(RwLock::new(cloud_requirements)), + cloud_requirements, conversation_listeners: HashMap::new(), listener_thread_ids_by_subscription: HashMap::new(), active_login: Arc::new(Mutex::new(None)), @@ -540,6 +544,10 @@ impl CodexMessageProcessor { self.turn_start(to_connection_request_id(request_id), params) .await; } + ClientRequest::TurnSteer { request_id, params } => { + self.turn_steer(to_connection_request_id(request_id), params) + .await; + } ClientRequest::TurnInterrupt { request_id, params } => { self.turn_interrupt(to_connection_request_id(request_id), params) .await; @@ -3302,23 +3310,40 @@ impl CodexMessageProcessor { let data = FEATURES .iter() - .filter_map(|spec| { - let Stage::Experimental { - name, - menu_description, - announcement, - } = spec.stage - else { - return None; + .map(|spec| { + let (stage, display_name, description, announcement) = match spec.stage { + Stage::Experimental { + name, + menu_description, + announcement, + } => ( + ApiExperimentalFeatureStage::Beta, + Some(name.to_string()), + Some(menu_description.to_string()), + Some(announcement.to_string()), + ), + Stage::UnderDevelopment => ( + ApiExperimentalFeatureStage::UnderDevelopment, + None, + None, + None, + ), + Stage::Stable => (ApiExperimentalFeatureStage::Stable, None, None, None), + Stage::Deprecated => { + (ApiExperimentalFeatureStage::Deprecated, None, None, None) + } + Stage::Removed => (ApiExperimentalFeatureStage::Removed, None, None, None), }; - Some(ApiExperimentalFeature { - flag_name: spec.key.to_string(), - display_name: name.to_string(), - description: menu_description.to_string(), - announcement: announcement.to_string(), + + ApiExperimentalFeature { + name: spec.key.to_string(), + stage, + display_name, + description, + announcement, enabled: config.features.enabled(spec.id), default_enabled: spec.default_enabled, - }) + } }) .collect::>(); @@ -3357,7 +3382,7 @@ impl CodexMessageProcessor { if start > total { self.send_invalid_request_error( request_id, - format!("cursor {start} exceeds total experimental features {total}"), + format!("cursor {start} exceeds total feature flags {total}"), ) .await; return; @@ -4642,6 +4667,63 @@ impl CodexMessageProcessor { } } + async fn turn_steer(&self, request_id: ConnectionRequestId, params: TurnSteerParams) { + let (_, thread) = match self.load_thread(¶ms.thread_id).await { + Ok(v) => v, + Err(error) => { + self.outgoing.send_error(request_id, error).await; + return; + } + }; + + if params.expected_turn_id.is_empty() { + self.send_invalid_request_error( + request_id, + "expectedTurnId must not be empty".to_string(), + ) + .await; + return; + } + + let mapped_items: Vec = params + .input + .into_iter() + .map(V2UserInput::into_core) + .collect(); + + match thread + .steer_input(mapped_items, Some(¶ms.expected_turn_id)) + .await + { + Ok(turn_id) => { + let response = TurnSteerResponse { turn_id }; + self.outgoing.send_response(request_id, response).await; + } + Err(err) => { + let (code, message) = match err { + SteerInputError::NoActiveTurn(_) => ( + INVALID_REQUEST_ERROR_CODE, + "no active turn to steer".to_string(), + ), + SteerInputError::ExpectedTurnMismatch { expected, actual } => ( + INVALID_REQUEST_ERROR_CODE, + format!("expected active turn id `{expected}` but found `{actual}`"), + ), + SteerInputError::EmptyInput => ( + INVALID_REQUEST_ERROR_CODE, + "input must not be empty".to_string(), + ), + }; + let error = JSONRPCErrorError { + code, + message, + data: None, + }; + self.outgoing.send_error(request_id, error).await; + } + } + } + fn build_review_turn(turn_id: String, display_text: &str) -> Turn { let items = if display_text.is_empty() { Vec::new() diff --git a/codex-rs/app-server/src/config_api.rs b/codex-rs/app-server/src/config_api.rs index 5a43d6e528..e1f27be0b5 100644 --- a/codex-rs/app-server/src/config_api.rs +++ b/codex-rs/app-server/src/config_api.rs @@ -19,11 +19,16 @@ use codex_core::config_loader::ResidencyRequirement as CoreResidencyRequirement; use codex_core::config_loader::SandboxModeRequirement as CoreSandboxModeRequirement; use serde_json::json; use std::path::PathBuf; +use std::sync::Arc; +use std::sync::RwLock; use toml::Value as TomlValue; #[derive(Clone)] pub(crate) struct ConfigApi { - service: ConfigService, + codex_home: PathBuf, + cli_overrides: Vec<(String, TomlValue)>, + loader_overrides: LoaderOverrides, + cloud_requirements: Arc>, } impl ConfigApi { @@ -31,30 +36,42 @@ impl ConfigApi { codex_home: PathBuf, cli_overrides: Vec<(String, TomlValue)>, loader_overrides: LoaderOverrides, - cloud_requirements: CloudRequirementsLoader, + cloud_requirements: Arc>, ) -> Self { Self { - service: ConfigService::new( - codex_home, - cli_overrides, - loader_overrides, - cloud_requirements, - ), + codex_home, + cli_overrides, + loader_overrides, + cloud_requirements, } } + fn config_service(&self) -> ConfigService { + let cloud_requirements = self + .cloud_requirements + .read() + .map(|guard| guard.clone()) + .unwrap_or_default(); + ConfigService::new( + self.codex_home.clone(), + self.cli_overrides.clone(), + self.loader_overrides.clone(), + cloud_requirements, + ) + } + pub(crate) async fn read( &self, params: ConfigReadParams, ) -> Result { - self.service.read(params).await.map_err(map_error) + self.config_service().read(params).await.map_err(map_error) } pub(crate) async fn config_requirements_read( &self, ) -> Result { let requirements = self - .service + .config_service() .read_requirements() .await .map_err(map_error)? @@ -67,14 +84,20 @@ impl ConfigApi { &self, params: ConfigValueWriteParams, ) -> Result { - self.service.write_value(params).await.map_err(map_error) + self.config_service() + .write_value(params) + .await + .map_err(map_error) } pub(crate) async fn batch_write( &self, params: ConfigBatchWriteParams, ) -> Result { - self.service.batch_write(params).await.map_err(map_error) + self.config_service() + .batch_write(params) + .await + .map_err(map_error) } } diff --git a/codex-rs/app-server/src/message_processor.rs b/codex-rs/app-server/src/message_processor.rs index 7d8da6c1b7..2646e5f0a5 100644 --- a/codex-rs/app-server/src/message_processor.rs +++ b/codex-rs/app-server/src/message_processor.rs @@ -1,5 +1,6 @@ use std::path::PathBuf; use std::sync::Arc; +use std::sync::RwLock; use crate::codex_message_processor::CodexMessageProcessor; use crate::codex_message_processor::CodexMessageProcessorArgs; @@ -157,6 +158,7 @@ impl MessageProcessor { auth_manager.clone(), SessionSource::VSCode, )); + let cloud_requirements = Arc::new(RwLock::new(cloud_requirements)); let codex_message_processor = CodexMessageProcessor::new(CodexMessageProcessorArgs { auth_manager, thread_manager, diff --git a/codex-rs/app-server/tests/common/mcp_process.rs b/codex-rs/app-server/tests/common/mcp_process.rs index d992db6d45..57c29fcf9f 100644 --- a/codex-rs/app-server/tests/common/mcp_process.rs +++ b/codex-rs/app-server/tests/common/mcp_process.rs @@ -62,6 +62,7 @@ use codex_app_server_protocol::ThreadStartParams; use codex_app_server_protocol::ThreadUnarchiveParams; use codex_app_server_protocol::TurnInterruptParams; use codex_app_server_protocol::TurnStartParams; +use codex_app_server_protocol::TurnSteerParams; use codex_core::default_client::CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR; use tokio::process::Command; @@ -557,6 +558,15 @@ impl McpProcess { self.send_request("turn/interrupt", params).await } + /// Send a `turn/steer` JSON-RPC request (v2). + pub async fn send_turn_steer_request( + &mut self, + params: TurnSteerParams, + ) -> anyhow::Result { + let params = Some(serde_json::to_value(params)?); + self.send_request("turn/steer", params).await + } + /// Send a `review/start` JSON-RPC request (v2). pub async fn send_review_start_request( &mut self, diff --git a/codex-rs/app-server/tests/suite/v2/compaction.rs b/codex-rs/app-server/tests/suite/v2/compaction.rs index 4730d920c8..daf6030dd2 100644 --- a/codex-rs/app-server/tests/suite/v2/compaction.rs +++ b/codex-rs/app-server/tests/suite/v2/compaction.rs @@ -29,7 +29,6 @@ use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::UserInput as V2UserInput; use codex_core::auth::AuthCredentialsStoreMode; -use codex_core::features::Feature; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; use core_test_support::responses; @@ -143,12 +142,10 @@ async fn auto_compaction_remote_emits_started_and_completed_items() -> Result<() .await; let codex_home = TempDir::new()?; - let mut features = BTreeMap::default(); - features.insert(Feature::RemoteCompaction, true); write_mock_responses_config_toml( codex_home.path(), &server.uri(), - &features, + &BTreeMap::default(), AUTO_COMPACT_LIMIT, Some(true), "openai", diff --git a/codex-rs/app-server/tests/suite/v2/experimental_feature_list.rs b/codex-rs/app-server/tests/suite/v2/experimental_feature_list.rs index 2ca236d89e..fdcbaca5b0 100644 --- a/codex-rs/app-server/tests/suite/v2/experimental_feature_list.rs +++ b/codex-rs/app-server/tests/suite/v2/experimental_feature_list.rs @@ -6,6 +6,7 @@ use app_test_support::to_response; use codex_app_server_protocol::ExperimentalFeature; use codex_app_server_protocol::ExperimentalFeatureListParams; use codex_app_server_protocol::ExperimentalFeatureListResponse; +use codex_app_server_protocol::ExperimentalFeatureStage; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::RequestId; use codex_core::features::FEATURES; @@ -17,7 +18,7 @@ use tokio::time::timeout; const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); #[tokio::test] -async fn experimental_feature_list_returns_experimental_feature_metadata() -> Result<()> { +async fn experimental_feature_list_returns_feature_metadata_with_stage() -> Result<()> { let codex_home = TempDir::new()?; let mut mcp = McpProcess::new(codex_home.path()).await?; @@ -36,24 +37,35 @@ async fn experimental_feature_list_returns_experimental_feature_metadata() -> Re let actual = to_response::(response)?; let expected_data = FEATURES .iter() - .filter_map(|spec| { - let Stage::Experimental { - name, - menu_description, - announcement, - } = spec.stage - else { - return None; + .map(|spec| { + let (stage, display_name, description, announcement) = match spec.stage { + Stage::Experimental { + name, + menu_description, + announcement, + } => ( + ExperimentalFeatureStage::Beta, + Some(name.to_string()), + Some(menu_description.to_string()), + Some(announcement.to_string()), + ), + Stage::UnderDevelopment => { + (ExperimentalFeatureStage::UnderDevelopment, None, None, None) + } + Stage::Stable => (ExperimentalFeatureStage::Stable, None, None, None), + Stage::Deprecated => (ExperimentalFeatureStage::Deprecated, None, None, None), + Stage::Removed => (ExperimentalFeatureStage::Removed, None, None, None), }; - Some(ExperimentalFeature { - flag_name: spec.key.to_string(), - display_name: name.to_string(), - description: menu_description.to_string(), - announcement: announcement.to_string(), + ExperimentalFeature { + name: spec.key.to_string(), + stage, + display_name, + description, + announcement, enabled: spec.default_enabled, default_enabled: spec.default_enabled, - }) + } }) .collect::>(); let expected = ExperimentalFeatureListResponse { diff --git a/codex-rs/app-server/tests/suite/v2/mod.rs b/codex-rs/app-server/tests/suite/v2/mod.rs index 0894504a0f..c25a6d0568 100644 --- a/codex-rs/app-server/tests/suite/v2/mod.rs +++ b/codex-rs/app-server/tests/suite/v2/mod.rs @@ -26,3 +26,4 @@ mod thread_start; mod thread_unarchive; mod turn_interrupt; mod turn_start; +mod turn_steer; diff --git a/codex-rs/app-server/tests/suite/v2/turn_steer.rs b/codex-rs/app-server/tests/suite/v2/turn_steer.rs new file mode 100644 index 0000000000..89704326fd --- /dev/null +++ b/codex-rs/app-server/tests/suite/v2/turn_steer.rs @@ -0,0 +1,179 @@ +#![cfg(unix)] + +use anyhow::Result; +use app_test_support::McpProcess; +use app_test_support::create_mock_responses_server_sequence; +use app_test_support::create_mock_responses_server_sequence_unchecked; +use app_test_support::create_shell_command_sse_response; +use app_test_support::to_response; +use codex_app_server_protocol::JSONRPCError; +use codex_app_server_protocol::JSONRPCNotification; +use codex_app_server_protocol::JSONRPCResponse; +use codex_app_server_protocol::RequestId; +use codex_app_server_protocol::ThreadStartParams; +use codex_app_server_protocol::ThreadStartResponse; +use codex_app_server_protocol::TurnStartParams; +use codex_app_server_protocol::TurnStartResponse; +use codex_app_server_protocol::TurnSteerParams; +use codex_app_server_protocol::TurnSteerResponse; +use codex_app_server_protocol::UserInput as V2UserInput; +use tempfile::TempDir; +use tokio::time::timeout; + +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); + +#[tokio::test] +async fn turn_steer_requires_active_turn() -> Result<()> { + let tmp = TempDir::new()?; + let codex_home = tmp.path().join("codex_home"); + std::fs::create_dir(&codex_home)?; + + let server = create_mock_responses_server_sequence(vec![]).await; + create_config_toml(&codex_home, &server.uri())?; + + let mut mcp = McpProcess::new(&codex_home).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(thread_resp)?; + + let steer_req = mcp + .send_turn_steer_request(TurnSteerParams { + thread_id: thread.id, + input: vec![V2UserInput::Text { + text: "steer".to_string(), + text_elements: Vec::new(), + }], + expected_turn_id: "turn-does-not-exist".to_string(), + }) + .await?; + let steer_err: JSONRPCError = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_error_message(RequestId::Integer(steer_req)), + ) + .await??; + assert_eq!(steer_err.error.code, -32600); + + Ok(()) +} + +#[tokio::test] +async fn turn_steer_returns_active_turn_id() -> Result<()> { + #[cfg(target_os = "windows")] + let shell_command = vec![ + "powershell".to_string(), + "-Command".to_string(), + "Start-Sleep -Seconds 10".to_string(), + ]; + #[cfg(not(target_os = "windows"))] + let shell_command = vec!["sleep".to_string(), "10".to_string()]; + + let tmp = TempDir::new()?; + let codex_home = tmp.path().join("codex_home"); + std::fs::create_dir(&codex_home)?; + let working_directory = tmp.path().join("workdir"); + std::fs::create_dir(&working_directory)?; + + let server = + create_mock_responses_server_sequence_unchecked(vec![create_shell_command_sse_response( + shell_command.clone(), + Some(&working_directory), + Some(10_000), + "call_sleep", + )?]) + .await; + create_config_toml(&codex_home, &server.uri())?; + + let mut mcp = McpProcess::new(&codex_home).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(thread_resp)?; + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "run sleep".to_string(), + text_elements: Vec::new(), + }], + cwd: Some(working_directory.clone()), + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + let TurnStartResponse { turn } = to_response::(turn_resp)?; + + let _task_started: JSONRPCNotification = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("codex/event/task_started"), + ) + .await??; + + let steer_req = mcp + .send_turn_steer_request(TurnSteerParams { + thread_id: thread.id, + input: vec![V2UserInput::Text { + text: "steer".to_string(), + text_elements: Vec::new(), + }], + expected_turn_id: turn.id.clone(), + }) + .await?; + let steer_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(steer_req)), + ) + .await??; + let steer: TurnSteerResponse = to_response::(steer_resp)?; + assert_eq!(steer.turn_id, turn.id); + + Ok(()) +} + +fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> { + let config_toml = codex_home.join("config.toml"); + std::fs::write( + config_toml, + format!( + r#" +model = "mock-model" +approval_policy = "never" +sandbox_mode = "danger-full-access" + +model_provider = "mock_provider" + +[model_providers.mock_provider] +name = "Mock provider for test" +base_url = "{server_uri}/v1" +wire_api = "responses" +request_max_retries = 0 +stream_max_retries = 0 +"# + ), + ) +} diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index e9ac1c3b87..1364152a46 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -199,9 +199,6 @@ "powershell_utf8": { "type": "boolean" }, - "remote_compaction": { - "type": "boolean" - }, "remote_models": { "type": "boolean" }, @@ -1232,9 +1229,6 @@ "powershell_utf8": { "type": "boolean" }, - "remote_compaction": { - "type": "boolean" - }, "remote_models": { "type": "boolean" }, diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 3631f3afb8..b8568a1043 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -82,6 +82,8 @@ use crate::model_provider_info::ModelProviderInfo; use crate::model_provider_info::WireApi; use crate::tools::spec::create_tools_json_for_responses_api; +pub const OPENAI_BETA_HEADER: &str = "OpenAI-Beta"; +pub const OPENAI_BETA_RESPONSES_WEBSOCKETS: &str = "responses_websockets=2026-02-04"; pub const X_CODEX_TURN_STATE_HEADER: &str = "x-codex-turn-state"; pub const X_CODEX_TURN_METADATA_HEADER: &str = "x-codex-turn-metadata"; pub const X_RESPONSESAPI_INCLUDE_TIMING_METRICS_HEADER: &str = @@ -477,14 +479,8 @@ impl ModelClientSession { }; if needs_new { - let mut headers = options.extra_headers.clone(); - headers.extend(build_conversation_headers(options.conversation_id.clone())); - if self.client.state.include_timing_metrics { - headers.insert( - X_RESPONSESAPI_INCLUDE_TIMING_METRICS_HEADER, - HeaderValue::from_static("true"), - ); - } + let headers = + build_websocket_connect_headers(options, self.client.state.include_timing_metrics); let websocket_telemetry = Self::build_websocket_telemetry(otel_manager); let new_conn: ApiWebSocketConnection = ApiWebSocketResponsesClient::new(api_provider, api_auth) @@ -787,6 +783,25 @@ fn build_responses_headers( headers } +fn build_websocket_connect_headers( + options: &ApiResponsesOptions, + include_timing_metrics: bool, +) -> ApiHeaderMap { + let mut headers = options.extra_headers.clone(); + headers.extend(build_conversation_headers(options.conversation_id.clone())); + headers.insert( + OPENAI_BETA_HEADER, + HeaderValue::from_static(OPENAI_BETA_RESPONSES_WEBSOCKETS), + ); + if include_timing_metrics { + headers.insert( + X_RESPONSESAPI_INCLUDE_TIMING_METRICS_HEADER, + HeaderValue::from_static("true"), + ); + } + headers +} + fn map_response_stream(api_stream: S, otel_manager: OtelManager) -> ResponseStream where S: futures::Stream> diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 8d8eb17553..274f4f491f 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -118,6 +118,13 @@ use crate::error::CodexErr; use crate::error::Result as CodexResult; #[cfg(test)] use crate::exec::StreamOutput; + +#[derive(Debug, PartialEq)] +pub enum SteerInputError { + NoActiveTurn(Vec), + ExpectedTurnMismatch { expected: String, actual: String }, + EmptyInput, +} use crate::exec_policy::ExecPolicyUpdateError; use crate::feedback_tags; use crate::file_watcher::FileWatcher; @@ -455,6 +462,14 @@ impl Codex { Ok(event) } + pub async fn steer_input( + &self, + input: Vec, + expected_turn_id: Option<&str>, + ) -> Result { + self.session.steer_input(input, expected_turn_id).await + } + pub(crate) async fn agent_status(&self) -> AgentStatus { self.agent_status.borrow().clone() } @@ -719,7 +734,6 @@ impl Session { per_turn_config.personality = session_configuration.personality; per_turn_config.web_search_mode = Some(resolve_web_search_mode_for_turn( per_turn_config.web_search_mode, - session_configuration.provider.is_azure_responses_endpoint(), session_configuration.sandbox_policy.get(), )); per_turn_config.features = config.features.clone(); @@ -947,6 +961,14 @@ impl Session { }), }); } + for message in &config.startup_warnings { + post_session_configured_events.push(Event { + id: "".to_owned(), + msg: EventMsg::Warning(WarningEvent { + message: message.clone(), + }), + }); + } maybe_push_unstable_features_warning(&config, &mut post_session_configured_events); let auth = auth.as_ref(); @@ -2328,17 +2350,39 @@ impl Session { .await; } - /// Returns the input if there was no task running to inject into - pub async fn inject_input(&self, input: Vec) -> Result<(), Vec> { - let mut active = self.active_turn.lock().await; - match active.as_mut() { - Some(at) => { - let mut ts = at.turn_state.lock().await; - ts.push_pending_input(input.into()); - Ok(()) - } - None => Err(input), + /// Inject additional user input into the currently active turn. + /// + /// Returns the active turn id when accepted. + pub async fn steer_input( + &self, + input: Vec, + expected_turn_id: Option<&str>, + ) -> Result { + if input.is_empty() { + return Err(SteerInputError::EmptyInput); } + + let mut active = self.active_turn.lock().await; + let Some(active_turn) = active.as_mut() else { + return Err(SteerInputError::NoActiveTurn(input)); + }; + + let Some((active_turn_id, _)) = active_turn.tasks.first() else { + return Err(SteerInputError::NoActiveTurn(input)); + }; + + if let Some(expected_turn_id) = expected_turn_id + && expected_turn_id != active_turn_id + { + return Err(SteerInputError::ExpectedTurnMismatch { + expected: expected_turn_id.to_string(), + actual: active_turn_id.clone(), + }); + } + + let mut turn_state = active_turn.turn_state.lock().await; + turn_state.push_pending_input(input.into()); + Ok(active_turn_id.clone()) } /// Returns the input if there was no task running to inject into @@ -2720,6 +2764,7 @@ async fn submission_loop(sess: Arc, config: Arc, rx_sub: Receiv mod handlers { use crate::codex::Session; use crate::codex::SessionSettingsUpdate; + use crate::codex::SteerInputError; use crate::codex::TurnContext; use crate::codex::spawn_review_thread; @@ -2858,8 +2903,8 @@ mod handlers { }; current_context.otel_manager.user_prompt(&items); - // Attempt to inject input into current task - if let Err(items) = sess.inject_input(items).await { + // Attempt to inject input into current task. + if let Err(SteerInputError::NoActiveTurn(items)) = sess.steer_input(items, None).await { sess.seed_initial_context_if_needed(¤t_context).await; let resumed_model = sess.take_pending_resume_previous_model().await; let update_items = sess.build_settings_update_items( @@ -3839,7 +3884,7 @@ pub(crate) async fn run_turn( } async fn run_auto_compact(sess: &Arc, turn_context: &Arc) { - if should_use_remote_compact_task(sess.as_ref(), &turn_context.provider) { + if should_use_remote_compact_task(&turn_context.provider) { run_inline_remote_auto_compact_task(Arc::clone(sess), Arc::clone(turn_context)).await; } else { run_inline_auto_compact_task(Arc::clone(sess), Arc::clone(turn_context)).await; @@ -6131,6 +6176,89 @@ mod tests { ); } + #[tokio::test] + async fn steer_input_requires_active_turn() { + let (sess, _tc, _rx) = make_session_and_context_with_rx().await; + let input = vec![UserInput::Text { + text: "steer".to_string(), + text_elements: Vec::new(), + }]; + + let err = sess + .steer_input(input, None) + .await + .expect_err("steering without active turn should fail"); + + assert!(matches!(err, SteerInputError::NoActiveTurn(_))); + } + + #[tokio::test] + async fn steer_input_enforces_expected_turn_id() { + let (sess, tc, _rx) = make_session_and_context_with_rx().await; + let input = vec![UserInput::Text { + text: "hello".to_string(), + text_elements: Vec::new(), + }]; + sess.spawn_task( + Arc::clone(&tc), + input, + NeverEndingTask { + kind: TaskKind::Regular, + listen_to_cancellation_token: false, + }, + ) + .await; + + let steer_input = vec![UserInput::Text { + text: "steer".to_string(), + text_elements: Vec::new(), + }]; + let err = sess + .steer_input(steer_input, Some("different-turn-id")) + .await + .expect_err("mismatched expected turn id should fail"); + + match err { + SteerInputError::ExpectedTurnMismatch { expected, actual } => { + assert_eq!( + (expected, actual), + ("different-turn-id".to_string(), tc.sub_id.clone()) + ); + } + other => panic!("unexpected error: {other:?}"), + } + } + + #[tokio::test] + async fn steer_input_returns_active_turn_id() { + let (sess, tc, _rx) = make_session_and_context_with_rx().await; + let input = vec![UserInput::Text { + text: "hello".to_string(), + text_elements: Vec::new(), + }]; + sess.spawn_task( + Arc::clone(&tc), + input, + NeverEndingTask { + kind: TaskKind::Regular, + listen_to_cancellation_token: false, + }, + ) + .await; + + let steer_input = vec![UserInput::Text { + text: "steer".to_string(), + text_elements: Vec::new(), + }]; + let turn_id = sess + .steer_input(steer_input, Some(&tc.sub_id)) + .await + .expect("steering with matching expected turn id should succeed"); + + assert_eq!(turn_id, tc.sub_id); + assert!(sess.has_pending_input().await); + } + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn abort_review_task_emits_exited_then_aborted_and_records_history() { let (sess, tc, rx) = make_session_and_context_with_rx().await; diff --git a/codex-rs/core/src/codex_thread.rs b/codex-rs/core/src/codex_thread.rs index fb8e466d71..0c0bbe0e0d 100644 --- a/codex-rs/core/src/codex_thread.rs +++ b/codex-rs/core/src/codex_thread.rs @@ -1,5 +1,6 @@ use crate::agent::AgentStatus; use crate::codex::Codex; +use crate::codex::SteerInputError; use crate::error::Result as CodexResult; use crate::protocol::Event; use crate::protocol::Op; @@ -9,6 +10,7 @@ use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::SandboxPolicy; use codex_protocol::protocol::SessionSource; +use codex_protocol::user_input::UserInput; use std::path::PathBuf; use tokio::sync::watch; @@ -45,6 +47,14 @@ impl CodexThread { self.codex.submit(op).await } + pub async fn steer_input( + &self, + input: Vec, + expected_turn_id: Option<&str>, + ) -> Result { + self.codex.steer_input(input, expected_turn_id).await + } + /// Use sparingly: this is intended to be removed soon. pub async fn submit_with_id(&self, sub: Submission) -> CodexResult<()> { self.codex.submit_with_id(sub).await diff --git a/codex-rs/core/src/compact.rs b/codex-rs/core/src/compact.rs index ab9a9f15d2..72e002d45a 100644 --- a/codex-rs/core/src/compact.rs +++ b/codex-rs/core/src/compact.rs @@ -9,7 +9,6 @@ use crate::codex::TurnContext; use crate::codex::get_last_assistant_message_from_turn; use crate::error::CodexErr; use crate::error::Result as CodexResult; -use crate::features::Feature; use crate::protocol::CompactedItem; use crate::protocol::EventMsg; use crate::protocol::TurnContextItem; @@ -34,11 +33,8 @@ pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md"); const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000; -pub(crate) fn should_use_remote_compact_task( - session: &Session, - provider: &ModelProviderInfo, -) -> bool { - provider.is_openai() && session.enabled(Feature::RemoteCompaction) +pub(crate) fn should_use_remote_compact_task(provider: &ModelProviderInfo) -> bool { + provider.is_openai() } pub(crate) async fn run_inline_auto_compact_task( diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 78f947db68..7de6f00ea8 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -21,6 +21,7 @@ use crate::config::types::UriBasedFileOpener; use crate::config_loader::CloudRequirementsLoader; use crate::config_loader::ConfigLayerStack; use crate::config_loader::ConfigRequirements; +use crate::config_loader::ConstrainedWithSource; use crate::config_loader::LoaderOverrides; use crate::config_loader::McpServerIdentity; use crate::config_loader::McpServerRequirement; @@ -116,6 +117,9 @@ pub struct Config { /// requirements). pub config_layer_stack: ConfigLayerStack, + /// Warnings collected during config load that should be shown on startup. + pub startup_warnings: Vec, + /// Optional override of model selection. pub model: Option, @@ -601,6 +605,41 @@ fn constrain_mcp_servers( }) } +fn apply_requirement_constrained_value( + field_name: &'static str, + configured_value: T, + constrained_value: &mut ConstrainedWithSource, + startup_warnings: &mut Vec, +) -> std::io::Result<()> +where + T: Clone + std::fmt::Debug + Send + Sync, +{ + if let Err(err) = constrained_value.set(configured_value) { + let fallback_value = constrained_value.get().clone(); + tracing::warn!( + error = %err, + ?fallback_value, + requirement_source = ?constrained_value.source, + "configured value is disallowed by requirements; falling back to required value for {field_name}" + ); + let message = format!( + "Configured value for `{field_name}` is disallowed by requirements; falling back to required value {fallback_value:?}. Details: {err}" + ); + startup_warnings.push(message); + + constrained_value.set(fallback_value).map_err(|fallback_err| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!( + "configured value for `{field_name}` is disallowed by requirements ({err}); fallback to a requirement-compliant value also failed ({fallback_err})" + ), + ) + })?; + } + + Ok(()) +} + fn mcp_server_matches_requirement( requirement: &McpServerRequirement, server: &McpServerConfig, @@ -1288,15 +1327,11 @@ fn resolve_web_search_mode( pub(crate) fn resolve_web_search_mode_for_turn( explicit_mode: Option, - is_azure_responses_endpoint: bool, sandbox_policy: &SandboxPolicy, ) -> WebSearchMode { if let Some(mode) = explicit_mode { return mode; } - if is_azure_responses_endpoint { - return WebSearchMode::Disabled; - } if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) { WebSearchMode::Live } else { @@ -1324,6 +1359,7 @@ impl Config { ) -> std::io::Result { let requirements = config_layer_stack.requirements().clone(); let user_instructions = Self::load_instructions(Some(&codex_home)); + let mut startup_warnings = Vec::new(); // Destructure ConfigOverrides fully to ensure all overrides are applied. let ConfigOverrides { @@ -1590,12 +1626,18 @@ impl Config { enforce_residency, } = requirements; - constrained_approval_policy - .set(approval_policy) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{e}")))?; - constrained_sandbox_policy - .set(sandbox_policy) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{e}")))?; + apply_requirement_constrained_value( + "approval_policy", + approval_policy, + &mut constrained_approval_policy, + &mut startup_warnings, + )?; + apply_requirement_constrained_value( + "sandbox_mode", + sandbox_policy, + &mut constrained_sandbox_policy, + &mut startup_warnings, + )?; let mcp_servers = constrain_mcp_servers(cfg.mcp_servers.clone(), mcp_servers.as_ref()) .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{e}")))?; @@ -1608,6 +1650,7 @@ impl Config { model_provider_id, model_provider, cwd: resolved_cwd, + startup_warnings, approval_policy: constrained_approval_policy.value, sandbox_policy: constrained_sandbox_policy.value, enforce_residency: enforce_residency.value, @@ -2413,14 +2456,14 @@ trust_level = "trusted" #[test] fn web_search_mode_for_turn_defaults_to_cached_when_unset() { - let mode = resolve_web_search_mode_for_turn(None, false, &SandboxPolicy::ReadOnly); + let mode = resolve_web_search_mode_for_turn(None, &SandboxPolicy::ReadOnly); assert_eq!(mode, WebSearchMode::Cached); } #[test] fn web_search_mode_for_turn_defaults_to_live_for_danger_full_access() { - let mode = resolve_web_search_mode_for_turn(None, false, &SandboxPolicy::DangerFullAccess); + let mode = resolve_web_search_mode_for_turn(None, &SandboxPolicy::DangerFullAccess); assert_eq!(mode, WebSearchMode::Live); } @@ -2429,20 +2472,12 @@ trust_level = "trusted" fn web_search_mode_for_turn_prefers_explicit_value() { let mode = resolve_web_search_mode_for_turn( Some(WebSearchMode::Cached), - false, &SandboxPolicy::DangerFullAccess, ); assert_eq!(mode, WebSearchMode::Cached); } - #[test] - fn web_search_mode_for_turn_disables_for_azure_responses_endpoint() { - let mode = resolve_web_search_mode_for_turn(None, true, &SandboxPolicy::DangerFullAccess); - - assert_eq!(mode, WebSearchMode::Disabled); - } - #[test] fn profile_legacy_toggles_override_base() -> std::io::Result<()> { let codex_home = TempDir::new()?; @@ -3865,6 +3900,7 @@ model_verbosity = "high" codex_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), config_layer_stack: Default::default(), + startup_warnings: Vec::new(), history: History::default(), ephemeral: false, file_opener: UriBasedFileOpener::VsCode, @@ -3952,6 +3988,7 @@ model_verbosity = "high" codex_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), config_layer_stack: Default::default(), + startup_warnings: Vec::new(), history: History::default(), ephemeral: false, file_opener: UriBasedFileOpener::VsCode, @@ -4054,6 +4091,7 @@ model_verbosity = "high" codex_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), config_layer_stack: Default::default(), + startup_warnings: Vec::new(), history: History::default(), ephemeral: false, file_opener: UriBasedFileOpener::VsCode, @@ -4142,6 +4180,7 @@ model_verbosity = "high" codex_home: fixture.codex_home(), log_dir: fixture.codex_home().join("log"), config_layer_stack: Default::default(), + startup_warnings: Vec::new(), history: History::default(), ephemeral: false, file_opener: UriBasedFileOpener::VsCode, @@ -4693,7 +4732,7 @@ mcp_oauth_callback_port = 5678 } #[tokio::test] - async fn explicit_sandbox_mode_still_errors_when_disallowed_by_requirements() + async fn explicit_sandbox_mode_falls_back_when_disallowed_by_requirements() -> std::io::Result<()> { let codex_home = TempDir::new()?; std::fs::write( @@ -4712,19 +4751,15 @@ mcp_oauth_callback_port = 5678 enforce_residency: None, }; - let err = ConfigBuilder::default() + let config = ConfigBuilder::default() .codex_home(codex_home.path().to_path_buf()) .fallback_cwd(Some(codex_home.path().to_path_buf())) .cloud_requirements(CloudRequirementsLoader::new( async move { Some(requirements) }, )) .build() - .await - .expect_err("explicit disallowed mode should still fail"); - assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); - let message = err.to_string(); - assert!(message.contains("invalid value for `sandbox_mode`")); - assert!(message.contains("set by cloud requirements")); + .await?; + assert_eq!(*config.sandbox_policy.get(), SandboxPolicy::ReadOnly); Ok(()) } @@ -4761,7 +4796,7 @@ trust_level = "untrusted" } #[tokio::test] - async fn explicit_approval_policy_still_errors_when_disallowed_by_requirements() + async fn explicit_approval_policy_falls_back_when_disallowed_by_requirements() -> std::io::Result<()> { let codex_home = TempDir::new()?; std::fs::write( @@ -4770,7 +4805,7 @@ trust_level = "untrusted" "#, )?; - let err = ConfigBuilder::default() + let config = ConfigBuilder::default() .codex_home(codex_home.path().to_path_buf()) .fallback_cwd(Some(codex_home.path().to_path_buf())) .cloud_requirements(CloudRequirementsLoader::new(async { @@ -4780,12 +4815,8 @@ trust_level = "untrusted" }) })) .build() - .await - .expect_err("explicit disallowed approval policy should fail"); - assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); - let message = err.to_string(); - assert!(message.contains("invalid value for `approval_policy`")); - assert!(message.contains("set by cloud requirements")); + .await?; + assert_eq!(config.approval_policy.value(), AskForApproval::OnRequest); Ok(()) } } diff --git a/codex-rs/core/src/config/service.rs b/codex-rs/core/src/config/service.rs index 72393f805b..b72af4c0f7 100644 --- a/codex-rs/core/src/config/service.rs +++ b/codex-rs/core/src/config/service.rs @@ -774,7 +774,7 @@ unified_exec = true service .write_value(ConfigValueWriteParams { file_path: Some(tmp.path().join(CONFIG_TOML_FILE).display().to_string()), - key_path: "features.remote_compaction".to_string(), + key_path: "features.remote_models".to_string(), value: serde_json::json!(true), merge_strategy: MergeStrategy::Replace, expected_version: None, @@ -794,7 +794,7 @@ hide_full_access_warning = true [features] unified_exec = true -remote_compaction = true +remote_models = true "#; assert_eq!(updated, expected); Ok(()) diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index ee091979eb..22cb88f998 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -97,8 +97,6 @@ pub enum Feature { WindowsSandbox, /// Use the elevated Windows sandbox pipeline (setup + runner). WindowsSandboxElevated, - /// Remote compaction enabled (only for ChatGPT auth) - RemoteCompaction, /// Refresh remote models and emit AppReady once the list is available. RemoteModels, /// Experimental shell snapshotting. @@ -499,12 +497,6 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::UnderDevelopment, default_enabled: false, }, - FeatureSpec { - id: Feature::RemoteCompaction, - key: "remote_compaction", - stage: Stage::UnderDevelopment, - default_enabled: true, - }, FeatureSpec { id: Feature::RemoteModels, key: "remote_models", diff --git a/codex-rs/core/src/file_watcher.rs b/codex-rs/core/src/file_watcher.rs index afda5b6086..37fb182508 100644 --- a/codex-rs/core/src/file_watcher.rs +++ b/codex-rs/core/src/file_watcher.rs @@ -19,6 +19,7 @@ use tokio::sync::broadcast; use tokio::sync::mpsc; use tokio::time::Instant; use tokio::time::sleep_until; +use tracing::info; use tracing::warn; use crate::config::Config; @@ -162,6 +163,12 @@ impl FileWatcher { res = raw_rx.recv() => { match res { Some(Ok(event)) => { + info!( + event_kind = ?event.kind, + event_paths = ?event.paths, + event_attrs = ?event.attrs, + "file watcher received filesystem event" + ); let skills_paths = classify_event(&event, &state); let now = Instant::now(); skills.add(skills_paths); diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index ff54bdc80c..f1534cf0f7 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -13,6 +13,7 @@ pub mod bash; mod client; mod client_common; pub mod codex; +pub use codex::SteerInputError; mod codex_thread; mod compact_remote; pub use codex_thread::CodexThread; diff --git a/codex-rs/core/src/model_provider_info.rs b/codex-rs/core/src/model_provider_info.rs index 211822f48f..305233ae7a 100644 --- a/codex-rs/core/src/model_provider_info.rs +++ b/codex-rs/core/src/model_provider_info.rs @@ -8,7 +8,6 @@ use crate::auth::AuthMode; use crate::error::EnvVarError; use codex_api::Provider as ApiProvider; -use codex_api::is_azure_responses_wire_base_url; use codex_api::provider::RetryConfig as ApiRetryConfig; use http::HeaderMap; use http::header::HeaderName; @@ -174,10 +173,6 @@ impl ModelProviderInfo { }) } - pub(crate) fn is_azure_responses_endpoint(&self) -> bool { - is_azure_responses_wire_base_url(&self.name, self.base_url.as_deref()) - } - /// If `env_key` is Some, returns the API key for this provider if present /// (and non-empty) in the environment. If `env_key` is required but /// cannot be found, returns an error. diff --git a/codex-rs/core/src/tasks/compact.rs b/codex-rs/core/src/tasks/compact.rs index e1fdbfd91f..908b0460f4 100644 --- a/codex-rs/core/src/tasks/compact.rs +++ b/codex-rs/core/src/tasks/compact.rs @@ -25,7 +25,7 @@ impl SessionTask for CompactTask { _cancellation_token: CancellationToken, ) -> Option { let session = session.clone_session(); - if crate::compact::should_use_remote_compact_task(session.as_ref(), &ctx.provider) { + if crate::compact::should_use_remote_compact_task(&ctx.provider) { let _ = session.services.otel_manager.counter( "codex.task.compact", 1, diff --git a/codex-rs/core/src/tools/handlers/collab.rs b/codex-rs/core/src/tools/handlers/collab.rs index 3110204d29..e6f0c3c959 100644 --- a/codex-rs/core/src/tools/handlers/collab.rs +++ b/codex-rs/core/src/tools/handlers/collab.rs @@ -1168,6 +1168,37 @@ mod tests { #[tokio::test] async fn build_agent_spawn_config_uses_turn_context_values() { + fn pick_allowed_approval_policy( + constraint: &crate::config::Constrained, + base: AskForApproval, + ) -> AskForApproval { + let candidates = [ + AskForApproval::Never, + AskForApproval::UnlessTrusted, + AskForApproval::OnRequest, + AskForApproval::OnFailure, + ]; + candidates + .into_iter() + .find(|candidate| *candidate != base && constraint.can_set(candidate).is_ok()) + .unwrap_or(base) + } + + fn pick_allowed_sandbox_policy( + constraint: &crate::config::Constrained, + base: SandboxPolicy, + ) -> SandboxPolicy { + let candidates = [ + SandboxPolicy::new_read_only_policy(), + SandboxPolicy::new_workspace_write_policy(), + SandboxPolicy::DangerFullAccess, + ]; + candidates + .into_iter() + .find(|candidate| *candidate != base && constraint.can_set(candidate).is_ok()) + .unwrap_or(base) + } + let (_session, mut turn) = make_session_and_context().await; let base_instructions = BaseInstructions { text: "base".to_string(), @@ -1181,8 +1212,14 @@ mod tests { let temp_dir = tempfile::tempdir().expect("temp dir"); turn.cwd = temp_dir.path().to_path_buf(); turn.codex_linux_sandbox_exe = Some(PathBuf::from("/bin/echo")); - turn.approval_policy = AskForApproval::Never; - turn.sandbox_policy = SandboxPolicy::DangerFullAccess; + turn.approval_policy = pick_allowed_approval_policy( + &turn.config.approval_policy, + *turn.config.approval_policy.get(), + ); + turn.sandbox_policy = pick_allowed_sandbox_policy( + &turn.config.sandbox_policy, + turn.config.sandbox_policy.get().clone(), + ); let config = build_agent_spawn_config(&base_instructions, &turn, 0).expect("spawn config"); let mut expected = (*turn.config).clone(); diff --git a/codex-rs/core/tests/suite/client_websockets.rs b/codex-rs/core/tests/suite/client_websockets.rs index e58a512f9e..bf170bb757 100644 --- a/codex-rs/core/tests/suite/client_websockets.rs +++ b/codex-rs/core/tests/suite/client_websockets.rs @@ -40,6 +40,8 @@ use tempfile::TempDir; use tracing_test::traced_test; const MODEL: &str = "gpt-5.2-codex"; +const OPENAI_BETA_HEADER: &str = "OpenAI-Beta"; +const OPENAI_BETA_RESPONSES_WEBSOCKETS: &str = "responses_websockets=2026-02-04"; struct WebsocketTestHarness { _codex_home: TempDir, @@ -74,6 +76,11 @@ async fn responses_websocket_streams_request() { assert_eq!(body["model"].as_str(), Some(MODEL)); assert_eq!(body["stream"], serde_json::Value::Bool(true)); assert_eq!(body["input"].as_array().map(Vec::len), Some(1)); + let handshake = server.single_handshake(); + assert_eq!( + handshake.header(OPENAI_BETA_HEADER), + Some(OPENAI_BETA_RESPONSES_WEBSOCKETS.to_string()) + ); server.shutdown().await; } @@ -120,7 +127,11 @@ async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics "type": "responsesapi.websocket_timing", "timing_metrics": { "responses_duration_excl_engine_and_client_tool_time_ms": 120, - "engine_service_total_ms": 450 + "engine_service_total_ms": 450, + "engine_iapi_ttft_total_ms": 310, + "engine_service_ttft_total_ms": 340, + "engine_iapi_tbt_across_engine_calls_ms": 220, + "engine_service_tbt_across_engine_calls_ms": 260 } }), ev_completed("resp-1"), @@ -147,6 +158,10 @@ async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics .expect("runtime metrics summary"); assert_eq!(summary.responses_api_overhead_ms, 120); assert_eq!(summary.responses_api_inference_time_ms, 450); + assert_eq!(summary.responses_api_engine_iapi_ttft_ms, 310); + assert_eq!(summary.responses_api_engine_service_ttft_ms, 340); + assert_eq!(summary.responses_api_engine_iapi_tbt_ms, 220); + assert_eq!(summary.responses_api_engine_service_tbt_ms, 260); server.shutdown().await; } diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index ea9d2f92fa..183732f0a4 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -5,7 +5,6 @@ use codex_core::built_in_model_providers; use codex_core::compact::SUMMARIZATION_PROMPT; use codex_core::compact::SUMMARY_PREFIX; use codex_core::config::Config; -use codex_core::features::Feature; use codex_core::protocol::AskForApproval; use codex_core::protocol::EventMsg; use codex_core::protocol::ItemCompletedEvent; @@ -1464,7 +1463,6 @@ async fn auto_compact_runs_after_resume_when_token_usage_is_over_limit() { let mut builder = test_codex().with_config(move |config| { set_test_compact_prompt(config); config.model_auto_compact_token_limit = Some(limit); - config.features.enable(Feature::RemoteCompaction); }); let initial = builder.build(&server).await.unwrap(); let home = initial.home.clone(); @@ -1493,7 +1491,6 @@ async fn auto_compact_runs_after_resume_when_token_usage_is_over_limit() { let mut resume_builder = test_codex().with_config(move |config| { set_test_compact_prompt(config); config.model_auto_compact_token_limit = Some(limit); - config.features.enable(Feature::RemoteCompaction); }); let resumed = resume_builder .resume(&server, home, rollout_path) @@ -2290,7 +2287,6 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() { .with_config(|config| { set_test_compact_prompt(config); config.model_auto_compact_token_limit = Some(300); - config.features.enable(Feature::RemoteCompaction); }) .build(&server) .await @@ -2411,7 +2407,6 @@ async fn auto_compact_runs_when_reasoning_header_clears_between_turns() { .with_config(|config| { set_test_compact_prompt(config); config.model_auto_compact_token_limit = Some(300); - config.features.enable(Feature::RemoteCompaction); }) .build(&server) .await diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index 435aa8900c..a2ae58cf25 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -4,7 +4,6 @@ use std::fs; use anyhow::Result; use codex_core::CodexAuth; -use codex_core::features::Feature; use codex_core::protocol::EventMsg; use codex_core::protocol::ItemCompletedEvent; use codex_core::protocol::ItemStartedEvent; @@ -45,11 +44,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> { skip_if_no_network!(Ok(())); let harness = TestCodexHarness::with_builder( - test_codex() - .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) - .with_config(|config| { - config.features.enable(Feature::RemoteCompaction); - }), + test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()), ) .await?; let codex = harness.test().codex.clone(); @@ -166,11 +161,7 @@ async fn remote_compact_runs_automatically() -> Result<()> { skip_if_no_network!(Ok(())); let harness = TestCodexHarness::with_builder( - test_codex() - .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) - .with_config(|config| { - config.features.enable(Feature::RemoteCompaction); - }), + test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()), ) .await?; let codex = harness.test().codex.clone(); @@ -253,7 +244,6 @@ async fn remote_compact_trims_function_call_history_to_fit_context_window() -> R test_codex() .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) .with_config(|config| { - config.features.enable(Feature::RemoteCompaction); config.model_context_window = Some(2_000); }), ) @@ -384,7 +374,6 @@ async fn remote_compact_trim_estimate_uses_session_base_instructions() -> Result test_codex() .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) .with_config(|config| { - config.features.enable(Feature::RemoteCompaction); config.model_context_window = Some(200_000); }), ) @@ -482,7 +471,6 @@ async fn remote_compact_trim_estimate_uses_session_base_instructions() -> Result .with_config({ let override_base_instructions = override_base_instructions.clone(); move |config| { - config.features.enable(Feature::RemoteCompaction); config.model_context_window = Some(override_context_window); config.base_instructions = Some(override_base_instructions); } @@ -575,11 +563,7 @@ async fn remote_manual_compact_emits_context_compaction_items() -> Result<()> { skip_if_no_network!(Ok(())); let harness = TestCodexHarness::with_builder( - test_codex() - .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) - .with_config(|config| { - config.features.enable(Feature::RemoteCompaction); - }), + test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()), ) .await?; let codex = harness.test().codex.clone(); @@ -671,11 +655,7 @@ async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()> skip_if_no_network!(Ok(())); let harness = TestCodexHarness::with_builder( - test_codex() - .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) - .with_config(|config| { - config.features.enable(Feature::RemoteCompaction); - }), + test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()), ) .await?; let codex = harness.test().codex.clone(); diff --git a/codex-rs/core/tests/suite/web_search.rs b/codex-rs/core/tests/suite/web_search.rs index 8b1cc2a4e7..edcbfd35d3 100644 --- a/codex-rs/core/tests/suite/web_search.rs +++ b/codex-rs/core/tests/suite/web_search.rs @@ -1,7 +1,5 @@ #![allow(clippy::unwrap_used)] -use codex_core::WireApi; -use codex_core::built_in_model_providers; use codex_core::features::Feature; use codex_core::protocol::SandboxPolicy; use codex_protocol::config_types::WebSearchMode; @@ -22,15 +20,6 @@ fn find_web_search_tool(body: &Value) -> &Value { .expect("tools should include a web_search tool") } -#[allow(clippy::expect_used)] -fn has_web_search_tool(body: &Value) -> bool { - body["tools"] - .as_array() - .expect("request body should include tools array") - .iter() - .any(|tool| tool.get("type").and_then(Value::as_str) == Some("web_search")) -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn web_search_mode_cached_sets_external_web_access_false() { skip_if_no_network!(); @@ -198,48 +187,3 @@ async fn web_search_mode_updates_between_turns_with_sandbox_policy() { "danger-full-access policy should default web_search to live" ); } - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn web_search_mode_defaults_to_disabled_for_azure_responses() { - skip_if_no_network!(); - - let server = start_mock_server().await; - let sse = responses::sse(vec![ - responses::ev_response_created("resp-1"), - responses::ev_completed("resp-1"), - ]); - let resp_mock = responses::mount_sse_once(&server, sse).await; - - let mut builder = test_codex() - .with_model("gpt-5-codex") - .with_config(|config| { - let base_url = config.model_provider.base_url.clone(); - let mut provider = built_in_model_providers()["openai"].clone(); - provider.name = "Azure".to_string(); - provider.base_url = base_url; - provider.wire_api = WireApi::Responses; - config.model_provider_id = provider.name.clone(); - config.model_provider = provider; - config.web_search_mode = None; - config.features.disable(Feature::WebSearchCached); - config.features.disable(Feature::WebSearchRequest); - }); - let test = builder - .build(&server) - .await - .expect("create test Codex conversation"); - - test.submit_turn_with_policy( - "hello azure default web search", - SandboxPolicy::DangerFullAccess, - ) - .await - .expect("submit turn"); - - let body = resp_mock.single_request().body_json(); - assert_eq!( - has_web_search_tool(&body), - false, - "azure responses requests should disable web_search by default" - ); -} diff --git a/codex-rs/otel/src/metrics/names.rs b/codex-rs/otel/src/metrics/names.rs index 97c4f4e6ed..76b46d2450 100644 --- a/codex-rs/otel/src/metrics/names.rs +++ b/codex-rs/otel/src/metrics/names.rs @@ -12,3 +12,11 @@ pub(crate) const RESPONSES_API_OVERHEAD_DURATION_METRIC: &str = "codex.responses_api_overhead.duration_ms"; pub(crate) const RESPONSES_API_INFERENCE_TIME_DURATION_METRIC: &str = "codex.responses_api_inference_time.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC: &str = + "codex.responses_api_engine_iapi_ttft.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC: &str = + "codex.responses_api_engine_service_ttft.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC: &str = + "codex.responses_api_engine_iapi_tbt.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC: &str = + "codex.responses_api_engine_service_tbt.duration_ms"; diff --git a/codex-rs/otel/src/metrics/runtime_metrics.rs b/codex-rs/otel/src/metrics/runtime_metrics.rs index d59ff4b1b7..dcac367d87 100644 --- a/codex-rs/otel/src/metrics/runtime_metrics.rs +++ b/codex-rs/otel/src/metrics/runtime_metrics.rs @@ -1,5 +1,9 @@ use crate::metrics::names::API_CALL_COUNT_METRIC; use crate::metrics::names::API_CALL_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC; use crate::metrics::names::SSE_EVENT_COUNT_METRIC; @@ -25,6 +29,11 @@ impl RuntimeMetricTotals { pub fn is_empty(self) -> bool { self.count == 0 && self.duration_ms == 0 } + + pub fn merge(&mut self, other: Self) { + self.count = self.count.saturating_add(other.count); + self.duration_ms = self.duration_ms.saturating_add(other.duration_ms); + } } #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] @@ -36,6 +45,10 @@ pub struct RuntimeMetricsSummary { pub websocket_events: RuntimeMetricTotals, pub responses_api_overhead_ms: u64, pub responses_api_inference_time_ms: u64, + pub responses_api_engine_iapi_ttft_ms: u64, + pub responses_api_engine_service_ttft_ms: u64, + pub responses_api_engine_iapi_tbt_ms: u64, + pub responses_api_engine_service_tbt_ms: u64, } impl RuntimeMetricsSummary { @@ -47,6 +60,48 @@ impl RuntimeMetricsSummary { && self.websocket_events.is_empty() && self.responses_api_overhead_ms == 0 && self.responses_api_inference_time_ms == 0 + && self.responses_api_engine_iapi_ttft_ms == 0 + && self.responses_api_engine_service_ttft_ms == 0 + && self.responses_api_engine_iapi_tbt_ms == 0 + && self.responses_api_engine_service_tbt_ms == 0 + } + + pub fn merge(&mut self, other: Self) { + self.tool_calls.merge(other.tool_calls); + self.api_calls.merge(other.api_calls); + self.streaming_events.merge(other.streaming_events); + self.websocket_calls.merge(other.websocket_calls); + self.websocket_events.merge(other.websocket_events); + if other.responses_api_overhead_ms > 0 { + self.responses_api_overhead_ms = other.responses_api_overhead_ms; + } + if other.responses_api_inference_time_ms > 0 { + self.responses_api_inference_time_ms = other.responses_api_inference_time_ms; + } + if other.responses_api_engine_iapi_ttft_ms > 0 { + self.responses_api_engine_iapi_ttft_ms = other.responses_api_engine_iapi_ttft_ms; + } + if other.responses_api_engine_service_ttft_ms > 0 { + self.responses_api_engine_service_ttft_ms = other.responses_api_engine_service_ttft_ms; + } + if other.responses_api_engine_iapi_tbt_ms > 0 { + self.responses_api_engine_iapi_tbt_ms = other.responses_api_engine_iapi_tbt_ms; + } + if other.responses_api_engine_service_tbt_ms > 0 { + self.responses_api_engine_service_tbt_ms = other.responses_api_engine_service_tbt_ms; + } + } + + pub fn responses_api_summary(&self) -> RuntimeMetricsSummary { + Self { + responses_api_overhead_ms: self.responses_api_overhead_ms, + responses_api_inference_time_ms: self.responses_api_inference_time_ms, + responses_api_engine_iapi_ttft_ms: self.responses_api_engine_iapi_ttft_ms, + responses_api_engine_service_ttft_ms: self.responses_api_engine_service_ttft_ms, + responses_api_engine_iapi_tbt_ms: self.responses_api_engine_iapi_tbt_ms, + responses_api_engine_service_tbt_ms: self.responses_api_engine_service_tbt_ms, + ..RuntimeMetricsSummary::default() + } } pub(crate) fn from_snapshot(snapshot: &ResourceMetrics) -> Self { @@ -74,6 +129,14 @@ impl RuntimeMetricsSummary { sum_histogram_ms(snapshot, RESPONSES_API_OVERHEAD_DURATION_METRIC); let responses_api_inference_time_ms = sum_histogram_ms(snapshot, RESPONSES_API_INFERENCE_TIME_DURATION_METRIC); + let responses_api_engine_iapi_ttft_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC); + let responses_api_engine_service_ttft_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC); + let responses_api_engine_iapi_tbt_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC); + let responses_api_engine_service_tbt_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC); Self { tool_calls, api_calls, @@ -82,6 +145,10 @@ impl RuntimeMetricsSummary { websocket_events, responses_api_overhead_ms, responses_api_inference_time_ms, + responses_api_engine_iapi_ttft_ms, + responses_api_engine_service_ttft_ms, + responses_api_engine_iapi_tbt_ms, + responses_api_engine_service_tbt_ms, } } } diff --git a/codex-rs/otel/src/traces/otel_manager.rs b/codex-rs/otel/src/traces/otel_manager.rs index aa3006da01..c54c64c6cb 100644 --- a/codex-rs/otel/src/traces/otel_manager.rs +++ b/codex-rs/otel/src/traces/otel_manager.rs @@ -1,6 +1,10 @@ use crate::TelemetryAuthMode; use crate::metrics::names::API_CALL_COUNT_METRIC; use crate::metrics::names::API_CALL_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC; use crate::metrics::names::SSE_EVENT_COUNT_METRIC; @@ -48,6 +52,10 @@ const RESPONSES_WEBSOCKET_TIMING_KIND: &str = "responsesapi.websocket_timing"; const RESPONSES_WEBSOCKET_TIMING_METRICS_FIELD: &str = "timing_metrics"; const RESPONSES_API_OVERHEAD_FIELD: &str = "responses_duration_excl_engine_and_client_tool_time_ms"; const RESPONSES_API_INFERENCE_FIELD: &str = "engine_service_total_ms"; +const RESPONSES_API_ENGINE_IAPI_TTFT_FIELD: &str = "engine_iapi_ttft_total_ms"; +const RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD: &str = "engine_service_ttft_total_ms"; +const RESPONSES_API_ENGINE_IAPI_TBT_FIELD: &str = "engine_iapi_tbt_across_engine_calls_ms"; +const RESPONSES_API_ENGINE_SERVICE_TBT_FIELD: &str = "engine_service_tbt_across_engine_calls_ms"; impl OtelManager { #[allow(clippy::too_many_arguments)] @@ -674,6 +682,42 @@ impl OtelManager { if let Some(duration) = duration_from_ms_value(inference_value) { self.record_duration(RESPONSES_API_INFERENCE_TIME_DURATION_METRIC, duration, &[]); } + + let engine_iapi_ttft_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TTFT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_iapi_ttft_value) { + self.record_duration( + RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC, + duration, + &[], + ); + } + + let engine_service_ttft_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_service_ttft_value) { + self.record_duration( + RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC, + duration, + &[], + ); + } + + let engine_iapi_tbt_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TBT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_iapi_tbt_value) { + self.record_duration(RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC, duration, &[]); + } + + let engine_service_tbt_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TBT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_service_tbt_value) { + self.record_duration( + RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC, + duration, + &[], + ); + } } fn responses_type(event: &ResponseEvent) -> String { diff --git a/codex-rs/otel/tests/suite/runtime_summary.rs b/codex-rs/otel/tests/suite/runtime_summary.rs index 6ef2927f43..79c6b258e7 100644 --- a/codex-rs/otel/tests/suite/runtime_summary.rs +++ b/codex-rs/otel/tests/suite/runtime_summary.rs @@ -67,7 +67,7 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<( Option>, codex_api::ApiError, > = Ok(Some(Ok(Message::Text( - r#"{"type":"responsesapi.websocket_timing","timing_metrics":{"responses_duration_excl_engine_and_client_tool_time_ms":124,"engine_service_total_ms":457}}"# + r#"{"type":"responsesapi.websocket_timing","timing_metrics":{"responses_duration_excl_engine_and_client_tool_time_ms":124,"engine_service_total_ms":457,"engine_iapi_ttft_total_ms":211,"engine_service_ttft_total_ms":233,"engine_iapi_tbt_across_engine_calls_ms":377,"engine_service_tbt_across_engine_calls_ms":399}}"# .into(), )))); manager.record_websocket_event(&ws_timing_response, Duration::from_millis(20)); @@ -98,6 +98,10 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<( }, responses_api_overhead_ms: 124, responses_api_inference_time_ms: 457, + responses_api_engine_iapi_ttft_ms: 211, + responses_api_engine_service_ttft_ms: 233, + responses_api_engine_iapi_tbt_ms: 377, + responses_api_engine_service_tbt_ms: 399, }; assert_eq!(summary, expected); diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 419c0b8867..378a74c306 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -105,6 +105,7 @@ use codex_core::skills::model::SkillMetadata; #[cfg(target_os = "windows")] use codex_core::windows_sandbox::WindowsSandboxLevelExt; use codex_otel::OtelManager; +use codex_otel::RuntimeMetricsSummary; use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::approvals::ElicitationRequestEvent; @@ -587,7 +588,8 @@ pub(crate) struct ChatWidget { // This lets the separator show per-chunk work time (since the previous separator) rather than // the total task-running time reported by the status indicator. last_separator_elapsed_secs: Option, - + // Runtime metrics accumulated across delta snapshots for the active turn. + turn_runtime_metrics: RuntimeMetricsSummary, last_rendered_width: std::cell::Cell>, // Feedback sink for /feedback feedback: codex_feedback::CodexFeedback, @@ -931,6 +933,32 @@ impl ChatWidget { self.request_status_line_branch(cwd); } + fn collect_runtime_metrics_delta(&mut self) { + if let Some(delta) = self.otel_manager.runtime_metrics_summary() { + self.apply_runtime_metrics_delta(delta); + } + } + + fn apply_runtime_metrics_delta(&mut self, delta: RuntimeMetricsSummary) { + let should_log_timing = has_websocket_timing_metrics(delta); + self.turn_runtime_metrics.merge(delta); + if should_log_timing { + self.log_websocket_timing_totals(delta); + } + } + + fn log_websocket_timing_totals(&mut self, delta: RuntimeMetricsSummary) { + if let Some(label) = history_cell::runtime_metrics_label(delta.responses_api_summary()) { + self.add_plain_history_lines(vec![ + vec!["• ".dim(), format!("WebSocket timing: {label}").dark_gray()].into(), + ]); + } + } + + fn refresh_runtime_metrics(&mut self) { + self.collect_runtime_metrics_delta(); + } + fn restore_retry_status_header_if_present(&mut self) { if let Some(header) = self.retry_status_header.take() { self.set_status_header(header); @@ -1218,6 +1246,7 @@ impl ChatWidget { self.plan_item_active = false; self.adaptive_chunking.reset(); self.plan_stream_controller = None; + self.turn_runtime_metrics = RuntimeMetricsSummary::default(); self.otel_manager.reset_runtime_metrics(); self.bottom_pane.clear_quit_shortcut_hint(); self.quit_shortcut_expires_at = None; @@ -1241,17 +1270,25 @@ impl ChatWidget { } self.flush_unified_exec_wait_streak(); if !from_replay { - let runtime_metrics = self.otel_manager.runtime_metrics_summary(); - if runtime_metrics.is_some() { - let elapsed_seconds = self - .bottom_pane - .status_widget() - .map(super::status_indicator_widget::StatusIndicatorWidget::elapsed_seconds); + self.collect_runtime_metrics_delta(); + let runtime_metrics = + (!self.turn_runtime_metrics.is_empty()).then_some(self.turn_runtime_metrics); + let show_work_separator = self.needs_final_message_separator && self.had_work_activity; + if show_work_separator || runtime_metrics.is_some() { + let elapsed_seconds = if show_work_separator { + self.bottom_pane + .status_widget() + .map(super::status_indicator_widget::StatusIndicatorWidget::elapsed_seconds) + .map(|current| self.worked_elapsed_from(current)) + } else { + None + }; self.add_to_history(history_cell::FinalMessageSeparator::new( elapsed_seconds, runtime_metrics, )); } + self.turn_runtime_metrics = RuntimeMetricsSummary::default(); self.needs_final_message_separator = false; self.had_work_activity = false; self.request_status_line_branch_refresh(); @@ -2092,6 +2129,10 @@ impl ChatWidget { } self.app_event_tx.send(AppEvent::StopCommitAnimation); } + + if self.agent_turn_running { + self.refresh_runtime_metrics(); + } } fn flush_interrupt_queue(&mut self) { @@ -2534,6 +2575,7 @@ impl ChatWidget { plan_delta_buffer: String::new(), plan_item_active: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback, feedback_audience, @@ -2695,6 +2737,7 @@ impl ChatWidget { needs_final_message_separator: false, had_work_activity: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback, feedback_audience, @@ -2845,6 +2888,7 @@ impl ChatWidget { plan_delta_buffer: String::new(), plan_item_active: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback, feedback_audience, @@ -3867,6 +3911,10 @@ impl ChatWidget { } } } + + if !from_replay && self.agent_turn_running { + self.refresh_runtime_metrics(); + } } fn on_entered_review_mode(&mut self, review: ReviewRequest, from_replay: bool) { @@ -4537,9 +4585,7 @@ impl ChatWidget { let mut header = ColumnRenderable::new(); header.push(Line::from("Select Personality".bold())); - header.push(Line::from( - "Choose a communication style for Codex. Disable in /experimental.".dim(), - )); + header.push(Line::from("Choose a communication style for Codex.".dim())); self.bottom_pane.show_selection_view(SelectionViewParams { header: Box::new(header), @@ -6703,6 +6749,15 @@ impl ChatWidget { } } +fn has_websocket_timing_metrics(summary: RuntimeMetricsSummary) -> bool { + summary.responses_api_overhead_ms > 0 + || summary.responses_api_inference_time_ms > 0 + || summary.responses_api_engine_iapi_ttft_ms > 0 + || summary.responses_api_engine_service_ttft_ms > 0 + || summary.responses_api_engine_iapi_tbt_ms > 0 + || summary.responses_api_engine_service_tbt_ms > 0 +} + impl Drop for ChatWidget { fn drop(&mut self) { self.stop_rate_limit_poller(); diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__personality_selection_popup.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__personality_selection_popup.snap index 3cd887f2e4..d9a6e0a23c 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__personality_selection_popup.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__personality_selection_popup.snap @@ -3,7 +3,7 @@ source: tui/src/chatwidget/tests.rs expression: popup --- Select Personality - Choose a communication style for Codex. Disable in /experimental. + Choose a communication style for Codex. 1. Friendly Warm, collaborative, and helpful. › 2. Pragmatic (current) Concise, task-focused, and direct. diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index 2003279852..774c4370bd 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -62,6 +62,7 @@ use codex_core::protocol::UndoStartedEvent; use codex_core::protocol::ViewImageToolCallEvent; use codex_core::protocol::WarningEvent; use codex_otel::OtelManager; +use codex_otel::RuntimeMetricsSummary; use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::config_types::CollaborationMode; @@ -955,6 +956,7 @@ async fn make_chatwidget_manual( plan_delta_buffer: String::new(), plan_item_active: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback: codex_feedback::CodexFeedback::new(), feedback_audience: FeedbackAudience::External, @@ -5061,6 +5063,50 @@ async fn stream_recovery_restores_previous_status_header() { assert!(chat.retry_status_header.is_none()); } +#[tokio::test] +async fn runtime_metrics_websocket_timing_logs_and_final_separator_sums_totals() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + chat.set_feature_enabled(Feature::RuntimeMetrics, true); + + chat.on_task_started(); + chat.apply_runtime_metrics_delta(RuntimeMetricsSummary { + responses_api_engine_iapi_ttft_ms: 120, + responses_api_engine_service_tbt_ms: 50, + ..RuntimeMetricsSummary::default() + }); + + let first_log = drain_insert_history(&mut rx) + .iter() + .map(|lines| lines_to_single_string(lines)) + .find(|line| line.contains("WebSocket timing:")) + .expect("expected websocket timing log"); + assert!(first_log.contains("TTFT: 120ms (iapi)")); + assert!(first_log.contains("TBT: 50ms (service)")); + + chat.apply_runtime_metrics_delta(RuntimeMetricsSummary { + responses_api_engine_iapi_ttft_ms: 80, + ..RuntimeMetricsSummary::default() + }); + + let second_log = drain_insert_history(&mut rx) + .iter() + .map(|lines| lines_to_single_string(lines)) + .find(|line| line.contains("WebSocket timing:")) + .expect("expected websocket timing log"); + assert!(second_log.contains("TTFT: 80ms (iapi)")); + + chat.on_task_complete(None, false); + let mut final_separator = None; + while let Ok(event) = rx.try_recv() { + if let AppEvent::InsertHistoryCell(cell) = event { + final_separator = Some(lines_to_single_string(&cell.display_lines(300))); + } + } + let final_separator = final_separator.expect("expected final separator with runtime metrics"); + assert!(final_separator.contains("TTFT: 80ms (iapi)")); + assert!(final_separator.contains("TBT: 50ms (service)")); +} + #[tokio::test] async fn multiple_agent_messages_in_single_turn_emit_multiple_headers() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index b85ca8f6bb..26a91dc6e6 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -2172,7 +2172,7 @@ impl HistoryCell for FinalMessageSeparator { } } -fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option { +pub(crate) fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option { let mut parts = Vec::new(); if summary.tool_calls.count > 0 { let duration = format_duration_ms(summary.tool_calls.duration_ms); @@ -2221,6 +2221,34 @@ fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option { let duration = format_duration_ms(summary.responses_api_inference_time_ms); parts.push(format!("Responses API inference: {duration}")); } + if summary.responses_api_engine_iapi_ttft_ms > 0 + || summary.responses_api_engine_service_ttft_ms > 0 + { + let mut ttft_parts = Vec::new(); + if summary.responses_api_engine_iapi_ttft_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_iapi_ttft_ms); + ttft_parts.push(format!("{duration} (iapi)")); + } + if summary.responses_api_engine_service_ttft_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_service_ttft_ms); + ttft_parts.push(format!("{duration} (service)")); + } + parts.push(format!("TTFT: {}", ttft_parts.join(" "))); + } + if summary.responses_api_engine_iapi_tbt_ms > 0 + || summary.responses_api_engine_service_tbt_ms > 0 + { + let mut tbt_parts = Vec::new(); + if summary.responses_api_engine_iapi_tbt_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_iapi_tbt_ms); + tbt_parts.push(format!("{duration} (iapi)")); + } + if summary.responses_api_engine_service_tbt_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_service_tbt_ms); + tbt_parts.push(format!("{duration} (service)")); + } + parts.push(format!("TBT: {}", tbt_parts.join(" "))); + } if parts.is_empty() { None } else { @@ -2391,9 +2419,13 @@ mod tests { }, responses_api_overhead_ms: 650, responses_api_inference_time_ms: 1_940, + responses_api_engine_iapi_ttft_ms: 410, + responses_api_engine_service_ttft_ms: 460, + responses_api_engine_iapi_tbt_ms: 1_180, + responses_api_engine_service_tbt_ms: 1_240, }; let cell = FinalMessageSeparator::new(Some(12), Some(summary)); - let rendered = render_lines(&cell.display_lines(300)); + let rendered = render_lines(&cell.display_lines(600)); assert_eq!(rendered.len(), 1); assert!(!rendered[0].contains("Worked for")); @@ -2404,6 +2436,8 @@ mod tests { assert!(rendered[0].contains("4 events received (1.2s)")); assert!(rendered[0].contains("Responses API overhead: 650ms")); assert!(rendered[0].contains("Responses API inference: 1.9s")); + assert!(rendered[0].contains("TTFT: 410ms (iapi) 460ms (service)")); + assert!(rendered[0].contains("TBT: 1.2s (iapi) 1.2s (service)")); } #[test] diff --git a/codex-rs/tui/src/tooltips.rs b/codex-rs/tui/src/tooltips.rs index 7365c16496..a9dbd82e2f 100644 --- a/codex-rs/tui/src/tooltips.rs +++ b/codex-rs/tui/src/tooltips.rs @@ -6,9 +6,13 @@ use rand::Rng; const ANNOUNCEMENT_TIP_URL: &str = "https://raw.githubusercontent.com/openai/codex/main/announcement_tip.toml"; +const IS_MACOS: bool = cfg!(target_os = "macos"); + const PAID_TOOLTIP: &str = "*New* Try the **Codex App** with 2x rate limits until *April 2nd*. Run 'codex app' or visit https://chatgpt.com/codex"; +const PAID_TOOLTIP_NON_MAC: &str = "*New* 2x rate limits until *April 2nd*."; const OTHER_TOOLTIP: &str = "*New* Build faster with the **Codex App**. Run 'codex app' or visit https://chatgpt.com/codex"; +const OTHER_TOOLTIP_NON_MAC: &str = "*New* Build faster with Codex."; const FREE_GO_TOOLTIP: &str = "*New* Codex is included in your plan for free through *March 2nd* – let’s build together."; @@ -18,7 +22,15 @@ lazy_static! { static ref TOOLTIPS: Vec<&'static str> = RAW_TOOLTIPS .lines() .map(str::trim) - .filter(|line| !line.is_empty() && !line.starts_with('#')) + .filter(|line| { + if line.is_empty() || line.starts_with('#') { + return false; + } + if !IS_MACOS && line.contains("codex app") { + return false; + } + true + }) .collect(); static ref ALL_TOOLTIPS: Vec<&'static str> = { let mut tips = Vec::new(); @@ -51,12 +63,24 @@ pub(crate) fn get_tooltip(plan: Option) -> Option { | Some(PlanType::Team) | Some(PlanType::Enterprise) | Some(PlanType::Pro) => { - return Some(PAID_TOOLTIP.to_string()); + let tooltip = if IS_MACOS { + PAID_TOOLTIP + } else { + PAID_TOOLTIP_NON_MAC + }; + return Some(tooltip.to_string()); } Some(PlanType::Go) | Some(PlanType::Free) => { return Some(FREE_GO_TOOLTIP.to_string()); } - _ => return Some(OTHER_TOOLTIP.to_string()), + _ => { + let tooltip = if IS_MACOS { + OTHER_TOOLTIP + } else { + OTHER_TOOLTIP_NON_MAC + }; + return Some(tooltip.to_string()); + } } } diff --git a/codex-rs/tui/src/updates.rs b/codex-rs/tui/src/updates.rs index 89fd6f32f6..58d025a4a1 100644 --- a/codex-rs/tui/src/updates.rs +++ b/codex-rs/tui/src/updates.rs @@ -56,8 +56,7 @@ struct VersionInfo { const VERSION_FILENAME: &str = "version.json"; // We use the latest version from the cask if installation is via homebrew - homebrew does not immediately pick up the latest release and can lag behind. -const HOMEBREW_CASK_URL: &str = - "https://raw.githubusercontent.com/Homebrew/homebrew-cask/HEAD/Casks/c/codex.rb"; +const HOMEBREW_CASK_API_URL: &str = "https://formulae.brew.sh/api/cask/codex.json"; const LATEST_RELEASE_URL: &str = "https://api.github.com/repos/openai/codex/releases/latest"; #[derive(Deserialize, Debug, Clone)] @@ -65,6 +64,11 @@ struct ReleaseInfo { tag_name: String, } +#[derive(Deserialize, Debug, Clone)] +struct HomebrewCaskInfo { + version: String, +} + fn version_filepath(config: &Config) -> PathBuf { config.codex_home.join(VERSION_FILENAME) } @@ -77,14 +81,14 @@ fn read_version_info(version_file: &Path) -> anyhow::Result { async fn check_for_update(version_file: &Path) -> anyhow::Result<()> { let latest_version = match update_action::get_update_action() { Some(UpdateAction::BrewUpgrade) => { - let cask_contents = create_client() - .get(HOMEBREW_CASK_URL) + let HomebrewCaskInfo { version } = create_client() + .get(HOMEBREW_CASK_API_URL) .send() .await? .error_for_status()? - .text() + .json::() .await?; - extract_version_from_cask(&cask_contents)? + version } _ => { let ReleaseInfo { @@ -123,18 +127,6 @@ fn is_newer(latest: &str, current: &str) -> Option { } } -fn extract_version_from_cask(cask_contents: &str) -> anyhow::Result { - cask_contents - .lines() - .find_map(|line| { - let line = line.trim(); - line.strip_prefix("version \"") - .and_then(|rest| rest.strip_suffix('"')) - .map(ToString::to_string) - }) - .ok_or_else(|| anyhow::anyhow!("Failed to find version in Homebrew cask file")) -} - fn extract_version_from_latest_tag(latest_tag_name: &str) -> anyhow::Result { latest_tag_name .strip_prefix("rust-v") @@ -190,16 +182,18 @@ mod tests { use super::*; #[test] - fn parses_version_from_cask_contents() { - let cask = r#" - cask "codex" do - version "0.55.0" - end - "#; - assert_eq!( - extract_version_from_cask(cask).expect("failed to parse version"), - "0.55.0" - ); + fn extract_version_from_brew_api_json() { + // + // https://formulae.brew.sh/api/cask/codex.json + let cask_json = r#"{ + "token": "codex", + "full_token": "codex", + "tap": "homebrew/cask", + "version": "0.96.0", + }"#; + let HomebrewCaskInfo { version } = serde_json::from_str::(cask_json) + .expect("failed to parse version from cask json"); + assert_eq!(version, "0.96.0"); } #[test]