diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index ef41330c46..3a32301dc3 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -131,7 +131,7 @@ jobs: key: ${{ steps.prepare_bazel.outputs.repository-cache-key }} clippy: - timeout-minutes: 30 + timeout-minutes: 60 strategy: fail-fast: false matrix: diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 42a3ca8764..c507bf0cc4 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -82,7 +82,6 @@ jobs: - uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2 with: tool: cargo-shear - version: 1.5.1 - name: cargo shear run: cargo shear @@ -145,10 +144,10 @@ jobs: include: - name: Linux runner: ubuntu-24.04 - timeout_minutes: 30 + timeout_minutes: 120 - name: macOS runner: macos-15-xlarge - timeout_minutes: 30 + timeout_minutes: 90 - name: Windows runner: windows-x64 timeout_minutes: 30 diff --git a/.github/workflows/sdk.yml b/.github/workflows/sdk.yml index 45c983ac1e..9dadb840b1 100644 --- a/.github/workflows/sdk.yml +++ b/.github/workflows/sdk.yml @@ -10,7 +10,7 @@ jobs: runs-on: group: codex-runners labels: codex-linux-x64 - timeout-minutes: 10 + timeout-minutes: 60 steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 diff --git a/codex-rs/.config/nextest.toml b/codex-rs/.config/nextest.toml index 86d00e4637..bfbf9cfc5b 100644 --- a/codex-rs/.config/nextest.toml +++ b/codex-rs/.config/nextest.toml @@ -44,3 +44,9 @@ test-group = 'core_apply_patch_cli_integration' # Serialize them to avoid exhausting Windows session/global desktop resources in CI. filter = 'package(codex-windows-sandbox) & test(legacy_)' test-group = 'windows_sandbox_legacy_sessions' + +[[profile.default.overrides]] +# Schema fixture generation can take longer than the default timeout on slower +# Windows runners when app-server protocol fixture sets grow. 
+filter = 'test(schema_fixtures_match_generated)' +slow-timeout = { period = "1m", terminate-after = 2 } diff --git a/codex-rs/README.md b/codex-rs/README.md index d219061a35..34e75c82de 100644 --- a/codex-rs/README.md +++ b/codex-rs/README.md @@ -50,7 +50,8 @@ You can enable notifications by configuring a script that is run whenever the ag ### `codex exec` to run Codex programmatically/non-interactively -To run Codex non-interactively, run `codex exec PROMPT` (you can also pass the prompt via `stdin`) and Codex will work on your task until it decides that it is done and exits. If you provide both a prompt argument and piped stdin, Codex appends stdin as a `` block after the prompt so patterns like `echo "my output" | codex exec "Summarize this concisely"` work naturally. Output is printed to the terminal directly. You can set the `RUST_LOG` environment variable to see more about what's going on. +To run Codex non-interactively, run `codex exec PROMPT` (you can also pass the prompt via `stdin`) and Codex will work on your task until it decides that it is done and exits. Output is printed to the terminal directly. You can set the `RUST_LOG` environment variable to see more about what's going on. +Use `codex exec --fork PROMPT` to fork an existing session without launching the interactive picker/UI. Use `codex exec --ephemeral ...` to run without persisting session rollout files to disk. 
### Experimenting with the Codex Sandbox diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index 8f44e83830..d9de262e83 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -3427,6 +3427,17 @@ ], "type": "object" }, + "ThreadInputActivityParams": { + "properties": { + "threadId": { + "type": "string" + } + }, + "required": [ + "threadId" + ], + "type": "object" + }, "ThreadListCwdFilter": { "anyOf": [ { @@ -4688,6 +4699,30 @@ "title": "Thread/approveGuardianDeniedActionRequest", "type": "object" }, + { + "properties": { + "id": { + "$ref": "#/definitions/RequestId" + }, + "method": { + "enum": [ + "thread/inputActivity" + ], + "title": "Thread/inputActivityRequestMethod", + "type": "string" + }, + "params": { + "$ref": "#/definitions/ThreadInputActivityParams" + } + }, + "required": [ + "id", + "method", + "params" + ], + "title": "Thread/inputActivityRequest", + "type": "object" + }, { "properties": { "id": { diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 13c5672917..aadbd1380f 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -473,6 +473,30 @@ "title": "Thread/approveGuardianDeniedActionRequest", "type": "object" }, + { + "properties": { + "id": { + "$ref": "#/definitions/v2/RequestId" + }, + "method": { + "enum": [ + "thread/inputActivity" + ], + "title": "Thread/inputActivityRequestMethod", + "type": "string" + }, + "params": { + "$ref": "#/definitions/v2/ThreadInputActivityParams" + } + }, + "required": [ + "id", + "method", + "params" + ], + "title": "Thread/inputActivityRequest", + "type": "object" + }, { "properties": { "id": 
{ @@ -14929,6 +14953,24 @@ "title": "ThreadInjectItemsResponse", "type": "object" }, + "ThreadInputActivityParams": { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "threadId": { + "type": "string" + } + }, + "required": [ + "threadId" + ], + "title": "ThreadInputActivityParams", + "type": "object" + }, + "ThreadInputActivityResponse": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ThreadInputActivityResponse", + "type": "object" + }, "ThreadItem": { "oneOf": [ { diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json index 13b16e7f5b..edc34f0330 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json @@ -1179,6 +1179,30 @@ "title": "Thread/approveGuardianDeniedActionRequest", "type": "object" }, + { + "properties": { + "id": { + "$ref": "#/definitions/RequestId" + }, + "method": { + "enum": [ + "thread/inputActivity" + ], + "title": "Thread/inputActivityRequestMethod", + "type": "string" + }, + "params": { + "$ref": "#/definitions/ThreadInputActivityParams" + } + }, + "required": [ + "id", + "method", + "params" + ], + "title": "Thread/inputActivityRequest", + "type": "object" + }, { "properties": { "id": { @@ -12815,6 +12839,24 @@ "title": "ThreadInjectItemsResponse", "type": "object" }, + "ThreadInputActivityParams": { + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "threadId": { + "type": "string" + } + }, + "required": [ + "threadId" + ], + "title": "ThreadInputActivityParams", + "type": "object" + }, + "ThreadInputActivityResponse": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ThreadInputActivityResponse", + "type": "object" + }, "ThreadItem": { "oneOf": [ { diff --git 
a/codex-rs/app-server-protocol/schema/json/v2/ThreadInputActivityParams.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadInputActivityParams.json new file mode 100644 index 0000000000..c103da64c5 --- /dev/null +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadInputActivityParams.json @@ -0,0 +1,13 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "threadId": { + "type": "string" + } + }, + "required": [ + "threadId" + ], + "title": "ThreadInputActivityParams", + "type": "object" +} \ No newline at end of file diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadInputActivityResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadInputActivityResponse.json new file mode 100644 index 0000000000..8b8957a868 --- /dev/null +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadInputActivityResponse.json @@ -0,0 +1,5 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ThreadInputActivityResponse", + "type": "object" +} \ No newline at end of file diff --git a/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts b/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts index d1161cdba2..745887dd4f 100644 --- a/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts +++ b/codex-rs/app-server-protocol/schema/typescript/ClientRequest.ts @@ -57,6 +57,7 @@ import type { ThreadArchiveParams } from "./v2/ThreadArchiveParams"; import type { ThreadCompactStartParams } from "./v2/ThreadCompactStartParams"; import type { ThreadForkParams } from "./v2/ThreadForkParams"; import type { ThreadInjectItemsParams } from "./v2/ThreadInjectItemsParams"; +import type { ThreadInputActivityParams } from "./v2/ThreadInputActivityParams"; import type { ThreadListParams } from "./v2/ThreadListParams"; import type { ThreadLoadedListParams } from "./v2/ThreadLoadedListParams"; import type { ThreadMetadataUpdateParams } from "./v2/ThreadMetadataUpdateParams"; @@ -77,4 +78,4 @@ import 
type { WindowsSandboxSetupStartParams } from "./v2/WindowsSandboxSetupSta /** * Request from the client to the server. */ -export type ClientRequest ={ "method": "initialize", id: RequestId, params: InitializeParams, } | { "method": "thread/start", id: RequestId, params: ThreadStartParams, } | { "method": "thread/resume", id: RequestId, params: ThreadResumeParams, } | { "method": "thread/fork", id: RequestId, params: ThreadForkParams, } | { "method": "thread/archive", id: RequestId, params: ThreadArchiveParams, } | { "method": "thread/unsubscribe", id: RequestId, params: ThreadUnsubscribeParams, } | { "method": "thread/name/set", id: RequestId, params: ThreadSetNameParams, } | { "method": "thread/metadata/update", id: RequestId, params: ThreadMetadataUpdateParams, } | { "method": "thread/unarchive", id: RequestId, params: ThreadUnarchiveParams, } | { "method": "thread/compact/start", id: RequestId, params: ThreadCompactStartParams, } | { "method": "thread/shellCommand", id: RequestId, params: ThreadShellCommandParams, } | { "method": "thread/approveGuardianDeniedAction", id: RequestId, params: ThreadApproveGuardianDeniedActionParams, } | { "method": "thread/rollback", id: RequestId, params: ThreadRollbackParams, } | { "method": "thread/list", id: RequestId, params: ThreadListParams, } | { "method": "thread/loaded/list", id: RequestId, params: ThreadLoadedListParams, } | { "method": "thread/read", id: RequestId, params: ThreadReadParams, } | { "method": "thread/turns/list", id: RequestId, params: ThreadTurnsListParams, } | { "method": "thread/inject_items", id: RequestId, params: ThreadInjectItemsParams, } | { "method": "skills/list", id: RequestId, params: SkillsListParams, } | { "method": "marketplace/add", id: RequestId, params: MarketplaceAddParams, } | { "method": "marketplace/remove", id: RequestId, params: MarketplaceRemoveParams, } | { "method": "marketplace/upgrade", id: RequestId, params: MarketplaceUpgradeParams, } | { "method": "plugin/list", id: 
RequestId, params: PluginListParams, } | { "method": "plugin/read", id: RequestId, params: PluginReadParams, } | { "method": "app/list", id: RequestId, params: AppsListParams, } | { "method": "device/key/create", id: RequestId, params: DeviceKeyCreateParams, } | { "method": "device/key/public", id: RequestId, params: DeviceKeyPublicParams, } | { "method": "device/key/sign", id: RequestId, params: DeviceKeySignParams, } | { "method": "fs/readFile", id: RequestId, params: FsReadFileParams, } | { "method": "fs/writeFile", id: RequestId, params: FsWriteFileParams, } | { "method": "fs/createDirectory", id: RequestId, params: FsCreateDirectoryParams, } | { "method": "fs/getMetadata", id: RequestId, params: FsGetMetadataParams, } | { "method": "fs/readDirectory", id: RequestId, params: FsReadDirectoryParams, } | { "method": "fs/remove", id: RequestId, params: FsRemoveParams, } | { "method": "fs/copy", id: RequestId, params: FsCopyParams, } | { "method": "fs/watch", id: RequestId, params: FsWatchParams, } | { "method": "fs/unwatch", id: RequestId, params: FsUnwatchParams, } | { "method": "skills/config/write", id: RequestId, params: SkillsConfigWriteParams, } | { "method": "plugin/install", id: RequestId, params: PluginInstallParams, } | { "method": "plugin/uninstall", id: RequestId, params: PluginUninstallParams, } | { "method": "turn/start", id: RequestId, params: TurnStartParams, } | { "method": "turn/steer", id: RequestId, params: TurnSteerParams, } | { "method": "turn/interrupt", id: RequestId, params: TurnInterruptParams, } | { "method": "review/start", id: RequestId, params: ReviewStartParams, } | { "method": "model/list", id: RequestId, params: ModelListParams, } | { "method": "modelProvider/capabilities/read", id: RequestId, params: ModelProviderCapabilitiesReadParams, } | { "method": "experimentalFeature/list", id: RequestId, params: ExperimentalFeatureListParams, } | { "method": "experimentalFeature/enablement/set", id: RequestId, params: 
ExperimentalFeatureEnablementSetParams, } | { "method": "mcpServer/oauth/login", id: RequestId, params: McpServerOauthLoginParams, } | { "method": "config/mcpServer/reload", id: RequestId, params: undefined, } | { "method": "mcpServerStatus/list", id: RequestId, params: ListMcpServerStatusParams, } | { "method": "mcpServer/resource/read", id: RequestId, params: McpResourceReadParams, } | { "method": "mcpServer/tool/call", id: RequestId, params: McpServerToolCallParams, } | { "method": "windowsSandbox/setupStart", id: RequestId, params: WindowsSandboxSetupStartParams, } | { "method": "account/login/start", id: RequestId, params: LoginAccountParams, } | { "method": "account/login/cancel", id: RequestId, params: CancelLoginAccountParams, } | { "method": "account/logout", id: RequestId, params: undefined, } | { "method": "account/rateLimits/read", id: RequestId, params: undefined, } | { "method": "account/sendAddCreditsNudgeEmail", id: RequestId, params: SendAddCreditsNudgeEmailParams, } | { "method": "feedback/upload", id: RequestId, params: FeedbackUploadParams, } | { "method": "command/exec", id: RequestId, params: CommandExecParams, } | { "method": "command/exec/write", id: RequestId, params: CommandExecWriteParams, } | { "method": "command/exec/terminate", id: RequestId, params: CommandExecTerminateParams, } | { "method": "command/exec/resize", id: RequestId, params: CommandExecResizeParams, } | { "method": "config/read", id: RequestId, params: ConfigReadParams, } | { "method": "externalAgentConfig/detect", id: RequestId, params: ExternalAgentConfigDetectParams, } | { "method": "externalAgentConfig/import", id: RequestId, params: ExternalAgentConfigImportParams, } | { "method": "config/value/write", id: RequestId, params: ConfigValueWriteParams, } | { "method": "config/batchWrite", id: RequestId, params: ConfigBatchWriteParams, } | { "method": "configRequirements/read", id: RequestId, params: undefined, } | { "method": "account/read", id: RequestId, params: 
GetAccountParams, } | { "method": "getConversationSummary", id: RequestId, params: GetConversationSummaryParams, } | { "method": "gitDiffToRemote", id: RequestId, params: GitDiffToRemoteParams, } | { "method": "getAuthStatus", id: RequestId, params: GetAuthStatusParams, } | { "method": "fuzzyFileSearch", id: RequestId, params: FuzzyFileSearchParams, }; +export type ClientRequest ={ "method": "initialize", id: RequestId, params: InitializeParams, } | { "method": "thread/start", id: RequestId, params: ThreadStartParams, } | { "method": "thread/resume", id: RequestId, params: ThreadResumeParams, } | { "method": "thread/fork", id: RequestId, params: ThreadForkParams, } | { "method": "thread/archive", id: RequestId, params: ThreadArchiveParams, } | { "method": "thread/unsubscribe", id: RequestId, params: ThreadUnsubscribeParams, } | { "method": "thread/name/set", id: RequestId, params: ThreadSetNameParams, } | { "method": "thread/metadata/update", id: RequestId, params: ThreadMetadataUpdateParams, } | { "method": "thread/unarchive", id: RequestId, params: ThreadUnarchiveParams, } | { "method": "thread/compact/start", id: RequestId, params: ThreadCompactStartParams, } | { "method": "thread/shellCommand", id: RequestId, params: ThreadShellCommandParams, } | { "method": "thread/approveGuardianDeniedAction", id: RequestId, params: ThreadApproveGuardianDeniedActionParams, } | { "method": "thread/inputActivity", id: RequestId, params: ThreadInputActivityParams, } | { "method": "thread/rollback", id: RequestId, params: ThreadRollbackParams, } | { "method": "thread/list", id: RequestId, params: ThreadListParams, } | { "method": "thread/loaded/list", id: RequestId, params: ThreadLoadedListParams, } | { "method": "thread/read", id: RequestId, params: ThreadReadParams, } | { "method": "thread/turns/list", id: RequestId, params: ThreadTurnsListParams, } | { "method": "thread/inject_items", id: RequestId, params: ThreadInjectItemsParams, } | { "method": "skills/list", id: RequestId, 
params: SkillsListParams, } | { "method": "marketplace/add", id: RequestId, params: MarketplaceAddParams, } | { "method": "marketplace/remove", id: RequestId, params: MarketplaceRemoveParams, } | { "method": "marketplace/upgrade", id: RequestId, params: MarketplaceUpgradeParams, } | { "method": "plugin/list", id: RequestId, params: PluginListParams, } | { "method": "plugin/read", id: RequestId, params: PluginReadParams, } | { "method": "app/list", id: RequestId, params: AppsListParams, } | { "method": "device/key/create", id: RequestId, params: DeviceKeyCreateParams, } | { "method": "device/key/public", id: RequestId, params: DeviceKeyPublicParams, } | { "method": "device/key/sign", id: RequestId, params: DeviceKeySignParams, } | { "method": "fs/readFile", id: RequestId, params: FsReadFileParams, } | { "method": "fs/writeFile", id: RequestId, params: FsWriteFileParams, } | { "method": "fs/createDirectory", id: RequestId, params: FsCreateDirectoryParams, } | { "method": "fs/getMetadata", id: RequestId, params: FsGetMetadataParams, } | { "method": "fs/readDirectory", id: RequestId, params: FsReadDirectoryParams, } | { "method": "fs/remove", id: RequestId, params: FsRemoveParams, } | { "method": "fs/copy", id: RequestId, params: FsCopyParams, } | { "method": "fs/watch", id: RequestId, params: FsWatchParams, } | { "method": "fs/unwatch", id: RequestId, params: FsUnwatchParams, } | { "method": "skills/config/write", id: RequestId, params: SkillsConfigWriteParams, } | { "method": "plugin/install", id: RequestId, params: PluginInstallParams, } | { "method": "plugin/uninstall", id: RequestId, params: PluginUninstallParams, } | { "method": "turn/start", id: RequestId, params: TurnStartParams, } | { "method": "turn/steer", id: RequestId, params: TurnSteerParams, } | { "method": "turn/interrupt", id: RequestId, params: TurnInterruptParams, } | { "method": "review/start", id: RequestId, params: ReviewStartParams, } | { "method": "model/list", id: RequestId, params: 
ModelListParams, } | { "method": "modelProvider/capabilities/read", id: RequestId, params: ModelProviderCapabilitiesReadParams, } | { "method": "experimentalFeature/list", id: RequestId, params: ExperimentalFeatureListParams, } | { "method": "experimentalFeature/enablement/set", id: RequestId, params: ExperimentalFeatureEnablementSetParams, } | { "method": "mcpServer/oauth/login", id: RequestId, params: McpServerOauthLoginParams, } | { "method": "config/mcpServer/reload", id: RequestId, params: undefined, } | { "method": "mcpServerStatus/list", id: RequestId, params: ListMcpServerStatusParams, } | { "method": "mcpServer/resource/read", id: RequestId, params: McpResourceReadParams, } | { "method": "mcpServer/tool/call", id: RequestId, params: McpServerToolCallParams, } | { "method": "windowsSandbox/setupStart", id: RequestId, params: WindowsSandboxSetupStartParams, } | { "method": "account/login/start", id: RequestId, params: LoginAccountParams, } | { "method": "account/login/cancel", id: RequestId, params: CancelLoginAccountParams, } | { "method": "account/logout", id: RequestId, params: undefined, } | { "method": "account/rateLimits/read", id: RequestId, params: undefined, } | { "method": "account/sendAddCreditsNudgeEmail", id: RequestId, params: SendAddCreditsNudgeEmailParams, } | { "method": "feedback/upload", id: RequestId, params: FeedbackUploadParams, } | { "method": "command/exec", id: RequestId, params: CommandExecParams, } | { "method": "command/exec/write", id: RequestId, params: CommandExecWriteParams, } | { "method": "command/exec/terminate", id: RequestId, params: CommandExecTerminateParams, } | { "method": "command/exec/resize", id: RequestId, params: CommandExecResizeParams, } | { "method": "config/read", id: RequestId, params: ConfigReadParams, } | { "method": "externalAgentConfig/detect", id: RequestId, params: ExternalAgentConfigDetectParams, } | { "method": "externalAgentConfig/import", id: RequestId, params: ExternalAgentConfigImportParams, } | 
{ "method": "config/value/write", id: RequestId, params: ConfigValueWriteParams, } | { "method": "config/batchWrite", id: RequestId, params: ConfigBatchWriteParams, } | { "method": "configRequirements/read", id: RequestId, params: undefined, } | { "method": "account/read", id: RequestId, params: GetAccountParams, } | { "method": "getConversationSummary", id: RequestId, params: GetConversationSummaryParams, } | { "method": "gitDiffToRemote", id: RequestId, params: GitDiffToRemoteParams, } | { "method": "getAuthStatus", id: RequestId, params: GetAuthStatusParams, } | { "method": "fuzzyFileSearch", id: RequestId, params: FuzzyFileSearchParams, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/ThreadInputActivityParams.ts b/codex-rs/app-server-protocol/schema/typescript/v2/ThreadInputActivityParams.ts new file mode 100644 index 0000000000..cbb59eea1f --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/ThreadInputActivityParams.ts @@ -0,0 +1,5 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +export type ThreadInputActivityParams = { threadId: string, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/ThreadInputActivityResponse.ts b/codex-rs/app-server-protocol/schema/typescript/v2/ThreadInputActivityResponse.ts new file mode 100644 index 0000000000..1e5f2908fd --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/ThreadInputActivityResponse.ts @@ -0,0 +1,5 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. 
+ +export type ThreadInputActivityResponse = Record<string, never>; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts index dcc46b0aac..422b08d330 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts @@ -343,6 +343,8 @@ export type { ThreadGoalStatus } from "./ThreadGoalStatus"; export type { ThreadGoalUpdatedNotification } from "./ThreadGoalUpdatedNotification"; export type { ThreadInjectItemsParams } from "./ThreadInjectItemsParams"; export type { ThreadInjectItemsResponse } from "./ThreadInjectItemsResponse"; +export type { ThreadInputActivityParams } from "./ThreadInputActivityParams"; +export type { ThreadInputActivityResponse } from "./ThreadInputActivityResponse"; export type { ThreadItem } from "./ThreadItem"; export type { ThreadListParams } from "./ThreadListParams"; export type { ThreadListResponse } from "./ThreadListResponse"; diff --git a/codex-rs/app-server-protocol/src/protocol/common.rs b/codex-rs/app-server-protocol/src/protocol/common.rs index d5e00e242b..cfa3357d5d 100644 --- a/codex-rs/app-server-protocol/src/protocol/common.rs +++ b/codex-rs/app-server-protocol/src/protocol/common.rs @@ -544,6 +544,11 @@ client_request_definitions!
{ serialization: thread_id(params.thread_id), response: v2::ThreadBackgroundTerminalsCleanResponse, }, + ThreadInputActivity => "thread/inputActivity" { + params: v2::ThreadInputActivityParams, + serialization: thread_id(params.thread_id), + response: v2::ThreadInputActivityResponse, + }, ThreadRollback => "thread/rollback" { params: v2::ThreadRollbackParams, serialization: thread_id(params.thread_id), diff --git a/codex-rs/app-server-protocol/src/protocol/thread_history.rs b/codex-rs/app-server-protocol/src/protocol/thread_history.rs index 019c9fa83e..04cabf4d4b 100644 --- a/codex-rs/app-server-protocol/src/protocol/thread_history.rs +++ b/codex-rs/app-server-protocol/src/protocol/thread_history.rs @@ -88,6 +88,11 @@ pub struct ThreadHistoryBuilder { next_item_index: i64, current_rollout_index: usize, next_rollout_index: usize, + // Current streams emit per-attempt spawn ids (`call_id`, `call_id#2`, ...); legacy rollouts + // reused the raw tool call_id for each retry, so replay still synthesizes stable per-attempt + // item ids when needed to preserve each attempt row. 
+ current_spawn_attempt_ids: HashMap, + spawn_attempt_counts: HashMap, } impl Default for ThreadHistoryBuilder { @@ -104,6 +109,8 @@ impl ThreadHistoryBuilder { next_item_index: 1, current_rollout_index: 0, next_rollout_index: 0, + current_spawn_attempt_ids: HashMap::new(), + spawn_attempt_counts: HashMap::new(), } } @@ -232,7 +239,9 @@ impl ThreadHistoryBuilder { RolloutItem::EventMsg(event) => self.handle_event(event), RolloutItem::Compacted(payload) => self.handle_compacted(payload), RolloutItem::ResponseItem(item) => self.handle_response_item(item), - RolloutItem::TurnContext(_) | RolloutItem::SessionMeta(_) => {} + RolloutItem::TurnContext(_) + | RolloutItem::SessionMeta(_) + | RolloutItem::ForkReference(_) => {} } } @@ -601,8 +610,9 @@ impl ThreadHistoryBuilder { &mut self, payload: &codex_protocol::protocol::CollabAgentSpawnBeginEvent, ) { + let item_id = self.next_collab_spawn_attempt_item_id(&payload.call_id); let item = ThreadItem::CollabAgentToolCall { - id: payload.call_id.clone(), + id: item_id, tool: CollabAgentTool::SpawnAgent, status: CollabAgentToolCallStatus::InProgress, sender_thread_id: payload.sender_thread_id.to_string(), @@ -619,6 +629,10 @@ impl ThreadHistoryBuilder { &mut self, payload: &codex_protocol::protocol::CollabAgentSpawnEndEvent, ) { + let item_id = self + .current_spawn_attempt_ids + .remove(&payload.call_id) + .unwrap_or_else(|| payload.call_id.clone()); let has_receiver = payload.new_thread_id.is_some(); let status = match &payload.status { AgentStatus::Errored(_) | AgentStatus::NotFound => CollabAgentToolCallStatus::Failed, @@ -637,7 +651,7 @@ impl ThreadHistoryBuilder { None => (Vec::new(), HashMap::new()), }; self.upsert_item_in_current_turn(ThreadItem::CollabAgentToolCall { - id: payload.call_id.clone(), + id: item_id, tool: CollabAgentTool::SpawnAgent, status, sender_thread_id: payload.sender_thread_id.to_string(), @@ -984,6 +998,8 @@ impl ThreadHistoryBuilder { } fn finish_current_turn(&mut self) { + 
self.current_spawn_attempt_ids.clear(); + self.spawn_attempt_counts.clear(); if let Some(turn) = self.current_turn.take() { if turn.items.is_empty() && !turn.opened_explicitly && !turn.saw_compaction { return; @@ -1057,6 +1073,22 @@ impl ThreadHistoryBuilder { id } + fn next_collab_spawn_attempt_item_id(&mut self, call_id: &str) -> String { + let attempt_number = self + .spawn_attempt_counts + .entry(call_id.to_string()) + .and_modify(|count| *count += 1) + .or_insert(1); + let item_id = if *attempt_number == 1 { + call_id.to_string() + } else { + format!("{call_id}#{attempt_number}") + }; + self.current_spawn_attempt_ids + .insert(call_id.to_string(), item_id.clone()); + item_id + } + fn build_user_inputs(&self, payload: &UserMessageEvent) -> Vec { let mut content = Vec::new(); if !payload.message.trim().is_empty() { @@ -2825,6 +2857,157 @@ mod tests { ); } + #[test] + fn reconstructs_collab_spawn_end_without_receiver_as_failed_spawn_attempt() { + let sender_thread_id = ThreadId::try_from("00000000-0000-0000-0000-000000000001") + .expect("valid sender thread id"); + let events = vec![ + EventMsg::UserMessage(UserMessageEvent { + message: "spawn agent".into(), + images: None, + text_elements: Vec::new(), + local_images: Vec::new(), + }), + EventMsg::CollabAgentSpawnBegin(codex_protocol::protocol::CollabAgentSpawnBeginEvent { + call_id: "spawn-1".into(), + sender_thread_id, + prompt: "inspect the repo".into(), + model: "gpt-5.4-mini".into(), + reasoning_effort: codex_protocol::openai_models::ReasoningEffort::Medium, + }), + EventMsg::CollabAgentSpawnEnd(codex_protocol::protocol::CollabAgentSpawnEndEvent { + call_id: "spawn-1".into(), + sender_thread_id, + new_thread_id: None, + new_agent_nickname: None, + new_agent_role: None, + prompt: "inspect the repo".into(), + model: "gpt-5.4-mini".into(), + reasoning_effort: codex_protocol::openai_models::ReasoningEffort::Medium, + status: AgentStatus::PendingInit, + }), + ]; + + let items = events + .into_iter() + 
.map(RolloutItem::EventMsg) + .collect::>(); + let turns = build_turns_from_rollout_items(&items); + assert_eq!(turns.len(), 1); + assert_eq!(turns[0].items.len(), 2); + assert_eq!( + turns[0].items[1], + ThreadItem::CollabAgentToolCall { + id: "spawn-1".into(), + tool: CollabAgentTool::SpawnAgent, + status: CollabAgentToolCallStatus::Failed, + sender_thread_id: "00000000-0000-0000-0000-000000000001".into(), + receiver_thread_ids: Vec::new(), + prompt: Some("inspect the repo".into()), + model: Some("gpt-5.4-mini".into()), + reasoning_effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium), + agents_states: HashMap::new(), + } + ); + } + + #[test] + fn reconstructs_collab_spawn_retries_as_distinct_attempt_items() { + let sender_thread_id = ThreadId::try_from("00000000-0000-0000-0000-000000000001") + .expect("valid sender thread id"); + let spawned_thread_id = ThreadId::try_from("00000000-0000-0000-0000-000000000002") + .expect("valid receiver thread id"); + let events = vec![ + EventMsg::UserMessage(UserMessageEvent { + message: "spawn agent".into(), + images: None, + text_elements: Vec::new(), + local_images: Vec::new(), + }), + EventMsg::CollabAgentSpawnBegin(codex_protocol::protocol::CollabAgentSpawnBeginEvent { + call_id: "spawn-1".into(), + sender_thread_id, + prompt: "inspect the repo".into(), + model: "gpt-5.4-mini".into(), + reasoning_effort: codex_protocol::openai_models::ReasoningEffort::Low, + }), + EventMsg::CollabAgentSpawnEnd(codex_protocol::protocol::CollabAgentSpawnEndEvent { + call_id: "spawn-1".into(), + sender_thread_id, + new_thread_id: None, + new_agent_nickname: None, + new_agent_role: None, + prompt: "inspect the repo".into(), + model: "gpt-5.4-mini".into(), + reasoning_effort: codex_protocol::openai_models::ReasoningEffort::Low, + status: AgentStatus::Errored("insufficient_quota".into()), + }), + EventMsg::CollabAgentSpawnBegin(codex_protocol::protocol::CollabAgentSpawnBeginEvent { + call_id: "spawn-1".into(), + sender_thread_id, 
+ prompt: "inspect the repo".into(), + model: "gpt-5".into(), + reasoning_effort: codex_protocol::openai_models::ReasoningEffort::Medium, + }), + EventMsg::CollabAgentSpawnEnd(codex_protocol::protocol::CollabAgentSpawnEndEvent { + call_id: "spawn-1".into(), + sender_thread_id, + new_thread_id: Some(spawned_thread_id), + new_agent_nickname: Some("Scout".into()), + new_agent_role: Some("explorer".into()), + prompt: "inspect the repo".into(), + model: "gpt-5".into(), + reasoning_effort: codex_protocol::openai_models::ReasoningEffort::Medium, + status: AgentStatus::Running, + }), + ]; + + let items = events + .into_iter() + .map(RolloutItem::EventMsg) + .collect::>(); + let turns = build_turns_from_rollout_items(&items); + + assert_eq!(turns.len(), 1); + assert_eq!(turns[0].items.len(), 3); + assert_eq!( + turns[0].items[1], + ThreadItem::CollabAgentToolCall { + id: "spawn-1".into(), + tool: CollabAgentTool::SpawnAgent, + status: CollabAgentToolCallStatus::Failed, + sender_thread_id: "00000000-0000-0000-0000-000000000001".into(), + receiver_thread_ids: Vec::new(), + prompt: Some("inspect the repo".into()), + model: Some("gpt-5.4-mini".into()), + reasoning_effort: Some(codex_protocol::openai_models::ReasoningEffort::Low), + agents_states: HashMap::new(), + } + ); + assert_eq!( + turns[0].items[2], + ThreadItem::CollabAgentToolCall { + id: "spawn-1#2".into(), + tool: CollabAgentTool::SpawnAgent, + status: CollabAgentToolCallStatus::Completed, + sender_thread_id: "00000000-0000-0000-0000-000000000001".into(), + receiver_thread_ids: vec!["00000000-0000-0000-0000-000000000002".into()], + prompt: Some("inspect the repo".into()), + model: Some("gpt-5".into()), + reasoning_effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium), + agents_states: [( + "00000000-0000-0000-0000-000000000002".into(), + CollabAgentState { + status: crate::protocol::v2::CollabAgentStatus::Running, + message: None, + }, + )] + .into_iter() + .collect(), + } + ); + } + #[test] fn 
reconstructs_interrupted_send_input_as_completed_collab_call() { // `send_input(interrupt=true)` first stops the child's active turn, then redirects it with diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 817ab41a4d..5d3d03bb0a 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -4105,6 +4105,18 @@ pub struct ThreadBackgroundTerminalsCleanParams { #[ts(export_to = "v2/")] pub struct ThreadBackgroundTerminalsCleanResponse {} +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct ThreadInputActivityParams { + pub thread_id: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(rename_all = "camelCase")] +#[ts(export_to = "v2/")] +pub struct ThreadInputActivityResponse {} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -8716,6 +8728,38 @@ mod tests { assert_eq!(decoded, response); } + #[test] + fn thread_input_activity_params_round_trip() { + let params = ThreadInputActivityParams { + thread_id: "thr_123".to_string(), + }; + + let value = serde_json::to_value(¶ms).expect("serialize thread/inputActivity params"); + assert_eq!( + value, + json!({ + "threadId": "thr_123", + }) + ); + + let decoded = serde_json::from_value::(value) + .expect("deserialize thread/inputActivity params"); + assert_eq!(decoded, params); + } + + #[test] + fn thread_input_activity_response_round_trip() { + let response = ThreadInputActivityResponse {}; + + let value = + serde_json::to_value(&response).expect("serialize thread/inputActivity response"); + assert_eq!(value, json!({})); + + let decoded = serde_json::from_value::(value) + .expect("deserialize thread/inputActivity response"); + assert_eq!(decoded, response); + } + #[test] fn 
fs_changed_notification_round_trips() { let notification = FsChangedNotification { diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index fd3b70bbd1..9fb4d0275b 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -165,6 +165,7 @@ Example with notification opt-out: - `thread/compact/start` — trigger conversation history compaction for a thread; returns `{}` immediately while progress streams through standard turn/item notifications. - `thread/shellCommand` — run a user-initiated `!` shell command against a thread; this runs unsandboxed with full access rather than inheriting the thread sandbox policy. Returns `{}` immediately while progress streams through standard turn/item notifications and any active turn receives the formatted output in its message stream. - `thread/backgroundTerminals/clean` — terminate all running background terminals for a thread (experimental; requires `capabilities.experimentalApi`); returns `{}` when the cleanup request is accepted. +- `thread/inputActivity` — record owner-side draft activity for a thread without starting or steering a turn; returns `{}` and is primarily useful for watchdog idle timers while the user is still typing. - `thread/rollback` — drop the last N turns from the agent’s in-memory context and persist a rollback marker in the rollout so future resumes see the pruned history; returns the updated `thread` (with `turns` populated) on success. - `turn/start` — add user input to a thread and begin Codex generation; responds with the initial `turn` object and streams `turn/started`, `item/*`, and `turn/completed` notifications. Prefer `permissionProfile` for permission overrides; the legacy `sandboxPolicy` field is still accepted but cannot be combined with `permissionProfile`. For `collaborationMode`, `settings.developer_instructions: null` means "use built-in instructions for the selected mode". 
- `thread/inject_items` — append raw Responses API items to a loaded thread’s model-visible history without starting a user turn; returns `{}` on success. @@ -827,6 +828,18 @@ not emit `turn/started` and does not accept turn context overrides. active turn, or the active turn kind does not accept same-turn steering (for example review or manual compaction), the request fails with an `invalid request` error. +### Example: Mark owner draft activity + +Use `thread/inputActivity` to keep watchdog idle timing honest while the user is still editing a +draft in the client. + +```json +{ "method": "thread/inputActivity", "id": 33, "params": { + "threadId": "thr_123" +} } +{ "id": 33, "result": {} } +``` + ### Example: Request a code review Use `review/start` to run Codex’s reviewer on the currently checked-out project. The request takes the thread id plus a `target` describing what should be reviewed: diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index d250959d8d..fee8046c7d 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -166,6 +166,8 @@ use codex_app_server_protocol::ThreadIncrementElicitationParams; use codex_app_server_protocol::ThreadIncrementElicitationResponse; use codex_app_server_protocol::ThreadInjectItemsParams; use codex_app_server_protocol::ThreadInjectItemsResponse; +use codex_app_server_protocol::ThreadInputActivityParams; +use codex_app_server_protocol::ThreadInputActivityResponse; use codex_app_server_protocol::ThreadItem; use codex_app_server_protocol::ThreadListCwdFilter; use codex_app_server_protocol::ThreadListParams; @@ -260,6 +262,7 @@ use codex_core::find_archived_thread_path_by_id_str; use codex_core::find_thread_name_by_id; use codex_core::find_thread_names_by_ids; use codex_core::find_thread_path_by_id_str; +use codex_core::materialize_rollout_items_for_replay; use codex_core::path_utils; use 
codex_core::plugins::PluginInstallError as CorePluginInstallError; use codex_core::plugins::PluginInstallRequest; @@ -386,6 +389,7 @@ use codex_utils_absolute_path::AbsolutePathBuf; use codex_utils_pty::DEFAULT_OUTPUT_BYTES_CAP; use std::collections::HashMap; use std::collections::HashSet; +use std::ffi::OsStr; use std::io::Error as IoError; use std::path::Path; use std::path::PathBuf; @@ -1052,6 +1056,10 @@ impl CodexMessageProcessor { ) .await; } + ClientRequest::ThreadInputActivity { request_id, params } => { + self.thread_input_activity(to_connection_request_id(request_id), params) + .await; + } ClientRequest::ThreadRollback { request_id, params } => { self.thread_rollback(to_connection_request_id(request_id), params) .await; @@ -3638,6 +3646,40 @@ impl CodexMessageProcessor { self.outgoing.send_result(request_id, result).await; } + async fn thread_input_activity( + &self, + request_id: ConnectionRequestId, + params: ThreadInputActivityParams, + ) { + let ThreadInputActivityParams { thread_id } = params; + + let (_, thread) = match self.load_thread(&thread_id).await { + Ok(v) => v, + Err(error) => { + self.outgoing.send_error(request_id, error).await; + return; + } + }; + + match self + .submit_core_op(&request_id, thread.as_ref(), Op::NoteOwnerActivity) + .await + { + Ok(_) => { + self.outgoing + .send_response(request_id, ThreadInputActivityResponse {}) + .await; + } + Err(err) => { + self.send_internal_error( + request_id, + format!("failed to record thread input activity: {err}"), + ) + .await; + } + } + } + async fn thread_shell_command( &self, request_id: ConnectionRequestId, @@ -4654,7 +4696,28 @@ impl CodexMessageProcessor { include_archived: true, include_history, }; - self.thread_store.read_thread(params).await + match self.thread_store.read_thread(params).await { + Ok(thread) if thread.archived_at.is_some() => { + let thread_id = thread.thread_id; + match self + .thread_store + .unarchive_thread(StoreArchiveThreadParams { thread_id }) + .await + { + 
Ok(_) => { + self.thread_store + .read_thread(StoreReadThreadParams { + thread_id, + include_archived: false, + include_history, + }) + .await + } + Err(err) => Err(err), + } + } + result => result, + } }; result.map_err(thread_store_resume_read_error) @@ -9287,13 +9350,17 @@ pub(crate) async fn read_summary_from_rollout( .unwrap_or_else(|| fallback_provider.to_string()); let git_info = git.as_ref().map(map_git_info); let updated_at = updated_at.or_else(|| timestamp.clone()); + let preview = read_rollout_items_from_rollout(path) + .await + .map(|items| preview_from_rollout_items(&items)) + .unwrap_or_default(); Ok(ConversationSummary { conversation_id: session_meta.id, timestamp, updated_at, path: path.to_path_buf(), - preview: String::new(), + preview, model_provider, cwd: session_meta.cwd, cli_version: session_meta.cli_version, @@ -9311,6 +9378,10 @@ pub(crate) async fn read_rollout_items_from_rollout( InitialHistory::Resumed(resumed) => resumed.history, }; + if let Some(codex_home) = codex_home_from_rollout_path(path) { + return Ok(materialize_rollout_items_for_replay(codex_home, &items).await); + } + Ok(items) } @@ -9395,6 +9466,17 @@ fn preview_from_rollout_items(items: &[RolloutItem]) -> String { .unwrap_or_default() } +fn codex_home_from_rollout_path(path: &Path) -> Option<&Path> { + path.ancestors().find_map(|ancestor| { + let name = ancestor.file_name().and_then(OsStr::to_str)?; + if name == codex_core::SESSIONS_SUBDIR || name == codex_core::ARCHIVED_SESSIONS_SUBDIR { + ancestor.parent() + } else { + None + } + }) +} + fn with_thread_spawn_agent_metadata( source: codex_protocol::protocol::SessionSource, agent_nickname: Option, @@ -10223,13 +10305,19 @@ mod tests { model: "gpt-5".to_string(), model_provider_id: "openai".to_string(), service_tier: Some(codex_protocol::config_types::ServiceTier::Flex), + plan_mode_reasoning_effort: None, + model_verbosity: None, + model_context_window: None, + model_auto_compact_token_limit: None, approval_policy: 
codex_protocol::protocol::AskForApproval::OnRequest, approvals_reviewer: codex_protocol::config_types::ApprovalsReviewer::User, permission_profile: codex_protocol::models::PermissionProfile::Disabled, cwd, ephemeral: false, + agent_use_function_call_inbox: false, reasoning_effort: None, personality: None, + active_profile: None, session_source: SessionSource::Cli, }; diff --git a/codex-rs/app-server/src/message_processor.rs b/codex-rs/app-server/src/message_processor.rs index 1a28f8e278..ec75a3bedd 100644 --- a/codex-rs/app-server/src/message_processor.rs +++ b/codex-rs/app-server/src/message_processor.rs @@ -290,6 +290,8 @@ impl MessageProcessor { config.as_ref(), auth_manager.clone(), session_source, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig { default_mode_request_user_input: config .features diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index 3b4a58a7ab..f972b6fd85 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -16,6 +16,7 @@ use std::path::Path; fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { ModelInfo { slug: preset.id.clone(), + request_model: None, display_name: preset.display_name.clone(), description: Some(preset.description.clone()), default_reasoning_level: Some(preset.default_reasoning_effort), diff --git a/codex-rs/app-server/tests/suite/v2/initialize.rs b/codex-rs/app-server/tests/suite/v2/initialize.rs index 165160468f..bd1ec95d1e 100644 --- a/codex-rs/app-server/tests/suite/v2/initialize.rs +++ b/codex-rs/app-server/tests/suite/v2/initialize.rs @@ -20,11 +20,19 @@ use core_test_support::fs_wait; use pretty_assertions::assert_eq; use serde_json::Value; use std::path::Path; -use std::time::Duration; use tempfile::TempDir; use tokio::time::timeout; -const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); +// This 
covers debug app-server process startup before the first JSON-RPC response, +// not expected steady-state request latency. +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(20); +#[cfg(windows)] +const DEFAULT_NOTIFY_FILE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] +const DEFAULT_NOTIFY_FILE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5); #[tokio::test] async fn initialize_uses_client_info_name_as_originator() -> Result<()> { @@ -270,9 +278,9 @@ async fn turn_start_notify_payload_includes_initialize_client_name() -> Result<( mcp.read_stream_until_notification_message("turn/completed"), ) .await??; - - fs_wait::wait_for_path_exists(¬ify_file, Duration::from_secs(5)).await?; - let payload_raw = tokio::fs::read_to_string(¬ify_file).await?; + let notify_file = Path::new(¬ify_file); + fs_wait::wait_for_path_exists(notify_file, DEFAULT_NOTIFY_FILE_TIMEOUT).await?; + let payload_raw = tokio::fs::read_to_string(notify_file).await?; let payload: Value = serde_json::from_str(&payload_raw)?; assert_eq!(payload["client"], "xcode"); diff --git a/codex-rs/app-server/tests/suite/v2/thread_read.rs b/codex-rs/app-server/tests/suite/v2/thread_read.rs index 8e0e253ac0..ecc1bdf769 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_read.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_read.rs @@ -10,6 +10,8 @@ use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::RequestId; use codex_app_server_protocol::SessionSource; use codex_app_server_protocol::SortDirection; +use codex_app_server_protocol::ThreadArchiveParams; +use codex_app_server_protocol::ThreadArchiveResponse; use codex_app_server_protocol::ThreadForkParams; use codex_app_server_protocol::ThreadForkResponse; use codex_app_server_protocol::ThreadItem; @@ -27,6 +29,8 @@ use 
codex_app_server_protocol::ThreadStartResponse; use codex_app_server_protocol::ThreadStatus; use codex_app_server_protocol::ThreadTurnsListParams; use codex_app_server_protocol::ThreadTurnsListResponse; +use codex_app_server_protocol::ThreadUnarchiveParams; +use codex_app_server_protocol::ThreadUnarchiveResponse; use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::TurnStatus; @@ -413,6 +417,150 @@ async fn thread_read_returns_forked_from_id_for_forked_threads() -> Result<()> { Ok(()) } +#[tokio::test] +async fn thread_read_include_turns_keeps_fork_history_after_parent_archive_and_unarchive() +-> Result<()> { + let server = create_mock_responses_server_repeating_assistant("Done").await; + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let start_id = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ..Default::default() + }) + .await?; + let start_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(start_id)), + ) + .await??; + let ThreadStartResponse { thread: parent, .. 
} = + to_response::(start_resp)?; + + let turn_start_id = mcp + .send_turn_start_request(TurnStartParams { + thread_id: parent.id.clone(), + input: vec![UserInput::Text { + text: "parent message".to_string(), + text_elements: Vec::new(), + }], + ..Default::default() + }) + .await?; + let turn_start_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_start_id)), + ) + .await??; + let _: TurnStartResponse = to_response::(turn_start_resp)?; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let fork_id = mcp + .send_thread_fork_request(ThreadForkParams { + thread_id: parent.id.clone(), + ..Default::default() + }) + .await?; + let fork_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(fork_id)), + ) + .await??; + let ThreadForkResponse { thread: child, .. } = to_response::(fork_resp)?; + + let read_child_id = mcp + .send_thread_read_request(ThreadReadParams { + thread_id: child.id.clone(), + include_turns: true, + }) + .await?; + let read_child_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(read_child_id)), + ) + .await??; + let ThreadReadResponse { + thread: child_before_archive, + } = to_response::(read_child_resp)?; + assert_eq!(child_before_archive.turns.len(), 1); + + let archive_id = mcp + .send_thread_archive_request(ThreadArchiveParams { + thread_id: parent.id.clone(), + }) + .await?; + let archive_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(archive_id)), + ) + .await??; + let _: ThreadArchiveResponse = to_response::(archive_resp)?; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("thread/archived"), + ) + .await??; + + let read_child_id = mcp + .send_thread_read_request(ThreadReadParams { 
+ thread_id: child.id.clone(), + include_turns: true, + }) + .await?; + let read_child_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(read_child_id)), + ) + .await??; + let ThreadReadResponse { + thread: child_after_archive, + } = to_response::(read_child_resp)?; + assert_eq!(child_after_archive.turns, child_before_archive.turns); + + let unarchive_id = mcp + .send_thread_unarchive_request(ThreadUnarchiveParams { + thread_id: parent.id, + }) + .await?; + let unarchive_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(unarchive_id)), + ) + .await??; + let _: ThreadUnarchiveResponse = to_response::(unarchive_resp)?; + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("thread/unarchived"), + ) + .await??; + + let read_child_id = mcp + .send_thread_read_request(ThreadReadParams { + thread_id: child.id, + include_turns: true, + }) + .await?; + let read_child_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(read_child_id)), + ) + .await??; + let ThreadReadResponse { + thread: child_after_unarchive, + } = to_response::(read_child_resp)?; + assert_eq!(child_after_unarchive.turns, child_before_archive.turns); + + Ok(()) +} + #[tokio::test] async fn thread_read_loaded_thread_returns_precomputed_path_before_materialization() -> Result<()> { let server = create_mock_responses_server_repeating_assistant("Done").await; @@ -670,6 +818,62 @@ async fn thread_read_include_turns_rejects_unmaterialized_loaded_thread() -> Res Ok(()) } +#[tokio::test] +async fn thread_read_loaded_ephemeral_thread_ignores_unrelated_rollout_mentions() -> Result<()> { + let server = create_mock_responses_server_repeating_assistant("Done").await; + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = 
McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let start_id = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ephemeral: Some(true), + ..Default::default() + }) + .await?; + let start_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(start_id)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(start_resp)?; + + let unrelated_preview = thread.id.clone(); + let _unrelated_rollout_id = create_fake_rollout_with_text_elements( + codex_home.path(), + "2025-01-05T13-00-00", + "2025-01-05T13:00:00Z", + &unrelated_preview, + vec![], + Some("mock_provider"), + /*git_info*/ None, + )?; + + let read_id = mcp + .send_thread_read_request(ThreadReadParams { + thread_id: thread.id.clone(), + include_turns: false, + }) + .await?; + let read_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(read_id)), + ) + .await??; + let ThreadReadResponse { thread: read } = to_response::(read_resp)?; + + assert_eq!(read.id, thread.id); + assert!(read.ephemeral); + assert_eq!(read.path, None); + assert!(read.preview.is_empty()); + assert_eq!(read.status, ThreadStatus::Idle); + + Ok(()) +} + #[tokio::test] async fn thread_read_reports_system_error_idle_flag_after_failed_turn() -> Result<()> { let server = responses::start_mock_server().await; diff --git a/codex-rs/app-server/tests/suite/v2/thread_resume.rs b/codex-rs/app-server/tests/suite/v2/thread_resume.rs index d9f5f039de..99933f22f3 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_resume.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_resume.rs @@ -717,7 +717,6 @@ async fn thread_resume_by_path_uses_remote_thread_store_error() -> Result<()> { let server = create_mock_responses_server_repeating_assistant("Done").await; let codex_home = TempDir::new()?; 
create_config_toml_with_remote_thread_store(codex_home.path(), &server.uri())?; - let mut mcp = McpProcess::new(codex_home.path()).await?; timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; @@ -743,6 +742,62 @@ async fn thread_resume_by_path_uses_remote_thread_store_error() -> Result<()> { Ok(()) } +#[tokio::test] +async fn thread_resume_unarchives_archived_rollout() -> Result<()> { + let server = create_mock_responses_server_repeating_assistant("Done").await; + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let filename_ts = "2025-01-05T12-00-00"; + let conversation_id = create_fake_rollout_with_text_elements( + codex_home.path(), + filename_ts, + "2025-01-05T12:00:00Z", + "Saved user message", + Vec::new(), + Some("mock_provider"), + /*git_info*/ None, + )?; + let active_rollout_path = rollout_path(codex_home.path(), filename_ts, &conversation_id); + let archived_rollout_path = codex_home.path().join("archived_sessions/2025/01/05").join( + active_rollout_path + .file_name() + .expect("active rollout file name"), + ); + std::fs::create_dir_all( + archived_rollout_path + .parent() + .expect("archived rollout parent directory"), + )?; + std::fs::rename(&active_rollout_path, &archived_rollout_path)?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let resume_id = mcp + .send_thread_resume_request(ThreadResumeParams { + thread_id: conversation_id.clone(), + ..Default::default() + }) + .await?; + let resume_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(resume_id)), + ) + .await??; + let ThreadResumeResponse { thread, .. 
} = to_response::(resume_resp)?; + + assert_eq!(thread.id, conversation_id); + assert!(active_rollout_path.exists()); + assert!(!archived_rollout_path.exists()); + assert_eq!( + std::fs::canonicalize(thread.path.as_ref().expect("thread path"))?, + std::fs::canonicalize(&active_rollout_path)? + ); + + Ok(()) +} + #[tokio::test] async fn thread_resume_emits_restored_token_usage_before_next_turn() -> Result<()> { let server = create_mock_responses_server_repeating_assistant("Done").await; diff --git a/codex-rs/app-server/tests/suite/v2/thread_start.rs b/codex-rs/app-server/tests/suite/v2/thread_start.rs index fa44f049e9..96fbe173a1 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_start.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_start.rs @@ -335,9 +335,14 @@ async fn thread_start_accepts_flex_service_tier() -> Result<()> { let codex_home = TempDir::new()?; create_config_toml_without_approval_policy(codex_home.path(), &server.uri())?; + let read_timeout = if cfg!(windows) { + std::time::Duration::from_secs(15) + } else { + DEFAULT_READ_TIMEOUT + }; let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let req_id = mcp .send_thread_start_request(ThreadStartParams { @@ -347,7 +352,7 @@ async fn thread_start_accepts_flex_service_tier() -> Result<()> { .await?; let resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(req_id)), ) .await??; diff --git a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs index c0188add8c..3506dd9323 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs @@ -34,6 +34,9 @@ use serde_json::json; use tempfile::TempDir; use tokio::time::timeout; +#[cfg(windows)] +const DEFAULT_READ_TIMEOUT: 
std::time::Duration = std::time::Duration::from_secs(25); +#[cfg(not(windows))] const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); #[tokio::test] async fn thread_unsubscribe_keeps_thread_loaded_until_idle_timeout() -> Result<()> { diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index 3ff04d5022..a0515aeca7 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -1469,24 +1469,16 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> { // Second turn same, but we'll set approval_policy=never to avoid elicitation. let responses = vec![ create_shell_command_sse_response( - vec![ - "python3".to_string(), - "-c".to_string(), - "print(42)".to_string(), - ], + fast_shell_command(), /*workdir*/ None, - Some(5000), + Some(1000), "call1", )?, create_final_assistant_message_sse_response("done 1")?, create_shell_command_sse_response( - vec![ - "python3".to_string(), - "-c".to_string(), - "print(42)".to_string(), - ], + fast_shell_command(), /*workdir*/ None, - Some(5000), + Some(1000), "call2", )?, create_final_assistant_message_sse_response("done 2")?, @@ -1613,6 +1605,23 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> { Ok(()) } +fn fast_shell_command() -> Vec { + if cfg!(windows) { + vec![ + "cmd".to_string(), + "/d".to_string(), + "/c".to_string(), + "echo 42".to_string(), + ] + } else { + vec![ + "python3".to_string(), + "-c".to_string(), + "print(42)".to_string(), + ] + } +} + #[tokio::test] async fn turn_start_exec_approval_decline_v2() -> Result<()> { skip_if_no_network!(Ok(())); @@ -2528,12 +2537,18 @@ async fn turn_start_streams_apply_patch_change_updates_v2() -> Result<()> { } #[tokio::test] -async fn turn_start_emits_spawn_agent_item_with_model_metadata_v2() -> Result<()> { +async fn turn_start_emits_spawn_agent_item_with_effective_inherited_model_metadata_v2() -> Result<()> +{ 
skip_if_no_network!(Ok(())); const CHILD_PROMPT: &str = "child: do work"; const PARENT_PROMPT: &str = "spawn a child and continue"; const SPAWN_CALL_ID: &str = "spawn-call-1"; + const INHERITED_MODEL: &str = "gpt-5.3-codex"; + // thread/start only sets the model here; the session keeps the resolved collaboration-mode + // effort for that thread, so fork-context children should inherit `Medium`, not the requested + // child model's effort. + const INHERITED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::Medium; const REQUESTED_MODEL: &str = "gpt-5.2"; const REQUESTED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::Low; @@ -2589,7 +2604,7 @@ async fn turn_start_emits_spawn_agent_item_with_model_metadata_v2() -> Result<() let thread_req = mcp .send_thread_start_request(ThreadStartParams { - model: Some("gpt-5.3-codex".to_string()), + model: Some(INHERITED_MODEL.to_string()), ..Default::default() }) .await?; @@ -2686,8 +2701,8 @@ async fn turn_start_emits_spawn_agent_item_with_model_metadata_v2() -> Result<() assert_eq!(sender_thread_id, thread.id); assert_eq!(receiver_thread_ids, vec![receiver_thread_id.clone()]); assert_eq!(prompt, Some(CHILD_PROMPT.to_string())); - assert_eq!(model, Some(REQUESTED_MODEL.to_string())); - assert_eq!(reasoning_effort, Some(REQUESTED_REASONING_EFFORT)); + assert_eq!(model, Some(INHERITED_MODEL.to_string())); + assert_eq!(reasoning_effort, Some(INHERITED_REASONING_EFFORT)); let agent_state = agents_states .get(&receiver_thread_id) .expect("spawn completion should include child agent state"); @@ -2728,6 +2743,11 @@ async fn turn_start_emits_spawn_agent_item_with_effective_role_model_metadata_v2 const CHILD_PROMPT: &str = "child: do work"; const PARENT_PROMPT: &str = "spawn a child and continue"; const SPAWN_CALL_ID: &str = "spawn-call-1"; + const INHERITED_MODEL: &str = "gpt-5.3-codex"; + // thread/start only sets the model here; the session keeps the resolved collaboration-mode + // effort for that thread, so fork-context 
children should inherit `Medium`, not the requested + // child model's effort. + const INHERITED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::Medium; const REQUESTED_MODEL: &str = "gpt-5.2"; const REQUESTED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::Low; const ROLE_MODEL: &str = "gpt-5.4"; @@ -2870,8 +2890,8 @@ config_file = "./custom-role.toml" assert_eq!(sender_thread_id, thread.id); assert_eq!(receiver_thread_ids, vec![receiver_thread_id.clone()]); assert_eq!(prompt, Some(CHILD_PROMPT.to_string())); - assert_eq!(model, Some(ROLE_MODEL.to_string())); - assert_eq!(reasoning_effort, Some(ROLE_REASONING_EFFORT)); + assert_eq!(model, Some(INHERITED_MODEL.to_string())); + assert_eq!(reasoning_effort, Some(INHERITED_REASONING_EFFORT)); let agent_state = agents_states .get(&receiver_thread_id) .expect("spawn completion should include child agent state"); diff --git a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs index 31247418e5..47cc2636af 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs @@ -215,9 +215,12 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> { ]), &zsh_path, )?; + // This flow can require several sequential approval round-trips on slower + // macOS runners before the parent command reaches a terminal state. 
+ let read_timeout = std::time::Duration::from_secs(20); let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let start_id = mcp .send_thread_start_request(ThreadStartParams { @@ -227,7 +230,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> { }) .await?; let start_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(start_id)), ) .await??; @@ -348,9 +351,12 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> { ]), &zsh_path, )?; + // This flow can require several sequential approval round-trips on slower + // macOS runners before the parent command reaches a terminal state. + let read_timeout = std::time::Duration::from_secs(20); let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let start_id = mcp .send_thread_start_request(ThreadStartParams { @@ -360,7 +366,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> { }) .await?; let start_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(start_id)), ) .await??; @@ -507,9 +513,10 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() ]), &zsh_path, )?; + let read_timeout = std::time::Duration::from_secs(20); let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + timeout(read_timeout, mcp.initialize()).await??; let start_id = mcp .send_thread_start_request(ThreadStartParams { @@ -519,7 +526,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() }) .await?; let start_resp: 
JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(start_id)), ) .await??; @@ -547,7 +554,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() }) .await?; let turn_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_response_message(RequestId::Integer(turn_id)), ) .await??; @@ -565,11 +572,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() let second_file_str = second_file.to_string_lossy().into_owned(); let parent_shell_hint = format!("&& {}", &first_file_str); while target_decision_index < target_decisions.len() || !saw_parent_approval { - let server_req = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_request_message(), - ) - .await??; + let server_req = timeout(read_timeout, mcp.read_stream_until_request_message()).await??; let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else { panic!("expected CommandExecutionRequestApproval request"); @@ -639,7 +642,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() assert_eq!(approved_subcommand_strings.len(), 2); assert!(approved_subcommand_strings[0].contains(&first_file.display().to_string())); assert!(approved_subcommand_strings[1].contains(&second_file.display().to_string())); - let parent_completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async { + let parent_completed_command_execution = timeout(read_timeout, async { loop { let completed_notif = mcp .read_stream_until_notification_message("item/completed") @@ -681,7 +684,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() } match timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_notification_message("turn/completed"), ) .await @@ -704,7 +707,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() 
mcp.interrupt_turn_and_wait_for_aborted( thread.id.clone(), turn.id.clone(), - DEFAULT_READ_TIMEOUT, + read_timeout, ) .await?; } @@ -717,7 +720,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() // sandbox failures can also complete the turn before the parent // completion item is observed. let completed_notif = timeout( - DEFAULT_READ_TIMEOUT, + read_timeout, mcp.read_stream_until_notification_message("turn/completed"), ) .await??; diff --git a/codex-rs/backend-client/src/types.rs b/codex-rs/backend-client/src/types.rs index d8d24ab9fc..2cd5506b9d 100644 --- a/codex-rs/backend-client/src/types.rs +++ b/codex-rs/backend-client/src/types.rs @@ -309,7 +309,7 @@ where D: Deserializer<'de>, T: Deserialize<'de>, { - Option::>::deserialize(deserializer).map(Option::unwrap_or_default) + Option::>::deserialize(deserializer).map(std::option::Option::unwrap_or_default) } #[derive(Clone, Debug, Deserialize)] diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs index 9881878554..9f77dfb42d 100644 --- a/codex-rs/cli/src/main.rs +++ b/codex-rs/cli/src/main.rs @@ -768,12 +768,16 @@ async fn cli_main(arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> { .await?; handle_app_exit(exit_info)?; } - Some(Subcommand::Exec(mut exec_cli)) => { + Some(Subcommand::Exec(exec_cli)) => { reject_remote_mode_for_subcommand( root_remote.as_deref(), root_remote_auth_token_env.as_deref(), "exec", )?; + let mut exec_cli = match exec_cli.validate() { + Ok(exec_cli) => exec_cli, + Err(err) => err.exit(), + }; exec_cli .shared .inherit_exec_root_options(&interactive.shared); @@ -1411,8 +1415,13 @@ async fn run_debug_models_command( let config = Config::load_with_cli_overrides(cli_overrides).await?; let auth_manager = AuthManager::shared_from_config(&config, /*enable_codex_api_key_env*/ true).await; - let models_manager = - build_models_manager(&config, auth_manager, CollaborationModesConfig::default()); + let models_manager = build_models_manager( + 
&config, + auth_manager, + config.model_catalog.clone(), + config.custom_models.clone(), + CollaborationModesConfig::default(), + ); models_manager .raw_model_catalog(RefreshStrategy::OnlineIfUncached) .await @@ -1795,6 +1804,40 @@ mod tests { assert_eq!(args.session_id.as_deref(), Some("session-123")); assert_eq!(args.prompt.as_deref(), Some("re-review")); } + #[test] + fn exec_fork_accepts_prompt_positional() { + let cli = MultitoolCli::try_parse_from([ + "codex", + "exec", + "--json", + "--fork", + "session-123", + "2+2", + ]) + .expect("parse should succeed"); + + let Some(Subcommand::Exec(exec)) = cli.subcommand else { + panic!("expected exec subcommand"); + }; + + assert_eq!(exec.fork_session_id.as_deref(), Some("session-123")); + assert!(exec.command.is_none()); + assert_eq!(exec.prompt.as_deref(), Some("2+2")); + } + + #[test] + fn exec_fork_conflicts_with_resume_subcommand() { + let cli = + MultitoolCli::try_parse_from(["codex", "exec", "--fork", "session-123", "resume"]) + .expect("parse should succeed"); + + let Some(Subcommand::Exec(exec)) = cli.subcommand else { + panic!("expected exec subcommand"); + }; + + let validate_result = exec.validate(); + assert!(validate_result.is_err()); + } #[test] fn dangerous_bypass_conflicts_with_approval_policy() { diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 9f95c9441f..22e7fd0046 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -53,6 +53,7 @@ async fn models_client_hits_models_endpoint() { let response = ModelsResponse { models: vec![ModelInfo { slug: "gpt-test".to_string(), + request_model: None, display_name: "gpt-test".to_string(), description: Some("desc".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/config/src/config_toml.rs b/codex-rs/config/src/config_toml.rs index 3bd19f5568..b8702072b2 100644 --- 
a/codex-rs/config/src/config_toml.rs +++ b/codex-rs/config/src/config_toml.rs @@ -201,6 +201,10 @@ pub struct ConfigToml { #[serde(default, deserialize_with = "deserialize_model_providers")] pub model_providers: HashMap, + /// User-defined model aliases that can override model context settings. + #[serde(default)] + pub custom_models: Vec, + /// Maximum number of bytes to include from an AGENTS.md project doc file. pub project_doc_max_bytes: Option, @@ -337,6 +341,9 @@ pub struct ConfigToml { /// Agent-related settings (thread limits, etc.). pub agents: Option, + /// Watchdog polling interval in seconds. + pub watchdog_interval_s: Option, + /// Memories subsystem settings. pub memories: Option, @@ -534,6 +541,19 @@ pub struct RealtimeAudioToml { pub speaker: Option, } +#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)] +#[schemars(deny_unknown_fields)] +pub struct CustomModelToml { + /// User-facing alias shown in the model picker. + pub name: String, + /// Provider-facing model slug used on API requests. + pub model: String, + /// Optional context window override applied when this alias is selected. + pub model_context_window: Option, + /// Optional auto-compaction token limit override applied when this alias is selected. + pub model_auto_compact_token_limit: Option, +} + #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, JsonSchema)] #[schemars(deny_unknown_fields)] pub struct ToolsToml { @@ -590,6 +610,10 @@ pub struct AgentsToml { /// Whether to record a model-visible message when an agent turn is interrupted. /// Defaults to true. pub interrupt_message: Option, + /// Deliver inbound agent messages to non-subagent threads as a synthetic + /// function_call/function_call_output pair instead of plain user input. + #[serde(default)] + pub use_function_call_inbox: bool, /// User-defined role declarations keyed by role name. 
/// @@ -611,12 +635,22 @@ pub struct AgentRoleToml { /// Required unless supplied by the referenced agent role file. pub description: Option, + /// Optional model override applied by this role. + pub model: Option, + /// Path to a role-specific config layer. /// Relative paths are resolved relative to the `config.toml` that defines them. pub config_file: Option, + /// Optional watchdog interval in seconds for roles that should behave as watchdogs. + #[schemars(range(min = 1))] + pub watchdog_interval_s: Option, + /// Candidate nicknames for agents spawned with this role. pub nickname_candidates: Option>, + + /// Default fork-context behavior for this role. + pub fork_context: Option, } impl From for Tools { diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index a17426d508..ff51d51570 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -21,12 +21,26 @@ "description": "Human-facing role documentation used in spawn tool guidance. 
Required unless supplied by the referenced agent role file.", "type": "string" }, + "fork_context": { + "description": "Default fork-context behavior for this role.", + "type": "boolean" + }, + "model": { + "description": "Optional model override applied by this role.", + "type": "string" + }, "nickname_candidates": { "description": "Candidate nicknames for agents spawned with this role.", "items": { "type": "string" }, "type": "array" + }, + "watchdog_interval_s": { + "description": "Optional watchdog interval in seconds for roles that should behave as watchdogs.", + "format": "int64", + "minimum": 1.0, + "type": "integer" } }, "type": "object" @@ -57,6 +71,11 @@ "format": "uint", "minimum": 1.0, "type": "integer" + }, + "use_function_call_inbox": { + "default": false, + "description": "Deliver inbound agent messages to non-subagent threads as a synthetic function_call/function_call_output pair instead of plain user input.", + "type": "boolean" } }, "type": "object" @@ -346,6 +365,15 @@ "default": null, "description": "Optional feature toggles scoped to this profile.", "properties": { + "agent_function_call_inbox": { + "type": "boolean" + }, + "agent_prompt_injection": { + "type": "boolean" + }, + "agent_watchdog": { + "type": "boolean" + }, "apply_patch_freeform": { "type": "boolean" }, @@ -672,6 +700,34 @@ }, "type": "object" }, + "CustomModelToml": { + "additionalProperties": false, + "properties": { + "model": { + "description": "Provider-facing model slug used on API requests.", + "type": "string" + }, + "model_auto_compact_token_limit": { + "description": "Optional auto-compaction token limit override applied when this alias is selected.", + "format": "int64", + "type": "integer" + }, + "model_context_window": { + "description": "Optional context window override applied when this alias is selected.", + "format": "int64", + "type": "integer" + }, + "name": { + "description": "User-facing alias shown in the model picker.", + "type": "string" + } + }, + 
"required": [ + "model", + "name" + ], + "type": "object" + }, "ExternalConfigMigrationPrompts": { "additionalProperties": false, "description": "Settings for notices we display to users via the tui and app-server clients (primarily the Codex IDE extension). NOTE: these are different from notifications - notices are warnings, NUX screens, acknowledgements, etc.", @@ -3225,6 +3281,14 @@ "description": "Compact prompt used for history compaction.", "type": "string" }, + "custom_models": { + "default": [], + "description": "User-defined model aliases that can override model context settings.", + "items": { + "$ref": "#/definitions/CustomModelToml" + }, + "type": "array" + }, "default_permissions": { "description": "Default permissions profile to apply. Names starting with `:` refer to built-in profiles; other names are resolved from the `[permissions]` table.", "type": "string" @@ -3288,6 +3352,15 @@ "default": null, "description": "Centralized feature flags (new). Prefer this over individual toggles.", "properties": { + "agent_function_call_inbox": { + "type": "boolean" + }, + "agent_prompt_injection": { + "type": "boolean" + }, + "agent_watchdog": { + "type": "boolean" + }, "apply_patch_freeform": { "type": "boolean" }, @@ -3914,6 +3987,11 @@ ], "description": "Collection of settings that are specific to the TUI." }, + "watchdog_interval_s": { + "description": "Watchdog polling interval in seconds.", + "format": "int64", + "type": "integer" + }, "web_search": { "allOf": [ { diff --git a/codex-rs/core/root_agent_prompt.md b/codex-rs/core/root_agent_prompt.md new file mode 100644 index 0000000000..ed73b8bc29 --- /dev/null +++ b/codex-rs/core/root_agent_prompt.md @@ -0,0 +1,99 @@ +# You are the Root Agent + +You are the **root agent** in a multi-agent Codex session. Until you see `# You are a Subagent`, these instructions define your role. 
If this thread was created from the root thread with `fork_context = true` (a forked child), you may see both sets of instructions; apply subagent instructions as local role guidance while root instructions remain governing system-level rules. + +## Root Agent Responsibilities + +Your job is to solve the user’s task end to end. You are the coordinator, integrator, and final quality gate. + +- Understand the real problem being solved, not just the latest sentence. +- Own the plan, the sequencing, and the final outcome. +- Coordinate subagents so their work does not overlap or conflict. +- Verify results with formatting, linting, and targeted tests. + +Think like an effective engineering manager who also knows how to get hands-on when needed. Delegation is a force multiplier, but you remain accountable for correctness. + +Root agents should not outsource core understanding. Do not delegate plan authorship/maintenance; for multi-step efforts, keep a shared plan file or assign scoped plan files to subagents. + +## Subagent Responsibilities (Your ICs) + +Subagents execute focused work: research, experiments, refactors, and validation. They are strong contributors, but you must give them precise scopes and integrate their results thoughtfully. + +Subagents can become confused if the world changes while they are idle. Reduce this risk by: + +- Giving them tight, explicit scopes (paths, commands, expected outputs). +- Providing updates when you change course. +- Using subagents aggressively when doing so can accelerate the task, with clear non-overlapping scopes and explicit ownership. + +## Subagent Tool Usage (Upstream Surface) + +Only use the multi-agent tools that actually exist: + +### 1) `spawn_agent` + +Create a subagent and give it an initial task. + +Parameters: +- `message` (required): the task description. +- `agent_type` (optional): the role to assign (`default`, `explorer`, `fast-worker`, or `worker`). 
+- `fork_context` (optional): when `true`, the child receives the current thread history. + +Guidance: +- When `fork_context` is omitted, the default comes from the selected role and otherwise falls back to `true`. +- Use `agent_type = "explorer"` for specific codebase questions; it defaults to context-free `spawn`. +- Use `agent_type = "fast-worker"` for tightly constrained execution work that can run from a self-contained prompt; it also defaults to context-free `spawn`. +- Use `agent_type = "worker"` for broader implementation work that should inherit current-thread context; it defaults to `fork`. +- Choose `fork_context = true` vs `false` by context requirements first (not by task shape). +- Use `fork_context = true` when the child should preserve your current conversation history and rely on current-thread context, including: + - current debugging-thread relevance (for example, "summarize only failures relevant to this investigation") + - active plan / ExecPlan branch continuation + - recent user decisions, tradeoffs, or rejected approaches + - parallel review work that should inherit the same context automatically +- Use `fork_context = false` only when the child can do the task correctly from a fresh prompt you provide now, without needing current-thread context. +- For `fork_context = false`, make the task, inputs, and expected output explicit (especially for independent, output-heavy work where you want the child to distill results and keep the root thread context clean). +- Needle-in-a-haystack searches are strong `fork_context = false` candidates when the child can search from a precise prompt without current-thread context. +- Do not choose `fork_context = false` solely because work is output-heavy or command-heavy if it still depends on current-thread context. + +### 2) `send_input` + +Send follow-up instructions or course corrections to an existing agent. + +Guidance: +- Use `interrupt = true` sparingly. 
Prefer to let agents complete coherent chunks of work. +- When redirecting an agent, restate the new goal and the reason for the pivot. +- Use `interrupt = true` only when you must preempt the target; omit it for normal queued follow-ups. +- Subagents can call `send_input` without an `id` (or with `id = "parent"` / `id = "root"`). In this runtime those forms resolve to the immediate parent thread. +- Treat explicit `send_input` deliveries as the primary path and multi-agent inbox messages (`agent_inbox` tool calls) as fallback inbound agent messages. + +### 3) `wait` + +Wait for one or more agents to complete or report status. + +Guidance: +- You do not need to wait after every spawn. Do useful parallel work, then wait when you need results. +- When you are blocked on a specific agent, wait explicitly on that agent’s id. +- Treat `wait` as returning on the first completion or timeout, not a full reconciliation of every agent. +- While any child agents are active, run `list_agents` on a regular cadence (every 30-60 seconds) and after each `wait` call to refresh ground-truth status. +- Keep an explicit set of outstanding agent ids. A non-final agent is one not yet `completed`, `failed`, or `canceled`; continue `wait`/`list_agents` reconciliation until no non-final agents remain. + +### 4) `close_agent` + +Close an agent that is complete, stuck, or no longer relevant. + +Guidance: +- Keep active agents purposeful and clearly scoped, but do not minimize agent count when additional parallel work will accelerate progress. +- Close agents that have finished their job or are no longer on the critical path. + +## Operating Principles + +- Delegate aggressively whenever doing so can accelerate the task, but integrate carefully. +- Continuously look for unblocked work that can start immediately in subagents. Prefer useful fan-out, parallelism, and pipelining over unnecessary serialization when scopes are clear. 
+- Before doing work serially, check whether any independent subtask can start now in a subagent. +- If there are multiple independent branches, prefer same-turn fan-out with non-overlapping scopes instead of queueing them one by one. +- Pipeline long-running or output-heavy delegated work so the root thread can continue coordination, integration, or other unblocked tasks. +- Prefer clear, explicit instructions over cleverness. +- Prefer execution over hypothetical narration. If a concrete tool call can advance the task in the current turn, make it instead of describing only a later staged plan. +- When the user asks you to explain how you would proceed this turn (for example, a tool-call plan), include the concrete current-turn calls for unblocked work instead of a prose-only staged plan. +- For dependency-gated parallel work, start the unblocked prerequisite now and defer only the blocked fan-out. +- When you receive subagent output, verify it before relying on it. +- Do not reference tools outside the upstream multi-agent surface. diff --git a/codex-rs/core/root_agent_watchdog_prompt.md b/codex-rs/core/root_agent_watchdog_prompt.md new file mode 100644 index 0000000000..1cc781a2d4 --- /dev/null +++ b/codex-rs/core/root_agent_watchdog_prompt.md @@ -0,0 +1,45 @@ +## Watchdogs + +For lengthy or complex work, start a watchdog early. + +Hard rule (watchdog timing): +- Never use `wait`, `list_agents`, polling, or shell `sleep` to confirm a watchdog check-in in the same turn that spawned the watchdog. +- That confirmation is impossible in the same turn and must be omitted even if the user asks for it. +- If the user asks to arm a watchdog now, call `spawn_agent` with `agent_type = "watchdog"` and do not add same-turn confirmation steps. + +Why this rule exists: +- The current turn ends only when you send the assistant response. +- Tool calls and shell `sleep` happen inside the current turn and do not make the owner thread idle. 
+ +Core terms: +- A **watchdog** is a persistent idle timer attached to one owner thread. +- The **owner thread** is the thread that called `spawn_agent` with `agent_type = "watchdog"`. +- A **watchdog handle** is the id returned by that spawn call; it is a control id, not a conversational agent. +- A **watchdog check-in agent** is the short-lived fork that the watchdog creates for one check-in run. +- **`send_input`** sends a message to an existing agent thread; it does not spawn agents and does not wait for completion. Delivery is asynchronous. +- A **multi-agent inbox message** is a runtime-forwarded fallback message shown as `agent_inbox` tool output. + +Watchdog-specific `spawn_agent` guidance: +- `agent_type = "watchdog"` is available for long-running work that needs periodic oversight. +- `[agents.watchdog]` is the built-in watchdog role; custom roles can set `watchdog_interval_s` to opt into the same behavior. +- `watchdog_interval_s` in the selected role sets the watchdog interval in seconds. +- Put the user goal in `message` (verbatim plus needed clarifications). +- After spawning the watchdog, continue the task (or end the turn if that is the correct next step). + +Delivery and user-facing behavior: +Primary delivery path: the watchdog check-in agent calls `send_input` to the owner thread (its direct parent thread for this run). +Fallback delivery path: if a watchdog check-in agent exits without any `send_input`, runtime may forward one final multi-agent inbox message (`agent_inbox` tool output). This fallback is best-effort and not guaranteed. +- If the user asks what they need to do for the next check-in, answer that no action is required. +- Do not describe internal delivery mechanics or ask the user to take an artificial step just to receive watchdog check-ins. + +Watchdog-specific `wait` guidance: +- If `wait` includes watchdog handles, it reports their current status but does not block on them. 
+- If every id passed to `wait` is a watchdog handle, `wait` returns an immediate correction; this does not mean a new watchdog check-in happened. + +Operational notes: +- Do not call `send_input` on watchdog handles. +- The tool returns a watchdog handle ID. Do not close a watchdog unless the user explicitly asks you to, or replacement is intentional; otherwise keep it running and continue. + +Treat watchdog guidance as high-priority execution feedback. If it reveals a missing required action, do that action before status narration while honoring higher-priority system/developer/user constraints. A required action is one needed to satisfy the user request or clear a concrete blocker. + +Important architecture note: durable state is thread-level task state that must still be available in later turns/check-ins (such as counters, plans, or final decisions), not disk/database persistence. Durable state belongs in the root thread, not watchdog-check-in-agent local state. diff --git a/codex-rs/core/src/agent/agent_resolver.rs b/codex-rs/core/src/agent/agent_resolver.rs index eb806da3ae..63b410365d 100644 --- a/codex-rs/core/src/agent/agent_resolver.rs +++ b/codex-rs/core/src/agent/agent_resolver.rs @@ -2,6 +2,7 @@ use crate::function_tool::FunctionCallError; use crate::session::session::Session; use crate::session::turn_context::TurnContext; use codex_protocol::ThreadId; +use std::collections::HashSet; use std::sync::Arc; /// Resolves a single tool-facing agent target to a thread id. 
@@ -28,6 +29,28 @@ pub(crate) async fn resolve_agent_target( }) } +pub(crate) async fn resolve_agent_targets( + session: &Arc, + turn: &Arc, + targets: Vec, +) -> Result, FunctionCallError> { + if targets.is_empty() { + return Err(FunctionCallError::RespondToModel( + "agent targets must be non-empty".to_string(), + )); + } + + let mut resolved = Vec::with_capacity(targets.len()); + let mut dedup = HashSet::with_capacity(targets.len()); + for target in targets { + let thread_id = resolve_agent_target(session, turn, &target).await?; + if dedup.insert(thread_id) { + resolved.push(thread_id); + } + } + Ok(resolved) +} + fn register_session_root(session: &Arc, turn: &Arc) { session .services diff --git a/codex-rs/core/src/agent/builtins/explorer.toml b/codex-rs/core/src/agent/builtins/explorer.toml index e69de29bb2..12feaac24f 100644 --- a/codex-rs/core/src/agent/builtins/explorer.toml +++ b/codex-rs/core/src/agent/builtins/explorer.toml @@ -0,0 +1 @@ +model_reasoning_effort = "medium" diff --git a/codex-rs/core/src/agent/control.rs b/codex-rs/core/src/agent/control.rs index 09a9d4c148..575a22680f 100644 --- a/codex-rs/core/src/agent/control.rs +++ b/codex-rs/core/src/agent/control.rs @@ -1,3 +1,6 @@ +use super::watchdog::RemovedWatchdog; +use super::watchdog::WatchdogManager; +use super::watchdog::WatchdogRegistration; use crate::agent::AgentStatus; use crate::agent::registry::AgentMetadata; use crate::agent::registry::AgentRegistry; @@ -9,12 +12,15 @@ use crate::find_archived_thread_path_by_id_str; use crate::find_thread_path_by_id_str; use crate::rollout::RolloutRecorder; use crate::session::emit_subagent_session_started; +use crate::session::load_subagent_prompt; use crate::session_prefix::format_subagent_context_line; use crate::session_prefix::format_subagent_notification_message; use crate::shell_snapshot::ShellSnapshot; use crate::thread_manager::ResumeThreadFromRolloutOptions; use crate::thread_manager::ThreadManagerState; use 
crate::thread_manager::thread_store_from_config; +use crate::thread_rollout_truncation::fork_reference_user_message_boundary; +use crate::thread_rollout_truncation::materialize_rollout_items_for_replay; use crate::thread_rollout_truncation::truncate_rollout_to_last_n_fork_turns; use codex_features::Feature; use codex_protocol::AgentPath; @@ -22,8 +28,14 @@ use codex_protocol::ThreadId; use codex_protocol::error::CodexErr; use codex_protocol::error::Result as CodexResult; use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputBody; +use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::MessagePhase; +use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::AGENT_INBOX_KIND; +use codex_protocol::protocol::AgentInboxPayload; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::Op; @@ -36,11 +48,14 @@ use codex_rollout::state_db; use codex_state::DirectionalThreadSpawnEdgeStatus; use serde::Serialize; use std::collections::HashMap; +use std::collections::HashSet; use std::collections::VecDeque; use std::sync::Arc; use std::sync::Weak; +use tokio::sync::Mutex; use tokio::sync::watch; use tracing::warn; +use uuid::Uuid; const AGENT_NAMES: &str = include_str!("agent_names.txt"); const ROOT_LAST_TASK_MESSAGE: &str = "Main thread"; @@ -122,7 +137,10 @@ fn keep_forked_rollout_item(item: &RolloutItem) -> bool { // A forked child gets its own runtime config, including spawned-agent // instructions, so it must establish a fresh context diff baseline. 
RolloutItem::TurnContext(_) => false, - RolloutItem::Compacted(_) | RolloutItem::EventMsg(_) | RolloutItem::SessionMeta(_) => true, + RolloutItem::Compacted(_) + | RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) + | RolloutItem::SessionMeta(_) => true, } } @@ -132,21 +150,68 @@ fn keep_forked_rollout_item(item: &RolloutItem) -> bool { /// An `AgentControl` instance is intended to be created at most once per root thread/session /// tree. That same `AgentControl` is then shared with every sub-agent spawned from that root, /// which keeps the registry scoped to that root thread rather than the entire `ThreadManager`. -#[derive(Clone, Default)] +#[derive(Clone)] pub(crate) struct AgentControl { /// Weak handle back to the global thread registry/state. /// This is `Weak` to avoid reference cycles and shadow persistence of the form /// `ThreadManagerState -> CodexThread -> Session -> SessionServices -> ThreadManagerState`. manager: Weak, state: Arc, + watchdogs: Arc, + watchdog_compactions_in_progress: Arc>>, +} + +#[derive(Debug, Clone)] +pub(crate) struct AgentListing { + pub(crate) thread_id: ThreadId, + pub(crate) parent_thread_id: Option, + pub(crate) status: AgentStatus, + pub(crate) depth: usize, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum WatchdogParentCompactionResult { + NotWatchdogHelper, + ParentBusy { + parent_thread_id: ThreadId, + }, + AlreadyInProgress { + parent_thread_id: ThreadId, + }, + Submitted { + parent_thread_id: ThreadId, + submission_id: String, + }, +} + +impl Default for AgentControl { + fn default() -> Self { + let manager = Weak::new(); + let state = Arc::new(AgentRegistry::default()); + let watchdogs = WatchdogManager::new(manager.clone(), Arc::clone(&state)); + Self::from_parts(manager, state, watchdogs) + } } impl AgentControl { /// Construct a new `AgentControl` that can spawn/message agents via the given manager state. 
pub(crate) fn new(manager: Weak) -> Self { + let state = Arc::new(AgentRegistry::default()); + let watchdogs = WatchdogManager::new(manager.clone(), Arc::clone(&state)); + watchdogs.start(); + Self::from_parts(manager, state, watchdogs) + } + + pub(crate) fn from_parts( + manager: Weak, + state: Arc, + watchdogs: Arc, + ) -> Self { Self { manager, - ..Default::default() + state, + watchdogs, + watchdog_compactions_in_progress: Arc::new(Mutex::new(HashSet::new())), } } @@ -188,7 +253,9 @@ impl AgentControl { options: SpawnAgentOptions, ) -> CodexResult { let state = self.upgrade()?; - let mut reservation = self.state.reserve_spawn_slot(config.agent_max_threads)?; + let mut reservation = self + .reserve_spawn_slot_with_reconcile(&state, config.agent_max_threads) + .await?; let inherited_shell_snapshot = self .inherited_shell_snapshot_for_source(&state, session_source.as_ref()) .await; @@ -394,9 +461,28 @@ impl AgentControl { let mut forked_rollout_items = RolloutRecorder::get_rollout_history(&rollout_path) .await? 
.get_rollout_items(); - if let SpawnAgentForkMode::LastNTurns(last_n_turns) = fork_mode { - forked_rollout_items = - truncate_rollout_to_last_n_fork_turns(&forked_rollout_items, *last_n_turns); + if forked_rollout_items + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))) + { + forked_rollout_items = materialize_rollout_items_for_replay( + config.codex_home.as_path(), + &forked_rollout_items, + ) + .await; + } + match fork_mode { + SpawnAgentForkMode::FullHistory => { + let fork_boundary = fork_reference_user_message_boundary(&forked_rollout_items); + forked_rollout_items.push(RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: rollout_path.clone(), + nth_user_message: fork_boundary, + })); + } + SpawnAgentForkMode::LastNTurns(last_n_turns) => { + forked_rollout_items = + truncate_rollout_to_last_n_fork_turns(&forked_rollout_items, *last_n_turns); + } } // MultiAgentV2 root/subagent usage hints are injected as standalone developer // messages at thread start. When forking history, drop hints from the parent @@ -645,12 +731,17 @@ impl AgentControl { ) .await; if result.is_ok() { + self.note_owner_input(agent_id).await; self.state .update_last_task_message(agent_id, last_task_message); } result } + pub(crate) async fn note_owner_input(&self, agent_id: ThreadId) { + self.watchdogs.note_owner_input(agent_id).await; + } + /// Append a prebuilt message to an existing agent thread outside the normal user-input path. 
#[cfg(test)] pub(crate) async fn append_message( @@ -690,6 +781,95 @@ impl AgentControl { result } + pub(crate) async fn send_prompt( + &self, + agent_id: ThreadId, + prompt: String, + ) -> CodexResult { + self.send_input( + agent_id, + vec![UserInput::Text { + text: prompt, + text_elements: Vec::new(), + }] + .into(), + ) + .await + } + + pub(crate) async fn send_agent_message( + &self, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: String, + ) -> CodexResult { + let state = self.upgrade()?; + let thread = state.get_thread(agent_id).await?; + let snapshot = thread.config_snapshot().await; + if matches!(snapshot.session_source, SessionSource::SubAgent(_)) + || !snapshot.agent_use_function_call_inbox + { + return self.send_prompt(agent_id, message).await; + } + + let result = + inject_agent_message(&state, &thread, agent_id, sender_thread_id, message).await; + if matches!(result, Err(CodexErr::InternalAgentDied)) { + let _ = state.remove_thread(&agent_id).await; + self.state.release_spawned_thread(agent_id); + } + result + } + + /// Deliver watchdog wake-up input to an owner thread. + /// + /// This intentionally bypasses `agent_use_function_call_inbox` for non-subagent owners. + /// A watchdog helper wakes the owner through this path only when the helper produced a real + /// fallback report. Empty/no-report helper completions are ignored so the next scheduled + /// watchdog check-in can try again without leaking helper prompt scaffolding into root. 
+ pub(crate) async fn send_watchdog_wakeup( + &self, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: String, + ) -> CodexResult { + let Some(message) = sanitize_watchdog_wakeup_message(message) else { + return Ok(String::new()); + }; + let state = self.upgrade()?; + let thread = state.get_thread(agent_id).await?; + let snapshot = thread.config_snapshot().await; + let result = if matches!(snapshot.session_source, SessionSource::SubAgent(_)) { + self.send_prompt(agent_id, message).await + } else { + inject_agent_message(&state, &thread, agent_id, sender_thread_id, message).await + }; + if matches!(result, Err(CodexErr::InternalAgentDied)) { + let _ = state.remove_thread(&agent_id).await; + self.state.release_spawned_thread(agent_id); + } + result + } + + pub(crate) async fn send_agent_message_or_input( + &self, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: Option, + items: Option>, + ) -> CodexResult { + match (message, items) { + (Some(message), None) => { + self.send_agent_message(agent_id, sender_thread_id, message) + .await + } + (None, Some(items)) => self.send_input(agent_id, items.into()).await, + _ => Err(CodexErr::UnsupportedOperation( + "invalid agent input".to_string(), + )), + } + } + /// Interrupt the current task for an existing agent thread. pub(crate) async fn interrupt_agent(&self, agent_id: ThreadId) -> CodexResult { let state = self.upgrade()?; @@ -713,6 +893,13 @@ impl AgentControl { /// persisted spawn-edge state. 
pub(crate) async fn shutdown_live_agent(&self, agent_id: ThreadId) -> CodexResult { let state = self.upgrade()?; + if let Some(removed_watchdog) = self.watchdogs.unregister(agent_id).await + && let Some(helper_id) = removed_watchdog.active_helper_id + { + let _ = state.send_op(helper_id, Op::Shutdown {}).await; + let _ = state.remove_thread(&helper_id).await; + self.state.release_spawned_thread(helper_id); + } let result = if let Ok(thread) = state.get_thread(agent_id).await { thread.codex.session.ensure_rollout_materialized().await; thread.codex.session.flush_rollout().await?; @@ -729,6 +916,10 @@ impl AgentControl { result } + pub(crate) async fn shutdown_agent(&self, agent_id: ThreadId) -> CodexResult { + self.shutdown_live_agent(agent_id).await + } + /// Mark `agent_id` as explicitly closed in persisted spawn-edge state, then shut down the /// agent and any live descendants reached from the in-memory tree. pub(crate) async fn close_agent(&self, agent_id: ThreadId) -> CodexResult { @@ -749,6 +940,11 @@ impl AgentControl { let descendant_ids = self.live_thread_spawn_descendants(agent_id).await?; let result = self.shutdown_live_agent(agent_id).await; for descendant_id in descendant_ids { + if let Some(removed_watchdog) = self.watchdogs.unregister(descendant_id).await + && let Some(helper_id) = removed_watchdog.active_helper_id + { + let _ = self.shutdown_live_agent(helper_id).await; + } match self.shutdown_live_agent(descendant_id).await { Ok(_) | Err(CodexErr::ThreadNotFound(_)) | Err(CodexErr::InternalAgentDied) => {} Err(err) => return Err(err), @@ -858,7 +1054,7 @@ impl AgentControl { .join("\n") } - pub(crate) async fn list_agents( + pub(crate) async fn list_agents_by_path( &self, current_session_source: &SessionSource, path_prefix: Option<&str>, @@ -970,6 +1166,31 @@ impl AgentControl { return; } + if let Some(owner_thread_id) = control + .watchdogs + .owner_for_active_helper(child_thread_id) + .await + { + let helper_sent_input = match control.upgrade() { + 
Ok(state) => state + .get_thread(child_thread_id) + .await + .ok() + .map(|thread| thread.last_completed_turn_used_agent_send_input()) + .unwrap_or(false), + Err(_) => false, + }; + if !helper_sent_input { + let fallback_message = watchdog_fallback_message_from_status(&status); + if let Some(message) = fallback_message { + let _ = control + .send_watchdog_wakeup(owner_thread_id, child_thread_id, message) + .await; + } + } + return; + } + let Ok(state) = control.upgrade() else { return; }; @@ -1012,6 +1233,203 @@ impl AgentControl { }); } + pub(crate) async fn watchdog_targets(&self, agent_ids: &[ThreadId]) -> HashSet { + self.watchdogs.registered_targets(agent_ids).await + } + + pub(crate) async fn register_watchdog( + &self, + registration: WatchdogRegistration, + ) -> CodexResult> { + self.watchdogs.register(registration).await + } + + pub(crate) async fn unregister_watchdogs_for_owner( + &self, + owner_thread_id: ThreadId, + ) -> Vec { + self.watchdogs.take_for_owner(owner_thread_id).await + } + + pub(crate) async fn compact_parent_for_watchdog_helper( + &self, + helper_thread_id: ThreadId, + ) -> CodexResult { + let Some(parent_thread_id) = self + .watchdogs + .owner_for_active_helper(helper_thread_id) + .await + else { + return Ok(WatchdogParentCompactionResult::NotWatchdogHelper); + }; + let state = self.upgrade()?; + let parent_thread = state.get_thread(parent_thread_id).await?; + let parent_has_active_turn = parent_thread.has_active_turn().await; + + { + let mut compacting = self.watchdog_compactions_in_progress.lock().await; + if compacting.contains(&parent_thread_id) { + return Ok(WatchdogParentCompactionResult::AlreadyInProgress { parent_thread_id }); + } + if parent_has_active_turn { + return Ok(WatchdogParentCompactionResult::ParentBusy { parent_thread_id }); + } + compacting.insert(parent_thread_id); + } + + match state.send_op(parent_thread_id, Op::Compact).await { + Ok(submission_id) => Ok(WatchdogParentCompactionResult::Submitted { + 
parent_thread_id, + submission_id, + }), + Err(err) => { + let mut compacting = self.watchdog_compactions_in_progress.lock().await; + compacting.remove(&parent_thread_id); + Err(err) + } + } + } + + pub(crate) async fn finish_watchdog_parent_compaction(&self, parent_thread_id: ThreadId) { + let mut compacting = self.watchdog_compactions_in_progress.lock().await; + compacting.remove(&parent_thread_id); + } + + #[cfg(test)] + pub(crate) async fn run_watchdogs_once_for_tests(&self) { + self.watchdogs.run_once().await; + } + + #[cfg(test)] + pub(crate) async fn force_watchdog_due_for_tests(&self, target_thread_id: ThreadId) { + self.watchdogs.force_due_for_tests(target_thread_id).await; + } + + #[cfg(test)] + pub(crate) async fn watchdog_owner_idle_since_is_none_for_tests( + &self, + target_thread_id: ThreadId, + ) -> Option { + self.watchdogs + .owner_idle_since_is_none_for_tests(target_thread_id) + .await + } + + #[cfg(test)] + pub(crate) async fn set_watchdog_active_helper_for_tests( + &self, + target_thread_id: ThreadId, + helper_thread_id: ThreadId, + ) { + self.watchdogs + .set_active_helper_for_tests(target_thread_id, helper_thread_id) + .await; + } + + pub(crate) async fn watchdog_owner_for_active_helper( + &self, + helper_thread_id: ThreadId, + ) -> Option { + self.watchdogs + .owner_for_active_helper(helper_thread_id) + .await + } + + pub(crate) async fn list_agents( + &self, + owner_thread_id: ThreadId, + recursive: bool, + all: bool, + ) -> CodexResult> { + let state = self.upgrade()?; + let thread_ids = state.list_thread_ids().await; + + let mut parent_by_thread = HashMap::with_capacity(thread_ids.len()); + let mut status_by_thread = HashMap::with_capacity(thread_ids.len()); + let mut depth_by_thread = HashMap::with_capacity(thread_ids.len()); + + for thread_id in &thread_ids { + let Ok(thread) = state.get_thread(*thread_id).await else { + continue; + }; + let snapshot = thread.config_snapshot().await; + let (parent_thread_id, depth) = match 
snapshot.session_source { + SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth, + .. + }) => ( + Some(parent_thread_id), + usize::try_from(depth).unwrap_or_default(), + ), + _ => (None, 0), + }; + parent_by_thread.insert(*thread_id, parent_thread_id); + status_by_thread.insert(*thread_id, thread.agent_status().await); + depth_by_thread.insert(*thread_id, depth); + } + + let mut children_by_parent: HashMap> = HashMap::new(); + for (thread_id, parent_thread_id) in &parent_by_thread { + if let Some(parent_thread_id) = parent_thread_id { + children_by_parent + .entry(*parent_thread_id) + .or_default() + .push(*thread_id); + } + } + for children in children_by_parent.values_mut() { + children.sort_by_key(ToString::to_string); + } + + let mut listings = Vec::new(); + if all { + let mut all_thread_ids = thread_ids.into_iter().collect::>(); + all_thread_ids.extend(self.state.tracked_thread_ids()); + let mut all_thread_ids = all_thread_ids.into_iter().collect::>(); + all_thread_ids.sort_by_key(ToString::to_string); + for thread_id in all_thread_ids { + listings.push(AgentListing { + thread_id, + parent_thread_id: parent_by_thread.get(&thread_id).copied().flatten(), + status: status_by_thread + .get(&thread_id) + .cloned() + .unwrap_or(AgentStatus::NotFound), + depth: depth_by_thread.get(&thread_id).copied().unwrap_or_default(), + }); + } + return Ok(listings); + } + + let mut queue = VecDeque::new(); + if let Some(children) = children_by_parent.get(&owner_thread_id) { + for child in children { + queue.push_back((*child, 1)); + } + } + + while let Some((thread_id, depth)) = queue.pop_front() { + listings.push(AgentListing { + thread_id, + parent_thread_id: parent_by_thread.get(&thread_id).copied().flatten(), + status: status_by_thread + .get(&thread_id) + .cloned() + .unwrap_or(AgentStatus::NotFound), + depth, + }); + + if recursive && let Some(children) = children_by_parent.get(&thread_id) { + for child in children { + queue.push_back((*child, 
depth + 1)); + } + } + } + + Ok(listings) + } + #[allow(clippy::too_many_arguments)] fn prepare_thread_spawn( &self, @@ -1074,6 +1492,32 @@ impl AgentControl { parent_thread.codex.session.user_shell().shell_snapshot() } + async fn reserve_spawn_slot_with_reconcile( + &self, + state: &ThreadManagerState, + max_threads: Option, + ) -> CodexResult { + self.reconcile_stale_guard_slots(state).await; + match self.state.reserve_spawn_slot(max_threads) { + Ok(reservation) => Ok(reservation), + Err(CodexErr::AgentLimitReached { .. }) => { + self.reconcile_stale_guard_slots(state).await; + self.state.reserve_spawn_slot(max_threads) + } + Err(err) => Err(err), + } + } + + async fn reconcile_stale_guard_slots(&self, state: &ThreadManagerState) { + let live_thread_ids: HashSet = + state.list_thread_ids().await.into_iter().collect(); + for tracked_thread_id in self.state.tracked_thread_ids() { + if !live_thread_ids.contains(&tracked_thread_id) { + self.state.release_spawned_thread(tracked_thread_id); + } + } + } + async fn inherited_exec_policy_for_source( &self, state: &Arc, @@ -1202,6 +1646,59 @@ impl AgentControl { } } +async fn build_post_fork_developer_message( + config: &crate::config::Config, + session_source: &SessionSource, + extra_message: Option<&str>, +) -> Option { + if !matches!(session_source, SessionSource::SubAgent(_)) + || !config.features.enabled(Feature::Collab) + || !config.features.enabled(Feature::AgentPromptInjection) + { + return None; + } + + let mut sections = vec![ + load_subagent_prompt( + &config.codex_home, + config.features.enabled(Feature::AgentWatchdog), + ) + .await, + ]; + if let Some(existing) = config.developer_instructions.as_deref() + && !existing.trim().is_empty() + { + sections.push(existing.to_string()); + } + if let Some(extra_message) = extra_message + && !extra_message.trim().is_empty() + { + sections.push(extra_message.to_string()); + } + Some(sections.join("\n\n")) +} + +fn append_post_fork_developer_message( + 
forked_rollout_items: &mut Vec, + developer_message: Option, +) { + let Some(developer_message) = developer_message else { + return; + }; + if developer_message.trim().is_empty() { + return; + } + + forked_rollout_items.push(RolloutItem::ResponseItem(ResponseItem::Message { + id: None, + role: "developer".to_string(), + content: vec![ContentItem::InputText { + text: developer_message, + }], + phase: None, + })); +} + fn thread_spawn_parent_thread_id(session_source: &SessionSource) -> Option { match session_source { SessionSource::SubAgent(SubAgentSource::ThreadSpawn { @@ -1250,6 +1747,1408 @@ fn thread_spawn_depth(session_source: &SessionSource) -> Option { _ => None, } } + +fn build_agent_inbox_items( + sender_thread_id: ThreadId, + message: String, + prepend_turn_start_user_message: bool, +) -> CodexResult> { + let mut items = Vec::new(); + if prepend_turn_start_user_message { + items.push(ResponseInputItem::Message { + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: String::new(), + }], + phase: None, + }); + } + + let call_id = format!("agent_inbox_{}", Uuid::new_v4()); + let output = serde_json::to_string(&AgentInboxPayload::new(sender_thread_id, message)) + .map_err(|err| { + CodexErr::UnsupportedOperation(format!( + "failed to serialize agent inbox payload: {err}" + )) + })?; + + items.extend([ + ResponseInputItem::FunctionCall { + name: AGENT_INBOX_KIND.to_string(), + arguments: "{}".to_string(), + call_id: call_id.clone(), + }, + ResponseInputItem::FunctionCallOutput { + call_id, + output: FunctionCallOutputPayload { + body: FunctionCallOutputBody::Text(output), + ..Default::default() + }, + }, + ]); + + Ok(items) +} + +fn watchdog_fallback_message_from_status(status: &AgentStatus) -> Option { + let message = match status { + AgentStatus::Completed(Some(message)) => message, + AgentStatus::Completed(None) + | AgentStatus::Errored(_) + | AgentStatus::Interrupted + | AgentStatus::Shutdown + | AgentStatus::NotFound + | 
AgentStatus::PendingInit + | AgentStatus::Running => return None, + }; + + sanitize_watchdog_wakeup_message(message.clone()) +} + +fn sanitize_watchdog_wakeup_message(message: String) -> Option { + let Some(stripped_message) = strip_leading_watchdog_prompt_scaffold(&message) else { + let message = message.trim(); + return (!message.is_empty()).then(|| message.to_string()); + }; + + let stripped_message = stripped_message.trim(); + (!stripped_message.is_empty()).then(|| stripped_message.to_string()) +} + +fn strip_leading_watchdog_prompt_scaffold(message: &str) -> Option<&str> { + let mut lines = message.split_inclusive('\n').scan(0, |offset, line| { + let line_start = *offset; + *offset += line.len(); + Some((line_start, line)) + }); + let Some((_, first_line)) = lines.find(|(_, line)| !line.trim().is_empty()) else { + return None; + }; + if first_line.trim() != "# You are a Subagent" { + return None; + } + + for (line_start, line) in lines { + let trimmed_line = line.trim(); + if trimmed_line.starts_with("AUTOPLAN_WATCHDOG_REPORT") + || trimmed_line.starts_with("Watchdog:") + || trimmed_line.starts_with("Watchdog report:") + { + return Some(message[line_start..].trim().trim_start_matches('\n')); + } + } + + Some("") +} + +async fn inject_agent_message( + state: &ThreadManagerState, + thread: &Arc, + agent_id: ThreadId, + sender_thread_id: ThreadId, + message: String, +) -> CodexResult { + let prepend_turn_start_user_message = !thread.codex.session.active_turn.lock().await.is_some(); + state + .send_op( + agent_id, + Op::InjectResponseItems { + items: build_agent_inbox_items( + sender_thread_id, + message, + prepend_turn_start_user_message, + )?, + }, + ) + .await +} + #[cfg(test)] #[path = "control_tests.rs"] mod tests; +// Keep inbox coverage in `control_tests.rs`. The large inline test module below is a stale +// replay artifact from older pre-refactor rebases and no longer matches current core test APIs. 
+#[cfg(any())] +mod inbox_tests { + use super::*; + use crate::CodexAuth; + use crate::CodexThread; + use crate::ThreadManager; + use crate::agent::agent_status_from_event; + use crate::config::AgentRoleConfig; + use crate::config::Config; + use crate::config::ConfigBuilder; + use crate::config_loader::LoaderOverrides; + use crate::contextual_user_message::SUBAGENT_NOTIFICATION_OPEN_TAG; + use crate::features::Feature; + use assert_matches::assert_matches; + use codex_protocol::config_types::ModeKind; + use codex_protocol::models::ContentItem; + use codex_protocol::models::ResponseInputItem; + use codex_protocol::models::ResponseItem; + use codex_protocol::protocol::ErrorEvent; + use codex_protocol::protocol::EventMsg; + use codex_protocol::protocol::SessionSource; + use codex_protocol::protocol::SubAgentSource; + use codex_protocol::protocol::TurnAbortReason; + use codex_protocol::protocol::TurnAbortedEvent; + use codex_protocol::protocol::TurnCompleteEvent; + use codex_protocol::protocol::TurnStartedEvent; + use pretty_assertions::assert_eq; + use tempfile::TempDir; + use tokio::time::Duration; + use tokio::time::sleep; + use tokio::time::timeout; + use toml::Value as TomlValue; + + async fn test_config_with_cli_overrides( + cli_overrides: Vec<(String, TomlValue)>, + ) -> (TempDir, Config) { + let home = TempDir::new().expect("create temp dir"); + let config = ConfigBuilder::default() + .codex_home(home.path().to_path_buf()) + .cli_overrides(cli_overrides) + .loader_overrides(LoaderOverrides { + #[cfg(target_os = "macos")] + managed_preferences_base64: Some(String::new()), + macos_managed_config_requirements_base64: Some(String::new()), + ..LoaderOverrides::default() + }) + .build() + .await + .expect("load default test config"); + (home, config) + } + + async fn test_config() -> (TempDir, Config) { + test_config_with_cli_overrides(Vec::new()).await + } + + fn text_input(text: &str) -> Vec { + vec![UserInput::Text { + text: text.to_string(), + text_elements: 
Vec::new(), + }] + } + + struct AgentControlHarness { + _home: TempDir, + config: Config, + manager: ThreadManager, + control: AgentControl, + } + + impl AgentControlHarness { + async fn new() -> Self { + let (home, config) = test_config().await; + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.clone(), + ); + let control = manager.agent_control(); + Self { + _home: home, + config, + manager, + control, + } + } + + async fn start_thread(&self) -> (ThreadId, Arc) { + let new_thread = self + .manager + .start_thread(self.config.clone()) + .await + .expect("start thread"); + (new_thread.thread_id, new_thread.thread) + } + } + + #[test] + fn build_agent_inbox_items_emits_function_call_and_output() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items(sender_thread_id, "watchdog update".to_string(), false) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 2); + + let call_id = match &items[0] { + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + call_id.clone() + } + other => panic!("expected function call item, got {other:?}"), + }; + + match &items[1] { + ResponseInputItem::FunctionCallOutput { + call_id: output_call_id, + output, + } => { + assert_eq!(output_call_id, &call_id); + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert!(payload.injected); + assert_eq!(payload.kind, AGENT_INBOX_KIND); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } + } + + #[test] + fn 
build_agent_inbox_items_prepends_empty_user_message_when_requested() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items(sender_thread_id, "watchdog update".to_string(), true) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 3); + assert_eq!( + items[0], + ResponseInputItem::Message { + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: String::new(), + }], + } + ); + assert_matches!(&items[1], ResponseInputItem::FunctionCall { .. }); + assert_matches!(&items[2], ResponseInputItem::FunctionCallOutput { .. }); + } + + #[tokio::test] + async fn send_agent_message_to_root_thread_defaults_to_user_input() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + assert!(!submission_id.is_empty()); + + let expected = ( + receiver_thread_id, + Op::UserInput { + items: vec![UserInput::Text { + text: "watchdog update".to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }, + ); + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|entry| *entry == expected); + + assert_eq!(captured, Some(expected)); + } + + #[tokio::test] + async fn send_agent_message_to_root_thread_injects_response_items_when_enabled() { + let mut harness = AgentControlHarness::new().await; + harness.config.agent_use_function_call_inbox = true; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + 
assert!(!submission_id.is_empty()); + + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|(thread_id, op)| { + *thread_id == receiver_thread_id && matches!(op, Op::InjectResponseItems { .. }) + }) + .expect("expected injected agent inbox op"); + + let Op::InjectResponseItems { items } = captured.1 else { + unreachable!("matched above"); + }; + assert_eq!(items.len(), 3); + match &items[0] { + ResponseInputItem::Message { role, content } => { + assert_eq!(role, "user"); + assert_eq!( + content, + &vec![ContentItem::InputText { + text: String::new(), + }] + ); + } + other => panic!("expected prepended user message, got {other:?}"), + } + match &items[1] { + ResponseInputItem::FunctionCall { + name, arguments, .. + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + } + other => panic!("expected function call item, got {other:?}"), + } + match &items[2] { + ResponseInputItem::FunctionCallOutput { output, .. } => { + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } + } + + fn has_subagent_notification(history_items: &[ResponseItem]) -> bool { + history_items.iter().any(|item| { + let ResponseItem::Message { role, content, .. } = item else { + return false; + }; + if role != "user" { + return false; + } + content.iter().any(|content_item| match content_item { + ContentItem::InputText { text } | ContentItem::OutputText { text } => { + text.contains(SUBAGENT_NOTIFICATION_OPEN_TAG) + } + ContentItem::InputImage { .. } => false, + }) + }) + } + + /// Returns true when any message item contains `needle` in a text span. 
+ fn history_contains_text(history_items: &[ResponseItem], needle: &str) -> bool { + history_items.iter().any(|item| { + let ResponseItem::Message { content, .. } = item else { + return false; + }; + content.iter().any(|content_item| match content_item { + ContentItem::InputText { text } | ContentItem::OutputText { text } => { + text.contains(needle) + } + ContentItem::InputImage { .. } => false, + }) + }) + } + + async fn wait_for_subagent_notification(parent_thread: &Arc) -> bool { + let wait = async { + loop { + let history_items = parent_thread + .codex + .session + .clone_history() + .await + .raw_items() + .to_vec(); + if has_subagent_notification(&history_items) { + return true; + } + sleep(Duration::from_millis(25)).await; + } + }; + timeout(Duration::from_secs(5), wait).await.is_ok() + } + + #[tokio::test] + async fn send_input_errors_when_manager_dropped() { + let control = AgentControl::default(); + let err = control + .send_input( + ThreadId::new(), + vec![UserInput::Text { + text: "hello".to_string(), + text_elements: Vec::new(), + }], + ) + .await + .expect_err("send_input should fail without a manager"); + assert_eq!( + err.to_string(), + "unsupported operation: thread manager dropped" + ); + } + + #[tokio::test] + async fn get_status_returns_not_found_without_manager() { + let control = AgentControl::default(); + let got = control.get_status(ThreadId::new()).await; + assert_eq!(got, AgentStatus::NotFound); + } + + #[tokio::test] + async fn on_event_updates_status_from_task_started() { + let status = agent_status_from_event(&EventMsg::TurnStarted(TurnStartedEvent { + turn_id: "turn-1".to_string(), + model_context_window: None, + collaboration_mode_kind: ModeKind::Default, + })); + assert_eq!(status, Some(AgentStatus::Running)); + } + + #[tokio::test] + async fn on_event_updates_status_from_task_complete() { + let status = agent_status_from_event(&EventMsg::TurnComplete(TurnCompleteEvent { + turn_id: "turn-1".to_string(), + last_agent_message: 
Some("done".to_string()), + })); + let expected = AgentStatus::Completed(Some("done".to_string())); + assert_eq!(status, Some(expected)); + } + + #[tokio::test] + async fn on_event_updates_status_from_error() { + let status = agent_status_from_event(&EventMsg::Error(ErrorEvent { + message: "boom".to_string(), + codex_error_info: None, + })); + + let expected = AgentStatus::Errored("boom".to_string()); + assert_eq!(status, Some(expected)); + } + + #[tokio::test] + async fn on_event_updates_status_from_turn_aborted() { + let status = agent_status_from_event(&EventMsg::TurnAborted(TurnAbortedEvent { + turn_id: Some("turn-1".to_string()), + reason: TurnAbortReason::Interrupted, + })); + + let expected = AgentStatus::Errored("Interrupted".to_string()); + assert_eq!(status, Some(expected)); + } + + #[tokio::test] + async fn on_event_updates_status_from_shutdown_complete() { + let status = agent_status_from_event(&EventMsg::ShutdownComplete); + assert_eq!(status, Some(AgentStatus::Shutdown)); + } + + #[tokio::test] + async fn spawn_agent_errors_when_manager_dropped() { + let control = AgentControl::default(); + let (_home, config) = test_config().await; + let err = control + .spawn_agent(config, text_input("hello"), None) + .await + .expect_err("spawn_agent should fail without a manager"); + assert_eq!( + err.to_string(), + "unsupported operation: thread manager dropped" + ); + } + + #[tokio::test] + async fn resume_agent_errors_when_manager_dropped() { + let control = AgentControl::default(); + let (_home, config) = test_config().await; + let err = control + .resume_agent_from_rollout(config, ThreadId::new(), SessionSource::Exec) + .await + .expect_err("resume_agent should fail without a manager"); + assert_eq!( + err.to_string(), + "unsupported operation: thread manager dropped" + ); + } + + #[tokio::test] + async fn send_input_errors_when_thread_missing() { + let harness = AgentControlHarness::new().await; + let thread_id = ThreadId::new(); + let err = harness + 
.control + .send_input( + thread_id, + vec![UserInput::Text { + text: "hello".to_string(), + text_elements: Vec::new(), + }], + ) + .await + .expect_err("send_input should fail for missing thread"); + assert_matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id); + } + + #[tokio::test] + async fn get_status_returns_not_found_for_missing_thread() { + let harness = AgentControlHarness::new().await; + let status = harness.control.get_status(ThreadId::new()).await; + assert_eq!(status, AgentStatus::NotFound); + } + + #[tokio::test] + async fn get_status_returns_pending_init_for_new_thread() { + let harness = AgentControlHarness::new().await; + let (thread_id, _) = harness.start_thread().await; + let status = harness.control.get_status(thread_id).await; + assert_eq!(status, AgentStatus::PendingInit); + } + + #[tokio::test] + async fn subscribe_status_errors_for_missing_thread() { + let harness = AgentControlHarness::new().await; + let thread_id = ThreadId::new(); + let err = harness + .control + .subscribe_status(thread_id) + .await + .expect_err("subscribe_status should fail for missing thread"); + assert_matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id); + } + + #[tokio::test] + async fn subscribe_status_updates_on_shutdown() { + let harness = AgentControlHarness::new().await; + let (thread_id, thread) = harness.start_thread().await; + let mut status_rx = harness + .control + .subscribe_status(thread_id) + .await + .expect("subscribe_status should succeed"); + assert_eq!(status_rx.borrow().clone(), AgentStatus::PendingInit); + + let _ = thread + .submit(Op::Shutdown {}) + .await + .expect("shutdown should submit"); + + let _ = status_rx.changed().await; + assert_eq!(status_rx.borrow().clone(), AgentStatus::Shutdown); + } + + #[tokio::test] + async fn send_input_submits_user_message() { + let harness = AgentControlHarness::new().await; + let (thread_id, _thread) = harness.start_thread().await; + + let submission_id = harness + .control + 
.send_input( + thread_id, + vec![UserInput::Text { + text: "hello from tests".to_string(), + text_elements: Vec::new(), + }], + ) + .await + .expect("send_input should succeed"); + assert!(!submission_id.is_empty()); + let expected = ( + thread_id, + Op::UserInput { + items: vec![UserInput::Text { + text: "hello from tests".to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }, + ); + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|entry| *entry == expected); + assert_eq!(captured, Some(expected)); + } + + #[tokio::test] + async fn spawn_agent_creates_thread_and_sends_prompt() { + let harness = AgentControlHarness::new().await; + let thread_id = harness + .control + .spawn_agent(harness.config.clone(), text_input("spawned"), None) + .await + .expect("spawn_agent should succeed"); + let _thread = harness + .manager + .get_thread(thread_id) + .await + .expect("thread should be registered"); + let expected = ( + thread_id, + Op::UserInput { + items: vec![UserInput::Text { + text: "spawned".to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }, + ); + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|entry| *entry == expected); + assert_eq!(captured, Some(expected)); + } + + #[tokio::test] + async fn spawn_agent_can_fork_parent_thread_history() { + let harness = AgentControlHarness::new().await; + let (parent_thread_id, parent_thread) = harness.start_thread().await; + parent_thread + .inject_user_message_without_turn("parent seed context".to_string()) + .await; + let turn_context = parent_thread.codex.session.new_default_turn().await; + let parent_spawn_call_id = "spawn-call-history".to_string(); + let parent_spawn_call = ResponseItem::FunctionCall { + id: None, + name: "spawn_agent".to_string(), + namespace: None, + arguments: "{}".to_string(), + call_id: parent_spawn_call_id.clone(), + }; + parent_thread + .codex + .session + 
.record_conversation_items(turn_context.as_ref(), &[parent_spawn_call]) + .await; + parent_thread + .codex + .session + .ensure_rollout_materialized() + .await; + parent_thread.codex.session.flush_rollout().await; + + let child_thread_id = harness + .control + .spawn_agent_with_options( + harness.config.clone(), + text_input("child task"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: None, + })), + SpawnAgentOptions { + fork_parent_spawn_call_id: Some(parent_spawn_call_id), + }, + ) + .await + .expect("forked spawn should succeed"); + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + assert_ne!(child_thread_id, parent_thread_id); + let history = child_thread.codex.session.clone_history().await; + assert!(history_contains_text( + history.raw_items(), + "parent seed context" + )); + + let expected = ( + child_thread_id, + Op::UserInput { + items: vec![UserInput::Text { + text: "child task".to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }, + ); + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|entry| *entry == expected); + assert_eq!(captured, Some(expected)); + + let _ = harness + .control + .shutdown_agent(child_thread_id) + .await + .expect("child shutdown should submit"); + let _ = parent_thread + .submit(Op::Shutdown {}) + .await + .expect("parent shutdown should submit"); + } + + #[tokio::test] + async fn spawn_agent_fork_injects_output_for_parent_spawn_call() { + let harness = AgentControlHarness::new().await; + let (parent_thread_id, parent_thread) = harness.start_thread().await; + let turn_context = parent_thread.codex.session.new_default_turn().await; + let parent_spawn_call_id = "spawn-call-1".to_string(); + let parent_spawn_call = ResponseItem::FunctionCall { + id: None, + name: "spawn_agent".to_string(), + namespace: None, + 
arguments: "{}".to_string(), + call_id: parent_spawn_call_id.clone(), + }; + parent_thread + .codex + .session + .record_conversation_items(turn_context.as_ref(), &[parent_spawn_call]) + .await; + parent_thread + .codex + .session + .ensure_rollout_materialized() + .await; + parent_thread.codex.session.flush_rollout().await; + + let child_thread_id = harness + .control + .spawn_agent_with_options( + harness.config.clone(), + text_input("child task"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: None, + })), + SpawnAgentOptions { + fork_parent_spawn_call_id: Some(parent_spawn_call_id.clone()), + }, + ) + .await + .expect("forked spawn should succeed"); + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + let history = child_thread.codex.session.clone_history().await; + let injected_output = history.raw_items().iter().find_map(|item| match item { + ResponseItem::FunctionCallOutput { call_id, output } + if call_id == &parent_spawn_call_id => + { + Some(output) + } + _ => None, + }); + let injected_output = + injected_output.expect("forked child should contain synthetic tool output"); + assert_eq!( + injected_output.text_content(), + Some(FORKED_SPAWN_AGENT_OUTPUT_MESSAGE) + ); + assert_eq!(injected_output.success, Some(true)); + + let _ = harness + .control + .shutdown_agent(child_thread_id) + .await + .expect("child shutdown should submit"); + let _ = parent_thread + .submit(Op::Shutdown {}) + .await + .expect("parent shutdown should submit"); + } + + #[tokio::test] + async fn spawn_agent_fork_flushes_parent_rollout_before_loading_history() { + let harness = AgentControlHarness::new().await; + let (parent_thread_id, parent_thread) = harness.start_thread().await; + let turn_context = parent_thread.codex.session.new_default_turn().await; + let parent_spawn_call_id = "spawn-call-unflushed".to_string(); + let 
parent_spawn_call = ResponseItem::FunctionCall { + id: None, + name: "spawn_agent".to_string(), + namespace: None, + arguments: "{}".to_string(), + call_id: parent_spawn_call_id.clone(), + }; + parent_thread + .codex + .session + .record_conversation_items(turn_context.as_ref(), &[parent_spawn_call]) + .await; + + let child_thread_id = harness + .control + .spawn_agent_with_options( + harness.config.clone(), + text_input("child task"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: None, + })), + SpawnAgentOptions { + fork_parent_spawn_call_id: Some(parent_spawn_call_id.clone()), + }, + ) + .await + .expect("forked spawn should flush parent rollout before loading history"); + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + let history = child_thread.codex.session.clone_history().await; + + let mut parent_call_index = None; + let mut injected_output_index = None; + for (idx, item) in history.raw_items().iter().enumerate() { + match item { + ResponseItem::FunctionCall { call_id, .. } if call_id == &parent_spawn_call_id => { + parent_call_index = Some(idx); + } + ResponseItem::FunctionCallOutput { call_id, .. 
} + if call_id == &parent_spawn_call_id => + { + injected_output_index = Some(idx); + } + _ => {} + } + } + + let parent_call_index = + parent_call_index.expect("forked child should include the parent spawn_agent call"); + let injected_output_index = injected_output_index + .expect("forked child should include synthetic output for the parent spawn_agent call"); + assert!(parent_call_index < injected_output_index); + + let _ = harness + .control + .shutdown_agent(child_thread_id) + .await + .expect("child shutdown should submit"); + let _ = parent_thread + .submit(Op::Shutdown {}) + .await + .expect("parent shutdown should submit"); + } + + #[tokio::test] + async fn spawn_agent_respects_max_threads_limit() { + let max_threads = 1usize; + let (_home, config) = test_config_with_cli_overrides(vec![( + "agents.max_threads".to_string(), + TomlValue::Integer(max_threads as i64), + )]) + .await; + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.clone(), + ); + let control = manager.agent_control(); + + let _ = manager + .start_thread(config.clone()) + .await + .expect("start thread"); + + let first_agent_id = control + .spawn_agent(config.clone(), text_input("hello"), None) + .await + .expect("spawn_agent should succeed"); + + let err = control + .spawn_agent(config, text_input("hello again"), None) + .await + .expect_err("spawn_agent should respect max threads"); + let CodexErr::AgentLimitReached { + max_threads: seen_max_threads, + } = err + else { + panic!("expected CodexErr::AgentLimitReached"); + }; + assert_eq!(seen_max_threads, max_threads); + + let _ = control + .shutdown_agent(first_agent_id) + .await + .expect("shutdown agent"); + } + + #[tokio::test] + async fn spawn_agent_releases_slot_after_shutdown() { + let max_threads = 1usize; + let (_home, config) = test_config_with_cli_overrides(vec![( + "agents.max_threads".to_string(), + 
TomlValue::Integer(max_threads as i64), + )]) + .await; + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.clone(), + ); + let control = manager.agent_control(); + + let first_agent_id = control + .spawn_agent(config.clone(), text_input("hello"), None) + .await + .expect("spawn_agent should succeed"); + let _ = control + .shutdown_agent(first_agent_id) + .await + .expect("shutdown agent"); + + let second_agent_id = control + .spawn_agent(config.clone(), text_input("hello again"), None) + .await + .expect("spawn_agent should succeed after shutdown"); + let _ = control + .shutdown_agent(second_agent_id) + .await + .expect("shutdown agent"); + } + + #[tokio::test] + async fn spawn_agent_limit_shared_across_clones() { + let max_threads = 1usize; + let (_home, config) = test_config_with_cli_overrides(vec![( + "agents.max_threads".to_string(), + TomlValue::Integer(max_threads as i64), + )]) + .await; + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.clone(), + ); + let control = manager.agent_control(); + let cloned = control.clone(); + + let first_agent_id = cloned + .spawn_agent(config.clone(), text_input("hello"), None) + .await + .expect("spawn_agent should succeed"); + + let err = control + .spawn_agent(config, text_input("hello again"), None) + .await + .expect_err("spawn_agent should respect shared guard"); + let CodexErr::AgentLimitReached { max_threads } = err else { + panic!("expected CodexErr::AgentLimitReached"); + }; + assert_eq!(max_threads, 1); + + let _ = control + .shutdown_agent(first_agent_id) + .await + .expect("shutdown agent"); + } + + #[tokio::test] + async fn resume_agent_respects_max_threads_limit() { + let max_threads = 1usize; + let (_home, config) = test_config_with_cli_overrides(vec![( + "agents.max_threads".to_string(), + 
TomlValue::Integer(max_threads as i64), + )]) + .await; + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.clone(), + ); + let control = manager.agent_control(); + + let resumable_id = control + .spawn_agent(config.clone(), text_input("hello"), None) + .await + .expect("spawn_agent should succeed"); + let _ = control + .shutdown_agent(resumable_id) + .await + .expect("shutdown resumable thread"); + + let active_id = control + .spawn_agent(config.clone(), text_input("occupy"), None) + .await + .expect("spawn_agent should succeed for active slot"); + + let err = control + .resume_agent_from_rollout(config, resumable_id, SessionSource::Exec) + .await + .expect_err("resume should respect max threads"); + let CodexErr::AgentLimitReached { + max_threads: seen_max_threads, + } = err + else { + panic!("expected CodexErr::AgentLimitReached"); + }; + assert_eq!(seen_max_threads, max_threads); + + let _ = control + .shutdown_agent(active_id) + .await + .expect("shutdown active thread"); + } + + #[tokio::test] + async fn resume_agent_releases_slot_after_resume_failure() { + let max_threads = 1usize; + let (_home, config) = test_config_with_cli_overrides(vec![( + "agents.max_threads".to_string(), + TomlValue::Integer(max_threads as i64), + )]) + .await; + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.clone(), + ); + let control = manager.agent_control(); + + let _ = control + .resume_agent_from_rollout(config.clone(), ThreadId::new(), SessionSource::Exec) + .await + .expect_err("resume should fail for missing rollout path"); + + let resumed_id = control + .spawn_agent(config, text_input("hello"), None) + .await + .expect("spawn should succeed after failed resume"); + let _ = control + .shutdown_agent(resumed_id) + .await + .expect("shutdown resumed thread"); 
+ } + + #[tokio::test] + async fn spawn_child_completion_notifies_parent_history() { + let harness = AgentControlHarness::new().await; + let (parent_thread_id, parent_thread) = harness.start_thread().await; + + let child_thread_id = harness + .control + .spawn_agent( + harness.config.clone(), + text_input("hello child"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: Some("explorer".to_string()), + })), + ) + .await + .expect("child spawn should succeed"); + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should exist"); + let mut status_rx = harness + .control + .subscribe_status(child_thread_id) + .await + .expect("status subscription should succeed"); + if matches!(status_rx.borrow().clone(), AgentStatus::PendingInit) { + timeout(Duration::from_secs(5), async { + loop { + status_rx + .changed() + .await + .expect("child status should advance past pending init"); + if !matches!(status_rx.borrow().clone(), AgentStatus::PendingInit) { + break; + } + } + }) + .await + .expect("child should initialize before shutdown"); + } + let _ = child_thread + .submit(Op::Shutdown {}) + .await + .expect("child shutdown should submit"); + + assert_eq!(wait_for_subagent_notification(&parent_thread).await, true); + } + + #[tokio::test] + async fn completion_watcher_notifies_parent_when_child_is_missing() { + let harness = AgentControlHarness::new().await; + let (parent_thread_id, parent_thread) = harness.start_thread().await; + let child_thread_id = ThreadId::new(); + + harness.control.maybe_start_completion_watcher( + child_thread_id, + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: Some("explorer".to_string()), + })), + ); + + assert_eq!(wait_for_subagent_notification(&parent_thread).await, true); + + let history_items = parent_thread + .codex + .session + 
.clone_history() + .await + .raw_items() + .to_vec(); + assert_eq!( + history_contains_text( + &history_items, + &format!("\"agent_id\":\"{child_thread_id}\"") + ), + true + ); + assert_eq!( + history_contains_text(&history_items, "\"status\":\"not_found\""), + true + ); + } + + #[tokio::test] + async fn spawn_thread_subagent_gets_random_nickname_in_session_source() { + let harness = AgentControlHarness::new().await; + let (parent_thread_id, _parent_thread) = harness.start_thread().await; + + let child_thread_id = harness + .control + .spawn_agent( + harness.config.clone(), + text_input("hello child"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: Some("explorer".to_string()), + })), + ) + .await + .expect("child spawn should succeed"); + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + let snapshot = child_thread.config_snapshot().await; + + let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: seen_parent_thread_id, + depth, + agent_nickname, + agent_role, + }) = snapshot.session_source + else { + panic!("expected thread-spawn sub-agent source"); + }; + assert_eq!(seen_parent_thread_id, parent_thread_id); + assert_eq!(depth, 1); + assert!(agent_nickname.is_some()); + assert_eq!(agent_role, Some("explorer".to_string())); + } + + #[tokio::test] + async fn spawn_thread_subagent_uses_role_specific_nickname_candidates() { + let mut harness = AgentControlHarness::new().await; + harness.config.agent_roles.insert( + "researcher".to_string(), + AgentRoleConfig { + description: Some("Research role".to_string()), + config_file: None, + nickname_candidates: Some(vec!["Atlas".to_string()]), + }, + ); + let (parent_thread_id, _parent_thread) = harness.start_thread().await; + + let child_thread_id = harness + .control + .spawn_agent( + harness.config.clone(), + text_input("hello child"), + 
Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: Some("researcher".to_string()), + })), + ) + .await + .expect("child spawn should succeed"); + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + let snapshot = child_thread.config_snapshot().await; + + let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { agent_nickname, .. }) = + snapshot.session_source + else { + panic!("expected thread-spawn sub-agent source"); + }; + assert_eq!(agent_nickname, Some("Atlas".to_string())); + } + + #[tokio::test] + async fn resume_thread_subagent_restores_stored_nickname_and_role() { + let (home, mut config) = test_config().await; + config + .features + .enable(Feature::Sqlite) + .expect("test config should allow sqlite"); + let manager = ThreadManager::with_models_provider_and_home_for_tests( + CodexAuth::from_api_key("dummy"), + config.model_provider.clone(), + config.codex_home.clone(), + ); + let control = manager.agent_control(); + let harness = AgentControlHarness { + _home: home, + config, + manager, + control, + }; + let (parent_thread_id, _parent_thread) = harness.start_thread().await; + + let child_thread_id = harness + .control + .spawn_agent( + harness.config.clone(), + text_input("hello child"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: Some("explorer".to_string()), + })), + ) + .await + .expect("child spawn should succeed"); + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should exist"); + let mut status_rx = harness + .control + .subscribe_status(child_thread_id) + .await + .expect("status subscription should succeed"); + if matches!(status_rx.borrow().clone(), AgentStatus::PendingInit) { + timeout(Duration::from_secs(5), async { + loop { + status_rx + .changed() 
+ .await + .expect("child status should advance past pending init"); + if !matches!(status_rx.borrow().clone(), AgentStatus::PendingInit) { + break; + } + } + }) + .await + .expect("child should initialize before shutdown"); + } + let original_snapshot = child_thread.config_snapshot().await; + let original_nickname = original_snapshot + .session_source + .get_nickname() + .expect("spawned sub-agent should have a nickname"); + let state_db = child_thread + .state_db() + .expect("sqlite state db should be available for nickname resume test"); + timeout(Duration::from_secs(5), async { + loop { + if let Ok(Some(metadata)) = state_db.get_thread(child_thread_id).await + && metadata.agent_nickname.is_some() + && metadata.agent_role.as_deref() == Some("explorer") + { + break; + } + sleep(Duration::from_millis(10)).await; + } + }) + .await + .expect("child thread metadata should be persisted to sqlite before shutdown"); + + let _ = harness + .control + .shutdown_agent(child_thread_id) + .await + .expect("child shutdown should submit"); + + let resumed_thread_id = harness + .control + .resume_agent_from_rollout( + harness.config.clone(), + child_thread_id, + SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_nickname: None, + agent_role: None, + }), + ) + .await + .expect("resume should succeed"); + assert_eq!(resumed_thread_id, child_thread_id); + + let resumed_snapshot = harness + .manager + .get_thread(resumed_thread_id) + .await + .expect("resumed child thread should exist") + .config_snapshot() + .await; + let SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: resumed_parent_thread_id, + depth: resumed_depth, + agent_nickname: resumed_nickname, + agent_role: resumed_role, + }) = resumed_snapshot.session_source + else { + panic!("expected thread-spawn sub-agent source"); + }; + assert_eq!(resumed_parent_thread_id, parent_thread_id); + assert_eq!(resumed_depth, 1); + assert_eq!(resumed_nickname, 
Some(original_nickname)); + assert_eq!(resumed_role, Some("explorer".to_string())); + + let _ = harness + .control + .shutdown_agent(resumed_thread_id) + .await + .expect("resumed child shutdown should submit"); + } +} diff --git a/codex-rs/core/src/agent/control_tests.rs b/codex-rs/core/src/agent/control_tests.rs index 6a86000f96..7f475d941a 100644 --- a/codex-rs/core/src/agent/control_tests.rs +++ b/codex-rs/core/src/agent/control_tests.rs @@ -15,9 +15,13 @@ use codex_protocol::AgentPath; use codex_protocol::config_types::ModeKind; use codex_protocol::models::ContentItem; use codex_protocol::models::MessagePhase; +use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::AGENT_INBOX_KIND; +use codex_protocol::protocol::AgentInboxPayload; use codex_protocol::protocol::ErrorEvent; use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::SessionSource; use codex_protocol::protocol::SubAgentSource; @@ -446,6 +450,390 @@ async fn send_input_submits_user_message() { assert_eq!(captured, Some(expected)); } +#[tokio::test] +async fn send_input_resets_watchdog_owner_idle_state() { + let harness = AgentControlHarness::new().await; + let (owner_thread_id, _thread) = harness.start_thread().await; + let target_thread_id = ThreadId::new(); + + harness + .control + .register_watchdog(WatchdogRegistration { + owner_thread_id, + target_thread_id, + child_depth: 0, + interval_s: 30, + prompt: String::new(), + config: harness.config.clone(), + }) + .await + .expect("watchdog registration should succeed"); + + assert_eq!( + harness + .control + .watchdog_owner_idle_since_is_none_for_tests(target_thread_id) + .await, + Some(false) + ); + + harness + .control + .send_input(owner_thread_id, text_input("ping")) + .await + .expect("send_input should succeed"); + + assert_eq!( + harness + .control + 
.watchdog_owner_idle_since_is_none_for_tests(target_thread_id) + .await, + Some(true) + ); +} + +#[tokio::test] +async fn note_owner_input_resets_watchdog_owner_idle_state() { + let harness = AgentControlHarness::new().await; + let (owner_thread_id, _thread) = harness.start_thread().await; + let target_thread_id = ThreadId::new(); + + harness + .control + .register_watchdog(WatchdogRegistration { + owner_thread_id, + target_thread_id, + child_depth: 0, + interval_s: 30, + prompt: String::new(), + config: harness.config.clone(), + }) + .await + .expect("watchdog registration should succeed"); + + assert_eq!( + harness + .control + .watchdog_owner_idle_since_is_none_for_tests(target_thread_id) + .await, + Some(false) + ); + + harness.control.note_owner_input(owner_thread_id).await; + + assert_eq!( + harness + .control + .watchdog_owner_idle_since_is_none_for_tests(target_thread_id) + .await, + Some(true) + ); +} + +#[test] +fn build_agent_inbox_items_emits_function_call_and_output() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items( + sender_thread_id, + "watchdog update".to_string(), + /*prepend_turn_start_user_message*/ false, + ) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 2); + + let call_id = match &items[0] { + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + call_id.clone() + } + other => panic!("expected function call item, got {other:?}"), + }; + + match &items[1] { + ResponseInputItem::FunctionCallOutput { + call_id: output_call_id, + output, + } => { + assert_eq!(output_call_id, &call_id); + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert!(payload.injected); + assert_eq!(payload.kind, AGENT_INBOX_KIND); + 
assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } +} + +#[test] +fn build_agent_inbox_items_prepends_empty_user_message_when_requested() { + let sender_thread_id = ThreadId::new(); + let items = build_agent_inbox_items( + sender_thread_id, + "watchdog update".to_string(), + /*prepend_turn_start_user_message*/ true, + ) + .expect("tool role should build inbox items"); + + assert_eq!(items.len(), 3); + assert_eq!( + items[0], + ResponseInputItem::Message { + role: "user".to_string(), + content: vec![ContentItem::InputText { + text: String::new(), + }], + phase: None, + } + ); + assert_matches!(&items[1], ResponseInputItem::FunctionCall { .. }); + assert_matches!(&items[2], ResponseInputItem::FunctionCallOutput { .. }); +} + +#[tokio::test] +async fn send_agent_message_to_root_thread_defaults_to_user_input() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + assert!(!submission_id.is_empty()); + + let expected = ( + receiver_thread_id, + Op::UserInput { + items: vec![UserInput::Text { + text: "watchdog update".to_string(), + text_elements: Vec::new(), + }], + environments: None, + final_output_json_schema: None, + responsesapi_client_metadata: None, + }, + ); + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|entry| *entry == expected); + + assert_eq!(captured, Some(expected)); +} + +#[tokio::test] +async fn send_agent_message_to_root_thread_injects_response_items_when_enabled() { + let mut harness = AgentControlHarness::new().await; + harness.config.agent_use_function_call_inbox = true; + let 
(receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + + let submission_id = harness + .control + .send_agent_message( + receiver_thread_id, + sender_thread_id, + "watchdog update".to_string(), + ) + .await + .expect("send_agent_message should succeed"); + assert!(!submission_id.is_empty()); + + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|(thread_id, op)| { + *thread_id == receiver_thread_id && matches!(op, Op::InjectResponseItems { .. }) + }) + .expect("expected injected agent inbox op"); + + let Op::InjectResponseItems { items } = captured.1 else { + unreachable!("matched above"); + }; + assert_eq!(items.len(), 3); + match &items[0] { + ResponseInputItem::Message { role, content, .. } => { + assert_eq!(role, "user"); + assert_eq!( + content, + &vec![ContentItem::InputText { + text: String::new(), + }] + ); + } + other => panic!("expected prepended user message, got {other:?}"), + } + match &items[1] { + ResponseInputItem::FunctionCall { + name, arguments, .. + } => { + assert_eq!(name, AGENT_INBOX_KIND); + assert_eq!(arguments, "{}"); + } + other => panic!("expected function call item, got {other:?}"), + } + match &items[2] { + ResponseInputItem::FunctionCallOutput { output, .. 
} => { + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + let payload: AgentInboxPayload = + serde_json::from_str(&output_text).expect("payload should be valid json"); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!(payload.message, "watchdog update"); + } + other => panic!("expected function call output item, got {other:?}"), + } +} + +#[tokio::test] +async fn send_watchdog_wakeup_strips_helper_prompt_scaffold_from_fallback_message() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + let leaked_prompt = "# You are a Subagent\n\n\ + Read AGENTS.watchdog.md before responding.\n\n\ + Target agent id: 019cc0e8-38b6-7493-8e31-73a64c5843b6\n\n\ + watchdog charter: keep the root AutoPlan aligned"; + + let submission_id = harness + .control + .send_watchdog_wakeup( + receiver_thread_id, + sender_thread_id, + leaked_prompt.to_string(), + ) + .await + .expect("send_watchdog_wakeup should succeed"); + assert!(submission_id.is_empty()); + assert_no_injected_agent_inbox_payload(&harness, receiver_thread_id); +} + +#[tokio::test] +async fn send_watchdog_wakeup_preserves_benign_marker_mentions_in_regular_fallback_reports() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + let message = "Watchdog report: the watchdog charter was refreshed and no action is needed."; + + let submission_id = harness + .control + .send_watchdog_wakeup(receiver_thread_id, sender_thread_id, message.to_string()) + .await + .expect("send_watchdog_wakeup should succeed"); + assert!(!submission_id.is_empty()); + + let payload = injected_agent_inbox_payload(&harness, receiver_thread_id); + assert_eq!( + payload, + AgentInboxPayload::new(sender_thread_id, message.to_string()) + ); +} + +#[tokio::test] +async fn 
send_watchdog_wakeup_preserves_report_body_after_stripping_helper_scaffold() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + let message = "# You are a Subagent\n\n\ + Read AGENTS.watchdog.md before responding.\n\n\ + Target agent id: 019cc0e8-38b6-7493-8e31-73a64c5843b6\n\n\ + watchdog charter: keep the root AutoPlan aligned\n\n\ + Watchdog report: branch checks are green."; + + let submission_id = harness + .control + .send_watchdog_wakeup(receiver_thread_id, sender_thread_id, message.to_string()) + .await + .expect("send_watchdog_wakeup should succeed"); + assert!(!submission_id.is_empty()); + + let payload = injected_agent_inbox_payload(&harness, receiver_thread_id); + assert_eq!( + payload, + AgentInboxPayload::new( + sender_thread_id, + "Watchdog report: branch checks are green.".to_string() + ) + ); +} + +#[tokio::test] +async fn send_watchdog_wakeup_strips_ordinary_prompt_instructions_before_report_marker() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + let message = "# You are a Subagent\n\n\ + More importantly, you are a **watchdog check-in agent**.\n\ + Keep the root agent unblocked, on-task, and executing real work.\n\n\ + Target agent id: 019cc0e8-38b6-7493-8e31-73a64c5843b6\n\n\ + AUTOPLAN_WATCHDOG_REPORT\n\ + required_action: rerun CI\n\ + reason: current checks are stale"; + + let submission_id = harness + .control + .send_watchdog_wakeup(receiver_thread_id, sender_thread_id, message.to_string()) + .await + .expect("send_watchdog_wakeup should succeed"); + assert!(!submission_id.is_empty()); + + let payload = injected_agent_inbox_payload(&harness, receiver_thread_id); + assert_eq!(payload.sender_thread_id, sender_thread_id); + assert_eq!( + payload.message, + "AUTOPLAN_WATCHDOG_REPORT\n\ + required_action: rerun CI\n\ + 
reason: current checks are stale" + ); + assert!(!payload.message.contains("# You are a Subagent")); + assert!( + !payload + .message + .contains("More importantly, you are a **watchdog check-in agent**.") + ); + assert!(!payload.message.contains("Target agent id:")); +} + +#[tokio::test] +async fn send_watchdog_wakeup_emits_nothing_when_scaffold_has_no_report_marker() { + let harness = AgentControlHarness::new().await; + let (receiver_thread_id, _thread) = harness.start_thread().await; + let sender_thread_id = ThreadId::new(); + let message = "# You are a Subagent\n\n\ + More importantly, you are a **watchdog check-in agent**.\n\ + Keep the root agent unblocked, on-task, and executing real work.\n\n\ + Target agent id: 019cc0e8-38b6-7493-8e31-73a64c5843b6"; + + let submission_id = harness + .control + .send_watchdog_wakeup(receiver_thread_id, sender_thread_id, message.to_string()) + .await + .expect("send_watchdog_wakeup should succeed"); + assert!(submission_id.is_empty()); + assert_no_injected_agent_inbox_payload(&harness, receiver_thread_id); +} + #[tokio::test] async fn send_inter_agent_communication_without_turn_queues_message_without_triggering_turn() { let harness = AgentControlHarness::new().await; @@ -939,6 +1327,127 @@ async fn spawn_agent_fork_last_n_turns_keeps_only_recent_turns() { .expect("parent shutdown should submit"); } +#[tokio::test] +async fn spawn_agent_fork_snapshots_parent_boundary_for_persisted_fork_reference() { + let harness = AgentControlHarness::new().await; + let (parent_thread_id, parent_thread) = harness.start_thread().await; + parent_thread + .inject_user_message_without_turn("parent seed context".to_string()) + .await; + let turn_context = parent_thread.codex.session.new_default_turn().await; + let parent_spawn_call_id = "spawn-call-dedup".to_string(); + let parent_spawn_call = ResponseItem::FunctionCall { + id: None, + name: "spawn_agent".to_string(), + namespace: None, + arguments: "{}".to_string(), + call_id: 
parent_spawn_call_id.clone(), + }; + parent_thread + .codex + .session + .record_conversation_items(turn_context.as_ref(), &[parent_spawn_call]) + .await; + parent_thread + .codex + .session + .ensure_rollout_materialized() + .await; + parent_thread.codex.session.flush_rollout().await; + let parent_rollout_path = parent_thread + .rollout_path() + .expect("parent rollout path should be available"); + + let child_thread_id = harness + .control + .spawn_agent_with_metadata( + harness.config.clone(), + text_input("child task"), + Some(SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id, + depth: 1, + agent_path: None, + agent_nickname: None, + agent_role: None, + })), + SpawnAgentOptions { + fork_parent_spawn_call_id: Some(parent_spawn_call_id), + fork_mode: Some(SpawnAgentForkMode::FullHistory), + ..Default::default() + }, + ) + .await + .expect("forked spawn should succeed") + .thread_id; + + parent_thread + .inject_user_message_without_turn("parent late turn".to_string()) + .await; + parent_thread + .codex + .session + .ensure_rollout_materialized() + .await; + parent_thread.codex.session.flush_rollout().await; + + let child_thread = harness + .manager + .get_thread(child_thread_id) + .await + .expect("child thread should be registered"); + let child_rollout_path = child_thread + .rollout_path() + .expect("child rollout path should be available"); + let InitialHistory::Resumed(resumed) = + RolloutRecorder::get_rollout_history(child_rollout_path.as_path()) + .await + .expect("child rollout should load") + else { + panic!("child rollout should include session metadata"); + }; + + assert!(resumed.history.iter().any(|item| { + matches!( + item, + RolloutItem::ForkReference(ForkReferenceItem { + rollout_path, + nth_user_message: 1, + }) if rollout_path == &parent_rollout_path + ) + })); + let materialized_child_rollout = + crate::rollout::truncation::materialize_rollout_items_for_replay( + harness.config.codex_home.as_path(), + &resumed.history, + ) 
+ .await; + let materialized_child_response_items: Vec = materialized_child_rollout + .iter() + .filter_map(|item| match item { + RolloutItem::ResponseItem(response_item) => Some(response_item.clone()), + _ => None, + }) + .collect(); + assert!(history_contains_text( + &materialized_child_response_items, + "parent seed context", + )); + assert!(!history_contains_text( + &materialized_child_response_items, + "parent late turn", + )); + + let _ = harness + .control + .shutdown_live_agent(child_thread_id) + .await + .expect("child shutdown should submit"); + let _ = parent_thread + .submit(Op::Shutdown {}) + .await + .expect("parent shutdown should submit"); +} + #[tokio::test] async fn spawn_agent_respects_max_threads_limit() { let max_threads = 1usize; @@ -1499,8 +2008,11 @@ async fn spawn_thread_subagent_uses_role_specific_nickname_candidates() { "researcher".to_string(), AgentRoleConfig { description: Some("Research role".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: Some(vec!["Atlas".to_string()]), + fork_context: None, }, ); let (parent_thread_id, _parent_thread) = harness.start_thread().await; @@ -2535,3 +3047,50 @@ async fn resume_agent_from_rollout_skips_descendants_when_parent_resume_fails() .await .expect("tree shutdown after partial subtree resume should succeed"); } + +fn injected_agent_inbox_payload( + harness: &AgentControlHarness, + receiver_thread_id: ThreadId, +) -> AgentInboxPayload { + let captured = harness + .manager + .captured_ops() + .into_iter() + .find(|(thread_id, op)| { + *thread_id == receiver_thread_id && matches!(op, Op::InjectResponseItems { .. }) + }) + .expect("expected injected agent inbox op"); + + let Op::InjectResponseItems { items } = captured.1 else { + unreachable!("matched above"); + }; + + let output = items + .iter() + .find_map(|item| match item { + ResponseInputItem::FunctionCallOutput { output, .. 
} => Some(output), + _ => None, + }) + .expect("expected function call output item"); + let output_text = output + .body + .to_text() + .expect("payload should convert to text"); + + serde_json::from_str(&output_text).expect("payload should be valid json") +} +fn assert_no_injected_agent_inbox_payload( + harness: &AgentControlHarness, + receiver_thread_id: ThreadId, +) { + assert!( + harness + .manager + .captured_ops() + .into_iter() + .all(|(thread_id, op)| { + thread_id != receiver_thread_id || !matches!(op, Op::InjectResponseItems { .. }) + }), + "expected no injected watchdog inbox op" + ); +} diff --git a/codex-rs/core/src/agent/mod.rs b/codex-rs/core/src/agent/mod.rs index a60fc3004a..ba70f24168 100644 --- a/codex-rs/core/src/agent/mod.rs +++ b/codex-rs/core/src/agent/mod.rs @@ -4,11 +4,16 @@ pub(crate) mod mailbox; mod registry; pub(crate) mod role; pub(crate) mod status; +mod watchdog; pub(crate) use codex_protocol::protocol::AgentStatus; pub(crate) use control::AgentControl; +pub(crate) use control::AgentListing; +pub(crate) use control::WatchdogParentCompactionResult; pub(crate) use mailbox::Mailbox; pub(crate) use mailbox::MailboxReceiver; pub(crate) use registry::exceeds_thread_spawn_depth_limit; pub(crate) use registry::next_thread_spawn_depth; pub(crate) use status::agent_status_from_event; +pub(crate) use watchdog::RemovedWatchdog; +pub(crate) use watchdog::WatchdogRegistration; diff --git a/codex-rs/core/src/agent/registry.rs b/codex-rs/core/src/agent/registry.rs index 1acd73085f..e1ed6d0259 100644 --- a/codex-rs/core/src/agent/registry.rs +++ b/codex-rs/core/src/agent/registry.rs @@ -77,6 +77,18 @@ pub(crate) fn exceeds_thread_spawn_depth_limit(depth: i32, max_depth: i32) -> bo } impl AgentRegistry { + pub(crate) fn tracked_thread_ids(&self) -> Vec { + let active_agents = self + .active_agents + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + active_agents + .agent_tree + .values() + .filter_map(|metadata| metadata.agent_id) + 
.collect() + } + pub(crate) fn reserve_spawn_slot( self: &Arc, max_threads: Option, diff --git a/codex-rs/core/src/agent/role.rs b/codex-rs/core/src/agent/role.rs index 2ab16cd22a..80eb4891b1 100644 --- a/codex-rs/core/src/agent/role.rs +++ b/codex-rs/core/src/agent/role.rs @@ -29,6 +29,11 @@ use toml::Value as TomlValue; pub const DEFAULT_ROLE_NAME: &str = "default"; const AGENT_TYPE_UNAVAILABLE_ERROR: &str = "agent type is currently not available"; +pub(crate) fn watchdog_interval_for_role(config: &Config, role_name: Option<&str>) -> Option { + let role_name = role_name.unwrap_or(DEFAULT_ROLE_NAME); + resolve_role_config(config, role_name).and_then(|role| role.watchdog_interval_s) +} + /// Applies a named role layer to `config` while preserving caller-owned model selection. /// /// The role layer is inserted at session-flag precedence so it can override persisted config, but @@ -62,9 +67,13 @@ async fn apply_role_to_config_inner( ) -> anyhow::Result<()> { let is_built_in = !config.agent_roles.contains_key(role_name); let Some(config_file) = role.config_file.as_ref() else { + if let Some(model) = &role.model { + config.model = Some(model.clone()); + } return Ok(()); }; - let role_layer_toml = load_role_layer_toml(config, config_file, is_built_in, role_name).await?; + let role_layer_toml = + load_role_layer_toml(config, config_file, is_built_in, role_name, role).await?; if role_layer_toml .as_table() .is_some_and(toml::map::Map::is_empty) @@ -89,6 +98,7 @@ async fn load_role_layer_toml( config_file: &Path, is_built_in: bool, role_name: &str, + role: &AgentRoleConfig, ) -> anyhow::Result { let (role_config_toml, role_config_base) = if is_built_in { let role_config_contents = built_in::config_file_contents(config_file) @@ -112,10 +122,14 @@ async fn load_role_layer_toml( }; deserialize_config_toml_with_base(role_config_toml.clone(), role_config_base)?; - Ok(resolve_relative_paths_in_config_toml( - role_config_toml, - role_config_base, - )?) 
+ let mut role_layer_toml = + resolve_relative_paths_in_config_toml(role_config_toml, role_config_base)?; + if let Some(model) = &role.model + && let Some(table) = role_layer_toml.as_table_mut() + { + table.insert("model".to_string(), TomlValue::String(model.clone())); + } + Ok(role_layer_toml) } pub(crate) fn resolve_role_config<'a>( @@ -128,6 +142,13 @@ pub(crate) fn resolve_role_config<'a>( .or_else(|| built_in::configs().get(role_name)) } +pub(crate) fn default_fork_context_for_role(config: &Config, role_name: Option<&str>) -> bool { + let role_name = role_name.unwrap_or(DEFAULT_ROLE_NAME); + resolve_role_config(config, role_name) + .and_then(|role| role.fork_context) + .unwrap_or(true) +} + fn preservation_policy(config: &Config, role_layer_toml: &TomlValue) -> (bool, bool) { let role_selects_provider = role_layer_toml.get("model_provider").is_some(); let role_selects_profile = role_layer_toml.get("profile").is_some(); @@ -360,8 +381,11 @@ mod built_in { DEFAULT_ROLE_NAME.to_string(), AgentRoleConfig { description: Some("Default agent.".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: Some(true), } ), ( @@ -374,8 +398,11 @@ Rules: - In order to avoid redundant work, you should avoid exploring the same problem that explorers have already covered. Typically, you should trust the explorer results without additional verification. You are still allowed to inspect the code yourself to gain the needed context! - You are encouraged to spawn up multiple explorers in parallel when you have multiple distinct questions to ask about the codebase that can be answered independently. This allows you to get more information faster without waiting for one question to finish before asking the next. While waiting for the explorer results, you can continue working on other local tasks that do not depend on those results. 
This parallelism is a key advantage of delegation, so use it whenever you have multiple questions to ask. - Reuse existing explorers for related questions."#.to_string()), + model: None, config_file: Some("explorer.toml".to_string().parse().unwrap_or_default()), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: Some(true), } ), ( @@ -389,8 +416,27 @@ Typical tasks: Rules: - Explicitly assign **ownership** of the task (files / responsibility). When the subtask involves code changes, you should clearly specify which files or modules the worker is responsible for. This helps avoid merge conflicts and ensures accountability. For example, you can say "Worker 1 is responsible for updating the authentication module, while Worker 2 will handle the database layer." By defining clear ownership, you can delegate more effectively and reduce coordination overhead. - Always tell workers they are **not alone in the codebase**, and they should not revert the edits made by others, and they should adjust their implementation to accommodate the changes made by others. This is important because there may be multiple workers making changes in parallel, and they need to be aware of each other's work to avoid conflicts and ensure a cohesive final product."#.to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: Some(true), + } + ), + ( + "watchdog".to_string(), + AgentRoleConfig { + description: Some(r#"Use `watchdog` for long-running work that needs periodic oversight. +This role creates an idle-time watchdog handle instead of a conversational worker. +Rules: +- Watchdog check-ins are asynchronous and only happen after the current turn ends and the owner thread is idle. +- Do not call `wait` or `send_input` on the watchdog handle. 
+- Close the watchdog handle only when it is no longer needed or when replacing it with a new watchdog."#.to_string()), + model: None, + config_file: None, + watchdog_interval_s: Some(crate::config::DEFAULT_WATCHDOG_INTERVAL_S), + nickname_candidates: None, + fork_context: Some(true), } ), // Awaiter is temp removed diff --git a/codex-rs/core/src/agent/role_tests.rs b/codex-rs/core/src/agent/role_tests.rs index eceaaa9200..df33d7769f 100644 --- a/codex-rs/core/src/agent/role_tests.rs +++ b/codex-rs/core/src/agent/role_tests.rs @@ -73,6 +73,113 @@ async fn apply_role_returns_error_for_unknown_role() { assert_eq!(err, "unknown agent_type 'missing-role'"); } +#[tokio::test] +async fn default_fork_context_for_role_defaults_unspecified_custom_roles_to_true() { + let (_home, mut config) = test_config_with_cli_overrides(Vec::new()).await; + config.agent_roles.insert( + "custom".to_string(), + AgentRoleConfig { + description: Some("Custom role".to_string()), + model: None, + config_file: None, + watchdog_interval_s: None, + nickname_candidates: None, + fork_context: None, + }, + ); + + assert!(default_fork_context_for_role(&config, Some("custom"))); +} + +#[tokio::test] +async fn default_fork_context_for_role_defaults_discovered_role_files_to_true() { + let codex_home = TempDir::new().expect("create temp dir"); + let repo_root = TempDir::new().expect("create temp dir"); + let nested_cwd = repo_root.path().join("packages").join("app"); + fs::create_dir_all(repo_root.path().join(".git")).expect("create git dir"); + fs::create_dir_all(&nested_cwd).expect("create nested cwd"); + + let workspace_key = repo_root.path().to_string_lossy().replace('\\', "\\\\"); + tokio::fs::write( + codex_home.path().join(CONFIG_TOML_FILE), + format!( + r#"[projects."{workspace_key}"] +trust_level = "trusted" +"# + ), + ) + .await + .expect("write config"); + + let agents_dir = repo_root.path().join(".codex").join("agents"); + tokio::fs::create_dir_all(&agents_dir) + .await + .expect("create 
agents dir"); + tokio::fs::write( + agents_dir.join("custom.toml"), + r#" +name = "custom" +description = "Custom role" +developer_instructions = "Stay focused" +"#, + ) + .await + .expect("write role file"); + + let config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .harness_overrides(ConfigOverrides { + cwd: Some(nested_cwd), + ..Default::default() + }) + .build() + .await + .expect("load config"); + + assert!(default_fork_context_for_role(&config, Some("custom"))); +} + +#[tokio::test] +async fn default_fork_context_for_role_uses_explicit_custom_role_override() { + let (_home, mut config) = test_config_with_cli_overrides(Vec::new()).await; + config.agent_roles.insert( + "custom".to_string(), + AgentRoleConfig { + description: Some("Custom role".to_string()), + model: None, + config_file: None, + watchdog_interval_s: None, + nickname_candidates: None, + fork_context: Some(false), + }, + ); + + assert!(!default_fork_context_for_role(&config, Some("custom"))); +} + +#[tokio::test] +async fn default_fork_context_for_role_uses_explicit_custom_role_override_from_config_toml() { + let home = TempDir::new().expect("create temp dir"); + tokio::fs::write( + home.path().join(CONFIG_TOML_FILE), + r#"[agents.custom] +description = "Custom role" +fork_context = false +"#, + ) + .await + .expect("write config"); + + let config = ConfigBuilder::default() + .codex_home(home.path().to_path_buf()) + .fallback_cwd(Some(home.path().to_path_buf())) + .build() + .await + .expect("load test config"); + + assert!(!default_fork_context_for_role(&config, Some("custom"))); +} + #[tokio::test] #[ignore = "No role requiring it for now"] async fn apply_explorer_role_sets_model_and_adds_session_flags_layer() { @@ -111,8 +218,11 @@ async fn apply_role_returns_unavailable_for_missing_user_role_file() { "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(PathBuf::from("/path/does/not/exist.toml")), + watchdog_interval_s: 
None, nickname_candidates: None, + fork_context: None, }, ); @@ -131,8 +241,11 @@ async fn apply_role_returns_unavailable_for_invalid_user_role_toml() { "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -162,8 +275,11 @@ model = "role-model" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -193,8 +309,11 @@ async fn apply_role_preserves_unspecified_keys() { "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -252,8 +371,11 @@ model_provider = "test-provider" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -306,8 +428,11 @@ model_verbosity = "high" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -372,8 +497,11 @@ model_provider = "role-provider" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -430,8 +558,11 @@ model_provider = "base-provider" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -494,8 +625,11 @@ model_reasoning_effort = "high" "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: 
None, + fork_context: None, }, ); @@ -538,8 +672,11 @@ writable_roots = ["./sandbox-root"] "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -600,8 +737,11 @@ async fn apply_role_takes_precedence_over_existing_session_flags_for_same_key() "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -643,8 +783,11 @@ enabled = false "custom".to_string(), AgentRoleConfig { description: None, + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); @@ -673,6 +816,17 @@ enabled = false assert_eq!(outcome.is_skill_enabled(skill), false); } +#[tokio::test] +async fn watchdog_interval_for_role_returns_built_in_watchdog_interval() { + let (_home, config) = test_config_with_cli_overrides(Vec::new()).await; + + assert_eq!( + watchdog_interval_for_role(&config, Some("watchdog")), + Some(crate::config::DEFAULT_WATCHDOG_INTERVAL_S) + ); + assert_eq!(watchdog_interval_for_role(&config, Some("default")), None); +} + #[test] fn spawn_tool_spec_build_deduplicates_user_defined_built_in_roles() { let user_defined_roles = BTreeMap::from([ @@ -680,8 +834,11 @@ fn spawn_tool_spec_build_deduplicates_user_defined_built_in_roles() { "explorer".to_string(), AgentRoleConfig { description: Some("user override".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ), ("researcher".to_string(), AgentRoleConfig::default()), @@ -701,8 +858,11 @@ fn spawn_tool_spec_lists_user_defined_roles_before_built_ins() { "aaa".to_string(), AgentRoleConfig { description: Some("first".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: None, + 
fork_context: None, }, )]); @@ -728,8 +888,11 @@ fn spawn_tool_spec_marks_role_locked_model_and_reasoning_effort() { "researcher".to_string(), AgentRoleConfig { description: Some("Research carefully.".to_string()), + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, )]); @@ -753,8 +916,11 @@ fn spawn_tool_spec_marks_role_locked_reasoning_effort_only() { "reviewer".to_string(), AgentRoleConfig { description: Some("Review carefully.".to_string()), + model: None, config_file: Some(role_path), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, )]); diff --git a/codex-rs/core/src/agent/watchdog.rs b/codex-rs/core/src/agent/watchdog.rs new file mode 100644 index 0000000000..adc82bda74 --- /dev/null +++ b/codex-rs/core/src/agent/watchdog.rs @@ -0,0 +1,661 @@ +use super::control::AgentControl; +use super::registry::AgentRegistry; +use super::registry::exceeds_thread_spawn_depth_limit; +use super::status::is_final; +use crate::agent::control::SpawnAgentForkMode; +use crate::agent::control::SpawnAgentOptions; +use crate::config::Config; +use crate::session::load_watchdog_prompt; +use crate::thread_manager::ThreadManagerState; +use codex_features::Feature; +use codex_protocol::ThreadId; +use codex_protocol::error::CodexErr; +use codex_protocol::error::Result as CodexResult; +use codex_protocol::protocol::AgentStatus; +use codex_protocol::protocol::SessionSource; +use codex_protocol::protocol::SubAgentSource; +use codex_protocol::user_input::UserInput; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::Arc; +use std::sync::Weak; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::AtomicI64; +use std::sync::atomic::Ordering; +use tokio::sync::Mutex; +use tokio::time::Duration; +use tokio::time::Instant; +use tracing::info; +use tracing::warn; + +const WATCHDOG_TICK_SECONDS: i64 = 1; + +#[derive(Clone)] +pub(crate) struct 
WatchdogRegistration { + pub(crate) owner_thread_id: ThreadId, + pub(crate) target_thread_id: ThreadId, + pub(crate) child_depth: i32, + pub(crate) interval_s: i64, + pub(crate) prompt: String, + pub(crate) config: Config, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) struct RemovedWatchdog { + pub(crate) target_thread_id: ThreadId, + pub(crate) active_helper_id: Option, +} + +struct WatchdogEntry { + registration: WatchdogRegistration, + interval: Duration, + last_trigger: Instant, + active_helper_id: Option, + owner_idle_since: Option, + owner_was_running: bool, + force_due_once: bool, + generation: i64, +} + +pub(crate) struct WatchdogManager { + manager: Weak, + state: Arc, + registrations: Mutex>, + started: AtomicBool, + next_generation: AtomicI64, +} + +impl WatchdogManager { + pub(crate) fn new(manager: Weak, state: Arc) -> Arc { + Arc::new(Self { + manager, + state, + registrations: Mutex::new(HashMap::new()), + started: AtomicBool::new(false), + next_generation: AtomicI64::new(1), + }) + } + + pub(crate) fn start(self: &Arc) { + if self + .started + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) + .is_err() + { + return; + } + + let manager = Arc::clone(self); + tokio::spawn(async move { + manager.run_loop().await; + }); + } + + pub(crate) async fn register( + self: &Arc, + registration: WatchdogRegistration, + ) -> CodexResult> { + if exceeds_thread_spawn_depth_limit( + registration.child_depth, + registration.config.agent_max_depth, + ) { + let max_depth = registration.config.agent_max_depth; + return Err(CodexErr::UnsupportedOperation(format!( + "agent depth limit reached: max depth is {max_depth}" + ))); + } + let interval = interval_duration(registration.interval_s)?; + let generation = self.next_generation.fetch_add(1, Ordering::AcqRel); + let now = Instant::now(); + let entry = WatchdogEntry { + registration, + interval, + last_trigger: now, + active_helper_id: None, + owner_idle_since: Some(now), + 
owner_was_running: false, + force_due_once: false, + generation, + }; + + let mut registrations = self.registrations.lock().await; + let superseded_targets: Vec = registrations + .iter() + .filter_map(|(target_thread_id, existing_entry)| { + (existing_entry.registration.owner_thread_id == entry.registration.owner_thread_id + && *target_thread_id != entry.registration.target_thread_id) + .then_some(*target_thread_id) + }) + .collect(); + let mut superseded = Vec::new(); + for superseded_target in superseded_targets { + if let Some(removed) = registrations.remove(&superseded_target) { + superseded.push(RemovedWatchdog { + target_thread_id: superseded_target, + active_helper_id: removed.active_helper_id, + }); + } + } + registrations.insert(entry.registration.target_thread_id, entry); + Ok(superseded) + } + + async fn run_loop(self: Arc) { + let tick = tick_duration(); + loop { + self.run_once().await; + if self.manager.upgrade().is_none() { + break; + } + tokio::time::sleep(tick).await; + } + } + + pub(crate) async fn run_once(self: &Arc) { + let Some(manager_state) = self.manager.upgrade() else { + self.registrations.lock().await.clear(); + return; + }; + + let snapshots: Vec<(ThreadId, i64)> = { + let registrations = self.registrations.lock().await; + registrations + .iter() + .map(|(target_id, entry)| (*target_id, entry.generation)) + .collect() + }; + let now = Instant::now(); + + for (target_id, generation) in snapshots { + self.evaluate(&manager_state, target_id, generation, now) + .await; + } + } + + async fn evaluate( + self: &Arc, + manager_state: &Arc, + target_thread_id: ThreadId, + generation: i64, + now: Instant, + ) { + let Some(snapshot) = self.snapshot(target_thread_id, generation).await else { + return; + }; + + let owner_thread = manager_state.get_thread(snapshot.owner_thread_id).await; + let owner_status = match owner_thread.as_ref() { + Ok(thread) => thread.agent_status().await, + Err(_) => AgentStatus::NotFound, + }; + let control_for_spawn = 
AgentControl::from_parts( + self.manager.clone(), + Arc::clone(&self.state), + Arc::clone(self), + ); + if is_watchdog_terminated(&owner_status) { + match control_for_spawn.shutdown_agent(target_thread_id).await { + Ok(_) | Err(CodexErr::ThreadNotFound(_)) | Err(CodexErr::InternalAgentDied) => {} + Err(err) => { + warn!( + owner_thread_id = %snapshot.owner_thread_id, + target_thread_id = %target_thread_id, + "watchdog owner termination cleanup failed: {err}" + ); + } + } + return; + } + let force_due = self + .take_force_due_if_generation(target_thread_id, generation) + .await; + let owner_has_active_turn = match owner_thread { + Ok(thread) => thread.has_active_turn().await, + Err(_) => false, + }; + let owner_running = (is_running(&owner_status) || owner_has_active_turn) && !force_due; + let owner_idle_since = self + .update_owner_idle_state_if_generation( + target_thread_id, + generation, + owner_running, + now, + force_due, + ) + .await; + if owner_running { + return; + } + let owner_idle_since = owner_idle_since.or(snapshot.owner_idle_since); + let Some(owner_idle_since) = owner_idle_since else { + return; + }; + if now.duration_since(owner_idle_since) < snapshot.interval { + return; + } + + if let Some(helper_id) = snapshot.active_helper_id { + let helper_status = get_status(manager_state, helper_id).await; + if !is_final(&helper_status) { + return; + } + + let helper_sent_input = manager_state + .get_thread(helper_id) + .await + .map(|thread| thread.last_completed_turn_used_agent_send_input()) + .unwrap_or(false); + // A watchdog helper should wake the owner through `send_input` or a + // final assistant report. If neither exists, emit nothing here and + // let the next scheduled check-in try again. + // + // Preferred path: the helper explicitly calls `send_input`. + // Fallback: if the helper reaches a terminal state without using + // `send_input`, forward only a real final assistant message. 
If + // there is no report body, emit nothing and schedule the next + // helper normally. + if !helper_sent_input { + let fallback_message = match &helper_status { + AgentStatus::Completed(Some(message)) if !message.trim().is_empty() => { + Some(message.clone()) + } + AgentStatus::Completed(_) + | AgentStatus::Errored(_) + | AgentStatus::Interrupted + | AgentStatus::Shutdown + | AgentStatus::NotFound + | AgentStatus::PendingInit + | AgentStatus::Running => None, + }; + + if let Some(message) = fallback_message { + if let Err(err) = control_for_spawn + .send_watchdog_wakeup(snapshot.owner_thread_id, helper_id, message) + .await + { + warn!( + helper_id = %helper_id, + owner_thread_id = %snapshot.owner_thread_id, + "watchdog helper forward failed: {err}" + ); + } else { + info!( + helper_id = %helper_id, + owner_thread_id = %snapshot.owner_thread_id, + "watchdog forwarded helper completion to owner" + ); + } + } + } + if let Err(err) = control_for_spawn.shutdown_agent(helper_id).await { + warn!( + helper_id = %helper_id, + owner_thread_id = %snapshot.owner_thread_id, + "watchdog helper cleanup failed: {err}" + ); + } + self.update_after_spawn( + target_thread_id, + generation, + now, + /*active_helper_id*/ None, + ) + .await; + return; + } + + if now.duration_since(snapshot.last_trigger) < snapshot.interval { + return; + } + + let session_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { + parent_thread_id: snapshot.owner_thread_id, + depth: snapshot.child_depth, + agent_path: None, + agent_nickname: None, + agent_role: None, + }); + let mut helper_config = snapshot.config.clone(); + helper_config.ephemeral = true; + if helper_config + .features + .enabled(Feature::AgentPromptInjection) + { + let watchdog_prompt = load_watchdog_prompt(&helper_config.codex_home).await; + helper_config.developer_instructions = match ( + watchdog_prompt.trim().is_empty(), + helper_config.developer_instructions, + ) { + (true, existing) => existing, + (false, Some(existing)) 
if !existing.trim().is_empty() => { + Some(format!("{existing}\n\n{watchdog_prompt}")) + } + (false, _) => Some(watchdog_prompt), + }; + } + let helper_prompt = + watchdog_helper_prompt(&helper_config, snapshot.owner_thread_id, &snapshot.prompt) + .await; + // Watchdog check-ins must fork a distinct helper thread. If this path ever resumes + // the owner thread instead, the owner can self-wake and rapidly duplicate session + // state in memory. + let spawn_result = control_for_spawn + .spawn_agent_with_metadata( + helper_config, + codex_protocol::protocol::Op::UserInput { + items: vec![UserInput::Text { + text: helper_prompt, + text_elements: Vec::new(), + }], + environments: None, + final_output_json_schema: None, + responsesapi_client_metadata: None, + }, + Some(session_source), + SpawnAgentOptions { + fork_parent_spawn_call_id: Some(format!("watchdog_{target_thread_id}")), + fork_mode: Some(SpawnAgentForkMode::FullHistory), + environments: None, + }, + ) + .await; + + match spawn_result { + Ok(helper_agent) => { + let helper_id = helper_agent.thread_id; + info!("watchdog spawned helper {helper_id} for target {target_thread_id}"); + self.update_after_spawn(target_thread_id, generation, now, Some(helper_id)) + .await; + } + Err(err) => { + warn!("watchdog spawn failed for target {target_thread_id}: {err}"); + self.update_after_spawn( + target_thread_id, + generation, + now, + /*active_helper_id*/ None, + ) + .await; + } + } + } + + async fn snapshot( + &self, + target_thread_id: ThreadId, + generation: i64, + ) -> Option { + let registrations = self.registrations.lock().await; + let entry = registrations.get(&target_thread_id)?; + if entry.generation != generation { + return None; + } + Some(WatchdogSnapshot { + owner_thread_id: entry.registration.owner_thread_id, + child_depth: entry.registration.child_depth, + prompt: entry.registration.prompt.clone(), + config: entry.registration.config.clone(), + interval: entry.interval, + last_trigger: entry.last_trigger, + 
active_helper_id: entry.active_helper_id, + owner_idle_since: entry.owner_idle_since, + }) + } + + async fn update_owner_idle_state_if_generation( + &self, + target_thread_id: ThreadId, + generation: i64, + owner_running: bool, + now: Instant, + force_due: bool, + ) -> Option { + let mut registrations = self.registrations.lock().await; + let entry = registrations.get_mut(&target_thread_id)?; + if entry.generation != generation { + return None; + } + + if force_due { + return entry.owner_idle_since; + } + + if owner_running { + entry.owner_idle_since = None; + entry.owner_was_running = true; + return None; + } + + if entry.owner_was_running || entry.owner_idle_since.is_none() { + entry.owner_idle_since = Some(now); + } + entry.owner_was_running = false; + entry.owner_idle_since + } + + async fn take_force_due_if_generation( + &self, + target_thread_id: ThreadId, + generation: i64, + ) -> bool { + let mut registrations = self.registrations.lock().await; + let Some(entry) = registrations.get_mut(&target_thread_id) else { + return false; + }; + if entry.generation != generation || !entry.force_due_once { + return false; + } + entry.force_due_once = false; + true + } + + pub(crate) async fn note_owner_input(&self, owner_thread_id: ThreadId) { + let mut registrations = self.registrations.lock().await; + for entry in registrations.values_mut() { + if entry.registration.owner_thread_id == owner_thread_id { + entry.owner_idle_since = None; + entry.owner_was_running = true; + } + } + } + + #[cfg(test)] + pub(crate) async fn force_due_for_tests(&self, target_thread_id: ThreadId) { + let mut registrations = self.registrations.lock().await; + if let Some(entry) = registrations.get_mut(&target_thread_id) { + entry.force_due_once = true; + } + } + + #[cfg(test)] + pub(crate) async fn owner_idle_since_is_none_for_tests( + &self, + target_thread_id: ThreadId, + ) -> Option { + let registrations = self.registrations.lock().await; + registrations + .get(&target_thread_id) + 
.map(|entry| entry.owner_idle_since.is_none()) + } + + async fn update_after_spawn( + &self, + target_thread_id: ThreadId, + generation: i64, + now: Instant, + active_helper_id: Option, + ) { + let mut registrations = self.registrations.lock().await; + let Some(entry) = registrations.get_mut(&target_thread_id) else { + return; + }; + if entry.generation != generation { + return; + } + entry.last_trigger = now; + entry.active_helper_id = active_helper_id; + } + + pub(crate) async fn unregister(&self, target_thread_id: ThreadId) -> Option { + let mut registrations = self.registrations.lock().await; + registrations + .remove(&target_thread_id) + .map(|removed| RemovedWatchdog { + target_thread_id, + active_helper_id: removed.active_helper_id, + }) + } + + pub(crate) async fn owner_for_active_helper( + &self, + helper_thread_id: ThreadId, + ) -> Option { + let registrations = self.registrations.lock().await; + registrations.values().find_map(|entry| { + (entry.active_helper_id == Some(helper_thread_id)) + .then_some(entry.registration.owner_thread_id) + }) + } + + pub(crate) async fn registered_targets(&self, candidate_ids: &[ThreadId]) -> HashSet { + let registrations = self.registrations.lock().await; + candidate_ids + .iter() + .copied() + .filter(|candidate_id| registrations.contains_key(candidate_id)) + .collect() + } + + pub(crate) async fn take_for_owner(&self, owner_thread_id: ThreadId) -> Vec { + let mut registrations = self.registrations.lock().await; + let removed_targets: Vec = registrations + .iter() + .filter_map(|(target_thread_id, entry)| { + (entry.registration.owner_thread_id == owner_thread_id).then_some(*target_thread_id) + }) + .collect(); + let mut removed = Vec::new(); + for removed_target in removed_targets { + if let Some(entry) = registrations.remove(&removed_target) { + removed.push(RemovedWatchdog { + target_thread_id: removed_target, + active_helper_id: entry.active_helper_id, + }); + } + } + removed + } + + #[cfg(test)] + 
#[allow(dead_code)] + pub(crate) async fn set_active_helper_for_tests( + &self, + target_thread_id: ThreadId, + helper_thread_id: ThreadId, + ) { + let mut registrations = self.registrations.lock().await; + let Some(entry) = registrations.get_mut(&target_thread_id) else { + return; + }; + let due_at = Instant::now() - entry.interval; + entry.last_trigger = due_at; + entry.owner_idle_since = Some(due_at); + entry.owner_was_running = false; + entry.active_helper_id = Some(helper_thread_id); + } +} + +#[derive(Clone)] +struct WatchdogSnapshot { + owner_thread_id: ThreadId, + child_depth: i32, + prompt: String, + config: Config, + interval: Duration, + last_trigger: Instant, + active_helper_id: Option, + owner_idle_since: Option, +} + +async fn get_status(manager_state: &Arc, thread_id: ThreadId) -> AgentStatus { + let Ok(thread) = manager_state.get_thread(thread_id).await else { + return AgentStatus::NotFound; + }; + thread.agent_status().await +} + +fn is_running(status: &AgentStatus) -> bool { + matches!(status, AgentStatus::PendingInit | AgentStatus::Running) +} + +fn is_watchdog_terminated(status: &AgentStatus) -> bool { + matches!(status, AgentStatus::Shutdown | AgentStatus::NotFound) +} + +fn interval_duration(interval_s: i64) -> CodexResult { + if interval_s <= 0 { + return Err(CodexErr::UnsupportedOperation( + "interval_s must be greater than zero".to_string(), + )); + } + let seconds = u64::try_from(interval_s).map_err(|_| { + CodexErr::UnsupportedOperation(format!("interval_s out of range: {interval_s}")) + })?; + Ok(Duration::from_secs(seconds)) +} + +fn tick_duration() -> Duration { + let seconds = u64::try_from(WATCHDOG_TICK_SECONDS).unwrap_or(5); + Duration::from_secs(seconds) +} + +async fn watchdog_helper_prompt( + _config: &Config, + target_thread_id: ThreadId, + prompt: &str, +) -> String { + if prompt.trim().is_empty() { + format!("Target agent id: {target_thread_id}") + } else { + format!("Target agent id: {target_thread_id}\n\n{prompt}") + } +} + 
+#[cfg(test)] +mod tests { + use super::watchdog_helper_prompt; + use crate::config::ConfigBuilder; + use codex_features::Feature; + use codex_protocol::ThreadId; + + #[tokio::test] + async fn watchdog_helper_prompt_is_minimal_when_agent_prompt_injection_is_disabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + let config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .build() + .await + .expect("load config"); + let target_thread_id = ThreadId::default(); + let prompt = watchdog_helper_prompt(&config, target_thread_id, "ping").await; + assert_eq!( + prompt, + format!("Target agent id: {target_thread_id}\n\nping") + ); + } + + #[tokio::test] + async fn watchdog_helper_prompt_is_user_task_only_when_enabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + let mut config = ConfigBuilder::default() + .codex_home(codex_home.path().to_path_buf()) + .build() + .await + .expect("load config"); + let _ = config.features.enable(Feature::AgentPromptInjection); + + let prompt = watchdog_helper_prompt(&config, ThreadId::default(), "ping").await; + assert!(prompt.contains("Target agent id:")); + assert!(prompt.ends_with("\n\nping")); + } +} diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index ba81b451a7..cc6adcd07b 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -458,7 +458,7 @@ impl ModelClient { prompt.output_schema_strict, ); let payload = ApiCompactionInput { - model: &model_info.slug, + model: model_info.request_model_slug(), input: &input, instructions: &instructions, tools, @@ -544,7 +544,7 @@ impl ModelClient { .with_telemetry(Some(request_telemetry)); let payload = ApiMemorySummarizeInput { - model: model_info.slug.clone(), + model: model_info.request_model_slug().to_string(), raw_memories, reasoning: effort.map(|effort| Reasoning { effort: Some(effort), @@ -879,7 +879,7 @@ impl ModelClientSession { ); let prompt_cache_key = 
Some(self.client.state.conversation_id.to_string()); let request = ResponsesApiRequest { - model: model_info.slug.clone(), + model: model_info.request_model_slug().to_string(), instructions: instructions.clone(), input, tools, diff --git a/codex-rs/core/src/codex_thread.rs b/codex-rs/core/src/codex_thread.rs index e1c796ef32..4424da666f 100644 --- a/codex-rs/core/src/codex_thread.rs +++ b/codex-rs/core/src/codex_thread.rs @@ -11,6 +11,7 @@ use codex_protocol::config_types::CollaborationMode; use codex_protocol::config_types::Personality; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::ServiceTier; +use codex_protocol::config_types::Verbosity; use codex_protocol::config_types::WindowsSandboxLevel; use codex_protocol::error::CodexErr; use codex_protocol::error::Result as CodexResult; @@ -45,13 +46,19 @@ pub struct ThreadConfigSnapshot { pub model: String, pub model_provider_id: String, pub service_tier: Option, + pub plan_mode_reasoning_effort: Option, + pub model_verbosity: Option, + pub model_context_window: Option, + pub model_auto_compact_token_limit: Option, pub approval_policy: AskForApproval, pub approvals_reviewer: ApprovalsReviewer, pub permission_profile: PermissionProfile, pub cwd: AbsolutePathBuf, pub ephemeral: bool, + pub agent_use_function_call_inbox: bool, pub reasoning_effort: Option, pub personality: Option, + pub active_profile: Option, pub session_source: SessionSource, } @@ -287,6 +294,16 @@ impl CodexThread { self.codex.session.token_usage_info().await } + pub(crate) async fn has_active_turn(&self) -> bool { + self.codex.session.has_active_turn().await + } + + pub(crate) fn last_completed_turn_used_agent_send_input(&self) -> bool { + self.codex + .session + .last_completed_turn_used_agent_send_input() + } + /// Records a user-role session-prefix message without creating a new user turn boundary. 
pub(crate) async fn inject_user_message_without_turn(&self, message: String) { let message = ResponseItem::Message { diff --git a/codex-rs/core/src/config/agent_roles.rs b/codex-rs/core/src/config/agent_roles.rs index abdef33e7d..d1e7ddddff 100644 --- a/codex-rs/core/src/config/agent_roles.rs +++ b/codex-rs/core/src/config/agent_roles.rs @@ -163,11 +163,14 @@ async fn read_declared_role( fn merge_missing_role_fields(role: &mut AgentRoleConfig, fallback: &AgentRoleConfig) { role.description = role.description.clone().or(fallback.description.clone()); + role.model = role.model.clone().or(fallback.model.clone()); role.config_file = role.config_file.clone().or(fallback.config_file.clone()); + role.watchdog_interval_s = role.watchdog_interval_s.or(fallback.watchdog_interval_s); role.nickname_candidates = role .nickname_candidates .clone() .or(fallback.nickname_candidates.clone()); + role.fork_context = role.fork_context.or(fallback.fork_context); } fn agents_toml_from_layer( @@ -209,8 +212,11 @@ async fn agent_role_config_from_toml( Ok(AgentRoleConfig { description, + model: role.model.clone(), config_file: config_file.map(AbsolutePathBuf::into_path_buf), + watchdog_interval_s: role.watchdog_interval_s, nickname_candidates, + fork_context: role.fork_context, }) } @@ -506,8 +512,11 @@ async fn discover_agent_roles_in_dir( role_name, AgentRoleConfig { description: parsed_file.description, + model: None, config_file: Some(agent_file.to_path_buf()), + watchdog_interval_s: None, nickname_candidates: parsed_file.nickname_candidates, + fork_context: None, }, ); } diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index 097e93baac..5533d6a0fb 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -12,6 +12,7 @@ use codex_config::config_toml::AgentRoleToml; use codex_config::config_toml::AgentsToml; use codex_config::config_toml::AutoReviewToml; use 
codex_config::config_toml::ConfigToml; +use codex_config::config_toml::CustomModelToml; use codex_config::config_toml::ProjectConfig; use codex_config::config_toml::RealtimeAudioConfig; use codex_config::config_toml::RealtimeConfig; @@ -72,6 +73,7 @@ use codex_protocol::permissions::NetworkSandboxPolicy; use codex_protocol::protocol::NetworkAccess; use codex_protocol::protocol::RealtimeVoice; use codex_protocol::protocol::SandboxPolicy; +use codex_utils_absolute_path::AbsolutePathBuf; use serde::Deserialize; use tempfile::tempdir; @@ -391,6 +393,50 @@ web_search = false ); } +#[tokio::test] +async fn config_toml_deserializes_custom_models() { + let custom_models = r#" +[[custom_models]] +name = "gpt-5.4 1m" +model = "gpt-5.4" +model_context_window = 1000000 +model_auto_compact_token_limit = 900000 +"#; + let custom_models_cfg = toml::from_str::(custom_models) + .expect("TOML deserialization should succeed for custom models"); + + assert_eq!( + custom_models_cfg.custom_models, + vec![CustomModelToml { + name: "gpt-5.4 1m".to_string(), + model: "gpt-5.4".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(900_000), + }] + ); + + let config = Config::load_from_base_config_with_overrides( + custom_models_cfg, + ConfigOverrides::default(), + AbsolutePathBuf::from_absolute_path_checked(tempdir().expect("tempdir").path()) + .expect("tempdir should be absolute"), + ) + .await + .expect("load config from custom models settings"); + + assert_eq!( + config.custom_models, + HashMap::from([( + "gpt-5.4 1m".to_string(), + CustomModelConfig { + model: "gpt-5.4".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(900_000), + }, + )]) + ); +} + #[test] fn rejects_provider_auth_with_env_key() { let err = toml::from_str::( @@ -2891,6 +2937,30 @@ async fn feature_table_overrides_legacy_flags() -> std::io::Result<()> { Ok(()) } +#[tokio::test] +async fn feature_table_enables_agent_function_call_inbox() -> 
std::io::Result<()> { + let codex_home = TempDir::new()?; + let mut entries = BTreeMap::new(); + entries.insert("agent_function_call_inbox".to_string(), true); + let cfg = ConfigToml { + features: Some(codex_features::FeaturesToml::from(entries)), + ..Default::default() + }; + + let config = Config::load_from_base_config_with_overrides( + cfg, + ConfigOverrides::default(), + AbsolutePathBuf::from_absolute_path_checked(codex_home.path()) + .expect("tempdir should be absolute"), + ) + .await?; + + assert!(config.features.enabled(Feature::AgentFunctionCallInbox)); + assert!(config.agent_use_function_call_inbox); + + Ok(()) +} + #[tokio::test] async fn legacy_toggles_map_to_features() -> std::io::Result<()> { let codex_home = TempDir::new()?; @@ -4606,12 +4676,16 @@ async fn load_config_rejects_missing_agent_role_config_file() -> std::io::Result max_depth: None, job_max_runtime_seconds: None, interrupt_message: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: Some(missing_path.abs()), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, )]), }), @@ -5553,15 +5627,19 @@ async fn load_config_normalizes_agent_role_nickname_candidates() -> std::io::Res max_depth: None, job_max_runtime_seconds: None, interrupt_message: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: Some(vec![ " Hypatia ".to_string(), "Noether".to_string(), ]), + fork_context: None, }, )]), }), @@ -5596,12 +5674,16 @@ async fn load_config_rejects_empty_agent_role_nickname_candidates() -> std::io:: max_depth: None, job_max_runtime_seconds: None, interrupt_message: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), 
AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: Some(Vec::new()), + fork_context: None, }, )]), }), @@ -5633,12 +5715,16 @@ async fn load_config_rejects_duplicate_agent_role_nickname_candidates() -> std:: max_depth: None, job_max_runtime_seconds: None, interrupt_message: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: Some(vec!["Hypatia".to_string(), " Hypatia ".to_string()]), + fork_context: None, }, )]), }), @@ -5670,12 +5756,16 @@ async fn load_config_rejects_unsafe_agent_role_nickname_candidates() -> std::io: max_depth: None, job_max_runtime_seconds: None, interrupt_message: None, + use_function_call_inbox: false, roles: BTreeMap::from([( "researcher".to_string(), AgentRoleToml { description: Some("Research role".to_string()), + model: None, config_file: None, + watchdog_interval_s: None, nickname_candidates: Some(vec!["Agent ".to_string()]), + fork_context: None, }, )]), }), @@ -5888,6 +5978,7 @@ async fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + custom_models: HashMap::new(), service_tier: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), @@ -5923,6 +6014,8 @@ async fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> { memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, agent_interrupt_message_enabled: true, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home().to_path_buf(), log_dir: fixture.codex_home().join("log").to_path_buf(), @@ -6082,6 
+6175,7 @@ async fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + custom_models: HashMap::new(), service_tier: None, model_provider_id: "openai-custom".to_string(), model_provider: fixture.openai_custom_provider.clone(), @@ -6117,6 +6211,8 @@ async fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> { memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, agent_interrupt_message_enabled: true, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home().to_path_buf(), log_dir: fixture.codex_home().join("log").to_path_buf(), @@ -6230,6 +6326,7 @@ async fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + custom_models: HashMap::new(), service_tier: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), @@ -6265,6 +6362,8 @@ async fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> { memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, agent_interrupt_message_enabled: true, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home().to_path_buf(), log_dir: fixture.codex_home().join("log").to_path_buf(), @@ -6363,6 +6462,7 @@ async fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + custom_models: HashMap::new(), service_tier: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), @@ -6398,6 +6498,8 @@ async fn 
test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> { memories: MemoriesConfig::default(), agent_job_max_runtime_seconds: DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS, agent_interrupt_message_enabled: true, + agent_use_function_call_inbox: false, + watchdog_interval_s: DEFAULT_WATCHDOG_INTERVAL_S, codex_home: fixture.codex_home(), sqlite_home: fixture.codex_home().to_path_buf(), log_dir: fixture.codex_home().join("log").to_path_buf(), diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index fe9a8e4334..c6ae11d935 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -71,6 +71,7 @@ use codex_model_provider_info::ModelProviderInfo; use codex_model_provider_info::OLLAMA_CHAT_PROVIDER_REMOVED_ERROR; use codex_model_provider_info::built_in_model_providers; use codex_model_provider_info::merge_configured_model_providers; +pub use codex_models_manager::CustomModelConfig; use codex_models_manager::ModelsManagerConfig; use codex_protocol::config_types::AltScreenMode; use codex_protocol::config_types::ForcedLoginMethod; @@ -164,6 +165,7 @@ pub(crate) const MAX_MULTI_AGENT_V2_WAIT_TIMEOUT_MS: i64 = 3600 * 1000; pub(crate) const DEFAULT_AGENT_MAX_DEPTH: i32 = 1; pub(crate) const DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS: Option = None; const LOCAL_DEV_BUILD_VERSION: &str = "0.0.0"; +pub(crate) const DEFAULT_WATCHDOG_INTERVAL_S: i64 = 10; pub const CONFIG_TOML_FILE: &str = "config.toml"; @@ -553,6 +555,9 @@ pub struct Config { /// Combined provider map (defaults plus user-defined providers). pub model_providers: HashMap, + /// User-defined model aliases shown in the picker. + pub custom_models: HashMap, + /// Maximum number of bytes to include from an AGENTS.md project doc file. pub project_doc_max_bytes: usize, @@ -566,6 +571,13 @@ pub struct Config { pub agent_max_threads: Option, /// Maximum runtime in seconds for agent job workers before they are failed. 
pub agent_job_max_runtime_seconds: Option, + /// When true, inbound agent messages to non-subagent threads are delivered + /// as a synthetic function_call/function_call_output pair instead of plain + /// user input. + pub agent_use_function_call_inbox: bool, + + /// Watchdog polling interval in seconds. + pub watchdog_interval_s: i64, /// Whether to record a model-visible message when an agent turn is interrupted. pub agent_interrupt_message_enabled: bool, @@ -962,6 +974,7 @@ impl Config { personality_enabled: self.features.enabled(Feature::Personality), model_supports_reasoning_summaries: self.model_supports_reasoning_summaries, model_catalog: self.model_catalog.clone(), + custom_models: self.custom_models.clone(), } } @@ -1413,10 +1426,16 @@ pub struct AgentRoleConfig { /// Human-facing role documentation used in spawn tool guidance. /// Required for loaded user-defined roles after deprecated/new metadata precedence resolves. pub description: Option, + /// Optional model override applied by this role. + pub model: Option, /// Path to a role-specific config layer. pub config_file: Option, + /// Optional watchdog interval for roles that should spawn as idle-time watchdog handles. + pub watchdog_interval_s: Option, /// Candidate nicknames for agents spawned with this role. pub nickname_candidates: Option>, + /// Default fork-context behavior for this role. 
+ pub fork_context: Option, } fn resolve_tool_suggest_config( @@ -2251,6 +2270,24 @@ impl Config { let model_providers = merge_configured_model_providers(built_in_model_providers(openai_base_url), cfg.model_providers) .map_err(|message| std::io::Error::new(std::io::ErrorKind::InvalidData, message))?; + let mut custom_models = HashMap::new(); + for custom in cfg.custom_models { + let alias = custom.name; + if custom_models.contains_key(&alias) { + return Err(std::io::Error::new( + ErrorKind::InvalidInput, + format!("duplicate custom model alias: {alias}"), + )); + } + custom_models.insert( + alias, + CustomModelConfig { + model: custom.model, + model_context_window: custom.model_context_window, + model_auto_compact_token_limit: custom.model_auto_compact_token_limit, + }, + ); + } let model_provider_id = model_provider .or(config_profile.model_provider) @@ -2332,6 +2369,7 @@ impl Config { .as_ref() .and_then(|agents| agents.job_max_runtime_seconds) .or(DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS); + let agent_use_function_call_inbox = features.enabled(Feature::AgentFunctionCallInbox); if agent_job_max_runtime_seconds == Some(0) { return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, @@ -2351,6 +2389,15 @@ impl Config { .as_ref() .and_then(|agents| agents.interrupt_message) .unwrap_or(true); + let watchdog_interval_s = cfg + .watchdog_interval_s + .unwrap_or(DEFAULT_WATCHDOG_INTERVAL_S); + if watchdog_interval_s <= 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "watchdog_interval_s must be at least 1", + )); + } let background_terminal_max_timeout = cfg .background_terminal_max_timeout .unwrap_or(DEFAULT_MAX_BACKGROUND_TERMINAL_TIMEOUT_MS) @@ -2683,6 +2730,7 @@ impl Config { mcp_oauth_callback_port: cfg.mcp_oauth_callback_port, mcp_oauth_callback_url: cfg.mcp_oauth_callback_url.clone(), model_providers, + custom_models, project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(AGENTS_MD_MAX_BYTES), project_doc_fallback_filenames: 
cfg .project_doc_fallback_filenames @@ -2704,6 +2752,8 @@ impl Config { memories: cfg.memories.unwrap_or_default().into(), agent_job_max_runtime_seconds, agent_interrupt_message_enabled, + agent_use_function_call_inbox, + watchdog_interval_s, codex_home, sqlite_home, log_dir, diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index bd0cdd885d..cc68d94663 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -165,6 +165,7 @@ pub use rollout::append_thread_name; pub use rollout::find_archived_thread_path_by_id_str; #[deprecated(note = "use find_thread_path_by_id_str")] pub use rollout::find_conversation_path_by_id_str; +pub use rollout::find_or_unarchive_thread_path_by_id_str; pub use rollout::find_thread_meta_by_name_str; pub use rollout::find_thread_name_by_id; pub use rollout::find_thread_names_by_ids; @@ -172,7 +173,9 @@ pub use rollout::find_thread_path_by_id_str; pub use rollout::parse_cursor; pub use rollout::read_head_for_summary; pub use rollout::read_session_meta_line; +pub use rollout::resolve_fork_reference_rollout_path; pub use rollout::rollout_date_parts; +pub use thread_rollout_truncation::materialize_rollout_items_for_replay; mod function_tool; mod state; mod tasks; diff --git a/codex-rs/core/src/prompt_debug.rs b/codex-rs/core/src/prompt_debug.rs index 46431c59d8..423e26a29d 100644 --- a/codex-rs/core/src/prompt_debug.rs +++ b/codex-rs/core/src/prompt_debug.rs @@ -41,6 +41,8 @@ pub async fn build_prompt_input( &config, Arc::clone(&auth_manager), SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig { default_mode_request_user_input: config .features diff --git a/codex-rs/core/src/rollout.rs b/codex-rs/core/src/rollout.rs index d4ac5c699a..23d30c3a43 100644 --- a/codex-rs/core/src/rollout.rs +++ b/codex-rs/core/src/rollout.rs @@ -15,6 +15,7 @@ pub use codex_rollout::append_thread_name; pub use codex_rollout::find_archived_thread_path_by_id_str; #[deprecated(note = 
"use find_thread_path_by_id_str")] pub use codex_rollout::find_conversation_path_by_id_str; +pub use codex_rollout::find_or_unarchive_thread_path_by_id_str; pub use codex_rollout::find_thread_meta_by_name_str; pub use codex_rollout::find_thread_name_by_id; pub use codex_rollout::find_thread_names_by_ids; @@ -22,6 +23,7 @@ pub use codex_rollout::find_thread_path_by_id_str; pub use codex_rollout::parse_cursor; pub use codex_rollout::read_head_for_summary; pub use codex_rollout::read_session_meta_line; +pub use codex_rollout::resolve_fork_reference_rollout_path; pub use codex_rollout::rollout_date_parts; impl codex_rollout::RolloutConfigView for Config { diff --git a/codex-rs/core/src/session/handlers.rs b/codex-rs/core/src/session/handlers.rs index b354861605..72fb1344fe 100644 --- a/codex-rs/core/src/session/handlers.rs +++ b/codex-rs/core/src/session/handlers.rs @@ -977,6 +977,13 @@ pub(super) async fn submission_loop( interrupt(&sess).await; false } + Op::NoteOwnerActivity => { + sess.services + .agent_control + .note_owner_input(sess.conversation_id) + .await; + false + } Op::CleanBackgroundTerminals => { clean_background_terminals(&sess).await; false diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 0c5af3fc5e..2e100d2dd0 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -4,7 +4,9 @@ use std::fmt::Debug; use std::path::Path; use std::path::PathBuf; use std::sync::Arc; +use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; use std::time::SystemTime; use std::time::UNIX_EPOCH; @@ -147,6 +149,7 @@ use rmcp::model::ReadResourceRequestParams; use rmcp::model::ReadResourceResult; use rmcp::model::RequestId; use serde_json::Value; +use tokio::fs; use tokio::sync::Mutex; use tokio::sync::RwLock; use tokio::sync::oneshot; @@ -283,6 +286,84 @@ use crate::skills_watcher::SkillsWatcherEvent; use crate::state::ActiveTurn; use 
crate::state::MailboxDeliveryPhase; use crate::state::PendingRequestPermissions; + +const ROOT_AGENT_PROMPT_FALLBACK: &str = include_str!("../../root_agent_prompt.md"); +const ROOT_AGENT_WATCHDOG_PROMPT_FALLBACK: &str = + include_str!("../../root_agent_watchdog_prompt.md"); +const SUBAGENT_PROMPT_FALLBACK: &str = include_str!("../../subagent_prompt.md"); +const SUBAGENT_WATCHDOG_PROMPT_FALLBACK: &str = include_str!("../../subagent_watchdog_prompt.md"); +const WATCHDOG_PROMPT_FALLBACK: &str = include_str!("../../watchdog_agent_prompt.md"); + +async fn load_agent_prompt_fallback( + codex_home: &Path, + fallback: &str, + override_filename: &str, +) -> String { + let override_path = codex_home.join(override_filename); + if let Ok(contents) = fs::read_to_string(&override_path).await + && !contents.trim().is_empty() + { + return contents; + } + + fallback.to_string() +} + +async fn maybe_load_agent_prompt_fragment( + codex_home: &Path, + fallback: &str, + override_filename: &str, + enabled: bool, +) -> Option { + if !enabled { + return None; + } + let fragment = load_agent_prompt_fallback(codex_home, fallback, override_filename).await; + if fragment.trim().is_empty() { + None + } else { + Some(fragment) + } +} + +async fn load_root_agent_prompt(codex_home: &Path, include_watchdog: bool) -> String { + let mut prompt = + load_agent_prompt_fallback(codex_home, ROOT_AGENT_PROMPT_FALLBACK, "AGENTS.root.md").await; + if let Some(fragment) = maybe_load_agent_prompt_fragment( + codex_home, + ROOT_AGENT_WATCHDOG_PROMPT_FALLBACK, + "AGENTS.root.watchdog.md", + include_watchdog, + ) + .await + { + prompt.push_str("\n\n"); + prompt.push_str(fragment.trim()); + } + prompt +} + +pub(crate) async fn load_subagent_prompt(codex_home: &Path, include_watchdog: bool) -> String { + let mut prompt = + load_agent_prompt_fallback(codex_home, SUBAGENT_PROMPT_FALLBACK, "AGENTS.subagent.md") + .await; + if let Some(fragment) = maybe_load_agent_prompt_fragment( + codex_home, + 
SUBAGENT_WATCHDOG_PROMPT_FALLBACK, + "AGENTS.subagent.watchdog.md", + include_watchdog, + ) + .await + { + prompt.push_str("\n\n"); + prompt.push_str(fragment.trim()); + } + prompt +} + +pub(crate) async fn load_watchdog_prompt(codex_home: &Path) -> String { + load_agent_prompt_fallback(codex_home, WATCHDOG_PROMPT_FALLBACK, "AGENTS.watchdog.md").await +} use crate::state::SessionServices; use crate::state::SessionState; #[cfg(test)] @@ -531,7 +612,6 @@ impl Codex { let model = models_manager .get_default_model(&config.model, refresh_strategy) .await; - // Resolve base instructions for the session. Priority order: // 1. config.base_instructions override // 2. conversation history => session_meta.base_instructions @@ -1031,6 +1111,23 @@ impl Session { self.services.live_thread.as_ref() } + pub(crate) async fn has_active_turn(&self) -> bool { + self.active_turn.lock().await.is_some() + } + + pub(crate) fn snapshot_agent_send_input_on_turn_complete(&self) { + let used_agent_send_input = self + .turn_used_agent_send_input + .swap(false, Ordering::AcqRel); + self.last_completed_turn_used_agent_send_input + .store(used_agent_send_input, Ordering::Release); + } + + pub(crate) fn last_completed_turn_used_agent_send_input(&self) -> bool { + self.last_completed_turn_used_agent_send_input + .load(Ordering::Acquire) + } + /// Flush rollout writes and return the final durability-barrier result. 
pub(crate) async fn flush_rollout(&self) -> std::io::Result<()> { if let Some(live_thread) = self.live_thread() { @@ -1140,6 +1237,11 @@ impl Session { state.clear_connector_selection(); } + async fn set_connector_selection(&self, connector_ids: HashSet) { + self.clear_connector_selection().await; + self.merge_connector_selection(connector_ids).await; + } + async fn record_initial_history(&self, conversation_history: InitialHistory) { let turn_context = self.new_default_turn().await; let is_subagent = { @@ -1163,8 +1265,19 @@ impl Session { } InitialHistory::Resumed(resumed_history) => { let rollout_items = resumed_history.history; + let hydrated_rollout_items = if rollout_items + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))) + { + self.materialize_rollout_items_for_replay(&rollout_items) + .await + } else { + rollout_items.clone() + }; + let restored_connector_selection = + Self::extract_connector_selection_from_rollout(&hydrated_rollout_items); let previous_turn_settings = self - .apply_rollout_reconstruction(&turn_context, &rollout_items) + .apply_rollout_reconstruction(&turn_context, &hydrated_rollout_items) .await; // If resuming, warn when the last recorded model differs from the current one. @@ -1189,10 +1302,13 @@ impl Session { // Seed usage info from the recorded rollout so UIs can show token counts // immediately on resume/fork. - if let Some(info) = Self::last_token_info_from_rollout(&rollout_items) { + if let Some(info) = Self::last_token_info_from_rollout(&hydrated_rollout_items) { let mut state = self.state.lock().await; state.set_token_info(Some(info)); } + if let Some(selected_connectors) = restored_connector_selection { + self.set_connector_selection(selected_connectors).await; + } // Defer seeding the session's initial context until the first turn starts so // turn/start overrides can be merged before we write to the rollout. 
@@ -1201,18 +1317,50 @@ impl Session { } } InitialHistory::Forked(rollout_items) => { - self.apply_rollout_reconstruction(&turn_context, &rollout_items) + let persisted_rollout_items = rollout_items + .iter() + .position(|item| matches!(item, RolloutItem::ForkReference(_))) + .map(|index| rollout_items[index..].to_vec()); + let mut hydrated_rollout_items = if rollout_items + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))) + { + self.materialize_rollout_items_for_replay(&rollout_items) + .await + } else { + rollout_items.clone() + }; + // Forked children need a fresh context diff baseline even when their compact + // fork reference is materialized back into full parent history on startup. Keep + // the compact reference for persistence below, but remove it from this hydrated + // in-memory copy so later reconstruction does not re-expand the parent baseline. + hydrated_rollout_items.retain(|item| { + !matches!( + item, + RolloutItem::ForkReference(_) | RolloutItem::TurnContext(_) + ) + }); + let restored_connector_selection = + Self::extract_connector_selection_from_rollout(&hydrated_rollout_items); + + self.apply_rollout_reconstruction(&turn_context, &hydrated_rollout_items) .await; // Seed usage info from the recorded rollout so UIs can show token counts // immediately on resume/fork. - if let Some(info) = Self::last_token_info_from_rollout(&rollout_items) { + if let Some(info) = Self::last_token_info_from_rollout(&hydrated_rollout_items) { let mut state = self.state.lock().await; state.set_token_info(Some(info)); } + if let Some(selected_connectors) = restored_connector_selection { + self.set_connector_selection(selected_connectors).await; + } - // If persisting, persist all rollout items as-is (the store filters). - if !rollout_items.is_empty() { + // Persist only the compact fork reference suffix so child rollouts do not + // duplicate the full parent history they inherited in memory. 
+ if let Some(persisted_rollout_items) = persisted_rollout_items { + self.persist_rollout_items(&persisted_rollout_items).await; + } else if !rollout_items.is_empty() { self.persist_rollout_items(&rollout_items).await; } @@ -1253,6 +1401,41 @@ impl Session { }) } + fn extract_connector_selection_from_rollout( + rollout_items: &[RolloutItem], + ) -> Option> { + let mut active_selected_connectors: Option> = None; + + for item in rollout_items { + let RolloutItem::ResponseItem(response_item) = item else { + continue; + }; + let ResponseItem::FunctionCallOutput { output, .. } = response_item else { + continue; + }; + let Some(content) = output.body.to_text() else { + continue; + }; + let Ok(payload) = serde_json::from_str::(&content) else { + continue; + }; + let Some(selected_connectors) = payload + .get("active_selected_tools") + .and_then(Value::as_array) + else { + continue; + }; + let connector_ids = selected_connectors + .iter() + .filter_map(Value::as_str) + .map(ToOwned::to_owned) + .collect::>(); + active_selected_connectors = Some(connector_ids); + } + + active_selected_connectors + } + async fn previous_turn_settings(&self) -> Option { let state = self.state.lock().await; state.previous_turn_settings() @@ -2393,7 +2576,8 @@ impl Session { turn_context: &Arc, server_model: String, ) -> bool { - let requested_model = turn_context.model_info.slug.clone(); + let requested_model = turn_context.model_info.request_model_slug().to_string(); + let selected_model = turn_context.model_info.slug.clone(); let server_model_normalized = server_model.to_ascii_lowercase(); let requested_model_normalized = requested_model.to_ascii_lowercase(); if server_model_normalized == requested_model_normalized { @@ -2401,7 +2585,9 @@ impl Session { return false; } - warn!("server reported model {server_model} while requested model was {requested_model}"); + warn!( + "server reported model {server_model} while requested model was {requested_model} (selected alias: {selected_model})" + ); 
let warning_message = format!( "Your account was flagged for potentially high-risk cyber activity and this request was routed to gpt-5.2 as a fallback. To regain access to gpt-5.3-codex, apply for trusted access: {CYBER_VERIFY_URL} or learn more: {CYBER_SAFETY_URL}" @@ -2410,7 +2596,7 @@ impl Session { self.send_event( turn_context, EventMsg::ModelReroute(ModelRerouteEvent { - from_model: requested_model.clone(), + from_model: selected_model, to_model: server_model.clone(), reason: ModelRerouteReason::HighRiskCyberActivity, }), diff --git a/codex-rs/core/src/session/rollout_reconstruction.rs b/codex-rs/core/src/session/rollout_reconstruction.rs index a4c042af0c..34dd23a1ac 100644 --- a/codex-rs/core/src/session/rollout_reconstruction.rs +++ b/codex-rs/core/src/session/rollout_reconstruction.rs @@ -84,11 +84,34 @@ fn finalize_active_segment<'a>( } impl Session { + pub(super) async fn materialize_rollout_items_for_replay( + &self, + rollout_items: &[RolloutItem], + ) -> Vec { + let codex_home = { + self.state + .lock() + .await + .session_configuration + .codex_home + .clone() + }; + crate::rollout::truncation::materialize_rollout_items_for_replay( + codex_home.as_path(), + rollout_items, + ) + .await + } + pub(super) async fn reconstruct_history_from_rollout( &self, turn_context: &TurnContext, rollout_items: &[RolloutItem], ) -> RolloutReconstruction { + let rollout_items = self + .materialize_rollout_items_for_replay(rollout_items) + .await; + let rollout_items = rollout_items.as_slice(); // Replay metadata should already match the shape of the future lazy reverse loader, even // while history materialization still uses an eager bridge. 
Scan newest-to-oldest, // stopping once a surviving replacement-history checkpoint and the required resume metadata @@ -207,7 +230,9 @@ impl Session { active_segment.get_or_insert_with(ActiveReplaySegment::default); active_segment.counts_as_user_turn |= is_user_turn_boundary(response_item); } - RolloutItem::EventMsg(_) | RolloutItem::SessionMeta(_) => {} + RolloutItem::EventMsg(_) + | RolloutItem::SessionMeta(_) + | RolloutItem::ForkReference(_) => {} } if base_replacement_history.is_some() @@ -275,6 +300,7 @@ impl Session { history.drop_last_n_user_turns(rollback.num_turns); } RolloutItem::EventMsg(_) + | RolloutItem::ForkReference(_) | RolloutItem::TurnContext(_) | RolloutItem::SessionMeta(_) => {} } diff --git a/codex-rs/core/src/session/rollout_reconstruction_tests.rs b/codex-rs/core/src/session/rollout_reconstruction_tests.rs index 5cfcc38053..1672358f84 100644 --- a/codex-rs/core/src/session/rollout_reconstruction_tests.rs +++ b/codex-rs/core/src/session/rollout_reconstruction_tests.rs @@ -6,11 +6,19 @@ use codex_protocol::ThreadId; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::CompactedItem; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::ResumedHistory; +use codex_protocol::protocol::RolloutItem; +use codex_protocol::protocol::RolloutLine; +use codex_protocol::protocol::SessionMeta; +use codex_protocol::protocol::SessionMetaLine; +use codex_protocol::protocol::SessionSource; use pretty_assertions::assert_eq; +use std::path::Path; use std::path::PathBuf; +use tempfile::TempDir; fn user_message(text: &str) -> ResponseItem { ResponseItem::Message { @@ -52,6 +60,53 @@ fn inter_agent_assistant_message(text: &str) -> ResponseItem { } } +fn write_rollout_items( + root: &Path, + thread_id: ThreadId, + items: &[RolloutItem], +) -> std::io::Result { + let rollout_dir = 
root + .join(crate::SESSIONS_SUBDIR) + .join("2026") + .join("03") + .join("05"); + std::fs::create_dir_all(&rollout_dir)?; + let rollout_path = rollout_dir.join(format!("rollout-2026-03-05T00-00-00-{thread_id}.jsonl")); + let session_meta_line = RolloutLine { + timestamp: "2026-03-05T00:00:00Z".to_string(), + item: RolloutItem::SessionMeta(SessionMetaLine { + meta: SessionMeta { + id: thread_id, + timestamp: "2026-03-05T00:00:00Z".to_string(), + cwd: root.to_path_buf(), + originator: "codex".to_string(), + cli_version: "test".to_string(), + source: SessionSource::Exec, + agent_nickname: None, + agent_role: None, + agent_path: None, + model_provider: Some("openai".to_string()), + base_instructions: None, + dynamic_tools: None, + memory_mode: None, + forked_from_id: None, + }, + git: None, + }), + }; + let mut text = format!("{}\n", serde_json::to_string(&session_meta_line).unwrap()); + for item in items { + let line = RolloutLine { + timestamp: "2026-03-05T00:00:01Z".to_string(), + item: item.clone(), + }; + text.push_str(&serde_json::to_string(&line).unwrap()); + text.push('\n'); + } + std::fs::write(&rollout_path, text)?; + Ok(rollout_path) +} + #[tokio::test] async fn record_initial_history_resumed_bare_turn_context_does_not_hydrate_previous_turn_settings() { @@ -93,6 +148,124 @@ async fn record_initial_history_resumed_bare_turn_context_does_not_hydrate_previ assert!(session.reference_context_item().await.is_none()); } +#[tokio::test] +async fn reconstruct_history_materializes_fork_reference_rollout_items() { + let (session, turn_context) = make_session_and_context().await; + let dir = TempDir::new().expect("create temp dir"); + let parent_thread_id = ThreadId::new(); + let parent_rollout_path = write_rollout_items( + dir.path(), + parent_thread_id, + &[ + RolloutItem::ResponseItem(user_message("first user")), + RolloutItem::ResponseItem(assistant_message("first reply")), + RolloutItem::ResponseItem(user_message("second user")), + 
RolloutItem::ResponseItem(assistant_message("second reply")), + ], + ) + .expect("write parent rollout"); + let rollout_items = vec![RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: parent_rollout_path, + nth_user_message: 1, + })]; + + let reconstructed = session + .reconstruct_history_from_rollout(&turn_context, &rollout_items) + .await; + + assert_eq!( + reconstructed.history, + vec![user_message("first user"), assistant_message("first reply")] + ); +} + +#[tokio::test] +async fn record_initial_history_forked_materializes_fork_reference_rollout_items() { + let (session, turn_context) = make_session_and_context().await; + let codex_home = turn_context.config.codex_home.clone(); + let parent_thread_id = ThreadId::new(); + let parent_rollout_path = write_rollout_items( + codex_home.as_path(), + parent_thread_id, + &[ + RolloutItem::ResponseItem(user_message("first user")), + RolloutItem::ResponseItem(assistant_message("first reply")), + RolloutItem::ResponseItem(user_message("second user")), + RolloutItem::ResponseItem(assistant_message("second reply")), + ], + ) + .expect("write parent rollout"); + let rollout_items = vec![RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: parent_rollout_path, + nth_user_message: 1, + })]; + + session + .record_initial_history(InitialHistory::Forked(rollout_items)) + .await; + + let expected = vec![user_message("first user"), assistant_message("first reply")]; + + let history = session.state.lock().await.clone_history(); + assert_eq!(expected, history.raw_items()); + assert!(session.reference_context_item().await.is_none()); +} + +#[tokio::test] +async fn reconstruct_history_resolves_fork_reference_after_parent_archive_and_unarchive() { + let (session, turn_context) = make_session_and_context().await; + let codex_home = turn_context.config.codex_home.clone(); + let parent_thread_id = ThreadId::new(); + let parent_rollout_path = write_rollout_items( + codex_home.as_path(), + parent_thread_id, + &[ + 
RolloutItem::ResponseItem(user_message("first user")), + RolloutItem::ResponseItem(assistant_message("first reply")), + RolloutItem::ResponseItem(user_message("second user")), + RolloutItem::ResponseItem(assistant_message("second reply")), + ], + ) + .expect("write parent rollout"); + let rollout_items = vec![RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: parent_rollout_path.clone(), + nth_user_message: 1, + })]; + let expected_history = vec![user_message("first user"), assistant_message("first reply")]; + + let archived_rollout_dir = codex_home + .join(crate::ARCHIVED_SESSIONS_SUBDIR) + .join("2026") + .join("03") + .join("05"); + std::fs::create_dir_all(&archived_rollout_dir).expect("create archived rollout dir"); + let archived_rollout_path = archived_rollout_dir.join( + parent_rollout_path + .file_name() + .expect("parent rollout file name"), + ); + std::fs::rename(&parent_rollout_path, &archived_rollout_path).expect("archive parent rollout"); + + let reconstructed = session + .reconstruct_history_from_rollout(&turn_context, &rollout_items) + .await; + assert_eq!(reconstructed.history, expected_history); + + let unarchived_rollout_dir = codex_home + .join(crate::SESSIONS_SUBDIR) + .join("2026") + .join("03") + .join("05"); + std::fs::create_dir_all(&unarchived_rollout_dir).expect("create unarchived rollout dir"); + std::fs::rename(&archived_rollout_path, &parent_rollout_path) + .expect("unarchive parent rollout"); + + let reconstructed = session + .reconstruct_history_from_rollout(&turn_context, &rollout_items) + .await; + assert_eq!(reconstructed.history, expected_history); +} + #[tokio::test] async fn record_initial_history_resumed_hydrates_previous_turn_settings_from_lifecycle_turn_with_missing_turn_context_id() { diff --git a/codex-rs/core/src/session/session.rs b/codex-rs/core/src/session/session.rs index 03849f7e07..dd77533a9c 100644 --- a/codex-rs/core/src/session/session.rs +++ b/codex-rs/core/src/session/session.rs @@ -29,6 +29,8 @@ 
pub(crate) struct Session { pub(crate) guardian_review_session: GuardianReviewSessionManager, pub(crate) services: SessionServices, pub(super) next_internal_sub_id: AtomicU64, + pub(super) turn_used_agent_send_input: AtomicBool, + pub(super) last_completed_turn_used_agent_send_input: AtomicBool, } #[derive(Clone)] @@ -124,13 +126,23 @@ impl SessionConfiguration { model: self.collaboration_mode.model().to_string(), model_provider_id: self.original_config_do_not_use.model_provider_id.clone(), service_tier: self.service_tier, + plan_mode_reasoning_effort: self.original_config_do_not_use.plan_mode_reasoning_effort, + model_verbosity: self.original_config_do_not_use.model_verbosity, + model_context_window: self.original_config_do_not_use.model_context_window, + model_auto_compact_token_limit: self + .original_config_do_not_use + .model_auto_compact_token_limit, approval_policy: self.approval_policy.value(), approvals_reviewer: self.approvals_reviewer, permission_profile: self.permission_profile(), cwd: self.cwd.clone(), ephemeral: self.original_config_do_not_use.ephemeral, + agent_use_function_call_inbox: self + .original_config_do_not_use + .agent_use_function_call_inbox, reasoning_effort: self.collaboration_mode.reasoning_effort(), personality: self.personality, + active_profile: self.original_config_do_not_use.active_profile.clone(), session_source: self.session_source.clone(), } } @@ -867,6 +879,8 @@ impl Session { guardian_review_session: GuardianReviewSessionManager::default(), services, next_internal_sub_id: AtomicU64::new(0), + turn_used_agent_send_input: AtomicBool::new(false), + last_completed_turn_used_agent_send_input: AtomicBool::new(false), }); if let Some(network_policy_decider_session) = network_policy_decider_session { let mut guard = network_policy_decider_session.write().await; diff --git a/codex-rs/core/src/session/tests.rs b/codex-rs/core/src/session/tests.rs index e7484e43b7..69cff9072f 100644 --- a/codex-rs/core/src/session/tests.rs +++ 
b/codex-rs/core/src/session/tests.rs @@ -143,6 +143,7 @@ use opentelemetry_sdk::metrics::data::AggregatedMetrics; use opentelemetry_sdk::metrics::data::Metric; use opentelemetry_sdk::metrics::data::MetricData; use opentelemetry_sdk::metrics::data::ResourceMetrics; +use std::collections::HashMap; use std::path::Path; use std::time::Duration; use tokio::sync::Semaphore; @@ -3107,8 +3108,11 @@ enabled = false "custom".to_string(), crate::config::AgentRoleConfig { description: None, + model: None, config_file: Some(role_path.to_path_buf()), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); crate::agent::role::apply_role_to_config(&mut child_config, Some("custom")) @@ -3568,6 +3572,8 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) { goal_runtime: crate::goals::GoalRuntimeState::new(), guardian_review_session: crate::guardian::GuardianReviewSessionManager::default(), services, + turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), + last_completed_turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), next_internal_sub_id: AtomicU64::new(0), }; @@ -4994,6 +5000,8 @@ where goal_runtime: crate::goals::GoalRuntimeState::new(), guardian_review_session: crate::guardian::GuardianReviewSessionManager::default(), services, + turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), + last_completed_turn_used_agent_send_input: std::sync::atomic::AtomicBool::new(false), next_internal_sub_id: AtomicU64::new(0), }); @@ -8237,3 +8245,27 @@ async fn session_start_hooks_require_project_trust_without_config_toml() -> std: Ok(()) } + +#[tokio::test] +async fn root_agent_prompt_only_includes_watchdog_fragment_when_enabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + + let without_watchdog = + load_root_agent_prompt(codex_home.path(), /*include_watchdog*/ false).await; + assert!(!without_watchdog.contains("## Watchdogs")); + + let with_watchdog = 
load_root_agent_prompt(codex_home.path(), /*include_watchdog*/ true).await; + assert!(with_watchdog.contains("## Watchdogs")); +} + +#[tokio::test] +async fn subagent_prompt_only_includes_watchdog_fragment_when_enabled() { + let codex_home = tempfile::tempdir().expect("create temp dir"); + + let without_watchdog = + load_subagent_prompt(codex_home.path(), /*include_watchdog*/ false).await; + assert!(!without_watchdog.contains("## Watchdog-only Guidance")); + + let with_watchdog = load_subagent_prompt(codex_home.path(), /*include_watchdog*/ true).await; + assert!(with_watchdog.contains("## Watchdog-only Guidance")); +} diff --git a/codex-rs/core/src/stream_events_utils.rs b/codex-rs/core/src/stream_events_utils.rs index 5a31d18020..e41b14af84 100644 --- a/codex-rs/core/src/stream_events_utils.rs +++ b/codex-rs/core/src/stream_events_utils.rs @@ -467,6 +467,17 @@ fn completed_item_defers_mailbox_delivery_to_next_turn( pub(crate) fn response_input_to_response_item(input: &ResponseInputItem) -> Option { match input { + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => Some(ResponseItem::FunctionCall { + id: None, + name: name.clone(), + namespace: None, + arguments: arguments.clone(), + call_id: call_id.clone(), + }), ResponseInputItem::FunctionCallOutput { call_id, output } => { Some(ResponseItem::FunctionCallOutput { call_id: call_id.clone(), diff --git a/codex-rs/core/src/tasks/compact.rs b/codex-rs/core/src/tasks/compact.rs index 86b2e24c7b..274ee181f3 100644 --- a/codex-rs/core/src/tasks/compact.rs +++ b/codex-rs/core/src/tasks/compact.rs @@ -42,6 +42,11 @@ impl SessionTask for CompactTask { ); crate::compact::run_compact_task(session.clone(), ctx, input).await }; + session + .services + .agent_control + .finish_watchdog_parent_compaction(session.conversation_id) + .await; None } } diff --git a/codex-rs/core/src/tasks/mod.rs b/codex-rs/core/src/tasks/mod.rs index 91078c50ce..4cb9e54fb3 100644 --- a/codex-rs/core/src/tasks/mod.rs +++ 
b/codex-rs/core/src/tasks/mod.rs @@ -561,6 +561,7 @@ impl Session { turn_context .turn_metadata_state .cancel_git_enrichment_task(); + self.snapshot_agent_send_input_on_turn_complete(); let mut pending_input = Vec::::new(); let mut should_clear_active_turn = false; diff --git a/codex-rs/core/src/test_support.rs b/codex-rs/core/src/test_support.rs index 6c2f29d9c0..68bc6d4ce2 100644 --- a/codex-rs/core/src/test_support.rs +++ b/codex-rs/core/src/test_support.rs @@ -114,6 +114,7 @@ pub fn models_manager_with_provider( codex_home, /*config_model_catalog*/ None, Default::default(), + Default::default(), ) } diff --git a/codex-rs/core/src/thread_manager.rs b/codex-rs/core/src/thread_manager.rs index 2a83eb4511..64c97a005c 100644 --- a/codex-rs/core/src/thread_manager.rs +++ b/codex-rs/core/src/thread_manager.rs @@ -2,6 +2,7 @@ use crate::SkillsManager; use crate::agent::AgentControl; use crate::codex_thread::CodexThread; use crate::config::Config; +use crate::config::CustomModelConfig; use crate::config::ThreadStoreConfig; use crate::environment_selection::default_thread_environment_selections; use crate::environment_selection::selected_primary_environment; @@ -38,8 +39,10 @@ use codex_protocol::error::Result as CodexResult; #[cfg(test)] use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ModelsResponse; use codex_protocol::protocol::Event; use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::McpServerRefreshConfig; use codex_protocol::protocol::Op; @@ -63,6 +66,7 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use std::collections::HashMap; use std::collections::HashSet; +use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; @@ -255,12 +259,15 @@ pub(crate) struct ThreadManagerState { pub fn build_models_manager( config: 
&Config, auth_manager: Arc, + model_catalog: Option, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, ) -> SharedModelsManager { let provider = create_model_provider(config.model_provider.clone(), Some(auth_manager)); provider.models_manager( config.codex_home.to_path_buf(), - config.model_catalog.clone(), + model_catalog, + custom_models, collaboration_modes_config, ) } @@ -281,6 +288,8 @@ impl ThreadManager { config: &Config, auth_manager: Arc, session_source: SessionSource, + model_catalog: Option, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, environment_manager: Arc, analytics_events_client: Option, @@ -306,6 +315,8 @@ impl ThreadManager { models_manager: build_models_manager( config, auth_manager.clone(), + model_catalog, + custom_models, collaboration_modes_config, ), environment_manager, @@ -381,6 +392,7 @@ impl ThreadManager { .models_manager( codex_home, /*config_model_catalog*/ None, + HashMap::new(), CollaborationModesConfig::default(), ), environment_manager, @@ -777,12 +789,16 @@ impl ThreadManager { S: Into, { let snapshot = snapshot.into(); - let history = RolloutRecorder::get_rollout_history(&path).await?; - self.fork_thread_from_history( + // True forks must discard the source rollout's conversation id so the child gets a + // distinct thread id and preserves `forked_from_id` in its SessionMeta. Using the + // resume loader here silently turns a fork into an in-place resume. 
+ let history = RolloutRecorder::get_fork_history(&path).await?; + self.fork_thread_with_initial_history( snapshot, config, thread_store, history, + Some(path), persist_extended_history, parent_trace, ) @@ -802,11 +818,16 @@ impl ThreadManager { where S: Into, { + let source_rollout_path = match &history { + InitialHistory::Resumed(resumed) => resumed.rollout_path.clone(), + InitialHistory::New | InitialHistory::Cleared | InitialHistory::Forked(_) => None, + }; self.fork_thread_with_initial_history( snapshot.into(), config, thread_store, history, + source_rollout_path, persist_extended_history, parent_trace, ) @@ -819,11 +840,19 @@ impl ThreadManager { config: Config, thread_store: Arc, history: InitialHistory, + source_rollout_path: Option, persist_extended_history: bool, parent_trace: Option, ) -> CodexResult { let interrupted_marker = InterruptedTurnHistoryMarker::from_config(&config); - let history = fork_history_from_snapshot(snapshot, history, interrupted_marker); + let history = fork_history_from_snapshot( + snapshot, + config.codex_home.as_path(), + history, + source_rollout_path, + interrupted_marker, + ) + .await; let environments = default_thread_environment_selections( self.state.environment_manager.as_ref(), &config.cwd, @@ -1218,14 +1247,23 @@ impl ThreadManagerState { /// when the source thread is currently mid-turn they fall back to cutting /// before the active turn's opening boundary so the fork omits the unfinished /// suffix entirely. 
-fn truncate_before_nth_user_message( +async fn truncate_before_nth_user_message( + codex_home: &Path, history: InitialHistory, - n: usize, + n: i64, snapshot_state: &SnapshotTurnState, ) -> InitialHistory { - let items: Vec = history.get_rollout_items(); + let mut items: Vec = history.get_rollout_items(); + if items + .iter() + .any(|item| matches!(item, RolloutItem::ForkReference(_))) + { + items = truncation::materialize_rollout_items_for_replay(codex_home, &items).await; + } let user_positions = truncation::user_message_positions_in_rollout(&items); - let rolled = if snapshot_state.ends_mid_turn && n >= user_positions.len() { + let rolled = if snapshot_state.ends_mid_turn + && usize::try_from(n).map_or(true, |n| n >= user_positions.len()) + { if let Some(cut_idx) = snapshot_state .active_turn_start_index .or_else(|| user_positions.last().copied()) @@ -1305,15 +1343,23 @@ fn snapshot_turn_state(history: &InitialHistory) -> SnapshotTurnState { } } -fn fork_history_from_snapshot( +async fn fork_history_from_snapshot( snapshot: ForkSnapshot, + codex_home: &Path, history: InitialHistory, + source_rollout_path: Option, interrupted_marker: InterruptedTurnHistoryMarker, ) -> InitialHistory { let snapshot_state = snapshot_turn_state(&history); - match snapshot { + let mut history = match snapshot { ForkSnapshot::TruncateBeforeNthUserMessage(nth_user_message) => { - truncate_before_nth_user_message(history, nth_user_message, &snapshot_state) + truncate_before_nth_user_message( + codex_home, + history, + i64::try_from(nth_user_message).unwrap_or(i64::MAX), + &snapshot_state, + ) + .await } ForkSnapshot::Interrupted => { let history = match history { @@ -1332,7 +1378,35 @@ fn fork_history_from_snapshot( history } } + }; + if let ( + ForkSnapshot::TruncateBeforeNthUserMessage(nth_user_message), + Some(rollout_path), + InitialHistory::Forked(items), + ) = (snapshot, source_rollout_path, &mut history) + { + let source_session_meta = items.iter().find_map(|item| match item { + 
RolloutItem::SessionMeta(meta_line) => Some(meta_line.clone()), + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, + }); + // Keep the source SessionMeta in memory so startup can derive `forked_from_id` + // while still persisting only the compact ForkReference suffix on disk. + *items = source_session_meta + .into_iter() + .map(RolloutItem::SessionMeta) + .chain(std::iter::once(RolloutItem::ForkReference( + ForkReferenceItem { + rollout_path, + nth_user_message: i64::try_from(nth_user_message).unwrap_or(i64::MAX), + }, + ))) + .collect(); } + history } /// Append the same persisted interrupt boundary used by the live interrupt path @@ -1379,3 +1453,150 @@ fn append_interrupted_boundary( #[cfg(test)] #[path = "thread_manager_tests.rs"] mod tests; +// Keep this inline fork-reference test module disabled on the refreshed main API; +// branch coverage now comes from the package/integration tests that match current types. 
+#[cfg(any())] +mod fork_reference_tests { + use super::*; + use crate::session::tests::make_session_and_context; + use codex_protocol::models::ContentItem; + use codex_protocol::models::ReasoningItemReasoningSummary; + use codex_protocol::models::ResponseItem; + use pretty_assertions::assert_eq; + + fn user_msg(text: &str) -> ResponseItem { + ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::OutputText { + text: text.to_string(), + }], + end_turn: None, + phase: None, + } + } + fn assistant_msg(text: &str) -> ResponseItem { + ResponseItem::Message { + id: None, + role: "assistant".to_string(), + content: vec![ContentItem::OutputText { + text: text.to_string(), + }], + end_turn: None, + phase: None, + } + } + + #[tokio::test] + async fn drops_from_last_user_only() { + let items = [ + user_msg("u1"), + assistant_msg("a1"), + assistant_msg("a2"), + user_msg("u2"), + assistant_msg("a3"), + ResponseItem::Reasoning { + id: "r1".to_string(), + summary: vec![ReasoningItemReasoningSummary::SummaryText { + text: "s".to_string(), + }], + content: None, + encrypted_content: None, + }, + ResponseItem::FunctionCall { + id: None, + call_id: "c1".to_string(), + name: "tool".to_string(), + namespace: None, + arguments: "{}".to_string(), + }, + assistant_msg("a4"), + ]; + + let initial: Vec = items + .iter() + .cloned() + .map(RolloutItem::ResponseItem) + .collect(); + let truncated = truncate_before_nth_user_message( + Path::new("/tmp"), + InitialHistory::Forked(initial), + 1, + &SnapshotTurnState { + ends_mid_turn: false, + active_turn_id: None, + active_turn_start_index: None, + }, + ) + .await; + let got_items = truncated.get_rollout_items(); + let expected_items = vec![ + RolloutItem::ResponseItem(items[0].clone()), + RolloutItem::ResponseItem(items[1].clone()), + RolloutItem::ResponseItem(items[2].clone()), + ]; + assert_eq!( + serde_json::to_value(&got_items).unwrap(), + serde_json::to_value(&expected_items).unwrap() + ); + + let 
initial2: Vec = items + .iter() + .cloned() + .map(RolloutItem::ResponseItem) + .collect(); + let truncated2 = truncate_before_nth_user_message( + Path::new("/tmp"), + InitialHistory::Forked(initial2), + 2, + &SnapshotTurnState { + ends_mid_turn: false, + active_turn_id: None, + active_turn_start_index: None, + }, + ) + .await; + assert!(matches!(truncated2, InitialHistory::New)); + } + + #[tokio::test] + async fn ignores_session_prefix_messages_when_truncating() { + let (session, turn_context) = make_session_and_context().await; + let mut items = session.build_initial_context(&turn_context).await; + items.push(user_msg("feature request")); + items.push(assistant_msg("ack")); + items.push(user_msg("second question")); + items.push(assistant_msg("answer")); + + let rollout_items: Vec = items + .iter() + .cloned() + .map(RolloutItem::ResponseItem) + .collect(); + + let truncated = truncate_before_nth_user_message( + Path::new("/tmp"), + InitialHistory::Forked(rollout_items), + 1, + &SnapshotTurnState { + ends_mid_turn: false, + active_turn_id: None, + active_turn_start_index: None, + }, + ) + .await; + let got_items = truncated.get_rollout_items(); + + let expected: Vec = vec![ + RolloutItem::ResponseItem(items[0].clone()), + RolloutItem::ResponseItem(items[1].clone()), + RolloutItem::ResponseItem(items[2].clone()), + RolloutItem::ResponseItem(items[3].clone()), + ]; + + assert_eq!( + serde_json::to_value(&got_items).unwrap(), + serde_json::to_value(&expected).unwrap() + ); + } +} diff --git a/codex-rs/core/src/thread_manager_tests.rs b/codex-rs/core/src/thread_manager_tests.rs index 348edaa221..5b90d803f7 100644 --- a/codex-rs/core/src/thread_manager_tests.rs +++ b/codex-rs/core/src/thread_manager_tests.rs @@ -22,6 +22,8 @@ use core_test_support::PathBufExt; use core_test_support::PathExt; use core_test_support::responses::mount_models_once; use pretty_assertions::assert_eq; +use std::collections::HashMap; +use std::path::Path; use std::time::Duration; use 
tempfile::tempdir; use wiremock::MockServer; @@ -57,8 +59,8 @@ fn developer_interrupted_marker() -> ResponseItem { .expect("developer interrupted marker should be enabled") } -#[test] -fn truncates_before_requested_user_message() { +#[tokio::test] +async fn truncates_before_requested_user_message() { let items = [ user_msg("u1"), assistant_msg("a1"), @@ -89,6 +91,7 @@ fn truncates_before_requested_user_message() { .map(RolloutItem::ResponseItem) .collect(); let truncated = truncate_before_nth_user_message( + Path::new("/tmp"), InitialHistory::Forked(initial), /*n*/ 1, &SnapshotTurnState { @@ -96,7 +99,8 @@ fn truncates_before_requested_user_message() { active_turn_id: None, active_turn_start_index: None, }, - ); + ) + .await; let got_items = truncated.get_rollout_items(); let expected_items = vec![ RolloutItem::ResponseItem(items[0].clone()), @@ -114,6 +118,7 @@ fn truncates_before_requested_user_message() { .map(RolloutItem::ResponseItem) .collect(); let truncated2 = truncate_before_nth_user_message( + Path::new("/tmp"), InitialHistory::Forked(initial2.clone()), /*n*/ 2, &SnapshotTurnState { @@ -121,15 +126,16 @@ fn truncates_before_requested_user_message() { active_turn_id: None, active_turn_start_index: None, }, - ); + ) + .await; assert_eq!( serde_json::to_value(truncated2.get_rollout_items()).unwrap(), serde_json::to_value(initial2).unwrap() ); } -#[test] -fn out_of_range_truncation_drops_only_unfinished_suffix_mid_turn() { +#[tokio::test] +async fn out_of_range_truncation_drops_only_unfinished_suffix_mid_turn() { let items = vec![ RolloutItem::ResponseItem(user_msg("u1")), RolloutItem::ResponseItem(assistant_msg("a1")), @@ -138,14 +144,16 @@ fn out_of_range_truncation_drops_only_unfinished_suffix_mid_turn() { ]; let truncated = truncate_before_nth_user_message( + Path::new("/tmp"), InitialHistory::Forked(items.clone()), - usize::MAX, + /*n*/ -1, &SnapshotTurnState { ends_mid_turn: true, active_turn_id: None, active_turn_start_index: None, }, - ); + ) + 
.await; assert_eq!( serde_json::to_value(truncated.get_rollout_items()).unwrap(), @@ -173,8 +181,8 @@ fn fork_thread_accepts_legacy_usize_snapshot_argument() { let _: fn(&ThreadManager, Config, std::path::PathBuf) = assert_legacy_snapshot_callsite; } -#[test] -fn out_of_range_truncation_drops_pre_user_active_turn_prefix() { +#[tokio::test] +async fn out_of_range_truncation_drops_pre_user_active_turn_prefix() { let items = vec![ RolloutItem::ResponseItem(user_msg("u1")), RolloutItem::ResponseItem(assistant_msg("a1")), @@ -199,10 +207,12 @@ fn out_of_range_truncation_drops_pre_user_active_turn_prefix() { ); let truncated = truncate_before_nth_user_message( + Path::new("/tmp"), InitialHistory::Forked(items.clone()), - usize::MAX, + /*n*/ -1, &snapshot_state, - ); + ) + .await; assert_eq!( serde_json::to_value(truncated.get_rollout_items()).unwrap(), @@ -226,6 +236,7 @@ async fn ignores_session_prefix_messages_when_truncating() { .collect(); let truncated = truncate_before_nth_user_message( + Path::new("/tmp"), InitialHistory::Forked(rollout_items), /*n*/ 1, &SnapshotTurnState { @@ -233,7 +244,8 @@ async fn ignores_session_prefix_messages_when_truncating() { active_turn_id: None, active_turn_start_index: None, }, - ); + ) + .await; let got_items = truncated.get_rollout_items(); let expected: Vec = vec![ @@ -392,6 +404,8 @@ async fn resume_and_fork_do_not_restore_thread_environments_from_rollout() { &config, auth_manager.clone(), SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig::default(), Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), /*analytics_events_client*/ None, @@ -499,6 +513,8 @@ async fn new_uses_active_provider_for_model_refresh() { &config, auth_manager, SessionSource::Exec, + /*model_catalog*/ None, + HashMap::new(), CollaborationModesConfig::default(), Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), /*analytics_events_client*/ None, @@ -710,6 +726,8 @@ 
async fn interrupted_fork_snapshot_does_not_synthesize_turn_id_for_legacy_histor &config, auth_manager.clone(), SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig::default(), Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), /*analytics_events_client*/ None, @@ -814,6 +832,8 @@ async fn interrupted_fork_snapshot_preserves_explicit_turn_id() { &config, auth_manager.clone(), SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig::default(), Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), /*analytics_events_client*/ None, @@ -907,6 +927,8 @@ async fn interrupted_fork_snapshot_uses_persisted_mid_turn_history_without_live_ &config, auth_manager.clone(), SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig::default(), Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), /*analytics_events_client*/ None, @@ -1046,6 +1068,8 @@ async fn resumed_thread_activates_paused_goal_and_continues_on_request() -> anyh &config, auth_manager.clone(), SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig::default(), Arc::new(codex_exec_server::EnvironmentManager::default_for_tests()), /*analytics_events_client*/ None, diff --git a/codex-rs/core/src/thread_rollout_truncation.rs b/codex-rs/core/src/thread_rollout_truncation.rs index e20ee53d47..d2c8ae0072 100644 --- a/codex-rs/core/src/thread_rollout_truncation.rs +++ b/codex-rs/core/src/thread_rollout_truncation.rs @@ -5,12 +5,18 @@ use crate::context_manager::is_user_turn_boundary; use crate::event_mapping; +use crate::resolve_fork_reference_rollout_path; +use crate::rollout::RolloutRecorder; use codex_protocol::items::TurnItem; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::InitialHistory; 
use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::RolloutItem; +use std::path::Path; +use tracing::warn; + +const MAX_FORK_REFERENCE_DEPTH: usize = 8; pub(crate) fn initial_history_has_prior_user_turns(conversation_history: &InitialHistory) -> bool { conversation_history.scan_rollout_items(rollout_item_is_user_turn_boundary) @@ -100,21 +106,30 @@ pub(crate) fn fork_turn_positions_in_rollout(items: &[RolloutItem]) -> Vec i64 { + i64::try_from(user_message_positions_in_rollout(items).len()).unwrap_or(i64::MAX) +} + /// Return a prefix of `items` obtained by cutting strictly before the nth user message. /// /// The boundary index is 0-based from the start of `items` (so `n_from_start = 0` returns /// a prefix that excludes the first user message and everything after it). /// -/// If `n_from_start` is `usize::MAX`, this returns the full rollout (no truncation). +/// If `n_from_start` is negative, this returns the full rollout (no truncation). /// If fewer than or equal to `n_from_start` user messages exist, this returns the full /// rollout unchanged. pub(crate) fn truncate_rollout_before_nth_user_message_from_start( items: &[RolloutItem], - n_from_start: usize, + n_from_start: i64, ) -> Vec { - if n_from_start == usize::MAX { + let Ok(n_from_start) = usize::try_from(n_from_start) else { return items.to_vec(); - } + }; let user_positions = user_message_positions_in_rollout(items); @@ -165,6 +180,83 @@ fn is_trigger_turn_boundary(item: &ResponseItem) -> bool { .is_some_and(|communication| communication.trigger_turn) } +/// Expand `ForkReference` items into the referenced parent rollout slices they encode. +/// +/// This preserves child rollout compactness on disk while letting replay callers rebuild the +/// effective inherited transcript before reconstructing conversation history or deriving thread +/// summaries. 
+pub async fn materialize_rollout_items_for_replay( + codex_home: &Path, + rollout_items: &[RolloutItem], +) -> Vec { + let mut materialized = Vec::new(); + let mut stack: Vec<(Vec, usize, usize)> = vec![(rollout_items.to_vec(), 0, 0)]; + + while let Some((items, mut idx, depth)) = stack.pop() { + while idx < items.len() { + match &items[idx] { + RolloutItem::ForkReference(reference) => { + if depth >= MAX_FORK_REFERENCE_DEPTH { + warn!( + "skipping fork reference recursion at depth {} for {:?}", + depth, reference.rollout_path + ); + materialized.push(RolloutItem::ForkReference(reference.clone())); + idx += 1; + continue; + } + + let resolved_rollout_path = match resolve_fork_reference_rollout_path( + codex_home, + &reference.rollout_path, + ) + .await + { + Ok(path) => path, + Err(err) => { + warn!( + "failed to resolve fork reference rollout {:?}: {err}", + reference.rollout_path + ); + materialized.push(RolloutItem::ForkReference(reference.clone())); + idx += 1; + continue; + } + }; + let parent_history = match RolloutRecorder::get_rollout_history( + &resolved_rollout_path, + ) + .await + { + Ok(history) => history, + Err(err) => { + warn!( + "failed to load fork reference rollout {:?} (resolved from {:?}): {err}", + resolved_rollout_path, reference.rollout_path + ); + materialized.push(RolloutItem::ForkReference(reference.clone())); + idx += 1; + continue; + } + }; + let parent_items = truncate_rollout_before_nth_user_message_from_start( + &parent_history.get_rollout_items(), + reference.nth_user_message, + ); + + stack.push((items, idx + 1, depth)); + stack.push((parent_items, 0, depth + 1)); + break; + } + item => materialized.push(item.clone()), + } + idx += 1; + } + } + + materialized +} + #[cfg(test)] #[path = "thread_rollout_truncation_tests.rs"] mod tests; diff --git a/codex-rs/core/src/thread_rollout_truncation_tests.rs b/codex-rs/core/src/thread_rollout_truncation_tests.rs index df370a0546..1801753d92 100644 --- 
a/codex-rs/core/src/thread_rollout_truncation_tests.rs +++ b/codex-rs/core/src/thread_rollout_truncation_tests.rs @@ -3,9 +3,11 @@ use crate::session::tests::make_session_and_context; use codex_protocol::AgentPath; use codex_protocol::models::ContentItem; use codex_protocol::models::ReasoningItemReasoningSummary; +use codex_protocol::protocol::ForkReferenceItem; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::ThreadRolledBackEvent; use pretty_assertions::assert_eq; +use tempfile::TempDir; fn user_msg(text: &str) -> ResponseItem { ResponseItem::Message { @@ -100,7 +102,8 @@ fn truncation_max_keeps_full_rollout() { RolloutItem::ResponseItem(user_msg("u2")), ]; - let truncated = truncate_rollout_before_nth_user_message_from_start(&rollout, usize::MAX); + let truncated = + truncate_rollout_before_nth_user_message_from_start(&rollout, /*n_from_start*/ -1); assert_eq!( serde_json::to_value(&truncated).unwrap(), @@ -314,3 +317,25 @@ fn truncates_rollout_to_last_n_fork_turns_keeps_full_rollout_when_n_is_large() { serde_json::to_value(&rollout).unwrap() ); } + +#[tokio::test] +async fn materialize_rollout_items_for_replay_preserves_unresolved_fork_references() { + let codex_home = TempDir::new().unwrap(); + let fork_reference = RolloutItem::ForkReference(ForkReferenceItem { + rollout_path: "missing-rollout.jsonl".into(), + nth_user_message: 1, + }); + let rollout_items = vec![ + RolloutItem::ResponseItem(user_msg("u1")), + fork_reference.clone(), + RolloutItem::ResponseItem(assistant_msg("a1")), + ]; + + let materialized = + materialize_rollout_items_for_replay(codex_home.path(), &rollout_items).await; + + assert_eq!( + serde_json::to_value(&materialized).unwrap(), + serde_json::to_value(&rollout_items).unwrap() + ); +} diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index f65baeb6dc..209bc0f0fe 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -513,6 +513,16 
@@ pub(crate) fn response_input_to_code_mode_result(response: ResponseInputItem) -> raw_arguments: String::new(), }) } + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => serde_json::json!({ + "type": "function_call", + "name": name, + "arguments": arguments, + "call_id": call_id, + }), } } diff --git a/codex-rs/core/src/tools/handlers/multi_agents.rs b/codex-rs/core/src/tools/handlers/multi_agents.rs index 2d70d3e92d..c5fdff75a5 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents.rs @@ -23,7 +23,6 @@ use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::protocol::CollabAgentInteractionBeginEvent; use codex_protocol::protocol::CollabAgentInteractionEndEvent; use codex_protocol::protocol::CollabAgentRef; -use codex_protocol::protocol::CollabAgentSpawnBeginEvent; use codex_protocol::protocol::CollabAgentSpawnEndEvent; use codex_protocol::protocol::CollabCloseBeginEvent; use codex_protocol::protocol::CollabCloseEndEvent; @@ -42,32 +41,23 @@ pub(crate) fn parse_agent_id_target(target: &str) -> Result, -) -> Result, FunctionCallError> { - if targets.is_empty() { - return Err(FunctionCallError::RespondToModel( - "agent ids must be non-empty".to_string(), - )); - } - - targets - .into_iter() - .map(|target| parse_agent_id_target(&target)) - .collect() -} - pub(crate) use close_agent::Handler as CloseAgentHandler; +pub(crate) use compact_parent_context::Handler as CompactParentContextHandler; +pub(crate) use list_agents::Handler as ListAgentsHandler; pub(crate) use resume_agent::Handler as ResumeAgentHandler; pub(crate) use send_input::Handler as SendInputHandler; pub(crate) use spawn::Handler as SpawnAgentHandler; pub(crate) use wait::Handler as WaitAgentHandler; +pub(crate) use watchdog_self_close::Handler as WatchdogSelfCloseHandler; pub(crate) mod close_agent; +mod compact_parent_context; +mod list_agents; mod resume_agent; mod send_input; mod spawn; pub(crate) mod 
wait; +mod watchdog_self_close; #[cfg(test)] #[path = "multi_agents_tests.rs"] diff --git a/codex-rs/core/src/tools/handlers/multi_agents/compact_parent_context.rs b/codex-rs/core/src/tools/handlers/multi_agents/compact_parent_context.rs new file mode 100644 index 0000000000..fec01c0458 --- /dev/null +++ b/codex-rs/core/src/tools/handlers/multi_agents/compact_parent_context.rs @@ -0,0 +1,97 @@ +use super::*; +use crate::agent::WatchdogParentCompactionResult; + +pub(crate) struct Handler; + +impl ToolHandler for Handler { + type Output = CompactParentContextResult; + + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + fn matches_kind(&self, payload: &ToolPayload) -> bool { + matches!(payload, ToolPayload::Function { .. }) + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, payload, .. + } = invocation; + let arguments = function_arguments(payload)?; + let _args: CompactParentContextArgs = parse_arguments(&arguments)?; + let helper_thread_id = session.conversation_id; + let result = session + .services + .agent_control + .compact_parent_for_watchdog_helper(helper_thread_id) + .await + .map_err(|err| { + FunctionCallError::RespondToModel(format!("compact_parent_context failed: {err}")) + })?; + Ok(CompactParentContextResult::from(result)) + } +} + +#[derive(Debug, Deserialize)] +struct CompactParentContextArgs { + #[serde(rename = "reason")] + _reason: Option, + #[serde(rename = "evidence")] + _evidence: Option, +} + +#[derive(Debug, Serialize)] +pub(crate) struct CompactParentContextResult { + kind: &'static str, + parent_thread_id: Option, + submission_id: Option, +} + +impl From for CompactParentContextResult { + fn from(value: WatchdogParentCompactionResult) -> Self { + match value { + WatchdogParentCompactionResult::NotWatchdogHelper => Self { + kind: "not_watchdog_helper", + parent_thread_id: None, + submission_id: None, + }, + WatchdogParentCompactionResult::ParentBusy { parent_thread_id } => Self { + 
kind: "parent_busy", + parent_thread_id: Some(parent_thread_id.to_string()), + submission_id: None, + }, + WatchdogParentCompactionResult::AlreadyInProgress { parent_thread_id } => Self { + kind: "already_in_progress", + parent_thread_id: Some(parent_thread_id.to_string()), + submission_id: None, + }, + WatchdogParentCompactionResult::Submitted { + parent_thread_id, + submission_id, + } => Self { + kind: "submitted", + parent_thread_id: Some(parent_thread_id.to_string()), + submission_id: Some(submission_id), + }, + } + } +} + +impl ToolOutput for CompactParentContextResult { + fn log_preview(&self) -> String { + tool_output_json_text(self, "compact_parent_context") + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { + tool_output_response_item(call_id, payload, self, Some(true), "compact_parent_context") + } + + fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue { + tool_output_code_mode_result(self, "compact_parent_context") + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents/list_agents.rs b/codex-rs/core/src/tools/handlers/multi_agents/list_agents.rs new file mode 100644 index 0000000000..9c73149b3d --- /dev/null +++ b/codex-rs/core/src/tools/handlers/multi_agents/list_agents.rs @@ -0,0 +1,108 @@ +use super::*; +use crate::agent::AgentListing; +use crate::agent::agent_resolver::resolve_agent_target; + +pub(crate) struct Handler; + +impl ToolHandler for Handler { + type Output = ListAgentsResult; + + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + fn matches_kind(&self, payload: &ToolPayload) -> bool { + matches!(payload, ToolPayload::Function { .. }) + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, + turn, + payload, + .. 
+ } = invocation; + let arguments = function_arguments(payload)?; + let args: ListAgentsArgs = parse_arguments(&arguments)?; + let owner_thread_id = if args.all { + session.conversation_id + } else if let Some(target) = args + .id + .as_deref() + .map(str::trim) + .filter(|id| !id.is_empty()) + { + resolve_agent_target(&session, &turn, target).await? + } else { + session.conversation_id + }; + let agents = session + .services + .agent_control + .list_agents(owner_thread_id, args.recursive, args.all) + .await + .map_err(|err| { + FunctionCallError::RespondToModel(format!("list_agents failed: {err}")) + })?; + Ok(ListAgentsResult { + agents: agents.into_iter().map(ListedAgent::from_listing).collect(), + }) + } +} + +#[derive(Debug, Deserialize)] +struct ListAgentsArgs { + id: Option, + #[serde(default = "default_recursive")] + recursive: bool, + #[serde(default)] + all: bool, +} + +fn default_recursive() -> bool { + true +} + +#[derive(Debug, Serialize)] +pub(crate) struct ListAgentsResult { + agents: Vec, +} + +#[derive(Debug, Serialize)] +struct ListedAgent { + thread_id: String, + parent_thread_id: Option, + status: AgentStatus, + depth: usize, +} + +impl ListedAgent { + fn from_listing(value: AgentListing) -> Self { + Self { + thread_id: value.thread_id.to_string(), + parent_thread_id: value + .parent_thread_id + .map(|thread_id| thread_id.to_string()), + status: value.status, + depth: value.depth, + } + } +} + +impl ToolOutput for ListAgentsResult { + fn log_preview(&self) -> String { + tool_output_json_text(self, "list_agents") + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { + tool_output_response_item(call_id, payload, self, Some(true), "list_agents") + } + + fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue { + tool_output_code_mode_result(self, "list_agents") + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs 
b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs index 4ae3240cb3..0ba952f8d5 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/send_input.rs @@ -1,4 +1,5 @@ use super::*; +use crate::agent::agent_resolver::resolve_agent_target; use crate::agent::control::render_input_preview; pub(crate) struct Handler; @@ -24,8 +25,8 @@ impl ToolHandler for Handler { } = invocation; let arguments = function_arguments(payload)?; let args: SendInputArgs = parse_arguments(&arguments)?; - let receiver_thread_id = parse_agent_id_target(&args.target)?; - let input_items = parse_collab_input(args.message, args.items)?; + let receiver_thread_id = resolve_agent_target(&session, &turn, &args.target).await?; + let input_items = parse_collab_input(args.message.clone(), args.items.clone())?; let prompt = render_input_preview(&input_items); let receiver_agent = session .services @@ -52,9 +53,15 @@ impl ToolHandler for Handler { .into(), ) .await; - let agent_control = session.services.agent_control.clone(); - let result = agent_control - .send_input(receiver_thread_id, input_items) + let result = session + .services + .agent_control + .send_agent_message_or_input( + receiver_thread_id, + session.conversation_id, + args.message, + args.items, + ) .await .map_err(|err| collab_agent_error(receiver_thread_id, err)); let status = session diff --git a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs index 777cb9be1c..1b5a961d4c 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/spawn.rs @@ -1,4 +1,6 @@ use super::*; +use crate::agent::RemovedWatchdog; +use crate::agent::WatchdogRegistration; use crate::agent::control::SpawnAgentForkMode; use crate::agent::control::SpawnAgentOptions; use crate::agent::control::render_input_preview; @@ -6,7 +8,15 @@ use 
crate::agent::exceeds_thread_spawn_depth_limit; use crate::agent::next_thread_spawn_depth; use crate::agent::role::DEFAULT_ROLE_NAME; use crate::agent::role::apply_role_to_config; +use crate::agent::role::default_fork_context_for_role; +use crate::agent::role::watchdog_interval_for_role; +use crate::config::Config; use crate::session::turn_context::TurnEnvironment; +use codex_features::Feature; +use codex_protocol::error::Result as CodexResult; +use codex_protocol::protocol::Op; +use codex_protocol::protocol::SessionSource; +use std::collections::HashSet; pub(crate) struct Handler; @@ -41,81 +51,224 @@ impl ToolHandler for Handler { let session_source = turn.session_source.clone(); let child_depth = next_thread_spawn_depth(&session_source); let max_depth = turn.config.agent_max_depth; + let watchdog_interval_s = watchdog_interval_for_role(&turn.config, role_name); + let is_watchdog = watchdog_interval_s.is_some(); + + if is_watchdog && !turn.config.features.enabled(Feature::AgentWatchdog) { + return Err(FunctionCallError::RespondToModel( + "watchdogs are disabled".to_string(), + )); + } + if is_watchdog && matches!(session_source, SessionSource::SubAgent(_)) { + return Err(FunctionCallError::RespondToModel( + "watchdogs can only be spawned by root agents".to_string(), + )); + } if exceeds_thread_spawn_depth_limit(child_depth, max_depth) { return Err(FunctionCallError::RespondToModel( "Agent depth limit reached. 
Solve the task yourself.".to_string(), )); } - session - .send_event( - &turn, - CollabAgentSpawnBeginEvent { - call_id: call_id.clone(), - sender_thread_id: session.conversation_id, - prompt: prompt.clone(), - model: args.model.clone().unwrap_or_default(), - reasoning_effort: args.reasoning_effort.unwrap_or_default(), - } - .into(), - ) - .await; - let mut config = - build_agent_spawn_config(&session.get_base_instructions().await, turn.as_ref())?; - if args.fork_context { + let fork_context = args + .fork_context + .unwrap_or_else(|| default_fork_context_for_role(&turn.config, role_name)); + if fork_context { reject_full_fork_spawn_overrides( role_name, args.model.as_deref(), args.reasoning_effort, )?; - } else { - apply_requested_spawn_agent_model_overrides( + } + let config = + build_agent_spawn_config(&session.get_base_instructions().await, turn.as_ref())?; + let mut candidates_to_try = collect_spawn_agent_model_candidates( + args.model_fallback_list.as_ref(), + args.model.as_deref(), + args.reasoning_effort, + ); + if candidates_to_try.is_empty() { + candidates_to_try.push(SpawnAgentModelCandidate { + model: None, + reasoning_effort: None, + }); + } + + let mut spawn_result = None; + for (idx, candidate) in candidates_to_try.iter().enumerate() { + let attempt_call_id = spawn_attempt_event_call_id(&call_id, idx); + let candidate_model = candidate.model.clone().unwrap_or_default(); + let candidate_reasoning_effort = candidate.reasoning_effort.unwrap_or_default(); + send_collab_agent_spawn_begin_event( &session, - turn.as_ref(), - &mut config, - args.model.as_deref(), - args.reasoning_effort, + &turn, + attempt_call_id.clone(), + prompt.clone(), + candidate_model.clone(), + candidate_reasoning_effort, ) - .await?; - apply_role_to_config(&mut config, role_name) + .await; + let mut candidate_config = config.clone(); + if !fork_context { + apply_requested_spawn_agent_model_overrides( + &session, + turn.as_ref(), + &mut candidate_config, + candidate.model.as_deref(), 
+ candidate.reasoning_effort, + ) + .await?; + } + apply_role_to_config(&mut candidate_config, role_name) .await .map_err(FunctionCallError::RespondToModel)?; - } - apply_spawn_agent_runtime_overrides(&mut config, turn.as_ref())?; - apply_spawn_agent_overrides(&mut config, child_depth); - - let result = Box::pin( - session.services.agent_control.spawn_agent_with_metadata( - config, - input_items, - Some(thread_spawn_source( + if fork_context { + restore_forked_spawn_agent_model_config(&mut candidate_config, turn.as_ref()); + } + apply_spawn_agent_runtime_overrides(&mut candidate_config, turn.as_ref())?; + apply_spawn_agent_overrides(&mut candidate_config, child_depth); + let spawn_source = thread_spawn_source( + session.conversation_id, + &turn.session_source, + child_depth, + role_name, + /*task_name*/ None, + )?; + let attempt_result = if let Some(watchdog_interval_s) = watchdog_interval_s { + spawn_watchdog( + &session.services.agent_control, + candidate_config, + prompt.clone(), session.conversation_id, - &turn.session_source, child_depth, - role_name, - /*task_name*/ None, - )?), - SpawnAgentOptions { - fork_parent_spawn_call_id: args.fork_context.then(|| call_id.clone()), - fork_mode: args.fork_context.then_some(SpawnAgentForkMode::FullHistory), - environments: Some( - turn.environments - .iter() - .map(TurnEnvironment::selection) - .collect(), + watchdog_interval_s, + spawn_source, + ) + .await + .map(|thread_id| { + let metadata = session.services.agent_control.get_agent_metadata(thread_id); + let (agent_path, agent_nickname, agent_role) = metadata + .map(|metadata| { + ( + metadata.agent_path.map(String::from), + metadata.agent_nickname, + metadata.agent_role, + ) + }) + .unwrap_or((None, None, None)); + SpawnAttemptResult { + thread_id, + status: AgentStatus::PendingInit, + agent_path, + agent_nickname, + agent_role, + } + }) + } else { + Box::pin( + session.services.agent_control.spawn_agent_with_metadata( + candidate_config, + input_items.clone(), + 
Some(spawn_source), + SpawnAgentOptions { + fork_parent_spawn_call_id: if fork_context { + Some(call_id.clone()) + } else { + None + }, + fork_mode: if fork_context { + Some(SpawnAgentForkMode::FullHistory) + } else { + None + }, + environments: Some( + turn.environments + .iter() + .map(TurnEnvironment::selection) + .collect(), + ), + }, ), - }, - ), - ) - .await - .map_err(collab_spawn_error); - let (new_thread_id, new_agent_metadata, status) = match &result { - Ok(spawned_agent) => ( - Some(spawned_agent.thread_id), - Some(spawned_agent.metadata.clone()), - spawned_agent.status.clone(), - ), - Err(_) => (None, None, AgentStatus::NotFound), + ) + .await + .map(|spawned_agent| { + let metadata = spawned_agent.metadata; + SpawnAttemptResult { + thread_id: spawned_agent.thread_id, + status: spawned_agent.status, + agent_path: metadata.agent_path.map(String::from), + agent_nickname: metadata.agent_nickname, + agent_role: metadata.agent_role, + } + }) + }; + match attempt_result { + Ok(spawned_agent) => { + let status = if idx + 1 < candidates_to_try.len() { + match probe_spawn_attempt_for_async_quota_exhaustion( + spawned_agent.status.clone(), + spawned_agent.thread_id, + &session.services.agent_control, + ) + .await + { + SpawnAttemptRetryDecision::Accept(status) => status, + SpawnAttemptRetryDecision::Retry(retry_status) => { + match close_quota_exhausted_spawn_attempt( + &session.services.agent_control, + spawned_agent.thread_id, + retry_status, + ) + .await + { + SpawnAttemptRetryDecision::Accept(status) => status, + SpawnAttemptRetryDecision::Retry(status) => { + send_collab_agent_spawn_retry_preempted_event( + &session, + &turn, + attempt_call_id, + prompt.clone(), + candidate_model, + candidate_reasoning_effort, + status, + ) + .await; + continue; + } + } + } + } + } else { + spawned_agent.status.clone() + }; + spawn_result = Some((spawned_agent, status, attempt_call_id)); + break; + } + Err(err) => { + send_collab_agent_spawn_error_event( + &session, + &turn, 
+ attempt_call_id, + prompt.clone(), + candidate_model, + candidate_reasoning_effort, + &err, + ) + .await; + if spawn_should_retry_on_quota_exhaustion(&err) + && idx + 1 < candidates_to_try.len() + { + continue; + } + return Err(collab_spawn_error(err)); + } + } + } + let Some((spawned_agent, status, spawn_event_call_id)) = spawn_result else { + return Err(FunctionCallError::RespondToModel( + "No spawn attempts were executed".to_string(), + )); }; + let new_thread_id = Some(spawned_agent.thread_id); let agent_snapshot = match new_thread_id { Some(thread_id) => { session @@ -126,20 +279,18 @@ impl ToolHandler for Handler { } None => None, }; - let (_new_agent_path, new_agent_nickname, new_agent_role) = - match (&agent_snapshot, new_agent_metadata) { - (Some(snapshot), _) => ( - snapshot.session_source.get_agent_path().map(String::from), - snapshot.session_source.get_nickname(), - snapshot.session_source.get_agent_role(), - ), - (None, Some(metadata)) => ( - metadata.agent_path.map(String::from), - metadata.agent_nickname, - metadata.agent_role, - ), - (None, None) => (None, None, None), - }; + let (_new_agent_path, new_agent_nickname, new_agent_role) = match &agent_snapshot { + Some(snapshot) => ( + snapshot.session_source.get_agent_path().map(String::from), + snapshot.session_source.get_nickname(), + snapshot.session_source.get_agent_role(), + ), + None => ( + spawned_agent.agent_path, + spawned_agent.agent_nickname, + spawned_agent.agent_role, + ), + }; let effective_model = agent_snapshot .as_ref() .map(|snapshot| snapshot.model.clone()) @@ -153,7 +304,7 @@ impl ToolHandler for Handler { .send_event( &turn, CollabAgentSpawnEndEvent { - call_id, + call_id: spawn_event_call_id, sender_thread_id: session.conversation_id, new_thread_id, new_agent_nickname, @@ -166,7 +317,7 @@ impl ToolHandler for Handler { .into(), ) .await; - let new_thread_id = result?.thread_id; + let new_thread_id = spawned_agent.thread_id; let role_tag = role_name.unwrap_or(DEFAULT_ROLE_NAME); 
turn.session_telemetry.counter( "codex.multi_agent.spawn", @@ -175,26 +326,38 @@ impl ToolHandler for Handler { ); Ok(SpawnAgentResult { - agent_id: new_thread_id.to_string(), + agent_id: Some(new_thread_id.to_string()), + task_name: None, nickname, }) } } +struct SpawnAttemptResult { + thread_id: ThreadId, + status: AgentStatus, + agent_path: Option, + agent_nickname: Option, + agent_role: Option, +} + #[derive(Debug, Deserialize)] struct SpawnAgentArgs { message: Option, items: Option>, + #[serde(rename = "task_name")] + _task_name: Option, agent_type: Option, model: Option, + model_fallback_list: Option>, reasoning_effort: Option, - #[serde(default)] - fork_context: bool, + fork_context: Option, } #[derive(Debug, Serialize)] pub(crate) struct SpawnAgentResult { - agent_id: String, + agent_id: Option, + task_name: Option, nickname: Option, } @@ -215,3 +378,70 @@ impl ToolOutput for SpawnAgentResult { tool_output_code_mode_result(self, "spawn_agent") } } + +async fn spawn_watchdog( + agent_control: &crate::agent::AgentControl, + config: Config, + prompt: String, + owner_thread_id: ThreadId, + child_depth: i32, + interval_s: i64, + spawn_source: SessionSource, +) -> CodexResult { + let target_thread_id = agent_control + .spawn_agent_with_metadata( + config.clone(), + Op::UserInput { + items: vec![codex_protocol::user_input::UserInput::Text { + text: prompt.clone(), + text_elements: Vec::new(), + }], + environments: None, + final_output_json_schema: None, + responsesapi_client_metadata: None, + }, + Some(spawn_source), + SpawnAgentOptions::default(), + ) + .await? 
+ .thread_id; + let superseded_before_register = agent_control + .unregister_watchdogs_for_owner(owner_thread_id) + .await; + shutdown_removed_watchdogs(agent_control, superseded_before_register).await; + let registration = WatchdogRegistration { + owner_thread_id, + target_thread_id, + child_depth, + interval_s, + prompt: prompt.clone(), + config, + }; + let superseded_after_register = match agent_control.register_watchdog(registration).await { + Ok(removed) => removed, + Err(err) => { + let _ = agent_control.close_agent(target_thread_id).await; + return Err(err); + } + }; + shutdown_removed_watchdogs(agent_control, superseded_after_register).await; + Ok(target_thread_id) +} + +async fn shutdown_removed_watchdogs( + agent_control: &crate::agent::AgentControl, + removed_watchdogs: Vec, +) { + let mut thread_ids = HashSet::new(); + for removed in removed_watchdogs { + thread_ids.insert(removed.target_thread_id); + if let Some(helper_id) = removed.active_helper_id { + thread_ids.insert(helper_id); + } + } + let mut thread_ids = thread_ids.into_iter().collect::>(); + thread_ids.sort_by_key(ToString::to_string); + for thread_id in thread_ids { + let _ = agent_control.close_agent(thread_id).await; + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents/wait.rs b/codex-rs/core/src/tools/handlers/multi_agents/wait.rs index 77fa5f83a2..18afaa0d90 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents/wait.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents/wait.rs @@ -1,15 +1,16 @@ use super::*; +use crate::agent::agent_resolver::resolve_agent_targets; use crate::agent::status::is_final; use codex_protocol::error::CodexErr; use futures::FutureExt; use futures::StreamExt; use futures::stream::FuturesUnordered; use std::collections::HashMap; +use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; use tokio::sync::watch::Receiver; use tokio::time::Instant; - use tokio::time::timeout_at; pub(crate) struct Handler; @@ -35,7 +36,19 @@ impl 
ToolHandler for Handler { } = invocation; let arguments = function_arguments(payload)?; let args: WaitArgs = parse_arguments(&arguments)?; - let receiver_thread_ids = parse_agent_id_targets(args.targets)?; + + if let Some(owner_thread_id) = session + .services + .agent_control + .watchdog_owner_for_active_helper(session.conversation_id) + .await + { + return Err(FunctionCallError::RespondToModel(format!( + "wait_agent is not available to watchdog check-in agents. This thread is a one-shot watchdog check-in for owner {owner_thread_id}. Send the result to the parent/root agent with `send_input`, or finish with a final assistant report that runtime can forward as fallback. If you exit without either one, runtime emits no root-visible fallback message and the next scheduled watchdog check-in will try again." + ))); + } + + let receiver_thread_ids = resolve_agent_targets(&session, &turn, args.targets).await?; let mut receiver_agents = Vec::with_capacity(receiver_thread_ids.len()); let mut target_by_thread_id = HashMap::with_capacity(receiver_thread_ids.len()); for receiver_thread_id in &receiver_thread_ids { @@ -59,6 +72,22 @@ impl ToolHandler for Handler { }); } + let watchdog_target_ids = session + .services + .agent_control + .watchdog_targets(&receiver_thread_ids) + .await; + let mut waited_thread_ids = Vec::new(); + let mut watchdog_statuses = Vec::new(); + split_wait_ids( + &session, + receiver_thread_ids, + &watchdog_target_ids, + &mut waited_thread_ids, + &mut watchdog_statuses, + ) + .await; + let timeout_ms = args.timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS); let timeout_ms = match timeout_ms { ms if ms <= 0 => { @@ -74,7 +103,7 @@ impl ToolHandler for Handler { &turn, CollabWaitingBeginEvent { sender_thread_id: session.conversation_id, - receiver_thread_ids: receiver_thread_ids.clone(), + receiver_thread_ids: waited_thread_ids.clone(), receiver_agents: receiver_agents.clone(), call_id: call_id.clone(), } @@ -82,9 +111,31 @@ impl ToolHandler for Handler { ) 
.await; - let mut status_rxs = Vec::with_capacity(receiver_thread_ids.len()); + if waited_thread_ids.is_empty() { + let statuses_map = watchdog_statuses.iter().cloned().collect::>(); + let content = serde_json::to_string(&statuses_map).map_err(|err| { + FunctionCallError::Fatal(format!("failed to serialize wait_agent status: {err}")) + })?; + session + .send_event( + &turn, + CollabWaitingEndEvent { + sender_thread_id: session.conversation_id, + call_id, + agent_statuses: Vec::new(), + statuses: statuses_map, + } + .into(), + ) + .await; + return Err(FunctionCallError::RespondToModel(format!( + "wait_agent cannot be used to wait for watchdog check-ins. You passed only watchdog handle ids. Watchdog check-ins only happen after the current turn ends and the owner thread is idle for at least watchdog_interval_s. `wait_agent` on a watchdog handle is status-only and cannot confirm a new check-in. Do not poll with `wait_agent`, `list_agents`, or shell `sleep`: the owner thread is still active during this turn, so those calls cannot make the watchdog fire. 
Current watchdog handle statuses: {content}" + ))); + } + + let mut status_rxs = Vec::with_capacity(waited_thread_ids.len()); let mut initial_final_statuses = Vec::new(); - for id in &receiver_thread_ids { + for id in &waited_thread_ids { match session.services.agent_control.subscribe_status(*id).await { Ok(rx) => { let status = rx.borrow().clone(); @@ -97,8 +148,9 @@ impl ToolHandler for Handler { initial_final_statuses.push((*id, AgentStatus::NotFound)); } Err(err) => { - let mut statuses = HashMap::with_capacity(1); + let mut statuses = HashMap::with_capacity(1 + watchdog_statuses.len()); statuses.insert(*id, session.services.agent_control.get_status(*id).await); + statuses.extend(watchdog_statuses.iter().cloned()); session .send_event( &turn, @@ -123,7 +175,7 @@ impl ToolHandler for Handler { initial_final_statuses } else { let mut futures = FuturesUnordered::new(); - for (id, rx) in status_rxs.into_iter() { + for (id, rx) in status_rxs { let session = session.clone(); futures.push(wait_for_final_status(session, id, rx)); } @@ -152,16 +204,17 @@ impl ToolHandler for Handler { }; let timed_out = statuses.is_empty(); - let statuses_by_id = statuses.clone().into_iter().collect::>(); + let mut statuses_by_id = statuses.clone().into_iter().collect::>(); + statuses_by_id.extend(watchdog_statuses); let agent_statuses = build_wait_agent_statuses(&statuses_by_id, &receiver_agents); let result = WaitAgentResult { - status: statuses - .into_iter() + status: statuses_by_id + .iter() .filter_map(|(thread_id, status)| { target_by_thread_id - .get(&thread_id) + .get(thread_id) .cloned() - .map(|target| (target, status)) + .map(|target| (target, status.clone())) }) .collect(), timed_out, @@ -228,7 +281,10 @@ async fn wait_for_final_status( loop { if status_rx.changed().await.is_err() { let latest = session.services.agent_control.get_status(thread_id).await; - return is_final(&latest).then_some((thread_id, latest)); + if is_final(&latest) { + return Some((thread_id, latest)); + 
} + return None; } status = status_rx.borrow().clone(); if is_final(&status) { @@ -236,3 +292,20 @@ async fn wait_for_final_status( } } } + +async fn split_wait_ids( + session: &Arc, + requested_thread_ids: Vec, + watchdog_target_ids: &HashSet, + waited_thread_ids: &mut Vec, + watchdog_statuses: &mut Vec<(ThreadId, AgentStatus)>, +) { + for thread_id in requested_thread_ids { + if watchdog_target_ids.contains(&thread_id) { + let status = session.services.agent_control.get_status(thread_id).await; + watchdog_statuses.push((thread_id, status)); + } else { + waited_thread_ids.push(thread_id); + } + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents/watchdog_self_close.rs b/codex-rs/core/src/tools/handlers/multi_agents/watchdog_self_close.rs new file mode 100644 index 0000000000..5a5645a1d2 --- /dev/null +++ b/codex-rs/core/src/tools/handlers/multi_agents/watchdog_self_close.rs @@ -0,0 +1,148 @@ +use super::*; + +pub(crate) struct Handler; + +impl ToolHandler for Handler { + type Output = WatchdogSelfCloseResult; + + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + fn matches_kind(&self, payload: &ToolPayload) -> bool { + matches!(payload, ToolPayload::Function { .. }) + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, + payload, + call_id, + turn, + .. 
+ } = invocation; + let arguments = function_arguments(payload)?; + let _args: WatchdogSelfCloseArgs = parse_arguments(&arguments)?; + let helper_thread_id = session.conversation_id; + if session + .services + .agent_control + .watchdog_owner_for_active_helper(helper_thread_id) + .await + .is_none() + { + return Err(FunctionCallError::RespondToModel( + "watchdog_self_close is only available in watchdog check-in threads.".to_string(), + )); + }; + + let receiver_agent = session + .services + .agent_control + .get_agent_metadata(helper_thread_id) + .unwrap_or_default(); + + session + .send_event( + &turn, + CollabCloseBeginEvent { + call_id: call_id.clone(), + sender_thread_id: helper_thread_id, + receiver_thread_id: helper_thread_id, + } + .into(), + ) + .await; + + let status = match session + .services + .agent_control + .subscribe_status(helper_thread_id) + .await + { + Ok(mut status_rx) => status_rx.borrow_and_update().clone(), + Err(err) => { + let status = session + .services + .agent_control + .get_status(helper_thread_id) + .await; + session + .send_event( + &turn, + CollabCloseEndEvent { + call_id: call_id.clone(), + sender_thread_id: helper_thread_id, + receiver_thread_id: helper_thread_id, + receiver_agent_nickname: receiver_agent.agent_nickname.clone(), + receiver_agent_role: receiver_agent.agent_role.clone(), + status, + } + .into(), + ) + .await; + return Err(collab_agent_error(helper_thread_id, err)); + } + }; + + let result = session + .services + .agent_control + .close_agent(helper_thread_id) + .await + .map_err(|err| collab_agent_error(helper_thread_id, err)) + .map(|_| ()); + + let receiver_agent = session + .services + .agent_control + .get_agent_metadata(helper_thread_id) + .unwrap_or_default(); + session + .send_event( + &turn, + CollabCloseEndEvent { + call_id, + sender_thread_id: helper_thread_id, + receiver_thread_id: helper_thread_id, + receiver_agent_nickname: receiver_agent.agent_nickname, + receiver_agent_role: 
receiver_agent.agent_role, + status: status.clone(), + } + .into(), + ) + .await; + + result?; + + Ok(WatchdogSelfCloseResult { + previous_status: status, + }) + } +} + +#[derive(Debug, Deserialize)] +struct WatchdogSelfCloseArgs {} + +#[derive(Debug, Serialize)] +pub(crate) struct WatchdogSelfCloseResult { + previous_status: AgentStatus, +} + +impl ToolOutput for WatchdogSelfCloseResult { + fn log_preview(&self) -> String { + tool_output_json_text(self, "watchdog_self_close") + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { + tool_output_response_item(call_id, payload, self, Some(true), "watchdog_self_close") + } + + fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue { + tool_output_code_mode_result(self, "watchdog_self_close") + } +} diff --git a/codex-rs/core/src/tools/handlers/multi_agents_common.rs b/codex-rs/core/src/tools/handlers/multi_agents_common.rs index c01755cb2b..7f792d037f 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_common.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_common.rs @@ -18,19 +18,39 @@ use codex_protocol::models::ResponseInputItem; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::protocol::CollabAgentRef; +use codex_protocol::protocol::CollabAgentSpawnBeginEvent; +use codex_protocol::protocol::CollabAgentSpawnEndEvent; use codex_protocol::protocol::CollabAgentStatusEntry; use codex_protocol::protocol::Op; use codex_protocol::protocol::SessionSource; use codex_protocol::protocol::SubAgentSource; use codex_protocol::user_input::UserInput; +use serde::Deserialize; use serde::Serialize; use serde_json::Value as JsonValue; use std::collections::HashMap; +use tokio::time::Duration; +use tokio::time::Instant; +use tokio::time::timeout; /// Minimum wait timeout to prevent tight polling loops from burning CPU. 
pub(crate) const MIN_WAIT_TIMEOUT_MS: i64 = DEFAULT_MULTI_AGENT_V2_MIN_WAIT_TIMEOUT_MS; pub(crate) const DEFAULT_WAIT_TIMEOUT_MS: i64 = 30_000; pub(crate) const MAX_WAIT_TIMEOUT_MS: i64 = MAX_MULTI_AGENT_V2_WAIT_TIMEOUT_MS; +const ASYNC_QUOTA_EXHAUSTION_STATUS_TIMEOUT: Duration = Duration::from_secs(2); + +pub(crate) enum SpawnAttemptRetryDecision { + Accept(AgentStatus), + Retry(AgentStatus), +} + +pub(crate) fn spawn_attempt_event_call_id(call_id: &str, attempt_index: usize) -> String { + if attempt_index == 0 { + call_id.to_string() + } else { + format!("{call_id}#{}", attempt_index + 1) + } +} pub(crate) fn function_arguments(payload: ToolPayload) -> Result { match payload { @@ -73,6 +93,177 @@ where }) } +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct SpawnAgentModelCandidate { + pub(crate) model: Option, + pub(crate) reasoning_effort: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +pub(crate) struct SpawnAgentModelFallbackCandidate { + pub(crate) model: String, + #[serde(default)] + pub(crate) reasoning_effort: Option, +} + +pub(crate) fn collect_spawn_agent_model_candidates( + model_fallback_list: Option<&Vec>, + requested_model: Option<&str>, + requested_reasoning_effort: Option, +) -> Vec { + if let Some(model_fallback_list) = model_fallback_list { + return model_fallback_list + .iter() + .map(|candidate| SpawnAgentModelCandidate { + model: Some(candidate.model.clone()), + reasoning_effort: candidate.reasoning_effort.or(requested_reasoning_effort), + }) + .collect(); + } + + let mut candidates = Vec::new(); + if requested_model.is_some() || requested_reasoning_effort.is_some() { + candidates.push(SpawnAgentModelCandidate { + model: requested_model.map(ToString::to_string), + reasoning_effort: requested_reasoning_effort, + }); + } + candidates +} + +pub(crate) async fn close_quota_exhausted_spawn_attempt( + agent_control: &crate::agent::control::AgentControl, + thread_id: ThreadId, + retry_status: AgentStatus, +) -> 
SpawnAttemptRetryDecision { + let retry_decision = + recheck_spawn_attempt_retry_decision(retry_status, thread_id, agent_control).await; + let SpawnAttemptRetryDecision::Retry(status) = retry_decision else { + return retry_decision; + }; + + // There is still a narrow TOCTOU window: a child can leave `PendingInit` after the final + // status read above and before `close_agent` runs. `AgentControl` does not currently expose + // a compare-and-close primitive, so this is the strongest local mitigation available. + if let Err(err) = agent_control.close_agent(thread_id).await + && !matches!( + err, + CodexErr::ThreadNotFound(_) | CodexErr::InternalAgentDied + ) + { + tracing::warn!("failed to close quota-exhausted spawn attempt {thread_id}: {err}"); + } + SpawnAttemptRetryDecision::Retry(status) +} +pub(crate) fn spawn_should_retry_on_quota_exhaustion(error: &CodexErr) -> bool { + matches!( + error, + CodexErr::QuotaExceeded | CodexErr::UsageLimitReached(_) + ) +} + +pub(crate) async fn probe_spawn_attempt_for_async_quota_exhaustion( + thread_status: AgentStatus, + thread_id: ThreadId, + agent_control: &crate::agent::control::AgentControl, +) -> SpawnAttemptRetryDecision { + match thread_status { + AgentStatus::Completed(_) + | AgentStatus::Errored(_) + | AgentStatus::Shutdown + | AgentStatus::NotFound => { + return retry_decision_for_final_spawn_status(thread_status); + } + AgentStatus::PendingInit | AgentStatus::Running | AgentStatus::Interrupted => {} + } + + let Ok(mut status_rx) = agent_control.subscribe_status(thread_id).await else { + return match thread_status { + AgentStatus::Running | AgentStatus::Interrupted => { + SpawnAttemptRetryDecision::Accept(thread_status) + } + _ => SpawnAttemptRetryDecision::Retry(AgentStatus::PendingInit), + }; + }; + let deadline = Instant::now() + ASYNC_QUOTA_EXHAUSTION_STATUS_TIMEOUT; + + loop { + let status = status_rx.borrow_and_update().clone(); + match status { + AgentStatus::Completed(_) + | AgentStatus::Errored(_) + | 
AgentStatus::Shutdown + | AgentStatus::NotFound => { + return retry_decision_for_final_spawn_status(status); + } + AgentStatus::PendingInit | AgentStatus::Running | AgentStatus::Interrupted => {} + } + + let Some(remaining) = deadline.checked_duration_since(Instant::now()) else { + return match status { + AgentStatus::PendingInit => { + SpawnAttemptRetryDecision::Retry(AgentStatus::PendingInit) + } + AgentStatus::Running | AgentStatus::Interrupted => { + SpawnAttemptRetryDecision::Accept(status) + } + AgentStatus::Completed(_) + | AgentStatus::Errored(_) + | AgentStatus::Shutdown + | AgentStatus::NotFound => retry_decision_for_final_spawn_status(status), + }; + }; + match timeout(remaining, status_rx.changed()).await { + Ok(Ok(())) => {} + Ok(Err(_)) => return SpawnAttemptRetryDecision::Retry(AgentStatus::PendingInit), + Err(_) => return SpawnAttemptRetryDecision::Retry(AgentStatus::PendingInit), + } + } +} + +pub(crate) async fn recheck_spawn_attempt_retry_decision( + status: AgentStatus, + thread_id: ThreadId, + agent_control: &crate::agent::control::AgentControl, +) -> SpawnAttemptRetryDecision { + if !matches!(status, AgentStatus::PendingInit) { + return SpawnAttemptRetryDecision::Retry(status); + } + + let latest_status = agent_control.get_status(thread_id).await; + match latest_status { + AgentStatus::Running | AgentStatus::Interrupted => { + SpawnAttemptRetryDecision::Accept(latest_status) + } + AgentStatus::Completed(_) + | AgentStatus::Errored(_) + | AgentStatus::Shutdown + | AgentStatus::NotFound => retry_decision_for_final_spawn_status(latest_status), + AgentStatus::PendingInit => SpawnAttemptRetryDecision::Retry(AgentStatus::PendingInit), + } +} + +fn retry_decision_for_final_spawn_status(status: AgentStatus) -> SpawnAttemptRetryDecision { + if spawn_should_retry_on_quota_exhaustion_status(&status) { + SpawnAttemptRetryDecision::Retry(status) + } else { + SpawnAttemptRetryDecision::Accept(status) + } +} + +fn 
spawn_should_retry_on_quota_exhaustion_status(status: &AgentStatus) -> bool { + match status { + AgentStatus::Errored(message) => { + let message = message.to_lowercase(); + message.contains("insufficient_quota") + || message.contains("usage limit") + || message.contains("quota") + } + AgentStatus::NotFound => false, + _ => false, + } +} + pub(crate) fn build_wait_agent_statuses( statuses: &HashMap, receiver_agents: &[CollabAgentRef], @@ -120,6 +311,88 @@ pub(crate) fn collab_spawn_error(err: CodexErr) -> FunctionCallError { } } +pub(crate) async fn send_collab_agent_spawn_error_event( + session: &Session, + turn: &TurnContext, + call_id: String, + prompt: String, + model: String, + reasoning_effort: ReasoningEffort, + err: &CodexErr, +) { + session + .send_event( + turn, + CollabAgentSpawnEndEvent { + call_id, + sender_thread_id: session.conversation_id, + new_thread_id: None, + new_agent_nickname: None, + new_agent_role: None, + prompt, + model, + reasoning_effort, + status: match err { + CodexErr::ThreadNotFound(_) => AgentStatus::NotFound, + err => AgentStatus::Errored(err.to_string()), + }, + } + .into(), + ) + .await; +} + +pub(crate) async fn send_collab_agent_spawn_begin_event( + session: &Session, + turn: &TurnContext, + call_id: String, + prompt: String, + model: String, + reasoning_effort: ReasoningEffort, +) { + session + .send_event( + turn, + CollabAgentSpawnBeginEvent { + call_id, + sender_thread_id: session.conversation_id, + prompt, + model, + reasoning_effort, + } + .into(), + ) + .await; +} + +pub(crate) async fn send_collab_agent_spawn_retry_preempted_event( + session: &Session, + turn: &TurnContext, + call_id: String, + prompt: String, + model: String, + reasoning_effort: ReasoningEffort, + status: AgentStatus, +) { + session + .send_event( + turn, + CollabAgentSpawnEndEvent { + call_id, + sender_thread_id: session.conversation_id, + new_thread_id: None, + new_agent_nickname: None, + new_agent_role: None, + prompt, + model, + reasoning_effort, 
+ status, + } + .into(), + ) + .await; +} + pub(crate) fn collab_agent_error(agent_id: ThreadId, err: CodexErr) -> FunctionCallError { match err { CodexErr::ThreadNotFound(id) => { @@ -227,6 +500,8 @@ fn build_agent_shared_config(turn: &TurnContext) -> Result ThreadId { + let owner = manager + .start_thread(config.clone(), thread_store_from_config(config)) + .await + .expect("owner thread should start"); + let target = agent_control + .spawn_agent( + config.clone(), + Op::UserInput { + items: vec![UserInput::Text { + text: "watchdog target".to_string(), + text_elements: Vec::new(), + }], + environments: None, + final_output_json_schema: None, + responsesapi_client_metadata: None, + }, + /*session_source*/ None, + ) + .await + .expect("watchdog target thread should start"); + let helper = manager + .start_thread(config.clone(), thread_store_from_config(config)) + .await + .expect("helper thread should start"); + agent_control + .register_watchdog(WatchdogRegistration { + owner_thread_id: owner.thread_id, + target_thread_id: target, + child_depth: 1, + interval_s: 30, + prompt: "check in".to_string(), + config: config.clone(), + }) + .await + .expect("watchdog registration should succeed"); + agent_control + .set_watchdog_active_helper_for_tests(target, helper.thread_id) + .await; + assert_eq!( + agent_control + .watchdog_owner_for_active_helper(helper.thread_id) + .await, + Some(owner.thread_id), + "watchdog helper should be registered for owner" + ); + helper.thread_id +} + +async fn install_role_with_model_provider_and_profile_override(turn: &mut TurnContext) -> String { + let role_name = "fork-context-role".to_string(); + tokio::fs::create_dir_all(&turn.config.codex_home) + .await + .expect("codex home should be created"); + let role_config_path = turn + .config + .codex_home + .as_path() + .join("fork-context-role.toml"); + tokio::fs::write( + &role_config_path, + r#"developer_instructions = "Forked children should keep the parent model config." 
+model_provider = "openai" +model_context_window = 12345 +model_auto_compact_token_limit = 1234 +model_verbosity = "low" +plan_mode_reasoning_effort = "minimal" +profile = "role-profile" +service_tier = "fast" + +[profiles.role-profile] +model_provider = "openai" +"#, + ) + .await + .expect("role config should be written"); + + let mut config = (*turn.config).clone(); + config.service_tier = Some(ServiceTier::Flex); + config.plan_mode_reasoning_effort = Some(ReasoningEffort::High); + config.model_verbosity = Some(Verbosity::High); + config.model_context_window = Some(200_000); + config.model_auto_compact_token_limit = Some(180_000); + config.agent_roles.insert( + role_name.clone(), + AgentRoleConfig { + description: Some("Role with model-provider and profile overrides".to_string()), + model: None, + config_file: Some(role_config_path), + watchdog_interval_s: None, + nickname_candidates: None, + fork_context: None, + }, + ); + turn.config = Arc::new(config); + + role_name +} + +#[derive(Clone, Copy)] +struct NeverEndingTask; + +impl SessionTask for NeverEndingTask { + fn kind(&self) -> TaskKind { + TaskKind::Regular + } + + fn span_name(&self) -> &'static str { + "session_task.multi_agent_never_ending" + } + + async fn run( + self: Arc, + session: Arc, + ctx: Arc, + _input: Vec, + cancellation_token: CancellationToken, + ) -> Option { + session + .clone_session() + .send_event( + ctx.as_ref(), + EventMsg::TurnStarted(codex_protocol::protocol::TurnStartedEvent { + turn_id: ctx.sub_id.clone(), + started_at: Some(0), + model_context_window: ctx.model_context_window(), + collaboration_mode_kind: ctx.collaboration_mode.mode, + }), + ) + .await; + cancellation_token.cancelled().await; + None + } +} fn expect_text_output(output: T) -> (String, Option) where T: ToolOutput, @@ -156,6 +308,20 @@ where } } +async fn wait_for_collab_spawn_end_event( + rx: &async_channel::Receiver, +) -> CollabAgentSpawnEndEvent { + loop { + let event = timeout(Duration::from_secs(1), rx.recv()) 
+ .await + .expect("collab spawn-end event timed out") + .expect("collab spawn-end event missing"); + if let EventMsg::CollabAgentSpawnEnd(event) = event.msg { + return event; + } + } +} + #[derive(Debug, Deserialize)] struct ListAgentsResult { agents: Vec, @@ -168,6 +334,11 @@ struct ListedAgentResult { last_task_message: Option, } +#[derive(Debug, Deserialize, PartialEq)] +struct WatchdogSelfCloseResult { + previous_status: AgentStatus, +} + #[tokio::test] async fn handler_rejects_non_function_payloads() { let (session, turn) = make_session_and_context().await; @@ -241,7 +412,15 @@ async fn spawn_agent_uses_explorer_role_and_preserves_approval_policy() { let (mut session, mut turn) = make_session_and_context().await; let manager = thread_manager(); + let root = manager + .start_thread( + (*turn.config).clone(), + thread_store_from_config(turn.config.as_ref()), + ) + .await + .expect("root thread should start"); session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; let mut config = (*turn.config).clone(); let provider_info = built_in_model_providers(/* openai_base_url */ /*openai_base_url*/ None)["ollama"].clone(); @@ -264,7 +443,8 @@ async fn spawn_agent_uses_explorer_role_and_preserves_approval_policy() { "spawn_agent", function_payload(json!({ "message": "inspect this repo", - "agent_type": "explorer" + "agent_type": "explorer", + "fork_context": false })), ); let output = SpawnAgentHandler @@ -358,7 +538,7 @@ async fn spawn_agent_fork_context_rejects_child_model_overrides() { assert_eq!( err, - FunctionCallError::RespondToModel( + FunctionCallError::RespondToModel( "Full-history forked agents inherit the parent agent type, model, and reasoning effort; omit agent_type, model, and reasoning_effort, or spawn without a full-history fork.".to_string(), ) ); @@ -448,16 +628,16 @@ async fn multi_agent_v2_spawn_defaults_to_full_fork_and_rejects_child_model_over assert_eq!( err, - FunctionCallError::RespondToModel( + 
FunctionCallError::RespondToModel( "Full-history forked agents inherit the parent agent type, model, and reasoning effort; omit agent_type, model, and reasoning_effort, or spawn without a full-history fork.".to_string(), ) ); } #[tokio::test] -async fn multi_agent_v2_spawn_partial_fork_turns_allows_agent_type_override() { +async fn multi_agent_v2_spawn_partial_fork_turns_allows_role_overrides() { let (mut session, mut turn) = make_session_and_context().await; - let role_name = install_role_with_model_override(&mut turn).await; + let role_name = install_role_with_model_provider_and_profile_override(&mut turn).await; let manager = thread_manager(); let root = manager .start_thread( @@ -485,13 +665,13 @@ async fn multi_agent_v2_spawn_partial_fork_turns_allows_agent_type_override() { "spawn_agent", function_payload(json!({ "message": "inspect this repo", - "task_name": "partial_fork", "agent_type": role_name, - "fork_turns": "1" + "fork_turns": "1", + "task_name": "partial_fork" })), )) .await - .expect("partial fork should allow agent_type overrides"); + .expect("partial fork should preserve role overrides"); let (content, _) = expect_text_output(output); let result: serde_json::Value = serde_json::from_str(&content).expect("spawn_agent result should be json"); @@ -509,16 +689,32 @@ async fn multi_agent_v2_spawn_partial_fork_turns_allows_agent_type_override() { .config_snapshot() .await; - assert_eq!(snapshot.model, "gpt-5-role-override"); - assert_eq!(snapshot.model_provider_id, "ollama"); - assert_eq!(snapshot.reasoning_effort, Some(ReasoningEffort::Minimal)); + assert_eq!(snapshot.active_profile.as_deref(), Some("role-profile")); + assert_eq!(snapshot.model_context_window, Some(12_345)); + assert_eq!(snapshot.model_auto_compact_token_limit, Some(1_234)); + assert_eq!(snapshot.model_verbosity, Some(Verbosity::Low)); } #[tokio::test] -async fn spawn_agent_returns_agent_id_without_task_name() { - let (mut session, turn) = make_session_and_context().await; +async fn 
spawn_agent_watchdog_role_returns_handle_with_role_defaults() { + let (mut session, mut turn) = make_session_and_context().await; let manager = thread_manager(); - session.services.agent_control = manager.agent_control(); + let root = manager + .start_thread( + (*turn.config).clone(), + thread_store_from_config(turn.config.as_ref()), + ) + .await + .expect("root thread should start"); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + session.conversation_id = root.thread_id; + let mut config = (*turn.config).clone(); + config + .features + .enable(Feature::AgentWatchdog) + .expect("test config should allow feature update"); + turn.config = Arc::new(config); let output = SpawnAgentHandler .handle(invocation( @@ -526,7 +722,52 @@ async fn spawn_agent_returns_agent_id_without_task_name() { Arc::new(turn), "spawn_agent", function_payload(json!({ - "message": "inspect this repo" + "message": "check in periodically", + "agent_type": "watchdog" + })), + )) + .await + .expect("spawn_agent should succeed"); + let (content, success) = expect_text_output(output); + let result: serde_json::Value = + serde_json::from_str(&content).expect("spawn_agent result should be json"); + let agent_id = parse_agent_id( + result["agent_id"] + .as_str() + .expect("spawn_agent result should include agent_id"), + ); + + assert_eq!(success, Some(true)); + assert_eq!( + agent_control.get_status(agent_id).await, + AgentStatus::PendingInit + ); + let watchdog_targets = agent_control.watchdog_targets(&[agent_id]).await; + assert_eq!(watchdog_targets, HashSet::from([agent_id])); +} + +#[tokio::test] +async fn spawn_agent_returns_agent_id_without_task_name() { + let (mut session, turn) = make_session_and_context().await; + let manager = thread_manager(); + let root = manager + .start_thread( + (*turn.config).clone(), + thread_store_from_config(turn.config.as_ref()), + ) + .await + .expect("root thread should start"); + 
session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; + + let output = SpawnAgentHandler + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "spawn_agent", + function_payload(json!({ + "message": "inspect this repo", + "fork_context": false })), )) .await @@ -536,7 +777,7 @@ async fn spawn_agent_returns_agent_id_without_task_name() { serde_json::from_str(&content).expect("spawn_agent result should be json"); assert!(result["agent_id"].is_string()); - assert!(result.get("task_name").is_none()); + assert_eq!(result["task_name"], serde_json::Value::Null); assert!(result.get("nickname").is_some()); assert_eq!(success, Some(true)); } @@ -619,22 +860,178 @@ async fn multi_agent_v2_spawn_rejects_legacy_items_field() { #[tokio::test] async fn spawn_agent_errors_when_manager_dropped() { - let (session, turn) = make_session_and_context().await; + let (session, turn, rx) = make_session_and_context_with_rx().await; let invocation = invocation( - Arc::new(session), - Arc::new(turn), + session.clone(), + turn.clone(), "spawn_agent", - function_payload(json!({"message": "hello"})), + function_payload(json!({"message": "hello", "fork_context": false})), ); let Err(err) = SpawnAgentHandler.handle(invocation).await else { panic!("spawn should fail without a manager"); }; + let spawn_end_event = wait_for_collab_spawn_end_event(&rx).await; + assert_eq!(spawn_end_event.call_id, "call-1"); + assert_eq!(spawn_end_event.sender_thread_id, session.conversation_id); + assert_eq!(spawn_end_event.new_thread_id, None); + assert_eq!(spawn_end_event.new_agent_nickname, None); + assert_eq!(spawn_end_event.new_agent_role, None); + assert_eq!(spawn_end_event.prompt, "hello"); + assert_eq!(spawn_end_event.model, ""); + assert_eq!(spawn_end_event.reasoning_effort, ReasoningEffort::default()); + assert!(matches!( + spawn_end_event.status, + AgentStatus::Errored(ref message) if message.contains("thread manager dropped") + )); assert_eq!( err, 
FunctionCallError::RespondToModel("collab manager unavailable".to_string()) ); } +#[tokio::test] +async fn multi_agent_v2_spawn_agent_errors_when_manager_dropped() { + let (session, mut turn, rx) = make_session_and_context_with_rx().await; + let turn_context = Arc::get_mut(&mut turn).expect("single turn context ref"); + let mut config = (*turn_context.config).clone(); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + turn_context.config = Arc::new(config); + + let invocation = invocation( + session.clone(), + turn.clone(), + "spawn_agent", + function_payload(json!({ + "message": "inspect this repo", + "task_name": "worker" + })), + ); + let Err(err) = SpawnAgentHandlerV2.handle(invocation).await else { + panic!("spawn should fail without a manager"); + }; + let spawn_end_event = wait_for_collab_spawn_end_event(&rx).await; + assert_eq!(spawn_end_event.call_id, "call-1"); + assert_eq!(spawn_end_event.sender_thread_id, session.conversation_id); + assert_eq!(spawn_end_event.new_thread_id, None); + assert_eq!(spawn_end_event.new_agent_nickname, None); + assert_eq!(spawn_end_event.new_agent_role, None); + assert_eq!(spawn_end_event.prompt, "inspect this repo"); + assert_eq!(spawn_end_event.model, ""); + assert_eq!(spawn_end_event.reasoning_effort, ReasoningEffort::default()); + assert!(matches!( + spawn_end_event.status, + AgentStatus::Errored(ref message) if message.contains("thread manager dropped") + )); + assert_eq!( + err, + FunctionCallError::RespondToModel("collab manager unavailable".to_string()) + ); +} + +#[tokio::test] +async fn spawn_retry_preempted_event_omits_thread_identity() { + let (session, turn, rx) = make_session_and_context_with_rx().await; + + send_collab_agent_spawn_retry_preempted_event( + session.as_ref(), + turn.as_ref(), + "call-1".to_string(), + "inspect this repo".to_string(), + "gpt-5.4-mini".to_string(), + ReasoningEffort::Medium, + AgentStatus::PendingInit, + ) + .await; + + let 
spawn_end_event = wait_for_collab_spawn_end_event(&rx).await; + assert_eq!(spawn_end_event.call_id, "call-1"); + assert_eq!(spawn_end_event.sender_thread_id, session.conversation_id); + assert_eq!(spawn_end_event.new_thread_id, None); + assert_eq!(spawn_end_event.new_agent_nickname, None); + assert_eq!(spawn_end_event.new_agent_role, None); + assert_eq!(spawn_end_event.prompt, "inspect this repo"); + assert_eq!(spawn_end_event.model, "gpt-5.4-mini"); + assert_eq!(spawn_end_event.reasoning_effort, ReasoningEffort::Medium); + assert_eq!(spawn_end_event.status, AgentStatus::PendingInit); +} + +#[tokio::test] +async fn spawn_async_quota_probe_accepts_running_child() { + let decision = probe_spawn_attempt_for_async_quota_exhaustion( + AgentStatus::Running, + ThreadId::default(), + &crate::agent::control::AgentControl::default(), + ) + .await; + + assert!(matches!( + decision, + SpawnAttemptRetryDecision::Accept(AgentStatus::Running) + )); +} + +#[tokio::test] +async fn close_quota_exhausted_spawn_attempt_accepts_child_that_started_running() { + let (_session, turn) = make_session_and_context().await; + let manager = thread_manager(); + let thread = manager + .start_thread( + (*turn.config).clone(), + thread_store_from_config(turn.config.as_ref()), + ) + .await + .expect("child thread should start"); + let active_turn = thread.thread.codex.session.new_default_turn().await; + thread + .thread + .codex + .session + .spawn_task( + Arc::clone(&active_turn), + vec![UserInput::Text { + text: "working".to_string(), + text_elements: Vec::new(), + }], + NeverEndingTask, + ) + .await; + timeout(Duration::from_secs(1), async { + loop { + if manager.agent_control().get_status(thread.thread_id).await == AgentStatus::Running { + break; + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + }) + .await + .expect("child should reach running"); + + let decision = close_quota_exhausted_spawn_attempt( + &manager.agent_control(), + thread.thread_id, + AgentStatus::PendingInit, + ) 
+ .await; + + assert!(matches!( + decision, + SpawnAttemptRetryDecision::Accept(AgentStatus::Running) + )); + assert_eq!( + manager.agent_control().get_status(thread.thread_id).await, + AgentStatus::Running + ); + + let _ = thread + .thread + .submit(Op::Shutdown {}) + .await + .expect("shutdown should submit"); +} + #[tokio::test] async fn multi_agent_v2_spawn_returns_path_and_send_message_accepts_relative_path() { #[derive(Debug, Deserialize)] @@ -1785,7 +2182,15 @@ async fn spawn_agent_reapplies_runtime_sandbox_after_role_config() { let (mut session, mut turn) = make_session_and_context().await; let manager = thread_manager(); + let root = manager + .start_thread( + (*turn.config).clone(), + thread_store_from_config(turn.config.as_ref()), + ) + .await + .expect("root thread should start"); session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; let expected_sandbox = turn.config.legacy_sandbox_policy(); let mut expected_file_system_sandbox_policy = FileSystemSandboxPolicy::from_legacy_sandbox_policy_for_cwd(&expected_sandbox, &turn.cwd); @@ -1819,7 +2224,8 @@ async fn spawn_agent_reapplies_runtime_sandbox_after_role_config() { "spawn_agent", function_payload(json!({ "message": "await this command", - "agent_type": "explorer" + "agent_type": "explorer", + "fork_context": false })), ); let output = SpawnAgentHandler @@ -1881,7 +2287,7 @@ async fn spawn_agent_rejects_when_depth_limit_exceeded() { Arc::new(session), Arc::new(turn), "spawn_agent", - function_payload(json!({"message": "hello"})), + function_payload(json!({"message": "hello", "fork_context": false})), ); let Err(err) = SpawnAgentHandler.handle(invocation).await else { panic!("spawn should fail when depth limit exceeded"); @@ -1904,13 +2310,21 @@ async fn spawn_agent_allows_depth_up_to_configured_max_depth() { let (mut session, mut turn) = make_session_and_context().await; let manager = thread_manager(); + let root = manager + .start_thread( + 
(*turn.config).clone(), + thread_store_from_config(turn.config.as_ref()), + ) + .await + .expect("root thread should start"); session.services.agent_control = manager.agent_control(); + session.conversation_id = root.thread_id; let mut config = (*turn.config).clone(); config.agent_max_depth = DEFAULT_AGENT_MAX_DEPTH + 1; turn.config = Arc::new(config); turn.session_source = SessionSource::SubAgent(SubAgentSource::ThreadSpawn { - parent_thread_id: session.conversation_id, + parent_thread_id: root.thread_id, depth: DEFAULT_AGENT_MAX_DEPTH, agent_path: None, agent_nickname: None, @@ -1921,7 +2335,7 @@ async fn spawn_agent_allows_depth_up_to_configured_max_depth() { Arc::new(session), Arc::new(turn), "spawn_agent", - function_payload(json!({"message": "hello"})), + function_payload(json!({"message": "hello", "fork_context": false})), ); let output = SpawnAgentHandler .handle(invocation) @@ -2051,7 +2465,10 @@ async fn send_input_rejects_invalid_id() { let FunctionCallError::RespondToModel(msg) = err else { panic!("expected respond-to-model error"); }; - assert!(msg.starts_with("invalid agent id not-a-uuid:")); + assert_eq!( + msg, + "agent_name must use only lowercase letters, digits, and underscores" + ); } #[tokio::test] @@ -2399,7 +2816,7 @@ async fn wait_agent_rejects_invalid_target() { let FunctionCallError::RespondToModel(msg) = err else { panic!("expected respond-to-model error"); }; - assert!(msg.starts_with("invalid agent id invalid:")); + assert!(msg.contains("invalid")); } #[tokio::test] @@ -2416,7 +2833,7 @@ async fn wait_agent_rejects_empty_targets() { }; assert_eq!( err, - FunctionCallError::RespondToModel("agent ids must be non-empty".to_string()) + FunctionCallError::RespondToModel("agent targets must be non-empty".to_string()) ); } @@ -3238,6 +3655,119 @@ async fn close_agent_submits_shutdown_and_returns_previous_status() { assert_eq!(status_after, AgentStatus::NotFound); } +#[tokio::test] +async fn watchdog_self_close_rejects_non_watchdog_thread() { + 
let (mut session, turn) = make_session_and_context().await; + let manager = thread_manager(); + let agent_control = manager.agent_control(); + let thread = manager + .start_thread( + turn.config.as_ref().clone(), + thread_store_from_config(turn.config.as_ref()), + ) + .await + .expect("thread should start"); + session.services.agent_control = agent_control.clone(); + session.conversation_id = thread.thread_id; + + let err = WatchdogSelfCloseHandler + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "watchdog_self_close", + function_payload(json!({})), + )) + .await + .expect_err("non-watchdog threads should be rejected"); + + assert_eq!( + err, + FunctionCallError::RespondToModel( + "watchdog_self_close is only available in watchdog check-in threads.".to_string(), + ) + ); +} + +#[tokio::test] +async fn watchdog_self_close_closes_watchdog_helper_and_returns_previous_status() { + let (mut session, turn) = make_session_and_context().await; + let manager = thread_manager(); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + let helper_thread_id = + attach_watchdog_helper_for_tests(&manager, &agent_control, turn.config.as_ref()).await; + session.conversation_id = helper_thread_id; + let status_before = agent_control.get_status(helper_thread_id).await; + + let output = WatchdogSelfCloseHandler + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "watchdog_self_close", + function_payload(json!({})), + )) + .await + .expect("watchdog helper should be allowed to self-close"); + let (content, success) = expect_text_output(output); + let result: WatchdogSelfCloseResult = + serde_json::from_str(&content).expect("watchdog self-close result should be json"); + + assert_eq!( + result, + WatchdogSelfCloseResult { + previous_status: status_before, + } + ); + assert_eq!(success, Some(true)); + assert_eq!( + agent_control.get_status(helper_thread_id).await, + AgentStatus::NotFound + ); +} + +#[tokio::test] 
+async fn multi_agent_v2_watchdog_self_close_closes_watchdog_helper_and_returns_previous_status() { + let (mut session, mut turn) = make_session_and_context().await; + let manager = thread_manager(); + let agent_control = manager.agent_control(); + session.services.agent_control = agent_control.clone(); + let mut config = turn.config.as_ref().clone(); + config + .features + .enable(Feature::MultiAgentV2) + .expect("test config should allow feature update"); + let helper_thread_id = + attach_watchdog_helper_for_tests(&manager, &agent_control, &config).await; + session.conversation_id = helper_thread_id; + turn.config = Arc::new(config); + let status_before = agent_control.get_status(helper_thread_id).await; + + let output = WatchdogSelfCloseHandlerV2 + .handle(invocation( + Arc::new(session), + Arc::new(turn), + "watchdog_self_close", + function_payload(json!({})), + )) + .await + .expect("watchdog helper should be allowed to self-close"); + let (content, success) = expect_text_output(output); + let result: WatchdogSelfCloseResult = + serde_json::from_str(&content).expect("watchdog self-close result should be json"); + + assert_eq!( + result, + WatchdogSelfCloseResult { + previous_status: status_before, + } + ); + assert_eq!(success, Some(true)); + assert_eq!( + agent_control.get_status(helper_thread_id).await, + AgentStatus::NotFound + ); +} + #[tokio::test] async fn tool_handlers_cascade_close_and_resume_and_keep_explicitly_closed_subtrees_closed() { let (_session, turn) = make_session_and_context().await; @@ -3497,6 +4027,7 @@ async fn build_agent_spawn_config_uses_turn_context_values() { let config = build_agent_spawn_config(&base_instructions, &turn).expect("spawn config"); let mut expected = (*turn.config).clone(); + expected.features = config.features.clone(); expected.base_instructions = Some(base_instructions.text); expected.model = Some(turn.model_info.slug.clone()); expected.model_provider = turn.provider.info().clone(); diff --git 
a/codex-rs/core/src/tools/handlers/multi_agents_v2.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2.rs index b561c5acb4..c15746f9fa 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_v2.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_v2.rs @@ -15,7 +15,6 @@ use codex_protocol::models::ResponseInputItem; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::protocol::CollabAgentInteractionBeginEvent; use codex_protocol::protocol::CollabAgentInteractionEndEvent; -use codex_protocol::protocol::CollabAgentSpawnBeginEvent; use codex_protocol::protocol::CollabAgentSpawnEndEvent; use codex_protocol::protocol::CollabCloseBeginEvent; use codex_protocol::protocol::CollabCloseEndEvent; @@ -32,6 +31,8 @@ pub(crate) use list_agents::Handler as ListAgentsHandler; pub(crate) use send_message::Handler as SendMessageHandler; pub(crate) use spawn::Handler as SpawnAgentHandler; pub(crate) use wait::Handler as WaitAgentHandler; +#[cfg(test)] +pub(crate) use watchdog_self_close::Handler as WatchdogSelfCloseHandlerV2; mod close_agent; mod followup_task; @@ -40,3 +41,4 @@ mod message_tool; mod send_message; mod spawn; pub(crate) mod wait; +mod watchdog_self_close; diff --git a/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs index 579c441993..5701ff7ecf 100644 --- a/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_v2/list_agents.rs @@ -30,7 +30,7 @@ impl ToolHandler for Handler { let agents = session .services .agent_control - .list_agents(&turn.session_source, args.path_prefix.as_deref()) + .list_agents_by_path(&turn.session_source, args.path_prefix.as_deref()) .await .map_err(collab_spawn_error)?; diff --git a/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs index 26b6750c46..a060e4be4c 100644 --- 
a/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs +++ b/codex-rs/core/src/tools/handlers/multi_agents_v2/spawn.rs @@ -5,8 +5,10 @@ use crate::agent::control::render_input_preview; use crate::agent::next_thread_spawn_depth; use crate::agent::role::DEFAULT_ROLE_NAME; use crate::agent::role::apply_role_to_config; +use crate::agent::role::default_fork_context_for_role; use crate::session::turn_context::TurnEnvironment; use codex_protocol::AgentPath; +use codex_protocol::protocol::CollabAgentSpawnBeginEvent; use codex_protocol::protocol::InterAgentCommunication; use codex_protocol::protocol::Op; @@ -33,12 +35,13 @@ impl ToolHandler for Handler { } = invocation; let arguments = function_arguments(payload)?; let args: SpawnAgentArgs = parse_arguments(&arguments)?; - let fork_mode = args.fork_mode()?; let role_name = args .agent_type .as_deref() .map(str::trim) .filter(|role| !role.is_empty()); + let fork_context = default_fork_context_for_role(&turn.config, role_name); + let fork_mode = args.fork_mode(fork_context)?; let initial_operation = parse_collab_input(Some(args.message), /*items*/ None)?; let prompt = render_input_preview(&initial_operation); @@ -58,7 +61,7 @@ impl ToolHandler for Handler { .into(), ) .await; - let mut config = + let config = build_agent_spawn_config(&session.get_base_instructions().await, turn.as_ref())?; if matches!(fork_mode, Some(SpawnAgentForkMode::FullHistory)) { reject_full_fork_spawn_overrides( @@ -66,22 +69,7 @@ impl ToolHandler for Handler { args.model.as_deref(), args.reasoning_effort, )?; - } else { - apply_requested_spawn_agent_model_overrides( - &session, - turn.as_ref(), - &mut config, - args.model.as_deref(), - args.reasoning_effort, - ) - .await?; - apply_role_to_config(&mut config, role_name) - .await - .map_err(FunctionCallError::RespondToModel)?; } - apply_spawn_agent_runtime_overrides(&mut config, turn.as_ref())?; - apply_spawn_agent_overrides(&mut config, child_depth); - let spawn_source = thread_spawn_source( 
session.conversation_id, &turn.session_source, @@ -89,53 +77,159 @@ impl ToolHandler for Handler { role_name, Some(args.task_name.clone()), )?; - let result = session - .services - .agent_control - .spawn_agent_with_metadata( - config, - match (spawn_source.get_agent_path(), initial_operation) { - (Some(recipient), Op::UserInput { items, .. }) - if items - .iter() - .all(|item| matches!(item, UserInput::Text { .. })) => - { - Op::InterAgentCommunication { - communication: InterAgentCommunication::new( - turn.session_source - .get_agent_path() - .unwrap_or_else(AgentPath::root), - recipient, - Vec::new(), - prompt.clone(), - /*trigger_turn*/ true, - ), - } - } - (_, initial_operation) => initial_operation, - }, - Some(spawn_source), - SpawnAgentOptions { - fork_parent_spawn_call_id: fork_mode.as_ref().map(|_| call_id.clone()), - fork_mode, - environments: Some( - turn.environments - .iter() - .map(TurnEnvironment::selection) - .collect(), + let initial_agent_op = match (spawn_source.get_agent_path(), initial_operation) { + (Some(recipient), Op::UserInput { items, .. }) + if items + .iter() + .all(|item| matches!(item, UserInput::Text { .. 
})) => + { + Op::InterAgentCommunication { + communication: InterAgentCommunication::new( + turn.session_source + .get_agent_path() + .unwrap_or_else(AgentPath::root), + recipient, + Vec::new(), + prompt.clone(), + /*trigger_turn*/ true, ), - }, - ) - .await - .map_err(collab_spawn_error); - let (new_thread_id, new_agent_metadata, status) = match &result { - Ok(spawned_agent) => ( - Some(spawned_agent.thread_id), - Some(spawned_agent.metadata.clone()), - spawned_agent.status.clone(), - ), - Err(_) => (None, None, AgentStatus::NotFound), + } + } + (_, initial_operation) => initial_operation, }; + let mut candidates_to_try = collect_spawn_agent_model_candidates( + args.model_fallback_list.as_ref(), + args.model.as_deref(), + args.reasoning_effort, + ); + if candidates_to_try.is_empty() { + candidates_to_try.push(SpawnAgentModelCandidate { + model: None, + reasoning_effort: None, + }); + } + + let mut spawn_result = None; + for (idx, candidate) in candidates_to_try.iter().enumerate() { + let attempt_call_id = spawn_attempt_event_call_id(&call_id, idx); + let candidate_model = candidate.model.clone().unwrap_or_default(); + let candidate_reasoning_effort = candidate.reasoning_effort.unwrap_or_default(); + send_collab_agent_spawn_begin_event( + &session, + &turn, + attempt_call_id.clone(), + prompt.clone(), + candidate_model.clone(), + candidate_reasoning_effort, + ) + .await; + let mut candidate_config = config.clone(); + if !matches!(fork_mode, Some(SpawnAgentForkMode::FullHistory)) { + apply_requested_spawn_agent_model_overrides( + &session, + turn.as_ref(), + &mut candidate_config, + candidate.model.as_deref(), + candidate.reasoning_effort, + ) + .await?; + } + apply_role_to_config(&mut candidate_config, role_name) + .await + .map_err(FunctionCallError::RespondToModel)?; + if matches!(fork_mode, Some(SpawnAgentForkMode::FullHistory)) { + restore_forked_spawn_agent_model_config(&mut candidate_config, turn.as_ref()); + } + apply_spawn_agent_runtime_overrides(&mut 
candidate_config, turn.as_ref())?; + apply_spawn_agent_overrides(&mut candidate_config, child_depth); + let attempt_result = session + .services + .agent_control + .spawn_agent_with_metadata( + candidate_config, + initial_agent_op.clone(), + Some(spawn_source.clone()), + SpawnAgentOptions { + fork_parent_spawn_call_id: fork_mode.as_ref().map(|_| call_id.clone()), + fork_mode: fork_mode.clone(), + environments: Some( + turn.environments + .iter() + .map(TurnEnvironment::selection) + .collect(), + ), + }, + ) + .await; + match attempt_result { + Ok(spawned_agent) => { + let status = if idx + 1 < candidates_to_try.len() { + match probe_spawn_attempt_for_async_quota_exhaustion( + spawned_agent.status.clone(), + spawned_agent.thread_id, + &session.services.agent_control, + ) + .await + { + SpawnAttemptRetryDecision::Accept(status) => status, + SpawnAttemptRetryDecision::Retry(retry_status) => { + match close_quota_exhausted_spawn_attempt( + &session.services.agent_control, + spawned_agent.thread_id, + retry_status, + ) + .await + { + SpawnAttemptRetryDecision::Accept(status) => status, + SpawnAttemptRetryDecision::Retry(status) => { + send_collab_agent_spawn_retry_preempted_event( + &session, + &turn, + attempt_call_id, + prompt.clone(), + candidate_model, + candidate_reasoning_effort, + status, + ) + .await; + continue; + } + } + } + } + } else { + spawned_agent.status.clone() + }; + spawn_result = Some((spawned_agent, status, attempt_call_id)); + break; + } + Err(err) => { + send_collab_agent_spawn_error_event( + &session, + &turn, + attempt_call_id, + prompt.clone(), + candidate_model, + candidate_reasoning_effort, + &err, + ) + .await; + if spawn_should_retry_on_quota_exhaustion(&err) + && idx + 1 < candidates_to_try.len() + { + continue; + } + return Err(collab_spawn_error(err)); + } + } + } + let Some((spawned_agent, status, spawn_event_call_id)) = spawn_result else { + return Err(FunctionCallError::RespondToModel( + "No spawn attempts were executed".to_string(), 
+ )); + }; + let new_thread_id = Some(spawned_agent.thread_id); + let new_agent_metadata = Some(spawned_agent.metadata.clone()); let agent_snapshot = match new_thread_id { Some(thread_id) => { session @@ -173,7 +267,7 @@ impl ToolHandler for Handler { .send_event( &turn, CollabAgentSpawnEndEvent { - call_id, + call_id: spawn_event_call_id, sender_thread_id: session.conversation_id, new_thread_id, new_agent_nickname, @@ -186,7 +280,6 @@ impl ToolHandler for Handler { .into(), ) .await; - let _ = result?; let role_tag = role_name.unwrap_or(DEFAULT_ROLE_NAME); turn.session_telemetry.counter( "codex.multi_agent.spawn", @@ -218,25 +311,31 @@ struct SpawnAgentArgs { task_name: String, agent_type: Option, model: Option, + model_fallback_list: Option>, reasoning_effort: Option, fork_turns: Option, fork_context: Option, } impl SpawnAgentArgs { - fn fork_mode(&self) -> Result, FunctionCallError> { + fn fork_mode( + &self, + default_fork_context: bool, + ) -> Result, FunctionCallError> { if self.fork_context.is_some() { return Err(FunctionCallError::RespondToModel( "fork_context is not supported in MultiAgentV2; use fork_turns instead".to_string(), )); } - let fork_turns = self + let Some(fork_turns) = self .fork_turns .as_deref() .map(str::trim) .filter(|fork_turns| !fork_turns.is_empty()) - .unwrap_or("all"); + else { + return Ok(default_fork_context.then_some(SpawnAgentForkMode::FullHistory)); + }; if fork_turns.eq_ignore_ascii_case("none") { return Ok(None); diff --git a/codex-rs/core/src/tools/handlers/multi_agents_v2/watchdog_self_close.rs b/codex-rs/core/src/tools/handlers/multi_agents_v2/watchdog_self_close.rs new file mode 100644 index 0000000000..5da954e418 --- /dev/null +++ b/codex-rs/core/src/tools/handlers/multi_agents_v2/watchdog_self_close.rs @@ -0,0 +1,149 @@ +use super::*; + +pub(crate) struct Handler; + +impl ToolHandler for Handler { + type Output = WatchdogSelfCloseResult; + + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + fn matches_kind(&self, 
payload: &ToolPayload) -> bool { + matches!(payload, ToolPayload::Function { .. }) + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, + turn, + payload, + call_id, + .. + } = invocation; + let arguments = function_arguments(payload)?; + let _args: WatchdogSelfCloseArgs = parse_arguments(&arguments)?; + + let helper_thread_id = session.conversation_id; + if session + .services + .agent_control + .watchdog_owner_for_active_helper(helper_thread_id) + .await + .is_none() + { + return Err(FunctionCallError::RespondToModel( + "watchdog_self_close is only available in watchdog check-in threads.".to_string(), + )); + }; + + let receiver_agent = session + .services + .agent_control + .get_agent_metadata(helper_thread_id) + .unwrap_or_default(); + + session + .send_event( + &turn, + CollabCloseBeginEvent { + call_id: call_id.clone(), + sender_thread_id: helper_thread_id, + receiver_thread_id: helper_thread_id, + } + .into(), + ) + .await; + + let status = match session + .services + .agent_control + .subscribe_status(helper_thread_id) + .await + { + Ok(mut status_rx) => status_rx.borrow_and_update().clone(), + Err(err) => { + let status = session + .services + .agent_control + .get_status(helper_thread_id) + .await; + session + .send_event( + &turn, + CollabCloseEndEvent { + call_id: call_id.clone(), + sender_thread_id: helper_thread_id, + receiver_thread_id: helper_thread_id, + receiver_agent_nickname: receiver_agent.agent_nickname.clone(), + receiver_agent_role: receiver_agent.agent_role.clone(), + status, + } + .into(), + ) + .await; + return Err(collab_agent_error(helper_thread_id, err)); + } + }; + + let result = session + .services + .agent_control + .close_agent(helper_thread_id) + .await + .map_err(|err| collab_agent_error(helper_thread_id, err)) + .map(|_| ()); + + let receiver_agent = session + .services + .agent_control + .get_agent_metadata(helper_thread_id) + .unwrap_or_default(); + session + .send_event( + 
&turn, + CollabCloseEndEvent { + call_id, + sender_thread_id: helper_thread_id, + receiver_thread_id: helper_thread_id, + receiver_agent_nickname: receiver_agent.agent_nickname, + receiver_agent_role: receiver_agent.agent_role, + status: status.clone(), + } + .into(), + ) + .await; + + result?; + + Ok(WatchdogSelfCloseResult { + previous_status: status, + }) + } +} + +#[derive(Debug, Deserialize)] +struct WatchdogSelfCloseArgs {} + +#[derive(Debug, Serialize)] +pub(crate) struct WatchdogSelfCloseResult { + previous_status: AgentStatus, +} + +impl ToolOutput for WatchdogSelfCloseResult { + fn log_preview(&self) -> String { + tool_output_json_text(self, "watchdog_self_close") + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { + tool_output_response_item(call_id, payload, self, Some(true), "watchdog_self_close") + } + + fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue { + tool_output_code_mode_result(self, "watchdog_self_close") + } +} diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index c167fdbc3c..f5ba19187a 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -25,7 +25,6 @@ use codex_tools::build_tool_registry_plan; use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; - pub(crate) fn tool_user_shell_type(user_shell: &Shell) -> ToolUserShellType { match user_shell.shell_type { ShellType::Zsh => ToolUserShellType::Zsh, @@ -96,10 +95,13 @@ pub(crate) fn build_specs_with_discoverable_tools( use crate::tools::handlers::UnifiedExecHandler; use crate::tools::handlers::ViewImageHandler; use crate::tools::handlers::multi_agents::CloseAgentHandler; + use crate::tools::handlers::multi_agents::CompactParentContextHandler; + use crate::tools::handlers::multi_agents::ListAgentsHandler; use crate::tools::handlers::multi_agents::ResumeAgentHandler; use 
crate::tools::handlers::multi_agents::SendInputHandler; use crate::tools::handlers::multi_agents::SpawnAgentHandler; use crate::tools::handlers::multi_agents::WaitAgentHandler; + use crate::tools::handlers::multi_agents::WatchdogSelfCloseHandler; use crate::tools::handlers::multi_agents_v2::CloseAgentHandler as CloseAgentHandlerV2; use crate::tools::handlers::multi_agents_v2::FollowupTaskHandler as FollowupTaskHandlerV2; use crate::tools::handlers::multi_agents_v2::ListAgentsHandler as ListAgentsHandlerV2; @@ -208,6 +210,9 @@ pub(crate) fn build_specs_with_discoverable_tools( ToolHandlerKind::CloseAgentV2 => { builder.register_handler(handler.name, Arc::new(CloseAgentHandlerV2)); } + ToolHandlerKind::CompactParentContext => { + builder.register_handler(handler.name, Arc::new(CompactParentContextHandler)); + } ToolHandlerKind::CodeModeExecute => { builder.register_handler(handler.name, code_mode_handler.clone()); } @@ -223,6 +228,9 @@ pub(crate) fn build_specs_with_discoverable_tools( ToolHandlerKind::Goal => { builder.register_handler(handler.name, goal_handler.clone()); } + ToolHandlerKind::ListAgentsV1 => { + builder.register_handler(handler.name, Arc::new(ListAgentsHandler)); + } ToolHandlerKind::ListAgentsV2 => { builder.register_handler(handler.name, Arc::new(ListAgentsHandlerV2)); } @@ -296,6 +304,9 @@ pub(crate) fn build_specs_with_discoverable_tools( ToolHandlerKind::WaitAgentV2 => { builder.register_handler(handler.name, Arc::new(WaitAgentHandlerV2)); } + ToolHandlerKind::WatchdogSelfClose => { + builder.register_handler(handler.name, Arc::new(WatchdogSelfCloseHandler)); + } } } if let Some(deferred_mcp_tools) = deferred_mcp_tools.as_ref() { diff --git a/codex-rs/core/src/tools/spec_tests.rs b/codex-rs/core/src/tools/spec_tests.rs index f3ba9cc48d..4275807d25 100644 --- a/codex-rs/core/src/tools/spec_tests.rs +++ b/codex-rs/core/src/tools/spec_tests.rs @@ -13,17 +13,23 @@ use codex_models_manager::model_info::with_config_overrides; use 
codex_protocol::config_types::WebSearchMode; use codex_protocol::config_types::WindowsSandboxLevel; use codex_protocol::models::PermissionProfile; +use codex_protocol::models::VIEW_IMAGE_TOOL_NAME; use codex_protocol::openai_models::ConfigShellToolType; use codex_protocol::openai_models::ModelInfo; use codex_protocol::protocol::SessionSource; +use codex_protocol::protocol::SubAgentSource; use codex_tools::AdditionalProperties; +use codex_tools::CommandToolOptions; use codex_tools::ConfiguredToolSpec; use codex_tools::DiscoverableTool; +use codex_tools::FreeformTool; use codex_tools::JsonSchema; use codex_tools::LoadableToolSpec; +use codex_tools::ResponsesApiNamespace; use codex_tools::ResponsesApiNamespaceTool; use codex_tools::ResponsesApiTool; use codex_tools::ShellCommandBackendConfig; +use codex_tools::SpawnAgentToolOptions; use codex_tools::TOOL_SEARCH_TOOL_NAME; use codex_tools::TOOL_SUGGEST_TOOL_NAME; use codex_tools::ToolName; @@ -31,12 +37,37 @@ use codex_tools::ToolSpec; use codex_tools::ToolsConfig; use codex_tools::ToolsConfigParams; use codex_tools::UnifiedExecShellMode; +use codex_tools::ViewImageToolOptions; +use codex_tools::WaitAgentTimeoutOptions; use codex_tools::ZshForkConfig; +use codex_tools::create_apply_patch_freeform_tool; +use codex_tools::create_close_agent_tool_v1; +use codex_tools::create_close_agent_tool_v2; +use codex_tools::create_compact_parent_context_tool; +use codex_tools::create_exec_command_tool; +use codex_tools::create_list_agents_tool; +use codex_tools::create_list_agents_tool_v1; +use codex_tools::create_request_permissions_tool; +use codex_tools::create_request_user_input_tool; +use codex_tools::create_resume_agent_tool; +use codex_tools::create_send_input_tool_v1; +use codex_tools::create_send_message_tool; +use codex_tools::create_spawn_agent_tool_v1; +use codex_tools::create_spawn_agent_tool_v2; +use codex_tools::create_update_plan_tool; +use codex_tools::create_view_image_tool; +use 
codex_tools::create_wait_agent_tool_v1; +use codex_tools::create_wait_agent_tool_v2; +use codex_tools::create_watchdog_self_close_tool; +use codex_tools::create_write_stdin_tool; use codex_tools::mcp_call_tool_result_output_schema; use codex_tools::mcp_tool_to_deferred_responses_api_tool; +use codex_tools::request_permissions_tool_description; +use codex_tools::request_user_input_tool_description; use codex_utils_absolute_path::AbsolutePathBuf; use core_test_support::assert_regex_match; use pretty_assertions::assert_eq; +use serde_json::json; use std::collections::BTreeMap; use std::path::PathBuf; @@ -193,10 +224,171 @@ fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> { } fn find_tool<'a>(tools: &'a [ConfiguredToolSpec], expected_name: &str) -> &'a ConfiguredToolSpec { - tools + if let Some(tool) = tools.iter().find(|tool| tool.name() == expected_name) { + return tool; + } + for tool in tools { + let ToolSpec::Namespace(namespace) = &tool.spec else { + continue; + }; + if let Some(tool) = namespace.tools.iter().find_map(|tool| match tool { + ResponsesApiNamespaceTool::Function(tool) if tool.name == expected_name => { + Some(tool.clone()) + } + _ => None, + }) { + return Box::leak(Box::new(ConfiguredToolSpec::new( + ToolSpec::Function(tool), + /*supports_parallel_tool_calls*/ false, + ))); + } + } + panic!("expected tool {expected_name}") +} + +fn find_namespaced_tool( + tools: &[ConfiguredToolSpec], + namespace_name: &str, + expected_name: &str, +) -> ConfiguredToolSpec { + let namespace = tools .iter() - .find(|tool| tool.name() == expected_name) - .unwrap_or_else(|| panic!("expected tool {expected_name}")) + .find_map(|tool| match &tool.spec { + ToolSpec::Namespace(namespace) if namespace.name == namespace_name => Some(namespace), + _ => None, + }) + .unwrap_or_else(|| panic!("expected namespace {namespace_name}")); + + let tool = namespace + .tools + .iter() + .find_map(|tool| match tool { + ResponsesApiNamespaceTool::Function(tool) if tool.name 
== expected_name => { + Some(tool.clone()) + } + _ => None, + }) + .unwrap_or_else(|| panic!("expected tool {expected_name} in {namespace_name}")); + + ConfiguredToolSpec::new( + ToolSpec::Function(tool), + /*supports_parallel_tool_calls*/ false, + ) +} + +fn assert_lacks_tool_name(tools: &[ConfiguredToolSpec], expected_absent: &str) { + let names = tools + .iter() + .map(ConfiguredToolSpec::name) + .collect::>(); + assert!( + !names.contains(&expected_absent), + "expected tool {expected_absent} to be absent; had: {names:?}" + ); +} + +fn assert_contains_top_level_tool_name(tools: &[ConfiguredToolSpec], expected: &str) { + let names = tools + .iter() + .map(ConfiguredToolSpec::name) + .collect::>(); + assert!( + names.contains(&expected), + "expected top-level tool {expected} to be present; had: {names:?}" + ); +} + +fn assert_lacks_top_level_tool_name(tools: &[ConfiguredToolSpec], expected_absent: &str) { + let names = tools + .iter() + .map(ConfiguredToolSpec::name) + .collect::>(); + assert!( + !names.contains(&expected_absent), + "expected top-level tool {expected_absent} to be absent; had: {names:?}" + ); +} + +fn request_user_input_tool_spec(default_mode_request_user_input: bool) -> ToolSpec { + create_request_user_input_tool(request_user_input_tool_description( + default_mode_request_user_input, + )) +} + +fn spawn_agent_tool_options(config: &ToolsConfig) -> SpawnAgentToolOptions<'_> { + SpawnAgentToolOptions { + available_models: &config.available_models, + agent_type_description: if config.agent_type_description.is_empty() { + crate::agent::role::spawn_tool_spec::build(&std::collections::BTreeMap::new()) + } else { + config.agent_type_description.clone() + }, + hide_agent_type_model_reasoning: config.hide_spawn_agent_metadata, + include_usage_hint: config.spawn_agent_usage_hint, + usage_hint_text: config.spawn_agent_usage_hint_text.clone(), + max_concurrent_threads_per_session: config.max_concurrent_threads_per_session, + } +} + +fn 
wait_agent_timeout_options() -> WaitAgentTimeoutOptions { + WaitAgentTimeoutOptions { + default_timeout_ms: DEFAULT_WAIT_TIMEOUT_MS, + min_timeout_ms: MIN_WAIT_TIMEOUT_MS, + max_timeout_ms: MAX_WAIT_TIMEOUT_MS, + } +} + +fn create_watchdog_tools_namespace(tools: Vec) -> ToolSpec { + let tools = tools + .into_iter() + .map(|tool| match tool { + ToolSpec::Function(tool) => ResponsesApiNamespaceTool::Function(tool), + ToolSpec::Namespace(_) + | ToolSpec::Freeform(_) + | ToolSpec::LocalShell {} + | ToolSpec::ImageGeneration { .. } + | ToolSpec::ToolSearch { .. } + | ToolSpec::WebSearch { .. } => { + panic!("watchdog namespace can only contain function tools") + } + }) + .collect(); + ToolSpec::Namespace(ResponsesApiNamespace { + name: "watchdog".to_string(), + description: + "Watchdog-only tools for parent-thread recovery and watchdog check-in lifecycle control." + .to_string(), + tools, + }) +} + +fn strip_descriptions_schema(schema: &mut JsonSchema) { + schema.description = None; + if let Some(items) = &mut schema.items { + strip_descriptions_schema(items); + } + if let Some(properties) = &mut schema.properties { + for value in properties.values_mut() { + strip_descriptions_schema(value); + } + } + if let Some(AdditionalProperties::Schema(schema)) = &mut schema.additional_properties { + strip_descriptions_schema(schema); + } +} + +fn strip_descriptions_tool(spec: &mut ToolSpec) { + match spec { + ToolSpec::ToolSearch { parameters, .. } => strip_descriptions_schema(parameters), + ToolSpec::Function(ResponsesApiTool { parameters, .. }) => { + strip_descriptions_schema(parameters); + } + ToolSpec::Namespace(_) + | ToolSpec::Freeform(FreeformTool { .. }) + | ToolSpec::LocalShell {} + | ToolSpec::ImageGeneration { .. } + | ToolSpec::WebSearch { .. 
} => {} + } } fn find_namespace_function_tool<'a>( @@ -323,6 +515,657 @@ async fn model_provided_unified_exec_is_blocked_for_windows_sandboxed_policies() assert_eq!(config.shell_type, expected_shell_type); } +#[tokio::test] +async fn test_full_toolset_specs_for_gpt5_codex_unified_exec_web_search() { + let model_info = model_info_from_models_json("gpt-5-codex").await; + let mut features = Features::with_defaults(); + features.enable(Feature::UnifiedExec); + let available_models = Vec::new(); + let config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Live), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + + // Build actual map name -> spec + use std::collections::BTreeMap; + use std::collections::HashSet; + let mut actual: BTreeMap = BTreeMap::from([]); + let mut duplicate_names = Vec::new(); + for t in &tools { + let name = t.name().to_string(); + if actual.insert(name.clone(), t.spec.clone()).is_some() { + duplicate_names.push(name); + } + } + assert!( + duplicate_names.is_empty(), + "duplicate tool entries detected: {duplicate_names:?}" + ); + + // Build expected from the same helpers used by the builder. 
+ let mut expected: BTreeMap = BTreeMap::from([]); + for spec in [ + create_exec_command_tool(CommandToolOptions { + allow_login_shell: true, + exec_permission_approvals_enabled: false, + }), + create_write_stdin_tool(), + create_update_plan_tool(), + request_user_input_tool_spec(/*default_mode_request_user_input*/ false), + create_apply_patch_freeform_tool(), + ToolSpec::WebSearch { + external_web_access: Some(true), + filters: None, + user_location: None, + search_context_size: None, + search_content_types: None, + }, + create_view_image_tool(ViewImageToolOptions { + can_request_original_image_detail: config.can_request_original_image_detail, + }), + ] { + expected.insert(spec.name().to_string(), spec); + } + let mut collab_specs = if config.multi_agent_v2 { + vec![ + create_spawn_agent_tool_v2(spawn_agent_tool_options(&config)), + create_send_message_tool(), + create_wait_agent_tool_v2(wait_agent_timeout_options()), + create_close_agent_tool_v2(), + create_list_agents_tool(), + ] + } else { + let mut collab_specs = vec![ + create_spawn_agent_tool_v1(spawn_agent_tool_options(&config)), + create_send_input_tool_v1(), + create_resume_agent_tool(), + create_wait_agent_tool_v1(wait_agent_timeout_options()), + create_close_agent_tool_v1(), + ]; + if config.agent_watchdog { + collab_specs.push(create_list_agents_tool_v1(config.agent_watchdog)); + } + collab_specs + }; + for spec in collab_specs.split_off(0) { + expected.insert(spec.name().to_string(), spec); + } + if config.agent_watchdog { + let spec = create_watchdog_tools_namespace(vec![ + create_compact_parent_context_tool(), + create_watchdog_self_close_tool(), + ]); + expected.insert(spec.name().to_string(), spec); + } + + if config.exec_permission_approvals_enabled { + let spec = create_request_permissions_tool(request_permissions_tool_description()); + expected.insert(spec.name().to_string(), spec); + } + + // Exact name set match — this is the only test allowed to fail when tools change. 
+ let actual_names: HashSet<_> = actual.keys().cloned().collect(); + let expected_names: HashSet<_> = expected.keys().cloned().collect(); + assert_eq!(actual_names, expected_names, "tool name set mismatch"); + + // Compare specs ignoring human-readable descriptions. + for name in expected.keys() { + let mut a = actual.get(name).expect("present").clone(); + let mut e = expected.get(name).expect("present").clone(); + strip_descriptions_tool(&mut a); + strip_descriptions_tool(&mut e); + assert_eq!(a, e, "spec mismatch for {name}"); + } +} + +#[tokio::test] +async fn test_build_specs_collab_tools_enabled() { + let config = test_config().await; + let model_info = construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::Collab); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + assert_contains_tool_names( + &tools, + &["spawn_agent", "send_input", "wait_agent", "close_agent"], + ); + assert_lacks_tool_name(&tools, "spawn_agents_on_csv"); + assert_lacks_tool_name(&tools, "list_agents"); + assert_lacks_tool_name(&tools, "watchdog"); +} + +#[tokio::test] +async fn test_build_specs_watchdog_collab_tools_include_self_close_tool() { + let config = test_config().await; + let model_info = construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::Collab); + features.enable(Feature::AgentWatchdog); + features.normalize_dependencies(); + let 
available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + + assert_contains_top_level_tool_name(&tools, "watchdog"); + assert_contains_top_level_tool_name(&tools, "spawn_agent"); + assert_contains_top_level_tool_name(&tools, "send_input"); + assert_contains_top_level_tool_name(&tools, "wait_agent"); + assert_contains_top_level_tool_name(&tools, "close_agent"); + assert_contains_top_level_tool_name(&tools, "list_agents"); + assert_contains_tool_names(&tools, &["list_agents", "close_agent"]); + assert_lacks_top_level_tool_name(&tools, "watchdog_self_close"); + assert_lacks_top_level_tool_name(&tools, "compact_parent_context"); + + let watchdog_self_close = find_namespaced_tool(&tools, "watchdog", "watchdog_self_close"); + let ToolSpec::Function(ResponsesApiTool { + defer_loading: Some(deferred), + .. + }) = &watchdog_self_close.spec + else { + panic!("watchdog_self_close should be a function tool"); + }; + assert!(*deferred); + let compact_parent_context = find_namespaced_tool(&tools, "watchdog", "compact_parent_context"); + let ToolSpec::Function(ResponsesApiTool { + defer_loading: Some(deferred), + .. 
+ }) = &compact_parent_context.spec + else { + panic!("compact_parent_context should be a function tool"); + }; + assert!(*deferred); +} + +#[tokio::test] +async fn test_build_specs_multi_agent_v2_uses_task_names_and_hides_resume() { + let config = test_config().await; + let model_info = construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::Collab); + features.enable(Feature::MultiAgentV2); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + assert_contains_tool_names( + &tools, + &[ + "spawn_agent", + "send_message", + "assign_task", + "wait_agent", + "close_agent", + "list_agents", + ], + ); + + let spawn_agent = find_tool(&tools, "spawn_agent"); + let ToolSpec::Function(ResponsesApiTool { + parameters, + output_schema, + .. 
+ }) = &spawn_agent.spec + else { + panic!("spawn_agent should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("spawn_agent should use object params"); + let required = parameters.required.as_ref(); + assert!(properties.contains_key("task_name")); + assert_eq!( + required, + Some(&vec!["task_name".to_string(), "message".to_string()]) + ); + let output_schema = output_schema + .as_ref() + .expect("spawn_agent should define output schema"); + assert_eq!( + output_schema["required"], + json!(["agent_id", "task_name", "nickname"]) + ); + + let send_message = find_tool(&tools, "send_message"); + let ToolSpec::Function(ResponsesApiTool { parameters, .. }) = &send_message.spec else { + panic!("send_message should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("send_message should use object params"); + let required = parameters.required.as_ref(); + assert!(properties.contains_key("target")); + assert!(properties.contains_key("message")); + assert!(!properties.contains_key("items")); + assert_eq!( + required, + Some(&vec!["target".to_string(), "message".to_string()]) + ); + + let assign_task = find_tool(&tools, "assign_task"); + let ToolSpec::Function(ResponsesApiTool { parameters, .. }) = &assign_task.spec else { + panic!("assign_task should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("assign_task should use object params"); + let required = parameters.required.as_ref(); + assert!(properties.contains_key("target")); + assert!(properties.contains_key("message")); + assert!(!properties.contains_key("items")); + assert_eq!( + required, + Some(&vec!["target".to_string(), "message".to_string()]) + ); + + let wait_agent = find_tool(&tools, "wait_agent"); + let ToolSpec::Function(ResponsesApiTool { + parameters, + output_schema, + .. 
+ }) = &wait_agent.spec + else { + panic!("wait_agent should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("wait_agent should use object params"); + let required = parameters.required.as_ref(); + assert!(properties.contains_key("timeout_ms")); + assert!(!properties.contains_key("targets")); + assert_eq!(required, None); + let output_schema = output_schema + .as_ref() + .expect("wait_agent should define output schema"); + assert_eq!( + output_schema["properties"]["message"]["description"], + json!("Brief wait summary without the agent's final content.") + ); + + let list_agents = find_tool(&tools, "list_agents"); + let ToolSpec::Function(ResponsesApiTool { + parameters, + output_schema, + .. + }) = &list_agents.spec + else { + panic!("list_agents should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("list_agents should use object params"); + let required = parameters.required.as_ref(); + assert!(properties.contains_key("path_prefix")); + assert_eq!(required, None); + let output_schema = output_schema + .as_ref() + .expect("list_agents should define output schema"); + assert_eq!( + output_schema["properties"]["agents"]["items"]["required"], + json!(["agent_name", "agent_status", "last_task_message"]) + ); + assert_lacks_tool_name(&tools, "send_input"); + assert_lacks_tool_name(&tools, "resume_agent"); +} + +#[tokio::test] +async fn test_build_specs_enable_fanout_enables_agent_jobs_and_collab_tools() { + let config = test_config().await; + let model_info = construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::SpawnCsv); + features.normalize_dependencies(); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: 
Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + assert_contains_tool_names( + &tools, + &[ + "spawn_agent", + "send_input", + "wait_agent", + "close_agent", + "spawn_agents_on_csv", + ], + ); +} + +#[tokio::test] +async fn view_image_tool_omits_detail_without_original_detail_feature() { + let config = test_config().await; + let mut model_info = construct_model_info_offline("gpt-5-codex", &config); + model_info.supports_image_detail_original = true; + let features = Features::with_defaults(); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + let view_image = find_tool(&tools, VIEW_IMAGE_TOOL_NAME); + let ToolSpec::Function(ResponsesApiTool { parameters, .. 
}) = &view_image.spec else { + panic!("view_image should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("view_image should use an object schema"); + assert!(!properties.contains_key("detail")); +} + +#[tokio::test] +async fn view_image_tool_includes_detail_with_original_detail_feature() { + let config = test_config().await; + let mut model_info = construct_model_info_offline("gpt-5-codex", &config); + model_info.supports_image_detail_original = true; + let mut features = Features::with_defaults(); + features.enable(Feature::ImageDetailOriginal); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + let view_image = find_tool(&tools, VIEW_IMAGE_TOOL_NAME); + let ToolSpec::Function(ResponsesApiTool { parameters, .. 
}) = &view_image.spec else { + panic!("view_image should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("view_image should use an object schema"); + assert!(properties.contains_key("detail")); + let Some(detail_schema) = properties.get("detail") else { + panic!("view_image detail should include a description"); + }; + let Some(description) = detail_schema.description.as_deref() else { + panic!("view_image detail should include a description"); + }; + assert!(description.contains("only supported value is `original`")); + assert!(description.contains("omit this field for default resized behavior")); +} + +#[tokio::test] +async fn test_build_specs_agent_job_worker_tools_enabled() { + let config = test_config().await; + let model_info = construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::SpawnCsv); + features.normalize_dependencies(); + features.enable(Feature::Sqlite); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::SubAgent(SubAgentSource::Other( + "agent_job:test".to_string(), + )), + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + assert_contains_tool_names( + &tools, + &[ + "spawn_agent", + "send_input", + "resume_agent", + "wait_agent", + "close_agent", + "spawn_agents_on_csv", + "report_agent_job_result", + ], + ); + assert_lacks_tool_name(&tools, "request_user_input"); +} + +#[tokio::test] +async fn request_user_input_description_reflects_default_mode_feature_flag() { + let config = test_config().await; + 
let model_info = construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + let request_user_input_tool = find_tool(&tools, "request_user_input"); + assert_eq!( + request_user_input_tool.spec, + request_user_input_tool_spec(/*default_mode_request_user_input*/ false) + ); + + features.enable(Feature::DefaultModeRequestUserInput); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + let request_user_input_tool = find_tool(&tools, "request_user_input"); + assert_eq!( + request_user_input_tool.spec, + request_user_input_tool_spec(/*default_mode_request_user_input*/ true) + ); +} + +#[tokio::test] +async fn request_permissions_requires_feature_flag() { + let config = test_config().await; + let model_info = construct_model_info_offline("gpt-5-codex", &config); + let features = Features::with_defaults(); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + 
model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + assert_lacks_tool_name(&tools, "request_permissions"); + + let mut features = Features::with_defaults(); + features.enable(Feature::RequestPermissionsTool); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + let request_permissions_tool = find_tool(&tools, "request_permissions"); + assert_eq!( + request_permissions_tool.spec, + create_request_permissions_tool(request_permissions_tool_description()) + ); +} + +#[tokio::test] +async fn request_permissions_tool_is_independent_from_additional_permissions() { + let config = test_config().await; + let model_info = construct_model_info_offline("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::ExecPermissionApprovals); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + 
permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*app_tools*/ None, + &[], + ) + .build(); + + assert_lacks_tool_name(&tools, "request_permissions"); +} + #[tokio::test] async fn get_memory_requires_feature_flag() { let config = test_config().await; @@ -386,7 +1229,16 @@ async fn assert_model_tools( let model_visible_specs = router.model_visible_specs(); let tool_names = model_visible_specs .iter() - .map(ToolSpec::name) + .flat_map(|tool| match tool { + ToolSpec::Namespace(namespace) => namespace + .tools + .iter() + .map(|tool| match tool { + codex_tools::ResponsesApiNamespaceTool::Function(tool) => tool.name.as_str(), + }) + .collect::>(), + _ => vec![tool.name()], + }) .collect::>(); assert_eq!(&tool_names, &expected_tools,); } diff --git a/codex-rs/core/subagent_prompt.md b/codex-rs/core/subagent_prompt.md new file mode 100644 index 0000000000..1ca5e2d845 --- /dev/null +++ b/codex-rs/core/subagent_prompt.md @@ -0,0 +1,39 @@ +# You are a Subagent + +You are a **subagent** in a multi-agent Codex session. Your role is no longer root. Your goal is the task given by the parent/root agent. + +Term definitions in this file: +- **parent thread**: the thread that spawned this subagent. +- **root thread**: the top-level user-facing thread. +- **`send_input`**: send a message to an existing agent thread; it does not spawn agents. Delivery is asynchronous. +- **durable state**: thread-level task state needed across later turns/check-ins (not disk/database persistence). +- In this runtime, when a subagent calls `send_input`, `id = "parent"` and `id = "root"` both route to the immediate parent thread. + +## Subagent Responsibilities + +- Stay within parent/root scope (listed files/questions/constraints). Use additional files/tools only when needed to complete or verify the task. 
+- Blocking question = one clarification to parent via `send_input`. Ask only if missing information would change user-visible output, tool/action choice (multi-agent tool, target thread, or ask-vs-continue), file edits, control flow, or durable-state decisions; otherwise state one assumption and continue. +- Prefer concrete progress: edit files, run commands, and validate outcomes. +- Your responses go to the root/parent agent, not the end user. + +## Multi-Agent Guidance (Upstream Surface) + +The only multi-agent tools available in this environment are `spawn_agent`, `send_input`, `wait`, `close_agent`, and `list_agents`. + +Important: to coordinate with parent/root, use `send_input`. A plain assistant message in your own thread does not reliably notify the parent. + +You can call `send_input` without an `id` (or with `id = "parent"` / `id = "root"`); these forms target the immediate parent thread in this runtime. + +## Reporting Expectations + +When you make meaningful progress or complete a task, report back with: + +- The key outcome. +- Files changed (with paths). +- Commands run. +- Validation performed (tests, checks, or observed outputs). +- Risks, follow-ups, or open questions. + +Be specific enough that the root agent can integrate your work safely. + +Do not reference multi-agent tools that do not exist in the upstream surface. diff --git a/codex-rs/core/subagent_watchdog_prompt.md b/codex-rs/core/subagent_watchdog_prompt.md new file mode 100644 index 0000000000..cae2a4cf3d --- /dev/null +++ b/codex-rs/core/subagent_watchdog_prompt.md @@ -0,0 +1,10 @@ +## Watchdog-only Guidance + +If you are acting as a watchdog check-in agent, the deferred `watchdog` namespace may be available +through `tool_search`. + +- Use `watchdog.compact_parent_context` only when the parent thread is idle and appears stuck. 
+- `watchdog.compact_parent_context` is not part of the general subagent tool surface; do not + mention or rely on it unless you are explicitly operating as a watchdog check-in agent. +- `watchdog.watchdog_self_close` is also available to this watchdog thread and can be used to end + the check-in when work is complete. diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index 600a58c720..62bc61a61a 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -429,6 +429,8 @@ impl TestCodexBuilder { &config, codex_core::test_support::auth_manager_from_auth(auth.clone()), SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig::default(), Arc::clone(&environment_manager), /*analytics_events_client*/ None, diff --git a/codex-rs/core/tests/suite/agent_jobs.rs b/codex-rs/core/tests/suite/agent_jobs.rs index b275b1878a..9810853764 100644 --- a/codex-rs/core/tests/suite/agent_jobs.rs +++ b/codex-rs/core/tests/suite/agent_jobs.rs @@ -205,11 +205,16 @@ fn message_input_texts(body: &Value) -> Vec { }; items .iter() - .filter(|item| item.get("type").and_then(Value::as_str) == Some("message")) + .filter(|item| { + item.get("role").and_then(Value::as_str).is_some() + || item.get("type").and_then(Value::as_str) == Some("message") + }) .filter_map(|item| item.get("content").and_then(Value::as_array)) .flatten() - .filter(|span| span.get("type").and_then(Value::as_str) == Some("input_text")) - .filter_map(|span| span.get("text").and_then(Value::as_str)) + .filter_map(|span| match span.get("type").and_then(Value::as_str) { + Some("input_text") | None => span.get("text").and_then(Value::as_str), + _ => None, + }) .map(str::to_string) .collect() } @@ -320,9 +325,31 @@ async fn spawn_agents_on_csv_runs_and_exports() -> Result<()> { test.submit_turn("run batch job").await?; let output = fs::read_to_string(&output_path)?; - 
assert!(output.contains("result_json")); - assert!(output.contains("item_id")); - assert!(output.contains("\"item_id\"")); + let mut lines = output.lines(); + let headers = lines.next().expect("csv headers"); + let header_cols = parse_simple_csv_line(headers); + let status_index = header_cols + .iter() + .position(|header| header == "status") + .expect("status column"); + let result_json_index = header_cols + .iter() + .position(|header| header == "result_json") + .expect("result_json column"); + assert!(header_cols.iter().any(|header| header == "result_json")); + assert!(header_cols.iter().any(|header| header == "item_id")); + let rows: Vec> = lines.map(parse_simple_csv_line).collect(); + assert_eq!(rows.len(), 2); + assert_eq!( + rows.iter() + .map(|cols| cols[status_index].as_str()) + .collect::>(), + vec!["completed", "completed"] + ); + assert!( + rows.iter() + .all(|cols| !cols[result_json_index].trim().is_empty()) + ); Ok(()) } @@ -423,21 +450,28 @@ async fn spawn_agents_on_csv_stop_halts_future_items() -> Result<()> { test.submit_turn("run job").await?; let output = fs::read_to_string(&output_path)?; - let rows: Vec<&str> = output.lines().skip(1).collect(); + let mut lines = output.lines(); + let headers = lines.next().expect("csv headers"); + let header_cols = parse_simple_csv_line(headers); + let job_id_index = header_cols + .iter() + .position(|header| header == "job_id") + .expect("job_id column"); + let rows: Vec<&str> = lines.collect(); assert_eq!(rows.len(), 3); - let job_id = rows + let job_id: String = rows .first() - .and_then(|line| { - parse_simple_csv_line(line) - .iter() - .find(|value| value.len() == 36) - .cloned() - }) + .map(|line| parse_simple_csv_line(line)) + .and_then(|cols| cols.get(job_id_index).cloned()) .expect("job_id from csv"); let db = test.codex.state_db().expect("state db"); let job = db.get_agent_job(job_id.as_str()).await?.expect("job"); - assert_eq!(job.status, codex_state::AgentJobStatus::Cancelled); let progress = 
db.get_agent_job_progress(job_id.as_str()).await?; + assert_eq!( + job.status, + codex_state::AgentJobStatus::Cancelled, + "unexpected final job state: job={job:?} progress={progress:?} output={output}" + ); assert_eq!(progress.total_items, 3); assert_eq!(progress.completed_items, 1); assert_eq!(progress.failed_items, 0); diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index b8871cf524..3ab2de0a82 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -993,6 +993,8 @@ async fn chatgpt_auth_sends_correct_request() { .with_auth(create_dummy_codex_auth()) .with_config(move |config| { config.model_provider = model_provider; + // This test only covers ChatGPT auth headers; tool discovery has separate coverage. + config.features.disable(Feature::ToolSuggest).unwrap(); }); let test = builder .build(&server) @@ -1106,6 +1108,8 @@ async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() { &config, auth_manager, SessionSource::Exec, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig { default_mode_request_user_input: config .features @@ -2198,12 +2202,18 @@ async fn includes_developer_instructions_message_in_request() { .iter() .filter(|item| item.get("role").and_then(|role| role.as_str()) == Some("developer")) .collect(); + let developer_contents: Vec<&str> = developer_messages + .iter() + .filter_map(|item| item.get("content").and_then(serde_json::Value::as_array)) + .flat_map(|content| content.iter()) + .filter(|span| span.get("type").and_then(serde_json::Value::as_str) == Some("input_text")) + .filter_map(|span| span.get("text").and_then(serde_json::Value::as_str)) + .collect(); assert!( - developer_messages + developer_contents .iter() - .any(|item| message_input_texts(item).contains(&"be useful")), - "expected developer instructions in a developer message, got {:?}", - request_body["input"] + .any(|content| content.contains("be 
useful")), + "expected developer instructions in a developer message, got {developer_contents:?}", ); assert_message_role(&request_body["input"][1], "user"); diff --git a/codex-rs/core/tests/suite/fork_thread.rs b/codex-rs/core/tests/suite/fork_thread.rs index 50e0dc1862..eab311b9b1 100644 --- a/codex-rs/core/tests/suite/fork_thread.rs +++ b/codex-rs/core/tests/suite/fork_thread.rs @@ -2,6 +2,8 @@ use codex_core::ForkSnapshot; use codex_core::NewThread; use codex_core::parse_turn_item; use codex_protocol::items::TurnItem; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::Op; @@ -21,6 +23,94 @@ use wiremock::ResponseTemplate; use wiremock::matchers::method; use wiremock::matchers::path; +fn find_user_input_positions(items: &[RolloutItem]) -> Vec { + let mut pos = Vec::new(); + for (i, it) in items.iter().enumerate() { + if let RolloutItem::ResponseItem(response_item) = it + && let Some(TurnItem::UserMessage(_)) = parse_turn_item(response_item) + { + pos.push(i); + } + } + pos +} + +fn truncate_before_nth_user_message( + items: &[RolloutItem], + nth_user_message: i64, +) -> Vec { + let Ok(nth_user_message) = usize::try_from(nth_user_message) else { + return items.to_vec(); + }; + let user_inputs = find_user_input_positions(items); + let Some(cut_idx) = user_inputs.get(nth_user_message).copied() else { + return items.to_vec(); + }; + items[..cut_idx].to_vec() +} + +fn test_user_message(text: &str) -> RolloutItem { + RolloutItem::ResponseItem(ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::OutputText { + text: text.to_string(), + }], + phase: None, + }) +} + +#[test] +fn truncate_before_nth_user_message_keeps_full_history_for_out_of_range_boundaries() { + let rollout_items = vec![test_user_message("u1"), test_user_message("u2")]; + + pretty_assertions::assert_eq!( + 
serde_json::to_value(truncate_before_nth_user_message( + &rollout_items, + /*nth_user_message*/ 2, + )) + .unwrap(), + serde_json::to_value(&rollout_items).unwrap(), + ); +} + +#[test] +fn truncate_before_nth_user_message_keeps_full_history_for_i64_max_boundaries() { + let rollout_items = vec![test_user_message("u1"), test_user_message("u2")]; + + pretty_assertions::assert_eq!( + serde_json::to_value(truncate_before_nth_user_message(&rollout_items, i64::MAX,)).unwrap(), + serde_json::to_value(&rollout_items).unwrap(), + ); +} + +fn read_items_materialized(p: &std::path::Path) -> Vec { + let text = + std::fs::read_to_string(p).unwrap_or_else(|err| panic!("read rollout file {p:?}: {err}")); + let mut items: Vec = Vec::new(); + for line in text.lines() { + if line.trim().is_empty() { + continue; + } + let v: serde_json::Value = + serde_json::from_str(line).unwrap_or_else(|err| panic!("jsonl line parse: {err}")); + let rl: RolloutLine = + serde_json::from_value(v).unwrap_or_else(|err| panic!("rollout line parse: {err}")); + match rl.item { + RolloutItem::SessionMeta(_) => {} + RolloutItem::ForkReference(reference) => { + let parent_items = read_items_materialized(&reference.rollout_path); + items.extend(truncate_before_nth_user_message( + &parent_items, + reference.nth_user_message, + )); + } + other => items.push(other), + } + } + items +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn fork_thread_twice_drops_to_first_message() { skip_if_no_network!(); @@ -70,20 +160,7 @@ async fn fork_thread_twice_drops_to_first_message() { // Compute expected prefixes after each fork by truncating base rollout // strictly before the nth user input (0-based). 
- let base_items = read_rollout_items(&base_path); - let find_user_input_positions = |items: &[RolloutItem]| -> Vec { - let mut pos = Vec::new(); - for (i, it) in items.iter().enumerate() { - if let RolloutItem::ResponseItem(response_item) = it - && let Some(TurnItem::UserMessage(_)) = parse_turn_item(response_item) - { - // Consider any user message as an input boundary; recorder stores both EventMsg and ResponseItem. - // We specifically look for input items, which are represented as ContentItem::InputText. - pos.push(i); - } - } - pos - }; + let base_items = read_items_materialized(&base_path); let user_inputs = find_user_input_positions(&base_items); // After cutting at nth user input (n=1 → second user message), cut strictly before that input. @@ -111,13 +188,13 @@ async fn fork_thread_twice_drops_to_first_message() { let fork1_path = codex_fork1.rollout_path().expect("rollout path"); // GetHistory on fork1 flushed; the file is ready. - let fork1_items = read_rollout_items(&fork1_path); + let fork1_items = read_items_materialized(&fork1_path); pretty_assertions::assert_eq!( serde_json::to_value(&fork1_items).unwrap(), serde_json::to_value(&expected_after_first).unwrap() ); - // Fork again with n=0 → drops the (new) last user message, leaving only the first. + // Fork again with n=0 cuts before the remaining first user message. let NewThread { thread: codex_fork2, .. @@ -135,14 +212,14 @@ async fn fork_thread_twice_drops_to_first_message() { let fork2_path = codex_fork2.rollout_path().expect("rollout path"); // GetHistory on fork2 flushed; the file is ready. 
- let fork1_items = read_rollout_items(&fork1_path); + let fork1_items = read_items_materialized(&fork1_path); let fork1_user_inputs = find_user_input_positions(&fork1_items); let cut_last_on_fork1 = fork1_user_inputs .get(fork1_user_inputs.len().saturating_sub(1)) .copied() .unwrap_or(0); let expected_after_second: Vec = fork1_items[..cut_last_on_fork1].to_vec(); - let fork2_items = read_rollout_items(&fork2_path); + let fork2_items = read_items_materialized(&fork2_path); pretty_assertions::assert_eq!( serde_json::to_value(&fork2_items).unwrap(), serde_json::to_value(&expected_after_second).unwrap() @@ -186,7 +263,7 @@ async fn fork_thread_from_history_does_not_require_source_rollout_path() { let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; let source_path = codex.rollout_path().expect("source rollout path"); - let source_items = read_rollout_items(&source_path); + let source_items = read_items_materialized(&source_path); let NewThread { thread: forked_thread, .. 
@@ -207,7 +284,7 @@ async fn fork_thread_from_history_does_not_require_source_rollout_path() { .expect("fork from stored history"); let forked_path = forked_thread.rollout_path().expect("forked rollout path"); - let forked_items = read_rollout_items(&forked_path); + let forked_items = read_items_materialized(&forked_path); let forked_items = forked_items .iter() .map(|item| serde_json::to_value(item).unwrap()) @@ -222,28 +299,62 @@ async fn fork_thread_from_history_does_not_require_source_rollout_path() { ); } -fn read_rollout_items(path: &std::path::Path) -> Vec { - let text = match std::fs::read_to_string(path) { - Ok(text) => text, - Err(err) => panic!("failed to read rollout file {}: {err}", path.display()), - }; - let mut items: Vec = Vec::new(); - for line in text.lines() { - if line.trim().is_empty() { - continue; - } - let v: serde_json::Value = match serde_json::from_str(line) { - Ok(value) => value, - Err(err) => panic!("failed to parse rollout JSON line `{line}`: {err}"), - }; - let rl: RolloutLine = match serde_json::from_value(v) { - Ok(line) => line, - Err(err) => panic!("failed to parse rollout line `{line}`: {err}"), - }; - match rl.item { - RolloutItem::SessionMeta(_) => {} - other => items.push(other), - } - } - items +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn fork_thread_session_configured_preserves_parent_and_history() { + skip_if_no_network!(); + + let server = MockServer::start().await; + let sse = sse(vec![ev_response_created("resp"), ev_completed("resp")]); + let response = ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(sse, "text/event-stream"); + + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with(response) + .expect(1) + .mount(&server) + .await; + + let mut builder = test_codex(); + let test = builder.build(&server).await.expect("create conversation"); + let codex = test.codex.clone(); + let thread_manager = test.thread_manager.clone(); 
+ let config_for_fork = test.config.clone(); + let parent_thread_id = test.session_configured.session_id; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "seed".to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + responsesapi_client_metadata: None, + environments: None, + }) + .await + .unwrap(); + let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await; + + let base_path = codex.rollout_path().expect("rollout path"); + + let NewThread { + thread_id: child_thread_id, + session_configured, + .. + } = thread_manager + .fork_thread( + usize::MAX, + config_for_fork.clone(), + codex_core::thread_store_from_config(&config_for_fork), + base_path, + /*persist_extended_history*/ false, + /*parent_trace*/ None, + ) + .await + .expect("fork thread"); + + pretty_assertions::assert_eq!(session_configured.forked_from_id, Some(parent_thread_id)); + assert_ne!(child_thread_id, parent_thread_id); } diff --git a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index 43ec50746e..dc41bb9d47 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -91,6 +91,7 @@ fn test_model_info( ) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: display_name.to_string(), description: Some(description.to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -803,6 +804,7 @@ async fn model_switch_to_smaller_model_updates_token_context_window() -> Result< let base_model = ModelInfo { slug: large_model_slug.to_string(), + request_model: None, display_name: "Larger Model".to_string(), description: Some("larger context window model".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index e2688afc97..bdeb03a471 100644 --- 
a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -323,6 +323,7 @@ struct ModelsCache { fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: "Remote Test".to_string(), description: Some("remote model".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index dde6d2ca51..fb1373e3cb 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -567,6 +567,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow let friendly_personality_message = "Friendly variant"; let remote_model = ModelInfo { slug: remote_slug.to_string(), + request_model: None, display_name: "Remote default personality test".to_string(), description: Some("Remote model with default personality template".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -675,6 +676,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - let remote_pragmatic_message = "Pragmatic from remote template"; let remote_model = ModelInfo { slug: remote_slug.to_string(), + request_model: None, display_name: "Remote personality test".to_string(), description: Some("Remote model with personality template".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index 49218c78d2..4b3a39c51d 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -1,6 +1,7 @@ #![cfg(not(target_os = "windows"))] #![allow(clippy::expect_used)] use anyhow::Result; +use codex_core::config::CustomModelConfig; use codex_login::CodexAuth; use codex_model_provider_info::ModelProviderInfo; use 
codex_model_provider_info::built_in_model_providers; @@ -506,6 +507,81 @@ async fn namespaced_model_slug_uses_catalog_metadata_without_fallback_warning() Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn custom_model_alias_sends_base_model_slug() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::start().await; + let alias = "gpt-5.4 1m"; + let base_model = "gpt-5.4"; + let response_mock = mount_sse_once( + &server, + sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]), + ) + .await; + + let mut builder = test_codex() + .with_model(alias) + .with_auth(CodexAuth::from_api_key("Test API Key")) + .with_config(move |config| { + config.custom_models.insert( + alias.to_string(), + CustomModelConfig { + model: base_model.to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(900_000), + }, + ); + config.model_catalog = Some(ModelsResponse { + models: vec![test_remote_model( + base_model, + ModelVisibility::List, + /*priority*/ 1, + )], + }); + }); + + let TestCodex { + codex, + cwd, + config, + session_configured, + .. 
+ } = builder.build(&server).await?; + + assert_eq!(session_configured.model, alias); + + codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "check custom alias model routing".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: cwd.path().to_path_buf(), + approval_policy: config.permissions.approval_policy.value(), + approvals_reviewer: None, + sandbox_policy: config.legacy_sandbox_policy(), + model: alias.to_string(), + effort: None, + summary: None, + service_tier: None, + collaboration_mode: None, + personality: None, + permission_profile: None, + environments: None, + }) + .await?; + + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + + let body = response_mock.single_request().body_json(); + assert_eq!(body["model"].as_str(), Some(base_model)); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { skip_if_no_network!(Ok(())); @@ -518,6 +594,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { let remote_model = ModelInfo { slug: REMOTE_MODEL_SLUG.to_string(), + request_model: None, display_name: "Remote Test".to_string(), description: Some("A remote model that requires the test shell".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -774,6 +851,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { let remote_base = "Use the remote base instructions only."; let remote_model = ModelInfo { slug: model.to_string(), + request_model: None, display_name: "Parallel Remote".to_string(), description: Some("A remote model with custom instructions".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -1264,6 +1342,7 @@ fn test_remote_model_with_policy( ) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: format!("{slug} display"), description: Some(format!("{slug} 
description")), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/tests/suite/rmcp_client.rs b/codex-rs/core/tests/suite/rmcp_client.rs index 0947f4fba7..730db05345 100644 --- a/codex-rs/core/tests/suite/rmcp_client.rs +++ b/codex-rs/core/tests/suite/rmcp_client.rs @@ -1271,6 +1271,7 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re ModelsResponse { models: vec![ModelInfo { slug: text_only_model_slug.to_string(), + request_model: None, display_name: "RMCP Text Only".to_string(), description: Some("Test model without image input support".to_string()), default_reasoning_level: None, diff --git a/codex-rs/core/tests/suite/rollout_list_find.rs b/codex-rs/core/tests/suite/rollout_list_find.rs index eef0d0f5f4..3cd7f7b966 100644 --- a/codex-rs/core/tests/suite/rollout_list_find.rs +++ b/codex-rs/core/tests/suite/rollout_list_find.rs @@ -9,6 +9,7 @@ use codex_core::RolloutRecorder; use codex_core::RolloutRecorderParams; use codex_core::config::ConfigBuilder; use codex_core::find_archived_thread_path_by_id_str; +use codex_core::find_or_unarchive_thread_path_by_id_str; use codex_core::find_thread_meta_by_name_str; use codex_core::find_thread_path_by_id_str; use codex_protocol::ThreadId; @@ -220,3 +221,77 @@ async fn find_archived_locates_rollout_file_by_id() { assert_eq!(found, Some(expected)); } + +#[tokio::test] +async fn find_thread_path_by_id_str_does_not_unarchive_archived_rollout() { + let home = TempDir::new().unwrap(); + let id = Uuid::new_v4(); + let archived = write_minimal_rollout_with_id_in_subdir(home.path(), "archived_sessions", id); + + let found = find_thread_path_by_id_str(home.path(), &id.to_string()) + .await + .unwrap(); + + assert_eq!(found, None); + assert!(archived.exists()); +} + +#[tokio::test] +async fn find_or_unarchive_restores_archived_rollout_file_by_id() { + let home = TempDir::new().unwrap(); + let id = Uuid::new_v4(); + let archived = 
write_minimal_rollout_with_id_in_subdir(home.path(), "archived_sessions", id); + let file_name = archived.file_name().unwrap().to_owned(); + let expected_restored = home.path().join("sessions/2024/01/01").join(file_name); + + let found = find_or_unarchive_thread_path_by_id_str(home.path(), &id.to_string()) + .await + .unwrap(); + + assert_eq!(found, Some(expected_restored.clone())); + assert!(expected_restored.exists()); + assert!(!archived.exists()); + + let archived_found = find_archived_thread_path_by_id_str(home.path(), &id.to_string()) + .await + .unwrap(); + assert_eq!(archived_found, None); +} + +#[tokio::test] +async fn find_does_not_move_unrelated_file_for_stale_archived_db_path() { + let home = TempDir::new().unwrap(); + let requested_id = Uuid::new_v4(); + let requested_thread_id = ThreadId::from_string(&requested_id.to_string()).unwrap(); + let unrelated_id = Uuid::new_v4(); + let unrelated_active_path = write_minimal_rollout_with_id(home.path(), unrelated_id); + + upsert_thread_metadata( + home.path(), + requested_thread_id, + unrelated_active_path.clone(), + ) + .await; + let runtime = StateRuntime::init(home.path().to_path_buf(), "test-provider".to_string()) + .await + .unwrap(); + runtime + .mark_backfill_complete(/*last_watermark*/ None) + .await + .unwrap(); + runtime + .mark_archived( + requested_thread_id, + unrelated_active_path.as_path(), + Utc::now(), + ) + .await + .unwrap(); + + let found = find_or_unarchive_thread_path_by_id_str(home.path(), &requested_id.to_string()) + .await + .unwrap(); + + assert_eq!(found, None); + assert!(unrelated_active_path.exists()); +} diff --git a/codex-rs/core/tests/suite/search_tool.rs b/codex-rs/core/tests/suite/search_tool.rs index 5b3c540ef3..5c5909eb59 100644 --- a/codex-rs/core/tests/suite/search_tool.rs +++ b/codex-rs/core/tests/suite/search_tool.rs @@ -291,7 +291,7 @@ async fn tool_search_disabled_exposes_apps_tools_directly() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 
2)] -async fn search_tool_is_hidden_for_api_key_auth() -> Result<()> { +async fn search_tool_omits_tool_search_without_searchable_apps_for_api_key_auth() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -322,8 +322,10 @@ async fn search_tool_is_hidden_for_api_key_auth() -> Result<()> { let tools = tool_names(&body); assert!( !tools.iter().any(|name| name == TOOL_SEARCH_TOOL_NAME), - "tools list should not include {TOOL_SEARCH_TOOL_NAME} for API key auth: {tools:?}" + "tools list should omit {TOOL_SEARCH_TOOL_NAME} when API-key auth leaves no searchable app tools: {tools:?}" ); + assert!(tools.iter().any(|name| name == "spawn_agent")); + assert!(tools.iter().any(|name| name == "wait_agent")); Ok(()) } diff --git a/codex-rs/core/tests/suite/skills.rs b/codex-rs/core/tests/suite/skills.rs index 87b5eb46fc..7056b2471c 100644 --- a/codex-rs/core/tests/suite/skills.rs +++ b/codex-rs/core/tests/suite/skills.rs @@ -240,6 +240,8 @@ async fn list_skills_skips_cwd_roots_when_environment_disabled() -> Result<()> { &config, codex_core::test_support::auth_manager_from_auth(CodexAuth::from_api_key("dummy")), SessionSource::Exec, + /*model_catalog*/ None, + std::collections::HashMap::new(), CollaborationModesConfig::default(), Arc::new(EnvironmentManager::disabled_for_tests( ExecServerRuntimePaths::new( diff --git a/codex-rs/core/tests/suite/spawn_agent_description.rs b/codex-rs/core/tests/suite/spawn_agent_description.rs index 031c3135e8..6164c12b58 100644 --- a/codex-rs/core/tests/suite/spawn_agent_description.rs +++ b/codex-rs/core/tests/suite/spawn_agent_description.rs @@ -29,19 +29,30 @@ use tokio::time::sleep; const SPAWN_AGENT_TOOL_NAME: &str = "spawn_agent"; +fn find_tool_description(tool: &Value, name: &str) -> Option { + if tool.get("name").and_then(Value::as_str) == Some(name) { + return tool + .get("description") + .and_then(Value::as_str) + .map(str::to_string); + } + + tool.get("tools") + .and_then(Value::as_array) + 
.and_then(|tools| { + tools + .iter() + .find_map(|tool| find_tool_description(tool, name)) + }) +} + fn spawn_agent_description(body: &Value) -> Option { body.get("tools") .and_then(Value::as_array) .and_then(|tools| { - tools.iter().find_map(|tool| { - if tool.get("name").and_then(Value::as_str) == Some(SPAWN_AGENT_TOOL_NAME) { - tool.get("description") - .and_then(Value::as_str) - .map(str::to_string) - } else { - None - } - }) + tools + .iter() + .find_map(|tool| find_tool_description(tool, SPAWN_AGENT_TOOL_NAME)) }) } @@ -55,6 +66,7 @@ fn test_model_info( ) -> ModelInfo { ModelInfo { slug: slug.to_string(), + request_model: None, display_name: display_name.to_string(), description: Some(description.to_string()), default_reasoning_level: Some(default_reasoning_level), diff --git a/codex-rs/core/tests/suite/subagent_notifications.rs b/codex-rs/core/tests/suite/subagent_notifications.rs index 3f457967c1..7a10a87fde 100644 --- a/codex-rs/core/tests/suite/subagent_notifications.rs +++ b/codex-rs/core/tests/suite/subagent_notifications.rs @@ -4,6 +4,14 @@ use codex_core::config::AgentRoleConfig; use codex_features::Feature; use codex_protocol::ThreadId; use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::protocol::AgentStatus; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::CollabAgentSpawnBeginEvent; +use codex_protocol::protocol::CollabAgentSpawnEndEvent; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::Op; +use codex_protocol::protocol::SandboxPolicy; +use codex_protocol::user_input::UserInput; use core_test_support::responses::ResponsesRequest; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; @@ -17,7 +25,10 @@ use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; 
+use core_test_support::wait_for_event_match; use pretty_assertions::assert_eq; +use serde_json::Value; use serde_json::json; use std::fs; use std::path::Path; @@ -32,11 +43,17 @@ const TURN_1_PROMPT: &str = "spawn a child and continue"; const TURN_2_NO_WAIT_PROMPT: &str = "follow up without wait"; const CHILD_PROMPT: &str = "child: do work"; const INHERITED_MODEL: &str = "gpt-5.3-codex"; -const INHERITED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::XHigh; +// Fork-context children inherit the spawning turn's effective effort, which resolves to Medium in +// this test harness even when the selected model is gpt-5.3-codex. +const INHERITED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::Medium; const REQUESTED_MODEL: &str = "gpt-5.4"; const REQUESTED_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::Low; const ROLE_MODEL: &str = "gpt-5.4"; const ROLE_REASONING_EFFORT: ReasoningEffort = ReasoningEffort::High; +const FALLBACK_MODEL_A: &str = "gpt-5.4-mini"; +const FALLBACK_REASONING_EFFORT_A: ReasoningEffort = ReasoningEffort::Low; +const FALLBACK_MODEL_B: &str = "gpt-5.2"; +const FALLBACK_REASONING_EFFORT_B: ReasoningEffort = ReasoningEffort::Medium; fn body_contains(req: &wiremock::Request, text: &str) -> bool { let is_zstd = req @@ -58,6 +75,57 @@ fn body_contains(req: &wiremock::Request, text: &str) -> bool { .is_some_and(|body| body.contains(text)) } +fn request_uses_model_and_effort( + req: &wiremock::Request, + model: &str, + reasoning_effort: &str, +) -> bool { + let is_zstd = req + .headers + .get("content-encoding") + .and_then(|value| value.to_str().ok()) + .is_some_and(|value| { + value + .split(',') + .any(|entry| entry.trim().eq_ignore_ascii_case("zstd")) + }); + let bytes = if is_zstd { + zstd::stream::decode_all(std::io::Cursor::new(&req.body)).ok() + } else { + Some(req.body.clone()) + }; + bytes + .and_then(|body| serde_json::from_slice::(&body).ok()) + .is_some_and(|body| { + body.get("model").and_then(Value::as_str) == Some(model) + 
&& body + .get("reasoning") + .and_then(|reasoning| reasoning.get("effort")) + .and_then(Value::as_str) + == Some(reasoning_effort) + }) +} + +fn request_uses_model(req: &wiremock::Request, model: &str) -> bool { + let is_zstd = req + .headers + .get("content-encoding") + .and_then(|value| value.to_str().ok()) + .is_some_and(|value| { + value + .split(',') + .any(|entry| entry.trim().eq_ignore_ascii_case("zstd")) + }); + let bytes = if is_zstd { + zstd::stream::decode_all(std::io::Cursor::new(&req.body)).ok() + } else { + Some(req.body.clone()) + }; + bytes + .and_then(|body| serde_json::from_slice::(&body).ok()) + .is_some_and(|body| body.get("model").and_then(Value::as_str) == Some(model)) +} + fn has_subagent_notification(req: &ResponsesRequest) -> bool { req.message_input_texts("user") .iter() @@ -69,23 +137,33 @@ fn tool_parameter_description( tool_name: &str, parameter_name: &str, ) -> Option { + fn find_parameter_description( + tools: &[serde_json::Value], + tool_name: &str, + parameter_name: &str, + ) -> Option { + tools.iter().find_map(|tool| { + if tool.get("name").and_then(serde_json::Value::as_str) == Some(tool_name) { + return tool + .get("parameters") + .and_then(|parameters| parameters.get("properties")) + .and_then(|properties| properties.get(parameter_name)) + .and_then(|parameter| parameter.get("description")) + .and_then(serde_json::Value::as_str) + .map(str::to_owned); + } + tool.get("tools") + .and_then(serde_json::Value::as_array) + .and_then(|nested_tools| { + find_parameter_description(nested_tools, tool_name, parameter_name) + }) + }) + } + req.body_json() .get("tools") .and_then(serde_json::Value::as_array) - .and_then(|tools| { - tools.iter().find_map(|tool| { - if tool.get("name").and_then(serde_json::Value::as_str) == Some(tool_name) { - tool.get("parameters") - .and_then(|parameters| parameters.get("properties")) - .and_then(|properties| properties.get(parameter_name)) - .and_then(|parameter| parameter.get("description")) - 
.and_then(serde_json::Value::as_str) - .map(str::to_owned) - } else { - None - } - }) - }) + .and_then(|tools| find_parameter_description(tools, tool_name, parameter_name)) } fn role_block(description: &str, role_name: &str) -> Option { @@ -111,7 +189,7 @@ fn write_home_skill(codex_home: &Path, dir: &str, name: &str, description: &str) } async fn wait_for_spawned_thread_id(test: &TestCodex) -> Result { - let deadline = Instant::now() + Duration::from_secs(2); + let deadline = Instant::now() + Duration::from_secs(5); loop { let ids = test.thread_manager.list_thread_ids().await; if let Some(spawned_id) = ids @@ -143,6 +221,87 @@ async fn wait_for_requests( } } +async fn wait_for_matching_requests( + mock: &core_test_support::responses::ResponseMock, + mut predicate: impl FnMut(&ResponsesRequest) -> bool, +) -> Result> { + let deadline = Instant::now() + Duration::from_secs(2); + loop { + let requests = mock + .requests() + .into_iter() + .filter(&mut predicate) + .collect::>(); + if !requests.is_empty() { + return Ok(requests); + } + if Instant::now() >= deadline { + anyhow::bail!( + "expected at least 1 matching request, got {}", + requests.len() + ); + } + sleep(Duration::from_millis(10)).await; + } +} + +async fn submit_turn_and_wait_for_spawn_attempt_events( + test: &TestCodex, + prompt: &str, + expected_attempts: usize, +) -> Result> { + test.codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: prompt.to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: test.cwd_path().to_path_buf(), + approval_policy: AskForApproval::Never, + approvals_reviewer: None, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: test.session_configured.model.clone(), + effort: None, + summary: None, + service_tier: None, + collaboration_mode: None, + personality: None, + permission_profile: None, + environments: None, + }) + .await?; + + let turn_id = wait_for_event_match(&test.codex, |event| match event { + 
EventMsg::TurnStarted(event) => Some(event.turn_id.clone()), + _ => None, + }) + .await; + let mut spawn_events = Vec::with_capacity(expected_attempts); + let mut pending_begin = None; + loop { + let event = wait_for_event(&test.codex, |_| true).await; + match event { + EventMsg::CollabAgentSpawnBegin(event) => { + pending_begin = Some(event); + } + EventMsg::CollabAgentSpawnEnd(event) => { + let begin_event = pending_begin + .take() + .ok_or_else(|| anyhow::anyhow!("spawn end event without matching begin"))?; + spawn_events.push((begin_event, event)); + } + EventMsg::TurnComplete(event) if event.turn_id == turn_id => break, + _ => {} + } + } + if let Some(begin_event) = pending_begin { + anyhow::bail!("spawn begin event without matching end: {begin_event:?}"); + } + assert_eq!(spawn_events.len(), expected_attempts); + Ok(spawn_events) +} + async fn setup_turn_one_with_spawned_child( server: &MockServer, child_response_delay: Option, @@ -151,6 +310,7 @@ async fn setup_turn_one_with_spawned_child( server, json!({ "message": CHILD_PROMPT, + "fork_context": false, }), child_response_delay, /*wait_for_parent_notification*/ true, @@ -230,15 +390,16 @@ async fn setup_turn_one_with_custom_spawned_child( test.submit_turn(TURN_1_PROMPT).await?; if child_response_delay.is_none() && wait_for_parent_notification { let _ = wait_for_requests(&child_request_log).await?; - let rollout_path = test - .codex - .rollout_path() - .ok_or_else(|| anyhow::anyhow!("expected parent rollout path"))?; + let Some(rollout_path) = test.codex.rollout_path() else { + anyhow::bail!("rollout path"); + }; let deadline = Instant::now() + Duration::from_secs(6); loop { - let has_notification = tokio::fs::read_to_string(&rollout_path) - .await - .is_ok_and(|rollout| rollout.contains("")); + test.codex.ensure_rollout_materialized().await; + let _ = test.codex.flush_rollout().await; + let has_notification = std::fs::read_to_string(&rollout_path) + .ok() + .is_some_and(|rollout| rollout.contains("")); if 
has_notification { break; } @@ -398,8 +559,7 @@ async fn spawned_child_receives_forked_parent_context() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn spawn_agent_requested_model_and_reasoning_override_inherited_settings_without_role() --> Result<()> { +async fn spawn_agent_inherits_parent_model_and_reasoning_without_role() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -414,10 +574,10 @@ async fn spawn_agent_requested_model_and_reasoning_override_inherited_settings_w ) .await?; - assert_eq!(child_snapshot.model, REQUESTED_MODEL); + assert_eq!(child_snapshot.model, INHERITED_MODEL); assert_eq!( child_snapshot.reasoning_effort, - Some(REQUESTED_REASONING_EFFORT) + Some(INHERITED_REASONING_EFFORT) ); Ok(()) @@ -443,7 +603,7 @@ async fn spawned_multi_agent_v2_child_inherits_parent_developer_context() -> Res ) .await; - let _child_request_log = mount_sse_once_match( + let child_request_log = mount_sse_once_match( &server, |req: &wiremock::Request| body_contains(req, CHILD_PROMPT), sse(vec![ @@ -481,29 +641,9 @@ async fn spawned_multi_agent_v2_child_inherits_parent_developer_context() -> Res test.submit_turn(TURN_1_PROMPT).await?; - let deadline = Instant::now() + Duration::from_secs(2); - let child_request = loop { - if let Some(request) = server - .received_requests() - .await - .unwrap_or_default() - .into_iter() - .find(|request| { - body_contains(request, CHILD_PROMPT) && !body_contains(request, SPAWN_CALL_ID) - }) - { - break request; - } - if Instant::now() >= deadline { - anyhow::bail!("timed out waiting for spawned child request with developer context"); - } - sleep(Duration::from_millis(10)).await; - }; - assert!(body_contains( - &child_request, - "Parent developer instructions." 
- )); - assert!(body_contains(&child_request, CHILD_PROMPT)); + let child_request = child_request_log.single_request(); + assert!(child_request.body_contains_text("Parent developer instructions.")); + assert!(child_request.body_contains_text(CHILD_PROMPT)); Ok(()) } @@ -528,7 +668,7 @@ async fn skills_toggle_skips_instructions_for_parent_and_spawned_child() -> Resu ) .await; - let _child_request_log = mount_sse_once_match( + let child_request_log = mount_sse_once_match( &server, |req: &wiremock::Request| body_contains(req, CHILD_PROMPT), sse(vec![ @@ -575,32 +715,16 @@ async fn skills_toggle_skips_instructions_for_parent_and_spawned_child() -> Resu assert!(!parent_request.body_contains_text("")); assert!(!parent_request.body_contains_text("demo-skill")); - let deadline = Instant::now() + Duration::from_secs(2); - let child_request = loop { - if let Some(request) = server - .received_requests() - .await - .unwrap_or_default() - .into_iter() - .find(|request| { - body_contains(request, CHILD_PROMPT) && !body_contains(request, SPAWN_CALL_ID) - }) - { - break request; - } - if Instant::now() >= deadline { - anyhow::bail!("timed out waiting for spawned child request"); - } - sleep(Duration::from_millis(10)).await; - }; - assert!(!body_contains(&child_request, "")); - assert!(!body_contains(&child_request, "demo-skill")); + let child_request = child_request_log.single_request(); + assert!(!child_request.body_contains_text("")); + assert!(!child_request.body_contains_text("demo-skill")); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn spawn_agent_role_overrides_requested_model_and_reasoning_settings() -> Result<()> { +async fn spawn_agent_role_overrides_requested_model_and_reasoning_settings_without_fork_context() +-> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -611,6 +735,7 @@ async fn spawn_agent_role_overrides_requested_model_and_reasoning_settings() -> "agent_type": "custom", "model": 
REQUESTED_MODEL, "reasoning_effort": REQUESTED_REASONING_EFFORT, + "fork_context": false, }), |builder| { builder.with_config(|config| { @@ -626,8 +751,11 @@ async fn spawn_agent_role_overrides_requested_model_and_reasoning_settings() -> "custom".to_string(), AgentRoleConfig { description: Some("Custom role".to_string()), + model: None, config_file: Some(role_path.to_path_buf()), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); }) @@ -641,6 +769,224 @@ async fn spawn_agent_role_overrides_requested_model_and_reasoning_settings() -> Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn spawn_agent_model_fallback_list_retries_after_quota_exhaustion() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let spawn_args = serde_json::to_string(&json!({ + "message": CHILD_PROMPT, + "fork_context": false, + "model_fallback_list": [ + { + "model": FALLBACK_MODEL_A, + "reasoning_effort": FALLBACK_REASONING_EFFORT_A, + }, + { + "model": FALLBACK_MODEL_B, + "reasoning_effort": FALLBACK_REASONING_EFFORT_B, + } + ] + }))?; + + mount_sse_once_match( + &server, + |req: &wiremock::Request| body_contains(req, TURN_1_PROMPT), + sse(vec![ + ev_response_created("resp-turn1-1"), + ev_function_call(SPAWN_CALL_ID, "spawn_agent", &spawn_args), + ev_completed("resp-turn1-1"), + ]), + ) + .await; + + let quota_child_attempt = mount_sse_once_match( + &server, + |req: &wiremock::Request| { + body_contains(req, CHILD_PROMPT) + && request_uses_model_and_effort(req, FALLBACK_MODEL_A, "low") + }, + sse(vec![ + ev_response_created("resp-child-quota"), + json!({ + "type": "response.failed", + "response": { + "id": "resp-child-quota", + "error": { + "code": "insufficient_quota", + "message": "You exceeded your current quota, please check your plan and billing details." 
+ } + } + }), + ]), + ) + .await; + + let fallback_child_attempt = mount_sse_once_match( + &server, + |req: &wiremock::Request| { + body_contains(req, CHILD_PROMPT) && request_uses_model(req, FALLBACK_MODEL_B) + }, + sse(vec![ + ev_response_created("resp-child-fallback"), + ev_assistant_message("msg-child-fallback", "child done"), + ev_completed("resp-child-fallback"), + ]), + ) + .await; + + let _turn1_followup = mount_sse_once_match( + &server, + |req: &wiremock::Request| body_contains(req, SPAWN_CALL_ID), + sse(vec![ + ev_response_created("resp-turn1-2"), + ev_assistant_message("msg-turn1-2", "parent done"), + ev_completed("resp-turn1-2"), + ]), + ) + .await; + + let mut builder = test_codex().with_config(|config| { + config + .features + .enable(Feature::Collab) + .expect("test config should allow feature update"); + config.model = Some(INHERITED_MODEL.to_string()); + config.model_reasoning_effort = Some(INHERITED_REASONING_EFFORT); + }); + let test = builder.build(&server).await?; + + let spawn_events = submit_turn_and_wait_for_spawn_attempt_events( + &test, + TURN_1_PROMPT, + /*expected_attempts*/ 2, + ) + .await?; + + let (quota_begin_event, quota_end_event) = &spawn_events[0]; + assert_eq!(quota_begin_event.call_id, SPAWN_CALL_ID); + assert_eq!(quota_begin_event.prompt, CHILD_PROMPT); + assert_eq!(quota_begin_event.model, FALLBACK_MODEL_A); + assert_eq!( + quota_begin_event.reasoning_effort, + FALLBACK_REASONING_EFFORT_A + ); + assert_eq!(quota_end_event.call_id, SPAWN_CALL_ID); + assert_eq!(quota_end_event.new_thread_id, None); + assert_eq!(quota_end_event.new_agent_nickname, None); + assert_eq!(quota_end_event.new_agent_role, None); + assert_eq!(quota_end_event.prompt, CHILD_PROMPT); + assert_eq!(quota_end_event.model, FALLBACK_MODEL_A); + assert_eq!( + quota_end_event.reasoning_effort, + FALLBACK_REASONING_EFFORT_A + ); + match "a_end_event.status { + AgentStatus::PendingInit => {} + AgentStatus::Errored(message) if 
message.to_lowercase().contains("quota") => {} + status => panic!("unexpected first-attempt retry status: {status:?}"), + } + + let (fallback_begin_event, fallback_end_event) = &spawn_events[1]; + assert_eq!(fallback_begin_event.call_id, format!("{SPAWN_CALL_ID}#2")); + assert_eq!(fallback_begin_event.prompt, CHILD_PROMPT); + assert_eq!(fallback_begin_event.model, FALLBACK_MODEL_B); + assert_eq!( + fallback_begin_event.reasoning_effort, + FALLBACK_REASONING_EFFORT_B + ); + assert_eq!(fallback_end_event.call_id, format!("{SPAWN_CALL_ID}#2")); + assert_eq!(fallback_end_event.prompt, CHILD_PROMPT); + assert_eq!(fallback_end_event.model, FALLBACK_MODEL_B); + assert_eq!( + fallback_end_event.reasoning_effort, + FALLBACK_REASONING_EFFORT_B + ); + + let quota_requests = quota_child_attempt + .requests() + .into_iter() + .filter(|request| { + request.body_json().get("model").and_then(Value::as_str) == Some(FALLBACK_MODEL_A) + }) + .collect::>(); + assert!(!quota_requests.is_empty()); + for quota_request in "a_requests { + let body = quota_request.body_json(); + assert_eq!( + body.get("model").and_then(Value::as_str), + Some(FALLBACK_MODEL_A) + ); + assert_eq!( + body.get("reasoning") + .and_then(|reasoning| reasoning.get("effort")) + .and_then(Value::as_str), + Some("low") + ); + } + + let fallback_requests = wait_for_matching_requests(&fallback_child_attempt, |request| { + request.body_json().get("model").and_then(Value::as_str) == Some(FALLBACK_MODEL_B) + }) + .await?; + assert!(!fallback_requests.is_empty()); + for fallback_request in &fallback_requests { + let fallback_body = fallback_request.body_json(); + assert_eq!( + fallback_body.get("model").and_then(Value::as_str), + Some(FALLBACK_MODEL_B) + ); + if let Some(effort) = fallback_body + .get("reasoning") + .and_then(|reasoning| reasoning.get("effort")) + .and_then(Value::as_str) + { + assert_eq!(effort, "medium"); + } + } + + let deadline = Instant::now() + Duration::from_secs(2); + let child_snapshot = loop { + 
let spawned_ids = test + .thread_manager + .list_thread_ids() + .await + .into_iter() + .filter(|id| *id != test.session_configured.session_id) + .collect::>(); + let mut matching_snapshot = None; + for thread_id in spawned_ids { + let snapshot = test + .thread_manager + .get_thread(thread_id) + .await? + .config_snapshot() + .await; + if snapshot.model == FALLBACK_MODEL_B + && snapshot.reasoning_effort == Some(FALLBACK_REASONING_EFFORT_B) + { + matching_snapshot = Some(snapshot); + break; + } + } + if let Some(snapshot) = matching_snapshot { + break snapshot; + } + if Instant::now() >= deadline { + anyhow::bail!("timed out waiting for fallback child snapshot"); + } + sleep(Duration::from_millis(10)).await; + }; + + assert_eq!(child_snapshot.model, FALLBACK_MODEL_B); + assert_eq!( + child_snapshot.reasoning_effort, + Some(FALLBACK_REASONING_EFFORT_B) + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn spawn_agent_tool_description_mentions_role_locked_settings() -> Result<()> { skip_if_no_network!(Ok(())); @@ -674,8 +1020,11 @@ async fn spawn_agent_tool_description_mentions_role_locked_settings() -> Result< "custom".to_string(), AgentRoleConfig { description: Some("Custom role".to_string()), + model: None, config_file: Some(role_path.to_path_buf()), + watchdog_interval_s: None, nickname_candidates: None, + fork_context: None, }, ); }); diff --git a/codex-rs/core/tests/suite/tool_suggest.rs b/codex-rs/core/tests/suite/tool_suggest.rs index 6cb19d01a5..d6a89fd2cf 100644 --- a/codex-rs/core/tests/suite/tool_suggest.rs +++ b/codex-rs/core/tests/suite/tool_suggest.rs @@ -122,7 +122,7 @@ async fn tool_suggest_is_available_without_search_tool_after_discovery_attempts( let tools = tool_names(&body); assert!( !tools.iter().any(|name| name == TOOL_SEARCH_TOOL_NAME), - "tools list should not include {TOOL_SEARCH_TOOL_NAME}: {tools:?}" + "tools list should omit {TOOL_SEARCH_TOOL_NAME} when the model does not support search tools: {tools:?}" 
); assert!( tools.iter().any(|name| name == TOOL_SUGGEST_TOOL_NAME), diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 9dd5d82e0a..afa49b271a 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -1045,6 +1045,7 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an let model_slug = "text-only-view-image-test-model"; let text_only_model = ModelInfo { slug: model_slug.to_string(), + request_model: None, display_name: "Text-only view_image test model".to_string(), description: Some("Remote model for view_image unsupported-path coverage".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/core/watchdog_agent_prompt.md b/codex-rs/core/watchdog_agent_prompt.md new file mode 100644 index 0000000000..127558ef63 --- /dev/null +++ b/codex-rs/core/watchdog_agent_prompt.md @@ -0,0 +1,91 @@ +# You are a Subagent + +More importantly, you are a **watchdog check-in agent**. Keep the root agent unblocked, on-task, and executing real work toward the user’s goal. You have full conversation context; messages that appear to be from “you” may have been written by the root agent. +You are one check-in run created by a persistent watchdog timer attached to an owner thread. The timer reuses this prompt on each check-in, but each check-in is a fresh one-shot run (one execution + one report). + +You will be given the target agent id and the original prompt/goal. + +Terms in this file: +- **watchdog**: persistent idle-timer registration. +- **watchdog check-in agent**: this short-lived run instance. +- **owner thread**: the thread that the watchdog monitors and reports to. +- **parent thread**: this watchdog check-in agent’s direct parent; for watchdog check-ins this is the owner thread. +- **`send_input`**: primary way to deliver watchdog guidance to an existing thread; it does not spawn agents. Delivery is asynchronous. 
+- **durable state**: thread-level task state that must still be available in later turns/check-ins (counters, plans, final decisions), not disk/database persistence. +- **exact-only format**: parent constraint that says to return only specific fields/content. + +## Principles + +- Be concise, directive, and specific: name the file, command, or decision needed now. +- Detect drift or looping immediately. If the root agent is acknowledging without acting, tell it exactly what to do next. +- Break loops by changing framing: propose a shorter plan, identify the blocker, or name the missing command. +- Preserve alignment: restate the user’s goal and the next concrete step. +- Safety and correctness: call out missing tests, skipped checks, or unclear acceptance criteria. +- Output precedence is: system/developer/policy rules first, then parent-task output constraints. If the parent requires exact-only format (for example "only"), return exactly the requested fields/content unless higher-priority rules require extra content. +- If exact-only format is not required, include all requested fields/content and you may add 1-2 short non-conflicting guidance sentences. + +## Operating Procedure (Every Time You Run) + +1. Re-evaluate the user’s latest request and the current status. Independently verify status when needed by reading files, running commands, and checking plan files against recent changes. +2. Identify the single highest-impact next action (or a very short ordered list). +3. Direct the root agent to execute it now (include paths and commands). +4. If blocked, propose one or two crisp unblockers. +5. If the goal appears complete, say so and direct the root agent to close unneeded agents. + +Tone: direct, actionable, minimally polite. Optimize for progress over narration. + +## Detect Looping and Reward Hacking + +The root agent may slip into patterns that look like progress but are not. Interrupt those patterns. 
+ +Watch for: + +- Tests that always pass (tautologies, `assert!(true)`, mocks that cannot fail). +- Marking items complete with only stub implementations. +- "Fixes" that comment out failing tests or code without addressing root causes. +- Claiming success without running required format/lint/tests. +- Ignoring explicit user requirements in favor of quicker but incomplete shortcuts. + +When you detect these, prescribe the corrective action explicitly. + +## Multi-Agent Tools (Upstream Surface) + +Use only the multi-agent tools that exist here: + +- `spawn_agent` (prefer `fork_context = true` when shared context matters). +- `send_input`. +- `tool_search` to discover deferred watchdog-only tools in the `watchdog` namespace. +- `watchdog.compact_parent_context` (watchdog-only recovery tool; see below). +- `watchdog.watchdog_self_close` (watchdog-only immediate exit tool; see below). +- `wait`. +- `close_agent`. + +There is no cancel tool. Use `watchdog.watchdog_self_close` to stop this watchdog check-in thread when its job is complete; use `close_agent` to stop subagents that are done or no longer needed. + +When recommending watchdogs to the root agent, keep `agent_type` at the default. + +Important: send watchdog check-in output with `send_input` to the owner/parent thread. A plain assistant message in your own watchdog check-in thread is not a reliable delivery path to the owner. + +Each watchdog check-in runs in a fresh one-shot watchdog check-in agent with no guaranteed continuity across check-ins. Do not keep durable state in watchdog-check-in-agent local memory/files; treat local state as run-local only. Ask the parent to track durable state, and use `send_input` (without `id`, or `id = "parent"`/`"root"`) to report results. + +`send_input` is the primary path for watchdog delivery to parent/owner. 
If a watchdog check-in agent finishes without `send_input`, runtime forwards one final multi-agent inbox message as the mandatory fallback wake-up path for the owner. Exiting without either `send_input` or a final message is a bug. + +For token protocols (for example `ping N` / `pong N`), treat those as literal text counters, not shell commands. Do not call command-execution tools unless the prompt explicitly asks you to execute commands. + +## Parent Recovery via Context Compaction + +`watchdog.compact_parent_context` asks the system to abbreviate/compact redundant parent-thread context so the parent can recover from loops. + +Use it only as a last resort: + +- The parent has been repeatedly non-responsive across multiple watchdog check-ins. +- The parent is taking no meaningful actions (no concrete commands/edits/tests) and making no progress. +- You already sent at least one direct corrective instruction with `send_input`, and it was ignored. + +`watchdog.watchdog_self_close` asks the runtime to end the current watchdog check-in thread immediately. Use it only after reporting status and when the check-in has no remaining work, to avoid idle watchdog loops. + +Do not call `watchdog.compact_parent_context` for routine nudges or normal delays. Prefer precise `send_input` guidance first. + +## Style + +Be explicit when precision matters. Your job is to drive real progress toward the user’s goal. diff --git a/codex-rs/exec/src/cli.rs b/codex-rs/exec/src/cli.rs index 2b12898c3c..8006e677ed 100644 --- a/codex-rs/exec/src/cli.rs +++ b/codex-rs/exec/src/cli.rs @@ -16,6 +16,12 @@ pub struct Cli { #[command(subcommand)] pub command: Option, + /// Fork from an existing session id (or thread name) before sending the prompt. + /// + /// This creates a new session with copied history, similar to `codex fork`. 
+ #[arg(long = "fork", value_name = "SESSION_ID")] + pub fork_session_id: Option, + #[clap(flatten)] pub shared: ExecSharedCliOptions, @@ -156,7 +162,18 @@ fn mark_exec_global_args(cmd: clap::Command) -> clap::Command { arg.global(true) }) } +impl Cli { + pub fn validate(self) -> Result { + if self.fork_session_id.is_some() && self.command.is_some() { + return Err(clap::Error::raw( + clap::error::ErrorKind::ArgumentConflict, + "--fork cannot be used with subcommands", + )); + } + Ok(self) + } +} #[derive(Debug, clap::Subcommand)] pub enum Command { /// Resume a previous session by id or pick the most recent with --last. diff --git a/codex-rs/exec/src/cli_tests.rs b/codex-rs/exec/src/cli_tests.rs index 45f2aa330d..9d785ddc28 100644 --- a/codex-rs/exec/src/cli_tests.rs +++ b/codex-rs/exec/src/cli_tests.rs @@ -80,3 +80,22 @@ fn removed_full_auto_flag_reports_migration_path() { Some("warning: `--full-auto` is deprecated; use `--sandbox workspace-write` instead.") ); } + +#[test] +fn fork_option_parses_prompt() { + const PROMPT: &str = "echo fork-non-interactive"; + let cli = Cli::parse_from(["codex-exec", "--fork", "session-123", "--json", PROMPT]); + + assert_eq!(cli.fork_session_id.as_deref(), Some("session-123")); + assert_eq!(cli.prompt.as_deref(), Some(PROMPT)); + assert!(cli.command.is_none()); +} + +#[test] +fn fork_option_conflicts_with_subcommands() { + let err = Cli::try_parse_from(["codex-exec", "--fork", "session-123", "resume"]) + .and_then(Cli::validate) + .expect_err("fork should conflict with subcommands"); + + assert_eq!(err.kind(), clap::error::ErrorKind::ArgumentConflict); +} diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 7846536206..b083645432 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -32,6 +32,8 @@ use codex_app_server_protocol::ReviewTarget as ApiReviewTarget; use codex_app_server_protocol::ServerNotification; use codex_app_server_protocol::ServerRequest; use codex_app_server_protocol::Thread 
as AppServerThread; +use codex_app_server_protocol::ThreadForkParams; +use codex_app_server_protocol::ThreadForkResponse; use codex_app_server_protocol::ThreadItem as AppServerThreadItem; use codex_app_server_protocol::ThreadListParams; use codex_app_server_protocol::ThreadListResponse; @@ -195,6 +197,7 @@ struct ExecRunArgs { config: Config, dangerously_bypass_approvals_and_sandbox: bool, exec_span: tracing::Span, + fork_session_id: Option, images: Vec, json_mode: bool, last_message_file: Option, @@ -227,6 +230,7 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result let Cli { command, + fork_session_id, shared, skip_git_repo_check, ephemeral, @@ -525,6 +529,7 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result config, dangerously_bypass_approvals_and_sandbox, exec_span: exec_span.clone(), + fork_session_id, images, json_mode, last_message_file, @@ -546,6 +551,7 @@ async fn run_exec_session(args: ExecRunArgs) -> anyhow::Result<()> { config, dangerously_bypass_approvals_and_sandbox, exec_span, + fork_session_id, images, json_mode, last_message_file, @@ -663,10 +669,10 @@ async fn run_exec_session(args: ExecRunArgs) -> anyhow::Result<()> { anyhow::anyhow!("failed to initialize in-process app-server client: {err}") })?; - // Handle resume subcommand through existing `thread/list` + `thread/resume` - // APIs so exec no longer reaches into rollout storage directly. - let (primary_thread_id, fallback_session_configured) = - if let Some(ExecCommand::Resume(args)) = command.as_ref() { + // Handle resume/fork/start through app-server APIs so exec no longer reaches into + // rollout storage directly for normal bootstrap. + let (primary_thread_id, fallback_session_configured) = match command.as_ref() { + Some(ExecCommand::Resume(args)) => { if let Some(thread_id) = resolve_resume_thread_id(&client, &config, args).await? 
{ let response: ThreadResumeResponse = send_request_with_response( &client, @@ -696,22 +702,41 @@ async fn run_exec_session(args: ExecRunArgs) -> anyhow::Result<()> { .map_err(anyhow::Error::msg)?; (session_configured.session_id, session_configured) } - } else { - let response: ThreadStartResponse = send_request_with_response( - &client, - ClientRequest::ThreadStart { - request_id: request_ids.next(), - params: thread_start_params_from_config(&config), - }, - "thread/start", - ) - .await - .map_err(anyhow::Error::msg)?; - let session_configured = session_configured_from_thread_start_response(&response) + } + Some(ExecCommand::Review(_)) | None => { + if let Some(session_id) = fork_session_id.as_deref() { + let response: ThreadForkResponse = send_request_with_response( + &client, + ClientRequest::ThreadFork { + request_id: request_ids.next(), + params: thread_fork_params_from_config( + &config, session_id, /*path*/ None, + ), + }, + "thread/fork", + ) + .await .map_err(anyhow::Error::msg)?; - (session_configured.session_id, session_configured) - }; - + let session_configured = session_configured_from_thread_fork_response(&response) + .map_err(anyhow::Error::msg)?; + (session_configured.session_id, session_configured) + } else { + let response: ThreadStartResponse = send_request_with_response( + &client, + ClientRequest::ThreadStart { + request_id: request_ids.next(), + params: thread_start_params_from_config(&config), + }, + "thread/start", + ) + .await + .map_err(anyhow::Error::msg)?; + let session_configured = session_configured_from_thread_start_response(&response) + .map_err(anyhow::Error::msg)?; + (session_configured.session_id, session_configured) + } + } + }; let primary_thread_id_for_span = primary_thread_id.to_string(); // Use the start/resume response as the authoritative bootstrap payload. 
// Waiting for a later streamed `SessionConfigured` event adds up to 10s of @@ -958,6 +983,26 @@ fn approvals_reviewer_override_from_config( Some(config.approvals_reviewer.into()) } +fn thread_fork_params_from_config( + config: &Config, + thread_id: &str, + path: Option, +) -> ThreadForkParams { + ThreadForkParams { + thread_id: thread_id.to_string(), + path, + model: config.model.clone(), + model_provider: Some(config.model_provider_id.clone()), + cwd: Some(config.cwd.to_string_lossy().to_string()), + approval_policy: Some(config.permissions.approval_policy.value().into()), + approvals_reviewer: approvals_reviewer_override_from_config(config), + sandbox: None, + permission_profile: Some(config.permissions.permission_profile().into()), + config: config_request_overrides_from_config(config), + ..ThreadForkParams::default() + } +} + async fn send_request_with_response( client: &InProcessAppServerClient, request: ClientRequest, @@ -1013,6 +1058,25 @@ fn session_configured_from_thread_resume_response( ) } +fn session_configured_from_thread_fork_response( + response: &ThreadForkResponse, +) -> Result { + session_configured_from_thread_response( + &response.thread.id, + response.thread.name.clone(), + response.thread.path.clone(), + response.model.clone(), + response.model_provider.clone(), + response.service_tier, + response.approval_policy.to_core(), + response.approvals_reviewer.to_core(), + response.sandbox.to_core(), + response.permission_profile.clone().map(Into::into), + response.cwd.clone(), + response.reasoning_effort, + ) +} + fn review_target_to_api(target: ReviewTarget) -> ApiReviewTarget { match target { ReviewTarget::UncommittedChanges => ApiReviewTarget::UncommittedChanges, diff --git a/codex-rs/exec/src/main.rs b/codex-rs/exec/src/main.rs index 79a681b146..f820319ce7 100644 --- a/codex-rs/exec/src/main.rs +++ b/codex-rs/exec/src/main.rs @@ -29,7 +29,10 @@ fn main() -> anyhow::Result<()> { arg0_dispatch_or_else(|arg0_paths: Arg0DispatchPaths| async move { 
let top_cli = TopCli::parse(); // Merge root-level overrides into inner CLI struct so downstream logic remains unchanged. - let mut inner = top_cli.inner; + let mut inner = match top_cli.inner.validate() { + Ok(inner) => inner, + Err(err) => err.exit(), + }; inner .config_overrides .raw_overrides diff --git a/codex-rs/exec/src/main_tests.rs b/codex-rs/exec/src/main_tests.rs index a9cb0ec633..f99b7ed251 100644 --- a/codex-rs/exec/src/main_tests.rs +++ b/codex-rs/exec/src/main_tests.rs @@ -35,3 +35,24 @@ fn top_cli_parses_resume_prompt_after_config_flag() { "reasoning_level=xhigh" ); } + +#[test] +fn top_cli_parses_fork_option_with_root_config() { + let cli = TopCli::parse_from([ + "codex-exec", + "--config", + "reasoning_level=xhigh", + "--fork", + "session-123", + "echo fork", + ]); + + assert_eq!(cli.inner.fork_session_id.as_deref(), Some("session-123")); + assert!(cli.inner.command.is_none()); + assert_eq!(cli.inner.prompt.as_deref(), Some("echo fork")); + assert_eq!(cli.config_overrides.raw_overrides.len(), 1); + assert_eq!( + cli.config_overrides.raw_overrides[0], + "reasoning_level=xhigh" + ); +} diff --git a/codex-rs/exec/tests/suite/fork.rs b/codex-rs/exec/tests/suite/fork.rs new file mode 100644 index 0000000000..a180364b09 --- /dev/null +++ b/codex-rs/exec/tests/suite/fork.rs @@ -0,0 +1,162 @@ +#![allow(clippy::unwrap_used, clippy::expect_used)] + +use anyhow::Context; +use codex_utils_cargo_bin::find_resource; +use core_test_support::test_codex_exec::test_codex_exec; +use serde_json::Value; +use std::string::ToString; +use uuid::Uuid; +use walkdir::WalkDir; + +/// Utility: scan the sessions dir for a rollout file that contains `marker` +/// in any response_item.message.content entry. Returns the absolute path. 
+fn find_session_file_containing_marker( + sessions_dir: &std::path::Path, + marker: &str, +) -> Option { + for entry in WalkDir::new(sessions_dir) { + let entry = match entry { + Ok(e) => e, + Err(_) => continue, + }; + if !entry.file_type().is_file() { + continue; + } + if !entry.file_name().to_string_lossy().ends_with(".jsonl") { + continue; + } + let path = entry.path(); + let Ok(content) = std::fs::read_to_string(path) else { + continue; + }; + // Skip the first meta line and scan remaining JSONL entries. + let mut lines = content.lines(); + if lines.next().is_none() { + continue; + } + for line in lines { + if line.trim().is_empty() { + continue; + } + let Ok(item): Result = serde_json::from_str(line) else { + continue; + }; + if item.get("type").and_then(|t| t.as_str()) == Some("response_item") + && let Some(payload) = item.get("payload") + && payload.get("type").and_then(|t| t.as_str()) == Some("message") + && payload + .get("content") + .map(ToString::to_string) + .unwrap_or_default() + .contains(marker) + { + return Some(path.to_path_buf()); + } + } + } + None +} + +/// Extract the conversation UUID from the first SessionMeta line in the rollout file. 
+fn extract_conversation_id(path: &std::path::Path) -> String { + let content = std::fs::read_to_string(path).unwrap(); + let mut lines = content.lines(); + let meta_line = lines.next().expect("missing meta line"); + let meta: Value = serde_json::from_str(meta_line).expect("invalid meta json"); + meta.get("payload") + .and_then(|p| p.get("id")) + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string() +} + +fn extract_forked_from_id(path: &std::path::Path) -> Option { + let content = std::fs::read_to_string(path).unwrap(); + let mut lines = content.lines(); + let meta_line = lines.next().expect("missing meta line"); + let meta: Value = serde_json::from_str(meta_line).expect("invalid meta json"); + meta.get("payload") + .and_then(|payload| payload.get("forked_from_id")) + .and_then(Value::as_str) + .map(ToString::to_string) +} + +fn rollout_contains_fork_reference(path: &std::path::Path) -> bool { + let Ok(content) = std::fs::read_to_string(path) else { + return false; + }; + content.lines().skip(1).any(|line| { + serde_json::from_str::(line) + .ok() + .and_then(|item| item.get("type").and_then(Value::as_str).map(str::to_string)) + .as_deref() + == Some("fork_reference") + }) +} + +fn exec_fixture() -> anyhow::Result { + Ok(find_resource!("tests/fixtures/cli_responses_fixture.sse")?) 
+} + +#[test] +fn exec_fork_by_id_creates_new_session_with_copied_history() -> anyhow::Result<()> { + let test = test_codex_exec(); + let fixture = exec_fixture()?; + + let marker = format!("fork-base-{}", Uuid::new_v4()); + let prompt = format!("echo {marker}"); + + test.cmd() + .env("CODEX_RS_SSE_FIXTURE", &fixture) + .env("OPENAI_BASE_URL", "http://unused.local") + .arg("--skip-git-repo-check") + .arg(&prompt) + .assert() + .success(); + + let sessions_dir = test.home_path().join("sessions"); + let original_path = find_session_file_containing_marker(&sessions_dir, &marker) + .context("no session file found after first run")?; + let session_id = extract_conversation_id(&original_path); + + let marker2 = format!("fork-follow-up-{}", Uuid::new_v4()); + let prompt2 = format!("echo {marker2}"); + + test.cmd() + .env("CODEX_RS_SSE_FIXTURE", &fixture) + .env("OPENAI_BASE_URL", "http://unused.local") + .arg("--skip-git-repo-check") + .arg("--fork") + .arg(&session_id) + .arg(&prompt2) + .assert() + .success(); + + let forked_path = find_session_file_containing_marker(&sessions_dir, &marker2) + .context("no forked session file found for second marker")?; + + assert_ne!( + forked_path, original_path, + "fork should create a new session file" + ); + + let forked_content = std::fs::read_to_string(&forked_path)?; + assert_eq!( + extract_forked_from_id(&forked_path).as_deref(), + Some(session_id.as_str()) + ); + assert!( + forked_content.contains(&marker) || rollout_contains_fork_reference(&forked_path), + "forked rollout should either inline parent history or record a fork reference" + ); + assert!(forked_content.contains(&marker2)); + + let original_content = std::fs::read_to_string(&original_path)?; + assert!(original_content.contains(&marker)); + assert!( + !original_content.contains(&marker2), + "original session should not receive the forked prompt" + ); + + Ok(()) +} diff --git a/codex-rs/exec/tests/suite/mod.rs b/codex-rs/exec/tests/suite/mod.rs index 
c6fa0f9fde..5513badc37 100644 --- a/codex-rs/exec/tests/suite/mod.rs +++ b/codex-rs/exec/tests/suite/mod.rs @@ -3,6 +3,7 @@ mod add_dir; mod apply_patch; mod auth_env; mod ephemeral; +mod fork; mod mcp_required_exit; mod originator; mod output_schema; diff --git a/codex-rs/exec/tests/suite/resume.rs b/codex-rs/exec/tests/suite/resume.rs index cfaa7aa81a..fccc98f0bb 100644 --- a/codex-rs/exec/tests/suite/resume.rs +++ b/codex-rs/exec/tests/suite/resume.rs @@ -145,9 +145,9 @@ fn exec_resume_last_appends_to_existing_file() -> anyhow::Result<()> { .arg("--skip-git-repo-check") .arg("-C") .arg(&repo_root) - .arg(&prompt2) .arg("resume") .arg("--last") + .arg(&prompt2) .assert() .success(); diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index d997cc771f..9fc287e8fb 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -145,6 +145,12 @@ pub enum Feature { MultiAgentV2, /// Enable CSV-backed agent job tools. SpawnCsv, + /// Deliver inbound agent messages via a synthetic function-call inbox envelope. + AgentFunctionCallInbox, + /// Enable prepending agent-specific developer instructions for agent sessions. + AgentPromptInjection, + /// Enable watchdog spawning and watchdog-only agent tools. + AgentWatchdog, /// Enable apps. Apps, /// Enable MCP apps. 
@@ -830,12 +836,30 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::UnderDevelopment, default_enabled: false, }, + FeatureSpec { + id: Feature::AgentFunctionCallInbox, + key: "agent_function_call_inbox", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::SpawnCsv, key: "enable_fanout", stage: Stage::UnderDevelopment, default_enabled: false, }, + FeatureSpec { + id: Feature::AgentPromptInjection, + key: "agent_prompt_injection", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, + FeatureSpec { + id: Feature::AgentWatchdog, + key: "agent_watchdog", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::Apps, key: "apps", @@ -1102,3 +1126,137 @@ pub fn unstable_features_warning_event( #[cfg(test)] mod tests; +#[cfg(test)] +mod inbox_feature_tests { + use super::*; + + use pretty_assertions::assert_eq; + + #[test] + fn under_development_features_are_disabled_by_default() { + for spec in FEATURES { + if matches!(spec.stage, Stage::UnderDevelopment) { + assert_eq!( + spec.default_enabled, false, + "feature `{}` is under development and must be disabled by default", + spec.key + ); + } + } + } + + #[test] + fn default_enabled_features_are_stable() { + for spec in FEATURES { + if spec.default_enabled { + assert!( + matches!(spec.stage, Stage::Stable | Stage::Removed), + "feature `{}` is enabled by default but is not stable/removed ({:?})", + spec.key, + spec.stage + ); + } + } + } + + #[test] + fn use_linux_sandbox_bwrap_is_removed() { + assert_eq!(Feature::UseLinuxSandboxBwrap.stage(), Stage::Removed); + assert_eq!(Feature::UseLinuxSandboxBwrap.default_enabled(), false); + } + + #[test] + fn js_repl_is_experimental_and_user_toggleable() { + let spec = Feature::JsRepl.info(); + let stage = spec.stage; + let expected_node_version = include_str!("../../node-version.txt").trim_end(); + + assert!(matches!(stage, Stage::Experimental { .. 
})); + assert_eq!(stage.experimental_menu_name(), Some("JavaScript REPL")); + assert_eq!( + stage.experimental_menu_description().map(str::to_owned), + Some(format!( + "Enable a persistent Node-backed JavaScript REPL for interactive website debugging and other inline JavaScript execution capabilities. Requires Node >= v{expected_node_version} installed." + )) + ); + assert_eq!(Feature::JsRepl.default_enabled(), false); + } + + #[test] + fn guardian_approval_is_experimental_and_user_toggleable() { + let spec = Feature::GuardianApproval.info(); + let stage = spec.stage; + + assert!(matches!(stage, Stage::Experimental { .. })); + assert_eq!(stage.experimental_menu_name(), Some("Guardian Approvals")); + assert_eq!( + stage.experimental_menu_description().map(str::to_owned), + Some( + "When Codex needs approval for higher-risk actions (e.g. sandbox escapes or blocked network access), route eligible approval requests to a carefully-prompted security reviewer subagent rather than blocking the agent on your input. 
This can consume significantly more tokens because it runs a subagent on every approval request.".to_string() + ) + ); + assert_eq!(stage.experimental_announcement(), None); + assert_eq!(Feature::GuardianApproval.default_enabled(), false); + } + + #[test] + fn request_permissions_tool_is_under_development() { + assert_eq!( + Feature::RequestPermissionsTool.stage(), + Stage::UnderDevelopment + ); + assert_eq!(Feature::RequestPermissionsTool.default_enabled(), false); + } + + #[test] + fn image_generation_is_under_development() { + assert_eq!(Feature::ImageGeneration.stage(), Stage::UnderDevelopment); + assert_eq!(Feature::ImageGeneration.default_enabled(), false); + } + + #[test] + fn collab_is_legacy_alias_for_multi_agent() { + assert_eq!(feature_for_key("multi_agent"), Some(Feature::Collab)); + assert_eq!(feature_for_key("collab"), Some(Feature::Collab)); + } + + #[test] + fn apps_require_feature_flag_and_chatgpt_auth() { + let mut features = Features::with_defaults(); + assert!(!features.apps_enabled_for_auth(/*auth*/ None)); + + features.enable(Feature::Apps); + assert!(!features.apps_enabled_for_auth(/*auth*/ None)); + + let api_key_auth = CodexAuth::from_api_key("test-api-key"); + assert!(!features.apps_enabled_for_auth(Some(&api_key_auth))); + + let chatgpt_auth = CodexAuth::create_dummy_chatgpt_auth_for_testing(); + assert!(features.apps_enabled_for_auth(Some(&chatgpt_auth))); + } + + #[test] + fn agent_function_call_inbox_is_under_development() { + assert_eq!( + Feature::AgentFunctionCallInbox.stage(), + Stage::UnderDevelopment + ); + assert_eq!(Feature::AgentFunctionCallInbox.default_enabled(), false); + assert_eq!( + feature_for_key("agent_function_call_inbox"), + Some(Feature::AgentFunctionCallInbox) + ); + } + + #[test] + fn agent_prompt_and_watchdog_features_use_canonical_keys() { + assert_eq!( + feature_for_key("agent_prompt_injection"), + Some(Feature::AgentPromptInjection) + ); + assert_eq!( + feature_for_key("agent_watchdog"), + 
Some(Feature::AgentWatchdog) + ); + } +} diff --git a/codex-rs/mcp-server/src/message_processor.rs b/codex-rs/mcp-server/src/message_processor.rs index 32d05d1f93..ab4c75fe88 100644 --- a/codex-rs/mcp-server/src/message_processor.rs +++ b/codex-rs/mcp-server/src/message_processor.rs @@ -65,6 +65,8 @@ impl MessageProcessor { config.as_ref(), auth_manager, SessionSource::Mcp, + config.model_catalog.clone(), + config.custom_models.clone(), CollaborationModesConfig { default_mode_request_user_input: config .features diff --git a/codex-rs/mcp-server/tests/suite/codex_tool.rs b/codex-rs/mcp-server/tests/suite/codex_tool.rs index 323d07264d..57ca890cbf 100644 --- a/codex-rs/mcp-server/tests/suite/codex_tool.rs +++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs @@ -434,7 +434,9 @@ async fn codex_tool_passes_base_instructions() -> anyhow::Result<()> { "expected permissions developer message, got {developer_contents:?}" ); assert!( - developer_contents.contains(&"Foreshadow upcoming tool calls."), + developer_contents + .iter() + .any(|content| content.contains("Foreshadow upcoming tool calls.")), "expected developer instructions in developer messages, got {developer_contents:?}" ); diff --git a/codex-rs/model-provider/src/amazon_bedrock/catalog.rs b/codex-rs/model-provider/src/amazon_bedrock/catalog.rs index 4ca2cb891e..257c0a18f0 100644 --- a/codex-rs/model-provider/src/amazon_bedrock/catalog.rs +++ b/codex-rs/model-provider/src/amazon_bedrock/catalog.rs @@ -38,6 +38,7 @@ pub(crate) fn static_model_catalog() -> ModelsResponse { fn gpt_5_4_cmb_bedrock_model(priority: i32) -> ModelInfo { ModelInfo { slug: GPT_5_4_CMB_MODEL_ID.to_string(), + request_model: None, display_name: "gpt-5.4".to_string(), description: Some("Strong model for everyday coding.".to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), @@ -74,6 +75,7 @@ fn gpt_5_4_cmb_bedrock_model(priority: i32) -> ModelInfo { fn bedrock_oss_model(slug: &str, display_name: &str, priority: i32) -> ModelInfo 
{ ModelInfo { slug: slug.to_string(), + request_model: None, display_name: display_name.to_string(), description: Some(display_name.to_string()), default_reasoning_level: Some(ReasoningEffort::Medium), diff --git a/codex-rs/model-provider/src/amazon_bedrock/mod.rs b/codex-rs/model-provider/src/amazon_bedrock/mod.rs index b9987ed2a8..d642aaea31 100644 --- a/codex-rs/model-provider/src/amazon_bedrock/mod.rs +++ b/codex-rs/model-provider/src/amazon_bedrock/mod.rs @@ -2,6 +2,7 @@ mod auth; mod catalog; mod mantle; +use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; @@ -11,6 +12,7 @@ use codex_login::AuthManager; use codex_login::CodexAuth; use codex_model_provider_info::ModelProviderAwsAuthInfo; use codex_model_provider_info::ModelProviderInfo; +use codex_models_manager::CustomModelConfig; use codex_models_manager::collaboration_mode_presets::CollaborationModesConfig; use codex_models_manager::manager::SharedModelsManager; use codex_models_manager::manager::StaticModelsManager; @@ -94,11 +96,13 @@ impl ModelProvider for AmazonBedrockModelProvider { &self, _codex_home: PathBuf, config_model_catalog: Option, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, ) -> SharedModelsManager { Arc::new(StaticModelsManager::new( /*auth_manager*/ None, config_model_catalog.unwrap_or_else(static_model_catalog), + custom_models, collaboration_modes_config, )) } diff --git a/codex-rs/model-provider/src/provider.rs b/codex-rs/model-provider/src/provider.rs index b6ce0da3cd..d631f0641a 100644 --- a/codex-rs/model-provider/src/provider.rs +++ b/codex-rs/model-provider/src/provider.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::fmt; use std::path::PathBuf; use std::sync::Arc; @@ -7,6 +8,7 @@ use codex_api::SharedAuthProvider; use codex_login::AuthManager; use codex_login::CodexAuth; use codex_model_provider_info::ModelProviderInfo; +use codex_models_manager::CustomModelConfig; use 
codex_models_manager::collaboration_mode_presets::CollaborationModesConfig; use codex_models_manager::manager::OpenAiModelsManager; use codex_models_manager::manager::SharedModelsManager; @@ -118,6 +120,7 @@ pub trait ModelProvider: fmt::Debug + Send + Sync { &self, codex_home: PathBuf, config_model_catalog: Option, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, ) -> SharedModelsManager; } @@ -213,12 +216,14 @@ impl ModelProvider for ConfiguredModelProvider { &self, codex_home: PathBuf, config_model_catalog: Option, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, ) -> SharedModelsManager { match config_model_catalog { Some(model_catalog) => Arc::new(StaticModelsManager::new( self.auth_manager.clone(), model_catalog, + custom_models, collaboration_modes_config, )), None => { @@ -230,6 +235,7 @@ impl ModelProvider for ConfiguredModelProvider { codex_home, endpoint, self.auth_manager.clone(), + custom_models, collaboration_modes_config, )) } @@ -448,6 +454,7 @@ mod tests { let manager = provider.models_manager( test_codex_home(), /*config_model_catalog*/ None, + HashMap::new(), Default::default(), ); @@ -491,6 +498,7 @@ mod tests { Some(ModelsResponse { models: vec![custom_model], }), + HashMap::new(), Default::default(), ); @@ -531,6 +539,7 @@ mod tests { let manager = provider.models_manager( test_codex_home(), /*config_model_catalog*/ None, + HashMap::new(), Default::default(), ); let catalog = manager.raw_model_catalog(RefreshStrategy::Online).await; diff --git a/codex-rs/models-manager/src/config.rs b/codex-rs/models-manager/src/config.rs index b64add40fc..9e2f726260 100644 --- a/codex-rs/models-manager/src/config.rs +++ b/codex-rs/models-manager/src/config.rs @@ -1,4 +1,15 @@ use codex_protocol::openai_models::ModelsResponse; +use std::collections::HashMap; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CustomModelConfig { + /// Provider-facing model slug used on API requests. 
+ pub model: String, + /// Optional context window override applied when this alias is selected. + pub model_context_window: Option, + /// Optional auto-compaction token limit override applied when this alias is selected. + pub model_auto_compact_token_limit: Option, +} #[derive(Debug, Clone, Default)] pub struct ModelsManagerConfig { @@ -9,4 +20,11 @@ pub struct ModelsManagerConfig { pub personality_enabled: bool, pub model_supports_reasoning_summaries: Option, pub model_catalog: Option, + pub custom_models: HashMap, +} + +impl ModelsManagerConfig { + pub(crate) fn custom_model_alias(&self, alias: &str) -> Option<&CustomModelConfig> { + self.custom_models.get(alias) + } } diff --git a/codex-rs/models-manager/src/lib.rs b/codex-rs/models-manager/src/lib.rs index 8bf30d0b60..70301c57df 100644 --- a/codex-rs/models-manager/src/lib.rs +++ b/codex-rs/models-manager/src/lib.rs @@ -7,6 +7,7 @@ pub mod model_presets; pub mod test_support; pub use codex_app_server_protocol::AuthMode; +pub use config::CustomModelConfig; pub use config::ModelsManagerConfig; /// Load the bundled model catalog shipped with `codex-models-manager`. 
diff --git a/codex-rs/models-manager/src/manager.rs b/codex-rs/models-manager/src/manager.rs index f13f2df60d..495ba31e03 100644 --- a/codex-rs/models-manager/src/manager.rs +++ b/codex-rs/models-manager/src/manager.rs @@ -1,6 +1,7 @@ use super::cache::ModelsCacheManager; use crate::collaboration_mode_presets::CollaborationModesConfig; use crate::collaboration_mode_presets::builtin_collaboration_mode_presets; +use crate::config::CustomModelConfig; use crate::config::ModelsManagerConfig; use crate::model_info; use async_trait::async_trait; @@ -10,6 +11,8 @@ use codex_protocol::error::Result as CoreResult; use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelsResponse; +use std::collections::HashMap; +use std::collections::HashSet; use std::fmt; use std::path::PathBuf; use std::sync::Arc; @@ -104,19 +107,46 @@ pub trait ModelsManager: fmt::Debug + Send + Sync { /// Return the auth manager used for picker filtering. fn auth_manager(&self) -> Option<&AuthManager>; + /// Return the startup snapshot of custom picker aliases. + fn custom_models(&self) -> &HashMap; + /// Build picker-ready presets from the active catalog snapshot. 
fn build_available_models(&self, mut remote_models: Vec) -> Vec { remote_models.sort_by(|a, b| a.priority.cmp(&b.priority)); - let mut presets: Vec = remote_models.into_iter().map(Into::into).collect(); + let mut presets: Vec = remote_models.iter().cloned().map(Into::into).collect(); + let mut existing_models: HashSet = + presets.iter().map(|preset| preset.model.clone()).collect(); + let mut custom_presets = Vec::new(); + + let mut custom_models = self.custom_models().iter().collect::>(); + custom_models.sort_by(|(left, _), (right, _)| left.cmp(right)); + for (alias, custom_model) in custom_models { + if existing_models.contains(alias) { + continue; + } + + let model_info = + construct_model_info_for_custom_alias(alias, custom_model, &remote_models); + let mut preset = ModelPreset::from(model_info); + preset.show_in_picker = true; + custom_presets.push(preset); + existing_models.insert(alias.to_string()); + } + let uses_codex_backend = self .auth_manager() .is_some_and(AuthManager::current_auth_uses_codex_backend); presets = ModelPreset::filter_by_auth(presets, uses_codex_backend); + custom_presets = ModelPreset::filter_by_auth(custom_presets, uses_codex_backend); ModelPreset::mark_default_by_picker_visibility(&mut presets); + if !presets.iter().any(|preset| preset.is_default) { + ModelPreset::mark_default_by_picker_visibility(&mut custom_presets); + } - presets + custom_presets.extend(presets); + custom_presets } /// List collaboration mode presets. 
@@ -161,7 +191,14 @@ pub trait ModelsManager: fmt::Debug + Send + Sync { async fn get_model_info(&self, model: &str, config: &ModelsManagerConfig) -> ModelInfo { async move { let remote_models = self.get_remote_models().await; - construct_model_info_from_candidates(model, &remote_models, config) + construct_model_info_from_candidates_with_custom( + model, + &remote_models, + config, + config + .custom_model_alias(model) + .or_else(|| self.custom_models().get(model)), + ) } .instrument(tracing::info_span!("get_model_info", model = model)) .await @@ -180,6 +217,7 @@ pub type SharedModelsManager = Arc; #[derive(Debug)] pub struct OpenAiModelsManager { remote_models: RwLock>, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, etag: RwLock>, cache_manager: ModelsCacheManager, @@ -191,6 +229,7 @@ pub struct OpenAiModelsManager { #[derive(Debug)] pub struct StaticModelsManager { remote_models: Vec, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, auth_manager: Option>, } @@ -201,6 +240,7 @@ impl OpenAiModelsManager { codex_home: PathBuf, endpoint_client: Arc, auth_manager: Option>, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, ) -> Self { let cache_path = codex_home.join(MODEL_CACHE_FILE); @@ -208,6 +248,7 @@ impl OpenAiModelsManager { let remote_models = load_remote_models_from_file().unwrap_or_default(); Self { remote_models: RwLock::new(remote_models), + custom_models, collaboration_modes_config, etag: RwLock::new(None), cache_manager, @@ -222,10 +263,12 @@ impl StaticModelsManager { pub fn new( auth_manager: Option>, model_catalog: ModelsResponse, + custom_models: HashMap, collaboration_modes_config: CollaborationModesConfig, ) -> Self { Self { remote_models: model_catalog.models, + custom_models, collaboration_modes_config, auth_manager, } @@ -255,6 +298,10 @@ impl ModelsManager for OpenAiModelsManager { self.auth_manager.as_deref() } + fn custom_models(&self) -> &HashMap { 
+ &self.custom_models + } + fn list_collaboration_modes(&self) -> Vec { builtin_collaboration_mode_presets(self.collaboration_modes_config) } @@ -390,6 +437,10 @@ impl ModelsManager for StaticModelsManager { self.auth_manager.as_deref() } + fn custom_models(&self) -> &HashMap { + &self.custom_models + } + fn list_collaboration_modes(&self) -> Vec { builtin_collaboration_mode_presets(self.collaboration_modes_config) } @@ -451,6 +502,32 @@ pub(crate) fn construct_model_info_from_candidates( candidates: &[ModelInfo], config: &ModelsManagerConfig, ) -> ModelInfo { + construct_model_info_from_candidates_with_custom( + model, + candidates, + config, + config.custom_model_alias(model), + ) +} + +fn construct_model_info_from_candidates_with_custom( + model: &str, + candidates: &[ModelInfo], + config: &ModelsManagerConfig, + custom_model: Option<&CustomModelConfig>, +) -> ModelInfo { + if let Some(custom_model) = custom_model { + let mut config = config.clone(); + config.model_context_window = custom_model + .model_context_window + .or(config.model_context_window); + config.model_auto_compact_token_limit = custom_model + .model_auto_compact_token_limit + .or(config.model_auto_compact_token_limit); + let model_info = construct_model_info_for_custom_alias(model, custom_model, candidates); + return model_info::with_config_overrides(model_info, &config); + } + // First use the normal longest-prefix match. If that misses, allow a narrowly scoped // retry for namespaced slugs like `custom/gpt-5.3-codex`. 
let remote = find_model_by_longest_prefix(model, candidates) @@ -467,6 +544,36 @@ pub(crate) fn construct_model_info_from_candidates( model_info::with_config_overrides(model_info, config) } +fn construct_model_info_for_custom_alias( + alias: &str, + custom_model: &CustomModelConfig, + candidates: &[ModelInfo], +) -> ModelInfo { + let remote = find_model_by_longest_prefix(&custom_model.model, candidates) + .or_else(|| find_model_by_namespaced_suffix(&custom_model.model, candidates)); + if let Some(remote) = remote { + ModelInfo { + slug: alias.to_string(), + request_model: Some(custom_model.model.clone()), + display_name: alias.to_string(), + max_context_window: custom_model + .model_context_window + .or(remote.max_context_window), + used_fallback_model_metadata: false, + ..remote + } + } else { + let mut fallback_model = model_info::model_info_from_slug(&custom_model.model); + fallback_model.slug = alias.to_string(); + fallback_model.request_model = Some(custom_model.model.clone()); + fallback_model.display_name = alias.to_string(); + fallback_model.max_context_window = custom_model + .model_context_window + .or(fallback_model.max_context_window); + fallback_model + } +} + #[cfg(test)] #[path = "manager_tests.rs"] mod tests; diff --git a/codex-rs/models-manager/src/manager_tests.rs b/codex-rs/models-manager/src/manager_tests.rs index 9519f9713a..d27928d476 100644 --- a/codex-rs/models-manager/src/manager_tests.rs +++ b/codex-rs/models-manager/src/manager_tests.rs @@ -1,5 +1,6 @@ use super::*; use crate::ModelsManagerConfig; +use crate::config::CustomModelConfig; use chrono::Utc; use codex_app_server_protocol::AuthMode; use codex_login::AuthCredentialsStoreMode; @@ -12,6 +13,7 @@ use codex_login::TokenData; use codex_protocol::openai_models::ModelsResponse; use pretty_assertions::assert_eq; use serde_json::json; +use std::collections::HashMap; use std::collections::VecDeque; use std::path::Path; use std::sync::Arc; @@ -191,14 +193,23 @@ fn 
openai_manager_for_tests_with_auth( codex_home, endpoint_client, auth_manager, + HashMap::new(), CollaborationModesConfig::default(), ) } fn static_manager_for_tests(model_catalog: ModelsResponse) -> StaticModelsManager { + static_manager_with_custom_models_for_tests(model_catalog, HashMap::new()) +} + +fn static_manager_with_custom_models_for_tests( + model_catalog: ModelsResponse, + custom_models: HashMap, +) -> StaticModelsManager { StaticModelsManager::new( /*auth_manager*/ None, model_catalog, + custom_models, CollaborationModesConfig::default(), ) } @@ -735,6 +746,7 @@ async fn static_manager_reads_latest_auth_mode() { ModelsResponse { models: vec![chatgpt_only_model, api_model], }, + HashMap::new(), CollaborationModesConfig::default(), ); @@ -759,6 +771,167 @@ async fn static_manager_reads_latest_auth_mode() { ); } +#[tokio::test] +async fn get_model_info_uses_custom_alias_metadata_and_request_model() { + let mut config = ModelsManagerConfig::default(); + let alias = "gpt-5.4 1m".to_string(); + let custom_model = CustomModelConfig { + model: "gpt-5.4".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(800_000), + }; + config + .custom_models + .insert(alias.clone(), custom_model.clone()); + let manager = static_manager_with_custom_models_for_tests( + ModelsResponse { + models: vec![remote_model("gpt-5.4", "GPT 5.4", /*priority*/ 0)], + }, + HashMap::from([(alias.clone(), custom_model)]), + ); + + let model_info = manager.get_model_info(&alias, &config).await; + + assert_eq!(model_info.slug, alias); + assert_eq!(model_info.request_model.as_deref(), Some("gpt-5.4")); + assert_eq!(model_info.context_window, Some(1_000_000)); + assert_eq!(model_info.auto_compact_token_limit, Some(800_000)); +} + +#[tokio::test] +async fn get_model_info_prefers_custom_alias_context_over_global_config() { + let mut config = ModelsManagerConfig { + model_context_window: Some(250_000), + model_auto_compact_token_limit: Some(200_000), + 
..Default::default() + }; + let alias = "gpt-5.4 1m".to_string(); + let custom_model = CustomModelConfig { + model: "gpt-5.4".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(800_000), + }; + config + .custom_models + .insert(alias.clone(), custom_model.clone()); + let manager = static_manager_with_custom_models_for_tests( + ModelsResponse { + models: vec![remote_model("gpt-5.4", "GPT 5.4", /*priority*/ 0)], + }, + HashMap::from([(alias.clone(), custom_model)]), + ); + + let model_info = manager.get_model_info(&alias, &config).await; + + assert_eq!(model_info.context_window, Some(1_000_000)); + assert_eq!(model_info.auto_compact_token_limit, Some(800_000)); +} + +#[tokio::test] +async fn get_model_info_prefers_active_config_alias_over_startup_snapshot() { + let alias = "gpt-5.4 1m".to_string(); + let mut config = ModelsManagerConfig::default(); + config.custom_models.insert( + alias.clone(), + CustomModelConfig { + model: "gpt-5.4-updated".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(900_000), + }, + ); + let manager = static_manager_with_custom_models_for_tests( + ModelsResponse { + models: vec![ + remote_model("gpt-5.4", "GPT 5.4", /*priority*/ 0), + remote_model("gpt-5.4-updated", "GPT 5.4 Updated", /*priority*/ 1), + ], + }, + HashMap::from([( + alias.clone(), + CustomModelConfig { + model: "gpt-5.4".to_string(), + model_context_window: Some(500_000), + model_auto_compact_token_limit: Some(400_000), + }, + )]), + ); + + let model_info = manager.get_model_info(&alias, &config).await; + + assert_eq!(model_info.slug, alias); + assert_eq!(model_info.request_model.as_deref(), Some("gpt-5.4-updated")); + assert_eq!(model_info.context_window, Some(1_000_000)); + assert_eq!(model_info.auto_compact_token_limit, Some(900_000)); +} + +#[test] +fn build_available_models_includes_custom_aliases() { + let manager = static_manager_with_custom_models_for_tests( + ModelsResponse { + 
models: vec![remote_model("gpt-5.4", "GPT 5.4", /*priority*/ 0)], + }, + HashMap::from([( + "gpt-5.4 1m".to_string(), + CustomModelConfig { + model: "gpt-5.4".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(800_000), + }, + )]), + ); + + let available = manager.build_available_models(vec![remote_model( + "gpt-5.4", "GPT 5.4", /*priority*/ 0, + )]); + let alias = available + .iter() + .find(|preset| preset.model == "gpt-5.4 1m") + .expect("custom alias should be listed"); + + assert!(alias.show_in_picker); + assert_eq!(alias.display_name, "gpt-5.4 1m"); +} + +#[test] +fn build_available_models_lists_custom_aliases_before_remote_models() { + let manager = static_manager_with_custom_models_for_tests( + ModelsResponse { + models: vec![ + remote_model("gpt-5.4", "GPT 5.4", /*priority*/ 0), + remote_model("gpt-5.3", "GPT 5.3", /*priority*/ 1), + ], + }, + HashMap::from([( + "gpt-5.4 1m".to_string(), + CustomModelConfig { + model: "gpt-5.4".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(800_000), + }, + )]), + ); + + let available = manager.build_available_models(vec![ + remote_model("gpt-5.4", "GPT 5.4", /*priority*/ 0), + remote_model("gpt-5.3", "GPT 5.3", /*priority*/ 1), + ]); + + assert_eq!( + available + .iter() + .map(|preset| preset.model.as_str()) + .collect::>(), + vec!["gpt-5.4 1m", "gpt-5.4", "gpt-5.3"] + ); + assert_eq!( + available + .iter() + .find(|preset| preset.is_default) + .map(|preset| preset.model.as_str()), + Some("gpt-5.4") + ); +} + #[test] fn bundled_models_json_roundtrips() { let response = crate::bundled_models_response() diff --git a/codex-rs/models-manager/src/model_info.rs b/codex-rs/models-manager/src/model_info.rs index 8e8abae549..7797cd18fb 100644 --- a/codex-rs/models-manager/src/model_info.rs +++ b/codex-rs/models-manager/src/model_info.rs @@ -67,6 +67,7 @@ pub fn model_info_from_slug(slug: &str) -> ModelInfo { warn!("Unknown model {slug} is 
used. This will use fallback model metadata."); ModelInfo { slug: slug.to_string(), + request_model: None, display_name: slug.to_string(), description: None, default_reasoning_level: None, diff --git a/codex-rs/models-manager/src/model_info_overrides_tests.rs b/codex-rs/models-manager/src/model_info_overrides_tests.rs index c499938ed4..e1565b6967 100644 --- a/codex-rs/models-manager/src/model_info_overrides_tests.rs +++ b/codex-rs/models-manager/src/model_info_overrides_tests.rs @@ -1,6 +1,11 @@ use crate::ModelsManagerConfig; +use crate::collaboration_mode_presets::CollaborationModesConfig; +use crate::config::CustomModelConfig; use crate::manager::ModelsManager; +use crate::manager::StaticModelsManager; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::WebSearchToolType; +use codex_protocol::openai_models::default_input_modalities; use pretty_assertions::assert_eq; use tempfile::TempDir; @@ -43,3 +48,64 @@ async fn offline_model_info_with_tool_output_override() { TruncationPolicyConfig::tokens(/*limit*/ 123) ); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn custom_model_alias_applies_request_model_and_context_overrides() { + let mut config = ModelsManagerConfig::default(); + config.custom_models.insert( + "gpt-5.4 1m".to_string(), + CustomModelConfig { + model: "gpt-5.4".to_string(), + model_context_window: Some(1_000_000), + model_auto_compact_token_limit: Some(900_000), + }, + ); + let manager = StaticModelsManager::new( + /*auth_manager*/ None, + codex_protocol::openai_models::ModelsResponse { + models: vec![codex_protocol::openai_models::ModelInfo { + slug: "gpt-5.4".to_string(), + request_model: None, + display_name: "GPT-5.4".to_string(), + description: Some("desc".to_string()), + default_reasoning_level: None, + supported_reasoning_levels: Vec::new(), + shell_type: codex_protocol::openai_models::ConfigShellToolType::ShellCommand, + visibility: 
codex_protocol::openai_models::ModelVisibility::List, + supported_in_api: true, + priority: 1, + additional_speed_tiers: Vec::new(), + availability_nux: None, + upgrade: None, + base_instructions: "base".to_string(), + model_messages: None, + supports_reasoning_summaries: false, + default_reasoning_summary: codex_protocol::config_types::ReasoningSummary::Auto, + support_verbosity: false, + default_verbosity: None, + supports_search_tool: false, + apply_patch_tool_type: None, + truncation_policy: TruncationPolicyConfig::bytes(/*limit*/ 10_000), + supports_parallel_tool_calls: false, + supports_image_detail_original: false, + context_window: Some(272_000), + max_context_window: None, + auto_compact_token_limit: None, + effective_context_window_percent: 95, + experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), + web_search_tool_type: WebSearchToolType::Text, + used_fallback_model_metadata: false, + }], + }, + config.custom_models.clone(), + CollaborationModesConfig::default(), + ); + + let model_info = manager.get_model_info("gpt-5.4 1m", &config).await; + + assert_eq!(model_info.slug, "gpt-5.4 1m"); + assert_eq!(model_info.request_model.as_deref(), Some("gpt-5.4")); + assert_eq!(model_info.context_window, Some(1_000_000)); + assert_eq!(model_info.auto_compact_token_limit, Some(900_000)); +} diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 2d7dba7d54..699c6da627 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -613,6 +613,11 @@ pub enum ResponseInputItem { #[ts(optional)] phase: Option, }, + FunctionCall { + name: String, + arguments: String, + call_id: String, + }, FunctionCallOutput { call_id: String, #[ts(as = "FunctionCallOutputBody")] @@ -1047,6 +1052,17 @@ impl From for ResponseItem { id: None, phase, }, + ResponseInputItem::FunctionCall { + name, + arguments, + call_id, + } => Self::FunctionCall { + id: None, + name, + namespace: None, + arguments, + 
call_id, + }, ResponseInputItem::FunctionCallOutput { call_id, output } => { Self::FunctionCallOutput { call_id, output } } diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 41275e6a6b..503fc22850 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -247,6 +247,11 @@ const fn default_effective_context_window_percent() -> i64 { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, TS, JsonSchema)] pub struct ModelInfo { pub slug: String, + /// Provider-facing model slug to send on API requests. + /// + /// When unset, `slug` is used. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub request_model: Option, pub display_name: String, pub description: Option, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -307,6 +312,10 @@ impl ModelInfo { self.context_window.or(self.max_context_window) } + pub fn request_model_slug(&self) -> &str { + self.request_model.as_deref().unwrap_or(self.slug.as_str()) + } + pub fn auto_compact_token_limit(&self) -> Option { let context_limit = self .resolved_context_window() @@ -539,6 +548,7 @@ mod tests { fn test_model(spec: Option) -> ModelInfo { ModelInfo { slug: "test-model".to_string(), + request_model: None, display_name: "Test Model".to_string(), description: None, default_reasoning_level: None, diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index f8c830184c..f4100097e5 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -103,6 +103,27 @@ pub const COLLABORATION_MODE_CLOSE_TAG: &str = ""; pub const REALTIME_CONVERSATION_OPEN_TAG: &str = ""; pub const REALTIME_CONVERSATION_CLOSE_TAG: &str = ""; pub const USER_MESSAGE_BEGIN: &str = "## My request for Codex:"; +pub const AGENT_INBOX_KIND: &str = "agent_inbox"; +pub const AGENT_INBOX_MESSAGE_PREFIX: &str = "[agent_inbox:"; + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, 
Eq, JsonSchema)] +pub struct AgentInboxPayload { + pub injected: bool, + pub kind: String, + pub sender_thread_id: ThreadId, + pub message: String, +} + +impl AgentInboxPayload { + pub fn new(sender_thread_id: ThreadId, message: String) -> Self { + Self { + injected: true, + kind: AGENT_INBOX_KIND.to_string(), + sender_thread_id, + message, + } + } +} #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema)] pub struct TurnEnvironmentSelection { @@ -406,6 +427,9 @@ pub enum Op { /// This server sends [`EventMsg::TurnAborted`] in response. Interrupt, + /// Mark owner-side input activity without starting or steering a turn. + NoteOwnerActivity, + /// Terminate all running background terminal processes for this thread. /// Use this when callers intentionally want to stop long-lived background shells. CleanBackgroundTerminals, @@ -516,6 +540,9 @@ pub enum Op { personality: Option, }, + /// Inject non-user response items into an existing turn, or start a turn if needed. + InjectResponseItems { items: Vec }, + /// Similar to [`Op::UserInput`], but contains additional context required /// for a turn of a [`crate::codex_thread::CodexThread`]. UserTurn { @@ -876,6 +903,7 @@ impl Op { pub fn kind(&self) -> &'static str { match self { Self::Interrupt => "interrupt", + Self::NoteOwnerActivity => "note_owner_activity", Self::CleanBackgroundTerminals => "clean_background_terminals", Self::RealtimeConversationStart(_) => "realtime_conversation_start", Self::RealtimeConversationAudio(_) => "realtime_conversation_audio", @@ -893,6 +921,7 @@ impl Op { Self::UserInputAnswer { .. } => "user_input_answer", Self::RequestPermissionsResponse { .. } => "request_permissions_response", Self::DynamicToolResponse { .. } => "dynamic_tool_response", + Self::InjectResponseItems { .. } => "inject_response_items", Self::AddToHistory { .. } => "add_to_history", Self::GetHistoryEntryRequest { .. 
} => "get_history_entry_request", Self::ListMcpTools => "list_mcp_tools", @@ -2454,12 +2483,20 @@ impl InitialHistory { InitialHistory::Resumed(resumed) => { resumed.history.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => meta_line.meta.forked_from_id, - _ => None, + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, }) } InitialHistory::Forked(items) => items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => Some(meta_line.meta.id), - _ => None, + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, }), } } @@ -2489,7 +2526,11 @@ impl InitialHistory { .iter() .filter_map(|ri| match ri { RolloutItem::EventMsg(ev) => Some(ev.clone()), - _ => None, + RolloutItem::SessionMeta(_) + | RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) => None, }) .collect(), ), @@ -2498,7 +2539,11 @@ impl InitialHistory { .iter() .filter_map(|ri| match ri { RolloutItem::EventMsg(ev) => Some(ev.clone()), - _ => None, + RolloutItem::SessionMeta(_) + | RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) => None, }) .collect(), ), @@ -2512,12 +2557,20 @@ impl InitialHistory { InitialHistory::Resumed(resumed) => { resumed.history.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => meta_line.meta.base_instructions.clone(), - _ => None, + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, }) } InitialHistory::Forked(items) => items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => meta_line.meta.base_instructions.clone(), - 
_ => None, + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, }), } } @@ -2528,12 +2581,20 @@ impl InitialHistory { InitialHistory::Resumed(resumed) => { resumed.history.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => meta_line.meta.dynamic_tools.clone(), - _ => None, + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, }) } InitialHistory::Forked(items) => items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => meta_line.meta.dynamic_tools.clone(), - _ => None, + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, }), } } @@ -2542,7 +2603,11 @@ impl InitialHistory { fn session_cwd_from_items(items: &[RolloutItem]) -> Option { items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => Some(meta_line.meta.cwd.clone()), - _ => None, + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => None, }) } @@ -2781,10 +2846,45 @@ pub struct SessionMetaLine { pub git: Option, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)] +pub struct ForkReferenceItem { + pub rollout_path: PathBuf, + #[serde( + deserialize_with = "deserialize_fork_reference_nth_user_message", + default + )] + pub nth_user_message: i64, +} + +fn deserialize_fork_reference_nth_user_message<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + let Value::Number(number) = value else { + return Err(serde::de::Error::custom( + "expected integer fork reference boundary", + )); + }; + + if let 
Some(nth_user_message) = number.as_i64() { + return Ok(nth_user_message); + } + + if number.as_u64().is_some() { + return Ok(i64::MAX); + } + + Err(serde::de::Error::custom( + "expected integer fork reference boundary", + )) +} + #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)] #[serde(tag = "type", content = "payload", rename_all = "snake_case")] pub enum RolloutItem { SessionMeta(SessionMetaLine), + ForkReference(ForkReferenceItem), ResponseItem(ResponseItem), Compacted(CompactedItem), TurnContext(TurnContextItem), @@ -4099,6 +4199,23 @@ mod tests { ); } + #[test] + fn fork_reference_item_deserializes_legacy_usize_max_boundary() { + let item: ForkReferenceItem = serde_json::from_value(json!({ + "rollout_path": "/tmp/rollout.jsonl", + "nth_user_message": u64::MAX, + })) + .expect("legacy fork reference item should deserialize"); + + assert_eq!( + item, + ForkReferenceItem { + rollout_path: PathBuf::from("/tmp/rollout.jsonl"), + nth_user_message: i64::MAX, + } + ); + } + #[test] fn session_source_restriction_product_does_not_guess_subagent_products() { assert_eq!( diff --git a/codex-rs/rollout/src/lib.rs b/codex-rs/rollout/src/lib.rs index 4046beb635..f8caeeefff 100644 --- a/codex-rs/rollout/src/lib.rs +++ b/codex-rs/rollout/src/lib.rs @@ -41,6 +41,7 @@ pub use list::ThreadListLayout; pub use list::ThreadSortKey; pub use list::ThreadsPage; pub use list::find_archived_thread_path_by_id_str; +pub use list::find_or_unarchive_thread_path_by_id_str; pub use list::find_thread_path_by_id_str; #[deprecated(note = "use find_thread_path_by_id_str")] pub use list::find_thread_path_by_id_str as find_conversation_path_by_id_str; @@ -50,6 +51,7 @@ pub use list::parse_cursor; pub use list::read_head_for_summary; pub use list::read_session_meta_line; pub use list::read_thread_item_from_rollout; +pub use list::resolve_fork_reference_rollout_path; pub use list::rollout_date_parts; pub use metadata::builder_from_items; pub use policy::EventPersistenceMode; diff --git 
a/codex-rs/rollout/src/list.rs b/codex-rs/rollout/src/list.rs index bdb7198835..b19e7b9645 100644 --- a/codex-rs/rollout/src/list.rs +++ b/codex-rs/rollout/src/list.rs @@ -1130,6 +1130,9 @@ async fn read_head_summary(path: &Path, head_limit: usize) -> io::Result { // Not included in `head`; skip. } + RolloutItem::ForkReference(_) => { + // Not included in `head`; skip. + } RolloutItem::EventMsg(ev) => { if let EventMsg::UserMessage(user) = ev { summary.saw_user_event = true; @@ -1181,6 +1184,7 @@ pub async fn read_head_for_summary(path: &Path) -> io::Result {} RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => {} @@ -1312,9 +1316,97 @@ async fn find_thread_path_by_id_str_in_subdir( Ok(found) } -/// Locate a recorded thread rollout file by its UUID string using the existing -/// paginated listing implementation. Returns `Ok(Some(path))` if found, `Ok(None)` if not present -/// or the id is invalid. +async fn try_unarchive_thread_path_by_id_str( + codex_home: &Path, + id_str: &str, +) -> io::Result> { + let Ok(requested_id) = Uuid::parse_str(id_str) else { + return Ok(None); + }; + let Some(archived_path) = + find_thread_path_by_id_str_in_subdir(codex_home, ARCHIVED_SESSIONS_SUBDIR, id_str).await? 
+ else { + return Ok(None); + }; + let archived_root = codex_home.join(ARCHIVED_SESSIONS_SUBDIR); + if archived_path.strip_prefix(&archived_root).is_err() { + tracing::error!( + "archived rollout candidate for thread {id_str} is not under {}: {}", + archived_root.display(), + archived_path.display() + ); + return Ok(None); + } + + let Some(file_name) = archived_path.file_name().map(OsStr::to_owned) else { + tracing::error!( + "archived rollout path for thread {id_str} missing file name: {}", + archived_path.display() + ); + return Ok(None); + }; + let file_name_str = file_name.to_string_lossy(); + let Some((_created_at, file_id)) = parse_timestamp_uuid_from_filename(&file_name_str) else { + tracing::error!( + "archived rollout path for thread {id_str} has invalid rollout filename: {}", + archived_path.display() + ); + return Ok(None); + }; + if file_id != requested_id { + tracing::error!( + "archived rollout path for thread {id_str} has mismatched rollout filename: {}", + archived_path.display() + ); + return Ok(None); + } + let Some((year, month, day)) = rollout_date_parts(&file_name) else { + tracing::error!( + "archived rollout path for thread {id_str} missing filename timestamp: {}", + archived_path.display() + ); + return Ok(None); + }; + + let restored_dir = codex_home + .join(SESSIONS_SUBDIR) + .join(year) + .join(month) + .join(day); + tokio::fs::create_dir_all(&restored_dir).await?; + let restored_path = restored_dir.join(&file_name); + match tokio::fs::rename(&archived_path, &restored_path).await { + Ok(()) => {} + Err(err) => { + if tokio::fs::try_exists(&restored_path).await.unwrap_or(false) { + tracing::debug!( + "archived rollout for thread {id_str} already restored concurrently to {}", + restored_path.display() + ); + } else { + return Err(err); + } + } + } + + if let Some(state_db_ctx) = state_db::open_if_present(codex_home, "").await + && let Ok(thread_id) = ThreadId::from_string(id_str) + { + let _ = state_db_ctx + .mark_unarchived(thread_id, 
restored_path.as_path()) + .await; + } + + Ok(Some(restored_path)) +} + +/// Locate a recorded active thread rollout file by its UUID string using the existing paginated +/// listing implementation. +/// +/// This helper intentionally has no side effects. Callers that want "resume and restore from +/// archive if needed" semantics must opt in with [`find_or_unarchive_thread_path_by_id_str`] so +/// that other lookup paths (for example fork-reference resolution) do not silently move archived +/// rollouts back into `sessions/`. pub async fn find_thread_path_by_id_str( codex_home: &Path, id_str: &str, @@ -1322,6 +1414,18 @@ pub async fn find_thread_path_by_id_str( find_thread_path_by_id_str_in_subdir(codex_home, SESSIONS_SUBDIR, id_str).await } +/// Locate a thread rollout file by UUID string, restoring it from `archived_sessions/` when +/// needed for resume flows. +pub async fn find_or_unarchive_thread_path_by_id_str( + codex_home: &Path, + id_str: &str, +) -> io::Result> { + if let Some(active_path) = find_thread_path_by_id_str(codex_home, id_str).await? { + return Ok(Some(active_path)); + } + try_unarchive_thread_path_by_id_str(codex_home, id_str).await +} + /// Locate an archived thread rollout file by its UUID string. pub async fn find_archived_thread_path_by_id_str( codex_home: &Path, @@ -1330,6 +1434,40 @@ pub async fn find_archived_thread_path_by_id_str( find_thread_path_by_id_str_in_subdir(codex_home, ARCHIVED_SESSIONS_SUBDIR, id_str).await } +/// Resolve a stored fork-reference rollout path to the current on-disk location. +/// +/// Fork references persist a parent rollout filename. Archive and unarchive move that file +/// between `sessions/` and `archived_sessions/`, so stale stored paths must be repaired by +/// locating the rollout with the stable thread id embedded in the filename. 
+pub async fn resolve_fork_reference_rollout_path( + codex_home: &Path, + rollout_path: &Path, +) -> io::Result { + match tokio::fs::try_exists(rollout_path).await { + Ok(true) => return Ok(rollout_path.to_path_buf()), + Ok(false) => {} + Err(err) => return Err(err), + } + + let Some(file_name) = rollout_path.file_name().and_then(OsStr::to_str) else { + return Ok(rollout_path.to_path_buf()); + }; + let Some((_, thread_uuid)) = parse_timestamp_uuid_from_filename(file_name) else { + return Ok(rollout_path.to_path_buf()); + }; + let thread_id = thread_uuid.to_string(); + + if let Some(active_path) = find_thread_path_by_id_str(codex_home, &thread_id).await? { + return Ok(active_path); + } + if let Some(archived_path) = find_archived_thread_path_by_id_str(codex_home, &thread_id).await? + { + return Ok(archived_path); + } + + Ok(rollout_path.to_path_buf()) +} + /// Extract the `YYYY/MM/DD` directory components from a rollout filename. pub fn rollout_date_parts(file_name: &OsStr) -> Option<(String, String, String)> { let name = file_name.to_string_lossy(); diff --git a/codex-rs/rollout/src/metadata.rs b/codex-rs/rollout/src/metadata.rs index 58d55a887d..5d0aa5d983 100644 --- a/codex-rs/rollout/src/metadata.rs +++ b/codex-rs/rollout/src/metadata.rs @@ -70,7 +70,8 @@ pub fn builder_from_items( ) -> Option { if let Some(session_meta) = items.iter().find_map(|item| match item { RolloutItem::SessionMeta(meta_line) => Some(meta_line), - RolloutItem::ResponseItem(_) + RolloutItem::ForkReference(_) + | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, @@ -126,6 +127,7 @@ pub async fn extract_metadata_from_rollout( RolloutItem::SessionMeta(meta_line) => meta_line.meta.memory_mode.clone(), RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) + | RolloutItem::ForkReference(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, }), diff --git a/codex-rs/rollout/src/policy.rs 
b/codex-rs/rollout/src/policy.rs index 22615623f3..45311a5dc6 100644 --- a/codex-rs/rollout/src/policy.rs +++ b/codex-rs/rollout/src/policy.rs @@ -16,9 +16,10 @@ pub fn is_persisted_response_item(item: &RolloutItem, mode: EventPersistenceMode RolloutItem::ResponseItem(item) => should_persist_response_item(item), RolloutItem::EventMsg(ev) => should_persist_event_msg(ev, mode), // Persist Codex executive markers so we can analyze flows (e.g., compaction, API turns). - RolloutItem::Compacted(_) | RolloutItem::TurnContext(_) | RolloutItem::SessionMeta(_) => { - true - } + RolloutItem::Compacted(_) + | RolloutItem::TurnContext(_) + | RolloutItem::SessionMeta(_) + | RolloutItem::ForkReference(_) => true, } } diff --git a/codex-rs/rollout/src/recorder.rs b/codex-rs/rollout/src/recorder.rs index dc2f08b7ab..e4970be3c2 100644 --- a/codex-rs/rollout/src/recorder.rs +++ b/codex-rs/rollout/src/recorder.rs @@ -894,6 +894,9 @@ impl RolloutRecorder { RolloutItem::ResponseItem(item) => { items.push(RolloutItem::ResponseItem(item)); } + RolloutItem::ForkReference(item) => { + items.push(RolloutItem::ForkReference(item)); + } RolloutItem::Compacted(item) => { items.push(RolloutItem::Compacted(item)); } @@ -920,6 +923,10 @@ impl RolloutRecorder { Ok((items, thread_id, parse_errors)) } + /// Load a rollout for resuming the same thread. + /// + /// This preserves the rollout's existing conversation id and rollout path, so callers + /// must not use it for true forking semantics. pub async fn get_rollout_history(path: &Path) -> std::io::Result { let (items, thread_id, _parse_errors) = Self::load_rollout_items(path).await?; let conversation_id = thread_id @@ -937,6 +944,22 @@ impl RolloutRecorder { })) } + /// Load a rollout for forking into a distinct thread. + /// + /// Unlike `get_rollout_history`, this intentionally discards the source rollout's + /// conversation id so `Codex::spawn` allocates a fresh thread id and rollout path for + /// the child. 
+ pub async fn get_fork_history(path: &Path) -> std::io::Result { + let (items, _thread_id, _parse_errors) = Self::load_rollout_items(path).await?; + + if items.is_empty() { + return Ok(InitialHistory::New); + } + + info!("Loaded rollout fork history from {path:?}"); + Ok(InitialHistory::Forked(items)) + } + /// Drain pending items before stopping the writer task. /// /// If draining fails, the writer stays alive so callers can continue retrying flush/shutdown. @@ -1884,6 +1907,7 @@ async fn resume_candidate_matches_cwd( && let Some(latest_turn_context_cwd) = items.iter().rev().find_map(|item| match item { RolloutItem::TurnContext(turn_context) => Some(turn_context.cwd.as_path()), RolloutItem::SessionMeta(_) + | RolloutItem::ForkReference(_) | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) | RolloutItem::EventMsg(_) => None, diff --git a/codex-rs/rollout/src/recorder_tests.rs b/codex-rs/rollout/src/recorder_tests.rs index 0138db72df..ff2ff9120f 100644 --- a/codex-rs/rollout/src/recorder_tests.rs +++ b/codex-rs/rollout/src/recorder_tests.rs @@ -8,6 +8,8 @@ use codex_protocol::models::ResponseItem; use codex_protocol::protocol::AgentMessageEvent; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::ForkReferenceItem; +use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::RolloutLine; use codex_protocol::protocol::SandboxPolicy; @@ -36,7 +38,8 @@ fn test_config(codex_home: &Path) -> RolloutConfig { fn write_session_file(root: &Path, ts: &str, uuid: Uuid) -> std::io::Result { let day_dir = root.join("sessions/2025/01/03"); fs::create_dir_all(&day_dir)?; - let path = day_dir.join(format!("rollout-{ts}-{uuid}.jsonl")); + let file_ts = ts.replace(':', "-"); + let path = day_dir.join(format!("rollout-{file_ts}-{uuid}.jsonl")); let mut file = File::create(&path)?; let meta = serde_json::json!({ "timestamp": ts, @@ -220,6 +223,47 @@ async 
fn load_rollout_items_filters_legacy_ghost_snapshots_from_compaction_histo Ok(()) } +#[tokio::test] +async fn get_rollout_history_preserves_legacy_fork_reference_boundaries() -> std::io::Result<()> { + let home = TempDir::new().expect("temp dir"); + let parent_id = Uuid::new_v4(); + let child_id = Uuid::new_v4(); + let ts = "2025-01-03T12:00:00.000Z"; + let parent_rollout_path = write_session_file(home.path(), ts, parent_id)?; + let child_rollout_path = write_session_file(home.path(), ts, child_id)?; + + let mut file = fs::OpenOptions::new() + .append(true) + .open(&child_rollout_path)?; + let fork_reference_line = serde_json::json!({ + "timestamp": ts, + "type": "fork_reference", + "payload": { + "rollout_path": parent_rollout_path, + "nth_user_message": u64::MAX, + }, + }); + writeln!(file, "{fork_reference_line}")?; + + let history = RolloutRecorder::get_rollout_history(&child_rollout_path).await?; + let InitialHistory::Resumed(resumed) = history else { + panic!("expected resumed history"); + }; + + let loaded_fork_reference = resumed.history.last().and_then(|item| match item { + RolloutItem::ForkReference(fork_reference) => Some(fork_reference), + _ => None, + }); + assert_eq!( + loaded_fork_reference, + Some(&ForkReferenceItem { + rollout_path: parent_rollout_path, + nth_user_message: i64::MAX, + }), + ); + Ok(()) +} + #[tokio::test] async fn recorder_materializes_on_flush_with_pending_items() -> std::io::Result<()> { let home = TempDir::new().expect("temp dir"); diff --git a/codex-rs/state/src/extract.rs b/codex-rs/state/src/extract.rs index a4a0ab0f6a..f537d4bd26 100644 --- a/codex-rs/state/src/extract.rs +++ b/codex-rs/state/src/extract.rs @@ -22,6 +22,7 @@ pub fn apply_rollout_item( RolloutItem::TurnContext(turn_ctx) => apply_turn_context(metadata, turn_ctx), RolloutItem::EventMsg(event) => apply_event_msg(metadata, event), RolloutItem::ResponseItem(item) => apply_response_item(metadata, item), + RolloutItem::ForkReference(_) => {} 
RolloutItem::Compacted(_) => {} } if metadata.model_provider.is_empty() { @@ -36,9 +37,10 @@ pub fn rollout_item_affects_thread_metadata(item: &RolloutItem) -> bool { RolloutItem::EventMsg( EventMsg::TokenCount(_) | EventMsg::UserMessage(_) | EventMsg::ThreadNameUpdated(_), ) => true, - RolloutItem::EventMsg(_) | RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) => { - false - } + RolloutItem::EventMsg(_) + | RolloutItem::ResponseItem(_) + | RolloutItem::ForkReference(_) + | RolloutItem::Compacted(_) => false, } } diff --git a/codex-rs/state/src/runtime/threads.rs b/codex-rs/state/src/runtime/threads.rs index 8a3bcee5b3..12028c5f6b 100644 --- a/codex-rs/state/src/runtime/threads.rs +++ b/codex-rs/state/src/runtime/threads.rs @@ -972,6 +972,7 @@ pub(super) fn extract_dynamic_tools(items: &[RolloutItem]) -> Option Some(meta_line.meta.dynamic_tools.clone()), RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) + | RolloutItem::ForkReference(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, }) @@ -982,6 +983,7 @@ pub(super) fn extract_memory_mode(items: &[RolloutItem]) -> Option { RolloutItem::SessionMeta(meta_line) => meta_line.meta.memory_mode.clone(), RolloutItem::ResponseItem(_) | RolloutItem::Compacted(_) + | RolloutItem::ForkReference(_) | RolloutItem::TurnContext(_) | RolloutItem::EventMsg(_) => None, }) diff --git a/codex-rs/tools/src/agent_tool.rs b/codex-rs/tools/src/agent_tool.rs index 7f83e6cada..e154f67d0e 100644 --- a/codex-rs/tools/src/agent_tool.rs +++ b/codex-rs/tools/src/agent_tool.rs @@ -152,6 +152,46 @@ pub fn create_send_message_tool() -> ToolSpec { }) } +pub fn create_list_agents_tool_v1(agent_watchdog: bool) -> ToolSpec { + let description = if agent_watchdog { + "List agents spawned by an agent, optionally recursively. This is a status view; polling it will not make a watchdog fire." + } else { + "List agents spawned by an agent, optionally recursively." 
+ }; + let properties = BTreeMap::from([ + ( + "id".to_string(), + JsonSchema::string(Some( + "Identifier of the parent agent whose spawned agents to list. Defaults to the current agent." + .to_string(), + )), + ), + ( + "recursive".to_string(), + JsonSchema::boolean(Some( + "When true (default), include all descendants recursively. When false, include only direct children." + .to_string(), + )), + ), + ( + "all".to_string(), + JsonSchema::boolean(Some( + "When true, include completed/failed/canceled agents in addition to live agents." + .to_string(), + )), + ), + ]); + + ToolSpec::Function(ResponsesApiTool { + name: "list_agents".to_string(), + description: description.to_string(), + strict: false, + defer_loading: None, + parameters: JsonSchema::object(properties, /*required*/ None, Some(false.into())), + output_schema: None, + }) +} + pub fn create_followup_task_tool() -> ToolSpec { let properties = BTreeMap::from([ ( @@ -276,6 +316,47 @@ pub fn create_close_agent_tool_v2() -> ToolSpec { }) } +pub fn create_watchdog_self_close_tool() -> ToolSpec { + ToolSpec::Function(ResponsesApiTool { + name: "watchdog_self_close".to_string(), + description: + "Watchdog-only: close this watchdog check-in thread and terminate immediately." + .to_string(), + strict: false, + defer_loading: Some(true), + parameters: JsonSchema::object(BTreeMap::new(), /*required*/ None, Some(false.into())), + output_schema: Some(close_agent_output_schema()), + }) +} + +pub fn create_compact_parent_context_tool() -> ToolSpec { + let properties = BTreeMap::from([ + ( + "reason".to_string(), + JsonSchema::string(Some( + "Optional short reason describing why the parent appears stuck.".to_string(), + )), + ), + ( + "evidence".to_string(), + JsonSchema::string(Some( + "Optional concrete evidence of non-progress, such as repeated identical replies with no tool or file actions." 
+ .to_string(), + )), + ), + ]); + + ToolSpec::Function(ResponsesApiTool { + name: "compact_parent_context".to_string(), + description: "Watchdog-only: request compaction for the watchdog helper's parent thread when it is idle and appears stuck." + .to_string(), + strict: false, + defer_loading: Some(true), + parameters: JsonSchema::object(properties, /*required*/ None, Some(false.into())), + output_schema: None, + }) +} + fn agent_status_output_schema() -> Value { json!({ "oneOf": [ @@ -501,6 +582,36 @@ fn create_collab_input_items_schema() -> JsonSchema { )) } +fn spawn_agent_model_fallback_list_schema() -> JsonSchema { + let model_fallback_item_properties = BTreeMap::from([ + ( + "model".to_string(), + JsonSchema::string(Some( + "Model to try. Must be a model slug from the current model picker list.".to_string(), + )), + ), + ( + "reasoning_effort".to_string(), + JsonSchema::string(Some( + "Optional reasoning effort override for this candidate. Replaces the inherited reasoning effort." + .to_string(), + )), + ), + ]); + + JsonSchema::array( + JsonSchema::object( + model_fallback_item_properties, + Some(vec!["model".to_string()]), + Some(false.into()), + ), + Some( + "Ordered model candidates for fallback retries. Each entry may include an optional reasoning effort." + .to_string(), + ), + ) +} + fn spawn_agent_common_properties_v1(agent_type_description: &str) -> BTreeMap { BTreeMap::from([ ( @@ -525,16 +636,20 @@ fn spawn_agent_common_properties_v1(agent_type_description: &str) -> BTreeMap BTreeMap String { + format!( + "{SPAWN_AGENT_MODEL_OVERRIDE_DESCRIPTION} Replaces the inherited model only when fork_context is false; forked children always inherit the parent model." + ) +} + +fn spawn_agent_model_override_description_v2() -> String { + format!( + "{SPAWN_AGENT_MODEL_OVERRIDE_DESCRIPTION} Replaces the inherited model only when fork_turns is `none`; forked children always inherit the parent model." 
+ ) +} + fn hide_spawn_agent_metadata_options(properties: &mut BTreeMap) { properties.remove("agent_type"); properties.remove("model"); diff --git a/codex-rs/tools/src/agent_tool_tests.rs b/codex-rs/tools/src/agent_tool_tests.rs index 3157cfc547..7816dca3b0 100644 --- a/codex-rs/tools/src/agent_tool_tests.rs +++ b/codex-rs/tools/src/agent_tool_tests.rs @@ -73,8 +73,9 @@ fn spawn_agent_tool_v2_requires_task_name_and_lists_visible_models() { assert!(properties.contains_key("task_name")); assert!(properties.contains_key("message")); assert!(properties.contains_key("fork_turns")); + assert!(properties.contains_key("fork_context")); + assert!(properties.contains_key("model_fallback_list")); assert!(!properties.contains_key("items")); - assert!(!properties.contains_key("fork_context")); assert_eq!( properties.get("agent_type"), Some(&JsonSchema::string(Some("role help".to_string()))) @@ -83,12 +84,46 @@ fn spawn_agent_tool_v2_requires_task_name_and_lists_visible_models() { properties .get("model") .and_then(|schema| schema.description.as_deref()), - Some(SPAWN_AGENT_MODEL_OVERRIDE_DESCRIPTION) + Some(spawn_agent_model_override_description_v2().as_str()) ); assert_eq!( parameters.required.as_ref(), Some(&vec!["task_name".to_string(), "message".to_string()]) ); + let Some(model_fallback_list) = properties.get("model_fallback_list") else { + panic!("spawn_agent v2 should define model_fallback_list as an array"); + }; + assert_eq!( + model_fallback_list.schema_type, + Some(JsonSchemaType::Single(JsonSchemaPrimitiveType::Array)) + ); + let model_fallback_items = model_fallback_list + .items + .as_ref() + .expect("model_fallback_list should define item schema"); + assert_eq!( + model_fallback_items.schema_type, + Some(JsonSchemaType::Single(JsonSchemaPrimitiveType::Object)) + ); + let model_fallback_item_properties = model_fallback_items + .properties + .as_ref() + .expect("spawn_agent v2 model_fallback_list items should be objects"); + let model_fallback_item_required = 
model_fallback_items + .required + .as_ref() + .expect("model_fallback_list items should require model"); + if model_fallback_items.additional_properties != Some(false.into()) { + panic!("spawn_agent v2 model_fallback_list items should be objects"); + } + assert_eq!( + model_fallback_item_properties.get("model"), + Some(&JsonSchema::string(Some( + "Model to try. Must be a model slug from the current model picker list.".to_string(), + ))) + ); + assert!(model_fallback_item_properties.contains_key("reasoning_effort")); + assert_eq!(model_fallback_item_required, &vec!["model".to_string()]); assert_eq!( output_schema.expect("spawn_agent output schema")["required"], json!(["task_name", "nickname"]) @@ -96,7 +131,7 @@ fn spawn_agent_tool_v2_requires_task_name_and_lists_visible_models() { } #[test] -fn spawn_agent_tool_v1_keeps_legacy_fork_context_field() { +fn spawn_agent_tool_v1_includes_model_fallback_list() { let tool = create_spawn_agent_tool_v1(SpawnAgentToolOptions { available_models: &[], agent_type_description: "role help".to_string(), @@ -117,14 +152,62 @@ fn spawn_agent_tool_v1_keeps_legacy_fork_context_field() { .properties .as_ref() .expect("spawn_agent should use object params"); - + let Some(model_fallback_list) = properties.get("model_fallback_list") else { + panic!("spawn_agent v1 should define model_fallback_list as an array"); + }; + assert_eq!( + model_fallback_list.schema_type, + Some(JsonSchemaType::Single(JsonSchemaPrimitiveType::Array)) + ); + assert!(properties.contains_key("model_fallback_list")); assert!(properties.contains_key("fork_context")); assert!(!properties.contains_key("fork_turns")); assert_eq!( properties .get("model") .and_then(|schema| schema.description.as_deref()), - Some(SPAWN_AGENT_MODEL_OVERRIDE_DESCRIPTION) + Some(spawn_agent_model_override_description_v1().as_str()) + ); + assert_eq!( + properties.get("reasoning_effort"), + Some(&JsonSchema::string(Some( + "Optional reasoning effort override for the new agent. 
Replaces the inherited reasoning effort only when fork_context is false; forked children always inherit the parent reasoning effort." + .to_string(), + ))) + ); +} + +#[test] +fn spawn_agent_tool_v2_documents_that_forked_children_ignore_model_overrides() { + let tool = create_spawn_agent_tool_v2(SpawnAgentToolOptions { + available_models: &[], + agent_type_description: "role help".to_string(), + hide_agent_type_model_reasoning: false, + include_usage_hint: true, + usage_hint_text: None, + max_concurrent_threads_per_session: None, + }); + + let ToolSpec::Function(ResponsesApiTool { parameters, .. }) = tool else { + panic!("spawn_agent should be a function tool"); + }; + let properties = parameters + .properties + .as_ref() + .expect("spawn_agent should use object params"); + + assert_eq!( + properties.get("model"), + Some(&JsonSchema::string(Some( + spawn_agent_model_override_description_v2(), + ))) + ); + assert_eq!( + properties.get("reasoning_effort"), + Some(&JsonSchema::string(Some( + "Optional reasoning effort override for the new agent. Replaces the inherited reasoning effort only when fork_turns is `none`; forked children always inherit the parent reasoning effort." 
+ .to_string(), + ))) ); } diff --git a/codex-rs/tools/src/lib.rs b/codex-rs/tools/src/lib.rs index 7c638ba675..e19111f8a3 100644 --- a/codex-rs/tools/src/lib.rs +++ b/codex-rs/tools/src/lib.rs @@ -31,8 +31,10 @@ pub use agent_tool::SpawnAgentToolOptions; pub use agent_tool::WaitAgentTimeoutOptions; pub use agent_tool::create_close_agent_tool_v1; pub use agent_tool::create_close_agent_tool_v2; +pub use agent_tool::create_compact_parent_context_tool; pub use agent_tool::create_followup_task_tool; pub use agent_tool::create_list_agents_tool; +pub use agent_tool::create_list_agents_tool_v1; pub use agent_tool::create_resume_agent_tool; pub use agent_tool::create_send_input_tool_v1; pub use agent_tool::create_send_message_tool; @@ -40,6 +42,7 @@ pub use agent_tool::create_spawn_agent_tool_v1; pub use agent_tool::create_spawn_agent_tool_v2; pub use agent_tool::create_wait_agent_tool_v1; pub use agent_tool::create_wait_agent_tool_v2; +pub use agent_tool::create_watchdog_self_close_tool; pub use apply_patch_tool::ApplyPatchToolArgs; pub use apply_patch_tool::create_apply_patch_freeform_tool; pub use apply_patch_tool::create_apply_patch_json_tool; diff --git a/codex-rs/tools/src/tool_config.rs b/codex-rs/tools/src/tool_config.rs index a218a5530a..6578d8678c 100644 --- a/codex-rs/tools/src/tool_config.rs +++ b/codex-rs/tools/src/tool_config.rs @@ -103,6 +103,7 @@ pub struct ToolsConfig { pub can_request_original_image_detail: bool, pub collab_tools: bool, pub goal_tools: bool, + pub agent_watchdog: bool, pub multi_agent_v2: bool, pub hide_spawn_agent_metadata: bool, pub spawn_agent_usage_hint: bool, @@ -144,6 +145,8 @@ impl ToolsConfig { let include_code_mode_only = include_code_mode && features.enabled(Feature::CodeModeOnly); let include_collab_tools = features.enabled(Feature::Collab); let include_goal_tools = features.enabled(Feature::Goals); + let include_agent_watchdog = + include_collab_tools && features.enabled(Feature::AgentWatchdog); let include_multi_agent_v2 = 
features.enabled(Feature::MultiAgentV2); let include_agent_jobs = features.enabled(Feature::SpawnCsv); let include_default_mode_request_user_input = @@ -224,6 +227,7 @@ impl ToolsConfig { can_request_original_image_detail: include_original_image_detail, collab_tools: include_collab_tools, goal_tools: include_goal_tools, + agent_watchdog: include_agent_watchdog, multi_agent_v2: include_multi_agent_v2, hide_spawn_agent_metadata: false, spawn_agent_usage_hint: true, diff --git a/codex-rs/tools/src/tool_discovery.rs b/codex-rs/tools/src/tool_discovery.rs index 74977dce38..ecff467ee1 100644 --- a/codex-rs/tools/src/tool_discovery.rs +++ b/codex-rs/tools/src/tool_discovery.rs @@ -16,6 +16,9 @@ const TUI_CLIENT_NAME: &str = "codex-tui"; pub const TOOL_SEARCH_TOOL_NAME: &str = "tool_search"; pub const TOOL_SEARCH_DEFAULT_LIMIT: usize = 8; pub const TOOL_SUGGEST_TOOL_NAME: &str = "tool_suggest"; +const WATCHDOG_TOOLS_NAMESPACE: &str = "watchdog"; +const WATCHDOG_TOOLS_NAMESPACE_DESCRIPTION: &str = + "Watchdog-only tools for parent-thread recovery and watchdog check-in lifecycle control."; #[derive(Clone, Debug, PartialEq, Eq)] pub struct ToolSearchSourceInfo { @@ -149,6 +152,7 @@ pub struct ToolSuggestEntry { pub fn create_tool_search_tool( searchable_sources: &[ToolSearchSourceInfo], default_limit: usize, + include_watchdog_tools: bool, ) -> ToolSpec { let properties = BTreeMap::from([ ( @@ -164,6 +168,12 @@ pub fn create_tool_search_tool( ]); let mut source_descriptions = BTreeMap::new(); + if include_watchdog_tools { + source_descriptions.insert( + WATCHDOG_TOOLS_NAMESPACE.to_string(), + Some(WATCHDOG_TOOLS_NAMESPACE_DESCRIPTION.to_string()), + ); + } for source in searchable_sources { source_descriptions .entry(source.name.clone()) diff --git a/codex-rs/tools/src/tool_discovery_tests.rs b/codex-rs/tools/src/tool_discovery_tests.rs index 9edbccffaa..351ba1bf0d 100644 --- a/codex-rs/tools/src/tool_discovery_tests.rs +++ b/codex-rs/tools/src/tool_discovery_tests.rs @@ 
-27,10 +27,11 @@ fn create_tool_search_tool_deduplicates_and_renders_enabled_sources() { }, ], /*default_limit*/ 8, + /*include_watchdog_tools*/ true, ), ToolSpec::ToolSearch { execution: "client".to_string(), - description: "# Tool discovery\n\nSearches over deferred tool metadata with BM25 and exposes matching tools for the next model call.\n\nYou have access to tools from the following sources:\n- Google Drive: Use Google Drive as the single entrypoint for Drive, Docs, Sheets, and Slides work.\n- docs\nSome of the tools may not have been provided to you upfront, and you should use this tool (`tool_search`) to search for the required tools. For MCP tool discovery, always use `tool_search` instead of `list_mcp_resources` or `list_mcp_resource_templates`.".to_string(), + description: "# Tool discovery\n\nSearches over deferred tool metadata with BM25 and exposes matching tools for the next model call.\n\nYou have access to tools from the following sources:\n- Google Drive: Use Google Drive as the single entrypoint for Drive, Docs, Sheets, and Slides work.\n- docs\n- watchdog: Watchdog-only tools for parent-thread recovery and watchdog check-in lifecycle control.\nSome of the tools may not have been provided to you upfront, and you should use this tool (`tool_search`) to search for the required tools. 
For MCP tool discovery, always use `tool_search` instead of `list_mcp_resources` or `list_mcp_resource_templates`.".to_string(), parameters: JsonSchema::object(BTreeMap::from([ ( "limit".to_string(), diff --git a/codex-rs/tools/src/tool_registry_plan.rs b/codex-rs/tools/src/tool_registry_plan.rs index eff3750edd..d477bac2f6 100644 --- a/codex-rs/tools/src/tool_registry_plan.rs +++ b/codex-rs/tools/src/tool_registry_plan.rs @@ -2,6 +2,7 @@ use crate::CommandToolOptions; use crate::REQUEST_USER_INPUT_TOOL_NAME; use crate::ResponsesApiNamespace; use crate::ResponsesApiNamespaceTool; +use crate::ResponsesApiTool; use crate::ShellToolOptions; use crate::SpawnAgentToolOptions; use crate::TOOL_SEARCH_DEFAULT_LIMIT; @@ -26,12 +27,14 @@ use crate::create_apply_patch_json_tool; use crate::create_close_agent_tool_v1; use crate::create_close_agent_tool_v2; use crate::create_code_mode_tool; +use crate::create_compact_parent_context_tool; use crate::create_create_goal_tool; use crate::create_exec_command_tool; use crate::create_followup_task_tool; use crate::create_get_goal_tool; use crate::create_image_generation_tool; use crate::create_list_agents_tool; +use crate::create_list_agents_tool_v1; use crate::create_list_dir_tool; use crate::create_list_mcp_resource_templates_tool; use crate::create_list_mcp_resources_tool; @@ -57,6 +60,7 @@ use crate::create_view_image_tool; use crate::create_wait_agent_tool_v1; use crate::create_wait_agent_tool_v2; use crate::create_wait_tool; +use crate::create_watchdog_self_close_tool; use crate::create_web_search_tool; use crate::create_write_stdin_tool; use crate::default_namespace_description; @@ -270,10 +274,10 @@ pub fn build_tool_registry_plan( } else { None }; + let includes_tool_search = config.search_tool + && (deferred_mcp_tools_for_search.is_some() || !deferred_dynamic_tools.is_empty()); - if config.search_tool - && (deferred_mcp_tools_for_search.is_some() || !deferred_dynamic_tools.is_empty()) - { + if includes_tool_search { let mut 
search_source_infos = deferred_mcp_tools_for_search .map(|deferred_mcp_tools| { collect_tool_search_source_infos(deferred_mcp_tools.iter().map(|tool| { @@ -294,7 +298,11 @@ pub fn build_tool_registry_plan( } plan.push_spec( - create_tool_search_tool(&search_source_infos, TOOL_SEARCH_DEFAULT_LIMIT), + create_tool_search_tool( + &search_source_infos, + TOOL_SEARCH_DEFAULT_LIMIT, + config.agent_watchdog, + ), /*supports_parallel_tool_calls*/ true, config.code_mode_enabled, ); @@ -482,6 +490,31 @@ pub fn build_tool_registry_plan( /*supports_parallel_tool_calls*/ false, config.code_mode_enabled, ); + if config.agent_watchdog { + plan.push_spec( + create_list_agents_tool_v1(config.agent_watchdog), + /*supports_parallel_tool_calls*/ false, + config.code_mode_enabled, + ); + plan.push_spec( + create_watchdog_tools_namespace(if includes_tool_search { + WatchdogToolLoading::Deferred + } else { + WatchdogToolLoading::Inline + }), + /*supports_parallel_tool_calls*/ false, + config.code_mode_enabled, + ); + plan.register_handler("list_agents", ToolHandlerKind::ListAgentsV1); + plan.register_handler( + "watchdog:compact_parent_context", + ToolHandlerKind::CompactParentContext, + ); + plan.register_handler( + "watchdog:watchdog_self_close", + ToolHandlerKind::WatchdogSelfClose, + ); + } plan.register_handler("spawn_agent", ToolHandlerKind::SpawnAgentV1); plan.register_handler("send_input", ToolHandlerKind::SendInputV1); plan.register_handler("wait_agent", ToolHandlerKind::WaitAgentV1); @@ -626,6 +659,48 @@ fn code_mode_namespace_name<'a>( .map(|namespace_description| namespace_description.name.as_str()) } +/// Controls whether watchdog namespace tools may rely on the Responses tool-search loader. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum WatchdogToolLoading { + /// The request includes `tool_search`, so watchdog tools may be loaded lazily. + Deferred, + /// The request omits `tool_search`, so watchdog tools must be sent inline. 
+ Inline, +} + +fn create_watchdog_tools_namespace(loading: WatchdogToolLoading) -> ToolSpec { + const WATCHDOG_TOOLS_NAMESPACE_DESCRIPTION: &str = + "Watchdog-only tools for parent-thread recovery and watchdog check-in lifecycle control."; + + let tools = vec![ + create_compact_parent_context_tool(), + create_watchdog_self_close_tool(), + ] + .into_iter() + .map(|spec| match spec { + ToolSpec::Function(tool) => ResponsesApiNamespaceTool::Function(match loading { + WatchdogToolLoading::Deferred => tool, + WatchdogToolLoading::Inline => ResponsesApiTool { + defer_loading: None, + ..tool + }, + }), + ToolSpec::Namespace(_) + | ToolSpec::ToolSearch { .. } + | ToolSpec::LocalShell {} + | ToolSpec::ImageGeneration { .. } + | ToolSpec::WebSearch { .. } + | ToolSpec::Freeform(_) => unreachable!("watchdog tools must be function tools"), + }) + .collect(); + + ToolSpec::Namespace(ResponsesApiNamespace { + name: "watchdog".to_string(), + description: WATCHDOG_TOOLS_NAMESPACE_DESCRIPTION.to_string(), + tools, + }) +} + #[cfg(test)] #[path = "tool_registry_plan_tests.rs"] mod tests; diff --git a/codex-rs/tools/src/tool_registry_plan_tests.rs b/codex-rs/tools/src/tool_registry_plan_tests.rs index 8d3ab5253c..4b119b5a7c 100644 --- a/codex-rs/tools/src/tool_registry_plan_tests.rs +++ b/codex-rs/tools/src/tool_registry_plan_tests.rs @@ -289,8 +289,8 @@ fn test_build_specs_multi_agent_v2_uses_task_names_and_hides_resume() { assert!(properties.contains_key("task_name")); assert!(properties.contains_key("message")); assert!(properties.contains_key("fork_turns")); + assert!(properties.contains_key("fork_context")); assert!(!properties.contains_key("items")); - assert!(!properties.contains_key("fork_context")); assert_eq!( required, Some(&vec!["task_name".to_string(), "message".to_string()]) @@ -1649,6 +1649,44 @@ fn search_tool_keeps_plain_deferred_dynamic_tools_when_namespace_tools_are_disab })); } +#[test] +fn watchdog_namespace_does_not_defer_tools_without_search_tool() { + let 
model_info = search_capable_model_info(); + let mut features = Features::with_defaults(); + features.enable(Feature::AgentWatchdog); + features.enable(Feature::ToolSearch); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + image_generation_tool_auth_allowed: true, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + permission_profile: &PermissionProfile::Disabled, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + let (tools, _) = build_specs( + &tools_config, + /*mcp_tools*/ None, + /*deferred_mcp_tools*/ None, + &[], + ); + + assert_lacks_tool_name(&tools, TOOL_SEARCH_TOOL_NAME); + let ToolSpec::Namespace(watchdog) = &find_tool(&tools, "watchdog").spec else { + panic!("expected watchdog namespace"); + }; + let actual = watchdog + .tools + .iter() + .map(|tool| match tool { + ResponsesApiNamespaceTool::Function(function) => function.defer_loading, + }) + .collect::>(); + assert_eq!(actual, vec![None, None]); +} + #[test] fn tool_suggest_is_not_registered_without_feature_flag() { let model_info = search_capable_model_info(); diff --git a/codex-rs/tools/src/tool_registry_plan_types.rs b/codex-rs/tools/src/tool_registry_plan_types.rs index d22335b614..5c1e1e2cbe 100644 --- a/codex-rs/tools/src/tool_registry_plan_types.rs +++ b/codex-rs/tools/src/tool_registry_plan_types.rs @@ -14,11 +14,13 @@ pub enum ToolHandlerKind { ApplyPatch, CloseAgentV1, CloseAgentV2, + CompactParentContext, CodeModeExecute, CodeModeWait, DynamicTool, FollowupTaskV2, Goal, + ListAgentsV1, ListAgentsV2, ListDir, Mcp, @@ -40,6 +42,7 @@ pub enum ToolHandlerKind { ViewImage, WaitAgentV1, WaitAgentV2, + WatchdogSelfClose, } #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/codex-rs/tools/src/tool_spec_tests.rs b/codex-rs/tools/src/tool_spec_tests.rs index 0b98625626..00669ad3fe 100644 --- 
a/codex-rs/tools/src/tool_spec_tests.rs +++ b/codex-rs/tools/src/tool_spec_tests.rs @@ -10,6 +10,7 @@ use crate::JsonSchema; use crate::ResponsesApiNamespaceTool; use crate::ResponsesApiTool; use crate::create_tools_json_for_responses_api; +use crate::create_watchdog_self_close_tool; use codex_protocol::config_types::WebSearchContextSize; use codex_protocol::config_types::WebSearchFilters as ConfigWebSearchFilters; use codex_protocol::config_types::WebSearchUserLocation as ConfigWebSearchUserLocation; @@ -58,6 +59,15 @@ fn tool_spec_name_covers_all_variants() { .name(), "tool_search" ); + assert_eq!( + ToolSpec::Namespace(ResponsesApiNamespace { + name: "agents".to_string(), + description: "Agent tools".to_string(), + tools: Vec::new(), + }) + .name(), + "agents" + ); assert_eq!(ToolSpec::LocalShell {}.name(), "local_shell"); assert_eq!( ToolSpec::ImageGeneration { @@ -115,6 +125,32 @@ fn configured_tool_spec_name_delegates_to_tool_spec() { ); } +#[test] +fn watchdog_self_close_tool_spec_is_deferred_and_parameterless() { + let ToolSpec::Function(ResponsesApiTool { + name, + defer_loading, + parameters, + output_schema, + .. 
+ }) = create_watchdog_self_close_tool() + else { + panic!("watchdog_self_close should be a function tool"); + }; + + assert_eq!(name, "watchdog_self_close"); + assert_eq!(defer_loading, Some(true)); + assert_eq!( + parameters, + JsonSchema::object( + BTreeMap::new(), + /*required*/ None, + Some(AdditionalProperties::Boolean(false)), + ) + ); + assert!(output_schema.is_some()); +} + #[test] fn web_search_config_converts_to_responses_api_types() { assert_eq!( diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 1d138f9da4..6d34258004 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -199,6 +199,7 @@ mod resize_reflow; mod session_lifecycle; mod side; mod startup_prompts; +mod subagents; mod thread_events; mod thread_goal_actions; mod thread_routing; @@ -537,6 +538,8 @@ pub(crate) struct App { /// Controls the animation thread that sends CommitTick events. pub(crate) commit_anim_running: Arc, + /// Controls the animation thread that updates the live subagent panel. + pub(crate) subagent_anim_running: Arc, // Shared across ChatWidget instances so invalid status-line config warnings only emit once. status_line_invalid_items_warned: Arc, // Shared across ChatWidget instances so invalid terminal-title config warnings only emit once. @@ -571,6 +574,7 @@ pub(crate) struct App { thread_event_channels: HashMap, thread_event_listener_tasks: HashMap>, + subagents: subagents::SubagentRegistry, agent_navigation: AgentNavigationState, side_threads: HashMap, active_thread_id: Option, @@ -779,6 +783,7 @@ impl App { let status_line_invalid_items_warned = Arc::new(AtomicBool::new(false)); let terminal_title_invalid_items_warned = Arc::new(AtomicBool::new(false)); + let animations_enabled = config.animations; let enhanced_keys_supported = tui.enhanced_keys_supported(); let wait_for_initial_session_configured = @@ -928,6 +933,7 @@ See the Codex keymap documentation for supported actions and examples." 
transcript_reflow: TranscriptReflowState::default(), initial_history_replay_buffer: None, commit_anim_running: Arc::new(AtomicBool::new(false)), + subagent_anim_running: Arc::new(AtomicBool::new(false)), status_line_invalid_items_warned: status_line_invalid_items_warned.clone(), terminal_title_invalid_items_warned: terminal_title_invalid_items_warned.clone(), backtrack: BacktrackState::default(), @@ -942,6 +948,7 @@ See the Codex keymap documentation for supported actions and examples." windows_sandbox: WindowsSandboxState::default(), thread_event_channels: HashMap::new(), thread_event_listener_tasks: HashMap::new(), + subagents: subagents::SubagentRegistry::new(animations_enabled), agent_navigation: AgentNavigationState::default(), side_threads: HashMap::new(), active_thread_id: None, diff --git a/codex-rs/tui/src/app/event_dispatch.rs b/codex-rs/tui/src/app/event_dispatch.rs index 62cb727159..5350c52b33 100644 --- a/codex-rs/tui/src/app/event_dispatch.rs +++ b/codex-rs/tui/src/app/event_dispatch.rs @@ -294,6 +294,38 @@ impl App { AppEvent::CommitTick => { self.chat_widget.on_commit_tick(); } + AppEvent::StartSubagentAnimation => { + if self + .subagent_anim_running + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + { + let tx = self.app_event_tx.clone(); + let running = self.subagent_anim_running.clone(); + thread::spawn(move || { + while running.load(Ordering::Relaxed) { + thread::sleep(subagents::SUBAGENT_ANIMATION_TICK); + tx.send(AppEvent::SubagentTick); + } + }); + } + } + AppEvent::StopSubagentAnimation => { + self.subagent_anim_running.store(false, Ordering::Release); + } + AppEvent::SubagentTick => { + let root_active = self.subagents_root_active(); + self.update_subagent_animation(root_active); + if root_active && self.subagents.has_animating_agents() { + self.chat_widget.on_subagent_tick(); + } + } + AppEvent::UpdateSubagentPanel(panel) => { + self.chat_widget.on_subagent_panel_updated(panel); + } + 
AppEvent::ClearSubagentPanel => { + self.chat_widget.clear_subagent_panel(); + } AppEvent::Exit(mode) => { return Ok(self.handle_exit_mode(app_server, mode).await); } diff --git a/codex-rs/tui/src/app/session_lifecycle.rs b/codex-rs/tui/src/app/session_lifecycle.rs index c51863bd16..1c0d5b4388 100644 --- a/codex-rs/tui/src/app/session_lifecycle.rs +++ b/codex-rs/tui/src/app/session_lifecycle.rs @@ -408,7 +408,9 @@ impl App { pub(super) fn reset_thread_event_state(&mut self) { self.abort_all_thread_event_listeners(); + self.subagent_anim_running.store(false, Ordering::Release); self.thread_event_channels.clear(); + self.subagents = super::subagents::SubagentRegistry::new(self.config.animations); self.agent_navigation.clear(); self.side_threads.clear(); self.active_thread_id = None; @@ -418,6 +420,7 @@ impl App { self.primary_session_configured = None; self.pending_primary_events.clear(); self.pending_app_server_requests.clear(); + self.chat_widget.clear_subagent_panel(); self.chat_widget.set_pending_thread_approvals(Vec::new()); self.sync_active_agent_label(); } diff --git a/codex-rs/tui/src/app/subagents.rs b/codex-rs/tui/src/app/subagents.rs new file mode 100644 index 0000000000..d88d847281 --- /dev/null +++ b/codex-rs/tui/src/app/subagents.rs @@ -0,0 +1,732 @@ +//! Subagent status-panel orchestration for the TUI app. +//! +//! The app keeps transcript rendering inside `ChatWidget`, but this module owns the mutable +//! registry that turns collab lifecycle events into one live panel plus durable history cells. 
+ +use super::*; +use crate::chatwidget::extract_first_bold; +use crate::history_cell::SubagentPanelAgent; +use crate::history_cell::SubagentPanelState; +use crate::history_cell::SubagentStatusCell; +use crate::text_formatting::truncate_text; +use codex_app_server_protocol::CollabAgentState; +use codex_app_server_protocol::CollabAgentStatus; +use codex_app_server_protocol::CollabAgentTool; +use codex_app_server_protocol::CollabAgentToolCallStatus; +use codex_protocol::protocol::AgentMessageDeltaEvent; +use codex_protocol::protocol::AgentMessageEvent; +use codex_protocol::protocol::AgentReasoningDeltaEvent; +use codex_protocol::protocol::AgentReasoningRawContentDeltaEvent; +use codex_protocol::protocol::AgentReasoningRawContentEvent; +use codex_protocol::protocol::AgentStatus; +use codex_protocol::protocol::CollabAgentSpawnEndEvent; +use codex_protocol::protocol::CollabCloseEndEvent; +use codex_protocol::protocol::CollabWaitingEndEvent; +use codex_protocol::protocol::ErrorEvent; +use codex_protocol::protocol::Event; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::TurnAbortedEvent; +use codex_protocol::protocol::TurnCompleteEvent; +use codex_protocol::protocol::TurnStartedEvent; +use std::collections::HashSet; +use std::sync::Mutex as StdMutex; + +const SUBAGENT_PROMPT_PREVIEW_BUDGET: usize = 120; +const SUBAGENT_UPDATE_PREVIEW_BUDGET: usize = 160; +const SUBAGENT_PENDING_EVENT_CAPACITY: usize = 12; +pub(super) const SUBAGENT_ANIMATION_TICK: Duration = Duration::from_millis(100); +const SUBAGENT_SHIMMER_WINDOW: Duration = Duration::from_secs(1); + +#[derive(Debug, Clone)] +pub(super) struct SubagentInfo { + pub(super) ordinal: i32, + pub(super) name: String, + pub(super) prompt_preview: String, + pub(super) is_watchdog: bool, + pub(super) status: AgentStatus, + spawned_at: Instant, + started_at: Option, + latest_summary: String, + pub(super) latest_preview: String, + pub(super) latest_update_at: Instant, + inflight_message: String, + 
reasoning_buffer: String, + notified_terminal: bool, +} + +impl SubagentInfo { + pub(super) fn new( + ordinal: i32, + name: String, + prompt_preview: String, + is_watchdog: bool, + ) -> Self { + let now = Instant::now(); + Self { + ordinal, + name, + prompt_preview: prompt_preview.clone(), + is_watchdog, + status: AgentStatus::PendingInit, + spawned_at: now, + started_at: None, + latest_summary: String::new(), + latest_preview: prompt_preview, + latest_update_at: now, + inflight_message: String::new(), + reasoning_buffer: String::new(), + notified_terminal: false, + } + } + + fn is_running(&self) -> bool { + matches!(self.status, AgentStatus::PendingInit | AgentStatus::Running) + } + + fn is_watchdog(&self) -> bool { + self.is_watchdog + } + + fn is_visible_in_panel(&self) -> bool { + if self.is_watchdog() { + matches!(self.status, AgentStatus::PendingInit | AgentStatus::Running) + } else { + self.is_running() + } + } + + fn is_running_for_panel(&self) -> bool { + if self.is_watchdog() { + matches!(self.status, AgentStatus::Running) + } else { + self.is_running() + } + } + + fn running_started_at(&self) -> Instant { + self.started_at.unwrap_or(self.spawned_at) + } + + fn update_preview(&mut self, preview: String) { + self.latest_preview = preview; + self.latest_update_at = Instant::now(); + } + + fn update_reasoning_summary(&mut self, delta: &str) { + self.reasoning_buffer.push_str(delta); + if let Some(summary) = extract_first_bold(&self.reasoning_buffer) { + self.latest_summary = truncate_text(summary.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET); + self.latest_update_at = Instant::now(); + } + } + + fn clear_turn_buffers(&mut self) { + self.inflight_message.clear(); + self.reasoning_buffer.clear(); + self.latest_summary.clear(); + } + + fn should_shimmer(&self, now: Instant) -> bool { + if self.is_watchdog() && matches!(self.status, AgentStatus::PendingInit) { + return false; + } + self.is_running() + && now.saturating_duration_since(self.latest_update_at) <= 
SUBAGENT_SHIMMER_WINDOW + } +} + +#[derive(Debug, Default)] +pub(super) struct SubagentRegistry { + root_thread_id: Option, + pub(super) agents: HashMap, + pub(super) order: Vec, + pending_events: HashMap>, + pending_history: Vec>, + panel_state: Option>>, + panel_cell: Option>, + animations_enabled: bool, +} + +impl SubagentRegistry { + pub(super) fn new(animations_enabled: bool) -> Self { + Self { + animations_enabled, + ..Self::default() + } + } + + pub(super) fn set_root_thread(&mut self, thread_id: ThreadId) { + self.root_thread_id = Some(thread_id); + } + + fn is_root_thread(&self, thread_id: ThreadId) -> bool { + self.root_thread_id == Some(thread_id) + } + + fn contains(&self, thread_id: ThreadId) -> bool { + self.agents.contains_key(&thread_id) + } + + fn on_spawn_end(&mut self, event: &CollabAgentSpawnEndEvent) -> Option> { + let new_thread_id = event.new_thread_id?; + let is_watchdog = event.new_agent_role.as_deref() == Some("watchdog"); + if is_watchdog { + self.prune_superseded_watchdogs(new_thread_id); + } + if self.contains(new_thread_id) { + return None; + } + + let ordinal = i32::try_from(self.order.len()) + .unwrap_or(i32::MAX - 1) + .saturating_add(1); + let prompt_preview = prompt_preview(&event.prompt); + let name = derive_subagent_name(&event.prompt, ordinal); + + let mut info = SubagentInfo::new(ordinal, name.clone(), prompt_preview, is_watchdog); + info.status = event.status.clone(); + info.latest_preview = info.prompt_preview.clone(); + info.latest_update_at = Instant::now(); + + self.order.push(new_thread_id); + self.agents.insert(new_thread_id, info); + + let early_events = self + .pending_events + .remove(&new_thread_id) + .unwrap_or_default(); + let mut follow_up = Vec::new(); + for msg in early_events { + follow_up.extend(self.on_agent_event(new_thread_id, &msg)); + } + for cell in follow_up { + self.queue_history(cell); + } + + let prompt_line = prompt_first_line(&event.prompt); + Some(Box::new(history_cell::new_subagent_spawned_cell( 
+ &name, + &prompt_line, + ))) + } + + fn prune_superseded_watchdogs(&mut self, keep_thread_id: ThreadId) { + let superseded: HashSet = self + .agents + .iter() + .filter_map(|(thread_id, info)| { + (info.is_watchdog && *thread_id != keep_thread_id).then_some(*thread_id) + }) + .collect(); + if superseded.is_empty() { + return; + } + + self.order + .retain(|thread_id| !superseded.contains(thread_id)); + self.agents + .retain(|thread_id, _| !superseded.contains(thread_id)); + self.pending_events + .retain(|thread_id, _| !superseded.contains(thread_id)); + } + + fn on_close_end(&mut self, event: &CollabCloseEndEvent) -> Option> { + let receiver_id = event.receiver_thread_id; + let info = self.agents.get_mut(&receiver_id)?; + info.status = event.status.clone(); + info.latest_update_at = Instant::now(); + + if is_terminal_status(&info.status) && !info.notified_terminal { + info.notified_terminal = true; + let summary = terminal_summary(&info.status); + return Some(Box::new(history_cell::new_subagent_update_cell( + &info.name, + &info.status, + summary.as_str(), + ))); + } + None + } + + fn on_wait_end(&mut self, event: &CollabWaitingEndEvent) { + for (thread_id, status) in &event.statuses { + let Some(info) = self.agents.get_mut(thread_id) else { + continue; + }; + info.status = status.clone(); + info.latest_update_at = Instant::now(); + } + } + + fn on_agent_event(&mut self, thread_id: ThreadId, msg: &EventMsg) -> Vec> { + let Some(info) = self.agents.get_mut(&thread_id) else { + self.buffer_pending_event(thread_id, msg.clone()); + return Vec::new(); + }; + + let mut history = Vec::new(); + match msg { + EventMsg::TurnStarted(TurnStartedEvent { .. 
}) => { + info.clear_turn_buffers(); + info.status = AgentStatus::Running; + if info.started_at.is_none() { + info.started_at = Some(Instant::now()); + } + } + EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }) => { + info.update_reasoning_summary(delta); + } + EventMsg::AgentReasoningRawContentDelta(AgentReasoningRawContentDeltaEvent { + delta, + }) => { + info.update_reasoning_summary(delta); + } + EventMsg::AgentReasoningRawContent(AgentReasoningRawContentEvent { text }) => { + info.update_reasoning_summary(text); + info.reasoning_buffer.clear(); + } + EventMsg::AgentReasoning(_) | EventMsg::AgentReasoningSectionBreak(_) => { + info.reasoning_buffer.clear(); + } + EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => { + info.inflight_message.push_str(delta); + let preview = + truncate_text(info.inflight_message.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET); + info.update_preview(preview); + } + EventMsg::AgentMessage(AgentMessageEvent { message, .. }) => { + info.inflight_message.clear(); + let preview = truncate_text(message.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET); + info.update_preview(preview); + } + EventMsg::TurnComplete(TurnCompleteEvent { + last_agent_message, .. + }) => { + info.inflight_message.clear(); + info.status = AgentStatus::Completed(last_agent_message.clone()); + if !info.notified_terminal { + info.notified_terminal = true; + let summary = last_agent_message + .as_deref() + .map(|message| { + truncate_text(message.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET) + }) + .unwrap_or_else(|| "completed".to_string()); + history.push(Box::new(history_cell::new_subagent_update_cell( + &info.name, + &info.status, + summary.as_str(), + )) as Box); + } + } + EventMsg::TurnAborted(TurnAbortedEvent { reason, .. 
}) => { + info.inflight_message.clear(); + let reason_text = format!("{reason:?}").to_lowercase(); + info.status = AgentStatus::Errored(reason_text.clone()); + if !info.notified_terminal { + info.notified_terminal = true; + history.push(Box::new(history_cell::new_subagent_update_cell( + &info.name, + &info.status, + reason_text.as_str(), + )) as Box); + } + } + EventMsg::Error(ErrorEvent { message, .. }) => { + info.inflight_message.clear(); + let summary = truncate_text(message.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET); + info.status = AgentStatus::Errored(summary.clone()); + if !info.notified_terminal { + info.notified_terminal = true; + history.push(Box::new(history_cell::new_subagent_update_cell( + &info.name, + &info.status, + summary.as_str(), + )) as Box); + } + } + EventMsg::ShutdownComplete => { + info.inflight_message.clear(); + info.status = AgentStatus::Shutdown; + if !info.notified_terminal { + info.notified_terminal = true; + history.push(Box::new(history_cell::new_subagent_update_cell( + &info.name, + &info.status, + "shutdown", + )) as Box); + } + } + _ => {} + } + + if history.is_empty() && matches!(msg, EventMsg::TurnStarted(_)) { + info.latest_update_at = Instant::now(); + } + + history + } + + fn buffer_pending_event(&mut self, thread_id: ThreadId, msg: EventMsg) { + if self.is_root_thread(thread_id) { + return; + } + let entry = self.pending_events.entry(thread_id).or_default(); + entry.push(msg); + if entry.len() > SUBAGENT_PENDING_EVENT_CAPACITY { + let excess = entry.len() - SUBAGENT_PENDING_EVENT_CAPACITY; + entry.drain(0..excess); + } + } + + fn queue_history(&mut self, cell: Box) { + self.pending_history.push(cell); + } + + fn take_pending_history(&mut self) -> Vec> { + std::mem::take(&mut self.pending_history) + } + + pub(super) fn has_animating_agents(&self) -> bool { + let now = Instant::now(); + self.agents.values().any(|info| info.should_shimmer(now)) + } + + fn rebuild_panel_state(&mut self) { + let mut running_infos: 
Vec<&SubagentInfo> = self + .agents + .values() + .filter(|info| info.is_visible_in_panel()) + .collect(); + running_infos.sort_by_key(|info| info.ordinal); + + if running_infos.is_empty() { + self.panel_state = None; + self.panel_cell = None; + return; + } + + let started_at = running_infos + .iter() + .map(|info| info.running_started_at()) + .min() + .unwrap_or_else(Instant::now); + let running_count = i32::try_from( + running_infos + .iter() + .filter(|info| info.is_running_for_panel()) + .count(), + ) + .unwrap_or(i32::MAX); + let total_agents = i32::try_from(running_infos.len()).unwrap_or(i32::MAX); + let running_agents = running_infos + .into_iter() + .map(|info| SubagentPanelAgent { + ordinal: info.ordinal, + name: info.name.clone(), + status: info.status.clone(), + is_watchdog: info.is_watchdog(), + watchdog_countdown_started_at: info + .is_watchdog() + .then_some(info.running_started_at()), + preview: running_preview(info), + latest_update_at: info.latest_update_at, + }) + .collect(); + + let state = SubagentPanelState { + started_at, + total_agents, + running_count, + running_agents, + }; + + match &self.panel_state { + Some(existing) => { + let mut guard = existing + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + *guard = state; + } + None => { + self.panel_state = Some(Arc::new(StdMutex::new(state))); + } + } + + if let Some(panel_state) = &self.panel_state { + self.panel_cell = Some(Arc::new(SubagentStatusCell::new( + Arc::clone(panel_state), + self.animations_enabled, + ))); + } + } + + pub(super) fn panel_cell(&self) -> Option> { + self.panel_cell.clone() + } +} + +impl App { + pub(super) fn subagents_root_active(&self) -> bool { + self.primary_thread_id.is_some() && self.active_thread_id == self.primary_thread_id + } + + fn emit_or_queue_subagent_history(&mut self, cell: Box) { + if self.subagents_root_active() { + self.app_event_tx.send(AppEvent::InsertHistoryCell(cell)); + } else { + self.subagents.queue_history(cell); + } + } + 
+ fn flush_subagent_history_if_root_active(&mut self) { + if !self.subagents_root_active() { + return; + } + let pending = self.subagents.take_pending_history(); + for cell in pending { + self.app_event_tx.send(AppEvent::InsertHistoryCell(cell)); + } + } + + pub(super) fn update_subagent_animation(&mut self, root_active: bool) { + let should_run = root_active && self.subagents.has_animating_agents(); + let is_running = self.subagent_anim_running.load(Ordering::Relaxed); + if should_run && !is_running { + self.app_event_tx.send(AppEvent::StartSubagentAnimation); + } else if !should_run && is_running { + self.app_event_tx.send(AppEvent::StopSubagentAnimation); + } + } + + pub(super) fn sync_subagent_panel_state(&mut self) { + let root_active = self.subagents_root_active(); + self.subagents.rebuild_panel_state(); + + if root_active { + self.flush_subagent_history_if_root_active(); + if let Some(panel) = self.subagents.panel_cell() { + self.app_event_tx.send(AppEvent::UpdateSubagentPanel(panel)); + } else { + self.app_event_tx.send(AppEvent::ClearSubagentPanel); + } + } else { + self.app_event_tx.send(AppEvent::ClearSubagentPanel); + } + + self.update_subagent_animation(root_active); + } + + #[allow(dead_code)] + pub(super) fn process_subagent_side_effects(&mut self, thread_id: ThreadId, event: &Event) { + if self.primary_thread_id == Some(thread_id) { + self.subagents.set_root_thread(thread_id); + } + + if self.subagents.is_root_thread(thread_id) { + match &event.msg { + EventMsg::CollabAgentSpawnEnd(ev) => { + let _ = self.subagents.on_spawn_end(ev); + } + EventMsg::CollabWaitingEnd(ev) => { + self.subagents.on_wait_end(ev); + } + EventMsg::CollabCloseEnd(ev) => { + let _ = self.subagents.on_close_end(ev); + } + _ => {} + } + } else { + let updates = self.subagents.on_agent_event(thread_id, &event.msg); + for cell in updates { + self.emit_or_queue_subagent_history(cell); + } + } + + self.sync_subagent_panel_state(); + } + + pub(super) fn 
process_subagent_notification_side_effects( + &mut self, + thread_id: ThreadId, + notification: &ServerNotification, + ) { + if self.primary_thread_id == Some(thread_id) { + self.subagents.set_root_thread(thread_id); + } + + if !self.subagents.is_root_thread(thread_id) { + return; + } + + let item = match notification { + ServerNotification::ItemStarted(notification) => ¬ification.item, + ServerNotification::ItemCompleted(notification) => ¬ification.item, + _ => { + self.sync_subagent_panel_state(); + return; + } + }; + + if let ThreadItem::CollabAgentToolCall { + id, + tool, + status, + sender_thread_id, + receiver_thread_ids, + prompt, + agents_states, + .. + } = item + { + if matches!(tool, CollabAgentTool::SpawnAgent) { + let Some(new_thread_id) = receiver_thread_ids + .first() + .and_then(|thread_id| ThreadId::from_string(thread_id).ok()) + else { + self.sync_subagent_panel_state(); + return; + }; + + let sender_thread_id = ThreadId::from_string(sender_thread_id).unwrap_or(thread_id); + let entry = self.agent_navigation.get(&new_thread_id); + let status = agents_states + .get(&new_thread_id.to_string()) + .map(app_server_collab_state_to_agent_status) + .unwrap_or(AgentStatus::PendingInit); + + let _ = self.subagents.on_spawn_end(&CollabAgentSpawnEndEvent { + call_id: id.clone(), + sender_thread_id, + new_thread_id: Some(new_thread_id), + new_agent_nickname: entry.and_then(|entry| entry.agent_nickname.clone()), + new_agent_role: entry.and_then(|entry| entry.agent_role.clone()), + prompt: prompt.clone().unwrap_or_default(), + model: String::new(), + reasoning_effort: ReasoningEffortConfig::Medium, + status, + }); + } else if !matches!(status, CollabAgentToolCallStatus::InProgress) { + for receiver_thread_id in receiver_thread_ids { + let Some(agent_state) = agents_states.get(receiver_thread_id) else { + continue; + }; + let Ok(receiver_thread_id) = ThreadId::from_string(receiver_thread_id) else { + continue; + }; + let Some(info) = 
self.subagents.agents.get_mut(&receiver_thread_id) else { + continue; + }; + info.status = app_server_collab_state_to_agent_status(agent_state); + info.latest_update_at = Instant::now(); + } + } + } + + self.sync_subagent_panel_state(); + } +} + +fn app_server_collab_state_to_agent_status(state: &CollabAgentState) -> AgentStatus { + match state.status { + CollabAgentStatus::PendingInit => AgentStatus::PendingInit, + CollabAgentStatus::Running => AgentStatus::Running, + CollabAgentStatus::Completed => AgentStatus::Completed(state.message.clone()), + CollabAgentStatus::Errored => { + AgentStatus::Errored(state.message.clone().unwrap_or_default()) + } + CollabAgentStatus::Interrupted => AgentStatus::Interrupted, + CollabAgentStatus::Shutdown => AgentStatus::Shutdown, + CollabAgentStatus::NotFound => AgentStatus::NotFound, + } +} + +fn is_terminal_status(status: &AgentStatus) -> bool { + matches!( + status, + AgentStatus::Completed(_) + | AgentStatus::Errored(_) + | AgentStatus::Shutdown + | AgentStatus::NotFound + ) +} + +fn terminal_summary(status: &AgentStatus) -> String { + match status { + AgentStatus::Completed(Some(message)) => { + truncate_text(message.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET) + } + AgentStatus::Completed(None) => "completed".to_string(), + AgentStatus::Errored(message) => { + truncate_text(message.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET) + } + AgentStatus::Interrupted => "interrupted".to_string(), + AgentStatus::Shutdown => "shutdown".to_string(), + AgentStatus::NotFound => "not found".to_string(), + AgentStatus::PendingInit | AgentStatus::Running => "running".to_string(), + } +} + +fn prompt_first_line(prompt: &str) -> String { + prompt + .lines() + .map(str::trim) + .find(|line| !line.is_empty()) + .unwrap_or_default() + .to_string() +} + +fn prompt_preview(prompt: &str) -> String { + let first_line = prompt_first_line(prompt); + truncate_text(first_line.trim(), SUBAGENT_PROMPT_PREVIEW_BUDGET) +} + +fn running_preview(info: &SubagentInfo) -> 
String { + if !info.latest_summary.trim().is_empty() { + return truncate_text(info.latest_summary.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET); + } + if !info.inflight_message.trim().is_empty() { + return truncate_text(info.inflight_message.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET); + } + if !info.latest_preview.trim().is_empty() { + return truncate_text(info.latest_preview.trim(), SUBAGENT_UPDATE_PREVIEW_BUDGET); + } + truncate_text(info.prompt_preview.trim(), SUBAGENT_PROMPT_PREVIEW_BUDGET) +} + +fn derive_subagent_name(prompt: &str, ordinal: i32) -> String { + let first_line = prompt_first_line(prompt); + let stripped = first_line + .strip_prefix("Task:") + .or_else(|| first_line.strip_prefix("task:")) + .unwrap_or(&first_line) + .trim(); + + let stopwords = [ + "the", "a", "an", "to", "and", "or", "of", "for", "from", "in", "on", "with", "read", + "file", "task", + ]; + + let tokens: Vec = stripped + .split_whitespace() + .map(clean_token) + .filter(|token| !token.is_empty()) + .filter(|token| !stopwords.contains(&token.as_str())) + .take(4) + .collect(); + + if tokens.is_empty() { + return format!("agent-{ordinal}"); + } + + let joined = tokens.join("-"); + truncate_text(&joined, /*max_graphemes*/ 40) +} + +fn clean_token(token: &str) -> String { + token + .chars() + .map(|ch| ch.to_ascii_lowercase()) + .filter(|ch| ch.is_ascii_alphanumeric() || *ch == '-') + .collect() +} diff --git a/codex-rs/tui/src/app/test_support.rs b/codex-rs/tui/src/app/test_support.rs index fd88161cad..ba13242dc3 100644 --- a/codex-rs/tui/src/app/test_support.rs +++ b/codex-rs/tui/src/app/test_support.rs @@ -13,6 +13,7 @@ pub(super) async fn make_test_app() -> App { let file_search = FileSearchManager::new(config.cwd.to_path_buf(), app_event_tx.clone()); let model = crate::legacy_core::test_support::get_model_offline(config.model.as_deref()); let session_telemetry = test_session_telemetry(&config, model.as_str()); + let animations_enabled = config.animations; App { model_catalog: 
chat_widget.model_catalog(), @@ -35,6 +36,7 @@ pub(super) async fn make_test_app() -> App { enhanced_keys_supported: false, keymap: crate::keymap::RuntimeKeymap::defaults(), commit_anim_running: Arc::new(AtomicBool::new(false)), + subagent_anim_running: Arc::new(AtomicBool::new(false)), status_line_invalid_items_warned: Arc::new(AtomicBool::new(false)), terminal_title_invalid_items_warned: Arc::new(AtomicBool::new(false)), backtrack: BacktrackState::default(), @@ -49,6 +51,7 @@ pub(super) async fn make_test_app() -> App { windows_sandbox: WindowsSandboxState::default(), thread_event_channels: HashMap::new(), thread_event_listener_tasks: HashMap::new(), + subagents: super::subagents::SubagentRegistry::new(animations_enabled), agent_navigation: AgentNavigationState::default(), side_threads: HashMap::new(), active_thread_id: None, diff --git a/codex-rs/tui/src/app/tests.rs b/codex-rs/tui/src/app/tests.rs index eca648251c..dda8d9212d 100644 --- a/codex-rs/tui/src/app/tests.rs +++ b/codex-rs/tui/src/app/tests.rs @@ -8,6 +8,7 @@ use crate::app_backtrack::BacktrackState; use crate::app_backtrack::user_count; use crate::app_command::AppCommand; +use super::subagents::SubagentInfo; use crate::chatwidget::ChatWidgetInit; use crate::chatwidget::create_initial_user_message; use crate::chatwidget::tests::make_chatwidget_manual_with_sender; @@ -29,6 +30,9 @@ use codex_app_server_protocol::AdditionalFileSystemPermissions; use codex_app_server_protocol::AdditionalNetworkPermissions; use codex_app_server_protocol::AdditionalPermissionProfile; use codex_app_server_protocol::AgentMessageDeltaNotification; +use codex_app_server_protocol::CollabAgentState; +use codex_app_server_protocol::CollabAgentStatus; +use codex_app_server_protocol::CollabAgentTool; use codex_app_server_protocol::CommandExecutionRequestApprovalParams; use codex_app_server_protocol::ConfigWarningNotification; use codex_app_server_protocol::FileChangeRequestApprovalParams; @@ -1225,6 +1229,159 @@ async fn 
token_usage_update_refreshes_status_line_with_runtime_context_window() ); } +#[tokio::test] +async fn queued_subagent_panel_update_mounts_on_fresh_chat_widget_after_thread_switch() { + let (mut app, mut app_event_rx, _op_rx) = make_test_app_with_channels().await; + let root_thread_id = ThreadId::new(); + let subagent_thread_id = ThreadId::new(); + + app.primary_thread_id = Some(root_thread_id); + app.active_thread_id = Some(root_thread_id); + app.subagents.set_root_thread(root_thread_id); + + let mut info = SubagentInfo::new( + /*ordinal*/ 1, + "watchdog-agent".to_string(), + "watchdog idle".to_string(), + /*is_watchdog*/ true, + ); + info.status = codex_protocol::protocol::AgentStatus::PendingInit; + info.latest_preview = "watchdog idle".to_string(); + info.latest_update_at = Instant::now(); + app.subagents.order.push(subagent_thread_id); + app.subagents.agents.insert(subagent_thread_id, info); + + app.sync_subagent_panel_state(); + + let queued_panel = match app_event_rx.try_recv() { + Ok(AppEvent::UpdateSubagentPanel(panel)) => panel, + other => panic!("expected queued subagent panel update, got {other:?}"), + }; + + let (fresh_chat_widget, _fresh_app_event_tx, _fresh_rx, _fresh_op_rx) = + make_chatwidget_manual_with_sender().await; + app.chat_widget = fresh_chat_widget; + app.chat_widget.set_composer_text( + "back on the root thread".to_string(), + Vec::new(), + Vec::new(), + ); + + app.chat_widget.on_subagent_panel_updated(queued_panel); + + let width = 80; + let height = app.chat_widget.desired_height(width); + let mut terminal = + ratatui::Terminal::new(crate::test_backend::VT100Backend::new(width, height)) + .expect("create terminal"); + terminal.set_viewport_area(ratatui::prelude::Rect::new(0, 0, width, height)); + terminal + .draw(|f| app.chat_widget.render(f.area(), f.buffer_mut())) + .expect("render fresh widget with queued subagent panel"); + let screen = terminal.backend().vt100().screen().contents(); + + assert!( + screen.contains("Subagents"), + 
"queued subagent panel update should mount on the fresh widget" + ); + assert!(screen.contains("watchdog-agent")); +} + +#[tokio::test] +async fn wait_completion_clears_subagent_status_panel() { + let (mut app, mut app_event_rx, _op_rx) = make_test_app_with_channels().await; + let root_thread_id = ThreadId::new(); + let subagent_thread_id = ThreadId::new(); + + app.primary_thread_id = Some(root_thread_id); + app.active_thread_id = Some(root_thread_id); + + while app_event_rx.try_recv().is_ok() {} + + app.process_subagent_notification_side_effects( + root_thread_id, + &ServerNotification::ItemCompleted(codex_app_server_protocol::ItemCompletedNotification { + thread_id: root_thread_id.to_string(), + turn_id: "turn-1".to_string(), + item: ThreadItem::CollabAgentToolCall { + id: "spawn-1".to_string(), + tool: CollabAgentTool::SpawnAgent, + status: codex_app_server_protocol::CollabAgentToolCallStatus::Completed, + sender_thread_id: root_thread_id.to_string(), + receiver_thread_ids: vec![subagent_thread_id.to_string()], + prompt: Some("Inspect the workspace".to_string()), + model: None, + reasoning_effort: None, + agents_states: HashMap::from([( + subagent_thread_id.to_string(), + CollabAgentState { + status: CollabAgentStatus::Running, + message: None, + }, + )]), + }, + }), + ); + + assert_matches!( + app.subagents + .agents + .get(&subagent_thread_id) + .map(|info| &info.status), + Some(codex_protocol::protocol::AgentStatus::Running) + ); + assert!( + app.subagents.panel_cell().is_some(), + "running subagent should render in the status panel" + ); + while app_event_rx.try_recv().is_ok() {} + + app.process_subagent_notification_side_effects( + root_thread_id, + &ServerNotification::ItemCompleted(codex_app_server_protocol::ItemCompletedNotification { + thread_id: root_thread_id.to_string(), + turn_id: "turn-1".to_string(), + item: ThreadItem::CollabAgentToolCall { + id: "wait-1".to_string(), + tool: CollabAgentTool::Wait, + status: 
codex_app_server_protocol::CollabAgentToolCallStatus::Completed, + sender_thread_id: root_thread_id.to_string(), + receiver_thread_ids: vec![subagent_thread_id.to_string()], + prompt: None, + model: None, + reasoning_effort: None, + agents_states: HashMap::from([( + subagent_thread_id.to_string(), + CollabAgentState { + status: CollabAgentStatus::Completed, + message: Some("done".to_string()), + }, + )]), + }, + }), + ); + + assert_matches!( + app.subagents + .agents + .get(&subagent_thread_id) + .map(|info| &info.status), + Some(codex_protocol::protocol::AgentStatus::Completed(Some(message))) if message == "done" + ); + assert!( + app.subagents.panel_cell().is_none(), + "completed subagent should be removed from the running status panel" + ); + let mut saw_clear_panel = false; + while let Ok(event) = app_event_rx.try_recv() { + saw_clear_panel |= matches!(event, AppEvent::ClearSubagentPanel); + } + assert!( + saw_clear_panel, + "completed subagent should clear the rendered status panel" + ); +} + #[tokio::test] async fn open_agent_picker_keeps_missing_threads_for_replay() -> Result<()> { let mut app = make_test_app().await; @@ -3686,6 +3843,7 @@ async fn make_test_app() -> App { let file_search = FileSearchManager::new(config.cwd.to_path_buf(), app_event_tx.clone()); let model = crate::legacy_core::test_support::get_model_offline(config.model.as_deref()); let session_telemetry = test_session_telemetry(&config, model.as_str()); + let animations_enabled = config.animations; App { model_catalog: chat_widget.model_catalog(), @@ -3708,6 +3866,7 @@ async fn make_test_app() -> App { enhanced_keys_supported: false, keymap: crate::keymap::RuntimeKeymap::defaults(), commit_anim_running: Arc::new(AtomicBool::new(false)), + subagent_anim_running: Arc::new(AtomicBool::new(false)), status_line_invalid_items_warned: Arc::new(AtomicBool::new(false)), terminal_title_invalid_items_warned: Arc::new(AtomicBool::new(false)), backtrack: BacktrackState::default(), @@ -3722,6 +3881,7 @@ 
async fn make_test_app() -> App { windows_sandbox: WindowsSandboxState::default(), thread_event_channels: HashMap::new(), thread_event_listener_tasks: HashMap::new(), + subagents: super::subagents::SubagentRegistry::new(animations_enabled), agent_navigation: AgentNavigationState::default(), side_threads: HashMap::new(), active_thread_id: None, @@ -3745,6 +3905,7 @@ async fn make_test_app_with_channels() -> ( let file_search = FileSearchManager::new(config.cwd.to_path_buf(), app_event_tx.clone()); let model = crate::legacy_core::test_support::get_model_offline(config.model.as_deref()); let session_telemetry = test_session_telemetry(&config, model.as_str()); + let animations_enabled = config.animations; ( App { @@ -3768,6 +3929,7 @@ async fn make_test_app_with_channels() -> ( enhanced_keys_supported: false, keymap: crate::keymap::RuntimeKeymap::defaults(), commit_anim_running: Arc::new(AtomicBool::new(false)), + subagent_anim_running: Arc::new(AtomicBool::new(false)), status_line_invalid_items_warned: Arc::new(AtomicBool::new(false)), terminal_title_invalid_items_warned: Arc::new(AtomicBool::new(false)), backtrack: BacktrackState::default(), @@ -3782,6 +3944,7 @@ async fn make_test_app_with_channels() -> ( windows_sandbox: WindowsSandboxState::default(), thread_event_channels: HashMap::new(), thread_event_listener_tasks: HashMap::new(), + subagents: super::subagents::SubagentRegistry::new(animations_enabled), agent_navigation: AgentNavigationState::default(), side_threads: HashMap::new(), active_thread_id: None, diff --git a/codex-rs/tui/src/app/thread_routing.rs b/codex-rs/tui/src/app/thread_routing.rs index 5957f552e2..b4ce19689d 100644 --- a/codex-rs/tui/src/app/thread_routing.rs +++ b/codex-rs/tui/src/app/thread_routing.rs @@ -60,6 +60,7 @@ impl App { self.active_thread_id = Some(thread_id); self.active_thread_rx = receiver; self.refresh_pending_thread_approvals().await; + self.sync_subagent_panel_state(); } pub(super) async fn store_active_thread_receiver(&mut 
self) { @@ -96,6 +97,7 @@ impl App { } self.active_thread_rx = None; self.refresh_pending_thread_approvals().await; + self.sync_subagent_panel_state(); } pub(super) async fn note_thread_outbound_op(&mut self, thread_id: ThreadId, op: &AppCommand) { @@ -1472,6 +1474,9 @@ impl App { if let ThreadBufferedEvent::Notification(notification) = &event { self.hydrate_collab_agent_metadata_for_notification(app_server, notification) .await; + if let Some(active_thread_id) = self.active_thread_id { + self.process_subagent_notification_side_effects(active_thread_id, notification); + } } self.handle_thread_event_now(event); diff --git a/codex-rs/tui/src/app_command.rs b/codex-rs/tui/src/app_command.rs index d60da69d1d..fb7b1644fb 100644 --- a/codex-rs/tui/src/app_command.rs +++ b/codex-rs/tui/src/app_command.rs @@ -30,6 +30,7 @@ use crate::permission_compat::legacy_compatible_permission_profile; #[derive(Debug, Clone, PartialEq, Serialize)] pub(crate) enum AppCommand { Interrupt, + NoteOwnerActivity, CleanBackgroundTerminals, RealtimeConversationStart(ConversationStartParams), RealtimeConversationAudio(ConversationAudioParams), @@ -115,6 +116,7 @@ pub(crate) enum AppCommand { #[allow(dead_code)] pub(crate) enum AppCommandView<'a> { Interrupt, + NoteOwnerActivity, CleanBackgroundTerminals, RealtimeConversationStart(&'a ConversationStartParams), RealtimeConversationAudio(&'a ConversationAudioParams), @@ -201,6 +203,10 @@ impl AppCommand { Self::Interrupt } + pub(crate) fn note_owner_activity() -> Self { + Self::NoteOwnerActivity + } + pub(crate) fn clean_background_terminals() -> Self { Self::CleanBackgroundTerminals } @@ -351,6 +357,7 @@ impl AppCommand { pub(crate) fn into_core(self) -> Op { match self { Self::Interrupt => Op::Interrupt, + Self::NoteOwnerActivity => Op::NoteOwnerActivity, Self::CleanBackgroundTerminals => Op::CleanBackgroundTerminals, Self::RealtimeConversationStart(params) => Op::RealtimeConversationStart(params), Self::RealtimeConversationAudio(params) => 
Op::RealtimeConversationAudio(params), @@ -471,6 +478,7 @@ impl AppCommand { pub(crate) fn view(&self) -> AppCommandView<'_> { match self { Self::Interrupt => AppCommandView::Interrupt, + Self::NoteOwnerActivity => AppCommandView::NoteOwnerActivity, Self::CleanBackgroundTerminals => AppCommandView::CleanBackgroundTerminals, Self::RealtimeConversationStart(params) => { AppCommandView::RealtimeConversationStart(params) @@ -590,6 +598,7 @@ impl From for AppCommand { fn from(value: Op) -> Self { match value { Op::Interrupt => Self::Interrupt, + Op::NoteOwnerActivity => Self::NoteOwnerActivity, Op::CleanBackgroundTerminals => Self::CleanBackgroundTerminals, Op::RealtimeConversationStart(params) => Self::RealtimeConversationStart(params), Op::RealtimeConversationAudio(params) => Self::RealtimeConversationAudio(params), diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index 4c351266ff..7b7dca883c 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -9,6 +9,7 @@ //! quits without reaching into the app loop or coupling to shutdown/exit sequencing. use std::path::PathBuf; +use std::sync::Arc; use codex_app_server_protocol::AddCreditsNudgeCreditType; use codex_app_server_protocol::AddCreditsNudgeEmailStatus; @@ -36,6 +37,7 @@ use crate::bottom_pane::ApprovalRequest; use crate::bottom_pane::StatusLineItem; use crate::bottom_pane::TerminalTitleItem; use crate::chatwidget::UserMessage; +use crate::history_cell::SubagentStatusCell; use codex_config::types::ApprovalsReviewer; use codex_features::Feature; use codex_plugin::PluginCapabilitySummary; @@ -459,6 +461,11 @@ pub(crate) enum AppEvent { StartCommitAnimation, StopCommitAnimation, CommitTick, + StartSubagentAnimation, + StopSubagentAnimation, + SubagentTick, + UpdateSubagentPanel(Arc), + ClearSubagentPanel, /// Update the current reasoning effort in the running app and widget. 
UpdateReasoningEffort(Option), diff --git a/codex-rs/tui/src/app_server_session.rs b/codex-rs/tui/src/app_server_session.rs index 96757998ff..8956946c0a 100644 --- a/codex-rs/tui/src/app_server_session.rs +++ b/codex-rs/tui/src/app_server_session.rs @@ -53,6 +53,8 @@ use codex_app_server_protocol::ThreadGoalSetResponse; use codex_app_server_protocol::ThreadGoalStatus; use codex_app_server_protocol::ThreadInjectItemsParams; use codex_app_server_protocol::ThreadInjectItemsResponse; +use codex_app_server_protocol::ThreadInputActivityParams; +use codex_app_server_protocol::ThreadInputActivityResponse; use codex_app_server_protocol::ThreadListParams; use codex_app_server_protocol::ThreadListResponse; use codex_app_server_protocol::ThreadLoadedListParams; @@ -612,6 +614,21 @@ impl AppServerSession { self.turn_interrupt(thread_id, String::new()).await } + pub(crate) async fn thread_input_activity(&mut self, thread_id: ThreadId) -> Result<()> { + let request_id = self.next_request_id(); + let _: ThreadInputActivityResponse = self + .client + .request_typed(ClientRequest::ThreadInputActivity { + request_id, + params: ThreadInputActivityParams { + thread_id: thread_id.to_string(), + }, + }) + .await + .wrap_err("thread/inputActivity failed in TUI")?; + Ok(()) + } + pub(crate) async fn turn_steer( &mut self, thread_id: ThreadId, diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index f7b21c7d14..d27ab024c0 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -57,6 +57,7 @@ use crate::bottom_pane::StatusSurfacePreviewData; use crate::bottom_pane::StatusSurfacePreviewItem; use crate::bottom_pane::TerminalTitleItem; use crate::bottom_pane::TerminalTitleSetupView; +use crate::history_cell::SubagentStatusCell; use crate::legacy_core::DEFAULT_AGENTS_MD_FILENAME; use crate::legacy_core::config::Config; use crate::legacy_core::config::Constrained; @@ -138,11 +139,16 @@ use codex_protocol::config_types::WindowsSandboxLevel; use 
codex_protocol::items::AgentMessageContent; use codex_protocol::items::AgentMessageItem; use codex_protocol::items::UserMessageItem; +use codex_protocol::models::ContentItem; use codex_protocol::models::MessagePhase; +use codex_protocol::models::ResponseItem; use codex_protocol::models::local_image_label_text; use codex_protocol::parse_command::ParsedCommand; use codex_protocol::plan_tool::PlanItemArg as UpdatePlanItemArg; use codex_protocol::plan_tool::StepStatus as UpdatePlanItemStatus; +use codex_protocol::protocol::AGENT_INBOX_KIND; +use codex_protocol::protocol::AGENT_INBOX_MESSAGE_PREFIX; +use codex_protocol::protocol::AgentInboxPayload; #[cfg(test)] use codex_protocol::protocol::AgentMessageDeltaEvent; #[cfg(test)] @@ -197,6 +203,8 @@ use codex_protocol::protocol::Op; use codex_protocol::protocol::PatchApplyBeginEvent; use codex_protocol::protocol::RateLimitReachedType; use codex_protocol::protocol::RateLimitSnapshot; +#[cfg(test)] +use codex_protocol::protocol::RawResponseItemEvent; use codex_protocol::protocol::ReviewRequest; use codex_protocol::protocol::ReviewTarget; use codex_protocol::protocol::SkillMetadata as ProtocolSkillMetadata; @@ -253,6 +261,7 @@ use tracing::debug; use tracing::warn; const DEFAULT_MODEL_DISPLAY_NAME: &str = "loading"; +const WATCHDOG_OWNER_ACTIVITY_SIGNAL_INTERVAL: Duration = Duration::from_secs(1); const MULTI_AGENT_ENABLE_TITLE: &str = "Enable subagents?"; const MULTI_AGENT_ENABLE_YES: &str = "Yes, enable"; const MULTI_AGENT_ENABLE_NO: &str = "Not now"; @@ -489,6 +498,37 @@ fn is_unified_exec_source(source: ExecCommandSource) -> bool { ) } +fn agent_inbox_message_from_item(item: &ResponseItem) -> Option<(Option, String)> { + match item { + ResponseItem::FunctionCallOutput { output, .. 
} => { + let text = output.body.to_text()?; + let payload: AgentInboxPayload = serde_json::from_str(&text).ok()?; + if !payload.injected || payload.kind != AGENT_INBOX_KIND { + return None; + } + Some((Some(payload.sender_thread_id.to_string()), payload.message)) + } + ResponseItem::Message { content, .. } => { + let text = content.iter().find_map(|item| match item { + ContentItem::InputText { text } | ContentItem::OutputText { text } => { + Some(text.as_str()) + } + _ => None, + })?; + let rest = text.strip_prefix(AGENT_INBOX_MESSAGE_PREFIX)?; + let (sender, message) = rest.split_once(']')?; + let message = message.trim_start().to_string(); + let sender = sender.trim().to_string(); + if sender.is_empty() { + Some((None, message)) + } else { + Some((Some(sender), message)) + } + } + _ => None, + } +} + fn is_standard_tool_call(parsed_cmd: &[ParsedCommand]) -> bool { !parsed_cmd.is_empty() && parsed_cmd @@ -798,6 +838,7 @@ pub(crate) struct ChatWidget { codex_op_target: CodexOpTarget, bottom_pane: BottomPane, active_cell: Option>, + subagent_panel: Option, /// Monotonic-ish counter used to invalidate transcript overlay caching. /// /// The transcript overlay appends a cached "live tail" for the current active cell. Most @@ -995,6 +1036,8 @@ pub(crate) struct ChatWidget { /// We require the second press to match this key so `Ctrl+C` followed by /// `Ctrl+D` (or vice versa) doesn't quit accidentally. quit_shortcut_key: Option, + // Last time we sent a lightweight owner-activity signal for the running thread. + last_watchdog_owner_activity_signal_at: Option, // Simple review mode flag; used to adjust layout and banners. is_review_mode: bool, // Snapshot of token usage to restore after review mode exits. 
@@ -1068,6 +1111,8 @@ pub(crate) struct ChatWidget { goal_status_active_turn_started_at: Option, external_editor_state: ExternalEditorState, realtime_conversation: RealtimeConversationUiState, + #[cfg(test)] + last_replayed_agent_inbox_message: Option<(Option, String)>, last_rendered_user_message_event: Option, last_non_retry_error: Option<(String, String)>, } @@ -2739,6 +2784,7 @@ impl ChatWidget { self.user_turn_pending_start = false; self.agent_turn_running = true; self.goal_status_active_turn_started_at = Some(Instant::now()); + self.last_watchdog_owner_activity_signal_at = None; self.turn_sleep_inhibitor .set_turn_running(/*turn_running*/ true); self.saw_copy_source_this_turn = false; @@ -2850,6 +2896,7 @@ impl ChatWidget { self.user_turn_pending_start = false; self.agent_turn_running = false; self.goal_status_active_turn_started_at = None; + self.last_watchdog_owner_activity_signal_at = None; self.turn_sleep_inhibitor .set_turn_running(/*turn_running*/ false); self.update_task_running_state(); @@ -4273,6 +4320,32 @@ impl ChatWidget { self.request_redraw(); } + #[cfg(test)] + fn on_raw_response_item(&mut self, event: RawResponseItemEvent, from_replay: bool) { + let Some((sender, message)) = agent_inbox_message_from_item(&event.item) else { + if from_replay { + self.last_replayed_agent_inbox_message = None; + } + return; + }; + + let replay_key = (sender.clone(), message.clone()); + if from_replay { + if self.last_replayed_agent_inbox_message.as_ref() == Some(&replay_key) { + return; + } + self.last_replayed_agent_inbox_message = Some(replay_key); + } else { + self.last_replayed_agent_inbox_message = None; + } + + let hint = sender.map(|sender| format!("from {sender}")); + self.add_to_history(history_cell::new_info_event( + format!("Agent message: {message}"), + hint, + )); + } + fn on_collab_agent_tool_call(&mut self, item: ThreadItem) { let ThreadItem::CollabAgentToolCall { id, @@ -4739,6 +4812,35 @@ impl ChatWidget { self.run_commit_tick(); } + pub(crate) fn 
on_subagent_panel_updated(&mut self, panel: Arc) { + let state_handle = panel.state_handle(); + + if let Some(existing) = self.subagent_panel.as_mut() { + if existing.matches_state(&state_handle) { + self.request_redraw(); + return; + } + *existing = panel.as_ref().clone(); + self.request_redraw(); + return; + } + + self.subagent_panel = Some(panel.as_ref().clone()); + self.request_redraw(); + } + + pub(crate) fn clear_subagent_panel(&mut self) { + if self.subagent_panel.take().is_some() { + self.request_redraw(); + } + } + + pub(crate) fn on_subagent_tick(&mut self) { + if self.subagent_panel.is_some() { + self.request_redraw(); + } + } + /// Runs a regular periodic commit tick. fn run_commit_tick(&mut self) { self.run_commit_tick_with_scope(CommitTickScope::AnyMode); @@ -5295,6 +5397,7 @@ impl ChatWidget { skills: None, }), active_cell, + subagent_panel: None, active_cell_revision: 0, config, effective_service_tier, @@ -5392,6 +5495,7 @@ impl ChatWidget { pending_notification: None, quit_shortcut_expires_at: None, quit_shortcut_key: None, + last_watchdog_owner_activity_signal_at: None, is_review_mode: false, pre_review_token_info: None, needs_final_message_separator: false, @@ -5424,6 +5528,8 @@ impl ChatWidget { goal_status_active_turn_started_at: None, external_editor_state: ExternalEditorState::Closed, realtime_conversation: RealtimeConversationUiState::default(), + #[cfg(test)] + last_replayed_agent_inbox_message: None, last_rendered_user_message_event: None, last_non_retry_error: None, }; @@ -5464,7 +5570,8 @@ impl ChatWidget { widget .bottom_pane .set_connectors_enabled(widget.connectors_enabled()); - widget.refresh_status_surfaces(); + widget.refresh_terminal_title(); + widget.refresh_terminal_title(); widget } @@ -5505,6 +5612,7 @@ impl ChatWidget { return; } + let composer_before = self.bottom_pane.composer_text_with_pending(); match key_event { KeyEvent { code: KeyCode::Char(c), @@ -5688,6 +5796,7 @@ impl ChatWidget { self.refresh_plan_mode_nudge(); } } 
+ self.maybe_signal_watchdog_owner_activity_if_draft_changed(&composer_before); } /// Attach a local image to the composer when the active model supports image inputs. @@ -5712,8 +5821,10 @@ impl ChatWidget { } pub(crate) fn apply_external_edit(&mut self, text: String) { + let composer_before = self.bottom_pane.composer_text_with_pending(); self.bottom_pane.apply_external_edit(text); self.refresh_plan_mode_nudge(); + self.maybe_signal_watchdog_owner_activity_if_draft_changed(&composer_before); self.request_redraw(); } @@ -5854,8 +5965,37 @@ impl ChatWidget { } pub(crate) fn handle_paste(&mut self, text: String) { + let composer_before = self.bottom_pane.composer_text_with_pending(); self.bottom_pane.handle_paste(text); self.refresh_plan_mode_nudge(); + self.maybe_signal_watchdog_owner_activity_if_draft_changed(&composer_before); + } + + fn maybe_signal_watchdog_owner_activity_if_draft_changed(&mut self, composer_before: &str) { + if self.bottom_pane.composer_text_with_pending() == composer_before { + return; + } + self.maybe_signal_watchdog_owner_activity(); + } + + fn maybe_signal_watchdog_owner_activity(&mut self) { + if !self.agent_turn_running { + return; + } + let Some(thread_id) = self.thread_id else { + return; + }; + let now = Instant::now(); + if let Some(last_signal_at) = self.last_watchdog_owner_activity_signal_at + && now.duration_since(last_signal_at) < WATCHDOG_OWNER_ACTIVITY_SIGNAL_INTERVAL + { + return; + } + self.last_watchdog_owner_activity_signal_at = Some(now); + self.app_event_tx.send(AppEvent::SubmitThreadOp { + thread_id, + op: AppCommand::note_owner_activity().into(), + }); } // Returns true if caller should skip rendering this frame (a future frame is scheduled). @@ -5879,6 +6019,14 @@ impl ChatWidget { fn flush_active_cell(&mut self) { if let Some(active) = self.active_cell.take() { + // Subagent status is a transient panel, not transcript history. 
If we + // flush it into history every time another cell is inserted, the + // transcript gets spammed with repeated identical "Subagents ..." blocks. + // Keep the panel mounted so later transcript cells do not make it disappear. + if active.as_any().is::() { + self.active_cell = Some(active); + return; + } self.needs_final_message_separator = true; self.app_event_tx.send(AppEvent::InsertHistoryCell(active)); } @@ -7011,7 +7159,6 @@ impl ChatWidget { | ServerNotification::ThreadStatusChanged(_) | ServerNotification::ThreadArchived(_) | ServerNotification::ThreadUnarchived(_) - | ServerNotification::RawResponseItemCompleted(_) | ServerNotification::CommandExecOutputDelta(_) | ServerNotification::FileChangePatchUpdated(_) | ServerNotification::McpToolCallProgress(_) @@ -7028,6 +7175,15 @@ impl ChatWidget { | ServerNotification::WindowsSandboxSetupCompleted(_) | ServerNotification::AccountLoginCompleted(_) => {} ServerNotification::ContextCompacted(_) => {} + ServerNotification::RawResponseItemCompleted(notification) => { + if let Some((sender, message)) = agent_inbox_message_from_item(¬ification.item) { + let hint = sender.map(|sender| format!("from {sender}")); + self.add_to_history(history_cell::new_info_event( + format!("Agent message: {message}"), + hint, + )); + } + } } } @@ -7343,6 +7499,9 @@ impl ChatWidget { if !is_resume_initial_replay && !is_stream_error { self.restore_retry_status_header_if_present(); } + if !from_replay || !matches!(&msg, EventMsg::RawResponseItem(_)) { + self.last_replayed_agent_inbox_message = None; + } match msg { EventMsg::AgentMessageDelta(_) @@ -7611,8 +7770,8 @@ impl ChatWidget { }); } } - EventMsg::RawResponseItem(_) - | EventMsg::ItemStarted(_) + EventMsg::RawResponseItem(ev) => self.on_raw_response_item(ev, from_replay), + EventMsg::ItemStarted(_) | EventMsg::AgentMessageContentDelta(_) | EventMsg::PatchApplyUpdated(_) | EventMsg::ReasoningContentDelta(_) @@ -11950,6 +12109,13 @@ impl ChatWidget { let mut flex = 
FlexRenderable::new(); flex.push(/*flex*/ 1, active_cell_renderable); flex.push(/*flex*/ 0, active_hook_cell_renderable); + let subagent_panel_renderable = match &self.subagent_panel { + Some(panel) => RenderableItem::Borrowed(panel).inset(Insets::tlbr( + /*top*/ 1, /*left*/ 0, /*bottom*/ 0, /*right*/ 0, + )), + None => RenderableItem::Owned(Box::new(())), + }; + flex.push(/*flex*/ 0, subagent_panel_renderable); flex.push( /*flex*/ 0, RenderableItem::Borrowed(&self.bottom_pane).inset(Insets::tlbr( @@ -12131,7 +12297,7 @@ const SIDE_PLACEHOLDERS: [&str; 3] = [ // Extract the first bold (Markdown) element in the form **...** from `s`. // Returns the inner text if found; otherwise `None`. -fn extract_first_bold(s: &str) -> Option { +pub(crate) fn extract_first_bold(s: &str) -> Option { let bytes = s.as_bytes(); let mut i = 0usize; while i + 1 < bytes.len() { diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_mounts_while_placeholder_active_cell_exists.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_mounts_while_placeholder_active_cell_exists.snap new file mode 100644 index 0000000000..ba46e2610b --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__subagent_panel_mounts_while_placeholder_active_cell_exists.snap @@ -0,0 +1,18 @@ +--- +source: tui/src/chatwidget/tests/status_and_layout.rs +expression: contents +--- +╭───────────────────────────────────────╮ +│ >_ OpenAI Codex () │ +│ │ +│ model: loading /model to change │ +│ directory: /tmp/project │ +╰───────────────────────────────────────╯ + +• Subagents (no subagents running) +• [#1] [watchdog] watchdog-agent idle (59s) — watchdog idle + + +› show current subagent state + + gpt-5.3-codex default · 100% left · /tmp/project diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__thread_snapshot_replay_agent_inbox_dedupes_compatibility_items.snap 
b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__thread_snapshot_replay_agent_inbox_dedupes_compatibility_items.snap new file mode 100644 index 0000000000..40102b3e02 --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__thread_snapshot_replay_agent_inbox_dedupes_compatibility_items.snap @@ -0,0 +1,6 @@ +--- +source: tui/src/chatwidget/tests.rs +assertion_line: 464 +expression: rendered +--- +• Agent message: Please review the latest diff from 019cbff7-558b-77d3-8653-8238ab5361ec diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui_app_server__chatwidget__tests__thread_snapshot_replay_agent_inbox_dedupes_compatibility_items.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui_app_server__chatwidget__tests__thread_snapshot_replay_agent_inbox_dedupes_compatibility_items.snap new file mode 100644 index 0000000000..10417e9fbb --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui_app_server__chatwidget__tests__thread_snapshot_replay_agent_inbox_dedupes_compatibility_items.snap @@ -0,0 +1,5 @@ +--- +source: tui_app_server/src/chatwidget/tests.rs +expression: rendered +--- +• Agent message: Please review the latest diff from 019cbff7-558b-77d3-8653-8238ab5361ec diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index c1cbeae613..af0a6215da 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -15,6 +15,9 @@ pub(super) use crate::bottom_pane::LocalImageAttachment; pub(super) use crate::bottom_pane::MentionBinding; pub(super) use crate::bottom_pane::QueuedInputAction; pub(super) use crate::chatwidget::realtime::RealtimeConversationPhase; +pub(super) use crate::history_cell::SubagentPanelAgent; +pub(super) use crate::history_cell::SubagentPanelState; +pub(super) use crate::history_cell::SubagentStatusCell; pub(super) use crate::history_cell::UserHistoryCell; pub(super) use crate::legacy_core::config::Config; pub(super) 
use crate::legacy_core::config::ConfigBuilder; @@ -223,6 +226,7 @@ pub(super) use std::collections::BTreeMap; pub(super) use std::collections::HashMap; pub(super) use std::collections::HashSet; pub(super) use std::path::PathBuf; +pub(super) use std::sync::Mutex as StdMutex; pub(super) use tempfile::NamedTempFile; pub(super) use tempfile::tempdir; pub(super) use tokio::sync::mpsc::error::TryRecvError; diff --git a/codex-rs/tui/src/chatwidget/tests/helpers.rs b/codex-rs/tui/src/chatwidget/tests/helpers.rs index d26a503a89..95e06b58ad 100644 --- a/codex-rs/tui/src/chatwidget/tests/helpers.rs +++ b/codex-rs/tui/src/chatwidget/tests/helpers.rs @@ -223,6 +223,7 @@ pub(super) async fn make_chatwidget_manual( saw_copy_source_this_turn: false, running_commands: HashMap::new(), collab_agent_metadata: HashMap::new(), + subagent_panel: None, pending_collab_spawn_requests: HashMap::new(), suppressed_exec_calls: HashSet::new(), skills_all: Vec::new(), @@ -307,6 +308,7 @@ pub(super) async fn make_chatwidget_manual( last_terminal_title_requires_action: false, terminal_title_setup_original_items: None, terminal_title_animation_origin: Instant::now(), + last_watchdog_owner_activity_signal_at: None, status_line_project_root_name_cache: None, status_line_branch: None, status_line_branch_cwd: None, @@ -318,6 +320,7 @@ pub(super) async fn make_chatwidget_manual( external_editor_state: ExternalEditorState::Closed, realtime_conversation: RealtimeConversationUiState::default(), last_rendered_user_message_event: None, + last_replayed_agent_inbox_message: None, last_non_retry_error: None, }; widget.set_model(&resolved_model); diff --git a/codex-rs/tui/src/chatwidget/tests/status_and_layout.rs b/codex-rs/tui/src/chatwidget/tests/status_and_layout.rs index eff2c409e7..1fa6bbe88d 100644 --- a/codex-rs/tui/src/chatwidget/tests/status_and_layout.rs +++ b/codex-rs/tui/src/chatwidget/tests/status_and_layout.rs @@ -153,6 +153,113 @@ async fn 
turn_started_uses_runtime_context_window_before_first_token_count() { "expected /status to avoid raw config context window, got: {context_line}" ); } + +#[tokio::test] +async fn subagent_panel_is_not_flushed_into_transcript_history() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await; + + let state = Arc::new(StdMutex::new(SubagentPanelState { + started_at: Instant::now(), + total_agents: 1, + running_count: 0, + running_agents: vec![SubagentPanelAgent { + ordinal: 1, + name: "user-request-derisk-implement".to_string(), + status: AgentStatus::PendingInit, + is_watchdog: true, + watchdog_countdown_started_at: Some(Instant::now()), + preview: "watchdog idle".to_string(), + latest_update_at: Instant::now(), + }], + })); + chat.on_subagent_panel_updated(Arc::new(SubagentStatusCell::new( + Arc::clone(&state), + /*animations_enabled*/ true, + ))); + + chat.add_to_history(history_cell::new_error_event("follow-up cell".to_string())); + + let inserted = drain_insert_history(&mut rx); + assert_eq!( + inserted.len(), + 1, + "subagent panel should remain transient and not be inserted into transcript history" + ); + let rendered = lines_to_single_string(&inserted[0]); + assert!(rendered.contains("follow-up cell")); + assert!(!rendered.contains("Subagents")); + assert!( + chat.subagent_panel + .as_ref() + .is_some_and(|panel| panel.matches_state(&state)), + "subagent panel should stay mounted after other history cells are inserted" + ); +} + +#[tokio::test] +async fn subagent_panel_mounts_while_placeholder_active_cell_exists_snapshot() { + let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await; + chat.active_cell = Some(ChatWidget::placeholder_session_header_cell( + chat.config_ref(), + )); + chat.bottom_pane.set_composer_text( + "show current subagent state".to_string(), + Vec::new(), + Vec::new(), + ); + + let state = Arc::new(StdMutex::new(SubagentPanelState { + started_at: Instant::now(), + total_agents: 1, 
+ running_count: 0, + running_agents: vec![SubagentPanelAgent { + ordinal: 1, + name: "watchdog-agent".to_string(), + status: AgentStatus::PendingInit, + is_watchdog: true, + watchdog_countdown_started_at: Some(Instant::now()), + preview: "watchdog idle".to_string(), + latest_update_at: Instant::now(), + }], + })); + chat.on_subagent_panel_updated(Arc::new(SubagentStatusCell::new( + Arc::clone(&state), + /*animations_enabled*/ false, + ))); + + assert!( + chat.active_cell + .as_ref() + .is_some_and(|cell| cell.as_any().is::()), + "placeholder session header should remain the active cell" + ); + assert!( + chat.subagent_panel + .as_ref() + .is_some_and(|panel| panel.matches_state(&state)), + "subagent panel should mount even when another active cell already exists" + ); + + let width = 80; + let height = chat.desired_height(width); + let mut terminal = + ratatui::Terminal::new(VT100Backend::new(width, height)).expect("create terminal"); + terminal.set_viewport_area(Rect::new(0, 0, width, height)); + terminal + .draw(|f| chat.render(f.area(), f.buffer_mut())) + .expect("render chat with placeholder header and subagent panel"); + + let contents = terminal + .backend() + .vt100() + .screen() + .contents() + .replace(crate::version::CODEX_CLI_VERSION, ""); + assert_chatwidget_snapshot!( + "subagent_panel_mounts_while_placeholder_active_cell_exists", + contents + ); +} #[tokio::test] async fn helpers_are_available_and_do_not_panic() { let (tx_raw, _rx) = unbounded_channel::(); diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index 9d009a93fb..c890ad1bfc 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -27,6 +27,8 @@ use crate::render::line_utils::line_to_static; use crate::render::line_utils::prefix_lines; use crate::render::line_utils::push_owned_lines; use crate::render::renderable::Renderable; +use crate::shimmer::shimmer_spans; +use crate::status_indicator_widget::fmt_elapsed_compact; use 
crate::style::proposed_plan_style; use crate::style::user_message_style; #[cfg(test)] @@ -62,6 +64,7 @@ use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::plan_tool::PlanItemArg; use codex_protocol::plan_tool::StepStatus; use codex_protocol::plan_tool::UpdatePlanArgs; +use codex_protocol::protocol::AgentStatus; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::FileChange; use codex_protocol::protocol::McpAuthStatus; @@ -88,6 +91,8 @@ use std::collections::HashMap; use std::io::Cursor; use std::path::Path; use std::path::PathBuf; +use std::sync::Arc; +use std::sync::Mutex; use std::time::Duration; use std::time::Instant; use tracing::error; @@ -556,6 +561,303 @@ impl HistoryCell for PlainHistoryCell { } } +#[cfg_attr(not(test), allow(dead_code))] +#[derive(Clone, Debug)] +pub(crate) struct SubagentPanelAgent { + pub(crate) ordinal: i32, + pub(crate) name: String, + pub(crate) status: AgentStatus, + pub(crate) is_watchdog: bool, + pub(crate) watchdog_countdown_started_at: Option<Instant>, + pub(crate) preview: String, + pub(crate) latest_update_at: Instant, +} + +#[cfg_attr(not(test), allow(dead_code))] +#[derive(Clone, Debug)] +pub(crate) struct SubagentPanelState { + pub(crate) started_at: Instant, + pub(crate) total_agents: i32, + pub(crate) running_count: i32, + pub(crate) running_agents: Vec<SubagentPanelAgent>, +} + +impl SubagentPanelState { + #[cfg_attr(not(test), allow(dead_code))] + pub(crate) fn running_count(&self) -> i32 { + self.running_count + } + + #[cfg_attr(not(test), allow(dead_code))] + pub(crate) fn has_animating_agents(&self, now: Instant) -> bool { + self.running_agents + .iter() + .any(|agent| should_shimmer(agent, now) || has_watchdog_countdown(agent, now)) + } +} + +#[cfg_attr(not(test), allow(dead_code))] +#[derive(Clone, Debug)] +pub(crate) struct SubagentStatusCell { + state: Arc<Mutex<SubagentPanelState>>, + animations_enabled: bool, +} + +impl SubagentStatusCell { + #[cfg_attr(not(test), allow(dead_code))] + pub(crate)
fn new( + state: Arc<Mutex<SubagentPanelState>>, + animations_enabled: bool, + ) -> SubagentStatusCell { + SubagentStatusCell { + state, + animations_enabled, + } + } + + #[allow(dead_code)] + pub(crate) fn state_handle(&self) -> Arc<Mutex<SubagentPanelState>> { + Arc::clone(&self.state) + } + + #[allow(dead_code)] + pub(crate) fn matches_state(&self, other: &Arc<Mutex<SubagentPanelState>>) -> bool { + Arc::ptr_eq(&self.state, other) + } +} + +impl HistoryCell for SubagentStatusCell { + fn display_lines(&self, width: u16) -> Vec<Line<'static>> { + let state = { + let guard = self + .state + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + guard.clone() + }; + if state.running_agents.is_empty() { + return Vec::new(); + } + + let running_count = state.running_count(); + let total_agents = state.total_agents.max(running_count); + let count_label = subagent_count_label(total_agents, running_count); + let header_suffix = if running_count > 0 { + let elapsed = fmt_elapsed_compact(state.started_at.elapsed().as_secs()); + format!("({elapsed} • {count_label} • esc to interrupt)") + } else { + format!("({count_label})") + }; + + let mut lines = Vec::new(); + lines.push(Line::from(vec![ + "• ".dim(), + "Subagents".bold(), + " ".into(), + header_suffix.dim(), + ])); + + let mut running_agents = state.running_agents; + running_agents.sort_by(|left, right| left.ordinal.cmp(&right.ordinal)); + let preview_budget = running_preview_budget(width); + let now = Instant::now(); + lines.extend(running_agents.into_iter().map(|agent| { + let preview = truncate_text(agent.preview.trim(), preview_budget); + let mut spans: Vec<Span<'static>> = + vec!["• ".dim(), format!("[#{}] ", agent.ordinal).dim()]; + if agent.is_watchdog { + spans.push("[watchdog] ".magenta().dim()); + } + spans.push(Span::from(agent.name.clone())); + spans.push(" ".into()); + spans.push(status_span_for_panel(&agent, now)); + spans.push(" — ".dim()); + if self.animations_enabled && should_shimmer(&agent, now) { + spans.extend(shimmer_spans(&preview)); + } else { + spans.push(Span::from(preview)); + } + 
Line::from(spans) + })); + + lines + } + + fn transcript_animation_tick(&self) -> Option<u64> { + if !self.animations_enabled { + return None; + } + let guard = self + .state + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let now = Instant::now(); + if !guard.has_animating_agents(now) { + return None; + } + Some((now.duration_since(guard.started_at).as_millis() / 100) as u64) + } +} + +impl Renderable for SubagentStatusCell { + fn render(&self, area: Rect, buf: &mut Buffer) { + let lines = self.display_lines(area.width); + let paragraph = Paragraph::new(Text::from(lines)).wrap(Wrap { trim: false }); + let y = if area.height == 0 { + 0 + } else { + let overflow = paragraph + .line_count(area.width) + .saturating_sub(usize::from(area.height)); + u16::try_from(overflow).unwrap_or(u16::MAX) + }; + paragraph.scroll((y, 0)).render(area, buf); + } + + fn desired_height(&self, width: u16) -> u16 { + HistoryCell::desired_height(self, width) + } +} + +#[cfg_attr(not(test), allow(dead_code))] +pub(crate) fn new_subagent_spawned_cell(name: &str, prompt_preview: &str) -> PlainHistoryCell { + let mut lines = Vec::new(); + lines.push(Line::from(vec![ + "• ".dim(), + "Spawned subagent ".into(), + Span::from(name.to_string()).bold(), + ])); + + let preview = truncate_text(prompt_preview.trim(), /*max_graphemes*/ 240); + if !preview.is_empty() { + lines.push(Line::from(vec![ + " └ ".dim(), + Span::from(format!("\"{preview}\"")).dim(), + ])); + } + + PlainHistoryCell::new(lines) +} + +#[allow(dead_code)] +pub(crate) fn new_subagent_update_cell( + name: &str, + status: &AgentStatus, + summary: &str, +) -> PlainHistoryCell { + let mut spans: Vec<Span<'static>> = vec![ + "• ".dim(), + "Subagent update: ".into(), + Span::from(name.to_string()).bold(), + " ".into(), + status_label_span(status), + ]; + + let summary = truncate_text(summary.trim(), /*max_graphemes*/ 240); + if !summary.is_empty() { + spans.push(" — ".dim()); + spans.push(Span::from(summary)); + } + 
PlainHistoryCell::new(vec![Line::from(spans)]) +} + +#[cfg_attr(not(test), allow(dead_code))] +fn running_preview_budget(width: u16) -> usize { + let width = width as usize; + width.saturating_sub(24).clamp(60, 160) +} + +#[cfg_attr(not(test), allow(dead_code))] +fn is_running_status(status: &AgentStatus) -> bool { + matches!(status, AgentStatus::PendingInit | AgentStatus::Running) +} + +#[cfg_attr(not(test), allow(dead_code))] +fn status_span_for_panel(agent: &SubagentPanelAgent, now: Instant) -> Span<'static> { + match &agent.status { + AgentStatus::PendingInit if agent.is_watchdog => { + if let Some(countdown) = watchdog_countdown_remaining(agent, now) { + format!("idle ({})", fmt_elapsed_compact(countdown.as_secs())).dim() + } else { + "idle".dim() + } + } + AgentStatus::PendingInit | AgentStatus::Running => "running".cyan().bold(), + AgentStatus::Interrupted => "interrupted".magenta(), + AgentStatus::Completed(_) => "completed".green(), + AgentStatus::Errored(_) => "errored".red(), + AgentStatus::Shutdown => "shutdown".dim(), + AgentStatus::NotFound => "not found".red(), + } +} + +#[cfg_attr(not(test), allow(dead_code))] +const SUBAGENT_SHIMMER_WINDOW: Duration = Duration::from_secs(1); +const WATCHDOG_COUNTDOWN: Duration = Duration::from_secs(60); + +#[cfg_attr(not(test), allow(dead_code))] +fn should_shimmer(agent: &SubagentPanelAgent, now: Instant) -> bool { + if agent.is_watchdog && matches!(agent.status, AgentStatus::PendingInit) { + return false; + } + is_running_status(&agent.status) + && now.saturating_duration_since(agent.latest_update_at) <= SUBAGENT_SHIMMER_WINDOW +} + +#[cfg_attr(not(test), allow(dead_code))] +fn has_watchdog_countdown(agent: &SubagentPanelAgent, now: Instant) -> bool { + watchdog_countdown_remaining(agent, now).is_some_and(|remaining| remaining > Duration::ZERO) +} + +#[cfg_attr(not(test), allow(dead_code))] +fn watchdog_countdown_remaining(agent: &SubagentPanelAgent, now: Instant) -> Option<Duration> { + if !agent.is_watchdog { + return 
None; + } + if !matches!(agent.status, AgentStatus::PendingInit) { + return None; + } + let Some(started_at) = agent.watchdog_countdown_started_at else { + return None; + }; + let elapsed = now.saturating_duration_since(started_at); + Some(WATCHDOG_COUNTDOWN.saturating_sub(elapsed)) +} + +#[allow(dead_code)] +fn status_label_span(status: &AgentStatus) -> Span<'static> { + match status { + AgentStatus::PendingInit | AgentStatus::Running => "running".cyan().bold(), + AgentStatus::Interrupted => "interrupted".magenta(), + AgentStatus::Completed(_) => "completed".green(), + AgentStatus::Errored(_) => "errored".red(), + AgentStatus::Shutdown => "shutdown".dim(), + AgentStatus::NotFound => "not found".red(), + } +} + +#[cfg_attr(not(test), allow(dead_code))] +fn subagent_count_label(total: i32, running: i32) -> String { + if total <= 0 || running <= 0 { + return "no subagents running".to_string(); + } + let total_label = subagent_pluralize(total, "subagent"); + if running >= total { + return format!("{total_label} running"); + } + format!("{total_label}, {running} running") +} + +#[cfg_attr(not(test), allow(dead_code))] +fn subagent_pluralize(count: i32, singular: &str) -> String { + if count == 1 { + format!("1 {singular}") + } else { + format!("{count} {singular}s") + } +} + #[cfg_attr(debug_assertions, allow(dead_code))] #[derive(Debug)] pub(crate) struct UpdateAvailableHistoryCell { @@ -3001,6 +3303,7 @@ mod tests { use codex_protocol::account::PlanType; use codex_protocol::models::WebSearchAction; use codex_protocol::parse_command::ParsedCommand; + use codex_protocol::protocol::AgentStatus; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::McpAuthStatus; use codex_protocol::protocol::SessionConfiguredEvent; @@ -3011,6 +3314,8 @@ mod tests { use serde_json::json; use std::collections::HashMap; use std::path::PathBuf; + use std::sync::Arc; + use std::sync::Mutex; use codex_protocol::mcp::CallToolResult; use codex_protocol::mcp::Tool; @@ 
-3655,6 +3960,125 @@ mod tests { assert_eq!(cell.desired_transcript_height(/*width*/ 80), 1); } + #[test] + fn subagent_panel_renders_watchdog_handle_as_idle() { + let state = Arc::new(Mutex::new(SubagentPanelState { + started_at: Instant::now(), + total_agents: 1, + running_count: 0, + running_agents: vec![SubagentPanelAgent { + ordinal: 1, + name: "watchdog-agent".to_string(), + status: AgentStatus::PendingInit, + is_watchdog: true, + watchdog_countdown_started_at: Some(Instant::now()), + preview: "monitor parent progress".to_string(), + latest_update_at: Instant::now(), + }], + })); + let cell = SubagentStatusCell::new(state, /*animations_enabled*/ true); + let lines = render_lines(&cell.display_lines(/*width*/ 120)); + + assert!(lines[0].contains("no subagents running")); + assert!(!lines[0].contains("esc to interrupt")); + assert!(lines[1].contains("[watchdog] watchdog-agent idle")); + assert!(lines[1].contains("idle (")); + } + + #[test] + fn subagent_panel_animation_tick_ticks_idle_watchdogs() { + let state = Arc::new(Mutex::new(SubagentPanelState { + started_at: Instant::now(), + total_agents: 1, + running_count: 0, + running_agents: vec![SubagentPanelAgent { + ordinal: 1, + name: "watchdog-agent".to_string(), + status: AgentStatus::PendingInit, + is_watchdog: true, + watchdog_countdown_started_at: Some(Instant::now()), + preview: "monitor parent progress".to_string(), + latest_update_at: Instant::now(), + }], + })); + let cell = SubagentStatusCell::new(state, /*animations_enabled*/ true); + + assert!(cell.transcript_animation_tick().is_some()); + } + + #[test] + fn subagent_panel_animation_tick_stops_after_countdown_expires() { + let state = Arc::new(Mutex::new(SubagentPanelState { + started_at: Instant::now(), + total_agents: 1, + running_count: 0, + running_agents: vec![SubagentPanelAgent { + ordinal: 1, + name: "watchdog-agent".to_string(), + status: AgentStatus::PendingInit, + is_watchdog: true, + watchdog_countdown_started_at: Some( + Instant::now() + 
.checked_sub(Duration::from_secs(61)) + .unwrap_or_else(Instant::now), + ), + preview: "monitor parent progress".to_string(), + latest_update_at: Instant::now(), + }], + })); + let cell = SubagentStatusCell::new(state, /*animations_enabled*/ true); + + assert_eq!(cell.transcript_animation_tick(), None); + } + + #[test] + fn subagent_panel_animation_tick_runs_for_recent_running_updates() { + let state = Arc::new(Mutex::new(SubagentPanelState { + started_at: Instant::now(), + total_agents: 1, + running_count: 1, + running_agents: vec![SubagentPanelAgent { + ordinal: 1, + name: "worker-agent".to_string(), + status: AgentStatus::Running, + is_watchdog: false, + watchdog_countdown_started_at: None, + preview: "working".to_string(), + latest_update_at: Instant::now(), + }], + })); + let cell = SubagentStatusCell::new(state, /*animations_enabled*/ true); + + assert!( + cell.transcript_animation_tick().is_some(), + "recent running updates should animate" + ); + } + + #[test] + fn subagent_panel_animation_tick_stops_when_updates_are_stale() { + let stale_update = Instant::now() + .checked_sub(Duration::from_secs(2)) + .unwrap_or_else(Instant::now); + let state = Arc::new(Mutex::new(SubagentPanelState { + started_at: Instant::now(), + total_agents: 1, + running_count: 1, + running_agents: vec![SubagentPanelAgent { + ordinal: 1, + name: "worker-agent".to_string(), + status: AgentStatus::Running, + is_watchdog: false, + watchdog_countdown_started_at: None, + preview: "working".to_string(), + latest_update_at: stale_update, + }], + })); + let cell = SubagentStatusCell::new(state, /*animations_enabled*/ true); + + assert_eq!(cell.transcript_animation_tick(), None); + } + #[test] fn prefixed_wrapped_history_cell_indents_wrapped_lines() { let summary = Line::from(vec![ diff --git a/codex-rs/tui/src/multi_agents.rs b/codex-rs/tui/src/multi_agents.rs index 293c80fcf0..f02aa758a0 100644 --- a/codex-rs/tui/src/multi_agents.rs +++ b/codex-rs/tui/src/multi_agents.rs @@ -769,6 +769,35 @@ 
mod tests { assert_eq!(title.spans[6].style.fg, Some(Color::Magenta)); } + #[test] + fn spawn_end_without_receiver_renders_failed_spawn_attempt() { + let sender_thread_id = ThreadId::from_string("00000000-0000-0000-0000-000000000001") + .expect("valid sender thread id"); + + let cell = spawn_end( + CollabAgentSpawnEndEvent { + call_id: "call-spawn".to_string(), + sender_thread_id, + new_thread_id: None, + new_agent_nickname: None, + new_agent_role: None, + prompt: "inspect the repo".to_string(), + model: "gpt-5".to_string(), + reasoning_effort: ReasoningEffortConfig::High, + status: AgentStatus::PendingInit, + }, + Some(&SpawnRequestSummary { + model: "gpt-5".to_string(), + reasoning_effort: ReasoningEffortConfig::High, + }), + ); + + assert_eq!( + cell_to_text(&cell), + "• Agent spawn failed\n └ inspect the repo" + ); + } + #[test] fn collab_resume_interrupted_snapshot() { let sender_thread_id = ThreadId::from_string("00000000-0000-0000-0000-000000000001") diff --git a/docs/config.md b/docs/config.md index 8dda2b6393..0f6b6f0817 100644 --- a/docs/config.md +++ b/docs/config.md @@ -66,6 +66,34 @@ Codex can run a notification hook when the agent finishes a turn. See the config When Codex knows which client started the turn, the legacy notify JSON payload also includes a top-level `client` field. The TUI reports `codex-tui`, and the app server reports the `clientInfo.name` value from `initialize`. +## Agent Inbox Delivery + +By default, inbound messages from other agents are delivered to non-subagent threads as normal +user input. If you want those handoffs to appear as explicit non-user transcript activity, you can +opt into a synthetic function-call/function-call-output envelope: + +```toml +[features] +agent_function_call_inbox = true +``` + +When enabled, Codex injects inbound agent messages into non-subagent threads as an `agent_inbox` +function-call/function-call-output pair. 
This is primarily a model-behavior workaround for cases +where you want a subagent handoff to start a valid turn while still being clearly marked as +non-user activity in the transcript. + +Messages sent to subagents continue to arrive as normal user input. + +## Watchdog Interval + +Watchdog agents use the top-level `watchdog_interval_s` setting to decide how long the owner thread +must be idle before a check-in helper is spawned when the `agent_watchdog` feature is enabled. +Owner-side typing/input activity resets the idle timer. + +```toml +watchdog_interval_s = 10 +``` + ## JSON Schema The generated JSON Schema for `config.toml` lives at `codex-rs/core/config.schema.json`. @@ -118,4 +146,20 @@ developer message Codex inserts when realtime becomes active. It only affects the realtime start message in prompt history and does not change websocket backend prompt settings or the realtime end/inactive message. +## Custom model aliases + +You can add aliases to the model picker via `custom_models` in `~/.codex/config.toml`. +Each entry maps a user-facing alias to a provider-facing model slug and can override context settings: + +```toml +[[custom_models]] +name = "gpt-5.4 1m" +model = "gpt-5.4" +model_context_window = 1000000 +model_auto_compact_token_limit = 900000 +``` + +When selected, Codex sends `model = "gpt-5.4"` to the backend while using your +alias-specific context overrides for that session. + Ctrl+C/Ctrl+D quitting uses a ~1 second double-press hint (`ctrl + c again to quit`). 
diff --git a/patches/aws-lc-sys_memcmp_check.patch b/patches/aws-lc-sys_memcmp_check.patch index e9d0a441fd..4d59cec140 100644 --- a/patches/aws-lc-sys_memcmp_check.patch +++ b/patches/aws-lc-sys_memcmp_check.patch @@ -7,13 +7,13 @@ diff --git a/builder/cc_builder.rs b/builder/cc_builder.rs use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; - + #[non_exhaustive] #[derive(PartialEq, Eq)] @@ -681,6 +681,16 @@ } let mut memcmp_compile_args = Vec::from(memcmp_compiler.args()); - + + // Keep the probe self-contained and avoid invoking external debug tools + // (for example `dsymutil`) that may be missing in hermetic sandboxes. + memcmp_compile_args.retain(|arg| { @@ -30,7 +30,7 @@ diff --git a/builder/cc_builder.rs b/builder/cc_builder.rs @@ -692,6 +702,15 @@ } } - + + if let Some(execroot) = Self::bazel_execroot(self.manifest_dir.as_path()) { + // In Bazel build-script sandboxes, `cc` can pass `bazel-out/...` args + // relative to the execroot while the process runs from elsewhere. 
diff --git a/patches/aws-lc-sys_windows_msvc_memcmp_probe.patch b/patches/aws-lc-sys_windows_msvc_memcmp_probe.patch index d244e9418f..3cc468e406 100644 --- a/patches/aws-lc-sys_windows_msvc_memcmp_probe.patch +++ b/patches/aws-lc-sys_windows_msvc_memcmp_probe.patch @@ -17,7 +17,7 @@ diff --git a/builder/cc_builder.rs b/builder/cc_builder.rs + emit_warning("Skipping memcmp probe for Bazel windows-msvc build scripts."); + return; + } - + let basename = "memcmp_invalid_stripped_check"; let exec_path = out_dir().join(basename); let memcmp_build = cc::Build::default(); @@ -30,7 +30,7 @@ diff --git a/builder/cc_builder.rs b/builder/cc_builder.rs memcmp_compile_args.push(flag.into()); } } - + - if let Some(execroot) = Self::bazel_execroot(self.manifest_dir.as_path()) { + if let Some(execroot) = bazel_execroot { // In Bazel build-script sandboxes, `cc` can pass `bazel-out/...` args diff --git a/patches/llvm_windows_symlink_extract.patch b/patches/llvm_windows_symlink_extract.patch index 9f548636b2..0285aeb131 100644 --- a/patches/llvm_windows_symlink_extract.patch +++ b/patches/llvm_windows_symlink_extract.patch @@ -14,7 +14,7 @@ index 89dcf81..cf27c92 100644 + "llvm/utils/mlgo-utils/combine_training_corpus.py", + "llvm/docs/_themes/llvm-theme", ] - + test_docs_subprojects = [ diff --git a/runtimes/mingw/BUILD.bazel b/runtimes/mingw/BUILD.bazel index ebd99db..9eb5d5b 100644 @@ -23,7 +23,7 @@ index ebd99db..9eb5d5b 100644 @@ -334,6 +334,30 @@ stub_library( name = "stdc++", ) - + +# Clang may inject -lssp and -lssp_nonshared for windows-gnu links. +# Provide compatibility archives in the MinGW runtime search directory. 
+stub_library( diff --git a/patches/rules_rust_windows_exec_bin_target.patch b/patches/rules_rust_windows_exec_bin_target.patch index e4cf306dff..7810ff46d5 100644 --- a/patches/rules_rust_windows_exec_bin_target.patch +++ b/patches/rules_rust_windows_exec_bin_target.patch @@ -13,7 +13,7 @@ diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl @@ -129,6 +129,20 @@ build_setting = config.bool(flag = True), ) - + -def _get_rustc_env(attr, toolchain, crate_name): +def _effective_target_arch(toolchain, use_exec_target): + return toolchain.exec_triple.arch if use_exec_target else toolchain.target_arch @@ -31,9 +31,9 @@ diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl + +def _get_rustc_env(attr, toolchain, crate_name, use_exec_target = False): """Gathers rustc environment variables - + @@ -147,6 +161,6 @@ - + result = { - "CARGO_CFG_TARGET_ARCH": "" if toolchain.target_arch == None else toolchain.target_arch, - "CARGO_CFG_TARGET_OS": "" if toolchain.target_os == None else toolchain.target_os, @@ -44,15 +44,15 @@ diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl @@ -997,9 +1011,11 @@ if build_metadata and not use_json_output: fail("build_metadata requires parse_json_output") - + + use_exec_target = is_exec_configuration(ctx) and crate_info.type == "bin" + output_dir = getattr(crate_info.output, "dirname", None) linker_script = getattr(file, "linker_script", None) - + - env = _get_rustc_env(attr, toolchain, crate_info.name) + env = _get_rustc_env(attr, toolchain, crate_info.name, use_exec_target) - + # Wrapper args first @@ -1138,5 +1154,5 @@ if error_format != "json": @@ -64,7 +64,7 @@ diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl @@ -1144,6 +1160,6 @@ if linker_script: rustc_flags.add(linker_script, format = "--codegen=link-arg=-T%s") - + # Tell Rustc where to find the standard library (or libcore) - rustc_flags.add_all(toolchain.rust_std_paths, before_each = "-L", format_each = "%s") + 
rustc_flags.add_all(_effective_rust_std_paths(toolchain, use_exec_target), before_each = "-L", format_each = "%s") diff --git a/patches/rules_rust_windows_gnullvm_build_script.patch b/patches/rules_rust_windows_gnullvm_build_script.patch index a1ed9bf145..1d0d8219cb 100644 --- a/patches/rules_rust_windows_gnullvm_build_script.patch +++ b/patches/rules_rust_windows_gnullvm_build_script.patch @@ -4,7 +4,7 @@ diff --git a/cargo/private/cargo_build_script.bzl b/cargo/private/cargo_build_sc @@ -120,6 +120,63 @@ executable = True, ) - + +def _strip_stack_protector_for_windows_llvm_mingw(toolchain, args): + """Drop stack protector flags unsupported by llvm-mingw build-script probes.""" + if "windows-gnullvm" not in toolchain.target_flag_value: @@ -64,11 +64,11 @@ diff --git a/cargo/private/cargo_build_script.bzl b/cargo/private/cargo_build_sc + def get_cc_compile_args_and_env(cc_toolchain, feature_configuration): """Gather cc environment variables from the given `cc_toolchain` - + @@ -503,6 +560,10 @@ if not env["AR"]: env["AR"] = cc_toolchain.ar_executable - + + cc_c_args = _strip_stack_protector_for_windows_llvm_mingw(toolchain, cc_c_args) + cc_cxx_args = _strip_stack_protector_for_windows_llvm_mingw(toolchain, cc_cxx_args) + cc_c_args = _rewrite_windows_exec_msvc_cc_args(toolchain, cc_c_args) diff --git a/patches/rusty_v8_prebuilt_out_dir.patch b/patches/rusty_v8_prebuilt_out_dir.patch index 4242a29beb..522e5aa60f 100644 --- a/patches/rusty_v8_prebuilt_out_dir.patch +++ b/patches/rusty_v8_prebuilt_out_dir.patch @@ -6,7 +6,7 @@ +++ b/build.rs @@ -543,10 +543,15 @@ } - + fn static_lib_name(suffix: &str) -> String { - let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); - if target_os == "windows" { @@ -25,7 +25,7 @@ @@ -577,7 +577,23 @@ path } - + +fn out_dir_abs() -> PathBuf { + let cwd = env::current_dir().unwrap(); + @@ -45,10 +45,10 @@ + build_dir().join("gn_out").join("obj") } - + @@ -794,22 +810,23 @@ } - + fn print_link_flags() { + let target = 
env::var("TARGET").unwrap(); println!("cargo:rustc-link-lib=static=rusty_v8"); @@ -60,7 +60,7 @@ .split_whitespace() .any(|ba| ba == "use_custom_libcxx=false") }); - + if should_dyn_link_libcxx { // Based on https://github.com/alexcrichton/cc-rs/blob/fba7feded71ee4f63cfe885673ead6d7b4f2f454/src/lib.rs#L2462 if let Ok(stdlib) = env::var("CXXSTDLIB") { diff --git a/patches/v8_module_deps.patch b/patches/v8_module_deps.patch index 8f0fed9cf4..182798ce8b 100644 --- a/patches/v8_module_deps.patch +++ b/patches/v8_module_deps.patch @@ -63,13 +63,13 @@ diff --git a/orig/v8-14.6.202.11/MODULE.bazel b/mod/v8-14.6.202.11/MODULE.bazel + commit = "3d2de1816307bac63c16a297e8c4dc501b4076df", + remote = "https://chromium.googlesource.com/external/github.com/Maratyszcza/FP16.git", +) - + pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") pip.parse( @@ -22,171 +80,3 @@ ) use_repo(pip, "v8_python_deps") - + -# Define the local LLVM toolchain repository -llvm_toolchain_repository = use_repo_rule("//bazel/toolchain:llvm_repository.bzl", "llvm_toolchain_repository") - @@ -249,22 +249,22 @@ new file mode 100644 +@@ -2,7 +2,7 @@ + load("@bazel_skylib//lib:selects.bzl", "selects") + load("@rules_license//rules:license.bzl", "license") -+ ++ +-load("@rules_cc//cc:defs.bzl", "cc_test") ++load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") + # Placeholder#2 for Guitar, do not remove -+ ++ + package( +@@ -134,11 +134,7 @@ COPTS = select({ + ], + }) -+ ++ +-DEFINES = select({ +- ":compiler_msvc": ["HWY_SHARED_DEFINE"], +- ":compiler_clangcl": ["HWY_SHARED_DEFINE"], +- "//conditions:default": [], +-}) ++DEFINES = [] -+ ++ + # Unused on Bazel builds, where this is not defined/known; Copybara replaces + # usages with an empty list. 
diff --git a/patches/v8_source_portability.patch b/patches/v8_source_portability.patch index 4f5f46005f..bc5b2c8044 100644 --- a/patches/v8_source_portability.patch +++ b/patches/v8_source_portability.patch @@ -8,7 +8,7 @@ index 179a10f..4791e96 100644 +++ b/mod/v8-14.6.202.11/src/base/bits.h @@ -270,11 +270,17 @@ inline constexpr uint32_t RoundDownToPowerOfTwo32(uint32_t value) { } - + // Precondition: 0 <= shift < 32 +#ifdef RotateRight32 +#undef RotateRight32 @@ -16,7 +16,7 @@ index 179a10f..4791e96 100644 inline constexpr uint32_t RotateRight32(uint32_t value, uint32_t shift) { return (value >> shift) | (value << ((32 - shift) & 31)); } - + // Precondition: 0 <= shift < 32 +#ifdef RotateLeft32 +#undef RotateLeft32 @@ -31,7 +31,7 @@ index 6176ed4..a02043d 100644 @@ -64,6 +64,7 @@ namespace { volatile sig_atomic_t in_signal_handler = 0; bool dump_stack_in_signal_handler = true; - + +#if HAVE_EXECINFO_H // The prefix used for mangled symbols, per the Itanium C++ ABI: // http://www.codesourcery.com/cxx-abi/abi.html#mangling @@ -39,7 +39,7 @@ index 6176ed4..a02043d 100644 @@ -73,7 +74,6 @@ const char kMangledSymbolPrefix[] = "_Z"; const char kSymbolCharacters[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; - + -#if HAVE_EXECINFO_H // Demangles C++ symbols in the given text. 
Example: // @@ -49,14 +49,14 @@ index 861cfe4..1e73954 100644 --- a/orig/v8-14.6.202.11/src/base/export-template.h +++ b/mod/v8-14.6.202.11/src/base/export-template.h @@ -153,8 +153,10 @@ - + EXPORT_TEMPLATE_TEST(DEFAULT, ); EXPORT_TEMPLATE_TEST(DEFAULT, __attribute__((visibility("default")))); +#if defined(_MSC_VER) EXPORT_TEMPLATE_TEST(MSVC_HACK, __declspec(dllexport)); EXPORT_TEMPLATE_TEST(DEFAULT, __declspec(dllimport)); +#endif - + #undef EXPORT_TEMPLATE_TEST #undef EXPORT_TEMPLATE_TEST_DEFAULT_DEFAULT diff --git a/orig/v8-14.6.202.11/src/base/platform/platform-posix.cc b/mod/v8-14.6.202.11/src/base/platform/platform-posix.cc @@ -65,12 +65,12 @@ index 420df0b..6f47969 100644 +++ b/mod/v8-14.6.202.11/src/base/platform/platform-posix.cc @@ -95,7 +95,7 @@ extern int madvise(caddr_t, size_t, int); #endif - + #if defined(V8_LIBC_GLIBC) -extern "C" void* __libc_stack_end; +extern "C" void* __libc_stack_end V8_WEAK; #endif - + namespace v8 { @@ -1476,7 +1476,8 @@ Stack::StackSlot Stack::ObtainCurrentThreadStackStart() { // __libc_stack_end is process global and thus is only valid for @@ -94,13 +94,13 @@ index f5d9ddc..542ea1a 100644 - -#ifndef __MINGW64_VERSION_MAJOR +#if !defined(__MINGW64_VERSION_MAJOR) - + #define _TRUNCATE 0 #define STRUNCATE 80 @@ -81,9 +79,6 @@ inline void MemoryFence() { __asm__ __volatile__("xchgl %%eax,%0 ":"=r" (barrier)); } - + -#endif // __MINGW64_VERSION_MAJOR - - @@ -110,25 +110,25 @@ index f5d9ddc..542ea1a 100644 @@ -134,6 +129,8 @@ int strncpy_s(char* dest, size_t dest_size, const char* source, size_t count) { return 0; } - + +#endif // !defined(__MINGW64_VERSION_MAJOR) + #endif // __MINGW32__ - + namespace v8 { @@ -743,8 +740,10 @@ void OS::StrNCpy(char* dest, int length, const char* src, size_t n) { } - - + + +#if defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR) #undef _TRUNCATE #undef STRUNCATE +#endif - + DEFINE_LAZY_LEAKY_OBJECT_GETTER(RandomNumberGenerator, GetPlatformRandomNumberGenerator) @@ -1894,3 +1893,4 @@ 
Stack::StackSlot Stack::GetCurrentStackPosition() { - + } // namespace base } // namespace v8 + @@ -294,9 +294,9 @@ index bda0e43..b44f1d9 100644 --- a/orig/v8-14.6.202.11/src/libplatform/default-thread-isolated-allocator.cc +++ b/mod/v8-14.6.202.11/src/libplatform/default-thread-isolated-allocator.cc @@ -23,7 +23,7 @@ extern int pkey_free(int pkey) V8_WEAK; - + namespace { - + -bool KernelHasPkruFix() { +[[maybe_unused]] bool KernelHasPkruFix() { // PKU was broken on Linux kernels before 5.13 (see @@ -319,9 +319,9 @@ index 27e748f..cbf886c 100644 +#endif // !V8_CC_MSVC || V8_OS_WIN memset(&gc_subroot_entries_, 0, sizeof(gc_subroot_entries_)); } - + @@ -3878,3 +3878,4 @@ void HeapSnapshotJSONSerializer::SerializeLocations() { } - + } // namespace v8::internal + diff --git a/patches/windows-link.patch b/patches/windows-link.patch index 92423d1fe8..a3288bcfb1 100644 --- a/patches/windows-link.patch +++ b/patches/windows-link.patch @@ -6,5 +6,5 @@ index 2d5a2a2..6e8c4cd 100644 -#![doc = include_str!("../readme.md")] +#![doc = "windows-link"] #![no_std] - + /// Defines an external function to import.