mirror of
https://github.com/openai/codex.git
synced 2026-04-11 16:24:49 +00:00
Compare commits
5 Commits
codex-debu
...
dev/steve/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b14ab36b9 | ||
|
|
81f2b2a0d9 | ||
|
|
814f0623a5 | ||
|
|
15ba6609c9 | ||
|
|
dd9cc542ed |
@@ -33,6 +33,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command, but wants the agent to execute a replacement command instead of the originally proposed one.",
|
||||
"properties": {
|
||||
"approved_with_command_override": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_with_command_override"
|
||||
],
|
||||
"title": "ApprovedWithCommandOverrideReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
|
||||
@@ -1790,6 +1790,34 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"SdkDelegationConfig": {
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"description": "Base URL for the host-managed Responses bridge reachable by the Codex runtime.",
|
||||
"type": "string"
|
||||
},
|
||||
"modelProviderId": {
|
||||
"description": "Optional model-provider id to register for this thread. Defaults to `codex-sdk-v2`.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"streamIdleTimeoutMs": {
|
||||
"description": "Optional stream idle timeout override for the delegated provider.",
|
||||
"format": "uint64",
|
||||
"minimum": 0.0,
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"ServiceTier": {
|
||||
"enum": [
|
||||
"fast",
|
||||
|
||||
@@ -216,6 +216,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved execution, but wants to replace the command before it runs.",
|
||||
"properties": {
|
||||
"acceptWithCommandOverride": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithCommandOverride"
|
||||
],
|
||||
"title": "AcceptWithCommandOverrideCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
|
||||
@@ -10,6 +10,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved execution, but wants to replace the command before it runs.",
|
||||
"properties": {
|
||||
"acceptWithCommandOverride": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithCommandOverride"
|
||||
],
|
||||
"title": "AcceptWithCommandOverrideCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
|
||||
@@ -5252,6 +5252,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command, but wants the agent to execute a replacement command instead of the originally proposed one.",
|
||||
"properties": {
|
||||
"approved_with_command_override": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_with_command_override"
|
||||
],
|
||||
"title": "ApprovedWithCommandOverrideReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
|
||||
@@ -33,6 +33,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command, but wants the agent to execute a replacement command instead of the originally proposed one.",
|
||||
"properties": {
|
||||
"approved_with_command_override": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_with_command_override"
|
||||
],
|
||||
"title": "ApprovedWithCommandOverrideReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
|
||||
@@ -1450,6 +1450,25 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"SdkDelegationConfiguredNotification": {
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"modelProvider": {
|
||||
"type": "string"
|
||||
},
|
||||
"threadId": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl",
|
||||
"modelProvider",
|
||||
"threadId"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"ServerRequestResolvedNotification": {
|
||||
"properties": {
|
||||
"requestId": {
|
||||
@@ -3648,6 +3667,26 @@
|
||||
"title": "App/list/updatedNotification",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"method": {
|
||||
"enum": [
|
||||
"codexSdk/delegationConfigured"
|
||||
],
|
||||
"title": "CodexSdk/delegationConfiguredNotificationMethod",
|
||||
"type": "string"
|
||||
},
|
||||
"params": {
|
||||
"$ref": "#/definitions/SdkDelegationConfiguredNotification"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"method",
|
||||
"params"
|
||||
],
|
||||
"title": "CodexSdk/delegationConfiguredNotification",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"method": {
|
||||
|
||||
@@ -282,6 +282,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved execution, but wants to replace the command before it runs.",
|
||||
"properties": {
|
||||
"acceptWithCommandOverride": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithCommandOverride"
|
||||
],
|
||||
"title": "AcceptWithCommandOverrideCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
|
||||
@@ -1514,6 +1514,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved execution, but wants to replace the command before it runs.",
|
||||
"properties": {
|
||||
"acceptWithCommandOverride": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithCommandOverride"
|
||||
],
|
||||
"title": "AcceptWithCommandOverrideCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
@@ -6587,6 +6612,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command, but wants the agent to execute a replacement command instead of the originally proposed one.",
|
||||
"properties": {
|
||||
"approved_with_command_override": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_with_command_override"
|
||||
],
|
||||
"title": "ApprovedWithCommandOverrideReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
@@ -7261,6 +7311,26 @@
|
||||
"title": "App/list/updatedNotification",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"method": {
|
||||
"enum": [
|
||||
"codexSdk/delegationConfigured"
|
||||
],
|
||||
"title": "CodexSdk/delegationConfiguredNotificationMethod",
|
||||
"type": "string"
|
||||
},
|
||||
"params": {
|
||||
"$ref": "#/definitions/v2/SdkDelegationConfiguredNotification"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"method",
|
||||
"params"
|
||||
],
|
||||
"title": "CodexSdk/delegationConfiguredNotification",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"method": {
|
||||
@@ -13161,6 +13231,55 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"SdkDelegationConfig": {
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"description": "Base URL for the host-managed Responses bridge reachable by the Codex runtime.",
|
||||
"type": "string"
|
||||
},
|
||||
"modelProviderId": {
|
||||
"description": "Optional model-provider id to register for this thread. Defaults to `codex-sdk-v2`.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"streamIdleTimeoutMs": {
|
||||
"description": "Optional stream idle timeout override for the delegated provider.",
|
||||
"format": "uint64",
|
||||
"minimum": 0.0,
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"SdkDelegationConfiguredNotification": {
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"modelProvider": {
|
||||
"type": "string"
|
||||
},
|
||||
"threadId": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl",
|
||||
"modelProvider",
|
||||
"threadId"
|
||||
],
|
||||
"title": "SdkDelegationConfiguredNotification",
|
||||
"type": "object"
|
||||
},
|
||||
"ServerRequestResolvedNotification": {
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"properties": {
|
||||
|
||||
@@ -9801,6 +9801,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command, but wants the agent to execute a replacement command instead of the originally proposed one.",
|
||||
"properties": {
|
||||
"approved_with_command_override": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_with_command_override"
|
||||
],
|
||||
"title": "ApprovedWithCommandOverrideReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
@@ -10245,6 +10270,55 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"SdkDelegationConfig": {
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"description": "Base URL for the host-managed Responses bridge reachable by the Codex runtime.",
|
||||
"type": "string"
|
||||
},
|
||||
"modelProviderId": {
|
||||
"description": "Optional model-provider id to register for this thread. Defaults to `codex-sdk-v2`.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"streamIdleTimeoutMs": {
|
||||
"description": "Optional stream idle timeout override for the delegated provider.",
|
||||
"format": "uint64",
|
||||
"minimum": 0.0,
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"SdkDelegationConfiguredNotification": {
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"modelProvider": {
|
||||
"type": "string"
|
||||
},
|
||||
"threadId": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl",
|
||||
"modelProvider",
|
||||
"threadId"
|
||||
],
|
||||
"title": "SdkDelegationConfiguredNotification",
|
||||
"type": "object"
|
||||
},
|
||||
"ServerNotification": {
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"description": "Notification sent from the server to the client.",
|
||||
@@ -10771,6 +10845,26 @@
|
||||
"title": "App/list/updatedNotification",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"method": {
|
||||
"enum": [
|
||||
"codexSdk/delegationConfigured"
|
||||
],
|
||||
"title": "CodexSdk/delegationConfiguredNotificationMethod",
|
||||
"type": "string"
|
||||
},
|
||||
"params": {
|
||||
"$ref": "#/definitions/SdkDelegationConfiguredNotification"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"method",
|
||||
"params"
|
||||
],
|
||||
"title": "CodexSdk/delegationConfiguredNotification",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"method": {
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"modelProvider": {
|
||||
"type": "string"
|
||||
},
|
||||
"threadId": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl",
|
||||
"modelProvider",
|
||||
"threadId"
|
||||
],
|
||||
"title": "SdkDelegationConfiguredNotification",
|
||||
"type": "object"
|
||||
}
|
||||
@@ -76,6 +76,34 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"SdkDelegationConfig": {
|
||||
"properties": {
|
||||
"bridgeUrl": {
|
||||
"description": "Base URL for the host-managed Responses bridge reachable by the Codex runtime.",
|
||||
"type": "string"
|
||||
},
|
||||
"modelProviderId": {
|
||||
"description": "Optional model-provider id to register for this thread. Defaults to `codex-sdk-v2`.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"streamIdleTimeoutMs": {
|
||||
"description": "Optional stream idle timeout override for the delegated provider.",
|
||||
"format": "uint64",
|
||||
"minimum": 0.0,
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"bridgeUrl"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"ServiceTier": {
|
||||
"enum": [
|
||||
"fast",
|
||||
|
||||
@@ -7,4 +7,4 @@ import type { NetworkPolicyAmendment } from "./NetworkPolicyAmendment";
|
||||
/**
|
||||
* User's decision in response to an ExecApprovalRequest.
|
||||
*/
|
||||
export type ReviewDecision = "approved" | { "approved_execpolicy_amendment": { proposed_execpolicy_amendment: ExecPolicyAmendment, } } | "approved_for_session" | { "network_policy_amendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "denied" | "abort";
|
||||
export type ReviewDecision = "approved" | { "approved_with_command_override": { command: Array<string>, } } | { "approved_execpolicy_amendment": { proposed_execpolicy_amendment: ExecPolicyAmendment, } } | "approved_for_session" | { "network_policy_amendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "denied" | "abort";
|
||||
|
||||
@@ -24,6 +24,7 @@ import type { RawResponseItemCompletedNotification } from "./v2/RawResponseItemC
|
||||
import type { ReasoningSummaryPartAddedNotification } from "./v2/ReasoningSummaryPartAddedNotification";
|
||||
import type { ReasoningSummaryTextDeltaNotification } from "./v2/ReasoningSummaryTextDeltaNotification";
|
||||
import type { ReasoningTextDeltaNotification } from "./v2/ReasoningTextDeltaNotification";
|
||||
import type { SdkDelegationConfiguredNotification } from "./v2/SdkDelegationConfiguredNotification";
|
||||
import type { ServerRequestResolvedNotification } from "./v2/ServerRequestResolvedNotification";
|
||||
import type { SkillsChangedNotification } from "./v2/SkillsChangedNotification";
|
||||
import type { TerminalInteractionNotification } from "./v2/TerminalInteractionNotification";
|
||||
@@ -49,4 +50,4 @@ import type { WindowsWorldWritableWarningNotification } from "./v2/WindowsWorldW
|
||||
/**
|
||||
* Notification sent from the server to the client.
|
||||
*/
|
||||
export type ServerNotification = { "method": "error", "params": ErrorNotification } | { "method": "thread/started", "params": ThreadStartedNotification } | { "method": "thread/status/changed", "params": ThreadStatusChangedNotification } | { "method": "thread/archived", "params": ThreadArchivedNotification } | { "method": "thread/unarchived", "params": ThreadUnarchivedNotification } | { "method": "thread/closed", "params": ThreadClosedNotification } | { "method": "skills/changed", "params": SkillsChangedNotification } | { "method": "thread/name/updated", "params": ThreadNameUpdatedNotification } | { "method": "thread/tokenUsage/updated", "params": ThreadTokenUsageUpdatedNotification } | { "method": "turn/started", "params": TurnStartedNotification } | { "method": "turn/completed", "params": TurnCompletedNotification } | { "method": "turn/diff/updated", "params": TurnDiffUpdatedNotification } | { "method": "turn/plan/updated", "params": TurnPlanUpdatedNotification } | { "method": "item/started", "params": ItemStartedNotification } | { "method": "item/completed", "params": ItemCompletedNotification } | { "method": "rawResponseItem/completed", "params": RawResponseItemCompletedNotification } | { "method": "item/agentMessage/delta", "params": AgentMessageDeltaNotification } | { "method": "item/plan/delta", "params": PlanDeltaNotification } | { "method": "item/commandExecution/outputDelta", "params": CommandExecutionOutputDeltaNotification } | { "method": "item/commandExecution/terminalInteraction", "params": TerminalInteractionNotification } | { "method": "item/fileChange/outputDelta", "params": FileChangeOutputDeltaNotification } | { "method": "serverRequest/resolved", "params": ServerRequestResolvedNotification } | { "method": "item/mcpToolCall/progress", "params": McpToolCallProgressNotification } | { "method": "mcpServer/oauthLogin/completed", "params": McpServerOauthLoginCompletedNotification } | { "method": "account/updated", "params": AccountUpdatedNotification } 
| { "method": "account/rateLimits/updated", "params": AccountRateLimitsUpdatedNotification } | { "method": "app/list/updated", "params": AppListUpdatedNotification } | { "method": "item/reasoning/summaryTextDelta", "params": ReasoningSummaryTextDeltaNotification } | { "method": "item/reasoning/summaryPartAdded", "params": ReasoningSummaryPartAddedNotification } | { "method": "item/reasoning/textDelta", "params": ReasoningTextDeltaNotification } | { "method": "thread/compacted", "params": ContextCompactedNotification } | { "method": "model/rerouted", "params": ModelReroutedNotification } | { "method": "deprecationNotice", "params": DeprecationNoticeNotification } | { "method": "configWarning", "params": ConfigWarningNotification } | { "method": "fuzzyFileSearch/sessionUpdated", "params": FuzzyFileSearchSessionUpdatedNotification } | { "method": "fuzzyFileSearch/sessionCompleted", "params": FuzzyFileSearchSessionCompletedNotification } | { "method": "thread/realtime/started", "params": ThreadRealtimeStartedNotification } | { "method": "thread/realtime/itemAdded", "params": ThreadRealtimeItemAddedNotification } | { "method": "thread/realtime/outputAudio/delta", "params": ThreadRealtimeOutputAudioDeltaNotification } | { "method": "thread/realtime/error", "params": ThreadRealtimeErrorNotification } | { "method": "thread/realtime/closed", "params": ThreadRealtimeClosedNotification } | { "method": "windows/worldWritableWarning", "params": WindowsWorldWritableWarningNotification } | { "method": "windowsSandbox/setupCompleted", "params": WindowsSandboxSetupCompletedNotification } | { "method": "account/login/completed", "params": AccountLoginCompletedNotification };
|
||||
export type ServerNotification = { "method": "error", "params": ErrorNotification } | { "method": "thread/started", "params": ThreadStartedNotification } | { "method": "thread/status/changed", "params": ThreadStatusChangedNotification } | { "method": "thread/archived", "params": ThreadArchivedNotification } | { "method": "thread/unarchived", "params": ThreadUnarchivedNotification } | { "method": "thread/closed", "params": ThreadClosedNotification } | { "method": "skills/changed", "params": SkillsChangedNotification } | { "method": "thread/name/updated", "params": ThreadNameUpdatedNotification } | { "method": "thread/tokenUsage/updated", "params": ThreadTokenUsageUpdatedNotification } | { "method": "turn/started", "params": TurnStartedNotification } | { "method": "turn/completed", "params": TurnCompletedNotification } | { "method": "turn/diff/updated", "params": TurnDiffUpdatedNotification } | { "method": "turn/plan/updated", "params": TurnPlanUpdatedNotification } | { "method": "item/started", "params": ItemStartedNotification } | { "method": "item/completed", "params": ItemCompletedNotification } | { "method": "rawResponseItem/completed", "params": RawResponseItemCompletedNotification } | { "method": "item/agentMessage/delta", "params": AgentMessageDeltaNotification } | { "method": "item/plan/delta", "params": PlanDeltaNotification } | { "method": "item/commandExecution/outputDelta", "params": CommandExecutionOutputDeltaNotification } | { "method": "item/commandExecution/terminalInteraction", "params": TerminalInteractionNotification } | { "method": "item/fileChange/outputDelta", "params": FileChangeOutputDeltaNotification } | { "method": "serverRequest/resolved", "params": ServerRequestResolvedNotification } | { "method": "item/mcpToolCall/progress", "params": McpToolCallProgressNotification } | { "method": "mcpServer/oauthLogin/completed", "params": McpServerOauthLoginCompletedNotification } | { "method": "account/updated", "params": AccountUpdatedNotification } 
| { "method": "account/rateLimits/updated", "params": AccountRateLimitsUpdatedNotification } | { "method": "app/list/updated", "params": AppListUpdatedNotification } | { "method": "codexSdk/delegationConfigured", "params": SdkDelegationConfiguredNotification } | { "method": "item/reasoning/summaryTextDelta", "params": ReasoningSummaryTextDeltaNotification } | { "method": "item/reasoning/summaryPartAdded", "params": ReasoningSummaryPartAddedNotification } | { "method": "item/reasoning/textDelta", "params": ReasoningTextDeltaNotification } | { "method": "thread/compacted", "params": ContextCompactedNotification } | { "method": "model/rerouted", "params": ModelReroutedNotification } | { "method": "deprecationNotice", "params": DeprecationNoticeNotification } | { "method": "configWarning", "params": ConfigWarningNotification } | { "method": "fuzzyFileSearch/sessionUpdated", "params": FuzzyFileSearchSessionUpdatedNotification } | { "method": "fuzzyFileSearch/sessionCompleted", "params": FuzzyFileSearchSessionCompletedNotification } | { "method": "thread/realtime/started", "params": ThreadRealtimeStartedNotification } | { "method": "thread/realtime/itemAdded", "params": ThreadRealtimeItemAddedNotification } | { "method": "thread/realtime/outputAudio/delta", "params": ThreadRealtimeOutputAudioDeltaNotification } | { "method": "thread/realtime/error", "params": ThreadRealtimeErrorNotification } | { "method": "thread/realtime/closed", "params": ThreadRealtimeClosedNotification } | { "method": "windows/worldWritableWarning", "params": WindowsWorldWritableWarningNotification } | { "method": "windowsSandbox/setupCompleted", "params": WindowsSandboxSetupCompletedNotification } | { "method": "account/login/completed", "params": AccountLoginCompletedNotification };
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
import type { ExecPolicyAmendment } from "./ExecPolicyAmendment";
|
||||
import type { NetworkPolicyAmendment } from "./NetworkPolicyAmendment";
|
||||
|
||||
export type CommandExecutionApprovalDecision = "accept" | "acceptForSession" | { "acceptWithExecpolicyAmendment": { execpolicy_amendment: ExecPolicyAmendment, } } | { "applyNetworkPolicyAmendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "decline" | "cancel";
|
||||
export type CommandExecutionApprovalDecision = "accept" | { "acceptWithCommandOverride": { command: Array<string>, } } | "acceptForSession" | { "acceptWithExecpolicyAmendment": { execpolicy_amendment: ExecPolicyAmendment, } } | { "applyNetworkPolicyAmendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "decline" | "cancel";
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
// GENERATED CODE! DO NOT MODIFY BY HAND!
|
||||
|
||||
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
|
||||
|
||||
export type SdkDelegationConfig = {
|
||||
/**
|
||||
* Base URL for the host-managed Responses bridge reachable by the Codex runtime.
|
||||
*/
|
||||
bridgeUrl: string,
|
||||
/**
|
||||
* Optional model-provider id to register for this thread.
|
||||
* Defaults to `codex-sdk-v2`.
|
||||
*/
|
||||
modelProviderId: string | null,
|
||||
/**
|
||||
* Optional stream idle timeout override for the delegated provider.
|
||||
*/
|
||||
streamIdleTimeoutMs: bigint | null, };
|
||||
@@ -0,0 +1,5 @@
|
||||
// GENERATED CODE! DO NOT MODIFY BY HAND!
|
||||
|
||||
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
|
||||
|
||||
export type SdkDelegationConfiguredNotification = { threadId: string, modelProvider: string, bridgeUrl: string, };
|
||||
@@ -8,6 +8,9 @@ import type { AskForApproval } from "./AskForApproval";
|
||||
import type { SandboxMode } from "./SandboxMode";
|
||||
|
||||
export type ThreadStartParams = {model?: string | null, modelProvider?: string | null, serviceTier?: ServiceTier | null | null, cwd?: string | null, approvalPolicy?: AskForApproval | null, sandbox?: SandboxMode | null, config?: { [key in string]?: JsonValue } | null, serviceName?: string | null, baseInstructions?: string | null, developerInstructions?: string | null, personality?: Personality | null, ephemeral?: boolean | null, /**
|
||||
* If true, require host-visible approval before executing built-in tools.
|
||||
*/
|
||||
manualToolExecution?: boolean, /**
|
||||
* If true, opt into emitting raw Responses API items on the event stream.
|
||||
* This is for internal use only (e.g. Codex Cloud).
|
||||
*/
|
||||
|
||||
@@ -176,6 +176,8 @@ export type { ReviewTarget } from "./ReviewTarget";
|
||||
export type { SandboxMode } from "./SandboxMode";
|
||||
export type { SandboxPolicy } from "./SandboxPolicy";
|
||||
export type { SandboxWorkspaceWrite } from "./SandboxWorkspaceWrite";
|
||||
export type { SdkDelegationConfig } from "./SdkDelegationConfig";
|
||||
export type { SdkDelegationConfiguredNotification } from "./SdkDelegationConfiguredNotification";
|
||||
export type { ServerRequestResolvedNotification } from "./ServerRequestResolvedNotification";
|
||||
export type { SessionSource } from "./SessionSource";
|
||||
export type { SkillDependencies } from "./SkillDependencies";
|
||||
|
||||
@@ -790,6 +790,8 @@ server_notification_definitions! {
|
||||
AccountUpdated => "account/updated" (v2::AccountUpdatedNotification),
|
||||
AccountRateLimitsUpdated => "account/rateLimits/updated" (v2::AccountRateLimitsUpdatedNotification),
|
||||
AppListUpdated => "app/list/updated" (v2::AppListUpdatedNotification),
|
||||
#[experimental("thread/start.sdkDelegation")]
|
||||
SdkDelegationConfigured => "codexSdk/delegationConfigured" (v2::SdkDelegationConfiguredNotification),
|
||||
ReasoningSummaryTextDelta => "item/reasoning/summaryTextDelta" (v2::ReasoningSummaryTextDeltaNotification),
|
||||
ReasoningSummaryPartAdded => "item/reasoning/summaryPartAdded" (v2::ReasoningSummaryPartAddedNotification),
|
||||
ReasoningTextDelta => "item/reasoning/textDelta" (v2::ReasoningTextDeltaNotification),
|
||||
|
||||
@@ -744,12 +744,15 @@ pub struct ConfigEdit {
|
||||
pub merge_strategy: MergeStrategy,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS, ExperimentalApi)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum CommandExecutionApprovalDecision {
|
||||
/// User approved the command.
|
||||
Accept,
|
||||
/// User approved execution, but wants to replace the command before it runs.
|
||||
#[experimental("item/commandExecution/requestApproval.overrideCommand")]
|
||||
AcceptWithCommandOverride { command: Vec<String> },
|
||||
/// User approved the command and future prompts in the same session-scoped
|
||||
/// approval cache should run without prompting.
|
||||
AcceptForSession,
|
||||
@@ -772,6 +775,9 @@ impl From<CoreReviewDecision> for CommandExecutionApprovalDecision {
|
||||
fn from(value: CoreReviewDecision) -> Self {
|
||||
match value {
|
||||
CoreReviewDecision::Approved => Self::Accept,
|
||||
CoreReviewDecision::ApprovedWithCommandOverride { command } => {
|
||||
Self::AcceptWithCommandOverride { command }
|
||||
}
|
||||
CoreReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment,
|
||||
} => Self::AcceptWithExecpolicyAmendment {
|
||||
@@ -1868,6 +1874,13 @@ pub struct ThreadStartParams {
|
||||
#[experimental("thread/start.dynamicTools")]
|
||||
#[ts(optional = nullable)]
|
||||
pub dynamic_tools: Option<Vec<DynamicToolSpec>>,
|
||||
#[experimental("thread/start.builtinTools")]
|
||||
#[ts(optional = nullable)]
|
||||
pub builtin_tools: Option<Vec<String>>,
|
||||
/// If true, require host-visible approval before executing built-in tools.
|
||||
#[experimental("thread/start.manualToolExecution")]
|
||||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||||
pub manual_tool_execution: bool,
|
||||
/// Test-only experimental field used to validate experimental gating and
|
||||
/// schema filtering behavior in a stable way.
|
||||
#[experimental("thread/start.mockExperimentalField")]
|
||||
@@ -1883,6 +1896,24 @@ pub struct ThreadStartParams {
|
||||
#[experimental("thread/start.persistFullHistory")]
|
||||
#[serde(default)]
|
||||
pub persist_extended_history: bool,
|
||||
/// EXPERIMENTAL - route this thread's model traffic through a host-managed
|
||||
/// bridge instead of the user's default provider configuration.
|
||||
#[experimental("thread/start.sdkDelegation")]
|
||||
#[ts(optional = nullable)]
|
||||
pub sdk_delegation: Option<SdkDelegationConfig>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct SdkDelegationConfig {
|
||||
/// Base URL for the host-managed Responses bridge reachable by the Codex runtime.
|
||||
pub bridge_url: String,
|
||||
/// Optional model-provider id to register for this thread.
|
||||
/// Defaults to `codex-sdk-v2`.
|
||||
pub model_provider_id: Option<String>,
|
||||
/// Optional stream idle timeout override for the delegated provider.
|
||||
pub stream_idle_timeout_ms: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, JsonSchema, TS)]
|
||||
@@ -3729,6 +3760,15 @@ pub struct ThreadStartedNotification {
|
||||
pub thread: Thread,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct SdkDelegationConfiguredNotification {
|
||||
pub thread_id: String,
|
||||
pub model_provider: String,
|
||||
pub bridge_url: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
@@ -5552,6 +5592,74 @@ mod tests {
|
||||
assert_eq!(serialized_without_override.get("serviceTier"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn thread_start_params_round_trip_sdk_delegation() {
|
||||
let params: ThreadStartParams = serde_json::from_value(json!({
|
||||
"sdkDelegation": {
|
||||
"bridgeUrl": "http://127.0.0.1:8080/v1",
|
||||
"modelProviderId": "codex-sdk-v2",
|
||||
"streamIdleTimeoutMs": 5000
|
||||
}
|
||||
}))
|
||||
.expect("params should deserialize");
|
||||
assert_eq!(
|
||||
params.sdk_delegation,
|
||||
Some(SdkDelegationConfig {
|
||||
bridge_url: "http://127.0.0.1:8080/v1".to_string(),
|
||||
model_provider_id: Some("codex-sdk-v2".to_string()),
|
||||
stream_idle_timeout_ms: Some(5000),
|
||||
})
|
||||
);
|
||||
|
||||
let serialized = serde_json::to_value(¶ms).expect("params should serialize");
|
||||
let mut expected =
|
||||
serde_json::to_value(ThreadStartParams::default()).expect("params should serialize");
|
||||
expected
|
||||
.as_object_mut()
|
||||
.expect("serialized params should be an object")
|
||||
.insert(
|
||||
"sdkDelegation".to_string(),
|
||||
json!({
|
||||
"bridgeUrl": "http://127.0.0.1:8080/v1",
|
||||
"modelProviderId": "codex-sdk-v2",
|
||||
"streamIdleTimeoutMs": 5000
|
||||
}),
|
||||
);
|
||||
assert_eq!(serialized, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn thread_start_params_round_trip_manual_tool_execution() {
|
||||
let params: ThreadStartParams =
|
||||
serde_json::from_value(json!({ "manualToolExecution": true }))
|
||||
.expect("params should deserialize");
|
||||
assert!(params.manual_tool_execution);
|
||||
|
||||
let serialized = serde_json::to_value(¶ms).expect("params should serialize");
|
||||
let mut expected =
|
||||
serde_json::to_value(ThreadStartParams::default()).expect("params should serialize");
|
||||
expected
|
||||
.as_object_mut()
|
||||
.expect("serialized params should be an object")
|
||||
.insert("manualToolExecution".to_string(), json!(true));
|
||||
assert_eq!(serialized, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn command_execution_request_approval_response_round_trip_command_override() {
|
||||
let response = CommandExecutionRequestApprovalResponse {
|
||||
decision: CommandExecutionApprovalDecision::AcceptWithCommandOverride {
|
||||
command: vec!["ls".to_string(), "-la".to_string()],
|
||||
},
|
||||
};
|
||||
|
||||
let json = serde_json::to_value(&response).expect("serialize response");
|
||||
let parsed: CommandExecutionRequestApprovalResponse =
|
||||
serde_json::from_value(json).expect("deserialize response");
|
||||
|
||||
assert_eq!(parsed, response);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn turn_start_params_preserve_explicit_null_service_tier() {
|
||||
let params: TurnStartParams = serde_json::from_value(json!({
|
||||
|
||||
@@ -120,7 +120,7 @@ Example with notification opt-out:
|
||||
|
||||
## API Overview
|
||||
|
||||
- `thread/start` — create a new thread; emits `thread/started` (including the current `thread.status`) and auto-subscribes you to turn/item events for that thread.
|
||||
- `thread/start` — create a new thread; emits `thread/started` (including the current `thread.status`) and auto-subscribes you to turn/item events for that thread. Experimental: `thread/start.sdkDelegation` lets a host integration register a per-thread delegated Responses provider that points Codex at a host-managed bridge. In the current prototype shape, Codex sends the raw Responses request body to that bridge and the host bridge injects upstream authorization before forwarding the request. Experimental: `thread/start.builtinTools` lets a client provide an exact allowlist of built-in Codex tool names for that thread. Experimental: `thread/start.manualToolExecution` forces built-in tool executions to pause for host approval before Codex invokes them.
|
||||
- `thread/resume` — reopen an existing thread by id so subsequent `turn/start` calls append to it.
|
||||
- `thread/fork` — fork an existing thread into a new thread id by copying the stored history; emits `thread/started` (including the current `thread.status`) and auto-subscribes you to turn/item events for the new thread.
|
||||
- `thread/list` — page through stored rollouts; supports cursor-based pagination and optional `modelProviders`, `sourceKinds`, `archived`, `cwd`, and `searchTerm` filters. Each returned `thread` includes `status` (`ThreadStatus`), defaulting to `notLoaded` when the thread is not currently loaded.
|
||||
@@ -128,6 +128,7 @@ Example with notification opt-out:
|
||||
- `thread/read` — read a stored thread by id without resuming it; optionally include turns via `includeTurns`. The returned `thread` includes `status` (`ThreadStatus`), defaulting to `notLoaded` when the thread is not currently loaded.
|
||||
- `thread/metadata/update` — patch stored thread metadata in sqlite; currently supports updating persisted `gitInfo` fields and returns the refreshed `thread`.
|
||||
- `thread/status/changed` — notification emitted when a loaded thread’s status changes (`threadId` + new `status`).
|
||||
- `codexSdk/delegationConfigured` — experimental notification emitted after `thread/start` when `sdkDelegation` is active for that thread.
|
||||
- `thread/archive` — move a thread’s rollout file into the archived directory; returns `{}` on success and emits `thread/archived`.
|
||||
- `thread/unsubscribe` — unsubscribe this connection from thread turn/item events. If this was the last subscriber, the server shuts down and unloads the thread, then emits `thread/closed`.
|
||||
- `thread/name/set` — set or update a thread’s user-facing name for either a loaded thread or a persisted rollout; returns `{}` on success and emits `thread/name/updated` to initialized, opted-in clients. Thread names are not required to be unique; name lookups resolve to the most recently updated thread.
|
||||
@@ -197,6 +198,13 @@ Start a fresh thread when you need a new Codex conversation.
|
||||
}
|
||||
}
|
||||
],
|
||||
"builtinTools": [
|
||||
"exec_command",
|
||||
"write_stdin",
|
||||
"apply_patch",
|
||||
"view_image"
|
||||
],
|
||||
"manualToolExecution": true
|
||||
} }
|
||||
{ "id": 10, "result": {
|
||||
"thread": {
|
||||
@@ -764,7 +772,7 @@ Certain actions (shell commands or modifying files) may require explicit user ap
|
||||
Order of messages:
|
||||
|
||||
1. `item/started` — shows the pending `commandExecution` item with `command`, `cwd`, and other fields so you can render the proposed action.
|
||||
2. `item/commandExecution/requestApproval` (request) — carries the same `itemId`, `threadId`, `turnId`, optionally `approvalId` (for subcommand callbacks), and `reason`. For normal command approvals, it also includes `command`, `cwd`, and `commandActions` for friendly display. When `initialize.params.capabilities.experimentalApi = true`, it may also include experimental `additionalPermissions` describing requested per-command sandbox access; any filesystem paths in that payload are absolute on the wire, and network access is represented as `additionalPermissions.network.enabled`. For network-only approvals, those command fields may be omitted and `networkApprovalContext` is provided instead. Optional persistence hints may also be included via `proposedExecpolicyAmendment` and `proposedNetworkPolicyAmendments`. Clients can prefer `availableDecisions` when present to render the exact set of choices the server wants to expose, while still falling back to the older heuristics if it is omitted.
|
||||
2. `item/commandExecution/requestApproval` (request) — carries the same `itemId`, `threadId`, `turnId`, optionally `approvalId` (for subcommand callbacks), and `reason`. For normal command approvals, it also includes `command`, `cwd`, and `commandActions` for friendly display. When `initialize.params.capabilities.experimentalApi = true`, it may also include experimental `additionalPermissions` describing requested per-command sandbox access; any filesystem paths in that payload are absolute on the wire, and network access is represented as `additionalPermissions.network.enabled`. For network-only approvals, those command fields may be omitted and `networkApprovalContext` is provided instead. Optional persistence hints may also be included via `proposedExecpolicyAmendment` and `proposedNetworkPolicyAmendments`. Experimental clients may also answer with `acceptWithCommandOverride` to replace the proposed built-in command before execution. Clients can prefer `availableDecisions` when present to render the exact set of choices the server wants to expose, while still falling back to the older heuristics if it is omitted.
|
||||
3. Client response — for example `{ "decision": "accept" }`, `{ "decision": "acceptForSession" }`, `{ "decision": { "acceptWithExecpolicyAmendment": { "execpolicy_amendment": [...] } } }`, `{ "decision": { "applyNetworkPolicyAmendment": { "network_policy_amendment": { "host": "example.com", "action": "allow" } } } }`, `{ "decision": "decline" }`, or `{ "decision": "cancel" }`.
|
||||
4. `serverRequest/resolved` — `{ threadId, requestId }` confirms the pending request has been resolved or cleared, including lifecycle cleanup on turn start/complete/interrupt.
|
||||
5. `item/completed` — final `commandExecution` item with `status: "completed" | "failed" | "declined"` and execution output. Render this as the authoritative result.
|
||||
|
||||
@@ -2256,6 +2256,10 @@ async fn on_command_execution_request_approval_response(
|
||||
|
||||
let (decision, completion_status) = match decision {
|
||||
CommandExecutionApprovalDecision::Accept => (ReviewDecision::Approved, None),
|
||||
CommandExecutionApprovalDecision::AcceptWithCommandOverride { command } => (
|
||||
ReviewDecision::ApprovedWithCommandOverride { command },
|
||||
None,
|
||||
),
|
||||
CommandExecutionApprovalDecision::AcceptForSession => {
|
||||
(ReviewDecision::ApprovedForSession, None)
|
||||
}
|
||||
|
||||
@@ -92,6 +92,8 @@ use codex_app_server_protocol::ReviewStartParams;
|
||||
use codex_app_server_protocol::ReviewStartResponse;
|
||||
use codex_app_server_protocol::ReviewTarget as ApiReviewTarget;
|
||||
use codex_app_server_protocol::SandboxMode;
|
||||
use codex_app_server_protocol::SdkDelegationConfig;
|
||||
use codex_app_server_protocol::SdkDelegationConfiguredNotification;
|
||||
use codex_app_server_protocol::ServerNotification;
|
||||
use codex_app_server_protocol::ServerRequestResolvedNotification;
|
||||
use codex_app_server_protocol::SkillsConfigWriteParams;
|
||||
@@ -1602,11 +1604,14 @@ impl CodexMessageProcessor {
|
||||
base_instructions,
|
||||
developer_instructions,
|
||||
dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
mock_experimental_field: _mock_experimental_field,
|
||||
experimental_raw_events,
|
||||
personality,
|
||||
ephemeral,
|
||||
persist_extended_history,
|
||||
sdk_delegation,
|
||||
} = params;
|
||||
let mut typesafe_overrides = self.build_thread_config_overrides(
|
||||
model,
|
||||
@@ -1620,6 +1625,11 @@ impl CodexMessageProcessor {
|
||||
personality,
|
||||
);
|
||||
typesafe_overrides.ephemeral = ephemeral;
|
||||
let mut config = config.unwrap_or_default();
|
||||
let sdk_delegation = sdk_delegation.inspect(|delegation| {
|
||||
apply_sdk_delegation_overrides(&mut config, &mut typesafe_overrides, delegation);
|
||||
});
|
||||
let config = (!config.is_empty()).then_some(config);
|
||||
let cli_overrides = self.cli_overrides.clone();
|
||||
let cloud_requirements = self.current_cloud_requirements();
|
||||
let listener_task_context = ListenerTaskContext {
|
||||
@@ -1640,9 +1650,12 @@ impl CodexMessageProcessor {
|
||||
config,
|
||||
typesafe_overrides,
|
||||
dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
persist_extended_history,
|
||||
service_name,
|
||||
experimental_raw_events,
|
||||
sdk_delegation,
|
||||
)
|
||||
.await;
|
||||
});
|
||||
@@ -1657,9 +1670,12 @@ impl CodexMessageProcessor {
|
||||
config_overrides: Option<HashMap<String, serde_json::Value>>,
|
||||
typesafe_overrides: ConfigOverrides,
|
||||
dynamic_tools: Option<Vec<ApiDynamicToolSpec>>,
|
||||
builtin_tools: Option<Vec<String>>,
|
||||
manual_tool_execution: bool,
|
||||
persist_extended_history: bool,
|
||||
service_name: Option<String>,
|
||||
experimental_raw_events: bool,
|
||||
sdk_delegation: Option<SdkDelegationConfig>,
|
||||
) {
|
||||
let config = match derive_config_from_params(
|
||||
&cli_overrides,
|
||||
@@ -1685,6 +1701,20 @@ impl CodexMessageProcessor {
|
||||
};
|
||||
|
||||
let dynamic_tools = dynamic_tools.unwrap_or_default();
|
||||
if let Some(builtin_tools) = builtin_tools.as_ref()
|
||||
&& let Err(message) = validate_builtin_tools(builtin_tools)
|
||||
{
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message,
|
||||
data: None,
|
||||
};
|
||||
listener_task_context
|
||||
.outgoing
|
||||
.send_error(request_id, error)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
let core_dynamic_tools = if dynamic_tools.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
@@ -1715,6 +1745,8 @@ impl CodexMessageProcessor {
|
||||
.start_thread_with_tools_and_service_name(
|
||||
config,
|
||||
core_dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
persist_extended_history,
|
||||
service_name,
|
||||
)
|
||||
@@ -1772,17 +1804,36 @@ impl CodexMessageProcessor {
|
||||
sandbox: config_snapshot.sandbox_policy.into(),
|
||||
reasoning_effort: config_snapshot.reasoning_effort,
|
||||
};
|
||||
let response_thread_id = response.thread.id.clone();
|
||||
let response_model_provider = response.model_provider.clone();
|
||||
|
||||
listener_task_context
|
||||
.outgoing
|
||||
.send_response(request_id, response)
|
||||
.await;
|
||||
info!("thread/start created thread {response_thread_id}");
|
||||
|
||||
let notif = ThreadStartedNotification { thread };
|
||||
listener_task_context
|
||||
.outgoing
|
||||
.send_server_notification(ServerNotification::ThreadStarted(notif))
|
||||
.await;
|
||||
info!("thread/start sent thread/started for {response_thread_id}");
|
||||
|
||||
if let Some(sdk_delegation) = sdk_delegation {
|
||||
let notification = SdkDelegationConfiguredNotification {
|
||||
thread_id: response_thread_id,
|
||||
model_provider: response_model_provider,
|
||||
bridge_url: sdk_delegation.bridge_url,
|
||||
};
|
||||
listener_task_context
|
||||
.outgoing
|
||||
.send_server_notification(ServerNotification::SdkDelegationConfigured(
|
||||
notification,
|
||||
))
|
||||
.await;
|
||||
info!("thread/start sent codexSdk/delegationConfigured");
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
@@ -6877,6 +6928,25 @@ fn validate_dynamic_tools(tools: &[ApiDynamicToolSpec]) -> Result<(), String> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_builtin_tools(tools: &[String]) -> Result<(), String> {
|
||||
let mut seen = HashSet::new();
|
||||
for tool in tools {
|
||||
let name = tool.trim();
|
||||
if name.is_empty() {
|
||||
return Err("builtin tool name must not be empty".to_string());
|
||||
}
|
||||
if name != tool {
|
||||
return Err(format!(
|
||||
"builtin tool name has leading/trailing whitespace: {tool}"
|
||||
));
|
||||
}
|
||||
if !seen.insert(name.to_string()) {
|
||||
return Err(format!("duplicate builtin tool name: {name}"));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn replace_cloud_requirements_loader(
|
||||
cloud_requirements: &RwLock<CloudRequirementsLoader>,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
@@ -6948,6 +7018,47 @@ async fn derive_config_from_params(
|
||||
.await
|
||||
}
|
||||
|
||||
fn apply_sdk_delegation_overrides(
|
||||
request_overrides: &mut HashMap<String, serde_json::Value>,
|
||||
typesafe_overrides: &mut ConfigOverrides,
|
||||
sdk_delegation: &SdkDelegationConfig,
|
||||
) {
|
||||
let provider_id = sdk_delegation
|
||||
.model_provider_id
|
||||
.clone()
|
||||
.unwrap_or_else(|| "codex-sdk-v2".to_string());
|
||||
typesafe_overrides.model_provider = Some(provider_id.clone());
|
||||
|
||||
let provider_prefix = format!("model_providers.{provider_id}");
|
||||
request_overrides.insert(
|
||||
format!("{provider_prefix}.name"),
|
||||
serde_json::Value::String("Codex SDK v2 Delegated Provider".to_string()),
|
||||
);
|
||||
request_overrides.insert(
|
||||
format!("{provider_prefix}.base_url"),
|
||||
serde_json::Value::String(sdk_delegation.bridge_url.clone()),
|
||||
);
|
||||
request_overrides.insert(
|
||||
format!("{provider_prefix}.wire_api"),
|
||||
serde_json::Value::String("responses".to_string()),
|
||||
);
|
||||
request_overrides.insert(
|
||||
format!("{provider_prefix}.supports_websockets"),
|
||||
serde_json::Value::Bool(false),
|
||||
);
|
||||
request_overrides.insert(
|
||||
format!("{provider_prefix}.requires_openai_auth"),
|
||||
serde_json::Value::Bool(false),
|
||||
);
|
||||
|
||||
if let Some(stream_idle_timeout_ms) = sdk_delegation.stream_idle_timeout_ms {
|
||||
request_overrides.insert(
|
||||
format!("{provider_prefix}.stream_idle_timeout_ms"),
|
||||
serde_json::Value::Number(stream_idle_timeout_ms.into()),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async fn derive_config_for_cwd(
|
||||
cli_overrides: &[(String, TomlValue)],
|
||||
request_overrides: Option<HashMap<String, serde_json::Value>>,
|
||||
|
||||
@@ -28,6 +28,8 @@ use core_test_support::responses;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
use tempfile::TempDir;
|
||||
@@ -118,6 +120,85 @@ async fn thread_start_injects_dynamic_tools_into_model_requests() -> Result<()>
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_start_builtin_tools_filters_model_requests() -> Result<()> {
|
||||
let responses = vec![create_final_assistant_message_sse_response("Done")?];
|
||||
let server = create_mock_responses_server_sequence_unchecked(responses).await;
|
||||
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let thread_req = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
config: Some(HashMap::from([
|
||||
(
|
||||
"experimental_use_unified_exec_tool".to_string(),
|
||||
json!(true),
|
||||
),
|
||||
("include_apply_patch_tool".to_string(), json!(true)),
|
||||
])),
|
||||
builtin_tools: Some(vec![
|
||||
"exec_command".to_string(),
|
||||
"write_stdin".to_string(),
|
||||
"update_plan".to_string(),
|
||||
"view_image".to_string(),
|
||||
]),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let thread_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(thread_req)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(thread_resp)?;
|
||||
|
||||
let turn_req = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id,
|
||||
input: vec![V2UserInput::Text {
|
||||
text: "Hello".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let turn_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
|
||||
)
|
||||
.await??;
|
||||
let _turn: TurnStartResponse = to_response::<TurnStartResponse>(turn_resp)?;
|
||||
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("turn/completed"),
|
||||
)
|
||||
.await??;
|
||||
|
||||
let bodies = responses_bodies(&server).await?;
|
||||
let body = bodies
|
||||
.first()
|
||||
.context("expected at least one responses request")?;
|
||||
let tool_names = body
|
||||
.get("tools")
|
||||
.and_then(Value::as_array)
|
||||
.context("expected tools array in request body")?
|
||||
.iter()
|
||||
.filter_map(|tool| tool.get("name").and_then(Value::as_str))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
assert_eq!(
|
||||
tool_names,
|
||||
HashSet::from(["exec_command", "write_stdin", "update_plan", "view_image",])
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Exercises the full dynamic tool call path (server request, client response, model output).
|
||||
#[tokio::test]
|
||||
async fn dynamic_tool_call_round_trip_sends_text_content_items_to_model() -> Result<()> {
|
||||
|
||||
@@ -10,6 +10,7 @@ use codex_app_server_protocol::JSONRPCMessage;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::MockExperimentalMethodParams;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::SdkDelegationConfig;
|
||||
use codex_app_server_protocol::ThreadRealtimeStartParams;
|
||||
use codex_app_server_protocol::ThreadStartParams;
|
||||
use codex_app_server_protocol::ThreadStartResponse;
|
||||
@@ -121,6 +122,46 @@ async fn thread_start_mock_field_requires_experimental_api_capability() -> Resul
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_start_sdk_delegation_requires_experimental_api_capability() -> Result<()> {
|
||||
let server = create_mock_responses_server_sequence_unchecked(Vec::new()).await;
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
||||
let init = mcp
|
||||
.initialize_with_capabilities(
|
||||
default_client_info(),
|
||||
Some(InitializeCapabilities {
|
||||
experimental_api: false,
|
||||
opt_out_notification_methods: None,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let JSONRPCMessage::Response(_) = init else {
|
||||
anyhow::bail!("expected initialize response, got {init:?}");
|
||||
};
|
||||
|
||||
let request_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
sdk_delegation: Some(SdkDelegationConfig {
|
||||
bridge_url: "http://127.0.0.1:8080/v1".to_string(),
|
||||
model_provider_id: None,
|
||||
stream_idle_timeout_ms: None,
|
||||
}),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
let error = timeout(
|
||||
DEFAULT_TIMEOUT,
|
||||
mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
|
||||
)
|
||||
.await??;
|
||||
assert_experimental_capability_error(error, "thread/start.sdkDelegation");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_start_without_dynamic_tools_allows_without_experimental_api_capability()
|
||||
-> Result<()> {
|
||||
|
||||
@@ -241,9 +241,12 @@ async fn skills_changed_notification_is_emitted_after_skill_change() -> Result<(
|
||||
personality: None,
|
||||
ephemeral: None,
|
||||
dynamic_tools: None,
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
mock_experimental_field: None,
|
||||
experimental_raw_events: false,
|
||||
persist_extended_history: false,
|
||||
sdk_delegation: None,
|
||||
})
|
||||
.await?;
|
||||
let _: JSONRPCResponse = timeout(
|
||||
|
||||
@@ -6,6 +6,8 @@ use codex_app_server_protocol::JSONRPCError;
|
||||
use codex_app_server_protocol::JSONRPCMessage;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::SdkDelegationConfig;
|
||||
use codex_app_server_protocol::SdkDelegationConfiguredNotification;
|
||||
use codex_app_server_protocol::ThreadStartParams;
|
||||
use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_app_server_protocol::ThreadStartedNotification;
|
||||
@@ -181,6 +183,55 @@ model_reasoning_effort = "high"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_start_emits_sdk_delegation_configured_notification() -> Result<()> {
|
||||
let server = create_mock_responses_server_repeating_assistant("Done").await;
|
||||
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
|
||||
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let req_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
sdk_delegation: Some(SdkDelegationConfig {
|
||||
bridge_url: "http://127.0.0.1:8080/v1".to_string(),
|
||||
model_provider_id: Some("sdk-provider".to_string()),
|
||||
stream_idle_timeout_ms: Some(5000),
|
||||
}),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
|
||||
let resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(req_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(resp)?;
|
||||
|
||||
let deadline = tokio::time::Instant::now() + DEFAULT_READ_TIMEOUT;
|
||||
let notification = loop {
|
||||
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
|
||||
let message = timeout(remaining, mcp.read_next_message()).await??;
|
||||
let JSONRPCMessage::Notification(notification) = message else {
|
||||
continue;
|
||||
};
|
||||
if notification.method == "codexSdk/delegationConfigured" {
|
||||
break notification;
|
||||
}
|
||||
};
|
||||
|
||||
let configured: SdkDelegationConfiguredNotification =
|
||||
serde_json::from_value(notification.params.expect("params must be present"))?;
|
||||
assert_eq!(configured.thread_id, thread.id);
|
||||
assert_eq!(configured.model_provider, "sdk-provider");
|
||||
assert_eq!(configured.bridge_url, "http://127.0.0.1:8080/v1");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_start_accepts_flex_service_tier() -> Result<()> {
|
||||
let server = create_mock_responses_server_repeating_assistant("Done").await;
|
||||
|
||||
75
codex-rs/core/src/base_instructions.rs
Normal file
75
codex-rs/core/src/base_instructions.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use crate::tools::spec::ToolsConfig;
|
||||
|
||||
const CORE_BASE_INSTRUCTIONS: &str = include_str!("../templates/base_instructions/core.md");
|
||||
const APPLY_PATCH_INSTRUCTIONS: &str =
|
||||
include_str!("../templates/base_instructions/capabilities/apply_patch.md");
|
||||
const UNIFIED_EXEC_INSTRUCTIONS: &str =
|
||||
include_str!("../templates/base_instructions/capabilities/unified_exec.md");
|
||||
const UPDATE_PLAN_INSTRUCTIONS: &str =
|
||||
include_str!("../templates/base_instructions/capabilities/update_plan.md");
|
||||
|
||||
pub(crate) fn compose_base_instructions(tools_config: &ToolsConfig) -> String {
|
||||
let mut sections = vec![CORE_BASE_INSTRUCTIONS.trim().to_string()];
|
||||
|
||||
if tools_config.has_builtin_tool("exec_command") {
|
||||
sections.push(UNIFIED_EXEC_INSTRUCTIONS.trim().to_string());
|
||||
}
|
||||
if tools_config.has_builtin_tool("apply_patch") {
|
||||
sections.push(APPLY_PATCH_INSTRUCTIONS.trim().to_string());
|
||||
}
|
||||
if tools_config.has_builtin_tool("update_plan") {
|
||||
sections.push(UPDATE_PLAN_INSTRUCTIONS.trim().to_string());
|
||||
}
|
||||
|
||||
sections.join("\n\n")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use crate::models_manager::model_info::model_info_from_slug;
|
||||
use crate::tools::spec::ToolsConfigParams;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
|
||||
fn tools_config() -> ToolsConfig {
|
||||
let mut features = Features::new();
|
||||
let _ = features.enable(Feature::ShellTool);
|
||||
let _ = features.enable(Feature::UnifiedExec);
|
||||
let _ = features.enable(Feature::ApplyPatchFreeform);
|
||||
let model_info = model_info_from_slug("gpt-5.2-codex");
|
||||
ToolsConfig::new(&ToolsConfigParams {
|
||||
model_info: &model_info,
|
||||
features: &features,
|
||||
web_search_mode: None,
|
||||
session_source: SessionSource::Cli,
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn omits_unenabled_capability_sections() {
|
||||
let tools_config = tools_config().with_builtin_tools(Some(vec!["exec_command".to_string()]));
|
||||
|
||||
let instructions = compose_base_instructions(&tools_config);
|
||||
|
||||
assert!(instructions.contains("# Unified Exec"));
|
||||
assert!(!instructions.contains("# Apply Patch"));
|
||||
assert!(!instructions.contains("# Update Plan"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_capability_sections_for_enabled_tools() {
|
||||
let tools_config = tools_config().with_builtin_tools(Some(vec![
|
||||
"exec_command".to_string(),
|
||||
"apply_patch".to_string(),
|
||||
"update_plan".to_string(),
|
||||
]));
|
||||
|
||||
let instructions = compose_base_instructions(&tools_config);
|
||||
|
||||
assert!(instructions.contains("# Unified Exec"));
|
||||
assert!(instructions.contains("# Apply Patch"));
|
||||
assert!(instructions.contains("# Update Plan"));
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,7 @@ use crate::analytics_client::InvocationType;
|
||||
use crate::analytics_client::build_track_events_context;
|
||||
use crate::apps::render_apps_section;
|
||||
use crate::commit_attribution::commit_message_trailer_instruction;
|
||||
use crate::base_instructions::compose_base_instructions;
|
||||
use crate::compact;
|
||||
use crate::compact::InitialContextInjection;
|
||||
use crate::compact::run_inline_auto_compact_task;
|
||||
@@ -356,6 +357,8 @@ impl Codex {
|
||||
session_source: SessionSource,
|
||||
agent_control: AgentControl,
|
||||
dynamic_tools: Vec<DynamicToolSpec>,
|
||||
builtin_tools: Option<Vec<String>>,
|
||||
manual_tool_execution: bool,
|
||||
persist_extended_history: bool,
|
||||
metrics_service_name: Option<String>,
|
||||
inherited_shell_snapshot: Option<Arc<ShellSnapshot>>,
|
||||
@@ -428,16 +431,24 @@ impl Codex {
|
||||
.get_default_model(&config.model, refresh_strategy)
|
||||
.await;
|
||||
|
||||
let model_info = models_manager.get_model_info(model.as_str(), &config).await;
|
||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_info: &model_info,
|
||||
features: &config.features,
|
||||
web_search_mode: config.web_search_request_mode.as_ref(),
|
||||
session_source: session_source.clone(),
|
||||
})
|
||||
.with_builtin_tools(builtin_tools.clone())
|
||||
.with_manual_tool_execution(manual_tool_execution);
|
||||
// Resolve base instructions for the session. Priority order:
|
||||
// 1. config.base_instructions override
|
||||
// 2. conversation history => session_meta.base_instructions
|
||||
// 3. base_instructions for current model
|
||||
let model_info = models_manager.get_model_info(model.as_str(), &config).await;
|
||||
// 3. composed base instructions for the current tool/capability set
|
||||
let base_instructions = config
|
||||
.base_instructions
|
||||
.clone()
|
||||
.or_else(|| conversation_history.get_base_instructions().map(|s| s.text))
|
||||
.unwrap_or_else(|| model_info.get_model_instructions(config.personality));
|
||||
.unwrap_or_else(|| compose_base_instructions(&tools_config));
|
||||
|
||||
// Respect thread-start tools. When missing (resumed/forked threads), read from the db
|
||||
// first, then fall back to rollout-file tools.
|
||||
@@ -497,6 +508,8 @@ impl Codex {
|
||||
app_server_client_name: None,
|
||||
session_source,
|
||||
dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
persist_extended_history,
|
||||
inherited_shell_snapshot,
|
||||
};
|
||||
@@ -745,7 +758,9 @@ impl TurnContext {
|
||||
session_source: self.session_source.clone(),
|
||||
})
|
||||
.with_allow_login_shell(self.tools_config.allow_login_shell)
|
||||
.with_agent_roles(config.agent_roles.clone());
|
||||
.with_agent_roles(config.agent_roles.clone())
|
||||
.with_builtin_tools(self.tools_config.builtin_tools.clone())
|
||||
.with_manual_tool_execution(self.tools_config.manual_tool_execution);
|
||||
|
||||
Self {
|
||||
sub_id: self.sub_id.clone(),
|
||||
@@ -901,6 +916,8 @@ pub(crate) struct SessionConfiguration {
|
||||
/// Source of the session (cli, vscode, exec, mcp, ...)
|
||||
session_source: SessionSource,
|
||||
dynamic_tools: Vec<DynamicToolSpec>,
|
||||
builtin_tools: Option<Vec<String>>,
|
||||
manual_tool_execution: bool,
|
||||
persist_extended_history: bool,
|
||||
inherited_shell_snapshot: Option<Arc<ShellSnapshot>>,
|
||||
}
|
||||
@@ -1120,7 +1137,9 @@ impl Session {
|
||||
session_source: session_source.clone(),
|
||||
})
|
||||
.with_allow_login_shell(per_turn_config.permissions.allow_login_shell)
|
||||
.with_agent_roles(per_turn_config.agent_roles.clone());
|
||||
.with_agent_roles(per_turn_config.agent_roles.clone())
|
||||
.with_builtin_tools(session_configuration.builtin_tools.clone())
|
||||
.with_manual_tool_execution(session_configuration.manual_tool_execution);
|
||||
|
||||
let cwd = session_configuration.cwd.clone();
|
||||
let turn_metadata_state = Arc::new(TurnMetadataState::new(
|
||||
@@ -4912,7 +4931,9 @@ async fn spawn_review_thread(
|
||||
session_source: parent_turn_context.session_source.clone(),
|
||||
})
|
||||
.with_allow_login_shell(config.permissions.allow_login_shell)
|
||||
.with_agent_roles(config.agent_roles.clone());
|
||||
.with_agent_roles(config.agent_roles.clone())
|
||||
.with_builtin_tools(parent_turn_context.tools_config.builtin_tools.clone())
|
||||
.with_manual_tool_execution(parent_turn_context.tools_config.manual_tool_execution);
|
||||
|
||||
let review_prompt = resolved.prompt.clone();
|
||||
let provider = parent_turn_context.provider.clone();
|
||||
|
||||
@@ -60,6 +60,8 @@ pub(crate) async fn run_codex_thread_interactive(
|
||||
SessionSource::SubAgent(SubAgentSource::Review),
|
||||
parent_session.services.agent_control.clone(),
|
||||
Vec::new(),
|
||||
None,
|
||||
false,
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
|
||||
@@ -1425,6 +1425,8 @@ async fn set_rate_limits_retains_previous_credits() {
|
||||
app_server_client_name: None,
|
||||
session_source: SessionSource::Exec,
|
||||
dynamic_tools: Vec::new(),
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
persist_extended_history: false,
|
||||
inherited_shell_snapshot: None,
|
||||
};
|
||||
@@ -1519,6 +1521,8 @@ async fn set_rate_limits_updates_plan_type_when_present() {
|
||||
app_server_client_name: None,
|
||||
session_source: SessionSource::Exec,
|
||||
dynamic_tools: Vec::new(),
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
persist_extended_history: false,
|
||||
inherited_shell_snapshot: None,
|
||||
};
|
||||
@@ -1871,6 +1875,8 @@ pub(crate) async fn make_session_configuration_for_tests() -> SessionConfigurati
|
||||
app_server_client_name: None,
|
||||
session_source: SessionSource::Exec,
|
||||
dynamic_tools: Vec::new(),
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
persist_extended_history: false,
|
||||
inherited_shell_snapshot: None,
|
||||
}
|
||||
@@ -1928,6 +1934,8 @@ async fn session_new_fails_when_zsh_fork_enabled_without_zsh_path() {
|
||||
app_server_client_name: None,
|
||||
session_source: SessionSource::Exec,
|
||||
dynamic_tools: Vec::new(),
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
persist_extended_history: false,
|
||||
inherited_shell_snapshot: None,
|
||||
};
|
||||
@@ -2018,6 +2026,8 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
|
||||
app_server_client_name: None,
|
||||
session_source: SessionSource::Exec,
|
||||
dynamic_tools: Vec::new(),
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
persist_extended_history: false,
|
||||
inherited_shell_snapshot: None,
|
||||
};
|
||||
@@ -2423,6 +2433,8 @@ pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx(
|
||||
app_server_client_name: None,
|
||||
session_source: SessionSource::Exec,
|
||||
dynamic_tools,
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
persist_extended_history: false,
|
||||
inherited_shell_snapshot: None,
|
||||
};
|
||||
|
||||
@@ -175,6 +175,7 @@ pub(crate) struct ExecApprovalRequest<'a> {
|
||||
pub(crate) sandbox_policy: &'a SandboxPolicy,
|
||||
pub(crate) sandbox_permissions: SandboxPermissions,
|
||||
pub(crate) prefix_rule: Option<Vec<String>>,
|
||||
pub(crate) manual_tool_execution: bool,
|
||||
}
|
||||
|
||||
impl ExecPolicyManager {
|
||||
@@ -206,6 +207,7 @@ impl ExecPolicyManager {
|
||||
sandbox_policy,
|
||||
sandbox_permissions,
|
||||
prefix_rule,
|
||||
manual_tool_execution,
|
||||
} = req;
|
||||
let exec_policy = self.current();
|
||||
let (commands, used_complex_parsing) = commands_for_exec_policy(command);
|
||||
@@ -220,6 +222,7 @@ impl ExecPolicyManager {
|
||||
cmd,
|
||||
sandbox_permissions,
|
||||
used_complex_parsing,
|
||||
manual_tool_execution,
|
||||
)
|
||||
};
|
||||
let match_options = MatchOptions {
|
||||
@@ -491,7 +494,11 @@ pub fn render_decision_for_unmatched_command(
|
||||
command: &[String],
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
used_complex_parsing: bool,
|
||||
manual_tool_execution: bool,
|
||||
) -> Decision {
|
||||
if manual_tool_execution {
|
||||
return Decision::Prompt;
|
||||
}
|
||||
if is_known_safe_command(command) && !used_complex_parsing {
|
||||
return Decision::Allow;
|
||||
}
|
||||
@@ -1234,6 +1241,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1284,6 +1292,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1311,6 +1320,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1339,6 +1349,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: Some(requested_prefix.clone()),
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1378,6 +1389,7 @@ prefix_rule(
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1407,6 +1419,7 @@ prefix_rule(
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1443,6 +1456,7 @@ prefix_rule(pattern=["git"], decision="allow")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1485,6 +1499,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1513,6 +1528,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1546,6 +1562,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1573,6 +1590,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
&command,
|
||||
SandboxPermissions::RequireEscalated,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -1592,6 +1610,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1628,6 +1647,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1662,6 +1682,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1685,6 +1706,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1709,6 +1731,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1737,6 +1760,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1765,6 +1789,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1796,6 +1821,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1834,6 +1860,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
@@ -1908,6 +1935,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1938,6 +1966,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -1965,6 +1994,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -2003,6 +2033,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
@@ -2026,6 +2057,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -2038,6 +2070,31 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn manual_tool_execution_requires_approval_for_safe_command() {
|
||||
let command = vec!["echo".to_string(), "safe".to_string()];
|
||||
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: true,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn proposed_execpolicy_amendment_is_suppressed_when_policy_matches_allow() {
|
||||
let policy_src = r#"prefix_rule(pattern=["echo"], decision="allow")"#;
|
||||
@@ -2056,6 +2113,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -2229,6 +2287,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -2295,6 +2354,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: permissions,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await,
|
||||
"{pwsh_approval_reason}"
|
||||
@@ -2318,6 +2378,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: permissions,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await,
|
||||
r#"On all platforms, a forbidden command should require approval
|
||||
@@ -2337,6 +2398,7 @@ prefix_rule(pattern=["git"], decision="prompt")
|
||||
sandbox_policy: &SandboxPolicy::new_read_only_policy(),
|
||||
sandbox_permissions: permissions,
|
||||
prefix_rule: None,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await,
|
||||
r#"On all platforms, a forbidden command should require approval
|
||||
|
||||
@@ -10,6 +10,7 @@ pub mod api_bridge;
|
||||
mod apply_patch;
|
||||
mod apps;
|
||||
pub mod auth;
|
||||
mod base_instructions;
|
||||
mod client;
|
||||
mod client_common;
|
||||
pub mod codex;
|
||||
|
||||
@@ -314,18 +314,22 @@ impl ThreadManager {
|
||||
pub async fn start_thread(&self, config: Config) -> CodexResult<NewThread> {
|
||||
// Box delegated thread-spawn futures so these convenience wrappers do
|
||||
// not inline the full spawn path into every caller's async state.
|
||||
Box::pin(self.start_thread_with_tools(config, Vec::new(), false)).await
|
||||
Box::pin(self.start_thread_with_tools(config, Vec::new(), None, false, false)).await
|
||||
}
|
||||
|
||||
pub async fn start_thread_with_tools(
|
||||
&self,
|
||||
config: Config,
|
||||
dynamic_tools: Vec<codex_protocol::dynamic_tools::DynamicToolSpec>,
|
||||
builtin_tools: Option<Vec<String>>,
|
||||
manual_tool_execution: bool,
|
||||
persist_extended_history: bool,
|
||||
) -> CodexResult<NewThread> {
|
||||
Box::pin(self.start_thread_with_tools_and_service_name(
|
||||
config,
|
||||
dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
persist_extended_history,
|
||||
None,
|
||||
))
|
||||
@@ -336,6 +340,8 @@ impl ThreadManager {
|
||||
&self,
|
||||
config: Config,
|
||||
dynamic_tools: Vec<codex_protocol::dynamic_tools::DynamicToolSpec>,
|
||||
builtin_tools: Option<Vec<String>>,
|
||||
manual_tool_execution: bool,
|
||||
persist_extended_history: bool,
|
||||
metrics_service_name: Option<String>,
|
||||
) -> CodexResult<NewThread> {
|
||||
@@ -345,6 +351,8 @@ impl ThreadManager {
|
||||
Arc::clone(&self.state.auth_manager),
|
||||
self.agent_control(),
|
||||
dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
persist_extended_history,
|
||||
metrics_service_name,
|
||||
))
|
||||
@@ -375,6 +383,8 @@ impl ThreadManager {
|
||||
auth_manager,
|
||||
self.agent_control(),
|
||||
Vec::new(),
|
||||
None,
|
||||
false,
|
||||
persist_extended_history,
|
||||
None,
|
||||
))
|
||||
@@ -416,6 +426,8 @@ impl ThreadManager {
|
||||
Arc::clone(&self.state.auth_manager),
|
||||
self.agent_control(),
|
||||
Vec::new(),
|
||||
None,
|
||||
false,
|
||||
persist_extended_history,
|
||||
None,
|
||||
))
|
||||
@@ -499,6 +511,8 @@ impl ThreadManagerState {
|
||||
agent_control,
|
||||
session_source,
|
||||
Vec::new(),
|
||||
None,
|
||||
false,
|
||||
persist_extended_history,
|
||||
metrics_service_name,
|
||||
inherited_shell_snapshot,
|
||||
@@ -522,6 +536,8 @@ impl ThreadManagerState {
|
||||
agent_control,
|
||||
session_source,
|
||||
Vec::new(),
|
||||
None,
|
||||
false,
|
||||
false,
|
||||
None,
|
||||
inherited_shell_snapshot,
|
||||
@@ -545,6 +561,8 @@ impl ThreadManagerState {
|
||||
agent_control,
|
||||
session_source,
|
||||
Vec::new(),
|
||||
None,
|
||||
false,
|
||||
persist_extended_history,
|
||||
None,
|
||||
inherited_shell_snapshot,
|
||||
@@ -561,6 +579,8 @@ impl ThreadManagerState {
|
||||
auth_manager: Arc<AuthManager>,
|
||||
agent_control: AgentControl,
|
||||
dynamic_tools: Vec<codex_protocol::dynamic_tools::DynamicToolSpec>,
|
||||
builtin_tools: Option<Vec<String>>,
|
||||
manual_tool_execution: bool,
|
||||
persist_extended_history: bool,
|
||||
metrics_service_name: Option<String>,
|
||||
) -> CodexResult<NewThread> {
|
||||
@@ -571,6 +591,8 @@ impl ThreadManagerState {
|
||||
agent_control,
|
||||
self.session_source.clone(),
|
||||
dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
persist_extended_history,
|
||||
metrics_service_name,
|
||||
None,
|
||||
@@ -587,6 +609,8 @@ impl ThreadManagerState {
|
||||
agent_control: AgentControl,
|
||||
session_source: SessionSource,
|
||||
dynamic_tools: Vec<codex_protocol::dynamic_tools::DynamicToolSpec>,
|
||||
builtin_tools: Option<Vec<String>>,
|
||||
manual_tool_execution: bool,
|
||||
persist_extended_history: bool,
|
||||
metrics_service_name: Option<String>,
|
||||
inherited_shell_snapshot: Option<Arc<ShellSnapshot>>,
|
||||
@@ -608,6 +632,8 @@ impl ThreadManagerState {
|
||||
session_source,
|
||||
agent_control,
|
||||
dynamic_tools,
|
||||
builtin_tools,
|
||||
manual_tool_execution,
|
||||
persist_extended_history,
|
||||
metrics_service_name,
|
||||
inherited_shell_snapshot,
|
||||
|
||||
@@ -129,7 +129,7 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let req = ApplyPatchRequest {
|
||||
let mut req = ApplyPatchRequest {
|
||||
action: apply.action,
|
||||
file_paths,
|
||||
changes,
|
||||
@@ -149,7 +149,7 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&mut req,
|
||||
&tool_ctx,
|
||||
turn.as_ref(),
|
||||
turn.approval_policy.value(),
|
||||
@@ -231,7 +231,7 @@ pub(crate) async fn intercept_apply_patch(
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let req = ApplyPatchRequest {
|
||||
let mut req = ApplyPatchRequest {
|
||||
action: apply.action,
|
||||
file_paths: approval_keys,
|
||||
changes,
|
||||
@@ -251,7 +251,7 @@ pub(crate) async fn intercept_apply_patch(
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&mut req,
|
||||
&tool_ctx,
|
||||
turn.as_ref(),
|
||||
turn.approval_policy.value(),
|
||||
|
||||
@@ -389,10 +389,11 @@ impl ShellHandler {
|
||||
sandbox_policy: turn.sandbox_policy.get(),
|
||||
sandbox_permissions: exec_params.sandbox_permissions,
|
||||
prefix_rule,
|
||||
manual_tool_execution: false,
|
||||
})
|
||||
.await;
|
||||
|
||||
let req = ShellRequest {
|
||||
let mut req = ShellRequest {
|
||||
command: exec_params.command.clone(),
|
||||
cwd: exec_params.cwd.clone(),
|
||||
timeout_ms: exec_params.expiration.timeout_ms(),
|
||||
@@ -423,7 +424,7 @@ impl ShellHandler {
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&mut req,
|
||||
&tool_ctx,
|
||||
&turn,
|
||||
turn.approval_policy.value(),
|
||||
|
||||
@@ -349,7 +349,9 @@ impl NetworkApprovalService {
|
||||
|
||||
let mut cache_session_deny = false;
|
||||
let resolved = match approval_decision {
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedWithCommandOverride { .. }
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
|
||||
PendingApprovalDecision::AllowOnce
|
||||
}
|
||||
ReviewDecision::ApprovedForSession => PendingApprovalDecision::AllowForSession,
|
||||
|
||||
@@ -100,7 +100,7 @@ impl ToolOrchestrator {
|
||||
pub async fn run<Rq, Out, T>(
|
||||
&mut self,
|
||||
tool: &mut T,
|
||||
req: &Rq,
|
||||
req: &mut Rq,
|
||||
tool_ctx: &ToolCtx,
|
||||
turn_ctx: &crate::codex::TurnContext,
|
||||
approval_policy: AskForApproval,
|
||||
@@ -144,10 +144,11 @@ impl ToolOrchestrator {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedWithCommandOverride { .. }
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
|
||||
| ReviewDecision::ApprovedForSession => {}
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
network_policy_amendment,
|
||||
ref network_policy_amendment,
|
||||
} => match network_policy_amendment.action {
|
||||
NetworkPolicyRuleAction::Allow => {}
|
||||
NetworkPolicyRuleAction::Deny => {
|
||||
@@ -155,6 +156,7 @@ impl ToolOrchestrator {
|
||||
}
|
||||
},
|
||||
}
|
||||
tool.apply_approval_decision(req, &decision);
|
||||
already_approved = true;
|
||||
}
|
||||
}
|
||||
@@ -280,10 +282,11 @@ impl ToolOrchestrator {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedWithCommandOverride { .. }
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
|
||||
| ReviewDecision::ApprovedForSession => {}
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
network_policy_amendment,
|
||||
ref network_policy_amendment,
|
||||
} => match network_policy_amendment.action {
|
||||
NetworkPolicyRuleAction::Allow => {}
|
||||
NetworkPolicyRuleAction::Deny => {
|
||||
@@ -291,6 +294,7 @@ impl ToolOrchestrator {
|
||||
}
|
||||
},
|
||||
}
|
||||
tool.apply_approval_decision(req, &decision);
|
||||
}
|
||||
|
||||
let escalated_attempt = SandboxAttempt {
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::client_common::tools::FreeformTool;
|
||||
use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::features::Feature;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
@@ -283,6 +286,21 @@ impl ToolRegistryBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn filter_builtin_tools(
|
||||
mut self,
|
||||
builtin_tools: &BTreeSet<String>,
|
||||
enabled_builtin_tools: &BTreeSet<String>,
|
||||
) -> Self {
|
||||
self.specs.retain(|tool| {
|
||||
let name = tool_spec_name(&tool.spec);
|
||||
!builtin_tools.contains(name) || enabled_builtin_tools.contains(name)
|
||||
});
|
||||
self.handlers.retain(|name, _| {
|
||||
!builtin_tools.contains(name) || enabled_builtin_tools.contains(name)
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
// TODO(jif) for dynamic tools.
|
||||
// pub fn register_many<I>(&mut self, names: I, handler: Arc<dyn ToolHandler>)
|
||||
// where
|
||||
@@ -307,6 +325,16 @@ impl ToolRegistryBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_spec_name(spec: &ToolSpec) -> &str {
|
||||
match spec {
|
||||
ToolSpec::Function(ResponsesApiTool { name, .. }) => name,
|
||||
ToolSpec::LocalShell {} => "local_shell",
|
||||
ToolSpec::ImageGeneration { .. } => "image_generation",
|
||||
ToolSpec::WebSearch { .. } => "web_search",
|
||||
ToolSpec::Freeform(FreeformTool { name, .. }) => name,
|
||||
}
|
||||
}
|
||||
|
||||
fn unsupported_tool_call_message(payload: &ToolPayload, tool_name: &str) -> String {
|
||||
match payload {
|
||||
ToolPayload::Custom { .. } => format!("unsupported custom tool call: {tool_name}"),
|
||||
|
||||
@@ -452,6 +452,7 @@ impl CoreShellActionProvider {
|
||||
.await?
|
||||
{
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedWithCommandOverride { .. }
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
|
||||
if needs_escalation {
|
||||
EscalationDecision::escalate(escalation_execution.clone())
|
||||
@@ -677,6 +678,7 @@ fn evaluate_intercepted_exec_policy(
|
||||
cmd,
|
||||
sandbox_permissions,
|
||||
used_complex_parsing,
|
||||
false,
|
||||
)
|
||||
};
|
||||
|
||||
|
||||
@@ -144,6 +144,12 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
Some(req.exec_approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn apply_approval_decision(&self, req: &mut UnifiedExecRequest, decision: &ReviewDecision) {
|
||||
if let ReviewDecision::ApprovedWithCommandOverride { command } = decision {
|
||||
req.command = command.clone();
|
||||
}
|
||||
}
|
||||
|
||||
fn sandbox_mode_for_first_attempt(&self, req: &UnifiedExecRequest) -> SandboxOverride {
|
||||
sandbox_override_for_first_attempt(req.sandbox_permissions, &req.exec_approval_requirement)
|
||||
}
|
||||
|
||||
@@ -271,6 +271,8 @@ pub(crate) trait Approvable<Req> {
|
||||
req: &'a Req,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision>;
|
||||
|
||||
fn apply_approval_decision(&self, _req: &mut Req, _decision: &ReviewDecision) {}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
|
||||
@@ -33,6 +33,7 @@ use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
use serde_json::json;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::HashMap;
|
||||
|
||||
const SEARCH_TOOL_BM25_DESCRIPTION_TEMPLATE: &str =
|
||||
@@ -72,6 +73,8 @@ pub(crate) struct ToolsConfig {
|
||||
pub experimental_supported_tools: Vec<String>,
|
||||
pub agent_jobs_tools: bool,
|
||||
pub agent_jobs_worker_tools: bool,
|
||||
pub builtin_tools: Option<Vec<String>>,
|
||||
pub manual_tool_execution: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct ToolsConfigParams<'a> {
|
||||
@@ -172,6 +175,8 @@ impl ToolsConfig {
|
||||
experimental_supported_tools: model_info.experimental_supported_tools.clone(),
|
||||
agent_jobs_tools: include_agent_jobs,
|
||||
agent_jobs_worker_tools,
|
||||
builtin_tools: None,
|
||||
manual_tool_execution: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,6 +189,35 @@ impl ToolsConfig {
|
||||
self.allow_login_shell = allow_login_shell;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_builtin_tools(mut self, builtin_tools: Option<Vec<String>>) -> Self {
|
||||
self.builtin_tools = builtin_tools;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_manual_tool_execution(mut self, manual_tool_execution: bool) -> Self {
|
||||
self.manual_tool_execution = manual_tool_execution;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn has_builtin_tool(&self, tool_name: &str) -> bool {
|
||||
if let Some(builtin_tools) = &self.builtin_tools {
|
||||
return builtin_tools.iter().any(|tool| tool == tool_name);
|
||||
}
|
||||
|
||||
match tool_name {
|
||||
"exec_command" | "write_stdin" => self.shell_type == ConfigShellToolType::UnifiedExec,
|
||||
"update_plan" => true,
|
||||
"request_user_input" => self.request_user_input,
|
||||
"apply_patch" => self.apply_patch_tool_type.is_some(),
|
||||
"search_tool_bm25" => self.search_tool,
|
||||
"view_image" => true,
|
||||
"spawn_agent" => self.collab_tools,
|
||||
"spawn_agents_on_csv" => self.agent_jobs_tools,
|
||||
"artifacts" => self.artifact_tools,
|
||||
_ => self.experimental_supported_tools.iter().any(|tool| tool == tool_name),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn supports_image_generation(model_info: &ModelInfo) -> bool {
|
||||
@@ -2055,7 +2089,50 @@ pub(crate) fn build_specs(
|
||||
}
|
||||
}
|
||||
|
||||
builder
|
||||
if let Some(builtin_tools) = &config.builtin_tools {
|
||||
let builtin_tools = builtin_tools.iter().cloned().collect::<BTreeSet<_>>();
|
||||
builder.filter_builtin_tools(&known_builtin_tool_names(), &builtin_tools)
|
||||
} else {
|
||||
builder
|
||||
}
|
||||
}
|
||||
|
||||
fn known_builtin_tool_names() -> BTreeSet<String> {
|
||||
[
|
||||
"artifacts",
|
||||
"apply_patch",
|
||||
"close_agent",
|
||||
"container.exec",
|
||||
"exec_command",
|
||||
"grep_files",
|
||||
"image_generation",
|
||||
"js_repl",
|
||||
"js_repl_reset",
|
||||
"list_dir",
|
||||
"list_mcp_resource_templates",
|
||||
"list_mcp_resources",
|
||||
"local_shell",
|
||||
"read_file",
|
||||
"read_mcp_resource",
|
||||
"report_agent_job_result",
|
||||
"request_user_input",
|
||||
"resume_agent",
|
||||
"search_tool_bm25",
|
||||
"send_input",
|
||||
"shell",
|
||||
"shell_command",
|
||||
"spawn_agent",
|
||||
"spawn_agents_on_csv",
|
||||
"test_sync_tool",
|
||||
"update_plan",
|
||||
"view_image",
|
||||
"wait",
|
||||
"web_search",
|
||||
"write_stdin",
|
||||
]
|
||||
.into_iter()
|
||||
.map(str::to_string)
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -584,9 +584,10 @@ impl UnifiedExecProcessManager {
|
||||
sandbox_policy: context.turn.sandbox_policy.get(),
|
||||
sandbox_permissions: request.sandbox_permissions,
|
||||
prefix_rule: request.prefix_rule.clone(),
|
||||
manual_tool_execution: context.turn.tools_config.manual_tool_execution,
|
||||
})
|
||||
.await;
|
||||
let req = UnifiedExecToolRequest {
|
||||
let mut req = UnifiedExecToolRequest {
|
||||
command: request.command.clone(),
|
||||
cwd,
|
||||
env,
|
||||
@@ -607,7 +608,7 @@ impl UnifiedExecProcessManager {
|
||||
orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&mut req,
|
||||
&tool_ctx,
|
||||
&context.turn,
|
||||
context.turn.approval_policy.value(),
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
# Apply Patch
|
||||
|
||||
The `apply_patch` tool is available for editing files. Use `apply_patch` rather than `applypatch` or `apply-patch`.
|
||||
|
||||
When using `apply_patch`:
|
||||
|
||||
- Prefer it for focused file edits.
|
||||
- Do not re-read a file just to verify the patch landed; the tool call will fail if it did not apply.
|
||||
@@ -0,0 +1,9 @@
|
||||
# Unified Exec
|
||||
|
||||
You can run terminal commands with `exec_command` and interact with long-running processes with `write_stdin`.
|
||||
|
||||
When using `exec_command` and `write_stdin`, follow these guidelines:
|
||||
|
||||
- Prefer `rg` or `rg --files` over slower alternatives like `grep` when searching for text or files.
|
||||
- Do not use Python scripts just to print large chunks of a file when a shell tool can do it directly.
|
||||
- Before making tool calls, send a brief preamble to the user explaining what you’re about to do.
|
||||
@@ -0,0 +1,10 @@
|
||||
# Update Plan
|
||||
|
||||
An `update_plan` tool is available. Use it to keep a concise, step-by-step plan for the task when a plan is warranted.
|
||||
|
||||
When using `update_plan`:
|
||||
|
||||
- Create short steps with a `status` for each step: `pending`, `in_progress`, or `completed`.
|
||||
- Keep exactly one step `in_progress` until the work is done.
|
||||
- Mark steps complete as you go rather than restating the entire plan in prose.
|
||||
- When all steps are finished, mark every step `completed`.
|
||||
254
codex-rs/core/templates/base_instructions/core.md
Normal file
254
codex-rs/core/templates/base_instructions/core.md
Normal file
@@ -0,0 +1,254 @@
|
||||
You are a coding agent running in the Codex CLI, a terminal-based coding assistant. Codex CLI is an open source project led by OpenAI. You are expected to be precise, safe, and helpful.
|
||||
|
||||
Your capabilities:
|
||||
|
||||
- Receive user prompts and other context provided by the harness, such as files in the workspace.
|
||||
- Communicate with the user by streaming thinking and responses.
|
||||
- Use the tools available in this run to inspect, modify, and validate work.
|
||||
|
||||
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
|
||||
|
||||
# How you work
|
||||
|
||||
## Personality
|
||||
|
||||
Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work.
|
||||
|
||||
# AGENTS.md spec
|
||||
- Repos often contain AGENTS.md files. These files can appear anywhere within the repository.
|
||||
- These files are a way for humans to give you (the agent) instructions or tips for working within the container.
|
||||
- Some examples might be: coding conventions, info about how code is organized, or instructions for how to run or test code.
|
||||
- Instructions in AGENTS.md files:
|
||||
- The scope of an AGENTS.md file is the entire directory tree rooted at the folder that contains it.
|
||||
- For every file you touch in the final patch, you must obey instructions in any AGENTS.md file whose scope includes that file.
|
||||
- Instructions about code style, structure, naming, etc. apply only to code within the AGENTS.md file's scope, unless the file states otherwise.
|
||||
- More-deeply-nested AGENTS.md files take precedence in the case of conflicting instructions.
|
||||
- Direct system/developer/user instructions (as part of a prompt) take precedence over AGENTS.md instructions.
|
||||
- The contents of the AGENTS.md file at the root of the repo and any directories from the CWD up to the root are included with the developer message and don't need to be re-read. When working in a subdirectory of CWD, or a directory outside the CWD, check for any AGENTS.md files that may be applicable.
|
||||
|
||||
## Responsiveness
|
||||
|
||||
### Preamble messages
|
||||
|
||||
Before making tool calls, send a brief preamble to the user explaining what you’re about to do. When sending preamble messages, follow these principles and examples:
|
||||
|
||||
- **Logically group related actions**: if you’re about to run several related commands, describe them together in one preamble rather than sending a separate note for each.
|
||||
- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words for quick updates).
|
||||
- **Build on prior context**: if this is not your first tool call, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.
|
||||
- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.
|
||||
- **Exception**: Avoid adding a preamble for every trivial read (e.g., `cat` a single file) unless it’s part of a larger grouped action.
|
||||
|
||||
**Examples:**
|
||||
|
||||
- “I’ve explored the repo; now checking the API route definitions.”
|
||||
- “Next, I’ll patch the config and update the related tests.”
|
||||
- “I’m about to scaffold the CLI commands and helper functions.”
|
||||
- “Ok cool, so I’ve wrapped my head around the repo. Now digging into the API routes.”
|
||||
- “Config’s looking tidy. Next up is patching helpers to keep things in sync.”
|
||||
- “Finished poking at the DB gateway. I will now chase down error handling.”
|
||||
- “Alright, build pipeline order is interesting. Checking how it reports failures.”
|
||||
- “Spotted a clever caching util; now hunting where it gets used.”
|
||||
|
||||
## Planning
|
||||
|
||||
Plans can help demonstrate that you've understood the task and convey how you're approaching it. A good plan breaks the task into meaningful, logically ordered steps that are easy to verify as you go.
|
||||
|
||||
Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
|
||||
|
||||
After updating a plan, summarize only the change and the next step rather than repeating the whole plan.
|
||||
|
||||
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all steps as completed. Sometimes, you may need to change plans in the middle of a task.
|
||||
|
||||
Use a plan when:
|
||||
|
||||
- The task is non-trivial and will require multiple actions over a long time horizon.
|
||||
- There are logical phases or dependencies where sequencing matters.
|
||||
- The work has ambiguity that benefits from outlining high-level goals.
|
||||
- You want intermediate checkpoints for feedback and validation.
|
||||
- When the user asked you to do more than one thing in a single prompt.
|
||||
- When the user explicitly asks for TODOs or a plan.
|
||||
- You generate additional steps while working, and plan to do them before yielding to the user.
|
||||
|
||||
### Examples
|
||||
|
||||
**High-quality plans**
|
||||
|
||||
Example 1:
|
||||
|
||||
1. Add CLI entry with file args
|
||||
2. Parse Markdown via CommonMark library
|
||||
3. Apply semantic HTML template
|
||||
4. Handle code blocks, images, links
|
||||
5. Add error handling for invalid files
|
||||
|
||||
Example 2:
|
||||
|
||||
1. Define CSS variables for colors
|
||||
2. Add toggle with localStorage state
|
||||
3. Refactor components to use variables
|
||||
4. Verify all views for readability
|
||||
5. Add smooth theme-change transition
|
||||
|
||||
Example 3:
|
||||
|
||||
1. Set up Node.js + WebSocket server
|
||||
2. Add join/leave broadcast events
|
||||
3. Implement messaging with timestamps
|
||||
4. Add usernames + mention highlighting
|
||||
5. Persist messages in lightweight DB
|
||||
6. Add typing indicators + unread count
|
||||
|
||||
**Low-quality plans**
|
||||
|
||||
Example 1:
|
||||
|
||||
1. Create CLI tool
|
||||
2. Add Markdown parser
|
||||
3. Convert to HTML
|
||||
|
||||
Example 2:
|
||||
|
||||
1. Add dark mode toggle
|
||||
2. Save preference
|
||||
3. Make styles look good
|
||||
|
||||
Example 3:
|
||||
|
||||
1. Create single-file HTML game
|
||||
2. Run quick sanity check
|
||||
3. Summarize usage instructions
|
||||
|
||||
If you need to write a plan, only write high quality plans, not low quality ones.
|
||||
|
||||
## Task execution
|
||||
|
||||
You are a coding agent. Please keep going until the query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. Autonomously resolve the query to the best of your ability, using the tools available to you, before coming back to the user. Do NOT guess or make up an answer.
|
||||
|
||||
You MUST adhere to the following criteria when solving queries:
|
||||
|
||||
- Working on the repo(s) in the current environment is allowed, even if they are proprietary.
|
||||
- Analyzing code for vulnerabilities is allowed.
|
||||
- Showing user code and tool call details is allowed.
|
||||
|
||||
If completing the user's task requires writing or modifying files, your code and final answer should follow these coding guidelines, though user instructions (i.e. AGENTS.md) may override these guidelines:
|
||||
|
||||
- Fix the problem at the root cause rather than applying surface-level patches, when possible.
|
||||
- Avoid unneeded complexity in your solution.
|
||||
- Do not attempt to fix unrelated bugs or broken tests. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
|
||||
- Update documentation as necessary.
|
||||
- Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task.
|
||||
- Use `git log` and `git blame` to search the history of the codebase if additional context is required.
|
||||
- NEVER add copyright or license headers unless specifically requested.
|
||||
- Do not `git commit` your changes or create new git branches unless explicitly requested.
|
||||
- Do not add inline comments within code unless explicitly requested.
|
||||
- Do not use one-letter variable names unless explicitly requested.
|
||||
- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor.
|
||||
|
||||
## Validating your work
|
||||
|
||||
If the codebase has tests or the ability to build or run, consider using them to verify that your work is complete.
|
||||
|
||||
When testing, your philosophy should be to start as specific as possible to the code you changed so that you can catch issues efficiently, then make your way to broader tests as you build confidence. If there's no test for the code you changed, and if the adjacent patterns in the codebases show that there's a logical place for you to add a test, you may do so. However, do not add tests to codebases with no tests.
|
||||
|
||||
Similarly, once you're confident in correctness, you can suggest or use formatting commands to ensure that your code is well formatted. If there are issues you can iterate up to 3 times to get formatting right, but if you still can't manage it's better to save the user time and present them a correct solution where you call out the formatting in your final message. If the codebase does not have a formatter configured, do not add one.
|
||||
|
||||
For all of testing, running, building, and formatting, do not attempt to fix unrelated bugs. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
|
||||
|
||||
Be mindful of whether to run validation commands proactively. In the absence of behavioral guidance:
|
||||
|
||||
- When running in non-interactive approval modes like **never** or **on-failure**, proactively run tests, lint and do whatever you need to ensure you've completed the task.
|
||||
- When working in interactive approval modes like **untrusted**, or **on-request**, hold off on running tests or lint commands until the user is ready for you to finalize your output, because these commands take time to run and slow down iteration. Instead suggest what you want to do next, and let the user confirm first.
|
||||
- When working on test-related tasks, such as adding tests, fixing tests, or reproducing a bug to verify behavior, you may proactively run tests regardless of approval mode. Use your judgement to decide whether this is a test-related task.
|
||||
|
||||
## Ambition vs. precision
|
||||
|
||||
For tasks that have no prior context (i.e. the user is starting something brand new), you should feel free to be ambitious and demonstrate creativity with your implementation.
|
||||
|
||||
If you're operating in an existing codebase, you should make sure you do exactly what the user asks with surgical precision. Treat the surrounding codebase with respect, and don't overstep (i.e. changing filenames or variables unnecessarily). You should balance being sufficiently ambitious and proactive when completing tasks of this nature.
|
||||
|
||||
You should use judicious initiative to decide on the right level of detail and complexity to deliver based on the user's needs. This means showing good judgment that you're capable of doing the right extras without gold-plating. This might be demonstrated by high-value, creative touches when scope of the task is vague; while being surgical and targeted when scope is tightly specified.
|
||||
|
||||
## Sharing progress updates
|
||||
|
||||
For especially longer tasks that you work on (i.e. requiring many tool calls, or a plan with multiple steps), you should provide progress updates back to the user at reasonable intervals. These updates should be structured as a concise sentence or two (no more than 8-10 words long) recapping progress so far in plain language: this update demonstrates your understanding of what needs to be done, progress so far (i.e. files explores, subtasks complete), and where you're going next.
|
||||
|
||||
Before doing large chunks of work that may incur latency as experienced by the user (i.e. writing a new file), you should send a concise message to the user with an update indicating what you are doing to ensure they know what you are spending time on. Don't start editing or writing large files before informing the user what you are doing and why.
|
||||
|
||||
The messages you send before tool calls should describe what is immediately about to be done next in very concise language. If there was previous work done, this preamble message should also include a note about the work done so far to bring the user along.
|
||||
|
||||
## Presenting your work and final message
|
||||
|
||||
Your final message should read naturally, like an update from a concise teammate. For casual conversation, brainstorming tasks, or quick questions from the user, respond in a friendly, conversational tone. You should ask questions, suggest ideas, and adapt to the user’s style. If you've finished a large amount of work, when describing what you've done to the user, you should follow the final answer formatting guidelines to communicate substantive changes. You don't need to add structured formatting for one-word answers, greetings, or purely conversational exchanges.
|
||||
|
||||
You can skip heavy formatting for single, simple actions or confirmations. In these cases, respond in plain sentences with any relevant next step or quick option. Reserve multi-section structured responses for results that need grouping or explanation.
|
||||
|
||||
The user is working on the same computer as you, and has access to your work. As such there's no need to show the full contents of large files you have already written unless the user explicitly asks for them. Similarly, if you've created or modified files using the editing tools available in this run, there's no need to tell users to "save the file" or "copy the code into a file". Just reference the file path.
|
||||
|
||||
If there's something that you think you could help with as a logical next step, concisely ask the user if they want you to do so. Good examples of this are running tests, committing changes, or building out the next logical component. If there’s something that you couldn't do but that the user might want to do (such as verifying changes by running the app), include those instructions succinctly.
|
||||
|
||||
Brevity is very important as a default. You should be very concise (i.e. no more than 10 lines), but can relax this requirement for tasks where additional detail and comprehensiveness is important for the user's understanding.
|
||||
|
||||
### Final answer structure and style guidelines
|
||||
|
||||
You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
|
||||
|
||||
**Section Headers**
|
||||
|
||||
- Use only when they improve clarity. They are not mandatory for every answer.
|
||||
- Choose descriptive names that fit the content.
|
||||
- Keep headers short (1–3 words) and in `**Title Case**`. Always start headers with `**` and end with `**`.
|
||||
- Leave no blank line before the first bullet under a header.
|
||||
- Section headers should only be used where they genuinely improve scanability; avoid fragmenting the answer.
|
||||
|
||||
**Bullets**
|
||||
|
||||
- Use `-` followed by a space for every bullet.
|
||||
- Merge related points when possible; avoid a bullet for every trivial detail.
|
||||
- Keep bullets to one line unless breaking for clarity is unavoidable.
|
||||
- Group into short lists (4–6 bullets) ordered by importance.
|
||||
- Use consistent keyword phrasing and formatting across sections.
|
||||
|
||||
**Monospace**
|
||||
|
||||
- Wrap all commands, file paths, env vars, and code identifiers in backticks.
|
||||
- Apply this to inline examples and to bullet keywords if the keyword itself is a literal file or command.
|
||||
- Never mix monospace and bold markers; choose one based on whether it’s a keyword or inline code/path.
|
||||
|
||||
**File References**
|
||||
When referencing files in your response, make sure to include the relevant start line and always follow the below rules:
|
||||
* Use inline code to make file paths clickable.
|
||||
* Each reference should have a stand alone path. Even if it's the same file.
|
||||
* Accepted: absolute, workspace-relative, `a/` or `b/` diff prefixes, or bare filename/suffix.
|
||||
* Line/column (1-based, optional): `:line[:column]` or `#Lline[Ccolumn]` (column defaults to 1).
|
||||
* Do not use URIs like `file://`, `vscode://`, or `https://`.
|
||||
* Do not provide range of lines.
|
||||
* Examples: `src/app.ts`, `src/app.ts:42`, `b/server/index.js#L10`, `C:\repo\project\main.rs:12:5`
|
||||
|
||||
**Structure**
|
||||
|
||||
- Place related bullets together; don’t mix unrelated concepts in the same section.
|
||||
- Order sections from general to specific to supporting info.
|
||||
- For subsections (e.g. "Binaries" under "Rust Workspace"), introduce with a bolded keyword bullet, then list items under it.
|
||||
- Match structure to complexity:
|
||||
- Multi-part or detailed results: use clear headers and grouped bullets.
|
||||
- Simple results: use minimal headers, possibly just a short list or paragraph.
|
||||
|
||||
**Tone**
|
||||
|
||||
- Keep the voice collaborative and natural, like a coding partner handing off work.
|
||||
- Be concise and factual. Avoid filler or unnecessary repetition.
|
||||
- Use present tense and active voice (e.g. "Runs tests" not "This will run tests").
|
||||
- Keep descriptions self-contained; don’t refer to “above” or “below”.
|
||||
- Use parallel structure in lists for consistency.
|
||||
|
||||
**Don’t**
|
||||
|
||||
- Don’t use the literal words “bold” or “monospace” in the content.
|
||||
- Don’t nest bullets or create deep hierarchies.
|
||||
- Don’t output ANSI escape codes directly.
|
||||
- Don’t cram unrelated keywords into a single bullet; split for clarity.
|
||||
- Don’t let keyword lists run long. Wrap or reformat for scanability.
|
||||
|
||||
Generally, ensure your final answers adapt their shape and depth to the request. For example, answers to code explanations should have a precise, structured explanation with code references that answer the question directly. For tasks with a simple implementation, lead with the outcome and supplement only with what’s needed for clarity. Larger changes can be presented as a logical walkthrough of your approach, grouping related steps, explaining rationale where it adds value, and highlighting next actions to accelerate the user. Your answers should provide the right level of detail while being easily scannable.
|
||||
|
||||
For casual greetings, acknowledgements, or other one-off conversational messages that are not delivering substantive information or structured results, respond naturally without section headers or bullet formatting.
|
||||
@@ -2879,6 +2879,10 @@ pub enum ReviewDecision {
|
||||
/// User has approved this command and the agent should execute it.
|
||||
Approved,
|
||||
|
||||
/// User has approved this command, but wants the agent to execute a
|
||||
/// replacement command instead of the originally proposed one.
|
||||
ApprovedWithCommandOverride { command: Vec<String> },
|
||||
|
||||
/// User has approved this command and wants to apply the proposed execpolicy
|
||||
/// amendment so future matching commands are permitted.
|
||||
ApprovedExecpolicyAmendment {
|
||||
@@ -2912,6 +2916,7 @@ impl ReviewDecision {
|
||||
pub fn to_opaque_string(&self) -> &'static str {
|
||||
match self {
|
||||
ReviewDecision::Approved => "approved",
|
||||
ReviewDecision::ApprovedWithCommandOverride { .. } => "approved_with_command_override",
|
||||
ReviewDecision::ApprovedExecpolicyAmendment { .. } => "approved_with_amendment",
|
||||
ReviewDecision::ApprovedForSession => "approved_for_session",
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
|
||||
148
codex-sdk-v2/README.md
Normal file
148
codex-sdk-v2/README.md
Normal file
@@ -0,0 +1,148 @@
|
||||
# codex-sdk-v2
|
||||
|
||||
`codex-sdk-v2` is an experimental Python prototype that borrows the host/runtime split from Universal Computer but uses `codex app-server` as the execution runtime.
|
||||
|
||||
Prototype shape:
|
||||
|
||||
- The host SDK owns workspace materialization, Codex process startup, and Responses API transport.
|
||||
- A host bridge exposes `/v1/responses` to the locally running Codex runtime.
|
||||
- Codex runs with `codex app-server --listen stdio://`.
|
||||
- The SDK talks to app-server over stdio.
|
||||
- Thread startup uses `thread/start.sdkDelegation` to point Codex at the host bridge.
|
||||
- The SDK owns the bridge lifecycle and `await task.close()` tears down both the app-server session and the bridge.
|
||||
- The prototype uses a local attached-process backend so it can run against the host-installed Codex binary without cross-compiling a Linux container binary.
|
||||
|
||||
Capability model:
|
||||
|
||||
- Capabilities are the SDK’s grouping abstraction for UC-style bundles.
|
||||
- A `Capability` can contribute:
|
||||
- `tools()`
|
||||
- `instructions()`
|
||||
- `process_manifest(manifest)`
|
||||
- The capability API intentionally uses a single `tools()` method; the built-in vs function-tool split stays internal to the SDK runtime.
|
||||
- The default capability set is `UnifiedExecCapability()`, which enables `ExecCommand` and `WriteStdin`.
|
||||
|
||||
Tool model:
|
||||
|
||||
- Built-in Codex tools are exposed as Python classes such as `ExecCommand`, `WriteStdin`, `ApplyPatch`, `ReadFile`, and `ViewImage`.
|
||||
- The SDK sends those classes to app-server as an exact `thread/start.builtinTools` allowlist.
|
||||
- Defaults come from `UnifiedExecCapability()`, which enables `ExecCommand` plus `WriteStdin`.
|
||||
- Host-side custom tools subclass `FunctionTool`; the SDK registers them as dynamic tools internally and answers `item/tool/call` requests on the host.
|
||||
- SDK users do not need to work with raw app-server `dynamicTools` payloads directly.
|
||||
- Custom `FunctionTool`s can contribute instruction fragments; the SDK folds those fragments into `developerInstructions`.
|
||||
- Built-in tool instructions are owned by Codex itself and are composed in Rust from the enabled built-in capability set.
|
||||
|
||||
Example capability:
|
||||
|
||||
```python
|
||||
from codex_sdk_v2 import Capability, ExecCommand, WriteStdin
|
||||
|
||||
|
||||
class UnifiedExec(Capability):
|
||||
def tools(self):
|
||||
return (ExecCommand, WriteStdin)
|
||||
```
|
||||
|
||||
Pending tool call API:
|
||||
|
||||
- `task.pending_tool_calls()` returns unresolved tool calls.
|
||||
- Each pending tool call supports `describe()` and `await tool_call(task)`.
|
||||
- The pending tool call subclasses are:
|
||||
- `PendingCommandExecution`
|
||||
- `PendingFileChange`
|
||||
- `PendingFunctionToolCall`
|
||||
- The explicit host helpers are:
|
||||
- `task.approve(...)`
|
||||
- `task.reject(...)`
|
||||
- `task.replace_command(...)`
|
||||
- `task.run_function_tool(...)`
|
||||
- `task.submit_tool_result(...)`
|
||||
|
||||
Decision model:
|
||||
|
||||
- `ApproveDecision()`
|
||||
- `RejectDecision()`
|
||||
- `DeferDecision()`
|
||||
- `ReplaceCommandDecision(command=[...])`
|
||||
- `RunDecision(arguments=...)`
|
||||
- `RespondDecision(result=...)`
|
||||
|
||||
Approval model:
|
||||
|
||||
- Manual is the default.
|
||||
- If a tool does not make a decision, its call stays pending in `task.pending_tool_calls()`.
|
||||
- `FunctionTool.approve(call)` can resolve or defer a function tool call.
|
||||
- `BuiltinTool.with_approval_policy(policy=...)` can resolve or defer a built-in call.
|
||||
- There is no agent-wide global approval policy in the prototype.
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
from codex_sdk_v2 import Agent, ApproveDecision, DeferDecision
|
||||
from codex_sdk_v2 import ExecCommand, FunctionTool, Manifest
|
||||
from codex_sdk_v2 import PendingCommandExecution, ReplaceCommandDecision, WriteStdin
|
||||
|
||||
|
||||
class LookupRefundStatus(FunctionTool):
|
||||
name = "lookup_refund_status"
|
||||
description = "Return a canned refund status for a demo taxpayer id."
|
||||
input_schema = {
|
||||
"type": "object",
|
||||
"properties": {"taxpayer_id": {"type": "string"}},
|
||||
"required": ["taxpayer_id"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
async def approve(self, call):
|
||||
if call.arguments["taxpayer_id"].startswith("demo_"):
|
||||
return ApproveDecision()
|
||||
return DeferDecision()
|
||||
|
||||
async def run(self, arguments):
|
||||
return f"Refund status for {arguments['taxpayer_id']}: approved"
|
||||
|
||||
|
||||
async def approve_exec(call: PendingCommandExecution):
|
||||
if call.command and call.command.startswith("ls"):
|
||||
return ApproveDecision()
|
||||
if call.command and call.command.startswith("cat"):
|
||||
return ReplaceCommandDecision(command=["sed", "-n", "1,20p", "README.md"])
|
||||
return DeferDecision()
|
||||
|
||||
|
||||
agent = Agent(
|
||||
manifest=Manifest(root="/workspace"),
|
||||
tools=(
|
||||
ExecCommand.with_approval_policy(policy=approve_exec),
|
||||
WriteStdin,
|
||||
LookupRefundStatus(),
|
||||
),
|
||||
)
|
||||
|
||||
task = await agent.start()
|
||||
await stream_turn(task, start_text="Help me with my taxes")
|
||||
while task.pending_tool_calls():
|
||||
for tool_call in task.pending_tool_calls():
|
||||
print(tool_call.describe())
|
||||
await tool_call(task)
|
||||
await stream_turn(task)
|
||||
```
|
||||
|
||||
Current delegation shape:
|
||||
|
||||
1. The SDK starts a local HTTP bridge on the host.
|
||||
2. `thread/start.sdkDelegation.bridgeUrl` tells Codex to use that host bridge as its Responses base URL for the thread.
|
||||
3. Codex sends the raw Responses request body to the host bridge.
|
||||
4. The host bridge adds the upstream `Authorization` header on the host side and forwards the request to OpenAI.
|
||||
5. The bridge streams the upstream response back to Codex unchanged.
|
||||
|
||||
This means the prototype is bridge-based delegation, not the full event-by-event delegated Responses flow from the RFC yet.
|
||||
|
||||
Debugging:
|
||||
|
||||
- Set `CODEX_SDK_V2_DEBUG=1` to print JSON-RPC traffic and app-server stderr while running an example.
|
||||
- The local backend prefers a repo-built `codex-rs/target/debug/codex-app-server` binary when present; otherwise it falls back to `codex` on your `PATH`.
|
||||
|
||||
Current limitation:
|
||||
|
||||
- The UC-style pending-tool-call flow is now present in-memory on the SDK task object. Persisting unresolved tool calls cleanly across a full host process restart still depends on replay behavior from app-server for the underlying pending request type.
|
||||
26
codex-sdk-v2/examples/packaged_agents/byo_session.py
Normal file
26
codex-sdk-v2/examples/packaged_agents/byo_session.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from codex_sdk_v2 import Agent, LocalBackend, LocalBackendOptions, Manifest
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
backend = LocalBackend()
|
||||
manifest = Manifest(root="/workspace")
|
||||
session = await backend.create_session(
|
||||
manifest=manifest,
|
||||
options=LocalBackendOptions(),
|
||||
)
|
||||
|
||||
agent = Agent(manifest=manifest, backend=backend)
|
||||
try:
|
||||
task = await agent.start(session=session)
|
||||
text = await task.collect_text("Reply with a short hello from the delegated Codex runtime.")
|
||||
print(text)
|
||||
finally:
|
||||
await task.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,50 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from codex_sdk_v2 import Agent, ApproveDecision, FunctionTool, LocalBackendOptions, Manifest
|
||||
|
||||
|
||||
class LookupRefundStatus(FunctionTool):
|
||||
name = "lookup_refund_status"
|
||||
description = "Return a canned refund status for a demo taxpayer id."
|
||||
input_schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"taxpayer_id": {"type": "string"},
|
||||
},
|
||||
"required": ["taxpayer_id"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
async def approve(self, call) -> ApproveDecision:
|
||||
return ApproveDecision()
|
||||
|
||||
async def run(self, arguments: dict[str, object]) -> str:
|
||||
taxpayer_id = str(arguments["taxpayer_id"])
|
||||
return (
|
||||
f"Refund status for {taxpayer_id}: accepted, refund approved, "
|
||||
"expected deposit in 5 business days."
|
||||
)
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
agent = Agent(
|
||||
manifest=Manifest(root="/workspace"),
|
||||
tools=(LookupRefundStatus(),),
|
||||
)
|
||||
task = await agent.start(backend_options=LocalBackendOptions())
|
||||
try:
|
||||
text = await task.collect_text(
|
||||
"Use the available refund lookup tool for taxpayer id demo-123 and summarize the result."
|
||||
)
|
||||
print(text)
|
||||
finally:
|
||||
await task.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if not os.environ.get("OPENAI_API_KEY"):
|
||||
raise RuntimeError("OPENAI_API_KEY must be set")
|
||||
asyncio.run(main())
|
||||
BIN
codex-sdk-v2/examples/packaged_agents/data/sample_w2.pdf
Normal file
BIN
codex-sdk-v2/examples/packaged_agents/data/sample_w2.pdf
Normal file
Binary file not shown.
@@ -0,0 +1,76 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from codex_sdk_v2 import Agent
|
||||
from codex_sdk_v2 import ApproveDecision
|
||||
from codex_sdk_v2 import DeferDecision
|
||||
from codex_sdk_v2 import ExecCommand
|
||||
from codex_sdk_v2 import FunctionTool
|
||||
from codex_sdk_v2 import Manifest
|
||||
from codex_sdk_v2 import PendingCommandExecution
|
||||
from codex_sdk_v2 import ReplaceCommandDecision
|
||||
from codex_sdk_v2 import WriteStdin
|
||||
|
||||
|
||||
class LookupRefundStatus(FunctionTool):
|
||||
name = "lookup_refund_status"
|
||||
description = "Return a canned refund status for a demo taxpayer id."
|
||||
input_schema = {
|
||||
"type": "object",
|
||||
"properties": {"taxpayer_id": {"type": "string"}},
|
||||
"required": ["taxpayer_id"],
|
||||
"additionalProperties": False,
|
||||
}
|
||||
|
||||
async def approve(self, call):
|
||||
taxpayer_id = call.arguments.get("taxpayer_id", "")
|
||||
if taxpayer_id.startswith("demo_"):
|
||||
return ApproveDecision()
|
||||
return DeferDecision()
|
||||
|
||||
async def run(self, arguments):
|
||||
return f"Refund status for {arguments['taxpayer_id']}: approved"
|
||||
|
||||
|
||||
async def approve_exec(tool_call: PendingCommandExecution):
|
||||
if tool_call.command and tool_call.command.startswith("ls"):
|
||||
return ApproveDecision()
|
||||
if tool_call.command and tool_call.command.startswith("cat"):
|
||||
return ReplaceCommandDecision(command=["sed", "-n", "1,20p", "README.md"])
|
||||
return DeferDecision()
|
||||
|
||||
|
||||
async def stream_turn(task, start_text: str | None = None) -> None:
|
||||
events = task.run(start_text) if start_text is not None else task.resume()
|
||||
async for notification in events:
|
||||
if notification.method == "item/agentMessage/delta":
|
||||
delta = notification.params.get("delta")
|
||||
if isinstance(delta, str):
|
||||
print(delta, end="", flush=True)
|
||||
print()
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
agent = Agent(
|
||||
manifest=Manifest(),
|
||||
tools=(
|
||||
ExecCommand.with_approval_policy(policy=approve_exec),
|
||||
WriteStdin,
|
||||
LookupRefundStatus(),
|
||||
),
|
||||
)
|
||||
task = await agent.start()
|
||||
await stream_turn(
|
||||
task,
|
||||
"List the current directory, then read README.md, then look up refund status for demo_123.",
|
||||
)
|
||||
while task.pending_tool_calls():
|
||||
for tool_call in task.pending_tool_calls():
|
||||
print(tool_call.describe())
|
||||
await tool_call(task)
|
||||
await stream_turn(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from codex_sdk_v2 import Agent, Dir, LocalBackendOptions, LocalDir, Manifest, ReadFile, ListDir
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
examples_dir = Path(__file__).resolve().parents[2] / "examples"
|
||||
manifest = Manifest(
|
||||
root="/workspace",
|
||||
entries={
|
||||
"examples": LocalDir(src=examples_dir),
|
||||
"notes": Dir(),
|
||||
},
|
||||
)
|
||||
agent = Agent(manifest=manifest, tools=(ListDir, ReadFile))
|
||||
task = await agent.start(backend_options=LocalBackendOptions())
|
||||
try:
|
||||
async for notification in task.run(
|
||||
"List the top-level files under the examples directory, then read the workspace description if you can find one."
|
||||
):
|
||||
if notification.method == "item/agentMessage/delta":
|
||||
delta = notification.params.get("delta")
|
||||
if isinstance(delta, str):
|
||||
print(delta, end="", flush=True)
|
||||
print()
|
||||
finally:
|
||||
await task.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if not os.environ.get("OPENAI_API_KEY"):
|
||||
raise RuntimeError("OPENAI_API_KEY must be set")
|
||||
asyncio.run(main())
|
||||
51
codex-sdk-v2/examples/packaged_agents/tax_prep.py
Normal file
51
codex-sdk-v2/examples/packaged_agents/tax_prep.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from codex_sdk_v2 import Agent, Dir, LocalBackendOptions, LocalFile, Manifest
|
||||
|
||||
DATA_PATH = Path(__file__).resolve().parent / "data"
|
||||
W2_PATH = DATA_PATH / "sample_w2.pdf"
|
||||
|
||||
INSTRUCTIONS = """
|
||||
You are a federal tax filing agent. Compute year-end taxes and produce a filled Form 1040 for the current filing year using only the supplied files.
|
||||
Save final outputs under the output directory in the workspace and provide a short summary of key amounts.
|
||||
|
||||
This is a demo. Assume:
|
||||
1. filing status single
|
||||
2. ssn 123-45-6789
|
||||
3. dob 1991-01-01
|
||||
4. no other income docs
|
||||
5. if other info is needed, make up a test value
|
||||
""".strip()
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
manifest = Manifest(
|
||||
root="/workspace",
|
||||
entries={
|
||||
"taxpayer_data": Dir(children={"w2.pdf": LocalFile(src=W2_PATH)}),
|
||||
"output": Dir(),
|
||||
},
|
||||
)
|
||||
agent = Agent(manifest=manifest, developer_instructions=INSTRUCTIONS)
|
||||
task = await agent.start(backend_options=LocalBackendOptions())
|
||||
try:
|
||||
async for notification in task.run(
|
||||
"Please generate a 1040 for the current filing year using the supplied W-2 and save the result under the output directory."
|
||||
):
|
||||
if notification.method == "item/agentMessage/delta":
|
||||
delta = notification.params.get("delta")
|
||||
if isinstance(delta, str):
|
||||
print(delta, end="", flush=True)
|
||||
print()
|
||||
finally:
|
||||
await task.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if not os.environ.get("OPENAI_API_KEY"):
|
||||
raise RuntimeError("OPENAI_API_KEY must be set")
|
||||
asyncio.run(main())
|
||||
16
codex-sdk-v2/pyproject.toml
Normal file
16
codex-sdk-v2/pyproject.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=69"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "codex-sdk-v2"
|
||||
version = "0.1.0"
|
||||
description = "Experimental Codex SDK v2 prototype built on Codex app-server"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"httpx>=0.27.0"
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
30
codex-sdk-v2/src/codex_sdk_v2.egg-info/PKG-INFO
Normal file
30
codex-sdk-v2/src/codex_sdk_v2.egg-info/PKG-INFO
Normal file
@@ -0,0 +1,30 @@
|
||||
Metadata-Version: 2.4
|
||||
Name: codex-sdk-v2
|
||||
Version: 0.1.0
|
||||
Summary: Experimental Codex SDK v2 prototype built on Codex app-server
|
||||
Requires-Python: >=3.11
|
||||
Description-Content-Type: text/markdown
|
||||
Requires-Dist: httpx>=0.27.0
|
||||
|
||||
# codex-sdk-v2
|
||||
|
||||
`codex-sdk-v2` is an experimental Python prototype that borrows the host/runtime split from Universal Computer but uses `codex app-server` as the execution runtime.
|
||||
|
||||
Prototype shape:
|
||||
|
||||
- The host SDK owns workspace materialization, Codex process startup, and Responses API transport.
|
||||
- A host bridge exposes `/v1/responses` to the locally running Codex runtime.
|
||||
- Codex runs with `codex app-server --listen stdio://`.
|
||||
- The SDK talks to app-server over stdio.
|
||||
- Thread startup uses `thread/start.sdkDelegation` to point Codex at the host bridge.
|
||||
- The prototype uses a local attached-process backend so it can run against the host-installed Codex binary without cross-compiling a Linux container binary.
|
||||
|
||||
Current delegation shape:
|
||||
|
||||
1. The SDK starts a local HTTP bridge on the host.
|
||||
2. `thread/start.sdkDelegation.bridgeUrl` tells Codex to use that host bridge as its Responses base URL for the thread.
|
||||
3. Codex sends the raw Responses request body to the host bridge.
|
||||
4. The host bridge adds the upstream `Authorization` header on the host side and forwards the request to OpenAI.
|
||||
5. The bridge streams the upstream response back to Codex unchanged.
|
||||
|
||||
This means the prototype is bridge-based delegation, not the full event-by-event delegated Responses flow from the RFC yet.
|
||||
15
codex-sdk-v2/src/codex_sdk_v2.egg-info/SOURCES.txt
Normal file
15
codex-sdk-v2/src/codex_sdk_v2.egg-info/SOURCES.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
README.md
|
||||
pyproject.toml
|
||||
src/codex_sdk_v2/__init__.py
|
||||
src/codex_sdk_v2/agent.py
|
||||
src/codex_sdk_v2/app_server_client.py
|
||||
src/codex_sdk_v2/bridge.py
|
||||
src/codex_sdk_v2/entries.py
|
||||
src/codex_sdk_v2/local_backend.py
|
||||
src/codex_sdk_v2/manifest.py
|
||||
src/codex_sdk_v2/task.py
|
||||
src/codex_sdk_v2.egg-info/PKG-INFO
|
||||
src/codex_sdk_v2.egg-info/SOURCES.txt
|
||||
src/codex_sdk_v2.egg-info/dependency_links.txt
|
||||
src/codex_sdk_v2.egg-info/requires.txt
|
||||
src/codex_sdk_v2.egg-info/top_level.txt
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
1
codex-sdk-v2/src/codex_sdk_v2.egg-info/requires.txt
Normal file
1
codex-sdk-v2/src/codex_sdk_v2.egg-info/requires.txt
Normal file
@@ -0,0 +1 @@
|
||||
httpx>=0.27.0
|
||||
1
codex-sdk-v2/src/codex_sdk_v2.egg-info/top_level.txt
Normal file
1
codex-sdk-v2/src/codex_sdk_v2.egg-info/top_level.txt
Normal file
@@ -0,0 +1 @@
|
||||
codex_sdk_v2
|
||||
107
codex-sdk-v2/src/codex_sdk_v2/__init__.py
Normal file
107
codex-sdk-v2/src/codex_sdk_v2/__init__.py
Normal file
@@ -0,0 +1,107 @@
|
||||
from .agent import Agent
|
||||
from .capabilities import Capability
|
||||
from .capabilities import UnifiedExecCapability
|
||||
from .entries import Dir, LocalDir, LocalFile
|
||||
from .local_backend import LocalBackend, LocalBackendOptions, LocalSession
|
||||
from .manifest import Manifest
|
||||
from .pending_tool_calls import ApproveDecision
|
||||
from .pending_tool_calls import DeferDecision
|
||||
from .pending_tool_calls import PendingCommandExecution
|
||||
from .pending_tool_calls import PendingFileChange
|
||||
from .pending_tool_calls import PendingFunctionToolCall
|
||||
from .pending_tool_calls import PendingToolCall
|
||||
from .pending_tool_calls import RejectDecision
|
||||
from .pending_tool_calls import ReplaceCommandDecision
|
||||
from .pending_tool_calls import RespondDecision
|
||||
from .pending_tool_calls import RunDecision
|
||||
from .pending_tool_calls import ToolDecision
|
||||
from .task import Task
|
||||
from .tools import ALL_BUILTIN_TOOLS
|
||||
from .tools import ApplyPatch
|
||||
from .tools import Artifacts
|
||||
from .tools import BuiltinTool
|
||||
from .tools import BuiltinToolSpec
|
||||
from .tools import CloseAgent
|
||||
from .tools import ConfiguredBuiltinTool
|
||||
from .tools import ExecCommand
|
||||
from .tools import FunctionTool
|
||||
from .tools import GrepFiles
|
||||
from .tools import JsRepl
|
||||
from .tools import JsReplReset
|
||||
from .tools import ListDir
|
||||
from .tools import ListMcpResourceTemplates
|
||||
from .tools import ListMcpResources
|
||||
from .tools import ReadFile
|
||||
from .tools import ReadMcpResource
|
||||
from .tools import ReportAgentJobResult
|
||||
from .tools import RequestUserInput
|
||||
from .tools import ResumeAgent
|
||||
from .tools import SearchToolBm25
|
||||
from .tools import SendInput
|
||||
from .tools import Shell
|
||||
from .tools import SpawnAgent
|
||||
from .tools import SpawnAgentsOnCsv
|
||||
from .tools import TestSyncTool
|
||||
from .tools import Tool
|
||||
from .tools import UpdatePlan
|
||||
from .tools import ViewImage
|
||||
from .tools import Wait
|
||||
from .tools import WebSearch
|
||||
from .tools import WriteStdin
|
||||
|
||||
__all__ = [
|
||||
"Agent",
|
||||
"ALL_BUILTIN_TOOLS",
|
||||
"ApproveDecision",
|
||||
"ApplyPatch",
|
||||
"Artifacts",
|
||||
"BuiltinTool",
|
||||
"BuiltinToolSpec",
|
||||
"Capability",
|
||||
"CloseAgent",
|
||||
"ConfiguredBuiltinTool",
|
||||
"DeferDecision",
|
||||
"Dir",
|
||||
"ExecCommand",
|
||||
"FunctionTool",
|
||||
"GrepFiles",
|
||||
"JsRepl",
|
||||
"JsReplReset",
|
||||
"ListDir",
|
||||
"ListMcpResourceTemplates",
|
||||
"ListMcpResources",
|
||||
"LocalBackend",
|
||||
"LocalBackendOptions",
|
||||
"LocalDir",
|
||||
"LocalFile",
|
||||
"LocalSession",
|
||||
"Manifest",
|
||||
"PendingCommandExecution",
|
||||
"PendingFileChange",
|
||||
"PendingFunctionToolCall",
|
||||
"PendingToolCall",
|
||||
"ReadFile",
|
||||
"ReadMcpResource",
|
||||
"RejectDecision",
|
||||
"ReplaceCommandDecision",
|
||||
"ReportAgentJobResult",
|
||||
"RequestUserInput",
|
||||
"RespondDecision",
|
||||
"ResumeAgent",
|
||||
"RunDecision",
|
||||
"SearchToolBm25",
|
||||
"SendInput",
|
||||
"Shell",
|
||||
"SpawnAgent",
|
||||
"SpawnAgentsOnCsv",
|
||||
"Task",
|
||||
"TestSyncTool",
|
||||
"Tool",
|
||||
"ToolDecision",
|
||||
"UpdatePlan",
|
||||
"UnifiedExecCapability",
|
||||
"ViewImage",
|
||||
"Wait",
|
||||
"WebSearch",
|
||||
"WriteStdin",
|
||||
]
|
||||
146
codex-sdk-v2/src/codex_sdk_v2/agent.py
Normal file
146
codex-sdk-v2/src/codex_sdk_v2/agent.py
Normal file
@@ -0,0 +1,146 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass, field
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from .app_server_client import JsonRpcNotification, JsonRpcServerRequest
|
||||
from .capabilities import Capability, DEFAULT_CAPABILITIES
|
||||
from .bridge import OpenAIResponsesBridge
|
||||
from .local_backend import LocalBackend, LocalBackendOptions, LocalSession
|
||||
from .manifest import Manifest
|
||||
from .task import Task
|
||||
from .tools import Tool, builtin_tools, function_tools, tool_instruction_fragments
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Agent:
|
||||
manifest: Manifest
|
||||
model: str = "gpt-5.2-codex"
|
||||
# Replaces Codex's composed base instructions for the thread. When set, this
|
||||
# bypasses the Rust-side built-in capability prompt composition.
|
||||
base_instructions: str | None = None
|
||||
# Additive developer-role instructions for the thread. These are composed
|
||||
# together with capability- and FunctionTool-contributed instruction
|
||||
# fragments and sent via `thread/start.developerInstructions`.
|
||||
developer_instructions: str | None = None
|
||||
tools: tuple[Tool | type[Tool], ...] = field(default_factory=tuple)
|
||||
capabilities: tuple[Capability, ...] = field(default_factory=lambda: DEFAULT_CAPABILITIES)
|
||||
backend: LocalBackend = field(default_factory=LocalBackend)
|
||||
approval_policy: str | None = None
|
||||
|
||||
async def start(
|
||||
self,
|
||||
*,
|
||||
backend_options: LocalBackendOptions | None = None,
|
||||
session: LocalSession | None = None,
|
||||
) -> Task:
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
if not api_key:
|
||||
raise RuntimeError("OPENAI_API_KEY must be set for the prototype bridge")
|
||||
bridge = OpenAIResponsesBridge(api_key=api_key)
|
||||
bridge.start()
|
||||
|
||||
manifest = self.manifest
|
||||
for capability in self.capabilities:
|
||||
manifest = capability.process_manifest(manifest)
|
||||
|
||||
resolved_tools: tuple[Tool | type[Tool], ...] = (
|
||||
*(tool for capability in self.capabilities for tool in capability.tools()),
|
||||
*self.tools,
|
||||
)
|
||||
builtin_tool_names, builtin_tool_policies = builtin_tools(resolved_tools)
|
||||
resolved_function_tools = function_tools(resolved_tools)
|
||||
tool_fragments = tool_instruction_fragments(resolved_tools)
|
||||
capability_fragments = [
|
||||
fragment
|
||||
for capability in self.capabilities
|
||||
if (fragment := capability.instructions()) is not None
|
||||
]
|
||||
dynamic_tools = [type(tool).dynamic_tool_spec() for tool in resolved_function_tools]
|
||||
function_tool_map = {
|
||||
type(tool).dynamic_tool_spec()["name"]: tool for tool in resolved_function_tools
|
||||
}
|
||||
developer_instructions = self.developer_instructions
|
||||
if capability_fragments or tool_fragments:
|
||||
sections = [
|
||||
fragment
|
||||
for fragment in [
|
||||
developer_instructions,
|
||||
*capability_fragments,
|
||||
*tool_fragments,
|
||||
]
|
||||
if fragment
|
||||
]
|
||||
developer_instructions = "\n\n".join(sections) if sections else None
|
||||
|
||||
if session is None:
|
||||
session = await self.backend.create_session(
|
||||
manifest=manifest,
|
||||
options=backend_options,
|
||||
)
|
||||
client = await session.start_app_server()
|
||||
await client.initialize(
|
||||
client_name="codex_sdk_v2",
|
||||
client_title="Codex SDK v2 Prototype",
|
||||
client_version="0.1.0",
|
||||
)
|
||||
approval_policy = self.approval_policy
|
||||
if approval_policy is None:
|
||||
approval_policy = "on-request" if builtin_tool_names else "never"
|
||||
thread_start_params: dict[str, Any] = {
|
||||
"model": self.model,
|
||||
"cwd": str(session.workspace_root),
|
||||
"sandbox": "danger-full-access",
|
||||
"approvalPolicy": approval_policy,
|
||||
"config": {
|
||||
"experimental_use_unified_exec_tool": True,
|
||||
},
|
||||
"baseInstructions": self.base_instructions,
|
||||
"developerInstructions": developer_instructions,
|
||||
"sdkDelegation": {
|
||||
"bridgeUrl": bridge.bridge_url,
|
||||
},
|
||||
"builtinTools": builtin_tool_names,
|
||||
"manualToolExecution": bool(builtin_tool_names),
|
||||
}
|
||||
if dynamic_tools:
|
||||
thread_start_params["dynamicTools"] = dynamic_tools
|
||||
result = await client.request("thread/start", thread_start_params)
|
||||
thread_started_notification: JsonRpcNotification | None = None
|
||||
deferred_messages: list[JsonRpcNotification | JsonRpcServerRequest] = []
|
||||
seen_message_methods: list[str] = []
|
||||
while thread_started_notification is None:
|
||||
message = await asyncio.wait_for(client.next_message(), timeout=5)
|
||||
seen_message_methods.append(message.method)
|
||||
if isinstance(message, JsonRpcNotification) and message.method == "thread/started":
|
||||
thread_started_notification = message
|
||||
else:
|
||||
deferred_messages.append(message)
|
||||
|
||||
delegation_notification: JsonRpcNotification | None = None
|
||||
while delegation_notification is None:
|
||||
try:
|
||||
message = await asyncio.wait_for(client.next_message(), timeout=5)
|
||||
except TimeoutError as exc:
|
||||
raise RuntimeError(
|
||||
"did not receive codexSdk/delegationConfigured after thread/started; "
|
||||
"if you intended to use the repo changes, make sure the example is launching "
|
||||
"the locally built app-server binary instead of the installed Codex binary; "
|
||||
f"seen={seen_message_methods}"
|
||||
) from exc
|
||||
seen_message_methods.append(message.method)
|
||||
if isinstance(message, JsonRpcNotification) and message.method == "codexSdk/delegationConfigured":
|
||||
delegation_notification = message
|
||||
else:
|
||||
deferred_messages.append(message)
|
||||
client.prepend_messages(deferred_messages)
|
||||
return Task(
|
||||
session=session,
|
||||
thread_id=result["thread"]["id"],
|
||||
initial_thread_started=thread_started_notification.params,
|
||||
function_tools=function_tool_map,
|
||||
builtin_tool_policies=builtin_tool_policies,
|
||||
_owned_bridge=bridge,
|
||||
)
|
||||
136
codex-sdk-v2/src/codex_sdk_v2/app_server_client.py
Normal file
136
codex-sdk-v2/src/codex_sdk_v2/app_server_client.py
Normal file
@@ -0,0 +1,136 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, TypeAlias
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class JsonRpcNotification:
|
||||
method: str
|
||||
params: dict[str, Any]
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class JsonRpcServerRequest:
|
||||
request_id: int | str
|
||||
method: str
|
||||
params: dict[str, Any]
|
||||
|
||||
|
||||
IncomingMessage: TypeAlias = JsonRpcNotification | JsonRpcServerRequest
|
||||
|
||||
|
||||
class AppServerClient:
|
||||
def __init__(self, process: asyncio.subprocess.Process) -> None:
|
||||
if process.stdout is None or process.stdin is None:
|
||||
raise RuntimeError("app-server process must be started with stdin/stdout pipes")
|
||||
self._process = process
|
||||
self._stdout = process.stdout
|
||||
self._stdin = process.stdin
|
||||
self._request_id = 0
|
||||
self._pending_messages: list[IncomingMessage] = []
|
||||
self._debug_enabled = os.environ.get("CODEX_SDK_V2_DEBUG") == "1"
|
||||
|
||||
async def initialize(self, *, client_name: str, client_title: str, client_version: str) -> None:
|
||||
await self.request(
|
||||
"initialize",
|
||||
{
|
||||
"clientInfo": {
|
||||
"name": client_name,
|
||||
"title": client_title,
|
||||
"version": client_version,
|
||||
},
|
||||
"capabilities": {"experimentalApi": True},
|
||||
},
|
||||
)
|
||||
await self.notify("initialized", {})
|
||||
|
||||
async def notify(self, method: str, params: dict[str, Any]) -> None:
|
||||
await self._write({"method": method, "params": params})
|
||||
|
||||
async def request(self, method: str, params: dict[str, Any]) -> dict[str, Any]:
|
||||
request_id = self._request_id
|
||||
self._request_id += 1
|
||||
await self._write({"id": request_id, "method": method, "params": params})
|
||||
while True:
|
||||
message = await self._read_message()
|
||||
if message.get("id") == request_id and "method" not in message:
|
||||
if "error" in message:
|
||||
raise RuntimeError(f"app-server {method} failed: {message['error']}")
|
||||
return message["result"]
|
||||
queued = self._decode_incoming(message)
|
||||
if queued is not None:
|
||||
self._pending_messages.append(queued)
|
||||
|
||||
async def send_result(self, request_id: int | str, result: dict[str, Any]) -> None:
|
||||
await self._write({"id": request_id, "result": result})
|
||||
|
||||
async def send_error(self, request_id: int | str, code: int, message: str) -> None:
|
||||
await self._write(
|
||||
{
|
||||
"id": request_id,
|
||||
"error": {
|
||||
"code": code,
|
||||
"message": message,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
async def next_message(self) -> IncomingMessage:
|
||||
if self._pending_messages:
|
||||
return self._pending_messages.pop(0)
|
||||
while True:
|
||||
message = await self._read_message()
|
||||
incoming = self._decode_incoming(message)
|
||||
if incoming is not None:
|
||||
return incoming
|
||||
|
||||
async def next_notification(self) -> JsonRpcNotification:
|
||||
for index, pending in enumerate(self._pending_messages):
|
||||
if isinstance(pending, JsonRpcNotification):
|
||||
return self._pending_messages.pop(index)
|
||||
while True:
|
||||
message = await self._read_message()
|
||||
incoming = self._decode_incoming(message)
|
||||
if incoming is None:
|
||||
continue
|
||||
if isinstance(incoming, JsonRpcNotification):
|
||||
return incoming
|
||||
self._pending_messages.append(incoming)
|
||||
|
||||
def prepend_messages(self, messages: list[IncomingMessage]) -> None:
|
||||
if messages:
|
||||
self._pending_messages = messages + self._pending_messages
|
||||
|
||||
async def _write(self, payload: dict[str, Any]) -> None:
|
||||
if self._debug_enabled:
|
||||
print(f"[codex-sdk-v2] -> {payload}", file=sys.stderr)
|
||||
data = json.dumps(payload, separators=(",", ":")).encode("utf-8") + b"\n"
|
||||
self._stdin.write(data)
|
||||
await self._stdin.drain()
|
||||
|
||||
async def _read_message(self) -> dict[str, Any]:
|
||||
line = await self._stdout.readline()
|
||||
if not line:
|
||||
raise RuntimeError("app-server closed the transport")
|
||||
message = json.loads(line.decode("utf-8"))
|
||||
if self._debug_enabled:
|
||||
print(f"[codex-sdk-v2] <- {message}", file=sys.stderr)
|
||||
return message
|
||||
|
||||
def _decode_incoming(self, message: dict[str, Any]) -> IncomingMessage | None:
|
||||
method = message.get("method")
|
||||
if method is None:
|
||||
return None
|
||||
params = message.get("params", {})
|
||||
if "id" in message:
|
||||
return JsonRpcServerRequest(
|
||||
request_id=message["id"],
|
||||
method=method,
|
||||
params=params,
|
||||
)
|
||||
return JsonRpcNotification(method=method, params=params)
|
||||
121
codex-sdk-v2/src/codex_sdk_v2/bridge.py
Normal file
121
codex-sdk-v2/src/codex_sdk_v2/bridge.py
Normal file
@@ -0,0 +1,121 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from http import HTTPStatus
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
import threading
|
||||
from typing import Protocol
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
class ResponsesBridge(Protocol):
|
||||
def serve_forever(self) -> None: ...
|
||||
def shutdown(self) -> None: ...
|
||||
@property
|
||||
def bridge_url(self) -> str: ...
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class _BridgeConfig:
|
||||
bind_host: str
|
||||
port: int
|
||||
upstream_url: str
|
||||
auth_header: str
|
||||
|
||||
|
||||
class _ResponsesHandler(BaseHTTPRequestHandler):
|
||||
server: "_BridgeServer"
|
||||
|
||||
def do_POST(self) -> None: # noqa: N802
|
||||
config = self.server.config
|
||||
if self.path != "/v1/responses":
|
||||
self.send_error(HTTPStatus.FORBIDDEN)
|
||||
return
|
||||
|
||||
content_length = int(self.headers.get("Content-Length", "0"))
|
||||
body = self.rfile.read(content_length)
|
||||
|
||||
upstream_headers = {
|
||||
key: value
|
||||
for key, value in self.headers.items()
|
||||
if key.lower() not in {"authorization", "host", "content-length"}
|
||||
}
|
||||
upstream_headers["Authorization"] = config.auth_header
|
||||
|
||||
with httpx.stream(
|
||||
"POST",
|
||||
config.upstream_url,
|
||||
headers=upstream_headers,
|
||||
content=body,
|
||||
timeout=None,
|
||||
) as response:
|
||||
self.send_response(response.status_code)
|
||||
for key, value in response.headers.items():
|
||||
if key.lower() in {"content-length", "transfer-encoding", "connection"}:
|
||||
continue
|
||||
self.send_header(key, value)
|
||||
self.end_headers()
|
||||
for chunk in response.iter_raw():
|
||||
self.wfile.write(chunk)
|
||||
self.wfile.flush()
|
||||
|
||||
def log_message(self, _format: str, *args: object) -> None:
|
||||
_ = args
|
||||
return
|
||||
|
||||
|
||||
class _BridgeServer(ThreadingHTTPServer):
|
||||
def __init__(self, config: _BridgeConfig) -> None:
|
||||
super().__init__((config.bind_host, config.port), _ResponsesHandler)
|
||||
self.config = config
|
||||
|
||||
|
||||
class OpenAIResponsesBridge:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
bind_host: str = "127.0.0.1",
|
||||
port: int = 0,
|
||||
upstream_url: str = "https://api.openai.com/v1/responses",
|
||||
) -> None:
|
||||
self._config = _BridgeConfig(
|
||||
bind_host=bind_host,
|
||||
port=port,
|
||||
upstream_url=upstream_url,
|
||||
auth_header=f"Bearer {api_key}",
|
||||
)
|
||||
self._server = _BridgeServer(self._config)
|
||||
self._thread: threading.Thread | None = None
|
||||
|
||||
@property
|
||||
def bridge_url(self) -> str:
|
||||
host, port = self._server.server_address
|
||||
return f"http://{host}:{port}/v1"
|
||||
|
||||
def serve_forever(self) -> None:
|
||||
self._server.serve_forever(poll_interval=0.1)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._thread is not None:
|
||||
return
|
||||
# `ThreadingHTTPServer` is synchronous. Running it on a daemon thread keeps
|
||||
# the bridge loop independent from the caller's asyncio event loop.
|
||||
self._thread = threading.Thread(target=self.serve_forever, daemon=True)
|
||||
self._thread.start()
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._server.shutdown()
|
||||
self._server.server_close()
|
||||
if self._thread is not None:
|
||||
self._thread.join(timeout=1)
|
||||
self._thread = None
|
||||
|
||||
def __enter__(self) -> "OpenAIResponsesBridge":
|
||||
self.start()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb) -> None:
|
||||
_ = (exc_type, exc, tb)
|
||||
self.shutdown()
|
||||
28
codex-sdk-v2/src/codex_sdk_v2/capabilities.py
Normal file
28
codex-sdk-v2/src/codex_sdk_v2/capabilities.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .manifest import Manifest
|
||||
from .tools import ExecCommand
|
||||
from .tools import Tool
|
||||
from .tools import WriteStdin
|
||||
|
||||
|
||||
class Capability:
|
||||
def tools(self) -> tuple[Tool | type[Tool], ...]:
|
||||
return ()
|
||||
|
||||
def instructions(self) -> str | None:
|
||||
return None
|
||||
|
||||
def process_manifest(self, manifest: Manifest) -> Manifest:
|
||||
return manifest
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class UnifiedExecCapability(Capability):
|
||||
def tools(self) -> tuple[Tool | type[Tool], ...]:
|
||||
return (ExecCommand, WriteStdin)
|
||||
|
||||
|
||||
DEFAULT_CAPABILITIES: tuple[Capability, ...] = (UnifiedExecCapability(),)
|
||||
43
codex-sdk-v2/src/codex_sdk_v2/entries.py
Normal file
43
codex-sdk-v2/src/codex_sdk_v2/entries.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Entry:
|
||||
def materialize(self, destination: Path) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Dir(Entry):
|
||||
children: dict[str | Path, Entry] = field(default_factory=dict)
|
||||
description: str | None = None
|
||||
|
||||
def materialize(self, destination: Path) -> None:
|
||||
destination.mkdir(parents=True, exist_ok=True)
|
||||
for name, entry in self.children.items():
|
||||
entry.materialize(destination / Path(name))
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class LocalFile(Entry):
|
||||
src: Path
|
||||
mode: int = 0o644
|
||||
|
||||
def materialize(self, destination: Path) -> None:
|
||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(self.src, destination)
|
||||
destination.chmod(self.mode)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class LocalDir(Entry):
|
||||
src: Path
|
||||
|
||||
def materialize(self, destination: Path) -> None:
|
||||
if destination.exists():
|
||||
shutil.rmtree(destination)
|
||||
shutil.copytree(self.src, destination)
|
||||
53
codex-sdk-v2/src/codex_sdk_v2/events.py
Normal file
53
codex-sdk-v2/src/codex_sdk_v2/events.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Literal, TypeAlias
|
||||
|
||||
from .app_server_client import JsonRpcNotification, JsonRpcServerRequest
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class ApprovalDecision:
    """The host's verdict for an approval request."""

    # "approve_for_session" persists the approval for the rest of the session;
    # "cancel" aborts rather than merely declining.
    decision: Literal["approve", "approve_for_session", "reject", "cancel"]


@dataclass(frozen=True, slots=True)
class CommandApprovalRequestEvent:
    """Server request asking the host to approve a command execution."""

    request_id: int | str
    thread_id: str
    turn_id: str
    item_id: str
    approval_id: str | None
    reason: str | None
    command: str | None
    cwd: str | None
    # Optional structured actions accompanying the command, as raw dicts.
    command_actions: list[dict[str, Any]] | None
    # The unparsed JSON-RPC request this event was decoded from.
    raw_request: JsonRpcServerRequest


@dataclass(frozen=True, slots=True)
class FileChangeApprovalRequestEvent:
    """Server request asking the host to approve proposed file changes."""

    request_id: int | str
    thread_id: str
    turn_id: str
    item_id: str
    reason: str | None
    # Root directory the approval would grant write access under, if any.
    grant_root: str | None
    raw_request: JsonRpcServerRequest


@dataclass(frozen=True, slots=True)
class FunctionToolCallEvent:
    """Server request to invoke a host-defined function tool."""

    request_id: int | str
    thread_id: str
    turn_id: str
    call_id: str
    tool_name: str
    arguments: dict[str, Any]
    raw_request: JsonRpcServerRequest


# Union of all server-initiated requests a host may need to answer.
ApprovalRequestEvent: TypeAlias = (
    CommandApprovalRequestEvent | FileChangeApprovalRequestEvent | FunctionToolCallEvent
)
# Anything a task stream can yield: plain notifications or approval requests.
TaskEvent: TypeAlias = JsonRpcNotification | ApprovalRequestEvent
|
||||
136
codex-sdk-v2/src/codex_sdk_v2/local_backend.py
Normal file
136
codex-sdk-v2/src/codex_sdk_v2/local_backend.py
Normal file
@@ -0,0 +1,136 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
import os
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
from .app_server_client import AppServerClient
|
||||
from .manifest import Manifest
|
||||
|
||||
APP_SERVER_STREAM_LIMIT = 16 * 1024 * 1024
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class LocalBackendOptions:
|
||||
workspace_root: Path | None = None
|
||||
codex_binary: Path | None = None
|
||||
|
||||
|
||||
class LocalSession:
    """Owns one codex-app-server subprocess rooted in a workspace directory.

    Created by ``LocalBackend.create_session``; callers must ``stop()`` the
    session to release the child process and, when the workspace is owned,
    the workspace directory itself.
    """

    def __init__(
        self,
        *,
        workspace_root: Path,
        app_server_binary: Path,
        app_server_args: tuple[str, ...],
        owned_workspace: bool,
    ) -> None:
        self.workspace_root = workspace_root
        self.app_server_binary = app_server_binary
        self.app_server_args = app_server_args
        # When True, stop() deletes workspace_root (it was created for us).
        self.owned_workspace = owned_workspace
        self.app_server_process: asyncio.subprocess.Process | None = None
        self.app_server_client: AppServerClient | None = None
        self._stderr_task: asyncio.Task[None] | None = None

    async def start_app_server(self) -> AppServerClient:
        """Spawn the app-server child process (idempotent) and return its client."""
        if self.app_server_client is not None:
            return self.app_server_client
        env = os.environ.copy()
        debug_enabled = env.get("CODEX_SDK_V2_DEBUG") == "1"
        if debug_enabled and "RUST_LOG" not in env:
            env["RUST_LOG"] = "codex_app_server=info"
        process = await asyncio.create_subprocess_exec(
            str(self.app_server_binary),
            *self.app_server_args,
            cwd=str(self.workspace_root),
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
            # Raised stream limit so large JSON-RPC payloads fit in one line.
            limit=APP_SERVER_STREAM_LIMIT,
        )
        self.app_server_process = process
        if debug_enabled and process.stderr is not None:
            self._stderr_task = asyncio.create_task(self._pump_stderr(process.stderr))
        self.app_server_client = AppServerClient(process)
        return self.app_server_client

    async def stop(self) -> None:
        """Terminate the child, drain the stderr pump, and remove an owned workspace."""
        if self.app_server_process is not None:
            try:
                self.app_server_process.terminate()
            except ProcessLookupError:
                # Fix: the child may already have exited on its own; previously
                # this exception aborted stop() and skipped all cleanup below.
                pass
            await self.app_server_process.wait()
            self.app_server_process = None
        self.app_server_client = None
        if self._stderr_task is not None:
            await self._stderr_task
            self._stderr_task = None
        if self.owned_workspace:
            shutil.rmtree(self.workspace_root, ignore_errors=True)

    async def _pump_stderr(self, stream: asyncio.StreamReader) -> None:
        """Mirror the child's stderr to our stderr for debugging until EOF."""
        while True:
            line = await stream.readline()
            if not line:
                return
            print(f"[codex-app-server] {line.decode('utf-8', errors='replace').rstrip()}", file=sys.stderr)
|
||||
|
||||
|
||||
class LocalBackend:
    """Creates codex app-server sessions against locally materialized workspaces."""

    def __init__(self, *, codex_binary: Path | None = None) -> None:
        self.codex_binary = codex_binary or self._default_app_server_binary()

    async def create_session(
        self,
        *,
        manifest: Manifest,
        options: LocalBackendOptions | None = None,
    ) -> LocalSession:
        """Materialize ``manifest`` into a workspace and build a (not yet started) session."""
        options = options or LocalBackendOptions()
        codex_binary = options.codex_binary or self.codex_binary
        if not codex_binary.exists():
            raise RuntimeError(f"codex binary not found at {codex_binary}")
        app_server_args = self._app_server_args_for_binary(codex_binary)

        if options.workspace_root is None:
            # No caller-provided root: the temp workspace is ours to delete later.
            workspace_root = manifest.materialize()
            owned_workspace = True
        else:
            workspace_root = options.workspace_root
            workspace_root.mkdir(parents=True, exist_ok=True)
            staged = manifest.materialize()
            try:
                self._merge_tree(staged, workspace_root)
            finally:
                shutil.rmtree(staged, ignore_errors=True)
            owned_workspace = False

        return LocalSession(
            workspace_root=workspace_root,
            app_server_binary=codex_binary,
            app_server_args=app_server_args,
            owned_workspace=owned_workspace,
        )

    @staticmethod
    def _merge_tree(staged: Path, root: Path) -> None:
        """Move every top-level entry of ``staged`` into ``root``, replacing collisions."""
        for child in staged.iterdir():
            destination = root / child.name
            if destination.exists():
                if destination.is_dir():
                    shutil.rmtree(destination)
                else:
                    destination.unlink()
            shutil.move(str(child), str(destination))

    @staticmethod
    def _default_app_server_binary() -> Path:
        """Prefer the repo's debug build, then ``codex`` on PATH, then a fixed fallback."""
        repo_app_server = Path(__file__).resolve().parents[3] / "codex-rs" / "target" / "debug" / "codex-app-server"
        if repo_app_server.exists():
            return repo_app_server
        return Path(shutil.which("codex") or "/opt/homebrew/bin/codex")

    @staticmethod
    def _app_server_args_for_binary(binary: Path) -> tuple[str, ...]:
        """A bare ``codex-app-server`` runs directly; the ``codex`` CLI needs a subcommand."""
        if binary.name == "codex-app-server":
            return ("--listen", "stdio://")
        return ("app-server", "--listen", "stdio://")
|
||||
19
codex-sdk-v2/src/codex_sdk_v2/manifest.py
Normal file
19
codex-sdk-v2/src/codex_sdk_v2/manifest.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
from .entries import Entry
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Manifest:
|
||||
root: str = "/workspace"
|
||||
entries: dict[str | Path, Entry] = field(default_factory=dict)
|
||||
|
||||
def materialize(self) -> Path:
|
||||
tempdir = Path(tempfile.mkdtemp(prefix="codex-sdk-v2-manifest-"))
|
||||
for name, entry in self.entries.items():
|
||||
entry.materialize(tempdir / Path(name))
|
||||
return tempdir
|
||||
103
codex-sdk-v2/src/codex_sdk_v2/pending_tool_calls.py
Normal file
103
codex-sdk-v2/src/codex_sdk_v2/pending_tool_calls.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, ClassVar, Literal, Mapping
|
||||
|
||||
|
||||
class ToolDecision:
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ApproveDecision(ToolDecision):
|
||||
for_session: bool = False
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RejectDecision(ToolDecision):
|
||||
cancel: bool = False
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DeferDecision(ToolDecision):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RunDecision(ToolDecision):
|
||||
arguments: Mapping[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ReplaceCommandDecision(ToolDecision):
|
||||
command: list[str]
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RespondDecision(ToolDecision):
|
||||
result: Any
|
||||
success: bool = True
|
||||
|
||||
|
||||
class PendingToolCall:
|
||||
kind: ClassVar[str]
|
||||
|
||||
async def __call__(self, task: Any) -> None:
|
||||
await task.resolve_tool_call(self)
|
||||
|
||||
def describe(self) -> str:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PendingCommandExecution(PendingToolCall):
|
||||
kind: ClassVar[str] = "command_execution"
|
||||
request_id: int | str
|
||||
thread_id: str
|
||||
turn_id: str
|
||||
item_id: str
|
||||
approval_id: str | None
|
||||
reason: str | None
|
||||
command: str | None
|
||||
cwd: str | None
|
||||
command_actions: list[dict[str, Any]] | None
|
||||
resolved: bool = False
|
||||
|
||||
def describe(self) -> str:
|
||||
if self.command:
|
||||
return f"Approve command: {self.command}"
|
||||
if self.reason:
|
||||
return f"Approve command execution: {self.reason}"
|
||||
return "Approve command execution"
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PendingFileChange(PendingToolCall):
|
||||
kind: ClassVar[str] = "file_change"
|
||||
request_id: int | str
|
||||
thread_id: str
|
||||
turn_id: str
|
||||
item_id: str
|
||||
reason: str | None
|
||||
grant_root: str | None
|
||||
resolved: bool = False
|
||||
|
||||
def describe(self) -> str:
|
||||
if self.reason:
|
||||
return f"Approve file changes: {self.reason}"
|
||||
return "Approve file changes"
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PendingFunctionToolCall(PendingToolCall):
|
||||
kind: ClassVar[str] = "function_tool"
|
||||
request_id: int | str
|
||||
thread_id: str
|
||||
turn_id: str
|
||||
call_id: str
|
||||
tool_name: str
|
||||
arguments: dict[str, Any]
|
||||
resolved: bool = False
|
||||
|
||||
def describe(self) -> str:
|
||||
return f"Run function tool {self.tool_name}({self.arguments})"
|
||||
385
codex-sdk-v2/src/codex_sdk_v2/task.py
Normal file
385
codex-sdk-v2/src/codex_sdk_v2/task.py
Normal file
@@ -0,0 +1,385 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, AsyncIterator, Mapping
|
||||
|
||||
from .app_server_client import JsonRpcNotification, JsonRpcServerRequest
|
||||
from .pending_tool_calls import ApproveDecision
|
||||
from .pending_tool_calls import DeferDecision
|
||||
from .pending_tool_calls import PendingCommandExecution
|
||||
from .pending_tool_calls import PendingFileChange
|
||||
from .pending_tool_calls import PendingFunctionToolCall
|
||||
from .pending_tool_calls import PendingToolCall
|
||||
from .pending_tool_calls import RejectDecision
|
||||
from .pending_tool_calls import ReplaceCommandDecision
|
||||
from .pending_tool_calls import RespondDecision
|
||||
from .pending_tool_calls import RunDecision
|
||||
from .pending_tool_calls import ToolDecision
|
||||
|
||||
|
||||
@dataclass(slots=True)
class Task:
    """One conversational thread driven over a codex app-server session.

    Starts turns, streams JSON-RPC notifications back to the caller, converts
    server approval/tool requests into ``PendingToolCall`` objects, and sends
    the host's decisions back over the wire.
    """

    # Session object exposing .app_server_client (JSON-RPC) and async .stop().
    session: Any
    thread_id: str
    # Raw payload returned when the thread was started; kept for callers.
    initial_thread_started: dict[str, Any]
    # Host-defined function tools keyed by tool name.
    function_tools: Mapping[str, Any] = field(default_factory=dict)
    # Approval policies for built-in commands keyed by built-in tool name.
    builtin_tool_policies: Mapping[str, Any] = field(default_factory=dict)
    # Bridge owned by this task; shut down on close(). None when not owned.
    _owned_bridge: Any | None = None
    _pending_tool_calls: list[PendingToolCall] = field(default_factory=list)
    _active_turn_id: str | None = None
    _turn_complete: bool = True

    async def close(self) -> None:
        """Stop the session; always shut down the owned bridge, even on failure."""
        try:
            await self.session.stop()
        finally:
            if self._owned_bridge is not None:
                self._owned_bridge.shutdown()
                self._owned_bridge = None

    async def run(self, user_text: str) -> AsyncIterator[JsonRpcNotification]:
        """Start a new turn with ``user_text`` and yield its notifications.

        Returns early (pausing the turn) when a server request leaves a tool
        call pending; the host should resolve it and then call ``resume()``.
        Raises RuntimeError when the client is missing or calls are pending.
        """
        if self.session.app_server_client is None:
            raise RuntimeError("app-server client is not attached")
        if self.pending_tool_calls():
            raise RuntimeError("cannot start a new turn while tool calls are pending")
        response = await self.session.app_server_client.request(
            "turn/start",
            {
                "threadId": self.thread_id,
                "input": [{"type": "text", "text": user_text}],
            },
        )
        self._active_turn_id = response["turn"]["id"]
        self._turn_complete = False
        while True:
            message = await self.session.app_server_client.next_message()
            if isinstance(message, JsonRpcServerRequest):
                # Server-initiated request (approval / tool call). Pause if it
                # could not be resolved automatically.
                should_pause = await self._handle_server_request(message)
                if should_pause:
                    return
                continue
            yield message
            params = message.params
            # The turn id may arrive as "turnId" or nested under "turn".
            matches_turn = params.get("turnId") == self._active_turn_id or params.get("turn", {}).get("id") == self._active_turn_id
            if message.method == "turn/completed" and matches_turn:
                self._turn_complete = True
                self._active_turn_id = None
                return

    async def resume(self) -> AsyncIterator[JsonRpcNotification]:
        """Continue streaming the active turn after pending calls were resolved.

        No-op when no turn is active. Mirrors the event loop in ``run()``.
        """
        if self.session.app_server_client is None:
            raise RuntimeError("app-server client is not attached")
        if self.pending_tool_calls():
            raise RuntimeError("cannot resume while tool calls are pending")
        if self._turn_complete or self._active_turn_id is None:
            return
        while True:
            message = await self.session.app_server_client.next_message()
            if isinstance(message, JsonRpcServerRequest):
                should_pause = await self._handle_server_request(message)
                if should_pause:
                    return
                continue
            yield message
            params = message.params
            matches_turn = params.get("turnId") == self._active_turn_id or params.get("turn", {}).get("id") == self._active_turn_id
            if message.method == "turn/completed" and matches_turn:
                self._turn_complete = True
                self._active_turn_id = None
                return

    async def collect_text(self, user_text: str) -> str:
        """Run a turn and return the concatenated agent-message delta text.

        Resumes automatically until the turn completes or a tool call blocks.
        """
        text_chunks: list[str] = []
        async for event in self.run(user_text):
            if event.method == "item/agentMessage/delta":
                delta = event.params.get("delta")
                if isinstance(delta, str):
                    text_chunks.append(delta)
        while not self._turn_complete and not self.pending_tool_calls():
            async for event in self.resume():
                if event.method == "item/agentMessage/delta":
                    delta = event.params.get("delta")
                    if isinstance(delta, str):
                        text_chunks.append(delta)
        return "".join(text_chunks)

    def pending_tool_calls(self) -> list[PendingToolCall]:
        """Return the still-unresolved tool calls awaiting a host decision."""
        return [tool_call for tool_call in self._pending_tool_calls if not tool_call.resolved]

    async def resolve_tool_call(self, tool_call: PendingToolCall) -> None:
        """Default resolution: approve built-in approvals, run function tools."""
        if isinstance(tool_call, PendingCommandExecution):
            await self.approve(tool_call)
            return
        if isinstance(tool_call, PendingFileChange):
            await self.approve(tool_call)
            return
        await self.run_function_tool(tool_call)

    async def apply_tool_decision(self, tool_call: PendingToolCall) -> bool:
        """Ask the registered policy/tool for a decision and enact it.

        Returns True when the call was resolved; False when deferred so the
        host can decide manually. Raises TypeError on mismatched decisions.
        """
        decision = await self._call_tool_approval(tool_call)
        if decision is None or isinstance(decision, DeferDecision):
            return False
        if isinstance(decision, ApproveDecision):
            await self.approve(tool_call, for_session=decision.for_session)
            return True
        if isinstance(decision, RejectDecision):
            await self.reject(tool_call, cancel=decision.cancel)
            return True
        if isinstance(decision, ReplaceCommandDecision):
            if not isinstance(tool_call, PendingCommandExecution):
                raise TypeError(
                    "ReplaceCommandDecision can only be used with built-in command approvals"
                )
            await self.replace_command(tool_call, decision.command)
            return True
        if isinstance(decision, RunDecision):
            if not isinstance(tool_call, PendingFunctionToolCall):
                raise TypeError("RunDecision can only be used with function tools")
            await self.run_function_tool(tool_call, arguments=decision.arguments)
            return True
        if isinstance(decision, RespondDecision):
            if not isinstance(tool_call, PendingFunctionToolCall):
                raise TypeError("RespondDecision is only valid for function tool calls")
            await self.submit_tool_result(tool_call, decision.result, success=decision.success)
            return True
        raise TypeError(f"unsupported tool decision: {type(decision)!r}")

    async def approve(self, tool_call: PendingToolCall, *, for_session: bool = False) -> None:
        """Approve a built-in approval request; anything else is executed as a function tool."""
        if isinstance(tool_call, PendingCommandExecution):
            await self._respond_to_command_approval(tool_call, for_session=for_session)
            tool_call.resolved = True
            return
        if isinstance(tool_call, PendingFileChange):
            await self._respond_to_file_change_approval(tool_call, for_session=for_session)
            tool_call.resolved = True
            return
        await self.run_function_tool(tool_call)

    async def replace_command(
        self,
        tool_call: PendingCommandExecution,
        command: list[str],
    ) -> None:
        """Approve the command but have the server execute ``command`` instead."""
        await self._respond_to_command_override(tool_call, command=command)
        # Local echo of the override for display; naive join, not shell-quoted.
        tool_call.command = " ".join(command)
        tool_call.resolved = True

    async def reject(self, tool_call: PendingToolCall, *, cancel: bool = False) -> None:
        """Decline (or, with ``cancel``, abort) the pending call."""
        if isinstance(tool_call, PendingCommandExecution):
            await self._respond_to_command_rejection(tool_call, cancel=cancel)
            tool_call.resolved = True
            return
        if isinstance(tool_call, PendingFileChange):
            await self._respond_to_file_change_rejection(tool_call, cancel=cancel)
            tool_call.resolved = True
            return
        await self._reject_function_tool(tool_call, cancel=cancel)

    async def run_function_tool(
        self,
        tool_call: PendingFunctionToolCall,
        *,
        arguments: Mapping[str, Any] | None = None,
    ) -> None:
        """Execute the named function tool and report its result (or failure)."""
        tool = self.function_tools.get(tool_call.tool_name)
        if tool is None:
            await self.submit_tool_result(
                tool_call,
                f"unknown function tool: {tool_call.tool_name}",
                success=False,
            )
            return
        call_arguments = dict(arguments) if arguments is not None else tool_call.arguments
        try:
            result = await tool.run(call_arguments)
        except Exception as exc:
            # Tool errors are reported to the server, not raised to the caller.
            await self.submit_tool_result(
                tool_call,
                f"function tool {tool_call.tool_name} failed: {exc}",
                success=False,
            )
            return
        await self.submit_tool_result(tool_call, result, success=True)

    async def submit_tool_result(
        self,
        tool_call: PendingFunctionToolCall,
        result: Any,
        *,
        success: bool = True,
    ) -> None:
        """Serialize ``result`` and answer the tool-call request, marking it resolved."""
        try:
            text = result if isinstance(result, str) else json.dumps(result, indent=2, sort_keys=True)
        except TypeError as exc:
            # Non-JSON-serializable result: downgrade to a failure message.
            text = f"function tool {tool_call.tool_name} returned a non-serializable result: {exc}"
            success = False
        await self.session.app_server_client.send_result(
            tool_call.request_id,
            {
                "contentItems": [{"type": "inputText", "text": text}],
                "success": success,
            },
        )
        tool_call.resolved = True

    async def _handle_server_request(self, request: JsonRpcServerRequest) -> bool:
        """Decode a server request and try to auto-resolve it.

        Returns True when the caller should pause so the host can resolve the
        still-pending call manually.
        """
        tool_call = self._decode_tool_call(request)
        if tool_call is None:
            # Unknown method: answer with JSON-RPC "method not found" (-32601).
            await self.session.app_server_client.send_error(
                request.request_id,
                -32601,
                f"unsupported server request method: {request.method}",
            )
            return False
        self._pending_tool_calls.append(tool_call)
        handled = await self.apply_tool_decision(tool_call)
        if handled:
            return False
        return not tool_call.resolved

    def _decode_tool_call(self, request: JsonRpcServerRequest) -> PendingToolCall | None:
        """Map a JSON-RPC server request onto a PendingToolCall, or None if unknown."""
        if request.method == "item/commandExecution/requestApproval":
            return PendingCommandExecution(
                request_id=request.request_id,
                thread_id=str(request.params.get("threadId", self.thread_id)),
                turn_id=str(request.params.get("turnId", "")),
                item_id=str(request.params.get("itemId", "")),
                approval_id=self._optional_str(request.params.get("approvalId")),
                reason=self._optional_str(request.params.get("reason")),
                command=self._optional_str(request.params.get("command")),
                cwd=self._optional_str(request.params.get("cwd")),
                command_actions=self._command_actions(request.params.get("commandActions")),
            )
        if request.method == "item/fileChange/requestApproval":
            return PendingFileChange(
                request_id=request.request_id,
                thread_id=str(request.params.get("threadId", self.thread_id)),
                turn_id=str(request.params.get("turnId", "")),
                item_id=str(request.params.get("itemId", "")),
                reason=self._optional_str(request.params.get("reason")),
                grant_root=self._optional_str(request.params.get("grantRoot")),
            )
        if request.method != "item/tool/call":
            return None
        tool_name = request.params.get("tool")
        arguments = request.params.get("arguments", {})
        if not isinstance(tool_name, str):
            raise RuntimeError("tool call is missing a string tool name")
        if not isinstance(arguments, dict):
            raise RuntimeError(f"tool call arguments for {tool_name} must be an object")
        return PendingFunctionToolCall(
            request_id=request.request_id,
            thread_id=str(request.params.get("threadId", self.thread_id)),
            turn_id=str(request.params.get("turnId", "")),
            call_id=str(request.params.get("callId", "")),
            tool_name=tool_name,
            arguments=arguments,
        )

    async def _call_tool_approval(self, tool_call: PendingToolCall) -> ToolDecision | None:
        """Invoke the approval hook registered for this call, awaiting if needed."""
        decision: Any = None
        if isinstance(tool_call, PendingFunctionToolCall):
            tool = self.function_tools.get(tool_call.tool_name)
            if tool is None:
                return None
            decision = tool.approve(tool_call)
        elif isinstance(tool_call, PendingCommandExecution):
            command_name = self._command_name(tool_call.command)
            policy = self.builtin_tool_policies.get(command_name)
            if policy is None:
                return None
            decision = policy(tool_call)
        elif isinstance(tool_call, PendingFileChange):
            # No policy hook exists for file changes; always defer to the host.
            return None
        if inspect.isawaitable(decision):
            decision = await decision
        if decision is None or isinstance(decision, ToolDecision):
            return decision
        raise TypeError(f"tool approval must return ToolDecision or None, got {type(decision)!r}")

    async def _respond_to_command_approval(
        self,
        tool_call: PendingCommandExecution,
        *,
        for_session: bool,
    ) -> None:
        # Decision strings follow the app-server review protocol.
        await self.session.app_server_client.send_result(
            tool_call.request_id,
            {"decision": "acceptForSession" if for_session else "accept"},
        )

    async def _respond_to_command_rejection(
        self,
        tool_call: PendingCommandExecution,
        *,
        cancel: bool,
    ) -> None:
        await self.session.app_server_client.send_result(
            tool_call.request_id,
            {"decision": "cancel" if cancel else "decline"},
        )

    async def _respond_to_command_override(
        self,
        tool_call: PendingCommandExecution,
        *,
        command: list[str],
    ) -> None:
        # Structured decision: accept but substitute the replacement command.
        await self.session.app_server_client.send_result(
            tool_call.request_id,
            {
                "decision": {
                    "acceptWithCommandOverride": {
                        "command": command,
                    }
                }
            },
        )

    async def _respond_to_file_change_approval(
        self,
        tool_call: PendingFileChange,
        *,
        for_session: bool,
    ) -> None:
        await self.session.app_server_client.send_result(
            tool_call.request_id,
            {"decision": "acceptForSession" if for_session else "accept"},
        )

    async def _respond_to_file_change_rejection(
        self,
        tool_call: PendingFileChange,
        *,
        cancel: bool,
    ) -> None:
        await self.session.app_server_client.send_result(
            tool_call.request_id,
            {"decision": "cancel" if cancel else "decline"},
        )

    async def _reject_function_tool(self, tool_call: PendingFunctionToolCall, *, cancel: bool) -> None:
        """Report a host rejection/cancellation as a failed tool result."""
        action = "canceled" if cancel else "rejected"
        await self.submit_tool_result(
            tool_call,
            f"function tool {tool_call.tool_name} was {action} by the host",
            success=False,
        )

    @staticmethod
    def _command_actions(value: Any) -> list[dict[str, Any]] | None:
        # Accept only a list of dicts; anything else is treated as absent.
        if isinstance(value, list) and all(isinstance(action, dict) for action in value):
            return value
        return None

    @staticmethod
    def _optional_str(value: Any) -> str | None:
        return value if isinstance(value, str) else None

    @staticmethod
    def _command_name(command: str | None) -> str:
        # Heuristic mapping to the built-in tool name; assumes write_stdin
        # requests carry a "write_stdin:" prefix — TODO confirm against protocol.
        if not command:
            return "exec_command"
        return "write_stdin" if command.startswith("write_stdin:") else "exec_command"
|
||||
283
codex-sdk-v2/src/codex_sdk_v2/tools.py
Normal file
283
codex-sdk-v2/src/codex_sdk_v2/tools.py
Normal file
@@ -0,0 +1,283 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any, Awaitable, Callable, ClassVar, Mapping, Sequence
|
||||
|
||||
if TYPE_CHECKING:
    # Type-checking-only imports — presumably kept out of runtime to avoid an
    # import cycle with pending_tool_calls; confirm before moving them.
    from .pending_tool_calls import PendingCommandExecution
    from .pending_tool_calls import PendingFunctionToolCall
    from .pending_tool_calls import ToolDecision


if TYPE_CHECKING:
    # Static signature: a policy receives the pending built-in command and
    # returns a decision (possibly awaitable), or None to defer to the host.
    BuiltinApprovalPolicy = Callable[
        [PendingCommandExecution],
        Awaitable[ToolDecision | None] | ToolDecision | None,
    ]
else:
    # Runtime stand-in: a loose Callable so the module imports without the
    # type-only names above.
    BuiltinApprovalPolicy = Callable[[Any], Awaitable[Any] | Any]
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class BuiltinToolSpec:
|
||||
tool_name: str
|
||||
|
||||
|
||||
class Tool:
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class ConfiguredBuiltinTool(Tool):
    """A built-in tool type paired with an optional host approval policy."""

    tool_type: type["BuiltinTool"]
    approval_policy: BuiltinApprovalPolicy | None = None

    def builtin_spec(self) -> BuiltinToolSpec:
        """Delegate to the wrapped tool type's spec."""
        return self.tool_type.builtin_spec()
|
||||
|
||||
|
||||
class BuiltinTool(Tool):
    """Base class for codex built-in tools, identified by ``codex_builtin_tool``."""

    codex_builtin_tool: ClassVar[str]

    @classmethod
    def builtin_spec(cls) -> BuiltinToolSpec:
        """Return the wire spec; raise if the subclass left the class attr unset/empty."""
        name = getattr(cls, "codex_builtin_tool", None)
        if name:
            return BuiltinToolSpec(tool_name=name)
        raise TypeError(f"{cls.__name__} must define codex_builtin_tool")

    @classmethod
    def with_approval_policy(
        cls,
        *,
        policy: BuiltinApprovalPolicy,
    ) -> ConfiguredBuiltinTool:
        """Pair this tool type with a host-side approval policy."""
        return ConfiguredBuiltinTool(tool_type=cls, approval_policy=policy)
|
||||
|
||||
|
||||
class FunctionTool(Tool):
    """Base class for host-defined tools exposed to the model as functions."""

    name: ClassVar[str]
    description: ClassVar[str]
    input_schema: ClassVar[dict[str, Any]]

    @classmethod
    def dynamic_tool_spec(cls) -> dict[str, Any]:
        """Build the dynamic-tool registration payload, validating class attrs."""
        name = getattr(cls, "name", None)
        if not name:
            raise TypeError(f"{cls.__name__} must define name")
        description = getattr(cls, "description", None)
        if not description:
            raise TypeError(f"{cls.__name__} must define description")
        input_schema = getattr(cls, "input_schema", None)
        if not isinstance(input_schema, dict):
            raise TypeError(f"{cls.__name__} must define input_schema as a dict")
        return {
            "name": name,
            "description": description,
            "inputSchema": input_schema,
        }

    async def approve(self, call: PendingFunctionToolCall) -> ToolDecision | None:
        """Hook: return a decision for ``call``, or None to use the default flow."""
        return None

    def instructions(self) -> str | None:
        """Hook: extra prompt instructions contributed by this tool, if any."""
        return None

    async def run(self, arguments: Mapping[str, Any]) -> Any:
        """Execute the tool; concrete tools must override."""
        raise NotImplementedError
|
||||
|
||||
|
||||
# Marker classes: each maps one codex built-in tool name onto a Python type
# so hosts can reference built-ins by class instead of by raw string.


class ExecCommand(BuiltinTool):
    codex_builtin_tool = "exec_command"


class WriteStdin(BuiltinTool):
    codex_builtin_tool = "write_stdin"


class Shell(BuiltinTool):
    codex_builtin_tool = "shell"


class UpdatePlan(BuiltinTool):
    codex_builtin_tool = "update_plan"


class RequestUserInput(BuiltinTool):
    codex_builtin_tool = "request_user_input"


class ApplyPatch(BuiltinTool):
    codex_builtin_tool = "apply_patch"


class WebSearch(BuiltinTool):
    codex_builtin_tool = "web_search"


class ViewImage(BuiltinTool):
    codex_builtin_tool = "view_image"


class SearchToolBm25(BuiltinTool):
    codex_builtin_tool = "search_tool_bm25"


class ReadFile(BuiltinTool):
    codex_builtin_tool = "read_file"


class ListDir(BuiltinTool):
    codex_builtin_tool = "list_dir"


class GrepFiles(BuiltinTool):
    codex_builtin_tool = "grep_files"


class ListMcpResources(BuiltinTool):
    codex_builtin_tool = "list_mcp_resources"


class ListMcpResourceTemplates(BuiltinTool):
    codex_builtin_tool = "list_mcp_resource_templates"


class ReadMcpResource(BuiltinTool):
    codex_builtin_tool = "read_mcp_resource"


class SpawnAgent(BuiltinTool):
    codex_builtin_tool = "spawn_agent"


class SendInput(BuiltinTool):
    codex_builtin_tool = "send_input"


class ResumeAgent(BuiltinTool):
    codex_builtin_tool = "resume_agent"


class Wait(BuiltinTool):
    codex_builtin_tool = "wait"


class CloseAgent(BuiltinTool):
    codex_builtin_tool = "close_agent"


class SpawnAgentsOnCsv(BuiltinTool):
    codex_builtin_tool = "spawn_agents_on_csv"


class JsRepl(BuiltinTool):
    codex_builtin_tool = "js_repl"


class JsReplReset(BuiltinTool):
    codex_builtin_tool = "js_repl_reset"


class Artifacts(BuiltinTool):
    codex_builtin_tool = "artifacts"


class ReportAgentJobResult(BuiltinTool):
    codex_builtin_tool = "report_agent_job_result"


class TestSyncTool(BuiltinTool):
    codex_builtin_tool = "test_sync_tool"


# Registry of every built-in marker above; used to reserve these names
# against host-defined function tools (see function_tools()).
ALL_BUILTIN_TOOLS: tuple[type[BuiltinTool], ...] = (
    ExecCommand,
    WriteStdin,
    Shell,
    UpdatePlan,
    RequestUserInput,
    ApplyPatch,
    WebSearch,
    ViewImage,
    SearchToolBm25,
    ReadFile,
    ListDir,
    GrepFiles,
    ListMcpResources,
    ListMcpResourceTemplates,
    ReadMcpResource,
    SpawnAgent,
    SendInput,
    ResumeAgent,
    Wait,
    CloseAgent,
    SpawnAgentsOnCsv,
    JsRepl,
    JsReplReset,
    Artifacts,
    ReportAgentJobResult,
    TestSyncTool,
)
|
||||
|
||||
|
||||
def builtin_tools(tools: Sequence[Tool | type[Tool]]) -> tuple[list[str], dict[str, BuiltinApprovalPolicy]]:
    """Collect built-in tool names and per-tool approval policies.

    Walks ``tools`` in order and keeps only entries that resolve to a
    ``BuiltinTool`` subclass.  Returns a pair of:

    - the deduplicated built-in tool names (first occurrence wins), and
    - a mapping of tool name -> approval policy, taken from any
      ``ConfiguredBuiltinTool`` entry that carries a non-``None`` policy.
    """
    names: list[str] = []
    policies: dict[str, BuiltinApprovalPolicy] = {}
    seen: set[str] = set()

    for entry in tools:
        # Resolve the entry down to a concrete tool class, remembering the
        # ConfiguredBuiltinTool wrapper (if any) for its approval policy.
        if isinstance(entry, ConfiguredBuiltinTool):
            configured = entry
            tool_type = entry.tool_type
        else:
            configured = None
            tool_type = entry if isinstance(entry, type) else type(entry)

        if not (isinstance(tool_type, type) and issubclass(tool_type, BuiltinTool)):
            continue

        name = tool_type.builtin_spec().tool_name
        if name not in seen:
            seen.add(name)
            names.append(name)
        # Policy assignment is intentionally outside the dedup guard: a later
        # configured entry for the same name overwrites the earlier policy.
        if configured is not None and configured.approval_policy is not None:
            policies[name] = configured.approval_policy

    return names, policies
|
||||
|
||||
|
||||
def function_tools(tools: Sequence[Tool | type[Tool]]) -> list[FunctionTool]:
    """Instantiate and validate the host-defined function tools in ``tools``.

    ``ConfiguredBuiltinTool`` entries are skipped; classes are instantiated
    with no arguments; anything that is not a ``FunctionTool`` instance is
    ignored.  Preserves input order.

    Raises:
        ValueError: if a function tool's name collides with a Codex
            built-in name, or if two function tools share a name.
    """
    instances: list[FunctionTool] = []
    seen_names: set[str] = set()
    builtin_names = {cls.builtin_spec().tool_name for cls in ALL_BUILTIN_TOOLS}

    for entry in tools:
        if isinstance(entry, ConfiguredBuiltinTool):
            continue

        candidate = entry() if isinstance(entry, type) else entry
        if not isinstance(candidate, FunctionTool):
            continue

        tool_name = type(candidate).dynamic_tool_spec()["name"]
        if tool_name in builtin_names:
            raise ValueError(f"function tool name collides with codex built-in: {tool_name}")
        if tool_name in seen_names:
            raise ValueError(f"duplicate function tool name: {tool_name}")

        seen_names.add(tool_name)
        instances.append(candidate)

    return instances
|
||||
|
||||
|
||||
def tool_instruction_fragments(tools: Sequence[Tool | type[Tool]]) -> list[str]:
    """Gather non-empty instruction fragments contributed by function tools.

    ``ConfiguredBuiltinTool`` entries contribute nothing here.  Each
    ``FunctionTool`` is asked for ``instructions()``; empty fragments are
    dropped, and a fragment is emitted at most once per tool name.
    Input order is preserved.
    """
    collected: list[str] = []
    seen_keys: set[str] = set()

    for entry in tools:
        if isinstance(entry, ConfiguredBuiltinTool):
            continue

        instance = entry() if isinstance(entry, type) else entry
        if not isinstance(instance, FunctionTool):
            continue

        text = instance.instructions()
        # Key on the declared tool name so duplicate tool entries do not
        # repeat the same guidance in the composed prompt.
        key = f"function:{type(instance).dynamic_tool_spec()['name']}"
        if text and key not in seen_keys:
            seen_keys.add(key)
            collected.append(text)

    return collected
|
||||
|
||||
|
||||
# Minimal default built-in tool set: unified exec plus stdin writing.
DEFAULT_TOOLS: tuple[type[BuiltinTool], ...] = (
    ExecCommand,
    WriteStdin,
)
|
||||
91
codex-sdk-v2/uv.lock
generated
Normal file
91
codex-sdk-v2/uv.lock
generated
Normal file
@@ -0,0 +1,91 @@
|
||||
version = 1
|
||||
revision = 3
|
||||
requires-python = ">=3.11"
|
||||
|
||||
[[package]]
|
||||
name = "anyio"
|
||||
version = "4.12.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "idna" },
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2026.2.25"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-sdk-v2"
|
||||
version = "0.1.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "httpx", specifier = ">=0.27.0" }]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.16.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.9"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "h11" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpx"
|
||||
version = "0.28.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "certifi" },
|
||||
{ name = "httpcore" },
|
||||
{ name = "idna" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.11"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.15.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
|
||||
]
|
||||
482
codex_sdk_2_prototype_summary.md
Normal file
482
codex_sdk_2_prototype_summary.md
Normal file
@@ -0,0 +1,482 @@
|
||||
# codex-sdk-v2 Prototype Summary
|
||||
|
||||
## 1. Goals of the prototype
|
||||
|
||||
This prototype was meant to answer one core question:
|
||||
|
||||
Can we reuse Codex as the runtime and tool-execution engine while preserving the Universal Computer model in which the host SDK owns orchestration, configuration, approvals, and provider access?
|
||||
|
||||
More concretely, the prototype set out to prove that we can:
|
||||
|
||||
- run Codex through app-server instead of re-implementing tool behavior in Python
|
||||
- let the host SDK own Responses API credentials and transport
|
||||
- choose which Codex built-in tools are enabled from the SDK
|
||||
- add host-defined function tools in a Universal Computer-style API
|
||||
- keep the SDK in the loop for all tool calls via pending tool call pause-points
|
||||
- support programmatic approval, defer, reject, and argument/command rewriting
|
||||
- start moving away from monolithic prompt construction toward capability-scoped instruction composition
|
||||
- preserve the Universal Computer ergonomics where possible while adopting Codex’s runtime model
|
||||
|
||||
The result is a successful prototype, with one important caveat:
|
||||
|
||||
The current transport is still bridge-based delegation, not the final event-by-event host-delegation model from the RFC.
|
||||
|
||||
### Prototype architecture
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Host["Host SDK"]
|
||||
SDK["AgentSDKv2"]
|
||||
Bridge["Host Responses bridge"]
|
||||
Provider["Upstream provider"]
|
||||
end
|
||||
|
||||
subgraph Runtime["Runtime"]
|
||||
AppServer["Codex app-server"]
|
||||
Codex["Codex runtime"]
|
||||
end
|
||||
|
||||
SDK --> AppServer
|
||||
AppServer --> Codex
|
||||
Codex --> Bridge
|
||||
Bridge --> Provider
|
||||
```
|
||||
|
||||
## 2. Changes made in Codex
|
||||
|
||||
To support the prototype, Codex needed a handful of structural changes.
|
||||
|
||||
### App-server and thread/session configuration
|
||||
|
||||
We added new thread-start knobs so the SDK can shape the runtime explicitly:
|
||||
|
||||
- a thread-scoped delegation configuration pointing Codex at a host-managed Responses bridge
|
||||
- a built-in tool allowlist so the SDK can choose which Codex tools are exposed
|
||||
- a manual tool execution mode so built-in tool calls can become host-visible pause-points
|
||||
|
||||
This is a meaningful shift in ownership. Before, tool availability and model transport were mostly internal to Codex. With this prototype, the SDK becomes an active configuration authority.
|
||||
|
||||
### Model transport delegation
|
||||
|
||||
Codex was extended so a thread can target a host-local bridge for `/v1/responses` traffic instead of always talking directly to the upstream provider.
|
||||
|
||||
That required:
|
||||
|
||||
- per-thread provider override behavior
|
||||
- app-server awareness of the delegation configuration
|
||||
- explicit startup signaling so the SDK can verify that delegation was actually applied
|
||||
|
||||
This is enough for the prototype, but it is still a proxy model rather than true delegated transport.
|
||||
|
||||
In the prototype:
|
||||
|
||||
- Codex still constructs a provider-shaped HTTP request
|
||||
- Codex still opens the streaming HTTP connection itself
|
||||
- that request is aimed at the host-owned bridge rather than directly at OpenAI
|
||||
- the host bridge then makes the real upstream HTTP request, injects `Authorization`, and streams the provider response bytes back down to Codex unchanged
|
||||
|
||||
So the host really is performing the upstream HTTP request in this prototype, but Codex still thinks it is talking to a Responses-compatible HTTP endpoint. The transport contract is still HTTP proxying, not app-server-level request delegation.
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant SDK as "Host SDK"
|
||||
participant Codex as "Codex runtime"
|
||||
participant Bridge as "Host bridge"
|
||||
participant Provider as "Model provider"
|
||||
|
||||
SDK->>Codex: thread/start + sdkDelegation.bridgeUrl
|
||||
SDK->>Codex: turn/start
|
||||
Codex->>Bridge: HTTP POST /v1/responses
|
||||
Bridge->>Provider: HTTP POST /v1/responses + auth
|
||||
Provider-->>Bridge: streaming response
|
||||
Bridge-->>Codex: streaming response
|
||||
Codex-->>SDK: app-server events + tool pause-points
|
||||
```
|
||||
|
||||
### Built-in tool selection and host-visible control flow
|
||||
|
||||
Codex already had built-in tools, but the prototype needed the SDK to decide which ones are present on a thread.
|
||||
|
||||
That led to:
|
||||
|
||||
- explicit built-in tool filtering at thread startup
|
||||
- manual execution mode for unified-exec so the SDK can pause on built-in calls before Codex executes them
|
||||
- command-override support for unified-exec so the SDK can replace a proposed command rather than only approve or reject it
|
||||
|
||||
This is a real architectural improvement for host control, but it also increases surface area around tool semantics and approval behavior.
|
||||
|
||||
### Prompt composition changes
|
||||
|
||||
Codex previously relied on a fairly monolithic base prompt. To support tool-conditional guidance, we introduced capability-scoped prompt fragments for built-ins and composed them into the session base instructions.
|
||||
|
||||
The important structural change here is not just “more prompt files.” It is that built-in tool guidance is no longer conceptually part of one indivisible system prompt. It is now attached to enabled capabilities.
|
||||
|
||||
That is the right direction, but it introduces a maintenance obligation: prompt behavior now depends on both model metadata and tool configuration, so drift between those layers becomes a real risk.
|
||||
|
||||
### Risks and maintenance challenges introduced in Codex
|
||||
|
||||
- The bridge-based delegation path is a temporary architecture and will be easy to over-invest in if we are not disciplined.
|
||||
- Tool semantics now exist at the intersection of prompt composition, tool registry configuration, and approval handling, which increases the chance of subtle mismatches.
|
||||
- Manual built-in pause-points make the runtime more host-friendly, but they also make turn progression and resume behavior more stateful and therefore more failure-prone.
|
||||
- Built-in capability prompts now need to stay aligned with the actual available tool surface. If we add tools and forget to add or adjust capability fragments, the prompt can become misleading again.
|
||||
- The prototype only exercises a narrow built-in set, mainly unified-exec. Expanding to the full Codex built-in surface will add complexity.
|
||||
|
||||
## 3. Changes from the original Universal Computer model
|
||||
|
||||
To make Universal Computer work in this world, we had to reshape several assumptions from the original package.
|
||||
|
||||
### Tool implementation ownership
|
||||
|
||||
Original Universal Computer treated the Python SDK as the home of tool execution for built-ins like filesystem and shell behavior.
|
||||
|
||||
In the new world:
|
||||
|
||||
- Codex owns built-in tool execution
|
||||
- the SDK only enables or disables built-ins and participates in approvals/control flow
|
||||
- host-defined tools remain host-executed, but they are expressed as Codex dynamic tools under the hood
|
||||
|
||||
This is probably the biggest philosophical change in the whole prototype.
|
||||
|
||||
### Plugin architecture became capability architecture
|
||||
|
||||
Universal Computer plugins mixed together:
|
||||
|
||||
- tool groups
|
||||
- instructions
|
||||
- manifest mutation
|
||||
- request shaping
|
||||
|
||||
For the prototype, that concept was reintroduced as capabilities:
|
||||
|
||||
- a capability exposes a single `tools()` method
|
||||
- it can contribute instructions
|
||||
- it can mutate the manifest
|
||||
|
||||
The built-in/function distinction is now internal to the SDK rather than part of the public composition API.
|
||||
|
||||
### Approval and execution flow
|
||||
|
||||
Original Universal Computer exposed a very host-centric tool call loop. We restored that shape, but the semantics changed:
|
||||
|
||||
- built-in Codex tools now pause for host approval rather than being host-executed
|
||||
- host function tools are still host-executed
|
||||
- both are surfaced through one pending tool call abstraction
|
||||
|
||||
This preserves the ergonomics while changing the underlying runtime ownership model.
|
||||
|
||||
### Session instruction model
|
||||
|
||||
Universal Computer previously had a stronger notion of separate base, developer, and user instructions.
|
||||
|
||||
In the prototype:
|
||||
|
||||
- `base_instructions` remains a replacement channel
|
||||
- `developer_instructions` remains an additive channel
|
||||
- `user_instructions` was removed rather than carrying forward a misleading prefix-based approximation
|
||||
|
||||
That is a simplification, but also an admission that the old user-instructions shape was not yet properly mapped.
|
||||
|
||||
### Transport and backend model
|
||||
|
||||
Universal Computer’s original long-term direction is backend-agnostic remote execution.
|
||||
|
||||
The prototype narrows that significantly:
|
||||
|
||||
- it currently uses a local attached-process backend
|
||||
- the SDK owns a local bridge
|
||||
- the “copy Codex binary into a destination container” story is not yet real
|
||||
|
||||
This was the right tradeoff for a prototype, but it means the transport/backend layer is still far from feature-complete.
|
||||
|
||||
## 4. Remaining work to fully port the Universal Computer paradigm
|
||||
|
||||
There is still substantial work left before this becomes a full Universal Computer-on-Codex implementation.
|
||||
|
||||
### Replace bridge-based delegation with true app-server delegation
|
||||
|
||||
The RFC’s intended architecture is:
|
||||
|
||||
- Codex prepares the upstream model request
|
||||
- app-server emits host-directed delegation events
|
||||
- the host SDK makes the provider call
|
||||
- the host streams upstream events back into Codex
|
||||
|
||||
The prototype does not do that yet. It uses a bridge/proxy instead.
|
||||
|
||||
The difference matters:
|
||||
|
||||
- in the prototype bridge model, Codex speaks HTTP to a Responses-compatible endpoint and the host pretends to be that endpoint
|
||||
- in the intended full-delegation model, Codex does not speak provider HTTP at all; it emits structured app-server events and the SDK owns the provider call lifecycle explicitly
|
||||
|
||||
Said another way:
|
||||
|
||||
- today, the host owns the real upstream HTTP request, but Codex still owns the HTTP client behavior and stream shape it expects to speak
|
||||
- in the future design, the host owns both the upstream HTTP request and the transport contract between Codex and the host
|
||||
|
||||
That future-state is what unlocks:
|
||||
|
||||
- provider switching without pretending every provider is a Responses-compatible bridge
|
||||
- clean host-side request persistence and replay
|
||||
- first-class interception, cancellation, and routing at the SDK layer
|
||||
- a better multi-container story because the host is in the loop at the app-server event layer rather than only behind an HTTP shim
|
||||
|
||||
This is the most important architectural gap to close.
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant SDK as "Host SDK"
|
||||
participant AppServer as "Codex app-server"
|
||||
participant Codex as "Codex runtime"
|
||||
participant Provider as "Model provider"
|
||||
|
||||
SDK->>AppServer: thread/start
|
||||
SDK->>AppServer: turn/start
|
||||
AppServer->>SDK: model/request
|
||||
SDK->>Provider: provider request + auth
|
||||
Provider-->>SDK: streaming provider events
|
||||
SDK-->>AppServer: model/streamEvent*
|
||||
AppServer->>Codex: normalized turn progression
|
||||
Codex-->>SDK: tool approvals / pending tool calls / output
|
||||
```
|
||||
|
||||
### Generate typed SDK protocol models
|
||||
|
||||
The prototype still hand-rolls Python-side JSON-RPC parsing and event handling.
|
||||
|
||||
To make this production-worthy we should generate typed SDK protocol models from the Rust app-server source of truth, including:
|
||||
|
||||
- Python Pydantic models for app-server requests, responses, and notifications
|
||||
- publishing those generated Python models as a dedicated package to a PyPI repository
|
||||
- pinning that package version to Codex releases so SDK/runtime compatibility is explicit
|
||||
- continued TypeScript generation from the same Rust source
|
||||
- a clearer separation between wire-level protocol models and higher-level SDK runtime objects like pending tool calls
|
||||
|
||||
This matters for correctness and maintainability. As the app-server surface grows, hand-maintained Python shapes will drift.
|
||||
|
||||
### Reintroduce durable rollout ownership on the host
|
||||
|
||||
The prototype has in-memory pending tool calls, but it does not yet fully restore the Universal Computer model where the host owns durable rollout state and can cleanly pause, spin down, and resume later.
|
||||
|
||||
To finish that work we need:
|
||||
|
||||
- stable serialization of unresolved pending tool calls
|
||||
- robust replay/rehydration for built-in approvals and function-tool calls
|
||||
- host-owned transcript and turn state as the source of truth
|
||||
|
||||
### Complete the capability system
|
||||
|
||||
The prototype capability model is intentionally small. A fuller port would need:
|
||||
|
||||
- richer capability composition
|
||||
- skill-like capability bundles
|
||||
- memory capability support
|
||||
- manifest-processing conventions
|
||||
- clearer precedence rules when multiple capabilities contribute instructions or tools
|
||||
|
||||
### Provider abstraction on the host
|
||||
|
||||
Universal Computer wants host-side multi-provider support. The prototype still assumes an OpenAI-shaped host bridge.
|
||||
|
||||
A real port needs:
|
||||
|
||||
- provider-neutral request abstractions on the host
|
||||
- OpenAI and Anthropic support at minimum
|
||||
- streaming normalization back into Codex’s expectations
|
||||
|
||||
### Remote backend support
|
||||
|
||||
The prototype does not yet solve:
|
||||
|
||||
- pinned Codex version acquisition
|
||||
- copying the right Codex binary to the destination environment
|
||||
- attached-process support across all intended backends
|
||||
- reliable host-container transport for app-server
|
||||
|
||||
That backend work is core to the Universal Computer value proposition and still remains.
|
||||
|
||||
### Complete prompt/capability alignment
|
||||
|
||||
We now have the beginning of built-in capability prompt composition, but not the final state.
|
||||
|
||||
Still needed:
|
||||
|
||||
- expand capability fragments beyond the current narrow built-in set
|
||||
- unify prompt composition conventions across all built-ins
|
||||
- make capability ownership and prompt ownership obvious and durable
|
||||
|
||||
## 5. Risks and challenges to productionize
|
||||
|
||||
### Architecture risk
|
||||
|
||||
The biggest risk is shipping too much around the bridge transport and then having to unwind it when moving to true full delegation mode. The bridge is a useful prototype tool, but it is not the right final abstraction.
|
||||
|
||||
### State and resume complexity
|
||||
|
||||
Host-visible pause-points are powerful, but productionizing them means solving:
|
||||
|
||||
- durable pending tool state
|
||||
- replay correctness
|
||||
- no double-execution on resume
|
||||
- clear ownership of partially completed turns
|
||||
|
||||
This is tricky and easy to get subtly wrong.
|
||||
|
||||
### Prompt and capability drift
|
||||
|
||||
We are now explicitly tying enabled capabilities to prompt sections. That is a better model, but it creates a new kind of maintenance burden:
|
||||
|
||||
- adding or removing a tool may require prompt updates
|
||||
- prompt fragments may drift from actual runtime behavior
|
||||
- capability bundles may accumulate overlapping or contradictory instructions
|
||||
|
||||
### Cross-runtime compatibility
|
||||
|
||||
Codex is a Rust runtime with strong internal assumptions. Universal Computer wants host orchestration across heterogeneous backends and providers. The seam between those two worlds needs to stay disciplined or the SDK will slowly become a shadow runtime.
|
||||
|
||||
### Operational complexity
|
||||
|
||||
Productionizing this means dealing with:
|
||||
|
||||
- version pinning
|
||||
- binary distribution
|
||||
- backend compatibility
|
||||
- network transport
|
||||
- auth boundaries
|
||||
- observability for both the host SDK and the remote Codex runtime
|
||||
|
||||
That is a larger operational surface than either original system had in isolation.
|
||||
|
||||
## 6. Suggested engineering roadmap
|
||||
|
||||
### Phase 0: Generated protocol models and package distribution
|
||||
|
||||
Goals:
|
||||
|
||||
- frontload generated Python protocol models before broader SDK implementation
|
||||
- make app-server wire types a versioned dependency rather than a copied internal detail
|
||||
- tie the Python protocol package version explicitly to Codex releases
|
||||
|
||||
Deliverables:
|
||||
|
||||
- generated Python Pydantic models for app-server protocol payloads
|
||||
- continued TypeScript generation from the same Rust source
|
||||
- published Python protocol package in the target PyPI repository
|
||||
- explicit versioning and compatibility policy between Codex and the protocol package
|
||||
|
||||
### Phase 1: Architecture foundation and protocol contracts
|
||||
|
||||
Goals:
|
||||
|
||||
- stabilize the `codex-sdk-v2` API
|
||||
- keep capabilities as the public composition abstraction
|
||||
- expand and clean up built-in capability prompt composition
|
||||
- remove obviously prototype-only rough edges
|
||||
|
||||
Deliverables:
|
||||
|
||||
- clear capability API
|
||||
- consistent base/developer instruction semantics
|
||||
- better example coverage
|
||||
- tighter prompt composition ownership rules
|
||||
|
||||
### Phase 2: Replace bridge delegation with real app-server full delegation
|
||||
|
||||
Goals:
|
||||
|
||||
- make the host SDK the true owner of provider transport
|
||||
- stop relying on a bridge/proxy architecture
|
||||
|
||||
Deliverables:
|
||||
|
||||
- new app-server delegation events for model requests and streamed upstream events
|
||||
- generated protocol types covering the new delegation events
|
||||
- host-side transport driver
|
||||
- Codex-side external stream ingestion
|
||||
- explicit cancellation and failure semantics
|
||||
|
||||
This phase is the real architectural transition.
|
||||
|
||||
### Phase 3: Host-owned rollout and durable pause/resume
|
||||
|
||||
Goals:
|
||||
|
||||
- restore the original Universal Computer durability model
|
||||
- make pending tool calls and turn state resumable across process restarts
|
||||
|
||||
Deliverables:
|
||||
|
||||
- serialized pending tool state
|
||||
- replay-safe resume logic
|
||||
- host-owned rollout persistence as source of truth
|
||||
- strong idempotency guarantees where possible
|
||||
|
||||
### Phase 4: Backend generalization
|
||||
|
||||
Goals:
|
||||
|
||||
- move beyond local attached-process execution
|
||||
- support the Universal Computer backend model in earnest
|
||||
|
||||
Deliverables:
|
||||
|
||||
- pinned Codex version management
|
||||
- binary acquisition and staging
|
||||
- attached-process support across all supported backends
|
||||
- robust host-runtime transport recommendations and implementations
|
||||
|
||||
### Phase 5: Provider and capability expansion
|
||||
|
||||
Goals:
|
||||
|
||||
- make the host SDK genuinely multi-provider
|
||||
- expand the capability model to cover more of the original Universal Computer ecosystem
|
||||
|
||||
Deliverables:
|
||||
|
||||
- provider abstraction for OpenAI and Anthropic
|
||||
- richer capability bundles
|
||||
- memory/skills-style capabilities
|
||||
- better story for apps and connector-driven capabilities
|
||||
|
||||
### Phase 6: Hardening and production readiness
|
||||
|
||||
Goals:
|
||||
|
||||
- make the system operable and debuggable in real workloads
|
||||
|
||||
Deliverables:
|
||||
|
||||
- observability across host and runtime
|
||||
- clear failure semantics
|
||||
- migration strategy from Universal Computer package users
|
||||
- load, reliability, and recovery testing
|
||||
- documentation and support model
|
||||
|
||||
### Roadmap at a glance
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
P0["Phase 0<br/>Generated protocol package"]
|
||||
P1["Phase 1<br/>Architecture + protocol contracts"]
|
||||
P2["Phase 2<br/>Full delegation transport"]
|
||||
P3["Phase 3<br/>Host-owned rollout + resume"]
|
||||
P4["Phase 4<br/>Capabilities + backend generalization"]
|
||||
P5["Phase 5<br/>Provider expansion"]
|
||||
P6["Phase 6<br/>Production hardening"]
|
||||
|
||||
P0 --> P1 --> P2 --> P3 --> P4 --> P5 --> P6
|
||||
```
|
||||
|
||||
## Final assessment
|
||||
|
||||
This prototype successfully demonstrates that Codex can act as the execution/runtime layer for a Universal Computer-style SDK without forcing the SDK to re-implement Codex’s built-in tools in Python.
|
||||
|
||||
That is the right strategic result.
|
||||
|
||||
At the same time, it is still a prototype in the most important sense:
|
||||
|
||||
- the delegation transport is not final
|
||||
- durability is not final
|
||||
- backend generality is not final
|
||||
|
||||
The good news is that the prototype reduced uncertainty in the right places. The remaining work is substantial, but it now looks like engineering, not research.
|
||||
universal-computer-codex-app-server-rfc.md — new file, 884 lines (@@ -0,0 +1,884 @@)
|
||||
# RFC: Host-Delegated Codex App-Server for Universal Computer
|
||||
|
||||
## Summary
|
||||
|
||||
Universal Computer already has the right high-level instinct: the host SDK should own orchestration, credentials, approvals, persistence, and backend selection, while the remote runtime should own local execution against the target filesystem and sandbox.
|
||||
|
||||
Today, those responsibilities are split awkwardly. Universal Computer's Python SDK builds the Responses request, defines tools in Python, and interprets raw model output into actionable tool calls. Codex app-server, by contrast, already has a richer Rust-native execution engine, tool surface, approval model, and event model, but it assumes Codex itself is the party speaking to the Responses API and, in normal operation, the party managing rollout persistence.
|
||||
|
||||
The proposal is to add a new **full delegation mode** to codex app-server:
|
||||
|
||||
- Codex still runs inside the destination container or locally.
|
||||
- Codex still owns prompt assembly, tool registration, tool execution, approvals, and turn semantics.
|
||||
- The **host SDK** becomes the sole party that talks to the Responses API.
|
||||
- The **host SDK** also becomes the source of truth for rollout persistence.
|
||||
- The app-server protocol grows a small set of new server-initiated requests and client responses so Codex can ask the host to create, stream, cancel, and finalize upstream model requests.
|
||||
|
||||
This gives Universal Computer what it wants: reuse of Codex's Rust-native tool/runtime behavior without giving up host-side orchestration, multi-container routing, approval policy, or host-managed conversation state.
|
||||
|
||||
## Context
|
||||
|
||||
### What Universal Computer does well today
|
||||
|
||||
From the Universal Computer side, the architecture is already clean:
|
||||
|
||||
- `Agent` owns declarative configuration:
|
||||
- `base_instructions`
|
||||
- `developer_instructions`
|
||||
- `user_instructions`
|
||||
- `plugins`
|
||||
- `tools`
|
||||
- `sampling_params`
|
||||
- `TaskContext` owns startup, manifest injection, snapshotting, and session binding.
|
||||
- `Task` is the durable rollout object:
|
||||
- it stores context
|
||||
- it stores resumable session state
|
||||
- it streams raw Responses events
|
||||
- it pauses when tool calls are pending
|
||||
- plugins are more than tool bundles:
|
||||
- they can contribute instructions
|
||||
- mutate context
|
||||
- mutate sampling params
|
||||
- mutate manifest/session setup
|
||||
|
||||
That is an important constraint: Universal Computer is not merely a remote shell. It is a **host orchestration framework**.
|
||||
|
||||
### What Codex app-server already provides
|
||||
|
||||
Codex app-server is already surprisingly close to what we need:
|
||||
|
||||
- thread and turn lifecycle APIs
|
||||
- streaming turn/item notifications
|
||||
- server-initiated approval requests
|
||||
- dynamic tools
|
||||
- apps/plugins/skills integration
|
||||
- configurable developer instructions and other session settings
|
||||
- client-managed notification transport
|
||||
|
||||
But the current model assumes:
|
||||
|
||||
- Codex itself makes the Responses API request
|
||||
- Codex owns the upstream stream lifecycle
|
||||
- Codex is the natural home for thread persistence
|
||||
|
||||
That assumption is the seam that needs to change.
|
||||
|
||||
## Design principle
|
||||
|
||||
The right boundary is:
|
||||
|
||||
- **Host SDK owns external orchestration**
|
||||
- **Remote Codex owns local execution semantics**
|
||||
|
||||
More concretely:
|
||||
|
||||
### Host-owned
|
||||
|
||||
- Responses API transport and credentials
|
||||
- rollout persistence
|
||||
- backend selection
|
||||
- multi-container routing
|
||||
- approval UX and policy
|
||||
- high-level session bootstrap
|
||||
|
||||
### Codex-owned
|
||||
|
||||
- instruction compilation
|
||||
- model request planning
|
||||
- tool schema materialization
|
||||
- tool execution against the live workspace/container
|
||||
- item/turn state machine
|
||||
- normalization of model events into Codex semantics
|
||||
|
||||
This is the key pushback: the host should not have to reconstruct Codex's prompts, tool schemas, or internal turn loop. If we force the SDK to do that, we reintroduce the exact duplication we want to eliminate.
|
||||
|
||||
## Goals
|
||||
|
||||
1. Support running Codex app-server in a target container or locally.
|
||||
2. Allow the host SDK to be the only component that talks to the Responses API.
|
||||
3. Preserve Codex as the implementation of the default tool surface.
|
||||
4. Preserve host-side approvals for all tools.
|
||||
5. Preserve host-side rollout persistence as the source of truth.
|
||||
6. Allow full client-provided configuration:
|
||||
- base instructions
|
||||
- developer instructions
|
||||
- user instructions
|
||||
- tool/plugin/app config
|
||||
7. Allow the host to override or replace the default tool set.
|
||||
8. Keep the protocol high-level and transport-agnostic enough for non-Docker backends.
|
||||
|
||||
## Non-goals
|
||||
|
||||
1. Re-implement Codex tool behavior in the Python SDK.
|
||||
2. Make the host responsible for prompt assembly.
|
||||
3. Force app-server to lose its current direct-to-Responses mode.
|
||||
4. Solve every multi-agent routing problem in the first iteration.
|
||||
|
||||
## Proposed model: Full Delegation Mode
|
||||
|
||||
Add a new app-server execution mode, conceptually:
|
||||
|
||||
- `direct` mode: current behavior
|
||||
- `fullDelegation` mode: new behavior
|
||||
|
||||
In `fullDelegation` mode:
|
||||
|
||||
1. The host starts app-server inside the target environment.
|
||||
2. The host provides all desired configuration at thread/session startup.
|
||||
3. Codex prepares the next upstream Responses request, but does not send it.
|
||||
4. Codex emits a server-initiated request to the host containing the prepared upstream request envelope.
|
||||
5. The host executes that request against the Responses API.
|
||||
6. The host streams upstream events back into app-server.
|
||||
7. Codex consumes those events, updates turn state, emits its normal item/turn notifications, and requests approvals or user input as needed.
|
||||
8. The host persists the resulting rollout externally.
|
||||
|
||||
This is not "remote shell plus JSON." It is better understood as **remote Codex with externalized model transport**.
|
||||
|
||||
## Why this fits Universal Computer
|
||||
|
||||
Today Universal Computer's `Task.run()` does three important jobs:
|
||||
|
||||
1. build the request
|
||||
2. stream events
|
||||
3. pause for tool calls
|
||||
|
||||
Under this RFC:
|
||||
|
||||
- job 1 moves from Python to Codex
|
||||
- job 2 remains host-owned at the transport layer
|
||||
- job 3 becomes cleaner, because Codex itself now owns tool interpretation and execution
|
||||
|
||||
That is a net simplification.
|
||||
|
||||
## Protocol additions
|
||||
|
||||
The existing app-server pattern to imitate is the approval flow: Codex can already issue server-initiated JSON-RPC requests to the client and resume when the client responds.
|
||||
|
||||
Full delegation should reuse that same pattern.
|
||||
|
||||
## New concepts
|
||||
|
||||
### 1. Delegated model request
|
||||
|
||||
Codex needs a way to say:
|
||||
|
||||
> "Here is the exact upstream request I want to make. Please make it for me, and stream the result back."
|
||||
|
||||
Proposed request:
|
||||
|
||||
- `model/request`
|
||||
|
||||
Purpose:
|
||||
|
||||
- server-initiated request from Codex to host
|
||||
- carries a canonicalized Responses request envelope
|
||||
|
||||
This envelope should include, at minimum:
|
||||
|
||||
- model
|
||||
- instructions or compiled system input
|
||||
- input items/messages
|
||||
- tool definitions
|
||||
- reasoning config
|
||||
- tool-choice config
|
||||
- request-level overrides derived from SDK/user configuration, such as reasoning effort, summary mode, verbosity, and other per-turn sampling controls
|
||||
- metadata needed for correlation
|
||||
- optional previous-response linkage if Codex wants it
|
||||
- stream expectation
|
||||
- opaque session/turn correlation ids
|
||||
|
||||
The important point is that this is **Codex-authored**. The host forwards it, it does not reinterpret it.
|
||||
|
||||
### 2. Delegated model stream injection
|
||||
|
||||
The host needs a way to stream upstream events back into Codex.
|
||||
|
||||
Proposed client method:
|
||||
|
||||
- `model/streamEvent`
|
||||
|
||||
Purpose:
|
||||
|
||||
- client-to-server notification or request delivering one upstream Responses stream event at a time
|
||||
|
||||
The server should accept:
|
||||
|
||||
- raw upstream event payload
|
||||
- correlation id tying the event to the outstanding `model/request`
|
||||
|
||||
This lets Codex continue using its native event handling logic.
|
||||
|
||||
### 3. Terminal stream semantics
|
||||
|
||||
For normal operation, Codex should infer terminal model state from the raw upstream Responses events themselves, especially `response.completed` and `response.failed`. In other words, the canonical end-of-turn signal should come from the same event stream Codex is already consuming.
|
||||
|
||||
A separate client method is only needed for cases where the host cannot provide a terminal Responses event, for example:
|
||||
|
||||
- the host canceled the upstream request before a terminal event was emitted
|
||||
- the network stream disconnected mid-flight
|
||||
- the host rejected the delegated request before sending it upstream
|
||||
|
||||
In that narrower case, a small escape hatch such as `model/streamAborted` is useful. It should carry:
|
||||
|
||||
- delegated request id
|
||||
- abort reason such as `canceled`, `disconnected`, or `requestRejected`
|
||||
- normalized error info if relevant
|
||||
|
||||
This keeps the happy path simple while still giving Codex a way to distinguish "the model finished" from "the host-side transport broke."
|
||||
|
||||
### 4. Delegated model cancellation
|
||||
|
||||
Codex may need to ask the host to cancel an in-flight upstream request.
|
||||
|
||||
Proposed server request:
|
||||
|
||||
- `model/cancel`
|
||||
|
||||
This is important for:
|
||||
|
||||
- turn interruption
|
||||
- approval denial during streaming
|
||||
- client disconnect handling
|
||||
- compaction or reroute logic
|
||||
|
||||
### 5. External rollout mode
|
||||
|
||||
Codex needs to know it is not the durable source of truth.
|
||||
|
||||
Proposed thread/session config:
|
||||
|
||||
- `rolloutOwnership: "server" | "client"`
|
||||
|
||||
In the new mode, use `"client"`.
|
||||
|
||||
Behaviorally, this means:
|
||||
|
||||
- server may still keep ephemeral in-memory turn state
|
||||
- server should not assume persisted thread state is canonical
|
||||
- resume/fork semantics should allow the client to provide prior rollout context explicitly
|
||||
|
||||
### 6. External history hydrate
|
||||
|
||||
If the host owns persistence, Codex needs a way to rehydrate a thread from client-supplied history.
|
||||
|
||||
Proposed startup field or dedicated method:
|
||||
|
||||
- `thread/start` or `thread/resume` with `initialItems` / `turnHistory`
|
||||
|
||||
This should be the normalized Codex-facing history representation, not raw Responses-only items.
|
||||
|
||||
That keeps Codex's turn engine informed without forcing SQLite/file rollout ownership back into the container.
|
||||
|
||||
There is already an implicit translation boundary here today: Codex does not operate on raw SSE events as its durable thread model. It turns upstream Responses output into a richer internal history made of turns and items. Client-owned rollout mode would make that boundary explicit. The host would persist the Codex-facing item history it receives over app-server notifications, then feed that normalized history back on resume, rather than trying to reconstruct a thread from raw Responses API events alone.
|
||||
|
||||
## Proposed event surface
|
||||
|
||||
A clean high-level set could be:
|
||||
|
||||
### Server -> client
|
||||
|
||||
- `model/request`
|
||||
- `model/cancel`
|
||||
- `delegation/request` for subagents or cross-container execution
|
||||
- existing approval requests remain unchanged
|
||||
- existing `item/tool/requestUserInput` remains unchanged
|
||||
|
||||
### Client -> server
|
||||
|
||||
- `model/streamEvent`
|
||||
- `model/streamAborted`
|
||||
- `model/requestRejected`
|
||||
- `delegation/result`
|
||||
- existing approval decisions remain unchanged
|
||||
- existing tool/user-input responses remain unchanged
|
||||
|
||||
## Mermaid: end-to-end flow
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Host as "Universal Computer SDK (host)"
|
||||
participant Codex as "Codex app-server (container/local)"
|
||||
participant API as "Responses API"
|
||||
|
||||
Host->>Codex: thread/start + full config + fullDelegation
|
||||
Host->>Codex: turn/start(user input)
|
||||
|
||||
Codex-->>Host: model/request(request envelope)
|
||||
Host->>API: POST /v1/responses (stream=true)
|
||||
|
||||
loop Streaming
|
||||
API-->>Host: response event
|
||||
Host->>Codex: model/streamEvent(event)
|
||||
Codex-->>Host: item/turn notifications
|
||||
end
|
||||
|
||||
Codex-->>Host: item/commandExecution/requestApproval
|
||||
Host->>Host: programmatic approval policy
|
||||
Host-->>Codex: approval decision
|
||||
|
||||
Codex->>Codex: execute tool in container
|
||||
|
||||
Codex-->>Host: model/request(next request after tool output)
|
||||
Host->>API: next Responses call
|
||||
API-->>Host: terminal event
|
||||
Host->>Codex: model/streamEvent(response.completed)
|
||||
|
||||
Codex-->>Host: turn/completed
|
||||
Host->>Host: persist rollout as source of truth
|
||||
```
|
||||
|
||||
## Mermaid: state ownership
|
||||
|
||||
In plain English: the host SDK remains the control plane, and Codex inside the container remains the execution plane. The host is responsible for the things that need global visibility or trust: talking to the Responses API, persisting rollout state, deciding approval policy, and deciding where delegated work should run. Codex is responsible for the things that need local workspace context: assembling the actual model request, running the turn state machine, choosing and executing tools, and applying side effects inside the container. The diagram below just shows that split of responsibilities rather than a strict request-by-request sequence.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Host["Host SDK"]
|
||||
H1["Responses auth + transport"]
|
||||
H2["Rollout persistence"]
|
||||
H3["Approval policy"]
|
||||
H4["Backend routing / multi-container"]
|
||||
end
|
||||
|
||||
subgraph Remote["Remote Codex app-server"]
|
||||
C1["Prompt + request synthesis"]
|
||||
C2["Turn state machine"]
|
||||
C3["Tool registry + execution"]
|
||||
C4["Workspace-local side effects"]
|
||||
end
|
||||
|
||||
H1 --> C2
|
||||
H2 --> C2
|
||||
H3 --> C3
|
||||
H4 --> C2
|
||||
C1 --> H1
|
||||
C2 --> H2
|
||||
C3 --> H3
|
||||
```
|
||||
|
||||
## Behavioral changes required inside Codex
|
||||
|
||||
### 1. Separate "prepare request" from "send request"
|
||||
|
||||
Today those are effectively fused. Full delegation requires Codex to:
|
||||
|
||||
- build the canonical upstream request
|
||||
- stop before transport
|
||||
- wait for externally streamed events
|
||||
|
||||
That is the fundamental internal refactor.
|
||||
|
||||
### 2. Accept externally sourced Responses events as first-class input
|
||||
|
||||
Codex must be able to ingest a Responses event stream that it did not open itself.
|
||||
|
||||
This means:
|
||||
|
||||
- correlation of event stream to active turn/request
|
||||
- same parsing, validation, and item synthesis path as direct mode
|
||||
- same terminal handling and retry semantics where applicable
|
||||
|
||||
### 3. Make thread persistence optional, not authoritative
|
||||
|
||||
In client-owned rollout mode, Codex should treat persistence as operational cache, not source of truth.
|
||||
|
||||
A good discipline is:
|
||||
|
||||
- in-memory state for active turn execution
|
||||
- explicit rehydrate from client history on resume
|
||||
- no hidden reliance on local rollout files for correctness
|
||||
|
||||
### 4. Make tool registry fully session-configurable
|
||||
|
||||
This is already partly present through dynamic tools, plugins, and apps, but the new mode should make it explicit that the tool surface may be:
|
||||
|
||||
- default Codex tools
|
||||
- default Codex tools plus client additions
|
||||
- a full client override
|
||||
- a minimal safe subset
|
||||
|
||||
The important policy question is precedence. My recommendation:
|
||||
|
||||
- `default`
|
||||
- `default + additive overrides`
|
||||
- `replace entirely`
|
||||
|
||||
as three explicit modes, not implicit merging.
|
||||
|
||||
### 5. Preserve current approval semantics across all tools
|
||||
|
||||
Approvals must remain server-initiated from Codex to host, because that is the clean point where the host can inject policy without reimplementing runtime behavior.
|
||||
|
||||
Operationally, this likely means Codex should be started with an approval configuration that never blocks on an in-container human prompt and instead always routes approval decisions through app-server requests to the host. The host SDK then becomes the policy engine and UI surface for tool approvals, while Codex remains the party that formulates the execution request and enforces the answer.
|
||||
|
||||
The host should not be approving raw Responses tool call output. It should be approving Codex's normalized execution intent: "run this command," "apply this patch," "grant this network access," and so on.
|
||||
|
||||
### 6. Support host-intercepted delegation as a future sibling of model delegation
|
||||
|
||||
If you want multi-container delegation, do not hide subagent creation entirely inside the container runtime. Give it a parallel host-visible control point.
|
||||
|
||||
Concretely, app-server should emit a `delegation/request` event whenever Codex wants to spawn a subagent. That event should include:
|
||||
|
||||
- the parent thread/turn context
|
||||
- the requested subagent instructions and input items
|
||||
- the requested tool/profile configuration
|
||||
- execution hints such as preferred cwd, sandbox, or model
|
||||
- enough metadata for the host to correlate the child back to the parent
|
||||
|
||||
The host SDK can then choose one of two paths:
|
||||
|
||||
1. run the subagent in the same container and return a `delegation/result`
|
||||
2. materialize it as a separate top-level agent on another backend or container and still return a `delegation/result`
|
||||
|
||||
In both cases, Codex should treat the result as a structured child outcome rather than assuming where or how the subagent ran. That gives the SDK user real control over topology without making Codex blind to delegated work.
|
||||
|
||||
## Configuration model
|
||||
|
||||
Full delegation mode must support all the configuration Universal Computer already treats as first-class:
|
||||
|
||||
- base instructions
|
||||
- developer instructions
|
||||
- user instructions
|
||||
- model choice
|
||||
- sampling params
|
||||
- plugin declarations
|
||||
- app/plugin auth context
|
||||
- tool override policy
|
||||
- approval policy
|
||||
- cwd / manifest / workspace metadata
|
||||
|
||||
The cleanest way to do this is:
|
||||
|
||||
1. client sends declarative config to app-server
|
||||
2. Codex composes the actual upstream request
|
||||
3. host transmits that request unchanged
|
||||
|
||||
This preserves client control without creating dual prompt builders.
|
||||
|
||||
In practice, this means there are two useful layers of configuration:
|
||||
|
||||
- session-level defaults supplied when establishing the thread or runtime
|
||||
- request-level overrides supplied per turn, such as reasoning effort, summaries, verbosity, or other sampling controls
|
||||
|
||||
Codex should own how those layers merge into the final upstream request, but the SDK user should still be able to express both layers declaratively.
|
||||
|
||||
## Tooling model
|
||||
|
||||
Universal Computer's current plugins can affect:
|
||||
|
||||
- tools
|
||||
- instructions
|
||||
- sampling params
|
||||
- context
|
||||
- manifest
|
||||
|
||||
Codex app-server should not try to mimic Python plugin objects. Instead, the protocol should expose the resulting configuration effects in transportable form.
|
||||
|
||||
Three buckets matter:
|
||||
|
||||
### 1. Native Codex tools
|
||||
|
||||
Examples:
|
||||
|
||||
- shell
|
||||
- apply patch
|
||||
- filesystem-like behavior
|
||||
- skills/apps tooling
|
||||
|
||||
These should stay implemented in Rust.
|
||||
|
||||
### 2. Declarative client-added tools
|
||||
|
||||
These are already conceptually close to dynamic tools.
|
||||
|
||||
### 3. Host policy wrappers
|
||||
|
||||
The host may still want to:
|
||||
|
||||
- require approval
|
||||
- deny certain tools
|
||||
- redirect certain actions
|
||||
- attach metadata
|
||||
|
||||
This should be policy/config, not alternative execution logic.
|
||||
|
||||
## Rollout ownership
|
||||
|
||||
This deserves explicit treatment.
|
||||
|
||||
If the host is the source of truth, then app-server should not quietly persist a more authoritative local reality than the host sees.
|
||||
|
||||
Recommended behavior in client-owned rollout mode:
|
||||
|
||||
- active turn state exists in memory inside Codex
|
||||
- the host receives all canonical turn/item notifications
|
||||
- the host persists them
|
||||
- resume requires the host to resupply prior normalized history
|
||||
- local persistence, if any, is cache-only and discardable
|
||||
|
||||
That keeps recovery honest.
|
||||
|
||||
## Failure semantics
|
||||
|
||||
Full delegation mode needs explicit failure boundaries.
|
||||
|
||||
### Host-side failures
|
||||
|
||||
Examples:
|
||||
|
||||
- Responses auth failure
|
||||
- network failure
|
||||
- stream disconnect
|
||||
- host policy rejection
|
||||
|
||||
These should arrive back in Codex as delegated request failures and surface through normal turn failure notifications.
|
||||
|
||||
### Codex-side failures
|
||||
|
||||
Examples:
|
||||
|
||||
- malformed upstream event
|
||||
- incompatible tool result
|
||||
- internal turn-state fault
|
||||
|
||||
These should surface as Codex errors to the host.
|
||||
|
||||
### Split-brain prevention
|
||||
|
||||
At most one outstanding delegated model request should be active per turn segment unless Codex explicitly supports multiplexing. Start single-flight.
|
||||
|
||||
That constraint is worth being conservative about.
|
||||
|
||||
It is different from "only one tool runs at a time" or "only one turn exists at a time." The point is narrower: for a given live turn segment, there should be one authoritative upstream model stream that Codex is currently interpreting. If the host allowed two overlapping delegated Responses streams to feed the same turn state, Codex would need a much more complicated merge model for deltas, tool calls, and terminal events. Starting with single-flight keeps turn state deterministic.
|
||||
|
||||
## Security and trust boundaries
|
||||
|
||||
This design is stronger than today's Python-tool model in one important way: the canonical executor of shell and file actions moves into the same Rust runtime that already knows Codex's approval and event semantics.
|
||||
|
||||
That said, the host now becomes highly privileged because it owns:
|
||||
|
||||
- auth
|
||||
- transcript persistence
|
||||
- upstream transport
|
||||
- approval decisions
|
||||
|
||||
That is acceptable, because Universal Computer already lives at that privilege level.
|
||||
|
||||
## Migration path
|
||||
|
||||
### Phase 1
|
||||
|
||||
- add `fullDelegation` execution mode
|
||||
- add `model/request`
|
||||
- add `model/streamEvent`
|
||||
- add `model/streamAborted`
|
||||
- add `model/cancel`
|
||||
- add client-owned rollout mode with startup rehydrate
|
||||
|
||||
This is enough for a single-container Universal Computer integration.
|
||||
|
||||
### Phase 2
|
||||
|
||||
- add explicit tool-set override modes
|
||||
- harden resume/fork semantics for externally persisted history
|
||||
- support more complete correlation and retry rules
|
||||
|
||||
### Phase 3
|
||||
|
||||
- add host-visible delegation/subagent interception
|
||||
- route subagents to alternate containers/backends
|
||||
|
||||
## Open questions
|
||||
|
||||
1. What is the canonical wire format for delegated model requests? My recommendation: a Codex-defined envelope that is close to Responses payloads, but explicitly versioned and correlation-safe.
|
||||
2. Should the host stream raw Responses events or normalized Codex events back? Raw Responses events. Normalization should remain inside Codex.
|
||||
3. Should local persistence be disabled entirely in client-owned rollout mode? Prefer "non-authoritative cache" over "disabled," but correctness must not depend on it.
|
||||
4. Should tool overrides be merged or replaced? There is a real difference:
|
||||
- merged means "start with Codex defaults, then add or selectively override entries"
|
||||
- replaced means "the client supplies the entire tool surface and Codex defaults are not implicitly present"
|
||||
Support both, explicitly. Implicit merge will become a policy trap.
|
||||
5. How much of plugin behavior should be representable over protocol? Only the effects, not the Python object model.
|
||||
|
||||
## Changes required in Universal Computer
|
||||
|
||||
The Codex-side protocol changes are only half of the story. To make this architecture real, Universal Computer also needs to grow a host-side integration layer that treats Codex app-server as a remote execution runtime rather than treating the Responses API as the only runtime boundary.
|
||||
|
||||
At a high level, Universal Computer should stop being responsible for implementing the default Codex tool surface in Python and instead become responsible for:
|
||||
|
||||
- provisioning a compatible Codex binary
|
||||
- starting and supervising app-server
|
||||
- relaying delegated model traffic to the selected provider
|
||||
- persisting rollout state as the canonical host-side record
|
||||
- exposing SDK ergonomics for tool configuration, approvals, and delegation routing
|
||||
|
||||
### 1. Pin and provision a Codex version
|
||||
|
||||
Universal Computer will need an explicit notion of the Codex runtime version it expects to launch.
|
||||
|
||||
That likely means:
|
||||
|
||||
- adding a pinned Codex version field to the agent or runtime configuration
|
||||
- defining how that resolves to a concrete binary artifact for the current host platform
|
||||
- making the app-server protocol version part of compatibility checks
|
||||
|
||||
This should be treated as a first-class runtime dependency, not an incidental local executable lookup. If the host and container disagree about protocol shape, delegation mode will fail in confusing ways, so version pinning should be deliberate.
|
||||
|
||||
Recommended direction:
|
||||
|
||||
- Universal Computer pins a Codex release or build identifier explicitly
|
||||
- the host resolves and caches that artifact
|
||||
- the runtime startup path verifies the binary version before starting app-server
|
||||
|
||||
### 2. Reuse existing backends to place the Codex binary in the destination environment
|
||||
|
||||
Universal Computer already knows how to create and resume execution environments. It should reuse that backend abstraction for Codex provisioning rather than inventing a separate deployment system.
|
||||
|
||||
Concretely, the current backend model is already a good fit for binary staging:
|
||||
|
||||
- `BaseSandboxClient` creates and resumes sessions
|
||||
- `BaseSandboxSession` exposes `write`, `read`, `exec`, and workspace materialization
|
||||
- manifest entries such as `LocalFile` already support copying a host file into the workspace and applying permissions via `chmod`
|
||||
|
||||
So the binary-placement story does not need a brand-new distribution mechanism. Universal Computer can either:
|
||||
|
||||
- stage the pinned Codex binary as a manifest artifact with executable permissions, or
|
||||
- push it into the workspace during session startup with `session.write(...)` followed by `chmod`
|
||||
|
||||
The first option is especially attractive because it fits the existing manifest/snapshot model and keeps provisioning declarative.
|
||||
|
||||
At a high level, each backend would need to support:
|
||||
|
||||
- ensuring the Codex binary is present in the target environment
|
||||
- placing any required companion assets if Codex needs them
|
||||
- starting `codex app-server` with the right arguments
|
||||
- returning a live transport handle back to the host SDK
|
||||
|
||||
For local execution, this step can degenerate into "use a local binary and skip copy." For remote or containerized execution, this becomes an explicit staging step.
|
||||
|
||||
The important design point is that backend-specific logic stays confined to:
|
||||
|
||||
- binary placement
|
||||
- process startup
|
||||
- transport attachment
|
||||
- snapshot and manifest lifecycle
|
||||
|
||||
and not tool execution.
|
||||
|
||||
One nuance from the codebase: backend reuse is straightforward for file placement, but not yet for long-lived supervised process attachment. Universal Computer's shared session API supports one-shot `exec` everywhere, while PTY-style attached process interaction exists only on some backends. If Codex app-server is going to be launched as a long-running child process, Universal Computer will likely need one additional backend-neutral capability for "start a process and keep a live byte stream attached," rather than trying to shoehorn everything through one-shot exec.
|
||||
|
||||
### 3. Replace Python implementations of the default tool surface with symbolic tool references
|
||||
|
||||
Universal Computer can likely delete or de-emphasize the Python implementations of the default filesystem and shell tool behavior once Codex is the executor.
|
||||
|
||||
The code today makes this fairly concrete: the built-in tool surface is assembled from Python plugins like `Filesystem`, `Shell`, `ApplyPatch`, and `Compaction`. The first three are thin wrappers that bind to a `SandboxSession`, expose tool schemas, and add instruction fragments; they are not deep subsystems in their own right.
|
||||
|
||||
But the SDK still needs a way to express tool policy and shape the tool surface. So instead of Python tool implementations being the source of truth, they should become declarative references, for example:
|
||||
|
||||
- enable Codex shell
|
||||
- disable Codex apply-patch
|
||||
- use the default Codex tool set
|
||||
- replace the default tool set with a minimal subset
|
||||
|
||||
In other words, the Python layer should continue to speak in terms of tool identities and policy, but not carry the execution logic for the built-in tools.
|
||||
|
||||
This is important for UX. SDK users still want to write things like:
|
||||
|
||||
- "enable shell but not apply patch"
|
||||
- "disable filesystem writes"
|
||||
- "use only custom tools"
|
||||
|
||||
Those should remain easy, but they should compile down to app-server configuration rather than selecting Python classes that implement the behavior directly.
|
||||
|
||||
The one built-in plugin that does not fit the "just replace it with a Codex tool" bucket is compaction. In Universal Computer today, compaction is expressed as sampling-parameter and context-processing behavior rather than as a shell/filesystem tool. So the migration should separate:
|
||||
|
||||
- built-in execution tools that move to Codex
|
||||
- host-side request shaping policies, like compaction thresholds, that may still belong in the SDK and need to be forwarded into delegated model requests
|
||||
|
||||
### 4. Add a dedicated app-server package or module
|
||||
|
||||
Universal Computer should grow a dedicated host-side app-server integration package rather than smearing the logic across the existing agent runtime.
|
||||
|
||||
Conceptually, that package would own:
|
||||
|
||||
- app-server process lifecycle
|
||||
- connection management
|
||||
- protocol type definitions
|
||||
- delegated model request handling
|
||||
- approval request handling
|
||||
- delegated subagent handling
|
||||
- rollout event capture and persistence hooks
|
||||
|
||||
A clean package boundary here matters because this integration is not just "another tool." It is a new runtime substrate.
|
||||
|
||||
A useful mental split would be:
|
||||
|
||||
- core Universal Computer agent model
|
||||
- backend/session abstractions
|
||||
- provider adapters
|
||||
- app-server bridge
|
||||
|
||||
That keeps the Codex-specific transport logic from leaking into unrelated parts of the SDK.
|
||||
|
||||
### 5. Support the new delegated app-server events
|
||||
|
||||
Universal Computer will need host-side handlers for the new protocol surface proposed above.
|
||||
|
||||
At minimum, that means understanding and responding to:
|
||||
|
||||
- `model/request`
|
||||
- `model/streamEvent`
|
||||
- `model/streamAborted`
|
||||
- `model/cancel`
|
||||
- `delegation/request`
|
||||
- `delegation/result`
|
||||
- existing approval requests
|
||||
|
||||
In practice, the host runtime loop changes from:
|
||||
|
||||
- call `responses.create(...)`
|
||||
- stream raw events
|
||||
- inspect pending tool calls
|
||||
|
||||
to:
|
||||
|
||||
- wait for `model/request` from Codex
|
||||
- execute that request against the selected provider
|
||||
- feed raw upstream events back with `model/streamEvent`
|
||||
- honor `model/cancel` and approval flows
|
||||
- optionally route `delegation/request` to a different container or backend
|
||||
|
||||
That is a meaningful runtime refactor, but it is conceptually clean: Universal Computer becomes an orchestrator around Codex rather than a reimplementation of Codex behavior.
|
||||
|
||||
### 6. Add a host-side multi-provider abstraction
|
||||
|
||||
Today Universal Computer is structurally very OpenAI-shaped because the runtime path is built around the Responses API client. In delegated mode, that logic becomes even more central, so it should be abstracted intentionally.
|
||||
|
||||
The current code is explicit about this: `Task` stores an `openai.AsyncClient` and its default producer literally calls `client.responses.create(...)`. So multi-provider support is not a small configuration tweak; it is a real runtime abstraction change.
|
||||
|
||||
The host needs a provider abstraction capable of:
|
||||
|
||||
- taking a Codex-authored delegated model request
|
||||
- translating it to the selected upstream provider call shape
|
||||
- streaming provider events back into the common app-server event format
|
||||
- surfacing provider-specific failures in a normalized way
|
||||
|
||||
For OpenAI-backed flows, that can stay close to raw Responses semantics.
|
||||
|
||||
For Anthropic or other providers, the host may need an adapter layer that maps:
|
||||
|
||||
- request fields
|
||||
- tool-calling events
|
||||
- reasoning/summary controls where supported
|
||||
- terminal and error events
|
||||
|
||||
back into the event shape Codex expects.
|
||||
|
||||
This is precisely why the translation boundary should live on the host, not in the container. Provider choice is a host concern.
|
||||
|
||||
Recommended direction:
|
||||
|
||||
- define a `ModelProvider` or similarly named host-side interface
|
||||
- keep OpenAI as the reference implementation
|
||||
- add provider capability metadata so unsupported delegated-request features can fail clearly rather than degrade silently
|
||||
|
||||
There is already a hint of the right design elsewhere in Universal Computer: the memory subsystem defines normalized result schemas specifically so the rest of the system does not need to understand provider-specific formats. The delegated app-server bridge should follow the same principle for streamed model events.
|
||||
|
||||
### 7. Add host-side rollout persistence built around Codex item history
|
||||
|
||||
If the host is now the source of truth, Universal Computer should persist the Codex-facing event history it receives from app-server, not just the raw upstream Responses interaction.
|
||||
|
||||
That likely means persisting:
|
||||
|
||||
- thread identity
|
||||
- turns
|
||||
- normalized items
|
||||
- approval decisions
|
||||
- delegation edges between parent and child agents
|
||||
- provider and runtime metadata
|
||||
|
||||
This persistence layer should support:
|
||||
|
||||
- resume into the same container
|
||||
- resume into a fresh container with rehydrated history
|
||||
- cross-backend continuation when the SDK chooses to re-home the work
|
||||
|
||||
### 8. Transport recommendation: prefer stdio, falling back to a reliable byte-stream bridge
|
||||
|
||||
For the host-to-container app-server transport, the safest recommendation is:
|
||||
|
||||
- first choice: stdio over an attached process handle
|
||||
- second choice: a reliable byte-stream tunnel such as SSH or a backend-managed TCP stream
|
||||
|
||||
Why:
|
||||
|
||||
- app-server traffic is ordered, stateful, and request-response oriented
|
||||
- JSON-RPC + streaming notifications want reliable delivery and backpressure
|
||||
|
||||
`stdio` is still the right target transport because Codex app-server already supports it as the primary mode. But after a deeper look at Universal Computer, there is an important implementation detail: the current shared session abstraction does not yet provide a backend-neutral "launch a long-lived child process and keep stdin/stdout attached" API. It provides:
|
||||
|
||||
- one-shot `exec` everywhere
|
||||
- optional PTY process support on some backends such as local Unix and Modal
|
||||
- no equivalent attached-process primitive on Docker today
|
||||
|
||||
So the recommendation should be more precise:
|
||||
|
||||
- standardize on app-server `stdio` as the protocol transport
|
||||
- add a new backend-neutral attached-process capability to Universal Computer for long-lived bridge processes
|
||||
- make that capability part of the expected contract for all supported backends, instead of treating it as an optimization for only a few environments
|
||||
- implement that capability per backend, instead of introducing a separate network protocol just to compensate for the missing primitive
|
||||
|
||||
If Universal Computer can directly attach to the launched process, `stdio` is ideal because:
|
||||
|
||||
- it matches app-server's primary supported transport
|
||||
- it avoids inventing network semantics
|
||||
- it inherits process lifecycle naturally
|
||||
- it is easy to secure because nothing is exposed on a network port
|
||||
|
||||
For Docker specifically, that likely means adding a backend implementation that can launch Codex as an attached process rather than relying only on detached one-shot execs. For example, the backend could use an attached `docker exec` session or make Codex the supervised long-lived process inside the container and bridge its stdio back to the host.
|
||||
|
||||
If a direct process attachment is impossible because of the backend, the next best choice is a reliable stream transport tunneled over something the backend already trusts:
|
||||
|
||||
- SSH port forwarding or command execution with pipes
|
||||
- a backend-provided TCP tunnel
|
||||
|
||||
I would not recommend treating app-server websocket as the default fallback here, because Codex app-server currently describes websocket transport as experimental and unsupported. If a backend absolutely forces a bridged network transport, prefer a reliable stream that still carries stdio-like semantics over inventing a new public network surface.
|
||||
|
||||
Recommendation:
|
||||
|
||||
- standardize on `stdio` as the canonical transport
|
||||
- add a UC session-level attached-process abstraction to make `stdio` practical across backends
|
||||
- require all supported backends to implement an attached-process bridge capable of launching and supervising app-server with a live byte stream
|
||||
- use SSH or another reliable stream tunnel only when direct attachment is impossible
|
||||
- treat websocket support as an implementation detail of last resort, not the preferred contract
|
||||
|
||||
This keeps the transport boring, which is exactly what you want for the control plane of a remote agent runtime.
|
||||
|
||||
### 9. Suggested Universal Computer rollout plan
|
||||
|
||||
A pragmatic order of operations would be:
|
||||
|
||||
1. add a Codex runtime abstraction with version pinning and binary provisioning
|
||||
2. add an app-server bridge package with stdio-based transport
|
||||
3. implement OpenAI delegated model handling end to end
|
||||
4. persist Codex-facing history host-side and support resume
|
||||
5. replace Python built-in tool execution with declarative tool enablement
|
||||
6. add subagent interception and routing
|
||||
7. add additional provider adapters such as Anthropic
|
||||
|
||||
That sequence gets a single-container OpenAI-backed flow working early while leaving room for multi-provider and multi-container sophistication later.
|
||||
|
||||
## Recommendation
|
||||
|
||||
Build **full delegation mode** as an app-server-level capability, not as a Universal Computer-specific shim.
|
||||
|
||||
The winning shape is:
|
||||
|
||||
- remote Codex prepares
|
||||
- host transmits
|
||||
- remote Codex interprets
|
||||
- host persists
|
||||
|
||||
That preserves the best properties of both systems:
|
||||
|
||||
- Universal Computer keeps its orchestration superpower
|
||||
- Codex becomes the reusable execution engine and tool runtime you actually want to standardize on
|
||||
Reference in New Issue
Block a user