diff --git a/sdk/python/docs/api-reference.md b/sdk/python/docs/api-reference.md index 6be7e19737..183377f76d 100644 --- a/sdk/python/docs/api-reference.md +++ b/sdk/python/docs/api-reference.md @@ -3,7 +3,7 @@ Public surface of `openai_codex` for app-server v2. This SDK surface is experimental. Turn streams are routed by turn ID so one client can consume multiple active turns concurrently. -Thread and turn starts expose `approval_mode`. `ApprovalMode.auto_review` is the default; use `ApprovalMode.deny_all` to deny escalated permissions. +Thread and turn starts expose `approval_mode`. `ApprovalMode.auto_review` is the default; use `ApprovalMode.deny_all` to deny escalated permissions. `ApprovalMode.dangerously_bypass_approvals_and_sandbox` disables approvals and requests full sandbox bypass. That explicit bypass mode cannot be combined with `sandbox` on thread APIs or `sandbox_policy` on turn APIs. ## Package Entry diff --git a/sdk/python/scripts/update_sdk_artifacts.py b/sdk/python/scripts/update_sdk_artifacts.py index a40a4aa9c3..344ab0c9be 100755 --- a/sdk/python/scripts/update_sdk_artifacts.py +++ b/sdk/python/scripts/update_sdk_artifacts.py @@ -867,7 +867,26 @@ def _approval_mode_model_arg_lines(*, indent: str = " ") -> list[str] ] -def _model_arg_lines(fields: list[PublicFieldSpec], *, indent: str = " ") -> list[str]: +def _approval_mode_thread_sandbox_line(*, indent: str = " ") -> str: + """Return the approval-mode sandbox preset for thread operations.""" + return f"{indent}sandbox = _thread_sandbox_for_approval_mode(approval_mode, sandbox)" + + +def _approval_mode_turn_sandbox_policy_lines( + *, indent: str = " " +) -> list[str]: + """Return the approval-mode sandbox preset for turn operations.""" + return [ + f"{indent}sandbox_policy = _turn_sandbox_policy_for_approval_mode(", + f"{indent} approval_mode,", + f"{indent} sandbox_policy,", + f"{indent})", + ] + + +def _model_arg_lines( + fields: list[PublicFieldSpec], *, indent: str = " " +) -> list[str]: return [f"{indent}{field.wire_name}={field.py_name}," for field in fields] @@ -896,6 +915,7 @@ def _render_codex_block( *_kw_signature_lines(thread_start_fields), " ) -> Thread:", _approval_mode_assignment_line("_approval_mode_settings"), + _approval_mode_thread_sandbox_line(), " params = ThreadStartParams(", *_approval_mode_model_arg_lines(), *_model_arg_lines(thread_start_fields), @@ -921,6 +941,7 @@ def _render_codex_block( *_kw_signature_lines(resume_fields), " ) -> Thread:", _approval_mode_assignment_line("_approval_mode_override_settings"), + _approval_mode_thread_sandbox_line(), " params = ThreadResumeParams(", " thread_id=thread_id,", *_approval_mode_model_arg_lines(), @@ -937,6 +958,7 @@ def _render_codex_block( *_kw_signature_lines(fork_fields), " ) -> Thread:", _approval_mode_assignment_line("_approval_mode_override_settings"), + _approval_mode_thread_sandbox_line(), " params = ThreadForkParams(", " thread_id=thread_id,", *_approval_mode_model_arg_lines(), @@ -970,6 +992,7 @@ def _render_async_codex_block( " ) -> AsyncThread:", " await self._ensure_initialized()", _approval_mode_assignment_line("_approval_mode_settings"), + _approval_mode_thread_sandbox_line(), " params = ThreadStartParams(", *_approval_mode_model_arg_lines(), *_model_arg_lines(thread_start_fields), @@ -997,6 +1020,7 @@ def _render_async_codex_block( " ) -> AsyncThread:", " await self._ensure_initialized()", _approval_mode_assignment_line("_approval_mode_override_settings"), + _approval_mode_thread_sandbox_line(), " params = ThreadResumeParams(", " thread_id=thread_id,", *_approval_mode_model_arg_lines(), @@ -1014,6 +1038,7 @@ def _render_async_codex_block( " ) -> AsyncThread:", " await self._ensure_initialized()", _approval_mode_assignment_line("_approval_mode_override_settings"), + _approval_mode_thread_sandbox_line(), " params = ThreadForkParams(", " thread_id=thread_id,", *_approval_mode_model_arg_lines(), @@ -1047,6 +1072,7 @@ def _render_thread_block( " ) -> TurnHandle:", " wire_input = _to_wire_input(input)", _approval_mode_assignment_line("_approval_mode_override_settings"), + *_approval_mode_turn_sandbox_policy_lines(), " params = TurnStartParams(", " thread_id=self.id,", " input=wire_input,", @@ -1073,6 +1099,7 @@ def _render_async_thread_block( " await self._codex._ensure_initialized()", " wire_input = _to_wire_input(input)", _approval_mode_assignment_line("_approval_mode_override_settings"), + *_approval_mode_turn_sandbox_policy_lines(), " params = TurnStartParams(", " thread_id=self.id,", " input=wire_input,", diff --git a/sdk/python/src/openai_codex/_approval_mode.py b/sdk/python/src/openai_codex/_approval_mode.py index bbb57030c0..033dfe6f19 100644 --- a/sdk/python/src/openai_codex/_approval_mode.py +++ b/sdk/python/src/openai_codex/_approval_mode.py @@ -15,6 +15,9 @@ class ApprovalMode(str, Enum): deny_all = "deny_all" auto_review = "auto_review" + dangerously_bypass_approvals_and_sandbox = ( + "dangerously_bypass_approvals_and_sandbox" + ) def _approval_mode_settings( @@ -33,6 +36,8 @@ def _approval_mode_settings( ) case ApprovalMode.deny_all: return AskForApproval(root=AskForApprovalValue.never), None + case ApprovalMode.dangerously_bypass_approvals_and_sandbox: + return AskForApproval(root=AskForApprovalValue.never), None case _: return _assert_never_approval_mode(approval_mode) diff --git a/sdk/python/src/openai_codex/api.py b/sdk/python/src/openai_codex/api.py index 0ea99f9b3d..2dd6bfc81a 100644 --- a/sdk/python/src/openai_codex/api.py +++ b/sdk/python/src/openai_codex/api.py @@ -30,6 +30,7 @@ from ._run import ( from .async_client import AsyncAppServerClient from .client import AppServerClient, AppServerConfig from .generated.v2_all import ( + DangerFullAccessSandboxPolicy, ModelListResponse, Personality, ReasoningEffort, @@ -60,6 +61,36 @@ from .generated.v2_all import ( from .models import InitializeResponse, JsonObject, Notification +def _thread_sandbox_for_approval_mode( + approval_mode: ApprovalMode | None, + sandbox: SandboxMode | None, +) -> SandboxMode | None: + """Apply approval-mode sandbox presets for thread operations.""" + if approval_mode is not ApprovalMode.dangerously_bypass_approvals_and_sandbox: + return sandbox + if sandbox is not None: + raise ValueError( + "dangerous bypass approval_mode cannot be combined with sandbox" + ) + return SandboxMode.danger_full_access + + +def _turn_sandbox_policy_for_approval_mode( + approval_mode: ApprovalMode | None, + sandbox_policy: SandboxPolicy | None, +) -> SandboxPolicy | None: + """Apply approval-mode sandbox presets for turn operations.""" + if approval_mode is not ApprovalMode.dangerously_bypass_approvals_and_sandbox: + return sandbox_policy + if sandbox_policy is not None: + raise ValueError( + "dangerous bypass approval_mode cannot be combined with sandbox_policy" + ) + return SandboxPolicy( + root=DangerFullAccessSandboxPolicy(type="dangerFullAccess") + ) + + class Codex: """Typed Python client for app-server v2 workflows.""" @@ -105,6 +136,7 @@ class Codex: thread_source: ThreadSource | None = None, ) -> Thread: approval_policy, approvals_reviewer = _approval_mode_settings(approval_mode) + sandbox = _thread_sandbox_for_approval_mode(approval_mode, sandbox) params = ThreadStartParams( approval_policy=approval_policy, approvals_reviewer=approvals_reviewer, @@ -168,7 +200,10 @@ class Codex: sandbox: SandboxMode | None = None, service_tier: str | None = None, ) -> Thread: - approval_policy, approvals_reviewer = _approval_mode_override_settings(approval_mode) + approval_policy, approvals_reviewer = _approval_mode_override_settings( + approval_mode + ) + sandbox = _thread_sandbox_for_approval_mode(approval_mode, sandbox) params = ThreadResumeParams( thread_id=thread_id, approval_policy=approval_policy, @@ -202,7 +237,10 @@ class Codex: service_tier: str | None = None, thread_source: ThreadSource | None = None, ) -> Thread: - approval_policy, approvals_reviewer = _approval_mode_override_settings(approval_mode) + approval_policy, approvals_reviewer = _approval_mode_override_settings( + approval_mode + ) + sandbox = _thread_sandbox_for_approval_mode(approval_mode, sandbox) params = ThreadForkParams( thread_id=thread_id, approval_policy=approval_policy, @@ -307,6 +345,7 @@ class AsyncCodex: ) -> AsyncThread: await self._ensure_initialized() approval_policy, approvals_reviewer = _approval_mode_settings(approval_mode) + sandbox = _thread_sandbox_for_approval_mode(approval_mode, sandbox) params = ThreadStartParams( approval_policy=approval_policy, approvals_reviewer=approvals_reviewer, @@ -372,7 +411,10 @@ class AsyncCodex: service_tier: str | None = None, ) -> AsyncThread: await self._ensure_initialized() - approval_policy, approvals_reviewer = _approval_mode_override_settings(approval_mode) + approval_policy, approvals_reviewer = _approval_mode_override_settings( + approval_mode + ) + sandbox = _thread_sandbox_for_approval_mode(approval_mode, sandbox) params = ThreadResumeParams( thread_id=thread_id, approval_policy=approval_policy, @@ -407,7 +449,10 @@ class AsyncCodex: thread_source: ThreadSource | None = None, ) -> AsyncThread: await self._ensure_initialized() - approval_policy, approvals_reviewer = _approval_mode_override_settings(approval_mode) + approval_policy, approvals_reviewer = _approval_mode_override_settings( + approval_mode + ) + sandbox = _thread_sandbox_for_approval_mode(approval_mode, sandbox) params = ThreadForkParams( thread_id=thread_id, approval_policy=approval_policy, @@ -495,7 +540,13 @@ class Thread: summary: ReasoningSummary | None = None, ) -> TurnHandle: wire_input = _to_wire_input(input) - approval_policy, approvals_reviewer = _approval_mode_override_settings(approval_mode) + approval_policy, approvals_reviewer = _approval_mode_override_settings( + approval_mode + ) + sandbox_policy = _turn_sandbox_policy_for_approval_mode( + approval_mode, + sandbox_policy, + ) params = TurnStartParams( thread_id=self.id, input=wire_input, @@ -579,7 +630,13 @@ class AsyncThread: ) -> AsyncTurnHandle: await self._codex._ensure_initialized() wire_input = _to_wire_input(input) - approval_policy, approvals_reviewer = _approval_mode_override_settings(approval_mode) + approval_policy, approvals_reviewer = _approval_mode_override_settings( + approval_mode + ) + sandbox_policy = _turn_sandbox_policy_for_approval_mode( + approval_mode, + sandbox_policy, + ) params = TurnStartParams( thread_id=self.id, input=wire_input, diff --git a/sdk/python/tests/app_server_helpers.py b/sdk/python/tests/app_server_helpers.py index 10db00901c..5051831dde 100644 --- a/sdk/python/tests/app_server_helpers.py +++ b/sdk/python/tests/app_server_helpers.py @@ -98,6 +98,11 @@ def response_approval_policy(response: Any) -> str: return response.model_dump(by_alias=True, mode="json")["approvalPolicy"] +def response_sandbox_type(response: Any) -> str: + """Return serialized sandbox policy type from a generated thread response.""" + return response.model_dump(by_alias=True, mode="json")["sandbox"]["type"] + + def agent_message_texts(events: list[Notification]) -> list[str]: """Extract completed agent-message text from SDK notifications.""" texts: list[str] = [] diff --git a/sdk/python/tests/test_app_server_approvals.py b/sdk/python/tests/test_app_server_approvals.py index bc8c36a6bf..da40a322c6 100644 --- a/sdk/python/tests/test_app_server_approvals.py +++ b/sdk/python/tests/test_app_server_approvals.py @@ -1,12 +1,33 @@ from __future__ import annotations import asyncio +import json +import shlex -from app_server_harness import AppServerHarness -from app_server_helpers import response_approval_policy +import pytest + +from app_server_harness import ( + AppServerHarness, + ev_completed, + ev_function_call, + ev_response_created, + sse, +) +from app_server_helpers import response_approval_policy, response_sandbox_type from openai_codex import ApprovalMode, AsyncCodex, Codex -from openai_codex.generated.v2_all import AskForApprovalValue, ThreadResumeParams +from openai_codex.generated.v2_all import ( + AskForApprovalValue, + DangerFullAccessSandboxPolicy, + ReadOnlySandboxPolicy, + SandboxMode, + SandboxPolicy, + ThreadResumeParams, +) + +DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE = DangerFullAccessSandboxPolicy( + type="dangerFullAccess" +).type def test_thread_resume_inherits_deny_all_approval_mode(tmp_path) -> None: @@ -86,6 +107,263 @@ def test_thread_fork_can_override_approval_mode(tmp_path) -> None: } +def test_dangerous_bypass_thread_lifecycle_persists_thread_settings( + tmp_path, +) -> None: + """Thread lifecycle operations should preserve the explicit bypass preset.""" + with AppServerHarness(tmp_path) as harness: + harness.responses.enqueue_assistant_message( + "bypass seeded", + response_id="bypass-thread", + ) + + with Codex(config=harness.app_server_config()) as codex: + source = codex.thread_start( + approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox, + ) + result = source.run("seed the bypass thread") + started_state = codex._client.thread_resume( # noqa: SLF001 + source.id, + ThreadResumeParams(thread_id=source.id), + ) + resumed = codex.thread_resume(source.id) + resumed_state = codex._client.thread_resume( # noqa: SLF001 + resumed.id, + ThreadResumeParams(thread_id=resumed.id), + ) + forked = codex.thread_fork(source.id) + forked_state = codex._client.thread_resume( # noqa: SLF001 + forked.id, + ThreadResumeParams(thread_id=forked.id), + ) + + assert { + "final_response": result.final_response, + "forked_is_distinct": forked.id != source.id, + "started": ( + response_approval_policy(started_state), + response_sandbox_type(started_state), + ), + "resumed": ( + response_approval_policy(resumed_state), + response_sandbox_type(resumed_state), + ), + "forked": ( + response_approval_policy(forked_state), + response_sandbox_type(forked_state), + ), + } == { + "final_response": "bypass seeded", + "forked_is_distinct": True, + "started": ( + AskForApprovalValue.never.value, + DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE, + ), + "resumed": ( + AskForApprovalValue.never.value, + DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE, + ), + "forked": ( + AskForApprovalValue.never.value, + DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE, + ), + } + + +def test_turn_dangerous_bypass_persists_thread_settings(tmp_path) -> None: + """Turn-level bypass should persist approvals disabled and sandbox bypassed.""" + with AppServerHarness(tmp_path) as harness: + harness.responses.enqueue_assistant_message( + "turn bypass", + response_id="bypass-turn", + ) + + with Codex(config=harness.app_server_config()) as codex: + thread = codex.thread_start(approval_mode=ApprovalMode.auto_review) + result = thread.run( + "bypass this turn", + approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox, + ) + after_turn = codex._client.thread_resume( # noqa: SLF001 + thread.id, + ThreadResumeParams(thread_id=thread.id), + ) + + assert { + "final_response": result.final_response, + "thread_settings": ( + response_approval_policy(after_turn), + response_sandbox_type(after_turn), + ), + } == { + "final_response": "turn bypass", + "thread_settings": ( + AskForApprovalValue.never.value, + DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE, + ), + } + + +def test_async_turn_dangerous_bypass_persists_thread_settings(tmp_path) -> None: + """Async turn-level bypass should persist the same app-server settings.""" + + async def scenario() -> None: + with AppServerHarness(tmp_path) as harness: + harness.responses.enqueue_assistant_message( + "async turn bypass", + response_id="async-bypass-turn", + ) + + async with AsyncCodex(config=harness.app_server_config()) as codex: + thread = await codex.thread_start( + approval_mode=ApprovalMode.auto_review + ) + result = await thread.run( + "bypass this async turn", + approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox, + ) + after_turn = await codex._client.thread_resume( # noqa: SLF001 + thread.id, + ThreadResumeParams(thread_id=thread.id), + ) + + assert { + "final_response": result.final_response, + "thread_settings": ( + response_approval_policy(after_turn), + response_sandbox_type(after_turn), + ), + } == { + "final_response": "async turn bypass", + "thread_settings": ( + AskForApprovalValue.never.value, + DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE, + ), + } + + asyncio.run(scenario()) + + +def test_outside_workspace_write_rejected_for_deny_all_and_allowed_for_bypass( + tmp_path, +) -> None: + """Dangerous bypass should be the mode that permits outside-workspace writes.""" + rejected_path = tmp_path / "deny-all-outside-write.txt" + allowed_path = tmp_path / "dangerous-outside-write.txt" + + with AppServerHarness(tmp_path) as harness: + rejected_args = json.dumps( + { + "command": ( + f"printf %s rejected > {shlex.quote(str(rejected_path))}" + ), + "login": False, + "timeout_ms": 1_000, + } + ) + dangerous_args = json.dumps( + { + "command": ( + f"printf %s dangerous > {shlex.quote(str(allowed_path))}" + ), + "login": False, + "timeout_ms": 1_000, + } + ) + harness.responses.enqueue_sse( + sse( + [ + ev_response_created("deny-all-write"), + ev_function_call( + "deny-all-outside-write", + "shell_command", + rejected_args, + ), + ev_completed("deny-all-write"), + ] + ) + ) + harness.responses.enqueue_assistant_message( + "deny-all shell completed", + response_id="deny-all-final", + ) + harness.responses.enqueue_sse( + sse( + [ + ev_response_created("dangerous-write"), + ev_function_call( + "dangerous-outside-write", + "shell_command", + dangerous_args, + ), + ev_completed("dangerous-write"), + ] + ) + ) + harness.responses.enqueue_assistant_message( + "dangerous shell completed", + response_id="dangerous-final", + ) + + with Codex(config=harness.app_server_config()) as codex: + denied_thread = codex.thread_start(approval_mode=ApprovalMode.deny_all) + denied_result = denied_thread.run("write outside the workspace") + + bypass_thread = codex.thread_start( + approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox, + ) + bypass_result = bypass_thread.run("write outside the workspace") + + assert { + "denied_final_response": denied_result.final_response, + "denied_path_exists": rejected_path.exists(), + "bypass_final_response": bypass_result.final_response, + "bypass_file_contents": allowed_path.read_text(), + } == { + "denied_final_response": "deny-all shell completed", + "denied_path_exists": False, + "bypass_final_response": "dangerous shell completed", + "bypass_file_contents": "dangerous", + } + + +def test_dangerous_bypass_rejects_explicit_sandbox_conflicts_before_state_changes( + tmp_path, +) -> None: + """Conflicting bypass presets should fail before mutating app-server state.""" + with AppServerHarness(tmp_path) as harness: + with Codex(config=harness.app_server_config()) as codex: + with pytest.raises(ValueError, match="combined with sandbox"): + codex.thread_start( + approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox, + sandbox=SandboxMode.read_only, + ) + + threads_after_invalid_start = codex.thread_list(archived=False) + thread = codex.thread_start() + + with pytest.raises(ValueError, match="combined with sandbox_policy"): + thread.run( + "this should never reach app-server", + approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox, + sandbox_policy=SandboxPolicy( + root=ReadOnlySandboxPolicy(type="readOnly") + ), + ) + + thread_state = thread.read(include_turns=True) + + assert { + "threads_after_invalid_start": [ + existing.id for existing in threads_after_invalid_start.data + ], + "turns_after_invalid_run": thread_state.thread.turns, + } == { + "threads_after_invalid_start": [], + "turns_after_invalid_run": [], + } + + def test_turn_approval_mode_persists_until_next_turn(tmp_path) -> None: """A turn-level approval override should apply to later omitted-arg turns.""" with AppServerHarness(tmp_path) as harness: diff --git a/sdk/python/tests/test_public_api_runtime_behavior.py b/sdk/python/tests/test_public_api_runtime_behavior.py index 4e79cb6104..4d4b137d22 100644 --- a/sdk/python/tests/test_public_api_runtime_behavior.py +++ b/sdk/python/tests/test_public_api_runtime_behavior.py @@ -149,12 +149,18 @@ def test_approval_modes_serialize_to_expected_start_params() -> None: "approvalPolicy": "on-request", "approvalsReviewer": "auto_review", }, + "dangerously_bypass_approvals_and_sandbox": { + "approvalPolicy": "never", + }, } def test_unknown_approval_mode_is_rejected() -> None: """Invalid approval modes should fail before params are constructed.""" - with pytest.raises(ValueError, match="deny_all, auto_review"): + with pytest.raises( + ValueError, + match="deny_all, auto_review, dangerously_bypass_approvals_and_sandbox", + ): public_api_module._approval_mode_settings("allow_all") # type: ignore[arg-type] diff --git a/sdk/python/tests/test_public_api_signatures.py b/sdk/python/tests/test_public_api_signatures.py index f13fb35c08..7fa48e1ae4 100644 --- a/sdk/python/tests/test_public_api_signatures.py +++ b/sdk/python/tests/test_public_api_signatures.py @@ -129,6 +129,10 @@ def test_root_exports_approval_mode() -> None: assert [(mode.name, mode.value) for mode in ApprovalMode] == [ ("deny_all", "deny_all"), ("auto_review", "auto_review"), + ( + "dangerously_bypass_approvals_and_sandbox", + "dangerously_bypass_approvals_and_sandbox", + ), ]