mirror of
https://github.com/openai/codex.git
synced 2026-05-23 12:34:25 +00:00
487 lines
18 KiB
Python
487 lines
18 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import shlex
|
|
|
|
import pytest
|
|
|
|
from app_server_harness import (
|
|
AppServerHarness,
|
|
ev_completed,
|
|
ev_function_call,
|
|
ev_response_created,
|
|
sse,
|
|
)
|
|
from app_server_helpers import response_approval_policy, response_sandbox_type
|
|
|
|
from openai_codex import ApprovalMode, AsyncCodex, Codex
|
|
from openai_codex.generated.v2_all import (
|
|
AskForApprovalValue,
|
|
DangerFullAccessSandboxPolicy,
|
|
ReadOnlySandboxPolicy,
|
|
SandboxMode,
|
|
SandboxPolicy,
|
|
ThreadResumeParams,
|
|
)
|
|
|
|
# Expected sandbox "type" value used by the dangerous-bypass assertions below.
# It is read back from a constructed generated policy model rather than being
# repeated as a bare string literal in every expected dict.
DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE = DangerFullAccessSandboxPolicy(
    type="dangerFullAccess"
).type
|
|
|
|
|
|
def test_thread_resume_inherits_deny_all_approval_mode(tmp_path) -> None:
    """A resumed thread keeps the approval mode its source was started with.

    The source thread is started with ``ApprovalMode.deny_all``; the resume
    response is expected to report ``AskForApprovalValue.never``.
    """
    with AppServerHarness(tmp_path) as harness:
        # A single canned assistant reply seeds the source thread's only turn.
        harness.responses.enqueue_assistant_message(
            "source seeded",
            response_id="resume-mode",
        )

        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_run = origin.run("seed the source rollout")
            reopened = codex.thread_resume(origin.id)
            # Go through the low-level client so the raw resume response (and
            # the approval policy it carries) can be inspected.
            reopened_state = codex._client.thread_resume(  # noqa: SLF001
                reopened.id,
                ThreadResumeParams(thread_id=reopened.id),
            )

            observed = {
                "final_response": seed_run.final_response,
                "resumed_policy": response_approval_policy(reopened_state),
            }
            expected = {
                "final_response": "source seeded",
                "resumed_policy": AskForApprovalValue.never.value,
            }
            assert observed == expected
|
|
|
|
|
|
def test_thread_fork_inherits_deny_all_approval_mode(tmp_path) -> None:
    """A fork created without an override reports the source's approval mode.

    The fork must also be a different thread from the one it was forked from.
    """
    with AppServerHarness(tmp_path) as harness:
        # A single canned assistant reply seeds the source thread's only turn.
        harness.responses.enqueue_assistant_message(
            "source seeded",
            response_id="fork-mode",
        )

        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_run = origin.run("seed the source rollout")
            branch = codex.thread_fork(origin.id)
            # Resume the fork through the low-level client to read back the
            # approval policy the app-server associates with it.
            branch_state = codex._client.thread_resume(  # noqa: SLF001
                branch.id,
                ThreadResumeParams(thread_id=branch.id),
            )

            observed = {
                "final_response": seed_run.final_response,
                "forked_is_distinct": branch.id != origin.id,
                "forked_policy": response_approval_policy(branch_state),
            }
            expected = {
                "final_response": "source seeded",
                "forked_is_distinct": True,
                "forked_policy": AskForApprovalValue.never.value,
            }
            assert observed == expected
|
|
|
|
|
|
def test_thread_fork_can_override_approval_mode(tmp_path) -> None:
    """A fork given an explicit approval mode reports that mode, not the source's.

    The source is started with ``deny_all`` and the fork requests
    ``auto_review``; the fork is expected to report
    ``AskForApprovalValue.on_request``.
    """
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message(
            "source seeded", response_id="fork-override-mode"
        )

        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_run = origin.run("seed the source rollout")
            branch = codex.thread_fork(origin.id, approval_mode=ApprovalMode.auto_review)
            # Resume the fork through the low-level client to read back the
            # approval policy the app-server associates with it.
            branch_state = codex._client.thread_resume(  # noqa: SLF001
                branch.id,
                ThreadResumeParams(thread_id=branch.id),
            )

            observed = {
                "final_response": seed_run.final_response,
                "forked_policy": response_approval_policy(branch_state),
            }
            expected = {
                "final_response": "source seeded",
                "forked_policy": AskForApprovalValue.on_request.value,
            }
            assert observed == expected
|
|
|
|
|
|
def test_dangerous_bypass_thread_lifecycle_persists_thread_settings(
    tmp_path,
) -> None:
    """Start, resume, and fork should all report the dangerous-bypass settings.

    For each of the three lifecycle states the test compares the
    (approval policy, sandbox type) pair read from a resume response.
    """

    def settings_of(state):
        """Return the (approval policy, sandbox type) pair for one response."""
        return (response_approval_policy(state), response_sandbox_type(state))

    def resume_state(codex, thread_id):
        """Fetch the raw resume response for ``thread_id`` via the low-level client."""
        return codex._client.thread_resume(  # noqa: SLF001
            thread_id,
            ThreadResumeParams(thread_id=thread_id),
        )

    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message(
            "bypass seeded", response_id="bypass-thread"
        )

        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(
                approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox,
            )
            seed_run = origin.run("seed the bypass thread")

            # Snapshot the settings after start, after an SDK-level resume,
            # and after a fork, in that order.
            state_after_start = resume_state(codex, origin.id)
            reopened = codex.thread_resume(origin.id)
            state_after_resume = resume_state(codex, reopened.id)
            branch = codex.thread_fork(origin.id)
            state_after_fork = resume_state(codex, branch.id)

            observed = {
                "final_response": seed_run.final_response,
                "forked_is_distinct": branch.id != origin.id,
                "started": settings_of(state_after_start),
                "resumed": settings_of(state_after_resume),
                "forked": settings_of(state_after_fork),
            }
            # Every lifecycle state is expected to carry the same pair.
            bypass_settings = (
                AskForApprovalValue.never.value,
                DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE,
            )
            expected = {
                "final_response": "bypass seeded",
                "forked_is_distinct": True,
                "started": bypass_settings,
                "resumed": bypass_settings,
                "forked": bypass_settings,
            }
            assert observed == expected
|
|
|
|
|
|
def test_turn_dangerous_bypass_persists_thread_settings(tmp_path) -> None:
    """A bypass requested on a single run is visible in the thread's settings.

    The thread starts in ``auto_review``; after one run that passes the
    dangerous-bypass mode, the resume response is expected to report the
    ``never`` approval policy together with the full-access sandbox type.
    """
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message(
            "turn bypass", response_id="bypass-turn"
        )

        with Codex(config=harness.app_server_config()) as codex:
            session = codex.thread_start(approval_mode=ApprovalMode.auto_review)
            turn = session.run(
                "bypass this turn",
                approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox,
            )
            # Read the settings back through the low-level client.
            state_after_turn = codex._client.thread_resume(  # noqa: SLF001
                session.id,
                ThreadResumeParams(thread_id=session.id),
            )

            observed = {
                "final_response": turn.final_response,
                "thread_settings": (
                    response_approval_policy(state_after_turn),
                    response_sandbox_type(state_after_turn),
                ),
            }
            expected = {
                "final_response": "turn bypass",
                "thread_settings": (
                    AskForApprovalValue.never.value,
                    DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE,
                ),
            }
            assert observed == expected
|
|
|
|
|
|
def test_async_turn_dangerous_bypass_persists_thread_settings(tmp_path) -> None:
    """Async variant: a bypass requested on one run shows up in thread settings.

    The coroutine below is driven with ``asyncio.run`` so the test function
    itself stays synchronous.
    """

    async def _drive() -> None:
        """Run one bypass turn with ``AsyncCodex`` and check the stored settings."""
        with AppServerHarness(tmp_path) as harness:
            harness.responses.enqueue_assistant_message(
                "async turn bypass", response_id="async-bypass-turn"
            )

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                session = await codex.thread_start(approval_mode=ApprovalMode.auto_review)
                turn = await session.run(
                    "bypass this async turn",
                    approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox,
                )
                # Read the settings back through the low-level client.
                state_after_turn = await codex._client.thread_resume(  # noqa: SLF001
                    session.id,
                    ThreadResumeParams(thread_id=session.id),
                )

                observed = {
                    "final_response": turn.final_response,
                    "thread_settings": (
                        response_approval_policy(state_after_turn),
                        response_sandbox_type(state_after_turn),
                    ),
                }
                expected = {
                    "final_response": "async turn bypass",
                    "thread_settings": (
                        AskForApprovalValue.never.value,
                        DANGER_FULL_ACCESS_SANDBOX_POLICY_TYPE,
                    ),
                }
                assert observed == expected

    asyncio.run(_drive())
|
|
|
|
|
|
def test_outside_workspace_write_rejected_for_deny_all_and_allowed_for_bypass(
    tmp_path,
) -> None:
    """The same shell write is blocked under deny-all and succeeds under bypass.

    Two threads each receive a scripted ``shell_command`` function call that
    redirects ``printf`` output into a file under ``tmp_path``. The deny-all
    target must not exist afterwards; the bypass target must contain the text.
    """
    # NOTE(review): both targets live directly under tmp_path; presumably the
    # harness places the workspace elsewhere so these count as "outside".
    rejected_path = tmp_path / "deny-all-outside-write.txt"
    allowed_path = tmp_path / "dangerous-outside-write.txt"

    def shell_args(command: str) -> str:
        """Serialize the arguments for one scripted ``shell_command`` call."""
        return json.dumps(
            {
                "command": command,
                "login": False,
                "timeout_ms": 1_000,
            }
        )

    def function_call_stream(response_id: str, call_id: str, arguments: str):
        """Build the SSE payload for a response holding one shell function call."""
        events = [
            ev_response_created(response_id),
            ev_function_call(call_id, "shell_command", arguments),
            ev_completed(response_id),
        ]
        return sse(events)

    with AppServerHarness(tmp_path) as harness:
        rejected_args = shell_args(
            f"printf %s rejected > {shlex.quote(str(rejected_path))}"
        )
        dangerous_args = shell_args(
            f"printf %s dangerous > {shlex.quote(str(allowed_path))}"
        )

        # Responses are queued in the order the two runs below consume them:
        # a function call followed by a closing assistant message, per thread.
        harness.responses.enqueue_sse(
            function_call_stream(
                "deny-all-write",
                "deny-all-outside-write",
                rejected_args,
            )
        )
        harness.responses.enqueue_assistant_message(
            "deny-all shell completed", response_id="deny-all-final"
        )
        harness.responses.enqueue_sse(
            function_call_stream(
                "dangerous-write",
                "dangerous-outside-write",
                dangerous_args,
            )
        )
        harness.responses.enqueue_assistant_message(
            "dangerous shell completed", response_id="dangerous-final"
        )

        with Codex(config=harness.app_server_config()) as codex:
            locked_thread = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            locked_run = locked_thread.run("write outside the workspace")

            open_thread = codex.thread_start(
                approval_mode=ApprovalMode.dangerously_bypass_approvals_and_sandbox,
            )
            open_run = open_thread.run("write outside the workspace")

            observed = {
                "denied_final_response": locked_run.final_response,
                "denied_path_exists": rejected_path.exists(),
                "bypass_final_response": open_run.final_response,
                "bypass_file_contents": allowed_path.read_text(),
            }
            expected = {
                "denied_final_response": "deny-all shell completed",
                "denied_path_exists": False,
                "bypass_final_response": "dangerous shell completed",
                "bypass_file_contents": "dangerous",
            }
            assert observed == expected
|
|
|
|
|
|
def test_dangerous_bypass_rejects_explicit_sandbox_conflicts_before_state_changes(
    tmp_path,
) -> None:
    """Bypass combined with an explicit sandbox raises and leaves no trace.

    Both ``thread_start`` (with ``sandbox=``) and ``Thread.run`` (with
    ``sandbox_policy=``) must raise ``ValueError``. Afterwards the thread list
    taken right after the failed start is empty and the thread has no turns.
    """
    bypass_mode = ApprovalMode.dangerously_bypass_approvals_and_sandbox

    with AppServerHarness(tmp_path) as harness:
        with Codex(config=harness.app_server_config()) as codex:
            with pytest.raises(ValueError, match="combined with sandbox"):
                codex.thread_start(
                    approval_mode=bypass_mode,
                    sandbox=SandboxMode.read_only,
                )

            # Listed before any valid thread exists, so a rejected start that
            # still created a thread would show up here.
            listing_after_bad_start = codex.thread_list(archived=False)
            session = codex.thread_start()

            with pytest.raises(ValueError, match="combined with sandbox_policy"):
                session.run(
                    "this should never reach app-server",
                    approval_mode=bypass_mode,
                    sandbox_policy=SandboxPolicy(
                        root=ReadOnlySandboxPolicy(type="readOnly")
                    ),
                )

            session_state = session.read(include_turns=True)

            observed = {
                "threads_after_invalid_start": [
                    listed.id for listed in listing_after_bad_start.data
                ],
                "turns_after_invalid_run": session_state.thread.turns,
            }
            expected = {
                "threads_after_invalid_start": [],
                "turns_after_invalid_run": [],
            }
            assert observed == expected
|
|
|
|
|
|
def test_turn_approval_mode_persists_until_next_turn(tmp_path) -> None:
    """An approval mode passed to one run still applies to a run that omits it.

    The thread starts with no explicit mode, the first run passes ``deny_all``,
    and the second run passes nothing. Both snapshots are expected to report
    ``AskForApprovalValue.never``.
    """
    with AppServerHarness(tmp_path) as harness:
        # One canned reply per run, consumed in order.
        for reply, reply_id in (
            ("turn override", "turn-mode-1"),
            ("turn inherited", "turn-mode-2"),
        ):
            harness.responses.enqueue_assistant_message(reply, response_id=reply_id)

        with Codex(config=harness.app_server_config()) as codex:
            session = codex.thread_start()

            override_run = session.run(
                "deny this and later turns",
                approval_mode=ApprovalMode.deny_all,
            )
            state_after_override = codex._client.thread_resume(  # noqa: SLF001
                session.id,
                ThreadResumeParams(thread_id=session.id),
            )

            inherited_run = session.run("inherit previous approval mode")
            state_after_omitted = codex._client.thread_resume(  # noqa: SLF001
                session.id,
                ThreadResumeParams(thread_id=session.id),
            )

            observed = {
                "after_turn_override": response_approval_policy(state_after_override),
                "after_omitted_turn": response_approval_policy(state_after_omitted),
                "final_responses": [
                    override_run.final_response,
                    inherited_run.final_response,
                ],
            }
            expected = {
                "after_turn_override": AskForApprovalValue.never.value,
                "after_omitted_turn": AskForApprovalValue.never.value,
                "final_responses": ["turn override", "turn inherited"],
            }
            assert observed == expected
|
|
|
|
|
|
def test_thread_run_approval_mode_persists_until_explicit_override(tmp_path) -> None:
    """A run without an approval mode keeps the thread's mode; an explicit one replaces it.

    The thread starts in ``deny_all``. After a run with no mode the policy is
    expected to be ``never``; after a run passing ``auto_review`` it is
    expected to be ``on_request``.
    """
    with AppServerHarness(tmp_path) as harness:
        # One canned reply per run, consumed in order.
        for reply, reply_id in (
            ("locked down", "approval-1"),
            ("reviewable", "approval-2"),
        ):
            harness.responses.enqueue_assistant_message(reply, response_id=reply_id)

        with Codex(config=harness.app_server_config()) as codex:
            session = codex.thread_start(approval_mode=ApprovalMode.deny_all)

            default_run = session.run("keep approvals denied")
            state_after_default = codex._client.thread_resume(  # noqa: SLF001
                session.id,
                ThreadResumeParams(thread_id=session.id),
            )

            override_run = session.run(
                "allow auto review now",
                approval_mode=ApprovalMode.auto_review,
            )
            state_after_override = codex._client.thread_resume(  # noqa: SLF001
                session.id,
                ThreadResumeParams(thread_id=session.id),
            )

            observed = {
                "after_default_policy": response_approval_policy(state_after_default),
                "after_override_policy": response_approval_policy(state_after_override),
                "final_responses": [
                    default_run.final_response,
                    override_run.final_response,
                ],
            }
            expected = {
                "after_default_policy": AskForApprovalValue.never.value,
                "after_override_policy": AskForApprovalValue.on_request.value,
                "final_responses": ["locked down", "reviewable"],
            }
            assert observed == expected
|
|
|
|
|
|
def test_async_thread_run_approval_mode_persists_until_explicit_override(
    tmp_path,
) -> None:
    """Async variant: an omitted run mode keeps the stored one; an explicit one replaces it.

    The coroutine below is driven with ``asyncio.run`` so the test function
    itself stays synchronous.
    """

    async def _drive() -> None:
        """Run two turns with ``AsyncCodex`` and snapshot the policy after each."""
        with AppServerHarness(tmp_path) as harness:
            # One canned reply per run, consumed in order.
            for reply, reply_id in (
                ("async locked down", "async-approval-1"),
                ("async reviewable", "async-approval-2"),
            ):
                harness.responses.enqueue_assistant_message(reply, response_id=reply_id)

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                session = await codex.thread_start(approval_mode=ApprovalMode.deny_all)

                default_run = await session.run("keep async approvals denied")
                state_after_default = await codex._client.thread_resume(  # noqa: SLF001
                    session.id,
                    ThreadResumeParams(thread_id=session.id),
                )

                override_run = await session.run(
                    "allow async auto review now",
                    approval_mode=ApprovalMode.auto_review,
                )
                state_after_override = await codex._client.thread_resume(  # noqa: SLF001
                    session.id,
                    ThreadResumeParams(thread_id=session.id),
                )

                observed = {
                    "after_default_policy": response_approval_policy(state_after_default),
                    "after_override_policy": response_approval_policy(state_after_override),
                    "final_responses": [
                        default_run.final_response,
                        override_run.final_response,
                    ],
                }
                expected = {
                    "after_default_policy": AskForApprovalValue.never.value,
                    "after_override_policy": AskForApprovalValue.on_request.value,
                    "final_responses": ["async locked down", "async reviewable"],
                }
                assert observed == expected

    asyncio.run(_drive())
|