Files
codex/sdk/python/tests/test_app_server_approvals.py
Ahmed Ibrahim 3e10e09e24 [7/8] Add Python SDK app-server integration harness (#22014)
## Why

The SDK had behavioral tests that replaced SDK client internals. Those
tests could catch wrapper mistakes, but they did not prove the pinned
app-server runtime, generated notification models, request routing, and
sync/async public clients worked together.

This PR adds deterministic integration coverage that starts the pinned
`codex app-server` process and mocks only the upstream Responses HTTP
boundary.

## What

- Add `AppServerHarness` and `MockResponsesServer` helpers for isolated
`CODEX_HOME`, mock-provider config, queued SSE responses, and captured
`/v1/responses` requests.
- Add shared helpers for SSE construction, stream assertions,
approval-policy inspection, and image fixtures.
- Split integration coverage into focused modules for run behavior,
inputs, streaming, turn controls, approvals, and thread lifecycle.
- Cover sync and async `Thread.run`, `TurnHandle.stream`, interleaved
streams, approval-mode persistence, lifecycle helpers, final-answer
phase handling, image inputs, loaded skill input injection, steering,
interruption, listing, history reads, run overrides, and token usage
mapping.
- Replace public-wrapper tests that duplicated integration-test behavior
with lower-level client tests only where direct client behavior is the
thing under test.

## Stack

1. #21891 `[1/8]` Pin Python SDK runtime dependency
2. #21893 `[2/8]` Generate Python SDK types from pinned runtime
3. #21895 `[3/8]` Run Python SDK tests in CI
4. #21896 `[4/8]` Define Python SDK public API surface
5. #21905 `[5/8]` Rename Python SDK package to `openai-codex`
6. #21910 `[6/8]` Add high-level Python SDK approval mode
7. This PR `[7/8]` Add Python SDK app-server integration harness
8. #22021 `[8/8]` Add Python SDK Ruff formatting

## Verification

- Added pinned app-server integration tests under
`sdk/python/tests/test_app_server_*.py` and
`test_real_app_server_integration.py`.

---------

Co-authored-by: Codex <noreply@openai.com>
2026-05-12 01:06:41 +03:00

208 lines
8.5 KiB
Python

from __future__ import annotations
import asyncio
from app_server_harness import AppServerHarness
from openai_codex import ApprovalMode, AsyncCodex, Codex
from openai_codex.generated.v2_all import AskForApprovalValue, ThreadResumeParams
from app_server_helpers import response_approval_policy
def test_thread_resume_inherits_deny_all_approval_mode(tmp_path) -> None:
    """Check that a resumed thread keeps the deny-all mode of its source.

    The source thread is started with ``ApprovalMode.deny_all`` and run once
    against a queued assistant message; the state returned when resuming the
    thread is then expected to report ``AskForApprovalValue.never``.
    """
    with AppServerHarness(tmp_path) as harness:
        # Queue the single reply consumed by the seeding run below.
        harness.responses.enqueue_assistant_message("source seeded", response_id="resume-mode")
        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_run = origin.run("seed the source rollout")
            reopened = codex.thread_resume(origin.id)

            # Resume again through the low-level client so the raw response
            # (and the approval policy it carries) can be inspected.
            resume_params = ThreadResumeParams(thread_id=reopened.id)
            reopened_state = codex._client.thread_resume(  # noqa: SLF001
                reopened.id,
                resume_params,
            )

            observed = {
                "final_response": seed_run.final_response,
                "resumed_policy": response_approval_policy(reopened_state),
            }
            expected = {
                "final_response": "source seeded",
                "resumed_policy": AskForApprovalValue.never.value,
            }
            assert observed == expected
def test_thread_fork_inherits_deny_all_approval_mode(tmp_path) -> None:
    """Check that a fork made without an override keeps the source's mode.

    The source thread is started with ``ApprovalMode.deny_all``. The fork must
    have a different id from the source, and its resumed state is expected to
    report ``AskForApprovalValue.never``.
    """
    with AppServerHarness(tmp_path) as harness:
        # Queue the single reply consumed by the seeding run below.
        harness.responses.enqueue_assistant_message("source seeded", response_id="fork-mode")
        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_run = origin.run("seed the source rollout")
            branch = codex.thread_fork(origin.id)

            # Resume the fork through the low-level client to read its policy.
            resume_params = ThreadResumeParams(thread_id=branch.id)
            branch_state = codex._client.thread_resume(  # noqa: SLF001
                branch.id,
                resume_params,
            )

            observed = {
                "final_response": seed_run.final_response,
                "forked_is_distinct": branch.id != origin.id,
                "forked_policy": response_approval_policy(branch_state),
            }
            expected = {
                "final_response": "source seeded",
                "forked_is_distinct": True,
                "forked_policy": AskForApprovalValue.never.value,
            }
            assert observed == expected
def test_thread_fork_can_override_approval_mode(tmp_path) -> None:
    """Check that an explicit approval mode on fork replaces the source's mode.

    The source thread uses ``ApprovalMode.deny_all``; the fork is created with
    ``ApprovalMode.auto_review`` and its resumed state is expected to report
    ``AskForApprovalValue.on_request``.
    """
    with AppServerHarness(tmp_path) as harness:
        # Queue the single reply consumed by the seeding run below.
        harness.responses.enqueue_assistant_message(
            "source seeded",
            response_id="fork-override-mode",
        )
        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_run = origin.run("seed the source rollout")
            branch = codex.thread_fork(
                origin.id,
                approval_mode=ApprovalMode.auto_review,
            )

            # Resume the fork through the low-level client to read its policy.
            resume_params = ThreadResumeParams(thread_id=branch.id)
            branch_state = codex._client.thread_resume(  # noqa: SLF001
                branch.id,
                resume_params,
            )

            observed = {
                "final_response": seed_run.final_response,
                "forked_policy": response_approval_policy(branch_state),
            }
            expected = {
                "final_response": "source seeded",
                "forked_policy": AskForApprovalValue.on_request.value,
            }
            assert observed == expected
def test_turn_approval_mode_persists_until_next_turn(tmp_path) -> None:
    """Check that a per-run approval mode still applies to a later run that omits it.

    The thread is started without an approval mode. The first run passes
    ``ApprovalMode.deny_all``; the second run passes nothing. The thread state
    is read after each run and both reads are expected to report
    ``AskForApprovalValue.never``.
    """
    with AppServerHarness(tmp_path) as harness:
        # One queued reply per run, consumed in order.
        harness.responses.enqueue_assistant_message("turn override", response_id="turn-mode-1")
        harness.responses.enqueue_assistant_message("turn inherited", response_id="turn-mode-2")
        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()

            overriding_run = thread.run(
                "deny this and later turns",
                approval_mode=ApprovalMode.deny_all,
            )
            # Snapshot the thread state right after the overriding run.
            params_after_override = ThreadResumeParams(thread_id=thread.id)
            state_after_override = codex._client.thread_resume(  # noqa: SLF001
                thread.id,
                params_after_override,
            )

            inheriting_run = thread.run("inherit previous approval mode")
            # Snapshot again after the run that passed no approval mode.
            params_after_omitted = ThreadResumeParams(thread_id=thread.id)
            state_after_omitted = codex._client.thread_resume(  # noqa: SLF001
                thread.id,
                params_after_omitted,
            )

            observed = {
                "after_turn_override": response_approval_policy(state_after_override),
                "after_omitted_turn": response_approval_policy(state_after_omitted),
                "final_responses": [
                    overriding_run.final_response,
                    inheriting_run.final_response,
                ],
            }
            expected = {
                "after_turn_override": AskForApprovalValue.never.value,
                "after_omitted_turn": AskForApprovalValue.never.value,
                "final_responses": ["turn override", "turn inherited"],
            }
            assert observed == expected
def test_thread_run_approval_mode_persists_until_explicit_override(tmp_path) -> None:
    """Check that a run without an approval mode leaves the thread's setting untouched.

    The thread is started with ``ApprovalMode.deny_all``. After a run that
    passes no approval mode the state is expected to report
    ``AskForApprovalValue.never``; after a run that passes
    ``ApprovalMode.auto_review`` it is expected to report
    ``AskForApprovalValue.on_request``.
    """
    with AppServerHarness(tmp_path) as harness:
        # One queued reply per run, consumed in order.
        harness.responses.enqueue_assistant_message("locked down", response_id="approval-1")
        harness.responses.enqueue_assistant_message("reviewable", response_id="approval-2")
        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start(approval_mode=ApprovalMode.deny_all)

            default_run = thread.run("keep approvals denied")
            # Snapshot the thread state after the run with no override.
            params_after_default = ThreadResumeParams(thread_id=thread.id)
            state_after_default = codex._client.thread_resume(  # noqa: SLF001
                thread.id,
                params_after_default,
            )

            override_run = thread.run(
                "allow auto review now",
                approval_mode=ApprovalMode.auto_review,
            )
            # Snapshot again after the run that explicitly changed the mode.
            params_after_override = ThreadResumeParams(thread_id=thread.id)
            state_after_override = codex._client.thread_resume(  # noqa: SLF001
                thread.id,
                params_after_override,
            )

            observed = {
                "after_default_policy": response_approval_policy(state_after_default),
                "after_override_policy": response_approval_policy(state_after_override),
                "final_responses": [
                    default_run.final_response,
                    override_run.final_response,
                ],
            }
            expected = {
                "after_default_policy": AskForApprovalValue.never.value,
                "after_override_policy": AskForApprovalValue.on_request.value,
                "final_responses": ["locked down", "reviewable"],
            }
            assert observed == expected
def test_async_thread_run_approval_mode_persists_until_explicit_override(
    tmp_path,
) -> None:
    """Async variant: a run without an approval mode leaves the thread's setting untouched.

    Drives ``AsyncCodex`` inside a coroutine executed with ``asyncio.run``.
    The thread is started with ``ApprovalMode.deny_all``; the state is expected
    to report ``AskForApprovalValue.never`` after a run with no override and
    ``AskForApprovalValue.on_request`` after a run with
    ``ApprovalMode.auto_review``.
    """

    async def scenario() -> None:
        """Run both turns on the async client and compare the observed policies."""
        with AppServerHarness(tmp_path) as harness:
            # One queued reply per run, consumed in order.
            harness.responses.enqueue_assistant_message(
                "async locked down",
                response_id="async-approval-1",
            )
            harness.responses.enqueue_assistant_message(
                "async reviewable",
                response_id="async-approval-2",
            )
            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start(approval_mode=ApprovalMode.deny_all)

                default_run = await thread.run("keep async approvals denied")
                # Snapshot the thread state after the run with no override.
                params_after_default = ThreadResumeParams(thread_id=thread.id)
                state_after_default = await codex._client.thread_resume(  # noqa: SLF001
                    thread.id,
                    params_after_default,
                )

                override_run = await thread.run(
                    "allow async auto review now",
                    approval_mode=ApprovalMode.auto_review,
                )
                # Snapshot again after the run that explicitly changed the mode.
                params_after_override = ThreadResumeParams(thread_id=thread.id)
                state_after_override = await codex._client.thread_resume(  # noqa: SLF001
                    thread.id,
                    params_after_override,
                )

                observed = {
                    "after_default_policy": response_approval_policy(state_after_default),
                    "after_override_policy": response_approval_policy(state_after_override),
                    "final_responses": [
                        default_run.final_response,
                        override_run.final_response,
                    ],
                }
                expected = {
                    "after_default_policy": AskForApprovalValue.never.value,
                    "after_override_policy": AskForApprovalValue.on_request.value,
                    "final_responses": ["async locked down", "async reviewable"],
                }
                assert observed == expected

    asyncio.run(scenario())