mirror of
https://github.com/openai/codex.git
synced 2026-05-15 16:53:05 +00:00
## Why

The SDK had behavioral tests that replaced SDK client internals. Those tests could catch wrapper mistakes, but they did not prove the pinned app-server runtime, generated notification models, request routing, and sync/async public clients worked together.

This PR adds deterministic integration coverage that starts the pinned `codex app-server` process and mocks only the upstream Responses HTTP boundary.

## What

- Add `AppServerHarness` and `MockResponsesServer` helpers for isolated `CODEX_HOME`, mock-provider config, queued SSE responses, and captured `/v1/responses` requests.
- Add shared helpers for SSE construction, stream assertions, approval-policy inspection, and image fixtures.
- Split integration coverage into focused modules for run behavior, inputs, streaming, turn controls, approvals, and thread lifecycle.
- Cover sync and async `Thread.run`, `TurnHandle.stream`, interleaved streams, approval-mode persistence, lifecycle helpers, final-answer phase handling, image inputs, loaded skill input injection, steering, interruption, listing, history reads, run overrides, and token usage mapping.
- Replace public-wrapper tests that duplicated integration-test behavior with lower-level client tests only where direct client behavior is the thing under test.

## Stack

1. #21891 `[1/8]` Pin Python SDK runtime dependency
2. #21893 `[2/8]` Generate Python SDK types from pinned runtime
3. #21895 `[3/8]` Run Python SDK tests in CI
4. #21896 `[4/8]` Define Python SDK public API surface
5. #21905 `[5/8]` Rename Python SDK package to `openai-codex`
6. #21910 `[6/8]` Add high-level Python SDK approval mode
7. This PR `[7/8]` Add Python SDK app-server integration harness
8. #22021 `[8/8]` Add Python SDK Ruff formatting

## Verification

- Added pinned app-server integration tests under `sdk/python/tests/test_app_server_*.py` and `test_real_app_server_integration.py`.

---------

Co-authored-by: Codex <noreply@openai.com>
208 lines
8.5 KiB
Python
208 lines
8.5 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
|
|
from app_server_harness import AppServerHarness
|
|
from openai_codex import ApprovalMode, AsyncCodex, Codex
|
|
from openai_codex.generated.v2_all import AskForApprovalValue, ThreadResumeParams
|
|
from app_server_helpers import response_approval_policy
|
|
|
|
|
|
def test_thread_resume_inherits_deny_all_approval_mode(tmp_path) -> None:
    """Check that a resumed thread still reports the source thread's approval policy.

    A thread is started with ``ApprovalMode.deny_all``, seeded with one run,
    and resumed through the public client. The raw resume response is then
    expected to carry ``AskForApprovalValue.never``.
    """
    with AppServerHarness(tmp_path) as harness:
        # One queued assistant message serves the single seeding run below.
        harness.responses.enqueue_assistant_message("source seeded", response_id="resume-mode")

        with Codex(config=harness.app_server_config()) as codex:
            seeded_thread = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_result = seeded_thread.run("seed the source rollout")
            resumed_thread = codex.thread_resume(seeded_thread.id)

            # The low-level client call returns the raw response that
            # response_approval_policy() inspects.
            resume_response = codex._client.thread_resume(  # noqa: SLF001
                resumed_thread.id,
                ThreadResumeParams(thread_id=resumed_thread.id),
            )

            observed = {
                "final_response": seed_result.final_response,
                "resumed_policy": response_approval_policy(resume_response),
            }

        expected = {
            "final_response": "source seeded",
            "resumed_policy": AskForApprovalValue.never.value,
        }
        assert observed == expected
|
|
|
|
|
|
def test_thread_fork_inherits_deny_all_approval_mode(tmp_path) -> None:
    """Check that a fork created without an override keeps the source policy.

    The source thread is started with ``ApprovalMode.deny_all`` and seeded
    with one run. The fork must have a different id, and its raw resume
    response is expected to carry ``AskForApprovalValue.never``.
    """
    with AppServerHarness(tmp_path) as harness:
        # One queued assistant message serves the single seeding run below.
        harness.responses.enqueue_assistant_message("source seeded", response_id="fork-mode")

        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_result = origin.run("seed the source rollout")
            fork = codex.thread_fork(origin.id)

            # Resume the fork through the low-level client to obtain the raw
            # response that response_approval_policy() inspects.
            fork_response = codex._client.thread_resume(  # noqa: SLF001
                fork.id,
                ThreadResumeParams(thread_id=fork.id),
            )

            observed = {
                "final_response": seed_result.final_response,
                "forked_is_distinct": fork.id != origin.id,
                "forked_policy": response_approval_policy(fork_response),
            }

        expected = {
            "final_response": "source seeded",
            "forked_is_distinct": True,
            "forked_policy": AskForApprovalValue.never.value,
        }
        assert observed == expected
|
|
|
|
|
|
def test_thread_fork_can_override_approval_mode(tmp_path) -> None:
    """Check that an explicit approval mode on fork replaces the source policy.

    The source thread is started with ``ApprovalMode.deny_all``; the fork is
    created with ``ApprovalMode.auto_review``. The fork's raw resume response
    is expected to carry ``AskForApprovalValue.on_request``.
    """
    with AppServerHarness(tmp_path) as harness:
        # One queued assistant message serves the single seeding run below.
        harness.responses.enqueue_assistant_message(
            "source seeded",
            response_id="fork-override-mode",
        )

        with Codex(config=harness.app_server_config()) as codex:
            origin = codex.thread_start(approval_mode=ApprovalMode.deny_all)
            seed_result = origin.run("seed the source rollout")
            fork = codex.thread_fork(origin.id, approval_mode=ApprovalMode.auto_review)

            # Resume the fork through the low-level client to obtain the raw
            # response that response_approval_policy() inspects.
            fork_response = codex._client.thread_resume(  # noqa: SLF001
                fork.id,
                ThreadResumeParams(thread_id=fork.id),
            )

            observed = {
                "final_response": seed_result.final_response,
                "forked_policy": response_approval_policy(fork_response),
            }

        expected = {
            "final_response": "source seeded",
            "forked_policy": AskForApprovalValue.on_request.value,
        }
        assert observed == expected
|
|
|
|
|
|
def test_turn_approval_mode_persists_until_next_turn(tmp_path) -> None:
    """Check that a per-run approval mode is still in effect on the next plain run.

    The thread is started without an approval mode. The first run passes
    ``ApprovalMode.deny_all``; the second run passes nothing. The raw resume
    response taken after each run is expected to carry
    ``AskForApprovalValue.never``.
    """
    queued_replies = (
        ("turn override", "turn-mode-1"),
        ("turn inherited", "turn-mode-2"),
    )

    with AppServerHarness(tmp_path) as harness:
        # Queue one assistant message per run, in the order the runs happen.
        for text, response_id in queued_replies:
            harness.responses.enqueue_assistant_message(text, response_id=response_id)

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()

            def resume_snapshot():
                """Return the raw resume response for the thread under test."""
                return codex._client.thread_resume(  # noqa: SLF001
                    thread.id,
                    ThreadResumeParams(thread_id=thread.id),
                )

            overriding_run = thread.run(
                "deny this and later turns",
                approval_mode=ApprovalMode.deny_all,
            )
            state_after_override = resume_snapshot()

            plain_run = thread.run("inherit previous approval mode")
            state_after_plain = resume_snapshot()

            observed = {
                "after_turn_override": response_approval_policy(state_after_override),
                "after_omitted_turn": response_approval_policy(state_after_plain),
                "final_responses": [
                    overriding_run.final_response,
                    plain_run.final_response,
                ],
            }

        expected = {
            "after_turn_override": AskForApprovalValue.never.value,
            "after_omitted_turn": AskForApprovalValue.never.value,
            "final_responses": ["turn override", "turn inherited"],
        }
        assert observed == expected
|
|
|
|
|
|
def test_thread_run_approval_mode_persists_until_explicit_override(tmp_path) -> None:
    """Check that a plain run keeps the thread's policy and an explicit one replaces it.

    The thread is started with ``ApprovalMode.deny_all``. After a run with no
    approval mode the raw resume response is expected to carry
    ``AskForApprovalValue.never``; after a run with
    ``ApprovalMode.auto_review`` it is expected to carry
    ``AskForApprovalValue.on_request``.
    """
    queued_replies = (
        ("locked down", "approval-1"),
        ("reviewable", "approval-2"),
    )

    with AppServerHarness(tmp_path) as harness:
        # Queue one assistant message per run, in the order the runs happen.
        for text, response_id in queued_replies:
            harness.responses.enqueue_assistant_message(text, response_id=response_id)

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start(approval_mode=ApprovalMode.deny_all)

            def resume_snapshot():
                """Return the raw resume response for the thread under test."""
                return codex._client.thread_resume(  # noqa: SLF001
                    thread.id,
                    ThreadResumeParams(thread_id=thread.id),
                )

            plain_run = thread.run("keep approvals denied")
            state_after_plain = resume_snapshot()

            overriding_run = thread.run(
                "allow auto review now",
                approval_mode=ApprovalMode.auto_review,
            )
            state_after_override = resume_snapshot()

            observed = {
                "after_default_policy": response_approval_policy(state_after_plain),
                "after_override_policy": response_approval_policy(state_after_override),
                "final_responses": [
                    plain_run.final_response,
                    overriding_run.final_response,
                ],
            }

        expected = {
            "after_default_policy": AskForApprovalValue.never.value,
            "after_override_policy": AskForApprovalValue.on_request.value,
            "final_responses": ["locked down", "reviewable"],
        }
        assert observed == expected
|
|
|
|
|
|
def test_async_thread_run_approval_mode_persists_until_explicit_override(
    tmp_path,
) -> None:
    """Async variant: a plain run keeps the thread's policy, an explicit one replaces it.

    The whole scenario runs inside one coroutine driven by ``asyncio.run``.
    """

    async def exercise_async_client() -> None:
        """Run two turns through ``AsyncCodex`` and compare the reported policies.

        The thread is started with ``ApprovalMode.deny_all``. After a run with
        no approval mode the raw resume response is expected to carry
        ``AskForApprovalValue.never``; after a run with
        ``ApprovalMode.auto_review`` it is expected to carry
        ``AskForApprovalValue.on_request``.
        """
        queued_replies = (
            ("async locked down", "async-approval-1"),
            ("async reviewable", "async-approval-2"),
        )

        with AppServerHarness(tmp_path) as harness:
            # Queue one assistant message per run, in the order the runs happen.
            for text, response_id in queued_replies:
                harness.responses.enqueue_assistant_message(text, response_id=response_id)

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start(approval_mode=ApprovalMode.deny_all)

                async def resume_snapshot():
                    """Return the raw resume response for the thread under test."""
                    return await codex._client.thread_resume(  # noqa: SLF001
                        thread.id,
                        ThreadResumeParams(thread_id=thread.id),
                    )

                plain_run = await thread.run("keep async approvals denied")
                state_after_plain = await resume_snapshot()

                overriding_run = await thread.run(
                    "allow async auto review now",
                    approval_mode=ApprovalMode.auto_review,
                )
                state_after_override = await resume_snapshot()

                observed = {
                    "after_default_policy": response_approval_policy(state_after_plain),
                    "after_override_policy": response_approval_policy(state_after_override),
                    "final_responses": [
                        plain_run.final_response,
                        overriding_run.final_response,
                    ],
                }

            expected = {
                "after_default_policy": AskForApprovalValue.never.value,
                "after_override_policy": AskForApprovalValue.on_request.value,
                "final_responses": ["async locked down", "async reviewable"],
            }
            assert observed == expected

    asyncio.run(exercise_async_client())
|