Files
codex/sdk/python/tests/test_app_server_lifecycle.py
Ahmed Ibrahim 3e10e09e24 [7/8] Add Python SDK app-server integration harness (#22014)
## Why

The SDK had behavioral tests that replaced SDK client internals. Those
tests could catch wrapper mistakes, but they did not prove the pinned
app-server runtime, generated notification models, request routing, and
sync/async public clients worked together.

This PR adds deterministic integration coverage that starts the pinned
`codex app-server` process and mocks only the upstream Responses HTTP
boundary.

## What

- Add `AppServerHarness` and `MockResponsesServer` helpers for isolated
`CODEX_HOME`, mock-provider config, queued SSE responses, and captured
`/v1/responses` requests.
- Add shared helpers for SSE construction, stream assertions,
approval-policy inspection, and image fixtures.
- Split integration coverage into focused modules for run behavior,
inputs, streaming, turn controls, approvals, and thread lifecycle.
- Cover sync and async `Thread.run`, `TurnHandle.stream`, interleaved
streams, approval-mode persistence, lifecycle helpers, final-answer
phase handling, image inputs, loaded skill input injection, steering,
interruption, listing, history reads, run overrides, and token usage
mapping.
- Replace public-wrapper tests that duplicated integration-test behavior
with lower-level client tests only where direct client behavior is the
thing under test.

## Stack

1. #21891 `[1/8]` Pin Python SDK runtime dependency
2. #21893 `[2/8]` Generate Python SDK types from pinned runtime
3. #21895 `[3/8]` Run Python SDK tests in CI
4. #21896 `[4/8]` Define Python SDK public API surface
5. #21905 `[5/8]` Rename Python SDK package to `openai-codex`
6. #21910 `[6/8]` Add high-level Python SDK approval mode
7. This PR `[7/8]` Add Python SDK app-server integration harness
8. #22021 `[8/8]` Add Python SDK Ruff formatting

## Verification

- Added pinned app-server integration tests under
`sdk/python/tests/test_app_server_*.py` and
`test_real_app_server_integration.py`.

---------

Co-authored-by: Codex <noreply@openai.com>
2026-05-12 01:06:41 +03:00

218 lines
8.3 KiB
Python

from __future__ import annotations
import asyncio
from app_server_harness import AppServerHarness
from openai_codex import AsyncCodex, Codex
from app_server_helpers import request_kind
def _thread_message_summary(read_response) -> list[tuple[str, str]]:
"""Return persisted user/agent messages from a thread read response."""
messages: list[tuple[str, str]] = []
for turn in read_response.thread.turns:
for item in turn.items:
root = item.root
if root.type == "userMessage":
text = "\n".join(
input_item.root.text
for input_item in root.content
if input_item.root.type == "text"
)
messages.append(("user", text))
if root.type == "agentMessage":
messages.append(("agent", root.text))
return messages
def test_thread_set_name_and_read(tmp_path) -> None:
    """Thread naming should round-trip through app-server JSON-RPC."""
    with AppServerHarness(tmp_path) as harness:
        with Codex(config=harness.app_server_config()) as codex:
            started = codex.thread_start()
            started.set_name("sdk integration thread")
            # Re-read the thread so the name comes back from the server,
            # not from local client state.
            refreshed = started.read(include_turns=True)
            observed = {"thread_name": refreshed.thread.name}
            assert observed == {"thread_name": "sdk integration thread"}
def test_thread_list_filters_archived_threads(tmp_path) -> None:
    """Thread listing should reflect archive state through app-server."""
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("active", response_id="list-active")
        harness.responses.enqueue_assistant_message("archived", response_id="list-archived")
        with Codex(config=harness.app_server_config()) as codex:
            kept = codex.thread_start()
            shelved = codex.thread_start()
            # Both threads must run once so the app-server materializes them.
            kept.run("keep this listed")
            shelved.run("archive this")
            codex.thread_archive(shelved.id)

            relevant = {kept.id, shelved.id}

            def _relevant_ids(listing) -> list[str]:
                # Only look at the threads created by this test.
                return sorted(t.id for t in listing.data if t.id in relevant)

            observed = {
                "active_ids": _relevant_ids(codex.thread_list(archived=False)),
                "archived_ids": _relevant_ids(codex.thread_list(archived=True)),
            }
            assert observed == {
                "active_ids": [kept.id],
                "archived_ids": [shelved.id],
            }
def test_read_include_turns_returns_persisted_history(tmp_path) -> None:
    """Thread.read(include_turns=True) should load real persisted turn items."""
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("first answer", response_id="read-1")
        harness.responses.enqueue_assistant_message("second answer", response_id="read-2")
        with Codex(config=harness.app_server_config()) as codex:
            conversation = codex.thread_start()
            for prompt in ("first question", "second question"):
                conversation.run(prompt)
            history = conversation.read(include_turns=True)
            # Turns persist in order: each user prompt followed by the
            # queued assistant reply.
            expected = [
                ("user", "first question"),
                ("agent", "first answer"),
                ("user", "second question"),
                ("agent", "second answer"),
            ]
            assert _thread_message_summary(history) == expected
def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
    """Async lifecycle helpers should preserve the same app-server thread state."""

    async def exercise() -> None:
        """Drive every async lifecycle wrapper against one materialized thread."""
        with AppServerHarness(tmp_path) as harness:
            harness.responses.enqueue_assistant_message(
                "async materialized",
                response_id="async-lifecycle",
            )
            async with AsyncCodex(config=harness.app_server_config()) as codex:
                subject = await codex.thread_start()
                # Running once materializes the rollout so resume/fork/
                # archive all have persisted state to operate on.
                outcome = await subject.run("materialize async thread")
                await subject.set_name("async lifecycle")
                renamed = await subject.read()
                resumed = await codex.thread_resume(subject.id)
                branched = await codex.thread_fork(subject.id)
                archive_payload = await codex.thread_archive(subject.id)
                restored = await codex.thread_unarchive(subject.id)

                observed = {
                    "run_final_response": outcome.final_response,
                    "named_thread": renamed.thread.name,
                    "resumed_id": resumed.id,
                    "forked_is_distinct": branched.id != subject.id,
                    "archive_response": archive_payload.model_dump(by_alias=True, mode="json"),
                    "unarchived_id": restored.id,
                }
                assert observed == {
                    "run_final_response": "async materialized",
                    "named_thread": "async lifecycle",
                    "resumed_id": subject.id,
                    "forked_is_distinct": True,
                    "archive_response": {},
                    "unarchived_id": subject.id,
                }

    asyncio.run(exercise())
def test_thread_fork_returns_distinct_thread(tmp_path) -> None:
    """Thread fork should return a distinct thread for a persisted rollout."""
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("materialized", response_id="fork-seed")
        with Codex(config=harness.app_server_config()) as codex:
            original = codex.thread_start()
            # One run persists the rollout the fork will copy from.
            seed_result = original.run("materialize this thread before fork")
            clone = codex.thread_fork(original.id)
            observed = {
                "seeded_response": seed_result.final_response,
                "forked_is_distinct": clone.id != original.id,
            }
            assert observed == {
                "seeded_response": "materialized",
                "forked_is_distinct": True,
            }
def test_archive_unarchive_round_trip_uses_materialized_rollout(tmp_path) -> None:
    """Archive helpers should work once the app-server has persisted a rollout."""
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("materialized", response_id="archive-seed")
        with Codex(config=harness.app_server_config()) as codex:
            subject = codex.thread_start()
            seed_result = subject.run("materialize this thread before archive")
            archive_payload = codex.thread_archive(subject.id)
            restored = codex.thread_unarchive(subject.id)
            # Reading through the unarchived handle proves it still points
            # at the same server-side thread.
            reread = restored.read()
            observed = {
                "seeded_response": seed_result.final_response,
                "archive_response": archive_payload.model_dump(by_alias=True, mode="json"),
                "unarchived_id": restored.id,
                "read_id": reread.thread.id,
            }
            assert observed == {
                "seeded_response": "materialized",
                "archive_response": {},
                "unarchived_id": subject.id,
                "read_id": subject.id,
            }
def test_models_rpc(tmp_path) -> None:
    """Model listing should go through the pinned app-server method."""
    with AppServerHarness(tmp_path) as harness:
        with Codex(config=harness.app_server_config()) as codex:
            listing = codex.models(include_hidden=True)
            payload = listing.model_dump(by_alias=True, mode="json")
            # The exact model set depends on the pinned runtime, so only
            # the payload shape is asserted here.
            observed = {"models_payload_has_data": isinstance(payload.get("data"), list)}
            assert observed == {"models_payload_has_data": True}
def test_compact_rpc_hits_mock_responses(tmp_path) -> None:
    """Compaction should run through app-server and hit the mock Responses boundary."""
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("history", response_id="compact-history")
        harness.responses.enqueue_assistant_message(
            "compact summary",
            response_id="compact-summary",
        )
        with Codex(config=harness.app_server_config()) as codex:
            subject = codex.thread_start()
            run_outcome = subject.run("create history")
            compact_payload = subject.compact()
            # One request for the run plus one for the compaction summary.
            captured = harness.responses.wait_for_requests(2)
            observed = {
                "run_final_response": run_outcome.final_response,
                "compact_response": compact_payload.model_dump(by_alias=True, mode="json"),
                "request_kinds": [request_kind(req.path) for req in captured],
            }
            assert observed == {
                "run_final_response": "history",
                "compact_response": {},
                "request_kinds": ["responses", "responses"],
            }