mirror of
https://github.com/openai/codex.git
synced 2026-05-15 08:42:34 +00:00
## Why The SDK had behavioral tests that replaced SDK client internals. Those tests could catch wrapper mistakes, but they did not prove the pinned app-server runtime, generated notification models, request routing, and sync/async public clients worked together. This PR adds deterministic integration coverage that starts the pinned `codex app-server` process and mocks only the upstream Responses HTTP boundary. ## What - Add `AppServerHarness` and `MockResponsesServer` helpers for isolated `CODEX_HOME`, mock-provider config, queued SSE responses, and captured `/v1/responses` requests. - Add shared helpers for SSE construction, stream assertions, approval-policy inspection, and image fixtures. - Split integration coverage into focused modules for run behavior, inputs, streaming, turn controls, approvals, and thread lifecycle. - Cover sync and async `Thread.run`, `TurnHandle.stream`, interleaved streams, approval-mode persistence, lifecycle helpers, final-answer phase handling, image inputs, loaded skill input injection, steering, interruption, listing, history reads, run overrides, and token usage mapping. - Replace public-wrapper tests that duplicated integration-test behavior with lower-level client tests only where direct client behavior is the thing under test. ## Stack 1. #21891 `[1/8]` Pin Python SDK runtime dependency 2. #21893 `[2/8]` Generate Python SDK types from pinned runtime 3. #21895 `[3/8]` Run Python SDK tests in CI 4. #21896 `[4/8]` Define Python SDK public API surface 5. #21905 `[5/8]` Rename Python SDK package to `openai-codex` 6. #21910 `[6/8]` Add high-level Python SDK approval mode 7. This PR `[7/8]` Add Python SDK app-server integration harness 8. #22021 `[8/8]` Add Python SDK Ruff formatting ## Verification - Added pinned app-server integration tests under `sdk/python/tests/test_app_server_*.py` and `test_real_app_server_integration.py`. --------- Co-authored-by: Codex <noreply@openai.com>
218 lines
8.3 KiB
Python
218 lines
8.3 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
|
|
from app_server_harness import AppServerHarness
|
|
from openai_codex import AsyncCodex, Codex
|
|
from app_server_helpers import request_kind
|
|
|
|
|
|
def _thread_message_summary(read_response) -> list[tuple[str, str]]:
|
|
"""Return persisted user/agent messages from a thread read response."""
|
|
messages: list[tuple[str, str]] = []
|
|
for turn in read_response.thread.turns:
|
|
for item in turn.items:
|
|
root = item.root
|
|
if root.type == "userMessage":
|
|
text = "\n".join(
|
|
input_item.root.text
|
|
for input_item in root.content
|
|
if input_item.root.type == "text"
|
|
)
|
|
messages.append(("user", text))
|
|
if root.type == "agentMessage":
|
|
messages.append(("agent", root.text))
|
|
return messages
|
|
|
|
|
|
def test_thread_set_name_and_read(tmp_path) -> None:
|
|
"""Thread naming should round-trip through app-server JSON-RPC."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
with Codex(config=harness.app_server_config()) as codex:
|
|
thread = codex.thread_start()
|
|
thread.set_name("sdk integration thread")
|
|
named = thread.read(include_turns=True)
|
|
|
|
assert {"thread_name": named.thread.name} == {
|
|
"thread_name": "sdk integration thread",
|
|
}
|
|
|
|
|
|
def test_thread_list_filters_archived_threads(tmp_path) -> None:
|
|
"""Thread listing should reflect archive state through app-server."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
harness.responses.enqueue_assistant_message("active", response_id="list-active")
|
|
harness.responses.enqueue_assistant_message(
|
|
"archived",
|
|
response_id="list-archived",
|
|
)
|
|
|
|
with Codex(config=harness.app_server_config()) as codex:
|
|
active_thread = codex.thread_start()
|
|
archived_thread = codex.thread_start()
|
|
active_thread.run("keep this listed")
|
|
archived_thread.run("archive this")
|
|
codex.thread_archive(archived_thread.id)
|
|
active_list = codex.thread_list(archived=False)
|
|
archived_list = codex.thread_list(archived=True)
|
|
|
|
expected_ids = {active_thread.id, archived_thread.id}
|
|
assert {
|
|
"active_ids": sorted(
|
|
thread.id for thread in active_list.data if thread.id in expected_ids
|
|
),
|
|
"archived_ids": sorted(
|
|
thread.id for thread in archived_list.data if thread.id in expected_ids
|
|
),
|
|
} == {
|
|
"active_ids": [active_thread.id],
|
|
"archived_ids": [archived_thread.id],
|
|
}
|
|
|
|
|
|
def test_read_include_turns_returns_persisted_history(tmp_path) -> None:
|
|
"""Thread.read(include_turns=True) should load real persisted turn items."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
harness.responses.enqueue_assistant_message("first answer", response_id="read-1")
|
|
harness.responses.enqueue_assistant_message("second answer", response_id="read-2")
|
|
|
|
with Codex(config=harness.app_server_config()) as codex:
|
|
thread = codex.thread_start()
|
|
thread.run("first question")
|
|
thread.run("second question")
|
|
read = thread.read(include_turns=True)
|
|
|
|
assert _thread_message_summary(read) == [
|
|
("user", "first question"),
|
|
("agent", "first answer"),
|
|
("user", "second question"),
|
|
("agent", "second answer"),
|
|
]
|
|
|
|
|
|
def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
|
|
"""Async lifecycle helpers should preserve the same app-server thread state."""
|
|
|
|
async def scenario() -> None:
|
|
"""Exercise async wrappers over one materialized thread."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
harness.responses.enqueue_assistant_message(
|
|
"async materialized",
|
|
response_id="async-lifecycle",
|
|
)
|
|
|
|
async with AsyncCodex(config=harness.app_server_config()) as codex:
|
|
thread = await codex.thread_start()
|
|
run_result = await thread.run("materialize async thread")
|
|
await thread.set_name("async lifecycle")
|
|
named = await thread.read()
|
|
resumed = await codex.thread_resume(thread.id)
|
|
forked = await codex.thread_fork(thread.id)
|
|
archive_response = await codex.thread_archive(thread.id)
|
|
unarchived = await codex.thread_unarchive(thread.id)
|
|
|
|
assert {
|
|
"run_final_response": run_result.final_response,
|
|
"named_thread": named.thread.name,
|
|
"resumed_id": resumed.id,
|
|
"forked_is_distinct": forked.id != thread.id,
|
|
"archive_response": archive_response.model_dump(by_alias=True, mode="json"),
|
|
"unarchived_id": unarchived.id,
|
|
} == {
|
|
"run_final_response": "async materialized",
|
|
"named_thread": "async lifecycle",
|
|
"resumed_id": thread.id,
|
|
"forked_is_distinct": True,
|
|
"archive_response": {},
|
|
"unarchived_id": thread.id,
|
|
}
|
|
|
|
asyncio.run(scenario())
|
|
|
|
|
|
def test_thread_fork_returns_distinct_thread(tmp_path) -> None:
|
|
"""Thread fork should return a distinct thread for a persisted rollout."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
harness.responses.enqueue_assistant_message("materialized", response_id="fork-seed")
|
|
|
|
with Codex(config=harness.app_server_config()) as codex:
|
|
thread = codex.thread_start()
|
|
seeded = thread.run("materialize this thread before fork")
|
|
forked = codex.thread_fork(thread.id)
|
|
|
|
assert {
|
|
"seeded_response": seeded.final_response,
|
|
"forked_is_distinct": forked.id != thread.id,
|
|
} == {
|
|
"seeded_response": "materialized",
|
|
"forked_is_distinct": True,
|
|
}
|
|
|
|
|
|
def test_archive_unarchive_round_trip_uses_materialized_rollout(tmp_path) -> None:
|
|
"""Archive helpers should work once the app-server has persisted a rollout."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
harness.responses.enqueue_assistant_message("materialized", response_id="archive-seed")
|
|
|
|
with Codex(config=harness.app_server_config()) as codex:
|
|
thread = codex.thread_start()
|
|
seeded = thread.run("materialize this thread before archive")
|
|
archived = codex.thread_archive(thread.id)
|
|
unarchived = codex.thread_unarchive(thread.id)
|
|
read = unarchived.read()
|
|
|
|
assert {
|
|
"seeded_response": seeded.final_response,
|
|
"archive_response": archived.model_dump(by_alias=True, mode="json"),
|
|
"unarchived_id": unarchived.id,
|
|
"read_id": read.thread.id,
|
|
} == {
|
|
"seeded_response": "materialized",
|
|
"archive_response": {},
|
|
"unarchived_id": thread.id,
|
|
"read_id": thread.id,
|
|
}
|
|
|
|
|
|
def test_models_rpc(tmp_path) -> None:
|
|
"""Model listing should go through the pinned app-server method."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
with Codex(config=harness.app_server_config()) as codex:
|
|
models = codex.models(include_hidden=True)
|
|
|
|
assert {
|
|
"models_payload_has_data": isinstance(
|
|
models.model_dump(by_alias=True, mode="json").get("data"),
|
|
list,
|
|
),
|
|
} == {"models_payload_has_data": True}
|
|
|
|
|
|
def test_compact_rpc_hits_mock_responses(tmp_path) -> None:
|
|
"""Compaction should run through app-server and hit the mock Responses boundary."""
|
|
with AppServerHarness(tmp_path) as harness:
|
|
harness.responses.enqueue_assistant_message("history", response_id="compact-history")
|
|
harness.responses.enqueue_assistant_message(
|
|
"compact summary",
|
|
response_id="compact-summary",
|
|
)
|
|
|
|
with Codex(config=harness.app_server_config()) as codex:
|
|
thread = codex.thread_start()
|
|
run_result = thread.run("create history")
|
|
compact_response = thread.compact()
|
|
requests = harness.responses.wait_for_requests(2)
|
|
|
|
assert {
|
|
"run_final_response": run_result.final_response,
|
|
"compact_response": compact_response.model_dump(
|
|
by_alias=True,
|
|
mode="json",
|
|
),
|
|
"request_kinds": [request_kind(request.path) for request in requests],
|
|
} == {
|
|
"run_final_response": "history",
|
|
"compact_response": {},
|
|
"request_kinds": ["responses", "responses"],
|
|
}
|