Add Python SDK mock app-server integration tests

Build deterministic Python SDK integration coverage around the pinned app-server runtime and a local mock Responses server. Move behavioral coverage away from direct SDK monkeypatches in the cases where exercising the real app-server boundary is more useful.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-05-10 13:32:31 +03:00
parent f3e16de572
commit cad4bbdd64
3 changed files with 899 additions and 342 deletions

View File

@@ -14,21 +14,16 @@ from openai_codex.generated.v2_all import (
AgentMessageDeltaNotification,
ItemCompletedNotification,
MessagePhase,
ThreadTokenUsageUpdatedNotification,
TurnCompletedNotification,
TurnStartParams,
TurnStatus,
)
from openai_codex.models import InitializeResponse, Notification
from openai_codex.api import (
ApprovalMode,
AsyncCodex,
AsyncThread,
AsyncTurnHandle,
Codex,
RunResult,
Thread,
TurnHandle,
)
# Directory one level above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
@@ -123,38 +118,6 @@ def _item_completed_notification(
)
def _token_usage_notification(
    *,
    thread_id: str = "thread-1",
    turn_id: str = "turn-1",
) -> Notification:
    """Build a synthetic ``thread/tokenUsage/updated`` notification.

    The payload uses fixed ``last`` and ``total`` counters and is validated
    through ``ThreadTokenUsageUpdatedNotification`` before being wrapped in a
    ``Notification``.
    """
    last_usage = {
        "cachedInputTokens": 1,
        "inputTokens": 2,
        "outputTokens": 3,
        "reasoningOutputTokens": 4,
        "totalTokens": 9,
    }
    total_usage = {
        "cachedInputTokens": 5,
        "inputTokens": 6,
        "outputTokens": 7,
        "reasoningOutputTokens": 8,
        "totalTokens": 26,
    }
    raw_payload = {
        "threadId": thread_id,
        "turnId": turn_id,
        "tokenUsage": {"last": last_usage, "total": total_usage},
    }
    validated = ThreadTokenUsageUpdatedNotification.model_validate(raw_payload)
    return Notification(method="thread/tokenUsage/updated", payload=validated)
def test_codex_init_failure_closes_client(monkeypatch: pytest.MonkeyPatch) -> None:
closed: list[bool] = []
@@ -261,64 +224,6 @@ def _approval_mode_turn_params(approval_mode: ApprovalMode) -> TurnStartParams:
)
class CapturingApprovalClient:
    """Stand-in client that records the params handed to each wrapper call."""

    def __init__(self) -> None:
        # Every params object received, in call order.
        self.params: list[Any] = []

    def thread_start(self, params: Any) -> SimpleNamespace:
        """Record ``params`` and answer with a thread whose id is "thread-1"."""
        self.params.append(params)
        thread = SimpleNamespace(id="thread-1")
        return SimpleNamespace(thread=thread)

    def thread_resume(self, thread_id: str, params: Any) -> SimpleNamespace:
        """Record ``params`` and echo ``thread_id`` back as the thread id."""
        self.params.append(params)
        thread = SimpleNamespace(id=thread_id)
        return SimpleNamespace(thread=thread)

    def thread_fork(self, thread_id: str, params: Any) -> SimpleNamespace:
        """Record ``params`` and answer with ``thread_id`` suffixed by "-fork"."""
        self.params.append(params)
        thread = SimpleNamespace(id=f"{thread_id}-fork")
        return SimpleNamespace(thread=thread)

    def turn_start(
        self,
        thread_id: str,
        input: object,  # noqa: A002
        *,
        params: Any,
    ) -> SimpleNamespace:
        """Record ``params`` and answer with a turn id derived from ``thread_id``.

        ``input`` is accepted but not used.
        """
        self.params.append(params)
        turn = SimpleNamespace(id=f"{thread_id}-turn")
        return SimpleNamespace(turn=turn)
class CapturingAsyncApprovalClient:
    """Coroutine-based stand-in client that records the params it receives."""

    def __init__(self) -> None:
        # Every params object received, in call order.
        self.params: list[Any] = []

    async def thread_start(self, params: Any) -> SimpleNamespace:
        """Record ``params`` and answer with a thread whose id is "thread-1"."""
        self.params.append(params)
        thread = SimpleNamespace(id="thread-1")
        return SimpleNamespace(thread=thread)

    async def thread_resume(self, thread_id: str, params: Any) -> SimpleNamespace:
        """Record ``params`` and echo ``thread_id`` back as the thread id."""
        self.params.append(params)
        thread = SimpleNamespace(id=thread_id)
        return SimpleNamespace(thread=thread)

    async def thread_fork(self, thread_id: str, params: Any) -> SimpleNamespace:
        """Record ``params`` and answer with ``thread_id`` suffixed by "-fork"."""
        self.params.append(params)
        thread = SimpleNamespace(id=f"{thread_id}-fork")
        return SimpleNamespace(thread=thread)

    async def turn_start(
        self,
        thread_id: str,
        input: object,  # noqa: A002
        *,
        params: Any,
    ) -> SimpleNamespace:
        """Record ``params`` and answer with a turn id derived from ``thread_id``.

        ``input`` is accepted but not used.
        """
        self.params.append(params)
        turn = SimpleNamespace(id=f"{thread_id}-turn")
        return SimpleNamespace(turn=turn)
def test_approval_modes_serialize_to_expected_start_params() -> None:
"""ApprovalMode should map to the app-server params sent for new work."""
assert {
@@ -339,195 +244,6 @@ def test_unknown_approval_mode_is_rejected() -> None:
public_api_module._approval_mode_settings("allow_all") # type: ignore[arg-type]
def test_approval_defaults_preserve_existing_sync_thread_settings() -> None:
    """Approval settings should appear only on thread start and explicit overrides."""
    recorder = CapturingApprovalClient()
    # Create the instance without running Codex.__init__, then attach the recorder.
    sdk = Codex.__new__(Codex)
    sdk._client = recorder

    thread = sdk.thread_start(approval_mode=ApprovalMode.deny_all)
    thread.turn([])
    sdk.thread_resume("existing-thread")
    sdk.thread_fork("existing-thread")
    thread.turn([], approval_mode=ApprovalMode.auto_review)

    # One entry per recorded call, in the order the calls were made above.
    expected_settings = [
        {"approvalPolicy": "never"},
        {},
        {},
        {},
        {
            "approvalPolicy": "on-request",
            "approvalsReviewer": "auto_review",
        },
    ]
    assert _approval_settings(recorder.params) == expected_settings
def test_approval_defaults_preserve_existing_async_thread_settings() -> None:
    """The async wrappers should record the same approval settings sequence."""

    async def scenario() -> None:
        """Drive start, turn, resume, fork and an overriding turn in order."""
        recorder = CapturingAsyncApprovalClient()
        sdk = AsyncCodex()
        sdk._client = recorder  # type: ignore[assignment]
        sdk._initialized = True

        thread = await sdk.thread_start(approval_mode=ApprovalMode.deny_all)
        await thread.turn([])
        await sdk.thread_resume("existing-thread")
        await sdk.thread_fork("existing-thread")
        await thread.turn([], approval_mode=ApprovalMode.auto_review)

        # One entry per recorded call, in the order the calls were made above.
        expected_settings = [
            {"approvalPolicy": "never"},
            {},
            {},
            {},
            {
                "approvalPolicy": "on-request",
                "approvalsReviewer": "auto_review",
            },
        ]
        assert _approval_settings(recorder.params) == expected_settings

    asyncio.run(scenario())
def test_turn_streams_can_consume_multiple_turns_on_one_client() -> None:
    """Interleaved sync streams for two turns should each see their own events."""
    client = AppServerClient()
    # One queue per turn id: a delta followed by the completion event.
    queues: dict[str, deque[Notification]] = {
        turn_id: deque(
            [
                _delta_notification(turn_id=turn_id, text=text),
                _completed_notification(turn_id=turn_id),
            ]
        )
        for turn_id, text in (("turn-1", "one"), ("turn-2", "two"))
    }

    def pop_next(turn_id: str) -> Notification:
        """Return the next queued notification for ``turn_id``."""
        return queues[turn_id].popleft()

    client.next_turn_notification = pop_next  # type: ignore[method-assign]

    stream_one = TurnHandle(client, "thread-1", "turn-1").stream()
    first_event = next(stream_one)
    assert first_event.method == "item/agentMessage/delta"

    stream_two = TurnHandle(client, "thread-1", "turn-2").stream()
    second_event = next(stream_two)
    assert second_event.method == "item/agentMessage/delta"

    first_done = next(stream_one)
    assert first_done.method == "turn/completed"
    second_done = next(stream_two)
    assert second_done.method == "turn/completed"

    stream_one.close()
    stream_two.close()
def test_async_turn_streams_can_consume_multiple_turns_on_one_client() -> None:
    """Interleaved async streams for two turns should each see their own events."""

    async def scenario() -> None:
        """Alternate between two async streams fed from per-turn queues."""
        sdk = AsyncCodex()

        async def skip_initialization() -> None:
            """Replace initialization with a no-op for this stream test."""

        # One queue per turn id: a delta followed by the completion event.
        queues: dict[str, deque[Notification]] = {
            turn_id: deque(
                [
                    _delta_notification(turn_id=turn_id, text=text),
                    _completed_notification(turn_id=turn_id),
                ]
            )
            for turn_id, text in (("turn-1", "one"), ("turn-2", "two"))
        }

        async def pop_next(turn_id: str) -> Notification:
            """Return the next queued notification for ``turn_id``."""
            return queues[turn_id].popleft()

        sdk._ensure_initialized = skip_initialization  # type: ignore[method-assign]
        sdk._client.next_turn_notification = pop_next  # type: ignore[method-assign]

        stream_one = AsyncTurnHandle(sdk, "thread-1", "turn-1").stream()
        first_event = await anext(stream_one)
        assert first_event.method == "item/agentMessage/delta"

        stream_two = AsyncTurnHandle(sdk, "thread-1", "turn-2").stream()
        second_event = await anext(stream_two)
        assert second_event.method == "item/agentMessage/delta"

        first_done = await anext(stream_one)
        assert first_done.method == "turn/completed"
        second_done = await anext(stream_two)
        assert second_done.method == "turn/completed"

        await stream_one.aclose()
        await stream_two.aclose()

    asyncio.run(scenario())
def test_turn_run_returns_completed_turn_payload() -> None:
    """TurnHandle.run should report a completed turn-1 with no items."""
    client = AppServerClient()
    # The only queued event is the completion notification.
    pending: deque[Notification] = deque([_completed_notification()])

    def pop_next(_turn_id: str) -> Notification:
        """Return the next queued notification regardless of turn id."""
        return pending.popleft()

    client.next_turn_notification = pop_next  # type: ignore[method-assign]

    turn = TurnHandle(client, "thread-1", "turn-1").run()

    assert turn.id == "turn-1"
    assert turn.status == TurnStatus.completed
    assert turn.items == []
def test_thread_run_accepts_string_input_and_returns_run_result() -> None:
    """Thread.run should turn a string into text input and return a RunResult.

    Also checks that the params passed to ``turn_start`` yield empty approval
    settings.
    """
    client = AppServerClient()
    item_event = _item_completed_notification(text="Hello.")
    usage_event = _token_usage_notification()
    pending: deque[Notification] = deque(
        [item_event, usage_event, _completed_notification()]
    )

    def pop_next(_turn_id: str) -> Notification:
        """Return the next queued notification regardless of turn id."""
        return pending.popleft()

    client.next_turn_notification = pop_next  # type: ignore[method-assign]

    captured: dict[str, object] = {}

    def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202
        """Capture the call arguments and return a synthetic turn id."""
        captured.update(thread_id=thread_id, wire_input=wire_input, params=params)
        return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

    client.turn_start = fake_turn_start  # type: ignore[method-assign]

    outcome = Thread(client, "thread-1").run("hello")

    expected_result = RunResult(
        final_response="Hello.",
        items=[item_event.payload.item],
        usage=usage_event.payload.token_usage,
    )
    observed = (
        captured["thread_id"],
        captured["wire_input"],
        _approval_settings([captured["params"]]),
        outcome,
    )
    assert observed == (
        "thread-1",
        [{"type": "text", "text": "hello"}],
        [{}],
        expected_result,
    )
def test_thread_run_uses_last_completed_assistant_message_as_final_response() -> None:
client = AppServerClient()
first_item_notification = _item_completed_notification(text="First message")
@@ -694,64 +410,6 @@ def test_stream_text_registers_and_consumes_turn_notifications() -> None:
)
def test_async_thread_run_accepts_string_input_and_returns_run_result() -> None:
    """AsyncThread.run should turn a string into text input and return a RunResult.

    Also checks that the params passed to ``turn_start`` yield empty approval
    settings.
    """

    async def scenario() -> None:
        """Queue item, usage and completion events, then run one async turn."""
        sdk = AsyncCodex()

        async def skip_initialization() -> None:
            """Replace initialization with a no-op for this run test."""

        item_event = _item_completed_notification(text="Hello async.")
        usage_event = _token_usage_notification()
        pending: deque[Notification] = deque(
            [item_event, usage_event, _completed_notification()]
        )
        captured: dict[str, object] = {}

        async def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202
            """Capture the call arguments and return a synthetic turn id."""
            captured.update(thread_id=thread_id, wire_input=wire_input, params=params)
            return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

        async def pop_next(_turn_id: str) -> Notification:
            """Return the next queued notification regardless of turn id."""
            return pending.popleft()

        sdk._ensure_initialized = skip_initialization  # type: ignore[method-assign]
        sdk._client.turn_start = fake_turn_start  # type: ignore[method-assign]
        sdk._client.next_turn_notification = pop_next  # type: ignore[method-assign]

        outcome = await AsyncThread(sdk, "thread-1").run("hello")

        expected_result = RunResult(
            final_response="Hello async.",
            items=[item_event.payload.item],
            usage=usage_event.payload.token_usage,
        )
        observed = (
            captured["thread_id"],
            captured["wire_input"],
            _approval_settings([captured["params"]]),
            outcome,
        )
        assert observed == (
            "thread-1",
            [{"type": "text", "text": "hello"}],
            [{}],
            expected_result,
        )

    asyncio.run(scenario())
def test_async_thread_run_uses_last_completed_assistant_message_as_final_response() -> (
None
):