mirror of https://github.com/openai/codex.git (synced 2026-05-15 08:42:34 +00:00)
## Why

The SDK had behavioral tests that replaced SDK client internals. Those tests could catch wrapper mistakes, but they did not prove that the pinned app-server runtime, generated notification models, request routing, and the sync/async public clients worked together. This PR adds deterministic integration coverage that starts the pinned `codex app-server` process and mocks only the upstream Responses HTTP boundary.

## What

- Add `AppServerHarness` and `MockResponsesServer` helpers for isolated `CODEX_HOME`, mock-provider config, queued SSE responses, and captured `/v1/responses` requests (see the sketch after this description).
- Add shared helpers for SSE construction, stream assertions, approval-policy inspection, and image fixtures.
- Split integration coverage into focused modules for run behavior, inputs, streaming, turn controls, approvals, and thread lifecycle.
- Cover sync and async `Thread.run`, `TurnHandle.stream`, interleaved streams, approval-mode persistence, lifecycle helpers, final-answer phase handling, image inputs, loaded-skill input injection, steering, interruption, listing, history reads, run overrides, and token-usage mapping.
- Replace public-wrapper tests that duplicated integration-test behavior, keeping lower-level client tests only where direct client behavior is the thing under test.

## Stack

1. #21891 `[1/8]` Pin Python SDK runtime dependency
2. #21893 `[2/8]` Generate Python SDK types from pinned runtime
3. #21895 `[3/8]` Run Python SDK tests in CI
4. #21896 `[4/8]` Define Python SDK public API surface
5. #21905 `[5/8]` Rename Python SDK package to `openai-codex`
6. #21910 `[6/8]` Add high-level Python SDK approval mode
7. This PR `[7/8]` Add Python SDK app-server integration harness
8. #22021 `[8/8]` Add Python SDK Ruff formatting

## Verification

- Added pinned app-server integration tests under `sdk/python/tests/test_app_server_*.py` and `test_real_app_server_integration.py`.

---------

Co-authored-by: Codex <noreply@openai.com>
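For orientation, here is a minimal sketch of the kind of integration test the harness enables. Only `AppServerHarness`, `MockResponsesServer`, `Thread.run`, and the mocked `/v1/responses` boundary are named by this PR; the import path, constructor arguments, and the helper methods used below (`enqueue_sse`, `requests`, `start_thread`, `final_response`) are illustrative assumptions, not the actual helper API.

```python
# Hypothetical sketch: every name other than AppServerHarness,
# MockResponsesServer, and Thread.run is an assumption for illustration.
from tests.app_server_harness import AppServerHarness, MockResponsesServer  # assumed path


def test_run_streams_mocked_answer(tmp_path) -> None:
    responses = MockResponsesServer()
    # Queue the SSE body the mock /v1/responses endpoint streams back.
    responses.enqueue_sse(
        [
            {"type": "response.output_text.delta", "delta": "hello"},
            {"type": "response.completed"},
        ]
    )

    # The harness starts the pinned `codex app-server` process against an
    # isolated CODEX_HOME whose provider config points at the mock server.
    with AppServerHarness(codex_home=tmp_path, responses=responses) as harness:
        thread = harness.codex.start_thread()  # assumed accessor
        turn = thread.run("say hello")

        assert "hello" in turn.final_response  # assumed attribute
        # Every captured upstream request is available for inspection.
        assert responses.requests
```

Because only the upstream HTTP boundary is mocked, such a test exercises the pinned `codex app-server` binary, the generated notification models, request routing, and the public client together. The module below, by contrast, is one of the retained lower-level client tests: it patches `AppServerClient` and the async client internals directly rather than going through the harness.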
171 lines · 5.2 KiB · Python
from __future__ import annotations

import asyncio
from pathlib import Path
from typing import Any

import pytest

import openai_codex.api as public_api_module
from openai_codex.generated.v2_all import TurnStartParams
from openai_codex.models import InitializeResponse
from openai_codex.api import (
    ApprovalMode,
    AsyncCodex,
    Codex,
)

ROOT = Path(__file__).resolve().parents[1]


def _approval_settings(params: list[Any]) -> list[dict[str, object]]:
    """Return serialized approval settings from captured Pydantic params."""
    return [
        {
            key: value
            for key, value in param.model_dump(
                by_alias=True,
                exclude_none=True,
                mode="json",
            ).items()
            if key in {"approvalPolicy", "approvalsReviewer"}
        }
        for param in params
    ]


def test_codex_init_failure_closes_client(monkeypatch: pytest.MonkeyPatch) -> None:
    """Codex() should close the spawned client when initialization fails."""
    closed: list[bool] = []

    class FakeClient:
        def __init__(self, config=None) -> None:  # noqa: ANN001,ARG002
            self._closed = False

        def start(self) -> None:
            return None

        def initialize(self) -> InitializeResponse:
            return InitializeResponse.model_validate({})

        def close(self) -> None:
            self._closed = True
            closed.append(True)

    monkeypatch.setattr(public_api_module, "AppServerClient", FakeClient)

    with pytest.raises(RuntimeError, match="missing required metadata"):
        Codex()

    assert closed == [True]


def test_async_codex_init_failure_closes_client() -> None:
    """AsyncCodex should close its client and reset state when initialization fails."""

    async def scenario() -> None:
        codex = AsyncCodex()
        close_calls = 0

        async def fake_start() -> None:
            return None

        async def fake_initialize() -> InitializeResponse:
            return InitializeResponse.model_validate({})

        async def fake_close() -> None:
            nonlocal close_calls
            close_calls += 1

        codex._client.start = fake_start  # type: ignore[method-assign]
        codex._client.initialize = fake_initialize  # type: ignore[method-assign]
        codex._client.close = fake_close  # type: ignore[method-assign]

        with pytest.raises(RuntimeError, match="missing required metadata"):
            await codex.models()

        assert close_calls == 1
        assert codex._initialized is False
        assert codex._init is None

    asyncio.run(scenario())


def test_async_codex_initializes_only_once_under_concurrency() -> None:
    """Concurrent calls should share a single start/initialize of the client."""

    async def scenario() -> None:
        codex = AsyncCodex()
        start_calls = 0
        initialize_calls = 0
        ready = asyncio.Event()

        async def fake_start() -> None:
            nonlocal start_calls
            start_calls += 1

        async def fake_initialize() -> InitializeResponse:
            nonlocal initialize_calls
            initialize_calls += 1
            ready.set()
            await asyncio.sleep(0.02)
            return InitializeResponse.model_validate(
                {
                    "userAgent": "codex-cli/1.2.3",
                    "serverInfo": {"name": "codex-cli", "version": "1.2.3"},
                }
            )

        async def fake_model_list(include_hidden: bool = False):  # noqa: ANN202,ARG001
            await ready.wait()
            return object()

        codex._client.start = fake_start  # type: ignore[method-assign]
        codex._client.initialize = fake_initialize  # type: ignore[method-assign]
        codex._client.model_list = fake_model_list  # type: ignore[method-assign]

        await asyncio.gather(codex.models(), codex.models())

        assert start_calls == 1
        assert initialize_calls == 1

    asyncio.run(scenario())


def _approval_mode_turn_params(approval_mode: ApprovalMode) -> TurnStartParams:
    """Build real generated turn params from one public approval mode."""
    approval_policy, approvals_reviewer = public_api_module._approval_mode_settings(
        approval_mode
    )
    return TurnStartParams(
        thread_id="thread-1",
        input=[],
        approval_policy=approval_policy,
        approvals_reviewer=approvals_reviewer,
    )


def test_approval_modes_serialize_to_expected_start_params() -> None:
    """ApprovalMode should map to the app-server params sent for new work."""
    assert {
        mode.value: _approval_settings([_approval_mode_turn_params(mode)])[0]
        for mode in ApprovalMode
    } == {
        "deny_all": {"approvalPolicy": "never"},
        "auto_review": {
            "approvalPolicy": "on-request",
            "approvalsReviewer": "auto_review",
        },
    }


def test_unknown_approval_mode_is_rejected() -> None:
    """Invalid approval modes should fail before params are constructed."""
    with pytest.raises(ValueError, match="deny_all, auto_review"):
        public_api_module._approval_mode_settings("allow_all")  # type: ignore[arg-type]


def test_retry_examples_compare_status_with_enum() -> None:
    """Retry examples should compare turn status via TurnStatus, not raw strings."""
    for path in (
        ROOT / "examples" / "10_error_handling_and_retry" / "sync.py",
        ROOT / "examples" / "10_error_handling_and_retry" / "async.py",
    ):
        source = path.read_text()
        assert '== "failed"' not in source
        assert "TurnStatus.failed" in source