Files
codex/sdk/python/tests/test_real_app_server_integration.py
Ahmed Ibrahim aa9e8f0262 [8/8] Add Python SDK Ruff formatting (#22021)
## Why

The Python SDK needs the same tight formatter/lint loop as the rest of
the repo: a safe Ruff autofix pass, Ruff formatting, editor save
behavior, and CI checks that catch drift. Without that loop, SDK changes
can land with formatting or import ordering that differs from what
reviewers and CI expect.

## What

- Add Ruff configuration to `sdk/python/pyproject.toml`, excluding
generated protocol code and notebooks from the normal lint/format pass.
- Update `just fmt` so it still formats Rust and also runs Python SDK
Ruff autofix and formatting.
- Add Python SDK CI steps for `ruff check` and `ruff format --check`
before pytest.
- Recommend the Ruff VS Code extension and enable Python
format/fix/organize-on-save so Cmd+S uses the same tooling.
- Apply the resulting Ruff formatting to SDK Python files, examples, and
the checked-in generated `v2_all.py` output emitted by the pinned
generator.
- Add a guard test for the `just fmt` recipe so it keeps working from
both Rust and Python SDK working directories.

## Stack

1. #21891 `[1/8]` Pin Python SDK runtime dependency
2. #21893 `[2/8]` Generate Python SDK types from pinned runtime
3. #21895 `[3/8]` Run Python SDK tests in CI
4. #21896 `[4/8]` Define Python SDK public API surface
5. #21905 `[5/8]` Rename Python SDK package to `openai-codex`
6. #21910 `[6/8]` Add high-level Python SDK approval mode
7. #22014 `[7/8]` Add Python SDK app-server integration harness
8. This PR `[8/8]` Add Python SDK Ruff formatting

## Verification

- Added `test_root_fmt_recipe_formats_rust_and_python_sdk` for the
shared format recipe.
- Ran `just fmt` after the recipe update.

---------

Co-authored-by: Codex <noreply@openai.com>
2026-05-12 01:10:29 +03:00

548 lines
18 KiB
Python

from __future__ import annotations
import json
import os
import subprocess
import sys
import tempfile
import textwrap
from dataclasses import dataclass
from pathlib import Path
import pytest
# Repository-relative anchors: the SDK root, the runnable examples directory,
# and the walkthrough notebook exercised by the notebook smoke tests below.
ROOT = Path(__file__).resolve().parents[1]
EXAMPLES_DIR = ROOT / "examples"
NOTEBOOK_PATH = ROOT / "notebooks" / "sdk_walkthrough.ipynb"
# Make the SDK root importable so `_runtime_setup` resolves no matter how
# pytest was invoked; the membership check avoids duplicate sys.path entries.
root_str = str(ROOT)
if root_str not in sys.path:
    sys.path.insert(0, root_str)
from _runtime_setup import ensure_runtime_package_installed, pinned_runtime_version

# Real-server coverage is opt-in: every test in this module is skipped unless
# RUN_REAL_CODEX_TESTS=1 is set in the environment.
RUN_REAL_CODEX_TESTS = os.environ.get("RUN_REAL_CODEX_TESTS") == "1"
pytestmark = pytest.mark.skipif(
    not RUN_REAL_CODEX_TESTS,
    reason="set RUN_REAL_CODEX_TESTS=1 to run real Codex integration coverage",
)
# 11_cli_mini_app is interactive; we still run it by feeding one prompt, then '/exit'.
# (folder, script) pairs under EXAMPLES_DIR that must run cleanly against a
# real Codex app server; parametrizes test_real_examples_run_and_assert below.
EXAMPLE_CASES: list[tuple[str, str]] = [
    ("01_quickstart_constructor", "sync.py"),
    ("01_quickstart_constructor", "async.py"),
    ("02_turn_run", "sync.py"),
    ("02_turn_run", "async.py"),
    ("03_turn_stream_events", "sync.py"),
    ("03_turn_stream_events", "async.py"),
    ("04_models_and_metadata", "sync.py"),
    ("04_models_and_metadata", "async.py"),
    ("05_existing_thread", "sync.py"),
    ("05_existing_thread", "async.py"),
    ("06_thread_lifecycle_and_controls", "sync.py"),
    ("06_thread_lifecycle_and_controls", "async.py"),
    ("07_image_and_text", "sync.py"),
    ("07_image_and_text", "async.py"),
    ("08_local_image_and_text", "sync.py"),
    ("08_local_image_and_text", "async.py"),
    ("09_async_parity", "sync.py"),
    # 09_async_parity async path is represented by 01 async + dedicated async-based cases above.
    ("10_error_handling_and_retry", "sync.py"),
    ("10_error_handling_and_retry", "async.py"),
    ("11_cli_mini_app", "sync.py"),
    ("11_cli_mini_app", "async.py"),
    ("12_turn_params_kitchen_sink", "sync.py"),
    ("12_turn_params_kitchen_sink", "async.py"),
    ("13_model_select_and_turn_params", "sync.py"),
    ("13_model_select_and_turn_params", "async.py"),
    ("14_turn_controls", "sync.py"),
    ("14_turn_controls", "async.py"),
]
@dataclass(frozen=True)
class PreparedRuntimeEnv:
    """Interpreter and environment used to launch SDK snippets and examples.

    Built once per session by the ``runtime_env`` fixture, which installs the
    pinned runtime into an isolated site-packages directory.
    """

    # Absolute path of the Python interpreter to invoke for child processes.
    python: str
    # Environment for child processes; PYTHONPATH points at the isolated
    # site-packages plus the SDK's src directory (see the fixture).
    env: dict[str, str]
    # The pinned Codex runtime package version this environment was prepared with.
    runtime_version: str
@pytest.fixture(scope="session")
def runtime_env(tmp_path_factory: pytest.TempPathFactory) -> PreparedRuntimeEnv:
    """Prepare a session-wide isolated environment with the pinned runtime.

    Installs pydantic and the pinned Codex runtime into a throwaway
    site-packages directory, then returns the interpreter path plus the child
    environment (PYTHONPATH, CODEX_PYTHON_SDK_DIR) that snippets should use.
    """
    version = pinned_runtime_version()
    site_dir = tmp_path_factory.mktemp("python-runtime-env") / "site-packages"
    interpreter = sys.executable

    # Install the pydantic dependency into the isolated target first.
    pip_args = [interpreter, "-m", "pip", "install", "--target", str(site_dir)]
    pip_args.append("pydantic>=2.12")
    _run_command(pip_args, cwd=ROOT, env=os.environ.copy(), timeout_s=240)

    # Then the pinned runtime package itself, into the same target.
    ensure_runtime_package_installed(
        interpreter,
        ROOT,
        install_target=site_dir,
    )

    child_env = os.environ.copy()
    child_env["PYTHONPATH"] = os.pathsep.join([str(site_dir), str(ROOT / "src")])
    child_env["CODEX_PYTHON_SDK_DIR"] = str(ROOT)
    return PreparedRuntimeEnv(python=interpreter, env=child_env, runtime_version=version)
def _run_command(
args: list[str],
*,
cwd: Path,
env: dict[str, str],
timeout_s: int,
stdin: str | None = None,
) -> subprocess.CompletedProcess[str]:
return subprocess.run(
args,
cwd=str(cwd),
env=env,
input=stdin,
text=True,
capture_output=True,
timeout=timeout_s,
check=False,
)
def _run_python(
    runtime_env: PreparedRuntimeEnv,
    source: str,
    *,
    cwd: Path | None = None,
    timeout_s: int = 180,
) -> subprocess.CompletedProcess[str]:
    """Execute *source* via ``python -c`` using the prepared interpreter.

    Defaults the working directory to the SDK root when *cwd* is not given.
    """
    workdir = ROOT if cwd is None else cwd
    command = [str(runtime_env.python), "-c", source]
    return _run_command(command, cwd=workdir, env=runtime_env.env, timeout_s=timeout_s)
def _runtime_compatibility_hint(
runtime_env: PreparedRuntimeEnv,
*,
stdout: str,
stderr: str,
) -> str:
combined = f"{stdout}\n{stderr}"
if "ThreadStartResponse" in combined and "approvalsReviewer" in combined:
return (
"\nCompatibility hint:\n"
f"Pinned runtime {runtime_env.runtime_version} returned a thread/start payload "
"that is older than the current SDK schema and is missing "
"`approvalsReviewer`. Bump `sdk/python/_runtime_setup.py` to a matching "
"released runtime version.\n"
)
return ""
def _run_json_python(
    runtime_env: PreparedRuntimeEnv,
    source: str,
    *,
    cwd: Path | None = None,
    timeout_s: int = 180,
) -> dict[str, object]:
    """Run *source* and parse its stdout as a JSON object.

    Asserts the snippet exited cleanly, appending a runtime-compatibility hint
    to the failure message when the output suggests a schema mismatch.
    """
    completed = _run_python(runtime_env, source, cwd=cwd, timeout_s=timeout_s)
    # The hint expression lives in the assert message so it is only built on failure.
    assert completed.returncode == 0, (
        "Python snippet failed.\n"
        f"STDOUT:\n{completed.stdout}\n"
        f"STDERR:\n{completed.stderr}"
        f"{_runtime_compatibility_hint(runtime_env, stdout=completed.stdout, stderr=completed.stderr)}"
    )
    return json.loads(completed.stdout)
def _run_example(
    runtime_env: PreparedRuntimeEnv,
    folder: str,
    script: str,
    *,
    timeout_s: int = 180,
) -> subprocess.CompletedProcess[str]:
    """Run one example script from EXAMPLES_DIR and return the completed process.

    The interactive CLI mini-app gets a canned two-prompt session ending in
    ``/exit``; every other example runs with no stdin.
    """
    script_path = EXAMPLES_DIR / folder / script
    assert script_path.exists(), f"Missing example script: {script_path}"
    piped_input: str | None = None
    if folder == "11_cli_mini_app":
        piped_input = (
            "Give 3 short bullets on SIMD.\nNow rewrite that as 1 short sentence.\n/exit\n"
        )
    return _run_command(
        [str(runtime_env.python), str(script_path)],
        cwd=ROOT,
        env=runtime_env.env,
        timeout_s=timeout_s,
        stdin=piped_input,
    )
def _notebook_cell_source(cell_index: int) -> str:
    """Return the joined source text of one cell of the walkthrough notebook."""
    cells = json.loads(NOTEBOOK_PATH.read_text())["cells"]
    return "".join(cells[cell_index]["source"])
def test_real_initialize_and_model_list(runtime_env: PreparedRuntimeEnv) -> None:
    """End-to-end: construct Codex, list models, and echo server metadata as JSON."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from openai_codex import Codex
            with Codex() as codex:
                models = codex.models(include_hidden=True)
                server = codex.metadata.serverInfo
                print(json.dumps({
                    "user_agent": codex.metadata.userAgent,
                    "server_name": None if server is None else server.name,
                    "server_version": None if server is None else server.version,
                    "model_count": len(models.data),
                }))
            """
        ),
    )
    # The user agent must always be a non-blank string; server info is optional,
    # so its fields are only validated when present.
    assert isinstance(data["user_agent"], str) and data["user_agent"].strip()
    if data["server_name"] is not None:
        assert isinstance(data["server_name"], str) and data["server_name"].strip()
    if data["server_version"] is not None:
        assert isinstance(data["server_version"], str) and data["server_version"].strip()
    assert isinstance(data["model_count"], int)
def test_real_thread_and_turn_start_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """Start a thread, run one turn, then re-read the thread to confirm persistence."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from openai_codex import Codex, TextInput
            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                result = thread.turn(TextInput("hello")).run()
                persisted = thread.read(include_turns=True)
                persisted_turn = next(
                    (turn for turn in persisted.thread.turns or [] if turn.id == result.id),
                    None,
                )
                print(json.dumps({
                    "thread_id": thread.id,
                    "turn_id": result.id,
                    "status": result.status.value,
                    "items_count": len(result.items or []),
                    "persisted_items_count": 0 if persisted_turn is None else len(persisted_turn.items or []),
                }))
            """
        ),
    )
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    assert isinstance(data["turn_id"], str) and data["turn_id"].strip()
    # A real server should always complete this trivial turn.
    assert data["status"] == "completed"
    assert isinstance(data["items_count"], int)
    assert isinstance(data["persisted_items_count"], int)
def test_real_thread_run_convenience_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """Smoke-check the `thread.run(...)` convenience wrapper on a real server."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from openai_codex import Codex
            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                result = thread.run("say ok")
                print(json.dumps({
                    "thread_id": thread.id,
                    "final_response": result.final_response,
                    "items_count": len(result.items),
                    "has_usage": result.usage is not None,
                }))
            """
        ),
    )
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    # run() must surface a non-empty final assistant response.
    assert isinstance(data["final_response"], str) and data["final_response"].strip()
    assert isinstance(data["items_count"], int)
    assert isinstance(data["has_usage"], bool)
def test_real_async_thread_turn_usage_and_ids_smoke(
    runtime_env: PreparedRuntimeEnv,
) -> None:
    """Async mirror of the sync thread/turn smoke test, via AsyncCodex."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import asyncio
            import json
            from openai_codex import AsyncCodex, TextInput
            async def main():
                async with AsyncCodex() as codex:
                    thread = await codex.thread_start(
                        model="gpt-5.4",
                        config={"model_reasoning_effort": "high"},
                    )
                    result = await (await thread.turn(TextInput("say ok"))).run()
                    persisted = await thread.read(include_turns=True)
                    persisted_turn = next(
                        (turn for turn in persisted.thread.turns or [] if turn.id == result.id),
                        None,
                    )
                    print(json.dumps({
                        "thread_id": thread.id,
                        "turn_id": result.id,
                        "status": result.status.value,
                        "items_count": len(result.items or []),
                        "persisted_items_count": 0 if persisted_turn is None else len(persisted_turn.items or []),
                    }))
            asyncio.run(main())
            """
        ),
    )
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    assert isinstance(data["turn_id"], str) and data["turn_id"].strip()
    assert data["status"] == "completed"
    assert isinstance(data["items_count"], int)
    assert isinstance(data["persisted_items_count"], int)
def test_real_async_thread_run_convenience_smoke(
    runtime_env: PreparedRuntimeEnv,
) -> None:
    """Async mirror of the `thread.run(...)` convenience smoke test."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import asyncio
            import json
            from openai_codex import AsyncCodex
            async def main():
                async with AsyncCodex() as codex:
                    thread = await codex.thread_start(
                        model="gpt-5.4",
                        config={"model_reasoning_effort": "high"},
                    )
                    result = await thread.run("say ok")
                    print(json.dumps({
                        "thread_id": thread.id,
                        "final_response": result.final_response,
                        "items_count": len(result.items),
                        "has_usage": result.usage is not None,
                    }))
            asyncio.run(main())
            """
        ),
    )
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    assert isinstance(data["final_response"], str) and data["final_response"].strip()
    assert isinstance(data["items_count"], int)
    assert isinstance(data["has_usage"], bool)
def test_notebook_bootstrap_resolves_sdk_and_runtime_from_unrelated_cwd(
    runtime_env: PreparedRuntimeEnv,
) -> None:
    """The notebook's bootstrap cell must find the SDK even from a throwaway cwd."""
    bootstrap_source = _notebook_cell_source(1)
    child_env = runtime_env.env.copy()
    with tempfile.TemporaryDirectory() as scratch_dir:
        result = _run_command(
            [str(runtime_env.python), "-c", bootstrap_source],
            cwd=Path(scratch_dir),
            env=child_env,
            timeout_s=180,
        )
    assert result.returncode == 0, (
        f"Notebook bootstrap failed from unrelated cwd.\n"
        f"STDOUT:\n{result.stdout}\n"
        f"STDERR:\n{result.stderr}"
    )
    # The bootstrap cell reports where it found the SDK and which runtime it pinned.
    assert "SDK source:" in result.stdout
    assert f"Runtime package: {runtime_env.runtime_version}" in result.stdout
def test_notebook_sync_cell_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """Cells 1-3 of the walkthrough notebook run end to end as one script."""
    combined = "\n\n".join(_notebook_cell_source(index) for index in (1, 2, 3))
    result = _run_python(runtime_env, combined, timeout_s=240)
    assert result.returncode == 0, (
        f"Notebook sync smoke failed.\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
    )
    assert "status:" in result.stdout
    assert "server:" in result.stdout
def test_notebook_advanced_cell_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """Cells 1, 2, and 7 (the advanced walkthrough) run cleanly together."""
    combined = "\n\n".join(_notebook_cell_source(index) for index in (1, 2, 7))
    result = _run_python(runtime_env, combined, timeout_s=360)
    assert result.returncode == 0, (
        f"Notebook advanced smoke failed.\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
    )
    assert "selected.model:" in result.stdout
    assert "agent.message.params:" in result.stdout
    assert "items.params:" in result.stdout
def test_real_streaming_smoke_turn_completed(runtime_env: PreparedRuntimeEnv) -> None:
    """Stream a turn's events and confirm a turn/completed event arrives."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from openai_codex import Codex, TextInput
            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                turn = thread.turn(TextInput("Reply with one short sentence."))
                saw_delta = False
                saw_completed = False
                for event in turn.stream():
                    if event.method == "item/agentMessage/delta":
                        saw_delta = True
                    if event.method == "turn/completed":
                        saw_completed = True
                print(json.dumps({
                    "saw_delta": saw_delta,
                    "saw_completed": saw_completed,
                }))
            """
        ),
    )
    # Completion is required; delta events are model-dependent, so only the type
    # of the flag is asserted.
    assert data["saw_completed"] is True
    assert isinstance(data["saw_delta"], bool)
def test_real_turn_interrupt_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """Interrupt a long-running turn, then verify the thread still accepts turns."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from openai_codex import Codex, TextInput
            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                turn = thread.turn(TextInput("Count from 1 to 200 with commas."))
                turn.interrupt()
                follow_up = thread.turn(TextInput("Say 'ok' only.")).run()
                print(json.dumps({"status": follow_up.status.value}))
            """
        ),
    )
    # The follow-up turn may legitimately fail depending on interrupt timing;
    # the point is that the thread remained usable after the interrupt.
    assert data["status"] in {"completed", "failed"}
@pytest.mark.parametrize(("folder", "script"), EXAMPLE_CASES)
def test_real_examples_run_and_assert(
    runtime_env: PreparedRuntimeEnv,
    folder: str,
    script: str,
) -> None:
    """Run every checked-in example against a real server and verify its output.

    Each example folder has its own expected stdout markers; the per-folder
    branches below encode those expectations.
    """
    result = _run_example(runtime_env, folder, script)
    assert result.returncode == 0, (
        f"Example failed: {folder}/{script}\n"
        f"STDOUT:\n{result.stdout}\n"
        f"STDERR:\n{result.stderr}"
        f"{_runtime_compatibility_hint(runtime_env, stdout=result.stdout, stderr=result.stderr)}"
    )
    out = result.stdout
    if folder == "01_quickstart_constructor":
        # "Server: unknown" would mean metadata was not actually resolved.
        assert "Server:" in out and "Items:" in out and "Text:" in out
        assert "Server: unknown" not in out
    elif folder == "02_turn_run":
        assert "thread_id:" in out and "turn_id:" in out and "status:" in out
        assert "persisted.items.count:" in out
    elif folder == "03_turn_stream_events":
        assert "stream.completed:" in out
        assert "assistant>" in out
    elif folder == "04_models_and_metadata":
        assert "server:" in out
        assert "models.count:" in out
        assert "models:" in out
        # Raw metadata dumps should not leak into the example's output.
        assert "metadata:" not in out
    elif folder == "05_existing_thread":
        assert "Created thread:" in out
    elif folder == "06_thread_lifecycle_and_controls":
        assert "Lifecycle OK:" in out
    elif folder in {"07_image_and_text", "08_local_image_and_text"}:
        assert "completed" in out.lower() or "Status:" in out
    elif folder == "09_async_parity":
        assert "Thread:" in out and "Turn:" in out
    elif folder == "10_error_handling_and_retry":
        assert "Text:" in out
    elif folder == "11_cli_mini_app":
        # The canned stdin session sends two prompts, so each marker must
        # appear at least twice.
        assert "Thread:" in out
        assert out.count("assistant>") >= 2
        assert out.count("assistant.status>") >= 2
        assert out.count("usage>") >= 2
    elif folder == "12_turn_params_kitchen_sink":
        assert "Status:" in out
        assert "summary:" in out
        assert "actions:" in out
        assert "Items:" in out
    elif folder == "13_model_select_and_turn_params":
        assert (
            "selected.model:" in out and "agent.message.params:" in out and "items.params:" in out
        )
    elif folder == "14_turn_controls":
        assert "steer.result:" in out and "steer.final.status:" in out
        assert "interrupt.result:" in out and "interrupt.final.status:" in out