mirror of
https://github.com/openai/codex.git
synced 2026-04-24 06:35:50 +00:00
- make 03_turn_stream_events a clean streaming example with curated event output - add 14_turn_controls as a separate steer and interrupt demo with concise summaries - update the notebook and runtime-backed example assertions to match the new example shapes Co-authored-by: Codex <noreply@openai.com>
452 lines
15 KiB
Python
452 lines
15 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import textwrap
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
ROOT = Path(__file__).resolve().parents[1]
|
|
EXAMPLES_DIR = ROOT / "examples"
|
|
NOTEBOOK_PATH = ROOT / "notebooks" / "sdk_walkthrough.ipynb"
|
|
|
|
root_str = str(ROOT)
|
|
if root_str not in sys.path:
|
|
sys.path.insert(0, root_str)
|
|
|
|
from _runtime_setup import ensure_runtime_package_installed, pinned_runtime_version
|
|
|
|
RUN_REAL_CODEX_TESTS = os.environ.get("RUN_REAL_CODEX_TESTS") == "1"
|
|
pytestmark = pytest.mark.skipif(
|
|
not RUN_REAL_CODEX_TESTS,
|
|
reason="set RUN_REAL_CODEX_TESTS=1 to run real Codex integration coverage",
|
|
)
|
|
|
|
# 11_cli_mini_app is interactive; we still run it by feeding '/exit'.
|
|
EXAMPLE_CASES: list[tuple[str, str]] = [
|
|
("01_quickstart_constructor", "sync.py"),
|
|
("01_quickstart_constructor", "async.py"),
|
|
("02_turn_run", "sync.py"),
|
|
("02_turn_run", "async.py"),
|
|
("03_turn_stream_events", "sync.py"),
|
|
("03_turn_stream_events", "async.py"),
|
|
("04_models_and_metadata", "sync.py"),
|
|
("04_models_and_metadata", "async.py"),
|
|
("05_existing_thread", "sync.py"),
|
|
("05_existing_thread", "async.py"),
|
|
("06_thread_lifecycle_and_controls", "sync.py"),
|
|
("06_thread_lifecycle_and_controls", "async.py"),
|
|
("07_image_and_text", "sync.py"),
|
|
("07_image_and_text", "async.py"),
|
|
("08_local_image_and_text", "sync.py"),
|
|
("08_local_image_and_text", "async.py"),
|
|
("09_async_parity", "sync.py"),
|
|
# 09_async_parity async path is represented by 01 async + dedicated async-based cases above.
|
|
("10_error_handling_and_retry", "sync.py"),
|
|
("10_error_handling_and_retry", "async.py"),
|
|
("11_cli_mini_app", "sync.py"),
|
|
("11_cli_mini_app", "async.py"),
|
|
("12_turn_params_kitchen_sink", "sync.py"),
|
|
("12_turn_params_kitchen_sink", "async.py"),
|
|
("13_model_select_and_turn_params", "sync.py"),
|
|
("13_model_select_and_turn_params", "async.py"),
|
|
("14_turn_controls", "sync.py"),
|
|
("14_turn_controls", "async.py"),
|
|
]
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class PreparedRuntimeEnv:
|
|
python: str
|
|
env: dict[str, str]
|
|
runtime_version: str
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def runtime_env(tmp_path_factory: pytest.TempPathFactory) -> PreparedRuntimeEnv:
|
|
runtime_version = pinned_runtime_version()
|
|
temp_root = tmp_path_factory.mktemp("python-runtime-env")
|
|
isolated_site = temp_root / "site-packages"
|
|
python = sys.executable
|
|
|
|
_run_command(
|
|
[
|
|
python,
|
|
"-m",
|
|
"pip",
|
|
"install",
|
|
"--target",
|
|
str(isolated_site),
|
|
"pydantic>=2.12",
|
|
],
|
|
cwd=ROOT,
|
|
env=os.environ.copy(),
|
|
timeout_s=240,
|
|
)
|
|
ensure_runtime_package_installed(
|
|
python,
|
|
ROOT,
|
|
install_target=isolated_site,
|
|
)
|
|
|
|
env = os.environ.copy()
|
|
env["PYTHONPATH"] = os.pathsep.join([str(isolated_site), str(ROOT / "src")])
|
|
env["CODEX_PYTHON_SDK_DIR"] = str(ROOT)
|
|
return PreparedRuntimeEnv(python=python, env=env, runtime_version=runtime_version)
|
|
|
|
|
|
def _run_command(
|
|
args: list[str],
|
|
*,
|
|
cwd: Path,
|
|
env: dict[str, str],
|
|
timeout_s: int,
|
|
stdin: str | None = None,
|
|
) -> subprocess.CompletedProcess[str]:
|
|
return subprocess.run(
|
|
args,
|
|
cwd=str(cwd),
|
|
env=env,
|
|
input=stdin,
|
|
text=True,
|
|
capture_output=True,
|
|
timeout=timeout_s,
|
|
check=False,
|
|
)
|
|
|
|
|
|
def _run_python(
|
|
runtime_env: PreparedRuntimeEnv,
|
|
source: str,
|
|
*,
|
|
cwd: Path | None = None,
|
|
timeout_s: int = 180,
|
|
) -> subprocess.CompletedProcess[str]:
|
|
return _run_command(
|
|
[str(runtime_env.python), "-c", source],
|
|
cwd=cwd or ROOT,
|
|
env=runtime_env.env,
|
|
timeout_s=timeout_s,
|
|
)
|
|
|
|
|
|
def _runtime_compatibility_hint(
|
|
runtime_env: PreparedRuntimeEnv,
|
|
*,
|
|
stdout: str,
|
|
stderr: str,
|
|
) -> str:
|
|
combined = f"{stdout}\n{stderr}"
|
|
if "ThreadStartResponse" in combined and "approvalsReviewer" in combined:
|
|
return (
|
|
"\nCompatibility hint:\n"
|
|
f"Pinned runtime {runtime_env.runtime_version} returned a thread/start payload "
|
|
"that is older than the current SDK schema and is missing "
|
|
"`approvalsReviewer`. Bump `sdk/python/_runtime_setup.py` to a matching "
|
|
"released runtime version.\n"
|
|
)
|
|
return ""
|
|
|
|
|
|
def _run_json_python(
|
|
runtime_env: PreparedRuntimeEnv,
|
|
source: str,
|
|
*,
|
|
cwd: Path | None = None,
|
|
timeout_s: int = 180,
|
|
) -> dict[str, object]:
|
|
result = _run_python(runtime_env, source, cwd=cwd, timeout_s=timeout_s)
|
|
assert result.returncode == 0, (
|
|
"Python snippet failed.\n"
|
|
f"STDOUT:\n{result.stdout}\n"
|
|
f"STDERR:\n{result.stderr}"
|
|
f"{_runtime_compatibility_hint(runtime_env, stdout=result.stdout, stderr=result.stderr)}"
|
|
)
|
|
return json.loads(result.stdout)
|
|
|
|
|
|
def _run_example(
|
|
runtime_env: PreparedRuntimeEnv,
|
|
folder: str,
|
|
script: str,
|
|
*,
|
|
timeout_s: int = 180,
|
|
) -> subprocess.CompletedProcess[str]:
|
|
path = EXAMPLES_DIR / folder / script
|
|
assert path.exists(), f"Missing example script: {path}"
|
|
|
|
stdin = "/exit\n" if folder == "11_cli_mini_app" else None
|
|
return _run_command(
|
|
[str(runtime_env.python), str(path)],
|
|
cwd=ROOT,
|
|
env=runtime_env.env,
|
|
timeout_s=timeout_s,
|
|
stdin=stdin,
|
|
)
|
|
|
|
|
|
def _notebook_cell_source(cell_index: int) -> str:
|
|
notebook = json.loads(NOTEBOOK_PATH.read_text())
|
|
return "".join(notebook["cells"][cell_index]["source"])
|
|
|
|
|
|
def test_real_initialize_and_model_list(runtime_env: PreparedRuntimeEnv) -> None:
|
|
data = _run_json_python(
|
|
runtime_env,
|
|
textwrap.dedent(
|
|
"""
|
|
import json
|
|
from codex_app_server import Codex
|
|
|
|
with Codex() as codex:
|
|
models = codex.models(include_hidden=True)
|
|
server = codex.metadata.serverInfo
|
|
print(json.dumps({
|
|
"user_agent": codex.metadata.userAgent,
|
|
"server_name": None if server is None else server.name,
|
|
"server_version": None if server is None else server.version,
|
|
"model_count": len(models.data),
|
|
}))
|
|
"""
|
|
),
|
|
)
|
|
|
|
assert isinstance(data["user_agent"], str) and data["user_agent"].strip()
|
|
if data["server_name"] is not None:
|
|
assert isinstance(data["server_name"], str) and data["server_name"].strip()
|
|
if data["server_version"] is not None:
|
|
assert isinstance(data["server_version"], str) and data["server_version"].strip()
|
|
assert isinstance(data["model_count"], int)
|
|
|
|
|
|
def test_real_thread_and_turn_start_smoke(runtime_env: PreparedRuntimeEnv) -> None:
|
|
data = _run_json_python(
|
|
runtime_env,
|
|
textwrap.dedent(
|
|
"""
|
|
import json
|
|
from codex_app_server import Codex, TextInput
|
|
|
|
with Codex() as codex:
|
|
thread = codex.thread_start(
|
|
model="gpt-5.4",
|
|
config={"model_reasoning_effort": "high"},
|
|
)
|
|
result = thread.turn(TextInput("hello")).run()
|
|
persisted = thread.read(include_turns=True)
|
|
persisted_turn = next(
|
|
(turn for turn in persisted.thread.turns or [] if turn.id == result.id),
|
|
None,
|
|
)
|
|
print(json.dumps({
|
|
"thread_id": thread.id,
|
|
"turn_id": result.id,
|
|
"status": result.status.value,
|
|
"items_count": len(result.items or []),
|
|
"persisted_items_count": 0 if persisted_turn is None else len(persisted_turn.items or []),
|
|
}))
|
|
"""
|
|
),
|
|
)
|
|
|
|
assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
|
|
assert isinstance(data["turn_id"], str) and data["turn_id"].strip()
|
|
assert data["status"] == "completed"
|
|
assert isinstance(data["items_count"], int)
|
|
assert isinstance(data["persisted_items_count"], int)
|
|
|
|
|
|
def test_real_async_thread_turn_usage_and_ids_smoke(
|
|
runtime_env: PreparedRuntimeEnv,
|
|
) -> None:
|
|
data = _run_json_python(
|
|
runtime_env,
|
|
textwrap.dedent(
|
|
"""
|
|
import asyncio
|
|
import json
|
|
from codex_app_server import AsyncCodex, TextInput
|
|
|
|
async def main():
|
|
async with AsyncCodex() as codex:
|
|
thread = await codex.thread_start(
|
|
model="gpt-5.4",
|
|
config={"model_reasoning_effort": "high"},
|
|
)
|
|
result = await (await thread.turn(TextInput("say ok"))).run()
|
|
persisted = await thread.read(include_turns=True)
|
|
persisted_turn = next(
|
|
(turn for turn in persisted.thread.turns or [] if turn.id == result.id),
|
|
None,
|
|
)
|
|
print(json.dumps({
|
|
"thread_id": thread.id,
|
|
"turn_id": result.id,
|
|
"status": result.status.value,
|
|
"items_count": len(result.items or []),
|
|
"persisted_items_count": 0 if persisted_turn is None else len(persisted_turn.items or []),
|
|
}))
|
|
|
|
asyncio.run(main())
|
|
"""
|
|
),
|
|
)
|
|
|
|
assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
|
|
assert isinstance(data["turn_id"], str) and data["turn_id"].strip()
|
|
assert data["status"] == "completed"
|
|
assert isinstance(data["items_count"], int)
|
|
assert isinstance(data["persisted_items_count"], int)
|
|
|
|
|
|
def test_notebook_bootstrap_resolves_sdk_and_runtime_from_unrelated_cwd(
|
|
runtime_env: PreparedRuntimeEnv,
|
|
) -> None:
|
|
cell_1_source = _notebook_cell_source(1)
|
|
env = runtime_env.env.copy()
|
|
|
|
with tempfile.TemporaryDirectory() as temp_cwd:
|
|
result = _run_command(
|
|
[str(runtime_env.python), "-c", cell_1_source],
|
|
cwd=Path(temp_cwd),
|
|
env=env,
|
|
timeout_s=180,
|
|
)
|
|
|
|
assert result.returncode == 0, (
|
|
f"Notebook bootstrap failed from unrelated cwd.\n"
|
|
f"STDOUT:\n{result.stdout}\n"
|
|
f"STDERR:\n{result.stderr}"
|
|
)
|
|
assert "SDK source:" in result.stdout
|
|
assert f"Runtime package: {runtime_env.runtime_version}" in result.stdout
|
|
|
|
|
|
def test_notebook_sync_cell_smoke(runtime_env: PreparedRuntimeEnv) -> None:
|
|
source = "\n\n".join(
|
|
[
|
|
_notebook_cell_source(1),
|
|
_notebook_cell_source(2),
|
|
_notebook_cell_source(3),
|
|
]
|
|
)
|
|
result = _run_python(runtime_env, source, timeout_s=240)
|
|
assert result.returncode == 0, (
|
|
f"Notebook sync smoke failed.\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
|
|
)
|
|
assert "status:" in result.stdout
|
|
assert "server:" in result.stdout
|
|
|
|
|
|
def test_real_streaming_smoke_turn_completed(runtime_env: PreparedRuntimeEnv) -> None:
|
|
data = _run_json_python(
|
|
runtime_env,
|
|
textwrap.dedent(
|
|
"""
|
|
import json
|
|
from codex_app_server import Codex, TextInput
|
|
|
|
with Codex() as codex:
|
|
thread = codex.thread_start(
|
|
model="gpt-5.4",
|
|
config={"model_reasoning_effort": "high"},
|
|
)
|
|
turn = thread.turn(TextInput("Reply with one short sentence."))
|
|
saw_delta = False
|
|
saw_completed = False
|
|
for event in turn.stream():
|
|
if event.method == "item/agentMessage/delta":
|
|
saw_delta = True
|
|
if event.method == "turn/completed":
|
|
saw_completed = True
|
|
print(json.dumps({
|
|
"saw_delta": saw_delta,
|
|
"saw_completed": saw_completed,
|
|
}))
|
|
"""
|
|
),
|
|
)
|
|
|
|
assert data["saw_completed"] is True
|
|
assert isinstance(data["saw_delta"], bool)
|
|
|
|
|
|
def test_real_turn_interrupt_smoke(runtime_env: PreparedRuntimeEnv) -> None:
|
|
data = _run_json_python(
|
|
runtime_env,
|
|
textwrap.dedent(
|
|
"""
|
|
import json
|
|
from codex_app_server import Codex, TextInput
|
|
|
|
with Codex() as codex:
|
|
thread = codex.thread_start(
|
|
model="gpt-5.4",
|
|
config={"model_reasoning_effort": "high"},
|
|
)
|
|
turn = thread.turn(TextInput("Count from 1 to 200 with commas."))
|
|
turn.interrupt()
|
|
follow_up = thread.turn(TextInput("Say 'ok' only.")).run()
|
|
print(json.dumps({"status": follow_up.status.value}))
|
|
"""
|
|
),
|
|
)
|
|
|
|
assert data["status"] in {"completed", "failed"}
|
|
|
|
|
|
@pytest.mark.parametrize(("folder", "script"), EXAMPLE_CASES)
|
|
def test_real_examples_run_and_assert(
|
|
runtime_env: PreparedRuntimeEnv,
|
|
folder: str,
|
|
script: str,
|
|
) -> None:
|
|
result = _run_example(runtime_env, folder, script)
|
|
|
|
assert result.returncode == 0, (
|
|
f"Example failed: {folder}/{script}\n"
|
|
f"STDOUT:\n{result.stdout}\n"
|
|
f"STDERR:\n{result.stderr}"
|
|
f"{_runtime_compatibility_hint(runtime_env, stdout=result.stdout, stderr=result.stderr)}"
|
|
)
|
|
|
|
out = result.stdout
|
|
|
|
if folder == "01_quickstart_constructor":
|
|
assert "Status:" in out and "Text:" in out
|
|
assert "Server: unknown" not in out
|
|
elif folder == "02_turn_run":
|
|
assert "thread_id:" in out and "turn_id:" in out and "status:" in out
|
|
assert "persisted.items.count:" in out
|
|
elif folder == "03_turn_stream_events":
|
|
assert "stream.completed:" in out
|
|
assert "assistant>" in out
|
|
elif folder == "04_models_and_metadata":
|
|
assert "models.count:" in out
|
|
assert "server_name=None" not in out
|
|
assert "server_version=None" not in out
|
|
elif folder == "05_existing_thread":
|
|
assert "Created thread:" in out
|
|
elif folder == "06_thread_lifecycle_and_controls":
|
|
assert "Lifecycle OK:" in out
|
|
elif folder in {"07_image_and_text", "08_local_image_and_text"}:
|
|
assert "completed" in out.lower() or "Status:" in out
|
|
elif folder == "09_async_parity":
|
|
assert "Thread:" in out and "Turn:" in out
|
|
elif folder == "10_error_handling_and_retry":
|
|
assert "Text:" in out
|
|
elif folder == "11_cli_mini_app":
|
|
assert "Thread:" in out
|
|
elif folder == "12_turn_params_kitchen_sink":
|
|
assert "Status:" in out and "Items:" in out
|
|
elif folder == "13_model_select_and_turn_params":
|
|
assert "selected.model:" in out and "agent.message.params:" in out and "items.params:" in out
|
|
elif folder == "14_turn_controls":
|
|
assert "steer.result:" in out and "steer.final.status:" in out
|
|
assert "interrupt.result:" in out and "interrupt.final.status:" in out
|