[2/8] Generate Python SDK types from pinned runtime (#21893)

## Why

Once the SDK declares its runtime package, generated Python artifacts should come from that pinned runtime rather than whatever app-server schema happens to be in the current checkout. That keeps the generated API and model surface aligned with the runtime users install.

## What

- Teach `scripts/update_sdk_artifacts.py generate-types` to invoke the pinned runtime package for schema generation.
- Regenerate `v2_all.py`, `notification_registry.py`, and generated public wrapper methods from that schema.
- Add freshness coverage so regenerating from the pinned runtime must leave checked-in artifacts unchanged.

## Stack

1. #21891 `[1/8]` Pin Python SDK runtime dependency
2. This PR `[2/8]` Generate Python SDK types from pinned runtime
3. #21895 `[3/8]` Run Python SDK tests in CI
4. #21896 `[4/8]` Define Python SDK public API surface
5. #21905 `[5/8]` Rename Python SDK package to `openai-codex`
6. #21910 `[6/8]` Add high-level Python SDK approval mode
7. #22014 `[7/8]` Add Python SDK app-server integration harness
8. #22021 `[8/8]` Add Python SDK Ruff formatting

## Verification

- Added `test_generated_files_are_up_to_date` for pinned-runtime generation drift.
- Added generator-structure tests for schema annotation and notification metadata generation.

---------

Co-authored-by: Codex <noreply@openai.com>
2026-05-23 12:34:25 +00:00 · 2026-05-12 00:53:21 +03:00
parent 5fe33443b0
commit 6a4653efc8
11 changed files with 752 additions and 505 deletions
--- a/sdk/python/tests/test_artifact_workflow_and_binaries.py
+++ b/sdk/python/tests/test_artifact_workflow_and_binaries.py
@@ -16,6 +16,7 @@ ROOT = Path(__file__).resolve().parents[1]


 def _load_update_script_module():
+    """Load the maintenance script as a module so tests exercise real helpers."""
    script_path = ROOT / "scripts" / "update_sdk_artifacts.py"
    spec = importlib.util.spec_from_file_location("update_sdk_artifacts", script_path)
    if spec is None or spec.loader is None:
@@ -27,6 +28,7 @@ def _load_update_script_module():


 def _load_runtime_setup_module():
+    """Load runtime setup without importing the SDK package under test."""
    runtime_setup_path = ROOT / "_runtime_setup.py"
    spec = importlib.util.spec_from_file_location("_runtime_setup", runtime_setup_path)
    if spec is None or spec.loader is None:
@@ -40,11 +42,13 @@ def _load_runtime_setup_module():


 def test_generation_has_single_maintenance_entrypoint_script() -> None:
+    """Keep artifact workflows routed through one script instead of side entrypoints."""
    scripts = sorted(p.name for p in (ROOT / "scripts").glob("*.py"))
    assert scripts == ["update_sdk_artifacts.py"]


 def test_generate_types_wires_all_generation_steps() -> None:
+    """The type generation command should refresh every schema-derived artifact."""
    source = (ROOT / "scripts" / "update_sdk_artifacts.py").read_text()
    tree = ast.parse(source)

@@ -52,7 +56,8 @@ def test_generate_types_wires_all_generation_steps() -> None:
        (
            node
            for node in tree.body
-            if isinstance(node, ast.FunctionDef) and node.name == "generate_types"
+            if isinstance(node, ast.FunctionDef)
+            and node.name == "generate_types_from_schema_dir"
        ),
        None,
    )
@@ -72,19 +77,19 @@ def test_generate_types_wires_all_generation_steps() -> None:
    ]


-def test_schema_normalization_only_flattens_string_literal_oneofs() -> None:
+def _load_runtime_schema_bundle(tmp_path: Path) -> dict:
+    """Ask the pinned runtime package for a real schema bundle used by tests."""
    script = _load_update_script_module()
-    schema = json.loads(
-        (
-            ROOT.parent.parent
-            / "codex-rs"
-            / "app-server-protocol"
-            / "schema"
-            / "json"
-            / "codex_app_server_protocol.v2.schemas.json"
-        ).read_text()
-    )
+    schema_dir = script.generate_schema_from_pinned_runtime(tmp_path / "schema")
+    return json.loads(script.schema_bundle_path(schema_dir).read_text())

+
+def test_schema_normalization_only_flattens_string_literal_oneofs(
+    tmp_path: Path,
+) -> None:
+    """Schema normalization should only flatten the enum-shaped oneOf variants."""
+    script = _load_update_script_module()
+    schema = _load_runtime_schema_bundle(tmp_path)
    definitions = schema["definitions"]
    flattened = [
        name
@@ -94,27 +99,23 @@ def test_schema_normalization_only_flattens_string_literal_oneofs() -> None:
    ]

    assert flattened == [
-        "AuthMode",
-        "CommandExecOutputStream",
-        "ExperimentalFeatureStage",
-        "InputModality",
        "MessagePhase",
+        "TurnItemsView",
+        "PluginAvailability",
+        "AuthMode",
+        "InputModality",
+        "ExperimentalFeatureStage",
+        "CommandExecOutputStream",
+        "ProcessOutputStream",
    ]


-def test_python_codegen_schema_annotation_adds_stable_variant_titles() -> None:
+def test_python_codegen_schema_annotation_adds_stable_variant_titles(
+    tmp_path: Path,
+) -> None:
+    """Schema annotations should give generated protocol classes stable names."""
    script = _load_update_script_module()
-    schema = json.loads(
-        (
-            ROOT.parent.parent
-            / "codex-rs"
-            / "app-server-protocol"
-            / "schema"
-            / "json"
-            / "codex_app_server_protocol.v2.schemas.json"
-        ).read_text()
-    )
-
+    schema = _load_runtime_schema_bundle(tmp_path)
    script._annotate_schema(schema)
    definitions = schema["definitions"]

@@ -186,6 +187,25 @@ def test_runtime_distribution_name_is_consistent() -> None:
    )


+def test_source_sdk_package_pins_published_runtime() -> None:
+    """The source package metadata should pin the runtime wheel that ships schemas."""
+    script = _load_update_script_module()
+    pyproject = tomllib.loads((ROOT / "pyproject.toml").read_text())
+
+    assert {
+        "sdk_version": pyproject["project"]["version"],
+        "runtime_pin": script.pinned_runtime_version(),
+        "dependencies": pyproject["project"]["dependencies"],
+    } == {
+        "sdk_version": "0.131.0a4",
+        "runtime_pin": "0.131.0a4",
+        "dependencies": [
+            "pydantic>=2.12",
+            "openai-codex-cli-bin==0.131.0a4",
+        ],
+    }
+
+
 def test_release_metadata_retries_without_invalid_auth(
    monkeypatch: pytest.MonkeyPatch,
 ) -> None:
@@ -422,9 +442,7 @@ def test_runtime_resource_binaries_are_included_by_wheel_config(
    pyproject = tomllib.loads((staged / "pyproject.toml").read_text())
    assert {
        "include": pyproject["tool"]["hatch"]["build"]["targets"]["wheel"]["include"],
-        "helper": (
-            staged / "src" / "codex_cli_bin" / "bin" / "helper"
-        ).read_text(),
+        "helper": (staged / "src" / "codex_cli_bin" / "bin" / "helper").read_text(),
    } == {
        "include": ["src/codex_cli_bin/bin/**"],
        "helper": "fake helper\n",
--- a/sdk/python/tests/test_client_rpc_methods.py
+++ b/sdk/python/tests/test_client_rpc_methods.py
@@ -50,6 +50,7 @@ def test_generated_v2_bundle_has_single_shared_plan_type_definition() -> None:


 def test_thread_resume_response_accepts_auto_review_reviewer() -> None:
+    """Generated response models should keep accepting the auto review enum value."""
    response = ThreadResumeResponse.model_validate(
        {
            "approvalPolicy": "on-request",
@@ -66,6 +67,8 @@ def test_thread_resume_response_accepts_auto_review_reviewer() -> None:
                "id": "thread-1",
                "modelProvider": "openai",
                "preview": "",
+                # The pinned runtime schema requires the session id on threads.
+                "sessionId": "session-1",
                "source": "cli",
                "status": {"type": "idle"},
                "turns": [],
--- a/sdk/python/tests/test_contract_generation.py
+++ b/sdk/python/tests/test_contract_generation.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import importlib.metadata
 import os
 import subprocess
 import sys
@@ -14,6 +15,7 @@ GENERATED_TARGETS = [


 def _snapshot_target(root: Path, rel_path: Path) -> dict[str, bytes] | bytes | None:
+    """Capture one generated artifact so regeneration drift is easy to compare."""
    target = root / rel_path
    if not target.exists():
        return None
@@ -28,16 +30,22 @@ def _snapshot_target(root: Path, rel_path: Path) -> dict[str, bytes] | bytes | N


 def _snapshot_targets(root: Path) -> dict[str, dict[str, bytes] | bytes | None]:
+    """Capture all checked-in generated artifacts before and after regeneration."""
    return {
-        str(rel_path): _snapshot_target(root, rel_path) for rel_path in GENERATED_TARGETS
+        str(rel_path): _snapshot_target(root, rel_path)
+        for rel_path in GENERATED_TARGETS
    }


 def test_generated_files_are_up_to_date():
+    """Regenerating from the pinned runtime package should leave artifacts unchanged."""
    before = _snapshot_targets(ROOT)

-    # Regenerate contract artifacts via single maintenance entrypoint.
+    # Regenerate contract artifacts via the pinned runtime package, not a local
+    # app-server binary from the checkout or CI environment.
+    assert importlib.metadata.version("openai-codex-cli-bin") == "0.131.0a4"
    env = os.environ.copy()
+    env.pop("CODEX_EXEC_PATH", None)
    python_bin = str(Path(sys.executable).parent)
    env["PATH"] = f"{python_bin}{os.pathsep}{env.get('PATH', '')}"

--- a/sdk/python/tests/test_public_api_runtime_behavior.py
+++ b/sdk/python/tests/test_public_api_runtime_behavior.py
@@ -82,6 +82,7 @@ def _item_completed_notification(
    text: str = "final text",
    phase: MessagePhase | None = None,
 ) -> Notification:
+    """Build a realistic completed-item notification accepted by generated models."""
    item: dict[str, object] = {
        "id": "item-1",
        "text": text,
@@ -93,6 +94,8 @@ def _item_completed_notification(
        method="item/completed",
        payload=ItemCompletedNotification.model_validate(
            {
+                # The pinned runtime schema requires completion timestamps.
+                "completedAtMs": 1,
                "item": item,
                "threadId": thread_id,
                "turnId": turn_id,
--- a/sdk/python/tests/test_public_api_signatures.py
+++ b/sdk/python/tests/test_public_api_signatures.py
@@ -54,6 +54,7 @@ def test_package_includes_py_typed_marker() -> None:


 def test_generated_public_signatures_are_snake_case_and_typed() -> None:
+    """Generated convenience methods should expose typed Pythonic keyword names."""
    expected = {
        Codex.thread_start: [
            "approval_policy",
@@ -70,6 +71,7 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
            "service_name",
            "service_tier",
            "session_start_source",
+            "thread_source",
        ],
        Codex.thread_list: [
            "archived",
@@ -108,6 +110,7 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
            "model_provider",
            "sandbox",
            "service_tier",
+            "thread_source",
        ],
        Thread.turn: [
            "approval_policy",
@@ -148,6 +151,7 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
            "service_name",
            "service_tier",
            "session_start_source",
+            "thread_source",
        ],
        AsyncCodex.thread_list: [
            "archived",
@@ -186,6 +190,7 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
            "model_provider",
            "sandbox",
            "service_tier",
+            "thread_source",
        ],
        AsyncThread.turn: [
            "approval_policy",