Cover SDK app-server integration gaps

Add focused integration coverage for thread listing, persisted history reads, async lifecycle wrappers, skill input injection, and run override/usage behavior through the pinned app-server test harness.

Co-authored-by: Codex <noreply@openai.com>
2026-05-29 15:30:22 +00:00 · 2026-05-10 15:14:45 +03:00
parent b9cd273f8d
commit d77f543654
4 changed files with 245 additions and 2 deletions
--- a/sdk/python/tests/app_server_harness.py
+++ b/sdk/python/tests/app_server_harness.py
@@ -364,6 +364,33 @@ def ev_completed(response_id: str) -> Json:
    }


+def ev_completed_with_usage(
+    response_id: str,
+    *,
+    input_tokens: int,
+    cached_input_tokens: int,
+    output_tokens: int,
+    reasoning_output_tokens: int,
+    total_tokens: int,
+) -> Json:
+    """Return `response.completed` with explicit token accounting."""
+    return {
+        "type": "response.completed",
+        "response": {
+            "id": response_id,
+            "usage": {
+                "input_tokens": input_tokens,
+                "input_tokens_details": {"cached_tokens": cached_input_tokens},
+                "output_tokens": output_tokens,
+                "output_tokens_details": {
+                    "reasoning_tokens": reasoning_output_tokens,
+                },
+                "total_tokens": total_tokens,
+            },
+        },
+    }
+
+
 def ev_assistant_message(item_id: str, text: str) -> Json:
    """Return a completed assistant message output item."""
    return {
--- a/sdk/python/tests/test_app_server_inputs.py
+++ b/sdk/python/tests/test_app_server_inputs.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

 from app_server_harness import AppServerHarness
-from openai_codex import Codex, ImageInput, LocalImageInput, TextInput
+from openai_codex import Codex, ImageInput, LocalImageInput, SkillInput, TextInput
 from app_server_helpers import TINY_PNG_BYTES


@@ -72,3 +72,38 @@ def test_local_image_input_reaches_responses_api(
        "contains_user_prompt": True,
        "image_url_is_png_data_url": True,
    }
+
+
+def test_skill_input_injects_skill_body(tmp_path) -> None:
+    """SkillInput should cross the SDK boundary and reach the model prompt."""
+    skill_file = tmp_path / "skills" / "demo" / "SKILL.md"
+    skill_file.parent.mkdir(parents=True)
+    skill_file.write_text("# Demo\n\nUse the word cobalt.\n")
+
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_assistant_message(
+            "skill received",
+            response_id="skill-input",
+        )
+
+        with Codex(config=harness.app_server_config()) as codex:
+            result = codex.thread_start().run(
+                [
+                    TextInput("Use the selected skill."),
+                    SkillInput("demo", str(skill_file)),
+                ]
+            )
+            request = harness.responses.single_request()
+
+    assert {
+        "final_response": result.final_response,
+        "skill_prompt_was_injected": any(
+            "<skill>\n<name>demo</name>" in text
+            and "Use the word cobalt." in text
+            and str(skill_file) in text
+            for text in request.message_input_texts("user")
+        ),
+    } == {
+        "final_response": "skill received",
+        "skill_prompt_was_injected": True,
+    }
--- a/sdk/python/tests/test_app_server_lifecycle.py
+++ b/sdk/python/tests/test_app_server_lifecycle.py
@@ -1,10 +1,30 @@
 from __future__ import annotations

+import asyncio
+
 from app_server_harness import AppServerHarness
-from openai_codex import Codex
+from openai_codex import AsyncCodex, Codex
 from app_server_helpers import request_kind


+def _thread_message_summary(read_response) -> list[tuple[str, str]]:
+    """Return persisted user/agent messages from a thread read response."""
+    messages: list[tuple[str, str]] = []
+    for turn in read_response.thread.turns:
+        for item in turn.items:
+            root = item.root
+            if root.type == "userMessage":
+                text = "\n".join(
+                    input_item.root.text
+                    for input_item in root.content
+                    if input_item.root.type == "text"
+                )
+                messages.append(("user", text))
+            if root.type == "agentMessage":
+                messages.append(("agent", root.text))
+    return messages
+
+
 def test_thread_set_name_and_read(tmp_path) -> None:
    """Thread naming should round-trip through app-server JSON-RPC."""
    with AppServerHarness(tmp_path) as harness:
@@ -18,6 +38,98 @@ def test_thread_set_name_and_read(tmp_path) -> None:
    }


+def test_thread_list_filters_archived_threads(tmp_path) -> None:
+    """Thread listing should reflect archive state through app-server."""
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_assistant_message("active", response_id="list-active")
+        harness.responses.enqueue_assistant_message(
+            "archived",
+            response_id="list-archived",
+        )
+
+        with Codex(config=harness.app_server_config()) as codex:
+            active_thread = codex.thread_start()
+            archived_thread = codex.thread_start()
+            active_thread.run("keep this listed")
+            archived_thread.run("archive this")
+            codex.thread_archive(archived_thread.id)
+            active_list = codex.thread_list(archived=False)
+            archived_list = codex.thread_list(archived=True)
+
+    expected_ids = {active_thread.id, archived_thread.id}
+    assert {
+        "active_ids": sorted(
+            thread.id for thread in active_list.data if thread.id in expected_ids
+        ),
+        "archived_ids": sorted(
+            thread.id for thread in archived_list.data if thread.id in expected_ids
+        ),
+    } == {
+        "active_ids": [active_thread.id],
+        "archived_ids": [archived_thread.id],
+    }
+
+
+def test_read_include_turns_returns_persisted_history(tmp_path) -> None:
+    """Thread.read(include_turns=True) should load real persisted turn items."""
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_assistant_message("first answer", response_id="read-1")
+        harness.responses.enqueue_assistant_message("second answer", response_id="read-2")
+
+        with Codex(config=harness.app_server_config()) as codex:
+            thread = codex.thread_start()
+            thread.run("first question")
+            thread.run("second question")
+            read = thread.read(include_turns=True)
+
+    assert _thread_message_summary(read) == [
+        ("user", "first question"),
+        ("agent", "first answer"),
+        ("user", "second question"),
+        ("agent", "second answer"),
+    ]
+
+
+def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
+    """Async lifecycle helpers should preserve the same app-server thread state."""
+
+    async def scenario() -> None:
+        """Exercise async wrappers over one materialized thread."""
+        with AppServerHarness(tmp_path) as harness:
+            harness.responses.enqueue_assistant_message(
+                "async materialized",
+                response_id="async-lifecycle",
+            )
+
+            async with AsyncCodex(config=harness.app_server_config()) as codex:
+                thread = await codex.thread_start()
+                run_result = await thread.run("materialize async thread")
+                await thread.set_name("async lifecycle")
+                named = await thread.read()
+                resumed = await codex.thread_resume(thread.id)
+                forked = await codex.thread_fork(thread.id)
+                archive_response = await codex.thread_archive(thread.id)
+                unarchived = await codex.thread_unarchive(thread.id)
+
+        assert {
+            "run_final_response": run_result.final_response,
+            "named_thread": named.thread.name,
+            "resumed_id": resumed.id,
+            "forked_is_distinct": forked.id != thread.id,
+            "archive_response": archive_response.model_dump(by_alias=True, mode="json"),
+            "unarchived_id": unarchived.id,
+        } == {
+            "run_final_response": "async materialized",
+            "named_thread": "async lifecycle",
+            "resumed_id": thread.id,
+            "forked_is_distinct": True,
+            "archive_response": {},
+            "unarchived_id": thread.id,
+        }
+
+    asyncio.run(scenario())
+
+
 def test_thread_fork_returns_distinct_thread(tmp_path) -> None:
    """Thread fork should return a distinct thread for a persisted rollout."""
    with AppServerHarness(tmp_path) as harness:
--- a/sdk/python/tests/test_app_server_run.py
+++ b/sdk/python/tests/test_app_server_run.py
@@ -8,6 +8,7 @@ from app_server_harness import (
    AppServerHarness,
    ev_assistant_message,
    ev_completed,
+    ev_completed_with_usage,
    ev_failed,
    ev_response_created,
    sse,
@@ -51,6 +52,74 @@ def test_sync_thread_run_uses_mock_responses(
    }


+def test_run_params_and_usage_cross_app_server_boundary(tmp_path) -> None:
+    """Thread.run should pass overrides and collect app-server token usage."""
+    turn_cwd = tmp_path / "turn-cwd"
+    turn_cwd.mkdir()
+
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_sse(
+            sse(
+                [
+                    ev_response_created("run-overrides"),
+                    ev_assistant_message("msg-run-overrides", "overrides applied"),
+                    ev_completed_with_usage(
+                        "run-overrides",
+                        input_tokens=11,
+                        cached_input_tokens=3,
+                        output_tokens=7,
+                        reasoning_output_tokens=5,
+                        total_tokens=18,
+                    ),
+                ]
+            )
+        )
+
+        with Codex(config=harness.app_server_config()) as codex:
+            thread = codex.thread_start()
+            result = thread.run(
+                "use overrides",
+                cwd=str(turn_cwd),
+                model="mock-model-override",
+            )
+            read = thread.read()
+            request = harness.responses.single_request()
+
+    usage_payload = None
+    if result.usage is not None:
+        dumped_usage = result.usage.model_dump(by_alias=True, mode="json")
+        usage_payload = {
+            "last": dumped_usage["last"],
+            "total": dumped_usage["total"],
+        }
+    assert {
+        "final_response": result.final_response,
+        "request_model": request.body_json()["model"],
+        "thread_cwd": read.thread.cwd.root,
+        "usage": usage_payload,
+    } == {
+        "final_response": "overrides applied",
+        "request_model": "mock-model-override",
+        "thread_cwd": str(turn_cwd),
+        "usage": {
+            "last": {
+                "cachedInputTokens": 3,
+                "inputTokens": 11,
+                "outputTokens": 7,
+                "reasoningOutputTokens": 5,
+                "totalTokens": 18,
+            },
+            "total": {
+                "cachedInputTokens": 3,
+                "inputTokens": 11,
+                "outputTokens": 7,
+                "reasoningOutputTokens": 5,
+                "totalTokens": 18,
+            },
+        },
+    }
+
+
 def test_async_thread_run_uses_mock_responses(
    tmp_path,
 ) -> None: