Cover SDK app-server integration gaps

Add focused integration coverage for thread listing, persisted history reads, async lifecycle wrappers, skill input injection, and run override/usage behavior through the pinned app-server test harness.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-05-10 15:14:45 +03:00
parent b9cd273f8d
commit d77f543654
4 changed files with 245 additions and 2 deletions

View File

@@ -364,6 +364,33 @@ def ev_completed(response_id: str) -> Json:
}
def ev_completed_with_usage(
    response_id: str,
    *,
    input_tokens: int,
    cached_input_tokens: int,
    output_tokens: int,
    reasoning_output_tokens: int,
    total_tokens: int,
) -> Json:
    """Return `response.completed` with explicit token accounting.

    All counts are keyword-only and are copied into the payload verbatim;
    nothing is derived here (``total_tokens`` is not computed from the other
    counts).

    Args:
        response_id: Stored as ``response.id``.
        input_tokens: Stored as ``response.usage.input_tokens``.
        cached_input_tokens: Stored as
            ``response.usage.input_tokens_details.cached_tokens``.
        output_tokens: Stored as ``response.usage.output_tokens``.
        reasoning_output_tokens: Stored as
            ``response.usage.output_tokens_details.reasoning_tokens``.
        total_tokens: Stored as ``response.usage.total_tokens``.

    Returns:
        A dict whose ``type`` is ``"response.completed"`` and whose
        ``response`` entry holds the id and the ``usage`` mapping.
    """
    usage = {
        "input_tokens": input_tokens,
        "input_tokens_details": {"cached_tokens": cached_input_tokens},
        "output_tokens": output_tokens,
        "output_tokens_details": {
            "reasoning_tokens": reasoning_output_tokens,
        },
        "total_tokens": total_tokens,
    }
    return {
        "type": "response.completed",
        "response": {
            "id": response_id,
            "usage": usage,
        },
    }
def ev_assistant_message(item_id: str, text: str) -> Json:
"""Return a completed assistant message output item."""
return {

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from app_server_harness import AppServerHarness
from openai_codex import Codex, ImageInput, LocalImageInput, TextInput
from openai_codex import Codex, ImageInput, LocalImageInput, SkillInput, TextInput
from app_server_helpers import TINY_PNG_BYTES
@@ -72,3 +72,38 @@ def test_local_image_input_reaches_responses_api(
"contains_user_prompt": True,
"image_url_is_png_data_url": True,
}
def test_skill_input_injects_skill_body(tmp_path) -> None:
    """SkillInput should cross the SDK boundary and reach the model prompt.

    Writes a throwaway ``SKILL.md`` under ``tmp_path``, runs a single turn
    that pairs a text input with a ``SkillInput`` pointing at that file, and
    checks that some user message in the one captured request contains the
    skill name tag, the skill body and the skill file path.
    """
    skill_file = tmp_path / "skills" / "demo" / "SKILL.md"
    skill_file.parent.mkdir(parents=True)
    # Explicit encoding so the fixture bytes do not depend on the host locale.
    skill_file.write_text("# Demo\n\nUse the word cobalt.\n", encoding="utf-8")

    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message(
            "skill received",
            response_id="skill-input",
        )

        with Codex(config=harness.app_server_config()) as codex:
            result = codex.thread_start().run(
                [
                    TextInput("Use the selected skill."),
                    SkillInput("demo", str(skill_file)),
                ]
            )
            request = harness.responses.single_request()

            # All three markers must appear in the same user message.
            skill_prompt_was_injected = any(
                "<skill>\n<name>demo</name>" in text
                and "Use the word cobalt." in text
                and str(skill_file) in text
                for text in request.message_input_texts("user")
            )
            observed = {
                "final_response": result.final_response,
                "skill_prompt_was_injected": skill_prompt_was_injected,
            }

    assert observed == {
        "final_response": "skill received",
        "skill_prompt_was_injected": True,
    }

View File

@@ -1,10 +1,30 @@
from __future__ import annotations
import asyncio
from app_server_harness import AppServerHarness
from openai_codex import Codex
from openai_codex import AsyncCodex, Codex
from app_server_helpers import request_kind
def _thread_message_summary(read_response) -> list[tuple[str, str]]:
    """Return persisted user/agent messages from a thread read response.

    Walks ``read_response.thread.turns`` in order and inspects ``item.root``
    for every item of every turn:

    * ``"userMessage"`` items produce ``("user", text)``, where ``text`` is
      the newline-joined ``text`` of the content parts whose type is
      ``"text"``; other content parts are skipped.
    * ``"agentMessage"`` items produce ``("agent", root.text)``.

    Items of any other type are ignored.

    Args:
        read_response: Object exposing ``thread.turns[*].items[*].root``.

    Returns:
        ``(role, text)`` pairs in the order the items were encountered.
    """
    messages: list[tuple[str, str]] = []
    for turn in read_response.thread.turns:
        for item in turn.items:
            root = item.root
            if root.type == "userMessage":
                text_parts = [
                    part.root.text
                    for part in root.content
                    if part.root.type == "text"
                ]
                messages.append(("user", "\n".join(text_parts)))
            elif root.type == "agentMessage":
                # The two item types are mutually exclusive, hence `elif`.
                messages.append(("agent", root.text))
    return messages
def test_thread_set_name_and_read(tmp_path) -> None:
"""Thread naming should round-trip through app-server JSON-RPC."""
with AppServerHarness(tmp_path) as harness:
@@ -18,6 +38,98 @@ def test_thread_set_name_and_read(tmp_path) -> None:
}
def test_thread_list_filters_archived_threads(tmp_path) -> None:
    """Thread listing should reflect archive state through app-server.

    Starts two threads, runs one turn on each, archives the second, and then
    lists with ``archived=False`` and ``archived=True``. Only ids created by
    this test are compared, so any other threads in the listings are ignored.
    """
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("active", response_id="list-active")
        harness.responses.enqueue_assistant_message(
            "archived",
            response_id="list-archived",
        )

        with Codex(config=harness.app_server_config()) as codex:
            active_thread = codex.thread_start()
            archived_thread = codex.thread_start()
            active_thread.run("keep this listed")
            archived_thread.run("archive this")
            codex.thread_archive(archived_thread.id)

            active_list = codex.thread_list(archived=False)
            archived_list = codex.thread_list(archived=True)

            # Restrict both listings to the two threads created above.
            expected_ids = {active_thread.id, archived_thread.id}
            observed = {
                "active_ids": sorted(
                    listed.id for listed in active_list.data if listed.id in expected_ids
                ),
                "archived_ids": sorted(
                    listed.id for listed in archived_list.data if listed.id in expected_ids
                ),
            }
            expected = {
                "active_ids": [active_thread.id],
                "archived_ids": [archived_thread.id],
            }

    assert observed == expected
def test_read_include_turns_returns_persisted_history(tmp_path) -> None:
    """Thread.read(include_turns=True) should load real persisted turn items.

    Runs two turns on one thread, reads the thread back with
    ``include_turns=True`` and checks that the summarised user/agent messages
    come back in turn order.
    """
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("first answer", response_id="read-1")
        harness.responses.enqueue_assistant_message("second answer", response_id="read-2")

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            thread.run("first question")
            thread.run("second question")
            read = thread.read(include_turns=True)
            history = _thread_message_summary(read)

    # The expected pairing shows replies are served in the order they were queued.
    assert history == [
        ("user", "first question"),
        ("agent", "first answer"),
        ("user", "second question"),
        ("agent", "second answer"),
    ]
def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
    """Async lifecycle helpers should preserve the same app-server thread state.

    Drives one coroutine with ``asyncio.run`` that starts a thread, runs a
    turn, names the thread, then resumes, forks, archives and unarchives it
    through ``AsyncCodex``, comparing the collected results in one assertion.
    """

    async def scenario() -> None:
        """Exercise async wrappers over one materialized thread."""
        with AppServerHarness(tmp_path) as harness:
            harness.responses.enqueue_assistant_message(
                "async materialized",
                response_id="async-lifecycle",
            )

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                run_result = await thread.run("materialize async thread")
                await thread.set_name("async lifecycle")
                named = await thread.read()
                resumed = await codex.thread_resume(thread.id)
                forked = await codex.thread_fork(thread.id)
                archive_response = await codex.thread_archive(thread.id)
                unarchived = await codex.thread_unarchive(thread.id)

                observed = {
                    "run_final_response": run_result.final_response,
                    "named_thread": named.thread.name,
                    "resumed_id": resumed.id,
                    "forked_is_distinct": forked.id != thread.id,
                    "archive_response": archive_response.model_dump(
                        by_alias=True, mode="json"
                    ),
                    "unarchived_id": unarchived.id,
                }
                expected = {
                    "run_final_response": "async materialized",
                    "named_thread": "async lifecycle",
                    "resumed_id": thread.id,
                    "forked_is_distinct": True,
                    "archive_response": {},
                    "unarchived_id": thread.id,
                }

        assert observed == expected

    asyncio.run(scenario())
def test_thread_fork_returns_distinct_thread(tmp_path) -> None:
"""Thread fork should return a distinct thread for a persisted rollout."""
with AppServerHarness(tmp_path) as harness:

View File

@@ -8,6 +8,7 @@ from app_server_harness import (
AppServerHarness,
ev_assistant_message,
ev_completed,
ev_completed_with_usage,
ev_failed,
ev_response_created,
sse,
@@ -51,6 +52,74 @@ def test_sync_thread_run_uses_mock_responses(
}
def test_run_params_and_usage_cross_app_server_boundary(tmp_path) -> None:
    """Thread.run should pass overrides and collect app-server token usage.

    Queues one SSE response whose completion event carries explicit token
    counts, runs a single turn with ``cwd`` and ``model`` overrides, and then
    checks four things together: the final response text, the ``model`` field
    of the one captured request body, the thread's ``cwd`` as read back, and
    the ``last``/``total`` entries of the dumped usage.
    """
    turn_cwd = tmp_path / "turn-cwd"
    turn_cwd.mkdir()

    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_sse(
            sse(
                [
                    ev_response_created("run-overrides"),
                    ev_assistant_message("msg-run-overrides", "overrides applied"),
                    ev_completed_with_usage(
                        "run-overrides",
                        input_tokens=11,
                        cached_input_tokens=3,
                        output_tokens=7,
                        reasoning_output_tokens=5,
                        total_tokens=18,
                    ),
                ]
            )
        )

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            result = thread.run(
                "use overrides",
                cwd=str(turn_cwd),
                model="mock-model-override",
            )
            read = thread.read()
            request = harness.responses.single_request()

            # Leave usage as None when absent so the comparison below fails
            # with a readable diff instead of an AttributeError.
            usage_payload = None
            if result.usage is not None:
                dumped_usage = result.usage.model_dump(by_alias=True, mode="json")
                usage_payload = {
                    "last": dumped_usage["last"],
                    "total": dumped_usage["total"],
                }

            observed = {
                "final_response": result.final_response,
                "request_model": request.body_json()["model"],
                "thread_cwd": read.thread.cwd.root,
                "usage": usage_payload,
            }

    # Only one turn ran, so `last` and `total` are expected to be identical.
    expected_breakdown = {
        "cachedInputTokens": 3,
        "inputTokens": 11,
        "outputTokens": 7,
        "reasoningOutputTokens": 5,
        "totalTokens": 18,
    }
    assert observed == {
        "final_response": "overrides applied",
        "request_model": "mock-model-override",
        "thread_cwd": str(turn_cwd),
        "usage": {
            "last": dict(expected_breakdown),
            "total": dict(expected_breakdown),
        },
    }
def test_async_thread_run_uses_mock_responses(
tmp_path,
) -> None: