mirror of
https://github.com/openai/codex.git
synced 2026-05-29 15:30:22 +00:00
Cover SDK app-server integration gaps
Add focused integration coverage for thread listing, persisted history reads, async lifecycle wrappers, skill input injection, and run override/usage behavior through the pinned app-server test harness. Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -364,6 +364,33 @@ def ev_completed(response_id: str) -> Json:
|
||||
}
|
||||
|
||||
|
||||
def ev_completed_with_usage(
    response_id: str,
    *,
    input_tokens: int,
    cached_input_tokens: int,
    output_tokens: int,
    reasoning_output_tokens: int,
    total_tokens: int,
) -> Json:
    """Return `response.completed` with explicit token accounting.

    Args:
        response_id: Value stored under ``response.id``.
        input_tokens: Value stored under ``usage.input_tokens``.
        cached_input_tokens: Value stored under
            ``usage.input_tokens_details.cached_tokens``.
        output_tokens: Value stored under ``usage.output_tokens``.
        reasoning_output_tokens: Value stored under
            ``usage.output_tokens_details.reasoning_tokens``.
        total_tokens: Value stored under ``usage.total_tokens``; it is passed
            through as given and is not derived from the other counts.

    Returns:
        A dict with ``type == "response.completed"`` and a ``response`` object
        holding the id and the ``usage`` breakdown.
    """
    usage = {
        "input_tokens": input_tokens,
        "input_tokens_details": {"cached_tokens": cached_input_tokens},
        "output_tokens": output_tokens,
        "output_tokens_details": {
            "reasoning_tokens": reasoning_output_tokens,
        },
        "total_tokens": total_tokens,
    }
    return {
        "type": "response.completed",
        "response": {
            "id": response_id,
            "usage": usage,
        },
    }
|
||||
|
||||
|
||||
def ev_assistant_message(item_id: str, text: str) -> Json:
|
||||
"""Return a completed assistant message output item."""
|
||||
return {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app_server_harness import AppServerHarness
|
||||
from openai_codex import Codex, ImageInput, LocalImageInput, TextInput
|
||||
from openai_codex import Codex, ImageInput, LocalImageInput, SkillInput, TextInput
|
||||
from app_server_helpers import TINY_PNG_BYTES
|
||||
|
||||
|
||||
@@ -72,3 +72,38 @@ def test_local_image_input_reaches_responses_api(
|
||||
"contains_user_prompt": True,
|
||||
"image_url_is_png_data_url": True,
|
||||
}
|
||||
|
||||
|
||||
def test_skill_input_injects_skill_body(tmp_path) -> None:
    """SkillInput should cross the SDK boundary and reach the model prompt."""
    skill_file = tmp_path / "skills" / "demo" / "SKILL.md"
    skill_file.parent.mkdir(parents=True)
    # Pin the encoding so the fixture does not depend on the platform locale.
    skill_file.write_text("# Demo\n\nUse the word cobalt.\n", encoding="utf-8")

    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message(
            "skill received",
            response_id="skill-input",
        )

        with Codex(config=harness.app_server_config()) as codex:
            result = codex.thread_start().run(
                [
                    TextInput("Use the selected skill."),
                    SkillInput("demo", str(skill_file)),
                ]
            )
            request = harness.responses.single_request()

        # NOTE(review): nesting was reconstructed from a flattened diff view;
        # confirm the assert belongs at this level.
        # The injected prompt must carry the skill name, body and file path
        # together in a single user message text.
        assert {
            "final_response": result.final_response,
            "skill_prompt_was_injected": any(
                "<skill>\n<name>demo</name>" in text
                and "Use the word cobalt." in text
                and str(skill_file) in text
                for text in request.message_input_texts("user")
            ),
        } == {
            "final_response": "skill received",
            "skill_prompt_was_injected": True,
        }
|
||||
|
||||
@@ -1,10 +1,30 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from app_server_harness import AppServerHarness
|
||||
from openai_codex import Codex
|
||||
from openai_codex import AsyncCodex, Codex
|
||||
from app_server_helpers import request_kind
|
||||
|
||||
|
||||
def _thread_message_summary(read_response) -> list[tuple[str, str]]:
    """Return persisted user/agent messages from a thread read response.

    Walks ``read_response.thread.turns`` in order and, for each item, inspects
    ``item.root.type``:

    * ``"userMessage"`` yields ``("user", text)``, where ``text`` joins the
      ``text`` of every content entry whose ``root.type == "text"`` with
      newlines; other content entries are ignored.
    * ``"agentMessage"`` yields ``("agent", root.text)``.

    Items of any other type are skipped.
    """
    messages: list[tuple[str, str]] = []
    for turn in read_response.thread.turns:
        for item in turn.items:
            root = item.root
            if root.type == "userMessage":
                text = "\n".join(
                    input_item.root.text
                    for input_item in root.content
                    if input_item.root.type == "text"
                )
                messages.append(("user", text))
            elif root.type == "agentMessage":
                messages.append(("agent", root.text))
    return messages
|
||||
|
||||
|
||||
def test_thread_set_name_and_read(tmp_path) -> None:
|
||||
"""Thread naming should round-trip through app-server JSON-RPC."""
|
||||
with AppServerHarness(tmp_path) as harness:
|
||||
@@ -18,6 +38,98 @@ def test_thread_set_name_and_read(tmp_path) -> None:
|
||||
}
|
||||
|
||||
|
||||
def test_thread_list_filters_archived_threads(tmp_path) -> None:
    """Thread listing should reflect archive state through app-server."""
    with AppServerHarness(tmp_path) as harness:
        # One queued response per thread run below.
        harness.responses.enqueue_assistant_message("active", response_id="list-active")
        harness.responses.enqueue_assistant_message(
            "archived",
            response_id="list-archived",
        )

        with Codex(config=harness.app_server_config()) as codex:
            active_thread = codex.thread_start()
            archived_thread = codex.thread_start()
            active_thread.run("keep this listed")
            archived_thread.run("archive this")
            codex.thread_archive(archived_thread.id)
            active_list = codex.thread_list(archived=False)
            archived_list = codex.thread_list(archived=True)

        # NOTE(review): nesting was reconstructed from a flattened diff view;
        # confirm the assert belongs at this level.
        # Only compare the two threads created here, so unrelated entries in
        # either listing cannot affect the result.
        expected_ids = {active_thread.id, archived_thread.id}
        assert {
            "active_ids": sorted(
                thread.id for thread in active_list.data if thread.id in expected_ids
            ),
            "archived_ids": sorted(
                thread.id for thread in archived_list.data if thread.id in expected_ids
            ),
        } == {
            "active_ids": [active_thread.id],
            "archived_ids": [archived_thread.id],
        }
|
||||
|
||||
|
||||
def test_read_include_turns_returns_persisted_history(tmp_path) -> None:
    """Thread.read(include_turns=True) should load real persisted turn items."""
    with AppServerHarness(tmp_path) as harness:
        # Responses are queued in the order the two runs below consume them.
        harness.responses.enqueue_assistant_message("first answer", response_id="read-1")
        harness.responses.enqueue_assistant_message("second answer", response_id="read-2")

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            thread.run("first question")
            thread.run("second question")
            read = thread.read(include_turns=True)

        # NOTE(review): nesting was reconstructed from a flattened diff view;
        # confirm the assert belongs at this level.
        assert _thread_message_summary(read) == [
            ("user", "first question"),
            ("agent", "first answer"),
            ("user", "second question"),
            ("agent", "second answer"),
        ]
|
||||
|
||||
|
||||
def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
    """Async lifecycle helpers should preserve the same app-server thread state."""

    async def scenario() -> None:
        """Exercise async wrappers over one materialized thread."""
        with AppServerHarness(tmp_path) as harness:
            harness.responses.enqueue_assistant_message(
                "async materialized",
                response_id="async-lifecycle",
            )

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                # Run one turn first; the lifecycle calls below operate on
                # this same thread id.
                run_result = await thread.run("materialize async thread")
                await thread.set_name("async lifecycle")
                named = await thread.read()
                resumed = await codex.thread_resume(thread.id)
                forked = await codex.thread_fork(thread.id)
                archive_response = await codex.thread_archive(thread.id)
                unarchived = await codex.thread_unarchive(thread.id)

            # NOTE(review): nesting was reconstructed from a flattened diff
            # view; confirm the assert belongs at this level.
            assert {
                "run_final_response": run_result.final_response,
                "named_thread": named.thread.name,
                "resumed_id": resumed.id,
                "forked_is_distinct": forked.id != thread.id,
                "archive_response": archive_response.model_dump(by_alias=True, mode="json"),
                "unarchived_id": unarchived.id,
            } == {
                "run_final_response": "async materialized",
                "named_thread": "async lifecycle",
                "resumed_id": thread.id,
                "forked_is_distinct": True,
                "archive_response": {},
                "unarchived_id": thread.id,
            }

    asyncio.run(scenario())
|
||||
|
||||
|
||||
def test_thread_fork_returns_distinct_thread(tmp_path) -> None:
|
||||
"""Thread fork should return a distinct thread for a persisted rollout."""
|
||||
with AppServerHarness(tmp_path) as harness:
|
||||
|
||||
@@ -8,6 +8,7 @@ from app_server_harness import (
|
||||
AppServerHarness,
|
||||
ev_assistant_message,
|
||||
ev_completed,
|
||||
ev_completed_with_usage,
|
||||
ev_failed,
|
||||
ev_response_created,
|
||||
sse,
|
||||
@@ -51,6 +52,74 @@ def test_sync_thread_run_uses_mock_responses(
|
||||
}
|
||||
|
||||
|
||||
def test_run_params_and_usage_cross_app_server_boundary(tmp_path) -> None:
    """Thread.run should pass overrides and collect app-server token usage."""
    turn_cwd = tmp_path / "turn-cwd"
    turn_cwd.mkdir()

    with AppServerHarness(tmp_path) as harness:
        # Queue a hand-built SSE stream so the completion event carries known
        # token counts to compare against below.
        harness.responses.enqueue_sse(
            sse(
                [
                    ev_response_created("run-overrides"),
                    ev_assistant_message("msg-run-overrides", "overrides applied"),
                    ev_completed_with_usage(
                        "run-overrides",
                        input_tokens=11,
                        cached_input_tokens=3,
                        output_tokens=7,
                        reasoning_output_tokens=5,
                        total_tokens=18,
                    ),
                ]
            )
        )

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            result = thread.run(
                "use overrides",
                cwd=str(turn_cwd),
                model="mock-model-override",
            )
            read = thread.read()
            request = harness.responses.single_request()

        # NOTE(review): nesting was reconstructed from a flattened diff view;
        # confirm the statements below belong at this level.
        # Keep only the "last" and "total" sections of the usage dump; a
        # missing usage object stays None and fails the comparison below.
        usage_payload = None
        if result.usage is not None:
            dumped_usage = result.usage.model_dump(by_alias=True, mode="json")
            usage_payload = {
                "last": dumped_usage["last"],
                "total": dumped_usage["total"],
            }

        # With a single turn, "last" and "total" are expected to be equal.
        expected_counts = {
            "cachedInputTokens": 3,
            "inputTokens": 11,
            "outputTokens": 7,
            "reasoningOutputTokens": 5,
            "totalTokens": 18,
        }
        assert {
            "final_response": result.final_response,
            "request_model": request.body_json()["model"],
            "thread_cwd": read.thread.cwd.root,
            "usage": usage_payload,
        } == {
            "final_response": "overrides applied",
            "request_model": "mock-model-override",
            "thread_cwd": str(turn_cwd),
            "usage": {
                "last": dict(expected_counts),
                "total": dict(expected_counts),
            },
        }
|
||||
|
||||
|
||||
def test_async_thread_run_uses_mock_responses(
|
||||
tmp_path,
|
||||
) -> None:
|
||||
|
||||
Reference in New Issue
Block a user