Patch summary (review commentary only; this text sits before the first
`diff --git` header and is not part of any hunk).

  * sdk/python/tests/app_server_harness.py
      Adds ev_completed_with_usage(), which returns a `response.completed`
      event whose `usage` object is filled from the keyword-only token counts
      passed by the caller.
  * sdk/python/tests/test_app_server_inputs.py
      Imports SkillInput and adds test_skill_input_injects_skill_body, which
      writes a SKILL.md under tmp_path and asserts that some user message text
      in the captured request contains the skill name, body and file path.
  * sdk/python/tests/test_app_server_lifecycle.py
      Imports asyncio and AsyncCodex, adds the _thread_message_summary helper,
      and adds tests for archived-thread listing, read(include_turns=True),
      and the async lifecycle wrappers.
  * sdk/python/tests/test_app_server_run.py
      Imports ev_completed_with_usage and adds a test asserting the model
      override, the thread cwd and the reported token usage.

NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed. Leading whitespace below is reconstructed from
Python syntax, including which statements sit inside the `with` blocks, and
blank context lines are inferred from the hunk line counts. Confirm against
the original patch before applying.

NOTE(review): the hunks headed `-18,6 +38,98` and `-51,6 +52,74` each show a
blank `+` line directly after the leading context. Counted that way they carry
93 and 69 added lines, while their headers imply 92 and 68. The tokens are
kept exactly as found; confirm which side is correct.

diff --git a/sdk/python/tests/app_server_harness.py b/sdk/python/tests/app_server_harness.py
index bd62e63eda..6fb0f608be 100644
--- a/sdk/python/tests/app_server_harness.py
+++ b/sdk/python/tests/app_server_harness.py
@@ -364,6 +364,33 @@ def ev_completed(response_id: str) -> Json:
     }
 
 
+def ev_completed_with_usage(
+    response_id: str,
+    *,
+    input_tokens: int,
+    cached_input_tokens: int,
+    output_tokens: int,
+    reasoning_output_tokens: int,
+    total_tokens: int,
+) -> Json:
+    """Return `response.completed` with explicit token accounting."""
+    return {
+        "type": "response.completed",
+        "response": {
+            "id": response_id,
+            "usage": {
+                "input_tokens": input_tokens,
+                "input_tokens_details": {"cached_tokens": cached_input_tokens},
+                "output_tokens": output_tokens,
+                "output_tokens_details": {
+                    "reasoning_tokens": reasoning_output_tokens,
+                },
+                "total_tokens": total_tokens,
+            },
+        },
+    }
+
+
 def ev_assistant_message(item_id: str, text: str) -> Json:
     """Return a completed assistant message output item."""
     return {
diff --git a/sdk/python/tests/test_app_server_inputs.py b/sdk/python/tests/test_app_server_inputs.py
index dbd4afd4e0..2cf8cd9cc2 100644
--- a/sdk/python/tests/test_app_server_inputs.py
+++ b/sdk/python/tests/test_app_server_inputs.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from app_server_harness import AppServerHarness
-from openai_codex import Codex, ImageInput, LocalImageInput, TextInput
+from openai_codex import Codex, ImageInput, LocalImageInput, SkillInput, TextInput
 from app_server_helpers import TINY_PNG_BYTES
 
 
@@ -72,3 +72,38 @@ def test_local_image_input_reaches_responses_api(
         "contains_user_prompt": True,
         "image_url_is_png_data_url": True,
     }
+
+
+def test_skill_input_injects_skill_body(tmp_path) -> None:
+    """SkillInput should cross the SDK boundary and reach the model prompt."""
+    skill_file = tmp_path / "skills" / "demo" / "SKILL.md"
+    skill_file.parent.mkdir(parents=True)
+    skill_file.write_text("# Demo\n\nUse the word cobalt.\n")
+
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_assistant_message(
+            "skill received",
+            response_id="skill-input",
+        )
+
+        with Codex(config=harness.app_server_config()) as codex:
+            result = codex.thread_start().run(
+                [
+                    TextInput("Use the selected skill."),
+                    SkillInput("demo", str(skill_file)),
+                ]
+            )
+            request = harness.responses.single_request()
+
+    assert {
+        "final_response": result.final_response,
+        "skill_prompt_was_injected": any(
+            "\ndemo" in text
+            and "Use the word cobalt." in text
+            and str(skill_file) in text
+            for text in request.message_input_texts("user")
+        ),
+    } == {
+        "final_response": "skill received",
+        "skill_prompt_was_injected": True,
+    }
diff --git a/sdk/python/tests/test_app_server_lifecycle.py b/sdk/python/tests/test_app_server_lifecycle.py
index e08462ff65..644b97585a 100644
--- a/sdk/python/tests/test_app_server_lifecycle.py
+++ b/sdk/python/tests/test_app_server_lifecycle.py
@@ -1,10 +1,30 @@
 from __future__ import annotations
 
+import asyncio
+
 from app_server_harness import AppServerHarness
-from openai_codex import Codex
+from openai_codex import AsyncCodex, Codex
 from app_server_helpers import request_kind
 
 
+def _thread_message_summary(read_response) -> list[tuple[str, str]]:
+    """Return persisted user/agent messages from a thread read response."""
+    messages: list[tuple[str, str]] = []
+    for turn in read_response.thread.turns:
+        for item in turn.items:
+            root = item.root
+            if root.type == "userMessage":
+                text = "\n".join(
+                    input_item.root.text
+                    for input_item in root.content
+                    if input_item.root.type == "text"
+                )
+                messages.append(("user", text))
+            if root.type == "agentMessage":
+                messages.append(("agent", root.text))
+    return messages
+
+
 def test_thread_set_name_and_read(tmp_path) -> None:
     """Thread naming should round-trip through app-server JSON-RPC."""
     with AppServerHarness(tmp_path) as harness:
@@ -18,6 +38,98 @@ def test_thread_set_name_and_read(tmp_path) -> None:
     }
 
 
+
+def test_thread_list_filters_archived_threads(tmp_path) -> None:
+    """Thread listing should reflect archive state through app-server."""
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_assistant_message("active", response_id="list-active")
+        harness.responses.enqueue_assistant_message(
+            "archived",
+            response_id="list-archived",
+        )
+
+        with Codex(config=harness.app_server_config()) as codex:
+            active_thread = codex.thread_start()
+            archived_thread = codex.thread_start()
+            active_thread.run("keep this listed")
+            archived_thread.run("archive this")
+            codex.thread_archive(archived_thread.id)
+            active_list = codex.thread_list(archived=False)
+            archived_list = codex.thread_list(archived=True)
+
+    expected_ids = {active_thread.id, archived_thread.id}
+    assert {
+        "active_ids": sorted(
+            thread.id for thread in active_list.data if thread.id in expected_ids
+        ),
+        "archived_ids": sorted(
+            thread.id for thread in archived_list.data if thread.id in expected_ids
+        ),
+    } == {
+        "active_ids": [active_thread.id],
+        "archived_ids": [archived_thread.id],
+    }
+
+
+def test_read_include_turns_returns_persisted_history(tmp_path) -> None:
+    """Thread.read(include_turns=True) should load real persisted turn items."""
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_assistant_message("first answer", response_id="read-1")
+        harness.responses.enqueue_assistant_message("second answer", response_id="read-2")
+
+        with Codex(config=harness.app_server_config()) as codex:
+            thread = codex.thread_start()
+            thread.run("first question")
+            thread.run("second question")
+            read = thread.read(include_turns=True)
+
+    assert _thread_message_summary(read) == [
+        ("user", "first question"),
+        ("agent", "first answer"),
+        ("user", "second question"),
+        ("agent", "second answer"),
+    ]
+
+
+def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
+    """Async lifecycle helpers should preserve the same app-server thread state."""
+
+    async def scenario() -> None:
+        """Exercise async wrappers over one materialized thread."""
+        with AppServerHarness(tmp_path) as harness:
+            harness.responses.enqueue_assistant_message(
+                "async materialized",
+                response_id="async-lifecycle",
+            )
+
+            async with AsyncCodex(config=harness.app_server_config()) as codex:
+                thread = await codex.thread_start()
+                run_result = await thread.run("materialize async thread")
+                await thread.set_name("async lifecycle")
+                named = await thread.read()
+                resumed = await codex.thread_resume(thread.id)
+                forked = await codex.thread_fork(thread.id)
+                archive_response = await codex.thread_archive(thread.id)
+                unarchived = await codex.thread_unarchive(thread.id)
+
+        assert {
+            "run_final_response": run_result.final_response,
+            "named_thread": named.thread.name,
+            "resumed_id": resumed.id,
+            "forked_is_distinct": forked.id != thread.id,
+            "archive_response": archive_response.model_dump(by_alias=True, mode="json"),
+            "unarchived_id": unarchived.id,
+        } == {
+            "run_final_response": "async materialized",
+            "named_thread": "async lifecycle",
+            "resumed_id": thread.id,
+            "forked_is_distinct": True,
+            "archive_response": {},
+            "unarchived_id": thread.id,
+        }
+
+    asyncio.run(scenario())
+
+
 def test_thread_fork_returns_distinct_thread(tmp_path) -> None:
     """Thread fork should return a distinct thread for a persisted rollout."""
     with AppServerHarness(tmp_path) as harness:
diff --git a/sdk/python/tests/test_app_server_run.py b/sdk/python/tests/test_app_server_run.py
index 81478d0f7a..35d58db565 100644
--- a/sdk/python/tests/test_app_server_run.py
+++ b/sdk/python/tests/test_app_server_run.py
@@ -8,6 +8,7 @@ from app_server_harness import (
     AppServerHarness,
     ev_assistant_message,
     ev_completed,
+    ev_completed_with_usage,
     ev_failed,
     ev_response_created,
     sse,
@@ -51,6 +52,74 @@ def test_sync_thread_run_uses_mock_responses(
     }
 
 
+
+def test_run_params_and_usage_cross_app_server_boundary(tmp_path) -> None:
+    """Thread.run should pass overrides and collect app-server token usage."""
+    turn_cwd = tmp_path / "turn-cwd"
+    turn_cwd.mkdir()
+
+    with AppServerHarness(tmp_path) as harness:
+        harness.responses.enqueue_sse(
+            sse(
+                [
+                    ev_response_created("run-overrides"),
+                    ev_assistant_message("msg-run-overrides", "overrides applied"),
+                    ev_completed_with_usage(
+                        "run-overrides",
+                        input_tokens=11,
+                        cached_input_tokens=3,
+                        output_tokens=7,
+                        reasoning_output_tokens=5,
+                        total_tokens=18,
+                    ),
+                ]
+            )
+        )
+
+        with Codex(config=harness.app_server_config()) as codex:
+            thread = codex.thread_start()
+            result = thread.run(
+                "use overrides",
+                cwd=str(turn_cwd),
+                model="mock-model-override",
+            )
+            read = thread.read()
+            request = harness.responses.single_request()
+
+    usage_payload = None
+    if result.usage is not None:
+        dumped_usage = result.usage.model_dump(by_alias=True, mode="json")
+        usage_payload = {
+            "last": dumped_usage["last"],
+            "total": dumped_usage["total"],
+        }
+    assert {
+        "final_response": result.final_response,
+        "request_model": request.body_json()["model"],
+        "thread_cwd": read.thread.cwd.root,
+        "usage": usage_payload,
+    } == {
+        "final_response": "overrides applied",
+        "request_model": "mock-model-override",
+        "thread_cwd": str(turn_cwd),
+        "usage": {
+            "last": {
+                "cachedInputTokens": 3,
+                "inputTokens": 11,
+                "outputTokens": 7,
+                "reasoningOutputTokens": 5,
+                "totalTokens": 18,
+            },
+            "total": {
+                "cachedInputTokens": 3,
+                "inputTokens": 11,
+                "outputTokens": 7,
+                "reasoningOutputTokens": 5,
+                "totalTokens": 18,
+            },
+        },
+    }
+
+
 def test_async_thread_run_uses_mock_responses(
     tmp_path,
 ) -> None: