Files
codex/sdk/python/tests/test_app_server_run.py
Ahmed Ibrahim d77f543654 Cover SDK app-server integration gaps
Add focused integration coverage for thread listing, persisted history reads, async lifecycle wrappers, skill input injection, and run override/usage behavior through the pinned app-server test harness.

Co-authored-by: Codex <noreply@openai.com>
2026-05-10 15:14:45 +03:00

377 lines
13 KiB
Python

from __future__ import annotations
import asyncio
import pytest
from app_server_harness import (
AppServerHarness,
ev_assistant_message,
ev_completed,
ev_completed_with_usage,
ev_failed,
ev_response_created,
sse,
)
from openai_codex import AsyncCodex, Codex
from openai_codex.generated.v2_all import MessagePhase
from app_server_helpers import (
agent_message_texts_from_items,
assistant_message_with_phase,
)
def test_sync_thread_run_uses_mock_responses(
    tmp_path,
) -> None:
    """Run one sync turn through the app-server harness and check the HTTP request.

    A single assistant message is queued on the harness's mock responses
    endpoint. The run result and the one captured request are then compared
    against the expected values in a single dict comparison.
    """
    expected = {
        "final_response": "Hello from the mock.",
        "agent_messages": ["Hello from the mock."],
        "has_usage": True,
        "request_model": "mock-model",
        "request_stream": True,
        "request_user_texts": ["hello"],
    }

    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message(
            "Hello from the mock.",
            response_id="run-1",
        )

        with Codex(config=harness.app_server_config()) as codex:
            run_result = codex.thread_start().run("hello")

        captured = harness.responses.single_request()
        payload = captured.body_json()

        observed = {
            "final_response": run_result.final_response,
            "agent_messages": agent_message_texts_from_items(run_result.items),
            "has_usage": run_result.usage is not None,
            "request_model": payload["model"],
            "request_stream": payload["stream"],
            # Only the most recent user text is compared.
            "request_user_texts": captured.message_input_texts("user")[-1:],
        }
        assert observed == expected
def test_run_params_and_usage_cross_app_server_boundary(tmp_path) -> None:
    """Check that per-run overrides reach the request and token usage comes back.

    The turn is run with an explicit ``cwd`` and ``model``. The test then
    compares the model seen in the captured request, the cwd reported by
    ``thread.read()``, and the ``last``/``total`` usage entries dumped from the
    run result.
    """
    override_cwd = tmp_path / "turn-cwd"
    override_cwd.mkdir()

    # The same counts are expected under both "last" and "total".
    token_counts = {
        "cachedInputTokens": 3,
        "inputTokens": 11,
        "outputTokens": 7,
        "reasoningOutputTokens": 5,
        "totalTokens": 18,
    }
    expected = {
        "final_response": "overrides applied",
        "request_model": "mock-model-override",
        "thread_cwd": str(override_cwd),
        "usage": {
            "last": dict(token_counts),
            "total": dict(token_counts),
        },
    }

    with AppServerHarness(tmp_path) as harness:
        events = [
            ev_response_created("run-overrides"),
            ev_assistant_message("msg-run-overrides", "overrides applied"),
            ev_completed_with_usage(
                "run-overrides",
                input_tokens=11,
                cached_input_tokens=3,
                output_tokens=7,
                reasoning_output_tokens=5,
                total_tokens=18,
            ),
        ]
        harness.responses.enqueue_sse(sse(events))

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            run_result = thread.run(
                "use overrides",
                cwd=str(override_cwd),
                model="mock-model-override",
            )
            thread_snapshot = thread.read()

        captured = harness.responses.single_request()

        usage = run_result.usage
        if usage is None:
            # Left as None so the comparison below fails on missing usage.
            usage_view = None
        else:
            dumped = usage.model_dump(by_alias=True, mode="json")
            usage_view = {key: dumped[key] for key in ("last", "total")}

        observed = {
            "final_response": run_result.final_response,
            "request_model": captured.body_json()["model"],
            "thread_cwd": thread_snapshot.thread.cwd.root,
            "usage": usage_view,
        }
        assert observed == expected
def test_async_thread_run_uses_mock_responses(
    tmp_path,
) -> None:
    """Run one async turn through the app-server harness and check the request.

    The coroutine is executed with ``asyncio.run`` so the test itself stays a
    plain synchronous pytest function.
    """

    async def drive_async_turn() -> None:
        """Queue one reply, run the turn with AsyncCodex, and compare results."""
        expected = {
            "final_response": "Hello async.",
            "agent_messages": ["Hello async."],
            "request_user_texts": ["async hello"],
        }

        with AppServerHarness(tmp_path) as harness:
            harness.responses.enqueue_assistant_message(
                "Hello async.",
                response_id="async-run-1",
            )

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                run_result = await thread.run("async hello")

            captured = harness.responses.single_request()

            observed = {
                "final_response": run_result.final_response,
                "agent_messages": agent_message_texts_from_items(run_result.items),
                # Only the most recent user text is compared.
                "request_user_texts": captured.message_input_texts("user")[-1:],
            }
            assert observed == expected

    asyncio.run(drive_async_turn())
def test_sync_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
    """Check that the later of two agent messages becomes ``final_response``.

    Both assistant messages are queued without an explicit phase. The run
    result is expected to list both texts and report the second as final.
    """
    expected = {
        "final_response": "Second message",
        "agent_messages": ["First message", "Second message"],
    }

    with AppServerHarness(tmp_path) as harness:
        events = [
            ev_response_created("items-last"),
            ev_assistant_message("msg-items-first", "First message"),
            ev_assistant_message("msg-items-second", "Second message"),
            ev_completed("items-last"),
        ]
        harness.responses.enqueue_sse(sse(events))

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            run_result = thread.run("case: last unknown phase wins")

        observed = {
            "final_response": run_result.final_response,
            "agent_messages": agent_message_texts_from_items(run_result.items),
        }
        assert observed == expected
def test_sync_run_result_preserves_empty_last_message(tmp_path) -> None:
    """Check that an empty last agent message is kept as the final text.

    A non-empty message is followed by an empty one. ``final_response`` is
    expected to be ``""`` rather than falling back to the earlier text.
    """
    expected = {
        "final_response": "",
        "agent_messages": ["First message", ""],
    }

    with AppServerHarness(tmp_path) as harness:
        events = [
            ev_response_created("items-empty"),
            ev_assistant_message("msg-items-nonempty", "First message"),
            ev_assistant_message("msg-items-empty", ""),
            ev_completed("items-empty"),
        ]
        harness.responses.enqueue_sse(sse(events))

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            run_result = thread.run("case: empty last message")

        observed = {
            "final_response": run_result.final_response,
            "agent_messages": agent_message_texts_from_items(run_result.items),
        }
        assert observed == expected
def test_sync_run_result_does_not_promote_commentary_only_to_final(tmp_path) -> None:
    """Check that a commentary-only turn leaves ``final_response`` as ``None``.

    The only queued assistant message carries the commentary phase. It should
    still show up among the agent messages.
    """
    expected = {
        "final_response": None,
        "agent_messages": ["Commentary"],
    }

    with AppServerHarness(tmp_path) as harness:
        commentary_event = assistant_message_with_phase(
            "msg-items-commentary",
            "Commentary",
            MessagePhase.commentary,
        )
        events = [
            ev_response_created("items-commentary"),
            commentary_event,
            ev_completed("items-commentary"),
        ]
        harness.responses.enqueue_sse(sse(events))

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            run_result = thread.run("case: commentary only")

        observed = {
            "final_response": run_result.final_response,
            "agent_messages": agent_message_texts_from_items(run_result.items),
        }
        assert observed == expected
def test_async_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
    """Check that the async run result reports the later agent message as final.

    Both assistant messages are queued without an explicit phase. The
    coroutine is executed with ``asyncio.run``.
    """

    async def drive_async_turn() -> None:
        """Queue two messages, run the turn with AsyncCodex, and compare results."""
        expected = {
            "final_response": "Second async message",
            "agent_messages": ["First async message", "Second async message"],
        }

        with AppServerHarness(tmp_path) as harness:
            events = [
                ev_response_created("async-items-last"),
                ev_assistant_message(
                    "msg-async-items-first",
                    "First async message",
                ),
                ev_assistant_message(
                    "msg-async-items-second",
                    "Second async message",
                ),
                ev_completed("async-items-last"),
            ]
            harness.responses.enqueue_sse(sse(events))

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                run_result = await thread.run("case: async last unknown phase")

            observed = {
                "final_response": run_result.final_response,
                "agent_messages": agent_message_texts_from_items(run_result.items),
            }
            assert observed == expected

    asyncio.run(drive_async_turn())
def test_async_run_result_does_not_promote_commentary_only_to_final(
    tmp_path,
) -> None:
    """Check that an async commentary-only turn leaves ``final_response`` as ``None``.

    The only queued assistant message carries the commentary phase. The
    coroutine is executed with ``asyncio.run``.
    """

    async def drive_async_turn() -> None:
        """Queue one commentary message, run the turn, and compare results."""
        expected = {
            "final_response": None,
            "agent_messages": ["Async commentary"],
        }

        with AppServerHarness(tmp_path) as harness:
            commentary_event = assistant_message_with_phase(
                "msg-async-items-commentary",
                "Async commentary",
                MessagePhase.commentary,
            )
            events = [
                ev_response_created("async-items-commentary"),
                commentary_event,
                ev_completed("async-items-commentary"),
            ]
            harness.responses.enqueue_sse(sse(events))

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                run_result = await thread.run("case: async commentary only")

            observed = {
                "final_response": run_result.final_response,
                "agent_messages": agent_message_texts_from_items(run_result.items),
            }
            assert observed == expected

    asyncio.run(drive_async_turn())
def test_thread_run_raises_when_real_app_server_reports_failed_turn(tmp_path) -> None:
    """Check that a failed response surfaces from ``Thread.run`` as ``RuntimeError``.

    The mock stream holds a response-created event followed by a failure
    event. The raised error's message must match the failure text.
    """
    with AppServerHarness(tmp_path) as harness:
        failure_events = [
            ev_response_created("failed-run"),
            ev_failed("failed-run", "boom from mock model"),
        ]
        harness.responses.enqueue_sse(sse(failure_events))

        with Codex(config=harness.app_server_config()) as codex:
            # Start the thread outside the raises block so only run() may fail.
            thread = codex.thread_start()
            with pytest.raises(RuntimeError, match="boom from mock model"):
                thread.run("trigger failure")
def test_final_answer_phase_survives_real_app_server_mapping(tmp_path) -> None:
    """Check that the final-answer message is chosen and phases are preserved.

    A commentary message and a final-answer message are queued in that order.
    The run result is expected to report the final-answer text as
    ``final_response`` and to expose both agent messages with their phase
    values intact.
    """
    expected = {
        "final_response": "Final answer",
        "items": [
            {"text": "Commentary", "phase": MessagePhase.commentary.value},
            {"text": "Final answer", "phase": MessagePhase.final_answer.value},
        ],
    }

    with AppServerHarness(tmp_path) as harness:
        # Build phase-tagged events with the shared helper the other tests in
        # this module use, instead of splatting ev_assistant_message() twice
        # per message.
        # NOTE(review): assumes assistant_message_with_phase sets
        # item["phase"] to the enum's value - confirm in app_server_helpers.
        events = [
            ev_response_created("phase-1"),
            assistant_message_with_phase(
                "msg-commentary",
                "Commentary",
                MessagePhase.commentary,
            ),
            assistant_message_with_phase(
                "msg-final",
                "Final answer",
                MessagePhase.final_answer,
            ),
            ev_completed("phase-1"),
        ]
        harness.responses.enqueue_sse(sse(events))

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            run_result = thread.run("choose final answer")

        # Only agent-message items are compared; other item types are ignored.
        agent_items = [
            item.root for item in run_result.items if item.root.type == "agentMessage"
        ]
        observed = {
            "final_response": run_result.final_response,
            "items": [
                {
                    "text": message.text,
                    "phase": None if message.phase is None else message.phase.value,
                }
                for message in agent_items
            ],
        }
        assert observed == expected