Files
codex/sdk/python/tests/test_app_server_run.py
Ahmed Ibrahim c3e22fe134 Tighten SDK integration assertions
Assert skill inputs as persisted structured history and keep run override coverage to the model request plus token usage, matching the public SDK behavior exercised by the harness.

Co-authored-by: Codex <noreply@openai.com>
2026-05-10 15:17:20 +03:00

370 lines
13 KiB
Python

from __future__ import annotations
import asyncio
import pytest
from app_server_harness import (
AppServerHarness,
ev_assistant_message,
ev_completed,
ev_completed_with_usage,
ev_failed,
ev_response_created,
sse,
)
from openai_codex import AsyncCodex, Codex
from openai_codex.generated.v2_all import MessagePhase
from app_server_helpers import (
agent_message_texts_from_items,
assistant_message_with_phase,
)
def test_sync_thread_run_uses_mock_responses(
tmp_path,
) -> None:
"""Drive Thread.run through the pinned app-server and inspect the HTTP request."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_assistant_message("Hello from the mock.", response_id="run-1")
with Codex(config=harness.app_server_config()) as codex:
thread = codex.thread_start()
result = thread.run("hello")
request = harness.responses.single_request()
body = request.body_json()
assert {
"final_response": result.final_response,
"agent_messages": agent_message_texts_from_items(result.items),
"has_usage": result.usage is not None,
"request_model": body["model"],
"request_stream": body["stream"],
"request_user_texts": request.message_input_texts("user")[-1:],
} == {
"final_response": "Hello from the mock.",
"agent_messages": ["Hello from the mock."],
"has_usage": True,
"request_model": "mock-model",
"request_stream": True,
"request_user_texts": ["hello"],
}
def test_run_params_and_usage_cross_app_server_boundary(tmp_path) -> None:
"""Thread.run should pass overrides and collect app-server token usage."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("run-overrides"),
ev_assistant_message("msg-run-overrides", "overrides applied"),
ev_completed_with_usage(
"run-overrides",
input_tokens=11,
cached_input_tokens=3,
output_tokens=7,
reasoning_output_tokens=5,
total_tokens=18,
),
]
)
)
with Codex(config=harness.app_server_config()) as codex:
thread = codex.thread_start()
result = thread.run(
"use overrides",
model="mock-model-override",
)
request = harness.responses.single_request()
usage_payload = None
if result.usage is not None:
dumped_usage = result.usage.model_dump(by_alias=True, mode="json")
usage_payload = {
"last": dumped_usage["last"],
"total": dumped_usage["total"],
}
assert {
"final_response": result.final_response,
"request_model": request.body_json()["model"],
"usage": usage_payload,
} == {
"final_response": "overrides applied",
"request_model": "mock-model-override",
"usage": {
"last": {
"cachedInputTokens": 3,
"inputTokens": 11,
"outputTokens": 7,
"reasoningOutputTokens": 5,
"totalTokens": 18,
},
"total": {
"cachedInputTokens": 3,
"inputTokens": 11,
"outputTokens": 7,
"reasoningOutputTokens": 5,
"totalTokens": 18,
},
},
}
def test_async_thread_run_uses_mock_responses(
tmp_path,
) -> None:
"""Async Thread.run should exercise the same app-server boundary."""
async def scenario() -> None:
"""Run the async client against a real app-server process."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_assistant_message(
"Hello async.",
response_id="async-run-1",
)
async with AsyncCodex(config=harness.app_server_config()) as codex:
thread = await codex.thread_start()
result = await thread.run("async hello")
request = harness.responses.single_request()
assert {
"final_response": result.final_response,
"agent_messages": agent_message_texts_from_items(result.items),
"request_user_texts": request.message_input_texts("user")[-1:],
} == {
"final_response": "Hello async.",
"agent_messages": ["Hello async."],
"request_user_texts": ["async hello"],
}
asyncio.run(scenario())
def test_sync_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
"""RunResult should use the last unknown-phase agent message as final text."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("items-last"),
ev_assistant_message("msg-items-first", "First message"),
ev_assistant_message("msg-items-second", "Second message"),
ev_completed("items-last"),
]
)
)
with Codex(config=harness.app_server_config()) as codex:
result = codex.thread_start().run("case: last unknown phase wins")
assert {
"final_response": result.final_response,
"agent_messages": agent_message_texts_from_items(result.items),
} == {
"final_response": "Second message",
"agent_messages": ["First message", "Second message"],
}
def test_sync_run_result_preserves_empty_last_message(tmp_path) -> None:
"""RunResult should preserve an empty final agent message instead of skipping it."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("items-empty"),
ev_assistant_message("msg-items-nonempty", "First message"),
ev_assistant_message("msg-items-empty", ""),
ev_completed("items-empty"),
]
)
)
with Codex(config=harness.app_server_config()) as codex:
result = codex.thread_start().run("case: empty last message")
assert {
"final_response": result.final_response,
"agent_messages": agent_message_texts_from_items(result.items),
} == {
"final_response": "",
"agent_messages": ["First message", ""],
}
def test_sync_run_result_does_not_promote_commentary_only_to_final(tmp_path) -> None:
"""RunResult final_response should stay unset when app-server marks only commentary."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("items-commentary"),
assistant_message_with_phase(
"msg-items-commentary",
"Commentary",
MessagePhase.commentary,
),
ev_completed("items-commentary"),
]
)
)
with Codex(config=harness.app_server_config()) as codex:
result = codex.thread_start().run("case: commentary only")
assert {
"final_response": result.final_response,
"agent_messages": agent_message_texts_from_items(result.items),
} == {
"final_response": None,
"agent_messages": ["Commentary"],
}
def test_async_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
"""Async RunResult should use the last unknown-phase agent message."""
async def scenario() -> None:
"""Run one async result-mapping case against a pinned app-server."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("async-items-last"),
ev_assistant_message(
"msg-async-items-first",
"First async message",
),
ev_assistant_message(
"msg-async-items-second",
"Second async message",
),
ev_completed("async-items-last"),
]
)
)
async with AsyncCodex(config=harness.app_server_config()) as codex:
result = await (await codex.thread_start()).run(
"case: async last unknown phase"
)
assert {
"final_response": result.final_response,
"agent_messages": agent_message_texts_from_items(result.items),
} == {
"final_response": "Second async message",
"agent_messages": ["First async message", "Second async message"],
}
asyncio.run(scenario())
def test_async_run_result_does_not_promote_commentary_only_to_final(
tmp_path,
) -> None:
"""Async RunResult final_response should stay unset for commentary-only output."""
async def scenario() -> None:
"""Run one async commentary mapping case against a pinned app-server."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("async-items-commentary"),
assistant_message_with_phase(
"msg-async-items-commentary",
"Async commentary",
MessagePhase.commentary,
),
ev_completed("async-items-commentary"),
]
)
)
async with AsyncCodex(config=harness.app_server_config()) as codex:
result = await (await codex.thread_start()).run(
"case: async commentary only"
)
assert {
"final_response": result.final_response,
"agent_messages": agent_message_texts_from_items(result.items),
} == {
"final_response": None,
"agent_messages": ["Async commentary"],
}
asyncio.run(scenario())
def test_thread_run_raises_when_real_app_server_reports_failed_turn(tmp_path) -> None:
"""Thread.run should surface the failed turn error emitted by app-server."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("failed-run"),
ev_failed("failed-run", "boom from mock model"),
]
)
)
with Codex(config=harness.app_server_config()) as codex:
thread = codex.thread_start()
with pytest.raises(RuntimeError, match="boom from mock model"):
thread.run("trigger failure")
def test_final_answer_phase_survives_real_app_server_mapping(tmp_path) -> None:
"""RunResult should use the final-answer item emitted by app-server."""
with AppServerHarness(tmp_path) as harness:
harness.responses.enqueue_sse(
sse(
[
ev_response_created("phase-1"),
{
**ev_assistant_message("msg-commentary", "Commentary"),
"item": {
**ev_assistant_message("msg-commentary", "Commentary")["item"],
"phase": MessagePhase.commentary.value,
},
},
{
**ev_assistant_message("msg-final", "Final answer"),
"item": {
**ev_assistant_message("msg-final", "Final answer")["item"],
"phase": MessagePhase.final_answer.value,
},
},
ev_completed("phase-1"),
]
)
)
with Codex(config=harness.app_server_config()) as codex:
result = codex.thread_start().run("choose final answer")
assert {
"final_response": result.final_response,
"items": [
{
"text": item.root.text,
"phase": None if item.root.phase is None else item.root.phase.value,
}
for item in result.items
if item.root.type == "agentMessage"
],
} == {
"final_response": "Final answer",
"items": [
{"text": "Commentary", "phase": MessagePhase.commentary.value},
{"text": "Final answer", "phase": MessagePhase.final_answer.value},
],
}