mirror of
https://github.com/openai/codex.git
synced 2026-05-21 03:33:41 +00:00
[codex] Return TurnResult from Python turn handles (#23151)
## Why
`TurnHandle.run()` returned the raw app-server `Turn`, whose live
start/completed payloads do not include loaded `items`, so users saw
empty `items` after starting a turn. That made the handle-based path
behave differently from `Thread.run(...)`, and pushed examples toward
persisted-thread reads plus helper extraction.
This PR makes the run APIs standalone: starting a turn and running it
returns collected turn data directly, or fails visibly when required
stream events are missing.
## What Changed
- Replaces the public `RunResult` export with `TurnResult`.
- Adds turn metadata to `TurnResult`: `id`, `status`, `error`,
`started_at`, `completed_at`, and `duration_ms`, alongside
`final_response`, `items`, and `usage`.
- Changes `TurnHandle.run()` and `AsyncTurnHandle.run()` to consume
stream events with the same collector used by `Thread.run(...)`.
- Exports `TurnError` from `openai_codex.types` for the new result
shape.
- Updates tests, examples, docs, and the walkthrough notebook to use
`result.final_response` and `result.items` directly.
- Removes persisted-thread helper paths and placeholder/skipped control
flows from the public examples and notebook.
## Verification
- `python3 -m py_compile ...` over changed SDK, example, and test Python
files.
- `python3 -c "import json;
json.load(open('sdk/python/notebooks/sdk_walkthrough.ipynb'))"`
- `git diff --check`
- `PYTHONPATH=sdk/python/src python3 -c ...` import/signature smoke for
`TurnResult`, `TurnHandle.run`, and `AsyncTurnHandle.run`.
This commit is contained in:
@@ -111,7 +111,7 @@ def agent_message_texts(events: list[Notification]) -> list[str]:
|
||||
|
||||
|
||||
def agent_message_texts_from_items(items: Iterable[Any]) -> list[str]:
|
||||
"""Extract agent-message text from completed run result items."""
|
||||
"""Extract agent-message text from completed turn result items."""
|
||||
texts: list[str] = []
|
||||
for item in items:
|
||||
root = item.root
|
||||
|
||||
@@ -157,7 +157,7 @@ def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
|
||||
|
||||
async with AsyncCodex(config=harness.app_server_config()) as codex:
|
||||
thread = await codex.thread_start()
|
||||
run_result = await thread.run("materialize async thread")
|
||||
turn_result = await thread.run("materialize async thread")
|
||||
await thread.set_name("async lifecycle")
|
||||
named = await thread.read()
|
||||
resumed = await codex.thread_resume(thread.id)
|
||||
@@ -166,14 +166,14 @@ def test_async_lifecycle_methods_round_trip(tmp_path) -> None:
|
||||
unarchived = await codex.thread_unarchive(thread.id)
|
||||
|
||||
assert {
|
||||
"run_final_response": run_result.final_response,
|
||||
"turn_final_response": turn_result.final_response,
|
||||
"named_thread": named.thread.name,
|
||||
"resumed_id": resumed.id,
|
||||
"forked_is_distinct": forked.id != thread.id,
|
||||
"archive_response": archive_response.model_dump(by_alias=True, mode="json"),
|
||||
"unarchived_id": unarchived.id,
|
||||
} == {
|
||||
"run_final_response": "async materialized",
|
||||
"turn_final_response": "async materialized",
|
||||
"named_thread": "async lifecycle",
|
||||
"resumed_id": thread.id,
|
||||
"forked_is_distinct": True,
|
||||
@@ -253,19 +253,19 @@ def test_compact_rpc_hits_mock_responses(tmp_path) -> None:
|
||||
|
||||
with Codex(config=harness.app_server_config()) as codex:
|
||||
thread = codex.thread_start()
|
||||
run_result = thread.run("create history")
|
||||
turn_result = thread.run("create history")
|
||||
compact_response = thread.compact()
|
||||
requests = harness.responses.wait_for_requests(2)
|
||||
|
||||
assert {
|
||||
"run_final_response": run_result.final_response,
|
||||
"turn_final_response": turn_result.final_response,
|
||||
"compact_response": compact_response.model_dump(
|
||||
by_alias=True,
|
||||
mode="json",
|
||||
),
|
||||
"request_kinds": [request_kind(request.path) for request in requests],
|
||||
} == {
|
||||
"run_final_response": "history",
|
||||
"turn_final_response": "history",
|
||||
"compact_response": {},
|
||||
"request_kinds": ["responses", "responses"],
|
||||
}
|
||||
|
||||
@@ -145,8 +145,8 @@ def test_async_thread_run_uses_mock_responses(
|
||||
asyncio.run(scenario())
|
||||
|
||||
|
||||
def test_sync_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
|
||||
"""RunResult should use the last unknown-phase agent message as final text."""
|
||||
def test_sync_turn_result_uses_last_unknown_phase_message(tmp_path) -> None:
|
||||
"""TurnResult should use the last unknown-phase agent message as final text."""
|
||||
with AppServerHarness(tmp_path) as harness:
|
||||
harness.responses.enqueue_sse(
|
||||
sse(
|
||||
@@ -171,8 +171,8 @@ def test_sync_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
|
||||
}
|
||||
|
||||
|
||||
def test_sync_run_result_preserves_empty_last_message(tmp_path) -> None:
|
||||
"""RunResult should preserve an empty final agent message instead of skipping it."""
|
||||
def test_sync_turn_result_preserves_empty_last_message(tmp_path) -> None:
|
||||
"""TurnResult should preserve an empty final agent message instead of skipping it."""
|
||||
with AppServerHarness(tmp_path) as harness:
|
||||
harness.responses.enqueue_sse(
|
||||
sse(
|
||||
@@ -197,8 +197,8 @@ def test_sync_run_result_preserves_empty_last_message(tmp_path) -> None:
|
||||
}
|
||||
|
||||
|
||||
def test_sync_run_result_does_not_promote_commentary_only_to_final(tmp_path) -> None:
|
||||
"""RunResult final_response should stay unset when app-server marks only commentary."""
|
||||
def test_sync_turn_result_does_not_promote_commentary_only_to_final(tmp_path) -> None:
|
||||
"""TurnResult final_response should stay unset when app-server marks only commentary."""
|
||||
with AppServerHarness(tmp_path) as harness:
|
||||
harness.responses.enqueue_sse(
|
||||
sse(
|
||||
@@ -226,8 +226,8 @@ def test_sync_run_result_does_not_promote_commentary_only_to_final(tmp_path) ->
|
||||
}
|
||||
|
||||
|
||||
def test_async_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
|
||||
"""Async RunResult should use the last unknown-phase agent message."""
|
||||
def test_async_turn_result_uses_last_unknown_phase_message(tmp_path) -> None:
|
||||
"""Async TurnResult should use the last unknown-phase agent message."""
|
||||
|
||||
async def scenario() -> None:
|
||||
"""Run one async result-mapping case against a pinned app-server."""
|
||||
@@ -263,10 +263,10 @@ def test_async_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
|
||||
asyncio.run(scenario())
|
||||
|
||||
|
||||
def test_async_run_result_does_not_promote_commentary_only_to_final(
|
||||
def test_async_turn_result_does_not_promote_commentary_only_to_final(
|
||||
tmp_path,
|
||||
) -> None:
|
||||
"""Async RunResult final_response should stay unset for commentary-only output."""
|
||||
"""Async TurnResult final_response should stay unset for commentary-only output."""
|
||||
|
||||
async def scenario() -> None:
|
||||
"""Run one async commentary mapping case against a pinned app-server."""
|
||||
@@ -318,7 +318,7 @@ def test_thread_run_raises_when_real_app_server_reports_failed_turn(tmp_path) ->
|
||||
|
||||
|
||||
def test_final_answer_phase_survives_real_app_server_mapping(tmp_path) -> None:
|
||||
"""RunResult should use the final-answer item emitted by app-server."""
|
||||
"""TurnResult should use the final-answer item emitted by app-server."""
|
||||
with AppServerHarness(tmp_path) as harness:
|
||||
harness.responses.enqueue_sse(
|
||||
sse(
|
||||
|
||||
@@ -5,6 +5,7 @@ import asyncio
|
||||
from app_server_harness import AppServerHarness
|
||||
from app_server_helpers import (
|
||||
agent_message_texts,
|
||||
agent_message_texts_from_items,
|
||||
next_async_delta,
|
||||
next_sync_delta,
|
||||
streaming_response,
|
||||
@@ -48,7 +49,7 @@ def test_sync_stream_routes_text_deltas_and_completion(tmp_path) -> None:
|
||||
|
||||
|
||||
def test_turn_run_returns_completed_turn(tmp_path) -> None:
|
||||
"""TurnHandle.run should wait for the app-server completion notification."""
|
||||
"""TurnHandle.run should collect output and completion metadata."""
|
||||
with AppServerHarness(tmp_path) as harness:
|
||||
harness.responses.enqueue_assistant_message("turn complete", response_id="turn-run-1")
|
||||
|
||||
@@ -60,11 +61,13 @@ def test_turn_run_returns_completed_turn(tmp_path) -> None:
|
||||
assert {
|
||||
"turn_id": completed.id,
|
||||
"status": completed.status,
|
||||
"items": completed.items,
|
||||
"agent_messages": agent_message_texts_from_items(completed.items),
|
||||
"final_response": completed.final_response,
|
||||
} == {
|
||||
"turn_id": turn.id,
|
||||
"status": TurnStatus.completed,
|
||||
"items": [],
|
||||
"agent_messages": ["turn complete"],
|
||||
"final_response": "turn complete",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -14,9 +14,11 @@ from openai_codex import (
|
||||
AppServerConfig,
|
||||
AsyncCodex,
|
||||
AsyncThread,
|
||||
AsyncTurnHandle,
|
||||
Codex,
|
||||
RunResult,
|
||||
Thread,
|
||||
TurnHandle,
|
||||
TurnResult,
|
||||
)
|
||||
from openai_codex._initialize_metadata import validate_initialize_metadata
|
||||
from openai_codex.types import InitializeResponse
|
||||
@@ -35,7 +37,7 @@ EXPECTED_ROOT_EXPORTS = [
|
||||
"AsyncThread",
|
||||
"TurnHandle",
|
||||
"AsyncTurnHandle",
|
||||
"RunResult",
|
||||
"TurnResult",
|
||||
"Input",
|
||||
"InputItem",
|
||||
"TextInput",
|
||||
@@ -92,6 +94,7 @@ EXPECTED_TYPES_EXPORTS = [
|
||||
"ThreadTokenUsageUpdatedNotification",
|
||||
"Turn",
|
||||
"TurnCompletedNotification",
|
||||
"TurnError",
|
||||
"TurnInterruptResponse",
|
||||
"TurnStatus",
|
||||
"TurnSteerResponse",
|
||||
@@ -128,9 +131,39 @@ def test_root_exports_app_server_config() -> None:
|
||||
assert AppServerConfig.__name__ == "AppServerConfig"
|
||||
|
||||
|
||||
def test_root_exports_run_result() -> None:
|
||||
"""The root package should expose the common-case run result wrapper."""
|
||||
assert RunResult.__name__ == "RunResult"
|
||||
def test_root_exports_turn_result() -> None:
|
||||
"""The root package should expose the collected turn result wrapper."""
|
||||
assert {
|
||||
"name": TurnResult.__name__,
|
||||
"fields": list(TurnResult.__dataclass_fields__),
|
||||
} == {
|
||||
"name": "TurnResult",
|
||||
"fields": [
|
||||
"id",
|
||||
"status",
|
||||
"error",
|
||||
"started_at",
|
||||
"completed_at",
|
||||
"duration_ms",
|
||||
"final_response",
|
||||
"items",
|
||||
"usage",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def test_turn_run_methods_return_turn_result() -> None:
|
||||
"""Both convenience and handle-based run APIs return the same result shape."""
|
||||
funcs = [
|
||||
Thread.run,
|
||||
TurnHandle.run,
|
||||
AsyncThread.run,
|
||||
AsyncTurnHandle.run,
|
||||
]
|
||||
|
||||
assert {fn: inspect.signature(fn).return_annotation for fn in funcs} == dict.fromkeys(
|
||||
funcs, "TurnResult"
|
||||
)
|
||||
|
||||
|
||||
def test_root_exports_approval_mode() -> None:
|
||||
|
||||
@@ -135,7 +135,7 @@ def _run_python(
|
||||
)
|
||||
|
||||
|
||||
def _runtime_compatibility_hint(
|
||||
def _runtime_schema_hint(
|
||||
runtime_env: PreparedRuntimeEnv,
|
||||
*,
|
||||
stdout: str,
|
||||
@@ -144,7 +144,7 @@ def _runtime_compatibility_hint(
|
||||
combined = f"{stdout}\n{stderr}"
|
||||
if "ThreadStartResponse" in combined and "approvalsReviewer" in combined:
|
||||
return (
|
||||
"\nCompatibility hint:\n"
|
||||
"\nSchema hint:\n"
|
||||
f"Pinned runtime {runtime_env.runtime_version} returned a thread/start payload "
|
||||
"that is older than the current SDK schema and is missing "
|
||||
"`approvalsReviewer`. Bump `sdk/python/_runtime_setup.py` to a matching "
|
||||
@@ -165,7 +165,7 @@ def _run_json_python(
|
||||
"Python snippet failed.\n"
|
||||
f"STDOUT:\n{result.stdout}\n"
|
||||
f"STDERR:\n{result.stderr}"
|
||||
f"{_runtime_compatibility_hint(runtime_env, stdout=result.stdout, stderr=result.stderr)}"
|
||||
f"{_runtime_schema_hint(runtime_env, stdout=result.stdout, stderr=result.stderr)}"
|
||||
)
|
||||
return json.loads(result.stdout)
|
||||
|
||||
@@ -242,17 +242,12 @@ def test_real_thread_and_turn_start_smoke(runtime_env: PreparedRuntimeEnv) -> No
|
||||
config={"model_reasoning_effort": "high"},
|
||||
)
|
||||
result = thread.turn(TextInput("hello")).run()
|
||||
persisted = thread.read(include_turns=True)
|
||||
persisted_turn = next(
|
||||
(turn for turn in persisted.thread.turns or [] if turn.id == result.id),
|
||||
None,
|
||||
)
|
||||
print(json.dumps({
|
||||
"thread_id": thread.id,
|
||||
"turn_id": result.id,
|
||||
"status": result.status.value,
|
||||
"items_count": len(result.items or []),
|
||||
"persisted_items_count": 0 if persisted_turn is None else len(persisted_turn.items or []),
|
||||
"items_count": len(result.items),
|
||||
"final_response_is_text": isinstance(result.final_response, str) and bool(result.final_response.strip()),
|
||||
}))
|
||||
"""
|
||||
),
|
||||
@@ -261,8 +256,8 @@ def test_real_thread_and_turn_start_smoke(runtime_env: PreparedRuntimeEnv) -> No
|
||||
assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
|
||||
assert isinstance(data["turn_id"], str) and data["turn_id"].strip()
|
||||
assert data["status"] == "completed"
|
||||
assert isinstance(data["items_count"], int)
|
||||
assert isinstance(data["persisted_items_count"], int)
|
||||
assert data["items_count"] > 0
|
||||
assert data["final_response_is_text"] is True
|
||||
|
||||
|
||||
def test_real_thread_run_convenience_smoke(runtime_env: PreparedRuntimeEnv) -> None:
|
||||
@@ -345,17 +340,12 @@ def test_real_async_thread_turn_usage_and_ids_smoke(
|
||||
config={"model_reasoning_effort": "high"},
|
||||
)
|
||||
result = await (await thread.turn(TextInput("say ok"))).run()
|
||||
persisted = await thread.read(include_turns=True)
|
||||
persisted_turn = next(
|
||||
(turn for turn in persisted.thread.turns or [] if turn.id == result.id),
|
||||
None,
|
||||
)
|
||||
print(json.dumps({
|
||||
"thread_id": thread.id,
|
||||
"turn_id": result.id,
|
||||
"status": result.status.value,
|
||||
"items_count": len(result.items or []),
|
||||
"persisted_items_count": 0 if persisted_turn is None else len(persisted_turn.items or []),
|
||||
"items_count": len(result.items),
|
||||
"final_response_is_text": isinstance(result.final_response, str) and bool(result.final_response.strip()),
|
||||
}))
|
||||
|
||||
asyncio.run(main())
|
||||
@@ -366,8 +356,8 @@ def test_real_async_thread_turn_usage_and_ids_smoke(
|
||||
assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
|
||||
assert isinstance(data["turn_id"], str) and data["turn_id"].strip()
|
||||
assert data["status"] == "completed"
|
||||
assert isinstance(data["items_count"], int)
|
||||
assert isinstance(data["persisted_items_count"], int)
|
||||
assert data["items_count"] > 0
|
||||
assert data["final_response_is_text"] is True
|
||||
|
||||
|
||||
def test_real_async_thread_run_convenience_smoke(
|
||||
@@ -531,7 +521,7 @@ def test_real_examples_run_and_assert(
|
||||
f"Example failed: {folder}/{script}\n"
|
||||
f"STDOUT:\n{result.stdout}\n"
|
||||
f"STDERR:\n{result.stderr}"
|
||||
f"{_runtime_compatibility_hint(runtime_env, stdout=result.stdout, stderr=result.stderr)}"
|
||||
f"{_runtime_schema_hint(runtime_env, stdout=result.stdout, stderr=result.stderr)}"
|
||||
)
|
||||
|
||||
out = result.stdout
|
||||
@@ -541,7 +531,7 @@ def test_real_examples_run_and_assert(
|
||||
assert "Server: unknown" not in out
|
||||
elif folder == "02_turn_run":
|
||||
assert "thread_id:" in out and "turn_id:" in out and "status:" in out
|
||||
assert "persisted.items.count:" in out
|
||||
assert "items.count:" in out
|
||||
elif folder == "03_turn_stream_events":
|
||||
assert "stream.completed:" in out
|
||||
assert "assistant>" in out
|
||||
|
||||
Reference in New Issue
Block a user