mirror of
https://github.com/openai/codex.git
synced 2026-05-14 16:22:51 +00:00
## Why

The Python SDK needs the same tight formatter/lint loop as the rest of the repo: a safe Ruff autofix pass, Ruff formatting, editor save behavior, and CI checks that catch drift. Without that loop, SDK changes can land with formatting or import ordering that differs from what reviewers and CI expect.

## What

- Add Ruff configuration to `sdk/python/pyproject.toml`, excluding generated protocol code and notebooks from the normal lint/format pass.
- Update `just fmt` so it still formats Rust and also runs Python SDK Ruff autofix and formatting.
- Add Python SDK CI steps for `ruff check` and `ruff format --check` before pytest.
- Recommend the Ruff VS Code extension and enable Python format/fix/organize-on-save so Cmd+S uses the same tooling.
- Apply the resulting Ruff formatting to SDK Python files, examples, and the checked-in generated `v2_all.py` output emitted by the pinned generator.
- Add a guard test for the `just fmt` recipe so it keeps working from both Rust and Python SDK working directories.

## Stack

1. #21891 `[1/8]` Pin Python SDK runtime dependency
2. #21893 `[2/8]` Generate Python SDK types from pinned runtime
3. #21895 `[3/8]` Run Python SDK tests in CI
4. #21896 `[4/8]` Define Python SDK public API surface
5. #21905 `[5/8]` Rename Python SDK package to `openai-codex`
6. #21910 `[6/8]` Add high-level Python SDK approval mode
7. #22014 `[7/8]` Add Python SDK app-server integration harness
8. This PR `[8/8]` Add Python SDK Ruff formatting

## Verification

- Added `test_root_fmt_recipe_formats_rust_and_python_sdk` for the shared format recipe.
- Ran `just fmt` after the recipe update.

---------

Co-authored-by: Codex <noreply@openai.com>
366 lines
13 KiB
Python
366 lines
13 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
|
|
import pytest
|
|
from app_server_harness import (
|
|
AppServerHarness,
|
|
ev_assistant_message,
|
|
ev_completed,
|
|
ev_completed_with_usage,
|
|
ev_failed,
|
|
ev_response_created,
|
|
sse,
|
|
)
|
|
from app_server_helpers import (
|
|
agent_message_texts_from_items,
|
|
assistant_message_with_phase,
|
|
)
|
|
|
|
from openai_codex import AsyncCodex, Codex
|
|
from openai_codex.generated.v2_all import MessagePhase
|
|
|
|
|
|
def test_sync_thread_run_uses_mock_responses(tmp_path) -> None:
    """Drive a synchronous Thread.run through the app-server and inspect the HTTP request.

    One assistant message is queued on the harness, a turn is run through
    ``Codex``, and the run result is compared together with the model, the
    stream flag and the last user text of the single captured request.
    """
    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_assistant_message("Hello from the mock.", response_id="run-1")

        with Codex(config=harness.app_server_config()) as codex:
            outcome = codex.thread_start().run("hello")

        captured = harness.responses.single_request()

    payload = captured.body_json()
    observed = {
        "final_response": outcome.final_response,
        "agent_messages": agent_message_texts_from_items(outcome.items),
        "has_usage": outcome.usage is not None,
        "request_model": payload["model"],
        "request_stream": payload["stream"],
        # Only the most recent user text is compared.
        "request_user_texts": captured.message_input_texts("user")[-1:],
    }
    expected = {
        "final_response": "Hello from the mock.",
        "agent_messages": ["Hello from the mock."],
        "has_usage": True,
        "request_model": "mock-model",
        "request_stream": True,
        "request_user_texts": ["hello"],
    }
    assert observed == expected
|
|
|
|
|
|
def test_run_params_and_usage_cross_app_server_boundary(tmp_path) -> None:
    """Thread.run should forward a model override and report token usage.

    The queued SSE stream completes with explicit token counts. The test
    compares the model named in the captured request, and the ``last`` and
    ``total`` entries of the dumped usage, against those counts.
    """
    with AppServerHarness(tmp_path) as harness:
        stream = sse(
            [
                ev_response_created("run-overrides"),
                ev_assistant_message("msg-run-overrides", "overrides applied"),
                ev_completed_with_usage(
                    "run-overrides",
                    input_tokens=11,
                    cached_input_tokens=3,
                    output_tokens=7,
                    reasoning_output_tokens=5,
                    total_tokens=18,
                ),
            ]
        )
        harness.responses.enqueue_sse(stream)

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            result = thread.run("use overrides", model="mock-model-override")
            request = harness.responses.single_request()

    if result.usage is None:
        usage_payload = None
    else:
        dumped_usage = result.usage.model_dump(by_alias=True, mode="json")
        # Only these two entries of the dump are compared.
        usage_payload = {
            "last": dumped_usage["last"],
            "total": dumped_usage["total"],
        }

    # The same counts are expected for both the "last" and "total" entries.
    expected_counts = {
        "cachedInputTokens": 3,
        "inputTokens": 11,
        "outputTokens": 7,
        "reasoningOutputTokens": 5,
        "totalTokens": 18,
    }
    observed = {
        "final_response": result.final_response,
        "request_model": request.body_json()["model"],
        "usage": usage_payload,
    }
    expected = {
        "final_response": "overrides applied",
        "request_model": "mock-model-override",
        "usage": {"last": expected_counts, "total": expected_counts},
    }
    assert observed == expected
|
|
|
|
|
|
def test_async_thread_run_uses_mock_responses(tmp_path) -> None:
    """Async Thread.run should cross the same app-server boundary as the sync client.

    The coroutine below is driven to completion with ``asyncio.run``.
    """

    async def run_turn() -> None:
        """Run one async turn and compare the result with the captured request."""
        with AppServerHarness(tmp_path) as harness:
            harness.responses.enqueue_assistant_message("Hello async.", response_id="async-run-1")

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                outcome = await thread.run("async hello")

            captured = harness.responses.single_request()

        observed = {
            "final_response": outcome.final_response,
            "agent_messages": agent_message_texts_from_items(outcome.items),
            # Only the most recent user text is compared.
            "request_user_texts": captured.message_input_texts("user")[-1:],
        }
        expected = {
            "final_response": "Hello async.",
            "agent_messages": ["Hello async."],
            "request_user_texts": ["async hello"],
        }
        assert observed == expected

    asyncio.run(run_turn())
|
|
|
|
|
|
def test_sync_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
    """RunResult should take its final text from the last unknown-phase agent message.

    Two assistant messages are streamed; both must appear in the items and the
    second one must be reported as ``final_response``.
    """
    with AppServerHarness(tmp_path) as harness:
        stream = sse(
            [
                ev_response_created("items-last"),
                ev_assistant_message("msg-items-first", "First message"),
                ev_assistant_message("msg-items-second", "Second message"),
                ev_completed("items-last"),
            ]
        )
        harness.responses.enqueue_sse(stream)

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            outcome = thread.run("case: last unknown phase wins")

    observed = {
        "final_response": outcome.final_response,
        "agent_messages": agent_message_texts_from_items(outcome.items),
    }
    expected = {
        "final_response": "Second message",
        "agent_messages": ["First message", "Second message"],
    }
    assert observed == expected
|
|
|
|
|
|
def test_sync_run_result_preserves_empty_last_message(tmp_path) -> None:
    """RunResult should keep an empty final agent message rather than skip it.

    The stream ends with an assistant message whose text is ``""``; that empty
    string is expected both in the items and as ``final_response``.
    """
    with AppServerHarness(tmp_path) as harness:
        stream = sse(
            [
                ev_response_created("items-empty"),
                ev_assistant_message("msg-items-nonempty", "First message"),
                ev_assistant_message("msg-items-empty", ""),
                ev_completed("items-empty"),
            ]
        )
        harness.responses.enqueue_sse(stream)

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            outcome = thread.run("case: empty last message")

    observed = {
        "final_response": outcome.final_response,
        "agent_messages": agent_message_texts_from_items(outcome.items),
    }
    expected = {
        "final_response": "",
        "agent_messages": ["First message", ""],
    }
    assert observed == expected
|
|
|
|
|
|
def test_sync_run_result_does_not_promote_commentary_only_to_final(tmp_path) -> None:
    """RunResult.final_response should stay unset when only commentary is emitted.

    The single streamed assistant message is built with
    ``MessagePhase.commentary``; it must appear in the items while
    ``final_response`` remains ``None``.
    """
    with AppServerHarness(tmp_path) as harness:
        commentary_event = assistant_message_with_phase(
            "msg-items-commentary",
            "Commentary",
            MessagePhase.commentary,
        )
        stream = sse(
            [
                ev_response_created("items-commentary"),
                commentary_event,
                ev_completed("items-commentary"),
            ]
        )
        harness.responses.enqueue_sse(stream)

        with Codex(config=harness.app_server_config()) as codex:
            thread = codex.thread_start()
            outcome = thread.run("case: commentary only")

    observed = {
        "final_response": outcome.final_response,
        "agent_messages": agent_message_texts_from_items(outcome.items),
    }
    expected = {
        "final_response": None,
        "agent_messages": ["Commentary"],
    }
    assert observed == expected
|
|
|
|
|
|
def test_async_run_result_uses_last_unknown_phase_message(tmp_path) -> None:
    """Async RunResult should take its final text from the last unknown-phase message.

    The coroutine below is driven to completion with ``asyncio.run``.
    """

    async def run_case() -> None:
        """Stream two assistant messages and expect the second as final response."""
        with AppServerHarness(tmp_path) as harness:
            stream = sse(
                [
                    ev_response_created("async-items-last"),
                    ev_assistant_message("msg-async-items-first", "First async message"),
                    ev_assistant_message("msg-async-items-second", "Second async message"),
                    ev_completed("async-items-last"),
                ]
            )
            harness.responses.enqueue_sse(stream)

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                outcome = await thread.run("case: async last unknown phase")

        observed = {
            "final_response": outcome.final_response,
            "agent_messages": agent_message_texts_from_items(outcome.items),
        }
        expected = {
            "final_response": "Second async message",
            "agent_messages": ["First async message", "Second async message"],
        }
        assert observed == expected

    asyncio.run(run_case())
|
|
|
|
|
|
def test_async_run_result_does_not_promote_commentary_only_to_final(tmp_path) -> None:
    """Async RunResult.final_response should stay unset for commentary-only output.

    The coroutine below is driven to completion with ``asyncio.run``.
    """

    async def run_case() -> None:
        """Stream one commentary message and expect ``final_response`` to be ``None``."""
        with AppServerHarness(tmp_path) as harness:
            commentary_event = assistant_message_with_phase(
                "msg-async-items-commentary",
                "Async commentary",
                MessagePhase.commentary,
            )
            stream = sse(
                [
                    ev_response_created("async-items-commentary"),
                    commentary_event,
                    ev_completed("async-items-commentary"),
                ]
            )
            harness.responses.enqueue_sse(stream)

            async with AsyncCodex(config=harness.app_server_config()) as codex:
                thread = await codex.thread_start()
                outcome = await thread.run("case: async commentary only")

        observed = {
            "final_response": outcome.final_response,
            "agent_messages": agent_message_texts_from_items(outcome.items),
        }
        expected = {
            "final_response": None,
            "agent_messages": ["Async commentary"],
        }
        assert observed == expected

    asyncio.run(run_case())
|
|
|
|
|
|
def test_thread_run_raises_when_real_app_server_reports_failed_turn(tmp_path) -> None:
    """Thread.run should raise the failed-turn error emitted by app-server.

    The queued stream holds a created event followed by a failure event; the
    run is expected to raise ``RuntimeError`` matching the failure text.
    """
    failure_text = "boom from mock model"

    with AppServerHarness(tmp_path) as harness:
        failing_stream = sse(
            [
                ev_response_created("failed-run"),
                ev_failed("failed-run", failure_text),
            ]
        )
        harness.responses.enqueue_sse(failing_stream)

        with Codex(config=harness.app_server_config()) as codex:
            # Start the thread outside the raises block so that only run()
            # is allowed to produce the expected error.
            thread = codex.thread_start()
            with pytest.raises(RuntimeError, match=failure_text):
                thread.run("trigger failure")
|
|
|
|
|
|
def test_final_answer_phase_survives_real_app_server_mapping(tmp_path) -> None:
    """RunResult should use the final-answer item emitted by app-server.

    A commentary-phase message and a final-answer-phase message are streamed.
    The test checks that both agent messages come back with their phase
    values and that ``final_response`` is the text of the final-answer message.
    """

    def phased_message(item_id: str, text: str, phase: MessagePhase) -> dict:
        """Build one assistant-message event whose ``item`` also carries ``phase``."""
        # Build the base event once and layer the phase onto a copy of its item,
        # instead of constructing the same event twice to read "item" back out.
        event = ev_assistant_message(item_id, text)
        return {**event, "item": {**event["item"], "phase": phase.value}}

    with AppServerHarness(tmp_path) as harness:
        harness.responses.enqueue_sse(
            sse(
                [
                    ev_response_created("phase-1"),
                    phased_message("msg-commentary", "Commentary", MessagePhase.commentary),
                    phased_message("msg-final", "Final answer", MessagePhase.final_answer),
                    ev_completed("phase-1"),
                ]
            )
        )

        with Codex(config=harness.app_server_config()) as codex:
            result = codex.thread_start().run("choose final answer")

    assert {
        "final_response": result.final_response,
        # Only agent-message items are compared; phases are reduced to their
        # enum values (or None when absent).
        "items": [
            {
                "text": item.root.text,
                "phase": None if item.root.phase is None else item.root.phase.value,
            }
            for item in result.items
            if item.root.type == "agentMessage"
        ],
    } == {
        "final_response": "Final answer",
        "items": [
            {"text": "Commentary", "phase": MessagePhase.commentary.value},
            {"text": "Final answer", "phase": MessagePhase.final_answer.value},
        ],
    }
|