Compare commits

...

5 Commits

Author SHA1 Message Date
Shaqayeq
5f141adc0d python-sdk: fix thread.run final response semantics 2026-03-18 13:51:05 -07:00
Shaqayeq
8c521cb74d Merge branch 'main' into dev/shaqayeq/python-sdk-thread-run 2026-03-18 13:28:48 -07:00
Shaqayeq
33289a1dd3 python-sdk: update quickstart examples for thread.run 2026-03-18 13:22:38 -07:00
Shaqayeq
3536149351 python-sdk: add thread.run convenience methods
Co-authored-by: Codex <noreply@openai.com>
2026-03-18 13:21:43 -07:00
Felipe Coury
334164a6f7 feat(tui): restore composer history in app-server tui (#14945)
## Problem

The app-server TUI (`tui_app_server`) lacked composer history support.
Pressing Up/Down to recall previous prompts hit a stub that logged a
warning and displayed "Not available in app-server TUI yet." New
submissions were silently dropped from the shared history file, so
nothing persisted for future sessions.

## Mental model

Codex maintains a single, append-only history file
(`$CODEX_HOME/history.jsonl`) shared across all TUI processes on the
same machine. The legacy (in-process) TUI already reads/writes this file
through `codex_core::message_history`. The app-server TUI delegates most
operations to a separate process over RPC, but history is intentionally
*not* an RPC concern — it's a client-local file.

This PR makes the app-server TUI access the same history file directly,
bypassing the app-server process entirely. The composer's Up/Down
navigation and submit-time persistence now follow the same code paths as
the legacy TUI, with the only difference being *where* the call is
dispatched (locally in `App`, rather than inside `CodexThread`).

The branch is rebuilt directly on top of `upstream/main`, so it keeps
the existing app-server restore architecture intact.
`AppServerStartedThread` still restores transcript history from the
server `Thread` snapshot via `thread_snapshot_events`; this PR only adds
composer-history support.

## Non-goals

- Adding history support to the app-server protocol. History remains
client-local.
- Changing the on-disk format or location of `history.jsonl`.
- Surfacing history I/O errors to the user (failures are logged but
otherwise swallowed, matching the legacy TUI).

## Tradeoffs

| Decision | Why | Risk |
|----------|-----|------|
| Widen `message_history` from `pub(crate)` to `pub` | Avoids
duplicating file I/O logic; the module already has a clean, minimal API
surface. | Other workspace crates can now call these functions — the
contract is no longer crate-private. However, this is consistent with
recent precedent: `590cfa617` exposed `mention_syntax` for TUI
consumption, `752402c4f` exposed plugin APIs (`PluginsManager`), and
`14fcb6645`/`edacbf7b6` widened internal core APIs for other crates.
These were all narrow, intentional exposures of specific APIs — not
broad "make internals public" moves. `1af2a37ad` even went the other
direction, reducing broad re-exports to tighten boundaries. This change
follows the same pattern: a small, deliberate API surface (3 functions)
rather than a wholesale visibility change. |
| Intercept `AddToHistory` / `GetHistoryEntryRequest` in `App` before
RPC fallback | Keeps history ops out of the "unsupported op" error path
without changing app-server protocol. | This now routes through a single
`submit_thread_op` entry point, which is safer than the original
duplicated dispatch. The remaining risk is organizational: future
thread-op submission paths need to keep using that shared entry point. |
| `session_configured_from_thread_response` is now `async` | Needs
`await` on `history_metadata()` to populate real `history_log_id` /
`history_entry_count`. | Adds an async file-stat + full-file newline
scan to the session bootstrap path. The scan is bounded by
`history.max_bytes` and matches the legacy TUI's cost profile, but
startup latency still scales with file size. |

## Architecture

```
User presses Up                     User submits a prompt
       │                                    │
       ▼                                    ▼
ChatComposerHistory                 ChatWidget::do_submit_turn
  navigate_up()                       encode_history_mentions()
       │                                    │
       ▼                                    ▼
  AppEvent::CodexOp                  Op::AddToHistory { text }
  (GetHistoryEntryRequest)                  │
       │                                    ▼
       ▼                            App::try_handle_local_history_op
  App::try_handle_local_history_op    message_history::append_entry()
    spawn_blocking {                        │
      message_history::lookup()             ▼
    }                                $CODEX_HOME/history.jsonl
       │
       ▼
  AppEvent::ThreadEvent
  (GetHistoryEntryResponse)
       │
       ▼
  ChatComposerHistory::on_entry_response()
```

## Observability

- `tracing::warn` on `append_entry` failure (includes thread ID).
- `tracing::warn` on `spawn_blocking` lookup join error.
- `tracing::warn` from `message_history` internals on file-open, lock,
or parse failures.

## Tests

- `chat_composer_history::tests::navigation_with_async_fetch` — verifies
that Up emits `Op::GetHistoryEntryRequest` (was: checked for stub error
cell).
- `app::tests::history_lookup_response_is_routed_to_requesting_thread` —
verifies multi-thread composer recall routes the lookup result back to
the originating thread.
- `app_server_session::tests::resume_response_relies_on_snapshot_replay_not_initial_messages`
— verifies app-server session restore still uses the upstream
thread-snapshot path.
- `app_server_session::tests::session_configured_populates_history_metadata`
— verifies bootstrap sets nonzero `history_log_id` /
`history_entry_count` from the shared local history file.
2026-03-18 11:54:11 -06:00
19 changed files with 892 additions and 149 deletions

View File

@@ -63,7 +63,7 @@ mod mcp_tool_call;
mod memories;
pub mod mention_syntax;
mod mentions;
mod message_history;
pub mod message_history;
mod model_provider_info;
pub mod path_utils;
pub mod personality_migration;

View File

@@ -66,14 +66,22 @@ fn history_filepath(config: &Config) -> PathBuf {
path
}
/// Append a `text` entry associated with `conversation_id` to the history file. Uses
/// advisory file locking to ensure that concurrent writes do not interleave,
/// which entails a small amount of blocking I/O internally.
pub(crate) async fn append_entry(
text: &str,
conversation_id: &ThreadId,
config: &Config,
) -> Result<()> {
/// Append a `text` entry associated with `conversation_id` to the history file.
///
/// Uses advisory file locking (`File::try_lock`) with a retry loop to ensure
/// concurrent writes from multiple TUI processes do not interleave. The lock
/// acquisition and write are performed inside `spawn_blocking` so the caller's
/// async runtime is not blocked.
///
/// The entry is silently skipped when `config.history.persistence` is
/// [`HistoryPersistence::None`].
///
/// # Errors
///
/// Returns an I/O error if the history file cannot be opened/created, the
/// system clock is before the Unix epoch, or the exclusive lock cannot be
/// acquired after [`MAX_RETRIES`] attempts.
pub async fn append_entry(text: &str, conversation_id: &ThreadId, config: &Config) -> Result<()> {
match config.history.persistence {
HistoryPersistence::SaveAll => {
// Save everything: proceed.
@@ -243,22 +251,29 @@ fn trim_target_bytes(max_bytes: u64, newest_entry_len: u64) -> u64 {
soft_cap_bytes.max(newest_entry_len)
}
/// Asynchronously fetch the history file's *identifier* (inode on Unix) and
/// the current number of entries by counting newline characters.
pub(crate) async fn history_metadata(config: &Config) -> (u64, usize) {
/// Asynchronously fetch the history file's *identifier* and current entry count.
///
/// The identifier is the file's inode on Unix or creation time on Windows.
/// The entry count is derived by counting newline bytes in the file. Returns
/// `(0, 0)` when the file does not exist or its metadata cannot be read. If
/// metadata succeeds but the file cannot be opened or scanned, returns
/// `(log_id, 0)` so callers can still detect that a history file exists.
pub async fn history_metadata(config: &Config) -> (u64, usize) {
let path = history_filepath(config);
history_metadata_for_file(&path).await
}
/// Given a `log_id` (on Unix this is the file's inode number,
/// on Windows this is the file's creation time) and a zero-based
/// `offset`, return the corresponding `HistoryEntry` if the identifier matches
/// the current history file **and** the requested offset exists. Any I/O or
/// parsing errors are logged and result in `None`.
/// Look up a single history entry by file identity and zero-based offset.
///
/// Note this function is not async because it uses a sync advisory file
/// locking API.
pub(crate) fn lookup(log_id: u64, offset: usize, config: &Config) -> Option<HistoryEntry> {
/// Returns `Some(entry)` when the current history file's identifier (inode on
/// Unix, creation time on Windows) matches `log_id` **and** a valid JSON
/// record exists at `offset`. Returns `None` on any mismatch, I/O error, or
/// parse failure, all of which are logged at `warn` level.
///
/// This function is synchronous because it acquires a shared advisory file lock
/// via `File::try_lock_shared`. Callers on an async runtime should wrap it in
/// `spawn_blocking`.
pub fn lookup(log_id: u64, offset: usize, config: &Config) -> Option<HistoryEntry> {
let path = history_filepath(config);
lookup_history_entry(&path, log_id, offset)
}

View File

@@ -69,6 +69,7 @@ use codex_core::config::types::ApprovalsReviewer;
use codex_core::config::types::ModelAvailabilityNuxConfig;
use codex_core::config_loader::ConfigLayerStackOrdering;
use codex_core::features::Feature;
use codex_core::message_history;
use codex_core::models_manager::collaboration_mode_presets::CollaborationModesConfig;
use codex_core::models_manager::model_presets::HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG;
use codex_core::models_manager::model_presets::HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG;
@@ -86,10 +87,10 @@ use codex_protocol::openai_models::ModelUpgrade;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::FinalOutput;
use codex_protocol::protocol::GetHistoryEntryResponseEvent;
use codex_protocol::protocol::ListSkillsResponseEvent;
#[cfg(test)]
use codex_protocol::protocol::McpAuthStatus;
#[cfg(test)]
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
@@ -457,6 +458,7 @@ struct ThreadEventSnapshot {
enum ThreadBufferedEvent {
Notification(ServerNotification),
Request(ServerRequest),
HistoryEntryResponse(GetHistoryEntryResponseEvent),
LegacyWarning(String),
LegacyRollback { num_turns: u32 },
}
@@ -616,6 +618,7 @@ impl ThreadEventStore {
.pending_interactive_replay
.should_replay_snapshot_request(request),
ThreadBufferedEvent::Notification(_)
| ThreadBufferedEvent::HistoryEntryResponse(_)
| ThreadBufferedEvent::LegacyWarning(_)
| ThreadBufferedEvent::LegacyRollback { .. } => true,
})
@@ -1763,8 +1766,21 @@ impl App {
return Ok(());
};
self.submit_thread_op(app_server, thread_id, op).await
}
async fn submit_thread_op(
&mut self,
app_server: &mut AppServerSession,
thread_id: ThreadId,
op: AppCommand,
) -> Result<()> {
crate::session_log::log_outbound_op(&op);
if self.try_handle_local_history_op(thread_id, &op).await? {
return Ok(());
}
if self
.try_resolve_app_server_request(app_server, thread_id, &op)
.await?
@@ -1777,7 +1793,7 @@ impl App {
.await?
{
if ThreadEventStore::op_can_change_pending_replay_state(&op) {
self.note_active_thread_outbound_op(&op).await;
self.note_thread_outbound_op(thread_id, &op).await;
self.refresh_pending_thread_approvals().await;
}
return Ok(());
@@ -1855,6 +1871,66 @@ impl App {
}
}
/// Intercept composer-history operations and handle them locally against
/// `$CODEX_HOME/history.jsonl`, bypassing the app-server RPC layer.
async fn try_handle_local_history_op(
&mut self,
thread_id: ThreadId,
op: &AppCommand,
) -> Result<bool> {
match op.view() {
AppCommandView::Other(Op::AddToHistory { text }) => {
let text = text.clone();
let config = self.chat_widget.config_ref().clone();
tokio::spawn(async move {
if let Err(err) =
message_history::append_entry(&text, &thread_id, &config).await
{
tracing::warn!(
thread_id = %thread_id,
error = %err,
"failed to append to message history"
);
}
});
Ok(true)
}
AppCommandView::Other(Op::GetHistoryEntryRequest { offset, log_id }) => {
let offset = *offset;
let log_id = *log_id;
let config = self.chat_widget.config_ref().clone();
let app_event_tx = self.app_event_tx.clone();
tokio::spawn(async move {
let entry_opt = tokio::task::spawn_blocking(move || {
message_history::lookup(log_id, offset, &config)
})
.await
.unwrap_or_else(|err| {
tracing::warn!(error = %err, "history lookup task failed");
None
});
app_event_tx.send(AppEvent::ThreadHistoryEntryResponse {
thread_id,
event: GetHistoryEntryResponseEvent {
offset,
log_id,
entry: entry_opt.map(|entry| {
codex_protocol::message_history::HistoryEntry {
conversation_id: entry.session_id,
ts: entry.ts,
text: entry.text,
}
}),
},
});
});
Ok(true)
}
_ => Ok(false),
}
}
async fn try_submit_active_thread_op_via_app_server(
&mut self,
app_server: &mut AppServerSession,
@@ -2213,6 +2289,50 @@ impl App {
Ok(())
}
async fn enqueue_thread_history_entry_response(
&mut self,
thread_id: ThreadId,
event: GetHistoryEntryResponseEvent,
) -> Result<()> {
let (sender, store) = {
let channel = self.ensure_thread_channel(thread_id);
(channel.sender.clone(), Arc::clone(&channel.store))
};
let should_send = {
let mut guard = store.lock().await;
guard
.buffer
.push_back(ThreadBufferedEvent::HistoryEntryResponse(event.clone()));
if guard.buffer.len() > guard.capacity
&& let Some(removed) = guard.buffer.pop_front()
&& let ThreadBufferedEvent::Request(request) = &removed
{
guard
.pending_interactive_replay
.note_evicted_server_request(request);
}
guard.active
};
if should_send {
match sender.try_send(ThreadBufferedEvent::HistoryEntryResponse(event)) {
Ok(()) => {}
Err(TrySendError::Full(event)) => {
tokio::spawn(async move {
if let Err(err) = sender.send(event).await {
tracing::warn!("thread {thread_id} event channel closed: {err}");
}
});
}
Err(TrySendError::Closed(_)) => {
tracing::warn!("thread {thread_id} event channel closed");
}
}
}
Ok(())
}
async fn enqueue_thread_legacy_rollback(
&mut self,
thread_id: ThreadId,
@@ -2304,6 +2424,10 @@ impl App {
ThreadBufferedEvent::Request(request) => {
self.enqueue_thread_request(thread_id, request).await?;
}
ThreadBufferedEvent::HistoryEntryResponse(event) => {
self.enqueue_thread_history_entry_response(thread_id, event)
.await?;
}
ThreadBufferedEvent::LegacyWarning(message) => {
self.enqueue_thread_legacy_warning(thread_id, message)
.await?;
@@ -3465,22 +3589,12 @@ impl App {
self.submit_active_thread_op(app_server, op.into()).await?;
}
AppEvent::SubmitThreadOp { thread_id, op } => {
let app_command: AppCommand = op.into();
if self
.try_resolve_app_server_request(app_server, thread_id, &app_command)
.await?
{
return Ok(AppRunControl::Continue);
}
crate::session_log::log_outbound_op(&app_command);
tracing::error!(
thread_id = %thread_id,
op = ?app_command,
"unexpected unresolved thread-scoped app command"
);
self.chat_widget.add_error_message(format!(
"Thread-scoped request is no longer pending for thread {thread_id}."
));
self.submit_thread_op(app_server, thread_id, op.into())
.await?;
}
AppEvent::ThreadHistoryEntryResponse { thread_id, event } => {
self.enqueue_thread_history_entry_response(thread_id, event)
.await?;
}
AppEvent::DiffResult(text) => {
// Clear the in-progress state in the bottom pane
@@ -4639,6 +4753,9 @@ impl App {
self.chat_widget
.handle_server_request(request, /*replay_kind*/ None);
}
ThreadBufferedEvent::HistoryEntryResponse(event) => {
self.chat_widget.handle_history_entry_response(event);
}
ThreadBufferedEvent::LegacyWarning(message) => {
self.chat_widget.add_warning_message(message);
}
@@ -4660,6 +4777,9 @@ impl App {
ThreadBufferedEvent::Request(request) => self
.chat_widget
.handle_server_request(request, Some(ReplayKind::ThreadSnapshot)),
ThreadBufferedEvent::HistoryEntryResponse(event) => {
self.chat_widget.handle_history_entry_response(event)
}
ThreadBufferedEvent::LegacyWarning(message) => {
self.chat_widget.add_warning_message(message);
}
@@ -5520,6 +5640,44 @@ mod tests {
.expect("listener task drop notification should succeed");
}
#[tokio::test]
async fn history_lookup_response_is_routed_to_requesting_thread() -> Result<()> {
let (mut app, mut app_event_rx, _op_rx) = make_test_app_with_channels().await;
let thread_id = ThreadId::new();
let handled = app
.try_handle_local_history_op(
thread_id,
&Op::GetHistoryEntryRequest {
offset: 0,
log_id: 1,
}
.into(),
)
.await?;
assert!(handled);
let app_event = tokio::time::timeout(Duration::from_secs(1), app_event_rx.recv())
.await
.expect("history lookup should emit an app event")
.expect("app event channel should stay open");
let AppEvent::ThreadHistoryEntryResponse {
thread_id: routed_thread_id,
event,
} = app_event
else {
panic!("expected thread-routed history response");
};
assert_eq!(routed_thread_id, thread_id);
assert_eq!(event.offset, 0);
assert_eq!(event.log_id, 1);
assert!(event.entry.is_none());
Ok(())
}
#[tokio::test]
async fn enqueue_thread_event_does_not_block_when_channel_full() -> Result<()> {
let mut app = make_test_app().await;

View File

@@ -15,6 +15,7 @@ use codex_chatgpt::connectors::AppInfo;
use codex_file_search::FileMatch;
use codex_protocol::ThreadId;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::protocol::GetHistoryEntryResponseEvent;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_utils_approval_presets::ApprovalPreset;
@@ -81,6 +82,12 @@ pub(crate) enum AppEvent {
op: Op,
},
/// Deliver a synthetic history lookup response to a specific thread channel.
ThreadHistoryEntryResponse {
thread_id: ThreadId,
event: GetHistoryEntryResponseEvent,
},
/// Start a new session.
NewSession,

View File

@@ -54,6 +54,7 @@ use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::TurnSteerParams;
use codex_app_server_protocol::TurnSteerResponse;
use codex_core::config::Config;
use codex_core::message_history;
use codex_otel::TelemetryAuthMode;
use codex_protocol::ThreadId;
use codex_protocol::openai_models::ModelAvailabilityNux;
@@ -277,7 +278,7 @@ impl AppServerSession {
})
.await
.wrap_err("thread/start failed during TUI bootstrap")?;
started_thread_from_start_response(response)
started_thread_from_start_response(response, config).await
}
pub(crate) async fn resume_thread(
@@ -291,14 +292,14 @@ impl AppServerSession {
.request_typed(ClientRequest::ThreadResume {
request_id,
params: thread_resume_params_from_config(
config,
config.clone(),
thread_id,
self.thread_params_mode(),
),
})
.await
.wrap_err("thread/resume failed during TUI bootstrap")?;
started_thread_from_resume_response(&response)
started_thread_from_resume_response(response, &config).await
}
pub(crate) async fn fork_thread(
@@ -312,14 +313,14 @@ impl AppServerSession {
.request_typed(ClientRequest::ThreadFork {
request_id,
params: thread_fork_params_from_config(
config,
config.clone(),
thread_id,
self.thread_params_mode(),
),
})
.await
.wrap_err("thread/fork failed during TUI bootstrap")?;
started_thread_from_fork_response(&response)
started_thread_from_fork_response(response, &config).await
}
fn thread_params_mode(&self) -> ThreadParamsMode {
@@ -843,10 +844,12 @@ fn thread_cwd_from_config(config: &Config, thread_params_mode: ThreadParamsMode)
}
}
fn started_thread_from_start_response(
async fn started_thread_from_start_response(
response: ThreadStartResponse,
config: &Config,
) -> Result<AppServerStartedThread> {
let session = thread_session_state_from_thread_start_response(&response)
let session = thread_session_state_from_thread_start_response(&response, config)
.await
.map_err(color_eyre::eyre::Report::msg)?;
Ok(AppServerStartedThread {
session,
@@ -854,30 +857,35 @@ fn started_thread_from_start_response(
})
}
fn started_thread_from_resume_response(
response: &ThreadResumeResponse,
async fn started_thread_from_resume_response(
response: ThreadResumeResponse,
config: &Config,
) -> Result<AppServerStartedThread> {
let session = thread_session_state_from_thread_resume_response(response)
let session = thread_session_state_from_thread_resume_response(&response, config)
.await
.map_err(color_eyre::eyre::Report::msg)?;
Ok(AppServerStartedThread {
session,
turns: response.thread.turns.clone(),
turns: response.thread.turns,
})
}
fn started_thread_from_fork_response(
response: &ThreadForkResponse,
async fn started_thread_from_fork_response(
response: ThreadForkResponse,
config: &Config,
) -> Result<AppServerStartedThread> {
let session = thread_session_state_from_thread_fork_response(response)
let session = thread_session_state_from_thread_fork_response(&response, config)
.await
.map_err(color_eyre::eyre::Report::msg)?;
Ok(AppServerStartedThread {
session,
turns: response.thread.turns.clone(),
turns: response.thread.turns,
})
}
fn thread_session_state_from_thread_start_response(
async fn thread_session_state_from_thread_start_response(
response: &ThreadStartResponse,
config: &Config,
) -> Result<ThreadSessionState, String> {
thread_session_state_from_thread_response(
&response.thread.id,
@@ -891,11 +899,14 @@ fn thread_session_state_from_thread_start_response(
response.sandbox.to_core(),
response.cwd.clone(),
response.reasoning_effort,
config,
)
.await
}
fn thread_session_state_from_thread_resume_response(
async fn thread_session_state_from_thread_resume_response(
response: &ThreadResumeResponse,
config: &Config,
) -> Result<ThreadSessionState, String> {
thread_session_state_from_thread_response(
&response.thread.id,
@@ -909,11 +920,14 @@ fn thread_session_state_from_thread_resume_response(
response.sandbox.to_core(),
response.cwd.clone(),
response.reasoning_effort,
config,
)
.await
}
fn thread_session_state_from_thread_fork_response(
async fn thread_session_state_from_thread_fork_response(
response: &ThreadForkResponse,
config: &Config,
) -> Result<ThreadSessionState, String> {
thread_session_state_from_thread_response(
&response.thread.id,
@@ -927,7 +941,9 @@ fn thread_session_state_from_thread_fork_response(
response.sandbox.to_core(),
response.cwd.clone(),
response.reasoning_effort,
config,
)
.await
}
fn review_target_to_app_server(
@@ -953,7 +969,7 @@ fn review_target_to_app_server(
clippy::too_many_arguments,
reason = "session mapping keeps explicit fields"
)]
fn thread_session_state_from_thread_response(
async fn thread_session_state_from_thread_response(
thread_id: &str,
thread_name: Option<String>,
rollout_path: Option<PathBuf>,
@@ -965,9 +981,12 @@ fn thread_session_state_from_thread_response(
sandbox_policy: SandboxPolicy,
cwd: PathBuf,
reasoning_effort: Option<codex_protocol::openai_models::ReasoningEffort>,
config: &Config,
) -> Result<ThreadSessionState, String> {
let thread_id = ThreadId::from_string(thread_id)
.map_err(|err| format!("thread id `{thread_id}` is invalid: {err}"))?;
let (history_log_id, history_entry_count) = message_history::history_metadata(config).await;
let history_entry_count = u64::try_from(history_entry_count).unwrap_or(u64::MAX);
Ok(ThreadSessionState {
thread_id,
@@ -981,8 +1000,8 @@ fn thread_session_state_from_thread_response(
sandbox_policy,
cwd,
reasoning_effort,
history_log_id: 0,
history_entry_count: 0,
history_log_id,
history_entry_count,
network_proxy: None,
rollout_path,
})
@@ -1084,8 +1103,10 @@ mod tests {
assert_eq!(fork.model_provider, None);
}
#[test]
fn resume_response_restores_turns_from_thread_items() {
#[tokio::test]
async fn resume_response_restores_turns_from_thread_items() {
let temp_dir = tempfile::tempdir().expect("tempdir");
let config = build_config(&temp_dir).await;
let thread_id = ThreadId::new();
let response = ThreadResumeResponse {
thread: codex_app_server_protocol::Thread {
@@ -1135,9 +1156,44 @@ mod tests {
reasoning_effort: None,
};
let started =
started_thread_from_resume_response(&response).expect("resume response should map");
let started = started_thread_from_resume_response(response.clone(), &config)
.await
.expect("resume response should map");
assert_eq!(started.turns.len(), 1);
assert_eq!(started.turns[0], response.thread.turns[0]);
}
#[tokio::test]
async fn session_configured_populates_history_metadata() {
let temp_dir = tempfile::tempdir().expect("tempdir");
let config = build_config(&temp_dir).await;
let thread_id = ThreadId::new();
message_history::append_entry("older", &thread_id, &config)
.await
.expect("history append should succeed");
message_history::append_entry("newer", &thread_id, &config)
.await
.expect("history append should succeed");
let session = thread_session_state_from_thread_response(
&thread_id.to_string(),
Some("restore".to_string()),
None,
"gpt-5.4".to_string(),
"openai".to_string(),
None,
AskForApproval::Never,
codex_protocol::config_types::ApprovalsReviewer::User,
SandboxPolicy::new_read_only_policy(),
PathBuf::from("/tmp/project"),
None,
&config,
)
.await
.expect("session should map");
assert_ne!(session.history_log_id, 0);
assert_eq!(session.history_entry_count, 2);
}
}

View File

@@ -740,7 +740,6 @@ impl ChatComposer {
/// composer rehydrates the entry immediately. This path intentionally routes through
/// [`Self::apply_history_entry`] so cursor placement remains aligned with keyboard history
/// recall semantics.
#[cfg(test)]
pub(crate) fn on_history_entry_response(
&mut self,
log_id: u64,

View File

@@ -4,10 +4,9 @@ use std::path::PathBuf;
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
use crate::bottom_pane::MentionBinding;
use crate::history_cell;
use crate::mention_codec::decode_history_mentions;
use codex_protocol::protocol::Op;
use codex_protocol::user_input::TextElement;
use tracing::warn;
/// A composer history entry that can rehydrate draft state.
#[derive(Debug, Clone, PartialEq)]
@@ -237,7 +236,6 @@ impl ChatComposerHistory {
}
/// Integrate a GetHistoryEntryResponse event.
#[cfg(test)]
pub fn on_entry_response(
&mut self,
log_id: u64,
@@ -280,16 +278,10 @@ impl ChatComposerHistory {
self.last_history_text = Some(entry.text.clone());
return Some(entry);
} else if let Some(log_id) = self.history_log_id {
warn!(
app_event_tx.send(AppEvent::CodexOp(Op::GetHistoryEntryRequest {
offset: global_idx,
log_id,
offset = global_idx,
"composer history fetch is unavailable in app-server TUI"
);
app_event_tx.send(AppEvent::InsertHistoryCell(Box::new(
history_cell::new_error_event(
"Composer history fetch: Not available in app-server TUI yet.".to_string(),
),
)));
}));
}
None
}
@@ -344,17 +336,18 @@ mod tests {
assert!(history.should_handle_navigation("", 0));
assert!(history.navigate_up(&tx).is_none()); // don't replace the text yet
// Verify that the app-server TUI emits an explicit user-facing stub error instead.
// Verify that a history lookup request was sent.
let event = rx.try_recv().expect("expected AppEvent to be sent");
let AppEvent::InsertHistoryCell(cell) = event else {
let AppEvent::CodexOp(op) = event else {
panic!("unexpected event variant");
};
let rendered = cell
.display_lines(80)
.into_iter()
.map(|line| line.to_string())
.collect::<String>();
assert!(rendered.contains("Composer history fetch: Not available in app-server TUI yet."));
assert_eq!(
Op::GetHistoryEntryRequest {
log_id: 1,
offset: 2,
},
op
);
// Inject the async response.
assert_eq!(
@@ -365,17 +358,18 @@ mod tests {
// Next Up should move to offset 1.
assert!(history.navigate_up(&tx).is_none()); // don't replace the text yet
// Verify second stub error for offset 1.
// Verify second lookup request for offset 1.
let event2 = rx.try_recv().expect("expected second event");
let AppEvent::InsertHistoryCell(cell) = event2 else {
let AppEvent::CodexOp(op) = event2 else {
panic!("unexpected event variant");
};
let rendered = cell
.display_lines(80)
.into_iter()
.map(|line| line.to_string())
.collect::<String>();
assert!(rendered.contains("Composer history fetch: Not available in app-server TUI yet."));
assert_eq!(
Op::GetHistoryEntryRequest {
log_id: 1,
offset: 1,
},
op
);
assert_eq!(
Some(HistoryEntry::new("older".to_string())),

View File

@@ -1073,7 +1073,6 @@ impl BottomPane {
|| self.composer.is_in_paste_burst()
}
#[cfg(test)]
pub(crate) fn on_history_entry_response(
&mut self,
log_id: u64,

View File

@@ -46,6 +46,8 @@ use crate::audio_device::list_realtime_audio_device_names;
use crate::bottom_pane::StatusLineItem;
use crate::bottom_pane::StatusLinePreviewData;
use crate::bottom_pane::StatusLineSetupView;
use crate::mention_codec::LinkedMention;
use crate::mention_codec::encode_history_mentions;
use crate::model_catalog::ModelCatalog;
use crate::multi_agents;
use crate::status::RateLimitWindowDisplay;
@@ -3474,8 +3476,7 @@ impl ChatWidget {
}
}
#[cfg(test)]
fn on_get_history_entry_response(
pub(crate) fn handle_history_entry_response(
&mut self,
event: codex_protocol::protocol::GetHistoryEntryResponseEvent,
) {
@@ -5316,9 +5317,19 @@ impl ChatWidget {
return;
}
// Persist the text to cross-session message history.
// Persist the text to cross-session message history. Mentions are
// encoded into placeholder syntax so recall can reconstruct the
// mention bindings in a future session.
if !text.is_empty() {
warn!("skipping composer history persistence in app-server TUI");
let encoded_mentions = mention_bindings
.iter()
.map(|binding| LinkedMention {
mention: binding.mention.clone(),
path: binding.path.clone(),
})
.collect::<Vec<_>>();
let history_text = encode_history_mentions(&text, &encoded_mentions);
self.submit_op(Op::AddToHistory { text: history_text });
}
if let Some(pending_steer) = pending_steer {
@@ -6440,7 +6451,7 @@ impl ChatWidget {
EventMsg::McpToolCallEnd(ev) => self.on_mcp_tool_call_end(ev),
EventMsg::WebSearchBegin(ev) => self.on_web_search_begin(ev),
EventMsg::WebSearchEnd(ev) => self.on_web_search_end(ev),
EventMsg::GetHistoryEntryResponse(ev) => self.on_get_history_entry_response(ev),
EventMsg::GetHistoryEntryResponse(ev) => self.handle_history_entry_response(ev),
EventMsg::McpListToolsResponse(ev) => self.on_list_mcp_tools(ev),
EventMsg::ListCustomPromptsResponse(_) => {
tracing::warn!(

View File

@@ -19,13 +19,13 @@ installs the pinned runtime package automatically.
## Quickstart
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
thread = codex.thread_start(model="gpt-5")
completed_turn = thread.turn(TextInput("Say hello in one sentence.")).run()
print(completed_turn.status)
print(completed_turn.id)
result = thread.run("Say hello in one sentence.")
print(result.final_response)
print(len(result.items))
```
## Docs map
@@ -95,4 +95,6 @@ This supports the CI release flow:
- `Codex()` is eager and performs startup + `initialize` in the constructor.
- Use context managers (`with Codex() as codex:`) to ensure shutdown.
- Prefer `thread.run("...")` for the common case. Use `thread.turn(...)` when
you need streaming, steering, or interrupt control.
- For transient overload, use `codex_app_server.retry.retry_on_overload`.

View File

@@ -2,7 +2,7 @@
Public surface of `codex_app_server` for app-server v2.
This SDK surface is experimental. The current implementation intentionally allows only one active `TurnHandle.stream()` or `TurnHandle.run()` consumer per client instance at a time.
This SDK surface is experimental. The current implementation intentionally allows only one active turn consumer (`Thread.run()`, `TurnHandle.stream()`, or `TurnHandle.run()`) per client instance at a time.
## Package Entry
@@ -10,6 +10,7 @@ This SDK surface is experimental. The current implementation intentionally allow
from codex_app_server import (
Codex,
AsyncCodex,
RunResult,
Thread,
AsyncThread,
TurnHandle,
@@ -24,7 +25,7 @@ from codex_app_server import (
MentionInput,
TurnStatus,
)
from codex_app_server.generated.v2_all import ThreadItem
from codex_app_server.generated.v2_all import ThreadItem, ThreadTokenUsage
```
- Version: `codex_app_server.__version__`
@@ -97,6 +98,7 @@ async with AsyncCodex() as codex:
### Thread
- `run(input: str | Input, *, approval_policy=None, approvals_reviewer=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, service_tier=None, summary=None) -> RunResult`
- `turn(input: Input, *, approval_policy=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, summary=None) -> TurnHandle`
- `read(*, include_turns: bool = False) -> ThreadReadResponse`
- `set_name(name: str) -> ThreadSetNameResponse`
@@ -104,11 +106,23 @@ async with AsyncCodex() as codex:
### AsyncThread
- `run(input: str | Input, *, approval_policy=None, approvals_reviewer=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, service_tier=None, summary=None) -> Awaitable[RunResult]`
- `turn(input: Input, *, approval_policy=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, summary=None) -> Awaitable[AsyncTurnHandle]`
- `read(*, include_turns: bool = False) -> Awaitable[ThreadReadResponse]`
- `set_name(name: str) -> Awaitable[ThreadSetNameResponse]`
- `compact() -> Awaitable[ThreadCompactStartResponse]`
`run(...)` is the common-case convenience path. It accepts plain strings, starts
the turn, consumes notifications until completion, and returns a small result
object with:
- `final_response: str`
- `items: list[ThreadItem]`
- `usage: ThreadTokenUsage | None`
Use `turn(...)` when you need low-level turn control (`stream()`, `steer()`,
`interrupt()`) or the canonical generated `Turn` from `TurnHandle.run()`.
## TurnHandle / AsyncTurnHandle
### TurnHandle
@@ -181,10 +195,10 @@ from codex_app_server import (
## Example
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
completed_turn = thread.turn(TextInput("Say hello in one sentence.")).run()
print(completed_turn.id, completed_turn.status)
result = thread.run("Say hello in one sentence.")
print(result.final_response)
```

View File

@@ -22,41 +22,41 @@ Requirements:
## 2) Run your first turn (sync)
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
server = codex.metadata.serverInfo
print("Server:", None if server is None else server.name, None if server is None else server.version)
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
completed_turn = thread.turn(TextInput("Say hello in one sentence.")).run()
result = thread.run("Say hello in one sentence.")
print("Thread:", thread.id)
print("Turn:", completed_turn.id)
print("Status:", completed_turn.status)
print("Items:", len(completed_turn.items or []))
print("Text:", result.final_response)
print("Items:", len(result.items))
```
What happened:
- `Codex()` started and initialized `codex app-server`.
- `thread_start(...)` created a thread.
- `turn(...).run()` consumed events until `turn/completed` and returned the canonical generated app-server `Turn` model.
- one client can have only one active `TurnHandle.stream()` / `TurnHandle.run()` consumer at a time in the current experimental build
- `thread.run("...")` started a turn, consumed events until completion, and returned the final assistant response plus collected items and usage.
- use `thread.turn(...)` when you need a `TurnHandle` for streaming, steering, interrupting, or turn IDs/status
- one client can have only one active turn consumer (`thread.run(...)`, `TurnHandle.stream()`, or `TurnHandle.run()`) at a time in the current experimental build
## 3) Continue the same thread (multi-turn)
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
first = thread.turn(TextInput("Summarize Rust ownership in 2 bullets.")).run()
second = thread.turn(TextInput("Now explain it to a Python developer.")).run()
first = thread.run("Summarize Rust ownership in 2 bullets.")
second = thread.run("Now explain it to a Python developer.")
print("first:", first.id, first.status)
print("second:", second.id, second.status)
print("first:", first.final_response)
print("second:", second.final_response)
```
## 4) Async parity
@@ -66,15 +66,14 @@ initializes lazily, and context entry makes startup/shutdown explicit.
```python
import asyncio
from codex_app_server import AsyncCodex, TextInput
from codex_app_server import AsyncCodex
async def main() -> None:
async with AsyncCodex() as codex:
thread = await codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
turn = await thread.turn(TextInput("Continue where we left off."))
completed_turn = await turn.run()
print(completed_turn.id, completed_turn.status)
result = await thread.run("Continue where we left off.")
print(result.final_response)
asyncio.run(main())
@@ -83,14 +82,14 @@ asyncio.run(main())
## 5) Resume an existing thread
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
THREAD_ID = "thr_123" # replace with a real id
with Codex() as codex:
thread = codex.thread_resume(THREAD_ID)
completed_turn = thread.turn(TextInput("Continue where we left off.")).run()
print(completed_turn.id, completed_turn.status)
result = thread.run("Continue where we left off.")
print(result.final_response)
```
## 6) Generated models

View File

@@ -6,9 +6,7 @@ if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import (
assistant_text_from_turn,
ensure_local_sdk_src,
find_turn_by_id,
runtime_config,
server_label,
)
@@ -17,7 +15,7 @@ ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex, TextInput
from codex_app_server import AsyncCodex
async def main() -> None:
@@ -25,13 +23,9 @@ async def main() -> None:
print("Server:", server_label(codex.metadata))
thread = await codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
turn = await thread.turn(TextInput("Say hello in one sentence."))
result = await turn.run()
persisted = await thread.read(include_turns=True)
persisted_turn = find_turn_by_id(persisted.thread.turns, result.id)
print("Status:", result.status)
print("Text:", assistant_text_from_turn(persisted_turn))
result = await thread.run("Say hello in one sentence.")
print("Items:", len(result.items))
print("Text:", result.final_response)
if __name__ == "__main__":

View File

@@ -6,23 +6,19 @@ if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import (
assistant_text_from_turn,
ensure_local_sdk_src,
find_turn_by_id,
runtime_config,
server_label,
)
ensure_local_sdk_src()
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex(config=runtime_config()) as codex:
print("Server:", server_label(codex.metadata))
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
result = thread.turn(TextInput("Say hello in one sentence.")).run()
persisted = thread.read(include_turns=True)
persisted_turn = find_turn_by_id(persisted.thread.turns, result.id)
print("Status:", result.status)
print("Text:", assistant_text_from_turn(persisted_turn))
result = thread.run("Say hello in one sentence.")
print("Items:", len(result.items))
print("Text:", result.final_response)

View File

@@ -47,6 +47,7 @@ from .api import (
InputItem,
LocalImageInput,
MentionInput,
RunResult,
SkillInput,
TextInput,
Thread,
@@ -68,6 +69,7 @@ __all__ = [
"TurnHandle",
"AsyncTurnHandle",
"InitializeResponse",
"RunResult",
"Input",
"InputItem",
"TextInput",

View File

@@ -7,7 +7,9 @@ from typing import AsyncIterator, Iterator
from .async_client import AsyncAppServerClient
from .client import AppServerClient, AppServerConfig
from .generated.v2_all import (
ApprovalsReviewer,
AskForApproval,
ItemCompletedNotification,
ModelListResponse,
Personality,
ReasoningEffort,
@@ -27,10 +29,13 @@ from .generated.v2_all import (
ThreadSortKey,
ThreadSourceKind,
ThreadStartParams,
ThreadTokenUsage,
ThreadTokenUsageUpdatedNotification,
Turn as AppServerTurn,
TurnCompletedNotification,
TurnInterruptResponse,
TurnStartParams,
TurnStatus,
TurnSteerResponse,
)
from .models import InitializeResponse, JsonObject, Notification, ServerInfo
@@ -65,6 +70,14 @@ class MentionInput:
InputItem = TextInput | ImageInput | LocalImageInput | SkillInput | MentionInput
Input = list[InputItem] | InputItem
RunInput = Input | str
@dataclass(slots=True)
class RunResult:
    """Aggregate outcome of a completed `Thread.run()` / `AsyncThread.run()` call."""

    # Text of the last assistant message completed during the turn ("" if none).
    final_response: str
    # Every item that completed during the turn, in arrival order.
    items: list[ThreadItem]
    # Latest token-usage snapshot reported for the turn, if the server sent one.
    usage: ThreadTokenUsage | None
def _to_wire_item(item: InputItem) -> JsonObject:
@@ -87,6 +100,108 @@ def _to_wire_input(input: Input) -> list[JsonObject]:
return [_to_wire_item(input)]
def _normalize_run_input(input: RunInput) -> Input:
    """Wrap a bare string as a TextInput; structured input passes through as-is."""
    return TextInput(input) if isinstance(input, str) else input
def _assistant_text_from_item(item: ThreadItem) -> str | None:
    """Extract assistant-authored text from a thread item, or None if it carries none.

    Handles both wire shapes seen in notifications: the flat `agentMessage`
    item, and the structured assistant `message` whose content list holds
    `output_text` parts (concatenated in order).
    """
    raw = item.model_dump(mode="json") if hasattr(item, "model_dump") else item
    if not isinstance(raw, dict):
        return None
    kind = raw.get("type")
    if kind == "agentMessage":
        text = raw.get("text")
        return text if isinstance(text, str) and text else None
    if kind == "message" and raw.get("role") == "assistant":
        parts = [
            part["text"]
            for part in raw.get("content") or []
            if isinstance(part, dict)
            and part.get("type") == "output_text"
            and isinstance(part.get("text"), str)
            and part["text"]
        ]
        return "".join(parts) or None
    return None
def _final_assistant_response_from_items(items: list[ThreadItem]) -> str:
    """Return the text of the last item carrying assistant text, or "" if none do."""
    # Scanning in reverse and returning the first hit is equivalent to the
    # forward "keep overwriting with the latest non-None" formulation.
    for item in reversed(items):
        text = _assistant_text_from_item(item)
        if text is not None:
            return text
    return ""
def _raise_for_failed_turn(turn: AppServerTurn) -> None:
    """Raise RuntimeError when *turn* ended in the failed status; otherwise no-op.

    Prefers the server-provided error message; falls back to a generic
    message naming the status.
    """
    if turn.status == TurnStatus.failed:
        error = turn.error
        if error is not None and error.message:
            raise RuntimeError(error.message)
        raise RuntimeError(f"turn failed with status {turn.status.value}")
def _collect_run_result(stream: Iterator[Notification], *, turn_id: str) -> RunResult:
    """Drain *stream* for the turn identified by *turn_id* and build a RunResult.

    Collects items from matching item/completed notifications and keeps the
    latest matching token-usage snapshot. Stops as soon as the matching
    turn/completed notification arrives; previously the loop relied on the
    stream terminating on its own, which could over-read (or hang on)
    notifications emitted after the turn was already over.

    Raises:
        RuntimeError: if the stream ends without a turn/completed event for
            this turn, or the turn completed with a failed status.
    """
    completed: TurnCompletedNotification | None = None
    items: list[ThreadItem] = []
    usage: ThreadTokenUsage | None = None
    for event in stream:
        payload = event.payload
        if isinstance(payload, ItemCompletedNotification) and payload.turn_id == turn_id:
            items.append(payload.item)
            continue
        if isinstance(payload, ThreadTokenUsageUpdatedNotification) and payload.turn_id == turn_id:
            usage = payload.token_usage
            continue
        if isinstance(payload, TurnCompletedNotification) and payload.turn.id == turn_id:
            completed = payload
            break  # the turn is over; do not wait on further notifications
    if completed is None:
        raise RuntimeError("turn completed event not received")
    _raise_for_failed_turn(completed.turn)
    return RunResult(
        final_response=_final_assistant_response_from_items(items),
        items=items,
        usage=usage,
    )
async def _collect_async_run_result(
    stream: AsyncIterator[Notification], *, turn_id: str
) -> RunResult:
    """Async counterpart of `_collect_run_result`.

    Drains *stream* for the turn identified by *turn_id*, collecting completed
    items and the latest token-usage snapshot, and stops as soon as the
    matching turn/completed notification arrives (previously the loop relied
    on the stream terminating on its own, which could over-read notifications
    emitted after completion).

    Raises:
        RuntimeError: if the stream ends without a turn/completed event for
            this turn, or the turn completed with a failed status.
    """
    completed: TurnCompletedNotification | None = None
    items: list[ThreadItem] = []
    usage: ThreadTokenUsage | None = None
    async for event in stream:
        payload = event.payload
        if isinstance(payload, ItemCompletedNotification) and payload.turn_id == turn_id:
            items.append(payload.item)
            continue
        if isinstance(payload, ThreadTokenUsageUpdatedNotification) and payload.turn_id == turn_id:
            usage = payload.token_usage
            continue
        if isinstance(payload, TurnCompletedNotification) and payload.turn.id == turn_id:
            completed = payload
            break  # the turn is over; do not wait on further notifications
    if completed is None:
        raise RuntimeError("turn completed event not received")
    _raise_for_failed_turn(completed.turn)
    return RunResult(
        final_response=_final_assistant_response_from_items(items),
        items=items,
        usage=usage,
    )
def _split_user_agent(user_agent: str) -> tuple[str | None, str | None]:
raw = user_agent.strip()
if not raw:
@@ -503,6 +618,40 @@ class Thread:
_client: AppServerClient
id: str
def run(
    self,
    input: RunInput,
    *,
    approval_policy: AskForApproval | None = None,
    approvals_reviewer: ApprovalsReviewer | None = None,
    cwd: str | None = None,
    effort: ReasoningEffort | None = None,
    model: str | None = None,
    output_schema: JsonObject | None = None,
    personality: Personality | None = None,
    sandbox_policy: SandboxPolicy | None = None,
    service_tier: ServiceTier | None = None,
    summary: ReasoningSummary | None = None,
) -> RunResult:
    """Start a turn from *input*, block until it completes, and return a RunResult.

    Convenience wrapper over `turn(...)`: a plain string is wrapped as text
    input, the turn's notification stream is consumed to completion, and the
    collected items, final assistant text, and token usage are returned.
    All keyword options are forwarded to `turn(...)` unchanged.

    Raises:
        RuntimeError: if the turn fails, or the stream ends without a
            turn/completed event.
    """
    turn = self.turn(
        _normalize_run_input(input),
        approval_policy=approval_policy,
        approvals_reviewer=approvals_reviewer,
        cwd=cwd,
        effort=effort,
        model=model,
        output_schema=output_schema,
        personality=personality,
        sandbox_policy=sandbox_policy,
        service_tier=service_tier,
        summary=summary,
    )
    stream = turn.stream()
    try:
        return _collect_run_result(stream, turn_id=turn.id)
    finally:
        # Always release the notification stream, even if collection raises.
        stream.close()
# BEGIN GENERATED: Thread.flat_methods
def turn(
self,
@@ -553,6 +702,40 @@ class AsyncThread:
_codex: AsyncCodex
id: str
async def run(
    self,
    input: RunInput,
    *,
    approval_policy: AskForApproval | None = None,
    approvals_reviewer: ApprovalsReviewer | None = None,
    cwd: str | None = None,
    effort: ReasoningEffort | None = None,
    model: str | None = None,
    output_schema: JsonObject | None = None,
    personality: Personality | None = None,
    sandbox_policy: SandboxPolicy | None = None,
    service_tier: ServiceTier | None = None,
    summary: ReasoningSummary | None = None,
) -> RunResult:
    """Async counterpart of `Thread.run`: start a turn and await its completion.

    A plain string is wrapped as text input, the async notification stream is
    consumed to completion, and the collected items, final assistant text, and
    token usage are returned. All keyword options are forwarded to `turn(...)`
    unchanged.

    Raises:
        RuntimeError: if the turn fails, or the stream ends without a
            turn/completed event.
    """
    turn = await self.turn(
        _normalize_run_input(input),
        approval_policy=approval_policy,
        approvals_reviewer=approvals_reviewer,
        cwd=cwd,
        effort=effort,
        model=model,
        output_schema=output_schema,
        personality=personality,
        sandbox_policy=sandbox_policy,
        service_tier=service_tier,
        summary=summary,
    )
    stream = turn.stream()
    try:
        return await _collect_async_run_result(stream, turn_id=turn.id)
    finally:
        # Always release the notification stream, even if collection raises.
        await stream.aclose()
# BEGIN GENERATED: AsyncThread.flat_methods
async def turn(
self,

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import asyncio
from collections import deque
from pathlib import Path
from types import SimpleNamespace
import pytest
@@ -10,14 +11,19 @@ import codex_app_server.api as public_api_module
from codex_app_server.client import AppServerClient
from codex_app_server.generated.v2_all import (
AgentMessageDeltaNotification,
ItemCompletedNotification,
ThreadTokenUsageUpdatedNotification,
TurnCompletedNotification,
TurnStatus,
)
from codex_app_server.models import InitializeResponse, Notification
from codex_app_server.api import (
AsyncCodex,
AsyncThread,
AsyncTurnHandle,
Codex,
RunResult,
Thread,
TurnHandle,
)
@@ -48,16 +54,74 @@ def _completed_notification(
thread_id: str = "thread-1",
turn_id: str = "turn-1",
status: str = "completed",
error_message: str | None = None,
) -> Notification:
turn: dict[str, object] = {
"id": turn_id,
"items": [],
"status": status,
}
if error_message is not None:
turn["error"] = {"message": error_message}
return Notification(
method="turn/completed",
payload=TurnCompletedNotification.model_validate(
{
"threadId": thread_id,
"turn": {
"id": turn_id,
"items": [],
"status": status,
"turn": turn,
}
),
)
def _item_completed_notification(
    *,
    thread_id: str = "thread-1",
    turn_id: str = "turn-1",
    text: str = "final text",
) -> Notification:
    """Build a fake item/completed notification carrying one agentMessage item."""
    return Notification(
        method="item/completed",
        payload=ItemCompletedNotification.model_validate(
            {
                "item": {
                    "id": "item-1",
                    "text": text,
                    "type": "agentMessage",
                },
                "threadId": thread_id,
                "turnId": turn_id,
            }
        ),
    )
def _token_usage_notification(
*,
thread_id: str = "thread-1",
turn_id: str = "turn-1",
) -> Notification:
return Notification(
method="thread/tokenUsage/updated",
payload=ThreadTokenUsageUpdatedNotification.model_validate(
{
"threadId": thread_id,
"turnId": turn_id,
"tokenUsage": {
"last": {
"cachedInputTokens": 1,
"inputTokens": 2,
"outputTokens": 3,
"reasoningOutputTokens": 4,
"totalTokens": 9,
},
"total": {
"cachedInputTokens": 5,
"inputTokens": 6,
"outputTokens": 7,
"reasoningOutputTokens": 8,
"totalTokens": 26,
},
},
}
),
@@ -225,6 +289,162 @@ def test_turn_run_returns_completed_turn_payload() -> None:
assert result.items == []
def test_thread_run_accepts_string_input_and_returns_run_result() -> None:
    """Thread.run("...") wraps the string as text input and aggregates the turn."""
    client = AppServerClient()
    item_event = _item_completed_notification(text="Hello.")
    usage_event = _token_usage_notification()
    events = iter([item_event, usage_event, _completed_notification()])
    client.next_notification = lambda: next(events)  # type: ignore[method-assign]

    captured: dict[str, object] = {}

    def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202
        captured["thread_id"] = thread_id
        captured["wire_input"] = wire_input
        captured["params"] = params
        return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

    client.turn_start = fake_turn_start  # type: ignore[method-assign]

    result = Thread(client, "thread-1").run("hello")

    assert captured["thread_id"] == "thread-1"
    assert captured["wire_input"] == [{"type": "text", "text": "hello"}]
    assert result == RunResult(
        final_response="Hello.",
        items=[item_event.payload.item],
        usage=usage_event.payload.token_usage,
    )
def test_thread_run_uses_last_completed_assistant_message_as_final_response() -> None:
    """When several assistant messages complete, the last one wins as final_response."""
    client = AppServerClient()
    earlier = _item_completed_notification(text="First message")
    later = _item_completed_notification(text="Second message")
    events = iter([earlier, later, _completed_notification()])
    client.next_notification = lambda: next(events)  # type: ignore[method-assign]

    def fake_turn_start(thread_id, wire_input, *, params=None):  # noqa: ANN001,ANN202,ARG001
        return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

    client.turn_start = fake_turn_start  # type: ignore[method-assign]

    result = Thread(client, "thread-1").run("hello")

    assert result.final_response == "Second message"
    assert result.items == [earlier.payload.item, later.payload.item]
def test_thread_run_raises_on_failed_turn() -> None:
    """A failed turn surfaces the server-reported error message as RuntimeError."""
    client = AppServerClient()
    events = iter([_completed_notification(status="failed", error_message="boom")])
    client.next_notification = lambda: next(events)  # type: ignore[method-assign]

    def fake_turn_start(thread_id, wire_input, *, params=None):  # noqa: ANN001,ANN202,ARG001
        return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

    client.turn_start = fake_turn_start  # type: ignore[method-assign]

    with pytest.raises(RuntimeError, match="boom"):
        Thread(client, "thread-1").run("hello")
def test_async_thread_run_accepts_string_input_and_returns_run_result() -> None:
    """AsyncThread.run mirrors the sync path: string input, aggregated RunResult."""

    async def scenario() -> None:
        codex = AsyncCodex()

        # Skip real process startup/initialize; the fakes below stand in for RPC.
        async def fake_ensure_initialized() -> None:
            return None

        item_notification = _item_completed_notification(text="Hello async.")
        usage_notification = _token_usage_notification()
        notifications: deque[Notification] = deque(
            [
                item_notification,
                usage_notification,
                _completed_notification(),
            ]
        )

        seen: dict[str, object] = {}

        async def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202
            seen["thread_id"] = thread_id
            seen["wire_input"] = wire_input
            seen["params"] = params
            return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

        async def fake_next_notification() -> Notification:
            return notifications.popleft()

        codex._ensure_initialized = fake_ensure_initialized  # type: ignore[method-assign]
        codex._client.turn_start = fake_turn_start  # type: ignore[method-assign]
        codex._client.next_notification = fake_next_notification  # type: ignore[method-assign]

        result = await AsyncThread(codex, "thread-1").run("hello")

        assert seen["thread_id"] == "thread-1"
        assert seen["wire_input"] == [{"type": "text", "text": "hello"}]
        assert result == RunResult(
            final_response="Hello async.",
            items=[item_notification.payload.item],
            usage=usage_notification.payload.token_usage,
        )

    asyncio.run(scenario())
def test_async_thread_run_uses_last_completed_assistant_message_as_final_response() -> None:
    """Async parity: final_response is the text of the last completed assistant message."""

    async def scenario() -> None:
        codex = AsyncCodex()

        # Skip real process startup/initialize; the fakes below stand in for RPC.
        async def fake_ensure_initialized() -> None:
            return None

        first_item_notification = _item_completed_notification(text="First async message")
        second_item_notification = _item_completed_notification(text="Second async message")
        notifications: deque[Notification] = deque(
            [
                first_item_notification,
                second_item_notification,
                _completed_notification(),
            ]
        )

        async def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202,ARG001
            return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

        async def fake_next_notification() -> Notification:
            return notifications.popleft()

        codex._ensure_initialized = fake_ensure_initialized  # type: ignore[method-assign]
        codex._client.turn_start = fake_turn_start  # type: ignore[method-assign]
        codex._client.next_notification = fake_next_notification  # type: ignore[method-assign]

        result = await AsyncThread(codex, "thread-1").run("hello")

        assert result.final_response == "Second async message"
        assert result.items == [
            first_item_notification.payload.item,
            second_item_notification.payload.item,
        ]

    asyncio.run(scenario())
def test_retry_examples_compare_status_with_enum() -> None:
for path in (
ROOT / "examples" / "10_error_handling_and_retry" / "sync.py",

View File

@@ -4,7 +4,7 @@ import importlib.resources as resources
import inspect
from typing import Any
from codex_app_server import AppServerConfig
from codex_app_server import AppServerConfig, RunResult
from codex_app_server.models import InitializeResponse
from codex_app_server.api import AsyncCodex, AsyncThread, Codex, Thread
@@ -31,6 +31,10 @@ def test_root_exports_app_server_config() -> None:
assert AppServerConfig.__name__ == "AppServerConfig"
def test_root_exports_run_result() -> None:
    """`from codex_app_server import RunResult` resolves to the real class."""
    assert RunResult.__name__ == "RunResult"
def test_package_includes_py_typed_marker() -> None:
    """The package ships a PEP 561 `py.typed` marker so type checkers use its hints."""
    marker = resources.files("codex_app_server").joinpath("py.typed")
    assert marker.is_file()
@@ -101,6 +105,18 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
"service_tier",
"summary",
],
Thread.run: [
"approval_policy",
"approvals_reviewer",
"cwd",
"effort",
"model",
"output_schema",
"personality",
"sandbox_policy",
"service_tier",
"summary",
],
AsyncCodex.thread_start: [
"approval_policy",
"approvals_reviewer",
@@ -164,6 +180,18 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
"service_tier",
"summary",
],
AsyncThread.run: [
"approval_policy",
"approvals_reviewer",
"cwd",
"effort",
"model",
"output_schema",
"personality",
"sandbox_policy",
"service_tier",
"summary",
],
}
for fn, expected_kwargs in expected.items():

View File

@@ -265,6 +265,36 @@ def test_real_thread_and_turn_start_smoke(runtime_env: PreparedRuntimeEnv) -> No
assert isinstance(data["persisted_items_count"], int)
def test_real_thread_run_convenience_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """End-to-end: `thread.run("...")` against a real app-server yields text/items/usage."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json

            from codex_app_server import Codex

            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                result = thread.run("say ok")
                print(json.dumps({
                    "thread_id": thread.id,
                    "final_response": result.final_response,
                    "items_count": len(result.items),
                    "has_usage": result.usage is not None,
                }))
            """
        ),
    )
    # Shape-only assertions: real model output is nondeterministic.
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    assert isinstance(data["final_response"], str) and data["final_response"].strip()
    assert isinstance(data["items_count"], int)
    assert isinstance(data["has_usage"], bool)
def test_real_async_thread_turn_usage_and_ids_smoke(
runtime_env: PreparedRuntimeEnv,
) -> None:
@@ -308,6 +338,42 @@ def test_real_async_thread_turn_usage_and_ids_smoke(
assert isinstance(data["persisted_items_count"], int)
def test_real_async_thread_run_convenience_smoke(
    runtime_env: PreparedRuntimeEnv,
) -> None:
    """End-to-end async parity check for `thread.run` against a real app-server."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import asyncio
            import json

            from codex_app_server import AsyncCodex

            async def main():
                async with AsyncCodex() as codex:
                    thread = await codex.thread_start(
                        model="gpt-5.4",
                        config={"model_reasoning_effort": "high"},
                    )
                    result = await thread.run("say ok")
                    print(json.dumps({
                        "thread_id": thread.id,
                        "final_response": result.final_response,
                        "items_count": len(result.items),
                        "has_usage": result.usage is not None,
                    }))

            asyncio.run(main())
            """
        ),
    )
    # Shape-only assertions: real model output is nondeterministic.
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    assert isinstance(data["final_response"], str) and data["final_response"].strip()
    assert isinstance(data["items_count"], int)
    assert isinstance(data["has_usage"], bool)
def test_notebook_bootstrap_resolves_sdk_and_runtime_from_unrelated_cwd(
runtime_env: PreparedRuntimeEnv,
) -> None: