Compare commits

...

5 Commits

Author SHA1 Message Date
Shaqayeq
5f141adc0d python-sdk: fix thread.run final response semantics 2026-03-18 13:51:05 -07:00
Shaqayeq
8c521cb74d Merge branch 'main' into dev/shaqayeq/python-sdk-thread-run 2026-03-18 13:28:48 -07:00
Shaqayeq
33289a1dd3 python-sdk: update quickstart examples for thread.run 2026-03-18 13:22:38 -07:00
Shaqayeq
3536149351 python-sdk: add thread.run convenience methods
Co-authored-by: Codex <noreply@openai.com>
2026-03-18 13:21:43 -07:00
Felipe Coury
334164a6f7 feat(tui): restore composer history in app-server tui (#14945)
## Problem

The app-server TUI (`tui_app_server`) lacked composer history support.
Pressing Up/Down to recall previous prompts hit a stub that logged a
warning and displayed "Not available in app-server TUI yet." New
submissions were silently dropped from the shared history file, so
nothing persisted for future sessions.

## Mental model

Codex maintains a single, append-only history file
(`$CODEX_HOME/history.jsonl`) shared across all TUI processes on the
same machine. The legacy (in-process) TUI already reads/writes this file
through `codex_core::message_history`. The app-server TUI delegates most
operations to a separate process over RPC, but history is intentionally
*not* an RPC concern — it's a client-local file.

This PR makes the app-server TUI access the same history file directly,
bypassing the app-server process entirely. The composer's Up/Down
navigation and submit-time persistence now follow the same code paths as
the legacy TUI, with the only difference being *where* the call is
dispatched (locally in `App`, rather than inside `CodexThread`).

The branch is rebuilt directly on top of `upstream/main`, so it keeps
the existing app-server restore architecture intact.
`AppServerStartedThread` still restores transcript history from the
server `Thread` snapshot via `thread_snapshot_events`; this PR only adds
composer-history support.

## Non-goals

- Adding history support to the app-server protocol. History remains
client-local.
- Changing the on-disk format or location of `history.jsonl`.
- Surfacing history I/O errors to the user (failures are logged but
otherwise swallowed, matching the legacy TUI).

## Tradeoffs

| Decision | Why | Risk |
|----------|-----|------|
| Widen `message_history` from `pub(crate)` to `pub` | Avoids
duplicating file I/O logic; the module already has a clean, minimal API
surface. | Other workspace crates can now call these functions — the
contract is no longer crate-private. However, this is consistent with
recent precedent: `590cfa617` exposed `mention_syntax` for TUI
consumption, `752402c4f` exposed plugin APIs (`PluginsManager`), and
`14fcb6645`/`edacbf7b6` widened internal core APIs for other crates.
These were all narrow, intentional exposures of specific APIs — not
broad "make internals public" moves. `1af2a37ad` even went the other
direction, reducing broad re-exports to tighten boundaries. This change
follows the same pattern: a small, deliberate API surface (3 functions)
rather than a wholesale visibility change. |
| Intercept `AddToHistory` / `GetHistoryEntryRequest` in `App` before
RPC fallback | Keeps history ops out of the "unsupported op" error path
without changing app-server protocol. | This now routes through a single
`submit_thread_op` entry point, which is safer than the original
duplicated dispatch. The remaining risk is organizational: future
thread-op submission paths need to keep using that shared entry point. |
| `session_configured_from_thread_response` is now `async` | Needs
`await` on `history_metadata()` to populate real `history_log_id` /
`history_entry_count`. | Adds an async file-stat + full-file newline
scan to the session bootstrap path. The scan is bounded by
`history.max_bytes` and matches the legacy TUI's cost profile, but
startup latency still scales with file size. |

## Architecture

```
User presses Up                     User submits a prompt
       │                                    │
       ▼                                    ▼
ChatComposerHistory                 ChatWidget::do_submit_turn
  navigate_up()                       encode_history_mentions()
       │                                    │
       ▼                                    ▼
  AppEvent::CodexOp                  Op::AddToHistory { text }
  (GetHistoryEntryRequest)                  │
       │                                    ▼
       ▼                            App::try_handle_local_history_op
  App::try_handle_local_history_op    message_history::append_entry()
    spawn_blocking {                        │
      message_history::lookup()             ▼
    }                                $CODEX_HOME/history.jsonl
       │
       ▼
  AppEvent::ThreadEvent
  (GetHistoryEntryResponse)
       │
       ▼
  ChatComposerHistory::on_entry_response()
```

## Observability

- `tracing::warn` on `append_entry` failure (includes thread ID).
- `tracing::warn` on `spawn_blocking` lookup join error.
- `tracing::warn` from `message_history` internals on file-open, lock,
or parse failures.

## Tests

- `chat_composer_history::tests::navigation_with_async_fetch` — verifies
that Up emits `Op::GetHistoryEntryRequest` (was: checked for stub error
cell).
- `app::tests::history_lookup_response_is_routed_to_requesting_thread` —
verifies multi-thread composer recall routes the lookup result back to
the originating thread.
- `app_server_session::tests::resume_response_relies_on_snapshot_replay_not_initial_messages`
— verifies app-server session restore still uses the upstream
thread-snapshot path.
- `app_server_session::tests::session_configured_populates_history_metadata`
— verifies bootstrap sets nonzero `history_log_id` /
`history_entry_count` from the shared local history file.
2026-03-18 11:54:11 -06:00
19 changed files with 892 additions and 149 deletions

View File

@@ -63,7 +63,7 @@ mod mcp_tool_call;
mod memories;
pub mod mention_syntax;
mod mentions;
mod message_history;
pub mod message_history;
mod model_provider_info;
pub mod path_utils;
pub mod personality_migration;

View File

@@ -66,14 +66,22 @@ fn history_filepath(config: &Config) -> PathBuf {
path
}
/// Append a `text` entry associated with `conversation_id` to the history file. Uses
/// advisory file locking to ensure that concurrent writes do not interleave,
/// which entails a small amount of blocking I/O internally.
pub(crate) async fn append_entry(
text: &str,
conversation_id: &ThreadId,
config: &Config,
) -> Result<()> {
/// Append a `text` entry associated with `conversation_id` to the history file.
///
/// Uses advisory file locking (`File::try_lock`) with a retry loop to ensure
/// concurrent writes from multiple TUI processes do not interleave. The lock
/// acquisition and write are performed inside `spawn_blocking` so the caller's
/// async runtime is not blocked.
///
/// The entry is silently skipped when `config.history.persistence` is
/// [`HistoryPersistence::None`].
///
/// # Errors
///
/// Returns an I/O error if the history file cannot be opened/created, the
/// system clock is before the Unix epoch, or the exclusive lock cannot be
/// acquired after [`MAX_RETRIES`] attempts.
pub async fn append_entry(text: &str, conversation_id: &ThreadId, config: &Config) -> Result<()> {
match config.history.persistence {
HistoryPersistence::SaveAll => {
// Save everything: proceed.
@@ -243,22 +251,29 @@ fn trim_target_bytes(max_bytes: u64, newest_entry_len: u64) -> u64 {
soft_cap_bytes.max(newest_entry_len)
}
/// Asynchronously fetch the history file's *identifier* (inode on Unix) and
/// the current number of entries by counting newline characters.
pub(crate) async fn history_metadata(config: &Config) -> (u64, usize) {
/// Asynchronously fetch the history file's *identifier* and current entry count.
///
/// The identifier is the file's inode on Unix or creation time on Windows.
/// The entry count is derived by counting newline bytes in the file. Returns
/// `(0, 0)` when the file does not exist or its metadata cannot be read. If
/// metadata succeeds but the file cannot be opened or scanned, returns
/// `(log_id, 0)` so callers can still detect that a history file exists.
pub async fn history_metadata(config: &Config) -> (u64, usize) {
let path = history_filepath(config);
history_metadata_for_file(&path).await
}
/// Given a `log_id` (on Unix this is the file's inode number,
/// on Windows this is the file's creation time) and a zero-based
/// `offset`, return the corresponding `HistoryEntry` if the identifier matches
/// the current history file **and** the requested offset exists. Any I/O or
/// parsing errors are logged and result in `None`.
/// Look up a single history entry by file identity and zero-based offset.
///
/// Note this function is not async because it uses a sync advisory file
/// locking API.
pub(crate) fn lookup(log_id: u64, offset: usize, config: &Config) -> Option<HistoryEntry> {
/// Returns `Some(entry)` when the current history file's identifier (inode on
/// Unix, creation time on Windows) matches `log_id` **and** a valid JSON
/// record exists at `offset`. Returns `None` on any mismatch, I/O error, or
/// parse failure, all of which are logged at `warn` level.
///
/// This function is synchronous because it acquires a shared advisory file lock
/// via `File::try_lock_shared`. Callers on an async runtime should wrap it in
/// `spawn_blocking`.
pub fn lookup(log_id: u64, offset: usize, config: &Config) -> Option<HistoryEntry> {
let path = history_filepath(config);
lookup_history_entry(&path, log_id, offset)
}

View File

@@ -69,6 +69,7 @@ use codex_core::config::types::ApprovalsReviewer;
use codex_core::config::types::ModelAvailabilityNuxConfig;
use codex_core::config_loader::ConfigLayerStackOrdering;
use codex_core::features::Feature;
use codex_core::message_history;
use codex_core::models_manager::collaboration_mode_presets::CollaborationModesConfig;
use codex_core::models_manager::model_presets::HIDE_GPT_5_1_CODEX_MAX_MIGRATION_PROMPT_CONFIG;
use codex_core::models_manager::model_presets::HIDE_GPT5_1_MIGRATION_PROMPT_CONFIG;
@@ -86,10 +87,10 @@ use codex_protocol::openai_models::ModelUpgrade;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::FinalOutput;
use codex_protocol::protocol::GetHistoryEntryResponseEvent;
use codex_protocol::protocol::ListSkillsResponseEvent;
#[cfg(test)]
use codex_protocol::protocol::McpAuthStatus;
#[cfg(test)]
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
@@ -457,6 +458,7 @@ struct ThreadEventSnapshot {
enum ThreadBufferedEvent {
Notification(ServerNotification),
Request(ServerRequest),
HistoryEntryResponse(GetHistoryEntryResponseEvent),
LegacyWarning(String),
LegacyRollback { num_turns: u32 },
}
@@ -616,6 +618,7 @@ impl ThreadEventStore {
.pending_interactive_replay
.should_replay_snapshot_request(request),
ThreadBufferedEvent::Notification(_)
| ThreadBufferedEvent::HistoryEntryResponse(_)
| ThreadBufferedEvent::LegacyWarning(_)
| ThreadBufferedEvent::LegacyRollback { .. } => true,
})
@@ -1763,8 +1766,21 @@ impl App {
return Ok(());
};
self.submit_thread_op(app_server, thread_id, op).await
}
async fn submit_thread_op(
&mut self,
app_server: &mut AppServerSession,
thread_id: ThreadId,
op: AppCommand,
) -> Result<()> {
crate::session_log::log_outbound_op(&op);
if self.try_handle_local_history_op(thread_id, &op).await? {
return Ok(());
}
if self
.try_resolve_app_server_request(app_server, thread_id, &op)
.await?
@@ -1777,7 +1793,7 @@ impl App {
.await?
{
if ThreadEventStore::op_can_change_pending_replay_state(&op) {
self.note_active_thread_outbound_op(&op).await;
self.note_thread_outbound_op(thread_id, &op).await;
self.refresh_pending_thread_approvals().await;
}
return Ok(());
@@ -1855,6 +1871,66 @@ impl App {
}
}
/// Intercept composer-history operations and handle them locally against
/// `$CODEX_HOME/history.jsonl`, bypassing the app-server RPC layer.
async fn try_handle_local_history_op(
&mut self,
thread_id: ThreadId,
op: &AppCommand,
) -> Result<bool> {
match op.view() {
AppCommandView::Other(Op::AddToHistory { text }) => {
let text = text.clone();
let config = self.chat_widget.config_ref().clone();
tokio::spawn(async move {
if let Err(err) =
message_history::append_entry(&text, &thread_id, &config).await
{
tracing::warn!(
thread_id = %thread_id,
error = %err,
"failed to append to message history"
);
}
});
Ok(true)
}
AppCommandView::Other(Op::GetHistoryEntryRequest { offset, log_id }) => {
let offset = *offset;
let log_id = *log_id;
let config = self.chat_widget.config_ref().clone();
let app_event_tx = self.app_event_tx.clone();
tokio::spawn(async move {
let entry_opt = tokio::task::spawn_blocking(move || {
message_history::lookup(log_id, offset, &config)
})
.await
.unwrap_or_else(|err| {
tracing::warn!(error = %err, "history lookup task failed");
None
});
app_event_tx.send(AppEvent::ThreadHistoryEntryResponse {
thread_id,
event: GetHistoryEntryResponseEvent {
offset,
log_id,
entry: entry_opt.map(|entry| {
codex_protocol::message_history::HistoryEntry {
conversation_id: entry.session_id,
ts: entry.ts,
text: entry.text,
}
}),
},
});
});
Ok(true)
}
_ => Ok(false),
}
}
async fn try_submit_active_thread_op_via_app_server(
&mut self,
app_server: &mut AppServerSession,
@@ -2213,6 +2289,50 @@ impl App {
Ok(())
}
async fn enqueue_thread_history_entry_response(
&mut self,
thread_id: ThreadId,
event: GetHistoryEntryResponseEvent,
) -> Result<()> {
let (sender, store) = {
let channel = self.ensure_thread_channel(thread_id);
(channel.sender.clone(), Arc::clone(&channel.store))
};
let should_send = {
let mut guard = store.lock().await;
guard
.buffer
.push_back(ThreadBufferedEvent::HistoryEntryResponse(event.clone()));
if guard.buffer.len() > guard.capacity
&& let Some(removed) = guard.buffer.pop_front()
&& let ThreadBufferedEvent::Request(request) = &removed
{
guard
.pending_interactive_replay
.note_evicted_server_request(request);
}
guard.active
};
if should_send {
match sender.try_send(ThreadBufferedEvent::HistoryEntryResponse(event)) {
Ok(()) => {}
Err(TrySendError::Full(event)) => {
tokio::spawn(async move {
if let Err(err) = sender.send(event).await {
tracing::warn!("thread {thread_id} event channel closed: {err}");
}
});
}
Err(TrySendError::Closed(_)) => {
tracing::warn!("thread {thread_id} event channel closed");
}
}
}
Ok(())
}
async fn enqueue_thread_legacy_rollback(
&mut self,
thread_id: ThreadId,
@@ -2304,6 +2424,10 @@ impl App {
ThreadBufferedEvent::Request(request) => {
self.enqueue_thread_request(thread_id, request).await?;
}
ThreadBufferedEvent::HistoryEntryResponse(event) => {
self.enqueue_thread_history_entry_response(thread_id, event)
.await?;
}
ThreadBufferedEvent::LegacyWarning(message) => {
self.enqueue_thread_legacy_warning(thread_id, message)
.await?;
@@ -3465,22 +3589,12 @@ impl App {
self.submit_active_thread_op(app_server, op.into()).await?;
}
AppEvent::SubmitThreadOp { thread_id, op } => {
let app_command: AppCommand = op.into();
if self
.try_resolve_app_server_request(app_server, thread_id, &app_command)
.await?
{
return Ok(AppRunControl::Continue);
}
crate::session_log::log_outbound_op(&app_command);
tracing::error!(
thread_id = %thread_id,
op = ?app_command,
"unexpected unresolved thread-scoped app command"
);
self.chat_widget.add_error_message(format!(
"Thread-scoped request is no longer pending for thread {thread_id}."
));
self.submit_thread_op(app_server, thread_id, op.into())
.await?;
}
AppEvent::ThreadHistoryEntryResponse { thread_id, event } => {
self.enqueue_thread_history_entry_response(thread_id, event)
.await?;
}
AppEvent::DiffResult(text) => {
// Clear the in-progress state in the bottom pane
@@ -4639,6 +4753,9 @@ impl App {
self.chat_widget
.handle_server_request(request, /*replay_kind*/ None);
}
ThreadBufferedEvent::HistoryEntryResponse(event) => {
self.chat_widget.handle_history_entry_response(event);
}
ThreadBufferedEvent::LegacyWarning(message) => {
self.chat_widget.add_warning_message(message);
}
@@ -4660,6 +4777,9 @@ impl App {
ThreadBufferedEvent::Request(request) => self
.chat_widget
.handle_server_request(request, Some(ReplayKind::ThreadSnapshot)),
ThreadBufferedEvent::HistoryEntryResponse(event) => {
self.chat_widget.handle_history_entry_response(event)
}
ThreadBufferedEvent::LegacyWarning(message) => {
self.chat_widget.add_warning_message(message);
}
@@ -5520,6 +5640,44 @@ mod tests {
.expect("listener task drop notification should succeed");
}
#[tokio::test]
async fn history_lookup_response_is_routed_to_requesting_thread() -> Result<()> {
let (mut app, mut app_event_rx, _op_rx) = make_test_app_with_channels().await;
let thread_id = ThreadId::new();
let handled = app
.try_handle_local_history_op(
thread_id,
&Op::GetHistoryEntryRequest {
offset: 0,
log_id: 1,
}
.into(),
)
.await?;
assert!(handled);
let app_event = tokio::time::timeout(Duration::from_secs(1), app_event_rx.recv())
.await
.expect("history lookup should emit an app event")
.expect("app event channel should stay open");
let AppEvent::ThreadHistoryEntryResponse {
thread_id: routed_thread_id,
event,
} = app_event
else {
panic!("expected thread-routed history response");
};
assert_eq!(routed_thread_id, thread_id);
assert_eq!(event.offset, 0);
assert_eq!(event.log_id, 1);
assert!(event.entry.is_none());
Ok(())
}
#[tokio::test]
async fn enqueue_thread_event_does_not_block_when_channel_full() -> Result<()> {
let mut app = make_test_app().await;

View File

@@ -15,6 +15,7 @@ use codex_chatgpt::connectors::AppInfo;
use codex_file_search::FileMatch;
use codex_protocol::ThreadId;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::protocol::GetHistoryEntryResponseEvent;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_utils_approval_presets::ApprovalPreset;
@@ -81,6 +82,12 @@ pub(crate) enum AppEvent {
op: Op,
},
/// Deliver a synthetic history lookup response to a specific thread channel.
ThreadHistoryEntryResponse {
thread_id: ThreadId,
event: GetHistoryEntryResponseEvent,
},
/// Start a new session.
NewSession,

View File

@@ -54,6 +54,7 @@ use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::TurnSteerParams;
use codex_app_server_protocol::TurnSteerResponse;
use codex_core::config::Config;
use codex_core::message_history;
use codex_otel::TelemetryAuthMode;
use codex_protocol::ThreadId;
use codex_protocol::openai_models::ModelAvailabilityNux;
@@ -277,7 +278,7 @@ impl AppServerSession {
})
.await
.wrap_err("thread/start failed during TUI bootstrap")?;
started_thread_from_start_response(response)
started_thread_from_start_response(response, config).await
}
pub(crate) async fn resume_thread(
@@ -291,14 +292,14 @@ impl AppServerSession {
.request_typed(ClientRequest::ThreadResume {
request_id,
params: thread_resume_params_from_config(
config,
config.clone(),
thread_id,
self.thread_params_mode(),
),
})
.await
.wrap_err("thread/resume failed during TUI bootstrap")?;
started_thread_from_resume_response(&response)
started_thread_from_resume_response(response, &config).await
}
pub(crate) async fn fork_thread(
@@ -312,14 +313,14 @@ impl AppServerSession {
.request_typed(ClientRequest::ThreadFork {
request_id,
params: thread_fork_params_from_config(
config,
config.clone(),
thread_id,
self.thread_params_mode(),
),
})
.await
.wrap_err("thread/fork failed during TUI bootstrap")?;
started_thread_from_fork_response(&response)
started_thread_from_fork_response(response, &config).await
}
fn thread_params_mode(&self) -> ThreadParamsMode {
@@ -843,10 +844,12 @@ fn thread_cwd_from_config(config: &Config, thread_params_mode: ThreadParamsMode)
}
}
fn started_thread_from_start_response(
async fn started_thread_from_start_response(
response: ThreadStartResponse,
config: &Config,
) -> Result<AppServerStartedThread> {
let session = thread_session_state_from_thread_start_response(&response)
let session = thread_session_state_from_thread_start_response(&response, config)
.await
.map_err(color_eyre::eyre::Report::msg)?;
Ok(AppServerStartedThread {
session,
@@ -854,30 +857,35 @@ fn started_thread_from_start_response(
})
}
fn started_thread_from_resume_response(
response: &ThreadResumeResponse,
async fn started_thread_from_resume_response(
response: ThreadResumeResponse,
config: &Config,
) -> Result<AppServerStartedThread> {
let session = thread_session_state_from_thread_resume_response(response)
let session = thread_session_state_from_thread_resume_response(&response, config)
.await
.map_err(color_eyre::eyre::Report::msg)?;
Ok(AppServerStartedThread {
session,
turns: response.thread.turns.clone(),
turns: response.thread.turns,
})
}
fn started_thread_from_fork_response(
response: &ThreadForkResponse,
async fn started_thread_from_fork_response(
response: ThreadForkResponse,
config: &Config,
) -> Result<AppServerStartedThread> {
let session = thread_session_state_from_thread_fork_response(response)
let session = thread_session_state_from_thread_fork_response(&response, config)
.await
.map_err(color_eyre::eyre::Report::msg)?;
Ok(AppServerStartedThread {
session,
turns: response.thread.turns.clone(),
turns: response.thread.turns,
})
}
fn thread_session_state_from_thread_start_response(
async fn thread_session_state_from_thread_start_response(
response: &ThreadStartResponse,
config: &Config,
) -> Result<ThreadSessionState, String> {
thread_session_state_from_thread_response(
&response.thread.id,
@@ -891,11 +899,14 @@ fn thread_session_state_from_thread_start_response(
response.sandbox.to_core(),
response.cwd.clone(),
response.reasoning_effort,
config,
)
.await
}
fn thread_session_state_from_thread_resume_response(
async fn thread_session_state_from_thread_resume_response(
response: &ThreadResumeResponse,
config: &Config,
) -> Result<ThreadSessionState, String> {
thread_session_state_from_thread_response(
&response.thread.id,
@@ -909,11 +920,14 @@ fn thread_session_state_from_thread_resume_response(
response.sandbox.to_core(),
response.cwd.clone(),
response.reasoning_effort,
config,
)
.await
}
fn thread_session_state_from_thread_fork_response(
async fn thread_session_state_from_thread_fork_response(
response: &ThreadForkResponse,
config: &Config,
) -> Result<ThreadSessionState, String> {
thread_session_state_from_thread_response(
&response.thread.id,
@@ -927,7 +941,9 @@ fn thread_session_state_from_thread_fork_response(
response.sandbox.to_core(),
response.cwd.clone(),
response.reasoning_effort,
config,
)
.await
}
fn review_target_to_app_server(
@@ -953,7 +969,7 @@ fn review_target_to_app_server(
clippy::too_many_arguments,
reason = "session mapping keeps explicit fields"
)]
fn thread_session_state_from_thread_response(
async fn thread_session_state_from_thread_response(
thread_id: &str,
thread_name: Option<String>,
rollout_path: Option<PathBuf>,
@@ -965,9 +981,12 @@ fn thread_session_state_from_thread_response(
sandbox_policy: SandboxPolicy,
cwd: PathBuf,
reasoning_effort: Option<codex_protocol::openai_models::ReasoningEffort>,
config: &Config,
) -> Result<ThreadSessionState, String> {
let thread_id = ThreadId::from_string(thread_id)
.map_err(|err| format!("thread id `{thread_id}` is invalid: {err}"))?;
let (history_log_id, history_entry_count) = message_history::history_metadata(config).await;
let history_entry_count = u64::try_from(history_entry_count).unwrap_or(u64::MAX);
Ok(ThreadSessionState {
thread_id,
@@ -981,8 +1000,8 @@ fn thread_session_state_from_thread_response(
sandbox_policy,
cwd,
reasoning_effort,
history_log_id: 0,
history_entry_count: 0,
history_log_id,
history_entry_count,
network_proxy: None,
rollout_path,
})
@@ -1084,8 +1103,10 @@ mod tests {
assert_eq!(fork.model_provider, None);
}
#[test]
fn resume_response_restores_turns_from_thread_items() {
#[tokio::test]
async fn resume_response_restores_turns_from_thread_items() {
let temp_dir = tempfile::tempdir().expect("tempdir");
let config = build_config(&temp_dir).await;
let thread_id = ThreadId::new();
let response = ThreadResumeResponse {
thread: codex_app_server_protocol::Thread {
@@ -1135,9 +1156,44 @@ mod tests {
reasoning_effort: None,
};
let started =
started_thread_from_resume_response(&response).expect("resume response should map");
let started = started_thread_from_resume_response(response.clone(), &config)
.await
.expect("resume response should map");
assert_eq!(started.turns.len(), 1);
assert_eq!(started.turns[0], response.thread.turns[0]);
}
#[tokio::test]
async fn session_configured_populates_history_metadata() {
let temp_dir = tempfile::tempdir().expect("tempdir");
let config = build_config(&temp_dir).await;
let thread_id = ThreadId::new();
message_history::append_entry("older", &thread_id, &config)
.await
.expect("history append should succeed");
message_history::append_entry("newer", &thread_id, &config)
.await
.expect("history append should succeed");
let session = thread_session_state_from_thread_response(
&thread_id.to_string(),
Some("restore".to_string()),
None,
"gpt-5.4".to_string(),
"openai".to_string(),
None,
AskForApproval::Never,
codex_protocol::config_types::ApprovalsReviewer::User,
SandboxPolicy::new_read_only_policy(),
PathBuf::from("/tmp/project"),
None,
&config,
)
.await
.expect("session should map");
assert_ne!(session.history_log_id, 0);
assert_eq!(session.history_entry_count, 2);
}
}

View File

@@ -740,7 +740,6 @@ impl ChatComposer {
/// composer rehydrates the entry immediately. This path intentionally routes through
/// [`Self::apply_history_entry`] so cursor placement remains aligned with keyboard history
/// recall semantics.
#[cfg(test)]
pub(crate) fn on_history_entry_response(
&mut self,
log_id: u64,

View File

@@ -4,10 +4,9 @@ use std::path::PathBuf;
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
use crate::bottom_pane::MentionBinding;
use crate::history_cell;
use crate::mention_codec::decode_history_mentions;
use codex_protocol::protocol::Op;
use codex_protocol::user_input::TextElement;
use tracing::warn;
/// A composer history entry that can rehydrate draft state.
#[derive(Debug, Clone, PartialEq)]
@@ -237,7 +236,6 @@ impl ChatComposerHistory {
}
/// Integrate a GetHistoryEntryResponse event.
#[cfg(test)]
pub fn on_entry_response(
&mut self,
log_id: u64,
@@ -280,16 +278,10 @@ impl ChatComposerHistory {
self.last_history_text = Some(entry.text.clone());
return Some(entry);
} else if let Some(log_id) = self.history_log_id {
warn!(
app_event_tx.send(AppEvent::CodexOp(Op::GetHistoryEntryRequest {
offset: global_idx,
log_id,
offset = global_idx,
"composer history fetch is unavailable in app-server TUI"
);
app_event_tx.send(AppEvent::InsertHistoryCell(Box::new(
history_cell::new_error_event(
"Composer history fetch: Not available in app-server TUI yet.".to_string(),
),
)));
}));
}
None
}
@@ -344,17 +336,18 @@ mod tests {
assert!(history.should_handle_navigation("", 0));
assert!(history.navigate_up(&tx).is_none()); // don't replace the text yet
// Verify that the app-server TUI emits an explicit user-facing stub error instead.
// Verify that a history lookup request was sent.
let event = rx.try_recv().expect("expected AppEvent to be sent");
let AppEvent::InsertHistoryCell(cell) = event else {
let AppEvent::CodexOp(op) = event else {
panic!("unexpected event variant");
};
let rendered = cell
.display_lines(80)
.into_iter()
.map(|line| line.to_string())
.collect::<String>();
assert!(rendered.contains("Composer history fetch: Not available in app-server TUI yet."));
assert_eq!(
Op::GetHistoryEntryRequest {
log_id: 1,
offset: 2,
},
op
);
// Inject the async response.
assert_eq!(
@@ -365,17 +358,18 @@ mod tests {
// Next Up should move to offset 1.
assert!(history.navigate_up(&tx).is_none()); // don't replace the text yet
// Verify second stub error for offset 1.
// Verify second lookup request for offset 1.
let event2 = rx.try_recv().expect("expected second event");
let AppEvent::InsertHistoryCell(cell) = event2 else {
let AppEvent::CodexOp(op) = event2 else {
panic!("unexpected event variant");
};
let rendered = cell
.display_lines(80)
.into_iter()
.map(|line| line.to_string())
.collect::<String>();
assert!(rendered.contains("Composer history fetch: Not available in app-server TUI yet."));
assert_eq!(
Op::GetHistoryEntryRequest {
log_id: 1,
offset: 1,
},
op
);
assert_eq!(
Some(HistoryEntry::new("older".to_string())),

View File

@@ -1073,7 +1073,6 @@ impl BottomPane {
|| self.composer.is_in_paste_burst()
}
#[cfg(test)]
pub(crate) fn on_history_entry_response(
&mut self,
log_id: u64,

View File

@@ -46,6 +46,8 @@ use crate::audio_device::list_realtime_audio_device_names;
use crate::bottom_pane::StatusLineItem;
use crate::bottom_pane::StatusLinePreviewData;
use crate::bottom_pane::StatusLineSetupView;
use crate::mention_codec::LinkedMention;
use crate::mention_codec::encode_history_mentions;
use crate::model_catalog::ModelCatalog;
use crate::multi_agents;
use crate::status::RateLimitWindowDisplay;
@@ -3474,8 +3476,7 @@ impl ChatWidget {
}
}
#[cfg(test)]
fn on_get_history_entry_response(
pub(crate) fn handle_history_entry_response(
&mut self,
event: codex_protocol::protocol::GetHistoryEntryResponseEvent,
) {
@@ -5316,9 +5317,19 @@ impl ChatWidget {
return;
}
// Persist the text to cross-session message history.
// Persist the text to cross-session message history. Mentions are
// encoded into placeholder syntax so recall can reconstruct the
// mention bindings in a future session.
if !text.is_empty() {
warn!("skipping composer history persistence in app-server TUI");
let encoded_mentions = mention_bindings
.iter()
.map(|binding| LinkedMention {
mention: binding.mention.clone(),
path: binding.path.clone(),
})
.collect::<Vec<_>>();
let history_text = encode_history_mentions(&text, &encoded_mentions);
self.submit_op(Op::AddToHistory { text: history_text });
}
if let Some(pending_steer) = pending_steer {
@@ -6440,7 +6451,7 @@ impl ChatWidget {
EventMsg::McpToolCallEnd(ev) => self.on_mcp_tool_call_end(ev),
EventMsg::WebSearchBegin(ev) => self.on_web_search_begin(ev),
EventMsg::WebSearchEnd(ev) => self.on_web_search_end(ev),
EventMsg::GetHistoryEntryResponse(ev) => self.on_get_history_entry_response(ev),
EventMsg::GetHistoryEntryResponse(ev) => self.handle_history_entry_response(ev),
EventMsg::McpListToolsResponse(ev) => self.on_list_mcp_tools(ev),
EventMsg::ListCustomPromptsResponse(_) => {
tracing::warn!(

View File

@@ -19,13 +19,13 @@ installs the pinned runtime package automatically.
## Quickstart
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
thread = codex.thread_start(model="gpt-5")
completed_turn = thread.turn(TextInput("Say hello in one sentence.")).run()
print(completed_turn.status)
print(completed_turn.id)
result = thread.run("Say hello in one sentence.")
print(result.final_response)
print(len(result.items))
```
## Docs map
@@ -95,4 +95,6 @@ This supports the CI release flow:
- `Codex()` is eager and performs startup + `initialize` in the constructor.
- Use context managers (`with Codex() as codex:`) to ensure shutdown.
- Prefer `thread.run("...")` for the common case. Use `thread.turn(...)` when
you need streaming, steering, or interrupt control.
- For transient overload, use `codex_app_server.retry.retry_on_overload`.

View File

@@ -2,7 +2,7 @@
Public surface of `codex_app_server` for app-server v2.
This SDK surface is experimental. The current implementation intentionally allows only one active `TurnHandle.stream()` or `TurnHandle.run()` consumer per client instance at a time.
This SDK surface is experimental. The current implementation intentionally allows only one active turn consumer (`Thread.run()`, `TurnHandle.stream()`, or `TurnHandle.run()`) per client instance at a time.
## Package Entry
@@ -10,6 +10,7 @@ This SDK surface is experimental. The current implementation intentionally allow
from codex_app_server import (
Codex,
AsyncCodex,
RunResult,
Thread,
AsyncThread,
TurnHandle,
@@ -24,7 +25,7 @@ from codex_app_server import (
MentionInput,
TurnStatus,
)
from codex_app_server.generated.v2_all import ThreadItem
from codex_app_server.generated.v2_all import ThreadItem, ThreadTokenUsage
```
- Version: `codex_app_server.__version__`
@@ -97,6 +98,7 @@ async with AsyncCodex() as codex:
### Thread
- `run(input: str | Input, *, approval_policy=None, approvals_reviewer=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, service_tier=None, summary=None) -> RunResult`
- `turn(input: Input, *, approval_policy=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, summary=None) -> TurnHandle`
- `read(*, include_turns: bool = False) -> ThreadReadResponse`
- `set_name(name: str) -> ThreadSetNameResponse`
@@ -104,11 +106,23 @@ async with AsyncCodex() as codex:
### AsyncThread
- `run(input: str | Input, *, approval_policy=None, approvals_reviewer=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, service_tier=None, summary=None) -> Awaitable[RunResult]`
- `turn(input: Input, *, approval_policy=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, summary=None) -> Awaitable[AsyncTurnHandle]`
- `read(*, include_turns: bool = False) -> Awaitable[ThreadReadResponse]`
- `set_name(name: str) -> Awaitable[ThreadSetNameResponse]`
- `compact() -> Awaitable[ThreadCompactStartResponse]`
`run(...)` is the common-case convenience path. It accepts plain strings, starts
the turn, consumes notifications until completion, and returns a small result
object with:
- `final_response: str`
- `items: list[ThreadItem]`
- `usage: ThreadTokenUsage | None`
Use `turn(...)` when you need low-level turn control (`stream()`, `steer()`,
`interrupt()`) or the canonical generated `Turn` from `TurnHandle.run()`.
## TurnHandle / AsyncTurnHandle
### TurnHandle
@@ -181,10 +195,10 @@ from codex_app_server import (
## Example
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
completed_turn = thread.turn(TextInput("Say hello in one sentence.")).run()
print(completed_turn.id, completed_turn.status)
result = thread.run("Say hello in one sentence.")
print(result.final_response)
```

View File

@@ -22,41 +22,41 @@ Requirements:
## 2) Run your first turn (sync)
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
server = codex.metadata.serverInfo
print("Server:", None if server is None else server.name, None if server is None else server.version)
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
completed_turn = thread.turn(TextInput("Say hello in one sentence.")).run()
result = thread.run("Say hello in one sentence.")
print("Thread:", thread.id)
print("Turn:", completed_turn.id)
print("Status:", completed_turn.status)
print("Items:", len(completed_turn.items or []))
print("Text:", result.final_response)
print("Items:", len(result.items))
```
What happened:
- `Codex()` started and initialized `codex app-server`.
- `thread_start(...)` created a thread.
- `turn(...).run()` consumed events until `turn/completed` and returned the canonical generated app-server `Turn` model.
- one client can have only one active `TurnHandle.stream()` / `TurnHandle.run()` consumer at a time in the current experimental build
- `thread.run("...")` started a turn, consumed events until completion, and returned the final assistant response plus collected items and usage.
- use `thread.turn(...)` when you need a `TurnHandle` for streaming, steering, interrupting, or turn IDs/status
- one client can have only one active turn consumer (`thread.run(...)`, `TurnHandle.stream()`, or `TurnHandle.run()`) at a time in the current experimental build
## 3) Continue the same thread (multi-turn)
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex() as codex:
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
first = thread.turn(TextInput("Summarize Rust ownership in 2 bullets.")).run()
second = thread.turn(TextInput("Now explain it to a Python developer.")).run()
first = thread.run("Summarize Rust ownership in 2 bullets.")
second = thread.run("Now explain it to a Python developer.")
print("first:", first.id, first.status)
print("second:", second.id, second.status)
print("first:", first.final_response)
print("second:", second.final_response)
```
## 4) Async parity
@@ -66,15 +66,14 @@ initializes lazily, and context entry makes startup/shutdown explicit.
```python
import asyncio
from codex_app_server import AsyncCodex, TextInput
from codex_app_server import AsyncCodex
async def main() -> None:
async with AsyncCodex() as codex:
thread = await codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
turn = await thread.turn(TextInput("Continue where we left off."))
completed_turn = await turn.run()
print(completed_turn.id, completed_turn.status)
result = await thread.run("Continue where we left off.")
print(result.final_response)
asyncio.run(main())
@@ -83,14 +82,14 @@ asyncio.run(main())
## 5) Resume an existing thread
```python
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
THREAD_ID = "thr_123" # replace with a real id
with Codex() as codex:
thread = codex.thread_resume(THREAD_ID)
completed_turn = thread.turn(TextInput("Continue where we left off.")).run()
print(completed_turn.id, completed_turn.status)
result = thread.run("Continue where we left off.")
print(result.final_response)
```
## 6) Generated models

View File

@@ -6,9 +6,7 @@ if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import (
assistant_text_from_turn,
ensure_local_sdk_src,
find_turn_by_id,
runtime_config,
server_label,
)
@@ -17,7 +15,7 @@ ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex, TextInput
from codex_app_server import AsyncCodex
async def main() -> None:
@@ -25,13 +23,9 @@ async def main() -> None:
print("Server:", server_label(codex.metadata))
thread = await codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
turn = await thread.turn(TextInput("Say hello in one sentence."))
result = await turn.run()
persisted = await thread.read(include_turns=True)
persisted_turn = find_turn_by_id(persisted.thread.turns, result.id)
print("Status:", result.status)
print("Text:", assistant_text_from_turn(persisted_turn))
result = await thread.run("Say hello in one sentence.")
print("Items:", len(result.items))
print("Text:", result.final_response)
if __name__ == "__main__":

View File

@@ -6,23 +6,19 @@ if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import (
assistant_text_from_turn,
ensure_local_sdk_src,
find_turn_by_id,
runtime_config,
server_label,
)
ensure_local_sdk_src()
from codex_app_server import Codex, TextInput
from codex_app_server import Codex
with Codex(config=runtime_config()) as codex:
print("Server:", server_label(codex.metadata))
thread = codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
result = thread.turn(TextInput("Say hello in one sentence.")).run()
persisted = thread.read(include_turns=True)
persisted_turn = find_turn_by_id(persisted.thread.turns, result.id)
print("Status:", result.status)
print("Text:", assistant_text_from_turn(persisted_turn))
result = thread.run("Say hello in one sentence.")
print("Items:", len(result.items))
print("Text:", result.final_response)

View File

@@ -47,6 +47,7 @@ from .api import (
InputItem,
LocalImageInput,
MentionInput,
RunResult,
SkillInput,
TextInput,
Thread,
@@ -68,6 +69,7 @@ __all__ = [
"TurnHandle",
"AsyncTurnHandle",
"InitializeResponse",
"RunResult",
"Input",
"InputItem",
"TextInput",

View File

@@ -7,7 +7,9 @@ from typing import AsyncIterator, Iterator
from .async_client import AsyncAppServerClient
from .client import AppServerClient, AppServerConfig
from .generated.v2_all import (
ApprovalsReviewer,
AskForApproval,
ItemCompletedNotification,
ModelListResponse,
Personality,
ReasoningEffort,
@@ -27,10 +29,13 @@ from .generated.v2_all import (
ThreadSortKey,
ThreadSourceKind,
ThreadStartParams,
ThreadTokenUsage,
ThreadTokenUsageUpdatedNotification,
Turn as AppServerTurn,
TurnCompletedNotification,
TurnInterruptResponse,
TurnStartParams,
TurnStatus,
TurnSteerResponse,
)
from .models import InitializeResponse, JsonObject, Notification, ServerInfo
@@ -65,6 +70,14 @@ class MentionInput:
InputItem = TextInput | ImageInput | LocalImageInput | SkillInput | MentionInput
Input = list[InputItem] | InputItem
RunInput = Input | str
@dataclass(slots=True)
class RunResult:
    """Aggregate outcome of a completed `Thread.run()` / `AsyncThread.run()` call."""

    # Text of the last assistant message completed during the turn ("" if none).
    final_response: str
    # Every item that completed during the turn, in arrival order.
    items: list[ThreadItem]
    # Latest token-usage snapshot reported for the turn, if the server sent one.
    usage: ThreadTokenUsage | None
def _to_wire_item(item: InputItem) -> JsonObject:
@@ -87,6 +100,108 @@ def _to_wire_input(input: Input) -> list[JsonObject]:
return [_to_wire_item(input)]
def _normalize_run_input(input: RunInput) -> Input:
    """Wrap a bare string as a TextInput; structured input passes through as-is."""
    return TextInput(input) if isinstance(input, str) else input
def _assistant_text_from_item(item: ThreadItem) -> str | None:
    """Extract assistant-authored text from a thread item, or None if it carries none.

    Handles both wire shapes seen in notifications: the flat `agentMessage`
    item, and the structured assistant `message` whose content list holds
    `output_text` parts (concatenated in order).
    """
    raw = item.model_dump(mode="json") if hasattr(item, "model_dump") else item
    if not isinstance(raw, dict):
        return None
    kind = raw.get("type")
    if kind == "agentMessage":
        text = raw.get("text")
        return text if isinstance(text, str) and text else None
    if kind == "message" and raw.get("role") == "assistant":
        parts = [
            part["text"]
            for part in raw.get("content") or []
            if isinstance(part, dict)
            and part.get("type") == "output_text"
            and isinstance(part.get("text"), str)
            and part["text"]
        ]
        return "".join(parts) or None
    return None
def _final_assistant_response_from_items(items: list[ThreadItem]) -> str:
    """Return the text of the last item carrying assistant text, or "" if none do."""
    # Scanning in reverse and returning the first hit is equivalent to the
    # forward "keep overwriting with the latest non-None" formulation.
    for item in reversed(items):
        text = _assistant_text_from_item(item)
        if text is not None:
            return text
    return ""
def _raise_for_failed_turn(turn: AppServerTurn) -> None:
    """Raise RuntimeError when *turn* ended in the failed status; otherwise no-op.

    Prefers the server-provided error message; falls back to a generic
    message naming the status.
    """
    if turn.status == TurnStatus.failed:
        error = turn.error
        if error is not None and error.message:
            raise RuntimeError(error.message)
        raise RuntimeError(f"turn failed with status {turn.status.value}")
def _collect_run_result(stream: Iterator[Notification], *, turn_id: str) -> RunResult:
    """Drain *stream* for the turn identified by *turn_id* and build a RunResult.

    Collects items from matching item/completed notifications and keeps the
    latest matching token-usage snapshot. Stops as soon as the matching
    turn/completed notification arrives; previously the loop relied on the
    stream terminating on its own, which could over-read (or hang on)
    notifications emitted after the turn was already over.

    Raises:
        RuntimeError: if the stream ends without a turn/completed event for
            this turn, or the turn completed with a failed status.
    """
    completed: TurnCompletedNotification | None = None
    items: list[ThreadItem] = []
    usage: ThreadTokenUsage | None = None
    for event in stream:
        payload = event.payload
        if isinstance(payload, ItemCompletedNotification) and payload.turn_id == turn_id:
            items.append(payload.item)
            continue
        if isinstance(payload, ThreadTokenUsageUpdatedNotification) and payload.turn_id == turn_id:
            usage = payload.token_usage
            continue
        if isinstance(payload, TurnCompletedNotification) and payload.turn.id == turn_id:
            completed = payload
            break  # the turn is over; do not wait on further notifications
    if completed is None:
        raise RuntimeError("turn completed event not received")
    _raise_for_failed_turn(completed.turn)
    return RunResult(
        final_response=_final_assistant_response_from_items(items),
        items=items,
        usage=usage,
    )
async def _collect_async_run_result(
    stream: AsyncIterator[Notification], *, turn_id: str
) -> RunResult:
    """Async counterpart of `_collect_run_result`.

    Drains *stream* for the turn identified by *turn_id*, collecting completed
    items and the latest token-usage snapshot, and stops as soon as the
    matching turn/completed notification arrives (previously the loop relied
    on the stream terminating on its own, which could over-read notifications
    emitted after completion).

    Raises:
        RuntimeError: if the stream ends without a turn/completed event for
            this turn, or the turn completed with a failed status.
    """
    completed: TurnCompletedNotification | None = None
    items: list[ThreadItem] = []
    usage: ThreadTokenUsage | None = None
    async for event in stream:
        payload = event.payload
        if isinstance(payload, ItemCompletedNotification) and payload.turn_id == turn_id:
            items.append(payload.item)
            continue
        if isinstance(payload, ThreadTokenUsageUpdatedNotification) and payload.turn_id == turn_id:
            usage = payload.token_usage
            continue
        if isinstance(payload, TurnCompletedNotification) and payload.turn.id == turn_id:
            completed = payload
            break  # the turn is over; do not wait on further notifications
    if completed is None:
        raise RuntimeError("turn completed event not received")
    _raise_for_failed_turn(completed.turn)
    return RunResult(
        final_response=_final_assistant_response_from_items(items),
        items=items,
        usage=usage,
    )
def _split_user_agent(user_agent: str) -> tuple[str | None, str | None]:
raw = user_agent.strip()
if not raw:
@@ -503,6 +618,40 @@ class Thread:
_client: AppServerClient
id: str
def run(
    self,
    input: RunInput,
    *,
    approval_policy: AskForApproval | None = None,
    approvals_reviewer: ApprovalsReviewer | None = None,
    cwd: str | None = None,
    effort: ReasoningEffort | None = None,
    model: str | None = None,
    output_schema: JsonObject | None = None,
    personality: Personality | None = None,
    sandbox_policy: SandboxPolicy | None = None,
    service_tier: ServiceTier | None = None,
    summary: ReasoningSummary | None = None,
) -> RunResult:
    """Start a turn from *input*, block until it completes, and return a RunResult.

    Convenience wrapper over `turn(...)`: a plain string is wrapped as text
    input, the turn's notification stream is consumed to completion, and the
    collected items, final assistant text, and token usage are returned.
    All keyword options are forwarded to `turn(...)` unchanged.

    Raises:
        RuntimeError: if the turn fails, or the stream ends without a
            turn/completed event.
    """
    turn = self.turn(
        _normalize_run_input(input),
        approval_policy=approval_policy,
        approvals_reviewer=approvals_reviewer,
        cwd=cwd,
        effort=effort,
        model=model,
        output_schema=output_schema,
        personality=personality,
        sandbox_policy=sandbox_policy,
        service_tier=service_tier,
        summary=summary,
    )
    stream = turn.stream()
    try:
        return _collect_run_result(stream, turn_id=turn.id)
    finally:
        # Always release the notification stream, even if collection raises.
        stream.close()
# BEGIN GENERATED: Thread.flat_methods
def turn(
self,
@@ -553,6 +702,40 @@ class AsyncThread:
_codex: AsyncCodex
id: str
async def run(
    self,
    input: RunInput,
    *,
    approval_policy: AskForApproval | None = None,
    approvals_reviewer: ApprovalsReviewer | None = None,
    cwd: str | None = None,
    effort: ReasoningEffort | None = None,
    model: str | None = None,
    output_schema: JsonObject | None = None,
    personality: Personality | None = None,
    sandbox_policy: SandboxPolicy | None = None,
    service_tier: ServiceTier | None = None,
    summary: ReasoningSummary | None = None,
) -> RunResult:
    """Async counterpart of `Thread.run`: start a turn and await its completion.

    A plain string is wrapped as text input, the async notification stream is
    consumed to completion, and the collected items, final assistant text, and
    token usage are returned. All keyword options are forwarded to `turn(...)`
    unchanged.

    Raises:
        RuntimeError: if the turn fails, or the stream ends without a
            turn/completed event.
    """
    turn = await self.turn(
        _normalize_run_input(input),
        approval_policy=approval_policy,
        approvals_reviewer=approvals_reviewer,
        cwd=cwd,
        effort=effort,
        model=model,
        output_schema=output_schema,
        personality=personality,
        sandbox_policy=sandbox_policy,
        service_tier=service_tier,
        summary=summary,
    )
    stream = turn.stream()
    try:
        return await _collect_async_run_result(stream, turn_id=turn.id)
    finally:
        # Always release the notification stream, even if collection raises.
        await stream.aclose()
# BEGIN GENERATED: AsyncThread.flat_methods
async def turn(
self,

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import asyncio
from collections import deque
from pathlib import Path
from types import SimpleNamespace
import pytest
@@ -10,14 +11,19 @@ import codex_app_server.api as public_api_module
from codex_app_server.client import AppServerClient
from codex_app_server.generated.v2_all import (
AgentMessageDeltaNotification,
ItemCompletedNotification,
ThreadTokenUsageUpdatedNotification,
TurnCompletedNotification,
TurnStatus,
)
from codex_app_server.models import InitializeResponse, Notification
from codex_app_server.api import (
AsyncCodex,
AsyncThread,
AsyncTurnHandle,
Codex,
RunResult,
Thread,
TurnHandle,
)
@@ -48,16 +54,74 @@ def _completed_notification(
thread_id: str = "thread-1",
turn_id: str = "turn-1",
status: str = "completed",
error_message: str | None = None,
) -> Notification:
turn: dict[str, object] = {
"id": turn_id,
"items": [],
"status": status,
}
if error_message is not None:
turn["error"] = {"message": error_message}
return Notification(
method="turn/completed",
payload=TurnCompletedNotification.model_validate(
{
"threadId": thread_id,
"turn": {
"id": turn_id,
"items": [],
"status": status,
"turn": turn,
}
),
)
def _item_completed_notification(
    *,
    thread_id: str = "thread-1",
    turn_id: str = "turn-1",
    text: str = "final text",
) -> Notification:
    """Build a fake item/completed notification carrying one agentMessage item."""
    return Notification(
        method="item/completed",
        payload=ItemCompletedNotification.model_validate(
            {
                "item": {
                    "id": "item-1",
                    "text": text,
                    "type": "agentMessage",
                },
                "threadId": thread_id,
                "turnId": turn_id,
            }
        ),
    )
def _token_usage_notification(
*,
thread_id: str = "thread-1",
turn_id: str = "turn-1",
) -> Notification:
return Notification(
method="thread/tokenUsage/updated",
payload=ThreadTokenUsageUpdatedNotification.model_validate(
{
"threadId": thread_id,
"turnId": turn_id,
"tokenUsage": {
"last": {
"cachedInputTokens": 1,
"inputTokens": 2,
"outputTokens": 3,
"reasoningOutputTokens": 4,
"totalTokens": 9,
},
"total": {
"cachedInputTokens": 5,
"inputTokens": 6,
"outputTokens": 7,
"reasoningOutputTokens": 8,
"totalTokens": 26,
},
},
}
),
@@ -225,6 +289,162 @@ def test_turn_run_returns_completed_turn_payload() -> None:
assert result.items == []
def test_thread_run_accepts_string_input_and_returns_run_result() -> None:
    """Thread.run("...") wraps the string as text input and aggregates the turn."""
    client = AppServerClient()
    item_event = _item_completed_notification(text="Hello.")
    usage_event = _token_usage_notification()
    events = iter([item_event, usage_event, _completed_notification()])
    client.next_notification = lambda: next(events)  # type: ignore[method-assign]

    captured: dict[str, object] = {}

    def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202
        captured["thread_id"] = thread_id
        captured["wire_input"] = wire_input
        captured["params"] = params
        return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

    client.turn_start = fake_turn_start  # type: ignore[method-assign]

    result = Thread(client, "thread-1").run("hello")

    assert captured["thread_id"] == "thread-1"
    assert captured["wire_input"] == [{"type": "text", "text": "hello"}]
    assert result == RunResult(
        final_response="Hello.",
        items=[item_event.payload.item],
        usage=usage_event.payload.token_usage,
    )
def test_thread_run_uses_last_completed_assistant_message_as_final_response() -> None:
    """When several assistant messages complete, the last one wins as final_response."""
    client = AppServerClient()
    earlier = _item_completed_notification(text="First message")
    later = _item_completed_notification(text="Second message")
    events = iter([earlier, later, _completed_notification()])
    client.next_notification = lambda: next(events)  # type: ignore[method-assign]

    def fake_turn_start(thread_id, wire_input, *, params=None):  # noqa: ANN001,ANN202,ARG001
        return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

    client.turn_start = fake_turn_start  # type: ignore[method-assign]

    result = Thread(client, "thread-1").run("hello")

    assert result.final_response == "Second message"
    assert result.items == [earlier.payload.item, later.payload.item]
def test_thread_run_raises_on_failed_turn() -> None:
    """A failed turn surfaces the server-reported error message as RuntimeError."""
    client = AppServerClient()
    events = iter([_completed_notification(status="failed", error_message="boom")])
    client.next_notification = lambda: next(events)  # type: ignore[method-assign]

    def fake_turn_start(thread_id, wire_input, *, params=None):  # noqa: ANN001,ANN202,ARG001
        return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

    client.turn_start = fake_turn_start  # type: ignore[method-assign]

    with pytest.raises(RuntimeError, match="boom"):
        Thread(client, "thread-1").run("hello")
def test_async_thread_run_accepts_string_input_and_returns_run_result() -> None:
    """AsyncThread.run mirrors the sync path: string input, aggregated RunResult."""

    async def scenario() -> None:
        codex = AsyncCodex()

        # Skip real process startup/initialize; the fakes below stand in for RPC.
        async def fake_ensure_initialized() -> None:
            return None

        item_notification = _item_completed_notification(text="Hello async.")
        usage_notification = _token_usage_notification()
        notifications: deque[Notification] = deque(
            [
                item_notification,
                usage_notification,
                _completed_notification(),
            ]
        )

        seen: dict[str, object] = {}

        async def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202
            seen["thread_id"] = thread_id
            seen["wire_input"] = wire_input
            seen["params"] = params
            return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

        async def fake_next_notification() -> Notification:
            return notifications.popleft()

        codex._ensure_initialized = fake_ensure_initialized  # type: ignore[method-assign]
        codex._client.turn_start = fake_turn_start  # type: ignore[method-assign]
        codex._client.next_notification = fake_next_notification  # type: ignore[method-assign]

        result = await AsyncThread(codex, "thread-1").run("hello")

        assert seen["thread_id"] == "thread-1"
        assert seen["wire_input"] == [{"type": "text", "text": "hello"}]
        assert result == RunResult(
            final_response="Hello async.",
            items=[item_notification.payload.item],
            usage=usage_notification.payload.token_usage,
        )

    asyncio.run(scenario())
def test_async_thread_run_uses_last_completed_assistant_message_as_final_response() -> None:
    """Async parity: final_response is the text of the last completed assistant message."""

    async def scenario() -> None:
        codex = AsyncCodex()

        # Skip real process startup/initialize; the fakes below stand in for RPC.
        async def fake_ensure_initialized() -> None:
            return None

        first_item_notification = _item_completed_notification(text="First async message")
        second_item_notification = _item_completed_notification(text="Second async message")
        notifications: deque[Notification] = deque(
            [
                first_item_notification,
                second_item_notification,
                _completed_notification(),
            ]
        )

        async def fake_turn_start(thread_id: str, wire_input: object, *, params=None):  # noqa: ANN001,ANN202,ARG001
            return SimpleNamespace(turn=SimpleNamespace(id="turn-1"))

        async def fake_next_notification() -> Notification:
            return notifications.popleft()

        codex._ensure_initialized = fake_ensure_initialized  # type: ignore[method-assign]
        codex._client.turn_start = fake_turn_start  # type: ignore[method-assign]
        codex._client.next_notification = fake_next_notification  # type: ignore[method-assign]

        result = await AsyncThread(codex, "thread-1").run("hello")

        assert result.final_response == "Second async message"
        assert result.items == [
            first_item_notification.payload.item,
            second_item_notification.payload.item,
        ]

    asyncio.run(scenario())
def test_retry_examples_compare_status_with_enum() -> None:
for path in (
ROOT / "examples" / "10_error_handling_and_retry" / "sync.py",

View File

@@ -4,7 +4,7 @@ import importlib.resources as resources
import inspect
from typing import Any
from codex_app_server import AppServerConfig
from codex_app_server import AppServerConfig, RunResult
from codex_app_server.models import InitializeResponse
from codex_app_server.api import AsyncCodex, AsyncThread, Codex, Thread
@@ -31,6 +31,10 @@ def test_root_exports_app_server_config() -> None:
assert AppServerConfig.__name__ == "AppServerConfig"
def test_root_exports_run_result() -> None:
    """`from codex_app_server import RunResult` resolves to the real class."""
    assert RunResult.__name__ == "RunResult"
def test_package_includes_py_typed_marker() -> None:
    """The package ships a PEP 561 `py.typed` marker so type checkers use its hints."""
    marker = resources.files("codex_app_server").joinpath("py.typed")
    assert marker.is_file()
@@ -101,6 +105,18 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
"service_tier",
"summary",
],
Thread.run: [
"approval_policy",
"approvals_reviewer",
"cwd",
"effort",
"model",
"output_schema",
"personality",
"sandbox_policy",
"service_tier",
"summary",
],
AsyncCodex.thread_start: [
"approval_policy",
"approvals_reviewer",
@@ -164,6 +180,18 @@ def test_generated_public_signatures_are_snake_case_and_typed() -> None:
"service_tier",
"summary",
],
AsyncThread.run: [
"approval_policy",
"approvals_reviewer",
"cwd",
"effort",
"model",
"output_schema",
"personality",
"sandbox_policy",
"service_tier",
"summary",
],
}
for fn, expected_kwargs in expected.items():

View File

@@ -265,6 +265,36 @@ def test_real_thread_and_turn_start_smoke(runtime_env: PreparedRuntimeEnv) -> No
assert isinstance(data["persisted_items_count"], int)
def test_real_thread_run_convenience_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """End-to-end: `thread.run("...")` against a real app-server yields text/items/usage."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json

            from codex_app_server import Codex

            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                result = thread.run("say ok")
                print(json.dumps({
                    "thread_id": thread.id,
                    "final_response": result.final_response,
                    "items_count": len(result.items),
                    "has_usage": result.usage is not None,
                }))
            """
        ),
    )
    # Shape-only assertions: real model output is nondeterministic.
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    assert isinstance(data["final_response"], str) and data["final_response"].strip()
    assert isinstance(data["items_count"], int)
    assert isinstance(data["has_usage"], bool)
def test_real_async_thread_turn_usage_and_ids_smoke(
runtime_env: PreparedRuntimeEnv,
) -> None:
@@ -308,6 +338,42 @@ def test_real_async_thread_turn_usage_and_ids_smoke(
assert isinstance(data["persisted_items_count"], int)
def test_real_async_thread_run_convenience_smoke(
    runtime_env: PreparedRuntimeEnv,
) -> None:
    """End-to-end async parity check for `thread.run` against a real app-server."""
    data = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import asyncio
            import json

            from codex_app_server import AsyncCodex

            async def main():
                async with AsyncCodex() as codex:
                    thread = await codex.thread_start(
                        model="gpt-5.4",
                        config={"model_reasoning_effort": "high"},
                    )
                    result = await thread.run("say ok")
                    print(json.dumps({
                        "thread_id": thread.id,
                        "final_response": result.final_response,
                        "items_count": len(result.items),
                        "has_usage": result.usage is not None,
                    }))

            asyncio.run(main())
            """
        ),
    )
    # Shape-only assertions: real model output is nondeterministic.
    assert isinstance(data["thread_id"], str) and data["thread_id"].strip()
    assert isinstance(data["final_response"], str) and data["final_response"].strip()
    assert isinstance(data["items_count"], int)
    assert isinstance(data["has_usage"], bool)
def test_notebook_bootstrap_resolves_sdk_and_runtime_from_unrelated_cwd(
runtime_env: PreparedRuntimeEnv,
) -> None: