Files
codex/codex-rs/core/src/agent/control.rs
Owen Lin efc8d45750 feat(app-server): experimental flag to persist extended history (#11227)
This PR adds an experimental `persist_extended_history` bool flag to
app-server thread APIs so rollout logs can retain a richer set of
EventMsgs for non-lossy Thread > Turn > ThreadItems reconstruction (i.e.
on `thread/resume`).

### Motivation
Today, our rollout recorder only persists a small subset (e.g. user
message, reasoning, assistant message) of `EventMsg` types, dropping a
good number (like command exec, file change, etc.) that are important
for reconstructing full item history for `thread/resume`, `thread/read`,
and `thread/fork`.

Some clients want to be able to resume a thread without lossiness. This
lossiness is primarily a UI thing, since what the model sees are
`ResponseItem` and not `EventMsg`.

### Approach
This change introduces an opt-in `persist_full_history` flag to preserve
those events when you start/resume/fork a thread (defaults to `false`).

This is done by adding an `EventPersistenceMode` to the rollout
recorder:
- `Limited` (existing behavior, default)
- `Extended` (new opt-in behavior)

In `Extended` mode, persist additional `EventMsg` variants needed for
non-lossy app-server `ThreadItem` reconstruction. We now store the
following ThreadItems that we didn't before:
- web search
- command execution
- patch/file changes
- MCP tool calls
- image view calls
- collab tool outcomes
- context compaction
- review mode enter/exit

For **command executions** in particular, we truncate the output using
the existing `truncate_text` from core to store an upper bound of 10,000
bytes, which is also the default value for truncating tool outputs shown
to the model. This keeps the size of the rollout file and command
execution items returned over the wire reasonable.

And we also persist `EventMsg::Error` which we can now map back to the
Turn's status and populates the Turn's error metadata.

#### Updates to EventMsgs
To truly make `thread/resume` non-lossy, we also needed to persist the
`status` on `EventMsg::CommandExecutionEndEvent` and
`EventMsg::PatchApplyEndEvent`. Previously it was not obvious whether a
command failed or was declined (similar for apply_patch). These
EventMsgs were never persisted before so I made it a required field.
2026-02-12 19:34:22 +00:00

676 lines
23 KiB
Rust

use crate::agent::AgentStatus;
use crate::agent::guards::Guards;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::thread_manager::ThreadManagerState;
use codex_protocol::ThreadId;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SessionSource;
use codex_protocol::user_input::UserInput;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Weak;
use tokio::sync::watch;
/// Control-plane handle for multi-agent operations.
/// `AgentControl` is held by each session (via `SessionServices`). It provides capability to
/// spawn new agents and the inter-agent communication layer.
/// An `AgentControl` instance is shared per "user session" which means the same `AgentControl`
/// is used for every sub-agent spawned by Codex. By doing so, we make sure the guards are
/// scoped to a user session.
#[derive(Clone, Default)]
pub(crate) struct AgentControl {
/// Weak handle back to the global thread registry/state.
/// This is `Weak` to avoid reference cycles and shadow persistence of the form
/// `ThreadManagerState -> CodexThread -> Session -> SessionServices -> ThreadManagerState`.
manager: Weak<ThreadManagerState>,
state: Arc<Guards>,
}
impl AgentControl {
/// Construct a new `AgentControl` that can spawn/message agents via the given manager state.
pub(crate) fn new(manager: Weak<ThreadManagerState>) -> Self {
Self {
manager,
..Default::default()
}
}
/// Spawn a new agent thread and submit the initial prompt.
pub(crate) async fn spawn_agent(
&self,
config: crate::config::Config,
items: Vec<UserInput>,
session_source: Option<SessionSource>,
) -> CodexResult<ThreadId> {
let state = self.upgrade()?;
let reservation = self.state.reserve_spawn_slot(config.agent_max_threads)?;
// The same `AgentControl` is sent to spawn the thread.
let new_thread = match session_source {
Some(session_source) => {
state
.spawn_new_thread_with_source(config, self.clone(), session_source, false)
.await?
}
None => state.spawn_new_thread(config, self.clone()).await?,
};
reservation.commit(new_thread.thread_id);
// Notify a new thread has been created. This notification will be processed by clients
// to subscribe or drain this newly created thread.
// TODO(jif) add helper for drain
state.notify_thread_created(new_thread.thread_id);
self.send_input(new_thread.thread_id, items).await?;
Ok(new_thread.thread_id)
}
/// Resume an existing agent thread from a recorded rollout file.
pub(crate) async fn resume_agent_from_rollout(
&self,
config: crate::config::Config,
rollout_path: PathBuf,
session_source: SessionSource,
) -> CodexResult<ThreadId> {
let state = self.upgrade()?;
let reservation = self.state.reserve_spawn_slot(config.agent_max_threads)?;
let resumed_thread = state
.resume_thread_from_rollout_with_source(
config,
rollout_path,
self.clone(),
session_source,
)
.await?;
reservation.commit(resumed_thread.thread_id);
// Resumed threads are re-registered in-memory and need the same listener
// attachment path as freshly spawned threads.
state.notify_thread_created(resumed_thread.thread_id);
Ok(resumed_thread.thread_id)
}
/// Send rich user input items to an existing agent thread.
pub(crate) async fn send_input(
&self,
agent_id: ThreadId,
items: Vec<UserInput>,
) -> CodexResult<String> {
let state = self.upgrade()?;
let result = state
.send_op(
agent_id,
Op::UserInput {
items,
final_output_json_schema: None,
},
)
.await;
if matches!(result, Err(CodexErr::InternalAgentDied)) {
let _ = state.remove_thread(&agent_id).await;
self.state.release_spawned_thread(agent_id);
}
result
}
/// Interrupt the current task for an existing agent thread.
pub(crate) async fn interrupt_agent(&self, agent_id: ThreadId) -> CodexResult<String> {
let state = self.upgrade()?;
state.send_op(agent_id, Op::Interrupt).await
}
/// Submit a shutdown request to an existing agent thread.
pub(crate) async fn shutdown_agent(&self, agent_id: ThreadId) -> CodexResult<String> {
let state = self.upgrade()?;
let result = state.send_op(agent_id, Op::Shutdown {}).await;
let _ = state.remove_thread(&agent_id).await;
self.state.release_spawned_thread(agent_id);
result
}
/// Fetch the last known status for `agent_id`, returning `NotFound` when unavailable.
pub(crate) async fn get_status(&self, agent_id: ThreadId) -> AgentStatus {
let Ok(state) = self.upgrade() else {
// No agent available if upgrade fails.
return AgentStatus::NotFound;
};
let Ok(thread) = state.get_thread(agent_id).await else {
return AgentStatus::NotFound;
};
thread.agent_status().await
}
/// Subscribe to status updates for `agent_id`, yielding the latest value and changes.
pub(crate) async fn subscribe_status(
&self,
agent_id: ThreadId,
) -> CodexResult<watch::Receiver<AgentStatus>> {
let state = self.upgrade()?;
let thread = state.get_thread(agent_id).await?;
Ok(thread.subscribe_status())
}
fn upgrade(&self) -> CodexResult<Arc<ThreadManagerState>> {
self.manager
.upgrade()
.ok_or_else(|| CodexErr::UnsupportedOperation("thread manager dropped".to_string()))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::CodexAuth;
use crate::CodexThread;
use crate::ThreadManager;
use crate::agent::agent_status_from_event;
use crate::config::Config;
use crate::config::ConfigBuilder;
use assert_matches::assert_matches;
use codex_protocol::config_types::ModeKind;
use codex_protocol::protocol::ErrorEvent;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::protocol::TurnAbortedEvent;
use codex_protocol::protocol::TurnCompleteEvent;
use codex_protocol::protocol::TurnStartedEvent;
use pretty_assertions::assert_eq;
use tempfile::TempDir;
use toml::Value as TomlValue;
async fn test_config_with_cli_overrides(
cli_overrides: Vec<(String, TomlValue)>,
) -> (TempDir, Config) {
let home = TempDir::new().expect("create temp dir");
let config = ConfigBuilder::default()
.codex_home(home.path().to_path_buf())
.cli_overrides(cli_overrides)
.build()
.await
.expect("load default test config");
(home, config)
}
async fn test_config() -> (TempDir, Config) {
test_config_with_cli_overrides(Vec::new()).await
}
fn text_input(text: &str) -> Vec<UserInput> {
vec![UserInput::Text {
text: text.to_string(),
text_elements: Vec::new(),
}]
}
struct AgentControlHarness {
_home: TempDir,
config: Config,
manager: ThreadManager,
control: AgentControl,
}
impl AgentControlHarness {
async fn new() -> Self {
let (home, config) = test_config().await;
let manager = ThreadManager::with_models_provider_and_home_for_tests(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
config.codex_home.clone(),
);
let control = manager.agent_control();
Self {
_home: home,
config,
manager,
control,
}
}
async fn start_thread(&self) -> (ThreadId, Arc<CodexThread>) {
let new_thread = self
.manager
.start_thread(self.config.clone())
.await
.expect("start thread");
(new_thread.thread_id, new_thread.thread)
}
}
#[tokio::test]
async fn send_input_errors_when_manager_dropped() {
let control = AgentControl::default();
let err = control
.send_input(
ThreadId::new(),
vec![UserInput::Text {
text: "hello".to_string(),
text_elements: Vec::new(),
}],
)
.await
.expect_err("send_input should fail without a manager");
assert_eq!(
err.to_string(),
"unsupported operation: thread manager dropped"
);
}
#[tokio::test]
async fn get_status_returns_not_found_without_manager() {
let control = AgentControl::default();
let got = control.get_status(ThreadId::new()).await;
assert_eq!(got, AgentStatus::NotFound);
}
#[tokio::test]
async fn on_event_updates_status_from_task_started() {
let status = agent_status_from_event(&EventMsg::TurnStarted(TurnStartedEvent {
turn_id: "turn-1".to_string(),
model_context_window: None,
collaboration_mode_kind: ModeKind::Default,
}));
assert_eq!(status, Some(AgentStatus::Running));
}
#[tokio::test]
async fn on_event_updates_status_from_task_complete() {
let status = agent_status_from_event(&EventMsg::TurnComplete(TurnCompleteEvent {
turn_id: "turn-1".to_string(),
last_agent_message: Some("done".to_string()),
}));
let expected = AgentStatus::Completed(Some("done".to_string()));
assert_eq!(status, Some(expected));
}
#[tokio::test]
async fn on_event_updates_status_from_error() {
let status = agent_status_from_event(&EventMsg::Error(ErrorEvent {
message: "boom".to_string(),
codex_error_info: None,
}));
let expected = AgentStatus::Errored("boom".to_string());
assert_eq!(status, Some(expected));
}
#[tokio::test]
async fn on_event_updates_status_from_turn_aborted() {
let status = agent_status_from_event(&EventMsg::TurnAborted(TurnAbortedEvent {
turn_id: Some("turn-1".to_string()),
reason: TurnAbortReason::Interrupted,
}));
let expected = AgentStatus::Errored("Interrupted".to_string());
assert_eq!(status, Some(expected));
}
#[tokio::test]
async fn on_event_updates_status_from_shutdown_complete() {
let status = agent_status_from_event(&EventMsg::ShutdownComplete);
assert_eq!(status, Some(AgentStatus::Shutdown));
}
#[tokio::test]
async fn spawn_agent_errors_when_manager_dropped() {
let control = AgentControl::default();
let (_home, config) = test_config().await;
let err = control
.spawn_agent(config, text_input("hello"), None)
.await
.expect_err("spawn_agent should fail without a manager");
assert_eq!(
err.to_string(),
"unsupported operation: thread manager dropped"
);
}
#[tokio::test]
async fn resume_agent_errors_when_manager_dropped() {
let control = AgentControl::default();
let (_home, config) = test_config().await;
let err = control
.resume_agent_from_rollout(
config,
PathBuf::from("/tmp/missing-rollout.jsonl"),
SessionSource::Exec,
)
.await
.expect_err("resume_agent should fail without a manager");
assert_eq!(
err.to_string(),
"unsupported operation: thread manager dropped"
);
}
#[tokio::test]
async fn send_input_errors_when_thread_missing() {
let harness = AgentControlHarness::new().await;
let thread_id = ThreadId::new();
let err = harness
.control
.send_input(
thread_id,
vec![UserInput::Text {
text: "hello".to_string(),
text_elements: Vec::new(),
}],
)
.await
.expect_err("send_input should fail for missing thread");
assert_matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id);
}
#[tokio::test]
async fn get_status_returns_not_found_for_missing_thread() {
let harness = AgentControlHarness::new().await;
let status = harness.control.get_status(ThreadId::new()).await;
assert_eq!(status, AgentStatus::NotFound);
}
#[tokio::test]
async fn get_status_returns_pending_init_for_new_thread() {
let harness = AgentControlHarness::new().await;
let (thread_id, _) = harness.start_thread().await;
let status = harness.control.get_status(thread_id).await;
assert_eq!(status, AgentStatus::PendingInit);
}
#[tokio::test]
async fn subscribe_status_errors_for_missing_thread() {
let harness = AgentControlHarness::new().await;
let thread_id = ThreadId::new();
let err = harness
.control
.subscribe_status(thread_id)
.await
.expect_err("subscribe_status should fail for missing thread");
assert_matches!(err, CodexErr::ThreadNotFound(id) if id == thread_id);
}
#[tokio::test]
async fn subscribe_status_updates_on_shutdown() {
let harness = AgentControlHarness::new().await;
let (thread_id, thread) = harness.start_thread().await;
let mut status_rx = harness
.control
.subscribe_status(thread_id)
.await
.expect("subscribe_status should succeed");
assert_eq!(status_rx.borrow().clone(), AgentStatus::PendingInit);
let _ = thread
.submit(Op::Shutdown {})
.await
.expect("shutdown should submit");
let _ = status_rx.changed().await;
assert_eq!(status_rx.borrow().clone(), AgentStatus::Shutdown);
}
#[tokio::test]
async fn send_input_submits_user_message() {
let harness = AgentControlHarness::new().await;
let (thread_id, _thread) = harness.start_thread().await;
let submission_id = harness
.control
.send_input(
thread_id,
vec![UserInput::Text {
text: "hello from tests".to_string(),
text_elements: Vec::new(),
}],
)
.await
.expect("send_input should succeed");
assert!(!submission_id.is_empty());
let expected = (
thread_id,
Op::UserInput {
items: vec![UserInput::Text {
text: "hello from tests".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
},
);
let captured = harness
.manager
.captured_ops()
.into_iter()
.find(|entry| *entry == expected);
assert_eq!(captured, Some(expected));
}
#[tokio::test]
async fn spawn_agent_creates_thread_and_sends_prompt() {
let harness = AgentControlHarness::new().await;
let thread_id = harness
.control
.spawn_agent(harness.config.clone(), text_input("spawned"), None)
.await
.expect("spawn_agent should succeed");
let _thread = harness
.manager
.get_thread(thread_id)
.await
.expect("thread should be registered");
let expected = (
thread_id,
Op::UserInput {
items: vec![UserInput::Text {
text: "spawned".to_string(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
},
);
let captured = harness
.manager
.captured_ops()
.into_iter()
.find(|entry| *entry == expected);
assert_eq!(captured, Some(expected));
}
#[tokio::test]
async fn spawn_agent_respects_max_threads_limit() {
let max_threads = 1usize;
let (_home, config) = test_config_with_cli_overrides(vec![(
"agents.max_threads".to_string(),
TomlValue::Integer(max_threads as i64),
)])
.await;
let manager = ThreadManager::with_models_provider_and_home_for_tests(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
config.codex_home.clone(),
);
let control = manager.agent_control();
let _ = manager
.start_thread(config.clone())
.await
.expect("start thread");
let first_agent_id = control
.spawn_agent(config.clone(), text_input("hello"), None)
.await
.expect("spawn_agent should succeed");
let err = control
.spawn_agent(config, text_input("hello again"), None)
.await
.expect_err("spawn_agent should respect max threads");
let CodexErr::AgentLimitReached {
max_threads: seen_max_threads,
} = err
else {
panic!("expected CodexErr::AgentLimitReached");
};
assert_eq!(seen_max_threads, max_threads);
let _ = control
.shutdown_agent(first_agent_id)
.await
.expect("shutdown agent");
}
#[tokio::test]
async fn spawn_agent_releases_slot_after_shutdown() {
let max_threads = 1usize;
let (_home, config) = test_config_with_cli_overrides(vec![(
"agents.max_threads".to_string(),
TomlValue::Integer(max_threads as i64),
)])
.await;
let manager = ThreadManager::with_models_provider_and_home_for_tests(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
config.codex_home.clone(),
);
let control = manager.agent_control();
let first_agent_id = control
.spawn_agent(config.clone(), text_input("hello"), None)
.await
.expect("spawn_agent should succeed");
let _ = control
.shutdown_agent(first_agent_id)
.await
.expect("shutdown agent");
let second_agent_id = control
.spawn_agent(config.clone(), text_input("hello again"), None)
.await
.expect("spawn_agent should succeed after shutdown");
let _ = control
.shutdown_agent(second_agent_id)
.await
.expect("shutdown agent");
}
#[tokio::test]
async fn spawn_agent_limit_shared_across_clones() {
let max_threads = 1usize;
let (_home, config) = test_config_with_cli_overrides(vec![(
"agents.max_threads".to_string(),
TomlValue::Integer(max_threads as i64),
)])
.await;
let manager = ThreadManager::with_models_provider_and_home_for_tests(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
config.codex_home.clone(),
);
let control = manager.agent_control();
let cloned = control.clone();
let first_agent_id = cloned
.spawn_agent(config.clone(), text_input("hello"), None)
.await
.expect("spawn_agent should succeed");
let err = control
.spawn_agent(config, text_input("hello again"), None)
.await
.expect_err("spawn_agent should respect shared guard");
let CodexErr::AgentLimitReached { max_threads } = err else {
panic!("expected CodexErr::AgentLimitReached");
};
assert_eq!(max_threads, 1);
let _ = control
.shutdown_agent(first_agent_id)
.await
.expect("shutdown agent");
}
#[tokio::test]
async fn resume_agent_respects_max_threads_limit() {
let max_threads = 1usize;
let (_home, config) = test_config_with_cli_overrides(vec![(
"agents.max_threads".to_string(),
TomlValue::Integer(max_threads as i64),
)])
.await;
let manager = ThreadManager::with_models_provider_and_home_for_tests(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
config.codex_home.clone(),
);
let control = manager.agent_control();
let resumable_id = control
.spawn_agent(config.clone(), text_input("hello"), None)
.await
.expect("spawn_agent should succeed");
let rollout_path = manager
.get_thread(resumable_id)
.await
.expect("thread should exist")
.rollout_path()
.expect("rollout path should exist");
let _ = control
.shutdown_agent(resumable_id)
.await
.expect("shutdown resumable thread");
let active_id = control
.spawn_agent(config.clone(), text_input("occupy"), None)
.await
.expect("spawn_agent should succeed for active slot");
let err = control
.resume_agent_from_rollout(config, rollout_path, SessionSource::Exec)
.await
.expect_err("resume should respect max threads");
let CodexErr::AgentLimitReached {
max_threads: seen_max_threads,
} = err
else {
panic!("expected CodexErr::AgentLimitReached");
};
assert_eq!(seen_max_threads, max_threads);
let _ = control
.shutdown_agent(active_id)
.await
.expect("shutdown active thread");
}
#[tokio::test]
async fn resume_agent_releases_slot_after_resume_failure() {
let max_threads = 1usize;
let (_home, config) = test_config_with_cli_overrides(vec![(
"agents.max_threads".to_string(),
TomlValue::Integer(max_threads as i64),
)])
.await;
let manager = ThreadManager::with_models_provider_and_home_for_tests(
CodexAuth::from_api_key("dummy"),
config.model_provider.clone(),
config.codex_home.clone(),
);
let control = manager.agent_control();
let missing_rollout = config.codex_home.join("sessions/missing-rollout.jsonl");
let _ = control
.resume_agent_from_rollout(config.clone(), missing_rollout, SessionSource::Exec)
.await
.expect_err("resume should fail for missing rollout path");
let resumed_id = control
.spawn_agent(config, text_input("hello"), None)
.await
.expect("spawn should succeed after failed resume");
let _ = control
.shutdown_agent(resumed_id)
.await
.expect("shutdown resumed thread");
}
}