mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
Add `thread/rollback` to app-server to support IDEs undoing the last N turns of a thread. For context, an IDE partner will be supporting an "undo" capability where the IDE (the app-server client) will be responsible for reverting the local changes made during the last turn. To support this well, we also need a way to drop the last turn (or more generally, the last N turns) from the agent's context. This is what `thread/rollback` does. **Core idea**: A Thread rollback is represented as a persisted event message (EventMsg::ThreadRollback) in the rollout JSONL file, not by rewriting history. On resume, both the model's context (core replay) and the UI turn list (app-server v2's thread history builder) apply these markers so the pruned history is consistent across live conversations and `thread/resume`. Implementation notes: - Rollback only affects agent context and appends to the rollout file; clients are responsible for reverting files on disk. - If a thread rollback is currently in progress, subsequent `thread/rollback` calls are rejected. - Because we use `CodexConversation::submit` and codex core tracks active turns, returning an error on concurrent rollbacks is communicated via an `EventMsg::Error` with a new variant `CodexErrorInfo::ThreadRollbackFailed`. app-server watches for that and sends the BAD_REQUEST RPC response. Tests cover thread rollbacks in both core and app-server, including when `num_turns` > existing turns (which clears all turns). **Note**: this explicitly does **not** behave like `/undo` which we just removed from the CLI, which does the opposite of what `thread/rollback` does. `/undo` reverts local changes via ghost commits/snapshots and does not modify the agent's context / conversation history.
178 lines
5.7 KiB
Rust
use anyhow::Result;
|
|
use app_test_support::McpProcess;
|
|
use app_test_support::create_final_assistant_message_sse_response;
|
|
use app_test_support::create_mock_chat_completions_server_unchecked;
|
|
use app_test_support::to_response;
|
|
use codex_app_server_protocol::JSONRPCResponse;
|
|
use codex_app_server_protocol::RequestId;
|
|
use codex_app_server_protocol::ThreadItem;
|
|
use codex_app_server_protocol::ThreadResumeParams;
|
|
use codex_app_server_protocol::ThreadResumeResponse;
|
|
use codex_app_server_protocol::ThreadRollbackParams;
|
|
use codex_app_server_protocol::ThreadRollbackResponse;
|
|
use codex_app_server_protocol::ThreadStartParams;
|
|
use codex_app_server_protocol::ThreadStartResponse;
|
|
use codex_app_server_protocol::TurnStartParams;
|
|
use codex_app_server_protocol::UserInput as V2UserInput;
|
|
use pretty_assertions::assert_eq;
|
|
use tempfile::TempDir;
|
|
use tokio::time::timeout;
|
|
|
|
/// Upper bound on how long we wait for any single message from the app-server process.
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
|
|
|
|
#[tokio::test]
|
|
async fn thread_rollback_drops_last_turns_and_persists_to_rollout() -> Result<()> {
|
|
// Three Codex turns hit the mock model (session start + two turn/start calls).
|
|
let responses = vec![
|
|
create_final_assistant_message_sse_response("Done")?,
|
|
create_final_assistant_message_sse_response("Done")?,
|
|
create_final_assistant_message_sse_response("Done")?,
|
|
];
|
|
let server = create_mock_chat_completions_server_unchecked(responses).await;
|
|
|
|
let codex_home = TempDir::new()?;
|
|
create_config_toml(codex_home.path(), &server.uri())?;
|
|
|
|
let mut mcp = McpProcess::new(codex_home.path()).await?;
|
|
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
|
|
|
// Start a thread.
|
|
let start_id = mcp
|
|
.send_thread_start_request(ThreadStartParams {
|
|
model: Some("mock-model".to_string()),
|
|
..Default::default()
|
|
})
|
|
.await?;
|
|
let start_resp: JSONRPCResponse = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp.read_stream_until_response_message(RequestId::Integer(start_id)),
|
|
)
|
|
.await??;
|
|
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
|
|
|
|
// Two turns.
|
|
let first_text = "First";
|
|
let turn1_id = mcp
|
|
.send_turn_start_request(TurnStartParams {
|
|
thread_id: thread.id.clone(),
|
|
input: vec![V2UserInput::Text {
|
|
text: first_text.to_string(),
|
|
}],
|
|
..Default::default()
|
|
})
|
|
.await?;
|
|
let _turn1_resp: JSONRPCResponse = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp.read_stream_until_response_message(RequestId::Integer(turn1_id)),
|
|
)
|
|
.await??;
|
|
let _completed1 = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp.read_stream_until_notification_message("turn/completed"),
|
|
)
|
|
.await??;
|
|
|
|
let turn2_id = mcp
|
|
.send_turn_start_request(TurnStartParams {
|
|
thread_id: thread.id.clone(),
|
|
input: vec![V2UserInput::Text {
|
|
text: "Second".to_string(),
|
|
}],
|
|
..Default::default()
|
|
})
|
|
.await?;
|
|
let _turn2_resp: JSONRPCResponse = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp.read_stream_until_response_message(RequestId::Integer(turn2_id)),
|
|
)
|
|
.await??;
|
|
let _completed2 = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp.read_stream_until_notification_message("turn/completed"),
|
|
)
|
|
.await??;
|
|
|
|
// Roll back the last turn.
|
|
let rollback_id = mcp
|
|
.send_thread_rollback_request(ThreadRollbackParams {
|
|
thread_id: thread.id.clone(),
|
|
num_turns: 1,
|
|
})
|
|
.await?;
|
|
let rollback_resp: JSONRPCResponse = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp.read_stream_until_response_message(RequestId::Integer(rollback_id)),
|
|
)
|
|
.await??;
|
|
let ThreadRollbackResponse {
|
|
thread: rolled_back_thread,
|
|
} = to_response::<ThreadRollbackResponse>(rollback_resp)?;
|
|
|
|
assert_eq!(rolled_back_thread.turns.len(), 1);
|
|
assert_eq!(rolled_back_thread.turns[0].items.len(), 2);
|
|
match &rolled_back_thread.turns[0].items[0] {
|
|
ThreadItem::UserMessage { content, .. } => {
|
|
assert_eq!(
|
|
content,
|
|
&vec![V2UserInput::Text {
|
|
text: first_text.to_string()
|
|
}]
|
|
);
|
|
}
|
|
other => panic!("expected user message item, got {other:?}"),
|
|
}
|
|
|
|
// Resume and confirm the history is pruned.
|
|
let resume_id = mcp
|
|
.send_thread_resume_request(ThreadResumeParams {
|
|
thread_id: thread.id,
|
|
..Default::default()
|
|
})
|
|
.await?;
|
|
let resume_resp: JSONRPCResponse = timeout(
|
|
DEFAULT_READ_TIMEOUT,
|
|
mcp.read_stream_until_response_message(RequestId::Integer(resume_id)),
|
|
)
|
|
.await??;
|
|
let ThreadResumeResponse { thread, .. } = to_response::<ThreadResumeResponse>(resume_resp)?;
|
|
|
|
assert_eq!(thread.turns.len(), 1);
|
|
assert_eq!(thread.turns[0].items.len(), 2);
|
|
match &thread.turns[0].items[0] {
|
|
ThreadItem::UserMessage { content, .. } => {
|
|
assert_eq!(
|
|
content,
|
|
&vec![V2UserInput::Text {
|
|
text: first_text.to_string()
|
|
}]
|
|
);
|
|
}
|
|
other => panic!("expected user message item, got {other:?}"),
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Write a minimal `config.toml` into `codex_home` pointing the mock provider
/// at `server_uri` (chat-completions wire API, retries disabled).
fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> {
    let contents = format!(
        r#"
model = "mock-model"
approval_policy = "never"
sandbox_mode = "read-only"

model_provider = "mock_provider"

[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "chat"
request_max_retries = 0
stream_max_retries = 0
"#
    );
    std::fs::write(codex_home.join("config.toml"), contents)
}
|