Compare commits

...

21 Commits

Author SHA1 Message Date
Charles Cunningham
120ef3e15a Gate todo update notifications by client version 2026-02-02 12:58:24 -08:00
Charles Cunningham
76ab4ac6a7 Resume old session note 2026-01-29 13:36:59 -08:00
Charles Cunningham
12ed31f94b fix bug 2026-01-29 11:48:27 -08:00
Charles Cunningham
e54ff678fd Accept todo+plan payloads 2026-01-29 11:29:17 -08:00
Charles Cunningham
1a65f5369c Revert list_models test and drop update_plan alias 2026-01-29 11:16:22 -08:00
Charles Cunningham
b26a2352b7 update comment 2026-01-29 11:09:16 -08:00
Charles Cunningham
a5262514cd comment 2026-01-29 11:09:16 -08:00
Charles Cunningham
6401df9321 prompt tweak 2026-01-29 11:09:16 -08:00
Charles Cunningham
710247c37b Remove Plan mode references from todo_write prompts 2026-01-29 11:09:16 -08:00
Charles Cunningham
437d37a823 Hide legacy plan field from todo_write tool spec 2026-01-29 11:09:16 -08:00
Charles Cunningham
7c96c5a1e2 Document todo/plan alias deserialization 2026-01-29 11:09:16 -08:00
Charles Cunningham
3ccc15dcdc Prefer explicit todo field over legacy plan alias 2026-01-29 11:09:16 -08:00
Charles Cunningham
1e7dab7e22 Clean up todo update notification serialization 2026-01-29 11:09:16 -08:00
Charles Cunningham
26e007ba70 Rename todo update notification and align legacy aliases 2026-01-29 11:09:16 -08:00
Charles Cunningham
5c91b90aed Rename plan step types and drop todo alias 2026-01-29 11:09:16 -08:00
Charles Cunningham
16dafc0c52 Fix bundled models.json escaping 2026-01-29 11:09:16 -08:00
Charles Cunningham
894e924276 Stabilize list_models test expectations 2026-01-29 11:09:16 -08:00
Charles Cunningham
8b8840047c Rename update_plan tool to todo_write 2026-01-29 11:09:15 -08:00
Charles Cunningham
e162f96b78 More aliasing 2026-01-29 11:09:15 -08:00
Charles Cunningham
525e0d7f3c Rename plan tool types to todo and dual-emit todo/plan 2026-01-29 11:09:15 -08:00
Charles Cunningham
b0f1b9f622 Rename update_plan tool to todo_write (keep update_plan alias) 2026-01-29 11:09:15 -08:00
41 changed files with 902 additions and 429 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -1102,6 +1102,7 @@ dependencies = [
"os_info",
"pretty_assertions",
"rmcp",
"semver",
"serde",
"serde_json",
"serial_test",

View File

@@ -429,7 +429,10 @@ macro_rules! server_notification_definitions {
(
$(
$(#[$variant_meta:meta])*
$variant:ident $(=> $wire:literal)? ( $payload:ty )
// Optional wire name and aliases:
// `Variant => "wire/name", aliases(["old/name"]) (Payload)`
// expands to serde rename + alias attributes on the enum variant.
$variant:ident $(=> $wire:literal $(, aliases([$($alias:literal),* $(,)?]))? )? ( $payload:ty )
),* $(,)?
) => {
/// Notification sent from the server to the client.
@@ -439,7 +442,16 @@ macro_rules! server_notification_definitions {
pub enum ServerNotification {
$(
$(#[$variant_meta])*
$(#[serde(rename = $wire)] #[ts(rename = $wire)] #[strum(serialize = $wire)])?
$(
// If a wire name is provided, emit serde/TS/strum renames
// plus any legacy serde aliases for compatibility.
#[serde(rename = $wire)]
#[ts(rename = $wire)]
#[strum(serialize = $wire)]
$(
$(#[serde(alias = $alias)])*
)?
)?
$variant($payload),
)*
}
@@ -582,7 +594,7 @@ server_notification_definitions! {
TurnStarted => "turn/started" (v2::TurnStartedNotification),
TurnCompleted => "turn/completed" (v2::TurnCompletedNotification),
TurnDiffUpdated => "turn/diff/updated" (v2::TurnDiffUpdatedNotification),
TurnPlanUpdated => "turn/plan/updated" (v2::TurnPlanUpdatedNotification),
TurnTodosUpdated => "turn/todos/updated", aliases(["turn/plan/updated"]) (v2::TurnTodosUpdatedNotification),
ItemStarted => "item/started" (v2::ItemStartedNotification),
ItemCompleted => "item/completed" (v2::ItemCompletedNotification),
/// This event is internal-only. Used by Codex Cloud.

View File

@@ -17,8 +17,6 @@ use codex_protocol::items::TurnItem as CoreTurnItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
use codex_protocol::plan_tool::PlanItemArg as CorePlanItemArg;
use codex_protocol::plan_tool::StepStatus as CorePlanStepStatus;
use codex_protocol::protocol::AgentStatus as CoreAgentStatus;
use codex_protocol::protocol::AskForApproval as CoreAskForApproval;
use codex_protocol::protocol::CodexErrorInfo as CoreCodexErrorInfo;
@@ -36,6 +34,8 @@ use codex_protocol::protocol::SkillToolDependency as CoreSkillToolDependency;
use codex_protocol::protocol::SubAgentSource as CoreSubAgentSource;
use codex_protocol::protocol::TokenUsage as CoreTokenUsage;
use codex_protocol::protocol::TokenUsageInfo as CoreTokenUsageInfo;
use codex_protocol::todo_tool::TodoItemArg as CoreTodoItemArg;
use codex_protocol::todo_tool::TodoStatus as CoreTodoStatus;
use codex_protocol::user_input::ByteRange as CoreByteRange;
use codex_protocol::user_input::TextElement as CoreTextElement;
use codex_protocol::user_input::UserInput as CoreUserInput;
@@ -46,7 +46,9 @@ use mcp_types::ResourceTemplate as McpResourceTemplate;
use mcp_types::Tool as McpTool;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Deserializer;
use serde::Serialize;
use serde::Serializer;
use serde_json::Value as JsonValue;
use thiserror::Error;
use ts_rs::TS;
@@ -2253,35 +2255,122 @@ pub struct TurnDiffUpdatedNotification {
pub diff: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[derive(Debug, Clone, PartialEq, JsonSchema, TS)]
#[schemars(schema_with = "TurnTodosUpdatedNotificationWire::json_schema")]
#[ts(export_to = "v2/")]
pub struct TurnPlanUpdatedNotification {
#[ts(as = "TurnTodosUpdatedNotificationWire")]
/// Todo list update from the todo_write tool. `todo` is preferred; `plan` is a legacy alias.
pub struct TurnTodosUpdatedNotification {
pub thread_id: String,
pub turn_id: String,
pub explanation: Option<String>,
pub plan: Vec<TurnPlanStep>,
#[serde(default)]
pub todo: Vec<TurnTodoStep>,
}
#[derive(Serialize, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
/// Wire type that keeps serde, schemars, and ts-rs aligned on legacy aliases.
///
/// This is the on-the-wire shape of `TurnTodosUpdatedNotification`: it carries
/// both the `todo` list and the legacy `plan` list so the serialized payload,
/// the JSON schema, and the TypeScript definition all describe the same fields.
struct TurnTodosUpdatedNotificationWire {
thread_id: String,
turn_id: String,
explanation: Option<String>,
// Preferred field holding the todo steps.
#[serde(default)]
todo: Vec<TurnTodoStep>,
// Legacy alias for `todo`, kept so older payloads/clients keep working.
#[serde(default)]
plan: Vec<TurnTodoStep>,
}
impl<'de> Deserialize<'de> for TurnTodosUpdatedNotificationWire {
    /// Decodes the wire payload, folding the legacy `plan` list into `todo`.
    ///
    /// Resolution order for the resulting `todo` list:
    /// 1. `todo`, when present in the payload;
    /// 2. otherwise `plan`, when present;
    /// 3. otherwise an empty list.
    ///
    /// The returned value always has an empty `plan`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // Both list fields are optional here so that "absent" can be told
        // apart from "present but empty" when choosing between them.
        #[derive(Deserialize)]
        #[serde(rename_all = "camelCase")]
        struct RawPayload {
            thread_id: String,
            turn_id: String,
            explanation: Option<String>,
            #[serde(default)]
            todo: Option<Vec<TurnTodoStep>>,
            #[serde(default)]
            plan: Option<Vec<TurnTodoStep>>,
        }

        let RawPayload {
            thread_id,
            turn_id,
            explanation,
            todo,
            plan,
        } = RawPayload::deserialize(deserializer)?;

        // `todo` takes precedence over the legacy `plan` field.
        let todo = match (todo, plan) {
            (Some(current), _) => current,
            (None, Some(legacy)) => legacy,
            (None, None) => Vec::new(),
        };

        Ok(Self {
            thread_id,
            turn_id,
            explanation,
            todo,
            plan: Vec::new(),
        })
    }
}
impl Serialize for TurnTodosUpdatedNotification {
    /// Serializes through the wire type so the payload carries both the
    /// `todo` list and an identical copy under the legacy `plan` field.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Self {
            thread_id,
            turn_id,
            explanation,
            todo,
        } = self;

        // The legacy `plan` field mirrors `todo` exactly.
        let wire = TurnTodosUpdatedNotificationWire {
            thread_id: thread_id.clone(),
            turn_id: turn_id.clone(),
            explanation: explanation.clone(),
            todo: todo.clone(),
            plan: todo.clone(),
        };
        wire.serialize(serializer)
    }
}
impl<'de> Deserialize<'de> for TurnTodosUpdatedNotification {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let wire = TurnTodosUpdatedNotificationWire::deserialize(deserializer)?;
Ok(Self {
thread_id: wire.thread_id,
turn_id: wire.turn_id,
explanation: wire.explanation,
todo: wire.todo,
})
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct TurnPlanStep {
/// Todo step used by the todo list update notification.
pub struct TurnTodoStep {
pub step: String,
pub status: TurnPlanStepStatus,
pub status: TurnTodoStepStatus,
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum TurnPlanStepStatus {
/// Todo status used by the todo list update notification.
pub enum TurnTodoStepStatus {
Pending,
InProgress,
Completed,
}
impl From<CorePlanItemArg> for TurnPlanStep {
fn from(value: CorePlanItemArg) -> Self {
#[deprecated(note = "use TurnTodoStep")]
pub type TurnPlanStep = TurnTodoStep;
#[deprecated(note = "use TurnTodoStepStatus")]
pub type TurnPlanStepStatus = TurnTodoStepStatus;
impl From<CoreTodoItemArg> for TurnTodoStep {
fn from(value: CoreTodoItemArg) -> Self {
Self {
step: value.step,
status: value.status.into(),
@@ -2289,12 +2378,12 @@ impl From<CorePlanItemArg> for TurnPlanStep {
}
}
impl From<CorePlanStepStatus> for TurnPlanStepStatus {
fn from(value: CorePlanStepStatus) -> Self {
impl From<CoreTodoStatus> for TurnTodoStepStatus {
fn from(value: CoreTodoStatus) -> Self {
match value {
CorePlanStepStatus::Pending => Self::Pending,
CorePlanStepStatus::InProgress => Self::InProgress,
CorePlanStepStatus::Completed => Self::Completed,
CoreTodoStatus::Pending => Self::Pending,
CoreTodoStatus::InProgress => Self::InProgress,
CoreTodoStatus::Completed => Self::Completed,
}
}
}
@@ -2865,4 +2954,23 @@ mod tests {
})
);
}
#[test]
fn turn_todos_updated_deserialize_prefers_todo_when_both_present() {
    // A payload carrying both fields with different contents: the decoded
    // notification must reflect `todo`, not the legacy `plan`.
    let payload = json!({
        "threadId": "thread-1",
        "turnId": "turn-1",
        "todo": [{ "step": "current", "status": "completed" }],
        "plan": [{ "step": "stale", "status": "pending" }],
    });

    let notification: TurnTodosUpdatedNotification = serde_json::from_value(payload).unwrap();

    let expected = vec![TurnTodoStep {
        step: "current".to_string(),
        status: TurnTodoStepStatus::Completed,
    }];
    assert_eq!(notification.todo, expected);
}
}

View File

@@ -33,6 +33,7 @@ codex-utils-json-to-toml = { workspace = true }
chrono = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
semver = { workspace = true }
mcp-types = { workspace = true }
tempfile = { workspace = true }
time = { workspace = true }

View File

@@ -433,7 +433,7 @@ The app-server streams JSON-RPC notifications while a turn is running. Each turn
- `turn/started` — `{ turn }` with the turn id, empty `items`, and `status: "inProgress"`.
- `turn/completed` — `{ turn }` where `turn.status` is `completed`, `interrupted`, or `failed`; failures carry `{ error: { message, codexErrorInfo?, additionalDetails? } }`.
- `turn/diff/updated` — `{ threadId, turnId, diff }` represents the up-to-date snapshot of the turn-level unified diff, emitted after every FileChange item. `diff` is the latest aggregated unified diff across every file change in the turn. UIs can render this to show the full "what changed" view without stitching individual `fileChange` items.
- `turn/plan/updated` — `{ turnId, explanation?, plan }` whenever the agent shares or changes its plan; each `plan` entry is `{ step, status }` with `status` in `pending`, `inProgress`, or `completed`.
- `turn/todos/updated` — `{ turnId, explanation?, todo, plan }` whenever the agent shares or changes its todo list; each entry is `{ step, status }` with `status` in `pending`, `inProgress`, or `completed`. `todo` is preferred and `plan` is a deprecated legacy alias; both are emitted for compatibility. Clients may also send/accept the legacy alias `turn/plan/updated`. (The plan-named event and field are legacy and are unrelated to Plan mode.)
Today both notifications carry an empty `items` array even when item events were streamed; rely on `item/*` notifications for the canonical item list until this is fixed.

View File

@@ -9,6 +9,7 @@ use crate::codex_message_processor::summary_to_thread;
use crate::error_code::INTERNAL_ERROR_CODE;
use crate::error_code::INVALID_REQUEST_ERROR_CODE;
use crate::outgoing_message::OutgoingMessageSender;
use crate::outgoing_message::OutgoingNotification;
use codex_app_server_protocol::AccountRateLimitsUpdatedNotification;
use codex_app_server_protocol::AgentMessageDeltaNotification;
use codex_app_server_protocol::ApplyPatchApprovalParams;
@@ -64,9 +65,9 @@ use codex_app_server_protocol::TurnCompletedNotification;
use codex_app_server_protocol::TurnDiffUpdatedNotification;
use codex_app_server_protocol::TurnError;
use codex_app_server_protocol::TurnInterruptResponse;
use codex_app_server_protocol::TurnPlanStep;
use codex_app_server_protocol::TurnPlanUpdatedNotification;
use codex_app_server_protocol::TurnStatus;
use codex_app_server_protocol::TurnTodoStep;
use codex_app_server_protocol::TurnTodosUpdatedNotification as TurnTodoUpdatedNotification;
use codex_app_server_protocol::build_turns_from_event_msgs;
use codex_core::CodexThread;
use codex_core::parse_command::shlex_join;
@@ -87,10 +88,10 @@ use codex_core::review_format::format_review_findings_block;
use codex_core::review_prompts;
use codex_protocol::ThreadId;
use codex_protocol::dynamic_tools::DynamicToolResponse as CoreDynamicToolResponse;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::protocol::ReviewOutputEvent;
use codex_protocol::request_user_input::RequestUserInputAnswer as CoreRequestUserInputAnswer;
use codex_protocol::request_user_input::RequestUserInputResponse as CoreRequestUserInputResponse;
use codex_protocol::todo_tool::UpdateTodoArgs;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::path::PathBuf;
@@ -1106,11 +1107,11 @@ pub(crate) async fn apply_bespoke_event_handling(
)
.await;
}
EventMsg::PlanUpdate(plan_update_event) => {
handle_turn_plan_update(
EventMsg::TodoUpdate(todo_update_event) => {
handle_turn_todo_update(
conversation_id,
&event_turn_id,
plan_update_event,
todo_update_event,
api_version,
outgoing.as_ref(),
)
@@ -1140,27 +1141,43 @@ async fn handle_turn_diff(
}
}
async fn handle_turn_plan_update(
async fn handle_turn_todo_update(
conversation_id: ThreadId,
event_turn_id: &str,
plan_update_event: UpdatePlanArgs,
todo_update_event: UpdateTodoArgs,
api_version: ApiVersion,
outgoing: &OutgoingMessageSender,
) {
if let ApiVersion::V2 = api_version {
let notification = TurnPlanUpdatedNotification {
let (explanation, todo_items) = todo_update_event.into_parts();
let steps: Vec<TurnTodoStep> = todo_items.into_iter().map(TurnTodoStep::from).collect();
let notification = TurnTodoUpdatedNotification {
thread_id: conversation_id.to_string(),
turn_id: event_turn_id.to_string(),
explanation: plan_update_event.explanation,
plan: plan_update_event
.plan
.into_iter()
.map(TurnPlanStep::from)
.collect(),
explanation,
todo: steps,
};
outgoing
.send_server_notification(ServerNotification::TurnPlanUpdated(notification))
.await;
if outgoing.supports_turn_todos_updated().await {
outgoing
.send_server_notification(ServerNotification::TurnTodosUpdated(notification))
.await;
return;
}
let legacy_params = match serde_json::to_value(&notification) {
Ok(params) => Some(params),
Err(err) => {
error!("failed to serialize legacy todo update notification: {err}");
None
}
};
if let Some(params) = legacy_params {
outgoing
.send_notification(OutgoingNotification {
method: "turn/plan/updated".to_string(),
params: Some(params),
})
.await;
}
}
}
@@ -1805,20 +1822,21 @@ mod tests {
use anyhow::Result;
use anyhow::anyhow;
use anyhow::bail;
use codex_app_server_protocol::TurnPlanStepStatus;
use codex_app_server_protocol::TurnTodoStepStatus;
use codex_core::protocol::CreditsSnapshot;
use codex_core::protocol::McpInvocation;
use codex_core::protocol::RateLimitSnapshot;
use codex_core::protocol::RateLimitWindow;
use codex_core::protocol::TokenUsage;
use codex_core::protocol::TokenUsageInfo;
use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::todo_tool::TodoItemArg;
use codex_protocol::todo_tool::TodoStatus;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
use pretty_assertions::assert_eq;
use serde_json::Value as JsonValue;
use serde_json::to_value;
use std::collections::HashMap;
use std::time::Duration;
use tokio::sync::Mutex;
@@ -1988,26 +2006,27 @@ mod tests {
}
#[tokio::test]
async fn test_handle_turn_plan_update_emits_notification_for_v2() -> Result<()> {
async fn test_handle_turn_todo_update_emits_notification_for_v2() -> Result<()> {
let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY);
let outgoing = OutgoingMessageSender::new(tx);
let update = UpdatePlanArgs {
explanation: Some("need plan".to_string()),
plan: vec![
PlanItemArg {
outgoing.set_client_version("0.94.0").await;
let update = UpdateTodoArgs::new(
Some("need todo list".to_string()),
vec![
TodoItemArg {
step: "first".to_string(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
},
PlanItemArg {
TodoItemArg {
step: "second".to_string(),
status: StepStatus::Completed,
status: TodoStatus::Completed,
},
],
};
);
let conversation_id = ThreadId::new();
handle_turn_plan_update(
handle_turn_todo_update(
conversation_id,
"turn-123",
update,
@@ -2019,17 +2038,75 @@ mod tests {
let msg = rx
.recv()
.await
.ok_or_else(|| anyhow!("should send one notification"))?;
.ok_or_else(|| anyhow!("should send a notification"))?;
match msg {
OutgoingMessage::AppServerNotification(ServerNotification::TurnPlanUpdated(n)) => {
OutgoingMessage::AppServerNotification(ServerNotification::TurnTodosUpdated(n)) => {
assert_eq!(n.thread_id, conversation_id.to_string());
assert_eq!(n.turn_id, "turn-123");
assert_eq!(n.explanation.as_deref(), Some("need plan"));
assert_eq!(n.plan.len(), 2);
assert_eq!(n.plan[0].step, "first");
assert_eq!(n.plan[0].status, TurnPlanStepStatus::Pending);
assert_eq!(n.plan[1].step, "second");
assert_eq!(n.plan[1].status, TurnPlanStepStatus::Completed);
assert_eq!(n.explanation.as_deref(), Some("need todo list"));
let json = to_value(&n)?;
assert_eq!(
json.get("todo"),
json.get("plan"),
"legacy plan alias should mirror todo"
);
assert_eq!(n.todo.len(), 2);
assert_eq!(n.todo[0].step, "first");
assert_eq!(n.todo[0].status, TurnTodoStepStatus::Pending);
assert_eq!(n.todo[1].step, "second");
assert_eq!(n.todo[1].status, TurnTodoStepStatus::Completed);
}
other => bail!("unexpected message: {other:?}"),
}
assert!(rx.try_recv().is_err(), "no extra messages expected");
Ok(())
}
#[tokio::test]
async fn test_handle_turn_todo_update_emits_legacy_for_old_client() -> Result<()> {
let (tx, mut rx) = mpsc::channel(CHANNEL_CAPACITY);
let outgoing = OutgoingMessageSender::new(tx);
outgoing.set_client_version("0.93.0").await;
let update = UpdateTodoArgs::new(
Some("need todo list".to_string()),
vec![
TodoItemArg {
step: "first".to_string(),
status: TodoStatus::Pending,
},
TodoItemArg {
step: "second".to_string(),
status: TodoStatus::Completed,
},
],
);
let conversation_id = ThreadId::new();
handle_turn_todo_update(
conversation_id,
"turn-123",
update,
ApiVersion::V2,
&outgoing,
)
.await;
let msg = rx
.recv()
.await
.ok_or_else(|| anyhow!("should send legacy notification"))?;
match msg {
OutgoingMessage::Notification(notification) => {
assert_eq!(notification.method, "turn/plan/updated");
let params = notification
.params
.ok_or_else(|| anyhow!("legacy notification missing params"))?;
assert_eq!(
params.get("todo"),
params.get("plan"),
"legacy plan alias should mirror todo"
);
}
other => bail!("unexpected message: {other:?}"),
}

View File

@@ -155,6 +155,7 @@ impl MessageProcessor {
if let Ok(mut suffix) = USER_AGENT_SUFFIX.lock() {
*suffix = Some(user_agent_suffix);
}
self.outgoing.set_client_version(&version).await;
let user_agent = get_codex_user_agent();
let response = InitializeResponse { user_agent };

View File

@@ -8,6 +8,7 @@ use codex_app_server_protocol::Result;
use codex_app_server_protocol::ServerNotification;
use codex_app_server_protocol::ServerRequest;
use codex_app_server_protocol::ServerRequestPayload;
use semver::Version;
use serde::Serialize;
use tokio::sync::Mutex;
use tokio::sync::mpsc;
@@ -24,6 +25,7 @@ pub(crate) struct OutgoingMessageSender {
next_request_id: AtomicI64,
sender: mpsc::Sender<OutgoingMessage>,
request_id_to_callback: Mutex<HashMap<RequestId, oneshot::Sender<Result>>>,
client_version: Mutex<Option<Version>>,
}
impl OutgoingMessageSender {
@@ -32,6 +34,21 @@ impl OutgoingMessageSender {
next_request_id: AtomicI64::new(0),
sender,
request_id_to_callback: Mutex::new(HashMap::new()),
client_version: Mutex::new(None),
}
}
/// Records the client's version string for later capability checks.
///
/// The string is parsed with `parse_cli_version`; if it cannot be parsed the
/// stored version is reset to `None`.
pub(crate) async fn set_client_version(&self, version: &str) {
    // Parse before taking the lock so the guard is held only for the store.
    let new_version = parse_cli_version(version);
    *self.client_version.lock().await = new_version;
}
pub(crate) async fn supports_turn_todos_updated(&self) -> bool {
let client_version = self.client_version.lock().await;
match client_version.as_ref() {
Some(version) => supports_turn_todos_updated(version),
None => false,
}
}
@@ -156,6 +173,22 @@ pub(crate) struct OutgoingError {
pub id: RequestId,
}
/// Minimum client version, as `(major, minor, patch)`, that is sent the
/// `turn/todos/updated` notification; older clients get the legacy method.
const TURN_TODOS_UPDATED_MIN_VERSION: (u64, u64, u64) = (0, 94, 0);
/// Extracts a semantic version from a free-form client version string.
///
/// Skips everything before the first ASCII digit, then takes the longest run
/// of ASCII digits and `.` characters and parses it as a `Version`. Anything
/// after that run (for example a `-alpha` suffix) is ignored.
///
/// Returns `None` when the string contains no digit or the extracted run is
/// not a valid version.
fn parse_cli_version(raw: &str) -> Option<Version> {
    let text = raw.trim();
    let first_digit = text.find(|c: char| c.is_ascii_digit())?;
    let tail = &text[first_digit..];

    // End of the leading `[0-9.]` run; the whole tail if nothing else follows.
    let run_len = tail
        .find(|c: char| !(c.is_ascii_digit() || c == '.'))
        .unwrap_or(tail.len());

    Version::parse(&tail[..run_len]).ok()
}
/// Returns `true` when `version` is at least `TURN_TODOS_UPDATED_MIN_VERSION`.
///
/// Only the major, minor, and patch components are compared.
fn supports_turn_todos_updated(version: &Version) -> bool {
    let current = (version.major, version.minor, version.patch);
    current >= TURN_TODOS_UPDATED_MIN_VERSION
}
#[cfg(test)]
mod tests {
use codex_app_server_protocol::AccountLoginCompletedNotification;

View File

@@ -18,12 +18,14 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL
- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
## Plan tool
## Todo tool
When using the planning tool:
- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
- Do not make single-step plans.
- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
Use the `todo_write` tool to track and communicate your task list.
- Skip using the todo tool for straightforward tasks (roughly the easiest 25%).
- Do not make single-step todo lists.
- Keep exactly one step `in_progress` at a time.
- Update the todo list after completing a step you previously shared.
## Special user requests

View File

@@ -18,12 +18,14 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL
- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
## Plan tool
## Todo tool
When using the planning tool:
- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
- Do not make single-step plans.
- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
Use the `todo_write` tool to track and communicate your task list.
- Skip using the todo tool for straightforward tasks (roughly the easiest 25%).
- Do not make single-step todo lists.
- Keep exactly one step `in_progress` at a time.
- Update the todo list after completing a step you previously shared.
## Special user requests

View File

@@ -3,7 +3,7 @@ You are GPT-5.1 running in the Codex CLI, a terminal-based coding assistant. Cod
Your capabilities:
- Receive user prompts and other context provided by the harness, such as files in the workspace.
- Communicate with the user by streaming thinking & responses, and by making & updating plans.
- Communicate with the user by streaming thinking & responses, and by making & updating todo lists.
- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
@@ -39,7 +39,7 @@ You'll work for stretches with tool calls — it's critical to keep the user upd
Frequency & Length:
- Send short updates (1–2 sentences) whenever there is a meaningful, important insight you need to share with the user to keep them informed.
- If you expect a longer heads-down stretch, post a brief heads-down note with why and when you'll report back; when you resume, summarize what you learned.
- Only the initial plan, plan updates, and final recap can be longer, with multiple bullets and paragraphs
- Only the initial plan, todo list updates, and final recap can be longer, with multiple bullets and paragraphs
Tone:
- Friendly, confident, senior-engineer energy. Positive, collaborative, humble; fix mistakes quickly.
@@ -60,31 +60,31 @@ Content:
- “Alright, build pipeline order is interesting. Checking how it reports failures.”
- “Spotted a clever caching util; now hunting where it gets used.”
## Planning
## Todo Lists
You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go.
You have access to a `todo_write` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. A todo list can help make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good todo list should break the task into meaningful, logically ordered steps that are easy to verify as you go.
Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
Note that todo lists are not for padding out simple work with filler steps or stating the obvious. The content of your list should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use todo lists for simple or single-step queries that you can just do or answer immediately.
Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Do not repeat the full contents of the todo list after a `todo_write` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your todo list after a single pass of implementation. If this is the case, you can simply mark all the steps as completed. Sometimes, you may need to change your list in the middle of a task: call `todo_write` with the updated list and make sure to provide an `explanation` of the rationale when doing so.
Maintain statuses in the tool: exactly one item in_progress at a time; mark items complete when done; post timely status transitions. Do not jump an item from pending to completed: always set it to in_progress first. Do not batch-complete multiple items after the fact. Finish with all items completed or explicitly canceled/deferred before ending the turn. Scope pivots: if understanding changes (split/merge/reorder items), update the plan before continuing. Do not let the plan go stale while coding.
Maintain statuses in the tool: exactly one item in_progress at a time; mark items complete when done; post timely status transitions. Do not jump an item from pending to completed: always set it to in_progress first. Do not batch-complete multiple items after the fact. Finish with all items completed or explicitly canceled/deferred before ending the turn. Scope pivots: if understanding changes (split/merge/reorder items), update the list before continuing. Do not let the list go stale while coding.
Use a plan when:
Use a todo list when:
- The task is non-trivial and will require multiple actions over a long time horizon.
- There are logical phases or dependencies where sequencing matters.
- The work has ambiguity that benefits from outlining high-level goals.
- You want intermediate checkpoints for feedback and validation.
- When the user asked you to do more than one thing in a single prompt
- The user has asked you to use the plan tool (aka "TODOs")
- You generate additional steps while working, and plan to do them before yielding to the user
- The user has asked you to use the todo tool
- You generate additional steps while working, and intend to do them before yielding to the user
### Examples
**High-quality plans**
**High-quality todo lists**
Example 1:
@@ -111,7 +111,7 @@ Example 3:
5. Persist messages in lightweight DB
6. Add typing indicators + unread count
**Low-quality plans**
**Low-quality todo lists**
Example 1:
@@ -320,12 +320,12 @@ It is important to remember:
- You must include a header with your intended action (Add/Delete/Update)
- You must prefix new lines with `+` even when creating a new file
## `update_plan`
## `todo_write`
A tool named `update_plan` is available to you. You can use it to keep an up-to-date, step-by-step plan for the task.
A tool named `todo_write` is available to you. You can use it to keep an up-to-date, step-by-step todo list for the task.
To create a new plan, call `update_plan` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
To create a new todo list, call `todo_write` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
When steps have been completed, use `todo_write` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `todo_write` call.
If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
If all steps are complete, ensure you call `todo_write` to mark all steps as `completed`.

View File

@@ -3,7 +3,7 @@ You are GPT-5.2 running in the Codex CLI, a terminal-based coding assistant. Cod
Your capabilities:
- Receive user prompts and other context provided by the harness, such as files in the workspace.
- Communicate with the user by streaming thinking & responses, and by making & updating plans.
- Communicate with the user by streaming thinking & responses, and by making & updating todo lists.
- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
@@ -33,31 +33,31 @@ Unless the user explicitly asks for a plan, asks a question about the code, is b
## Responsiveness
## Planning
## Todo Lists
You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go.
You have access to a `todo_write` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. A todo list can help make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good todo list should break the task into meaningful, logically ordered steps that are easy to verify as you go.
Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
Note that todo lists are not for padding out simple work with filler steps or stating the obvious. The content of your list should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use todo lists for simple or single-step queries that you can just do or answer immediately.
Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Do not repeat the full contents of the todo list after a `todo_write` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your todo list after a single pass of implementation. If this is the case, you can simply mark all the steps as completed. Sometimes, you may need to change your list in the middle of a task: call `todo_write` with the updated list and make sure to provide an `explanation` of the rationale when doing so.
Maintain statuses in the tool: exactly one item in_progress at a time; mark items complete when done; post timely status transitions. Do not jump an item from pending to completed: always set it to in_progress first. Do not batch-complete multiple items after the fact. Finish with all items completed or explicitly canceled/deferred before ending the turn. Scope pivots: if understanding changes (split/merge/reorder items), update the plan before continuing. Do not let the plan go stale while coding.
Maintain statuses in the tool: exactly one item in_progress at a time; mark items complete when done; post timely status transitions. Do not jump an item from pending to completed: always set it to in_progress first. Do not batch-complete multiple items after the fact. Finish with all items completed or explicitly canceled/deferred before ending the turn. Scope pivots: if understanding changes (split/merge/reorder items), update the list before continuing. Do not let the list go stale while coding.
Use a plan when:
Use a todo list when:
- The task is non-trivial and will require multiple actions over a long time horizon.
- There are logical phases or dependencies where sequencing matters.
- The work has ambiguity that benefits from outlining high-level goals.
- You want intermediate checkpoints for feedback and validation.
- When the user asked you to do more than one thing in a single prompt
- The user has asked you to use the plan tool (aka "TODOs")
- You generate additional steps while working, and plan to do them before yielding to the user
- The user has asked you to use the todo tool
- You generate additional steps while working, and intend to do them before yielding to the user
### Examples
**High-quality plans**
**High-quality todo lists**
Example 1:
@@ -84,7 +84,7 @@ Example 3:
5. Persist messages in lightweight DB
6. Add typing indicators + unread count
**Low-quality plans**
**Low-quality todo lists**
Example 1:
@@ -104,7 +104,7 @@ Example 3:
2. Run quick sanity check
3. Summarize usage instructions
If you need to write a plan, only write high quality plans, not low quality ones.
If you need to write a todo list, only write high quality todo lists, not low quality ones.
## Task execution
@@ -287,12 +287,12 @@ It is important to remember:
- You must include a header with your intended action (Add/Delete/Update)
- You must prefix new lines with `+` even when creating a new file
## `update_plan`
## `todo_write`
A tool named `update_plan` is available to you. You can use it to keep an up-to-date, step-by-step plan for the task.
A tool named `todo_write` is available to you. You can use it to keep an up-to-date, step-by-step todo list for the task.
To create a new plan, call `update_plan` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
To create a new todo list, call `todo_write` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
When steps have been completed, use `todo_write` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `todo_write` call.
If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
If all steps are complete, ensure you call `todo_write` to mark all steps as `completed`.

View File

@@ -18,12 +18,14 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL
- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
## Plan tool
## Todo tool
When using the planning tool:
- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
- Do not make single-step plans.
- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
Use the `todo_write` tool to track and communicate your task list.
- Skip using the todo tool for straightforward tasks (roughly the easiest 25%).
- Do not make single-step todo lists.
- Keep exactly one step `in_progress` at a time.
- Update the todo list after completing a step you previously shared.
## Special user requests

File diff suppressed because one or more lines are too long

View File

@@ -3,7 +3,7 @@ You are a coding agent running in the Codex CLI, a terminal-based coding assista
Your capabilities:
- Receive user prompts and other context provided by the harness, such as files in the workspace.
- Communicate with the user by streaming thinking & responses, and by making & updating plans.
- Communicate with the user by streaming thinking & responses, and by making & updating todo lists.
- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
@@ -49,29 +49,29 @@ Before making tool calls, send a brief preamble to the user explaining what you
- “Alright, build pipeline order is interesting. Checking how it reports failures.”
- “Spotted a clever caching util; now hunting where it gets used.”
## Planning
## Todo Lists
You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go.
You have access to a `todo_write` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. A todo list can help make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good todo list should break the task into meaningful, logically ordered steps that are easy to verify as you go.
Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
Note that todo lists are not for padding out simple work with filler steps or stating the obvious. The content of your list should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use todo lists for simple or single-step queries that you can just do or answer immediately.
Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Do not repeat the full contents of the todo list after a `todo_write` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your todo list after a single pass of implementation. If this is the case, you can simply mark all the steps as completed. Sometimes, you may need to change your list in the middle of a task: call `todo_write` with the updated list and make sure to provide an `explanation` of the rationale when doing so.
Use a plan when:
Use a todo list when:
- The task is non-trivial and will require multiple actions over a long time horizon.
- There are logical phases or dependencies where sequencing matters.
- The work has ambiguity that benefits from outlining high-level goals.
- You want intermediate checkpoints for feedback and validation.
- When the user asked you to do more than one thing in a single prompt
- The user has asked you to use the plan tool (aka "TODOs")
- You generate additional steps while working, and plan to do them before yielding to the user
- The user has asked you to use the todo tool
- You generate additional steps while working, and intend to do them before yielding to the user
### Examples
**High-quality plans**
**High-quality todo lists**
Example 1:
@@ -98,7 +98,7 @@ Example 3:
5. Persist messages in lightweight DB
6. Add typing indicators + unread count
**Low-quality plans**
**Low-quality todo lists**
Example 1:
@@ -264,12 +264,12 @@ When using the shell, you must adhere to the following guidelines:
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
- Do not use python scripts to attempt to output larger chunks of a file.
## `update_plan`
## `todo_write`
A tool named `update_plan` is available to you. You can use it to keep an up-to-date, step-by-step plan for the task.
A tool named `todo_write` is available to you. You can use it to keep an up-to-date, step-by-step todo list for the task.
To create a new plan, call `update_plan` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
To create a new todo list, call `todo_write` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
When steps have been completed, use `todo_write` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `todo_write` call.
If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
If all steps are complete, ensure you call `todo_write` to mark all steps as `completed`.

View File

@@ -3,7 +3,7 @@ You are a coding agent running in the Codex CLI, a terminal-based coding assista
Your capabilities:
- Receive user prompts and other context provided by the harness, such as files in the workspace.
- Communicate with the user by streaming thinking & responses, and by making & updating plans.
- Communicate with the user by streaming thinking & responses, and by making & updating todo lists.
- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
@@ -49,29 +49,29 @@ Before making tool calls, send a brief preamble to the user explaining what you
- “Alright, build pipeline order is interesting. Checking how it reports failures.”
- “Spotted a clever caching util; now hunting where it gets used.”
## Planning
## Todo Lists
You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go.
You have access to a `todo_write` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. A todo list can help make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good todo list should break the task into meaningful, logically ordered steps that are easy to verify as you go.
Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
Note that todo lists are not for padding out simple work with filler steps or stating the obvious. The content of your list should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use todo lists for simple or single-step queries that you can just do or answer immediately.
Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Do not repeat the full contents of the todo list after a `todo_write` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your todo list after a single pass of implementation. If this is the case, you can simply mark all the steps as completed. Sometimes, you may need to change your list in the middle of a task: call `todo_write` with the updated list and make sure to provide an `explanation` of the rationale when doing so.
Use a plan when:
Use a todo list when:
- The task is non-trivial and will require multiple actions over a long time horizon.
- There are logical phases or dependencies where sequencing matters.
- The work has ambiguity that benefits from outlining high-level goals.
- You want intermediate checkpoints for feedback and validation.
- When the user asked you to do more than one thing in a single prompt
- The user has asked you to use the plan tool (aka "TODOs")
- You generate additional steps while working, and plan to do them before yielding to the user
- The user has asked you to use the todo tool
- You generate additional steps while working, and intend to do them before yielding to the user
### Examples
**High-quality plans**
**High-quality todo lists**
Example 1:
@@ -98,7 +98,7 @@ Example 3:
5. Persist messages in lightweight DB
6. Add typing indicators + unread count
**Low-quality plans**
**Low-quality todo lists**
Example 1:
@@ -264,15 +264,15 @@ When using the shell, you must adhere to the following guidelines:
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
- Do not use python scripts to attempt to output larger chunks of a file.
## `update_plan`
## `todo_write`
A tool named `update_plan` is available to you. You can use it to keep an up-to-date, step-by-step plan for the task.
A tool named `todo_write` is available to you. You can use it to keep an up-to-date, step-by-step todo list for the task.
To create a new plan, call `update_plan` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
To create a new todo list, call `todo_write` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
When steps have been completed, use `todo_write` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `todo_write` call.
If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
If all steps are complete, ensure you call `todo_write` to mark all steps as `completed`.
## `apply_patch`

View File

@@ -90,6 +90,10 @@ use tracing::instrument;
use tracing::trace_span;
use tracing::warn;
/// Developer-instruction note added when a resumed session's history contains calls to the
/// legacy `update_plan` tool name, telling the model to use `todo_write` instead.
const LEGACY_UPDATE_PLAN_NOTE: &str = concat!(
    "Note: this session history may include the legacy tool name `update_plan`. ",
    "That tool was renamed to `todo_write` and had nothing to do with Plan mode. ",
    "Do not call `update_plan`; use `todo_write`.",
);
use crate::ModelProviderInfo;
use crate::WireApi;
use crate::client::ModelClient;
@@ -258,6 +262,50 @@ fn maybe_push_chat_wire_api_deprecation(
});
}
/// Returns the developer instructions to use for a session, adding
/// [`LEGACY_UPDATE_PLAN_NOTE`] when the resumed history contains an `update_plan` call.
///
/// * If the history has no `update_plan` call, `developer_instructions` is returned unchanged.
/// * If the existing instructions are missing or contain only whitespace, the note replaces them.
/// * Otherwise the note is appended after a blank line.
fn append_legacy_update_plan_note(
    developer_instructions: Option<String>,
    conversation_history: &InitialHistory,
) -> Option<String> {
    if resumed_history_has_update_plan(conversation_history) {
        // Treat whitespace-only instructions the same as absent ones.
        let note = match developer_instructions.filter(|text| !text.trim().is_empty()) {
            Some(existing) => format!("{existing}\n\n{LEGACY_UPDATE_PLAN_NOTE}"),
            None => LEGACY_UPDATE_PLAN_NOTE.to_string(),
        };
        Some(note)
    } else {
        developer_instructions
    }
}
/// Reports whether `conversation_history` is a resumed session whose rollout items include
/// at least one `update_plan` tool call. Any non-resumed history yields `false`.
fn resumed_history_has_update_plan(conversation_history: &InitialHistory) -> bool {
    if let InitialHistory::Resumed(resumed) = conversation_history {
        resumed.history.iter().any(rollout_item_has_update_plan)
    } else {
        false
    }
}
/// Reports whether a single rollout item carries an `update_plan` tool call.
///
/// Only direct response items and raw-response-item events are inspected; every other
/// rollout item yields `false`.
fn rollout_item_has_update_plan(item: &RolloutItem) -> bool {
    if let RolloutItem::ResponseItem(response_item) = item {
        return response_item_has_update_plan(response_item);
    }
    if let RolloutItem::EventMsg(EventMsg::RawResponseItem(RawResponseItemEvent { item: raw })) =
        item
    {
        return response_item_has_update_plan(raw);
    }
    false
}
/// Reports whether `item` is a function call or custom tool call named exactly `update_plan`.
fn response_item_has_update_plan(item: &ResponseItem) -> bool {
    matches!(
        item,
        ResponseItem::FunctionCall { name, .. } | ResponseItem::CustomToolCall { name, .. }
            if name == "update_plan"
    )
}
impl Codex {
/// Spawn a new [`Codex`] and initialize the session.
#[allow(clippy::too_many_arguments)]
@@ -322,6 +370,10 @@ impl Codex {
.clone()
.or_else(|| conversation_history.get_base_instructions().map(|s| s.text))
.unwrap_or_else(|| model_info.get_model_instructions(config.model_personality));
let developer_instructions = append_legacy_update_plan_note(
config.developer_instructions.clone(),
&conversation_history,
);
// TODO (aibrahim): Consolidate config.model and config.model_reasoning_effort into config.collaboration_mode
// to avoid extracting these fields separately and constructing CollaborationMode here.
@@ -337,7 +389,7 @@ impl Codex {
provider: config.model_provider.clone(),
collaboration_mode,
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
developer_instructions,
user_instructions,
personality: config.model_personality,
base_instructions,
@@ -4094,6 +4146,36 @@ mod tests {
assert_eq!(expected, reconstructed);
}
#[test]
fn append_legacy_update_plan_note_appends_for_resumed_history() {
    // A resumed rollout containing a single call that uses the legacy tool name.
    let legacy_call = ResponseItem::FunctionCall {
        id: None,
        name: "update_plan".to_string(),
        arguments: "{}".to_string(),
        call_id: "call-1".to_string(),
    };
    let resumed = InitialHistory::Resumed(ResumedHistory {
        conversation_id: ThreadId::default(),
        history: vec![RolloutItem::ResponseItem(legacy_call)],
        rollout_path: PathBuf::from("/tmp/resume.jsonl"),
    });

    let original_instructions = "Existing instructions".to_string();
    let expected = Some(format!(
        "{original_instructions}\n\n{LEGACY_UPDATE_PLAN_NOTE}"
    ));

    // The note must be appended after the existing instructions, separated by a blank line.
    let actual = append_legacy_update_plan_note(Some(original_instructions), &resumed);
    assert_eq!(actual, expected);
}
#[test]
fn append_legacy_update_plan_note_skips_for_new_history() {
    // A brand-new session has no legacy calls, so the instructions pass through untouched.
    let instructions = Some("Existing instructions".to_string());
    let result = append_legacy_update_plan_note(instructions.clone(), &InitialHistory::New);
    assert_eq!(result, instructions);
}
#[tokio::test]
async fn record_initial_history_reconstructs_resumed_transcript() {
let (session, turn_context) = make_session_and_context().await;

View File

@@ -83,7 +83,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::McpStartupComplete(_)
| EventMsg::ListCustomPromptsResponse(_)
| EventMsg::ListSkillsResponse(_)
| EventMsg::PlanUpdate(_)
| EventMsg::TodoUpdate(_)
| EventMsg::ShutdownComplete
| EventMsg::ViewImageToolCall(_)
| EventMsg::DeprecationNotice(_)

View File

@@ -13,7 +13,7 @@ mod test_sync;
mod unified_exec;
mod view_image;
pub use plan::PLAN_TOOL;
pub use plan::TODO_WRITE_TOOL;
use serde::Deserialize;
use crate::function_tool::FunctionCallError;
@@ -24,7 +24,7 @@ pub use grep_files::GrepFilesHandler;
pub use list_dir::ListDirHandler;
pub use mcp::McpHandler;
pub use mcp_resource::McpResourceHandler;
pub use plan::PlanHandler;
pub use plan::TodoWriteHandler;
pub use read_file::ReadFileHandler;
pub use request_user_input::RequestUserInputHandler;
pub use shell::ShellCommandHandler;

View File

@@ -10,27 +10,27 @@ use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use crate::tools::spec::JsonSchema;
use async_trait::async_trait;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::protocol::EventMsg;
use codex_protocol::todo_tool::UpdateTodoArgs;
use std::collections::BTreeMap;
use std::sync::LazyLock;
pub struct PlanHandler;
pub struct TodoWriteHandler;
pub static PLAN_TOOL: LazyLock<ToolSpec> = LazyLock::new(|| {
let mut plan_item_props = BTreeMap::new();
plan_item_props.insert("step".to_string(), JsonSchema::String { description: None });
plan_item_props.insert(
pub static TODO_WRITE_TOOL: LazyLock<ToolSpec> = LazyLock::new(|| {
let mut todo_item_props = BTreeMap::new();
todo_item_props.insert("step".to_string(), JsonSchema::String { description: None });
todo_item_props.insert(
"status".to_string(),
JsonSchema::String {
description: Some("One of: pending, in_progress, completed".to_string()),
},
);
let plan_items_schema = JsonSchema::Array {
let todo_items_schema = JsonSchema::Array {
description: Some("The list of steps".to_string()),
items: Box::new(JsonSchema::Object {
properties: plan_item_props,
properties: todo_item_props,
required: Some(vec!["step".to_string(), "status".to_string()]),
additional_properties: Some(false.into()),
}),
@@ -41,26 +41,26 @@ pub static PLAN_TOOL: LazyLock<ToolSpec> = LazyLock::new(|| {
"explanation".to_string(),
JsonSchema::String { description: None },
);
properties.insert("plan".to_string(), plan_items_schema);
properties.insert("todo".to_string(), todo_items_schema);
ToolSpec::Function(ResponsesApiTool {
name: "update_plan".to_string(),
description: r#"Updates the task plan.
Provide an optional explanation and a list of plan items, each with a step and status.
name: "todo_write".to_string(),
description: r#"Updates the task list.
Provide an optional explanation and a list of todo items in `todo`, each with a step and status.
At most one step can be in_progress at a time.
"#
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["plan".to_string()]),
required: Some(vec!["todo".to_string()]),
additional_properties: Some(false.into()),
},
})
});
#[async_trait]
impl ToolHandler for PlanHandler {
impl ToolHandler for TodoWriteHandler {
fn kind(&self) -> ToolKind {
ToolKind::Function
}
@@ -78,13 +78,13 @@ impl ToolHandler for PlanHandler {
ToolPayload::Function { arguments } => arguments,
_ => {
return Err(FunctionCallError::RespondToModel(
"update_plan handler received unsupported payload".to_string(),
"todo_write handler received unsupported payload".to_string(),
));
}
};
let content =
handle_update_plan(session.as_ref(), turn.as_ref(), arguments, call_id).await?;
handle_todo_write(session.as_ref(), turn.as_ref(), arguments, call_id).await?;
Ok(ToolOutput::Function {
content,
@@ -94,24 +94,25 @@ impl ToolHandler for PlanHandler {
}
}
/// This function doesn't do anything useful. However, it gives the model a structured way to record its plan that clients can read and render.
/// So it's the _inputs_ to this function that are useful to clients, not the outputs and neither are actually useful for the model other
/// than forcing it to come up and document a plan (TBD how that affects performance).
pub(crate) async fn handle_update_plan(
/// This function doesn't do anything useful. However, it gives the model a structured way to record
/// its task list that clients can read and render. So it's the _inputs_ to this function that are
/// useful to clients, not the outputs, and neither is actually useful to the model other than
/// forcing it to come up with and document a task list (TBD how that affects performance).
pub(crate) async fn handle_todo_write(
session: &Session,
turn_context: &TurnContext,
arguments: String,
_call_id: String,
) -> Result<String, FunctionCallError> {
let args = parse_update_plan_arguments(&arguments)?;
let args = parse_todo_write_arguments(&arguments)?;
session
.send_event(turn_context, EventMsg::PlanUpdate(args))
.send_event(turn_context, EventMsg::TodoUpdate(args))
.await;
Ok("Plan updated".to_string())
Ok("Todo list updated".to_string())
}
fn parse_update_plan_arguments(arguments: &str) -> Result<UpdatePlanArgs, FunctionCallError> {
serde_json::from_str::<UpdatePlanArgs>(arguments).map_err(|e| {
fn parse_todo_write_arguments(arguments: &str) -> Result<UpdateTodoArgs, FunctionCallError> {
serde_json::from_str::<UpdateTodoArgs>(arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e}"))
})
}

View File

@@ -3,7 +3,7 @@ use crate::client_common::tools::ResponsesApiTool;
use crate::client_common::tools::ToolSpec;
use crate::features::Feature;
use crate::features::Features;
use crate::tools::handlers::PLAN_TOOL;
use crate::tools::handlers::TODO_WRITE_TOOL;
use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
use crate::tools::handlers::collab::DEFAULT_WAIT_TIMEOUT_MS;
@@ -1273,12 +1273,12 @@ pub(crate) fn build_specs(
use crate::tools::handlers::ListDirHandler;
use crate::tools::handlers::McpHandler;
use crate::tools::handlers::McpResourceHandler;
use crate::tools::handlers::PlanHandler;
use crate::tools::handlers::ReadFileHandler;
use crate::tools::handlers::RequestUserInputHandler;
use crate::tools::handlers::ShellCommandHandler;
use crate::tools::handlers::ShellHandler;
use crate::tools::handlers::TestSyncHandler;
use crate::tools::handlers::TodoWriteHandler;
use crate::tools::handlers::UnifiedExecHandler;
use crate::tools::handlers::ViewImageHandler;
use std::sync::Arc;
@@ -1287,7 +1287,7 @@ pub(crate) fn build_specs(
let shell_handler = Arc::new(ShellHandler);
let unified_exec_handler = Arc::new(UnifiedExecHandler);
let plan_handler = Arc::new(PlanHandler);
let todo_handler = Arc::new(TodoWriteHandler);
let apply_patch_handler = Arc::new(ApplyPatchHandler);
let dynamic_tool_handler = Arc::new(DynamicToolHandler);
let view_image_handler = Arc::new(ViewImageHandler);
@@ -1332,8 +1332,8 @@ pub(crate) fn build_specs(
builder.register_handler("list_mcp_resource_templates", mcp_resource_handler.clone());
builder.register_handler("read_mcp_resource", mcp_resource_handler);
builder.push_spec(PLAN_TOOL.clone());
builder.register_handler("update_plan", plan_handler);
builder.push_spec(TODO_WRITE_TOOL.clone());
builder.register_handler("todo_write", todo_handler);
if config.collaboration_modes_tools {
builder.push_spec(create_request_user_input_tool());
@@ -1590,7 +1590,7 @@ mod tests {
create_list_mcp_resources_tool(),
create_list_mcp_resource_templates_tool(),
create_read_mcp_resource_tool(),
PLAN_TOOL.clone(),
TODO_WRITE_TOOL.clone(),
create_request_user_input_tool(),
create_apply_patch_freeform_tool(),
ToolSpec::WebSearch {
@@ -1737,7 +1737,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",
@@ -1759,7 +1759,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",
@@ -1783,7 +1783,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",
@@ -1807,7 +1807,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",
@@ -1829,7 +1829,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"web_search",
"view_image",
@@ -1850,7 +1850,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",
@@ -1872,7 +1872,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"web_search",
"view_image",
@@ -1893,7 +1893,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",
@@ -1916,7 +1916,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",
@@ -1940,7 +1940,7 @@ mod tests {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"web_search",
"view_image",
@@ -1962,7 +1962,7 @@ mod tests {
let (tools, _) = build_specs(&tools_config, Some(HashMap::new()), &[]).build();
// Only check the shell variant and a couple of core tools.
let mut subset = vec!["exec_command", "write_stdin", "update_plan"];
let mut subset = vec!["exec_command", "write_stdin", "todo_write"];
if let Some(shell_tool) = shell_tool_name(&tools_config) {
subset.push(shell_tool);
}

View File

@@ -34,7 +34,7 @@ Tone:
Frequency & Length:
- Send short updates (1–2 sentences) whenever there is a meaningful, important insight you need to share with the user to keep them informed.
- If you expect a longer heads-down stretch, post a brief heads-down note with why and when you'll report back; when you resume, summarize what you learned.
- Only the initial plan, plan updates, and final recap can be longer, with multiple bullets and paragraphs
- Only the initial plan, todo list updates, and final recap can be longer, with multiple bullets and paragraphs
Content:
- Before you begin, give a quick plan with goal, constraints, next steps.
@@ -79,10 +79,10 @@ When the user asks for a review, you default to a code-review mindset. Your resp
- Unless you are otherwise instructed, prefer using `rg` or `rg --files` respectively when searching because `rg` is much faster than alternatives like `grep`. If the `rg` command is not found, then use alternatives.
- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).
<!-- - Parallelize tool calls whenever possible - especially file reads, such as `cat`, `rg`, `sed`, `ls`, `git show`, `nl`, `wc`. Use `multi_tool_use.parallel` to parallelize tool calls and only this. -->
- Use the plan tool to explain to the user what you are going to do
- Only use it for more complex tasks, do not use it for straightforward tasks (roughly the easiest 40%).
- Do not make single-step plans. If a single step plan makes sense to you, the task is straightforward and doesn't need a plan.
- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
- Use the `todo_write` tool to explain what you are going to do.
- Only use it for more complex tasks; skip it for straightforward tasks (roughly the easiest 40%).
- Do not make single-step todo lists. If a single step makes sense, the task does not need a todo list.
- When you have made a todo list, update it after completing each of the sub-tasks you shared.
# Sub-agents
If `spawn_agent` is unavailable or fails, ignore this section and proceed solo.

View File

@@ -36,7 +36,7 @@ Treat the task as a sequence of concrete steps that add up to a complete deliver
- Avoid blocking on uncertainty: choose a reasonable default and continue.
## Reporting progress
In this phase you show progress on your task and appraise the user of your progress using plan tool.
In this phase you show progress on your task and apprise the user of your progress, using the `todo_write` tool for non-trivial work.
- Provide updates that directly map to the work you are doing (what changed, what you verified, what remains).
- If something fails, report what failed, what you tried, and what you will do next.
- When you finish, summarize what you delivered and how the user can validate it.

View File

@@ -52,8 +52,8 @@ When the user asks for a review, you default to a code-review mindset. Your resp
# Tool use
- Unless you are otherwise instructed, prefer using `rg` or `rg --files` respectively when searching because `rg` is much faster than alternatives like `grep`. If the `rg` command is not found, then use alternatives.
- Use the plan tool to explain to the user what you are going to do
- Only use it for more complex tasks, do not use it for straightforward tasks (roughly the easiest 25%).
- Do not make single-step plans. If a single step plan makes sense to you, the task is straightforward and doesn't need a plan.
- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
- Use the `todo_write` tool to explain what you are going to do.
- Only use it for more complex tasks, not straightforward ones (roughly the easiest 25%).
- Do not make single-step todo lists. If a single step makes sense, the task does not need a todo list.
- When you have made a todo list, update it after completing each of the sub-tasks you shared.
- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).

View File

@@ -65,7 +65,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"todo_write".to_string(),
"request_user_input".to_string(),
"web_search".to_string(),
"view_image".to_string()
@@ -81,7 +81,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"todo_write".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),
@@ -98,7 +98,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"todo_write".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),
@@ -115,7 +115,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"todo_write".to_string(),
"request_user_input".to_string(),
"web_search".to_string(),
"view_image".to_string()
@@ -131,7 +131,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"todo_write".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),
@@ -148,7 +148,7 @@ async fn model_selects_expected_tools() {
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
"todo_write".to_string(),
"request_user_input".to_string(),
"apply_patch".to_string(),
"web_search".to_string(),

View File

@@ -136,7 +136,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"todo_write",
"request_user_input",
"apply_patch",
"web_search",

View File

@@ -9,7 +9,7 @@ use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::todo_tool::TodoStatus;
use codex_protocol::user_input::UserInput;
use core_test_support::assert_regex_match;
use core_test_support::responses;
@@ -108,7 +108,7 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
async fn todo_write_tool_emits_todo_update_event() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -121,10 +121,10 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
..
} = builder.build(&server).await?;
let call_id = "plan-tool-call";
let plan_args = json!({
let call_id = "todo-tool-call";
let todo_args = json!({
"explanation": "Tool harness check",
"plan": [
"todo": [
{"step": "Inspect workspace", "status": "in_progress"},
{"step": "Report results", "status": "pending"},
],
@@ -133,13 +133,13 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "update_plan", &plan_args),
ev_function_call(call_id, "todo_write", &todo_args),
ev_completed("resp-1"),
]);
responses::mount_sse_once(&server, first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "plan acknowledged"),
ev_assistant_message("msg-1", "todo list acknowledged"),
ev_completed("resp-2"),
]);
let second_mock = responses::mount_sse_once(&server, second_response).await;
@@ -149,7 +149,7 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please update the plan".into(),
text: "please update the todo list".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
@@ -164,16 +164,17 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
})
.await?;
let mut saw_plan_update = false;
let mut saw_todo_update = false;
wait_for_event(&codex, |event| match event {
EventMsg::PlanUpdate(update) => {
saw_plan_update = true;
EventMsg::TodoUpdate(update) => {
saw_todo_update = true;
assert_eq!(update.explanation.as_deref(), Some("Tool harness check"));
assert_eq!(update.plan.len(), 2);
assert_eq!(update.plan[0].step, "Inspect workspace");
assert_matches!(update.plan[0].status, StepStatus::InProgress);
assert_eq!(update.plan[1].step, "Report results");
assert_matches!(update.plan[1].status, StepStatus::Pending);
let todo_items = update.todo_items();
assert_eq!(todo_items.len(), 2);
assert_eq!(todo_items[0].step, "Inspect workspace");
assert_matches!(todo_items[0].status, TodoStatus::InProgress);
assert_eq!(todo_items[1].step, "Report results");
assert_matches!(todo_items[1].status, TodoStatus::Pending);
false
}
EventMsg::TurnComplete(_) => true,
@@ -181,17 +182,20 @@ async fn update_plan_tool_emits_plan_update_event() -> anyhow::Result<()> {
})
.await;
assert!(saw_plan_update, "expected PlanUpdate event");
assert!(
saw_todo_update,
"expected todo_update event (plan_update legacy alias)"
);
let req = second_mock.single_request();
let (output_text, _success_flag) = call_output(&req, call_id);
assert_eq!(output_text, "Plan updated");
assert_eq!(output_text, "Todo list updated");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
async fn todo_write_tool_rejects_malformed_payload() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -204,21 +208,21 @@ async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
..
} = builder.build(&server).await?;
let call_id = "plan-tool-invalid";
let call_id = "todo-tool-invalid";
let invalid_args = json!({
"explanation": "Missing plan data"
"explanation": "Missing todo data"
})
.to_string();
let first_response = sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "update_plan", &invalid_args),
ev_function_call(call_id, "todo_write", &invalid_args),
ev_completed("resp-1"),
]);
responses::mount_sse_once(&server, first_response).await;
let second_response = sse(vec![
ev_assistant_message("msg-1", "malformed plan payload"),
ev_assistant_message("msg-1", "malformed todo payload"),
ev_completed("resp-2"),
]);
let second_mock = responses::mount_sse_once(&server, second_response).await;
@@ -228,7 +232,7 @@ async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "please update the plan".into(),
text: "please update the todo list".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
@@ -243,10 +247,10 @@ async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
})
.await?;
let mut saw_plan_update = false;
let mut saw_todo_update = false;
wait_for_event(&codex, |event| match event {
EventMsg::PlanUpdate(_) => {
saw_plan_update = true;
EventMsg::TodoUpdate(_) => {
saw_todo_update = true;
false
}
EventMsg::TurnComplete(_) => true,
@@ -255,8 +259,8 @@ async fn update_plan_tool_rejects_malformed_payload() -> anyhow::Result<()> {
.await;
assert!(
!saw_plan_update,
"did not expect PlanUpdate event for malformed payload"
!saw_todo_update,
"did not expect todo_update event for malformed payload"
);
let req = second_mock.single_request();

View File

@@ -45,8 +45,7 @@ use crate::event_processor::CodexStatus;
use crate::event_processor::EventProcessor;
use crate::event_processor::handle_last_message;
use codex_common::create_config_summary_entries;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::todo_tool::TodoStatus;
/// This should be configurable. When used in CI, users may not want to impose
/// a limit so they can see the full transcript.
@@ -538,11 +537,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
ts_msg!(self, "model: {}", model);
eprintln!();
}
EventMsg::PlanUpdate(plan_update_event) => {
let UpdatePlanArgs { explanation, plan } = plan_update_event;
EventMsg::TodoUpdate(todo_update_event) => {
let (explanation, todo_items) = todo_update_event.into_parts();
// Header
ts_msg!(self, "{}", "Plan update".style(self.magenta));
ts_msg!(self, "{}", "Todo list update".style(self.magenta));
// Optional explanation
if let Some(explanation) = explanation
@@ -551,21 +550,21 @@ impl EventProcessor for EventProcessorWithHumanOutput {
ts_msg!(self, "{}", explanation.style(self.italic));
}
// Pretty-print the plan items with simple status markers.
for item in plan {
match item.status {
StepStatus::Completed => {
ts_msg!(self, " {} {}", "".style(self.green), item.step);
// Pretty-print the todo items with simple status markers.
for todo_item in todo_items {
match todo_item.status {
TodoStatus::Completed => {
ts_msg!(self, " {} {}", "".style(self.green), todo_item.step);
}
StepStatus::InProgress => {
ts_msg!(self, " {} {}", "".style(self.cyan), item.step);
TodoStatus::InProgress => {
ts_msg!(self, " {} {}", "".style(self.cyan), todo_item.step);
}
StepStatus::Pending => {
TodoStatus::Pending => {
ts_msg!(
self,
" {} {}",
"".style(self.dimmed),
item.step.style(self.dimmed)
todo_item.step.style(self.dimmed)
);
}
}

View File

@@ -50,8 +50,8 @@ use codex_core::protocol::CollabCloseEndEvent;
use codex_core::protocol::CollabWaitingBeginEvent;
use codex_core::protocol::CollabWaitingEndEvent;
use codex_protocol::models::WebSearchAction;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::todo_tool::TodoStatus;
use codex_protocol::todo_tool::UpdateTodoArgs;
use serde_json::Value as JsonValue;
use tracing::error;
use tracing::warn;
@@ -176,7 +176,7 @@ impl EventProcessorWithJsonOutput {
};
vec![ThreadEvent::Error(ThreadErrorEvent { message })]
}
protocol::EventMsg::PlanUpdate(ev) => self.handle_plan_update(ev),
protocol::EventMsg::TodoUpdate(ev) => self.handle_todo_update(ev),
_ => Vec::new(),
}
}
@@ -700,18 +700,18 @@ impl EventProcessorWithJsonOutput {
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent { item })]
}
fn todo_items_from_plan(&self, args: &UpdatePlanArgs) -> Vec<TodoItem> {
args.plan
fn todo_items_from_update(&self, args: &UpdateTodoArgs) -> Vec<TodoItem> {
args.todo_items()
.iter()
.map(|p| TodoItem {
text: p.step.clone(),
completed: matches!(p.status, StepStatus::Completed),
completed: matches!(p.status, TodoStatus::Completed),
})
.collect()
}
fn handle_plan_update(&mut self, args: &UpdatePlanArgs) -> Vec<ThreadEvent> {
let items = self.todo_items_from_plan(args);
fn handle_todo_update(&mut self, args: &UpdateTodoArgs) -> Vec<ThreadEvent> {
let items = self.todo_items_from_update(args);
if let Some(running) = &mut self.running_todo_list {
running.items = items.clone();

View File

@@ -56,12 +56,12 @@ use codex_exec::exec_events::Usage;
use codex_exec::exec_events::WebSearchItem;
use codex_protocol::ThreadId;
use codex_protocol::models::WebSearchAction;
use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::protocol::CodexErrorInfo;
use codex_protocol::protocol::ExecCommandOutputDeltaEvent;
use codex_protocol::protocol::ExecOutputStream;
use codex_protocol::todo_tool::TodoItemArg;
use codex_protocol::todo_tool::TodoStatus;
use codex_protocol::todo_tool::UpdateTodoArgs;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
@@ -220,25 +220,25 @@ fn web_search_begin_then_end_reuses_item_id() {
}
#[test]
fn plan_update_emits_todo_list_started_updated_and_completed() {
fn todo_update_emits_todo_list_started_updated_and_completed() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// First plan update => item.started (todo_list)
// First todo_write update => item.started (todo_list)
let first = event(
"p1",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![
PlanItemArg {
EventMsg::TodoUpdate(UpdateTodoArgs::new(
None,
vec![
TodoItemArg {
step: "step one".to_string(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
},
PlanItemArg {
TodoItemArg {
step: "step two".to_string(),
status: StepStatus::InProgress,
status: TodoStatus::InProgress,
},
],
}),
)),
);
let out_first = ep.collect_thread_events(&first);
assert_eq!(
@@ -262,22 +262,22 @@ fn plan_update_emits_todo_list_started_updated_and_completed() {
})]
);
// Second plan update in same turn => item.updated (same id)
// Second todo_write update => item.updated (same id)
let second = event(
"p2",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![
PlanItemArg {
EventMsg::TodoUpdate(UpdateTodoArgs::new(
None,
vec![
TodoItemArg {
step: "step one".to_string(),
status: StepStatus::Completed,
status: TodoStatus::Completed,
},
PlanItemArg {
TodoItemArg {
step: "step two".to_string(),
status: StepStatus::InProgress,
status: TodoStatus::InProgress,
},
],
}),
)),
);
let out_second = ep.collect_thread_events(&second);
assert_eq!(
@@ -660,19 +660,19 @@ fn collab_wait_end_without_begin_synthesizes_failed_item() {
}
#[test]
fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
fn todo_update_after_complete_starts_new_todo_list_with_new_id() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// First turn: start + complete
let start = event(
"t1",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![PlanItemArg {
EventMsg::TodoUpdate(UpdateTodoArgs::new(
None,
vec![TodoItemArg {
step: "only".to_string(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
}],
}),
)),
);
let _ = ep.collect_thread_events(&start);
let complete = event(
@@ -686,13 +686,13 @@ fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
// Second turn: a new todo list should have a new id
let start_again = event(
"t3",
EventMsg::PlanUpdate(UpdatePlanArgs {
explanation: None,
plan: vec![PlanItemArg {
EventMsg::TodoUpdate(UpdateTodoArgs::new(
None,
vec![TodoItemArg {
step: "again".to_string(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
}],
}),
)),
);
let out = ep.collect_thread_events(&start_again);

View File

@@ -343,7 +343,7 @@ async fn run_codex_tool_session_inner(
| EventMsg::WebSearchBegin(_)
| EventMsg::WebSearchEnd(_)
| EventMsg::GetHistoryEntryResponse(_)
| EventMsg::PlanUpdate(_)
| EventMsg::TodoUpdate(_)
| EventMsg::TurnAborted(_)
| EventMsg::UserMessage(_)
| EventMsg::ShutdownComplete

View File

@@ -14,4 +14,5 @@ pub mod parse_command;
pub mod plan_tool;
pub mod protocol;
pub mod request_user_input;
pub mod todo_tool;
pub mod user_input;

View File

@@ -1,28 +1,8 @@
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use ts_rs::TS;
// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum StepStatus {
Pending,
InProgress,
Completed,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, TS)]
#[serde(deny_unknown_fields)]
pub struct PlanItemArg {
pub step: String,
pub status: StepStatus,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, TS)]
#[serde(deny_unknown_fields)]
pub struct UpdatePlanArgs {
#[serde(default)]
pub explanation: Option<String>,
pub plan: Vec<PlanItemArg>,
}
// Deprecated compatibility layer for the old plan_* names. These are todo list
// types and not related to Plan mode.
#[deprecated(note = "use codex_protocol::todo_tool::TodoItemArg")]
pub use crate::todo_tool::TodoItemArg as PlanItemArg;
#[deprecated(note = "use codex_protocol::todo_tool::TodoStatus")]
pub use crate::todo_tool::TodoStatus as StepStatus;
#[deprecated(note = "use codex_protocol::todo_tool::UpdateTodoArgs")]
pub use crate::todo_tool::UpdateTodoArgs as UpdatePlanArgs;

View File

@@ -3,7 +3,7 @@ You are a coding agent running in the Codex CLI, a terminal-based coding assista
Your capabilities:
- Receive user prompts and other context provided by the harness, such as files in the workspace.
- Communicate with the user by streaming thinking & responses, and by making & updating plans.
- Communicate with the user by streaming thinking & responses, and by making & updating todo lists.
- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
@@ -49,29 +49,29 @@ Before making tool calls, send a brief preamble to the user explaining what you
- “Alright, build pipeline order is interesting. Checking how it reports failures.”
- “Spotted a clever caching util; now hunting where it gets used.”
## Planning
## Todo Lists
You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go.
You have access to a `todo_write` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. A todo list can help make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good todo list should break the task into meaningful, logically ordered steps that are easy to verify as you go.
Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
Note that todo lists are not for padding out simple work with filler steps or stating the obvious. The content of your list should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use todo lists for simple or single-step queries that you can just do or answer immediately.
Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Do not repeat the full contents of the todo list after a `todo_write` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your todo list after a single pass of implementation. If this is the case, you can simply mark all the steps as completed. Sometimes, you may need to change your list in the middle of a task: call `todo_write` with the updated list and make sure to provide an `explanation` of the rationale when doing so.
Use a plan when:
Use a todo list when:
- The task is non-trivial and will require multiple actions over a long time horizon.
- There are logical phases or dependencies where sequencing matters.
- The work has ambiguity that benefits from outlining high-level goals.
- You want intermediate checkpoints for feedback and validation.
- When the user asked you to do more than one thing in a single prompt
- The user has asked you to use the plan tool (aka "TODOs")
- You generate additional steps while working, and plan to do them before yielding to the user
- The user has asked you to use the todo tool
- You generate additional steps while working, and intend to do them before yielding to the user
### Examples
**High-quality plans**
**High-quality todo lists**
Example 1:
@@ -98,7 +98,7 @@ Example 3:
5. Persist messages in lightweight DB
6. Add typing indicators + unread count
**Low-quality plans**
**Low-quality todo lists**
Example 1:
@@ -264,12 +264,12 @@ When using the shell, you must adhere to the following guidelines:
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
- Do not use python scripts to attempt to output larger chunks of a file.
## `update_plan`
## `todo_write`
A tool named `update_plan` is available to you. You can use it to keep an uptodate, stepbystep plan for the task.
A tool named `todo_write` is available to you. You can use it to keep an up-to-date, step-by-step todo list for the task.
To create a new plan, call `update_plan` with a short list of 1sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
To create a new todo list, call `todo_write` with a short list of 1-sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
When steps have been completed, use `todo_write` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `todo_write` call.
If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
If all steps are complete, ensure you call `todo_write` to mark all steps as `completed`.

View File

@@ -29,8 +29,8 @@ use crate::models::WebSearchAction;
use crate::num_format::format_with_separators;
use crate::openai_models::ReasoningEffort as ReasoningEffortConfig;
use crate::parse_command::ParsedCommand;
use crate::plan_tool::UpdatePlanArgs;
use crate::request_user_input::RequestUserInputResponse;
use crate::todo_tool::UpdateTodoArgs;
use crate::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
use mcp_types::CallToolResult;
@@ -810,7 +810,10 @@ pub enum EventMsg {
/// Notification that skill data may have been updated and clients may want to reload.
SkillsUpdateAvailable,
PlanUpdate(UpdatePlanArgs),
/// Todo list update from the todo_write tool. Primary event name is
/// `todo_update`; `plan_update` remains a legacy alias.
#[serde(alias = "plan_update")]
TodoUpdate(UpdateTodoArgs),
TurnAborted(TurnAbortedEvent),

View File

@@ -0,0 +1,158 @@
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Deserializer;
use serde::Serialize;
use serde::Serializer;
use serde::de;
use serde::ser::SerializeStruct;
use ts_rs::TS;
// Types for the todo_write tool arguments matching codex-vscode/todo-mcp/src/main.rs.
//
// `TodoStatus` is the state of a single todo step. Because of
// `rename_all = "snake_case"`, the variants travel over the wire as
// `pending`, `in_progress`, and `completed`.
//
// NOTE(review): plain `//` comments are used on purpose; `///` doc comments may be
// picked up by the JsonSchema/TS derives as descriptions and change generated
// output. Confirm before converting these to doc comments.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, TS, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum TodoStatus {
Pending,
InProgress,
Completed,
}
// One entry of a todo list: the step text plus its current status.
// `deny_unknown_fields` makes deserialization fail when an item carries any key
// other than `step` and `status`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, TS, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct TodoItemArg {
// Text of the step.
pub step: String,
// Progress state of the step.
pub status: TodoStatus,
}
/// Arguments for the todo_write tool.
///
/// The `plan` field is a deprecated legacy alias for `todo`, but is still emitted for
/// backward compatibility.
//
// `Serialize` and `Deserialize` are not derived: this file provides hand-written
// impls so that either `todo` or `plan` is accepted on input and both keys are
// written on output.
#[derive(Debug, Clone, JsonSchema, TS, PartialEq, Eq, Default)]
pub struct UpdateTodoArgs {
// Optional note accompanying the update; left out of serialized output when `None`.
pub explanation: Option<String>,
// Primary list of todo items.
pub todo: Vec<TodoItemArg>,
// Legacy mirror of `todo`. Accessors fall back to this list when `todo` is empty.
pub plan: Vec<TodoItemArg>,
}
/// Wire-level shape used only while deserializing `UpdateTodoArgs`.
///
/// Every field is optional (`#[serde(default)]`) so the custom `Deserialize` impl
/// can tell "key absent" (`None`) apart from "key present but empty" and decide
/// which of `todo` / `plan` to use. Unknown keys are rejected.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct UpdateTodoArgsRaw {
#[serde(default)]
explanation: Option<String>,
// New-style item list; `None` when the key is missing from the payload.
#[serde(default)]
todo: Option<Vec<TodoItemArg>>,
// Legacy item list; `None` when the key is missing from the payload.
#[serde(default)]
plan: Option<Vec<TodoItemArg>>,
}
impl UpdateTodoArgs {
pub fn new(explanation: Option<String>, todo: Vec<TodoItemArg>) -> Self {
let plan = todo.clone();
Self {
explanation,
todo,
plan,
}
}
pub fn todo_items(&self) -> &[TodoItemArg] {
if self.todo.is_empty() {
&self.plan
} else {
&self.todo
}
}
pub fn into_todo_items(self) -> Vec<TodoItemArg> {
if self.todo.is_empty() {
self.plan
} else {
self.todo
}
}
pub fn into_parts(self) -> (Option<String>, Vec<TodoItemArg>) {
let UpdateTodoArgs {
explanation,
todo,
plan,
} = self;
let items = if todo.is_empty() { plan } else { todo };
(explanation, items)
}
}
impl Serialize for UpdateTodoArgs {
    /// Serializes the effective item list under both the `todo` key and the
    /// legacy `plan` key, preceded by `explanation` when one is present.
    ///
    /// The field count passed to `serialize_struct` reflects the fields that are
    /// actually written: serializers that rely on the declared length (rather
    /// than ignoring it, as `serde_json` does) would otherwise see a mismatch
    /// whenever `explanation` is `None`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let items = self.todo_items();
        // `todo` and `plan` are always written; `explanation` only when set.
        let field_count = 2 + usize::from(self.explanation.is_some());
        let mut state = serializer.serialize_struct("UpdateTodoArgs", field_count)?;
        match &self.explanation {
            Some(explanation) => state.serialize_field("explanation", explanation)?,
            // Mirror what derived impls do for skipped fields.
            None => state.skip_field("explanation")?,
        }
        state.serialize_field("todo", items)?;
        state.serialize_field("plan", items)?;
        state.end()
    }
}
impl<'de> Deserialize<'de> for UpdateTodoArgs {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let raw = UpdateTodoArgsRaw::deserialize(deserializer)?;
let items = match (raw.todo, raw.plan) {
(Some(todo), Some(plan)) => {
if todo.is_empty() {
plan
} else {
todo
}
}
(Some(todo), None) => todo,
(None, Some(plan)) => plan,
(None, None) => return Err(de::Error::missing_field("todo")),
};
Ok(UpdateTodoArgs::new(raw.explanation, items))
}
}
#[cfg(test)]
mod tests {
    use super::TodoItemArg;
    use super::TodoStatus;
    use super::UpdateTodoArgs;
    use pretty_assertions::assert_eq;

    /// A payload that only carries the legacy `plan` key still parses.
    #[test]
    fn deserializes_legacy_plan_field() {
        let args: UpdateTodoArgs =
            serde_json::from_str(r#"{"explanation":"x","plan":[{"step":"a","status":"pending"}]}"#)
                .expect("legacy plan field should parse");
        let expected = UpdateTodoArgs::new(
            Some("x".to_string()),
            vec![TodoItemArg {
                step: "a".to_string(),
                status: TodoStatus::Pending,
            }],
        );
        assert_eq!(args, expected);
    }

    /// When both keys are present and `todo` is non-empty, `todo` wins.
    #[test]
    fn prefers_todo_over_legacy_plan_field() {
        let args: UpdateTodoArgs = serde_json::from_str(
            r#"{"todo":[{"step":"a","status":"pending"}],"plan":[{"step":"b","status":"completed"}]}"#,
        )
        .expect("payload with both fields should parse");
        let expected = UpdateTodoArgs::new(
            None,
            vec![TodoItemArg {
                step: "a".to_string(),
                status: TodoStatus::Pending,
            }],
        );
        assert_eq!(args, expected);
    }

    /// A payload with neither `todo` nor `plan` is rejected.
    #[test]
    fn rejects_payload_without_todo_or_plan() {
        let result = serde_json::from_str::<UpdateTodoArgs>(r#"{"explanation":"x"}"#);
        assert!(result.is_err(), "expected missing-field error, got {result:?}");
    }

    /// Both keys are emitted with identical contents.
    ///
    /// The exact JSON is pinned: comparing `value.get("todo")` with
    /// `value.get("plan")` alone would also pass if both keys were missing.
    #[test]
    fn serializes_todo_and_plan_fields() {
        let args = UpdateTodoArgs::new(
            None,
            vec![TodoItemArg {
                step: "a".to_string(),
                status: TodoStatus::Completed,
            }],
        );
        let value = serde_json::to_value(&args).expect("args should serialize");
        let expected = serde_json::json!({
            "todo": [{"step": "a", "status": "completed"}],
            "plan": [{"step": "a", "status": "completed"}],
        });
        assert_eq!(value, expected);
    }
}

View File

@@ -208,7 +208,7 @@ use codex_core::protocol::SandboxPolicy;
use codex_file_search::FileMatch;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::todo_tool::UpdateTodoArgs;
use strum::IntoEnumIterator;
const USER_SHELL_COMMAND_HELP_TITLE: &str = "Prefix a command with ! to run it locally";
@@ -519,8 +519,8 @@ pub(crate) struct ChatWidget {
// This gates rendering of the "Worked for …" separator so purely conversational turns don't
// show an empty divider. It is reset when the separator is emitted.
had_work_activity: bool,
// Whether the current turn emitted a plan update.
saw_plan_update_this_turn: bool,
// Whether the current turn emitted a todo list update.
saw_todo_update_this_turn: bool,
// Status-indicator elapsed seconds captured at the last emitted final-message separator.
//
// This lets the separator show per-chunk work time (since the previous separator) rather than
@@ -911,7 +911,7 @@ impl ChatWidget {
fn on_task_started(&mut self) {
self.agent_turn_running = true;
self.saw_plan_update_this_turn = false;
self.saw_todo_update_this_turn = false;
self.bottom_pane.clear_quit_shortcut_hint();
self.quit_shortcut_expires_at = None;
self.quit_shortcut_key = None;
@@ -962,7 +962,7 @@ impl ChatWidget {
return;
}
let has_message = last_agent_message.is_some_and(|message| !message.trim().is_empty());
if !has_message && !self.saw_plan_update_this_turn {
if !has_message && !self.saw_todo_update_this_turn {
return;
}
if !self.bottom_pane.no_modal_or_popup_active() {
@@ -1324,9 +1324,9 @@ impl ChatWidget {
Some(combined)
}
fn on_plan_update(&mut self, update: UpdatePlanArgs) {
self.saw_plan_update_this_turn = true;
self.add_to_history(history_cell::new_plan_update(update));
// Handles a todo list update: records that this turn produced one (the flag is
// reset in `on_task_started` and consulted in the task-complete path) and appends
// a todo update cell to the history.
fn on_todo_update(&mut self, update: UpdateTodoArgs) {
self.saw_todo_update_this_turn = true;
self.add_to_history(history_cell::new_todo_update(update));
}
fn on_exec_approval_request(&mut self, id: String, ev: ExecApprovalRequestEvent) {
@@ -2093,7 +2093,7 @@ impl ChatWidget {
pre_review_token_info: None,
needs_final_message_separator: false,
had_work_activity: false,
saw_plan_update_this_turn: false,
saw_todo_update_this_turn: false,
last_separator_elapsed_secs: None,
last_rendered_width: std::cell::Cell::new(None),
feedback,
@@ -2221,7 +2221,7 @@ impl ChatWidget {
retry_status_header: None,
thread_id: None,
forked_from: None,
saw_plan_update_this_turn: false,
saw_todo_update_this_turn: false,
queued_user_messages: VecDeque::new(),
show_welcome_banner: is_first_run,
suppress_session_configured_redraw: false,
@@ -2358,7 +2358,7 @@ impl ChatWidget {
pre_review_token_info: None,
needs_final_message_separator: false,
had_work_activity: false,
saw_plan_update_this_turn: false,
saw_todo_update_this_turn: false,
last_separator_elapsed_secs: None,
last_rendered_width: std::cell::Cell::new(None),
feedback,
@@ -3099,7 +3099,7 @@ impl ChatWidget {
self.on_interrupted_turn(ev.reason);
}
},
EventMsg::PlanUpdate(update) => self.on_plan_update(update),
EventMsg::TodoUpdate(update) => self.on_todo_update(update),
EventMsg::ExecApprovalRequest(ev) => {
// For replayed events, synthesize an empty id (these should not occur).
self.on_exec_approval_request(id.unwrap_or_default(), ev)

View File

@@ -70,10 +70,10 @@ use codex_protocol::config_types::Settings;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ReasoningEffortPreset;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::protocol::CodexErrorInfo;
use codex_protocol::todo_tool::TodoItemArg;
use codex_protocol::todo_tool::TodoStatus;
use codex_protocol::todo_tool::UpdateTodoArgs;
use codex_protocol::user_input::TextElement;
use codex_protocol::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
@@ -833,7 +833,7 @@ async fn make_chatwidget_manual(
pre_review_token_info: None,
needs_final_message_separator: false,
had_work_activity: false,
saw_plan_update_this_turn: false,
saw_todo_update_this_turn: false,
last_separator_elapsed_secs: None,
last_rendered_width: std::cell::Cell::new(None),
feedback: codex_feedback::CodexFeedback::new(),
@@ -1270,7 +1270,7 @@ async fn plan_implementation_popup_skips_when_messages_queued() {
}
#[tokio::test]
async fn plan_implementation_popup_shows_on_plan_update_without_message() {
async fn plan_implementation_popup_shows_on_todo_update_without_message() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5")).await;
chat.set_feature_enabled(Feature::CollaborationModes, true);
let plan_mask =
@@ -1279,19 +1279,19 @@ async fn plan_implementation_popup_shows_on_plan_update_without_message() {
chat.set_collaboration_mask(plan_mask);
chat.on_task_started();
chat.on_plan_update(UpdatePlanArgs {
explanation: None,
plan: vec![PlanItemArg {
chat.on_todo_update(UpdateTodoArgs::new(
None,
vec![TodoItemArg {
step: "First".to_string(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
}],
});
));
chat.on_task_complete(None, false);
let popup = render_bottom_popup(&chat, 80);
assert!(
popup.contains(PLAN_IMPLEMENTATION_TITLE),
"expected plan popup after plan update, got {popup:?}"
"expected implementation popup after todo list update, got {popup:?}"
);
}
@@ -1307,13 +1307,13 @@ async fn plan_implementation_popup_skips_when_rate_limit_prompt_pending() {
chat.set_collaboration_mask(plan_mask);
chat.on_task_started();
chat.on_plan_update(UpdatePlanArgs {
explanation: None,
plan: vec![PlanItemArg {
chat.on_todo_update(UpdateTodoArgs::new(
None,
vec![TodoItemArg {
step: "First".to_string(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
}],
});
));
chat.on_rate_limit_snapshot(Some(snapshot(92.0)));
chat.on_task_complete(None, false);
@@ -4404,35 +4404,38 @@ async fn apply_patch_request_shows_diff_summary() -> anyhow::Result<()> {
}
#[tokio::test]
async fn plan_update_renders_history_cell() {
async fn todo_update_renders_history_cell() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
let update = UpdatePlanArgs {
explanation: Some("Adapting plan".to_string()),
plan: vec![
PlanItemArg {
let update = UpdateTodoArgs::new(
Some("Adapting plan".to_string()),
vec![
TodoItemArg {
step: "Explore codebase".into(),
status: StepStatus::Completed,
status: TodoStatus::Completed,
},
PlanItemArg {
TodoItemArg {
step: "Implement feature".into(),
status: StepStatus::InProgress,
status: TodoStatus::InProgress,
},
PlanItemArg {
TodoItemArg {
step: "Write tests".into(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
},
],
};
);
chat.handle_codex_event(Event {
id: "sub-1".into(),
msg: EventMsg::PlanUpdate(update),
msg: EventMsg::TodoUpdate(update),
});
let cells = drain_insert_history(&mut rx);
assert!(!cells.is_empty(), "expected plan update cell to be sent");
assert!(
!cells.is_empty(),
"expected todo list update cell to be sent"
);
let blob = lines_to_single_string(cells.last().unwrap());
assert!(
blob.contains("Updated Plan"),
"missing plan header: {blob:?}"
blob.contains("Updated Todo List"),
"missing todo list header: {blob:?}"
);
assert!(blob.contains("Explore codebase"));
assert!(blob.contains("Implement feature"));

View File

@@ -46,9 +46,9 @@ use codex_core::protocol::SessionConfiguredEvent;
use codex_core::web_search::web_search_detail;
use codex_protocol::models::WebSearchAction;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use codex_protocol::todo_tool::TodoItemArg;
use codex_protocol::todo_tool::TodoStatus;
use codex_protocol::todo_tool::UpdateTodoArgs;
use codex_protocol::user_input::TextElement;
use image::DynamicImage;
use image::ImageReader;
@@ -1727,19 +1727,22 @@ pub(crate) fn new_error_event(message: String) -> PlainHistoryCell {
PlainHistoryCell { lines }
}
/// Render a userfriendly plan update styled like a checkbox todo list.
pub(crate) fn new_plan_update(update: UpdatePlanArgs) -> PlanUpdateCell {
let UpdatePlanArgs { explanation, plan } = update;
PlanUpdateCell { explanation, plan }
/// Builds the history cell for a todo list update, rendered as a checkbox-style list.
///
/// The update is split into its explanation and effective item list via
/// `UpdateTodoArgs::into_parts`.
pub(crate) fn new_todo_update(update: UpdateTodoArgs) -> TodoUpdateCell {
    let parts = update.into_parts();
    TodoUpdateCell {
        explanation: parts.0,
        todo_items: parts.1,
    }
}
#[derive(Debug)]
pub(crate) struct PlanUpdateCell {
pub(crate) struct TodoUpdateCell {
explanation: Option<String>,
plan: Vec<PlanItemArg>,
todo_items: Vec<TodoItemArg>,
}
impl HistoryCell for PlanUpdateCell {
impl HistoryCell for TodoUpdateCell {
fn display_lines(&self, width: u16) -> Vec<Line<'static>> {
let render_note = |text: &str| -> Vec<Line<'static>> {
let wrap_width = width.saturating_sub(4).max(1) as usize;
@@ -1749,11 +1752,11 @@ impl HistoryCell for PlanUpdateCell {
.collect()
};
let render_step = |status: &StepStatus, text: &str| -> Vec<Line<'static>> {
let render_step = |status: &TodoStatus, text: &str| -> Vec<Line<'static>> {
let (box_str, step_style) = match status {
StepStatus::Completed => ("", Style::default().crossed_out().dim()),
StepStatus::InProgress => ("", Style::default().cyan().bold()),
StepStatus::Pending => ("", Style::default().dim()),
TodoStatus::Completed => ("", Style::default().crossed_out().dim()),
TodoStatus::InProgress => ("", Style::default().cyan().bold()),
TodoStatus::Pending => ("", Style::default().dim()),
};
let wrap_width = (width as usize)
.saturating_sub(4)
@@ -1768,7 +1771,7 @@ impl HistoryCell for PlanUpdateCell {
};
let mut lines: Vec<Line<'static>> = vec![];
lines.push(vec!["".dim(), "Updated Plan".bold()].into());
lines.push(vec!["".dim(), "Updated Todo List".bold()].into());
let mut indented_lines = vec![];
let note = self
@@ -1780,10 +1783,10 @@ impl HistoryCell for PlanUpdateCell {
indented_lines.extend(render_note(expl));
};
if self.plan.is_empty() {
if self.todo_items.is_empty() {
indented_lines.push(Line::from("(no steps provided)".dim().italic()));
} else {
for PlanItemArg { step, status } in self.plan.iter() {
for TodoItemArg { step, status } in self.todo_items.iter() {
indented_lines.extend(render_step(status, step));
}
}
@@ -2887,30 +2890,30 @@ mod tests {
}
#[test]
fn plan_update_with_note_and_wrapping_snapshot() {
fn todo_update_with_note_and_wrapping_snapshot() {
// Long explanation forces wrapping; include long step text to verify step wrapping and alignment.
let update = UpdatePlanArgs {
explanation: Some(
let update = UpdateTodoArgs::new(
Some(
"Ill update Grafana call error handling by adding retries and clearer messages when the backend is unreachable."
.to_string(),
),
plan: vec![
PlanItemArg {
vec![
TodoItemArg {
step: "Investigate existing error paths and logging around HTTP timeouts".into(),
status: StepStatus::Completed,
status: TodoStatus::Completed,
},
PlanItemArg {
TodoItemArg {
step: "Harden Grafana client error handling with retry/backoff and userfriendly messages".into(),
status: StepStatus::InProgress,
status: TodoStatus::InProgress,
},
PlanItemArg {
TodoItemArg {
step: "Add tests for transient failure scenarios and surfacing to the UI".into(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
},
],
};
);
let cell = new_plan_update(update);
let cell = new_todo_update(update);
// Narrow width to force wrapping for both the note and steps
let lines = cell.display_lines(32);
let rendered = render_lines(&lines).join("\n");
@@ -2918,22 +2921,22 @@ mod tests {
}
#[test]
fn plan_update_without_note_snapshot() {
let update = UpdatePlanArgs {
explanation: None,
plan: vec![
PlanItemArg {
fn todo_update_without_note_snapshot() {
let update = UpdateTodoArgs::new(
None,
vec![
TodoItemArg {
step: "Define error taxonomy".into(),
status: StepStatus::InProgress,
status: TodoStatus::InProgress,
},
PlanItemArg {
TodoItemArg {
step: "Implement mapping to user messages".into(),
status: StepStatus::Pending,
status: TodoStatus::Pending,
},
],
};
);
let cell = new_plan_update(update);
let cell = new_todo_update(update);
let lines = cell.display_lines(40);
let rendered = render_lines(&lines).join("\n");
insta::assert_snapshot!(rendered);

View File

@@ -2,7 +2,7 @@
source: tui/src/history_cell.rs
expression: rendered
---
• Updated Plan
• Updated Todo List
└ Ill update Grafana call
error handling by adding
retries and clearer

View File

@@ -2,6 +2,6 @@
source: tui/src/history_cell.rs
expression: rendered
---
• Updated Plan
• Updated Todo List
└ □ Define error taxonomy
□ Implement mapping to user messages