Mirror of https://github.com/openai/codex.git, synced 2026-02-03 15:33:41 +00:00.
Compare commits
5 Commits
prototype
...
planning-t
| Author | SHA1 | Date |
|---|---|---|
|
|
2c274cf279 | ||
|
|
97581a86a3 | ||
|
|
658f2d677f | ||
|
|
18619dbbc1 | ||
|
|
c66fd9d59a |
@@ -243,6 +243,21 @@ By default, `reasoning` is only set on requests to OpenAI models that are known
|
||||
model_supports_reasoning_summaries = true
|
||||
```
|
||||
|
||||
## experimental_include_plan_tool
|
||||
|
||||
Controls whether to expose the experimental plan tool (named `update_plan`) to the model and include the corresponding guidance in the system prompt.
|
||||
|
||||
Default behavior:
|
||||
- For known models (anything hardcoded in the models list), this is disabled by default.
|
||||
- For unknown models whose name starts with "gpt-", this is enabled by default so new GPT-family models get the feature without a CLI update.
|
||||
|
||||
When enabled, the model can call `update_plan` to keep an up-to-date, step-by-step plan for the task and Codex will render plan updates in the UI. When disabled, the tool is not advertised to the model and the “Plan updates” section is omitted from the prompt; any unsolicited `update_plan` calls will be treated as unsupported.
|
||||
|
||||
```toml
|
||||
# Enable the experimental plan tool and prompt instructions
|
||||
experimental_include_plan_tool = true
|
||||
```
|
||||
|
||||
## sandbox_mode
|
||||
|
||||
Codex executes model-generated shell commands inside an OS-level sandbox.
|
||||
|
||||
@@ -97,6 +97,7 @@ You can invoke apply_patch like:
|
||||
shell {"command":["apply_patch","*** Begin Patch\n*** Add File: hello.txt\n+Hello, world!\n*** End Patch\n"]}
|
||||
```
|
||||
|
||||
<!-- PLAN_TOOL:START -->
|
||||
Plan updates
|
||||
|
||||
A tool named `update_plan` is available. Use it to keep an up‑to‑date, step‑by‑step plan for the task so you can follow your progress. When making your plans, keep in mind that you are a deployed coding agent - `update_plan` calls should not involve doing anything that you aren't capable of doing. For example, `update_plan` calls should NEVER contain tasks to merge your own pull requests. Only stop to ask the user if you genuinely need their feedback on a change.
|
||||
@@ -105,3 +106,4 @@ A tool named `update_plan` is available. Use it to keep an up‑to‑date, step
|
||||
- Whenever you finish a step, call `update_plan` again, marking the finished step as `completed` and the next step as `in_progress`.
|
||||
- If your plan needs to change, call `update_plan` with the revised steps and include an `explanation` describing the change.
|
||||
- When all steps are complete, make a final `update_plan` call with all steps marked `completed`.
|
||||
<!-- PLAN_TOOL:END -->
|
||||
|
||||
@@ -37,7 +37,8 @@ pub(crate) async fn stream_chat_completions(
|
||||
// Build messages array
|
||||
let mut messages = Vec::<serde_json::Value>::new();
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(model);
|
||||
let instr_cfg = crate::client_common::InstructionsConfig::for_model(model, include_plan_tool);
|
||||
let full_instructions = prompt.get_full_instructions(&instr_cfg);
|
||||
messages.push(json!({"role": "system", "content": full_instructions}));
|
||||
|
||||
if let Some(instr) = &prompt.user_instructions {
|
||||
|
||||
@@ -141,7 +141,11 @@ impl ModelClient {
|
||||
|
||||
let token = auth.get_token().await?;
|
||||
|
||||
let full_instructions = prompt.get_full_instructions(&self.config.model);
|
||||
let instr_cfg = crate::client_common::InstructionsConfig::for_model(
|
||||
&self.config.model,
|
||||
self.config.include_plan_tool,
|
||||
);
|
||||
let full_instructions = prompt.get_full_instructions(&instr_cfg);
|
||||
let tools_json = create_tools_json_for_responses_api(
|
||||
prompt,
|
||||
&self.config.model,
|
||||
|
||||
@@ -37,15 +37,60 @@ pub struct Prompt {
|
||||
pub base_instructions_override: Option<String>,
|
||||
}
|
||||
|
||||
/// Options that influence how the full instructions are composed for a request.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct InstructionsConfig {
|
||||
pub include_plan_tool: bool,
|
||||
pub extra_sections: Vec<&'static str>,
|
||||
}
|
||||
|
||||
impl InstructionsConfig {
|
||||
pub fn for_model(model: &str, include_plan_tool: bool) -> Self {
|
||||
let mut extra_sections = Vec::new();
|
||||
if model.starts_with("gpt-4.1") {
|
||||
extra_sections.push(APPLY_PATCH_TOOL_INSTRUCTIONS);
|
||||
}
|
||||
Self {
|
||||
include_plan_tool,
|
||||
extra_sections,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Prompt {
|
||||
pub(crate) fn get_full_instructions(&self, model: &str) -> Cow<'_, str> {
|
||||
let base = self
|
||||
pub(crate) fn get_full_instructions(&self, cfg: &InstructionsConfig) -> Cow<'_, str> {
|
||||
let mut base = self
|
||||
.base_instructions_override
|
||||
.as_deref()
|
||||
.unwrap_or(BASE_INSTRUCTIONS);
|
||||
let mut sections: Vec<&str> = vec![base];
|
||||
if model.starts_with("gpt-4.1") {
|
||||
sections.push(APPLY_PATCH_TOOL_INSTRUCTIONS);
|
||||
.unwrap_or(BASE_INSTRUCTIONS)
|
||||
.to_string();
|
||||
|
||||
if !cfg.include_plan_tool {
|
||||
// Remove the plan-tool section if present. Prefer explicit markers
|
||||
// for robustness, but fall back to trimming from the "Plan updates"
|
||||
// heading if markers are missing.
|
||||
let start_marker = "<!-- PLAN_TOOL:START -->";
|
||||
let end_marker = "<!-- PLAN_TOOL:END -->";
|
||||
if let (Some(start), Some(end)) = (base.find(start_marker), base.find(end_marker)) {
|
||||
if end > start {
|
||||
let mut edited = String::with_capacity(base.len());
|
||||
edited.push_str(&base[..start]);
|
||||
edited.push_str(&base[end + end_marker.len()..]);
|
||||
base = edited;
|
||||
}
|
||||
} else if let Some(idx) = base
|
||||
.find("\n\nPlan updates")
|
||||
.or_else(|| base.find("\nPlan updates"))
|
||||
.or_else(|| base.find("Plan updates"))
|
||||
{
|
||||
base.truncate(idx);
|
||||
}
|
||||
base = base.trim_end().to_string();
|
||||
}
|
||||
|
||||
let mut sections: Vec<&str> = vec![&base];
|
||||
for s in &cfg.extra_sections {
|
||||
sections.push(s);
|
||||
}
|
||||
Cow::Owned(sections.join("\n"))
|
||||
}
|
||||
@@ -197,7 +242,18 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
let expected = format!("{BASE_INSTRUCTIONS}\n{APPLY_PATCH_TOOL_INSTRUCTIONS}");
|
||||
let full = prompt.get_full_instructions("gpt-4.1");
|
||||
let cfg = InstructionsConfig::for_model("gpt-4.1", true);
|
||||
let full = prompt.get_full_instructions(&cfg);
|
||||
assert_eq!(full, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_section_removed_when_disabled() {
|
||||
let prompt = Prompt::default();
|
||||
let cfg = InstructionsConfig::for_model("gpt-4.1", false);
|
||||
let full = prompt.get_full_instructions(&cfg);
|
||||
assert!(!full.contains("Plan updates"));
|
||||
assert!(!full.contains("update_plan"));
|
||||
assert!(full.contains(APPLY_PATCH_TOOL_INSTRUCTIONS));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,6 +225,9 @@ pub(crate) struct Session {
|
||||
state: Mutex<State>,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
user_shell: shell::Shell,
|
||||
|
||||
/// Whether the experimental plan tool is enabled for this session.
|
||||
include_plan_tool: bool,
|
||||
}
|
||||
|
||||
impl Session {
|
||||
@@ -791,6 +794,7 @@ async fn submission_loop(
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
disable_response_storage,
|
||||
user_shell: default_shell,
|
||||
include_plan_tool: config.include_plan_tool,
|
||||
}));
|
||||
|
||||
// Patch restored state into the newly created session.
|
||||
@@ -1531,7 +1535,19 @@ async fn handle_function_call(
|
||||
};
|
||||
handle_container_exec_with_params(params, sess, sub_id, call_id).await
|
||||
}
|
||||
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
|
||||
"update_plan" => {
|
||||
if sess.include_plan_tool {
|
||||
handle_update_plan(sess, arguments, sub_id, call_id).await
|
||||
} else {
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("unsupported call: {name}"),
|
||||
success: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
match sess.mcp_connection_manager.parse_tool_name(&name) {
|
||||
Some((server, tool_name)) => {
|
||||
|
||||
@@ -342,6 +342,10 @@ pub struct ConfigToml {
|
||||
|
||||
/// The value for the `originator` header included with Responses API requests.
|
||||
pub internal_originator: Option<String>,
|
||||
|
||||
/// Include an experimental plan tool that the model can use to update its current plan and status of each step.
|
||||
/// This is experimental and may be removed in the future.
|
||||
pub experimental_include_plan_tool: Option<bool>,
|
||||
}
|
||||
|
||||
impl ConfigToml {
|
||||
@@ -476,6 +480,7 @@ impl Config {
|
||||
});
|
||||
|
||||
let experimental_resume = cfg.experimental_resume;
|
||||
let is_unknown_gpt = openai_model_info.is_none() && model.starts_with("gpt-");
|
||||
|
||||
// Load base instructions override from a file if specified. If the
|
||||
// path is relative, resolve it against the effective cwd so the
|
||||
@@ -527,7 +532,7 @@ impl Config {
|
||||
|
||||
model_supports_reasoning_summaries: cfg
|
||||
.model_supports_reasoning_summaries
|
||||
.unwrap_or(false),
|
||||
.unwrap_or(is_unknown_gpt),
|
||||
|
||||
chatgpt_base_url: config_profile
|
||||
.chatgpt_base_url
|
||||
@@ -535,7 +540,9 @@ impl Config {
|
||||
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
||||
|
||||
experimental_resume,
|
||||
include_plan_tool: include_plan_tool.unwrap_or(false),
|
||||
include_plan_tool: include_plan_tool
|
||||
.or(cfg.experimental_include_plan_tool)
|
||||
.unwrap_or(is_unknown_gpt),
|
||||
internal_originator: cfg.internal_originator,
|
||||
};
|
||||
Ok(config)
|
||||
@@ -649,6 +656,73 @@ mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[test]
|
||||
fn test_plan_and_reasoning_defaults_known_vs_unknown() -> std::io::Result<()> {
|
||||
let fixture = create_test_fixture()?;
|
||||
|
||||
// Unknown GPT-like model -> defaults ON for plan tool and reasoning summaries override.
|
||||
let unknown_gpt_overrides = ConfigOverrides {
|
||||
model: Some("gpt-unknown-2025".to_string()),
|
||||
cwd: Some(fixture.cwd()),
|
||||
..Default::default()
|
||||
};
|
||||
let unknown_gpt_cfg = Config::load_from_base_config_with_overrides(
|
||||
fixture.cfg.clone(),
|
||||
unknown_gpt_overrides,
|
||||
fixture.codex_home(),
|
||||
)?;
|
||||
assert!(
|
||||
unknown_gpt_cfg.include_plan_tool,
|
||||
"plan tool should default to ON for unknown GPT-like models"
|
||||
);
|
||||
assert!(
|
||||
unknown_gpt_cfg.model_supports_reasoning_summaries,
|
||||
"reasoning summaries should default to ON for unknown GPT-like models"
|
||||
);
|
||||
|
||||
// Unknown non-GPT model -> defaults OFF for both.
|
||||
let unknown_non_gpt_overrides = ConfigOverrides {
|
||||
model: Some("my-new-model".to_string()),
|
||||
cwd: Some(fixture.cwd()),
|
||||
..Default::default()
|
||||
};
|
||||
let unknown_non_gpt_cfg = Config::load_from_base_config_with_overrides(
|
||||
fixture.cfg.clone(),
|
||||
unknown_non_gpt_overrides,
|
||||
fixture.codex_home(),
|
||||
)?;
|
||||
assert!(
|
||||
!unknown_non_gpt_cfg.include_plan_tool,
|
||||
"plan tool should default to OFF for unknown non-GPT models"
|
||||
);
|
||||
assert!(
|
||||
!unknown_non_gpt_cfg.model_supports_reasoning_summaries,
|
||||
"reasoning summaries should default to OFF for unknown non-GPT models"
|
||||
);
|
||||
|
||||
// Known model -> defaults OFF for plan tool and reasoning summaries override value.
|
||||
let known_overrides = ConfigOverrides {
|
||||
model: Some("gpt-3.5-turbo".to_string()),
|
||||
cwd: Some(fixture.cwd()),
|
||||
..Default::default()
|
||||
};
|
||||
let known_cfg = Config::load_from_base_config_with_overrides(
|
||||
fixture.cfg.clone(),
|
||||
known_overrides,
|
||||
fixture.codex_home(),
|
||||
)?;
|
||||
assert!(
|
||||
!known_cfg.include_plan_tool,
|
||||
"plan tool should default to OFF for known models"
|
||||
);
|
||||
assert!(
|
||||
!known_cfg.model_supports_reasoning_summaries,
|
||||
"reasoning summaries override should default to OFF for known models"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toml_parsing() {
|
||||
let history_with_persistence = r#"
|
||||
|
||||
@@ -80,7 +80,7 @@ pub async fn run_main(
|
||||
config_profile: cli.config_profile.clone(),
|
||||
codex_linux_sandbox_exe,
|
||||
base_instructions: None,
|
||||
include_plan_tool: Some(true),
|
||||
include_plan_tool: None,
|
||||
};
|
||||
// Parse `-c` overrides from the CLI.
|
||||
let cli_kv_overrides = match cli.config_overrides.parse_overrides() {
|
||||
|
||||
Reference in New Issue
Block a user