mirror of
https://github.com/openai/codex.git
synced 2026-02-03 07:23:39 +00:00
Compare commits
13 Commits
dh--reenab
...
patch-1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5ebfafab97 | ||
|
|
4ce63c70a8 | ||
|
|
e201a3ea55 | ||
|
|
7d9ad3effd | ||
|
|
c3a710ee14 | ||
|
|
29364f3a9b | ||
|
|
e9abecfb68 | ||
|
|
530db0ad73 | ||
|
|
286cb2a021 | ||
|
|
8c3a2b1302 | ||
|
|
8258ad88a0 | ||
|
|
3de1e54474 | ||
|
|
2d3387169c |
@@ -151,6 +151,15 @@ mod tests {
|
||||
assert_eq!(v.as_integer(), Some(42));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_bool() {
|
||||
let true_literal = parse_toml_value("true").expect("parse");
|
||||
assert_eq!(true_literal.as_bool(), Some(true));
|
||||
|
||||
let false_literal = parse_toml_value("false").expect("parse");
|
||||
assert_eq!(false_literal.as_bool(), Some(false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fails_on_unquoted_string() {
|
||||
assert!(parse_toml_value("hello").is_err());
|
||||
|
||||
@@ -104,7 +104,7 @@ pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
|
||||
let [shell, flag, script] = command else {
|
||||
return None;
|
||||
};
|
||||
if flag != "-lc" || !is_well_known_sh_shell(shell) {
|
||||
if !matches!(flag.as_str(), "-lc" | "-c") || !is_well_known_sh_shell(shell) {
|
||||
return None;
|
||||
}
|
||||
Some((shell, script))
|
||||
|
||||
@@ -97,6 +97,7 @@ use crate::protocol::Submission;
|
||||
use crate::protocol::TokenCountEvent;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::protocol::WarningEvent;
|
||||
use crate::rollout::RolloutRecorder;
|
||||
use crate::rollout::RolloutRecorderParams;
|
||||
use crate::shell;
|
||||
@@ -674,6 +675,34 @@ impl Session {
|
||||
let rollout_items = conversation_history.get_rollout_items();
|
||||
let persist = matches!(conversation_history, InitialHistory::Forked(_));
|
||||
|
||||
// If resuming, warn when the last recorded model differs from the current one.
|
||||
if let InitialHistory::Resumed(_) = conversation_history
|
||||
&& let Some(prev) = rollout_items.iter().rev().find_map(|it| {
|
||||
if let RolloutItem::TurnContext(ctx) = it {
|
||||
Some(ctx.model.as_str())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
{
|
||||
let curr = turn_context.client.get_model();
|
||||
if prev != curr {
|
||||
warn!(
|
||||
"resuming session with different model: previous={prev}, current={curr}"
|
||||
);
|
||||
self.send_event(
|
||||
&turn_context,
|
||||
EventMsg::Warning(WarningEvent {
|
||||
message: format!(
|
||||
"This session was recorded with model `{prev}` but is resuming with `{curr}`. \
|
||||
Consider switching back to `{prev}` as it may affect Codex performance."
|
||||
),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
// Always add response items to conversation history
|
||||
let reconstructed_history =
|
||||
self.reconstruct_history_from_rollout(&turn_context, &rollout_items);
|
||||
|
||||
@@ -29,6 +29,9 @@ pub enum Stage {
|
||||
pub enum Feature {
|
||||
/// Use the single unified PTY-backed exec tool.
|
||||
UnifiedExec,
|
||||
/// Use the shell command tool that takes `command` as a single string of
|
||||
/// shell instead of an array of args passed to `execvp(3)`.
|
||||
ShellCommandTool,
|
||||
/// Enable experimental RMCP features such as OAuth login.
|
||||
RmcpClient,
|
||||
/// Include the freeform apply_patch tool.
|
||||
@@ -250,6 +253,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ShellCommandTool,
|
||||
key: "shell_command_tool",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::RmcpClient,
|
||||
key: "rmcp_client",
|
||||
|
||||
@@ -31,16 +31,37 @@ pub enum Shell {
|
||||
impl Shell {
|
||||
pub fn name(&self) -> Option<String> {
|
||||
match self {
|
||||
Shell::Zsh(zsh) => std::path::Path::new(&zsh.shell_path)
|
||||
.file_name()
|
||||
.map(|s| s.to_string_lossy().to_string()),
|
||||
Shell::Bash(bash) => std::path::Path::new(&bash.shell_path)
|
||||
.file_name()
|
||||
.map(|s| s.to_string_lossy().to_string()),
|
||||
Shell::Zsh(ZshShell { shell_path, .. }) | Shell::Bash(BashShell { shell_path, .. }) => {
|
||||
std::path::Path::new(shell_path)
|
||||
.file_name()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
}
|
||||
Shell::PowerShell(ps) => Some(ps.exe.clone()),
|
||||
Shell::Unknown => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Takes a string of shell and returns the full list of command args to
|
||||
/// use with `exec()` to run the shell command.
|
||||
pub fn derive_exec_args(&self, command: &str, use_login_shell: bool) -> Vec<String> {
|
||||
match self {
|
||||
Shell::Zsh(ZshShell { shell_path, .. }) | Shell::Bash(BashShell { shell_path, .. }) => {
|
||||
let arg = if use_login_shell { "-lc" } else { "-c" };
|
||||
vec![shell_path.clone(), arg.to_string(), command.to_string()]
|
||||
}
|
||||
Shell::PowerShell(ps) => {
|
||||
let mut args = vec![ps.exe.clone(), "-NoLogo".to_string()];
|
||||
if !use_login_shell {
|
||||
args.push("-NoProfile".to_string());
|
||||
}
|
||||
|
||||
args.push("-Command".to_string());
|
||||
args.push(command.to_string());
|
||||
args
|
||||
}
|
||||
Shell::Unknown => shlex::split(command).unwrap_or_else(|| vec![command.to_string()]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
|
||||
@@ -63,27 +63,10 @@ impl SessionTask for UserShellCommandTask {
|
||||
// Execute the user's script under their default shell when known; this
|
||||
// allows commands that use shell features (pipes, &&, redirects, etc.).
|
||||
// We do not source rc files or otherwise reformat the script.
|
||||
let shell_invocation = match session.user_shell() {
|
||||
crate::shell::Shell::Zsh(zsh) => vec![
|
||||
zsh.shell_path.clone(),
|
||||
"-lc".to_string(),
|
||||
self.command.clone(),
|
||||
],
|
||||
crate::shell::Shell::Bash(bash) => vec![
|
||||
bash.shell_path.clone(),
|
||||
"-lc".to_string(),
|
||||
self.command.clone(),
|
||||
],
|
||||
crate::shell::Shell::PowerShell(ps) => vec![
|
||||
ps.exe.clone(),
|
||||
"-NoProfile".to_string(),
|
||||
"-Command".to_string(),
|
||||
self.command.clone(),
|
||||
],
|
||||
crate::shell::Shell::Unknown => {
|
||||
shlex::split(&self.command).unwrap_or_else(|| vec![self.command.clone()])
|
||||
}
|
||||
};
|
||||
let use_login_shell = true;
|
||||
let shell_invocation = session
|
||||
.user_shell()
|
||||
.derive_exec_args(&self.command, use_login_shell);
|
||||
|
||||
let call_id = Uuid::new_v4().to_string();
|
||||
let raw_command = self.command.clone();
|
||||
|
||||
@@ -19,6 +19,7 @@ pub use mcp::McpHandler;
|
||||
pub use mcp_resource::McpResourceHandler;
|
||||
pub use plan::PlanHandler;
|
||||
pub use read_file::ReadFileHandler;
|
||||
pub use shell::ShellCommandHandler;
|
||||
pub use shell::ShellHandler;
|
||||
pub use test_sync::TestSyncHandler;
|
||||
pub use unified_exec::UnifiedExecHandler;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::models::ShellCommandToolCallParams;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -25,6 +26,8 @@ use crate::tools::sandboxing::ToolCtx;
|
||||
|
||||
pub struct ShellHandler;
|
||||
|
||||
pub struct ShellCommandHandler;
|
||||
|
||||
impl ShellHandler {
|
||||
fn to_exec_params(params: ShellToolCallParams, turn_context: &TurnContext) -> ExecParams {
|
||||
ExecParams {
|
||||
@@ -39,6 +42,28 @@ impl ShellHandler {
|
||||
}
|
||||
}
|
||||
|
||||
impl ShellCommandHandler {
|
||||
fn to_exec_params(
|
||||
params: ShellCommandToolCallParams,
|
||||
session: &crate::codex::Session,
|
||||
turn_context: &TurnContext,
|
||||
) -> ExecParams {
|
||||
let shell = session.user_shell();
|
||||
let use_login_shell = true;
|
||||
let command = shell.derive_exec_args(¶ms.command, use_login_shell);
|
||||
|
||||
ExecParams {
|
||||
command,
|
||||
cwd: turn_context.resolve_path(params.workdir.clone()),
|
||||
timeout_ms: params.timeout_ms,
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for ShellHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
@@ -102,6 +127,49 @@ impl ToolHandler for ShellHandler {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for ShellCommandHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
fn matches_kind(&self, payload: &ToolPayload) -> bool {
|
||||
matches!(payload, ToolPayload::Function { .. })
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
tool_name,
|
||||
payload,
|
||||
} = invocation;
|
||||
|
||||
let ToolPayload::Function { arguments } = payload else {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"unsupported payload for shell_command handler: {tool_name}"
|
||||
)));
|
||||
};
|
||||
|
||||
let params: ShellCommandToolCallParams = serde_json::from_str(&arguments).map_err(|e| {
|
||||
FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e:?}"))
|
||||
})?;
|
||||
let exec_params = Self::to_exec_params(params, session.as_ref(), turn.as_ref());
|
||||
ShellHandler::run_exec_like(
|
||||
tool_name.as_str(),
|
||||
exec_params,
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
false,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl ShellHandler {
|
||||
async fn run_exec_like(
|
||||
tool_name: &str,
|
||||
@@ -240,3 +308,49 @@ impl ShellHandler {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::shell::BashShell;
|
||||
use crate::shell::Shell;
|
||||
use crate::shell::ZshShell;
|
||||
|
||||
/// The logic for is_known_safe_command() has heuristics for known shells,
|
||||
/// so we must ensure the commands generated by [ShellCommandHandler] can be
|
||||
/// recognized as safe if the `command` is safe.
|
||||
#[test]
|
||||
fn commands_generated_by_shell_command_handler_can_be_matched_by_is_known_safe_command() {
|
||||
let bash_shell = Shell::Bash(BashShell {
|
||||
shell_path: "/bin/bash".to_string(),
|
||||
bashrc_path: "/home/user/.bashrc".to_string(),
|
||||
});
|
||||
assert_safe(&bash_shell, "ls -la");
|
||||
|
||||
let zsh_shell = Shell::Zsh(ZshShell {
|
||||
shell_path: "/bin/zsh".to_string(),
|
||||
zshrc_path: "/home/user/.zshrc".to_string(),
|
||||
});
|
||||
assert_safe(&zsh_shell, "ls -la");
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
use crate::shell::PowerShellConfig;
|
||||
|
||||
let powershell = Shell::PowerShell(PowerShellConfig {
|
||||
exe: "pwsh.exe".to_string(),
|
||||
bash_exe_fallback: None,
|
||||
});
|
||||
assert_safe(&powershell, "ls -Name");
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_safe(shell: &Shell, command: &str) {
|
||||
assert!(is_known_safe_command(
|
||||
&shell.derive_exec_args(command, /* use_login_shell */ true)
|
||||
));
|
||||
assert!(is_known_safe_command(
|
||||
&shell.derive_exec_args(command, /* use_login_shell */ false)
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,8 @@ pub enum ConfigShellToolType {
|
||||
Default,
|
||||
Local,
|
||||
UnifiedExec,
|
||||
/// Takes a command as a single string to be run in the user's default shell.
|
||||
ShellCommand,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -48,6 +50,8 @@ impl ToolsConfig {
|
||||
|
||||
let shell_type = if features.enabled(Feature::UnifiedExec) {
|
||||
ConfigShellToolType::UnifiedExec
|
||||
} else if features.enabled(Feature::ShellCommandTool) {
|
||||
ConfigShellToolType::ShellCommand
|
||||
} else {
|
||||
model_family.shell_type.clone()
|
||||
};
|
||||
@@ -302,6 +306,53 @@ fn create_shell_tool() -> ToolSpec {
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_command_tool() -> ToolSpec {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some(
|
||||
"The shell script to execute in the user's default shell".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"workdir".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The working directory to execute the command in".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"timeout_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The timeout for the command in milliseconds".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"with_escalated_permissions".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
);
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "shell_command".to_string(),
|
||||
description: "Runs a shell command string and returns its output.".to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["command".to_string()]),
|
||||
additional_properties: Some(false.into()),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn create_view_image_tool() -> ToolSpec {
|
||||
// Support only local filesystem path.
|
||||
let mut properties = BTreeMap::new();
|
||||
@@ -891,6 +942,7 @@ pub(crate) fn build_specs(
|
||||
use crate::tools::handlers::McpResourceHandler;
|
||||
use crate::tools::handlers::PlanHandler;
|
||||
use crate::tools::handlers::ReadFileHandler;
|
||||
use crate::tools::handlers::ShellCommandHandler;
|
||||
use crate::tools::handlers::ShellHandler;
|
||||
use crate::tools::handlers::TestSyncHandler;
|
||||
use crate::tools::handlers::UnifiedExecHandler;
|
||||
@@ -906,6 +958,7 @@ pub(crate) fn build_specs(
|
||||
let view_image_handler = Arc::new(ViewImageHandler);
|
||||
let mcp_handler = Arc::new(McpHandler);
|
||||
let mcp_resource_handler = Arc::new(McpResourceHandler);
|
||||
let shell_command_handler = Arc::new(ShellCommandHandler);
|
||||
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::Default => {
|
||||
@@ -920,12 +973,16 @@ pub(crate) fn build_specs(
|
||||
builder.register_handler("exec_command", unified_exec_handler.clone());
|
||||
builder.register_handler("write_stdin", unified_exec_handler);
|
||||
}
|
||||
ConfigShellToolType::ShellCommand => {
|
||||
builder.push_spec(create_shell_command_tool());
|
||||
}
|
||||
}
|
||||
|
||||
// Always register shell aliases so older prompts remain compatible.
|
||||
builder.register_handler("shell", shell_handler.clone());
|
||||
builder.register_handler("container.exec", shell_handler.clone());
|
||||
builder.register_handler("local_shell", shell_handler);
|
||||
builder.register_handler("shell_command", shell_command_handler);
|
||||
|
||||
builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true);
|
||||
builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true);
|
||||
@@ -1061,6 +1118,7 @@ mod tests {
|
||||
ConfigShellToolType::Default => Some("shell"),
|
||||
ConfigShellToolType::Local => Some("local_shell"),
|
||||
ConfigShellToolType::UnifiedExec => None,
|
||||
ConfigShellToolType::ShellCommand => Some("shell_command"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1293,6 +1351,22 @@ mod tests {
|
||||
assert_contains_tool_names(&tools, &subset);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_specs_shell_command_present() {
|
||||
assert_model_tools(
|
||||
"codex-mini-latest",
|
||||
Features::with_defaults().enable(Feature::ShellCommandTool),
|
||||
&[
|
||||
"shell_command",
|
||||
"list_mcp_resources",
|
||||
"list_mcp_resource_templates",
|
||||
"read_mcp_resource",
|
||||
"update_plan",
|
||||
"view_image",
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_parallel_support_flags() {
|
||||
@@ -1748,6 +1822,21 @@ mod tests {
|
||||
assert_eq!(description, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shell_command_tool() {
|
||||
let tool = super::create_shell_command_tool();
|
||||
let ToolSpec::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
else {
|
||||
panic!("expected function tool");
|
||||
};
|
||||
assert_eq!(name, "shell_command");
|
||||
|
||||
let expected = "Runs a shell command string and returns its output.";
|
||||
assert_eq!(description, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_openai_tools_mcp_tools_with_additional_properties_schema() {
|
||||
let model_family = find_family_for_model("gpt-5-codex")
|
||||
|
||||
@@ -446,12 +446,6 @@ pub async fn mount_sse_once(server: &MockServer, body: String) -> ResponseMock {
|
||||
response_mock
|
||||
}
|
||||
|
||||
pub async fn mount_sse(server: &MockServer, body: String) -> ResponseMock {
|
||||
let (mock, response_mock) = base_mock();
|
||||
mock.respond_with(sse_response(body)).mount(server).await;
|
||||
response_mock
|
||||
}
|
||||
|
||||
pub async fn start_mock_server() -> MockServer {
|
||||
MockServer::builder()
|
||||
.body_print_limit(BodyPrintLimit::Limited(80_000))
|
||||
|
||||
@@ -9,7 +9,6 @@ use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_custom_tool_call;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::mount_sse;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
@@ -103,8 +102,6 @@ async fn process_sse_emits_failed_event_on_parse_error() {
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -144,8 +141,6 @@ async fn process_sse_records_failed_event_when_stream_closes_without_completed()
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -193,12 +188,18 @@ async fn process_sse_failed_event_records_response_error_message() {
|
||||
})]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -244,12 +245,18 @@ async fn process_sse_failed_event_logs_parse_error() {
|
||||
})]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -294,8 +301,6 @@ async fn process_sse_failed_event_logs_missing_error() {
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -337,11 +342,18 @@ async fn process_sse_failed_event_logs_response_completed_parse_error() {
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -430,7 +442,7 @@ async fn process_sse_emits_completed_telemetry() {
|
||||
async fn handle_response_item_records_tool_result_for_custom_tool_call() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_custom_tool_call(
|
||||
@@ -442,12 +454,18 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() {
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -494,7 +512,7 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() {
|
||||
async fn handle_response_item_records_tool_result_for_function_call() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_function_call("function-call", "nonexistent", "{\"value\":1}"),
|
||||
@@ -503,11 +521,18 @@ async fn handle_response_item_records_tool_result_for_function_call() {
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -554,7 +579,7 @@ async fn handle_response_item_records_tool_result_for_function_call() {
|
||||
async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
serde_json::json!({
|
||||
@@ -573,11 +598,18 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids()
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -618,7 +650,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids()
|
||||
async fn handle_response_item_records_tool_result_for_local_shell_call() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("shell-call", "completed", vec!["/bin/echo", "shell"]),
|
||||
@@ -627,11 +659,18 @@ async fn handle_response_item_records_tool_result_for_local_shell_call() {
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(move |config| {
|
||||
config.features.disable(Feature::GhostCommit);
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -710,10 +749,23 @@ fn tool_decision_assertion<'a>(
|
||||
#[traced_test]
|
||||
async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
|
||||
let server = start_mock_server().await;
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("auto_config_call", "completed", vec!["/bin/echo", "hello"]),
|
||||
ev_local_shell_call(
|
||||
"auto_config_call",
|
||||
"completed",
|
||||
vec!["/bin/echo", "local shell"],
|
||||
),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
@@ -723,8 +775,6 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
|
||||
.with_config(|config| {
|
||||
config.approval_policy = AskForApproval::OnRequest;
|
||||
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -739,7 +789,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TokenCount(_))).await;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
logs_assert(tool_decision_assertion(
|
||||
"auto_config_call",
|
||||
@@ -752,7 +802,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
|
||||
#[traced_test]
|
||||
async fn handle_container_exec_user_approved_records_tool_decision() {
|
||||
let server = start_mock_server().await;
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("user_approved_call", "completed", vec!["/bin/date"]),
|
||||
@@ -761,11 +811,18 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.approval_policy = AskForApproval::UnlessTrusted;
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -804,7 +861,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
|
||||
async fn handle_container_exec_user_approved_for_session_records_tool_decision() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("user_approved_session_call", "completed", vec!["/bin/date"]),
|
||||
@@ -812,12 +869,18 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.approval_policy = AskForApproval::UnlessTrusted;
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -856,7 +919,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
|
||||
async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("sandbox_retry_call", "completed", vec!["/bin/date"]),
|
||||
@@ -864,12 +927,18 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.approval_policy = AskForApproval::UnlessTrusted;
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -908,7 +977,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
|
||||
async fn handle_container_exec_user_denies_records_tool_decision() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("user_denied_call", "completed", vec!["/bin/date"]),
|
||||
@@ -917,11 +986,17 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.approval_policy = AskForApproval::UnlessTrusted;
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -960,7 +1035,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
|
||||
async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("sandbox_session_call", "completed", vec!["/bin/date"]),
|
||||
@@ -968,12 +1043,18 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.approval_policy = AskForApproval::UnlessTrusted;
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -1012,7 +1093,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
|
||||
async fn handle_sandbox_error_user_denies_records_tool_decision() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
mount_sse(
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/date"]),
|
||||
@@ -1021,11 +1102,18 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() {
|
||||
)
|
||||
.await;
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "local shell done"),
|
||||
ev_completed("done"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_config(|config| {
|
||||
config.approval_policy = AskForApproval::UnlessTrusted;
|
||||
config.model_provider.request_max_retries = Some(0);
|
||||
config.model_provider.stream_max_retries = Some(0);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
|
||||
70
codex-rs/core/tests/suite/resume_warning.rs
Normal file
70
codex-rs/core/tests/suite/resume_warning.rs
Normal file
@@ -0,0 +1,70 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use codex_core::AuthManager;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::InitialHistory;
|
||||
use codex_core::protocol::ResumedHistory;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
use codex_core::protocol::TurnContextItem;
|
||||
use codex_core::protocol::WarningEvent;
|
||||
use codex_protocol::ConversationId;
|
||||
use core::time::Duration;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::wait_for_event;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn resume_history(config: &codex_core::config::Config, previous_model: &str, rollout_path: &std::path::Path) -> InitialHistory {
|
||||
let turn_ctx = TurnContextItem {
|
||||
cwd: config.cwd.clone(),
|
||||
approval_policy: config.approval_policy,
|
||||
sandbox_policy: config.sandbox_policy.clone(),
|
||||
model: previous_model.to_string(),
|
||||
effort: config.model_reasoning_effort,
|
||||
summary: config.model_reasoning_summary,
|
||||
};
|
||||
|
||||
InitialHistory::Resumed(ResumedHistory {
|
||||
conversation_id: ConversationId::default(),
|
||||
history: vec![RolloutItem::TurnContext(turn_ctx)],
|
||||
rollout_path: rollout_path.to_path_buf(),
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn emits_warning_when_resumed_model_differs() {
|
||||
// Arrange a config with a current model and a prior rollout recorded under a different model.
|
||||
let home = TempDir::new().expect("tempdir");
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model = "current-model".to_string();
|
||||
// Ensure cwd is absolute (the helper sets it to the temp dir already).
|
||||
assert!(config.cwd.is_absolute());
|
||||
|
||||
let rollout_path = home.path().join("rollout.jsonl");
|
||||
std::fs::write(&rollout_path, "").expect("create rollout placeholder");
|
||||
|
||||
let initial_history = resume_history(&config, "previous-model", &rollout_path);
|
||||
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
|
||||
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test"));
|
||||
|
||||
// Act: resume the conversation.
|
||||
let NewConversation { conversation, .. } = conversation_manager
|
||||
.resume_conversation_with_history(config, initial_history, auth_manager)
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
|
||||
// Assert: a Warning event is emitted describing the model mismatch.
|
||||
let warning = wait_for_event(&conversation, |ev| matches!(ev, EventMsg::Warning(_))).await;
|
||||
let EventMsg::Warning(WarningEvent { message }) = warning else {
|
||||
panic!("expected warning event");
|
||||
};
|
||||
assert!(message.contains("previous-model"));
|
||||
assert!(message.contains("current-model"));
|
||||
|
||||
// Drain the TaskComplete/Shutdown window to avoid leaking tasks between tests.
|
||||
// The warning is emitted during initialization, so a short sleep is sufficient.
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
@@ -292,7 +292,7 @@ impl From<Vec<UserInput>> for ResponseInputItem {
|
||||
}
|
||||
|
||||
/// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec`
|
||||
/// or shell`, the `arguments` field should deserialize to this struct.
|
||||
/// or `shell`, the `arguments` field should deserialize to this struct.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ShellToolCallParams {
|
||||
pub command: Vec<String>,
|
||||
@@ -307,6 +307,22 @@ pub struct ShellToolCallParams {
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
/// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
|
||||
/// `arguments` field should deserialize to this struct.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ShellCommandToolCallParams {
|
||||
pub command: String,
|
||||
pub workdir: Option<String>,
|
||||
|
||||
/// This is the maximum time in milliseconds that the command is allowed to run.
|
||||
#[serde(alias = "timeout")]
|
||||
pub timeout_ms: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
/// Responses API compatible content items that can be returned by a tool call.
|
||||
/// This is a subset of ContentItem with the types we support as function call outputs.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||||
|
||||
@@ -42,16 +42,55 @@ Custom prompts turn your repeatable instructions into reusable slash commands, s
|
||||
|
||||
### Examples
|
||||
|
||||
**Draft PR helper**
|
||||
### Example 1: Basic named arguments
|
||||
|
||||
`~/.codex/prompts/draftpr.md`
|
||||
**File**: `~/.codex/prompts/ticket.md`
|
||||
|
||||
```markdown
|
||||
---
|
||||
description: Create feature branch, commit and open draft PR.
|
||||
description: Generate a commit message for a ticket
|
||||
argument-hint: TICKET_ID=<id> TICKET_TITLE=<title>
|
||||
---
|
||||
|
||||
Create a branch named `tibo/<feature_name>`, commit the changes, and open a draft PR.
|
||||
Please write a concise commit message for ticket $TICKET_ID: $TICKET_TITLE
|
||||
```
|
||||
|
||||
Usage: type `/prompts:draftpr` to have codex perform the work.
|
||||
**Usage**:
|
||||
|
||||
```
|
||||
/prompts:ticket TICKET_ID=JIRA-1234 TICKET_TITLE="Fix login bug"
|
||||
```
|
||||
|
||||
**Expanded prompt sent to Codex**:
|
||||
|
||||
```
|
||||
Please write a concise commit message for ticket JIRA-1234: Fix login bug
|
||||
```
|
||||
|
||||
**Note**: Both `TICKET_ID` and `TICKET_TITLE` are required. If either is missing, Codex will show a validation error. Values with spaces must be double-quoted.
|
||||
|
||||
### Example 2: Mixed positional and named arguments
|
||||
|
||||
**File**: `~/.codex/prompts/review.md`
|
||||
|
||||
```markdown
|
||||
---
|
||||
description: Review code in a specific file with focus area
|
||||
argument-hint: FILE=<path> [FOCUS=<section>]
|
||||
---
|
||||
|
||||
Review the code in $FILE. Pay special attention to $FOCUS.
|
||||
```
|
||||
|
||||
**Usage**:
|
||||
|
||||
```
|
||||
/prompts:review FILE=src/auth.js FOCUS="error handling"
|
||||
```
|
||||
|
||||
**Expanded prompt**:
|
||||
|
||||
```
|
||||
Review the code in src/auth.js. Pay special attention to error handling.
|
||||
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user