Mirror of https://github.com/openai/codex.git, synced 2026-02-02 06:57:03 +00:00

Compare commits: add-contex ... dev/zhao/p

32 Commits
| Author | SHA1 | Date |
|---|---|---|
| | c7d8c38d2f | |
| | 6259f91e5d | |
| | 189afb052f | |
| | 82f49f4b4e | |
| | 58ce6bcfb5 | |
| | 296c98b159 | |
| | 82639ec81b | |
| | c8ecc49441 | |
| | 6343be1100 | |
| | 21ae112767 | |
| | a1db68171c | |
| | fc6aa8406a | |
| | 7ffdcecb8f | |
| | f8e3c57975 | |
| | bfa6fbc700 | |
| | 57c3632a9e | |
| | a941ae7632 | |
| | 2c665fb1dd | |
| | 98a90a3bb2 | |
| | 7c8d333980 | |
| | 497fb4a19c | |
| | 5860481bc4 | |
| | a52cf4d2b4 | |
| | e70c52a3af | |
| | de1768d3ba | |
| | 702238f004 | |
| | fa5f6e76c9 | |
| | f828cd2897 | |
| | 326c1e0a7e | |
| | 3f1c4b9add | |
| | 0b28e72b66 | |
| | 94dfb211af | |
4  .github/workflows/cla.yml  (vendored)
@@ -46,4 +46,6 @@ jobs:
           path-to-document: https://github.com/openai/codex/blob/main/docs/CLA.md
           path-to-signatures: signatures/cla.json
           branch: cla-signatures
-          allowlist: dependabot[bot]
+          allowlist: |
+            codex
+            dependabot[bot]
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Close inactive PRs from contributors
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
12  .github/workflows/rust-ci.yml  (vendored)
@@ -97,7 +97,6 @@ jobs:
     env:
       # Speed up repeated builds across CI runs by caching compiled objects (non-Windows).
       USE_SCCACHE: ${{ startsWith(matrix.runner, 'windows') && 'false' || 'true' }}
-      RUSTC_WRAPPER: ${{ startsWith(matrix.runner, 'windows') && '' || 'sccache' }}
       CARGO_INCREMENTAL: "0"
       SCCACHE_CACHE_SIZE: 10G

@@ -191,6 +190,11 @@ jobs:
             echo "Using sccache local disk + actions/cache fallback"
           fi

+      - name: Enable sccache wrapper
+        if: ${{ env.USE_SCCACHE == 'true' }}
+        shell: bash
+        run: echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
+
       - name: Restore sccache cache (fallback)
         if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
         id: cache_sccache_restore

@@ -331,7 +335,6 @@ jobs:
     env:
       # Speed up repeated builds across CI runs by caching compiled objects (non-Windows).
       USE_SCCACHE: ${{ startsWith(matrix.runner, 'windows') && 'false' || 'true' }}
-      RUSTC_WRAPPER: ${{ startsWith(matrix.runner, 'windows') && '' || 'sccache' }}
       CARGO_INCREMENTAL: "0"
       SCCACHE_CACHE_SIZE: 10G

@@ -395,6 +398,11 @@ jobs:
             echo "Using sccache local disk + actions/cache fallback"
           fi

+      - name: Enable sccache wrapper
+        if: ${{ env.USE_SCCACHE == 'true' }}
+        shell: bash
+        run: echo "RUSTC_WRAPPER=sccache" >> "$GITHUB_ENV"
+
       - name: Restore sccache cache (fallback)
         if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
         id: cache_sccache_restore
3  .gitignore  (vendored)
@@ -64,6 +64,9 @@ apply_patch/
 # coverage
 coverage/

+# personal files
+personal/
+
 # os
 .DS_Store
 Thumbs.db
17  codex-rs/Cargo.lock  (generated)
@@ -1202,6 +1202,21 @@ dependencies = [
  "tempfile",
 ]

+[[package]]
+name = "codex-execpolicy2"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "multimap",
+ "pretty_assertions",
+ "serde",
+ "serde_json",
+ "shlex",
+ "starlark",
+ "thiserror 2.0.17",
+]
+
 [[package]]
 name = "codex-feedback"
 version = "0.0.0"

@@ -1427,6 +1442,7 @@ dependencies = [
  "tracing",
  "urlencoding",
  "webbrowser",
+ "which",
 ]

 [[package]]

@@ -1454,6 +1470,7 @@ dependencies = [
  "codex-ansi-escape",
  "codex-app-server-protocol",
  "codex-arg0",
+ "codex-backend-client",
  "codex-common",
  "codex-core",
  "codex-feedback",
@@ -17,6 +17,7 @@ members = [
     "core",
     "exec",
     "execpolicy",
+    "execpolicy2",
     "keyring-store",
     "file-search",
     "linux-sandbox",

@@ -130,7 +131,7 @@ image = { version = "^0.25.8", default-features = false }
 indexmap = "2.12.0"
 insta = "1.43.2"
 itertools = "0.14.0"
-keyring = "3.6"
+keyring = { version = "3.6", default-features = false }
 landlock = "0.4.1"
 lazy_static = "1"
 libc = "0.2.175"
@@ -967,6 +967,7 @@ impl CodexMessageProcessor {
         client
             .get_rate_limits()
             .await
+            .map(|status| status.snapshot)
             .map_err(|err| JSONRPCErrorError {
                 code: INTERNAL_ERROR_CODE,
                 message: format!("failed to fetch codex rate limits: {err}"),
@@ -1,5 +1,6 @@
 use crate::types::CodeTaskDetailsResponse;
 use crate::types::PaginatedListTaskListItem;
+use crate::types::PlanType;
 use crate::types::RateLimitStatusPayload;
 use crate::types::RateLimitWindowSnapshot;
 use crate::types::TurnAttemptsSiblingTurnsResponse;

@@ -44,6 +45,12 @@ pub struct Client {
     path_style: PathStyle,
 }

+#[derive(Clone, Debug)]
+pub struct RateLimitStatus {
+    pub plan_type: String,
+    pub snapshot: RateLimitSnapshot,
+}
+
 impl Client {
     pub fn new(base_url: impl Into<String>) -> Result<Self> {
         let mut base_url = base_url.into();

@@ -155,7 +162,7 @@ impl Client {
         }
     }

-    pub async fn get_rate_limits(&self) -> Result<RateLimitSnapshot> {
+    pub async fn get_rate_limits(&self) -> Result<RateLimitStatus> {
         let url = match self.path_style {
             PathStyle::CodexApi => format!("{}/api/codex/usage", self.base_url),
             PathStyle::ChatGptApi => format!("{}/wham/usage", self.base_url),

@@ -163,7 +170,9 @@
         let req = self.http.get(&url).headers(self.headers());
         let (body, ct) = self.exec_request(req, "GET", &url).await?;
         let payload: RateLimitStatusPayload = self.decode_json(&url, &ct, &body)?;
-        Ok(Self::rate_limit_snapshot_from_payload(payload))
+        let plan_type = Self::plan_type_to_string(payload.plan_type);
+        let snapshot = Self::rate_limit_snapshot_from_payload(payload);
+        Ok(RateLimitStatus { plan_type, snapshot })
     }

     pub async fn list_tasks(

@@ -288,6 +297,22 @@ impl Client {
         }
     }

+    fn plan_type_to_string(plan_type: PlanType) -> String {
+        match plan_type {
+            PlanType::Free => "free",
+            PlanType::Go => "go",
+            PlanType::Plus => "plus",
+            PlanType::Pro => "pro",
+            PlanType::Team => "team",
+            PlanType::Business => "business",
+            PlanType::Education => "education",
+            PlanType::Quorum => "quorum",
+            PlanType::Enterprise => "enterprise",
+            PlanType::Edu => "edu",
+        }
+        .to_string()
+    }
+
     fn map_rate_limit_window(
         window: Option<Option<Box<RateLimitWindowSnapshot>>>,
     ) -> Option<RateLimitWindow> {
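A minimal sketch (not part of the diff) of how a caller consumes the widened return type: get_rate_limits now yields a RateLimitStatus that carries the plan type alongside the usage snapshot, so callers that only need the snapshot map it off, as the app-server hunk above does. The base URL is a placeholder and the anyhow error type is an assumption.

use codex_backend_client::Client;

async fn show_plan() -> anyhow::Result<()> {
    // Placeholder endpoint, not a real deployment URL.
    let client = Client::new("https://example.invalid/backend-api")?;
    let status = client.get_rate_limits().await?;
    // New field: the plan string derived from PlanType ("free", "plus", ...).
    println!("plan: {}", status.plan_type);
    // The previous return value is still available as the snapshot field.
    let _snapshot = status.snapshot;
    Ok(())
}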
@@ -2,6 +2,7 @@ mod client;
 pub mod types;

 pub use client::Client;
+pub use client::RateLimitStatus;
 pub use types::CodeTaskDetailsResponse;
 pub use types::CodeTaskDetailsResponseExt;
 pub use types::PaginatedListTaskListItem;
@@ -40,12 +40,7 @@ eventsource-stream = { workspace = true }
 futures = { workspace = true }
 http = { workspace = true }
 indexmap = { workspace = true }
-keyring = { workspace = true, features = [
-    "apple-native",
-    "crypto-rust",
-    "linux-native-async-persistent",
-    "windows-native",
-] }
+keyring = { workspace = true, features = ["crypto-rust"] }
 libc = { workspace = true }
 mcp-types = { workspace = true }
 os_info = { workspace = true }

@@ -90,9 +85,11 @@ wildmatch = { workspace = true }
 [target.'cfg(target_os = "linux")'.dependencies]
 landlock = { workspace = true }
 seccompiler = { workspace = true }
+keyring = { workspace = true, features = ["linux-native-async-persistent"] }

 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.9"
+keyring = { workspace = true, features = ["apple-native"] }

 # Build OpenSSL from source for musl builds.
 [target.x86_64-unknown-linux-musl.dependencies]

@@ -102,6 +99,12 @@ openssl-sys = { workspace = true, features = ["vendored"] }
 [target.aarch64-unknown-linux-musl.dependencies]
 openssl-sys = { workspace = true, features = ["vendored"] }

+[target.'cfg(target_os = "windows")'.dependencies]
+keyring = { workspace = true, features = ["windows-native"] }
+
+[target.'cfg(any(target_os = "freebsd", target_os = "openbsd"))'.dependencies]
+keyring = { workspace = true, features = ["sync-secret-service"] }
+
 [dev-dependencies]
 assert_cmd = { workspace = true }
 assert_matches = { workspace = true }
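The hunks above split one catch-all keyring feature list into per-target blocks: with default-features = false in the workspace, each OS opts into exactly one native credential store. As an illustrative sketch of the keyring 3.x API those features back (the service and user names here are made up, not keys the codebase uses):

use keyring::Entry;

fn roundtrip_secret() -> keyring::Result<()> {
    // "codex"/"api-token" are placeholder names for illustration only.
    let entry = Entry::new("codex", "api-token")?;
    entry.set_password("s3cret")?;
    assert_eq!(entry.get_password()?, "s3cret");
    entry.delete_credential()?; // clean up the stored secret
    Ok(())
}

Without a platform feature enabled, these calls fall back to the crate's mock store or fail, which is why every supported target now names a concrete backend.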
@@ -673,7 +673,9 @@ async fn process_chat_sse<S>(
         }

         // Emit end-of-turn when finish_reason signals completion.
-        if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
+        if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str())
+            && !finish_reason.is_empty()
+        {
             match finish_reason {
                 "tool_calls" if fn_call_state.active => {
                     // First, flush the terminal raw reasoning so UIs can finalize
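The guard above uses a Rust let-chain (if let ... && cond, edition 2024) so that an empty finish_reason string no longer falls through into the match. A self-contained sketch of the same pattern against a hypothetical payload:

use serde_json::json;

fn main() {
    let choice = json!({ "finish_reason": "" });
    // Let-chain: the pattern match and the emptiness check share one `if`,
    // so "" is treated the same as a missing key.
    if let Some(finish_reason) = choice.get("finish_reason").and_then(|v| v.as_str())
        && !finish_reason.is_empty()
    {
        println!("turn finished: {finish_reason}");
    } else {
        println!("no usable finish_reason");
    }
}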
@@ -136,7 +136,7 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
 }

 fn is_shell_tool_name(name: &str) -> bool {
-    matches!(name, "shell" | "container.exec")
+    matches!(name, "shell" | "container.exec" | "shell_command")
 }

 #[derive(Deserialize)]
@@ -1819,7 +1819,6 @@ pub(crate) async fn run_task(
     // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
     // many turns, from the perspective of the user, it is a single turn.
     let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
-    let mut auto_compact_recently_attempted = false;

     loop {
         // Note that pending_input would be something like a message the user

@@ -1874,27 +1873,12 @@ pub(crate) async fn run_task(
         let (responses, items_to_record_in_conversation_history) =
             process_items(processed_items, &sess, &turn_context).await;

+        // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
         if token_limit_reached {
-            if auto_compact_recently_attempted {
-                let limit_str = limit.to_string();
-                let current_tokens = total_usage_tokens
-                    .map(|tokens| tokens.to_string())
-                    .unwrap_or_else(|| "unknown".to_string());
-                let event = EventMsg::Error(ErrorEvent {
-                    message: format!(
-                        "Conversation is still above the token limit after automatic summarization (limit {limit_str}, current {current_tokens}). Please start a new session or trim your input."
-                    ),
-                });
-                sess.send_event(&turn_context, event).await;
-                break;
-            }
-            auto_compact_recently_attempted = true;
             compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
             continue;
         }

-        auto_compact_recently_attempted = false;
-
         if responses.is_empty() {
             last_agent_message = get_last_assistant_message_from_turn(
                 &items_to_record_in_conversation_history,
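Schematically, the removal above changes the turn loop from "compact once, then error out if still over the limit" to "compact whenever the limit is hit", trusting compaction to land well below the limit. A rough sketch of the resulting control flow (illustrative names, not the real run_task signature):

fn turn_loop(mut over_token_limit: impl FnMut() -> bool, mut auto_compact: impl FnMut()) {
    loop {
        // ... call the model, process the returned items ...
        if over_token_limit() {
            auto_compact(); // summarize history in place
            continue;       // retry the turn against the compacted history
        }
        break; // turn completed under the limit
    }
}

The loop only terminates if compaction actually reduces usage, which is exactly the assumption the new comment in the diff calls out.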
@@ -26,6 +26,7 @@ use futures::prelude::*;
 use tracing::error;

 pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt.md");
+pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
 const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;

 pub(crate) async fn run_inline_auto_compact_task(

@@ -140,7 +141,9 @@ async fn run_compact_task_inner(
     }

     let history_snapshot = sess.clone_history().await.get_history();
-    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
+    let summary_suffix =
+        get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
+    let summary_text = format!("{SUMMARY_PREFIX}\n{summary_suffix}");
     let user_messages = collect_user_messages(&history_snapshot);

     let initial_context = sess.build_initial_context(turn_context.as_ref());

@@ -201,12 +204,22 @@ pub(crate) fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
     items
         .iter()
         .filter_map(|item| match crate::event_mapping::parse_turn_item(item) {
-            Some(TurnItem::UserMessage(user)) => Some(user.message()),
+            Some(TurnItem::UserMessage(user)) => {
+                if is_summary_message(&user.message()) {
+                    None
+                } else {
+                    Some(user.message())
+                }
+            }
             _ => None,
         })
         .collect()
 }

+pub(crate) fn is_summary_message(message: &str) -> bool {
+    message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str())
+}
+
 pub(crate) fn build_compacted_history(
     initial_context: Vec<ResponseItem>,
     user_messages: &[String],
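The new SUMMARY_PREFIX convention makes summaries self-identifying: a compacted summary is recorded as a user message that starts with the prefix plus a newline, which lets collect_user_messages skip older summaries on the next compaction instead of re-summarizing them. A runnable sketch of the same check (the constant below is a stand-in for the real template text):

// Stand-in for the real templates/compact/summary_prefix.md contents.
const SUMMARY_PREFIX: &str = "Another language model started to solve this problem";

fn is_summary_message(message: &str) -> bool {
    message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str())
}

fn main() {
    let summary = format!("{SUMMARY_PREFIX}\nThe task is to create an app.");
    assert!(is_summary_message(&summary));         // filtered out of user messages
    assert!(!is_summary_message("create an app")); // ordinary input is kept
}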
@@ -1,8 +1,8 @@
 use crate::codex::TurnContext;
 use crate::context_manager::normalize;
-use crate::context_manager::truncate;
-use crate::context_manager::truncate::format_output_for_model_body;
-use crate::context_manager::truncate::globally_truncate_function_output_items;
+use crate::truncate;
+use crate::truncate::format_output_for_model_body;
+use crate::truncate::globally_truncate_function_output_items;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::TokenUsage;

@@ -1,6 +1,6 @@
 use super::*;
 use crate::context_manager::MODEL_FORMAT_MAX_LINES;
-use crate::context_manager::truncate;
+use crate::truncate;
 use codex_git::GhostCommit;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::FunctionCallOutputContentItem;

@@ -1,8 +1,7 @@
 mod history;
 mod normalize;
-mod truncate;

+pub(crate) use crate::truncate::MODEL_FORMAT_MAX_BYTES;
+pub(crate) use crate::truncate::MODEL_FORMAT_MAX_LINES;
+pub(crate) use crate::truncate::format_output_for_model_body;
 pub(crate) use history::ContextManager;
-pub(crate) use truncate::MODEL_FORMAT_MAX_BYTES;
-pub(crate) use truncate::MODEL_FORMAT_MAX_LINES;
-pub(crate) use truncate::format_output_for_model_body;
@@ -1,148 +0,0 @@
-use codex_protocol::models::FunctionCallOutputContentItem;
-use codex_utils_string::take_bytes_at_char_boundary;
-use codex_utils_string::take_last_bytes_at_char_boundary;
-
-use crate::util::error_or_panic;
-
-// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
-pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
-pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
-
-pub(crate) fn globally_truncate_function_output_items(
-    items: &[FunctionCallOutputContentItem],
-) -> Vec<FunctionCallOutputContentItem> {
-    let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
-    let mut remaining = MODEL_FORMAT_MAX_BYTES;
-    let mut omitted_text_items = 0usize;
-
-    for it in items {
-        match it {
-            FunctionCallOutputContentItem::InputText { text } => {
-                if remaining == 0 {
-                    omitted_text_items += 1;
-                    continue;
-                }
-
-                let len = text.len();
-                if len <= remaining {
-                    out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
-                    remaining -= len;
-                } else {
-                    let slice = take_bytes_at_char_boundary(text, remaining);
-                    if !slice.is_empty() {
-                        out.push(FunctionCallOutputContentItem::InputText {
-                            text: slice.to_string(),
-                        });
-                    }
-                    remaining = 0;
-                }
-            }
-            // todo(aibrahim): handle input images; resize
-            FunctionCallOutputContentItem::InputImage { image_url } => {
-                out.push(FunctionCallOutputContentItem::InputImage {
-                    image_url: image_url.clone(),
-                });
-            }
-        }
-    }
-
-    if omitted_text_items > 0 {
-        out.push(FunctionCallOutputContentItem::InputText {
-            text: format!("[omitted {omitted_text_items} text items ...]"),
-        });
-    }
-
-    out
-}
-
-pub(crate) fn format_output_for_model_body(
-    content: &str,
-    limit_bytes: usize,
-    limit_lines: usize,
-) -> String {
-    // Head+tail truncation for the model: show the beginning and end with an elision.
-    // Clients still receive full streams; only this formatted summary is capped.
-    let total_lines = content.lines().count();
-    if content.len() <= limit_bytes && total_lines <= limit_lines {
-        return content.to_string();
-    }
-    let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines);
-    format!("Total output lines: {total_lines}\n\n{output}")
-}
-
-fn truncate_formatted_exec_output(
-    content: &str,
-    total_lines: usize,
-    limit_bytes: usize,
-    limit_lines: usize,
-) -> String {
-    debug_panic_on_double_truncation(content);
-    let head_lines: usize = limit_lines / 2;
-    let tail_lines: usize = limit_lines - head_lines; // 128
-    let head_bytes: usize = limit_bytes / 2;
-    let segments: Vec<&str> = content.split_inclusive('\n').collect();
-    let head_take = head_lines.min(segments.len());
-    let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
-    let omitted = segments.len().saturating_sub(head_take + tail_take);
-
-    let head_slice_end: usize = segments
-        .iter()
-        .take(head_take)
-        .map(|segment| segment.len())
-        .sum();
-    let tail_slice_start: usize = if tail_take == 0 {
-        content.len()
-    } else {
-        content.len()
-            - segments
-                .iter()
-                .rev()
-                .take(tail_take)
-                .map(|segment| segment.len())
-                .sum::<usize>()
-    };
-    let head_slice = &content[..head_slice_end];
-    let tail_slice = &content[tail_slice_start..];
-    let truncated_by_bytes = content.len() > limit_bytes;
-    // this is a bit wrong. We are counting metadata lines and not just shell output lines.
-    let marker = if omitted > 0 {
-        Some(format!(
-            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
-        ))
-    } else if truncated_by_bytes {
-        Some(format!(
-            "\n[... output truncated to fit {limit_bytes} bytes ...]\n\n"
-        ))
-    } else {
-        None
-    };
-
-    let marker_len = marker.as_ref().map_or(0, String::len);
-    let base_head_budget = head_bytes.min(limit_bytes);
-    let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
-    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
-    let mut result = String::with_capacity(limit_bytes.min(content.len()));
-
-    result.push_str(head_part);
-    if let Some(marker_text) = marker.as_ref() {
-        result.push_str(marker_text);
-    }
-
-    let remaining = limit_bytes.saturating_sub(result.len());
-    if remaining == 0 {
-        return result;
-    }
-
-    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
-    result.push_str(tail_part);
-
-    result
-}
-
-fn debug_panic_on_double_truncation(content: &str) {
-    if content.contains("Total output lines:") && content.contains("omitted") {
-        error_or_panic(format!(
-            "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
-        ));
-    }
-}
@@ -112,7 +112,7 @@ impl ToolCallRuntime {

 fn abort_message(call: &ToolCall, secs: f32) -> String {
     match call.tool_name.as_str() {
-        "shell" | "container.exec" | "local_shell" | "unified_exec" => {
+        "shell" | "container.exec" | "local_shell" | "shell_command" | "unified_exec" => {
             format!("Wall time: {secs:.1} seconds\naborted by user")
         }
         _ => format!("aborted by user after {secs:.1}s"),
@@ -1,8 +1,194 @@
 //! Utilities for truncating large chunks of output while preserving a prefix
-//! and suffix on UTF-8 boundaries.
+//! and suffix on UTF-8 boundaries, and helpers for line/token-based truncation
+//! used across the core crate.

+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_utils_string::take_bytes_at_char_boundary;
 use codex_utils_string::take_last_bytes_at_char_boundary;
 use codex_utils_tokenizer::Tokenizer;

+/// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
+pub const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
+pub const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
+
+/// Globally truncate function output items to fit within `MODEL_FORMAT_MAX_BYTES`
+/// by preserving as many text/image items as possible and appending a summary
+/// for any omitted text items.
+pub(crate) fn globally_truncate_function_output_items(
+    items: &[FunctionCallOutputContentItem],
+) -> Vec<FunctionCallOutputContentItem> {
+    let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
+    let mut remaining = MODEL_FORMAT_MAX_BYTES;
+    let mut omitted_text_items = 0usize;
+
+    for it in items {
+        match it {
+            FunctionCallOutputContentItem::InputText { text } => {
+                if remaining == 0 {
+                    omitted_text_items += 1;
+                    continue;
+                }
+
+                let len = text.len();
+                if len <= remaining {
+                    out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
+                    remaining -= len;
+                } else {
+                    let slice = take_bytes_at_char_boundary(text, remaining);
+                    if !slice.is_empty() {
+                        out.push(FunctionCallOutputContentItem::InputText {
+                            text: slice.to_string(),
+                        });
+                    }
+                    remaining = 0;
+                }
+            }
+            // todo(aibrahim): handle input images; resize
+            FunctionCallOutputContentItem::InputImage { image_url } => {
+                out.push(FunctionCallOutputContentItem::InputImage {
+                    image_url: image_url.clone(),
+                });
+            }
+        }
+    }
+
+    if omitted_text_items > 0 {
+        out.push(FunctionCallOutputContentItem::InputText {
+            text: format!("[omitted {omitted_text_items} text items ...]"),
+        });
+    }
+
+    out
+}
+
+/// Format a block of exec/tool output for model consumption, truncating by
+/// lines and bytes while preserving head and tail segments.
+pub(crate) fn format_output_for_model_body(
+    content: &str,
+    limit_bytes: usize,
+    limit_lines: usize,
+) -> String {
+    // Head+tail truncation for the model: show the beginning and end with an elision.
+    // Clients still receive full streams; only this formatted summary is capped.
+    let total_lines = content.lines().count();
+    if content.len() <= limit_bytes && total_lines <= limit_lines {
+        return content.to_string();
+    }
+    let output = truncate_formatted_exec_output(content, total_lines, limit_bytes, limit_lines);
+    format!("Total output lines: {total_lines}\n\n{output}")
+}
+
+fn truncate_formatted_exec_output(
+    content: &str,
+    total_lines: usize,
+    limit_bytes: usize,
+    limit_lines: usize,
+) -> String {
+    error_on_double_truncation(content);
+    let head_lines: usize = limit_lines / 2;
+    let tail_lines: usize = limit_lines - head_lines; // 128
+    let head_bytes: usize = limit_bytes / 2;
+    let segments: Vec<&str> = content.split_inclusive('\n').collect();
+    let head_take = head_lines.min(segments.len());
+    let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
+    let omitted = segments.len().saturating_sub(head_take + tail_take);
+
+    let head_slice_end: usize = segments
+        .iter()
+        .take(head_take)
+        .map(|segment| segment.len())
+        .sum();
+    let tail_slice_start: usize = if tail_take == 0 {
+        content.len()
+    } else {
+        content.len()
+            - segments
+                .iter()
+                .rev()
+                .take(tail_take)
+                .map(|segment| segment.len())
+                .sum::<usize>()
+    };
+    let head_slice = &content[..head_slice_end];
+    let tail_slice = &content[tail_slice_start..];
+    let truncated_by_bytes = content.len() > limit_bytes;
+    // this is a bit wrong. We are counting metadata lines and not just shell output lines.
+    let marker = if omitted > 0 {
+        Some(format!(
+            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
+        ))
+    } else if truncated_by_bytes {
+        Some(format!(
+            "\n[... output truncated to fit {limit_bytes} bytes ...]\n\n"
+        ))
+    } else {
+        None
+    };
+
+    let marker_len = marker.as_ref().map_or(0, String::len);
+    let base_head_budget = head_bytes.min(limit_bytes);
+    let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
+    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
+    let mut result = String::with_capacity(limit_bytes.min(content.len()));
+
+    result.push_str(head_part);
+    if let Some(marker_text) = marker.as_ref() {
+        result.push_str(marker_text);
+    }
+
+    let remaining = limit_bytes.saturating_sub(result.len());
+    if remaining == 0 {
+        return result;
+    }
+
+    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
+    result.push_str(tail_part);
+
+    result
+}
+
+fn error_on_double_truncation(content: &str) {
+    if content.contains("Total output lines:") && content.contains("omitted") {
+        tracing::error!(
+            "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
+        );
+    }
+}
+
+/// Truncate an output string to a maximum number of “tokens”, where tokens are
+/// approximated as individual `char`s. Preserves a prefix and suffix with an
+/// elision marker describing how many tokens were omitted.
+pub(crate) fn truncate_output_to_tokens(
+    output: &str,
+    max_tokens: usize,
+) -> (String, Option<usize>) {
+    if max_tokens == 0 {
+        let total_tokens = output.chars().count();
+        let message = format!("…{total_tokens} tokens truncated…");
+        return (message, Some(total_tokens));
+    }
+
+    let tokens: Vec<char> = output.chars().collect();
+    let total_tokens = tokens.len();
+    if total_tokens <= max_tokens {
+        return (output.to_string(), None);
+    }
+
+    let half = max_tokens / 2;
+    if half == 0 {
+        let truncated = total_tokens.saturating_sub(max_tokens);
+        let message = format!("…{truncated} tokens truncated…");
+        return (message, Some(total_tokens));
+    }
+
+    let truncated = total_tokens.saturating_sub(half * 2);
+    let mut truncated_output = String::new();
+    truncated_output.extend(&tokens[..half]);
+    truncated_output.push_str(&format!("…{truncated} tokens truncated…"));
+    truncated_output.extend(&tokens[total_tokens - half..]);
+    (truncated_output, Some(total_tokens))
+}
+
 /// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
 /// preserving the beginning and the end. Returns the possibly truncated
 /// string and `Some(original_token_count)` (counted with the local tokenizer;

@@ -136,8 +322,33 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>

 #[cfg(test)]
 mod tests {
+    use super::MODEL_FORMAT_MAX_BYTES;
+    use super::MODEL_FORMAT_MAX_LINES;
+    use super::format_output_for_model_body;
+    use super::globally_truncate_function_output_items;
     use super::truncate_middle;
+    use super::truncate_output_to_tokens;
+    use codex_protocol::models::FunctionCallOutputContentItem;
     use codex_utils_tokenizer::Tokenizer;
     use pretty_assertions::assert_eq;
+    use regex_lite::Regex;
+
+    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
+        let head_lines = MODEL_FORMAT_MAX_LINES / 2;
+        let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
+        let head_take = head_lines.min(total_lines);
+        let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
+        let omitted = total_lines.saturating_sub(head_take + tail_take);
+        let escaped_line = regex_lite::escape(line);
+        if omitted == 0 {
+            return format!(
+                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
+            );
+        }
+        format!(
+            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
+        )
+    }

     #[test]
     fn truncate_middle_no_newlines_fallback() {

@@ -197,4 +408,208 @@ mod tests {
         assert!(out.ends_with("017\n018\n019\n020\n"));
         assert_eq!(total, Some(tok.count(&s) as u64));
     }
+
+    #[test]
+    fn truncate_output_to_tokens_returns_original_when_under_limit() {
+        let s = "short output";
+        let (truncated, original) = truncate_output_to_tokens(s, 100);
+        assert_eq!(truncated, s);
+        assert_eq!(original, None);
+    }
+
+    #[test]
+    fn truncate_output_to_tokens_reports_truncation_at_zero_limit() {
+        let s = "abcdef";
+        let (truncated, original) = truncate_output_to_tokens(s, 0);
+        assert!(truncated.contains("tokens truncated"));
+        assert_eq!(original, Some(s.chars().count()));
+    }
+
+    #[test]
+    fn truncate_output_to_tokens_preserves_prefix_and_suffix() {
+        let s = "abcdefghijklmnopqrstuvwxyz";
+        let max_tokens = 10;
+        let (truncated, original) = truncate_output_to_tokens(s, max_tokens);
+        assert!(truncated.starts_with("abcde"));
+        assert!(truncated.ends_with("vwxyz"));
+        assert_eq!(original, Some(s.chars().count()));
+    }
+
+    #[test]
+    fn format_exec_output_truncates_large_error() {
+        let line = "very long execution error line that should trigger truncation\n";
+        let large_error = line.repeat(2_500); // way beyond both byte and line limits
+
+        let truncated = format_output_for_model_body(
+            &large_error,
+            MODEL_FORMAT_MAX_BYTES,
+            MODEL_FORMAT_MAX_LINES,
+        );
+
+        let total_lines = large_error.lines().count();
+        let pattern = truncated_message_pattern(line, total_lines);
+        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
+            panic!("failed to compile regex {pattern}: {err}");
+        });
+        let captures = regex
+            .captures(&truncated)
+            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {truncated}"));
+        let body = captures
+            .name("body")
+            .expect("missing body capture")
+            .as_str();
+        assert!(
+            body.len() <= MODEL_FORMAT_MAX_BYTES,
+            "body exceeds byte limit: {} bytes",
+            body.len()
+        );
+        assert_ne!(truncated, large_error);
+    }
+
+    #[test]
+    fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
+        let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
+        let truncated = format_output_for_model_body(
+            &long_line,
+            MODEL_FORMAT_MAX_BYTES,
+            MODEL_FORMAT_MAX_LINES,
+        );
+
+        assert_ne!(truncated, long_line);
+        let marker_line =
+            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
+        assert!(
+            truncated.contains(&marker_line),
+            "missing byte truncation marker: {truncated}"
+        );
+        assert!(
+            !truncated.contains("omitted"),
+            "line omission marker should not appear when no lines were dropped: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_returns_original_when_within_limits() {
+        let content = "example output\n".repeat(10);
+
+        assert_eq!(
+            format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES),
+            content
+        );
+    }
+
+    #[test]
+    fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}\n"))
+            .collect();
+
+        let truncated =
+            format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES);
+
+        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
+        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
+
+        assert!(
+            truncated.contains(&expected_marker),
+            "missing omitted marker: {truncated}"
+        );
+        assert!(
+            truncated.contains("line-0\n"),
+            "expected head line to remain: {truncated}"
+        );
+
+        let last_line = format!("line-{}\n", total_lines - 1);
+        assert!(
+            truncated.contains(&last_line),
+            "expected tail line to remain: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
+        let long_line = "x".repeat(256);
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}-{long_line}\n"))
+            .collect();
+
+        let truncated =
+            format_output_for_model_body(&content, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES);

+        assert!(
+            truncated.contains("[... omitted 42 of 298 lines ...]"),
+            "expected omitted marker when line count exceeds limit: {truncated}"
+        );
+        assert!(
+            !truncated.contains("output truncated to fit"),
+            "line omission marker should take precedence over byte marker: {truncated}"
+        );
+    }
+
+    #[test]
+    fn truncates_across_multiple_under_limit_texts_and_reports_omitted() {
+        // Arrange: several text items, none exceeding the per-item limit, but the total exceeds the budget.
+        let budget = MODEL_FORMAT_MAX_BYTES;
+        let t1_len = (budget / 2).saturating_sub(10);
+        let t2_len = (budget / 2).saturating_sub(10);
+        let remaining_after_t1_t2 = budget.saturating_sub(t1_len + t2_len);
+        let t3_len = 50; // gets truncated to remaining_after_t1_t2
+        let t4_len = 5; // omitted
+        let t5_len = 7; // omitted
+
+        let t1 = "a".repeat(t1_len);
+        let t2 = "b".repeat(t2_len);
+        let t3 = "c".repeat(t3_len);
+        let t4 = "d".repeat(t4_len);
+        let t5 = "e".repeat(t5_len);
+
+        let items = vec![
+            FunctionCallOutputContentItem::InputText { text: t1 },
+            FunctionCallOutputContentItem::InputText { text: t2 },
+            FunctionCallOutputContentItem::InputImage {
+                image_url: "img:mid".to_string(),
+            },
+            FunctionCallOutputContentItem::InputText { text: t3 },
+            FunctionCallOutputContentItem::InputText { text: t4 },
+            FunctionCallOutputContentItem::InputText { text: t5 },
+        ];
+
+        let output = globally_truncate_function_output_items(&items);
+
+        // Expect: t1 (full), t2 (full), image, t3 (truncated), summary mentioning 2 omitted.
+        assert_eq!(output.len(), 5);
+
+        let first_text = match &output[0] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected first item: {other:?}"),
+        };
+        assert_eq!(first_text.len(), t1_len);
+
+        let second_text = match &output[1] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected second item: {other:?}"),
+        };
+        assert_eq!(second_text.len(), t2_len);
+
+        assert_eq!(
+            output[2],
+            FunctionCallOutputContentItem::InputImage {
+                image_url: "img:mid".to_string()
+            }
+        );
+
+        let fourth_text = match &output[3] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected fourth item: {other:?}"),
+        };
+        assert_eq!(fourth_text.len(), remaining_after_t1_t2);
+
+        let summary_text = match &output[4] {
+            FunctionCallOutputContentItem::InputText { text } => text,
+            other => panic!("unexpected summary item: {other:?}"),
+        };
+        assert!(summary_text.contains("omitted 2 text items"));
+    }
 }
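A worked example of the head+tail policy format_output_for_model_body implements, simplified to the line limit only (the real function also enforces a byte budget and char-boundary slicing): with a 4-line limit, a 10-line input keeps 2 head and 2 tail lines around an elision marker.

fn head_tail_lines(content: &str, limit_lines: usize) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.len() <= limit_lines {
        return content.to_string();
    }
    let head = limit_lines / 2;
    let tail = limit_lines - head;
    let omitted = lines.len() - head - tail;
    let mut out: Vec<String> = lines[..head].iter().map(|l| l.to_string()).collect();
    out.push(format!("[... omitted {omitted} of {} lines ...]", lines.len()));
    out.extend(lines[lines.len() - tail..].iter().map(|l| l.to_string()));
    out.join("\n")
}

fn main() {
    let input: String = (0..10).map(|i| format!("line-{i}\n")).collect();
    // Prints line-0 and line-1, then "[... omitted 6 of 10 lines ...]",
    // then line-8 and line-9.
    println!("{}", head_tail_lines(&input, 4));
}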
@@ -124,37 +124,6 @@ pub(crate) fn generate_chunk_id() -> String {
     .collect()
 }

-pub(crate) fn truncate_output_to_tokens(
-    output: &str,
-    max_tokens: usize,
-) -> (String, Option<usize>) {
-    if max_tokens == 0 {
-        let total_tokens = output.chars().count();
-        let message = format!("…{total_tokens} tokens truncated…");
-        return (message, Some(total_tokens));
-    }
-
-    let tokens: Vec<char> = output.chars().collect();
-    let total_tokens = tokens.len();
-    if total_tokens <= max_tokens {
-        return (output.to_string(), None);
-    }
-
-    let half = max_tokens / 2;
-    if half == 0 {
-        let truncated = total_tokens.saturating_sub(max_tokens);
-        let message = format!("…{truncated} tokens truncated…");
-        return (message, Some(total_tokens));
-    }
-
-    let truncated = total_tokens.saturating_sub(half * 2);
-    let mut truncated_output = String::new();
-    truncated_output.extend(&tokens[..half]);
-    truncated_output.push_str(&format!("…{truncated} tokens truncated…"));
-    truncated_output.extend(&tokens[total_tokens - half..]);
-    (truncated_output, Some(total_tokens))
-}
-
 #[cfg(test)]
 #[cfg(unix)]
 mod tests {
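The hunk above deletes truncate_output_to_tokens from unified_exec; it now lives in crate::truncate (see the new file earlier). Its semantics, ignoring the zero- and one-token edge cases the real function handles: "tokens" are approximated as chars, and half the budget goes to each end. A small self-contained illustration:

fn truncate_to_tokens(output: &str, max_tokens: usize) -> (String, Option<usize>) {
    let tokens: Vec<char> = output.chars().collect();
    let total = tokens.len();
    if total <= max_tokens {
        return (output.to_string(), None);
    }
    let half = max_tokens / 2; // assumes max_tokens >= 2; the real fn also covers 0 and 1
    let truncated = total - half * 2;
    let head: String = tokens[..half].iter().collect();
    let tail: String = tokens[total - half..].iter().collect();
    (format!("{head}…{truncated} tokens truncated…{tail}"), Some(total))
}

fn main() {
    let (out, total) = truncate_to_tokens("abcdefghij", 4);
    assert_eq!(out, "ab…6 tokens truncated…ij");
    assert_eq!(total, Some(10));
}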
@@ -36,7 +36,7 @@ use super::generate_chunk_id;
 use super::resolve_max_tokens;
 use super::session::OutputBuffer;
 use super::session::UnifiedExecSession;
-use super::truncate_output_to_tokens;
+use crate::truncate::truncate_output_to_tokens;

 impl UnifiedExecSessionManager {
     pub(crate) async fn exec_command(
1  codex-rs/core/templates/compact/summary_prefix.md  (new file)
@@ -0,0 +1 @@
+Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
@@ -1,10 +1,12 @@
 #![allow(clippy::expect_used)]
 use codex_core::CodexAuth;
 use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
 use codex_core::NewConversation;
 use codex_core::built_in_model_providers;
+use codex_core::compact::SUMMARIZATION_PROMPT;
+use codex_core::compact::SUMMARY_PREFIX;
 use codex_core::config::Config;
 use codex_core::protocol::ErrorEvent;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;

@@ -12,7 +14,10 @@ use codex_core::protocol::RolloutLine;
 use codex_core::protocol::WarningEvent;
 use codex_protocol::user_input::UserInput;
 use core_test_support::load_default_config_for_test;
+use core_test_support::responses::ev_local_shell_call;
+use core_test_support::responses::ev_reasoning_item;
 use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use core_test_support::wait_for_event_match;
+use std::collections::VecDeque;

@@ -38,7 +43,6 @@ const THIRD_USER_MSG: &str = "next turn";
 const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY";
 const FIRST_AUTO_MSG: &str = "token limit start";
 const SECOND_AUTO_MSG: &str = "token limit push";
-const STILL_TOO_BIG_REPLY: &str = "STILL_TOO_BIG";
 const MULTI_AUTO_MSG: &str = "multi auto";
 const SECOND_LARGE_REPLY: &str = "SECOND_LARGE_REPLY";
 const FIRST_AUTO_SUMMARY: &str = "FIRST_AUTO_SUMMARY";

@@ -50,10 +54,6 @@ const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
 const DUMMY_CALL_ID: &str = "call-multi-auto";
 const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
 const POST_AUTO_USER_MSG: &str = "post auto follow-up";
-const COMPACT_PROMPT_MARKER: &str =
-    "You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";
-pub(super) const TEST_COMPACT_PROMPT: &str =
-    "You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.\nTest-only compact prompt.";

 pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start a new conversation when possible to keep conversations small and targeted.";

@@ -61,6 +61,10 @@ fn auto_summary(summary: &str) -> String {
     summary.to_string()
 }

+fn summary_with_prefix(summary: &str) -> String {
+    format!("{SUMMARY_PREFIX}\n{summary}")
+}
+
 fn drop_call_id(value: &mut serde_json::Value) {
     match value {
         serde_json::Value::Object(obj) => {

@@ -79,7 +83,18 @@ fn drop_call_id(value: &mut serde_json::Value) {
 }

 fn set_test_compact_prompt(config: &mut Config) {
-    config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
+    config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
 }

+fn body_contains_text(body: &str, text: &str) -> bool {
+    body.contains(&json_fragment(text))
+}
+
+fn json_fragment(text: &str) -> String {
+    serde_json::to_string(text)
+        .expect("serialize text to JSON")
+        .trim_matches('"')
+        .to_string()
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -107,13 +122,13 @@ async fn summarize_context_three_requests_and_instructions() {
     // Mount three expectations, one per request, matched by body content.
     let first_matcher = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains("\"text\":\"hello world\"") && !body.contains(COMPACT_PROMPT_MARKER)
+        body.contains("\"text\":\"hello world\"") && !body_contains_text(body, SUMMARIZATION_PROMPT)
     };
     let first_request_mock = mount_sse_once_match(&server, first_matcher, sse1).await;

     let second_matcher = |req: &wiremock::Request| {
         let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(COMPACT_PROMPT_MARKER)
+        body_contains_text(body, SUMMARIZATION_PROMPT)
     };
     let second_request_mock = mount_sse_once_match(&server, second_matcher, sse2).await;

@@ -197,7 +212,7 @@ async fn summarize_context_three_requests_and_instructions() {
     assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
     let text2 = last2["content"][0]["text"].as_str().unwrap();
     assert_eq!(
-        text2, TEST_COMPACT_PROMPT,
+        text2, SUMMARIZATION_PROMPT,
         "expected summarize trigger, got `{text2}`"
     );

@@ -210,6 +225,7 @@ async fn summarize_context_three_requests_and_instructions() {
     );

     let mut messages: Vec<(String, String)> = Vec::new();
+    let expected_summary_message = summary_with_prefix(SUMMARY_TEXT);

     for item in input3 {
         if let Some("message") = item.get("type").and_then(|v| v.as_str()) {

@@ -248,13 +264,13 @@ async fn summarize_context_three_requests_and_instructions() {
     assert!(
         messages
             .iter()
-            .any(|(r, t)| r == "user" && t == SUMMARY_TEXT),
+            .any(|(r, t)| r == "user" && t == &expected_summary_message),
         "third request should include the summary message"
     );
     assert!(
         !messages
             .iter()
-            .any(|(_, text)| text.contains(TEST_COMPACT_PROMPT)),
+            .any(|(_, text)| text.contains(SUMMARIZATION_PROMPT)),
         "third request should not include the summarize trigger"
     );

@@ -285,7 +301,7 @@ async fn summarize_context_three_requests_and_instructions() {
                 api_turn_count += 1;
             }
             RolloutItem::Compacted(ci) => {
-                if ci.message == SUMMARY_TEXT {
+                if ci.message == expected_summary_message {
                     saw_compacted_summary = true;
                 }
             }

@@ -358,7 +374,7 @@ async fn manual_compact_uses_custom_prompt() {
             if text == custom_prompt {
                 found_custom_prompt = true;
            }
-            if text == TEST_COMPACT_PROMPT {
+            if text == SUMMARIZATION_PROMPT {
                 found_default_prompt = true;
             }
@@ -433,6 +449,514 @@ async fn manual_compact_emits_estimated_token_usage_event() {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let codex = test_codex()
|
||||
.build(&server)
|
||||
.await
|
||||
.expect("build codex")
|
||||
.codex;
|
||||
|
||||
// user message
|
||||
let user_message = "create an app";
|
||||
|
||||
// Prepare the mock responses from the model
|
||||
|
||||
// summary texts from model
|
||||
let first_summary_text = "The task is to create an app. I started to create a react app.";
|
||||
let second_summary_text = "The task is to create an app. I started to create a react app. then I realized that I need to create a node app.";
|
||||
let third_summary_text = "The task is to create an app. I started to create a react app. then I realized that I need to create a node app. then I realized that I need to create a python app.";
|
||||
// summary texts with prefix
|
||||
let prefixed_first_summary = summary_with_prefix(first_summary_text);
|
||||
let prefixed_second_summary = summary_with_prefix(second_summary_text);
|
||||
let prefixed_third_summary = summary_with_prefix(third_summary_text);
|
||||
// token used count after long work
|
||||
let token_count_used = 270_000;
|
||||
// token used count after compaction
|
||||
let token_count_used_after_compaction = 80000;
|
||||
|
||||
// mock responses from the model
|
||||
|
||||
// first chunk of work
|
||||
let model_reasoning_response_1_sse = sse(vec![
|
||||
ev_reasoning_item("m1", &["I will create a react app"], &[]),
|
||||
ev_local_shell_call("r1-shell", "completed", vec!["echo", "make-react"]),
|
||||
ev_completed_with_tokens("r1", token_count_used),
|
||||
]);
|
||||
|
||||
// first compaction response
|
||||
let model_compact_response_1_sse = sse(vec![
|
||||
ev_assistant_message("m2", first_summary_text),
|
||||
ev_completed_with_tokens("r2", token_count_used_after_compaction),
|
||||
]);
|
||||
|
||||
// second chunk of work
|
||||
let model_reasoning_response_2_sse = sse(vec![
|
||||
ev_reasoning_item("m3", &["I will create a node app"], &[]),
|
||||
ev_local_shell_call("r3-shell", "completed", vec!["echo", "make-node"]),
|
||||
ev_completed_with_tokens("r3", token_count_used),
|
||||
]);
|
||||
|
||||
// second compaction response
|
||||
let model_compact_response_2_sse = sse(vec![
|
||||
ev_assistant_message("m4", second_summary_text),
|
||||
ev_completed_with_tokens("r4", token_count_used_after_compaction),
|
||||
]);
|
||||
|
||||
// third chunk of work
|
||||
let model_reasoning_response_3_sse = sse(vec![
|
||||
ev_reasoning_item("m6", &["I will create a python app"], &[]),
|
||||
ev_local_shell_call("r6-shell", "completed", vec!["echo", "make-python"]),
|
||||
ev_completed_with_tokens("r6", token_count_used),
|
||||
]);
|
||||
|
||||
// third compaction response
|
||||
let model_compact_response_3_sse = sse(vec![
|
||||
ev_assistant_message("m7", third_summary_text),
|
||||
ev_completed_with_tokens("r7", token_count_used_after_compaction),
|
||||
]);
|
||||
|
||||
// final response
|
||||
let model_final_response_sse = sse(vec![
|
||||
ev_assistant_message(
|
||||
"m8",
|
||||
"The task is to create an app. I started to create a react app. then I realized that I need to create a node app. then I realized that I need to create a python app.",
|
||||
),
|
||||
ev_completed_with_tokens("r8", token_count_used_after_compaction + 1000),
|
||||
]);
|
||||
|
||||
// mount the mock responses from the model
|
||||
let bodies = vec![
|
||||
model_reasoning_response_1_sse,
|
||||
model_compact_response_1_sse,
|
||||
model_reasoning_response_2_sse,
|
||||
model_compact_response_2_sse,
|
||||
model_reasoning_response_3_sse,
|
||||
model_compact_response_3_sse,
|
||||
model_final_response_sse,
|
||||
];
|
||||
mount_sse_sequence(&server, bodies).await;
|
||||
|
||||
// Start the conversation with the user message
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: user_message.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.expect("submit user input");
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// collect the requests payloads from the model
|
||||
let requests_payloads = server.received_requests().await.unwrap();
|
||||
|
||||
let body = requests_payloads[0]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
|
||||
|
||||
// test 1: after compaction, we should have one environment message, one user message, and one user message with summary prefix
|
||||
let compaction_indices = [2, 4, 6];
|
||||
let expected_summaries = [
|
||||
prefixed_first_summary.as_str(),
|
||||
prefixed_second_summary.as_str(),
|
||||
prefixed_third_summary.as_str(),
|
||||
];
|
||||
for (i, expected_summary) in compaction_indices.into_iter().zip(expected_summaries) {
|
||||
let body = requests_payloads.clone()[i]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
assert_eq!(input.len(), 3);
|
||||
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
|
||||
let user_message_received = input[1]["content"][0]["text"].as_str().unwrap();
|
||||
let summary_message = input[2]["content"][0]["text"].as_str().unwrap();
|
||||
assert_eq!(environment_message, environment_message);
|
||||
assert_eq!(user_message_received, user_message);
|
||||
assert_eq!(
|
||||
summary_message, expected_summary,
|
||||
"compaction request at index {i} should include the prefixed summary"
|
||||
);
|
||||
}
|
||||
|
||||
// test 2: the expected requests inputs should be as follows:
|
||||
let expected_requests_inputs = json!([
|
||||
[
|
||||
// 0: first request of the user message.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
|
||||
// 1: first automatic compaction request.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": null,
|
||||
"encrypted_content": null,
|
||||
"summary": [
|
||||
{
|
||||
"text": "I will create a react app",
|
||||
"type": "summary_text"
|
||||
}
|
||||
],
|
||||
"type": "reasoning"
|
||||
},
|
||||
{
|
||||
"action": {
|
||||
"command": [
|
||||
"echo",
|
||||
"make-react"
|
||||
],
|
||||
"env": null,
|
||||
"timeout_ms": null,
|
||||
"type": "exec",
|
||||
"user": null,
|
||||
"working_directory": null
|
||||
},
|
||||
"call_id": "r1-shell",
|
||||
"status": "completed",
|
||||
"type": "local_shell_call"
|
||||
},
|
||||
{
|
||||
"call_id": "r1-shell",
|
||||
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
|
||||
"type": "function_call_output"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": SUMMARIZATION_PROMPT,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
|
||||
// 2: request after first automatic compaction.
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": environment_message,
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": "create an app",
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
{
|
||||
"text": prefixed_first_summary.clone(),
|
||||
"type": "input_text"
|
||||
}
|
||||
],
|
||||
"role": "user",
|
||||
"type": "message"
|
||||
}
|
||||
]
|
||||
,
|
||||
[
// 2: request after first automatic compaction.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_first_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
// 3: request for second automatic compaction.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_first_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": null,
"encrypted_content": null,
"summary": [
{
"text": "I will create a node app",
"type": "summary_text"
}
],
"type": "reasoning"
},
{
"action": {
"command": [
"echo",
"make-node"
],
"env": null,
"timeout_ms": null,
"type": "exec",
"user": null,
"working_directory": null
},
"call_id": "r3-shell",
"status": "completed",
"type": "local_shell_call"
},
{
"call_id": "r3-shell",
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
"type": "function_call_output"
},
{
"content": [
{
"text": SUMMARIZATION_PROMPT,
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
// 4: request after second automatic compaction.
[
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_second_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
// 5: request for third automatic compaction.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_second_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": null,
"encrypted_content": null,
"summary": [
{
"text": "I will create a python app",
"type": "summary_text"
}
],
"type": "reasoning"
},
{
"action": {
"command": [
"echo",
"make-python"
],
"env": null,
"timeout_ms": null,
"type": "exec",
"user": null,
"working_directory": null
},
"call_id": "r6-shell",
"status": "completed",
"type": "local_shell_call"
},
{
"call_id": "r6-shell",
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
"type": "function_call_output"
},
{
"content": [
{
"text": SUMMARIZATION_PROMPT,
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
{
// 6: request after third automatic compaction.
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_third_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
]);

// Ignore local shell call outputs because they differ from one OS to another and are out of scope for this test.
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
values
.iter()
.filter(|value| {
value
.get("type")
.and_then(|ty| ty.as_str())
.is_none_or(|ty| ty != "function_call_output")
})
.cloned()
.collect()
}

for (i, request) in requests_payloads.iter().enumerate() {
let body = request.body_json::<serde_json::Value>().unwrap();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
let expected_input = expected_requests_inputs[i].as_array().unwrap();
assert_eq!(normalize_inputs(input), normalize_inputs(expected_input));
}

// test 3: the number of requests should be 7
assert_eq!(requests_payloads.len(), 7);
}

// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
@@ -460,12 +984,13 @@ async fn auto_compact_runs_after_token_limit_hit() {
ev_assistant_message("m4", FINAL_REPLY),
ev_completed_with_tokens("r4", 120),
]);
let prefixed_auto_summary = AUTO_SUMMARY_TEXT;

let first_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(FIRST_AUTO_MSG)
&& !body.contains(SECOND_AUTO_MSG)
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(&server, first_matcher, sse1).await;

@@ -473,27 +998,28 @@ async fn auto_compact_runs_after_token_limit_hit() {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(SECOND_AUTO_MSG)
&& body.contains(FIRST_AUTO_MSG)
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(&server, second_matcher, sse2).await;

let third_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(COMPACT_PROMPT_MARKER)
body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(&server, third_matcher, sse3).await;

let resume_matcher = |req: &wiremock::Request| {
let resume_marker = prefixed_auto_summary;
let resume_matcher = move |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(AUTO_SUMMARY_TEXT)
&& !body.contains(COMPACT_PROMPT_MARKER)
body.contains(resume_marker)
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
&& !body.contains(POST_AUTO_USER_MSG)
};
mount_sse_once_match(&server, resume_matcher, sse_resume).await;

let fourth_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
body.contains(POST_AUTO_USER_MSG) && !body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(&server, fourth_matcher, sse4).await;

@@ -555,9 +1081,10 @@ async fn auto_compact_runs_after_token_limit_hit() {
requests.len()
);
let is_auto_compact = |req: &wiremock::Request| {
std::str::from_utf8(&req.body)
.unwrap_or("")
.contains(COMPACT_PROMPT_MARKER)
body_contains_text(
std::str::from_utf8(&req.body).unwrap_or(""),
SUMMARIZATION_PROMPT,
)
};
let auto_compact_count = requests.iter().filter(|req| is_auto_compact(req)).count();
assert_eq!(
@@ -574,13 +1101,14 @@ async fn auto_compact_runs_after_token_limit_hit() {
"auto compact should add a third request"
);

let resume_summary_marker = prefixed_auto_summary;
let resume_index = requests
.iter()
.enumerate()
.find_map(|(idx, req)| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
(body.contains(AUTO_SUMMARY_TEXT)
&& !body.contains(COMPACT_PROMPT_MARKER)
(body.contains(resume_summary_marker)
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
&& !body.contains(POST_AUTO_USER_MSG))
.then_some(idx)
})
@@ -592,7 +1120,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
.rev()
.find_map(|(idx, req)| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
(body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER))
(body.contains(POST_AUTO_USER_MSG) && !body_contains_text(body, SUMMARIZATION_PROMPT))
.then_some(idx)
})
.expect("follow-up request missing");
@@ -639,7 +1167,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
.and_then(|text| text.as_str())
.unwrap_or_default();
assert_eq!(
last_text, TEST_COMPACT_PROMPT,
last_text, SUMMARIZATION_PROMPT,
"auto compact should send the summarization prompt as a user message",
);

@@ -654,7 +1182,8 @@ async fn auto_compact_runs_after_token_limit_hit() {
.and_then(|arr| arr.first())
.and_then(|entry| entry.get("text"))
.and_then(|v| v.as_str())
== Some(AUTO_SUMMARY_TEXT)
.map(|text| text.contains(prefixed_auto_summary))
.unwrap_or(false)
}),
"resume request should include compacted history"
);
@@ -689,7 +1218,9 @@ async fn auto_compact_runs_after_token_limit_hit() {
"auto compact follow-up request should include the new user message"
);
assert!(
user_texts.iter().any(|text| text == AUTO_SUMMARY_TEXT),
user_texts
.iter()
.any(|text| text.contains(prefixed_auto_summary)),
"auto compact follow-up request should include the summary message"
);
}
@@ -720,7 +1251,7 @@ async fn auto_compact_persists_rollout_entries() {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(FIRST_AUTO_MSG)
&& !body.contains(SECOND_AUTO_MSG)
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(&server, first_matcher, sse1).await;

@@ -728,13 +1259,13 @@ async fn auto_compact_persists_rollout_entries() {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(SECOND_AUTO_MSG)
&& body.contains(FIRST_AUTO_MSG)
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(&server, second_matcher, sse2).await;

let third_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(COMPACT_PROMPT_MARKER)
body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(&server, third_matcher, sse3).await;

@@ -809,112 +1340,6 @@ async fn auto_compact_persists_rollout_entries() {
);
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_stops_after_failed_attempt() {
skip_if_no_network!();

let server = start_mock_server().await;

let sse1 = sse(vec![
ev_assistant_message("m1", FIRST_REPLY),
ev_completed_with_tokens("r1", 500),
]);

let summary_payload = auto_summary(SUMMARY_TEXT);
let sse2 = sse(vec![
ev_assistant_message("m2", &summary_payload),
ev_completed_with_tokens("r2", 50),
]);

let sse3 = sse(vec![
ev_assistant_message("m3", STILL_TOO_BIG_REPLY),
ev_completed_with_tokens("r3", 500),
]);

let first_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(FIRST_AUTO_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, first_matcher, sse1.clone()).await;

let second_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, second_matcher, sse2.clone()).await;

let third_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
!body.contains(COMPACT_PROMPT_MARKER) && body.contains(SUMMARY_TEXT)
};
mount_sse_once_match(&server, third_matcher, sse3.clone()).await;

let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};

let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
set_test_compact_prompt(&mut config);
config.model_auto_compact_token_limit = Some(200);
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let codex = conversation_manager
.new_conversation(config)
.await
.unwrap()
.conversation;

codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: FIRST_AUTO_MSG.into(),
}],
})
.await
.unwrap();

let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
let EventMsg::Error(ErrorEvent { message }) = error_event else {
panic!("expected error event");
};
assert!(
message.contains("limit"),
"error message should include limit information: {message}"
);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

let requests = server.received_requests().await.unwrap();
assert_eq!(
requests.len(),
3,
"auto compact should attempt at most one summarization before erroring"
);

let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
let input = last_body
.get("input")
.and_then(|v| v.as_array())
.unwrap_or_else(|| panic!("unexpected request format: {last_body}"));
let contains_prompt = input.iter().any(|item| {
item.get("type").and_then(|v| v.as_str()) == Some("message")
&& item.get("role").and_then(|v| v.as_str()) == Some("user")
&& item
.get("content")
.and_then(|v| v.as_array())
.and_then(|items| items.first())
.and_then(|entry| entry.get("text"))
.and_then(|text| text.as_str())
.map(|text| text == TEST_COMPACT_PROMPT)
.unwrap_or(false)
});
assert!(
!contains_prompt,
"third request should be the follow-up turn, not another summarization",
);
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn manual_compact_retries_after_context_window_error() {
skip_if_no_network!();
@@ -1013,7 +1438,7 @@ async fn manual_compact_retries_after_context_window_error() {
.and_then(|items| items.first())
.and_then(|entry| entry.get("text"))
.and_then(|text| text.as_str()),
Some(TEST_COMPACT_PROMPT),
Some(SUMMARIZATION_PROMPT),
"compact attempt should include summarization prompt"
);
assert_eq!(
@@ -1024,7 +1449,7 @@ async fn manual_compact_retries_after_context_window_error() {
.and_then(|items| items.first())
.and_then(|entry| entry.get("text"))
.and_then(|text| text.as_str()),
Some(TEST_COMPACT_PROMPT),
Some(SUMMARIZATION_PROMPT),
"retry attempt should include summarization prompt"
);
assert_eq!(
@@ -1053,6 +1478,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
let final_user_message = "post compact follow-up";
let first_summary = "FIRST_MANUAL_SUMMARY";
let second_summary = "SECOND_MANUAL_SUMMARY";
let expected_second_summary = summary_with_prefix(second_summary);

let server = start_mock_server().await;

@@ -1170,13 +1596,13 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
"first turn request missing first user message"
);
assert!(
!contains_user_text(&first_turn_input, TEST_COMPACT_PROMPT),
!contains_user_text(&first_turn_input, SUMMARIZATION_PROMPT),
"first turn request should not include summarization prompt"
);

let first_compact_input = requests[1].input();
assert!(
contains_user_text(&first_compact_input, TEST_COMPACT_PROMPT),
contains_user_text(&first_compact_input, SUMMARIZATION_PROMPT),
"first compact request should include summarization prompt"
);
assert!(
@@ -1196,7 +1622,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {

let second_compact_input = requests[3].input();
assert!(
contains_user_text(&second_compact_input, TEST_COMPACT_PROMPT),
contains_user_text(&second_compact_input, SUMMARIZATION_PROMPT),
"second compact request should include summarization prompt"
);
assert!(
@@ -1230,14 +1656,6 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
"role": "user",
"type": "message",
}),
json!({
"content": vec![json!({
"text": first_summary,
"type": "input_text",
})],
"role": "user",
"type": "message",
}),
json!({
"content": vec![json!({
"text": second_user_message,
@@ -1248,7 +1666,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
}),
json!({
"content": vec![json!({
"text": second_summary,
"text": expected_second_summary,
"type": "input_text",
})],
"role": "user",
@@ -1368,7 +1786,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"first request should contain the user input"
);
assert!(
request_bodies[1].contains(COMPACT_PROMPT_MARKER),
body_contains_text(&request_bodies[1], SUMMARIZATION_PROMPT),
"first auto compact request should include the summarization prompt"
);
assert!(
@@ -1376,7 +1794,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"function call output should be sent before the second auto compact"
);
assert!(
request_bodies[4].contains(COMPACT_PROMPT_MARKER),
body_contains_text(&request_bodies[4], SUMMARIZATION_PROMPT),
"second auto compact request should include the summarization prompt"
);
}
@@ -1472,7 +1890,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {

let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
assert!(
auto_compact_body.contains(COMPACT_PROMPT_MARKER),
body_contains_text(&auto_compact_body, SUMMARIZATION_PROMPT),
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
);
}

@@ -10,13 +10,13 @@
use super::compact::COMPACT_WARNING_MESSAGE;
use super::compact::FIRST_REPLY;
use super::compact::SUMMARY_TEXT;
use super::compact::TEST_COMPACT_PROMPT;
use codex_core::CodexAuth;
use codex_core::CodexConversation;
use codex_core::ConversationManager;
use codex_core::ModelProviderInfo;
use codex_core::NewConversation;
use codex_core::built_in_model_providers;
use codex_core::compact::SUMMARIZATION_PROMPT;
use codex_core::config::Config;
use codex_core::config::OPENAI_DEFAULT_MODEL;
use codex_core::protocol::EventMsg;
@@ -38,13 +38,22 @@ use tempfile::TempDir;
use wiremock::MockServer;

const AFTER_SECOND_RESUME: &str = "AFTER_SECOND_RESUME";
const COMPACT_PROMPT_MARKER: &str =
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";

fn network_disabled() -> bool {
std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
}

fn body_contains_text(body: &str, text: &str) -> bool {
body.contains(&json_fragment(text))
}

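// Render `text` as a JSON string and strip the outer quotes so the escaped
// fragment can be searched for inside a raw JSON request body.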
fn json_fragment(text: &str) -> String {
serde_json::to_string(text)
.expect("serialize text to JSON")
.trim_matches('"')
.to_string()
}

fn filter_out_ghost_snapshot_entries(items: &[Value]) -> Vec<Value> {
items
.iter()
@@ -82,7 +91,8 @@ fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
.and_then(|arr| arr.first())
.and_then(|entry| entry.get("text"))
.and_then(Value::as_str)
== Some(summary_text)
.map(|text| text.contains(summary_text))
.unwrap_or(false)
})
})
.cloned()
@@ -283,7 +293,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"content": [
{
"type": "input_text",
"text": TEST_COMPACT_PROMPT
"text": SUMMARIZATION_PROMPT
}
]
}
@@ -741,7 +751,7 @@ async fn mount_initial_flow(server: &MockServer) {
let match_first = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("\"text\":\"hello world\"")
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body_contains_text(body, SUMMARIZATION_PROMPT)
&& !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
&& !body.contains("\"text\":\"AFTER_COMPACT\"")
&& !body.contains("\"text\":\"AFTER_RESUME\"")
@@ -751,7 +761,7 @@ async fn mount_initial_flow(server: &MockServer) {

let match_compact = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(COMPACT_PROMPT_MARKER)
body_contains_text(body, SUMMARIZATION_PROMPT)
};
mount_sse_once_match(server, match_compact, sse2).await;

@@ -785,7 +795,7 @@ async fn mount_second_compact_flow(server: &MockServer) {

let match_second_compact = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(COMPACT_PROMPT_MARKER) && body.contains("AFTER_FORK")
body_contains_text(body, SUMMARIZATION_PROMPT) && body.contains("AFTER_FORK")
};
mount_sse_once_match(server, match_second_compact, sse6).await;

@@ -806,7 +816,7 @@ async fn start_test_conversation(
let home = TempDir::new().expect("create temp dir");
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());

let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
let NewConversation { conversation, .. } = manager

@@ -788,6 +788,59 @@ Output:
Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));

let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.features.enable(Feature::ShellCommandTool);
});
let test = builder.build(&server).await?;

let call_id = "shell-command";
let args = json!({
"command": "echo shell command",
"timeout_ms": 1_000,
});
let responses = vec![
sse(vec![
json!({"type": "response.created", "response": {"id": "resp-1"}}),
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "shell_command done"),
ev_completed("resp-2"),
]),
];
let mock = mount_sse_sequence(&server, responses).await;

test.submit_turn_with_policy(
"run the shell_command script in the user's shell",
SandboxPolicy::DangerFullAccess,
)
.await?;

let req = mock
.last_request()
.expect("shell_command output request recorded");
let output_item = req.function_call_output(call_id);
let output = output_item
.get("output")
.and_then(Value::as_str)
.expect("shell_command output string");

let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
shell command
?$";
assert_regex_match(expected_pattern, output);

Ok(())
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn local_shell_call_output_is_structured() -> Result<()> {
skip_if_no_network!(Ok(()));

29
codex-rs/execpolicy2/Cargo.toml
Normal file
@@ -0,0 +1,29 @@
[package]
name = "codex-execpolicy2"
version = { workspace = true }
edition = "2024"
description = "Codex exec policy v2: prefix-based Starlark rules for command decisions."

[lib]
name = "codex_execpolicy2"
path = "src/lib.rs"

[[bin]]
name = "codex-execpolicy2"
path = "src/main.rs"

[lints]
workspace = true

[dependencies]
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
multimap = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
shlex = { workspace = true }
starlark = { workspace = true }
thiserror = { workspace = true }

[dev-dependencies]
pretty_assertions = { workspace = true }
54
codex-rs/execpolicy2/README.md
Normal file
@@ -0,0 +1,54 @@
# codex-execpolicy2

## Overview
- Policy engine and CLI built around `prefix_rule(pattern=[...], decision?, match?, not_match?)`.
- This release covers only the prefix-rule subset of the planned execpolicy v2 language; a richer language will follow.
- Tokens are matched in order; any `pattern` element may be a list to denote alternatives. `decision` defaults to `allow`; valid values: `allow`, `prompt`, `forbidden`.
- `match` / `not_match` supply example invocations that are validated at load time (think of them as unit tests); examples can be token arrays or strings (strings are tokenized with `shlex`).
- The CLI always prints the JSON serialization of the evaluation result (whether a match or not); a library-usage sketch follows this list.
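
A minimal library-usage sketch (hypothetical consumer code mirroring the crate's tests; `serde_json` is assumed as a dependency on the caller's side for printing):

```rust
use codex_execpolicy2::PolicyParser;

fn main() {
    // Parse a policy from Starlark source; the identifier is echoed in parse errors.
    let policy = PolicyParser::parse(
        "inline.codexpolicy",
        r#"prefix_rule(pattern = ["git", "status"])"#,
    )
    .expect("parse policy");

    // Commands are checked as pre-tokenized argv vectors.
    let cmd: Vec<String> = ["git", "status"].iter().map(|s| s.to_string()).collect();
    let evaluation = policy.check(&cmd);

    // Serializes to the JSON response shapes documented below.
    println!(
        "{}",
        serde_json::to_string_pretty(&evaluation).expect("serialize evaluation")
    );
}
```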

## Policy shapes
- Prefix rules use Starlark syntax:
  ```starlark
  prefix_rule(
      pattern = ["cmd", ["alt1", "alt2"]], # ordered tokens; list entries denote alternatives
      decision = "prompt", # allow | prompt | forbidden; defaults to allow
      match = [["cmd", "alt1"], "cmd alt2"], # examples that must match this rule
      not_match = [["cmd", "oops"], "cmd alt3"], # examples that must not match this rule
  )
  ```

## Response shapes
- Match:
  ```json
  {
    "match": {
      "decision": "allow|prompt|forbidden",
      "matchedRules": [
        {
          "prefixRuleMatch": {
            "matchedPrefix": ["<token>", "..."],
            "decision": "allow|prompt|forbidden"
          }
        }
      ]
    }
  }
  ```

- No match:
  ```json
  "noMatch"
  ```

- `matchedRules` lists every rule whose prefix matched the command; `matchedPrefix` is the exact prefix that matched.
- The effective `decision` is the strictest severity across all matches (`forbidden` > `prompt` > `allow`); a worked example follows.
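
A concrete illustration of strictest-wins (lifted from the crate's own tests and wrapped in a hypothetical `main`):

```rust
use codex_execpolicy2::Decision;
use codex_execpolicy2::Evaluation;
use codex_execpolicy2::PolicyParser;

fn main() {
    let policy = PolicyParser::parse(
        "strictest.codexpolicy",
        r#"
prefix_rule(pattern = ["git"], decision = "prompt")
prefix_rule(pattern = ["git", "commit"], decision = "forbidden")
"#,
    )
    .expect("parse policy");

    // `git commit -m hi` matches both rules; `forbidden` outranks `prompt`.
    let cmd: Vec<String> = ["git", "commit", "-m", "hi"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    assert!(matches!(
        policy.check(&cmd),
        Evaluation::Match { decision: Decision::Forbidden, .. }
    ));
}
```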

## CLI
- Provide a policy file (for example `src/default.codexpolicy`) to check a command:
  ```bash
  cargo run -p codex-execpolicy2 -- check --policy path/to/policy.codexpolicy git status
  ```
- Example outcomes:
  - Match: `{"match": { ... "decision": "allow" ... }}`
  - No match: `"noMatch"`
77
codex-rs/execpolicy2/examples/example.codexpolicy
Normal file
@@ -0,0 +1,77 @@

# Example policy to illustrate syntax; not comprehensive and not recommended for actual use.

prefix_rule(
    pattern = ["git", "reset", "--hard"],
    decision = "forbidden",
    match = [
        ["git", "reset", "--hard"],
    ],
    not_match = [
        ["git", "reset", "--keep"],
        "git reset --merge",
    ],
)

prefix_rule(
    pattern = ["ls"],
    match = [
        ["ls"],
        ["ls", "-l"],
        ["ls", "-a", "."],
    ],
)

prefix_rule(
    pattern = ["cat"],
    match = [
        ["cat", "file.txt"],
        ["cat", "-n", "README.md"],
    ],
)

prefix_rule(
    pattern = ["cp"],
    decision = "prompt",
    match = [
        ["cp", "foo", "bar"],
        "cp -r src dest",
    ],
)

prefix_rule(
    pattern = ["head"],
    match = [
        ["head", "README.md"],
        ["head", "-n", "5", "CHANGELOG.md"],
    ],
    not_match = [
        ["hea", "-n", "1,5p", "CHANGELOG.md"],
    ],
)

prefix_rule(
    pattern = ["printenv"],
    match = [
        ["printenv"],
        ["printenv", "PATH"],
    ],
    not_match = [
        ["print", "-0"],
    ],
)

prefix_rule(
    pattern = ["pwd"],
    match = [
        ["pwd"],
    ],
)

prefix_rule(
    pattern = ["which"],
    match = [
        ["which", "python3"],
        ["which", "-a", "python3"],
    ],
)
27
codex-rs/execpolicy2/src/decision.rs
Normal file
@@ -0,0 +1,27 @@
use serde::Deserialize;
use serde::Serialize;

use crate::error::Error;
use crate::error::Result;

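/// Decision severity, ordered weakest to strictest: the derived `Ord` ranks
/// `Allow < Prompt < Forbidden`, which is what lets `Policy::check` pick the
/// strictest decision across matched rules with `.max()`.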
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Decision {
    /// Command may run without further approval.
    Allow,
    /// Request explicit user approval; rejected outright when running with `approval_policy="never"`.
    Prompt,
    /// Command is blocked without further consideration.
    Forbidden,
}

impl Decision {
    pub fn parse(raw: &str) -> Result<Self> {
        match raw {
            "allow" => Ok(Self::Allow),
            "prompt" => Ok(Self::Prompt),
            "forbidden" => Ok(Self::Forbidden),
            other => Err(Error::InvalidDecision(other.to_string())),
        }
    }
}
26
codex-rs/execpolicy2/src/error.rs
Normal file
@@ -0,0 +1,26 @@
use starlark::Error as StarlarkError;
use thiserror::Error;

pub type Result<T> = std::result::Result<T, Error>;

#[derive(Debug, Error)]
pub enum Error {
    #[error("invalid decision: {0}")]
    InvalidDecision(String),
    #[error("invalid pattern element: {0}")]
    InvalidPattern(String),
    #[error("invalid example: {0}")]
    InvalidExample(String),
    #[error(
        "expected every example to match at least one rule. rules: {rules:?}; unmatched examples: \
         {examples:?}"
    )]
    ExampleDidNotMatch {
        rules: Vec<String>,
        examples: Vec<String>,
    },
    #[error("expected example to not match rule `{rule}`: {example}")]
    ExampleDidMatch { rule: String, example: String },
    #[error("starlark error: {0}")]
    Starlark(StarlarkError),
}
15
codex-rs/execpolicy2/src/lib.rs
Normal file
@@ -0,0 +1,15 @@
pub mod decision;
pub mod error;
pub mod parser;
pub mod policy;
pub mod rule;

pub use decision::Decision;
pub use error::Error;
pub use error::Result;
pub use parser::PolicyParser;
pub use policy::Evaluation;
pub use policy::Policy;
pub use rule::Rule;
pub use rule::RuleMatch;
pub use rule::RuleRef;
54
codex-rs/execpolicy2/src/main.rs
Normal file
@@ -0,0 +1,54 @@
use std::fs;
use std::path::Path;
use std::path::PathBuf;

use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use codex_execpolicy2::PolicyParser;

/// CLI for evaluating exec policies
#[derive(Parser)]
#[command(name = "codex-execpolicy2")]
enum Cli {
    /// Evaluate a command against a policy.
    Check {
        #[arg(short, long, value_name = "PATH")]
        policy: PathBuf,

        /// Command tokens to check.
        #[arg(
            value_name = "COMMAND",
            required = true,
            trailing_var_arg = true,
            allow_hyphen_values = true
        )]
        command: Vec<String>,
    },
}

fn main() -> Result<()> {
    let cli = Cli::parse();
    match cli {
        Cli::Check { policy, command } => cmd_check(policy, command),
    }
}

fn cmd_check(policy_path: PathBuf, args: Vec<String>) -> Result<()> {
    let policy = load_policy(&policy_path)?;

    let eval = policy.check(&args);
    let json = serde_json::to_string_pretty(&eval)?;
    println!("{json}");
    Ok(())
}

fn load_policy(policy_path: &Path) -> Result<codex_execpolicy2::Policy> {
    let policy_file_contents = fs::read_to_string(policy_path)
        .with_context(|| format!("failed to read policy at {}", policy_path.display()))?;
    let policy_identifier = policy_path.to_string_lossy();
    Ok(PolicyParser::parse(
        policy_identifier.as_ref(),
        &policy_file_contents,
    )?)
}
247
codex-rs/execpolicy2/src/parser.rs
Normal file
@@ -0,0 +1,247 @@
use multimap::MultiMap;
use shlex;
use starlark::any::ProvidesStaticType;
use starlark::environment::GlobalsBuilder;
use starlark::environment::Module;
use starlark::eval::Evaluator;
use starlark::starlark_module;
use starlark::syntax::AstModule;
use starlark::syntax::Dialect;
use starlark::values::Value;
use starlark::values::list::ListRef;
use starlark::values::list::UnpackList;
use starlark::values::none::NoneType;
use std::cell::RefCell;
use std::cell::RefMut;
use std::sync::Arc;

use crate::decision::Decision;
use crate::error::Error;
use crate::error::Result;
use crate::rule::PatternToken;
use crate::rule::PrefixPattern;
use crate::rule::PrefixRule;
use crate::rule::RuleRef;
use crate::rule::validate_match_examples;
use crate::rule::validate_not_match_examples;

// todo: support parsing multiple policies
pub struct PolicyParser;

impl PolicyParser {
    /// Parses a policy, tagging parser errors with `policy_identifier` so failures include the
    /// identifier alongside line numbers.
    pub fn parse(
        policy_identifier: &str,
        policy_file_contents: &str,
    ) -> Result<crate::policy::Policy> {
        let mut dialect = Dialect::Extended.clone();
        dialect.enable_f_strings = true;
        let ast = AstModule::parse(
            policy_identifier,
            policy_file_contents.to_string(),
            &dialect,
        )
        .map_err(Error::Starlark)?;
        let globals = GlobalsBuilder::standard().with(policy_builtins).build();
        let module = Module::new();

        let builder = RefCell::new(PolicyBuilder::new());
        {
            let mut eval = Evaluator::new(&module);
            eval.extra = Some(&builder);
            eval.eval_module(ast, &globals).map_err(Error::Starlark)?;
        }
        Ok(builder.into_inner().build())
    }
}

#[derive(Debug, ProvidesStaticType)]
struct PolicyBuilder {
    rules_by_program: MultiMap<String, RuleRef>,
}

impl PolicyBuilder {
    fn new() -> Self {
        Self {
            rules_by_program: MultiMap::new(),
        }
    }

    fn add_rule(&mut self, rule: RuleRef) {
        self.rules_by_program
            .insert(rule.program().to_string(), rule);
    }

    fn build(self) -> crate::policy::Policy {
        crate::policy::Policy::new(self.rules_by_program)
    }
}

fn parse_pattern<'v>(pattern: UnpackList<Value<'v>>) -> Result<Vec<PatternToken>> {
    let tokens: Vec<PatternToken> = pattern
        .items
        .into_iter()
        .map(parse_pattern_token)
        .collect::<Result<_>>()?;
    if tokens.is_empty() {
        Err(Error::InvalidPattern("pattern cannot be empty".to_string()))
    } else {
        Ok(tokens)
    }
}

fn parse_pattern_token<'v>(value: Value<'v>) -> Result<PatternToken> {
    if let Some(s) = value.unpack_str() {
        Ok(PatternToken::Single(s.to_string()))
    } else if let Some(list) = ListRef::from_value(value) {
        let tokens: Vec<String> = list
            .content()
            .iter()
            .map(|value| {
                value
                    .unpack_str()
                    .ok_or_else(|| {
                        Error::InvalidPattern(format!(
                            "pattern alternative must be a string (got {})",
                            value.get_type()
                        ))
                    })
                    .map(str::to_string)
            })
            .collect::<Result<_>>()?;

        match tokens.as_slice() {
            [] => Err(Error::InvalidPattern(
                "pattern alternatives cannot be empty".to_string(),
            )),
            [single] => Ok(PatternToken::Single(single.clone())),
            _ => Ok(PatternToken::Alts(tokens)),
        }
    } else {
        Err(Error::InvalidPattern(format!(
            "pattern element must be a string or list of strings (got {})",
            value.get_type()
        )))
    }
}

fn parse_examples<'v>(examples: UnpackList<Value<'v>>) -> Result<Vec<Vec<String>>> {
    examples.items.into_iter().map(parse_example).collect()
}

fn parse_example<'v>(value: Value<'v>) -> Result<Vec<String>> {
    if let Some(raw) = value.unpack_str() {
        parse_string_example(raw)
    } else if let Some(list) = ListRef::from_value(value) {
        parse_list_example(list)
    } else {
        Err(Error::InvalidExample(format!(
            "example must be a string or list of strings (got {})",
            value.get_type()
        )))
    }
}

fn parse_string_example(raw: &str) -> Result<Vec<String>> {
    let tokens = shlex::split(raw).ok_or_else(|| {
        Error::InvalidExample("example string has invalid shell syntax".to_string())
    })?;

    if tokens.is_empty() {
        Err(Error::InvalidExample(
            "example cannot be an empty string".to_string(),
        ))
    } else {
        Ok(tokens)
    }
}

fn parse_list_example(list: &ListRef) -> Result<Vec<String>> {
    let tokens: Vec<String> = list
        .content()
        .iter()
        .map(|value| {
            value
                .unpack_str()
                .ok_or_else(|| {
                    Error::InvalidExample(format!(
                        "example tokens must be strings (got {})",
                        value.get_type()
                    ))
                })
                .map(str::to_string)
        })
        .collect::<Result<_>>()?;

    if tokens.is_empty() {
        Err(Error::InvalidExample(
            "example cannot be an empty list".to_string(),
        ))
    } else {
        Ok(tokens)
    }
}

fn policy_builder<'v, 'a>(eval: &Evaluator<'v, 'a, '_>) -> RefMut<'a, PolicyBuilder> {
    #[expect(clippy::expect_used)]
    eval.extra
        .as_ref()
        .expect("policy_builder requires Evaluator.extra to be populated")
        .downcast_ref::<RefCell<PolicyBuilder>>()
        .expect("Evaluator.extra must contain a PolicyBuilder")
        .borrow_mut()
}

#[starlark_module]
fn policy_builtins(builder: &mut GlobalsBuilder) {
    fn prefix_rule<'v>(
        pattern: UnpackList<Value<'v>>,
        decision: Option<&'v str>,
        r#match: Option<UnpackList<Value<'v>>>,
        not_match: Option<UnpackList<Value<'v>>>,
        eval: &mut Evaluator<'v, '_, '_>,
    ) -> anyhow::Result<NoneType> {
        let decision = match decision {
            Some(raw) => Decision::parse(raw)?,
            None => Decision::Allow,
        };

        let pattern_tokens = parse_pattern(pattern)?;

        let matches: Vec<Vec<String>> =
            r#match.map(parse_examples).transpose()?.unwrap_or_default();
        let not_matches: Vec<Vec<String>> = not_match
            .map(parse_examples)
            .transpose()?
            .unwrap_or_default();

        let mut builder = policy_builder(eval);

        let (first_token, remaining_tokens) = pattern_tokens
            .split_first()
            .ok_or_else(|| Error::InvalidPattern("pattern cannot be empty".to_string()))?;

        let rest: Arc<[PatternToken]> = remaining_tokens.to_vec().into();

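        // Only the first token's alternatives expand into separate rules (one per
        // program, since `Policy` keys rules by program name); alternatives in the
        // tail stay inside a single rule rather than being cartesian-expanded.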
        let rules: Vec<RuleRef> = first_token
            .alternatives()
            .iter()
            .map(|head| {
                Arc::new(PrefixRule {
                    pattern: PrefixPattern {
                        first: Arc::from(head.as_str()),
                        rest: rest.clone(),
                    },
                    decision,
                }) as RuleRef
            })
            .collect();

        validate_not_match_examples(&rules, &not_matches)?;
        validate_match_examples(&rules, &matches)?;

        rules.into_iter().for_each(|rule| builder.add_rule(rule));
        Ok(NoneType)
    }
}
58
codex-rs/execpolicy2/src/policy.rs
Normal file
@@ -0,0 +1,58 @@
use crate::decision::Decision;
use crate::rule::RuleMatch;
use crate::rule::RuleRef;
use multimap::MultiMap;
use serde::Deserialize;
use serde::Serialize;

#[derive(Clone, Debug)]
pub struct Policy {
    rules_by_program: MultiMap<String, RuleRef>,
}

impl Policy {
    pub fn new(rules_by_program: MultiMap<String, RuleRef>) -> Self {
        Self { rules_by_program }
    }

    pub fn rules(&self) -> &MultiMap<String, RuleRef> {
        &self.rules_by_program
    }

    pub fn check(&self, cmd: &[String]) -> Evaluation {
        let rules = match cmd.first() {
            Some(first) => match self.rules_by_program.get_vec(first) {
                Some(rules) => rules,
                None => return Evaluation::NoMatch,
            },
            None => return Evaluation::NoMatch,
        };

        let matched_rules: Vec<RuleMatch> =
            rules.iter().filter_map(|rule| rule.matches(cmd)).collect();
        match matched_rules.iter().map(RuleMatch::decision).max() {
            Some(decision) => Evaluation::Match {
                decision,
                matched_rules,
            },
            None => Evaluation::NoMatch,
        }
    }
}

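/// Serialized with camelCase variant names, so a miss renders as the bare JSON
/// string `"noMatch"` (see the README's response shapes).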
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Evaluation {
    NoMatch,
    Match {
        decision: Decision,
        #[serde(rename = "matchedRules")]
        matched_rules: Vec<RuleMatch>,
    },
}

impl Evaluation {
    pub fn is_match(&self) -> bool {
        matches!(self, Self::Match { .. })
    }
}
147
codex-rs/execpolicy2/src/rule.rs
Normal file
@@ -0,0 +1,147 @@
use crate::decision::Decision;
use crate::error::Error;
use crate::error::Result;
use serde::Deserialize;
use serde::Serialize;
use shlex::try_join;
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;

/// Matches a single command token, either a fixed string or one of several allowed alternatives.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PatternToken {
    Single(String),
    Alts(Vec<String>),
}

impl PatternToken {
    fn matches(&self, token: &str) -> bool {
        match self {
            Self::Single(expected) => expected == token,
            Self::Alts(alternatives) => alternatives.iter().any(|alt| alt == token),
        }
    }

    pub fn alternatives(&self) -> &[String] {
        match self {
            Self::Single(expected) => std::slice::from_ref(expected),
            Self::Alts(alternatives) => alternatives,
        }
    }
}

/// Prefix matcher for commands with support for alternative match tokens.
/// First token is fixed since we key by the first token in policy.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PrefixPattern {
    pub first: Arc<str>,
    pub rest: Arc<[PatternToken]>,
}

impl PrefixPattern {
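    /// Returns the exact prefix tokens consumed when `cmd` starts with this
    /// pattern; e.g. the npm rule in the tests matches
    /// `npm install --no-save leftpad` and returns `["npm", "install", "--no-save"]`.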
    pub fn matches_prefix(&self, cmd: &[String]) -> Option<Vec<String>> {
        let pattern_length = self.rest.len() + 1;
        if cmd.len() < pattern_length || cmd[0] != self.first.as_ref() {
            return None;
        }

        for (pattern_token, cmd_token) in self.rest.iter().zip(&cmd[1..pattern_length]) {
            if !pattern_token.matches(cmd_token) {
                return None;
            }
        }

        Some(cmd[..pattern_length].to_vec())
    }
}

#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum RuleMatch {
    PrefixRuleMatch {
        #[serde(rename = "matchedPrefix")]
        matched_prefix: Vec<String>,
        decision: Decision,
    },
}

impl RuleMatch {
    pub fn decision(&self) -> Decision {
        match self {
            Self::PrefixRuleMatch { decision, .. } => *decision,
        }
    }
}

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PrefixRule {
    pub pattern: PrefixPattern,
    pub decision: Decision,
}

pub trait Rule: Any + Debug + Send + Sync {
    fn program(&self) -> &str;

    fn matches(&self, cmd: &[String]) -> Option<RuleMatch>;
}

pub type RuleRef = Arc<dyn Rule>;

impl Rule for PrefixRule {
    fn program(&self) -> &str {
        self.pattern.first.as_ref()
    }

    fn matches(&self, cmd: &[String]) -> Option<RuleMatch> {
        self.pattern
            .matches_prefix(cmd)
            .map(|matched_prefix| RuleMatch::PrefixRuleMatch {
                matched_prefix,
                decision: self.decision,
            })
    }
}

/// Ensure every provided example matches at least one rule; error with the
/// list of unmatched examples otherwise.
pub(crate) fn validate_match_examples(rules: &[RuleRef], matches: &[Vec<String>]) -> Result<()> {
    let mut unmatched_examples = Vec::new();

    for example in matches {
        if rules.iter().any(|rule| rule.matches(example).is_some()) {
            continue;
        }

        unmatched_examples.push(
            try_join(example.iter().map(String::as_str))
                .unwrap_or_else(|_| "unable to render example".to_string()),
        );
    }

    if unmatched_examples.is_empty() {
        Ok(())
    } else {
        Err(Error::ExampleDidNotMatch {
            rules: rules.iter().map(|rule| format!("{rule:?}")).collect(),
            examples: unmatched_examples,
        })
    }
}

/// Ensure that no rule matches any provided negative example.
pub(crate) fn validate_not_match_examples(
    rules: &[RuleRef],
    not_matches: &[Vec<String>],
) -> Result<()> {
    for example in not_matches {
        if let Some(rule) = rules.iter().find(|rule| rule.matches(example).is_some()) {
            return Err(Error::ExampleDidMatch {
                rule: format!("{rule:?}"),
                example: try_join(example.iter().map(String::as_str))
                    .unwrap_or_else(|_| "unable to render example".to_string()),
            });
        }
    }

    Ok(())
}
256
codex-rs/execpolicy2/tests/basic.rs
Normal file
@@ -0,0 +1,256 @@
use std::any::Any;
use std::sync::Arc;

use codex_execpolicy2::Decision;
use codex_execpolicy2::Evaluation;
use codex_execpolicy2::PolicyParser;
use codex_execpolicy2::RuleMatch;
use codex_execpolicy2::RuleRef;
use codex_execpolicy2::rule::PatternToken;
use codex_execpolicy2::rule::PrefixPattern;
use codex_execpolicy2::rule::PrefixRule;
use pretty_assertions::assert_eq;

fn tokens(cmd: &[&str]) -> Vec<String> {
    cmd.iter().map(std::string::ToString::to_string).collect()
}

#[derive(Clone, Debug, Eq, PartialEq)]
enum RuleSnapshot {
    Prefix(PrefixRule),
}

fn rule_snapshots(rules: &[RuleRef]) -> Vec<RuleSnapshot> {
    rules
        .iter()
        .map(|rule| {
            let rule_any = rule.as_ref() as &dyn Any;
            if let Some(prefix_rule) = rule_any.downcast_ref::<PrefixRule>() {
                RuleSnapshot::Prefix(prefix_rule.clone())
            } else {
                panic!("unexpected rule type in RuleRef: {rule:?}");
            }
        })
        .collect()
}

#[test]
fn basic_match() {
    let policy_src = r#"
prefix_rule(
pattern = ["git", "status"],
)
"#;
    let policy = PolicyParser::parse("test.codexpolicy", policy_src).expect("parse policy");
    let cmd = tokens(&["git", "status"]);
    let evaluation = policy.check(&cmd);
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Allow,
            matched_rules: vec![RuleMatch::PrefixRuleMatch {
                matched_prefix: tokens(&["git", "status"]),
                decision: Decision::Allow,
            }],
        },
        evaluation
    );
}

#[test]
fn only_first_token_alias_expands_to_multiple_rules() {
    let policy_src = r#"
prefix_rule(
pattern = [["bash", "sh"], ["-c", "-l"]],
)
"#;
    let policy = PolicyParser::parse("test.codexpolicy", policy_src).expect("parse policy");

    let bash_rules = rule_snapshots(policy.rules().get_vec("bash").expect("bash rules"));
    let sh_rules = rule_snapshots(policy.rules().get_vec("sh").expect("sh rules"));
    assert_eq!(
        vec![RuleSnapshot::Prefix(PrefixRule {
            pattern: PrefixPattern {
                first: Arc::from("bash"),
                rest: vec![PatternToken::Alts(vec!["-c".to_string(), "-l".to_string()])].into(),
            },
            decision: Decision::Allow,
        })],
        bash_rules
    );
    assert_eq!(
        vec![RuleSnapshot::Prefix(PrefixRule {
            pattern: PrefixPattern {
                first: Arc::from("sh"),
                rest: vec![PatternToken::Alts(vec!["-c".to_string(), "-l".to_string()])].into(),
            },
            decision: Decision::Allow,
        })],
        sh_rules
    );

    let bash_eval = policy.check(&tokens(&["bash", "-c", "echo", "hi"]));
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Allow,
            matched_rules: vec![RuleMatch::PrefixRuleMatch {
                matched_prefix: tokens(&["bash", "-c"]),
                decision: Decision::Allow,
            }],
        },
        bash_eval
    );

    let sh_eval = policy.check(&tokens(&["sh", "-l", "echo", "hi"]));
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Allow,
            matched_rules: vec![RuleMatch::PrefixRuleMatch {
                matched_prefix: tokens(&["sh", "-l"]),
                decision: Decision::Allow,
            }],
        },
        sh_eval
    );
}

#[test]
fn tail_aliases_are_not_cartesian_expanded() {
    let policy_src = r#"
prefix_rule(
pattern = ["npm", ["i", "install"], ["--legacy-peer-deps", "--no-save"]],
)
"#;
    let policy = PolicyParser::parse("test.codexpolicy", policy_src).expect("parse policy");

    let rules = rule_snapshots(policy.rules().get_vec("npm").expect("npm rules"));
    assert_eq!(
        vec![RuleSnapshot::Prefix(PrefixRule {
            pattern: PrefixPattern {
                first: Arc::from("npm"),
                rest: vec![
                    PatternToken::Alts(vec!["i".to_string(), "install".to_string()]),
                    PatternToken::Alts(vec![
                        "--legacy-peer-deps".to_string(),
                        "--no-save".to_string(),
                    ]),
                ]
                .into(),
            },
            decision: Decision::Allow,
        })],
        rules
    );

    let npm_i = policy.check(&tokens(&["npm", "i", "--legacy-peer-deps"]));
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Allow,
            matched_rules: vec![RuleMatch::PrefixRuleMatch {
                matched_prefix: tokens(&["npm", "i", "--legacy-peer-deps"]),
                decision: Decision::Allow,
            }],
        },
        npm_i
    );

    let npm_install = policy.check(&tokens(&["npm", "install", "--no-save", "leftpad"]));
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Allow,
            matched_rules: vec![RuleMatch::PrefixRuleMatch {
                matched_prefix: tokens(&["npm", "install", "--no-save"]),
                decision: Decision::Allow,
            }],
        },
        npm_install
    );
}

#[test]
fn match_and_not_match_examples_are_enforced() {
    let policy_src = r#"
prefix_rule(
pattern = ["git", "status"],
match = [["git", "status"], "git status"],
not_match = [
["git", "--config", "color.status=always", "status"],
"git --config color.status=always status",
],
)
"#;
    let policy = PolicyParser::parse("test.codexpolicy", policy_src).expect("parse policy");
    let match_eval = policy.check(&tokens(&["git", "status"]));
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Allow,
            matched_rules: vec![RuleMatch::PrefixRuleMatch {
                matched_prefix: tokens(&["git", "status"]),
                decision: Decision::Allow,
            }],
        },
        match_eval
    );

    let no_match_eval = policy.check(&tokens(&[
        "git",
        "--config",
        "color.status=always",
        "status",
    ]));
    assert_eq!(Evaluation::NoMatch, no_match_eval);
}

#[test]
fn strictest_decision_wins_across_matches() {
    let policy_src = r#"
prefix_rule(
pattern = ["git", "status"],
decision = "allow",
)
prefix_rule(
pattern = ["git"],
decision = "prompt",
)
prefix_rule(
pattern = ["git", "commit"],
decision = "forbidden",
)
"#;
    let policy = PolicyParser::parse("test.codexpolicy", policy_src).expect("parse policy");

    let status = policy.check(&tokens(&["git", "status"]));
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Prompt,
            matched_rules: vec![
                RuleMatch::PrefixRuleMatch {
                    matched_prefix: tokens(&["git", "status"]),
                    decision: Decision::Allow,
                },
                RuleMatch::PrefixRuleMatch {
                    matched_prefix: tokens(&["git"]),
                    decision: Decision::Prompt,
                },
            ],
        },
        status
    );

    let commit = policy.check(&tokens(&["git", "commit", "-m", "hi"]));
    assert_eq!(
        Evaluation::Match {
            decision: Decision::Forbidden,
            matched_rules: vec![
                RuleMatch::PrefixRuleMatch {
                    matched_prefix: tokens(&["git"]),
                    decision: Decision::Prompt,
                },
                RuleMatch::PrefixRuleMatch {
                    matched_prefix: tokens(&["git", "commit"]),
                    decision: Decision::Forbidden,
                },
            ],
        },
        commit
    );
}
@@ -7,10 +7,17 @@ version = { workspace = true }
workspace = true

[dependencies]
keyring = { workspace = true, features = [
"apple-native",
"crypto-rust",
"linux-native-async-persistent",
"windows-native",
] }
keyring = { workspace = true, features = ["crypto-rust"] }
tracing = { workspace = true }

[target.'cfg(target_os = "linux")'.dependencies]
keyring = { workspace = true, features = ["linux-native-async-persistent"] }

[target.'cfg(target_os = "macos")'.dependencies]
keyring = { workspace = true, features = ["apple-native"] }

[target.'cfg(target_os = "windows")'.dependencies]
keyring = { workspace = true, features = ["windows-native"] }

[target.'cfg(any(target_os = "freebsd", target_os = "openbsd"))'.dependencies]
keyring = { workspace = true, features = ["sync-secret-service"] }

@@ -11,11 +11,4 @@ path = "src/lib.rs"
workspace = true

[dependencies]
[target.'cfg(target_os = "linux")'.dependencies]
libc = { workspace = true }

[target.'cfg(target_os = "android")'.dependencies]
libc = { workspace = true }

[target.'cfg(target_os = "macos")'.dependencies]
libc = { workspace = true }

@@ -10,6 +10,10 @@ pub fn pre_main_hardening() {
    #[cfg(target_os = "macos")]
    pre_main_hardening_macos();

    // On FreeBSD and OpenBSD, apply hardening similar to Linux/macOS:
    #[cfg(any(target_os = "freebsd", target_os = "openbsd"))]
    pre_main_hardening_bsd();

    #[cfg(windows)]
    pre_main_hardening_windows();
}
@@ -20,7 +24,13 @@ const PRCTL_FAILED_EXIT_CODE: i32 = 5;
#[cfg(target_os = "macos")]
const PTRACE_DENY_ATTACH_FAILED_EXIT_CODE: i32 = 6;

#[cfg(any(target_os = "linux", target_os = "android", target_os = "macos"))]
#[cfg(any(
    target_os = "linux",
    target_os = "android",
    target_os = "macos",
    target_os = "freebsd",
    target_os = "openbsd"
))]
const SET_RLIMIT_CORE_FAILED_EXIT_CODE: i32 = 7;

#[cfg(any(target_os = "linux", target_os = "android"))]
@@ -57,6 +67,27 @@ pub(crate) fn pre_main_hardening_linux() {
    }
}

#[cfg(any(target_os = "freebsd", target_os = "openbsd"))]
pub(crate) fn pre_main_hardening_bsd() {
    // FreeBSD/OpenBSD: set RLIMIT_CORE to 0 and clear LD_* env vars
    set_core_file_size_limit_to_zero();

    let ld_keys: Vec<String> = std::env::vars()
        .filter_map(|(key, _)| {
            if key.starts_with("LD_") {
                Some(key)
            } else {
                None
            }
        })
        .collect();
    for key in ld_keys {
        unsafe {
            std::env::remove_var(key);
        }
    }
}

#[cfg(target_os = "macos")]
pub(crate) fn pre_main_hardening_macos() {
    // Prevent debuggers from attaching to this process.
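`pre_main_hardening_bsd` relies on `set_core_file_size_limit_to_zero()`, whose body falls outside this hunk. A hedged sketch of what such a helper typically looks like with the `libc` bindings this crate depends on; the exit-code constant is the one defined above, but the body itself is an assumption, not the commit's actual code:

```rust
#[cfg(any(target_os = "freebsd", target_os = "openbsd"))]
fn set_core_file_size_limit_to_zero() {
    // Assumed implementation: RLIMIT_CORE = 0 stops a crashing process from
    // writing a core dump, which could otherwise leak in-memory secrets.
    let limit = libc::rlimit {
        rlim_cur: 0,
        rlim_max: 0,
    };
    // SAFETY: setrlimit only reads the struct passed to it.
    if unsafe { libc::setrlimit(libc::RLIMIT_CORE, &limit) } != 0 {
        std::process::exit(SET_RLIMIT_CORE_FAILED_EXIT_CODE);
    }
}
```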
@@ -158,6 +158,19 @@ fn local_image_error_placeholder(
    }
}

fn invalid_image_error_placeholder(
    path: &std::path::Path,
    error: impl std::fmt::Display,
) -> ContentItem {
    ContentItem::InputText {
        text: format!(
            "Image located at `{}` is invalid: {}",
            path.display(),
            error
        ),
    }
}

impl From<ResponseInputItem> for ResponseItem {
    fn from(item: ResponseInputItem) -> Self {
        match item {
@@ -247,9 +260,10 @@ impl From<Vec<UserInput>> for ResponseInputItem {
                    image_url: image.into_data_url(),
                },
                Err(err) => {
                    tracing::warn!("Failed to resize image {}: {}", path.display(), err);
                    if matches!(&err, ImageProcessingError::Read { .. }) {
                        local_image_error_placeholder(&path, &err)
                    } else if err.is_invalid_image() {
                        invalid_image_error_placeholder(&path, &err)
                    } else {
                        match std::fs::read(&path) {
                            Ok(bytes) => {
@@ -365,6 +379,7 @@ impl Serialize for FunctionCallOutputPayload {
    where
        S: Serializer,
    {
        tracing::error!("Payload: {:?}", self);
        if let Some(items) = &self.content_items {
            items.serialize(serializer)
        } else {
@@ -452,7 +467,7 @@ fn convert_content_blocks_to_items(
) -> Option<Vec<FunctionCallOutputContentItem>> {
    let mut saw_image = false;
    let mut items = Vec::with_capacity(blocks.len());

    tracing::warn!("Blocks: {:?}", blocks);
    for block in blocks {
        match block {
            ContentBlock::TextContent(text) => {
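The new `invalid_image_error_placeholder` does nothing more than format the path and the error's `Display` output into an `InputText` item, so the model sees a readable explanation instead of broken image bytes. A standalone sketch of the same formatting, with a hypothetical path and error message:

```rust
fn main() {
    let path = std::path::Path::new("/tmp/avatar.png"); // hypothetical path
    let error = "corrupt JPEG: unexpected EOF"; // hypothetical decode message
    let text = format!("Image located at `{}` is invalid: {}", path.display(), error);
    assert_eq!(
        text,
        "Image located at `/tmp/avatar.png` is invalid: corrupt JPEG: unexpected EOF"
    );
}
```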
@@ -16,12 +16,7 @@ codex-keyring-store = { workspace = true }
codex-protocol = { workspace = true }
dirs = { workspace = true }
futures = { workspace = true, default-features = false, features = ["std"] }
keyring = { workspace = true, features = [
    "apple-native",
    "crypto-rust",
    "linux-native-async-persistent",
    "windows-native",
] }
keyring = { workspace = true, features = ["crypto-rust"] }
mcp-types = { path = "../mcp-types" }
oauth2 = "5"
reqwest = { version = "0.12", default-features = false, features = [
@@ -56,9 +51,21 @@ tokio = { workspace = true, features = [
tracing = { workspace = true, features = ["log"] }
urlencoding = { workspace = true }
webbrowser = { workspace = true }
which = { workspace = true }

[dev-dependencies]
escargot = { workspace = true }
pretty_assertions = { workspace = true }
serial_test = { workspace = true }
tempfile = { workspace = true }
[target.'cfg(target_os = "linux")'.dependencies]
keyring = { workspace = true, features = ["linux-native-async-persistent"] }

[target.'cfg(target_os = "macos")'.dependencies]
keyring = { workspace = true, features = ["apple-native"] }

[target.'cfg(target_os = "windows")'.dependencies]
keyring = { workspace = true, features = ["windows-native"] }

[target.'cfg(any(target_os = "freebsd", target_os = "openbsd"))'.dependencies]
keyring = { workspace = true, features = ["sync-secret-service"] }
@@ -3,6 +3,7 @@ mod find_codex_home;
mod logging_client_handler;
mod oauth;
mod perform_oauth_login;
mod program_resolver;
mod rmcp_client;
mod utils;
222
codex-rs/rmcp-client/src/program_resolver.rs
Normal file
@@ -0,0 +1,222 @@
//! Platform-specific program resolution for MCP server execution.
//!
//! This module provides a unified interface for resolving executable paths
//! across different operating systems. The key challenge it addresses is that
//! Windows cannot execute script files (e.g., `.cmd`, `.bat`) directly through
//! `Command::new()` without their file extensions, while Unix systems handle
//! scripts natively through shebangs.
//!
//! The `resolve` function abstracts these platform differences:
//! - On Unix: Returns the program unchanged (OS handles script execution)
//! - On Windows: Uses the `which` crate to resolve full paths including extensions

use std::collections::HashMap;
use std::ffi::OsString;

#[cfg(windows)]
use std::env;
#[cfg(windows)]
use tracing::debug;

/// Resolves a program to its executable path on Unix systems.
///
/// Unix systems handle PATH resolution and script execution natively through
/// the kernel's shebang (`#!`) mechanism, so this function simply returns
/// the program name unchanged.
#[cfg(unix)]
pub fn resolve(program: OsString, _env: &HashMap<String, String>) -> std::io::Result<OsString> {
    Ok(program)
}

/// Resolves a program to its executable path on Windows systems.
///
/// Windows requires explicit file extensions for script execution. This function
/// uses the `which` crate to search the `PATH` environment variable and find
/// the full path to the executable, including necessary script extensions
/// (`.cmd`, `.bat`, etc.) defined in `PATHEXT`.
///
/// This enables tools like `npx`, `pnpm`, and `yarn` to work correctly on Windows
/// without requiring users to specify full paths or extensions in their configuration.
#[cfg(windows)]
pub fn resolve(program: OsString, env: &HashMap<String, String>) -> std::io::Result<OsString> {
    // Get current directory for relative path resolution
    let cwd = env::current_dir()
        .map_err(|e| std::io::Error::other(format!("Failed to get current directory: {e}")))?;

    // Extract PATH from environment for search locations
    let search_path = env.get("PATH");

    // Attempt resolution via which crate
    match which::which_in(&program, search_path, &cwd) {
        Ok(resolved) => {
            debug!("Resolved {:?} to {:?}", program, resolved);
            Ok(resolved.into_os_string())
        }
        Err(e) => {
            debug!(
                "Failed to resolve {:?}: {}. Using original path",
                program, e
            );
            // Fallback to original program - let Command::new() handle the error
            Ok(program)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::create_env_for_mcp_server;
    use anyhow::Result;
    use std::fs;
    use std::path::Path;
    use tempfile::TempDir;
    use tokio::process::Command;

    /// Unix: Verifies the OS handles script execution without file extensions.
    #[cfg(unix)]
    #[tokio::test]
    async fn test_unix_executes_script_without_extension() -> Result<()> {
        let env = TestExecutableEnv::new()?;
        let mut cmd = Command::new(&env.program_name);
        cmd.envs(&env.mcp_env);

        let output = cmd.output().await;
        assert!(output.is_ok(), "Unix should execute scripts directly");
        Ok(())
    }

    /// Windows: Verifies scripts fail to execute without the proper extension.
    #[cfg(windows)]
    #[tokio::test]
    async fn test_windows_fails_without_extension() -> Result<()> {
        let env = TestExecutableEnv::new()?;
        let mut cmd = Command::new(&env.program_name);
        cmd.envs(&env.mcp_env);

        let output = cmd.output().await;
        assert!(
            output.is_err(),
            "Windows requires .cmd/.bat extension for direct execution"
        );
        Ok(())
    }

    /// Windows: Verifies scripts with an explicit extension execute correctly.
    #[cfg(windows)]
    #[tokio::test]
    async fn test_windows_succeeds_with_extension() -> Result<()> {
        let env = TestExecutableEnv::new()?;
        // Append the `.cmd` extension to the program name
        let program_with_ext = format!("{}.cmd", env.program_name);
        let mut cmd = Command::new(&program_with_ext);
        cmd.envs(&env.mcp_env);

        let output = cmd.output().await;
        assert!(
            output.is_ok(),
            "Windows should execute scripts when the extension is provided"
        );
        Ok(())
    }

    /// Verifies program resolution enables successful execution on all platforms.
    #[tokio::test]
    async fn test_resolved_program_executes_successfully() -> Result<()> {
        let env = TestExecutableEnv::new()?;
        let program = OsString::from(&env.program_name);

        // Apply platform-specific resolution
        let resolved = resolve(program, &env.mcp_env)?;

        // Verify resolved path executes successfully
        let mut cmd = Command::new(resolved);
        cmd.envs(&env.mcp_env);
        let output = cmd.output().await;

        assert!(
            output.is_ok(),
            "Resolved program should execute successfully"
        );
        Ok(())
    }

    // Test fixture for creating temporary executables in a controlled environment.
    struct TestExecutableEnv {
        // Held to prevent the temporary directory from being deleted.
        _temp_dir: TempDir,
        program_name: String,
        mcp_env: HashMap<String, String>,
    }

    impl TestExecutableEnv {
        const TEST_PROGRAM: &'static str = "test_mcp_server";

        fn new() -> Result<Self> {
            let temp_dir = TempDir::new()?;
            let dir_path = temp_dir.path();

            Self::create_executable(dir_path)?;

            // Build a clean environment with the temp dir in the PATH.
            let mut extra_env = HashMap::new();
            extra_env.insert("PATH".to_string(), Self::build_path(dir_path));

            #[cfg(windows)]
            extra_env.insert("PATHEXT".to_string(), Self::ensure_cmd_extension());

            let mcp_env = create_env_for_mcp_server(Some(extra_env), &[]);

            Ok(Self {
                _temp_dir: temp_dir,
                program_name: Self::TEST_PROGRAM.to_string(),
                mcp_env,
            })
        }

        /// Creates a simple, platform-specific executable script.
        fn create_executable(dir: &Path) -> Result<()> {
            #[cfg(windows)]
            {
                let file = dir.join(format!("{}.cmd", Self::TEST_PROGRAM));
                fs::write(&file, "@echo off\nexit 0")?;
            }

            #[cfg(unix)]
            {
                let file = dir.join(Self::TEST_PROGRAM);
                fs::write(&file, "#!/bin/sh\nexit 0")?;
                Self::set_executable(&file)?;
            }

            Ok(())
        }

        #[cfg(unix)]
        fn set_executable(path: &Path) -> Result<()> {
            use std::os::unix::fs::PermissionsExt;
            let mut perms = fs::metadata(path)?.permissions();
            perms.set_mode(0o755);
            fs::set_permissions(path, perms)?;
            Ok(())
        }

        /// Prepends the given directory to the system's PATH variable.
        fn build_path(dir: &Path) -> String {
            let current = std::env::var("PATH").unwrap_or_default();
            let sep = if cfg!(windows) { ";" } else { ":" };
            format!("{}{sep}{current}", dir.to_string_lossy())
        }

        /// Ensures `.CMD` is in the `PATHEXT` variable on Windows for script discovery.
        #[cfg(windows)]
        fn ensure_cmd_extension() -> String {
            let current = std::env::var("PATHEXT").unwrap_or_default();
            if current.to_uppercase().contains(".CMD") {
                current
            } else {
                format!(".CMD;{current}")
            }
        }
    }
}
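A hedged usage sketch of the `which::which_in` call the Windows branch builds on: it searches the supplied `PATH` value relative to the given working directory (honoring `PATHEXT` on Windows), so a bare name like `npx` comes back as its full, extension-qualified path. The program name here is illustrative:

```rust
use std::error::Error;

fn main() -> Result<(), Box<dyn Error>> {
    // Resolve "npx" against the current PATH, mirroring the arguments that
    // resolve() passes on Windows (there, PATH comes from the MCP server env).
    let found = which::which_in("npx", std::env::var_os("PATH"), std::env::current_dir()?)?;
    println!("{}", found.display());
    Ok(())
}
```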
@@ -47,6 +47,7 @@ use crate::logging_client_handler::LoggingClientHandler;
use crate::oauth::OAuthCredentialsStoreMode;
use crate::oauth::OAuthPersistor;
use crate::oauth::StoredOAuthTokens;
use crate::program_resolver;
use crate::utils::apply_default_headers;
use crate::utils::build_default_headers;
use crate::utils::convert_call_tool_result;
@@ -91,13 +92,20 @@ impl RmcpClient {
        cwd: Option<PathBuf>,
    ) -> io::Result<Self> {
        let program_name = program.to_string_lossy().into_owned();
        let mut command = Command::new(&program);

        // Build environment for program resolution and subprocess
        let envs = create_env_for_mcp_server(env, env_vars);

        // Resolve program to executable path (platform-specific)
        let resolved_program = program_resolver::resolve(program, &envs)?;

        let mut command = Command::new(resolved_program);
        command
            .kill_on_drop(true)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .env_clear()
            .envs(create_env_for_mcp_server(env, env_vars))
            .envs(envs)
            .args(&args);
        if let Some(cwd) = cwd {
            command.current_dir(cwd);
@@ -29,6 +29,7 @@ clap = { workspace = true, features = ["derive"] }
codex-ansi-escape = { workspace = true }
codex-app-server-protocol = { workspace = true }
codex-arg0 = { workspace = true }
codex-backend-client = { workspace = true }
codex-common = { workspace = true, features = [
    "cli",
    "elapsed",
@@ -496,6 +496,12 @@ impl App {
            AppEvent::FileSearchResult { query, matches } => {
                self.chat_widget.apply_file_search_result(query, matches);
            }
            AppEvent::RateLimitSnapshotFetched(status) => {
                self.chat_widget
                    .on_rate_limit_snapshot(Some(status.snapshot.clone()));
                self.chat_widget
                    .reconcile_plan_type_from_usage(status.plan_type);
            }
            AppEvent::UpdateReasoningEffort(effort) => {
                self.on_update_reasoning_effort(effort);
            }
@@ -1,5 +1,6 @@
use std::path::PathBuf;

use codex_backend_client::RateLimitStatus;
use codex_common::approval_presets::ApprovalPreset;
use codex_common::model_presets::ModelPreset;
use codex_core::protocol::ConversationPathResponseEvent;
@@ -41,6 +42,9 @@ pub(crate) enum AppEvent {
        matches: Vec<FileMatch>,
    },

    /// Result of refreshing rate limits
    RateLimitSnapshotFetched(RateLimitStatus),

    /// Result of computing a `/diff` command.
    DiffResult(String),
@@ -247,6 +247,16 @@ impl TextArea {
            } if modifiers == (KeyModifiers::CONTROL | KeyModifiers::ALT) => {
                self.delete_backward_word()
            },
            KeyEvent {
                code: KeyCode::Char(c),
                modifiers,
                ..
            } if modifiers.contains(KeyModifiers::ALT)
                && modifiers.contains(KeyModifiers::CONTROL) =>
            {
                // AltGr on many keyboards reports as Ctrl+Alt; treat it as a literal char.
                self.insert_str(&c.to_string());
            },
            KeyEvent {
                code: KeyCode::Backspace,
                modifiers: KeyModifiers::ALT,
@@ -1454,6 +1464,17 @@ mod tests {
        assert_eq!(t.cursor(), 3);
    }

    #[test]
    fn altgr_ctrl_alt_char_inserts_literal() {
        let mut t = ta_with("");
        t.input(KeyEvent::new(
            KeyCode::Char('c'),
            KeyModifiers::CONTROL | KeyModifiers::ALT,
        ));
        assert_eq!(t.text(), "c");
        assert_eq!(t.cursor(), 1);
    }

    #[test]
    fn cursor_vertical_movement_across_lines_and_bounds() {
        let mut t = ta_with("short\nloooooooooong\nmid");
@@ -2,7 +2,11 @@ use std::collections::HashMap;
use std::collections::VecDeque;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;

use codex_app_server_protocol::AuthMode;
use codex_backend_client::Client as BackendClient;
use codex_backend_client::RateLimitStatus;
use codex_core::config::Config;
use codex_core::config::types::Notifications;
use codex_core::git_info::current_branch_name;
@@ -62,6 +66,7 @@ use ratatui::text::Line;
use ratatui::widgets::Paragraph;
use ratatui::widgets::Wrap;
use tokio::sync::mpsc::UnboundedSender;
use tokio::task::JoinHandle;
use tracing::debug;

use crate::app_event::AppEvent;
@@ -116,6 +121,7 @@ use codex_common::approval_presets::builtin_approval_presets;
use codex_common::model_presets::ModelPreset;
use codex_common::model_presets::builtin_model_presets;
use codex_core::AuthManager;
use codex_core::CodexAuth;
use codex_core::ConversationManager;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::SandboxPolicy;
@@ -255,6 +261,8 @@ pub(crate) struct ChatWidget {
    rate_limit_snapshot: Option<RateLimitSnapshotDisplay>,
    rate_limit_warnings: RateLimitWarningState,
    rate_limit_switch_prompt: RateLimitSwitchPromptState,
    rate_limit_poller: Option<JoinHandle<()>>,
    last_plan_refresh_attempt: Option<String>,
    // Stream lifecycle controller
    stream_controller: Option<StreamController>,
    running_commands: HashMap<String, RunningCommand>,
@@ -494,7 +502,7 @@ impl ChatWidget {
        }
    }

    fn on_rate_limit_snapshot(&mut self, snapshot: Option<RateLimitSnapshot>) {
    pub(crate) fn on_rate_limit_snapshot(&mut self, snapshot: Option<RateLimitSnapshot>) {
        if let Some(snapshot) = snapshot {
            let warnings = self.rate_limit_warnings.take_warnings(
                snapshot
@@ -547,6 +555,51 @@ impl ChatWidget {
            self.rate_limit_snapshot = None;
        }
    }

    pub(crate) fn reconcile_plan_type_from_usage(&mut self, backend_plan: String) {
        let trimmed = backend_plan.trim();
        if trimmed.is_empty() {
            return;
        }

        let plan_lower = trimmed.to_ascii_lowercase();
        let Some(auth) = self.auth_manager.auth() else {
            self.last_plan_refresh_attempt = None;
            return;
        };
        if auth.mode != AuthMode::ChatGPT {
            self.last_plan_refresh_attempt = None;
            return;
        }

        let current_plan = auth
            .raw_plan_type()
            .map(|plan| plan.to_ascii_lowercase());

        if current_plan
            .as_deref()
            .is_some_and(|plan| plan == plan_lower)
        {
            self.last_plan_refresh_attempt = None;
            return;
        }

        if self
            .last_plan_refresh_attempt
            .as_deref()
            .is_some_and(|attempt| attempt == plan_lower)
        {
            return;
        }

        self.last_plan_refresh_attempt = Some(plan_lower);
        let auth_manager = self.auth_manager.clone();
        tokio::spawn(async move {
            if let Err(err) = auth_manager.refresh_token().await {
                debug!(?err, "failed to refresh auth after plan mismatch");
            }
        });
    }

    /// Finalize any active exec as failed and stop/clear running UI state.
    fn finalize_turn(&mut self) {
        // Ensure any spinner is replaced by a red ✗ and flushed into history.
@@ -1034,7 +1087,7 @@ impl ChatWidget {
        let placeholder = EXAMPLE_PROMPTS[rng.random_range(0..EXAMPLE_PROMPTS.len())].to_string();
        let codex_op_tx = spawn_agent(config.clone(), app_event_tx.clone(), conversation_manager);

        Self {
        let mut widget = Self {
            app_event_tx: app_event_tx.clone(),
            frame_requester: frame_requester.clone(),
            codex_op_tx,
@@ -1058,6 +1111,8 @@ impl ChatWidget {
            rate_limit_snapshot: None,
            rate_limit_warnings: RateLimitWarningState::default(),
            rate_limit_switch_prompt: RateLimitSwitchPromptState::default(),
            rate_limit_poller: None,
            last_plan_refresh_attempt: None,
            stream_controller: None,
            running_commands: HashMap::new(),
            task_complete_pending: false,
@@ -1076,7 +1131,11 @@ impl ChatWidget {
            last_rendered_width: std::cell::Cell::new(None),
            feedback,
            current_rollout_path: None,
        }
        };

        widget.prefetch_rate_limits();

        widget
    }

    /// Create a ChatWidget attached to an existing conversation (e.g., a fork).
@@ -1101,7 +1160,7 @@ impl ChatWidget {
        let codex_op_tx =
            spawn_agent_from_existing(conversation, session_configured, app_event_tx.clone());

        Self {
        let mut widget = Self {
            app_event_tx: app_event_tx.clone(),
            frame_requester: frame_requester.clone(),
            codex_op_tx,
@@ -1125,6 +1184,8 @@ impl ChatWidget {
            rate_limit_snapshot: None,
            rate_limit_warnings: RateLimitWarningState::default(),
            rate_limit_switch_prompt: RateLimitSwitchPromptState::default(),
            rate_limit_poller: None,
            last_plan_refresh_attempt: None,
            stream_controller: None,
            running_commands: HashMap::new(),
            task_complete_pending: false,
@@ -1143,7 +1204,11 @@ impl ChatWidget {
            last_rendered_width: std::cell::Cell::new(None),
            feedback,
            current_rollout_path: None,
        }
        };

        widget.prefetch_rate_limits();

        widget
    }

    pub(crate) fn handle_key_event(&mut self, key_event: KeyEvent) {
@@ -1737,6 +1802,38 @@ impl ChatWidget {
            Local::now(),
        ));
    }

    fn stop_rate_limit_poller(&mut self) {
        if let Some(handle) = self.rate_limit_poller.take() {
            handle.abort();
        }
    }

    fn prefetch_rate_limits(&mut self) {
        self.stop_rate_limit_poller();

        let Some(auth) = self.auth_manager.auth() else {
            return;
        };
        if auth.mode != AuthMode::ChatGPT {
            return;
        }

        let base_url = self.config.chatgpt_base_url.clone();
        let app_event_tx = self.app_event_tx.clone();

        let handle = tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(60));

            loop {
                if let Some(status) = fetch_rate_limits(base_url.clone(), auth.clone()).await {
                    app_event_tx.send(AppEvent::RateLimitSnapshotFetched(status));
                }
                interval.tick().await;
            }
        });

        self.rate_limit_poller = Some(handle);
    }

    fn lower_cost_preset(&self) -> Option<ModelPreset> {
        let auth_mode = self.auth_manager.auth().map(|auth| auth.mode);
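One subtlety in the poller above: `tokio::time::interval`'s first `tick()` completes immediately, and because the loop fetches before awaiting the tick, the first two fetches go out back to back before the loop settles into its 60-second cadence. A minimal sketch of that timing behavior:

```rust
use std::time::Duration;
use std::time::Instant;

#[tokio::main]
async fn main() {
    let mut ticker = tokio::time::interval(Duration::from_secs(60));
    let start = Instant::now();
    ticker.tick().await; // the first tick resolves immediately
    assert!(start.elapsed() < Duration::from_secs(1));
    // A second ticker.tick().await would wait until ~60s after creation.
}
```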
@@ -2774,6 +2871,12 @@ impl ChatWidget {
    }
}

impl Drop for ChatWidget {
    fn drop(&mut self) {
        self.stop_rate_limit_poller();
    }
}

impl Renderable for ChatWidget {
    fn render(&self, area: Rect, buf: &mut Buffer) {
        self.as_renderable().render(area, buf);
@@ -2892,6 +2995,22 @@ fn extract_first_bold(s: &str) -> Option<String> {
    None
}

async fn fetch_rate_limits(base_url: String, auth: CodexAuth) -> Option<RateLimitStatus> {
    match BackendClient::from_auth(base_url, &auth).await {
        Ok(client) => match client.get_rate_limits().await {
            Ok(snapshot) => Some(snapshot),
            Err(err) => {
                debug!(error = ?err, "failed to fetch rate limits from /usage");
                None
            }
        },
        Err(err) => {
            debug!(error = ?err, "failed to construct backend client for rate limits");
            None
        }
    }
}

#[cfg(test)]
pub(crate) fn show_review_commit_picker_with_entries(
    chat: &mut ChatWidget,
@@ -333,6 +333,7 @@ fn make_chatwidget_manual() -> (
        rate_limit_snapshot: None,
        rate_limit_warnings: RateLimitWarningState::default(),
        rate_limit_switch_prompt: RateLimitSwitchPromptState::default(),
        rate_limit_poller: None,
        stream_controller: None,
        running_commands: HashMap::new(),
        task_complete_pending: false,
@@ -1,3 +1,4 @@
use image::ImageError;
use image::ImageFormat;
use std::path::PathBuf;
use thiserror::Error;
@@ -23,3 +24,15 @@ pub enum ImageProcessingError {
        source: image::ImageError,
    },
}

impl ImageProcessingError {
    pub fn is_invalid_image(&self) -> bool {
        matches!(
            self,
            ImageProcessingError::Decode {
                source: ImageError::Decoding(_),
                ..
            }
        )
    }
}
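`is_invalid_image()` keys specifically on `ImageError::Decoding`, the variant the `image` crate produces when the bytes themselves cannot be decoded (as opposed to I/O failures). A small standalone sketch of how that variant arises, not taken from the diff:

```rust
use image::ImageFormat;

fn main() {
    // Bytes that are not a valid PNG fail with ImageError::Decoding, which is
    // exactly the case is_invalid_image() classifies as an invalid image.
    let err = image::load_from_memory_with_format(b"definitely not a png", ImageFormat::Png)
        .expect_err("garbage bytes should not decode");
    assert!(matches!(err, image::ImageError::Decoding(_)));
}
```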
@@ -42,16 +42,55 @@ Custom prompts turn your repeatable instructions into reusable slash commands, s

### Examples

**Draft PR helper**
### Example 1: Basic named arguments

`~/.codex/prompts/draftpr.md`
**File**: `~/.codex/prompts/ticket.md`

```markdown
---
description: Create feature branch, commit and open draft PR.
description: Generate a commit message for a ticket
argument-hint: TICKET_ID=<id> TICKET_TITLE=<title>
---

Create a branch named `tibo/<feature_name>`, commit the changes, and open a draft PR.
Please write a concise commit message for ticket $TICKET_ID: $TICKET_TITLE
```

Usage: type `/prompts:draftpr` to have codex perform the work.
**Usage**:

```
/prompts:ticket TICKET_ID=JIRA-1234 TICKET_TITLE="Fix login bug"
```

**Expanded prompt sent to Codex**:

```
Please write a concise commit message for ticket JIRA-1234: Fix login bug
```

**Note**: Both `TICKET_ID` and `TICKET_TITLE` are required. If either is missing, Codex will show a validation error. Values with spaces must be double-quoted.

### Example 2: Mixed positional and named arguments

**File**: `~/.codex/prompts/review.md`

```markdown
---
description: Review code in a specific file with focus area
argument-hint: FILE=<path> [FOCUS=<section>]
---

Review the code in $FILE. Pay special attention to $FOCUS.
```

**Usage**:

```
/prompts:review FILE=src/auth.js FOCUS="error handling"
```

**Expanded prompt**:

```
Review the code in src/auth.js. Pay special attention to error handling.
```