mirror of
https://github.com/openai/codex.git
synced 2026-02-02 23:13:37 +00:00
Compare commits
9 Commits
input-vali
...
pr5657
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4a62376e6b | ||
|
|
f8af4f5c8d | ||
|
|
a4be4d78b9 | ||
|
|
00c1de0c56 | ||
|
|
190e7eb104 | ||
|
|
061862a0e2 | ||
|
|
c72b2ad766 | ||
|
|
80783a7bb9 | ||
|
|
ed77d2d977 |
@@ -33,6 +33,8 @@ Then simply run `codex` to get started:
|
||||
codex
|
||||
```
|
||||
|
||||
If you're running into upgrade issues with Homebrew, see the [FAQ entry on brew upgrade codex](./docs/faq.md#brew-update-codex-isnt-upgrading-me).
|
||||
|
||||
<details>
|
||||
<summary>You can also go to the <a href="https://github.com/openai/codex/releases/latest">latest GitHub Release</a> and download the appropriate binary for your platform.</summary>
|
||||
|
||||
|
||||
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1075,6 +1075,7 @@ dependencies = [
|
||||
"escargot",
|
||||
"eventsource-stream",
|
||||
"futures",
|
||||
"http",
|
||||
"indexmap 2.10.0",
|
||||
"landlock",
|
||||
"libc",
|
||||
|
||||
@@ -116,6 +116,7 @@ env_logger = "0.11.5"
|
||||
escargot = "0.5"
|
||||
eventsource-stream = "0.2.3"
|
||||
futures = { version = "0.3", default-features = false }
|
||||
http = "1.3.1"
|
||||
icu_decimal = "2.0.0"
|
||||
icu_locale_core = "2.0.0"
|
||||
ignore = "0.4.23"
|
||||
|
||||
@@ -23,6 +23,7 @@ use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use ts_rs::ExportError;
|
||||
use ts_rs::TS;
|
||||
|
||||
const HEADER: &str = "// GENERATED CODE! DO NOT MODIFY BY HAND!\n\n";
|
||||
@@ -104,6 +105,19 @@ macro_rules! for_each_schema_type {
|
||||
};
|
||||
}
|
||||
|
||||
fn export_ts_with_context<F>(label: &str, export: F) -> Result<()>
|
||||
where
|
||||
F: FnOnce() -> std::result::Result<(), ExportError>,
|
||||
{
|
||||
match export() {
|
||||
Ok(()) => Ok(()),
|
||||
Err(ExportError::CannotBeExported(ty)) => Err(anyhow!(
|
||||
"failed to export {label}: dependency {ty} cannot be exported"
|
||||
)),
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
|
||||
generate_ts(out_dir, prettier)?;
|
||||
generate_json(out_dir)?;
|
||||
@@ -113,13 +127,17 @@ pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
|
||||
pub fn generate_ts(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
|
||||
ensure_dir(out_dir)?;
|
||||
|
||||
ClientRequest::export_all_to(out_dir)?;
|
||||
export_client_responses(out_dir)?;
|
||||
ClientNotification::export_all_to(out_dir)?;
|
||||
export_ts_with_context("ClientRequest", || ClientRequest::export_all_to(out_dir))?;
|
||||
export_ts_with_context("client responses", || export_client_responses(out_dir))?;
|
||||
export_ts_with_context("ClientNotification", || {
|
||||
ClientNotification::export_all_to(out_dir)
|
||||
})?;
|
||||
|
||||
ServerRequest::export_all_to(out_dir)?;
|
||||
export_server_responses(out_dir)?;
|
||||
ServerNotification::export_all_to(out_dir)?;
|
||||
export_ts_with_context("ServerRequest", || ServerRequest::export_all_to(out_dir))?;
|
||||
export_ts_with_context("server responses", || export_server_responses(out_dir))?;
|
||||
export_ts_with_context("ServerNotification", || {
|
||||
ServerNotification::export_all_to(out_dir)
|
||||
})?;
|
||||
|
||||
generate_index_ts(out_dir)?;
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_protocol::protocol::RateLimitSnapshot;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
use paste::paste;
|
||||
@@ -127,7 +128,7 @@ client_request_definitions! {
|
||||
#[ts(rename = "account/read")]
|
||||
GetAccount {
|
||||
params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
|
||||
response: Option<Account>,
|
||||
response: GetAccountResponse,
|
||||
},
|
||||
|
||||
/// DEPRECATED APIs below
|
||||
@@ -534,6 +535,12 @@ pub struct GetAccountRateLimitsResponse {
|
||||
pub rate_limits: RateLimitSnapshot,
|
||||
}
|
||||
|
||||
/// Response payload for the `account/read` (`GetAccount`) client request.
///
/// `#[serde(transparent)]` makes this newtype serialize exactly like the
/// wrapped `Option<Account>`, and the `ts(type = ...)` overrides emit the
/// TypeScript type as `Account | null`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(transparent)]
|
||||
#[ts(export)]
|
||||
#[ts(type = "Account | null")]
|
||||
pub struct GetAccountResponse(#[ts(type = "Account | null")] pub Option<Account>);
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GetAuthStatusResponse {
|
||||
@@ -841,6 +848,8 @@ pub struct ExecCommandApprovalParams {
|
||||
pub cwd: PathBuf,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
@@ -1057,6 +1066,7 @@ mod tests {
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
cwd: PathBuf::from("/tmp"),
|
||||
reason: Some("because tests".to_string()),
|
||||
risk: None,
|
||||
parsed_cmd: vec![ParsedCommand::Unknown {
|
||||
cmd: "echo hello".to_string(),
|
||||
}],
|
||||
|
||||
@@ -1447,6 +1447,7 @@ async fn apply_bespoke_event_handling(
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => {
|
||||
let params = ExecCommandApprovalParams {
|
||||
@@ -1455,6 +1456,7 @@ async fn apply_bespoke_event_handling(
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
};
|
||||
let rx = outgoing
|
||||
@@ -1523,6 +1525,7 @@ async fn derive_config_from_params(
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: None,
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
|
||||
|
||||
@@ -311,6 +311,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
|
||||
],
|
||||
cwd: working_directory.clone(),
|
||||
reason: None,
|
||||
risk: None,
|
||||
parsed_cmd: vec![ParsedCommand::Unknown {
|
||||
cmd: "python3 -c 'print(42)'".to_string()
|
||||
}],
|
||||
|
||||
@@ -34,6 +34,7 @@ dunce = { workspace = true }
|
||||
env-flags = { workspace = true }
|
||||
eventsource-stream = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
http = { workspace = true }
|
||||
indexmap = { workspace = true }
|
||||
libc = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
|
||||
@@ -21,6 +21,7 @@ use codex_app_server_protocol::AuthMode;
|
||||
use codex_protocol::config_types::ForcedLoginMethod;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::token_data::PlanType;
|
||||
use crate::token_data::TokenData;
|
||||
use crate::token_data::parse_id_token;
|
||||
@@ -32,7 +33,7 @@ pub struct CodexAuth {
|
||||
pub(crate) api_key: Option<String>,
|
||||
pub(crate) auth_dot_json: Arc<Mutex<Option<AuthDotJson>>>,
|
||||
pub(crate) auth_file: PathBuf,
|
||||
pub(crate) client: reqwest::Client,
|
||||
pub(crate) client: CodexHttpClient,
|
||||
}
|
||||
|
||||
impl PartialEq for CodexAuth {
|
||||
@@ -43,6 +44,8 @@ impl PartialEq for CodexAuth {
|
||||
|
||||
impl CodexAuth {
|
||||
pub async fn refresh_token(&self) -> Result<String, std::io::Error> {
|
||||
tracing::info!("Refreshing token");
|
||||
|
||||
let token_data = self
|
||||
.get_current_token_data()
|
||||
.ok_or(std::io::Error::other("Token data is not available."))?;
|
||||
@@ -180,7 +183,7 @@ impl CodexAuth {
|
||||
}
|
||||
}
|
||||
|
||||
fn from_api_key_with_client(api_key: &str, client: reqwest::Client) -> Self {
|
||||
fn from_api_key_with_client(api_key: &str, client: CodexHttpClient) -> Self {
|
||||
Self {
|
||||
api_key: Some(api_key.to_owned()),
|
||||
mode: AuthMode::ApiKey,
|
||||
@@ -400,7 +403,7 @@ async fn update_tokens(
|
||||
|
||||
async fn try_refresh_token(
|
||||
refresh_token: String,
|
||||
client: &reqwest::Client,
|
||||
client: &CodexHttpClient,
|
||||
) -> std::io::Result<RefreshResponse> {
|
||||
let refresh_request = RefreshRequest {
|
||||
client_id: CLIENT_ID,
|
||||
@@ -916,7 +919,10 @@ impl AuthManager {
|
||||
self.reload();
|
||||
Ok(Some(token))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to refresh token: {}", e);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ use crate::ModelProviderInfo;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::client_common::ResponseStream;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::ConnectionFailedError;
|
||||
use crate::error::ResponseStreamFailed;
|
||||
@@ -36,7 +37,7 @@ use tracing::trace;
|
||||
pub(crate) async fn stream_chat_completions(
|
||||
prompt: &Prompt,
|
||||
model_family: &ModelFamily,
|
||||
client: &reqwest::Client,
|
||||
client: &CodexHttpClient,
|
||||
provider: &ModelProviderInfo,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<ResponseStream> {
|
||||
|
||||
@@ -39,6 +39,7 @@ use crate::client_common::ResponsesApiRequest;
|
||||
use crate::client_common::create_reasoning_param_for_request;
|
||||
use crate::client_common::create_text_param_for_request;
|
||||
use crate::config::Config;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::default_client::create_client;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::ConnectionFailedError;
|
||||
@@ -81,7 +82,7 @@ pub struct ModelClient {
|
||||
config: Arc<Config>,
|
||||
auth_manager: Option<Arc<AuthManager>>,
|
||||
otel_event_manager: OtelEventManager,
|
||||
client: reqwest::Client,
|
||||
client: CodexHttpClient,
|
||||
provider: ModelProviderInfo,
|
||||
conversation_id: ConversationId,
|
||||
effort: Option<ReasoningEffortConfig>,
|
||||
@@ -133,6 +134,14 @@ impl ModelClient {
|
||||
self.stream_with_task_kind(prompt, TaskKind::Regular).await
|
||||
}
|
||||
|
||||
/// Returns a new shared handle to the configuration this client holds.
pub fn config(&self) -> Arc<Config> {
    let shared = &self.config;
    Arc::clone(shared)
}
|
||||
|
||||
pub fn provider(&self) -> &ModelProviderInfo {
|
||||
&self.provider
|
||||
}
|
||||
|
||||
pub(crate) async fn stream_with_task_kind(
|
||||
&self,
|
||||
prompt: &Prompt,
|
||||
@@ -300,6 +309,7 @@ impl ModelClient {
|
||||
"POST to {}: {:?}",
|
||||
self.provider.get_full_url(&auth),
|
||||
serde_json::to_string(payload_json)
|
||||
.unwrap_or("<unable to serialize payload>".to_string())
|
||||
);
|
||||
|
||||
let mut req_builder = self
|
||||
@@ -335,13 +345,6 @@ impl ModelClient {
|
||||
.headers()
|
||||
.get("cf-ray")
|
||||
.map(|v| v.to_str().unwrap_or_default().to_string());
|
||||
|
||||
debug!(
|
||||
"Response status: {}, cf-ray: {:?}, version: {:?}",
|
||||
resp.status(),
|
||||
request_id,
|
||||
resp.version()
|
||||
);
|
||||
}
|
||||
|
||||
match res {
|
||||
|
||||
@@ -8,6 +8,7 @@ use crate::AuthManager;
|
||||
use crate::client_common::REVIEW_PROMPT;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::parse_turn_item;
|
||||
use crate::response_processing::process_items;
|
||||
@@ -59,7 +60,6 @@ use crate::config::Config;
|
||||
use crate::config_types::McpServerTransportConfig;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::conversation_history::prefetch_tokenizer_in_background;
|
||||
use crate::environment_context::EnvironmentContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result as CodexResult;
|
||||
@@ -88,6 +88,7 @@ use crate::protocol::Op;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::ReviewOutputEvent;
|
||||
use crate::protocol::SandboxCommandAssessment;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::protocol::StreamErrorEvent;
|
||||
@@ -160,8 +161,6 @@ impl Codex {
|
||||
conversation_history: InitialHistory,
|
||||
session_source: SessionSource,
|
||||
) -> CodexResult<CodexSpawnOk> {
|
||||
// Start loading the tokenizer in the background so we don't block later.
|
||||
prefetch_tokenizer_in_background();
|
||||
let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
|
||||
let (tx_event, rx_event) = async_channel::unbounded();
|
||||
|
||||
@@ -571,9 +570,7 @@ impl Session {
|
||||
// Dispatch the SessionConfiguredEvent first and then report any errors.
|
||||
// If resuming, include converted initial messages in the payload so UIs can render them immediately.
|
||||
let initial_messages = initial_history.get_event_msgs();
|
||||
sess.record_initial_history(initial_history)
|
||||
.await
|
||||
.map_err(anyhow::Error::new)?;
|
||||
sess.record_initial_history(initial_history).await;
|
||||
|
||||
let events = std::iter::once(Event {
|
||||
id: INITIAL_SUBMIT_ID.to_owned(),
|
||||
@@ -606,16 +603,13 @@ impl Session {
|
||||
format!("auto-compact-{id}")
|
||||
}
|
||||
|
||||
async fn record_initial_history(
|
||||
&self,
|
||||
conversation_history: InitialHistory,
|
||||
) -> CodexResult<()> {
|
||||
async fn record_initial_history(&self, conversation_history: InitialHistory) {
|
||||
let turn_context = self.new_turn(SessionSettingsUpdate::default()).await;
|
||||
match conversation_history {
|
||||
InitialHistory::New => {
|
||||
// Build and record initial items (user instructions + environment context)
|
||||
let items = self.build_initial_context(&turn_context);
|
||||
self.record_conversation_items(&items).await?;
|
||||
self.record_conversation_items(&items).await;
|
||||
}
|
||||
InitialHistory::Resumed(_) | InitialHistory::Forked(_) => {
|
||||
let rollout_items = conversation_history.get_rollout_items();
|
||||
@@ -623,9 +617,9 @@ impl Session {
|
||||
|
||||
// Always add response items to conversation history
|
||||
let reconstructed_history =
|
||||
self.reconstruct_history_from_rollout(&turn_context, &rollout_items)?;
|
||||
self.reconstruct_history_from_rollout(&turn_context, &rollout_items);
|
||||
if !reconstructed_history.is_empty() {
|
||||
self.record_into_history(&reconstructed_history).await?;
|
||||
self.record_into_history(&reconstructed_history).await;
|
||||
}
|
||||
|
||||
// If persisting, persist all rollout items as-is (recorder filters)
|
||||
@@ -634,7 +628,6 @@ impl Session {
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn update_settings(&self, updates: SessionSettingsUpdate) {
|
||||
@@ -763,6 +756,32 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn assess_sandbox_command(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
call_id: &str,
|
||||
command: &[String],
|
||||
failure_message: Option<&str>,
|
||||
) -> Option<SandboxCommandAssessment> {
|
||||
let config = turn_context.client.config();
|
||||
let provider = turn_context.client.provider().clone();
|
||||
let auth_manager = Arc::clone(&self.services.auth_manager);
|
||||
let otel = self.services.otel_event_manager.clone();
|
||||
crate::sandboxing::assessment::assess_command(
|
||||
config,
|
||||
provider,
|
||||
auth_manager,
|
||||
&otel,
|
||||
self.conversation_id,
|
||||
call_id,
|
||||
command,
|
||||
&turn_context.sandbox_policy,
|
||||
&turn_context.cwd,
|
||||
failure_message,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Emit an exec approval request event and await the user's decision.
|
||||
///
|
||||
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
|
||||
@@ -775,6 +794,7 @@ impl Session {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
reason: Option<String>,
|
||||
risk: Option<SandboxCommandAssessment>,
|
||||
) -> ReviewDecision {
|
||||
let sub_id = turn_context.sub_id.clone();
|
||||
// Add the tx_approve callback to the map before sending the request.
|
||||
@@ -800,6 +820,7 @@ impl Session {
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
});
|
||||
self.send_event(turn_context, event).await;
|
||||
@@ -865,25 +886,21 @@ impl Session {
|
||||
|
||||
/// Records input items: always append to conversation history and
|
||||
/// persist these response items to rollout.
|
||||
pub(crate) async fn record_conversation_items(
|
||||
&self,
|
||||
items: &[ResponseItem],
|
||||
) -> CodexResult<()> {
|
||||
self.record_into_history(items).await?;
|
||||
pub(crate) async fn record_conversation_items(&self, items: &[ResponseItem]) {
|
||||
self.record_into_history(items).await;
|
||||
self.persist_rollout_response_items(items).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn reconstruct_history_from_rollout(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
rollout_items: &[RolloutItem],
|
||||
) -> CodexResult<Vec<ResponseItem>> {
|
||||
) -> Vec<ResponseItem> {
|
||||
let mut history = ConversationHistory::new();
|
||||
for item in rollout_items {
|
||||
match item {
|
||||
RolloutItem::ResponseItem(response_item) => {
|
||||
history.record_items(std::iter::once(response_item))?;
|
||||
history.record_items(std::iter::once(response_item));
|
||||
}
|
||||
RolloutItem::Compacted(compacted) => {
|
||||
let snapshot = history.get_history();
|
||||
@@ -898,14 +915,13 @@ impl Session {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(history.get_history())
|
||||
history.get_history()
|
||||
}
|
||||
|
||||
/// Append ResponseItems to the in-memory conversation history only.
|
||||
async fn record_into_history(&self, items: &[ResponseItem]) -> CodexResult<()> {
|
||||
async fn record_into_history(&self, items: &[ResponseItem]) {
|
||||
let mut state = self.state.lock().await;
|
||||
state.record_items(items.iter())?;
|
||||
Ok(())
|
||||
state.record_items(items.iter());
|
||||
}
|
||||
|
||||
async fn replace_history(&self, items: Vec<ResponseItem>) {
|
||||
@@ -1014,11 +1030,11 @@ impl Session {
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
response_input: &ResponseInputItem,
|
||||
) -> CodexResult<()> {
|
||||
) {
|
||||
let response_item: ResponseItem = response_input.clone().into();
|
||||
// Add to conversation history and persist response item to rollout
|
||||
self.record_conversation_items(std::slice::from_ref(&response_item))
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
// Derive user message events and persist only UserMessage to rollout
|
||||
let turn_item = parse_turn_item(&response_item);
|
||||
@@ -1027,7 +1043,6 @@ impl Session {
|
||||
self.emit_turn_item_started_completed(turn_context, item, false)
|
||||
.await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Helper that emits a BackgroundEvent with the given message. This keeps
|
||||
@@ -1208,17 +1223,9 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
|
||||
if let Err(items) = sess.inject_input(items).await {
|
||||
if let Some(env_item) = sess
|
||||
.build_environment_update_item(previous_context.as_ref(), ¤t_context)
|
||||
&& let Err(err) = sess
|
||||
.record_conversation_items(std::slice::from_ref(&env_item))
|
||||
.await
|
||||
{
|
||||
sess.send_event(
|
||||
current_context.as_ref(),
|
||||
EventMsg::Error(ErrorEvent {
|
||||
message: err.to_string(),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
sess.record_conversation_items(std::slice::from_ref(&env_item))
|
||||
.await;
|
||||
}
|
||||
|
||||
sess.spawn_task(Arc::clone(¤t_context), items, RegularTask)
|
||||
@@ -1531,9 +1538,9 @@ pub(crate) async fn run_task(
|
||||
input: Vec<UserInput>,
|
||||
task_kind: TaskKind,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<Option<String>> {
|
||||
) -> Option<String> {
|
||||
if input.is_empty() {
|
||||
return Ok(None);
|
||||
return None;
|
||||
}
|
||||
let event = EventMsg::TaskStarted(TaskStartedEvent {
|
||||
model_context_window: turn_context.client.get_model_context_window(),
|
||||
@@ -1550,11 +1557,11 @@ pub(crate) async fn run_task(
|
||||
if is_review_mode {
|
||||
// Seed review threads with environment context so the model knows the working directory.
|
||||
review_thread_history
|
||||
.record_items(sess.build_initial_context(turn_context.as_ref()).iter())?;
|
||||
review_thread_history.record_items(std::iter::once(&initial_input_for_turn.into()))?;
|
||||
.record_items(sess.build_initial_context(turn_context.as_ref()).iter());
|
||||
review_thread_history.record_items(std::iter::once(&initial_input_for_turn.into()));
|
||||
} else {
|
||||
sess.record_input_and_rollout_usermsg(turn_context.as_ref(), &initial_input_for_turn)
|
||||
.await?;
|
||||
.await;
|
||||
}
|
||||
|
||||
let mut last_agent_message: Option<String> = None;
|
||||
@@ -1586,11 +1593,11 @@ pub(crate) async fn run_task(
|
||||
// represents an append-only log without duplicates.
|
||||
let turn_input: Vec<ResponseItem> = if is_review_mode {
|
||||
if !pending_input.is_empty() {
|
||||
review_thread_history.record_items(&pending_input)?;
|
||||
review_thread_history.record_items(&pending_input);
|
||||
}
|
||||
review_thread_history.get_history()
|
||||
} else {
|
||||
sess.record_conversation_items(&pending_input).await?;
|
||||
sess.record_conversation_items(&pending_input).await;
|
||||
sess.history_snapshot().await
|
||||
};
|
||||
|
||||
@@ -1638,7 +1645,7 @@ pub(crate) async fn run_task(
|
||||
&mut review_thread_history,
|
||||
&sess,
|
||||
)
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
if token_limit_reached {
|
||||
if auto_compact_recently_attempted {
|
||||
@@ -1655,8 +1662,7 @@ pub(crate) async fn run_task(
|
||||
break;
|
||||
}
|
||||
auto_compact_recently_attempted = true;
|
||||
compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone())
|
||||
.await?;
|
||||
compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1681,13 +1687,13 @@ pub(crate) async fn run_task(
|
||||
Err(CodexErr::TurnAborted {
|
||||
dangling_artifacts: processed_items,
|
||||
}) => {
|
||||
process_items(
|
||||
let _ = process_items(
|
||||
processed_items,
|
||||
is_review_mode,
|
||||
&mut review_thread_history,
|
||||
&sess,
|
||||
)
|
||||
.await?;
|
||||
.await;
|
||||
// Aborted turn is reported via a different event.
|
||||
break;
|
||||
}
|
||||
@@ -1716,10 +1722,10 @@ pub(crate) async fn run_task(
|
||||
Arc::clone(&turn_context),
|
||||
last_agent_message.as_deref().map(parse_review_output_event),
|
||||
)
|
||||
.await?;
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(last_agent_message)
|
||||
last_agent_message
|
||||
}
|
||||
|
||||
/// Parse the review output; when not valid JSON, build a structured
|
||||
@@ -2158,7 +2164,7 @@ pub(crate) async fn exit_review_mode(
|
||||
session: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
review_output: Option<ReviewOutputEvent>,
|
||||
) -> CodexResult<()> {
|
||||
) {
|
||||
let event = EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
|
||||
review_output: review_output.clone(),
|
||||
});
|
||||
@@ -2201,8 +2207,7 @@ pub(crate) async fn exit_review_mode(
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: user_message }],
|
||||
}])
|
||||
.await?;
|
||||
Ok(())
|
||||
.await;
|
||||
}
|
||||
|
||||
fn mcp_init_error_display(
|
||||
@@ -2224,12 +2229,24 @@ fn mcp_init_error_display(
|
||||
// That means that the user has to specify a personal access token either via bearer_token_env_var or http_headers.
|
||||
// https://github.com/github/github-mcp-server/issues/921#issuecomment-3221026448
|
||||
format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding `bearer_token_env_var = CODEX_GITHUB_PAT` in the `mcp_servers.{server_name}` section of your config.toml"
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
)
|
||||
} else if is_mcp_client_auth_required_error(err) {
|
||||
format!(
|
||||
"The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}`."
|
||||
)
|
||||
} else if is_mcp_client_startup_timeout_error(err) {
|
||||
let startup_timeout_secs = match entry {
|
||||
Some(entry) => match entry.config.startup_timeout_sec {
|
||||
Some(timeout) => timeout,
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
},
|
||||
None => DEFAULT_STARTUP_TIMEOUT,
|
||||
}
|
||||
.as_secs();
|
||||
format!(
|
||||
"MCP client for `{server_name}` timed out after {startup_timeout_secs} seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.{server_name}]\nstartup_timeout_sec = XX"
|
||||
)
|
||||
} else {
|
||||
format!("MCP client for `{server_name}` failed to start: {err:#}")
|
||||
}
|
||||
@@ -2240,6 +2257,12 @@ fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
|
||||
error.to_string().contains("Auth required")
|
||||
}
|
||||
|
||||
fn is_mcp_client_startup_timeout_error(error: &anyhow::Error) -> bool {
|
||||
let error_message = error.to_string();
|
||||
error_message.contains("request timed out")
|
||||
|| error_message.contains("timed out handshaking with MCP server")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) use tests::make_session_and_context;
|
||||
|
||||
@@ -2250,7 +2273,6 @@ mod tests {
|
||||
use crate::config::ConfigToml;
|
||||
use crate::config_types::McpServerConfig;
|
||||
use crate::config_types::McpServerTransportConfig;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::mcp::auth::McpAuthStatusEntry;
|
||||
use crate::tools::format_exec_output_str;
|
||||
@@ -2291,12 +2313,9 @@ mod tests {
|
||||
#[test]
|
||||
fn reconstruct_history_matches_live_compactions() {
|
||||
let (session, turn_context) = make_session_and_context();
|
||||
let (rollout_items, expected) =
|
||||
sample_rollout(&session, &turn_context).expect("sample rollout");
|
||||
let (rollout_items, expected) = sample_rollout(&session, &turn_context);
|
||||
|
||||
let reconstructed = session
|
||||
.reconstruct_history_from_rollout(&turn_context, &rollout_items)
|
||||
.expect("reconstruct history");
|
||||
let reconstructed = session.reconstruct_history_from_rollout(&turn_context, &rollout_items);
|
||||
|
||||
assert_eq!(expected, reconstructed);
|
||||
}
|
||||
@@ -2304,19 +2323,15 @@ mod tests {
|
||||
#[test]
|
||||
fn record_initial_history_reconstructs_resumed_transcript() {
|
||||
let (session, turn_context) = make_session_and_context();
|
||||
let (rollout_items, expected) =
|
||||
sample_rollout(&session, &turn_context).expect("sample rollout");
|
||||
let (rollout_items, expected) = sample_rollout(&session, &turn_context);
|
||||
|
||||
tokio_test::block_on(async {
|
||||
session
|
||||
.record_initial_history(InitialHistory::Resumed(ResumedHistory {
|
||||
conversation_id: ConversationId::default(),
|
||||
history: rollout_items,
|
||||
rollout_path: PathBuf::from("/tmp/resume.jsonl"),
|
||||
}))
|
||||
.await
|
||||
.expect("record resumed history");
|
||||
});
|
||||
tokio_test::block_on(session.record_initial_history(InitialHistory::Resumed(
|
||||
ResumedHistory {
|
||||
conversation_id: ConversationId::default(),
|
||||
history: rollout_items,
|
||||
rollout_path: PathBuf::from("/tmp/resume.jsonl"),
|
||||
},
|
||||
)));
|
||||
|
||||
let actual = tokio_test::block_on(async { session.state.lock().await.history_snapshot() });
|
||||
assert_eq!(expected, actual);
|
||||
@@ -2325,15 +2340,9 @@ mod tests {
|
||||
#[test]
|
||||
fn record_initial_history_reconstructs_forked_transcript() {
|
||||
let (session, turn_context) = make_session_and_context();
|
||||
let (rollout_items, expected) =
|
||||
sample_rollout(&session, &turn_context).expect("sample rollout");
|
||||
let (rollout_items, expected) = sample_rollout(&session, &turn_context);
|
||||
|
||||
tokio_test::block_on(async {
|
||||
session
|
||||
.record_initial_history(InitialHistory::Forked(rollout_items))
|
||||
.await
|
||||
.expect("record forked history");
|
||||
});
|
||||
tokio_test::block_on(session.record_initial_history(InitialHistory::Forked(rollout_items)));
|
||||
|
||||
let actual = tokio_test::block_on(async { session.state.lock().await.history_snapshot() });
|
||||
assert_eq!(expected, actual);
|
||||
@@ -2693,10 +2702,10 @@ mod tests {
|
||||
_ctx: Arc<TurnContext>,
|
||||
_input: Vec<UserInput>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<Option<String>> {
|
||||
) -> Option<String> {
|
||||
if self.listen_to_cancellation_token {
|
||||
cancellation_token.cancelled().await;
|
||||
return Ok(None);
|
||||
return None;
|
||||
}
|
||||
loop {
|
||||
sleep(Duration::from_secs(60)).await;
|
||||
@@ -2705,7 +2714,7 @@ mod tests {
|
||||
|
||||
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
||||
if let TaskKind::Review = self.kind {
|
||||
let _ = exit_review_mode(session.clone_session(), ctx, None).await;
|
||||
exit_review_mode(session.clone_session(), ctx, None).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2861,7 +2870,7 @@ mod tests {
|
||||
fn sample_rollout(
|
||||
session: &Session,
|
||||
turn_context: &TurnContext,
|
||||
) -> CodexResult<(Vec<RolloutItem>, Vec<ResponseItem>)> {
|
||||
) -> (Vec<RolloutItem>, Vec<ResponseItem>) {
|
||||
let mut rollout_items = Vec::new();
|
||||
let mut live_history = ConversationHistory::new();
|
||||
|
||||
@@ -2869,7 +2878,7 @@ mod tests {
|
||||
for item in &initial_context {
|
||||
rollout_items.push(RolloutItem::ResponseItem(item.clone()));
|
||||
}
|
||||
live_history.record_items(initial_context.iter())?;
|
||||
live_history.record_items(initial_context.iter());
|
||||
|
||||
let user1 = ResponseItem::Message {
|
||||
id: None,
|
||||
@@ -2878,7 +2887,7 @@ mod tests {
|
||||
text: "first user".to_string(),
|
||||
}],
|
||||
};
|
||||
live_history.record_items(std::iter::once(&user1))?;
|
||||
live_history.record_items(std::iter::once(&user1));
|
||||
rollout_items.push(RolloutItem::ResponseItem(user1.clone()));
|
||||
|
||||
let assistant1 = ResponseItem::Message {
|
||||
@@ -2888,7 +2897,7 @@ mod tests {
|
||||
text: "assistant reply one".to_string(),
|
||||
}],
|
||||
};
|
||||
live_history.record_items(std::iter::once(&assistant1))?;
|
||||
live_history.record_items(std::iter::once(&assistant1));
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant1.clone()));
|
||||
|
||||
let summary1 = "summary one";
|
||||
@@ -2911,7 +2920,7 @@ mod tests {
|
||||
text: "second user".to_string(),
|
||||
}],
|
||||
};
|
||||
live_history.record_items(std::iter::once(&user2))?;
|
||||
live_history.record_items(std::iter::once(&user2));
|
||||
rollout_items.push(RolloutItem::ResponseItem(user2.clone()));
|
||||
|
||||
let assistant2 = ResponseItem::Message {
|
||||
@@ -2921,7 +2930,7 @@ mod tests {
|
||||
text: "assistant reply two".to_string(),
|
||||
}],
|
||||
};
|
||||
live_history.record_items(std::iter::once(&assistant2))?;
|
||||
live_history.record_items(std::iter::once(&assistant2));
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant2.clone()));
|
||||
|
||||
let summary2 = "summary two";
|
||||
@@ -2944,7 +2953,7 @@ mod tests {
|
||||
text: "third user".to_string(),
|
||||
}],
|
||||
};
|
||||
live_history.record_items(std::iter::once(&user3))?;
|
||||
live_history.record_items(std::iter::once(&user3));
|
||||
rollout_items.push(RolloutItem::ResponseItem(user3.clone()));
|
||||
|
||||
let assistant3 = ResponseItem::Message {
|
||||
@@ -2954,10 +2963,10 @@ mod tests {
|
||||
text: "assistant reply three".to_string(),
|
||||
}],
|
||||
};
|
||||
live_history.record_items(std::iter::once(&assistant3))?;
|
||||
live_history.record_items(std::iter::once(&assistant3));
|
||||
rollout_items.push(RolloutItem::ResponseItem(assistant3.clone()));
|
||||
|
||||
Ok((rollout_items, live_history.get_history()))
|
||||
(rollout_items, live_history.get_history())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -3111,7 +3120,7 @@ mod tests {
|
||||
let display = mcp_init_error_display(server_name, Some(&entry), &err);
|
||||
|
||||
let expected = format!(
|
||||
"GitHub MCP does not support OAuth. Log in by adding `bearer_token_env_var = CODEX_GITHUB_PAT` in the `mcp_servers.{server_name}` section of your config.toml"
|
||||
"GitHub MCP does not support OAuth. Log in by adding a personal access token (https://github.com/settings/personal-access-tokens) to your environment and config.toml:\n[mcp_servers.{server_name}]\nbearer_token_env_var = CODEX_GITHUB_PERSONAL_ACCESS_TOKEN"
|
||||
);
|
||||
|
||||
assert_eq!(expected, display);
|
||||
@@ -3158,4 +3167,17 @@ mod tests {
|
||||
|
||||
assert_eq!(expected, display);
|
||||
}
|
||||
|
||||
/// A startup timeout for an MCP server must be rendered with a hint that
/// points the user at the `startup_timeout_sec` setting for that server.
#[test]
fn mcp_init_error_display_includes_startup_timeout_hint() {
    let timeout_error = anyhow::anyhow!("request timed out");

    // No config entry is passed for the server, so only the error text drives
    // the rendered message.
    let rendered = mcp_init_error_display("slow", None, &timeout_error);

    let expected = "MCP client for `slow` timed out after 10 seconds. Add or adjust `startup_timeout_sec` in your config.toml:\n[mcp_servers.slow]\nstartup_timeout_sec = XX";
    assert_eq!(expected, rendered);
}
|
||||
}
|
||||
|
||||
@@ -39,35 +39,35 @@ struct HistoryBridgeTemplate<'a> {
|
||||
pub(crate) async fn run_inline_auto_compact_task(
|
||||
sess: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
) -> CodexResult<()> {
|
||||
) {
|
||||
let input = vec![UserInput::Text {
|
||||
text: SUMMARIZATION_PROMPT.to_string(),
|
||||
}];
|
||||
run_compact_task_inner(sess, turn_context, input).await
|
||||
run_compact_task_inner(sess, turn_context, input).await;
|
||||
}
|
||||
|
||||
pub(crate) async fn run_compact_task(
|
||||
sess: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
input: Vec<UserInput>,
|
||||
) -> CodexResult<Option<String>> {
|
||||
) -> Option<String> {
|
||||
let start_event = EventMsg::TaskStarted(TaskStartedEvent {
|
||||
model_context_window: turn_context.client.get_model_context_window(),
|
||||
});
|
||||
sess.send_event(&turn_context, start_event).await;
|
||||
run_compact_task_inner(sess.clone(), turn_context, input).await?;
|
||||
Ok(None)
|
||||
run_compact_task_inner(sess.clone(), turn_context, input).await;
|
||||
None
|
||||
}
|
||||
|
||||
async fn run_compact_task_inner(
|
||||
sess: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
input: Vec<UserInput>,
|
||||
) -> CodexResult<()> {
|
||||
) {
|
||||
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
|
||||
|
||||
let mut history = sess.clone_history().await;
|
||||
history.record_items(&[initial_input_for_turn.into()])?;
|
||||
history.record_items(&[initial_input_for_turn.into()]);
|
||||
|
||||
let mut truncated_count = 0usize;
|
||||
|
||||
@@ -106,7 +106,7 @@ async fn run_compact_task_inner(
|
||||
break;
|
||||
}
|
||||
Err(CodexErr::Interrupted) => {
|
||||
return Ok(());
|
||||
return;
|
||||
}
|
||||
Err(e @ CodexErr::ContextWindowExceeded) => {
|
||||
if turn_input.len() > 1 {
|
||||
@@ -124,7 +124,7 @@ async fn run_compact_task_inner(
|
||||
message: e.to_string(),
|
||||
});
|
||||
sess.send_event(&turn_context, event).await;
|
||||
return Ok(());
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
if retries < max_retries {
|
||||
@@ -142,7 +142,7 @@ async fn run_compact_task_inner(
|
||||
message: e.to_string(),
|
||||
});
|
||||
sess.send_event(&turn_context, event).await;
|
||||
return Ok(());
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -164,7 +164,6 @@ async fn run_compact_task_inner(
|
||||
message: "Compact task completed".to_string(),
|
||||
});
|
||||
sess.send_event(&turn_context, event).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
|
||||
@@ -253,8 +252,7 @@ async fn drain_to_completed(
|
||||
};
|
||||
match event {
|
||||
Ok(ResponseEvent::OutputItemDone(item)) => {
|
||||
sess.record_into_history(std::slice::from_ref(&item))
|
||||
.await?;
|
||||
sess.record_into_history(std::slice::from_ref(&item)).await;
|
||||
}
|
||||
Ok(ResponseEvent::RateLimits(snapshot)) => {
|
||||
sess.update_rate_limits(turn_context, snapshot).await;
|
||||
|
||||
@@ -223,6 +223,9 @@ pub struct Config {
|
||||
|
||||
pub tools_web_search_request: bool,
|
||||
|
||||
/// When `true`, run a model-based assessment for commands denied by the sandbox.
|
||||
pub experimental_sandbox_command_assessment: bool,
|
||||
|
||||
pub use_experimental_streamable_shell_tool: bool,
|
||||
|
||||
/// If set to `true`, use only the experimental unified exec tool.
|
||||
@@ -958,6 +961,7 @@ pub struct ConfigToml {
|
||||
pub experimental_use_unified_exec_tool: Option<bool>,
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<ConfigToml> for UserSavedConfig {
|
||||
@@ -1118,6 +1122,7 @@ pub struct ConfigOverrides {
|
||||
pub include_view_image_tool: Option<bool>,
|
||||
pub show_raw_agent_reasoning: Option<bool>,
|
||||
pub tools_web_search_request: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
/// Additional directories that should be treated as writable roots for this session.
|
||||
pub additional_writable_roots: Vec<PathBuf>,
|
||||
}
|
||||
@@ -1147,6 +1152,7 @@ impl Config {
|
||||
include_view_image_tool: include_view_image_tool_override,
|
||||
show_raw_agent_reasoning,
|
||||
tools_web_search_request: override_tools_web_search_request,
|
||||
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
||||
additional_writable_roots,
|
||||
} = overrides;
|
||||
|
||||
@@ -1172,6 +1178,7 @@ impl Config {
|
||||
include_apply_patch_tool: include_apply_patch_tool_override,
|
||||
include_view_image_tool: include_view_image_tool_override,
|
||||
web_search_request: override_tools_web_search_request,
|
||||
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
||||
};
|
||||
|
||||
let features = Features::from_config(&cfg, &config_profile, feature_overrides);
|
||||
@@ -1269,6 +1276,8 @@ impl Config {
|
||||
let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
|
||||
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
|
||||
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
|
||||
let experimental_sandbox_command_assessment =
|
||||
features.enabled(Feature::SandboxCommandAssessment);
|
||||
|
||||
let forced_chatgpt_workspace_id =
|
||||
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
|
||||
@@ -1390,6 +1399,7 @@ impl Config {
|
||||
forced_login_method,
|
||||
include_apply_patch_tool: include_apply_patch_tool_flag,
|
||||
tools_web_search_request,
|
||||
experimental_sandbox_command_assessment,
|
||||
use_experimental_streamable_shell_tool,
|
||||
use_experimental_unified_exec_tool,
|
||||
use_experimental_use_rmcp_client,
|
||||
@@ -2873,6 +2883,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
@@ -2941,6 +2952,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
@@ -3024,6 +3036,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
@@ -3093,6 +3106,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
|
||||
@@ -26,6 +26,7 @@ pub struct ConfigProfile {
|
||||
pub experimental_use_exec_command_tool: Option<bool>,
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
pub tools_web_search: Option<bool>,
|
||||
pub tools_view_image: Option<bool>,
|
||||
/// Optional feature toggles scoped to this profile.
|
||||
|
||||
@@ -1,20 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use codex_protocol::protocol::TokenUsageInfo;
|
||||
use codex_utils_tokenizer::Tokenizer;
|
||||
use tokio::task;
|
||||
use tracing::error;
|
||||
|
||||
static TOKENIZER: OnceLock<Option<Arc<Tokenizer>>> = OnceLock::new();
|
||||
|
||||
use crate::error::CodexErr;
|
||||
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct ConversationHistory {
|
||||
/// The oldest items are at the beginning of the vector.
|
||||
items: Vec<ResponseItem>,
|
||||
@@ -25,7 +16,7 @@ impl ConversationHistory {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
items: Vec::new(),
|
||||
token_info: None,
|
||||
token_info: TokenUsageInfo::new_or_append(&None, &None, None),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,7 +34,7 @@ impl ConversationHistory {
|
||||
}
|
||||
|
||||
/// `items` is ordered from oldest to newest.
|
||||
pub(crate) fn record_items<I>(&mut self, items: I) -> Result<(), CodexErr>
|
||||
pub(crate) fn record_items<I>(&mut self, items: I)
|
||||
where
|
||||
I: IntoIterator,
|
||||
I::Item: std::ops::Deref<Target = ResponseItem>,
|
||||
@@ -52,10 +43,9 @@ impl ConversationHistory {
|
||||
if !is_api_message(&item) {
|
||||
continue;
|
||||
}
|
||||
self.validate_input(&item)?;
|
||||
|
||||
self.items.push(item.clone());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_history(&mut self) -> Vec<ResponseItem> {
|
||||
@@ -91,65 +81,6 @@ impl ConversationHistory {
|
||||
self.items.clone()
|
||||
}
|
||||
|
||||
fn validate_input(&self, item: &ResponseItem) -> Result<(), CodexErr> {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
self.validate_input_content_item(content)?;
|
||||
Ok(())
|
||||
}
|
||||
ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::WebSearchCall { .. } => Ok(()),
|
||||
ResponseItem::Other => Err(CodexErr::InvalidInput(format!("invalid input: {item:?}"))),
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_input_content_item(
|
||||
&self,
|
||||
content: &[codex_protocol::models::ContentItem],
|
||||
) -> Result<(), CodexErr> {
|
||||
let Some(info) = &self.token_info else {
|
||||
return Ok(());
|
||||
};
|
||||
// this will intentionally not check the context for the first turn before getting this information.
|
||||
// it's acceptable tradeoff.
|
||||
let Some(context_window) = info.model_context_window else {
|
||||
return Ok(());
|
||||
};
|
||||
let tokenizer = match shared_tokenizer() {
|
||||
Some(t) => t,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
let mut input_tokens: i64 = 0;
|
||||
for item in content {
|
||||
match item {
|
||||
codex_protocol::models::ContentItem::InputText { text } => {
|
||||
input_tokens += tokenizer.count(text);
|
||||
}
|
||||
codex_protocol::models::ContentItem::InputImage { .. } => {
|
||||
// no validation currently
|
||||
}
|
||||
codex_protocol::models::ContentItem::OutputText { .. } => {
|
||||
// no validation currently
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let prior_total = info.last_token_usage.total_tokens;
|
||||
let combined_tokens = prior_total.saturating_add(input_tokens);
|
||||
let threshold = context_window * 95 / 100;
|
||||
if combined_tokens > threshold {
|
||||
return Err(CodexErr::InvalidInput("input too large".to_string()));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_call_outputs_present(&mut self) {
|
||||
// Collect synthetic outputs to insert immediately after their calls.
|
||||
// Store the insertion position (index of call) alongside the item so
|
||||
@@ -412,36 +343,6 @@ fn error_or_panic(message: String) {
|
||||
}
|
||||
}
|
||||
|
||||
fn shared_tokenizer() -> Option<Arc<Tokenizer>> {
|
||||
TOKENIZER.get().and_then(|opt| opt.as_ref().map(Arc::clone))
|
||||
}
|
||||
|
||||
/// Kick off background initialization of the shared tokenizer without blocking the caller.
|
||||
pub(crate) fn prefetch_tokenizer_in_background() {
|
||||
if TOKENIZER.get().is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Spawn a background task to initialize the tokenizer. Use spawn_blocking in case
|
||||
// initialization performs CPU-heavy work or file I/O.
|
||||
tokio::spawn(async {
|
||||
let result = task::spawn_blocking(Tokenizer::try_default).await;
|
||||
match result {
|
||||
Ok(Ok(tokenizer)) => {
|
||||
let _ = TOKENIZER.set(Some(Arc::new(tokenizer)));
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
error!("failed to create tokenizer: {error}");
|
||||
let _ = TOKENIZER.set(None);
|
||||
}
|
||||
Err(join_error) => {
|
||||
error!("failed to join tokenizer init task: {join_error}");
|
||||
let _ = TOKENIZER.set(None);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Anything that is not a system message or "reasoning" message is considered
|
||||
/// an API message.
|
||||
fn is_api_message(message: &ResponseItem) -> bool {
|
||||
@@ -480,7 +381,7 @@ mod tests {
|
||||
|
||||
fn create_history_with_items(items: Vec<ResponseItem>) -> ConversationHistory {
|
||||
let mut h = ConversationHistory::new();
|
||||
h.record_items(items.iter()).unwrap();
|
||||
h.record_items(items.iter());
|
||||
h
|
||||
}
|
||||
|
||||
@@ -496,7 +397,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn filters_non_api_messages() {
|
||||
let mut h = ConversationHistory::new();
|
||||
let mut h = ConversationHistory::default();
|
||||
// System message is not an API message; Other is ignored.
|
||||
let system = ResponseItem::Message {
|
||||
id: None,
|
||||
@@ -505,12 +406,12 @@ mod tests {
|
||||
text: "ignored".to_string(),
|
||||
}],
|
||||
};
|
||||
h.record_items([&system, &ResponseItem::Other]).unwrap();
|
||||
h.record_items([&system, &ResponseItem::Other]);
|
||||
|
||||
// User and assistant should be retained.
|
||||
let u = user_msg("hi");
|
||||
let a = assistant_msg("hello");
|
||||
h.record_items([&u, &a]).unwrap();
|
||||
h.record_items([&u, &a]);
|
||||
|
||||
let items = h.contents();
|
||||
assert_eq!(
|
||||
|
||||
@@ -32,8 +32,34 @@ pub struct NewConversation {
|
||||
|
||||
/// [`ConversationManager`] is responsible for creating conversations and
|
||||
/// maintaining them in memory.
|
||||
#[derive(Clone)]
|
||||
struct ConversationEntry {
|
||||
conversation: Arc<CodexConversation>,
|
||||
session_configured: SessionConfiguredEvent,
|
||||
}
|
||||
|
||||
impl ConversationEntry {
|
||||
fn new(
|
||||
conversation: Arc<CodexConversation>,
|
||||
session_configured: SessionConfiguredEvent,
|
||||
) -> Self {
|
||||
Self {
|
||||
conversation,
|
||||
session_configured,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_new_conversation(&self, conversation_id: ConversationId) -> NewConversation {
|
||||
NewConversation {
|
||||
conversation_id,
|
||||
conversation: self.conversation.clone(),
|
||||
session_configured: self.session_configured.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ConversationManager {
|
||||
conversations: Arc<RwLock<HashMap<ConversationId, Arc<CodexConversation>>>>,
|
||||
conversations: Arc<RwLock<HashMap<ConversationId, ConversationEntry>>>,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
session_source: SessionSource,
|
||||
}
|
||||
@@ -99,10 +125,11 @@ impl ConversationManager {
|
||||
};
|
||||
|
||||
let conversation = Arc::new(CodexConversation::new(codex));
|
||||
let entry = ConversationEntry::new(conversation.clone(), session_configured.clone());
|
||||
self.conversations
|
||||
.write()
|
||||
.await
|
||||
.insert(conversation_id, conversation.clone());
|
||||
.insert(conversation_id, entry);
|
||||
|
||||
Ok(NewConversation {
|
||||
conversation_id,
|
||||
@@ -118,7 +145,7 @@ impl ConversationManager {
|
||||
let conversations = self.conversations.read().await;
|
||||
conversations
|
||||
.get(&conversation_id)
|
||||
.cloned()
|
||||
.map(|entry| entry.conversation.clone())
|
||||
.ok_or_else(|| CodexErr::ConversationNotFound(conversation_id))
|
||||
}
|
||||
|
||||
@@ -129,11 +156,22 @@ impl ConversationManager {
|
||||
auth_manager: Arc<AuthManager>,
|
||||
) -> CodexResult<NewConversation> {
|
||||
let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?;
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
conversation_id,
|
||||
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
|
||||
self.finalize_spawn(codex, conversation_id).await
|
||||
if let InitialHistory::Resumed(resumed) = &initial_history
|
||||
&& let Some(existing) = self
|
||||
.conversations
|
||||
.read()
|
||||
.await
|
||||
.get(&resumed.conversation_id)
|
||||
.cloned()
|
||||
{
|
||||
Ok(existing.to_new_conversation(resumed.conversation_id))
|
||||
} else {
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
conversation_id,
|
||||
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
|
||||
self.finalize_spawn(codex, conversation_id).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes the conversation from the manager's internal map, though the
|
||||
@@ -144,7 +182,11 @@ impl ConversationManager {
|
||||
&self,
|
||||
conversation_id: &ConversationId,
|
||||
) -> Option<Arc<CodexConversation>> {
|
||||
self.conversations.write().await.remove(conversation_id)
|
||||
self.conversations
|
||||
.write()
|
||||
.await
|
||||
.remove(conversation_id)
|
||||
.map(|entry| entry.conversation)
|
||||
}
|
||||
|
||||
/// Fork an existing conversation by taking messages up to the given position
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use http::Error as HttpError;
|
||||
use reqwest::IntoUrl;
|
||||
use reqwest::Method;
|
||||
use reqwest::Response;
|
||||
use reqwest::header::HeaderName;
|
||||
use reqwest::header::HeaderValue;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::OnceLock;
|
||||
@@ -22,6 +30,130 @@ use std::sync::OnceLock;
|
||||
pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));
|
||||
pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs";
|
||||
pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CodexHttpClient {
|
||||
inner: reqwest::Client,
|
||||
}
|
||||
|
||||
impl CodexHttpClient {
|
||||
fn new(inner: reqwest::Client) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub fn get<U>(&self, url: U) -> CodexRequestBuilder
|
||||
where
|
||||
U: IntoUrl,
|
||||
{
|
||||
self.request(Method::GET, url)
|
||||
}
|
||||
|
||||
pub fn post<U>(&self, url: U) -> CodexRequestBuilder
|
||||
where
|
||||
U: IntoUrl,
|
||||
{
|
||||
self.request(Method::POST, url)
|
||||
}
|
||||
|
||||
pub fn request<U>(&self, method: Method, url: U) -> CodexRequestBuilder
|
||||
where
|
||||
U: IntoUrl,
|
||||
{
|
||||
let url_str = url.as_str().to_string();
|
||||
CodexRequestBuilder::new(self.inner.request(method.clone(), url), method, url_str)
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use = "requests are not sent unless `send` is awaited"]
|
||||
#[derive(Debug)]
|
||||
pub struct CodexRequestBuilder {
|
||||
builder: reqwest::RequestBuilder,
|
||||
method: Method,
|
||||
url: String,
|
||||
}
|
||||
|
||||
impl CodexRequestBuilder {
|
||||
fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self {
|
||||
Self {
|
||||
builder,
|
||||
method,
|
||||
url,
|
||||
}
|
||||
}
|
||||
|
||||
fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self {
|
||||
Self {
|
||||
builder: f(self.builder),
|
||||
method: self.method,
|
||||
url: self.url,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn header<K, V>(self, key: K, value: V) -> Self
|
||||
where
|
||||
HeaderName: TryFrom<K>,
|
||||
<HeaderName as TryFrom<K>>::Error: Into<HttpError>,
|
||||
HeaderValue: TryFrom<V>,
|
||||
<HeaderValue as TryFrom<V>>::Error: Into<HttpError>,
|
||||
{
|
||||
self.map(|builder| builder.header(key, value))
|
||||
}
|
||||
|
||||
pub fn bearer_auth<T>(self, token: T) -> Self
|
||||
where
|
||||
T: Display,
|
||||
{
|
||||
self.map(|builder| builder.bearer_auth(token))
|
||||
}
|
||||
|
||||
pub fn json<T>(self, value: &T) -> Self
|
||||
where
|
||||
T: ?Sized + Serialize,
|
||||
{
|
||||
self.map(|builder| builder.json(value))
|
||||
}
|
||||
|
||||
pub async fn send(self) -> Result<Response, reqwest::Error> {
|
||||
match self.builder.send().await {
|
||||
Ok(response) => {
|
||||
let request_ids = Self::extract_request_ids(&response);
|
||||
tracing::debug!(
|
||||
method = %self.method,
|
||||
url = %self.url,
|
||||
status = %response.status(),
|
||||
request_ids = ?request_ids,
|
||||
version = ?response.version(),
|
||||
"Request completed"
|
||||
);
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
Err(error) => {
|
||||
let status = error.status();
|
||||
tracing::debug!(
|
||||
method = %self.method,
|
||||
url = %self.url,
|
||||
status = status.map(|s| s.as_u16()),
|
||||
error = %error,
|
||||
"Request failed"
|
||||
);
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_request_ids(response: &Response) -> HashMap<String, String> {
|
||||
["cf-ray", "x-request-id", "x-oai-request-id"]
|
||||
.iter()
|
||||
.filter_map(|&name| {
|
||||
let header_name = HeaderName::from_static(name);
|
||||
let value = response.headers().get(header_name)?;
|
||||
let value = value.to_str().ok()?.to_owned();
|
||||
Some((name.to_owned(), value))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Originator {
|
||||
pub value: String,
|
||||
@@ -124,8 +256,8 @@ fn sanitize_user_agent(candidate: String, fallback: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a reqwest client with default `originator` and `User-Agent` headers set.
|
||||
pub fn create_client() -> reqwest::Client {
|
||||
/// Create an HTTP client with default `originator` and `User-Agent` headers set.
|
||||
pub fn create_client() -> CodexHttpClient {
|
||||
use reqwest::header::HeaderMap;
|
||||
|
||||
let mut headers = HeaderMap::new();
|
||||
@@ -140,7 +272,8 @@ pub fn create_client() -> reqwest::Client {
|
||||
builder = builder.no_proxy();
|
||||
}
|
||||
|
||||
builder.build().unwrap_or_else(|_| reqwest::Client::new())
|
||||
let inner = builder.build().unwrap_or_else(|_| reqwest::Client::new());
|
||||
CodexHttpClient::new(inner)
|
||||
}
|
||||
|
||||
fn is_sandboxed() -> bool {
|
||||
|
||||
@@ -158,9 +158,6 @@ pub enum CodexErr {
|
||||
|
||||
#[error("{0}")]
|
||||
EnvVar(EnvVarError),
|
||||
|
||||
#[error("invalid input: {0}")]
|
||||
InvalidInput(String),
|
||||
}
|
||||
|
||||
impl From<CancelErr> for CodexErr {
|
||||
|
||||
@@ -39,6 +39,8 @@ pub enum Feature {
|
||||
ViewImageTool,
|
||||
/// Allow the model to request web searches.
|
||||
WebSearchRequest,
|
||||
/// Enable the model-based risk assessments for sandboxed commands.
|
||||
SandboxCommandAssessment,
|
||||
}
|
||||
|
||||
impl Feature {
|
||||
@@ -73,6 +75,7 @@ pub struct FeatureOverrides {
|
||||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub include_view_image_tool: Option<bool>,
|
||||
pub web_search_request: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
}
|
||||
|
||||
impl FeatureOverrides {
|
||||
@@ -137,6 +140,7 @@ impl Features {
|
||||
let mut features = Features::with_defaults();
|
||||
|
||||
let base_legacy = LegacyFeatureToggles {
|
||||
experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
|
||||
experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
|
||||
experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
|
||||
experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
|
||||
@@ -154,6 +158,8 @@ impl Features {
|
||||
let profile_legacy = LegacyFeatureToggles {
|
||||
include_apply_patch_tool: config_profile.include_apply_patch_tool,
|
||||
include_view_image_tool: config_profile.include_view_image_tool,
|
||||
experimental_sandbox_command_assessment: config_profile
|
||||
.experimental_sandbox_command_assessment,
|
||||
experimental_use_freeform_apply_patch: config_profile
|
||||
.experimental_use_freeform_apply_patch,
|
||||
experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
|
||||
@@ -236,4 +242,10 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SandboxCommandAssessment,
|
||||
key: "experimental_sandbox_command_assessment",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -9,6 +9,10 @@ struct Alias {
|
||||
}
|
||||
|
||||
const ALIASES: &[Alias] = &[
|
||||
Alias {
|
||||
legacy_key: "experimental_sandbox_command_assessment",
|
||||
feature: Feature::SandboxCommandAssessment,
|
||||
},
|
||||
Alias {
|
||||
legacy_key: "experimental_use_unified_exec_tool",
|
||||
feature: Feature::UnifiedExec,
|
||||
@@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
|
||||
pub struct LegacyFeatureToggles {
|
||||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub include_view_image_tool: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_use_exec_command_tool: Option<bool>,
|
||||
pub experimental_use_unified_exec_tool: Option<bool>,
|
||||
@@ -69,6 +74,12 @@ impl LegacyFeatureToggles {
|
||||
self.include_apply_patch_tool,
|
||||
"include_apply_patch_tool",
|
||||
);
|
||||
set_if_some(
|
||||
features,
|
||||
Feature::SandboxCommandAssessment,
|
||||
self.experimental_sandbox_command_assessment,
|
||||
"experimental_sandbox_command_assessment",
|
||||
);
|
||||
set_if_some(
|
||||
features,
|
||||
Feature::ApplyPatchFreeform,
|
||||
|
||||
@@ -49,7 +49,7 @@ const MCP_TOOL_NAME_DELIMITER: &str = "__";
|
||||
const MAX_TOOL_NAME_LENGTH: usize = 64;
|
||||
|
||||
/// Default timeout for initializing MCP server & initially listing tools.
|
||||
const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
pub const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Default timeout for individual tool calls.
|
||||
const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
//! key. These override or extend the defaults at runtime.
|
||||
|
||||
use crate::CodexAuth;
|
||||
use crate::default_client::CodexHttpClient;
|
||||
use crate::default_client::CodexRequestBuilder;
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -95,7 +97,7 @@ pub struct ModelProviderInfo {
|
||||
|
||||
impl ModelProviderInfo {
|
||||
/// Construct a `POST` RequestBuilder for the given URL using the provided
|
||||
/// reqwest Client applying:
|
||||
/// [`CodexHttpClient`] applying:
|
||||
/// • provider-specific headers (static + env based)
|
||||
/// • Bearer auth header when an API key is available.
|
||||
/// • Auth token for OAuth.
|
||||
@@ -104,9 +106,9 @@ impl ModelProviderInfo {
|
||||
/// one produced by [`ModelProviderInfo::api_key`].
|
||||
pub async fn create_request_builder<'a>(
|
||||
&'a self,
|
||||
client: &'a reqwest::Client,
|
||||
client: &'a CodexHttpClient,
|
||||
auth: &Option<CodexAuth>,
|
||||
) -> crate::error::Result<reqwest::RequestBuilder> {
|
||||
) -> crate::error::Result<CodexRequestBuilder> {
|
||||
let effective_auth = if let Some(secret_key) = &self.experimental_bearer_token {
|
||||
Some(CodexAuth::from_api_key(secret_key))
|
||||
} else {
|
||||
@@ -187,9 +189,9 @@ impl ModelProviderInfo {
|
||||
}
|
||||
|
||||
/// Apply provider-specific HTTP headers (both static and environment-based)
|
||||
/// onto an existing `reqwest::RequestBuilder` and return the updated
|
||||
/// onto an existing [`CodexRequestBuilder`] and return the updated
|
||||
/// builder.
|
||||
fn apply_http_headers(&self, mut builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
|
||||
fn apply_http_headers(&self, mut builder: CodexRequestBuilder) -> CodexRequestBuilder {
|
||||
if let Some(extra) = &self.http_headers {
|
||||
for (k, v) in extra {
|
||||
builder = builder.header(k, v);
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use crate::codex::Session;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::error::Result as CodexResult;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
@@ -14,7 +13,7 @@ pub(crate) async fn process_items(
|
||||
is_review_mode: bool,
|
||||
review_thread_history: &mut ConversationHistory,
|
||||
sess: &Session,
|
||||
) -> CodexResult<(Vec<ResponseInputItem>, Vec<ResponseItem>)> {
|
||||
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
|
||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||
let mut responses = Vec::<ResponseInputItem>::new();
|
||||
for processed_response_item in processed_items {
|
||||
@@ -103,11 +102,11 @@ pub(crate) async fn process_items(
|
||||
// Only attempt to take the lock if there is something to record.
|
||||
if !items_to_record_in_conversation_history.is_empty() {
|
||||
if is_review_mode {
|
||||
review_thread_history.record_items(items_to_record_in_conversation_history.iter())?;
|
||||
review_thread_history.record_items(items_to_record_in_conversation_history.iter());
|
||||
} else {
|
||||
sess.record_conversation_items(&items_to_record_in_conversation_history)
|
||||
.await?;
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok((responses, items_to_record_in_conversation_history))
|
||||
(responses, items_to_record_in_conversation_history)
|
||||
}
|
||||
|
||||
275
codex-rs/core/src/sandboxing/assessment.rs
Normal file
275
codex-rs/core/src/sandboxing/assessment.rs
Normal file
@@ -0,0 +1,275 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::ModelProviderInfo;
|
||||
use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use askama::Template;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use futures::StreamExt;
|
||||
use serde_json::json;
|
||||
use tokio::time::timeout;
|
||||
use tracing::warn;
|
||||
|
||||
const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
|
||||
"data_deletion",
|
||||
"data_exfiltration",
|
||||
"privilege_escalation",
|
||||
"system_modification",
|
||||
"network_access",
|
||||
"resource_exhaustion",
|
||||
"compliance",
|
||||
];
|
||||
|
||||
#[derive(Template)]
|
||||
#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
|
||||
struct SandboxAssessmentPromptTemplate<'a> {
|
||||
platform: &'a str,
|
||||
sandbox_policy: &'a str,
|
||||
filesystem_roots: Option<&'a str>,
|
||||
working_directory: &'a str,
|
||||
command_argv: &'a str,
|
||||
command_joined: &'a str,
|
||||
sandbox_failure_message: Option<&'a str>,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn assess_command(
|
||||
config: Arc<Config>,
|
||||
provider: ModelProviderInfo,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
parent_otel: &OtelEventManager,
|
||||
conversation_id: ConversationId,
|
||||
call_id: &str,
|
||||
command: &[String],
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
failure_message: Option<&str>,
|
||||
) -> Option<SandboxCommandAssessment> {
|
||||
if !config.experimental_sandbox_command_assessment || command.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
|
||||
let command_joined =
|
||||
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
|
||||
let failure = failure_message
|
||||
.map(str::trim)
|
||||
.filter(|msg| !msg.is_empty())
|
||||
.map(str::to_string);
|
||||
|
||||
let cwd_str = cwd.to_string_lossy().to_string();
|
||||
let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
|
||||
let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
|
||||
roots.sort();
|
||||
roots.dedup();
|
||||
|
||||
let platform = std::env::consts::OS;
|
||||
let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
|
||||
let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
|
||||
collected if collected.is_empty() => None,
|
||||
collected => Some(collected.join(", ")),
|
||||
};
|
||||
|
||||
let prompt_template = SandboxAssessmentPromptTemplate {
|
||||
platform,
|
||||
sandbox_policy: sandbox_summary.as_str(),
|
||||
filesystem_roots: filesystem_roots.as_deref(),
|
||||
working_directory: cwd_str.as_str(),
|
||||
command_argv: command_json.as_str(),
|
||||
command_joined: command_joined.as_str(),
|
||||
sandbox_failure_message: failure.as_deref(),
|
||||
};
|
||||
let rendered_prompt = match prompt_template.render() {
|
||||
Ok(rendered) => rendered,
|
||||
Err(err) => {
|
||||
warn!("failed to render sandbox assessment prompt: {err}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
|
||||
Some(split) => split,
|
||||
None => {
|
||||
warn!("rendered sandbox assessment prompt missing separator");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let system_prompt = system_prompt_section
|
||||
.strip_prefix("System Prompt:\n")
|
||||
.unwrap_or(system_prompt_section)
|
||||
.trim()
|
||||
.to_string();
|
||||
let user_prompt = user_prompt_section
|
||||
.strip_prefix("User Prompt:\n")
|
||||
.unwrap_or(user_prompt_section)
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
let prompt = Prompt {
|
||||
input: vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: user_prompt }],
|
||||
}],
|
||||
tools: Vec::new(),
|
||||
parallel_tool_calls: false,
|
||||
base_instructions_override: Some(system_prompt),
|
||||
output_schema: Some(sandbox_assessment_schema()),
|
||||
};
|
||||
|
||||
let child_otel =
|
||||
parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str());
|
||||
|
||||
let client = ModelClient::new(
|
||||
Arc::clone(&config),
|
||||
Some(auth_manager),
|
||||
child_otel,
|
||||
provider,
|
||||
config.model_reasoning_effort,
|
||||
config.model_reasoning_summary,
|
||||
conversation_id,
|
||||
);
|
||||
|
||||
let start = Instant::now();
|
||||
let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
|
||||
let mut stream = client.stream(&prompt).await?;
|
||||
let mut last_json: Option<String> = None;
|
||||
while let Some(event) = stream.next().await {
|
||||
match event {
|
||||
Ok(ResponseEvent::OutputItemDone(item)) => {
|
||||
if let Some(text) = response_item_text(&item) {
|
||||
last_json = Some(text);
|
||||
}
|
||||
}
|
||||
Ok(ResponseEvent::RateLimits(_)) => {}
|
||||
Ok(ResponseEvent::Completed { .. }) => break,
|
||||
Ok(_) => continue,
|
||||
Err(err) => return Err(err),
|
||||
}
|
||||
}
|
||||
Ok(last_json)
|
||||
})
|
||||
.await;
|
||||
let duration = start.elapsed();
|
||||
parent_otel.sandbox_assessment_latency(call_id, duration);
|
||||
|
||||
match assessment_result {
|
||||
Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
|
||||
Ok(assessment) => {
|
||||
parent_otel.sandbox_assessment(
|
||||
call_id,
|
||||
"success",
|
||||
Some(assessment.risk_level),
|
||||
&assessment.risk_categories,
|
||||
duration,
|
||||
);
|
||||
return Some(assessment);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("failed to parse sandbox assessment JSON: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration);
|
||||
}
|
||||
},
|
||||
Ok(Ok(None)) => {
|
||||
warn!("sandbox assessment response did not include any message");
|
||||
parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration);
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
warn!("sandbox assessment failed: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration);
|
||||
}
|
||||
Err(_) => {
|
||||
warn!("sandbox assessment timed out");
|
||||
parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
|
||||
match policy {
|
||||
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
|
||||
SandboxPolicy::ReadOnly => "read-only".to_string(),
|
||||
SandboxPolicy::WorkspaceWrite { network_access, .. } => {
|
||||
let network = if *network_access {
|
||||
"network"
|
||||
} else {
|
||||
"no-network"
|
||||
};
|
||||
format!("workspace-write (network_access={network})")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut roots = vec![cwd.to_path_buf()];
|
||||
if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy {
|
||||
roots.extend(writable_roots.iter().cloned());
|
||||
}
|
||||
roots
|
||||
}
|
||||
|
||||
fn sandbox_assessment_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"required": ["description", "risk_level", "risk_categories"],
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 500
|
||||
},
|
||||
"risk_level": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"]
|
||||
},
|
||||
"risk_categories": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": SANDBOX_RISK_CATEGORY_VALUES
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
})
|
||||
}
|
||||
|
||||
fn response_item_text(item: &ResponseItem) -> Option<String> {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
let mut buffers: Vec<&str> = Vec::new();
|
||||
for segment in content {
|
||||
match segment {
|
||||
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
|
||||
if !text.is_empty() {
|
||||
buffers.push(text);
|
||||
}
|
||||
}
|
||||
ContentItem::InputImage { .. } => {}
|
||||
}
|
||||
}
|
||||
if buffers.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(buffers.join("\n"))
|
||||
}
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns low‑level
|
||||
sandbox placement and transformation of portable CommandSpec into a
|
||||
ready‑to‑spawn environment.
|
||||
*/
|
||||
|
||||
pub mod assessment;
|
||||
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
|
||||
@@ -4,7 +4,6 @@ use codex_protocol::models::ResponseItem;
|
||||
|
||||
use crate::codex::SessionConfiguration;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::protocol::TokenUsageInfo;
|
||||
@@ -27,13 +26,12 @@ impl SessionState {
|
||||
}
|
||||
|
||||
// History helpers
|
||||
pub(crate) fn record_items<I>(&mut self, items: I) -> Result<(), CodexErr>
|
||||
pub(crate) fn record_items<I>(&mut self, items: I)
|
||||
where
|
||||
I: IntoIterator,
|
||||
I::Item: std::ops::Deref<Target = ResponseItem>,
|
||||
{
|
||||
self.history.record_items(items)?;
|
||||
Ok(())
|
||||
self.history.record_items(items)
|
||||
}
|
||||
|
||||
pub(crate) fn history_snapshot(&mut self) -> Vec<ResponseItem> {
|
||||
@@ -68,14 +66,7 @@ impl SessionState {
|
||||
pub(crate) fn token_info_and_rate_limits(
|
||||
&self,
|
||||
) -> (Option<TokenUsageInfo>, Option<RateLimitSnapshot>) {
|
||||
let info = self.token_info().and_then(|info| {
|
||||
if info.total_token_usage.is_zero() && info.last_token_usage.is_zero() {
|
||||
None
|
||||
} else {
|
||||
Some(info)
|
||||
}
|
||||
});
|
||||
(info, self.latest_rate_limits.clone())
|
||||
(self.token_info(), self.latest_rate_limits.clone())
|
||||
}
|
||||
|
||||
pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
|
||||
|
||||
@@ -5,7 +5,6 @@ use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::codex::TurnContext;
|
||||
use crate::codex::compact;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::state::TaskKind;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
|
||||
@@ -27,7 +26,7 @@ impl SessionTask for CompactTask {
|
||||
ctx: Arc<TurnContext>,
|
||||
input: Vec<UserInput>,
|
||||
_cancellation_token: CancellationToken,
|
||||
) -> CodexResult<Option<String>> {
|
||||
) -> Option<String> {
|
||||
compact::run_compact_task(session.clone_session(), ctx, input).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,8 +15,6 @@ use tracing::warn;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::protocol::ErrorEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::TaskCompleteEvent;
|
||||
use crate::protocol::TurnAbortReason;
|
||||
@@ -58,7 +56,7 @@ pub(crate) trait SessionTask: Send + Sync + 'static {
|
||||
ctx: Arc<TurnContext>,
|
||||
input: Vec<UserInput>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<Option<String>>;
|
||||
) -> Option<String>;
|
||||
|
||||
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
||||
let _ = (session, ctx);
|
||||
@@ -88,7 +86,7 @@ impl Session {
|
||||
let task_cancellation_token = cancellation_token.child_token();
|
||||
tokio::spawn(async move {
|
||||
let ctx_for_finish = Arc::clone(&ctx);
|
||||
let run_result = task_for_run
|
||||
let last_agent_message = task_for_run
|
||||
.run(
|
||||
Arc::clone(&session_ctx),
|
||||
ctx,
|
||||
@@ -100,21 +98,8 @@ impl Session {
|
||||
if !task_cancellation_token.is_cancelled() {
|
||||
// Emit completion uniformly from spawn site so all tasks share the same lifecycle.
|
||||
let sess = session_ctx.clone_session();
|
||||
match run_result {
|
||||
Ok(last_agent_message) => {
|
||||
sess.on_task_finished(ctx_for_finish, last_agent_message)
|
||||
.await;
|
||||
}
|
||||
Err(err) => {
|
||||
let message = err.to_string();
|
||||
sess.send_event(
|
||||
ctx_for_finish.as_ref(),
|
||||
EventMsg::Error(ErrorEvent { message }),
|
||||
)
|
||||
.await;
|
||||
sess.on_task_finished(ctx_for_finish, None).await;
|
||||
}
|
||||
}
|
||||
sess.on_task_finished(ctx_for_finish, last_agent_message)
|
||||
.await;
|
||||
}
|
||||
done_clone.notify_waiters();
|
||||
})
|
||||
|
||||
@@ -5,7 +5,6 @@ use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::codex::TurnContext;
|
||||
use crate::codex::run_task;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::state::TaskKind;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
|
||||
@@ -27,7 +26,7 @@ impl SessionTask for RegularTask {
|
||||
ctx: Arc<TurnContext>,
|
||||
input: Vec<UserInput>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<Option<String>> {
|
||||
) -> Option<String> {
|
||||
let sess = session.clone_session();
|
||||
run_task(sess, ctx, input, TaskKind::Regular, cancellation_token).await
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@ use tokio_util::sync::CancellationToken;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::codex::exit_review_mode;
|
||||
use crate::codex::run_task;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::state::TaskKind;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
|
||||
@@ -28,12 +27,12 @@ impl SessionTask for ReviewTask {
|
||||
ctx: Arc<TurnContext>,
|
||||
input: Vec<UserInput>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> CodexResult<Option<String>> {
|
||||
) -> Option<String> {
|
||||
let sess = session.clone_session();
|
||||
run_task(sess, ctx, input, TaskKind::Review, cancellation_token).await
|
||||
}
|
||||
|
||||
async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
|
||||
let _ = exit_review_mode(session.clone_session(), ctx, None).await;
|
||||
exit_review_mode(session.clone_session(), ctx, None).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,9 +7,11 @@ retry without sandbox on denial (no re‑approval thanks to caching).
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
@@ -38,6 +40,7 @@ impl ToolOrchestrator {
|
||||
) -> Result<Out, ToolError>
|
||||
where
|
||||
T: ToolRuntime<Rq, Out>,
|
||||
Rq: ProvidesSandboxRetryData,
|
||||
{
|
||||
let otel = turn_ctx.client.get_otel_event_manager();
|
||||
let otel_tn = &tool_ctx.tool_name;
|
||||
@@ -56,6 +59,7 @@ impl ToolOrchestrator {
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
risk: None,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
@@ -107,12 +111,33 @@ impl ToolOrchestrator {
|
||||
|
||||
// Ask for approval before retrying without sandbox.
|
||||
if !tool.should_bypass_approval(approval_policy, already_approved) {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
let err = SandboxErr::Denied {
|
||||
output: output.clone(),
|
||||
};
|
||||
let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
|
||||
let failure_summary = format!("failed in sandbox: {friendly}");
|
||||
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
Some(failure_summary.as_str()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let reason_msg = build_denial_reason_from_output(output.as_ref());
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: Some(reason_msg),
|
||||
risk,
|
||||
};
|
||||
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
@@ -10,7 +10,9 @@ use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
@@ -32,6 +34,12 @@ pub struct ApplyPatchRequest {
|
||||
pub codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ApplyPatchRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ApplyPatchRuntime;
|
||||
|
||||
@@ -106,9 +114,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let risk = ctx.risk.clone();
|
||||
let user_explicitly_approved = req.user_explicitly_approved;
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
with_cached_approval(&session.services, key, move || async move {
|
||||
if let Some(reason) = retry_reason {
|
||||
session
|
||||
.request_command_approval(
|
||||
@@ -117,6 +126,7 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
vec!["apply_patch".to_string()],
|
||||
cwd,
|
||||
Some(reason),
|
||||
risk,
|
||||
)
|
||||
.await
|
||||
} else if user_explicitly_approved {
|
||||
|
||||
@@ -12,7 +12,9 @@ use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
@@ -34,6 +36,15 @@ pub struct ShellRequest {
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
Some(SandboxRetryData {
|
||||
command: self.command.clone(),
|
||||
cwd: self.cwd.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ShellRuntime;
|
||||
|
||||
@@ -90,13 +101,14 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let risk = ctx.risk.clone();
|
||||
let session = ctx.session;
|
||||
let turn = ctx.turn;
|
||||
let call_id = ctx.call_id.to_string();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
with_cached_approval(&session.services, key, move || async move {
|
||||
session
|
||||
.request_command_approval(turn, call_id, command, cwd, reason)
|
||||
.request_command_approval(turn, call_id, command, cwd, reason, risk)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -9,7 +9,9 @@ use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
@@ -31,6 +33,15 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
Some(SandboxRetryData {
|
||||
command: self.command.clone(),
|
||||
cwd: self.cwd.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub struct UnifiedExecApprovalKey {
|
||||
pub command: Vec<String>,
|
||||
@@ -85,10 +96,11 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx.retry_reason.clone();
|
||||
let risk = ctx.risk.clone();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(turn, call_id, command, cwd, reason)
|
||||
.request_command_approval(turn, call_id, command, cwd, reason, risk)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::SandboxCommandAssessment;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
@@ -18,6 +19,7 @@ use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use futures::Future;
|
||||
use futures::future::BoxFuture;
|
||||
@@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> {
|
||||
pub turn: &'a TurnContext,
|
||||
pub call_id: &'a str,
|
||||
pub retry_reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
@@ -156,6 +159,17 @@ pub(crate) struct ToolCtx<'a> {
|
||||
pub tool_name: String,
|
||||
}
|
||||
|
||||
/// Command metadata a tool request exposes so it can be re-run without
/// sandboxing.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SandboxRetryData {
    /// The request's command as an argv vector.
    pub command: Vec<String>,
    /// The request's working directory.
    pub cwd: PathBuf,
}
|
||||
|
||||
pub(crate) trait ProvidesSandboxRetryData {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum ToolError {
|
||||
Rejected(String),
|
||||
|
||||
27
codex-rs/core/templates/sandboxing/assessment_prompt.md
Normal file
27
codex-rs/core/templates/sandboxing/assessment_prompt.md
Normal file
@@ -0,0 +1,27 @@
|
||||
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
|
||||
- description (concise summary, at most two sentences)
|
||||
- risk_level ("low", "medium", or "high")
|
||||
- risk_categories (optional array of zero or more category strings)
|
||||
Risk level examples:
|
||||
- low: read-only inspections, listing files, printing configuration
|
||||
- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
|
||||
- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
|
||||
Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
|
||||
Use multiple categories when appropriate.
|
||||
If information is insufficient, choose the most cautious risk level supported by the evidence.
|
||||
Respond with JSON only, without markdown code fences or extra commentary.
|
||||
|
||||
---
|
||||
|
||||
Command metadata:
|
||||
Platform: {{ platform }}
|
||||
Sandbox policy: {{ sandbox_policy }}
|
||||
{% if let Some(roots) = filesystem_roots %}
|
||||
Filesystem roots: {{ roots }}
|
||||
{% endif %}
|
||||
Working directory: {{ working_directory }}
|
||||
Command argv: {{ command_argv }}
|
||||
Command (joined): {{ command_joined }}
|
||||
{% if let Some(message) = sandbox_failure_message %}
|
||||
Sandbox failure message: {{ message }}
|
||||
{% endif %}
|
||||
@@ -247,7 +247,11 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
||||
session_configured,
|
||||
..
|
||||
} = conversation_manager
|
||||
.resume_conversation_from_rollout(config, session_path.clone(), auth_manager)
|
||||
.resume_conversation_from_rollout(
|
||||
config.clone(),
|
||||
session_path.clone(),
|
||||
auth_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
|
||||
@@ -260,6 +264,23 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
||||
let expected_initial_json = json!([]);
|
||||
assert_eq!(initial_json, expected_initial_json);
|
||||
|
||||
let NewConversation {
|
||||
conversation: codex_again,
|
||||
session_configured: session_configured_again,
|
||||
..
|
||||
} = conversation_manager
|
||||
.resume_conversation_from_rollout(
|
||||
config.clone(),
|
||||
session_path.clone(),
|
||||
auth_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.expect("resume existing conversation");
|
||||
assert!(Arc::ptr_eq(&codex, &codex_again));
|
||||
let session_configured_json = serde_json::to_value(&session_configured).unwrap();
|
||||
let session_configured_again_json = serde_json::to_value(&session_configured_again).unwrap();
|
||||
assert_eq!(session_configured_json, session_configured_again_json);
|
||||
|
||||
// 2) Submit new input; the request body must include the prior item followed by the new user input.
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
|
||||
@@ -279,6 +279,11 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
ev_completed_with_tokens("r2", 330_000),
|
||||
]);
|
||||
|
||||
let sse3 = sse(vec![
|
||||
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
|
||||
ev_completed_with_tokens("r3", 200),
|
||||
]);
|
||||
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(FIRST_AUTO_MSG)
|
||||
@@ -295,6 +300,12 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
@@ -331,28 +342,69 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
|
||||
let EventMsg::Error(error_event) = error_event else {
|
||||
unreachable!("wait_for_event returned unexpected payload");
|
||||
};
|
||||
assert_eq!(error_event.message, "invalid input: input too large");
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
// wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
2,
|
||||
"auto compact should reject oversize prompts before issuing another request"
|
||||
assert!(
|
||||
requests.len() >= 3,
|
||||
"auto compact should add at least a third request, got {}",
|
||||
requests.len()
|
||||
);
|
||||
let saw_compact_prompt = requests.iter().any(|req| {
|
||||
let is_auto_compact = |req: &wiremock::Request| {
|
||||
std::str::from_utf8(&req.body)
|
||||
.unwrap_or("")
|
||||
.contains("You have exceeded the maximum number of tokens")
|
||||
});
|
||||
assert!(
|
||||
!saw_compact_prompt,
|
||||
"no auto compact request should be sent when the summarization prompt exceeds the limit"
|
||||
};
|
||||
let auto_compact_count = requests.iter().filter(|req| is_auto_compact(req)).count();
|
||||
assert_eq!(
|
||||
auto_compact_count, 1,
|
||||
"expected exactly one auto compact request"
|
||||
);
|
||||
let auto_compact_index = requests
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find_map(|(idx, req)| is_auto_compact(req).then_some(idx))
|
||||
.expect("auto compact request missing");
|
||||
assert_eq!(
|
||||
auto_compact_index, 2,
|
||||
"auto compact should add a third request"
|
||||
);
|
||||
|
||||
let body_first = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body3 = requests[auto_compact_index]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let instructions = body3
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default();
|
||||
let baseline_instructions = body_first
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert_eq!(
|
||||
instructions, baseline_instructions,
|
||||
"auto compact should keep the standard developer instructions",
|
||||
);
|
||||
|
||||
let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let last3 = input3
|
||||
.last()
|
||||
.expect("auto compact request should append a user message");
|
||||
assert_eq!(last3.get("type").and_then(|v| v.as_str()), Some("message"));
|
||||
assert_eq!(last3.get("role").and_then(|v| v.as_str()), Some("user"));
|
||||
let last_text = last3
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|item| item.get("text"))
|
||||
.and_then(|text| text.as_str())
|
||||
.unwrap_or_default();
|
||||
assert_eq!(
|
||||
last_text, SUMMARIZATION_PROMPT,
|
||||
"auto compact should send the summarization prompt as a user message",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -817,7 +869,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let context_window = 20_000;
|
||||
let context_window = 100;
|
||||
let limit = context_window * 90 / 100;
|
||||
let over_limit_tokens = context_window * 95 / 100 + 1;
|
||||
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event_with_timeout;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn input_validation_should_fail_for_too_large_input() {
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let fixture = test_codex().build(&server).await.unwrap();
|
||||
let codex = Arc::clone(&fixture.codex);
|
||||
|
||||
// First: normal message with a mocked assistant response
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "ok"),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once_match(&server, any(), first_response).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "hello world".into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Wait for the normal turn to complete before sending the oversized input
|
||||
let turn_timeout = Duration::from_secs(1);
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| matches!(ev, EventMsg::TaskComplete(_)),
|
||||
turn_timeout,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Then: 300k-token message should trigger validation error
|
||||
let wait_timeout = Duration::from_millis(100);
|
||||
let input_300_tokens = "token ".repeat(300_000);
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: input_300_tokens,
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let error_event =
|
||||
wait_for_event_with_timeout(&codex, |ev| matches!(ev, EventMsg::Error(_)), wait_timeout)
|
||||
.await;
|
||||
let EventMsg::Error(error_event) = error_event else {
|
||||
unreachable!("wait_for_event_with_timeout returned unexpected payload");
|
||||
};
|
||||
assert_eq!(error_event.message, "invalid input: input too large");
|
||||
}
|
||||
@@ -13,7 +13,6 @@ mod compact_resume_fork;
|
||||
mod exec;
|
||||
mod fork_conversation;
|
||||
mod grep_files;
|
||||
mod input_validation;
|
||||
mod items;
|
||||
mod json_result;
|
||||
mod list_dir;
|
||||
|
||||
@@ -156,6 +156,12 @@ async fn unified_exec_emits_exec_command_end_event() -> Result<()> {
|
||||
"cmd": "/bin/echo END-EVENT".to_string(),
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
let poll_call_id = "uexec-end-event-poll";
|
||||
let poll_args = json!({
|
||||
"chars": "",
|
||||
"session_id": 0,
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
@@ -165,9 +171,18 @@ async fn unified_exec_emits_exec_command_end_event() -> Result<()> {
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "finished"),
|
||||
ev_function_call(
|
||||
poll_call_id,
|
||||
"write_stdin",
|
||||
&serde_json::to_string(&poll_args)?,
|
||||
),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_assistant_message("msg-1", "finished"),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
|
||||
@@ -179,6 +179,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: oss.then_some(true),
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
// Parse `-c` overrides.
|
||||
|
||||
@@ -12,7 +12,7 @@ use anyhow::anyhow;
|
||||
use codex_protocol::ConversationId;
|
||||
use tracing_subscriber::fmt::writer::MakeWriter;
|
||||
|
||||
const DEFAULT_MAX_BYTES: usize = 2 * 1024 * 1024; // 2 MiB
|
||||
const DEFAULT_MAX_BYTES: usize = 4 * 1024 * 1024; // 4 MiB
|
||||
const SENTRY_DSN: &str =
|
||||
"https://ae32ed50620d7a7792c1ce5df38b3e3e@o33249.ingest.us.sentry.io/4510195390611458";
|
||||
const UPLOAD_TIMEOUT_SECS: u64 = 10;
|
||||
|
||||
@@ -158,6 +158,7 @@ impl CodexToolCallParam {
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: None,
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
|
||||
|
||||
@@ -178,6 +178,7 @@ async fn run_codex_tool_session_inner(
|
||||
cwd,
|
||||
call_id,
|
||||
reason: _,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => {
|
||||
handle_exec_approval_request(
|
||||
@@ -190,6 +191,7 @@ async fn run_codex_tool_session_inner(
|
||||
event.id.clone(),
|
||||
call_id,
|
||||
parsed_cmd,
|
||||
risk,
|
||||
)
|
||||
.await;
|
||||
continue;
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use mcp_types::ElicitRequest;
|
||||
use mcp_types::ElicitRequestParamsRequestedSchema;
|
||||
@@ -37,6 +38,8 @@ pub struct ExecApprovalElicitRequestParams {
|
||||
pub codex_command: Vec<String>,
|
||||
pub codex_cwd: PathBuf,
|
||||
pub codex_parsed_cmd: Vec<ParsedCommand>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub codex_risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
|
||||
@@ -59,6 +62,7 @@ pub(crate) async fn handle_exec_approval_request(
|
||||
event_id: String,
|
||||
call_id: String,
|
||||
codex_parsed_cmd: Vec<ParsedCommand>,
|
||||
codex_risk: Option<SandboxCommandAssessment>,
|
||||
) {
|
||||
let escaped_command =
|
||||
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
|
||||
@@ -81,6 +85,7 @@ pub(crate) async fn handle_exec_approval_request(
|
||||
codex_command: command,
|
||||
codex_cwd: cwd,
|
||||
codex_parsed_cmd,
|
||||
codex_risk,
|
||||
};
|
||||
let params_json = match serde_json::to_value(¶ms) {
|
||||
Ok(value) => value,
|
||||
|
||||
@@ -196,6 +196,7 @@ fn create_expected_elicitation_request(
|
||||
codex_cwd: workdir.to_path_buf(),
|
||||
codex_call_id: "call1234".to_string(),
|
||||
codex_parsed_cmd,
|
||||
codex_risk: None,
|
||||
})?),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -8,6 +8,8 @@ use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SandboxRiskCategory;
|
||||
use codex_protocol::protocol::SandboxRiskLevel;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use eventsource_stream::Event as StreamEvent;
|
||||
use eventsource_stream::EventStreamError as StreamError;
|
||||
@@ -366,6 +368,63 @@ impl OtelEventManager {
|
||||
);
|
||||
}
|
||||
|
||||
pub fn sandbox_assessment(
|
||||
&self,
|
||||
call_id: &str,
|
||||
status: &str,
|
||||
risk_level: Option<SandboxRiskLevel>,
|
||||
risk_categories: &[SandboxRiskCategory],
|
||||
duration: Duration,
|
||||
) {
|
||||
let level = risk_level.map(|level| level.as_str());
|
||||
let categories = if risk_categories.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
risk_categories
|
||||
.iter()
|
||||
.map(SandboxRiskCategory::as_str)
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
};
|
||||
|
||||
tracing::event!(
|
||||
tracing::Level::INFO,
|
||||
event.name = "codex.sandbox_assessment",
|
||||
event.timestamp = %timestamp(),
|
||||
conversation.id = %self.metadata.conversation_id,
|
||||
app.version = %self.metadata.app_version,
|
||||
auth_mode = self.metadata.auth_mode,
|
||||
user.account_id = self.metadata.account_id,
|
||||
user.email = self.metadata.account_email,
|
||||
terminal.type = %self.metadata.terminal_type,
|
||||
model = %self.metadata.model,
|
||||
slug = %self.metadata.slug,
|
||||
call_id = %call_id,
|
||||
status = %status,
|
||||
risk_level = level,
|
||||
risk_categories = categories,
|
||||
duration_ms = %duration.as_millis(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) {
|
||||
tracing::event!(
|
||||
tracing::Level::INFO,
|
||||
event.name = "codex.sandbox_assessment_latency",
|
||||
event.timestamp = %timestamp(),
|
||||
conversation.id = %self.metadata.conversation_id,
|
||||
app.version = %self.metadata.app_version,
|
||||
auth_mode = self.metadata.auth_mode,
|
||||
user.account_id = self.metadata.account_id,
|
||||
user.email = self.metadata.account_email,
|
||||
terminal.type = %self.metadata.terminal_type,
|
||||
model = %self.metadata.model,
|
||||
slug = %self.metadata.slug,
|
||||
call_id = %call_id,
|
||||
duration_ms = %duration.as_millis(),
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn log_tool_result<F, Fut, E>(
|
||||
&self,
|
||||
tool_name: &str,
|
||||
|
||||
91
codex-rs/protocol/src/approvals.rs
Normal file
91
codex-rs/protocol/src/approvals.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::parse_command::ParsedCommand;
|
||||
use crate::protocol::FileChange;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use ts_rs::TS;
|
||||
|
||||
/// Risk level recorded in a [`SandboxCommandAssessment`].
///
/// Serialized in snake_case (`low`, `medium`, `high`).
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SandboxRiskLevel {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
/// Category of risk listed in a [`SandboxCommandAssessment`].
///
/// Serialized in snake_case (e.g. `data_deletion`, `network_access`).
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SandboxRiskCategory {
|
||||
DataDeletion,
|
||||
DataExfiltration,
|
||||
PrivilegeEscalation,
|
||||
SystemModification,
|
||||
NetworkAccess,
|
||||
ResourceExhaustion,
|
||||
Compliance,
|
||||
}
|
||||
|
||||
/// Risk assessment of a command: a free-text description, an overall risk
/// level, and zero or more risk categories.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
pub struct SandboxCommandAssessment {
|
||||
/// Free-text description of the assessment.
pub description: String,
|
||||
/// Overall risk level assigned to the command.
pub risk_level: SandboxRiskLevel,
|
||||
/// Risk categories that apply; defaults to empty when absent on input and
/// is omitted from the serialized form when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub risk_categories: Vec<SandboxRiskCategory>,
|
||||
}
|
||||
|
||||
impl SandboxRiskLevel {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Low => "low",
|
||||
Self::Medium => "medium",
|
||||
Self::High => "high",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SandboxRiskCategory {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::DataDeletion => "data_deletion",
|
||||
Self::DataExfiltration => "data_exfiltration",
|
||||
Self::PrivilegeEscalation => "privilege_escalation",
|
||||
Self::SystemModification => "system_modification",
|
||||
Self::NetworkAccess => "network_access",
|
||||
Self::ResourceExhaustion => "resource_exhaustion",
|
||||
Self::Compliance => "compliance",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Event payload describing a command execution that needs approval.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
pub call_id: String,
|
||||
/// The command to be executed.
|
||||
pub command: Vec<String>,
|
||||
/// The command's working directory.
|
||||
pub cwd: PathBuf,
|
||||
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// Optional model-provided risk assessment describing the blocked command.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
/// NOTE(review): presumably the parsed form of `command` — confirm against the producer.
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
/// Event payload describing a patch application that needs approval.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ApplyPatchApprovalRequestEvent {
|
||||
/// Responses API call id for the associated patch apply call, if available.
|
||||
pub call_id: String,
|
||||
/// File changes in the patch, keyed by path.
pub changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod account;
|
||||
mod conversation_id;
|
||||
pub use conversation_id::ConversationId;
|
||||
pub mod approvals;
|
||||
pub mod config_types;
|
||||
pub mod custom_prompts;
|
||||
pub mod items;
|
||||
|
||||
@@ -34,6 +34,12 @@ use serde_with::serde_as;
|
||||
use strum_macros::Display;
|
||||
use ts_rs::TS;
|
||||
|
||||
pub use crate::approvals::ApplyPatchApprovalRequestEvent;
|
||||
pub use crate::approvals::ExecApprovalRequestEvent;
|
||||
pub use crate::approvals::SandboxCommandAssessment;
|
||||
pub use crate::approvals::SandboxRiskCategory;
|
||||
pub use crate::approvals::SandboxRiskLevel;
|
||||
|
||||
/// Open/close tags for special user-input blocks. Used across crates to avoid
|
||||
/// duplicated hardcoded strings.
|
||||
pub const USER_INSTRUCTIONS_OPEN_TAG: &str = "<user_instructions>";
|
||||
@@ -575,11 +581,9 @@ pub struct TokenUsage {
|
||||
pub total_tokens: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS, Default)]
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct TokenUsageInfo {
|
||||
/// The total token usage for the session, accumulated from all turns.
|
||||
pub total_token_usage: TokenUsage,
|
||||
/// The token usage for the last turn. Received from the API. Its total tokens is the whole window size.
|
||||
pub last_token_usage: TokenUsage,
|
||||
#[ts(type = "number | null")]
|
||||
pub model_context_window: Option<i64>,
|
||||
@@ -1128,33 +1132,6 @@ pub struct ExecCommandOutputDeltaEvent {
|
||||
pub chunk: Vec<u8>,
|
||||
}
|
||||
|
||||
/// Event payload describing a command execution that needs approval.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
pub call_id: String,
|
||||
/// The command to be executed.
|
||||
pub command: Vec<String>,
|
||||
/// The command's working directory.
|
||||
pub cwd: PathBuf,
|
||||
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// NOTE(review): presumably the parsed form of `command` — confirm against the producer.
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
/// Event payload describing a patch application that needs approval.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ApplyPatchApprovalRequestEvent {
|
||||
/// Responses API call id for the associated patch apply call, if available.
|
||||
pub call_id: String,
|
||||
/// File changes in the patch, keyed by path.
pub changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct BackgroundEventEvent {
|
||||
pub message: String,
|
||||
|
||||
@@ -19,6 +19,9 @@ use crate::render::renderable::Renderable;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::SandboxCommandAssessment;
|
||||
use codex_core::protocol::SandboxRiskCategory;
|
||||
use codex_core::protocol::SandboxRiskLevel;
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
use crossterm::event::KeyEventKind;
|
||||
@@ -38,6 +41,7 @@ pub(crate) enum ApprovalRequest {
|
||||
id: String,
|
||||
command: Vec<String>,
|
||||
reason: Option<String>,
|
||||
risk: Option<SandboxCommandAssessment>,
|
||||
},
|
||||
ApplyPatch {
|
||||
id: String,
|
||||
@@ -285,12 +289,17 @@ impl From<ApprovalRequest> for ApprovalRequestState {
|
||||
id,
|
||||
command,
|
||||
reason,
|
||||
risk,
|
||||
} => {
|
||||
let reason = reason.filter(|item| !item.is_empty());
|
||||
let has_reason = reason.is_some();
|
||||
let mut header: Vec<Line<'static>> = Vec::new();
|
||||
if let Some(reason) = reason
|
||||
&& !reason.is_empty()
|
||||
{
|
||||
if let Some(reason) = reason {
|
||||
header.push(Line::from(vec!["Reason: ".into(), reason.italic()]));
|
||||
}
|
||||
if let Some(risk) = risk.as_ref() {
|
||||
header.extend(render_risk_lines(risk));
|
||||
} else if has_reason {
|
||||
header.push(Line::from(""));
|
||||
}
|
||||
let full_cmd = strip_bash_lc_and_escape(&command);
|
||||
@@ -330,6 +339,52 @@ impl From<ApprovalRequest> for ApprovalRequestState {
|
||||
}
|
||||
}
|
||||
|
||||
fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> {
|
||||
let level_span = match risk.risk_level {
|
||||
SandboxRiskLevel::Low => "LOW".green().bold(),
|
||||
SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(),
|
||||
SandboxRiskLevel::High => "HIGH".red().bold(),
|
||||
};
|
||||
|
||||
let mut lines = Vec::new();
|
||||
|
||||
let description = risk.description.trim();
|
||||
if !description.is_empty() {
|
||||
lines.push(Line::from(vec![
|
||||
"Summary: ".into(),
|
||||
description.to_string().into(),
|
||||
]));
|
||||
}
|
||||
|
||||
let mut spans: Vec<Span<'static>> = vec!["Risk: ".into(), level_span];
|
||||
if !risk.risk_categories.is_empty() {
|
||||
spans.push(" (".into());
|
||||
for (idx, category) in risk.risk_categories.iter().enumerate() {
|
||||
if idx > 0 {
|
||||
spans.push(", ".into());
|
||||
}
|
||||
spans.push(risk_category_label(*category).into());
|
||||
}
|
||||
spans.push(")".into());
|
||||
}
|
||||
|
||||
lines.push(Line::from(spans));
|
||||
lines.push(Line::from(""));
|
||||
lines
|
||||
}
|
||||
|
||||
fn risk_category_label(category: SandboxRiskCategory) -> &'static str {
|
||||
match category {
|
||||
SandboxRiskCategory::DataDeletion => "data deletion",
|
||||
SandboxRiskCategory::DataExfiltration => "data exfiltration",
|
||||
SandboxRiskCategory::PrivilegeEscalation => "privilege escalation",
|
||||
SandboxRiskCategory::SystemModification => "system modification",
|
||||
SandboxRiskCategory::NetworkAccess => "network access",
|
||||
SandboxRiskCategory::ResourceExhaustion => "resource exhaustion",
|
||||
SandboxRiskCategory::Compliance => "compliance",
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum ApprovalVariant {
|
||||
Exec { id: String, command: Vec<String> },
|
||||
@@ -404,6 +459,7 @@ mod tests {
|
||||
id: "test".to_string(),
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
reason: Some("reason".to_string()),
|
||||
risk: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -445,6 +501,7 @@ mod tests {
|
||||
id: "test".into(),
|
||||
command,
|
||||
reason: None,
|
||||
risk: None,
|
||||
};
|
||||
|
||||
let view = ApprovalOverlay::new(exec_request, tx);
|
||||
|
||||
@@ -557,6 +557,7 @@ mod tests {
|
||||
id: "1".to_string(),
|
||||
command: vec!["echo".into(), "ok".into()],
|
||||
reason: None,
|
||||
risk: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -777,6 +777,7 @@ impl ChatWidget {
|
||||
id,
|
||||
command: ev.command,
|
||||
reason: ev.reason,
|
||||
risk: ev.risk,
|
||||
};
|
||||
self.bottom_pane.push_approval_request(request);
|
||||
self.request_redraw();
|
||||
@@ -1631,6 +1632,7 @@ impl ChatWidget {
|
||||
context_usage,
|
||||
&self.conversation_id,
|
||||
self.rate_limit_snapshot.as_ref(),
|
||||
Local::now(),
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
@@ -402,6 +402,7 @@ fn exec_approval_emits_proposed_command_and_decision_history() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -444,6 +445,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -492,6 +494,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
|
||||
command: vec!["bash".into(), "-lc".into(), long],
|
||||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
|
||||
reason: None,
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -1421,6 +1424,7 @@ fn approval_modal_exec_snapshot() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -1465,6 +1469,7 @@ fn approval_modal_exec_without_reason_snapshot() {
|
||||
command: vec!["bash".into(), "-lc".into(), "echo hello world".into()],
|
||||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
|
||||
reason: None,
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -1675,6 +1680,7 @@ fn status_widget_and_approval_modal_snapshot() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
|
||||
@@ -148,6 +148,7 @@ pub async fn run_main(
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: cli.oss.then_some(true),
|
||||
tools_web_search_request: cli.web_search.then_some(true),
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: additional_dirs,
|
||||
};
|
||||
let raw_overrides = cli.config_overrides.raw_overrides.clone();
|
||||
|
||||
@@ -3,6 +3,8 @@ use crate::history_cell::HistoryCell;
|
||||
use crate::history_cell::PlainHistoryCell;
|
||||
use crate::history_cell::with_border_with_inner_width;
|
||||
use crate::version::CODEX_CLI_VERSION;
|
||||
use chrono::DateTime;
|
||||
use chrono::Local;
|
||||
use codex_common::create_config_summary_entries;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
@@ -25,6 +27,7 @@ use super::helpers::format_directory_display;
|
||||
use super::helpers::format_tokens_compact;
|
||||
use super::rate_limits::RateLimitSnapshotDisplay;
|
||||
use super::rate_limits::StatusRateLimitData;
|
||||
use super::rate_limits::StatusRateLimitRow;
|
||||
use super::rate_limits::compose_rate_limit_data;
|
||||
use super::rate_limits::format_status_limit_summary;
|
||||
use super::rate_limits::render_status_limit_progress_bar;
|
||||
@@ -64,9 +67,17 @@ pub(crate) fn new_status_output(
|
||||
context_usage: Option<&TokenUsage>,
|
||||
session_id: &Option<ConversationId>,
|
||||
rate_limits: Option<&RateLimitSnapshotDisplay>,
|
||||
now: DateTime<Local>,
|
||||
) -> CompositeHistoryCell {
|
||||
let command = PlainHistoryCell::new(vec!["/status".magenta().into()]);
|
||||
let card = StatusHistoryCell::new(config, total_usage, context_usage, session_id, rate_limits);
|
||||
let card = StatusHistoryCell::new(
|
||||
config,
|
||||
total_usage,
|
||||
context_usage,
|
||||
session_id,
|
||||
rate_limits,
|
||||
now,
|
||||
);
|
||||
|
||||
CompositeHistoryCell::new(vec![Box::new(command), Box::new(card)])
|
||||
}
|
||||
@@ -78,6 +89,7 @@ impl StatusHistoryCell {
|
||||
context_usage: Option<&TokenUsage>,
|
||||
session_id: &Option<ConversationId>,
|
||||
rate_limits: Option<&RateLimitSnapshotDisplay>,
|
||||
now: DateTime<Local>,
|
||||
) -> Self {
|
||||
let config_entries = create_config_summary_entries(config);
|
||||
let (model_name, model_details) = compose_model_display(config, &config_entries);
|
||||
@@ -108,7 +120,7 @@ impl StatusHistoryCell {
|
||||
output: total_usage.output_tokens,
|
||||
context_window,
|
||||
};
|
||||
let rate_limits = compose_rate_limit_data(rate_limits);
|
||||
let rate_limits = compose_rate_limit_data(rate_limits, now);
|
||||
|
||||
Self {
|
||||
model_name,
|
||||
@@ -171,47 +183,66 @@ impl StatusHistoryCell {
|
||||
];
|
||||
}
|
||||
|
||||
let mut lines = Vec::with_capacity(rows_data.len() * 2);
|
||||
|
||||
for row in rows_data {
|
||||
let value_spans = vec![
|
||||
Span::from(render_status_limit_progress_bar(row.percent_used)),
|
||||
Span::from(" "),
|
||||
Span::from(format_status_limit_summary(row.percent_used)),
|
||||
];
|
||||
let base_spans = formatter.full_spans(row.label.as_str(), value_spans);
|
||||
let base_line = Line::from(base_spans.clone());
|
||||
|
||||
if let Some(resets_at) = row.resets_at.as_ref() {
|
||||
let resets_span = Span::from(format!("(resets {resets_at})")).dim();
|
||||
let mut inline_spans = base_spans.clone();
|
||||
inline_spans.push(Span::from(" ").dim());
|
||||
inline_spans.push(resets_span.clone());
|
||||
|
||||
if line_display_width(&Line::from(inline_spans.clone()))
|
||||
<= available_inner_width
|
||||
{
|
||||
lines.push(Line::from(inline_spans));
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
lines.push(formatter.continuation(vec![resets_span]));
|
||||
}
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
}
|
||||
}
|
||||
|
||||
self.rate_limit_row_lines(rows_data, available_inner_width, formatter)
|
||||
}
|
||||
StatusRateLimitData::Stale(rows_data) => {
|
||||
let mut lines =
|
||||
self.rate_limit_row_lines(rows_data, available_inner_width, formatter);
|
||||
lines.push(formatter.line(
|
||||
"Warning",
|
||||
vec![Span::from("limits may be stale - start new turn to refresh.").dim()],
|
||||
));
|
||||
lines
|
||||
}
|
||||
StatusRateLimitData::Missing => {
|
||||
vec![formatter.line(
|
||||
"Limits",
|
||||
vec![Span::from("send a message to load usage data").dim()],
|
||||
vec![
|
||||
Span::from("visit ").dim(),
|
||||
"chatgpt.com/codex/settings/usage".cyan().underlined(),
|
||||
],
|
||||
)]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn rate_limit_row_lines(
|
||||
&self,
|
||||
rows: &[StatusRateLimitRow],
|
||||
available_inner_width: usize,
|
||||
formatter: &FieldFormatter,
|
||||
) -> Vec<Line<'static>> {
|
||||
let mut lines = Vec::with_capacity(rows.len().saturating_mul(2));
|
||||
|
||||
for row in rows {
|
||||
let value_spans = vec![
|
||||
Span::from(render_status_limit_progress_bar(row.percent_used)),
|
||||
Span::from(" "),
|
||||
Span::from(format_status_limit_summary(row.percent_used)),
|
||||
];
|
||||
let base_spans = formatter.full_spans(row.label.as_str(), value_spans);
|
||||
let base_line = Line::from(base_spans.clone());
|
||||
|
||||
if let Some(resets_at) = row.resets_at.as_ref() {
|
||||
let resets_span = Span::from(format!("(resets {resets_at})")).dim();
|
||||
let mut inline_spans = base_spans.clone();
|
||||
inline_spans.push(Span::from(" ").dim());
|
||||
inline_spans.push(resets_span.clone());
|
||||
|
||||
if line_display_width(&Line::from(inline_spans.clone())) <= available_inner_width {
|
||||
lines.push(Line::from(inline_spans));
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
lines.push(formatter.continuation(vec![resets_span]));
|
||||
}
|
||||
} else {
|
||||
lines.push(base_line);
|
||||
}
|
||||
}
|
||||
|
||||
lines
|
||||
}
|
||||
|
||||
fn collect_rate_limit_labels(&self, seen: &mut BTreeSet<String>, labels: &mut Vec<String>) {
|
||||
match &self.rate_limits {
|
||||
StatusRateLimitData::Available(rows) => {
|
||||
@@ -223,6 +254,12 @@ impl StatusHistoryCell {
|
||||
}
|
||||
}
|
||||
}
|
||||
StatusRateLimitData::Stale(rows) => {
|
||||
for row in rows {
|
||||
push_label(labels, seen, row.label.as_str());
|
||||
}
|
||||
push_label(labels, seen, "Warning");
|
||||
}
|
||||
StatusRateLimitData::Missing => push_label(labels, seen, "Limits"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ use crate::chatwidget::get_limits_duration;
|
||||
|
||||
use super::helpers::format_reset_timestamp;
|
||||
use chrono::DateTime;
|
||||
use chrono::Duration as ChronoDuration;
|
||||
use chrono::Local;
|
||||
use chrono::Utc;
|
||||
use codex_core::protocol::RateLimitSnapshot;
|
||||
@@ -21,9 +22,12 @@ pub(crate) struct StatusRateLimitRow {
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) enum StatusRateLimitData {
|
||||
Available(Vec<StatusRateLimitRow>),
|
||||
Stale(Vec<StatusRateLimitRow>),
|
||||
Missing,
|
||||
}
|
||||
|
||||
pub(crate) const RATE_LIMIT_STALE_THRESHOLD_MINUTES: i64 = 15;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct RateLimitWindowDisplay {
|
||||
pub used_percent: f64,
|
||||
@@ -49,6 +53,7 @@ impl RateLimitWindowDisplay {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct RateLimitSnapshotDisplay {
|
||||
pub captured_at: DateTime<Local>,
|
||||
pub primary: Option<RateLimitWindowDisplay>,
|
||||
pub secondary: Option<RateLimitWindowDisplay>,
|
||||
}
|
||||
@@ -58,6 +63,7 @@ pub(crate) fn rate_limit_snapshot_display(
|
||||
captured_at: DateTime<Local>,
|
||||
) -> RateLimitSnapshotDisplay {
|
||||
RateLimitSnapshotDisplay {
|
||||
captured_at,
|
||||
primary: snapshot
|
||||
.primary
|
||||
.as_ref()
|
||||
@@ -71,6 +77,7 @@ pub(crate) fn rate_limit_snapshot_display(
|
||||
|
||||
pub(crate) fn compose_rate_limit_data(
|
||||
snapshot: Option<&RateLimitSnapshotDisplay>,
|
||||
now: DateTime<Local>,
|
||||
) -> StatusRateLimitData {
|
||||
match snapshot {
|
||||
Some(snapshot) => {
|
||||
@@ -102,8 +109,13 @@ pub(crate) fn compose_rate_limit_data(
|
||||
});
|
||||
}
|
||||
|
||||
let is_stale = now.signed_duration_since(snapshot.captured_at)
|
||||
> ChronoDuration::minutes(RATE_LIMIT_STALE_THRESHOLD_MINUTES);
|
||||
|
||||
if rows.is_empty() {
|
||||
StatusRateLimitData::Available(vec![])
|
||||
} else if is_stale {
|
||||
StatusRateLimitData::Stale(rows)
|
||||
} else {
|
||||
StatusRateLimitData::Available(rows)
|
||||
}
|
||||
|
||||
@@ -15,5 +15,5 @@ expression: sanitized
|
||||
│ │
|
||||
│ Token usage: 750 total (500 input + 250 output) │
|
||||
│ Context window: 100% left (750 used / 272K) │
|
||||
│ Limits: send a message to load usage data │
|
||||
│ Limits: visit chatgpt.com/codex/settings/usage │
|
||||
╰─────────────────────────────────────────────────────────────────╯
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
source: tui/src/status/tests.rs
|
||||
expression: sanitized
|
||||
---
|
||||
/status
|
||||
|
||||
╭─────────────────────────────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning none, summaries auto) │
|
||||
│ Directory: [[workspace]] │
|
||||
│ Approval: on-request │
|
||||
│ Sandbox: read-only │
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + 900 output) │
|
||||
│ Context window: 100% left (2.1K used / 272K) │
|
||||
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
|
||||
│ Weekly limit: [████████░░░░░░░░░░░░] 40% used (resets 03:34) │
|
||||
│ Warning: limits may be stale - start new turn to refresh. │
|
||||
╰─────────────────────────────────────────────────────────────────────╯
|
||||
@@ -111,7 +111,14 @@ fn status_snapshot_includes_reasoning_details() {
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -152,7 +159,14 @@ fn status_snapshot_includes_monthly_limit() {
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -178,7 +192,12 @@ fn status_card_token_usage_excludes_cached_tokens() {
|
||||
total_tokens: 2_100,
|
||||
};
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None);
|
||||
let now = chrono::Local
|
||||
.with_ymd_and_hms(2024, 1, 1, 0, 0, 0)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
|
||||
let rendered = render_lines(&composite.display_lines(120));
|
||||
|
||||
assert!(
|
||||
@@ -219,7 +238,14 @@ fn status_snapshot_truncates_in_narrow_terminal() {
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(46));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -246,7 +272,12 @@ fn status_snapshot_shows_missing_limits_message() {
|
||||
total_tokens: 750,
|
||||
};
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None);
|
||||
let now = chrono::Local
|
||||
.with_ymd_and_hms(2024, 2, 3, 4, 5, 6)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, None, now);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -282,7 +313,66 @@ fn status_snapshot_shows_empty_limits_message() {
|
||||
.expect("timestamp");
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
|
||||
let composite = new_status_output(&config, &usage, Some(&usage), &None, Some(&rate_display));
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
captured_at,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
*line = line.replace('\\', "/");
|
||||
}
|
||||
}
|
||||
let sanitized = sanitize_directory(rendered_lines).join("\n");
|
||||
assert_snapshot!(sanitized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn status_snapshot_shows_stale_limits_message() {
|
||||
let temp_home = TempDir::new().expect("temp home");
|
||||
let mut config = test_config(&temp_home);
|
||||
config.model = "gpt-5-codex".to_string();
|
||||
config.cwd = PathBuf::from("/workspace/tests");
|
||||
|
||||
let usage = TokenUsage {
|
||||
input_tokens: 1_200,
|
||||
cached_input_tokens: 200,
|
||||
output_tokens: 900,
|
||||
reasoning_output_tokens: 150,
|
||||
total_tokens: 2_250,
|
||||
};
|
||||
|
||||
let captured_at = chrono::Local
|
||||
.with_ymd_and_hms(2024, 1, 2, 3, 4, 5)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
let snapshot = RateLimitSnapshot {
|
||||
primary: Some(RateLimitWindow {
|
||||
used_percent: 72.5,
|
||||
window_minutes: Some(300),
|
||||
resets_at: Some(reset_at_from(&captured_at, 600)),
|
||||
}),
|
||||
secondary: Some(RateLimitWindow {
|
||||
used_percent: 40.0,
|
||||
window_minutes: Some(10_080),
|
||||
resets_at: Some(reset_at_from(&captured_at, 1_800)),
|
||||
}),
|
||||
};
|
||||
let rate_display = rate_limit_snapshot_display(&snapshot, captured_at);
|
||||
let now = captured_at + ChronoDuration::minutes(20);
|
||||
|
||||
let composite = new_status_output(
|
||||
&config,
|
||||
&usage,
|
||||
Some(&usage),
|
||||
&None,
|
||||
Some(&rate_display),
|
||||
now,
|
||||
);
|
||||
let mut rendered_lines = render_lines(&composite.display_lines(80));
|
||||
if cfg!(windows) {
|
||||
for line in &mut rendered_lines {
|
||||
@@ -314,7 +404,12 @@ fn status_context_window_uses_last_usage() {
|
||||
total_tokens: 13_679,
|
||||
};
|
||||
|
||||
let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None);
|
||||
let now = chrono::Local
|
||||
.with_ymd_and_hms(2024, 6, 1, 12, 0, 0)
|
||||
.single()
|
||||
.expect("timestamp");
|
||||
|
||||
let composite = new_status_output(&config, &total_usage, Some(&last_usage), &None, None, now);
|
||||
let rendered_lines = render_lines(&composite.display_lines(80));
|
||||
let context_line = rendered_lines
|
||||
.into_iter()
|
||||
|
||||
@@ -107,12 +107,6 @@ impl Tokenizer {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Tokenizer {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("Tokenizer").finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
11
docs/faq.md
11
docs/faq.md
@@ -42,3 +42,14 @@ Running Codex directly on Windows may work, but is not officially supported. We
|
||||
### Where should I start after installation?
|
||||
|
||||
Follow the quick setup in [Install & build](./install.md) and then jump into [Getting started](./getting-started.md) for interactive usage tips, prompt examples, and AGENTS.md guidance.
|
||||
|
||||
### `brew upgrade codex` isn't upgrading me
|
||||
|
||||
If you're running Codex v0.46.0 or older, `brew upgrade codex` will not move you to the latest version because we migrated from a Homebrew formula to a cask. To upgrade, uninstall the existing outdated formula and then install the new cask:
|
||||
|
||||
```bash
|
||||
brew uninstall --formula codex
|
||||
brew install --cask codex
|
||||
```
|
||||
|
||||
After reinstalling, `brew upgrade --cask codex` will keep future releases up to date.
|
||||
|
||||
Reference in New Issue
Block a user