mirror of
https://github.com/openai/codex.git
synced 2026-06-02 19:31:59 +00:00
Compare commits
16 Commits
main
...
starr/full
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
663100723b | ||
|
|
765f6639e2 | ||
|
|
c3245411dc | ||
|
|
715f8eb1fd | ||
|
|
830d3869c9 | ||
|
|
750e90633c | ||
|
|
cc85a4a2e9 | ||
|
|
1658bb5da9 | ||
|
|
2b67214208 | ||
|
|
457ec1f813 | ||
|
|
4fab9541eb | ||
|
|
560e339d86 | ||
|
|
a4bb775c0e | ||
|
|
5a0a64c3fb | ||
|
|
432bd259c8 | ||
|
|
a1c86fcdad |
@@ -47,6 +47,14 @@ on:
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
retry_archive:
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
archive_timeout_minutes:
|
||||
required: false
|
||||
default: 60
|
||||
type: number
|
||||
|
||||
# Caller workflow-level env does not flow through workflow_call, so keep the
|
||||
# Cargo git transport hardening on the archive and shard jobs directly here.
|
||||
@@ -57,7 +65,10 @@ jobs:
|
||||
archive:
|
||||
name: Build nextest archive
|
||||
runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
|
||||
timeout-minutes: 60
|
||||
continue-on-error: ${{ inputs.retry_archive }}
|
||||
timeout-minutes: ${{ inputs.archive_timeout_minutes }}
|
||||
outputs:
|
||||
completed: ${{ steps.archive_completed.outputs.completed }}
|
||||
defaults:
|
||||
run:
|
||||
working-directory: codex-rs
|
||||
@@ -71,7 +82,7 @@ jobs:
|
||||
SCCACHE_CACHE_SIZE: 10G
|
||||
NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
|
||||
TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
|
||||
steps:
|
||||
steps: &archive_steps
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
@@ -249,6 +260,11 @@ jobs:
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
- name: Mark archive complete
|
||||
id: archive_completed
|
||||
shell: bash
|
||||
run: echo "completed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Save cargo home cache
|
||||
if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
|
||||
continue-on-error: true
|
||||
@@ -286,9 +302,51 @@ jobs:
|
||||
echo '```';
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
archive_retry:
|
||||
name: Retry build nextest archive
|
||||
needs: archive
|
||||
if: inputs.retry_archive && always() && needs.archive.outputs.completed != 'true'
|
||||
runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: ${{ inputs.archive_timeout_minutes }}
|
||||
outputs:
|
||||
completed: ${{ steps.archive_completed.outputs.completed }}
|
||||
defaults:
|
||||
run:
|
||||
working-directory: codex-rs
|
||||
env:
|
||||
ARCHIVE_CACHE_RUNNER: ${{ inputs.archive_runner != '' && inputs.archive_runner || inputs.runner }}
|
||||
USE_SCCACHE: ${{ inputs.use_sccache && 'true' || 'false' }}
|
||||
CARGO_INCREMENTAL: "0"
|
||||
SCCACHE_CACHE_SIZE: 10G
|
||||
NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
|
||||
TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
|
||||
steps: *archive_steps
|
||||
|
||||
archive_retry_2:
|
||||
name: Retry build nextest archive 2
|
||||
needs: [archive, archive_retry]
|
||||
if: inputs.retry_archive && always() && needs.archive.outputs.completed != 'true' && needs.archive_retry.outputs.completed != 'true'
|
||||
runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
|
||||
timeout-minutes: ${{ inputs.archive_timeout_minutes }}
|
||||
outputs:
|
||||
completed: ${{ steps.archive_completed.outputs.completed }}
|
||||
defaults:
|
||||
run:
|
||||
working-directory: codex-rs
|
||||
env:
|
||||
ARCHIVE_CACHE_RUNNER: ${{ inputs.archive_runner != '' && inputs.archive_runner || inputs.runner }}
|
||||
USE_SCCACHE: ${{ inputs.use_sccache && 'true' || 'false' }}
|
||||
CARGO_INCREMENTAL: "0"
|
||||
SCCACHE_CACHE_SIZE: 10G
|
||||
NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
|
||||
TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
|
||||
steps: *archive_steps
|
||||
|
||||
shard:
|
||||
name: Tests shard ${{ matrix.shard }}/4
|
||||
needs: archive
|
||||
needs: [archive, archive_retry, archive_retry_2]
|
||||
if: always() && (needs.archive.outputs.completed == 'true' || needs.archive_retry.outputs.completed == 'true' || needs.archive_retry_2.outputs.completed == 'true')
|
||||
runs-on: ${{ inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
|
||||
timeout-minutes: 60
|
||||
defaults:
|
||||
|
||||
4
.github/workflows/rust-ci-full.yml
vendored
4
.github/workflows/rust-ci-full.yml
vendored
@@ -494,10 +494,13 @@ jobs:
|
||||
runner: ubuntu-24.04-arm
|
||||
runner_group: codex-runners
|
||||
runner_labels: codex-linux-arm64
|
||||
archive_runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-gnu
|
||||
profile: ci-test
|
||||
artifact_id: linux-arm64
|
||||
use_sccache: true
|
||||
retry_archive: true
|
||||
archive_timeout_minutes: 120
|
||||
secrets: inherit
|
||||
|
||||
tests_windows_x64:
|
||||
@@ -528,6 +531,7 @@ jobs:
|
||||
artifact_id: windows-arm64
|
||||
test_threads: 8
|
||||
use_sccache: true
|
||||
retry_archive: true
|
||||
secrets: inherit
|
||||
|
||||
# --- Gatherer job for the full post-merge workflow --------------------------
|
||||
|
||||
@@ -1544,7 +1544,7 @@ async fn remote_control_waits_for_account_id_before_enrolling() {
|
||||
.expect("auth with account id should save");
|
||||
auth_manager.reload().await;
|
||||
|
||||
let enroll_request = timeout(Duration::from_millis(100), accept_http_request(&listener))
|
||||
let enroll_request = timeout(Duration::from_secs(1), accept_http_request(&listener))
|
||||
.await
|
||||
.expect("auth change should wake remote control before the retry delay");
|
||||
assert_eq!(
|
||||
|
||||
@@ -779,12 +779,15 @@ mod tests {
|
||||
.as_deref()
|
||||
.is_some_and(|remedy| remedy.starts_with("Restart Codex"))
|
||||
}));
|
||||
assert!(
|
||||
check
|
||||
.details
|
||||
.iter()
|
||||
.any(|detail| detail.contains(missing_path.to_string_lossy().as_ref()))
|
||||
);
|
||||
assert!(check.details.iter().any(|detail| {
|
||||
detail.contains(
|
||||
missing_path
|
||||
.file_name()
|
||||
.expect("rollout path should have a file name")
|
||||
.to_string_lossy()
|
||||
.as_ref(),
|
||||
)
|
||||
}));
|
||||
}
|
||||
|
||||
struct Fixture {
|
||||
|
||||
@@ -1910,6 +1910,9 @@ async fn skill_roots_include_admin_with_lowest_priority() {
|
||||
if home_dir().is_some() {
|
||||
expected.insert(1, SkillScope::User);
|
||||
}
|
||||
if scopes.contains(&SkillScope::Repo) {
|
||||
expected.insert(0, SkillScope::Repo);
|
||||
}
|
||||
expected.push(SkillScope::Admin);
|
||||
assert_eq!(scopes, expected);
|
||||
}
|
||||
|
||||
@@ -2506,14 +2506,35 @@ impl Session {
|
||||
turn_context: &TurnContext,
|
||||
items: &[ResponseItem],
|
||||
) {
|
||||
{
|
||||
let mut state = self.state.lock().await;
|
||||
state.record_items(items.iter(), turn_context.truncation_policy);
|
||||
}
|
||||
self.record_conversation_items_with_history_policy(
|
||||
turn_context,
|
||||
items,
|
||||
turn_context.truncation_policy,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub(crate) async fn record_conversation_items_with_history_policy(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
items: &[ResponseItem],
|
||||
history_truncation_policy: TruncationPolicy,
|
||||
) {
|
||||
self.record_into_history_with_policy(items, history_truncation_policy)
|
||||
.await;
|
||||
self.persist_rollout_response_items(items).await;
|
||||
self.send_raw_response_items(turn_context, items).await;
|
||||
}
|
||||
|
||||
pub(crate) async fn record_into_history_with_policy(
|
||||
&self,
|
||||
items: &[ResponseItem],
|
||||
history_truncation_policy: TruncationPolicy,
|
||||
) {
|
||||
let mut state = self.state.lock().await;
|
||||
state.record_items(items.iter(), history_truncation_policy);
|
||||
}
|
||||
|
||||
async fn maybe_warn_on_server_model_mismatch(
|
||||
self: &Arc<Self>,
|
||||
turn_context: &Arc<TurnContext>,
|
||||
|
||||
@@ -7851,7 +7851,7 @@ async fn run_user_shell_command_does_not_set_reference_context_item() {
|
||||
handlers::run_user_shell_command(&session, "sub-id".to_string(), "echo shell".to_string())
|
||||
.await;
|
||||
|
||||
let deadline = StdDuration::from_secs(15);
|
||||
let deadline = StdDuration::from_secs(30);
|
||||
let start = std::time::Instant::now();
|
||||
loop {
|
||||
let remaining = deadline.saturating_sub(start.elapsed());
|
||||
|
||||
@@ -426,7 +426,7 @@ async fn strict_auto_review_turn_grant_forces_guardian_for_shell_command_policy_
|
||||
"command": "echo hi",
|
||||
"login": false,
|
||||
"workdir": workdir,
|
||||
"timeout_ms": 1_000_u64,
|
||||
"timeout_ms": 10_000_u64,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
|
||||
@@ -54,6 +54,7 @@ use crate::tasks::emit_compact_metric;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::parallel::ToolCallRuntime;
|
||||
use crate::tools::registry::RecordedToolResponse;
|
||||
use crate::tools::registry::ToolArgumentDiffConsumer;
|
||||
use crate::tools::router::ToolRouterParams;
|
||||
use crate::tools::router::extension_tool_executors;
|
||||
@@ -83,7 +84,6 @@ use codex_protocol::items::build_hook_prompt_message;
|
||||
use codex_protocol::models::BaseInstructions;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::MessagePhase;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AgentMessageContentDeltaEvent;
|
||||
use codex_protocol::protocol::AgentReasoningSectionBreakEvent;
|
||||
@@ -1661,16 +1661,22 @@ async fn handle_assistant_item_done_in_plan_mode(
|
||||
}
|
||||
|
||||
async fn drain_in_flight(
|
||||
in_flight: &mut FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>>,
|
||||
in_flight: &mut FuturesOrdered<BoxFuture<'static, CodexResult<RecordedToolResponse>>>,
|
||||
sess: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
) -> CodexResult<()> {
|
||||
while let Some(res) = in_flight.next().await {
|
||||
match res {
|
||||
Ok(response_input) => {
|
||||
let response_item = response_input.into();
|
||||
sess.record_conversation_items(&turn_context, std::slice::from_ref(&response_item))
|
||||
.await;
|
||||
Ok(recorded_tool_response) => {
|
||||
let response_item = recorded_tool_response.response_item.into();
|
||||
sess.record_conversation_items_with_history_policy(
|
||||
&turn_context,
|
||||
std::slice::from_ref(&response_item),
|
||||
recorded_tool_response
|
||||
.history_truncation_policy
|
||||
.unwrap_or(turn_context.truncation_policy),
|
||||
)
|
||||
.await;
|
||||
mark_thread_memory_mode_polluted_if_external_context(
|
||||
sess.as_ref(),
|
||||
turn_context.as_ref(),
|
||||
@@ -1732,7 +1738,7 @@ async fn try_run_sampling_request(
|
||||
.instrument(trace_span!("stream_request"))
|
||||
.or_cancel(&cancellation_token)
|
||||
.await??;
|
||||
let mut in_flight: FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>> =
|
||||
let mut in_flight: FuturesOrdered<BoxFuture<'static, CodexResult<RecordedToolResponse>>> =
|
||||
FuturesOrdered::new();
|
||||
let mut needs_follow_up = false;
|
||||
let mut last_agent_message: Option<String> = None;
|
||||
|
||||
@@ -242,7 +242,7 @@ async fn record_stage1_output_usage_for_memory_citation(
|
||||
/// queuing any tool execution futures. This records items immediately so
|
||||
/// history and rollout stay in sync even if the turn is later cancelled.
|
||||
pub(crate) type InFlightFuture<'f> =
|
||||
Pin<Box<dyn Future<Output = Result<ResponseInputItem>> + Send + 'f>>;
|
||||
Pin<Box<dyn Future<Output = Result<crate::tools::registry::RecordedToolResponse>> + Send + 'f>>;
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct OutputItemResult {
|
||||
|
||||
@@ -10,6 +10,7 @@ use codex_tools::ToolSpec;
|
||||
|
||||
use super::ExecContext;
|
||||
use super::PUBLIC_TOOL_NAME;
|
||||
use super::code_mode_history_truncation_policy;
|
||||
use super::handle_runtime_response;
|
||||
use super::is_exec_tool_name;
|
||||
|
||||
@@ -120,4 +121,11 @@ impl CoreToolRuntime for CodeModeExecuteHandler {
|
||||
fn matches_kind(&self, payload: &ToolPayload) -> bool {
|
||||
matches!(payload, ToolPayload::Custom { .. })
|
||||
}
|
||||
|
||||
fn history_truncation_policy(
|
||||
&self,
|
||||
_invocation: &ToolInvocation,
|
||||
) -> Option<codex_utils_output_truncation::TruncationPolicy> {
|
||||
Some(code_mode_history_truncation_policy())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,8 +226,7 @@ fn truncate_code_mode_result(
|
||||
items: Vec<FunctionCallOutputContentItem>,
|
||||
max_output_tokens: Option<usize>,
|
||||
) -> Vec<FunctionCallOutputContentItem> {
|
||||
let max_output_tokens = resolve_max_tokens(max_output_tokens);
|
||||
let policy = TruncationPolicy::Tokens(max_output_tokens);
|
||||
let policy = code_mode_output_truncation_policy(max_output_tokens);
|
||||
if items
|
||||
.iter()
|
||||
.all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
|
||||
@@ -240,6 +239,16 @@ fn truncate_code_mode_result(
|
||||
truncate_function_output_items_with_policy(&items, policy)
|
||||
}
|
||||
|
||||
pub(super) fn code_mode_output_truncation_policy(
|
||||
max_output_tokens: Option<usize>,
|
||||
) -> TruncationPolicy {
|
||||
TruncationPolicy::Tokens(resolve_max_tokens(max_output_tokens))
|
||||
}
|
||||
|
||||
pub(super) fn code_mode_history_truncation_policy() -> TruncationPolicy {
|
||||
TruncationPolicy::Bytes(usize::MAX)
|
||||
}
|
||||
|
||||
async fn call_nested_tool(
|
||||
_exec: ExecContext,
|
||||
tool_runtime: ToolCallRuntime,
|
||||
|
||||
@@ -15,6 +15,7 @@ use codex_tools::ToolSpec;
|
||||
use super::DEFAULT_WAIT_YIELD_TIME_MS;
|
||||
use super::ExecContext;
|
||||
use super::WAIT_TOOL_NAME;
|
||||
use super::code_mode_history_truncation_policy;
|
||||
use super::handle_runtime_response;
|
||||
use super::wait_spec::create_wait_tool;
|
||||
|
||||
@@ -114,6 +115,13 @@ impl ToolExecutor<ToolInvocation> for CodeModeWaitHandler {
|
||||
}
|
||||
|
||||
impl CoreToolRuntime for CodeModeWaitHandler {
|
||||
fn history_truncation_policy(
|
||||
&self,
|
||||
_invocation: &ToolInvocation,
|
||||
) -> Option<codex_utils_output_truncation::TruncationPolicy> {
|
||||
Some(code_mode_history_truncation_policy())
|
||||
}
|
||||
|
||||
fn pre_tool_use_payload(&self, _invocation: &ToolInvocation) -> Option<PreToolUsePayload> {
|
||||
// Code-mode `wait` is runtime control for an existing code cell, not a
|
||||
// standalone user action. Tool calls made from code mode still flow
|
||||
|
||||
@@ -20,6 +20,7 @@ use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::lifecycle::notify_tool_aborted;
|
||||
use crate::tools::registry::AnyToolResult;
|
||||
use crate::tools::registry::RecordedToolResponse;
|
||||
use crate::tools::registry::ToolArgumentDiffConsumer;
|
||||
use crate::tools::router::ToolCall;
|
||||
use crate::tools::router::ToolCallSource;
|
||||
@@ -64,13 +65,13 @@ impl ToolCallRuntime {
|
||||
self,
|
||||
call: ToolCall,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
|
||||
) -> impl std::future::Future<Output = Result<RecordedToolResponse, CodexErr>> {
|
||||
let error_call = call.clone();
|
||||
let future =
|
||||
self.handle_tool_call_with_source(call, ToolCallSource::Direct, cancellation_token);
|
||||
async move {
|
||||
match future.await {
|
||||
Ok(response) => Ok(response.into_response()),
|
||||
Ok(response) => Ok(response.into_recorded_response()),
|
||||
Err(FunctionCallError::Fatal(message)) => Err(CodexErr::Fatal(message)),
|
||||
Err(other) => Ok(Self::failure_response(error_call, other)),
|
||||
}
|
||||
@@ -183,9 +184,9 @@ impl ToolCallRuntime {
|
||||
FunctionCallError::Fatal(format!("tool task failed to receive: {err:?}"))
|
||||
}
|
||||
|
||||
fn failure_response(call: ToolCall, err: FunctionCallError) -> ResponseInputItem {
|
||||
fn failure_response(call: ToolCall, err: FunctionCallError) -> RecordedToolResponse {
|
||||
let message = err.to_string();
|
||||
match call.payload {
|
||||
let response_item = match call.payload {
|
||||
ToolPayload::ToolSearch { .. } => ResponseInputItem::ToolSearchOutput {
|
||||
call_id: call.call_id,
|
||||
status: "completed".to_string(),
|
||||
@@ -207,6 +208,10 @@ impl ToolCallRuntime {
|
||||
success: Some(false),
|
||||
},
|
||||
},
|
||||
};
|
||||
RecordedToolResponse {
|
||||
response_item,
|
||||
history_truncation_policy: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,6 +223,7 @@ impl ToolCallRuntime {
|
||||
message: Self::abort_message(call, secs),
|
||||
}),
|
||||
post_tool_use_payload: None,
|
||||
history_truncation_policy: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -456,7 +462,7 @@ mod tests {
|
||||
success: Some(true),
|
||||
},
|
||||
};
|
||||
assert_eq!(expected_response, response);
|
||||
assert_eq!(expected_response, response.response_item);
|
||||
|
||||
let actual = records
|
||||
.lock()
|
||||
@@ -521,7 +527,7 @@ mod tests {
|
||||
.await
|
||||
.expect("timed out waiting for tool response")
|
||||
.expect("tool response task should join")?;
|
||||
let ResponseInputItem::FunctionCallOutput { output, .. } = response else {
|
||||
let ResponseInputItem::FunctionCallOutput { output, .. } = response.response_item else {
|
||||
anyhow::bail!("cancelled tool should return function output");
|
||||
};
|
||||
let FunctionCallOutputBody::Text(text) = output.body else {
|
||||
|
||||
@@ -32,6 +32,7 @@ use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_tools::ToolName;
|
||||
use codex_tools::ToolSpec;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
use futures::future::BoxFuture;
|
||||
use serde_json::Value;
|
||||
use tracing::warn;
|
||||
@@ -104,6 +105,10 @@ pub(crate) trait CoreToolRuntime: ToolExecutor<ToolInvocation> {
|
||||
})
|
||||
}
|
||||
|
||||
fn history_truncation_policy(&self, _invocation: &ToolInvocation) -> Option<TruncationPolicy> {
|
||||
None
|
||||
}
|
||||
|
||||
fn pre_tool_use_payload(&self, invocation: &ToolInvocation) -> Option<PreToolUsePayload> {
|
||||
let ToolPayload::Function { arguments } = &invocation.payload else {
|
||||
return None;
|
||||
@@ -166,9 +171,16 @@ pub(crate) struct AnyToolResult {
|
||||
pub(crate) payload: ToolPayload,
|
||||
pub(crate) result: Box<dyn ToolOutput>,
|
||||
pub(crate) post_tool_use_payload: Option<PostToolUsePayload>,
|
||||
pub(crate) history_truncation_policy: Option<TruncationPolicy>,
|
||||
}
|
||||
|
||||
pub(crate) struct RecordedToolResponse {
|
||||
pub(crate) response_item: ResponseInputItem,
|
||||
pub(crate) history_truncation_policy: Option<TruncationPolicy>,
|
||||
}
|
||||
|
||||
impl AnyToolResult {
|
||||
#[cfg(test)]
|
||||
pub(crate) fn into_response(self) -> ResponseInputItem {
|
||||
let Self {
|
||||
call_id,
|
||||
@@ -179,6 +191,20 @@ impl AnyToolResult {
|
||||
result.to_response_item(&call_id, &payload)
|
||||
}
|
||||
|
||||
pub(crate) fn into_recorded_response(self) -> RecordedToolResponse {
|
||||
let Self {
|
||||
call_id,
|
||||
payload,
|
||||
result,
|
||||
history_truncation_policy,
|
||||
..
|
||||
} = self;
|
||||
RecordedToolResponse {
|
||||
response_item: result.to_response_item(&call_id, &payload),
|
||||
history_truncation_policy,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn code_mode_result(self) -> serde_json::Value {
|
||||
let Self {
|
||||
payload, result, ..
|
||||
@@ -306,6 +332,10 @@ impl CoreToolRuntime for ExposureOverride {
|
||||
self.handler.post_tool_use_payload(invocation, result)
|
||||
}
|
||||
|
||||
fn history_truncation_policy(&self, invocation: &ToolInvocation) -> Option<TruncationPolicy> {
|
||||
self.handler.history_truncation_policy(invocation)
|
||||
}
|
||||
|
||||
fn with_updated_hook_input(
|
||||
&self,
|
||||
invocation: ToolInvocation,
|
||||
@@ -716,11 +746,13 @@ async fn handle_any_tool(
|
||||
let output = tool.handle(invocation.clone()).await?;
|
||||
let post_tool_use_payload =
|
||||
CoreToolRuntime::post_tool_use_payload(tool, &invocation, output.as_ref());
|
||||
let history_truncation_policy = CoreToolRuntime::history_truncation_policy(tool, &invocation);
|
||||
Ok(AnyToolResult {
|
||||
call_id,
|
||||
payload,
|
||||
result: output,
|
||||
post_tool_use_payload,
|
||||
history_truncation_policy,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -329,6 +329,7 @@ fn post_tool_use_feedback_output_keeps_code_mode_result_typed() {
|
||||
),
|
||||
}),
|
||||
post_tool_use_payload: None,
|
||||
history_truncation_policy: None,
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
@@ -356,6 +357,7 @@ fn post_tool_use_feedback_output_keeps_code_mode_result_typed() {
|
||||
),
|
||||
}),
|
||||
post_tool_use_payload: None,
|
||||
history_truncation_policy: None,
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
|
||||
@@ -820,11 +820,10 @@ fn prepend_code_mode_executors(
|
||||
planned_tools: &mut PlannedTools,
|
||||
) {
|
||||
let turn_context = context.turn_context;
|
||||
let deferred_tools_available = search_tool_enabled(turn_context)
|
||||
&& planned_tools
|
||||
.runtimes()
|
||||
.iter()
|
||||
.any(|executor| executor.exposure() == ToolExposure::Deferred);
|
||||
let deferred_tools_available = planned_tools
|
||||
.runtimes()
|
||||
.iter()
|
||||
.any(|executor| executor.exposure() == ToolExposure::Deferred);
|
||||
let code_mode_executors = build_code_mode_executors(
|
||||
turn_context,
|
||||
planned_tools.runtimes(),
|
||||
|
||||
@@ -3038,8 +3038,8 @@ async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> {
|
||||
test.session_configured = new_thread.session_configured;
|
||||
|
||||
let code = r#"
|
||||
const tool = ALL_TOOLS.find(({ name }) => name === "codex_app_hidden_dynamic_tool");
|
||||
const out = await tools.codex_app_hidden_dynamic_tool({ city: "Paris" });
|
||||
const tool = ALL_TOOLS.find(({ name }) => name === "codex_app__hidden_dynamic_tool");
|
||||
const out = await tools.codex_app__hidden_dynamic_tool({ city: "Paris" });
|
||||
text(
|
||||
JSON.stringify({
|
||||
name: tool?.name ?? null,
|
||||
@@ -3144,7 +3144,7 @@ text(
|
||||
)?;
|
||||
assert_eq!(
|
||||
parsed.get("name"),
|
||||
Some(&Value::String("codex_app_hidden_dynamic_tool".to_string()))
|
||||
Some(&Value::String("codex_app__hidden_dynamic_tool".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
parsed.get("out"),
|
||||
@@ -3157,7 +3157,7 @@ text(
|
||||
.is_some_and(|description| {
|
||||
description.contains("A hidden dynamic tool.")
|
||||
&& description.contains("declare const tools:")
|
||||
&& description.contains("codex_app_hidden_dynamic_tool(args:")
|
||||
&& description.contains("codex_app__hidden_dynamic_tool(args:")
|
||||
})
|
||||
);
|
||||
|
||||
|
||||
@@ -934,6 +934,10 @@ fn normalize_string(value: &str) -> String {
|
||||
return "<UUID>".to_string();
|
||||
}
|
||||
|
||||
if value.starts_with("<skills_instructions>\n") && value.ends_with("\n</skills_instructions>") {
|
||||
return "<skills_instructions>\n...\n</skills_instructions>".to_string();
|
||||
}
|
||||
|
||||
let mut text = value.to_string();
|
||||
normalize_tmp_prefix_before_marker(&mut text, "/skills/");
|
||||
normalize_tmp_prefix_before_marker(&mut text, "\\skills\\");
|
||||
@@ -1029,6 +1033,15 @@ fn normalize_string_rewrites_windows_temp_skill_paths() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_string_rewrites_skills_instructions_body() {
|
||||
let text = normalize_string(
|
||||
"<skills_instructions>\n## Skills\n- imagegen: ...\n</skills_instructions>",
|
||||
);
|
||||
|
||||
assert_eq!(text, "<skills_instructions>\n...\n</skills_instructions>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_string_rewrites_shell_wall_times() {
|
||||
let text = normalize_string(
|
||||
|
||||
@@ -348,9 +348,15 @@ fn stdio_transport_with_cwd(
|
||||
fn insert_mcp_server(
|
||||
config: &mut Config,
|
||||
server_name: &str,
|
||||
transport: McpServerTransportConfig,
|
||||
mut transport: McpServerTransportConfig,
|
||||
options: TestMcpServerOptions,
|
||||
) {
|
||||
if options.environment_id != codex_config::DEFAULT_MCP_SERVER_ENVIRONMENT_ID
|
||||
&& let McpServerTransportConfig::Stdio { cwd, .. } = &mut transport
|
||||
&& cwd.is_none()
|
||||
{
|
||||
*cwd = Some(config.cwd.to_path_buf());
|
||||
}
|
||||
let mut servers = config.mcp_servers.get().clone();
|
||||
servers.insert(
|
||||
server_name.to_string(),
|
||||
|
||||
@@ -929,7 +929,7 @@ allow_local_binding = true
|
||||
.set_permission_profile(permission_profile_for_config)
|
||||
.expect("set permission profile");
|
||||
});
|
||||
let test = builder.build_with_remote_env(server).await?;
|
||||
let test = builder.build(server).await?;
|
||||
assert!(
|
||||
test.config.permissions.network.is_some(),
|
||||
"expected managed network proxy config to be present"
|
||||
|
||||
@@ -239,7 +239,7 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
|
||||
|
||||
let test = test_codex_exec();
|
||||
let server = MockServer::start().await;
|
||||
let _response_mock = mount_exec_responses(&server, /*count*/ 5).await;
|
||||
let _response_mock = mount_exec_responses(&server, /*count*/ 4).await;
|
||||
|
||||
let dir_a = TempDir::new()?;
|
||||
let dir_b = TempDir::new()?;
|
||||
@@ -254,6 +254,10 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// `updated_at` is second-granularity, so ensure thread B is created in a later
|
||||
// second than thread A and is deterministically newest before `resume --last --all`.
|
||||
std::thread::sleep(std::time::Duration::from_millis(1100));
|
||||
|
||||
let marker_b = format!("resume-cwd-b-{}", Uuid::new_v4());
|
||||
let prompt_b = format!("echo {marker_b}");
|
||||
test.cmd_with_server(&server)
|
||||
@@ -270,24 +274,6 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
|
||||
let path_b = find_session_file_containing_marker(&sessions_dir, &marker_b)
|
||||
.expect("no session file found for marker_b");
|
||||
|
||||
// `updated_at` is second-granularity, so ensure the touch lands in a later second
|
||||
// than the initial session creation on fast CI (especially Windows).
|
||||
std::thread::sleep(std::time::Duration::from_millis(1100));
|
||||
|
||||
// Make thread B deterministically newest according to rollout metadata.
|
||||
let session_id_b = extract_conversation_id(&path_b);
|
||||
let marker_b_touch = format!("resume-cwd-b-touch-{}", Uuid::new_v4());
|
||||
let prompt_b_touch = format!("echo {marker_b_touch}");
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-C")
|
||||
.arg(dir_b.path())
|
||||
.arg("resume")
|
||||
.arg(&session_id_b)
|
||||
.arg(&prompt_b_touch)
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// `resume --last` sorts by `updated_at`, which is second-granularity. Sleep so
|
||||
// the upcoming `resume --last --all` write lands in a later second and becomes
|
||||
// deterministically newest (instead of tying and falling back to UUID order).
|
||||
|
||||
@@ -158,7 +158,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
StreamableHttpService::new(
|
||||
|| Ok(TestToolServer::new()),
|
||||
Arc::new(LocalSessionManager::default()),
|
||||
StreamableHttpServerConfig::default(),
|
||||
// Full CI can run this private test server in a remote container
|
||||
// reached through its dynamic container IP instead of loopback.
|
||||
StreamableHttpServerConfig::default().disable_allowed_hosts(),
|
||||
),
|
||||
)
|
||||
.layer(middleware::from_fn_with_state(
|
||||
|
||||
@@ -188,7 +188,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn environment_id_fallback_has_cwd_prefix() {
|
||||
let dir = tempfile::tempdir().expect("tempdir");
|
||||
let home = std::env::var_os("HOME").expect("HOME should be set for tests");
|
||||
let dir = tempfile::tempdir_in(home).expect("tempdir outside repo");
|
||||
let env_id = environment_id_from_cwd(dir.path());
|
||||
let canonical = dir
|
||||
.path()
|
||||
|
||||
@@ -61,6 +61,13 @@ fn cache_project_root(chat: &mut ChatWidget, root_name: &str) {
|
||||
});
|
||||
}
|
||||
|
||||
fn cache_missing_project_root(chat: &mut ChatWidget) {
|
||||
chat.status_line_project_root_name_cache = Some(CachedProjectRootName {
|
||||
cwd: chat.config.cwd.to_path_buf(),
|
||||
root_name: None,
|
||||
});
|
||||
}
|
||||
|
||||
fn cache_rate_limit_snapshot(chat: &mut ChatWidget) {
|
||||
chat.on_rate_limit_snapshot(Some(RateLimitSnapshot {
|
||||
limit_id: None,
|
||||
@@ -182,6 +189,7 @@ async fn status_line_setup_popup_hardcoded_only_snapshot() {
|
||||
#[tokio::test]
|
||||
async fn status_surface_preview_lines_mixed_snapshot() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
cache_missing_project_root(&mut chat);
|
||||
chat.status_line_branch = Some("feature/mixed-preview".to_string());
|
||||
chat.thread_name = Some("Mixed preview thread".to_string());
|
||||
|
||||
@@ -278,6 +286,7 @@ async fn status_line_setup_popup_rate_limits_snapshot() {
|
||||
#[tokio::test]
|
||||
async fn status_line_setup_popup_mixed_snapshot() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
cache_missing_project_root(&mut chat);
|
||||
chat.status_line_branch = Some("feature/mixed-preview".to_string());
|
||||
chat.thread_name = Some("Mixed preview thread".to_string());
|
||||
chat.config.tui_status_line = Some(vec![
|
||||
@@ -330,6 +339,7 @@ async fn terminal_title_setup_popup_hardcoded_only_snapshot() {
|
||||
#[tokio::test]
|
||||
async fn terminal_title_setup_popup_mixed_snapshot() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
cache_missing_project_root(&mut chat);
|
||||
chat.thread_name = Some("Mixed preview thread".to_string());
|
||||
chat.config.tui_terminal_title = Some(vec![
|
||||
"project-name".to_string(),
|
||||
@@ -361,6 +371,7 @@ async fn terminal_title_setup_popup_rate_limits_snapshot() {
|
||||
#[tokio::test]
|
||||
async fn missing_project_root_uses_different_status_and_title_preview_sources() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
cache_missing_project_root(&mut chat);
|
||||
|
||||
let status_preview = status_preview_line(&mut chat, &[StatusLineItem::ProjectRoot]);
|
||||
let title_preview = title_preview_line(&mut chat, &[TerminalTitleItem::Project]);
|
||||
|
||||
@@ -3,9 +3,17 @@
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn cache_missing_project_root(chat: &mut ChatWidget) {
|
||||
chat.status_line_project_root_name_cache = Some(CachedProjectRootName {
|
||||
cwd: chat.config.cwd.to_path_buf(),
|
||||
root_name: None,
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn terminal_title_shows_action_required_while_exec_approval_is_pending() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
cache_missing_project_root(&mut chat);
|
||||
chat.bottom_pane.set_task_running(/*running*/ true);
|
||||
chat.refresh_terminal_title();
|
||||
|
||||
@@ -14,7 +22,7 @@ async fn terminal_title_shows_action_required_while_exec_approval_is_pending() {
|
||||
approval_id: Some("call-action-required".into()),
|
||||
turn_id: "turn-action-required".into(),
|
||||
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
|
||||
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
|
||||
cwd: test_project_path().abs(),
|
||||
reason: Some("need confirmation".into()),
|
||||
network_approval_context: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
@@ -56,7 +64,7 @@ async fn terminal_title_action_required_respects_spinner_setting() {
|
||||
approval_id: Some("call-no-spinner".into()),
|
||||
turn_id: "turn-no-spinner".into(),
|
||||
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
|
||||
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
|
||||
cwd: test_project_path().abs(),
|
||||
reason: Some("need confirmation".into()),
|
||||
network_approval_context: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
@@ -75,6 +83,7 @@ async fn terminal_title_action_required_respects_spinner_setting() {
|
||||
#[tokio::test]
|
||||
async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
cache_missing_project_root(&mut chat);
|
||||
chat.bottom_pane.set_task_running(/*running*/ true);
|
||||
chat.terminal_title_animation_origin = Instant::now() - std::time::Duration::from_millis(1500);
|
||||
chat.refresh_terminal_title();
|
||||
@@ -84,7 +93,7 @@ async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
|
||||
approval_id: Some("call-blink".into()),
|
||||
turn_id: "turn-blink".into(),
|
||||
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
|
||||
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
|
||||
cwd: test_project_path().abs(),
|
||||
reason: Some("need confirmation".into()),
|
||||
network_approval_context: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
@@ -106,6 +115,7 @@ async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
|
||||
#[tokio::test]
|
||||
async fn terminal_title_activity_indicators_do_not_animate_when_animations_are_disabled() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
|
||||
cache_missing_project_root(&mut chat);
|
||||
chat.config.animations = false;
|
||||
chat.bottom_pane.set_task_running(/*running*/ true);
|
||||
chat.terminal_title_animation_origin = Instant::now() - std::time::Duration::from_millis(1500);
|
||||
@@ -119,7 +129,7 @@ async fn terminal_title_activity_indicators_do_not_animate_when_animations_are_d
|
||||
approval_id: Some("call-no-animations".into()),
|
||||
turn_id: "turn-no-animations".into(),
|
||||
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
|
||||
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
|
||||
cwd: test_project_path().abs(),
|
||||
reason: Some("need confirmation".into()),
|
||||
network_approval_context: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
|
||||
@@ -247,7 +247,7 @@ fn legacy_non_tty_powershell_emits_output() {
|
||||
pwsh.display().to_string(),
|
||||
"-NoProfile".to_string(),
|
||||
"-Command".to_string(),
|
||||
"Write-Output LEGACY-NONTTY-DIRECT".to_string(),
|
||||
"'LEGACY-NONTTY-DIRECT'".to_string(),
|
||||
],
|
||||
cwd.as_path(),
|
||||
HashMap::new(),
|
||||
@@ -433,7 +433,7 @@ fn legacy_capture_powershell_emits_output() {
|
||||
pwsh.display().to_string(),
|
||||
"-NoProfile".to_string(),
|
||||
"-Command".to_string(),
|
||||
"Write-Output LEGACY-CAPTURE-DIRECT".to_string(),
|
||||
"'LEGACY-CAPTURE-DIRECT'".to_string(),
|
||||
],
|
||||
cwd.as_path(),
|
||||
HashMap::new(),
|
||||
@@ -527,7 +527,7 @@ fn legacy_tty_powershell_emits_output_and_accepts_input() {
|
||||
"-NoProfile".to_string(),
|
||||
"-NoExit".to_string(),
|
||||
"-Command".to_string(),
|
||||
"$PID; Write-Output ready".to_string(),
|
||||
"$PID; 'ready'".to_string(),
|
||||
],
|
||||
cwd.as_path(),
|
||||
HashMap::new(),
|
||||
@@ -544,7 +544,7 @@ fn legacy_tty_powershell_emits_output_and_accepts_input() {
|
||||
|
||||
let writer = spawned.session.writer_sender();
|
||||
writer
|
||||
.send(b"Write-Output second\n".to_vec())
|
||||
.send(b"'second'\n".to_vec())
|
||||
.await
|
||||
.expect("send second command");
|
||||
writer
|
||||
|
||||
Reference in New Issue
Block a user