Retry Windows ARM full CI archive

Retry nextest archive inside platform workflow
Retry unstable full CI ARM platform
2026-06-02 19:31:59 +00:00 · 2026-05-29 11:57:36 -07:00 · 2026-05-29 08:38:50 -07:00 · 2026-05-29 08:36:41 -07:00 · 2026-05-29 08:00:28 -07:00 · 2026-05-29 06:37:00 -07:00
27 changed files with 260 additions and 72 deletions
--- a/.github/workflows/rust-ci-full-nextest-platform.yml
+++ b/.github/workflows/rust-ci-full-nextest-platform.yml
@@ -47,6 +47,14 @@ on:
        required: false
        default: false
        type: boolean
+      retry_archive:
+        required: false
+        default: false
+        type: boolean
+      archive_timeout_minutes:
+        required: false
+        default: 60
+        type: number

 # Caller workflow-level env does not flow through workflow_call, so keep the
 # Cargo git transport hardening on the archive and shard jobs directly here.
@@ -57,7 +65,10 @@ jobs:
  archive:
    name: Build nextest archive
    runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
-    timeout-minutes: 60
+    continue-on-error: ${{ inputs.retry_archive }}
+    timeout-minutes: ${{ inputs.archive_timeout_minutes }}
+    outputs:
+      completed: ${{ steps.archive_completed.outputs.completed }}
    defaults:
      run:
        working-directory: codex-rs
@@ -71,7 +82,7 @@ jobs:
      SCCACHE_CACHE_SIZE: 10G
      NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
      TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
-    steps:
+    steps: &archive_steps
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
@@ -249,6 +260,11 @@ jobs:
          if-no-files-found: error
          retention-days: 1

+      - name: Mark archive complete
+        id: archive_completed
+        shell: bash
+        run: echo "completed=true" >> "$GITHUB_OUTPUT"
+
      - name: Save cargo home cache
        if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
        continue-on-error: true
@@ -286,9 +302,51 @@ jobs:
            echo '```';
          } >> "$GITHUB_STEP_SUMMARY"

+  archive_retry:
+    name: Retry build nextest archive
+    needs: archive
+    if: inputs.retry_archive && always() && needs.archive.outputs.completed != 'true'
+    runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
+    continue-on-error: true
+    timeout-minutes: ${{ inputs.archive_timeout_minutes }}
+    outputs:
+      completed: ${{ steps.archive_completed.outputs.completed }}
+    defaults:
+      run:
+        working-directory: codex-rs
+    env:
+      ARCHIVE_CACHE_RUNNER: ${{ inputs.archive_runner != '' && inputs.archive_runner || inputs.runner }}
+      USE_SCCACHE: ${{ inputs.use_sccache && 'true' || 'false' }}
+      CARGO_INCREMENTAL: "0"
+      SCCACHE_CACHE_SIZE: 10G
+      NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
+      TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
+    steps: *archive_steps
+
+  archive_retry_2:
+    name: Retry build nextest archive 2
+    needs: [archive, archive_retry]
+    if: inputs.retry_archive && always() && needs.archive.outputs.completed != 'true' && needs.archive_retry.outputs.completed != 'true'
+    runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
+    timeout-minutes: ${{ inputs.archive_timeout_minutes }}
+    outputs:
+      completed: ${{ steps.archive_completed.outputs.completed }}
+    defaults:
+      run:
+        working-directory: codex-rs
+    env:
+      ARCHIVE_CACHE_RUNNER: ${{ inputs.archive_runner != '' && inputs.archive_runner || inputs.runner }}
+      USE_SCCACHE: ${{ inputs.use_sccache && 'true' || 'false' }}
+      CARGO_INCREMENTAL: "0"
+      SCCACHE_CACHE_SIZE: 10G
+      NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
+      TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
+    steps: *archive_steps
+
  shard:
    name: Tests shard ${{ matrix.shard }}/4
-    needs: archive
+    needs: [archive, archive_retry, archive_retry_2]
+    if: always() && (needs.archive.outputs.completed == 'true' || needs.archive_retry.outputs.completed == 'true' || needs.archive_retry_2.outputs.completed == 'true')
    runs-on: ${{ inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
    timeout-minutes: 60
    defaults:
--- a/.github/workflows/rust-ci-full.yml
+++ b/.github/workflows/rust-ci-full.yml
@@ -494,10 +494,13 @@ jobs:
      runner: ubuntu-24.04-arm
      runner_group: codex-runners
      runner_labels: codex-linux-arm64
+      archive_runner: ubuntu-24.04-arm
      target: aarch64-unknown-linux-gnu
      profile: ci-test
      artifact_id: linux-arm64
      use_sccache: true
+      retry_archive: true
+      archive_timeout_minutes: 120
    secrets: inherit

  tests_windows_x64:
@@ -528,6 +531,7 @@ jobs:
      artifact_id: windows-arm64
      test_threads: 8
      use_sccache: true
+      retry_archive: true
    secrets: inherit

  # --- Gatherer job for the full post-merge workflow --------------------------
--- a/codex-rs/app-server-transport/src/transport/remote_control/tests.rs
+++ b/codex-rs/app-server-transport/src/transport/remote_control/tests.rs
@@ -1544,7 +1544,7 @@ async fn remote_control_waits_for_account_id_before_enrolling() {
    .expect("auth with account id should save");
    auth_manager.reload().await;

-    let enroll_request = timeout(Duration::from_millis(100), accept_http_request(&listener))
+    let enroll_request = timeout(Duration::from_secs(1), accept_http_request(&listener))
        .await
        .expect("auth change should wake remote control before the retry delay");
    assert_eq!(
--- a/codex-rs/cli/src/doctor/thread_inventory.rs
+++ b/codex-rs/cli/src/doctor/thread_inventory.rs
@@ -779,12 +779,15 @@ mod tests {
                .as_deref()
                .is_some_and(|remedy| remedy.starts_with("Restart Codex"))
        }));
-        assert!(
-            check
-                .details
-                .iter()
-                .any(|detail| detail.contains(missing_path.to_string_lossy().as_ref()))
-        );
+        assert!(check.details.iter().any(|detail| {
+            detail.contains(
+                missing_path
+                    .file_name()
+                    .expect("rollout path should have a file name")
+                    .to_string_lossy()
+                    .as_ref(),
+            )
+        }));
    }

    struct Fixture {
--- a/codex-rs/core-skills/src/loader_tests.rs
+++ b/codex-rs/core-skills/src/loader_tests.rs
@@ -1910,6 +1910,9 @@ async fn skill_roots_include_admin_with_lowest_priority() {
    if home_dir().is_some() {
        expected.insert(1, SkillScope::User);
    }
+    if scopes.contains(&SkillScope::Repo) {
+        expected.insert(0, SkillScope::Repo);
+    }
    expected.push(SkillScope::Admin);
    assert_eq!(scopes, expected);
 }
--- a/codex-rs/core/src/session/mod.rs
+++ b/codex-rs/core/src/session/mod.rs
@@ -2506,14 +2506,35 @@ impl Session {
        turn_context: &TurnContext,
        items: &[ResponseItem],
    ) {
-        {
-            let mut state = self.state.lock().await;
-            state.record_items(items.iter(), turn_context.truncation_policy);
-        }
+        self.record_conversation_items_with_history_policy(
+            turn_context,
+            items,
+            turn_context.truncation_policy,
+        )
+        .await;
+    }
+
+    pub(crate) async fn record_conversation_items_with_history_policy(
+        &self,
+        turn_context: &TurnContext,
+        items: &[ResponseItem],
+        history_truncation_policy: TruncationPolicy,
+    ) {
+        self.record_into_history_with_policy(items, history_truncation_policy)
+            .await;
        self.persist_rollout_response_items(items).await;
        self.send_raw_response_items(turn_context, items).await;
    }

+    pub(crate) async fn record_into_history_with_policy(
+        &self,
+        items: &[ResponseItem],
+        history_truncation_policy: TruncationPolicy,
+    ) {
+        let mut state = self.state.lock().await;
+        state.record_items(items.iter(), history_truncation_policy);
+    }
+
    async fn maybe_warn_on_server_model_mismatch(
        self: &Arc<Self>,
        turn_context: &Arc<TurnContext>,
--- a/codex-rs/core/src/session/tests.rs
+++ b/codex-rs/core/src/session/tests.rs
@@ -7851,7 +7851,7 @@ async fn run_user_shell_command_does_not_set_reference_context_item() {
    handlers::run_user_shell_command(&session, "sub-id".to_string(), "echo shell".to_string())
        .await;

-    let deadline = StdDuration::from_secs(15);
+    let deadline = StdDuration::from_secs(30);
    let start = std::time::Instant::now();
    loop {
        let remaining = deadline.saturating_sub(start.elapsed());
--- a/codex-rs/core/src/session/tests/guardian_tests.rs
+++ b/codex-rs/core/src/session/tests/guardian_tests.rs
@@ -426,7 +426,7 @@ async fn strict_auto_review_turn_grant_forces_guardian_for_shell_command_policy_
                    "command": "echo hi",
                    "login": false,
                    "workdir": workdir,
-                    "timeout_ms": 1_000_u64,
+                    "timeout_ms": 10_000_u64,
                })
                .to_string(),
            },
--- a/codex-rs/core/src/session/turn.rs
+++ b/codex-rs/core/src/session/turn.rs
@@ -54,6 +54,7 @@ use crate::tasks::emit_compact_metric;
 use crate::tools::ToolRouter;
 use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::parallel::ToolCallRuntime;
+use crate::tools::registry::RecordedToolResponse;
 use crate::tools::registry::ToolArgumentDiffConsumer;
 use crate::tools::router::ToolRouterParams;
 use crate::tools::router::extension_tool_executors;
@@ -83,7 +84,6 @@ use codex_protocol::items::build_hook_prompt_message;
 use codex_protocol::models::BaseInstructions;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::MessagePhase;
-use codex_protocol::models::ResponseInputItem;
 use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::AgentMessageContentDeltaEvent;
 use codex_protocol::protocol::AgentReasoningSectionBreakEvent;
@@ -1661,16 +1661,22 @@ async fn handle_assistant_item_done_in_plan_mode(
 }

 async fn drain_in_flight(
-    in_flight: &mut FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>>,
+    in_flight: &mut FuturesOrdered<BoxFuture<'static, CodexResult<RecordedToolResponse>>>,
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
 ) -> CodexResult<()> {
    while let Some(res) = in_flight.next().await {
        match res {
-            Ok(response_input) => {
-                let response_item = response_input.into();
-                sess.record_conversation_items(&turn_context, std::slice::from_ref(&response_item))
-                    .await;
+            Ok(recorded_tool_response) => {
+                let response_item = recorded_tool_response.response_item.into();
+                sess.record_conversation_items_with_history_policy(
+                    &turn_context,
+                    std::slice::from_ref(&response_item),
+                    recorded_tool_response
+                        .history_truncation_policy
+                        .unwrap_or(turn_context.truncation_policy),
+                )
+                .await;
                mark_thread_memory_mode_polluted_if_external_context(
                    sess.as_ref(),
                    turn_context.as_ref(),
@@ -1732,7 +1738,7 @@ async fn try_run_sampling_request(
        .instrument(trace_span!("stream_request"))
        .or_cancel(&cancellation_token)
        .await??;
-    let mut in_flight: FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>> =
+    let mut in_flight: FuturesOrdered<BoxFuture<'static, CodexResult<RecordedToolResponse>>> =
        FuturesOrdered::new();
    let mut needs_follow_up = false;
    let mut last_agent_message: Option<String> = None;
--- a/codex-rs/core/src/stream_events_utils.rs
+++ b/codex-rs/core/src/stream_events_utils.rs
@@ -242,7 +242,7 @@ async fn record_stage1_output_usage_for_memory_citation(
 /// queuing any tool execution futures. This records items immediately so
 /// history and rollout stay in sync even if the turn is later cancelled.
 pub(crate) type InFlightFuture<'f> =
-    Pin<Box<dyn Future<Output = Result<ResponseInputItem>> + Send + 'f>>;
+    Pin<Box<dyn Future<Output = Result<crate::tools::registry::RecordedToolResponse>> + Send + 'f>>;

 #[derive(Default)]
 pub(crate) struct OutputItemResult {
--- a/codex-rs/core/src/tools/code_mode/execute_handler.rs
+++ b/codex-rs/core/src/tools/code_mode/execute_handler.rs
@@ -10,6 +10,7 @@ use codex_tools::ToolSpec;

 use super::ExecContext;
 use super::PUBLIC_TOOL_NAME;
+use super::code_mode_history_truncation_policy;
 use super::handle_runtime_response;
 use super::is_exec_tool_name;

@@ -120,4 +121,11 @@ impl CoreToolRuntime for CodeModeExecuteHandler {
    fn matches_kind(&self, payload: &ToolPayload) -> bool {
        matches!(payload, ToolPayload::Custom { .. })
    }
+
+    fn history_truncation_policy(
+        &self,
+        _invocation: &ToolInvocation,
+    ) -> Option<codex_utils_output_truncation::TruncationPolicy> {
+        Some(code_mode_history_truncation_policy())
+    }
 }
--- a/codex-rs/core/src/tools/code_mode/mod.rs
+++ b/codex-rs/core/src/tools/code_mode/mod.rs
@@ -226,8 +226,7 @@ fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens: Option<usize>,
 ) -> Vec<FunctionCallOutputContentItem> {
-    let max_output_tokens = resolve_max_tokens(max_output_tokens);
-    let policy = TruncationPolicy::Tokens(max_output_tokens);
+    let policy = code_mode_output_truncation_policy(max_output_tokens);
    if items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
@@ -240,6 +239,16 @@ fn truncate_code_mode_result(
    truncate_function_output_items_with_policy(&items, policy)
 }

+pub(super) fn code_mode_output_truncation_policy(
+    max_output_tokens: Option<usize>,
+) -> TruncationPolicy {
+    TruncationPolicy::Tokens(resolve_max_tokens(max_output_tokens))
+}
+
+pub(super) fn code_mode_history_truncation_policy() -> TruncationPolicy {
+    TruncationPolicy::Bytes(usize::MAX)
+}
+
 async fn call_nested_tool(
    _exec: ExecContext,
    tool_runtime: ToolCallRuntime,
--- a/codex-rs/core/src/tools/code_mode/wait_handler.rs
+++ b/codex-rs/core/src/tools/code_mode/wait_handler.rs
@@ -15,6 +15,7 @@ use codex_tools::ToolSpec;
 use super::DEFAULT_WAIT_YIELD_TIME_MS;
 use super::ExecContext;
 use super::WAIT_TOOL_NAME;
+use super::code_mode_history_truncation_policy;
 use super::handle_runtime_response;
 use super::wait_spec::create_wait_tool;

@@ -114,6 +115,13 @@ impl ToolExecutor<ToolInvocation> for CodeModeWaitHandler {
 }

 impl CoreToolRuntime for CodeModeWaitHandler {
+    fn history_truncation_policy(
+        &self,
+        _invocation: &ToolInvocation,
+    ) -> Option<codex_utils_output_truncation::TruncationPolicy> {
+        Some(code_mode_history_truncation_policy())
+    }
+
    fn pre_tool_use_payload(&self, _invocation: &ToolInvocation) -> Option<PreToolUsePayload> {
        // Code-mode `wait` is runtime control for an existing code cell, not a
        // standalone user action. Tool calls made from code mode still flow
--- a/codex-rs/core/src/tools/parallel.rs
+++ b/codex-rs/core/src/tools/parallel.rs
@@ -20,6 +20,7 @@ use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::context::ToolPayload;
 use crate::tools::lifecycle::notify_tool_aborted;
 use crate::tools::registry::AnyToolResult;
+use crate::tools::registry::RecordedToolResponse;
 use crate::tools::registry::ToolArgumentDiffConsumer;
 use crate::tools::router::ToolCall;
 use crate::tools::router::ToolCallSource;
@@ -64,13 +65,13 @@ impl ToolCallRuntime {
        self,
        call: ToolCall,
        cancellation_token: CancellationToken,
-    ) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
+    ) -> impl std::future::Future<Output = Result<RecordedToolResponse, CodexErr>> {
        let error_call = call.clone();
        let future =
            self.handle_tool_call_with_source(call, ToolCallSource::Direct, cancellation_token);
        async move {
            match future.await {
-                Ok(response) => Ok(response.into_response()),
+                Ok(response) => Ok(response.into_recorded_response()),
                Err(FunctionCallError::Fatal(message)) => Err(CodexErr::Fatal(message)),
                Err(other) => Ok(Self::failure_response(error_call, other)),
            }
@@ -183,9 +184,9 @@ impl ToolCallRuntime {
        FunctionCallError::Fatal(format!("tool task failed to receive: {err:?}"))
    }

-    fn failure_response(call: ToolCall, err: FunctionCallError) -> ResponseInputItem {
+    fn failure_response(call: ToolCall, err: FunctionCallError) -> RecordedToolResponse {
        let message = err.to_string();
-        match call.payload {
+        let response_item = match call.payload {
            ToolPayload::ToolSearch { .. } => ResponseInputItem::ToolSearchOutput {
                call_id: call.call_id,
                status: "completed".to_string(),
@@ -207,6 +208,10 @@ impl ToolCallRuntime {
                    success: Some(false),
                },
            },
+        };
+        RecordedToolResponse {
+            response_item,
+            history_truncation_policy: None,
        }
    }

@@ -218,6 +223,7 @@ impl ToolCallRuntime {
                message: Self::abort_message(call, secs),
            }),
            post_tool_use_payload: None,
+            history_truncation_policy: None,
        }
    }

@@ -456,7 +462,7 @@ mod tests {
                success: Some(true),
            },
        };
-        assert_eq!(expected_response, response);
+        assert_eq!(expected_response, response.response_item);

        let actual = records
            .lock()
@@ -521,7 +527,7 @@ mod tests {
            .await
            .expect("timed out waiting for tool response")
            .expect("tool response task should join")?;
-        let ResponseInputItem::FunctionCallOutput { output, .. } = response else {
+        let ResponseInputItem::FunctionCallOutput { output, .. } = response.response_item else {
            anyhow::bail!("cancelled tool should return function output");
        };
        let FunctionCallOutputBody::Text(text) = output.body else {
--- a/codex-rs/core/src/tools/registry.rs
+++ b/codex-rs/core/src/tools/registry.rs
@@ -32,6 +32,7 @@ use codex_protocol::models::ResponseInputItem;
 use codex_protocol::protocol::EventMsg;
 use codex_tools::ToolName;
 use codex_tools::ToolSpec;
+use codex_utils_output_truncation::TruncationPolicy;
 use futures::future::BoxFuture;
 use serde_json::Value;
 use tracing::warn;
@@ -104,6 +105,10 @@ pub(crate) trait CoreToolRuntime: ToolExecutor<ToolInvocation> {
        })
    }

+    fn history_truncation_policy(&self, _invocation: &ToolInvocation) -> Option<TruncationPolicy> {
+        None
+    }
+
    fn pre_tool_use_payload(&self, invocation: &ToolInvocation) -> Option<PreToolUsePayload> {
        let ToolPayload::Function { arguments } = &invocation.payload else {
            return None;
@@ -166,9 +171,16 @@ pub(crate) struct AnyToolResult {
    pub(crate) payload: ToolPayload,
    pub(crate) result: Box<dyn ToolOutput>,
    pub(crate) post_tool_use_payload: Option<PostToolUsePayload>,
+    pub(crate) history_truncation_policy: Option<TruncationPolicy>,
+}
+
+pub(crate) struct RecordedToolResponse {
+    pub(crate) response_item: ResponseInputItem,
+    pub(crate) history_truncation_policy: Option<TruncationPolicy>,
 }

 impl AnyToolResult {
+    #[cfg(test)]
    pub(crate) fn into_response(self) -> ResponseInputItem {
        let Self {
            call_id,
@@ -179,6 +191,20 @@ impl AnyToolResult {
        result.to_response_item(&call_id, &payload)
    }

+    pub(crate) fn into_recorded_response(self) -> RecordedToolResponse {
+        let Self {
+            call_id,
+            payload,
+            result,
+            history_truncation_policy,
+            ..
+        } = self;
+        RecordedToolResponse {
+            response_item: result.to_response_item(&call_id, &payload),
+            history_truncation_policy,
+        }
+    }
+
    pub(crate) fn code_mode_result(self) -> serde_json::Value {
        let Self {
            payload, result, ..
@@ -306,6 +332,10 @@ impl CoreToolRuntime for ExposureOverride {
        self.handler.post_tool_use_payload(invocation, result)
    }

+    fn history_truncation_policy(&self, invocation: &ToolInvocation) -> Option<TruncationPolicy> {
+        self.handler.history_truncation_policy(invocation)
+    }
+
    fn with_updated_hook_input(
        &self,
        invocation: ToolInvocation,
@@ -716,11 +746,13 @@ async fn handle_any_tool(
    let output = tool.handle(invocation.clone()).await?;
    let post_tool_use_payload =
        CoreToolRuntime::post_tool_use_payload(tool, &invocation, output.as_ref());
+    let history_truncation_policy = CoreToolRuntime::history_truncation_policy(tool, &invocation);
    Ok(AnyToolResult {
        call_id,
        payload,
        result: output,
        post_tool_use_payload,
+        history_truncation_policy,
    })
 }

--- a/codex-rs/core/src/tools/registry_tests.rs
+++ b/codex-rs/core/src/tools/registry_tests.rs
@@ -329,6 +329,7 @@ fn post_tool_use_feedback_output_keeps_code_mode_result_typed() {
            ),
        }),
        post_tool_use_payload: None,
+        history_truncation_policy: None,
    };

    assert_eq!(
@@ -356,6 +357,7 @@ fn post_tool_use_feedback_output_keeps_code_mode_result_typed() {
            ),
        }),
        post_tool_use_payload: None,
+        history_truncation_policy: None,
    };

    assert_eq!(
--- a/codex-rs/core/src/tools/spec_plan.rs
+++ b/codex-rs/core/src/tools/spec_plan.rs
@@ -820,11 +820,10 @@ fn prepend_code_mode_executors(
    planned_tools: &mut PlannedTools,
 ) {
    let turn_context = context.turn_context;
-    let deferred_tools_available = search_tool_enabled(turn_context)
-        && planned_tools
-            .runtimes()
-            .iter()
-            .any(|executor| executor.exposure() == ToolExposure::Deferred);
+    let deferred_tools_available = planned_tools
+        .runtimes()
+        .iter()
+        .any(|executor| executor.exposure() == ToolExposure::Deferred);
    let code_mode_executors = build_code_mode_executors(
        turn_context,
        planned_tools.runtimes(),
--- a/codex-rs/core/tests/suite/code_mode.rs
+++ b/codex-rs/core/tests/suite/code_mode.rs
@@ -3038,8 +3038,8 @@ async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> {
    test.session_configured = new_thread.session_configured;

    let code = r#"
-const tool = ALL_TOOLS.find(({ name }) => name === "codex_app_hidden_dynamic_tool");
-const out = await tools.codex_app_hidden_dynamic_tool({ city: "Paris" });
+const tool = ALL_TOOLS.find(({ name }) => name === "codex_app__hidden_dynamic_tool");
+const out = await tools.codex_app__hidden_dynamic_tool({ city: "Paris" });
 text(
  JSON.stringify({
    name: tool?.name ?? null,
@@ -3144,7 +3144,7 @@ text(
    )?;
    assert_eq!(
        parsed.get("name"),
-        Some(&Value::String("codex_app_hidden_dynamic_tool".to_string()))
+        Some(&Value::String("codex_app__hidden_dynamic_tool".to_string()))
    );
    assert_eq!(
        parsed.get("out"),
@@ -3157,7 +3157,7 @@ text(
            .is_some_and(|description| {
                description.contains("A hidden dynamic tool.")
                    && description.contains("declare const tools:")
-                    && description.contains("codex_app_hidden_dynamic_tool(args:")
+                    && description.contains("codex_app__hidden_dynamic_tool(args:")
            })
    );

--- a/codex-rs/core/tests/suite/compact_remote_parity.rs
+++ b/codex-rs/core/tests/suite/compact_remote_parity.rs
@@ -934,6 +934,10 @@ fn normalize_string(value: &str) -> String {
        return "<UUID>".to_string();
    }

+    if value.starts_with("<skills_instructions>\n") && value.ends_with("\n</skills_instructions>") {
+        return "<skills_instructions>\n...\n</skills_instructions>".to_string();
+    }
+
    let mut text = value.to_string();
    normalize_tmp_prefix_before_marker(&mut text, "/skills/");
    normalize_tmp_prefix_before_marker(&mut text, "\\skills\\");
@@ -1029,6 +1033,15 @@ fn normalize_string_rewrites_windows_temp_skill_paths() {
    );
 }

+#[test]
+fn normalize_string_rewrites_skills_instructions_body() {
+    let text = normalize_string(
+        "<skills_instructions>\n## Skills\n- imagegen: ...\n</skills_instructions>",
+    );
+
+    assert_eq!(text, "<skills_instructions>\n...\n</skills_instructions>");
+}
+
 #[test]
 fn normalize_string_rewrites_shell_wall_times() {
    let text = normalize_string(
--- a/codex-rs/core/tests/suite/rmcp_client.rs
+++ b/codex-rs/core/tests/suite/rmcp_client.rs
@@ -348,9 +348,15 @@ fn stdio_transport_with_cwd(
 fn insert_mcp_server(
    config: &mut Config,
    server_name: &str,
-    transport: McpServerTransportConfig,
+    mut transport: McpServerTransportConfig,
    options: TestMcpServerOptions,
 ) {
+    if options.environment_id != codex_config::DEFAULT_MCP_SERVER_ENVIRONMENT_ID
+        && let McpServerTransportConfig::Stdio { cwd, .. } = &mut transport
+        && cwd.is_none()
+    {
+        *cwd = Some(config.cwd.to_path_buf());
+    }
    let mut servers = config.mcp_servers.get().clone();
    servers.insert(
        server_name.to_string(),
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -929,7 +929,7 @@ allow_local_binding = true
                .set_permission_profile(permission_profile_for_config)
                .expect("set permission profile");
        });
-    let test = builder.build_with_remote_env(server).await?;
+    let test = builder.build(server).await?;
    assert!(
        test.config.permissions.network.is_some(),
        "expected managed network proxy config to be present"
--- a/codex-rs/exec/tests/suite/resume.rs
+++ b/codex-rs/exec/tests/suite/resume.rs
@@ -239,7 +239,7 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(

    let test = test_codex_exec();
    let server = MockServer::start().await;
-    let _response_mock = mount_exec_responses(&server, /*count*/ 5).await;
+    let _response_mock = mount_exec_responses(&server, /*count*/ 4).await;

    let dir_a = TempDir::new()?;
    let dir_b = TempDir::new()?;
@@ -254,6 +254,10 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
        .assert()
        .success();

+    // `updated_at` is second-granularity, so ensure thread B is created in a later
+    // second than thread A and is deterministically newest before `resume --last --all`.
+    std::thread::sleep(std::time::Duration::from_millis(1100));
+
    let marker_b = format!("resume-cwd-b-{}", Uuid::new_v4());
    let prompt_b = format!("echo {marker_b}");
    test.cmd_with_server(&server)
@@ -270,24 +274,6 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
    let path_b = find_session_file_containing_marker(&sessions_dir, &marker_b)
        .expect("no session file found for marker_b");

-    // `updated_at` is second-granularity, so ensure the touch lands in a later second
-    // than the initial session creation on fast CI (especially Windows).
-    std::thread::sleep(std::time::Duration::from_millis(1100));
-
-    // Make thread B deterministically newest according to rollout metadata.
-    let session_id_b = extract_conversation_id(&path_b);
-    let marker_b_touch = format!("resume-cwd-b-touch-{}", Uuid::new_v4());
-    let prompt_b_touch = format!("echo {marker_b_touch}");
-    test.cmd_with_server(&server)
-        .arg("--skip-git-repo-check")
-        .arg("-C")
-        .arg(dir_b.path())
-        .arg("resume")
-        .arg(&session_id_b)
-        .arg(&prompt_b_touch)
-        .assert()
-        .success();
-
    // `resume --last` sorts by `updated_at`, which is second-granularity. Sleep so
    // the upcoming `resume --last --all` write lands in a later second and becomes
    // deterministically newest (instead of tying and falling back to UUID order).
--- a/codex-rs/rmcp-client/src/bin/test_streamable_http_server.rs
+++ b/codex-rs/rmcp-client/src/bin/test_streamable_http_server.rs
@@ -158,7 +158,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
            StreamableHttpService::new(
                || Ok(TestToolServer::new()),
                Arc::new(LocalSessionManager::default()),
-                StreamableHttpServerConfig::default(),
+                // Full CI can run this private test server in a remote container
+                // reached through its dynamic container IP instead of loopback.
+                StreamableHttpServerConfig::default().disable_allowed_hosts(),
            ),
        )
        .layer(middleware::from_fn_with_state(
--- a/codex-rs/secrets/src/lib.rs
+++ b/codex-rs/secrets/src/lib.rs
@@ -188,7 +188,8 @@ mod tests {

    #[test]
    fn environment_id_fallback_has_cwd_prefix() {
-        let dir = tempfile::tempdir().expect("tempdir");
+        let home = std::env::var_os("HOME").expect("HOME should be set for tests");
+        let dir = tempfile::tempdir_in(home).expect("tempdir outside repo");
        let env_id = environment_id_from_cwd(dir.path());
        let canonical = dir
            .path()
--- a/codex-rs/tui/src/chatwidget/tests/status_surface_previews.rs
+++ b/codex-rs/tui/src/chatwidget/tests/status_surface_previews.rs
@@ -61,6 +61,13 @@ fn cache_project_root(chat: &mut ChatWidget, root_name: &str) {
    });
 }

+fn cache_missing_project_root(chat: &mut ChatWidget) {
+    chat.status_line_project_root_name_cache = Some(CachedProjectRootName {
+        cwd: chat.config.cwd.to_path_buf(),
+        root_name: None,
+    });
+}
+
 fn cache_rate_limit_snapshot(chat: &mut ChatWidget) {
    chat.on_rate_limit_snapshot(Some(RateLimitSnapshot {
        limit_id: None,
@@ -182,6 +189,7 @@ async fn status_line_setup_popup_hardcoded_only_snapshot() {
 #[tokio::test]
 async fn status_surface_preview_lines_mixed_snapshot() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
+    cache_missing_project_root(&mut chat);
    chat.status_line_branch = Some("feature/mixed-preview".to_string());
    chat.thread_name = Some("Mixed preview thread".to_string());

@@ -278,6 +286,7 @@ async fn status_line_setup_popup_rate_limits_snapshot() {
 #[tokio::test]
 async fn status_line_setup_popup_mixed_snapshot() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
+    cache_missing_project_root(&mut chat);
    chat.status_line_branch = Some("feature/mixed-preview".to_string());
    chat.thread_name = Some("Mixed preview thread".to_string());
    chat.config.tui_status_line = Some(vec![
@@ -330,6 +339,7 @@ async fn terminal_title_setup_popup_hardcoded_only_snapshot() {
 #[tokio::test]
 async fn terminal_title_setup_popup_mixed_snapshot() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
+    cache_missing_project_root(&mut chat);
    chat.thread_name = Some("Mixed preview thread".to_string());
    chat.config.tui_terminal_title = Some(vec![
        "project-name".to_string(),
@@ -361,6 +371,7 @@ async fn terminal_title_setup_popup_rate_limits_snapshot() {
 #[tokio::test]
 async fn missing_project_root_uses_different_status_and_title_preview_sources() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
+    cache_missing_project_root(&mut chat);

    let status_preview = status_preview_line(&mut chat, &[StatusLineItem::ProjectRoot]);
    let title_preview = title_preview_line(&mut chat, &[TerminalTitleItem::Project]);
--- a/codex-rs/tui/src/chatwidget/tests/terminal_title.rs
+++ b/codex-rs/tui/src/chatwidget/tests/terminal_title.rs
@@ -3,9 +3,17 @@
 use super::*;
 use pretty_assertions::assert_eq;

+fn cache_missing_project_root(chat: &mut ChatWidget) {
+    chat.status_line_project_root_name_cache = Some(CachedProjectRootName {
+        cwd: chat.config.cwd.to_path_buf(),
+        root_name: None,
+    });
+}
+
 #[tokio::test]
 async fn terminal_title_shows_action_required_while_exec_approval_is_pending() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
+    cache_missing_project_root(&mut chat);
    chat.bottom_pane.set_task_running(/*running*/ true);
    chat.refresh_terminal_title();

@@ -14,7 +22,7 @@ async fn terminal_title_shows_action_required_while_exec_approval_is_pending() {
        approval_id: Some("call-action-required".into()),
        turn_id: "turn-action-required".into(),
        command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
-        cwd: AbsolutePathBuf::current_dir().expect("current dir"),
+        cwd: test_project_path().abs(),
        reason: Some("need confirmation".into()),
        network_approval_context: None,
        proposed_execpolicy_amendment: None,
@@ -56,7 +64,7 @@ async fn terminal_title_action_required_respects_spinner_setting() {
        approval_id: Some("call-no-spinner".into()),
        turn_id: "turn-no-spinner".into(),
        command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
-        cwd: AbsolutePathBuf::current_dir().expect("current dir"),
+        cwd: test_project_path().abs(),
        reason: Some("need confirmation".into()),
        network_approval_context: None,
        proposed_execpolicy_amendment: None,
@@ -75,6 +83,7 @@ async fn terminal_title_action_required_respects_spinner_setting() {
 #[tokio::test]
 async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
+    cache_missing_project_root(&mut chat);
    chat.bottom_pane.set_task_running(/*running*/ true);
    chat.terminal_title_animation_origin = Instant::now() - std::time::Duration::from_millis(1500);
    chat.refresh_terminal_title();
@@ -84,7 +93,7 @@ async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
        approval_id: Some("call-blink".into()),
        turn_id: "turn-blink".into(),
        command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
-        cwd: AbsolutePathBuf::current_dir().expect("current dir"),
+        cwd: test_project_path().abs(),
        reason: Some("need confirmation".into()),
        network_approval_context: None,
        proposed_execpolicy_amendment: None,
@@ -106,6 +115,7 @@ async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
 #[tokio::test]
 async fn terminal_title_activity_indicators_do_not_animate_when_animations_are_disabled() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
+    cache_missing_project_root(&mut chat);
    chat.config.animations = false;
    chat.bottom_pane.set_task_running(/*running*/ true);
    chat.terminal_title_animation_origin = Instant::now() - std::time::Duration::from_millis(1500);
@@ -119,7 +129,7 @@ async fn terminal_title_activity_indicators_do_not_animate_when_animations_are_d
        approval_id: Some("call-no-animations".into()),
        turn_id: "turn-no-animations".into(),
        command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
-        cwd: AbsolutePathBuf::current_dir().expect("current dir"),
+        cwd: test_project_path().abs(),
        reason: Some("need confirmation".into()),
        network_approval_context: None,
        proposed_execpolicy_amendment: None,
--- a/codex-rs/windows-sandbox-rs/src/unified_exec/tests.rs
+++ b/codex-rs/windows-sandbox-rs/src/unified_exec/tests.rs
@@ -247,7 +247,7 @@ fn legacy_non_tty_powershell_emits_output() {
                pwsh.display().to_string(),
                "-NoProfile".to_string(),
                "-Command".to_string(),
-                "Write-Output LEGACY-NONTTY-DIRECT".to_string(),
+                "'LEGACY-NONTTY-DIRECT'".to_string(),
            ],
            cwd.as_path(),
            HashMap::new(),
@@ -433,7 +433,7 @@ fn legacy_capture_powershell_emits_output() {
            pwsh.display().to_string(),
            "-NoProfile".to_string(),
            "-Command".to_string(),
-            "Write-Output LEGACY-CAPTURE-DIRECT".to_string(),
+            "'LEGACY-CAPTURE-DIRECT'".to_string(),
        ],
        cwd.as_path(),
        HashMap::new(),
@@ -527,7 +527,7 @@ fn legacy_tty_powershell_emits_output_and_accepts_input() {
                "-NoProfile".to_string(),
                "-NoExit".to_string(),
                "-Command".to_string(),
-                "$PID; Write-Output ready".to_string(),
+                "$PID; 'ready'".to_string(),
            ],
            cwd.as_path(),
            HashMap::new(),
@@ -544,7 +544,7 @@ fn legacy_tty_powershell_emits_output_and_accepts_input() {

        let writer = spawned.session.writer_sender();
        writer
-            .send(b"Write-Output second\n".to_vec())
+            .send(b"'second'\n".to_vec())
            .await
            .expect("send second command");
        writer
Author	SHA1	Message	Date
starr-openai	663100723b	Retry Windows ARM full CI archive	2026-05-29 11:57:36 -07:00
starr-openai	765f6639e2	Retry nextest archive inside platform workflow	2026-05-29 08:38:50 -07:00
starr-openai	c3245411dc	Retry unstable full CI arm platform	2026-05-29 08:36:41 -07:00
starr-openai	715f8eb1fd	Retry full CI after arm64 runner shutdown	2026-05-29 08:00:28 -07:00
starr-openai	830d3869c9	Pin terminal title project root tests	2026-05-29 06:37:00 -07:00
starr-openai	750e90633c	Stabilize remote full CI test fixtures	2026-05-29 05:55:33 -07:00
starr-openai	cc85a4a2e9	Fix hidden dynamic code mode test name	2026-05-29 01:36:06 -07:00
starr-openai	1658bb5da9	Fix code mode history test plumbing	2026-05-29 01:18:09 -07:00
starr-openai	2b67214208	Fix code mode history lint	2026-05-29 01:10:27 -07:00
starr-openai	457ec1f813	Preserve deferred code mode guidance	2026-05-29 01:01:13 -07:00
starr-openai	4fab9541eb	Stabilize persistent full-ci test failures	2026-05-29 01:00:18 -07:00
starr-openai	560e339d86	Preserve rendered code mode history	2026-05-29 00:59:40 -07:00
starr-openai	a4bb775c0e	Preserve code mode output history budget	2026-05-29 00:59:39 -07:00
starr-openai	5a0a64c3fb	test: keep unified exec network denial local	2026-05-29 00:59:05 -07:00
starr-openai	432bd259c8	Stabilize compact remote parity skills normalization	2026-05-29 00:59:05 -07:00
starr-openai	a1c86fcdad	Harden Windows PowerShell output tests	2026-05-29 00:59:05 -07:00