Compare commits

...

16 Commits

Author SHA1 Message Date
starr-openai
663100723b Retry Windows ARM full CI archive 2026-05-29 11:57:36 -07:00
starr-openai
765f6639e2 Retry nextest archive inside platform workflow 2026-05-29 08:38:50 -07:00
starr-openai
c3245411dc Retry unstable full CI arm platform 2026-05-29 08:36:41 -07:00
starr-openai
715f8eb1fd Retry full CI after arm64 runner shutdown 2026-05-29 08:00:28 -07:00
starr-openai
830d3869c9 Pin terminal title project root tests 2026-05-29 06:37:00 -07:00
starr-openai
750e90633c Stabilize remote full CI test fixtures 2026-05-29 05:55:33 -07:00
starr-openai
cc85a4a2e9 Fix hidden dynamic code mode test name 2026-05-29 01:36:06 -07:00
starr-openai
1658bb5da9 Fix code mode history test plumbing 2026-05-29 01:18:09 -07:00
starr-openai
2b67214208 Fix code mode history lint 2026-05-29 01:10:27 -07:00
starr-openai
457ec1f813 Preserve deferred code mode guidance 2026-05-29 01:01:13 -07:00
starr-openai
4fab9541eb Stabilize persistent full-ci test failures 2026-05-29 01:00:18 -07:00
starr-openai
560e339d86 Preserve rendered code mode history 2026-05-29 00:59:40 -07:00
starr-openai
a4bb775c0e Preserve code mode output history budget 2026-05-29 00:59:39 -07:00
starr-openai
5a0a64c3fb test: keep unified exec network denial local 2026-05-29 00:59:05 -07:00
starr-openai
432bd259c8 Stabilize compact remote parity skills normalization 2026-05-29 00:59:05 -07:00
starr-openai
a1c86fcdad Harden Windows PowerShell output tests 2026-05-29 00:59:05 -07:00
27 changed files with 260 additions and 72 deletions

View File

@@ -47,6 +47,14 @@ on:
required: false
default: false
type: boolean
retry_archive:
required: false
default: false
type: boolean
archive_timeout_minutes:
required: false
default: 60
type: number
# Caller workflow-level env does not flow through workflow_call, so keep the
# Cargo git transport hardening on the archive and shard jobs directly here.
@@ -57,7 +65,10 @@ jobs:
archive:
name: Build nextest archive
runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
timeout-minutes: 60
continue-on-error: ${{ inputs.retry_archive }}
timeout-minutes: ${{ inputs.archive_timeout_minutes }}
outputs:
completed: ${{ steps.archive_completed.outputs.completed }}
defaults:
run:
working-directory: codex-rs
@@ -71,7 +82,7 @@ jobs:
SCCACHE_CACHE_SIZE: 10G
NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
steps:
steps: &archive_steps
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
@@ -249,6 +260,11 @@ jobs:
if-no-files-found: error
retention-days: 1
- name: Mark archive complete
id: archive_completed
shell: bash
run: echo "completed=true" >> "$GITHUB_OUTPUT"
- name: Save cargo home cache
if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
continue-on-error: true
@@ -286,9 +302,51 @@ jobs:
echo '```';
} >> "$GITHUB_STEP_SUMMARY"
# First retry of the `archive` job. It is only considered when the caller set
# the `retry_archive` input and the first attempt did not report
# `completed == 'true'`.
archive_retry:
name: Retry build nextest archive
needs: archive
# `always()` keeps this job eligible even though the job it needs did not succeed.
if: inputs.retry_archive && always() && needs.archive.outputs.completed != 'true'
runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
# A failure here is tolerated; `archive_retry_2` makes one more attempt.
continue-on-error: true
timeout-minutes: ${{ inputs.archive_timeout_minutes }}
outputs:
# Set by the `archive_completed` step of the shared step list.
completed: ${{ steps.archive_completed.outputs.completed }}
defaults:
run:
working-directory: codex-rs
env:
ARCHIVE_CACHE_RUNNER: ${{ inputs.archive_runner != '' && inputs.archive_runner || inputs.runner }}
USE_SCCACHE: ${{ inputs.use_sccache && 'true' || 'false' }}
CARGO_INCREMENTAL: "0"
SCCACHE_CACHE_SIZE: 10G
NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
# Reuses the step list anchored as `archive_steps` on the `archive` job.
steps: *archive_steps
# Second retry of the archive build. It is only considered when `retry_archive`
# is set and neither `archive` nor `archive_retry` reported
# `completed == 'true'`.
archive_retry_2:
name: Retry build nextest archive 2
needs: [archive, archive_retry]
# `always()` keeps this job eligible even though the jobs it needs did not succeed.
if: inputs.retry_archive && always() && needs.archive.outputs.completed != 'true' && needs.archive_retry.outputs.completed != 'true'
runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
# NOTE(review): unlike `archive_retry`, this job does not set
# `continue-on-error`; presumably so a failed final attempt fails the run — confirm.
timeout-minutes: ${{ inputs.archive_timeout_minutes }}
outputs:
# Set by the `archive_completed` step of the shared step list.
completed: ${{ steps.archive_completed.outputs.completed }}
defaults:
run:
working-directory: codex-rs
env:
ARCHIVE_CACHE_RUNNER: ${{ inputs.archive_runner != '' && inputs.archive_runner || inputs.runner }}
USE_SCCACHE: ${{ inputs.use_sccache && 'true' || 'false' }}
CARGO_INCREMENTAL: "0"
SCCACHE_CACHE_SIZE: 10G
NEXTEST_ARCHIVE_FILE: nextest-${{ inputs.artifact_id }}.tar.zst
TEST_HELPERS_ARTIFACT: nextest-test-helpers-${{ inputs.artifact_id }}
# Reuses the step list anchored as `archive_steps` on the `archive` job.
steps: *archive_steps
shard:
name: Tests shard ${{ matrix.shard }}/4
needs: archive
needs: [archive, archive_retry, archive_retry_2]
if: always() && (needs.archive.outputs.completed == 'true' || needs.archive_retry.outputs.completed == 'true' || needs.archive_retry_2.outputs.completed == 'true')
runs-on: ${{ inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
timeout-minutes: 60
defaults:

View File

@@ -494,10 +494,13 @@ jobs:
runner: ubuntu-24.04-arm
runner_group: codex-runners
runner_labels: codex-linux-arm64
archive_runner: ubuntu-24.04-arm
target: aarch64-unknown-linux-gnu
profile: ci-test
artifact_id: linux-arm64
use_sccache: true
retry_archive: true
archive_timeout_minutes: 120
secrets: inherit
tests_windows_x64:
@@ -528,6 +531,7 @@ jobs:
artifact_id: windows-arm64
test_threads: 8
use_sccache: true
retry_archive: true
secrets: inherit
# --- Gatherer job for the full post-merge workflow --------------------------

View File

@@ -1544,7 +1544,7 @@ async fn remote_control_waits_for_account_id_before_enrolling() {
.expect("auth with account id should save");
auth_manager.reload().await;
let enroll_request = timeout(Duration::from_millis(100), accept_http_request(&listener))
let enroll_request = timeout(Duration::from_secs(1), accept_http_request(&listener))
.await
.expect("auth change should wake remote control before the retry delay");
assert_eq!(

View File

@@ -779,12 +779,15 @@ mod tests {
.as_deref()
.is_some_and(|remedy| remedy.starts_with("Restart Codex"))
}));
assert!(
check
.details
.iter()
.any(|detail| detail.contains(missing_path.to_string_lossy().as_ref()))
);
assert!(check.details.iter().any(|detail| {
detail.contains(
missing_path
.file_name()
.expect("rollout path should have a file name")
.to_string_lossy()
.as_ref(),
)
}));
}
struct Fixture {

View File

@@ -1910,6 +1910,9 @@ async fn skill_roots_include_admin_with_lowest_priority() {
if home_dir().is_some() {
expected.insert(1, SkillScope::User);
}
if scopes.contains(&SkillScope::Repo) {
expected.insert(0, SkillScope::Repo);
}
expected.push(SkillScope::Admin);
assert_eq!(scopes, expected);
}

View File

@@ -2506,14 +2506,35 @@ impl Session {
turn_context: &TurnContext,
items: &[ResponseItem],
) {
{
let mut state = self.state.lock().await;
state.record_items(items.iter(), turn_context.truncation_policy);
}
self.record_conversation_items_with_history_policy(
turn_context,
items,
turn_context.truncation_policy,
)
.await;
}
/// Records `items` for the current turn using an explicit history truncation
/// policy instead of the one stored on `turn_context`.
///
/// The calls run in a fixed order: the items are recorded into session
/// history under `history_truncation_policy`, then handed to
/// `persist_rollout_response_items`, and finally forwarded through
/// `send_raw_response_items` for `turn_context`.
pub(crate) async fn record_conversation_items_with_history_policy(
&self,
turn_context: &TurnContext,
items: &[ResponseItem],
history_truncation_policy: TruncationPolicy,
) {
// Only the history recording step takes the truncation policy.
self.record_into_history_with_policy(items, history_truncation_policy)
.await;
self.persist_rollout_response_items(items).await;
self.send_raw_response_items(turn_context, items).await;
}
/// Records `items` into the session state under `history_truncation_policy`.
///
/// The state lock is held only for the duration of the `record_items` call.
pub(crate) async fn record_into_history_with_policy(
    &self,
    items: &[ResponseItem],
    history_truncation_policy: TruncationPolicy,
) {
    self.state
        .lock()
        .await
        .record_items(items.iter(), history_truncation_policy);
}
async fn maybe_warn_on_server_model_mismatch(
self: &Arc<Self>,
turn_context: &Arc<TurnContext>,

View File

@@ -7851,7 +7851,7 @@ async fn run_user_shell_command_does_not_set_reference_context_item() {
handlers::run_user_shell_command(&session, "sub-id".to_string(), "echo shell".to_string())
.await;
let deadline = StdDuration::from_secs(15);
let deadline = StdDuration::from_secs(30);
let start = std::time::Instant::now();
loop {
let remaining = deadline.saturating_sub(start.elapsed());

View File

@@ -426,7 +426,7 @@ async fn strict_auto_review_turn_grant_forces_guardian_for_shell_command_policy_
"command": "echo hi",
"login": false,
"workdir": workdir,
"timeout_ms": 1_000_u64,
"timeout_ms": 10_000_u64,
})
.to_string(),
},

View File

@@ -54,6 +54,7 @@ use crate::tasks::emit_compact_metric;
use crate::tools::ToolRouter;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::parallel::ToolCallRuntime;
use crate::tools::registry::RecordedToolResponse;
use crate::tools::registry::ToolArgumentDiffConsumer;
use crate::tools::router::ToolRouterParams;
use crate::tools::router::extension_tool_executors;
@@ -83,7 +84,6 @@ use codex_protocol::items::build_hook_prompt_message;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::MessagePhase;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::AgentMessageContentDeltaEvent;
use codex_protocol::protocol::AgentReasoningSectionBreakEvent;
@@ -1661,16 +1661,22 @@ async fn handle_assistant_item_done_in_plan_mode(
}
async fn drain_in_flight(
in_flight: &mut FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>>,
in_flight: &mut FuturesOrdered<BoxFuture<'static, CodexResult<RecordedToolResponse>>>,
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
) -> CodexResult<()> {
while let Some(res) = in_flight.next().await {
match res {
Ok(response_input) => {
let response_item = response_input.into();
sess.record_conversation_items(&turn_context, std::slice::from_ref(&response_item))
.await;
Ok(recorded_tool_response) => {
let response_item = recorded_tool_response.response_item.into();
sess.record_conversation_items_with_history_policy(
&turn_context,
std::slice::from_ref(&response_item),
recorded_tool_response
.history_truncation_policy
.unwrap_or(turn_context.truncation_policy),
)
.await;
mark_thread_memory_mode_polluted_if_external_context(
sess.as_ref(),
turn_context.as_ref(),
@@ -1732,7 +1738,7 @@ async fn try_run_sampling_request(
.instrument(trace_span!("stream_request"))
.or_cancel(&cancellation_token)
.await??;
let mut in_flight: FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>> =
let mut in_flight: FuturesOrdered<BoxFuture<'static, CodexResult<RecordedToolResponse>>> =
FuturesOrdered::new();
let mut needs_follow_up = false;
let mut last_agent_message: Option<String> = None;

View File

@@ -242,7 +242,7 @@ async fn record_stage1_output_usage_for_memory_citation(
/// queuing any tool execution futures. This records items immediately so
/// history and rollout stay in sync even if the turn is later cancelled.
pub(crate) type InFlightFuture<'f> =
Pin<Box<dyn Future<Output = Result<ResponseInputItem>> + Send + 'f>>;
Pin<Box<dyn Future<Output = Result<crate::tools::registry::RecordedToolResponse>> + Send + 'f>>;
#[derive(Default)]
pub(crate) struct OutputItemResult {

View File

@@ -10,6 +10,7 @@ use codex_tools::ToolSpec;
use super::ExecContext;
use super::PUBLIC_TOOL_NAME;
use super::code_mode_history_truncation_policy;
use super::handle_runtime_response;
use super::is_exec_tool_name;
@@ -120,4 +121,11 @@ impl CoreToolRuntime for CodeModeExecuteHandler {
fn matches_kind(&self, payload: &ToolPayload) -> bool {
matches!(payload, ToolPayload::Custom { .. })
}
/// Reports the code-mode history truncation policy for this handler's
/// responses, regardless of the invocation.
fn history_truncation_policy(
    &self,
    _invocation: &ToolInvocation,
) -> Option<codex_utils_output_truncation::TruncationPolicy> {
    let history_policy = code_mode_history_truncation_policy();
    Some(history_policy)
}
}

View File

@@ -226,8 +226,7 @@ fn truncate_code_mode_result(
items: Vec<FunctionCallOutputContentItem>,
max_output_tokens: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
let max_output_tokens = resolve_max_tokens(max_output_tokens);
let policy = TruncationPolicy::Tokens(max_output_tokens);
let policy = code_mode_output_truncation_policy(max_output_tokens);
if items
.iter()
.all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }))
@@ -240,6 +239,16 @@ fn truncate_code_mode_result(
truncate_function_output_items_with_policy(&items, policy)
}
/// Builds the token-based truncation policy for code-mode output.
///
/// `max_output_tokens` is passed through `resolve_max_tokens`, and the
/// resolved value becomes the policy's token budget.
pub(super) fn code_mode_output_truncation_policy(
    max_output_tokens: Option<usize>,
) -> TruncationPolicy {
    let token_budget = resolve_max_tokens(max_output_tokens);
    TruncationPolicy::Tokens(token_budget)
}
/// Returns the truncation policy code-mode handlers report for history
/// recording: a byte-based policy with a budget of `usize::MAX`.
// NOTE(review): presumably this leaves recorded code-mode output untruncated
// in history — confirm against how `TruncationPolicy::Bytes` is applied.
pub(super) fn code_mode_history_truncation_policy() -> TruncationPolicy {
    let byte_budget = usize::MAX;
    TruncationPolicy::Bytes(byte_budget)
}
async fn call_nested_tool(
_exec: ExecContext,
tool_runtime: ToolCallRuntime,

View File

@@ -15,6 +15,7 @@ use codex_tools::ToolSpec;
use super::DEFAULT_WAIT_YIELD_TIME_MS;
use super::ExecContext;
use super::WAIT_TOOL_NAME;
use super::code_mode_history_truncation_policy;
use super::handle_runtime_response;
use super::wait_spec::create_wait_tool;
@@ -114,6 +115,13 @@ impl ToolExecutor<ToolInvocation> for CodeModeWaitHandler {
}
impl CoreToolRuntime for CodeModeWaitHandler {
/// Reports the code-mode history truncation policy for this handler's
/// responses, regardless of the invocation.
fn history_truncation_policy(
    &self,
    _invocation: &ToolInvocation,
) -> Option<codex_utils_output_truncation::TruncationPolicy> {
    let history_policy = code_mode_history_truncation_policy();
    Some(history_policy)
}
fn pre_tool_use_payload(&self, _invocation: &ToolInvocation) -> Option<PreToolUsePayload> {
// Code-mode `wait` is runtime control for an existing code cell, not a
// standalone user action. Tool calls made from code mode still flow

View File

@@ -20,6 +20,7 @@ use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolPayload;
use crate::tools::lifecycle::notify_tool_aborted;
use crate::tools::registry::AnyToolResult;
use crate::tools::registry::RecordedToolResponse;
use crate::tools::registry::ToolArgumentDiffConsumer;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
@@ -64,13 +65,13 @@ impl ToolCallRuntime {
self,
call: ToolCall,
cancellation_token: CancellationToken,
) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
) -> impl std::future::Future<Output = Result<RecordedToolResponse, CodexErr>> {
let error_call = call.clone();
let future =
self.handle_tool_call_with_source(call, ToolCallSource::Direct, cancellation_token);
async move {
match future.await {
Ok(response) => Ok(response.into_response()),
Ok(response) => Ok(response.into_recorded_response()),
Err(FunctionCallError::Fatal(message)) => Err(CodexErr::Fatal(message)),
Err(other) => Ok(Self::failure_response(error_call, other)),
}
@@ -183,9 +184,9 @@ impl ToolCallRuntime {
FunctionCallError::Fatal(format!("tool task failed to receive: {err:?}"))
}
fn failure_response(call: ToolCall, err: FunctionCallError) -> ResponseInputItem {
fn failure_response(call: ToolCall, err: FunctionCallError) -> RecordedToolResponse {
let message = err.to_string();
match call.payload {
let response_item = match call.payload {
ToolPayload::ToolSearch { .. } => ResponseInputItem::ToolSearchOutput {
call_id: call.call_id,
status: "completed".to_string(),
@@ -207,6 +208,10 @@ impl ToolCallRuntime {
success: Some(false),
},
},
};
RecordedToolResponse {
response_item,
history_truncation_policy: None,
}
}
@@ -218,6 +223,7 @@ impl ToolCallRuntime {
message: Self::abort_message(call, secs),
}),
post_tool_use_payload: None,
history_truncation_policy: None,
}
}
@@ -456,7 +462,7 @@ mod tests {
success: Some(true),
},
};
assert_eq!(expected_response, response);
assert_eq!(expected_response, response.response_item);
let actual = records
.lock()
@@ -521,7 +527,7 @@ mod tests {
.await
.expect("timed out waiting for tool response")
.expect("tool response task should join")?;
let ResponseInputItem::FunctionCallOutput { output, .. } = response else {
let ResponseInputItem::FunctionCallOutput { output, .. } = response.response_item else {
anyhow::bail!("cancelled tool should return function output");
};
let FunctionCallOutputBody::Text(text) = output.body else {

View File

@@ -32,6 +32,7 @@ use codex_protocol::models::ResponseInputItem;
use codex_protocol::protocol::EventMsg;
use codex_tools::ToolName;
use codex_tools::ToolSpec;
use codex_utils_output_truncation::TruncationPolicy;
use futures::future::BoxFuture;
use serde_json::Value;
use tracing::warn;
@@ -104,6 +105,10 @@ pub(crate) trait CoreToolRuntime: ToolExecutor<ToolInvocation> {
})
}
/// Optional per-tool truncation policy to use when this tool's response is
/// recorded into history.
///
/// The default implementation returns `None`, i.e. the tool does not request
/// an override.
fn history_truncation_policy(&self, _invocation: &ToolInvocation) -> Option<TruncationPolicy> {
None
}
fn pre_tool_use_payload(&self, invocation: &ToolInvocation) -> Option<PreToolUsePayload> {
let ToolPayload::Function { arguments } = &invocation.payload else {
return None;
@@ -166,9 +171,16 @@ pub(crate) struct AnyToolResult {
pub(crate) payload: ToolPayload,
pub(crate) result: Box<dyn ToolOutput>,
pub(crate) post_tool_use_payload: Option<PostToolUsePayload>,
pub(crate) history_truncation_policy: Option<TruncationPolicy>,
}
/// A tool response paired with the truncation policy to apply when it is
/// recorded into history.
pub(crate) struct RecordedToolResponse {
/// The response item produced for the tool call.
pub(crate) response_item: ResponseInputItem,
/// Truncation policy override for history recording; `None` means no
/// override was requested for this response.
pub(crate) history_truncation_policy: Option<TruncationPolicy>,
}
impl AnyToolResult {
#[cfg(test)]
pub(crate) fn into_response(self) -> ResponseInputItem {
let Self {
call_id,
@@ -179,6 +191,20 @@ impl AnyToolResult {
result.to_response_item(&call_id, &payload)
}
/// Consumes the result and converts it into a [`RecordedToolResponse`].
///
/// The response item is built from the tool output via `to_response_item`,
/// and the result's `history_truncation_policy` is carried over unchanged.
pub(crate) fn into_recorded_response(self) -> RecordedToolResponse {
    let Self {
        call_id,
        payload,
        result,
        history_truncation_policy,
        ..
    } = self;
    let response_item = result.to_response_item(&call_id, &payload);
    RecordedToolResponse {
        response_item,
        history_truncation_policy,
    }
}
pub(crate) fn code_mode_result(self) -> serde_json::Value {
let Self {
payload, result, ..
@@ -306,6 +332,10 @@ impl CoreToolRuntime for ExposureOverride {
self.handler.post_tool_use_payload(invocation, result)
}
/// Forwards to the wrapped handler's `history_truncation_policy` for the
/// same invocation.
fn history_truncation_policy(&self, invocation: &ToolInvocation) -> Option<TruncationPolicy> {
self.handler.history_truncation_policy(invocation)
}
fn with_updated_hook_input(
&self,
invocation: ToolInvocation,
@@ -716,11 +746,13 @@ async fn handle_any_tool(
let output = tool.handle(invocation.clone()).await?;
let post_tool_use_payload =
CoreToolRuntime::post_tool_use_payload(tool, &invocation, output.as_ref());
let history_truncation_policy = CoreToolRuntime::history_truncation_policy(tool, &invocation);
Ok(AnyToolResult {
call_id,
payload,
result: output,
post_tool_use_payload,
history_truncation_policy,
})
}

View File

@@ -329,6 +329,7 @@ fn post_tool_use_feedback_output_keeps_code_mode_result_typed() {
),
}),
post_tool_use_payload: None,
history_truncation_policy: None,
};
assert_eq!(
@@ -356,6 +357,7 @@ fn post_tool_use_feedback_output_keeps_code_mode_result_typed() {
),
}),
post_tool_use_payload: None,
history_truncation_policy: None,
};
assert_eq!(

View File

@@ -820,11 +820,10 @@ fn prepend_code_mode_executors(
planned_tools: &mut PlannedTools,
) {
let turn_context = context.turn_context;
let deferred_tools_available = search_tool_enabled(turn_context)
&& planned_tools
.runtimes()
.iter()
.any(|executor| executor.exposure() == ToolExposure::Deferred);
let deferred_tools_available = planned_tools
.runtimes()
.iter()
.any(|executor| executor.exposure() == ToolExposure::Deferred);
let code_mode_executors = build_code_mode_executors(
turn_context,
planned_tools.runtimes(),

View File

@@ -3038,8 +3038,8 @@ async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> {
test.session_configured = new_thread.session_configured;
let code = r#"
const tool = ALL_TOOLS.find(({ name }) => name === "codex_app_hidden_dynamic_tool");
const out = await tools.codex_app_hidden_dynamic_tool({ city: "Paris" });
const tool = ALL_TOOLS.find(({ name }) => name === "codex_app__hidden_dynamic_tool");
const out = await tools.codex_app__hidden_dynamic_tool({ city: "Paris" });
text(
JSON.stringify({
name: tool?.name ?? null,
@@ -3144,7 +3144,7 @@ text(
)?;
assert_eq!(
parsed.get("name"),
Some(&Value::String("codex_app_hidden_dynamic_tool".to_string()))
Some(&Value::String("codex_app__hidden_dynamic_tool".to_string()))
);
assert_eq!(
parsed.get("out"),
@@ -3157,7 +3157,7 @@ text(
.is_some_and(|description| {
description.contains("A hidden dynamic tool.")
&& description.contains("declare const tools:")
&& description.contains("codex_app_hidden_dynamic_tool(args:")
&& description.contains("codex_app__hidden_dynamic_tool(args:")
})
);

View File

@@ -934,6 +934,10 @@ fn normalize_string(value: &str) -> String {
return "<UUID>".to_string();
}
if value.starts_with("<skills_instructions>\n") && value.ends_with("\n</skills_instructions>") {
return "<skills_instructions>\n...\n</skills_instructions>".to_string();
}
let mut text = value.to_string();
normalize_tmp_prefix_before_marker(&mut text, "/skills/");
normalize_tmp_prefix_before_marker(&mut text, "\\skills\\");
@@ -1029,6 +1033,15 @@ fn normalize_string_rewrites_windows_temp_skill_paths() {
);
}
#[test]
fn normalize_string_rewrites_skills_instructions_body() {
    // A string that is entirely a skills-instructions block should have its
    // body replaced by a fixed placeholder.
    let raw = "<skills_instructions>\n## Skills\n- imagegen: ...\n</skills_instructions>";
    let normalized = normalize_string(raw);
    assert_eq!(normalized, "<skills_instructions>\n...\n</skills_instructions>");
}
#[test]
fn normalize_string_rewrites_shell_wall_times() {
let text = normalize_string(

View File

@@ -348,9 +348,15 @@ fn stdio_transport_with_cwd(
fn insert_mcp_server(
config: &mut Config,
server_name: &str,
transport: McpServerTransportConfig,
mut transport: McpServerTransportConfig,
options: TestMcpServerOptions,
) {
if options.environment_id != codex_config::DEFAULT_MCP_SERVER_ENVIRONMENT_ID
&& let McpServerTransportConfig::Stdio { cwd, .. } = &mut transport
&& cwd.is_none()
{
*cwd = Some(config.cwd.to_path_buf());
}
let mut servers = config.mcp_servers.get().clone();
servers.insert(
server_name.to_string(),

View File

@@ -929,7 +929,7 @@ allow_local_binding = true
.set_permission_profile(permission_profile_for_config)
.expect("set permission profile");
});
let test = builder.build_with_remote_env(server).await?;
let test = builder.build(server).await?;
assert!(
test.config.permissions.network.is_some(),
"expected managed network proxy config to be present"

View File

@@ -239,7 +239,7 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
let test = test_codex_exec();
let server = MockServer::start().await;
let _response_mock = mount_exec_responses(&server, /*count*/ 5).await;
let _response_mock = mount_exec_responses(&server, /*count*/ 4).await;
let dir_a = TempDir::new()?;
let dir_b = TempDir::new()?;
@@ -254,6 +254,10 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
.assert()
.success();
// `updated_at` is second-granularity, so ensure thread B is created in a later
// second than thread A and is deterministically newest before `resume --last --all`.
std::thread::sleep(std::time::Duration::from_millis(1100));
let marker_b = format!("resume-cwd-b-{}", Uuid::new_v4());
let prompt_b = format!("echo {marker_b}");
test.cmd_with_server(&server)
@@ -270,24 +274,6 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
let path_b = find_session_file_containing_marker(&sessions_dir, &marker_b)
.expect("no session file found for marker_b");
// `updated_at` is second-granularity, so ensure the touch lands in a later second
// than the initial session creation on fast CI (especially Windows).
std::thread::sleep(std::time::Duration::from_millis(1100));
// Make thread B deterministically newest according to rollout metadata.
let session_id_b = extract_conversation_id(&path_b);
let marker_b_touch = format!("resume-cwd-b-touch-{}", Uuid::new_v4());
let prompt_b_touch = format!("echo {marker_b_touch}");
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("-C")
.arg(dir_b.path())
.arg("resume")
.arg(&session_id_b)
.arg(&prompt_b_touch)
.assert()
.success();
// `resume --last` sorts by `updated_at`, which is second-granularity. Sleep so
// the upcoming `resume --last --all` write lands in a later second and becomes
// deterministically newest (instead of tying and falling back to UUID order).

View File

@@ -158,7 +158,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
StreamableHttpService::new(
|| Ok(TestToolServer::new()),
Arc::new(LocalSessionManager::default()),
StreamableHttpServerConfig::default(),
// Full CI can run this private test server in a remote container
// reached through its dynamic container IP instead of loopback.
StreamableHttpServerConfig::default().disable_allowed_hosts(),
),
)
.layer(middleware::from_fn_with_state(

View File

@@ -188,7 +188,8 @@ mod tests {
#[test]
fn environment_id_fallback_has_cwd_prefix() {
let dir = tempfile::tempdir().expect("tempdir");
let home = std::env::var_os("HOME").expect("HOME should be set for tests");
let dir = tempfile::tempdir_in(home).expect("tempdir outside repo");
let env_id = environment_id_from_cwd(dir.path());
let canonical = dir
.path()

View File

@@ -61,6 +61,13 @@ fn cache_project_root(chat: &mut ChatWidget, root_name: &str) {
});
}
/// Seeds the status-line project-root cache with an entry for the widget's
/// configured cwd whose root name is `None`.
fn cache_missing_project_root(chat: &mut ChatWidget) {
    let cwd = chat.config.cwd.to_path_buf();
    let missing_root = CachedProjectRootName {
        cwd,
        root_name: None,
    };
    chat.status_line_project_root_name_cache = Some(missing_root);
}
fn cache_rate_limit_snapshot(chat: &mut ChatWidget) {
chat.on_rate_limit_snapshot(Some(RateLimitSnapshot {
limit_id: None,
@@ -182,6 +189,7 @@ async fn status_line_setup_popup_hardcoded_only_snapshot() {
#[tokio::test]
async fn status_surface_preview_lines_mixed_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
cache_missing_project_root(&mut chat);
chat.status_line_branch = Some("feature/mixed-preview".to_string());
chat.thread_name = Some("Mixed preview thread".to_string());
@@ -278,6 +286,7 @@ async fn status_line_setup_popup_rate_limits_snapshot() {
#[tokio::test]
async fn status_line_setup_popup_mixed_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
cache_missing_project_root(&mut chat);
chat.status_line_branch = Some("feature/mixed-preview".to_string());
chat.thread_name = Some("Mixed preview thread".to_string());
chat.config.tui_status_line = Some(vec![
@@ -330,6 +339,7 @@ async fn terminal_title_setup_popup_hardcoded_only_snapshot() {
#[tokio::test]
async fn terminal_title_setup_popup_mixed_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
cache_missing_project_root(&mut chat);
chat.thread_name = Some("Mixed preview thread".to_string());
chat.config.tui_terminal_title = Some(vec![
"project-name".to_string(),
@@ -361,6 +371,7 @@ async fn terminal_title_setup_popup_rate_limits_snapshot() {
#[tokio::test]
async fn missing_project_root_uses_different_status_and_title_preview_sources() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
cache_missing_project_root(&mut chat);
let status_preview = status_preview_line(&mut chat, &[StatusLineItem::ProjectRoot]);
let title_preview = title_preview_line(&mut chat, &[TerminalTitleItem::Project]);

View File

@@ -3,9 +3,17 @@
use super::*;
use pretty_assertions::assert_eq;
/// Seeds the status-line project-root cache with an entry for the widget's
/// configured cwd whose root name is `None`.
fn cache_missing_project_root(chat: &mut ChatWidget) {
    let cwd = chat.config.cwd.to_path_buf();
    let missing_root = CachedProjectRootName {
        cwd,
        root_name: None,
    };
    chat.status_line_project_root_name_cache = Some(missing_root);
}
#[tokio::test]
async fn terminal_title_shows_action_required_while_exec_approval_is_pending() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
cache_missing_project_root(&mut chat);
chat.bottom_pane.set_task_running(/*running*/ true);
chat.refresh_terminal_title();
@@ -14,7 +22,7 @@ async fn terminal_title_shows_action_required_while_exec_approval_is_pending() {
approval_id: Some("call-action-required".into()),
turn_id: "turn-action-required".into(),
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
cwd: test_project_path().abs(),
reason: Some("need confirmation".into()),
network_approval_context: None,
proposed_execpolicy_amendment: None,
@@ -56,7 +64,7 @@ async fn terminal_title_action_required_respects_spinner_setting() {
approval_id: Some("call-no-spinner".into()),
turn_id: "turn-no-spinner".into(),
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
cwd: test_project_path().abs(),
reason: Some("need confirmation".into()),
network_approval_context: None,
proposed_execpolicy_amendment: None,
@@ -75,6 +83,7 @@ async fn terminal_title_action_required_respects_spinner_setting() {
#[tokio::test]
async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
cache_missing_project_root(&mut chat);
chat.bottom_pane.set_task_running(/*running*/ true);
chat.terminal_title_animation_origin = Instant::now() - std::time::Duration::from_millis(1500);
chat.refresh_terminal_title();
@@ -84,7 +93,7 @@ async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
approval_id: Some("call-blink".into()),
turn_id: "turn-blink".into(),
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
cwd: test_project_path().abs(),
reason: Some("need confirmation".into()),
network_approval_context: None,
proposed_execpolicy_amendment: None,
@@ -106,6 +115,7 @@ async fn terminal_title_action_required_blinks_when_animations_are_enabled() {
#[tokio::test]
async fn terminal_title_activity_indicators_do_not_animate_when_animations_are_disabled() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(/*model_override*/ None).await;
cache_missing_project_root(&mut chat);
chat.config.animations = false;
chat.bottom_pane.set_task_running(/*running*/ true);
chat.terminal_title_animation_origin = Instant::now() - std::time::Duration::from_millis(1500);
@@ -119,7 +129,7 @@ async fn terminal_title_activity_indicators_do_not_animate_when_animations_are_d
approval_id: Some("call-no-animations".into()),
turn_id: "turn-no-animations".into(),
command: vec!["bash".into(), "-lc".into(), "echo hello".into()],
cwd: AbsolutePathBuf::current_dir().expect("current dir"),
cwd: test_project_path().abs(),
reason: Some("need confirmation".into()),
network_approval_context: None,
proposed_execpolicy_amendment: None,

View File

@@ -247,7 +247,7 @@ fn legacy_non_tty_powershell_emits_output() {
pwsh.display().to_string(),
"-NoProfile".to_string(),
"-Command".to_string(),
"Write-Output LEGACY-NONTTY-DIRECT".to_string(),
"'LEGACY-NONTTY-DIRECT'".to_string(),
],
cwd.as_path(),
HashMap::new(),
@@ -433,7 +433,7 @@ fn legacy_capture_powershell_emits_output() {
pwsh.display().to_string(),
"-NoProfile".to_string(),
"-Command".to_string(),
"Write-Output LEGACY-CAPTURE-DIRECT".to_string(),
"'LEGACY-CAPTURE-DIRECT'".to_string(),
],
cwd.as_path(),
HashMap::new(),
@@ -527,7 +527,7 @@ fn legacy_tty_powershell_emits_output_and_accepts_input() {
"-NoProfile".to_string(),
"-NoExit".to_string(),
"-Command".to_string(),
"$PID; Write-Output ready".to_string(),
"$PID; 'ready'".to_string(),
],
cwd.as_path(),
HashMap::new(),
@@ -544,7 +544,7 @@ fn legacy_tty_powershell_emits_output_and_accepts_input() {
let writer = spawned.session.writer_sender();
writer
.send(b"Write-Output second\n".to_vec())
.send(b"'second'\n".to_vec())
.await
.expect("send second command");
writer