feat: unified exec basic pruning strategy (#7239)

LRU + exited sessions first
This commit is contained in:
jif-oai
2025-11-24 18:22:32 +01:00
committed by GitHub
parent 920239f272
commit b2cddec3d7
3 changed files with 286 additions and 3 deletions

View File

@@ -1760,3 +1760,160 @@ async fn unified_exec_runs_under_sandbox() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_prunes_exited_sessions_first() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.use_experimental_unified_exec_tool = true;
config.features.enable(Feature::UnifiedExec);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
const MAX_SESSIONS_FOR_TEST: i32 = 64;
const FILLER_SESSIONS: i32 = MAX_SESSIONS_FOR_TEST - 1;
let keep_call_id = "uexec-prune-keep";
let keep_args = serde_json::json!({
"cmd": "/bin/cat",
"yield_time_ms": 250,
});
let prune_call_id = "uexec-prune-target";
let prune_args = serde_json::json!({
"cmd": "sleep 1",
"yield_time_ms": 250,
});
let mut events = vec![ev_response_created("resp-prune-1")];
events.push(ev_function_call(
keep_call_id,
"exec_command",
&serde_json::to_string(&keep_args)?,
));
events.push(ev_function_call(
prune_call_id,
"exec_command",
&serde_json::to_string(&prune_args)?,
));
for idx in 0..FILLER_SESSIONS {
let filler_args = serde_json::json!({
"cmd": format!("echo filler {idx}"),
"yield_time_ms": 250,
});
let call_id = format!("uexec-prune-fill-{idx}");
events.push(ev_function_call(
&call_id,
"exec_command",
&serde_json::to_string(&filler_args)?,
));
}
let keep_write_call_id = "uexec-prune-keep-write";
let keep_write_args = serde_json::json!({
"chars": "still alive\n",
"session_id": 0,
"yield_time_ms": 500,
});
events.push(ev_function_call(
keep_write_call_id,
"write_stdin",
&serde_json::to_string(&keep_write_args)?,
));
let probe_call_id = "uexec-prune-probe";
let probe_args = serde_json::json!({
"chars": "should fail\n",
"session_id": 1,
"yield_time_ms": 500,
});
events.push(ev_function_call(
probe_call_id,
"write_stdin",
&serde_json::to_string(&probe_args)?,
));
events.push(ev_completed("resp-prune-1"));
let first_response = sse(events);
let completion_response = sse(vec![
ev_response_created("resp-prune-2"),
ev_assistant_message("msg-prune", "done"),
ev_completed("resp-prune-2"),
]);
let response_mock =
mount_sse_sequence(&server, vec![first_response, completion_response]).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "fill session cache".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let requests = response_mock.requests();
assert!(
!requests.is_empty(),
"expected at least one response request"
);
let keep_start = requests
.iter()
.find_map(|req| req.function_call_output_text(keep_call_id))
.expect("missing initial keep session output");
let keep_start_output = parse_unified_exec_output(&keep_start)?;
pretty_assertions::assert_eq!(keep_start_output.session_id, Some(0));
assert!(keep_start_output.exit_code.is_none());
let prune_start = requests
.iter()
.find_map(|req| req.function_call_output_text(prune_call_id))
.expect("missing initial prune session output");
let prune_start_output = parse_unified_exec_output(&prune_start)?;
pretty_assertions::assert_eq!(prune_start_output.session_id, Some(1));
assert!(prune_start_output.exit_code.is_none());
let keep_write = requests
.iter()
.find_map(|req| req.function_call_output_text(keep_write_call_id))
.expect("missing keep write output");
let keep_write_output = parse_unified_exec_output(&keep_write)?;
pretty_assertions::assert_eq!(keep_write_output.session_id, Some(0));
assert!(
keep_write_output.output.contains("still alive"),
"expected cat session to echo input, got {:?}",
keep_write_output.output
);
let pruned_probe = requests
.iter()
.find_map(|req| req.function_call_output_text(probe_call_id))
.expect("missing probe output");
assert!(
pruned_probe.contains("UnknownSessionId") || pruned_probe.contains("Unknown session id"),
"expected probe to fail after pruning, got {pruned_probe:?}"
);
Ok(())
}