feat: unified exec basic pruning strategy (#7239)

LRU + exited sessions first
2026-04-30 17:36:40 +00:00 · 2025-11-24 18:22:32 +01:00
parent 920239f272
commit b2cddec3d7
3 changed files with 286 additions and 3 deletions
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -1760,3 +1760,160 @@ async fn unified_exec_runs_under_sandbox() -> Result<()> {

    Ok(())
 }
+
+/// Overflows the unified exec session store and checks which sessions get pruned.
+///
+/// A single scripted model response opens a `/bin/cat` session, a `sleep 1` session and
+/// `FILLER_SESSIONS` short `echo` sessions, then writes to the first two sessions.
+///
+/// Expected outcome, as asserted at the end of the test:
+/// - the `cat` session (id 0) still echoes what is written to it;
+/// - the write aimed at the `sleep` session (id 1) fails with an unknown-session error.
+///
+/// The `skip_if_no_network!` and `skip_if_sandbox!` guards run before anything else.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn unified_exec_prunes_exited_sessions_first() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    // NOTE(review): 64 presumably mirrors the session cap in the implementation — confirm.
+    const MAX_SESSIONS_FOR_TEST: i32 = 64;
+    const FILLER_SESSIONS: i32 = MAX_SESSIONS_FOR_TEST - 1;
+
+    let keep_call_id = "uexec-prune-keep";
+    let prune_call_id = "uexec-prune-target";
+    let keep_write_call_id = "uexec-prune-keep-write";
+    let probe_call_id = "uexec-prune-probe";
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.use_experimental_unified_exec_tool = true;
+        config.features.enable(Feature::UnifiedExec);
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    // Serialize the fixed tool-call payloads up front so the event list below reads linearly.
+    let start_keep = serde_json::to_string(&serde_json::json!({
+        "cmd": "/bin/cat",
+        "yield_time_ms": 250,
+    }))?;
+    let start_prune = serde_json::to_string(&serde_json::json!({
+        "cmd": "sleep 1",
+        "yield_time_ms": 250,
+    }))?;
+    // `session_id` 0 and 1 are the ids asserted for the keep and prune sessions further down.
+    let write_keep = serde_json::to_string(&serde_json::json!({
+        "chars": "still alive\n",
+        "session_id": 0,
+        "yield_time_ms": 500,
+    }))?;
+    let write_probe = serde_json::to_string(&serde_json::json!({
+        "chars": "should fail\n",
+        "session_id": 1,
+        "yield_time_ms": 500,
+    }))?;
+
+    // Event order: the two named sessions, then the fillers, then the two stdin writes.
+    let mut events = vec![
+        ev_response_created("resp-prune-1"),
+        ev_function_call(keep_call_id, "exec_command", &start_keep),
+        ev_function_call(prune_call_id, "exec_command", &start_prune),
+    ];
+    // One `exec_command` call per filler, each with its own call id.
+    for idx in 0..FILLER_SESSIONS {
+        let filler = serde_json::to_string(&serde_json::json!({
+            "cmd": format!("echo filler {idx}"),
+            "yield_time_ms": 250,
+        }))?;
+        let filler_call_id = format!("uexec-prune-fill-{idx}");
+        events.push(ev_function_call(&filler_call_id, "exec_command", &filler));
+    }
+    events.extend([
+        ev_function_call(keep_write_call_id, "write_stdin", &write_keep),
+        ev_function_call(probe_call_id, "write_stdin", &write_probe),
+        ev_completed("resp-prune-1"),
+    ]);
+
+    // The second response is just an assistant message followed by completion.
+    let responses = vec![
+        sse(events),
+        sse(vec![
+            ev_response_created("resp-prune-2"),
+            ev_assistant_message("msg-prune", "done"),
+            ev_completed("resp-prune-2"),
+        ]),
+    ];
+    let response_mock = mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    // Drive one user turn with approvals disabled and the full-access sandbox policy.
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "fill session cache".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+
+    // Tool outputs are read back from the requests the mock server recorded.
+    let requests = response_mock.requests();
+    assert!(
+        !requests.is_empty(),
+        "expected at least one response request"
+    );
+
+    // Returns the first recorded output text for `call_id` among the captured requests.
+    let output_for = |call_id: &str| {
+        requests
+            .iter()
+            .find_map(|req| req.function_call_output_text(call_id))
+    };
+
+    // Both start calls must report their session id and no exit code yet.
+    let keep_start = output_for(keep_call_id).expect("missing initial keep session output");
+    let keep_start = parse_unified_exec_output(&keep_start)?;
+    pretty_assertions::assert_eq!(keep_start.session_id, Some(0));
+    assert!(keep_start.exit_code.is_none());
+
+    let prune_start = output_for(prune_call_id).expect("missing initial prune session output");
+    let prune_start = parse_unified_exec_output(&prune_start)?;
+    pretty_assertions::assert_eq!(prune_start.session_id, Some(1));
+    assert!(prune_start.exit_code.is_none());
+
+    // Session 0 must have survived the fillers and still echo its input.
+    let keep_write = output_for(keep_write_call_id).expect("missing keep write output");
+    let keep_write = parse_unified_exec_output(&keep_write)?;
+    pretty_assertions::assert_eq!(keep_write.session_id, Some(0));
+    assert!(
+        keep_write.output.contains("still alive"),
+        "expected cat session to echo input, got {:?}",
+        keep_write.output
+    );
+
+    // Session 1 must be gone; either spelling of the unknown-session error is accepted.
+    let pruned_probe = output_for(probe_call_id).expect("missing probe output");
+    let probe_rejected = ["UnknownSessionId", "Unknown session id"]
+        .iter()
+        .any(|needle| pruned_probe.contains(*needle));
+    assert!(
+        probe_rejected,
+        "expected probe to fail after pruning, got {pruned_probe:?}"
+    );
+
+    Ok(())
+}