mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
fix: race in js repl (#11922)
js_repl_reset previously raced with in-flight/new js_repl executions because reset() could clear exec_tool_calls without synchronizing with execute(). In that window, a running exec could lose its per-exec tool-call context, and subsequent kernel RunTool messages would fail with js_repl exec context not found. The fix serializes reset and execute on the same exec_lock, so reset cannot run concurrently with exec setup/teardown. We also keep the timeout path safe by performing reset steps inline while execute() already holds the lock, avoiding re-entrant lock acquisition. A regression test now verifies that reset waits for the exec lock and does not clear tool-call state early.
This commit is contained in:
@@ -399,6 +399,9 @@ impl JsReplManager {
|
||||
}
|
||||
|
||||
pub async fn reset(&self) -> Result<(), FunctionCallError> {
|
||||
let _permit = self.exec_lock.clone().acquire_owned().await.map_err(|_| {
|
||||
FunctionCallError::RespondToModel("js_repl execution unavailable".to_string())
|
||||
})?;
|
||||
self.reset_kernel().await;
|
||||
Self::clear_all_exec_tool_calls_map(&self.exec_tool_calls).await;
|
||||
Ok(())
|
||||
@@ -527,7 +530,9 @@ impl JsReplManager {
|
||||
return Err(FunctionCallError::RespondToModel(message));
|
||||
}
|
||||
Err(_) => {
|
||||
self.reset().await?;
|
||||
self.reset_kernel().await;
|
||||
self.wait_for_exec_tool_calls(&req_id).await;
|
||||
self.exec_tool_calls.lock().await.clear();
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"js_repl execution timed out; kernel reset, rerun your request".to_string(),
|
||||
));
|
||||
@@ -1456,6 +1461,46 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn reset_waits_for_exec_lock_before_clearing_exec_tool_calls() {
|
||||
let manager = JsReplManager::new(None, PathBuf::from("."))
|
||||
.await
|
||||
.expect("manager should initialize");
|
||||
let permit = manager
|
||||
.exec_lock
|
||||
.clone()
|
||||
.acquire_owned()
|
||||
.await
|
||||
.expect("lock should be acquirable");
|
||||
let exec_id = Uuid::new_v4().to_string();
|
||||
manager.register_exec_tool_calls(&exec_id).await;
|
||||
|
||||
let reset_manager = Arc::clone(&manager);
|
||||
let mut reset_task = tokio::spawn(async move { reset_manager.reset().await });
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
|
||||
assert!(
|
||||
!reset_task.is_finished(),
|
||||
"reset should wait until execute lock is released"
|
||||
);
|
||||
assert!(
|
||||
manager.exec_tool_calls.lock().await.contains_key(&exec_id),
|
||||
"reset must not clear tool-call contexts while execute lock is held"
|
||||
);
|
||||
|
||||
drop(permit);
|
||||
|
||||
tokio::time::timeout(Duration::from_secs(1), &mut reset_task)
|
||||
.await
|
||||
.expect("reset should complete after execute lock release")
|
||||
.expect("reset task should not panic")
|
||||
.expect("reset should succeed");
|
||||
assert!(
|
||||
!manager.exec_tool_calls.lock().await.contains_key(&exec_id),
|
||||
"reset should clear tool-call contexts after lock acquisition"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn reset_clears_inflight_exec_tool_calls_without_waiting() {
|
||||
let manager = JsReplManager::new(None, std::env::temp_dir())
|
||||
|
||||
Reference in New Issue
Block a user