hooks: emit Bash PostToolUse when exec_command completes via write_stdin (#18888)

Fixes #16246.

## Why

`exec_command` already emits `PreToolUse`, but long-running unified exec
commands that finish on a later `write_stdin` poll could miss the
matching `PostToolUse`. That left the Bash hook lifecycle inconsistent,
broke expectations around `tool_use_id` and `tool_input.command`, and
meant `PostToolUse` block/replacement feedback could fail to replace the
final session output before it reached model context.

This keeps the fix scoped to the `exec_command` / `write_stdin`
lifecycle. Broader non-Bash hook expansion is still out of scope here
and remains tracked separately in #16732.

## What changed

- Compute and store `PostToolUsePayload` while handlers still have
access to their concrete output type, and carry `tool_use_id` through
that payload.
- Preserve the original hook-facing `exec_command` string through
unified exec state (`ExecCommandRequest`, `ProcessEntry`,
`PreparedProcessHandles`, and `ExecCommandToolOutput`) via
`hook_command`, and remove the now-unused `session_command` output
metadata.
- Emit exactly one Bash `PostToolUse` for long-running `exec_command`
sessions when a later `write_stdin` poll observes final completion,
using the original `exec_command` call id and hook-facing command.
- Keep one-shot `exec_command` behavior aligned with the same payload
construction, including interactive completions that return a final
result directly.
- Apply `PostToolUse` block/replacement feedback before the final
`write_stdin` completion output is sent back to the model.
- Keep `write_stdin` itself out of `PreToolUse` matching so it continues
to act as transport/polling for the original Bash tool call.
- Restore plain matcher behavior for tool-name matchers such as `Bash`
and `Edit|Write`, while still treating patterns with regex characters
(for example `mcp__.*`) as regexes.
- Add unit coverage for unified exec payload construction and parallel
session separation, plus a core integration regression that verifies a
blocked `PostToolUse` replaces the final `write_stdin` output in model
context.

## Testing

- `cargo test -p codex-hooks`
- `cargo test -p codex-core post_tool_use_payload`
- `cargo test -p codex-core post_tool_use_blocks_when_exec_session_completes_via_write_stdin`
This commit is contained in:
Andrei Eternal
2026-04-22 17:14:22 -07:00
committed by GitHub
parent 6ca038bbd1
commit eed0e07825
15 changed files with 345 additions and 78 deletions

View File

@@ -33,6 +33,7 @@ use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_windows;
use core_test_support::streaming_sse::StreamingSseChunk;
use core_test_support::streaming_sse::start_streaming_sse_server;
use core_test_support::test_codex::test_codex;
@@ -407,6 +408,64 @@ elif mode == "exit_2":
Ok(())
}
/// Writes a logging `PreToolUse` hook and a logging-then-blocking `PostToolUse` hook into `home`.
///
/// Three files are created under `home`:
/// - `pre_tool_use_hook.py`: appends the JSON payload read from stdin to
///   `pre_tool_use_hook_log.jsonl`.
/// - `post_tool_use_hook.py`: appends the JSON payload read from stdin to
///   `post_tool_use_hook_log.jsonl`, writes `feedback` to stderr, and exits with status 2.
/// - `hooks.json`: registers both scripts under the `Bash` matcher.
///
/// # Errors
///
/// Returns an error if `feedback` cannot be serialized to JSON or if any file write fails.
fn write_logging_pre_and_blocking_post_tool_use_hooks(home: &Path, feedback: &str) -> Result<()> {
    let pre_script_path = home.join("pre_tool_use_hook.py");
    let pre_log_path = home.join("pre_tool_use_hook_log.jsonl");
    let post_script_path = home.join("post_tool_use_hook.py");
    let post_log_path = home.join("post_tool_use_hook_log.jsonl");

    // A JSON string literal is also a valid Python string literal, so the serialized
    // feedback can be spliced directly into the generated script.
    let feedback_json =
        serde_json::to_string(feedback).context("serialize post tool use feedback")?;

    // The body of each Python `with` block must be indented; without the indentation the
    // script fails to parse and the hook neither logs its payload nor exits with status 2.
    let pre_script = format!(
        r#"import json
from pathlib import Path
import sys
payload = json.load(sys.stdin)
with Path(r"{pre_log_path}").open("a", encoding="utf-8") as handle:
    handle.write(json.dumps(payload) + "\n")
"#,
        pre_log_path = pre_log_path.display(),
    );
    let post_script = format!(
        r#"import json
from pathlib import Path
import sys
payload = json.load(sys.stdin)
with Path(r"{post_log_path}").open("a", encoding="utf-8") as handle:
    handle.write(json.dumps(payload) + "\n")
sys.stderr.write({feedback_json} + "\n")
raise SystemExit(2)
"#,
        post_log_path = post_log_path.display(),
    );

    // Both events register a single command hook under the `Bash` matcher; only the script
    // path and the status message differ.
    let bash_hook_group = |script_path: &Path, status_message: &str| {
        serde_json::json!([{
            "matcher": "Bash",
            "hooks": [{
                "type": "command",
                "command": format!("python3 {}", script_path.display()),
                "statusMessage": status_message,
            }]
        }])
    };
    let hooks = serde_json::json!({
        "hooks": {
            "PreToolUse": bash_hook_group(&pre_script_path, "running pre tool use hook"),
            "PostToolUse": bash_hook_group(&post_script_path, "running post tool use hook"),
        }
    });

    fs::write(&pre_script_path, pre_script).context("write pre tool use hook script")?;
    fs::write(&post_script_path, post_script).context("write post tool use hook script")?;
    fs::write(home.join("hooks.json"), hooks.to_string()).context("write hooks.json")?;
    Ok(())
}
fn write_session_start_hook_recording_transcript(home: &Path) -> Result<()> {
let script_path = home.join("session_start_hook.py");
let log_path = home.join("session_start_hook_log.jsonl");
@@ -2514,6 +2573,112 @@ async fn post_tool_use_exit_two_replaces_one_shot_exec_command_output_with_feedb
Ok(())
}
/// Regression test for an `exec_command` session whose completion is observed by a later
/// `write_stdin` poll.
///
/// The mocked model issues an `exec_command` call, then a `write_stdin` poll, then a final
/// assistant message. The test asserts that:
/// - the `write_stdin` output sent back in the third request equals the hook feedback,
/// - exactly one `PreToolUse` payload was logged, carrying the `exec_command` call id and command,
/// - exactly one `PostToolUse` payload was logged, carrying the same call id and command, with a
///   `tool_response` containing the command's output.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn post_tool_use_blocks_when_exec_session_completes_via_write_stdin() -> Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_windows!(Ok(()));

    let server = start_mock_server().await;

    let start_call_id = "posttooluse-exec-session-start";
    let poll_call_id = "posttooluse-exec-session-poll";
    let command = "sleep 1; printf session-post-hook-output".to_string();
    let feedback = "blocked by session post hook";

    // The command sleeps longer than the initial yield window, so the start call is expected
    // to return before the process has finished.
    let start_args_json = serde_json::to_string(&serde_json::json!({
        "cmd": command,
        "shell": "/bin/sh",
        "login": false,
        "tty": false,
        "yield_time_ms": 250,
    }))?;
    // NOTE(review): presumably 1000 is the id assigned to the first unified exec session —
    // confirm against the session id allocator.
    let poll_args_json = serde_json::to_string(&serde_json::json!({
        "session_id": 1000,
        "chars": "",
        "yield_time_ms": 5_000,
    }))?;

    let start_turn = sse(vec![
        ev_response_created("resp-1"),
        core_test_support::responses::ev_function_call(
            start_call_id,
            "exec_command",
            &start_args_json,
        ),
        ev_completed("resp-1"),
    ]);
    let poll_turn = sse(vec![
        ev_response_created("resp-2"),
        core_test_support::responses::ev_function_call(
            poll_call_id,
            "write_stdin",
            &poll_args_json,
        ),
        ev_completed("resp-2"),
    ]);
    let closing_turn = sse(vec![
        ev_response_created("resp-3"),
        ev_assistant_message("msg-1", "session post hook observed"),
        ev_completed("resp-3"),
    ]);
    let responses = mount_sse_sequence(&server, vec![start_turn, poll_turn, closing_turn]).await;

    let mut builder = test_codex()
        .with_pre_build_hook(|home| {
            write_logging_pre_and_blocking_post_tool_use_hooks(home, feedback).unwrap_or_else(
                |error| panic!("failed to write tool use hook test fixture: {error}"),
            )
        })
        .with_config(|config| {
            config.use_experimental_unified_exec_tool = true;
            config
                .features
                .enable(Feature::CodexHooks)
                .expect("test config should allow feature update");
            config
                .features
                .enable(Feature::UnifiedExec)
                .expect("test config should allow feature update");
        });
    let test = builder.build(&server).await?;

    test.submit_turn("run the exec command session with post hook")
        .await?;

    // The third request carries the `write_stdin` result back to the model.
    let requests = responses.requests();
    assert_eq!(requests.len(), 3);
    let poll_output_item = requests[2].function_call_output(poll_call_id);
    let poll_output = poll_output_item
        .get("output")
        .and_then(Value::as_str)
        .expect("write_stdin output string");
    assert_eq!(poll_output, feedback);

    // Only the `exec_command` call should have been logged by the pre hook.
    let pre_hook_inputs = read_pre_tool_use_hook_inputs(test.codex_home_path())?;
    assert_eq!(pre_hook_inputs.len(), 1);
    let pre_input = &pre_hook_inputs[0];
    assert_eq!(pre_input["tool_name"], "Bash");
    assert_eq!(pre_input["tool_use_id"], start_call_id);
    assert_eq!(pre_input["tool_input"]["command"], command);

    // The single post hook payload is attributed to the original `exec_command` call.
    let post_hook_inputs = read_post_tool_use_hook_inputs(test.codex_home_path())?;
    assert_eq!(post_hook_inputs.len(), 1);
    let post_input = &post_hook_inputs[0];
    assert_eq!(post_input["hook_event_name"], "PostToolUse");
    assert_eq!(post_input["tool_name"], "Bash");
    assert_eq!(post_input["tool_use_id"], start_call_id);
    assert_eq!(post_input["tool_input"]["command"], command);
    assert!(
        post_input["tool_response"]
            .as_str()
            .is_some_and(|tool_response| tool_response.contains("session-post-hook-output")),
        "PostToolUse should see the final session output, got {:?}",
        post_input["tool_response"]
    );

    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn post_tool_use_records_additional_context_for_apply_patch() -> Result<()> {
skip_if_no_network!(Ok(()));