mirror of
https://github.com/openai/codex.git
synced 2026-05-28 06:55:01 +00:00
Log rollout writer OS errors (#24474)
## Why

Refs #24425. We have seen rollout JSONL corruption that appears consistent with a rollout write failing after partially appending a line, followed by a retry that appends the same item again. The available user logs did not include the underlying OS error, so it is hard to tell whether the trigger was `ENOSPC`, quota exhaustion, a filesystem error, or something else. This PR adds the missing diagnostics for future reports.

## What changed

- Include `ErrorKind` and `raw_os_error()` in rollout writer failure logs.
- Preserve the existing append-only rollout write path; this PR is diagnostic-only.

## Verification

- `just test -p codex-rollout`
This commit is contained in:
@@ -734,8 +734,10 @@ impl RolloutRecorder {
|
||||
// This is the terminal background-task failure path. Normal I/O failures stay inside
|
||||
// `rollout_writer`, are reported through command acks, and leave items buffered for retry.
|
||||
error!(
|
||||
"rollout writer task failed for {}: {err}",
|
||||
rollout_path_for_spawn.display()
|
||||
"rollout writer task failed for {}: {err}; error_kind={:?}; raw_os_error={:?}",
|
||||
rollout_path_for_spawn.display(),
|
||||
err.kind(),
|
||||
err.raw_os_error()
|
||||
);
|
||||
writer_task_for_spawn.mark_failed(&err);
|
||||
}
|
||||
@@ -1468,8 +1470,11 @@ impl RolloutWriterState {
|
||||
let message = err.to_string();
|
||||
if self.last_logged_error.as_ref() != Some(&message) {
|
||||
error!(
|
||||
"rollout writer failed for {}; buffered rollout items will be retried: {err}",
|
||||
self.rollout_path.display()
|
||||
"rollout writer failed for {}; buffered rollout items will be retried: {err}; \
|
||||
error_kind={:?}; raw_os_error={:?}",
|
||||
self.rollout_path.display(),
|
||||
err.kind(),
|
||||
err.raw_os_error()
|
||||
);
|
||||
}
|
||||
self.last_logged_error = Some(message);
|
||||
|
||||
Reference in New Issue
Block a user