feat: better slug for rollout summaries (#12135)

This commit is contained in:
jif-oai
2026-02-18 16:39:38 +00:00
committed by GitHub
parent 491b4946ae
commit 18206a9c1e
2 changed files with 291 additions and 32 deletions

View File

@@ -3,6 +3,7 @@ use std::collections::HashSet;
use std::fmt::Write as _;
use std::path::Path;
use tracing::warn;
use uuid::Uuid;
use crate::memories::ensure_layout;
use crate::memories::raw_memories_file;
@@ -82,7 +83,10 @@ async fn rebuild_raw_memories_file(
.map_err(raw_memories_format_error)?;
writeln!(body, "cwd: {}", memory.cwd.display()).map_err(raw_memories_format_error)?;
writeln!(body).map_err(raw_memories_format_error)?;
body.push_str(memory.raw_memory.trim());
let rollout_summary_file = format!("{}.md", rollout_summary_file_stem(memory));
let raw_memory =
replace_rollout_summary_file_in_raw_memory(&memory.raw_memory, &rollout_summary_file);
body.push_str(raw_memory.trim());
body.push_str("\n\n");
}
@@ -157,12 +161,87 @@ fn rollout_summary_format_error(err: std::fmt::Error) -> std::io::Error {
std::io::Error::other(format!("format rollout summary: {err}"))
}
fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
const ROLLOUT_SLUG_MAX_LEN: usize = 20;
fn replace_rollout_summary_file_in_raw_memory(
raw_memory: &str,
rollout_summary_file: &str,
) -> String {
const ROLLOUT_SUMMARY_PREFIX: &str = "rollout_summary_file: ";
let thread_id = memory.thread_id.to_string();
let Some(raw_slug) = memory.rollout_slug.as_deref() else {
return thread_id;
let replacement = format!("rollout_summary_file: {rollout_summary_file}");
raw_memory
.split('\n')
.map(|line| {
if line.starts_with(ROLLOUT_SUMMARY_PREFIX) {
replacement.as_str()
} else {
line
}
})
.collect::<Vec<_>>()
.join("\n")
}
pub(crate) fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
rollout_summary_file_stem_from_parts(
memory.thread_id,
memory.source_updated_at,
memory.rollout_slug.as_deref(),
)
}
pub(super) fn rollout_summary_file_stem_from_parts(
thread_id: codex_protocol::ThreadId,
source_updated_at: chrono::DateTime<chrono::Utc>,
rollout_slug: Option<&str>,
) -> String {
const ROLLOUT_SLUG_MAX_LEN: usize = 20;
const SHORT_HASH_ALPHABET: &[u8; 62] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const SHORT_HASH_SPACE: u32 = 14_776_336;
let thread_id = thread_id.to_string();
let (timestamp_fragment, short_hash_seed) = match Uuid::parse_str(&thread_id) {
Ok(thread_uuid) => {
let timestamp = thread_uuid
.get_timestamp()
.and_then(|uuid_timestamp| {
let (seconds, nanos) = uuid_timestamp.to_unix();
i64::try_from(seconds).ok().and_then(|secs| {
chrono::DateTime::<chrono::Utc>::from_timestamp(secs, nanos)
})
})
.unwrap_or(source_updated_at);
let short_hash_seed = (thread_uuid.as_u128() & 0xFFFF_FFFF) as u32;
(
timestamp.format("%Y-%m-%dT%H-%M-%S").to_string(),
short_hash_seed,
)
}
Err(_) => {
let mut short_hash_seed = 0u32;
for byte in thread_id.bytes() {
short_hash_seed = short_hash_seed
.wrapping_mul(31)
.wrapping_add(u32::from(byte));
}
(
source_updated_at.format("%Y-%m-%dT%H-%M-%S").to_string(),
short_hash_seed,
)
}
};
let mut short_hash_value = short_hash_seed % SHORT_HASH_SPACE;
let mut short_hash_chars = ['0'; 4];
for idx in (0..short_hash_chars.len()).rev() {
let alphabet_idx = (short_hash_value % SHORT_HASH_ALPHABET.len() as u32) as usize;
short_hash_chars[idx] = SHORT_HASH_ALPHABET[alphabet_idx] as char;
short_hash_value /= SHORT_HASH_ALPHABET.len() as u32;
}
let short_hash: String = short_hash_chars.iter().collect();
let file_prefix = format!("{timestamp_fragment}-{short_hash}");
let Some(raw_slug) = rollout_slug else {
return file_prefix;
};
let mut slug = String::with_capacity(ROLLOUT_SLUG_MAX_LEN);
@@ -183,25 +262,28 @@ fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
}
if slug.is_empty() {
thread_id
file_prefix
} else {
format!("{thread_id}-{slug}")
format!("{file_prefix}-{slug}")
}
}
#[cfg(test)]
mod tests {
use super::replace_rollout_summary_file_in_raw_memory;
use super::rollout_summary_file_stem;
use super::rollout_summary_file_stem_from_parts;
use chrono::TimeZone;
use chrono::Utc;
use codex_protocol::ThreadId;
use codex_state::Stage1Output;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
const FIXED_PREFIX: &str = "2025-02-11T15-35-19-jqmb";
fn stage1_output_with_slug(rollout_slug: Option<&str>) -> Stage1Output {
fn stage1_output_with_slug(thread_id: ThreadId, rollout_slug: Option<&str>) -> Stage1Output {
Stage1Output {
thread_id: ThreadId::new(),
thread_id,
source_updated_at: Utc.timestamp_opt(123, 0).single().expect("timestamp"),
raw_memory: "raw memory".to_string(),
rollout_summary: "summary".to_string(),
@@ -211,31 +293,112 @@ mod tests {
}
}
#[test]
fn rollout_summary_file_stem_uses_thread_id_when_slug_missing() {
let memory = stage1_output_with_slug(None);
let thread_id = memory.thread_id.to_string();
assert_eq!(rollout_summary_file_stem(&memory), thread_id);
fn fixed_thread_id() -> ThreadId {
ThreadId::try_from("0194f5a6-89ab-7cde-8123-456789abcdef").expect("valid thread id")
}
#[test]
fn rollout_summary_file_stem_sanitizes_and_truncates_slug() {
let memory =
stage1_output_with_slug(Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345"));
let thread_id = memory.thread_id.to_string();
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_missing() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(thread_id, None);
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
assert_eq!(
rollout_summary_file_stem(&memory),
format!("{thread_id}-unsafe_slug_with_spa")
rollout_summary_file_stem_from_parts(
memory.thread_id,
memory.source_updated_at,
memory.rollout_slug.as_deref(),
),
FIXED_PREFIX
);
}
#[test]
fn rollout_summary_file_stem_uses_thread_id_when_slug_is_empty() {
let memory = stage1_output_with_slug(Some(""));
let thread_id = memory.thread_id.to_string();
fn rollout_summary_file_stem_sanitizes_and_truncates_slug() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(
thread_id,
Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345"),
);
assert_eq!(rollout_summary_file_stem(&memory), thread_id);
assert_eq!(
rollout_summary_file_stem(&memory),
format!("{FIXED_PREFIX}-unsafe_slug_with_spa")
);
}
#[test]
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_is_empty() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(thread_id, Some(""));
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
}
#[test]
fn replace_rollout_summary_file_in_raw_memory_replaces_existing_value() {
let raw_memory = "\
---
rollout_summary_file: wrong.md
description: demo
keywords: one, two
---
- body line";
let normalized = replace_rollout_summary_file_in_raw_memory(
raw_memory,
"2025-01-01T00-00-00-abcd-demo.md",
);
assert_eq!(
normalized,
"\
---
rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md
description: demo
keywords: one, two
---
- body line"
);
}
#[test]
fn replace_rollout_summary_file_in_raw_memory_replaces_placeholder() {
let raw_memory = "\
---
rollout_summary_file: <system_populated_file.md>
description: demo
keywords: one, two
---
- body line";
let normalized = replace_rollout_summary_file_in_raw_memory(
raw_memory,
"2025-01-01T00-00-00-abcd-demo.md",
);
assert_eq!(
normalized,
"\
---
rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md
description: demo
keywords: one, two
---
- body line"
);
}
#[test]
fn replace_rollout_summary_file_in_raw_memory_leaves_text_without_field_unchanged() {
let raw_memory = "\
---
description: demo
keywords: one, two
---
- body line";
let normalized = replace_rollout_summary_file_in_raw_memory(
raw_memory,
"2025-01-01T00-00-00-abcd-demo.md",
);
assert_eq!(normalized, raw_memory);
}
}

View File

@@ -105,8 +105,28 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
.await
.expect("rebuild raw memories");
assert!(keep_path.is_file());
assert!(!drop_path.exists());
assert!(
!tokio::fs::try_exists(&keep_path)
.await
.expect("check stale keep path"),
"sync should prune stale filename that used thread id only"
);
assert!(
!tokio::fs::try_exists(&drop_path)
.await
.expect("check stale drop path"),
"sync should prune stale filename for dropped thread"
);
let mut dir = tokio::fs::read_dir(rollout_summaries_dir(&root))
.await
.expect("open rollout summaries dir");
let mut files = Vec::new();
while let Some(entry) = dir.next_entry().await.expect("read dir entry") {
files.push(entry.file_name().to_string_lossy().to_string());
}
files.sort_unstable();
assert_eq!(files.len(), 1);
let raw_memories = tokio::fs::read_to_string(raw_memories_file(&root))
.await
@@ -117,7 +137,7 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
}
#[tokio::test]
async fn sync_rollout_summaries_uses_thread_id_and_sanitized_slug_filename() {
async fn sync_rollout_summaries_uses_timestamp_hash_and_sanitized_slug_filename() {
let dir = tempdir().expect("tempdir");
let root = dir.path().join("memory");
ensure_layout(&root).await.expect("ensure layout");
@@ -165,9 +185,24 @@ async fn sync_rollout_summaries_uses_thread_id_and_sanitized_slug_filename() {
let stem = file_name
.strip_suffix(".md")
.expect("rollout summary file should end with .md");
let slug = stem
.strip_prefix(&format!("{thread_id}-"))
.expect("rollout summary filename should include thread id and slug");
let (prefix, slug) = stem
.rsplit_once('-')
.expect("rollout summary filename should include slug");
let (timestamp, short_hash) = prefix
.rsplit_once('-')
.expect("rollout summary filename should include short hash");
assert_eq!(timestamp.len(), 19, "timestamp should be second precision");
let parsed_timestamp = chrono::NaiveDateTime::parse_from_str(timestamp, "%Y-%m-%dT%H-%M-%S");
assert!(
parsed_timestamp.is_ok(),
"timestamp should use YYYY-MM-DDThh-mm-ss"
);
assert_eq!(short_hash.len(), 4, "short hash should be exactly 4 chars");
assert!(
short_hash.chars().all(|ch| ch.is_ascii_alphanumeric()),
"short hash should use only alphanumeric chars"
);
assert!(slug.len() <= 20, "slug should be capped at 20 chars");
assert!(
slug.chars()
@@ -193,6 +228,67 @@ async fn sync_rollout_summaries_uses_thread_id_and_sanitized_slug_filename() {
);
}
#[tokio::test]
async fn rebuild_raw_memories_file_rewrites_rollout_summary_file_to_canonical_filename() {
let dir = tempdir().expect("tempdir");
let root = dir.path().join("memory");
ensure_layout(&root).await.expect("ensure layout");
let thread_id =
ThreadId::try_from("0194f5a6-89ab-7cde-8123-456789abcdef").expect("valid thread id");
let memories = vec![Stage1Output {
thread_id,
source_updated_at: Utc.timestamp_opt(200, 0).single().expect("timestamp"),
raw_memory: "\
---
rollout_summary_file: state_migration_uniqueness_test.md
description: Added a migration test
keywords: codex-state, migrations
---
- Kept details."
.to_string(),
rollout_summary: "short summary".to_string(),
rollout_slug: Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345".to_string()),
cwd: PathBuf::from("/tmp/workspace"),
generated_at: Utc.timestamp_opt(201, 0).single().expect("timestamp"),
}];
sync_rollout_summaries_from_memories(
&root,
&memories,
DEFAULT_MEMORIES_MAX_RAW_MEMORIES_FOR_GLOBAL,
)
.await
.expect("sync rollout summaries");
rebuild_raw_memories_file_from_memories(
&root,
&memories,
DEFAULT_MEMORIES_MAX_RAW_MEMORIES_FOR_GLOBAL,
)
.await
.expect("rebuild raw memories");
let mut dir = tokio::fs::read_dir(rollout_summaries_dir(&root))
.await
.expect("open rollout summaries dir");
let mut files = Vec::new();
while let Some(entry) = dir.next_entry().await.expect("read dir entry") {
files.push(entry.file_name().to_string_lossy().to_string());
}
files.sort_unstable();
assert_eq!(files.len(), 1);
let canonical_rollout_summary_file = &files[0];
let raw_memories = tokio::fs::read_to_string(raw_memories_file(&root))
.await
.expect("read raw memories");
assert!(raw_memories.contains(&format!(
"rollout_summary_file: {canonical_rollout_summary_file}"
)));
assert!(!raw_memories.contains("rollout_summary_file: state_migration_uniqueness_test.md"));
assert!(raw_memories.contains("description: Added a migration test"));
}
mod phase2 {
use crate::CodexAuth;
use crate::ThreadManager;