mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
feat: better slug for rollout summaries (#12135)
This commit is contained in:
@@ -3,6 +3,7 @@ use std::collections::HashSet;
|
||||
use std::fmt::Write as _;
|
||||
use std::path::Path;
|
||||
use tracing::warn;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::memories::ensure_layout;
|
||||
use crate::memories::raw_memories_file;
|
||||
@@ -82,7 +83,10 @@ async fn rebuild_raw_memories_file(
|
||||
.map_err(raw_memories_format_error)?;
|
||||
writeln!(body, "cwd: {}", memory.cwd.display()).map_err(raw_memories_format_error)?;
|
||||
writeln!(body).map_err(raw_memories_format_error)?;
|
||||
body.push_str(memory.raw_memory.trim());
|
||||
let rollout_summary_file = format!("{}.md", rollout_summary_file_stem(memory));
|
||||
let raw_memory =
|
||||
replace_rollout_summary_file_in_raw_memory(&memory.raw_memory, &rollout_summary_file);
|
||||
body.push_str(raw_memory.trim());
|
||||
body.push_str("\n\n");
|
||||
}
|
||||
|
||||
@@ -157,12 +161,87 @@ fn rollout_summary_format_error(err: std::fmt::Error) -> std::io::Error {
|
||||
std::io::Error::other(format!("format rollout summary: {err}"))
|
||||
}
|
||||
|
||||
fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
|
||||
const ROLLOUT_SLUG_MAX_LEN: usize = 20;
|
||||
fn replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory: &str,
|
||||
rollout_summary_file: &str,
|
||||
) -> String {
|
||||
const ROLLOUT_SUMMARY_PREFIX: &str = "rollout_summary_file: ";
|
||||
|
||||
let thread_id = memory.thread_id.to_string();
|
||||
let Some(raw_slug) = memory.rollout_slug.as_deref() else {
|
||||
return thread_id;
|
||||
let replacement = format!("rollout_summary_file: {rollout_summary_file}");
|
||||
raw_memory
|
||||
.split('\n')
|
||||
.map(|line| {
|
||||
if line.starts_with(ROLLOUT_SUMMARY_PREFIX) {
|
||||
replacement.as_str()
|
||||
} else {
|
||||
line
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
pub(crate) fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
|
||||
rollout_summary_file_stem_from_parts(
|
||||
memory.thread_id,
|
||||
memory.source_updated_at,
|
||||
memory.rollout_slug.as_deref(),
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn rollout_summary_file_stem_from_parts(
|
||||
thread_id: codex_protocol::ThreadId,
|
||||
source_updated_at: chrono::DateTime<chrono::Utc>,
|
||||
rollout_slug: Option<&str>,
|
||||
) -> String {
|
||||
const ROLLOUT_SLUG_MAX_LEN: usize = 20;
|
||||
const SHORT_HASH_ALPHABET: &[u8; 62] =
|
||||
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
const SHORT_HASH_SPACE: u32 = 14_776_336;
|
||||
|
||||
let thread_id = thread_id.to_string();
|
||||
let (timestamp_fragment, short_hash_seed) = match Uuid::parse_str(&thread_id) {
|
||||
Ok(thread_uuid) => {
|
||||
let timestamp = thread_uuid
|
||||
.get_timestamp()
|
||||
.and_then(|uuid_timestamp| {
|
||||
let (seconds, nanos) = uuid_timestamp.to_unix();
|
||||
i64::try_from(seconds).ok().and_then(|secs| {
|
||||
chrono::DateTime::<chrono::Utc>::from_timestamp(secs, nanos)
|
||||
})
|
||||
})
|
||||
.unwrap_or(source_updated_at);
|
||||
let short_hash_seed = (thread_uuid.as_u128() & 0xFFFF_FFFF) as u32;
|
||||
(
|
||||
timestamp.format("%Y-%m-%dT%H-%M-%S").to_string(),
|
||||
short_hash_seed,
|
||||
)
|
||||
}
|
||||
Err(_) => {
|
||||
let mut short_hash_seed = 0u32;
|
||||
for byte in thread_id.bytes() {
|
||||
short_hash_seed = short_hash_seed
|
||||
.wrapping_mul(31)
|
||||
.wrapping_add(u32::from(byte));
|
||||
}
|
||||
(
|
||||
source_updated_at.format("%Y-%m-%dT%H-%M-%S").to_string(),
|
||||
short_hash_seed,
|
||||
)
|
||||
}
|
||||
};
|
||||
let mut short_hash_value = short_hash_seed % SHORT_HASH_SPACE;
|
||||
let mut short_hash_chars = ['0'; 4];
|
||||
for idx in (0..short_hash_chars.len()).rev() {
|
||||
let alphabet_idx = (short_hash_value % SHORT_HASH_ALPHABET.len() as u32) as usize;
|
||||
short_hash_chars[idx] = SHORT_HASH_ALPHABET[alphabet_idx] as char;
|
||||
short_hash_value /= SHORT_HASH_ALPHABET.len() as u32;
|
||||
}
|
||||
let short_hash: String = short_hash_chars.iter().collect();
|
||||
let file_prefix = format!("{timestamp_fragment}-{short_hash}");
|
||||
|
||||
let Some(raw_slug) = rollout_slug else {
|
||||
return file_prefix;
|
||||
};
|
||||
|
||||
let mut slug = String::with_capacity(ROLLOUT_SLUG_MAX_LEN);
|
||||
@@ -183,25 +262,28 @@ fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
|
||||
}
|
||||
|
||||
if slug.is_empty() {
|
||||
thread_id
|
||||
file_prefix
|
||||
} else {
|
||||
format!("{thread_id}-{slug}")
|
||||
format!("{file_prefix}-{slug}")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::replace_rollout_summary_file_in_raw_memory;
|
||||
use super::rollout_summary_file_stem;
|
||||
use super::rollout_summary_file_stem_from_parts;
|
||||
use chrono::TimeZone;
|
||||
use chrono::Utc;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_state::Stage1Output;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::PathBuf;
|
||||
const FIXED_PREFIX: &str = "2025-02-11T15-35-19-jqmb";
|
||||
|
||||
fn stage1_output_with_slug(rollout_slug: Option<&str>) -> Stage1Output {
|
||||
fn stage1_output_with_slug(thread_id: ThreadId, rollout_slug: Option<&str>) -> Stage1Output {
|
||||
Stage1Output {
|
||||
thread_id: ThreadId::new(),
|
||||
thread_id,
|
||||
source_updated_at: Utc.timestamp_opt(123, 0).single().expect("timestamp"),
|
||||
raw_memory: "raw memory".to_string(),
|
||||
rollout_summary: "summary".to_string(),
|
||||
@@ -211,31 +293,112 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rollout_summary_file_stem_uses_thread_id_when_slug_missing() {
|
||||
let memory = stage1_output_with_slug(None);
|
||||
let thread_id = memory.thread_id.to_string();
|
||||
|
||||
assert_eq!(rollout_summary_file_stem(&memory), thread_id);
|
||||
fn fixed_thread_id() -> ThreadId {
|
||||
ThreadId::try_from("0194f5a6-89ab-7cde-8123-456789abcdef").expect("valid thread id")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rollout_summary_file_stem_sanitizes_and_truncates_slug() {
|
||||
let memory =
|
||||
stage1_output_with_slug(Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345"));
|
||||
let thread_id = memory.thread_id.to_string();
|
||||
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_missing() {
|
||||
let thread_id = fixed_thread_id();
|
||||
let memory = stage1_output_with_slug(thread_id, None);
|
||||
|
||||
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
|
||||
assert_eq!(
|
||||
rollout_summary_file_stem(&memory),
|
||||
format!("{thread_id}-unsafe_slug_with_spa")
|
||||
rollout_summary_file_stem_from_parts(
|
||||
memory.thread_id,
|
||||
memory.source_updated_at,
|
||||
memory.rollout_slug.as_deref(),
|
||||
),
|
||||
FIXED_PREFIX
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rollout_summary_file_stem_uses_thread_id_when_slug_is_empty() {
|
||||
let memory = stage1_output_with_slug(Some(""));
|
||||
let thread_id = memory.thread_id.to_string();
|
||||
fn rollout_summary_file_stem_sanitizes_and_truncates_slug() {
|
||||
let thread_id = fixed_thread_id();
|
||||
let memory = stage1_output_with_slug(
|
||||
thread_id,
|
||||
Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345"),
|
||||
);
|
||||
|
||||
assert_eq!(rollout_summary_file_stem(&memory), thread_id);
|
||||
assert_eq!(
|
||||
rollout_summary_file_stem(&memory),
|
||||
format!("{FIXED_PREFIX}-unsafe_slug_with_spa")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_is_empty() {
|
||||
let thread_id = fixed_thread_id();
|
||||
let memory = stage1_output_with_slug(thread_id, Some(""));
|
||||
|
||||
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_rollout_summary_file_in_raw_memory_replaces_existing_value() {
|
||||
let raw_memory = "\
|
||||
---
|
||||
rollout_summary_file: wrong.md
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line";
|
||||
let normalized = replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory,
|
||||
"2025-01-01T00-00-00-abcd-demo.md",
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
normalized,
|
||||
"\
|
||||
---
|
||||
rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_rollout_summary_file_in_raw_memory_replaces_placeholder() {
|
||||
let raw_memory = "\
|
||||
---
|
||||
rollout_summary_file: <system_populated_file.md>
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line";
|
||||
let normalized = replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory,
|
||||
"2025-01-01T00-00-00-abcd-demo.md",
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
normalized,
|
||||
"\
|
||||
---
|
||||
rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_rollout_summary_file_in_raw_memory_leaves_text_without_field_unchanged() {
|
||||
let raw_memory = "\
|
||||
---
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line";
|
||||
let normalized = replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory,
|
||||
"2025-01-01T00-00-00-abcd-demo.md",
|
||||
);
|
||||
assert_eq!(normalized, raw_memory);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,8 +105,28 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
|
||||
.await
|
||||
.expect("rebuild raw memories");
|
||||
|
||||
assert!(keep_path.is_file());
|
||||
assert!(!drop_path.exists());
|
||||
assert!(
|
||||
!tokio::fs::try_exists(&keep_path)
|
||||
.await
|
||||
.expect("check stale keep path"),
|
||||
"sync should prune stale filename that used thread id only"
|
||||
);
|
||||
assert!(
|
||||
!tokio::fs::try_exists(&drop_path)
|
||||
.await
|
||||
.expect("check stale drop path"),
|
||||
"sync should prune stale filename for dropped thread"
|
||||
);
|
||||
|
||||
let mut dir = tokio::fs::read_dir(rollout_summaries_dir(&root))
|
||||
.await
|
||||
.expect("open rollout summaries dir");
|
||||
let mut files = Vec::new();
|
||||
while let Some(entry) = dir.next_entry().await.expect("read dir entry") {
|
||||
files.push(entry.file_name().to_string_lossy().to_string());
|
||||
}
|
||||
files.sort_unstable();
|
||||
assert_eq!(files.len(), 1);
|
||||
|
||||
let raw_memories = tokio::fs::read_to_string(raw_memories_file(&root))
|
||||
.await
|
||||
@@ -117,7 +137,7 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn sync_rollout_summaries_uses_thread_id_and_sanitized_slug_filename() {
|
||||
async fn sync_rollout_summaries_uses_timestamp_hash_and_sanitized_slug_filename() {
|
||||
let dir = tempdir().expect("tempdir");
|
||||
let root = dir.path().join("memory");
|
||||
ensure_layout(&root).await.expect("ensure layout");
|
||||
@@ -165,9 +185,24 @@ async fn sync_rollout_summaries_uses_thread_id_and_sanitized_slug_filename() {
|
||||
let stem = file_name
|
||||
.strip_suffix(".md")
|
||||
.expect("rollout summary file should end with .md");
|
||||
let slug = stem
|
||||
.strip_prefix(&format!("{thread_id}-"))
|
||||
.expect("rollout summary filename should include thread id and slug");
|
||||
let (prefix, slug) = stem
|
||||
.rsplit_once('-')
|
||||
.expect("rollout summary filename should include slug");
|
||||
let (timestamp, short_hash) = prefix
|
||||
.rsplit_once('-')
|
||||
.expect("rollout summary filename should include short hash");
|
||||
|
||||
assert_eq!(timestamp.len(), 19, "timestamp should be second precision");
|
||||
let parsed_timestamp = chrono::NaiveDateTime::parse_from_str(timestamp, "%Y-%m-%dT%H-%M-%S");
|
||||
assert!(
|
||||
parsed_timestamp.is_ok(),
|
||||
"timestamp should use YYYY-MM-DDThh-mm-ss"
|
||||
);
|
||||
assert_eq!(short_hash.len(), 4, "short hash should be exactly 4 chars");
|
||||
assert!(
|
||||
short_hash.chars().all(|ch| ch.is_ascii_alphanumeric()),
|
||||
"short hash should use only alphanumeric chars"
|
||||
);
|
||||
assert!(slug.len() <= 20, "slug should be capped at 20 chars");
|
||||
assert!(
|
||||
slug.chars()
|
||||
@@ -193,6 +228,67 @@ async fn sync_rollout_summaries_uses_thread_id_and_sanitized_slug_filename() {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn rebuild_raw_memories_file_rewrites_rollout_summary_file_to_canonical_filename() {
|
||||
let dir = tempdir().expect("tempdir");
|
||||
let root = dir.path().join("memory");
|
||||
ensure_layout(&root).await.expect("ensure layout");
|
||||
|
||||
let thread_id =
|
||||
ThreadId::try_from("0194f5a6-89ab-7cde-8123-456789abcdef").expect("valid thread id");
|
||||
let memories = vec![Stage1Output {
|
||||
thread_id,
|
||||
source_updated_at: Utc.timestamp_opt(200, 0).single().expect("timestamp"),
|
||||
raw_memory: "\
|
||||
---
|
||||
rollout_summary_file: state_migration_uniqueness_test.md
|
||||
description: Added a migration test
|
||||
keywords: codex-state, migrations
|
||||
---
|
||||
- Kept details."
|
||||
.to_string(),
|
||||
rollout_summary: "short summary".to_string(),
|
||||
rollout_slug: Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345".to_string()),
|
||||
cwd: PathBuf::from("/tmp/workspace"),
|
||||
generated_at: Utc.timestamp_opt(201, 0).single().expect("timestamp"),
|
||||
}];
|
||||
|
||||
sync_rollout_summaries_from_memories(
|
||||
&root,
|
||||
&memories,
|
||||
DEFAULT_MEMORIES_MAX_RAW_MEMORIES_FOR_GLOBAL,
|
||||
)
|
||||
.await
|
||||
.expect("sync rollout summaries");
|
||||
rebuild_raw_memories_file_from_memories(
|
||||
&root,
|
||||
&memories,
|
||||
DEFAULT_MEMORIES_MAX_RAW_MEMORIES_FOR_GLOBAL,
|
||||
)
|
||||
.await
|
||||
.expect("rebuild raw memories");
|
||||
|
||||
let mut dir = tokio::fs::read_dir(rollout_summaries_dir(&root))
|
||||
.await
|
||||
.expect("open rollout summaries dir");
|
||||
let mut files = Vec::new();
|
||||
while let Some(entry) = dir.next_entry().await.expect("read dir entry") {
|
||||
files.push(entry.file_name().to_string_lossy().to_string());
|
||||
}
|
||||
files.sort_unstable();
|
||||
assert_eq!(files.len(), 1);
|
||||
let canonical_rollout_summary_file = &files[0];
|
||||
|
||||
let raw_memories = tokio::fs::read_to_string(raw_memories_file(&root))
|
||||
.await
|
||||
.expect("read raw memories");
|
||||
assert!(raw_memories.contains(&format!(
|
||||
"rollout_summary_file: {canonical_rollout_summary_file}"
|
||||
)));
|
||||
assert!(!raw_memories.contains("rollout_summary_file: state_migration_uniqueness_test.md"));
|
||||
assert!(raw_memories.contains("description: Added a migration test"));
|
||||
}
|
||||
|
||||
mod phase2 {
|
||||
use crate::CodexAuth;
|
||||
use crate::ThreadManager;
|
||||
|
||||
Reference in New Issue
Block a user