Compare commits

...

6 Commits

Author SHA1 Message Date
Ahmed Ibrahim
6b004b74ca Preserve memory trace I/O errors
Co-authored-by: Codex <noreply@openai.com>
2026-03-18 05:44:23 +00:00
Ahmed Ibrahim
f5dc4dc801 split memories out of codex-core
Co-authored-by: Codex <noreply@openai.com>
2026-03-18 04:56:44 +00:00
Ahmed Ibrahim
1cf68f940c codex: fix CI failure on PR #15010
Co-authored-by: Codex <noreply@openai.com>
2026-03-18 04:05:05 +00:00
Ahmed Ibrahim
0f406c3de0 codex: address PR review feedback (#15010)
Co-authored-by: Codex <noreply@openai.com>
2026-03-18 03:49:51 +00:00
Ahmed Ibrahim
8b3fc35e0b fix: unblock config loader split CI
Co-authored-by: Codex <noreply@openai.com>
2026-03-18 03:34:46 +00:00
Ahmed Ibrahim
38a28973a8 refactor: move config loader internals into codex-config
Extract config-layer IO and managed requirements loading into codex-config so codex-core keeps a thinner config loader facade.

Co-authored-by: Codex <noreply@openai.com>
2026-03-18 02:30:22 +00:00
26 changed files with 1259 additions and 954 deletions

23
codex-rs/Cargo.lock generated
View File

@@ -1786,10 +1786,12 @@ name = "codex-config"
version = "0.0.0"
dependencies = [
"anyhow",
"base64 0.22.1",
"codex-app-server-protocol",
"codex-execpolicy",
"codex-protocol",
"codex-utils-absolute-path",
"core-foundation 0.9.4",
"futures",
"multimap",
"pretty_assertions",
@@ -1802,6 +1804,7 @@ dependencies = [
"toml 0.9.11+spec-1.1.0",
"toml_edit 0.24.0+spec-1.1.0",
"tracing",
"windows-sys 0.52.0",
]
[[package]]
@@ -1846,6 +1849,7 @@ dependencies = [
"codex-git",
"codex-hooks",
"codex-keyring-store",
"codex-memories",
"codex-network-proxy",
"codex-otel",
"codex-protocol",
@@ -1866,7 +1870,6 @@ dependencies = [
"codex-utils-stream-parser",
"codex-utils-string",
"codex-windows-sandbox",
"core-foundation 0.9.4",
"core_test_support",
"csv",
"ctor 0.6.3",
@@ -1926,7 +1929,6 @@ dependencies = [
"walkdir",
"which",
"wildmatch",
"windows-sys 0.52.0",
"wiremock",
"zip",
"zstd",
@@ -2193,6 +2195,23 @@ dependencies = [
"wiremock",
]
[[package]]
name = "codex-memories"
version = "0.0.0"
dependencies = [
"anyhow",
"chrono",
"codex-api",
"codex-protocol",
"codex-state",
"pretty_assertions",
"serde_json",
"tempfile",
"tokio",
"tracing",
"uuid",
]
[[package]]
name = "codex-network-proxy"
version = "0.0.0"

View File

@@ -32,6 +32,7 @@ members = [
"linux-sandbox",
"lmstudio",
"login",
"memories",
"mcp-server",
"network-proxy",
"ollama",
@@ -114,6 +115,7 @@ codex-keyring-store = { path = "keyring-store" }
codex-linux-sandbox = { path = "linux-sandbox" }
codex-lmstudio = { path = "lmstudio" }
codex-login = { path = "login" }
codex-memories = { path = "memories" }
codex-mcp-server = { path = "mcp-server" }
codex-network-proxy = { path = "network-proxy" }
codex-ollama = { path = "ollama" }

View File

@@ -4,10 +4,14 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[lib]
doctest = false
[lints]
workspace = true
[dependencies]
base64 = { workspace = true }
codex-app-server-protocol = { workspace = true }
codex-execpolicy = { workspace = true }
codex-protocol = { workspace = true }
@@ -24,6 +28,16 @@ toml = { workspace = true }
toml_edit = { workspace = true }
tracing = { workspace = true }
[target.'cfg(target_os = "macos")'.dependencies]
core-foundation = "0.9"
[target.'cfg(target_os = "windows")'.dependencies]
windows-sys = { version = "0.52", features = [
"Win32_Foundation",
"Win32_System_Com",
"Win32_UI_Shell",
] }
[dev-dependencies]
anyhow = { workspace = true }
pretty_assertions = { workspace = true }

View File

@@ -1,10 +1,10 @@
use super::LoaderOverrides;
use crate::LoaderOverrides;
use crate::config_error_from_toml;
use crate::io_error_from_config_error;
#[cfg(target_os = "macos")]
use super::macos::ManagedAdminConfigLayer;
use crate::macos::ManagedAdminConfigLayer;
#[cfg(target_os = "macos")]
use super::macos::load_managed_admin_config_layer;
use codex_config::config_error_from_toml;
use codex_config::io_error_from_config_error;
use crate::macos::load_managed_admin_config_layer;
use codex_utils_absolute_path::AbsolutePathBuf;
use std::io;
use std::path::Path;
@@ -16,26 +16,26 @@ use toml::Value as TomlValue;
const CODEX_MANAGED_CONFIG_SYSTEM_PATH: &str = "/etc/codex/managed_config.toml";
#[derive(Debug, Clone)]
pub(super) struct MangedConfigFromFile {
pub struct ManagedConfigFromFile {
pub managed_config: TomlValue,
pub file: AbsolutePathBuf,
}
#[derive(Debug, Clone)]
pub(super) struct ManagedConfigFromMdm {
pub struct ManagedConfigFromMdm {
pub managed_config: TomlValue,
pub raw_toml: String,
}
#[derive(Debug, Clone)]
pub(super) struct LoadedConfigLayers {
pub struct LoadedConfigLayers {
/// If present, data read from a file such as `/etc/codex/managed_config.toml`.
pub managed_config: Option<MangedConfigFromFile>,
pub managed_config: Option<ManagedConfigFromFile>,
/// If present, data read from managed preferences (macOS only).
pub managed_config_from_mdm: Option<ManagedConfigFromMdm>,
}
pub(super) async fn load_config_layers_internal(
pub async fn load_config_layers_internal(
codex_home: &Path,
overrides: LoaderOverrides,
) -> io::Result<LoadedConfigLayers> {
@@ -59,7 +59,7 @@ pub(super) async fn load_config_layers_internal(
let managed_config =
read_config_from_path(&managed_config_path, /*log_missing_as_info*/ false)
.await?
.map(|managed_config| MangedConfigFromFile {
.map(|managed_config| ManagedConfigFromFile {
managed_config,
file: managed_config_path.clone(),
});
@@ -88,7 +88,7 @@ fn map_managed_admin_layer(layer: ManagedAdminConfigLayer) -> ManagedConfigFromM
}
}
pub(super) async fn read_config_from_path(
async fn read_config_from_path(
path: impl AsRef<Path>,
log_missing_as_info: bool,
) -> io::Result<Option<TomlValue>> {
@@ -120,8 +120,7 @@ pub(super) async fn read_config_from_path(
}
}
/// Return the default managed config path.
pub(super) fn managed_config_default_path(codex_home: &Path) -> PathBuf {
fn managed_config_default_path(codex_home: &Path) -> PathBuf {
#[cfg(unix)]
{
let _ = codex_home;

View File

@@ -3,6 +3,10 @@ mod config_requirements;
mod constraint;
mod diagnostics;
mod fingerprint;
mod layer_io;
mod loader;
#[cfg(target_os = "macos")]
mod macos;
mod merge;
mod overrides;
mod requirements_exec_policy;
@@ -44,6 +48,15 @@ pub use diagnostics::format_config_error;
pub use diagnostics::format_config_error_with_source;
pub use diagnostics::io_error_from_config_error;
pub use fingerprint::version_for_toml;
pub use layer_io::LoadedConfigLayers;
pub use layer_io::ManagedConfigFromFile;
pub use layer_io::ManagedConfigFromMdm;
pub use layer_io::load_config_layers_internal;
pub use loader::load_managed_admin_requirements;
pub use loader::load_requirements_from_legacy_scheme;
pub use loader::load_requirements_toml;
pub use loader::system_config_toml_file;
pub use loader::system_requirements_toml_file;
pub use merge::merge_toml_values;
pub use overrides::build_cli_overrides_layer;
pub use requirements_exec_policy::RequirementsExecPolicy;

View File

@@ -0,0 +1,236 @@
use crate::ConfigRequirementsToml;
use crate::ConfigRequirementsWithSources;
use crate::LoadedConfigLayers;
use crate::RequirementSource;
#[cfg(target_os = "macos")]
use crate::macos::load_managed_admin_requirements_toml;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::protocol::AskForApproval;
use codex_utils_absolute_path::AbsolutePathBuf;
use serde::Deserialize;
use std::io;
use std::path::Path;
#[cfg(windows)]
use std::path::PathBuf;
#[cfg(unix)]
pub const SYSTEM_CONFIG_TOML_FILE_UNIX: &str = "/etc/codex/config.toml";
#[cfg(windows)]
const DEFAULT_PROGRAM_DATA_DIR_WINDOWS: &str = r"C:\ProgramData";
pub async fn load_requirements_toml(
config_requirements_toml: &mut ConfigRequirementsWithSources,
requirements_toml_file: impl AsRef<Path>,
) -> io::Result<()> {
let requirements_toml_file =
AbsolutePathBuf::from_absolute_path(requirements_toml_file.as_ref())?;
match tokio::fs::read_to_string(&requirements_toml_file).await {
Ok(contents) => {
let requirements_config: ConfigRequirementsToml =
toml::from_str(&contents).map_err(|err| {
io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Error parsing requirements file {}: {err}",
requirements_toml_file.as_ref().display(),
),
)
})?;
config_requirements_toml.merge_unset_fields(
RequirementSource::SystemRequirementsToml {
file: requirements_toml_file.clone(),
},
requirements_config,
);
}
Err(err) if err.kind() == io::ErrorKind::NotFound => {}
Err(err) => {
return Err(io::Error::new(
err.kind(),
format!(
"Failed to read requirements file {}: {err}",
requirements_toml_file.as_ref().display(),
),
));
}
}
Ok(())
}
pub async fn load_managed_admin_requirements(
config_requirements_toml: &mut ConfigRequirementsWithSources,
managed_config_requirements_base64: Option<&str>,
) -> io::Result<()> {
#[cfg(target_os = "macos")]
{
load_managed_admin_requirements_toml(
config_requirements_toml,
managed_config_requirements_base64,
)
.await
}
#[cfg(not(target_os = "macos"))]
{
let _ = config_requirements_toml;
let _ = managed_config_requirements_base64;
Ok(())
}
}
#[cfg(unix)]
pub fn system_requirements_toml_file() -> io::Result<AbsolutePathBuf> {
AbsolutePathBuf::from_absolute_path(Path::new("/etc/codex/requirements.toml"))
}
#[cfg(windows)]
pub fn system_requirements_toml_file() -> io::Result<AbsolutePathBuf> {
windows_system_requirements_toml_file()
}
#[cfg(unix)]
pub fn system_config_toml_file() -> io::Result<AbsolutePathBuf> {
AbsolutePathBuf::from_absolute_path(Path::new(SYSTEM_CONFIG_TOML_FILE_UNIX))
}
#[cfg(windows)]
pub fn system_config_toml_file() -> io::Result<AbsolutePathBuf> {
windows_system_config_toml_file()
}
#[cfg(windows)]
fn windows_codex_system_dir() -> PathBuf {
let program_data = windows_program_data_dir_from_known_folder().unwrap_or_else(|err| {
tracing::warn!(
error = %err,
"Failed to resolve ProgramData known folder; using default path"
);
PathBuf::from(DEFAULT_PROGRAM_DATA_DIR_WINDOWS)
});
program_data.join("OpenAI").join("Codex")
}
#[cfg(windows)]
fn windows_system_requirements_toml_file() -> io::Result<AbsolutePathBuf> {
let requirements_toml_file = windows_codex_system_dir().join("requirements.toml");
AbsolutePathBuf::try_from(requirements_toml_file)
}
#[cfg(windows)]
fn windows_system_config_toml_file() -> io::Result<AbsolutePathBuf> {
let config_toml_file = windows_codex_system_dir().join("config.toml");
AbsolutePathBuf::try_from(config_toml_file)
}
#[cfg(windows)]
fn windows_program_data_dir_from_known_folder() -> io::Result<PathBuf> {
use std::ffi::OsString;
use std::os::windows::ffi::OsStringExt;
use windows_sys::Win32::System::Com::CoTaskMemFree;
use windows_sys::Win32::UI::Shell::FOLDERID_ProgramData;
use windows_sys::Win32::UI::Shell::KF_FLAG_DEFAULT;
use windows_sys::Win32::UI::Shell::SHGetKnownFolderPath;
let mut path_ptr = std::ptr::null_mut::<u16>();
let known_folder_flags = u32::try_from(KF_FLAG_DEFAULT).map_err(|_| {
io::Error::other(format!(
"KF_FLAG_DEFAULT did not fit in u32: {KF_FLAG_DEFAULT}"
))
})?;
let hr = unsafe {
SHGetKnownFolderPath(&FOLDERID_ProgramData, known_folder_flags, 0, &mut path_ptr)
};
if hr != 0 {
return Err(io::Error::other(format!(
"SHGetKnownFolderPath(FOLDERID_ProgramData) failed with HRESULT {hr:#010x}"
)));
}
if path_ptr.is_null() {
return Err(io::Error::other(
"SHGetKnownFolderPath(FOLDERID_ProgramData) returned a null pointer",
));
}
let path = unsafe {
let mut len = 0usize;
while *path_ptr.add(len) != 0 {
len += 1;
}
let wide = std::slice::from_raw_parts(path_ptr, len);
let path = PathBuf::from(OsString::from_wide(wide));
CoTaskMemFree(path_ptr.cast());
path
};
Ok(path)
}
pub async fn load_requirements_from_legacy_scheme(
config_requirements_toml: &mut ConfigRequirementsWithSources,
loaded_config_layers: LoadedConfigLayers,
) -> io::Result<()> {
let LoadedConfigLayers {
managed_config,
managed_config_from_mdm,
} = loaded_config_layers;
for (source, config) in managed_config_from_mdm
.map(|config| {
(
RequirementSource::LegacyManagedConfigTomlFromMdm,
config.managed_config,
)
})
.into_iter()
.chain(managed_config.map(|config| {
(
RequirementSource::LegacyManagedConfigTomlFromFile { file: config.file },
config.managed_config,
)
}))
{
let legacy_config: LegacyManagedConfigToml =
config.try_into().map_err(|err: toml::de::Error| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("Failed to parse config requirements as TOML: {err}"),
)
})?;
let requirements = ConfigRequirementsToml::from(legacy_config);
config_requirements_toml.merge_unset_fields(source, requirements);
}
Ok(())
}
#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
struct LegacyManagedConfigToml {
approval_policy: Option<AskForApproval>,
sandbox_mode: Option<SandboxMode>,
}
impl From<LegacyManagedConfigToml> for ConfigRequirementsToml {
fn from(legacy: LegacyManagedConfigToml) -> Self {
let mut config_requirements_toml = ConfigRequirementsToml::default();
let LegacyManagedConfigToml {
approval_policy,
sandbox_mode,
} = legacy;
if let Some(approval_policy) = approval_policy {
config_requirements_toml.allowed_approval_policies = Some(vec![approval_policy]);
}
if let Some(sandbox_mode) = sandbox_mode {
let required_mode = sandbox_mode.into();
let mut allowed_modes = vec![crate::SandboxModeRequirement::ReadOnly];
if required_mode != crate::SandboxModeRequirement::ReadOnly {
allowed_modes.push(required_mode);
}
config_requirements_toml.allowed_sandbox_modes = Some(allowed_modes);
}
config_requirements_toml
}
}

View File

@@ -1,6 +1,6 @@
use super::ConfigRequirementsToml;
use super::ConfigRequirementsWithSources;
use super::RequirementSource;
use crate::ConfigRequirementsToml;
use crate::ConfigRequirementsWithSources;
use crate::RequirementSource;
use base64::Engine;
use base64::prelude::BASE64_STANDARD;
use core_foundation::base::TCFType;
@@ -16,19 +16,19 @@ const MANAGED_PREFERENCES_CONFIG_KEY: &str = "config_toml_base64";
const MANAGED_PREFERENCES_REQUIREMENTS_KEY: &str = "requirements_toml_base64";
#[derive(Debug, Clone)]
pub(super) struct ManagedAdminConfigLayer {
pub struct ManagedAdminConfigLayer {
pub config: TomlValue,
pub raw_toml: String,
}
pub(super) fn managed_preferences_requirements_source() -> RequirementSource {
fn managed_preferences_requirements_source() -> RequirementSource {
RequirementSource::MdmManagedPreferences {
domain: MANAGED_PREFERENCES_APPLICATION_ID.to_string(),
key: MANAGED_PREFERENCES_REQUIREMENTS_KEY.to_string(),
}
}
pub(crate) async fn load_managed_admin_config_layer(
pub async fn load_managed_admin_config_layer(
override_base64: Option<&str>,
) -> io::Result<Option<ManagedAdminConfigLayer>> {
if let Some(encoded) = override_base64 {
@@ -61,7 +61,7 @@ fn load_managed_admin_config() -> io::Result<Option<ManagedAdminConfigLayer>> {
.transpose()
}
pub(crate) async fn load_managed_admin_requirements_toml(
pub async fn load_managed_admin_requirements_toml(
target: &mut ConfigRequirementsWithSources,
override_base64: Option<&str>,
) -> io::Result<()> {

View File

@@ -41,6 +41,7 @@ codex-file-search = { workspace = true }
codex-git = { workspace = true }
codex-hooks = { workspace = true }
codex-keyring-store = { workspace = true }
codex-memories = { workspace = true }
codex-network-proxy = { workspace = true }
codex-otel = { workspace = true }
codex-artifacts = { workspace = true }
@@ -123,7 +124,6 @@ landlock = { workspace = true }
seccompiler = { workspace = true }
[target.'cfg(target_os = "macos")'.dependencies]
core-foundation = "0.9"
keyring = { workspace = true, features = ["apple-native"] }
# Build OpenSSL from source for musl builds.
@@ -136,11 +136,6 @@ openssl-sys = { workspace = true, features = ["vendored"] }
[target.'cfg(target_os = "windows")'.dependencies]
keyring = { workspace = true, features = ["windows-native"] }
windows-sys = { version = "0.52", features = [
"Win32_Foundation",
"Win32_System_Com",
"Win32_UI_Shell",
] }
[target.'cfg(any(target_os = "freebsd", target_os = "openbsd"))'.dependencies]
keyring = { workspace = true, features = ["sync-secret-service"] }

View File

@@ -4798,8 +4798,10 @@ mod handlers {
errors.push("state db unavailable; memory rows were not cleared".to_string());
}
let memory_root = crate::memories::memory_root(&config.codex_home);
if let Err(err) = crate::memories::clear_memory_root_contents(&memory_root).await {
let memory_root = codex_memories::memories::memory_root(&config.codex_home);
if let Err(err) =
codex_memories::memories::control::clear_memory_root_contents(&memory_root).await
{
errors.push(format!(
"failed clearing memory directory {}: {err}",
memory_root.display()

View File

@@ -41,7 +41,6 @@ use crate::features::FeatureOverrides;
use crate::features::Features;
use crate::features::FeaturesToml;
use crate::git_info::resolve_root_git_project_for_trust;
use crate::memories::memory_root;
use crate::model_provider_info::LEGACY_OLLAMA_CHAT_PROVIDER_ID;
use crate::model_provider_info::LMSTUDIO_OSS_PROVIDER_ID;
use crate::model_provider_info::ModelProviderInfo;
@@ -62,6 +61,7 @@ use crate::windows_sandbox::resolve_windows_sandbox_mode;
use crate::windows_sandbox::resolve_windows_sandbox_private_desktop;
use codex_app_server_protocol::Tools;
use codex_app_server_protocol::UserSavedConfig;
use codex_memories::memories::memory_root;
use codex_protocol::config_types::AltScreenMode;
use codex_protocol::config_types::ForcedLoginMethod;
use codex_protocol::config_types::Personality;

View File

@@ -1,27 +1,18 @@
mod layer_io;
#[cfg(target_os = "macos")]
mod macos;
#[cfg(test)]
mod tests;
use crate::config::ConfigToml;
use crate::config_loader::layer_io::LoadedConfigLayers;
use crate::git_info::resolve_root_git_project_for_trust;
use codex_app_server_protocol::ConfigLayerSource;
use codex_config::CONFIG_TOML_FILE;
use codex_config::ConfigRequirementsWithSources;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::config_types::TrustLevel;
use codex_protocol::protocol::AskForApproval;
use codex_utils_absolute_path::AbsolutePathBuf;
use codex_utils_absolute_path::AbsolutePathBufGuard;
use dunce::canonicalize as normalize_path;
use serde::Deserialize;
use std::io;
use std::path::Path;
#[cfg(windows)]
use std::path::PathBuf;
use toml::Value as TomlValue;
pub use codex_config::AppRequirementToml;
@@ -38,6 +29,7 @@ pub use codex_config::ConfigRequirements;
pub use codex_config::ConfigRequirementsToml;
pub use codex_config::ConstrainedWithSource;
pub use codex_config::FeatureRequirementsToml;
use codex_config::LoadedConfigLayers;
pub use codex_config::LoaderOverrides;
pub use codex_config::McpServerIdentity;
pub use codex_config::McpServerRequirement;
@@ -55,18 +47,16 @@ pub(crate) use codex_config::config_error_from_toml;
pub use codex_config::format_config_error;
pub use codex_config::format_config_error_with_source;
pub(crate) use codex_config::io_error_from_config_error;
use codex_config::load_config_layers_internal;
use codex_config::load_managed_admin_requirements;
use codex_config::load_requirements_from_legacy_scheme;
pub(crate) use codex_config::load_requirements_toml;
pub use codex_config::merge_toml_values;
use codex_config::system_config_toml_file;
use codex_config::system_requirements_toml_file;
#[cfg(test)]
pub(crate) use codex_config::version_for_toml;
/// On Unix systems, load default settings from this file path, if present.
/// Note that /etc/codex/ is treated as a "config folder," so subfolders such
/// as skills/ and rules/ will also be honored.
pub const SYSTEM_CONFIG_TOML_FILE_UNIX: &str = "/etc/codex/config.toml";
#[cfg(windows)]
const DEFAULT_PROGRAM_DATA_DIR_WINDOWS: &str = r"C:\ProgramData";
const DEFAULT_PROJECT_ROOT_MARKERS: &[&str] = &[".git"];
pub(crate) async fn first_layer_config_error(layers: &ConfigLayerStack) -> Option<ConfigError> {
@@ -125,8 +115,7 @@ pub async fn load_config_layers_state(
.merge_unset_fields(RequirementSource::CloudRequirements, requirements);
}
#[cfg(target_os = "macos")]
macos::load_managed_admin_requirements_toml(
load_managed_admin_requirements(
&mut config_requirements_toml,
overrides
.macos_managed_config_requirements_base64
@@ -140,7 +129,7 @@ pub async fn load_config_layers_state(
// Make a best-effort to support the legacy `managed_config.toml` as a
// requirements specification.
let loaded_config_layers = layer_io::load_config_layers_internal(codex_home, overrides).await?;
let loaded_config_layers = load_config_layers_internal(codex_home, overrides).await?;
load_requirements_from_legacy_scheme(
&mut config_requirements_toml,
loaded_config_layers.clone(),
@@ -343,185 +332,6 @@ async fn load_config_toml_for_required_layer(
Ok(create_entry(toml_value))
}
/// If available, apply requirements from the platform system
/// `requirements.toml` location to `config_requirements_toml` by filling in
/// any unset fields.
async fn load_requirements_toml(
config_requirements_toml: &mut ConfigRequirementsWithSources,
requirements_toml_file: impl AsRef<Path>,
) -> io::Result<()> {
let requirements_toml_file =
AbsolutePathBuf::from_absolute_path(requirements_toml_file.as_ref())?;
match tokio::fs::read_to_string(&requirements_toml_file).await {
Ok(contents) => {
let requirements_config: ConfigRequirementsToml =
toml::from_str(&contents).map_err(|e| {
io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Error parsing requirements file {}: {e}",
requirements_toml_file.as_ref().display(),
),
)
})?;
config_requirements_toml.merge_unset_fields(
RequirementSource::SystemRequirementsToml {
file: requirements_toml_file.clone(),
},
requirements_config,
);
}
Err(e) => {
if e.kind() != io::ErrorKind::NotFound {
return Err(io::Error::new(
e.kind(),
format!(
"Failed to read requirements file {}: {e}",
requirements_toml_file.as_ref().display(),
),
));
}
}
}
Ok(())
}
#[cfg(unix)]
fn system_requirements_toml_file() -> io::Result<AbsolutePathBuf> {
AbsolutePathBuf::from_absolute_path(Path::new("/etc/codex/requirements.toml"))
}
#[cfg(windows)]
fn system_requirements_toml_file() -> io::Result<AbsolutePathBuf> {
windows_system_requirements_toml_file()
}
#[cfg(unix)]
fn system_config_toml_file() -> io::Result<AbsolutePathBuf> {
AbsolutePathBuf::from_absolute_path(Path::new(SYSTEM_CONFIG_TOML_FILE_UNIX))
}
#[cfg(windows)]
fn system_config_toml_file() -> io::Result<AbsolutePathBuf> {
windows_system_config_toml_file()
}
#[cfg(windows)]
fn windows_codex_system_dir() -> PathBuf {
let program_data = windows_program_data_dir_from_known_folder().unwrap_or_else(|err| {
tracing::warn!(
error = %err,
"Failed to resolve ProgramData known folder; using default path"
);
PathBuf::from(DEFAULT_PROGRAM_DATA_DIR_WINDOWS)
});
program_data.join("OpenAI").join("Codex")
}
#[cfg(windows)]
fn windows_system_requirements_toml_file() -> io::Result<AbsolutePathBuf> {
let requirements_toml_file = windows_codex_system_dir().join("requirements.toml");
AbsolutePathBuf::try_from(requirements_toml_file)
}
#[cfg(windows)]
fn windows_system_config_toml_file() -> io::Result<AbsolutePathBuf> {
let config_toml_file = windows_codex_system_dir().join("config.toml");
AbsolutePathBuf::try_from(config_toml_file)
}
#[cfg(windows)]
fn windows_program_data_dir_from_known_folder() -> io::Result<PathBuf> {
use std::ffi::OsString;
use std::os::windows::ffi::OsStringExt;
use windows_sys::Win32::System::Com::CoTaskMemFree;
use windows_sys::Win32::UI::Shell::FOLDERID_ProgramData;
use windows_sys::Win32::UI::Shell::KF_FLAG_DEFAULT;
use windows_sys::Win32::UI::Shell::SHGetKnownFolderPath;
let mut path_ptr = std::ptr::null_mut::<u16>();
let known_folder_flags = u32::try_from(KF_FLAG_DEFAULT).map_err(|_| {
io::Error::other(format!(
"KF_FLAG_DEFAULT did not fit in u32: {KF_FLAG_DEFAULT}"
))
})?;
// Known folder IDs reference:
// https://learn.microsoft.com/en-us/windows/win32/shell/knownfolderid
// SAFETY: SHGetKnownFolderPath initializes path_ptr with a CoTaskMem-allocated,
// null-terminated UTF-16 string on success.
let hr = unsafe {
SHGetKnownFolderPath(&FOLDERID_ProgramData, known_folder_flags, 0, &mut path_ptr)
};
if hr != 0 {
return Err(io::Error::other(format!(
"SHGetKnownFolderPath(FOLDERID_ProgramData) failed with HRESULT {hr:#010x}"
)));
}
if path_ptr.is_null() {
return Err(io::Error::other(
"SHGetKnownFolderPath(FOLDERID_ProgramData) returned a null pointer",
));
}
// SAFETY: path_ptr is a valid null-terminated UTF-16 string allocated by
// SHGetKnownFolderPath and must be freed with CoTaskMemFree.
let path = unsafe {
let mut len = 0usize;
while *path_ptr.add(len) != 0 {
len += 1;
}
let wide = std::slice::from_raw_parts(path_ptr, len);
let path = PathBuf::from(OsString::from_wide(wide));
CoTaskMemFree(path_ptr.cast());
path
};
Ok(path)
}
async fn load_requirements_from_legacy_scheme(
config_requirements_toml: &mut ConfigRequirementsWithSources,
loaded_config_layers: LoadedConfigLayers,
) -> io::Result<()> {
// In this implementation, earlier layers cannot be overwritten by later
// layers, so list managed_config_from_mdm first because it has the highest
// precedence.
let LoadedConfigLayers {
managed_config,
managed_config_from_mdm,
} = loaded_config_layers;
for (source, config) in managed_config_from_mdm
.map(|config| {
(
RequirementSource::LegacyManagedConfigTomlFromMdm,
config.managed_config,
)
})
.into_iter()
.chain(managed_config.map(|c| {
(
RequirementSource::LegacyManagedConfigTomlFromFile { file: c.file },
c.managed_config,
)
}))
{
let legacy_config: LegacyManagedConfigToml =
config.try_into().map_err(|err: toml::de::Error| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("Failed to parse config requirements as TOML: {err}"),
)
})?;
let new_requirements_toml = ConfigRequirementsToml::from(legacy_config);
config_requirements_toml.merge_unset_fields(source, new_requirements_toml);
}
Ok(())
}
/// Reads `project_root_markers` from the [toml::Value] produced by merging
/// `config.toml` from the config layers in the stack preceding
/// [ConfigLayerSource::Project].
@@ -895,51 +705,12 @@ async fn load_project_layers(
Ok(layers)
}
/// The legacy mechanism for specifying admin-enforced configuration is to read
/// from a file like `/etc/codex/managed_config.toml` that has the same
/// structure as `config.toml` where fields like `approval_policy` can specify
/// exactly one value rather than a list of allowed values.
///
/// If present, re-interpret `managed_config.toml` as a `requirements.toml`
/// where each specified field is treated as a constraint allowing only that
/// value.
#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
struct LegacyManagedConfigToml {
approval_policy: Option<AskForApproval>,
sandbox_mode: Option<SandboxMode>,
}
impl From<LegacyManagedConfigToml> for ConfigRequirementsToml {
fn from(legacy: LegacyManagedConfigToml) -> Self {
let mut config_requirements_toml = ConfigRequirementsToml::default();
let LegacyManagedConfigToml {
approval_policy,
sandbox_mode,
} = legacy;
if let Some(approval_policy) = approval_policy {
config_requirements_toml.allowed_approval_policies = Some(vec![approval_policy]);
}
if let Some(sandbox_mode) = sandbox_mode {
let required_mode: SandboxModeRequirement = sandbox_mode.into();
// Allowing read-only is a requirement for Codex to function correctly.
// So in this backfill path, we append read-only if it's not already specified.
let mut allowed_modes = vec![SandboxModeRequirement::ReadOnly];
if required_mode != SandboxModeRequirement::ReadOnly {
allowed_modes.push(required_mode);
}
config_requirements_toml.allowed_sandbox_modes = Some(allowed_modes);
}
config_requirements_toml
}
}
// Cannot name this `mod tests` because of tests.rs in this folder.
#[cfg(test)]
mod unit_tests {
use super::*;
#[cfg(windows)]
use std::path::Path;
use codex_config::ManagedConfigFromFile;
use codex_protocol::protocol::SandboxPolicy;
use tempfile::tempdir;
#[test]
@@ -979,65 +750,81 @@ foo = "xyzzy"
Ok(())
}
#[test]
fn legacy_managed_config_backfill_includes_read_only_sandbox_mode() {
let legacy = LegacyManagedConfigToml {
approval_policy: None,
sandbox_mode: Some(SandboxMode::WorkspaceWrite),
#[tokio::test]
async fn legacy_managed_config_backfill_includes_read_only_sandbox_mode() {
let tmp = tempdir().expect("tempdir");
let managed_path = AbsolutePathBuf::try_from(tmp.path().join("managed_config.toml"))
.expect("managed path");
let loaded_layers = LoadedConfigLayers {
managed_config: Some(ManagedConfigFromFile {
managed_config: toml::toml! {
sandbox_mode = "workspace-write"
}
.into(),
file: managed_path.clone(),
}),
managed_config_from_mdm: None,
};
let requirements = ConfigRequirementsToml::from(legacy);
let mut requirements_with_sources = ConfigRequirementsWithSources::default();
load_requirements_from_legacy_scheme(&mut requirements_with_sources, loaded_layers)
.await
.expect("load legacy requirements");
let requirements: ConfigRequirements = requirements_with_sources
.try_into()
.expect("requirements parse");
assert_eq!(
requirements.allowed_sandbox_modes,
Some(vec![
SandboxModeRequirement::ReadOnly,
SandboxModeRequirement::WorkspaceWrite
])
requirements.sandbox_policy.get(),
&SandboxPolicy::new_read_only_policy()
);
assert!(
requirements
.sandbox_policy
.can_set(&SandboxPolicy::new_workspace_write_policy())
.is_ok()
);
assert_eq!(
requirements
.sandbox_policy
.can_set(&SandboxPolicy::DangerFullAccess),
Err(codex_config::ConstraintError::InvalidValue {
field_name: "sandbox_mode",
candidate: "DangerFullAccess".into(),
allowed: "[ReadOnly, WorkspaceWrite]".into(),
requirement_source: RequirementSource::LegacyManagedConfigTomlFromFile {
file: managed_path,
},
})
);
}
#[cfg(windows)]
#[test]
fn windows_system_requirements_toml_file_uses_expected_suffix() {
let expected = windows_program_data_dir_from_known_folder()
.unwrap_or_else(|_| PathBuf::from(DEFAULT_PROGRAM_DATA_DIR_WINDOWS))
.join("OpenAI")
.join("Codex")
.join("requirements.toml");
assert_eq!(
windows_system_requirements_toml_file()
.expect("requirements.toml path")
.as_path(),
expected.as_path()
);
assert!(
windows_system_requirements_toml_file()
system_requirements_toml_file()
.expect("requirements.toml path")
.as_path()
.ends_with(Path::new("OpenAI").join("Codex").join("requirements.toml"))
.ends_with(
std::path::Path::new("OpenAI")
.join("Codex")
.join("requirements.toml")
)
);
}
#[cfg(windows)]
#[test]
fn windows_system_config_toml_file_uses_expected_suffix() {
let expected = windows_program_data_dir_from_known_folder()
.unwrap_or_else(|_| PathBuf::from(DEFAULT_PROGRAM_DATA_DIR_WINDOWS))
.join("OpenAI")
.join("Codex")
.join("config.toml");
assert_eq!(
windows_system_config_toml_file()
.expect("config.toml path")
.as_path(),
expected.as_path()
);
assert!(
windows_system_config_toml_file()
system_config_toml_file()
.expect("config.toml path")
.as_path()
.ends_with(Path::new("OpenAI").join("Codex").join("config.toml"))
.ends_with(
std::path::Path::new("OpenAI")
.join("Codex")
.join("config.toml")
)
);
}
}

View File

@@ -1,26 +0,0 @@
use super::get_thread_id_from_citations;
use codex_protocol::ThreadId;
use pretty_assertions::assert_eq;
#[test]
fn get_thread_id_from_citations_extracts_thread_ids() {
let first = ThreadId::new();
let second = ThreadId::new();
let citations = vec![format!(
"<memory_citation>\n<citation_entries>\nMEMORY.md:1-2|note=[x]\n</citation_entries>\n<thread_ids>\n{first}\nnot-a-uuid\n{second}\n</thread_ids>\n</memory_citation>"
)];
assert_eq!(get_thread_id_from_citations(citations), vec![first, second]);
}
#[test]
fn get_thread_id_from_citations_supports_legacy_rollout_ids() {
let thread_id = ThreadId::new();
let citations = vec![format!(
"<memory_citation>\n<rollout_ids>\n{thread_id}\n</rollout_ids>\n</memory_citation>"
)];
assert_eq!(get_thread_id_from_citations(citations), vec![thread_id]);
}

View File

@@ -4,29 +4,29 @@
//! - Phase 1: select rollouts, extract stage-1 raw memories, persist stage-1 outputs, and enqueue consolidation.
//! - Phase 2: claim a global consolidation lock, materialize consolidation inputs, and dispatch one consolidation agent.
pub(crate) mod citations;
mod control;
mod phase1;
mod phase2;
pub(crate) mod prompts;
mod start;
mod storage;
#[cfg(test)]
mod tests;
pub(crate) mod usage;
use codex_protocol::openai_models::ReasoningEffort;
pub(crate) use control::clear_memory_root_contents;
pub(crate) use codex_memories::memories::memory_root;
/// Starts the memory startup pipeline for eligible root sessions.
/// This is the single entrypoint that `codex` uses to trigger memory startup.
///
/// This is the entry point to read and understand this module.
pub(crate) use start::start_memories_startup_task;
mod artifacts {
pub(super) const ROLLOUT_SUMMARIES_SUBDIR: &str = "rollout_summaries";
pub(super) const RAW_MEMORIES_FILENAME: &str = "raw_memories.md";
pub(crate) mod citations {
pub(crate) use codex_memories::memories::citations::*;
}
pub(crate) mod storage {
pub(crate) use codex_memories::memories::storage::*;
}
/// Phase 1 (startup extraction).
@@ -98,18 +98,18 @@ mod metrics {
use std::path::Path;
use std::path::PathBuf;
pub fn memory_root(codex_home: &Path) -> PathBuf {
codex_home.join("memories")
pub(crate) async fn clear_memory_root_contents(memory_root: &Path) -> std::io::Result<()> {
codex_memories::memories::control::clear_memory_root_contents(memory_root).await
}
fn rollout_summaries_dir(root: &Path) -> PathBuf {
root.join(artifacts::ROLLOUT_SUMMARIES_SUBDIR)
pub(crate) fn rollout_summaries_dir(root: &Path) -> PathBuf {
codex_memories::memories::rollout_summaries_dir(root)
}
fn raw_memories_file(root: &Path) -> PathBuf {
root.join(artifacts::RAW_MEMORIES_FILENAME)
pub(crate) fn raw_memories_file(root: &Path) -> PathBuf {
codex_memories::memories::raw_memories_file(root)
}
async fn ensure_layout(root: &Path) -> std::io::Result<()> {
tokio::fs::create_dir_all(rollout_summaries_dir(root)).await
pub(crate) async fn ensure_layout(root: &Path) -> std::io::Result<()> {
codex_memories::memories::ensure_layout(root).await
}

View File

@@ -1,260 +0,0 @@
use codex_state::Stage1Output;
use std::collections::HashSet;
use std::fmt::Write as _;
use std::path::Path;
use tracing::warn;
use uuid::Uuid;
use crate::memories::ensure_layout;
use crate::memories::raw_memories_file;
use crate::memories::rollout_summaries_dir;
/// Rebuild `raw_memories.md` from DB-backed stage-1 outputs.
pub(super) async fn rebuild_raw_memories_file_from_memories(
root: &Path,
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> std::io::Result<()> {
ensure_layout(root).await?;
rebuild_raw_memories_file(root, memories, max_raw_memories_for_consolidation).await
}
/// Syncs canonical rollout summary files from DB-backed stage-1 output rows.
pub(super) async fn sync_rollout_summaries_from_memories(
root: &Path,
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> std::io::Result<()> {
ensure_layout(root).await?;
let retained = retained_memories(memories, max_raw_memories_for_consolidation);
let keep = retained
.iter()
.map(rollout_summary_file_stem)
.collect::<HashSet<_>>();
prune_rollout_summaries(root, &keep).await?;
for memory in retained {
write_rollout_summary_for_thread(root, memory).await?;
}
if retained.is_empty() {
for file_name in ["MEMORY.md", "memory_summary.md"] {
let path = root.join(file_name);
if let Err(err) = tokio::fs::remove_file(path).await
&& err.kind() != std::io::ErrorKind::NotFound
{
return Err(err);
}
}
let skills_dir = root.join("skills");
if let Err(err) = tokio::fs::remove_dir_all(skills_dir).await
&& err.kind() != std::io::ErrorKind::NotFound
{
return Err(err);
}
}
Ok(())
}
async fn rebuild_raw_memories_file(
root: &Path,
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> std::io::Result<()> {
let retained = retained_memories(memories, max_raw_memories_for_consolidation);
let mut body = String::from("# Raw Memories\n\n");
if retained.is_empty() {
body.push_str("No raw memories yet.\n");
return tokio::fs::write(raw_memories_file(root), body).await;
}
body.push_str("Merged stage-1 raw memories (latest first):\n\n");
for memory in retained {
writeln!(body, "## Thread `{}`", memory.thread_id).map_err(raw_memories_format_error)?;
writeln!(
body,
"updated_at: {}",
memory.source_updated_at.to_rfc3339()
)
.map_err(raw_memories_format_error)?;
writeln!(body, "cwd: {}", memory.cwd.display()).map_err(raw_memories_format_error)?;
writeln!(body, "rollout_path: {}", memory.rollout_path.display())
.map_err(raw_memories_format_error)?;
let rollout_summary_file = format!("{}.md", rollout_summary_file_stem(memory));
writeln!(body, "rollout_summary_file: {rollout_summary_file}")
.map_err(raw_memories_format_error)?;
writeln!(body).map_err(raw_memories_format_error)?;
body.push_str(memory.raw_memory.trim());
body.push_str("\n\n");
}
tokio::fs::write(raw_memories_file(root), body).await
}
async fn prune_rollout_summaries(root: &Path, keep: &HashSet<String>) -> std::io::Result<()> {
let dir_path = rollout_summaries_dir(root);
let mut dir = match tokio::fs::read_dir(&dir_path).await {
Ok(dir) => dir,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
Err(err) => return Err(err),
};
while let Some(entry) = dir.next_entry().await? {
let path = entry.path();
let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
continue;
};
let Some(stem) = file_name.strip_suffix(".md") else {
continue;
};
if !keep.contains(stem)
&& let Err(err) = tokio::fs::remove_file(&path).await
&& err.kind() != std::io::ErrorKind::NotFound
{
warn!(
"failed pruning outdated rollout summary {}: {err}",
path.display()
);
}
}
Ok(())
}
async fn write_rollout_summary_for_thread(
root: &Path,
memory: &Stage1Output,
) -> std::io::Result<()> {
let file_stem = rollout_summary_file_stem(memory);
let path = rollout_summaries_dir(root).join(format!("{file_stem}.md"));
let mut body = String::new();
writeln!(body, "thread_id: {}", memory.thread_id).map_err(rollout_summary_format_error)?;
writeln!(
body,
"updated_at: {}",
memory.source_updated_at.to_rfc3339()
)
.map_err(rollout_summary_format_error)?;
writeln!(body, "rollout_path: {}", memory.rollout_path.display())
.map_err(rollout_summary_format_error)?;
writeln!(body, "cwd: {}", memory.cwd.display()).map_err(rollout_summary_format_error)?;
if let Some(git_branch) = memory.git_branch.as_deref() {
writeln!(body, "git_branch: {git_branch}").map_err(rollout_summary_format_error)?;
}
writeln!(body).map_err(rollout_summary_format_error)?;
body.push_str(&memory.rollout_summary);
body.push('\n');
tokio::fs::write(path, body).await
}
fn retained_memories(
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> &[Stage1Output] {
&memories[..memories.len().min(max_raw_memories_for_consolidation)]
}
fn raw_memories_format_error(err: std::fmt::Error) -> std::io::Error {
std::io::Error::other(format!("format raw memories: {err}"))
}
fn rollout_summary_format_error(err: std::fmt::Error) -> std::io::Error {
std::io::Error::other(format!("format rollout summary: {err}"))
}
pub(crate) fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
rollout_summary_file_stem_from_parts(
memory.thread_id,
memory.source_updated_at,
memory.rollout_slug.as_deref(),
)
}
pub(super) fn rollout_summary_file_stem_from_parts(
thread_id: codex_protocol::ThreadId,
source_updated_at: chrono::DateTime<chrono::Utc>,
rollout_slug: Option<&str>,
) -> String {
const ROLLOUT_SLUG_MAX_LEN: usize = 60;
const SHORT_HASH_ALPHABET: &[u8; 62] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const SHORT_HASH_SPACE: u32 = 14_776_336;
let thread_id = thread_id.to_string();
let (timestamp_fragment, short_hash_seed) = match Uuid::parse_str(&thread_id) {
Ok(thread_uuid) => {
let timestamp = thread_uuid
.get_timestamp()
.and_then(|uuid_timestamp| {
let (seconds, nanos) = uuid_timestamp.to_unix();
i64::try_from(seconds).ok().and_then(|secs| {
chrono::DateTime::<chrono::Utc>::from_timestamp(secs, nanos)
})
})
.unwrap_or(source_updated_at);
let short_hash_seed = (thread_uuid.as_u128() & 0xFFFF_FFFF) as u32;
(
timestamp.format("%Y-%m-%dT%H-%M-%S").to_string(),
short_hash_seed,
)
}
Err(_) => {
let mut short_hash_seed = 0u32;
for byte in thread_id.bytes() {
short_hash_seed = short_hash_seed
.wrapping_mul(31)
.wrapping_add(u32::from(byte));
}
(
source_updated_at.format("%Y-%m-%dT%H-%M-%S").to_string(),
short_hash_seed,
)
}
};
let mut short_hash_value = short_hash_seed % SHORT_HASH_SPACE;
let mut short_hash_chars = ['0'; 4];
for idx in (0..short_hash_chars.len()).rev() {
let alphabet_idx = (short_hash_value % SHORT_HASH_ALPHABET.len() as u32) as usize;
short_hash_chars[idx] = SHORT_HASH_ALPHABET[alphabet_idx] as char;
short_hash_value /= SHORT_HASH_ALPHABET.len() as u32;
}
let short_hash: String = short_hash_chars.iter().collect();
let file_prefix = format!("{timestamp_fragment}-{short_hash}");
let Some(raw_slug) = rollout_slug else {
return file_prefix;
};
let mut slug = String::with_capacity(ROLLOUT_SLUG_MAX_LEN);
for ch in raw_slug.chars() {
if slug.len() >= ROLLOUT_SLUG_MAX_LEN {
break;
}
if ch.is_ascii_alphanumeric() {
slug.push(ch.to_ascii_lowercase());
} else {
slug.push('_');
}
}
while slug.ends_with('_') {
slug.pop();
}
if slug.is_empty() {
file_prefix
} else {
format!("{file_prefix}-{slug}")
}
}
#[cfg(test)]
#[path = "storage_tests.rs"]
mod tests;

View File

@@ -1,70 +0,0 @@
use super::rollout_summary_file_stem;
use super::rollout_summary_file_stem_from_parts;
use chrono::TimeZone;
use chrono::Utc;
use codex_protocol::ThreadId;
use codex_state::Stage1Output;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
const FIXED_PREFIX: &str = "2025-02-11T15-35-19-jqmb";
fn stage1_output_with_slug(thread_id: ThreadId, rollout_slug: Option<&str>) -> Stage1Output {
Stage1Output {
thread_id,
source_updated_at: Utc.timestamp_opt(123, 0).single().expect("timestamp"),
raw_memory: "raw memory".to_string(),
rollout_summary: "summary".to_string(),
rollout_slug: rollout_slug.map(ToString::to_string),
rollout_path: PathBuf::from("/tmp/rollout.jsonl"),
cwd: PathBuf::from("/tmp/workspace"),
git_branch: None,
generated_at: Utc.timestamp_opt(124, 0).single().expect("timestamp"),
}
}
fn fixed_thread_id() -> ThreadId {
ThreadId::try_from("0194f5a6-89ab-7cde-8123-456789abcdef").expect("valid thread id")
}
#[test]
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_missing() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(thread_id, None);
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
assert_eq!(
rollout_summary_file_stem_from_parts(
memory.thread_id,
memory.source_updated_at,
memory.rollout_slug.as_deref(),
),
FIXED_PREFIX
);
}
#[test]
fn rollout_summary_file_stem_sanitizes_and_truncates_slug() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(
thread_id,
Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345_67890_ABCDE_fghij_klmno"),
);
let stem = rollout_summary_file_stem(&memory);
let slug = stem
.strip_prefix(&format!("{FIXED_PREFIX}-"))
.expect("slug suffix should be present");
assert_eq!(slug.len(), 60);
assert_eq!(
slug,
"unsafe_slug_with_spaces___symbols___extra_long_12345_67890_a"
);
}
#[test]
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_is_empty() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(thread_id, Some(""));
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
}

View File

@@ -1,30 +1,13 @@
use std::path::Path;
use std::io;
use std::path::PathBuf;
use crate::ModelClient;
use crate::error::CodexErr;
use crate::error::Result;
use codex_api::RawMemory as ApiRawMemory;
use codex_api::RawMemoryMetadata as ApiRawMemoryMetadata;
pub use codex_memories::memory_trace::BuiltMemory;
use codex_otel::SessionTelemetry;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use serde_json::Map;
use serde_json::Value;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuiltMemory {
pub memory_id: String,
pub source_path: PathBuf,
pub raw_memory: String,
pub memory_summary: String,
}
struct PreparedTrace {
memory_id: String,
source_path: PathBuf,
payload: ApiRawMemory,
}
/// Loads raw trace files, normalizes items, and builds memory summaries.
///
@@ -44,187 +27,20 @@ pub async fn build_memories_from_trace_files(
return Ok(Vec::new());
}
let mut prepared = Vec::with_capacity(trace_paths.len());
for (index, path) in trace_paths.iter().enumerate() {
prepared.push(prepare_trace(index + 1, path).await?);
}
let prepared = codex_memories::memory_trace::load_trace_requests(trace_paths)
.await
.map_err(map_trace_load_error)?;
let raw_memories = prepared.iter().map(|trace| trace.payload.clone()).collect();
let output = client
.summarize_memories(raw_memories, model_info, effort, session_telemetry)
.await?;
if output.len() != prepared.len() {
return Err(CodexErr::InvalidRequest(format!(
"unexpected memory summarize output length: expected {}, got {}",
prepared.len(),
output.len()
)));
}
Ok(prepared
.into_iter()
.zip(output)
.map(|(trace, summary)| BuiltMemory {
memory_id: trace.memory_id,
source_path: trace.source_path,
raw_memory: summary.raw_memory,
memory_summary: summary.memory_summary,
})
.collect())
codex_memories::memory_trace::build_memories_from_output(prepared, output)
.map_err(|err| CodexErr::InvalidRequest(err.to_string()))
}
async fn prepare_trace(index: usize, path: &Path) -> Result<PreparedTrace> {
let text = load_trace_text(path).await?;
let items = load_trace_items(path, &text)?;
let memory_id = build_memory_id(index, path);
let source_path = path.to_path_buf();
Ok(PreparedTrace {
memory_id: memory_id.clone(),
source_path: source_path.clone(),
payload: ApiRawMemory {
id: memory_id,
metadata: ApiRawMemoryMetadata {
source_path: source_path.display().to_string(),
},
items,
},
})
fn map_trace_load_error(err: anyhow::Error) -> CodexErr {
match err.downcast::<io::Error>() {
Ok(err) => CodexErr::Io(err),
Err(err) => CodexErr::InvalidRequest(err.to_string()),
}
}
async fn load_trace_text(path: &Path) -> Result<String> {
let raw = tokio::fs::read(path).await?;
Ok(decode_trace_bytes(&raw))
}
fn decode_trace_bytes(raw: &[u8]) -> String {
if let Some(without_bom) = raw.strip_prefix(&[0xEF, 0xBB, 0xBF])
&& let Ok(text) = String::from_utf8(without_bom.to_vec())
{
return text;
}
if let Ok(text) = String::from_utf8(raw.to_vec()) {
return text;
}
raw.iter().map(|b| char::from(*b)).collect()
}
fn load_trace_items(path: &Path, text: &str) -> Result<Vec<Value>> {
if let Ok(Value::Array(items)) = serde_json::from_str::<Value>(text) {
let dict_items = items
.into_iter()
.filter(serde_json::Value::is_object)
.collect::<Vec<_>>();
if dict_items.is_empty() {
return Err(CodexErr::InvalidRequest(format!(
"no object items found in trace file: {}",
path.display()
)));
}
return normalize_trace_items(dict_items, path);
}
let mut parsed_items = Vec::new();
for line in text.lines() {
let line = line.trim();
if line.is_empty() || (!line.starts_with('{') && !line.starts_with('[')) {
continue;
}
let Ok(obj) = serde_json::from_str::<Value>(line) else {
continue;
};
match obj {
Value::Object(_) => parsed_items.push(obj),
Value::Array(inner) => {
parsed_items.extend(inner.into_iter().filter(serde_json::Value::is_object))
}
_ => {}
}
}
if parsed_items.is_empty() {
return Err(CodexErr::InvalidRequest(format!(
"no JSON items parsed from trace file: {}",
path.display()
)));
}
normalize_trace_items(parsed_items, path)
}
fn normalize_trace_items(items: Vec<Value>, path: &Path) -> Result<Vec<Value>> {
let mut normalized = Vec::new();
for item in items {
let Value::Object(obj) = item else {
continue;
};
if let Some(payload) = obj.get("payload") {
if obj.get("type").and_then(Value::as_str) != Some("response_item") {
continue;
}
match payload {
Value::Object(payload_item) => {
if is_allowed_trace_item(payload_item) {
normalized.push(Value::Object(payload_item.clone()));
}
}
Value::Array(payload_items) => {
for payload_item in payload_items {
if let Value::Object(payload_item) = payload_item
&& is_allowed_trace_item(payload_item)
{
normalized.push(Value::Object(payload_item.clone()));
}
}
}
_ => {}
}
continue;
}
if is_allowed_trace_item(&obj) {
normalized.push(Value::Object(obj));
}
}
if normalized.is_empty() {
return Err(CodexErr::InvalidRequest(format!(
"no valid trace items after normalization: {}",
path.display()
)));
}
Ok(normalized)
}
fn is_allowed_trace_item(item: &Map<String, Value>) -> bool {
let Some(item_type) = item.get("type").and_then(Value::as_str) else {
return false;
};
if item_type == "message" {
return matches!(
item.get("role").and_then(Value::as_str),
Some("assistant" | "system" | "developer" | "user")
);
}
true
}
fn build_memory_id(index: usize, path: &Path) -> String {
let stem = path
.file_stem()
.map(|stem| stem.to_string_lossy().into_owned())
.filter(|stem| !stem.is_empty())
.unwrap_or_else(|| "memory".to_string());
format!("memory_{index}_{stem}")
}
#[cfg(test)]
#[path = "memory_trace_tests.rs"]
mod tests;

View File

@@ -1,73 +0,0 @@
use super::*;
use pretty_assertions::assert_eq;
use tempfile::tempdir;
#[test]
fn normalize_trace_items_handles_payload_wrapper_and_message_role_filtering() {
let items = vec![
serde_json::json!({
"type": "response_item",
"payload": {"type": "message", "role": "assistant", "content": []}
}),
serde_json::json!({
"type": "response_item",
"payload": [
{"type": "message", "role": "user", "content": []},
{"type": "message", "role": "tool", "content": []},
{"type": "function_call", "name": "shell", "arguments": "{}", "call_id": "c1"}
]
}),
serde_json::json!({
"type": "not_response_item",
"payload": {"type": "message", "role": "assistant", "content": []}
}),
serde_json::json!({
"type": "message",
"role": "developer",
"content": []
}),
];
let normalized = normalize_trace_items(items, Path::new("trace.json")).expect("normalize");
let expected = vec![
serde_json::json!({"type": "message", "role": "assistant", "content": []}),
serde_json::json!({"type": "message", "role": "user", "content": []}),
serde_json::json!({"type": "function_call", "name": "shell", "arguments": "{}", "call_id": "c1"}),
serde_json::json!({"type": "message", "role": "developer", "content": []}),
];
assert_eq!(normalized, expected);
}
#[test]
fn load_trace_items_supports_jsonl_arrays_and_objects() {
let text = r#"
{"type":"response_item","payload":{"type":"message","role":"assistant","content":[]}}
[{"type":"message","role":"user","content":[]},{"type":"message","role":"tool","content":[]}]
"#;
let loaded = load_trace_items(Path::new("trace.jsonl"), text).expect("load");
let expected = vec![
serde_json::json!({"type":"message","role":"assistant","content":[]}),
serde_json::json!({"type":"message","role":"user","content":[]}),
];
assert_eq!(loaded, expected);
}
#[tokio::test]
async fn load_trace_text_decodes_utf8_sig() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("trace.json");
tokio::fs::write(
&path,
[
0xEF, 0xBB, 0xBF, b'[', b'{', b'"', b't', b'y', b'p', b'e', b'"', b':', b'"', b'm',
b'e', b's', b's', b'a', b'g', b'e', b'"', b',', b'"', b'r', b'o', b'l', b'e', b'"',
b':', b'"', b'u', b's', b'e', b'r', b'"', b',', b'"', b'c', b'o', b'n', b't', b'e',
b'n', b't', b'"', b':', b'[', b']', b'}', b']',
],
)
.await
.expect("write");
let text = load_trace_text(&path).await.expect("decode");
assert!(text.starts_with('['));
}

View File

@@ -4,6 +4,7 @@ use std::sync::Arc;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_memories::memories::citations::get_thread_id_from_citations;
use codex_protocol::config_types::ModeKind;
use codex_protocol::items::TurnItem;
use codex_utils_stream_parser::strip_citations;
@@ -14,7 +15,6 @@ use crate::codex::TurnContext;
use crate::error::CodexErr;
use crate::error::Result;
use crate::function_tool::FunctionCallError;
use crate::memories::citations::get_thread_id_from_citations;
use crate::parse_turn_item;
use crate::state_db;
use crate::tools::parallel::ToolCallRuntime;

View File

@@ -0,0 +1,6 @@
load("//:defs.bzl", "codex_rust_crate")
codex_rust_crate(
name = "memories",
crate_name = "codex_memories",
)

View File

@@ -0,0 +1,27 @@
[package]
name = "codex-memories"
version.workspace = true
edition.workspace = true
license.workspace = true
[lib]
name = "codex_memories"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
chrono = { workspace = true }
codex-api = { workspace = true }
codex-protocol = { workspace = true }
codex-state = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["fs", "macros", "rt"] }
tracing = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
[dev-dependencies]
pretty_assertions = { workspace = true }
tempfile = { workspace = true }

View File

@@ -0,0 +1,2 @@
pub mod memories;
pub mod memory_trace;

View File

@@ -32,5 +32,31 @@ pub fn get_thread_id_from_citations(citations: Vec<String>) -> Vec<ThreadId> {
}
#[cfg(test)]
#[path = "citations_tests.rs"]
mod tests;
mod tests {
use super::get_thread_id_from_citations;
use codex_protocol::ThreadId;
use pretty_assertions::assert_eq;
#[test]
fn get_thread_id_from_citations_extracts_thread_ids() {
let first = ThreadId::new();
let second = ThreadId::new();
let citations = vec![format!(
"<memory_citation>\n<citation_entries>\nMEMORY.md:1-2|note=[x]\n</citation_entries>\n<thread_ids>\n{first}\nnot-a-uuid\n{second}\n</thread_ids>\n</memory_citation>"
)];
assert_eq!(get_thread_id_from_citations(citations), vec![first, second]);
}
#[test]
fn get_thread_id_from_citations_supports_legacy_rollout_ids() {
let thread_id = ThreadId::new();
let citations = vec![format!(
"<memory_citation>\n<rollout_ids>\n{thread_id}\n</rollout_ids>\n</memory_citation>"
)];
assert_eq!(get_thread_id_from_citations(citations), vec![thread_id]);
}
}

View File

@@ -1,6 +1,6 @@
use std::path::Path;
pub(crate) async fn clear_memory_root_contents(memory_root: &Path) -> std::io::Result<()> {
pub async fn clear_memory_root_contents(memory_root: &Path) -> std::io::Result<()> {
match tokio::fs::symlink_metadata(memory_root).await {
Ok(metadata) if metadata.file_type().is_symlink() => {
return Err(std::io::Error::new(

View File

@@ -0,0 +1,29 @@
//! Shared memory filesystem and artifact utilities.
pub mod citations;
pub mod control;
pub mod storage;
use std::path::Path;
use std::path::PathBuf;
mod artifacts {
pub(super) const ROLLOUT_SUMMARIES_SUBDIR: &str = "rollout_summaries";
pub(super) const RAW_MEMORIES_FILENAME: &str = "raw_memories.md";
}
pub fn memory_root(codex_home: &Path) -> PathBuf {
codex_home.join("memories")
}
pub fn rollout_summaries_dir(root: &Path) -> PathBuf {
root.join(artifacts::ROLLOUT_SUMMARIES_SUBDIR)
}
pub fn raw_memories_file(root: &Path) -> PathBuf {
root.join(artifacts::RAW_MEMORIES_FILENAME)
}
pub async fn ensure_layout(root: &Path) -> std::io::Result<()> {
tokio::fs::create_dir_all(rollout_summaries_dir(root)).await
}

View File

@@ -0,0 +1,480 @@
use crate::memories::ensure_layout;
use crate::memories::raw_memories_file;
use crate::memories::rollout_summaries_dir;
use codex_state::Stage1Output;
use std::collections::HashSet;
use std::fmt::Write as _;
use std::path::Path;
use tracing::warn;
use uuid::Uuid;
/// Rebuild `raw_memories.md` from DB-backed stage-1 outputs.
pub async fn rebuild_raw_memories_file_from_memories(
root: &Path,
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> std::io::Result<()> {
ensure_layout(root).await?;
rebuild_raw_memories_file(root, memories, max_raw_memories_for_consolidation).await
}
/// Syncs canonical rollout summary files from DB-backed stage-1 output rows.
pub async fn sync_rollout_summaries_from_memories(
root: &Path,
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> std::io::Result<()> {
ensure_layout(root).await?;
let retained = retained_memories(memories, max_raw_memories_for_consolidation);
let keep = retained
.iter()
.map(rollout_summary_file_stem)
.collect::<HashSet<_>>();
prune_rollout_summaries(root, &keep).await?;
for memory in retained {
write_rollout_summary_for_thread(root, memory).await?;
}
if retained.is_empty() {
for file_name in ["MEMORY.md", "memory_summary.md"] {
let path = root.join(file_name);
if let Err(err) = tokio::fs::remove_file(path).await
&& err.kind() != std::io::ErrorKind::NotFound
{
return Err(err);
}
}
let skills_dir = root.join("skills");
if let Err(err) = tokio::fs::remove_dir_all(skills_dir).await
&& err.kind() != std::io::ErrorKind::NotFound
{
return Err(err);
}
}
Ok(())
}
async fn rebuild_raw_memories_file(
root: &Path,
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> std::io::Result<()> {
let retained = retained_memories(memories, max_raw_memories_for_consolidation);
let mut body = String::from("# Raw Memories\n\n");
if retained.is_empty() {
body.push_str("No raw memories yet.\n");
return tokio::fs::write(raw_memories_file(root), body).await;
}
body.push_str("Merged stage-1 raw memories (latest first):\n\n");
for memory in retained {
writeln!(body, "## Thread `{}`", memory.thread_id).map_err(raw_memories_format_error)?;
writeln!(
body,
"updated_at: {}",
memory.source_updated_at.to_rfc3339()
)
.map_err(raw_memories_format_error)?;
writeln!(body, "cwd: {}", memory.cwd.display()).map_err(raw_memories_format_error)?;
writeln!(body, "rollout_path: {}", memory.rollout_path.display())
.map_err(raw_memories_format_error)?;
let rollout_summary_file = format!("{}.md", rollout_summary_file_stem(memory));
writeln!(body, "rollout_summary_file: {rollout_summary_file}")
.map_err(raw_memories_format_error)?;
writeln!(body).map_err(raw_memories_format_error)?;
body.push_str(memory.raw_memory.trim());
body.push_str("\n\n");
}
tokio::fs::write(raw_memories_file(root), body).await
}
async fn prune_rollout_summaries(root: &Path, keep: &HashSet<String>) -> std::io::Result<()> {
let dir_path = rollout_summaries_dir(root);
let mut dir = match tokio::fs::read_dir(&dir_path).await {
Ok(dir) => dir,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
Err(err) => return Err(err),
};
while let Some(entry) = dir.next_entry().await? {
let path = entry.path();
let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
continue;
};
let Some(stem) = file_name.strip_suffix(".md") else {
continue;
};
if !keep.contains(stem)
&& let Err(err) = tokio::fs::remove_file(&path).await
&& err.kind() != std::io::ErrorKind::NotFound
{
warn!(
"failed pruning outdated rollout summary {}: {err}",
path.display()
);
}
}
Ok(())
}
async fn write_rollout_summary_for_thread(
root: &Path,
memory: &Stage1Output,
) -> std::io::Result<()> {
let file_stem = rollout_summary_file_stem(memory);
let path = rollout_summaries_dir(root).join(format!("{file_stem}.md"));
let mut body = String::new();
writeln!(body, "thread_id: {}", memory.thread_id).map_err(rollout_summary_format_error)?;
writeln!(
body,
"updated_at: {}",
memory.source_updated_at.to_rfc3339()
)
.map_err(rollout_summary_format_error)?;
writeln!(body, "rollout_path: {}", memory.rollout_path.display())
.map_err(rollout_summary_format_error)?;
writeln!(body, "cwd: {}", memory.cwd.display()).map_err(rollout_summary_format_error)?;
if let Some(git_branch) = memory.git_branch.as_deref() {
writeln!(body, "git_branch: {git_branch}").map_err(rollout_summary_format_error)?;
}
writeln!(body).map_err(rollout_summary_format_error)?;
body.push_str(&memory.rollout_summary);
body.push('\n');
tokio::fs::write(path, body).await
}
fn retained_memories(
memories: &[Stage1Output],
max_raw_memories_for_consolidation: usize,
) -> &[Stage1Output] {
&memories[..memories.len().min(max_raw_memories_for_consolidation)]
}
fn raw_memories_format_error(err: std::fmt::Error) -> std::io::Error {
std::io::Error::other(format!("format raw memories: {err}"))
}
fn rollout_summary_format_error(err: std::fmt::Error) -> std::io::Error {
std::io::Error::other(format!("format rollout summary: {err}"))
}
pub fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
rollout_summary_file_stem_from_parts(
memory.thread_id,
memory.source_updated_at,
memory.rollout_slug.as_deref(),
)
}
pub fn rollout_summary_file_stem_from_parts(
thread_id: codex_protocol::ThreadId,
source_updated_at: chrono::DateTime<chrono::Utc>,
rollout_slug: Option<&str>,
) -> String {
const ROLLOUT_SLUG_MAX_LEN: usize = 60;
const SHORT_HASH_ALPHABET: &[u8; 62] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const SHORT_HASH_SPACE: u32 = 14_776_336;
let thread_id = thread_id.to_string();
let (timestamp_fragment, short_hash_seed) = match Uuid::parse_str(&thread_id) {
Ok(thread_uuid) => {
let timestamp = thread_uuid
.get_timestamp()
.and_then(|uuid_timestamp| {
let (seconds, nanos) = uuid_timestamp.to_unix();
i64::try_from(seconds).ok().and_then(|secs| {
chrono::DateTime::<chrono::Utc>::from_timestamp(secs, nanos)
})
})
.unwrap_or(source_updated_at);
let short_hash_seed = (thread_uuid.as_u128() & 0xFFFF_FFFF) as u32;
(
timestamp.format("%Y-%m-%dT%H-%M-%S").to_string(),
short_hash_seed,
)
}
Err(_) => {
let mut short_hash_seed = 0u32;
for byte in thread_id.bytes() {
short_hash_seed = short_hash_seed
.wrapping_mul(31)
.wrapping_add(u32::from(byte));
}
(
source_updated_at.format("%Y-%m-%dT%H-%M-%S").to_string(),
short_hash_seed,
)
}
};
let mut short_hash_value = short_hash_seed % SHORT_HASH_SPACE;
let mut short_hash = ['0'; 4];
for digit in short_hash.iter_mut().rev() {
let idx = usize::try_from(short_hash_value % 62).unwrap_or(0);
*digit = char::from(SHORT_HASH_ALPHABET[idx]);
short_hash_value /= 62;
}
let mut stem = format!(
"{timestamp_fragment}-{}",
short_hash.iter().collect::<String>()
);
let sanitized_slug = rollout_slug
.map(str::trim)
.filter(|slug| !slug.is_empty())
.map(|slug| {
slug.chars()
.map(|ch| {
if ch.is_ascii_alphanumeric() {
ch.to_ascii_lowercase()
} else {
'_'
}
})
.collect::<String>()
})
.map(|slug| slug.chars().take(ROLLOUT_SLUG_MAX_LEN).collect::<String>())
.filter(|slug| !slug.is_empty());
if let Some(slug) = sanitized_slug {
stem.push('-');
stem.push_str(&slug);
}
stem
}
#[cfg(test)]
mod tests {
use super::rebuild_raw_memories_file_from_memories;
use super::rollout_summary_file_stem;
use super::rollout_summary_file_stem_from_parts;
use super::sync_rollout_summaries_from_memories;
use crate::memories::control::clear_memory_root_contents;
use crate::memories::ensure_layout;
use crate::memories::memory_root;
use crate::memories::raw_memories_file;
use crate::memories::rollout_summaries_dir;
use chrono::TimeZone;
use chrono::Utc;
use codex_protocol::ThreadId;
use codex_state::Stage1Output;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
use tempfile::tempdir;
const FIXED_PREFIX: &str = "2025-02-11T15-35-19-jqmb";
fn stage1_output_with_slug(thread_id: ThreadId, rollout_slug: Option<&str>) -> Stage1Output {
Stage1Output {
thread_id,
source_updated_at: Utc.timestamp_opt(123, 0).single().expect("timestamp"),
raw_memory: "raw memory".to_string(),
rollout_summary: "summary".to_string(),
rollout_slug: rollout_slug.map(ToString::to_string),
rollout_path: PathBuf::from("/tmp/rollout.jsonl"),
cwd: PathBuf::from("/tmp/workspace"),
git_branch: None,
generated_at: Utc.timestamp_opt(124, 0).single().expect("timestamp"),
}
}
fn fixed_thread_id() -> ThreadId {
ThreadId::try_from("0194f5a6-89ab-7cde-8123-456789abcdef").expect("valid thread id")
}
#[test]
fn memory_root_uses_shared_global_path() {
let dir = tempdir().expect("tempdir");
let codex_home = dir.path().join("codex");
assert_eq!(memory_root(&codex_home), codex_home.join("memories"));
}
#[tokio::test]
async fn clear_memory_root_contents_preserves_root_directory() {
let dir = tempdir().expect("tempdir");
let root = dir.path().join("memory");
let nested_dir = root.join("rollout_summaries");
tokio::fs::create_dir_all(&nested_dir)
.await
.expect("create rollout summaries dir");
tokio::fs::write(root.join("MEMORY.md"), "stale memory index\n")
.await
.expect("write memory index");
tokio::fs::write(nested_dir.join("rollout.md"), "stale rollout\n")
.await
.expect("write rollout summary");
clear_memory_root_contents(&root)
.await
.expect("clear memory root contents");
assert!(
tokio::fs::try_exists(&root)
.await
.expect("check memory root existence"),
"memory root should still exist after clearing contents"
);
let mut entries = tokio::fs::read_dir(&root)
.await
.expect("read memory root after clear");
assert!(
entries
.next_entry()
.await
.expect("read next entry")
.is_none(),
"memory root should be empty after clearing contents"
);
}
#[cfg(unix)]
#[tokio::test]
async fn clear_memory_root_contents_rejects_symlinked_root() {
let dir = tempdir().expect("tempdir");
let target = dir.path().join("outside");
tokio::fs::create_dir_all(&target)
.await
.expect("create symlink target dir");
let target_file = target.join("keep.txt");
tokio::fs::write(&target_file, "keep\n")
.await
.expect("write target file");
let root = dir.path().join("memory");
std::os::unix::fs::symlink(&target, &root).expect("create memory root symlink");
let err = clear_memory_root_contents(&root)
.await
.expect_err("symlinked memory root should be rejected");
assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput);
assert!(
tokio::fs::try_exists(&target_file)
.await
.expect("check target file existence"),
"rejecting a symlinked memory root should not delete the symlink target"
);
}
#[tokio::test]
async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only() {
let dir = tempdir().expect("tempdir");
let root = dir.path().join("memory");
ensure_layout(&root).await.expect("ensure layout");
let keep_id = ThreadId::default().to_string();
let drop_id = ThreadId::default().to_string();
let keep_path = rollout_summaries_dir(&root).join(format!("{keep_id}.md"));
let drop_path = rollout_summaries_dir(&root).join(format!("{drop_id}.md"));
tokio::fs::write(&keep_path, "keep")
.await
.expect("write keep");
tokio::fs::write(&drop_path, "drop")
.await
.expect("write drop");
let memories = vec![Stage1Output {
thread_id: ThreadId::try_from(keep_id.clone()).expect("thread id"),
source_updated_at: Utc.timestamp_opt(100, 0).single().expect("timestamp"),
raw_memory: "raw memory".to_string(),
rollout_summary: "short summary".to_string(),
rollout_slug: None,
rollout_path: PathBuf::from("/tmp/rollout-100.jsonl"),
cwd: PathBuf::from("/tmp/workspace"),
git_branch: None,
generated_at: Utc.timestamp_opt(101, 0).single().expect("timestamp"),
}];
sync_rollout_summaries_from_memories(&root, &memories, 100)
.await
.expect("sync rollout summaries");
rebuild_raw_memories_file_from_memories(&root, &memories, 100)
.await
.expect("rebuild raw memories");
assert!(
!tokio::fs::try_exists(&keep_path)
.await
.expect("check stale keep path"),
"sync should prune stale filename that used thread id only"
);
assert!(
!tokio::fs::try_exists(&drop_path)
.await
.expect("check stale drop path"),
"sync should prune stale filename for dropped thread"
);
let mut dir = tokio::fs::read_dir(rollout_summaries_dir(&root))
.await
.expect("open rollout summaries dir");
let mut files = Vec::new();
while let Some(entry) = dir.next_entry().await.expect("read dir entry") {
files.push(entry.file_name().to_string_lossy().to_string());
}
files.sort_unstable();
assert_eq!(files.len(), 1);
let canonical_rollout_summary_file = &files[0];
let raw_memories = tokio::fs::read_to_string(raw_memories_file(&root))
.await
.expect("read raw memories");
assert!(raw_memories.contains("raw memory"));
assert!(raw_memories.contains(&keep_id));
assert!(raw_memories.contains("cwd: /tmp/workspace"));
assert!(raw_memories.contains("rollout_path: /tmp/rollout-100.jsonl"));
assert!(raw_memories.contains(&format!(
"rollout_summary_file: {canonical_rollout_summary_file}"
)));
}
#[test]
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_missing() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(thread_id, None);
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
assert_eq!(
rollout_summary_file_stem_from_parts(
memory.thread_id,
memory.source_updated_at,
memory.rollout_slug.as_deref(),
),
FIXED_PREFIX
);
}
#[test]
fn rollout_summary_file_stem_sanitizes_and_truncates_slug() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(
thread_id,
Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345_67890_ABCDE_fghij_klmno"),
);
let stem = rollout_summary_file_stem(&memory);
let slug = stem
.strip_prefix(&format!("{FIXED_PREFIX}-"))
.expect("slug suffix should be present");
assert_eq!(slug.len(), 60);
assert_eq!(
slug,
"unsafe_slug_with_spaces___symbols___extra_long_12345_67890_a"
);
}
#[test]
fn rollout_summary_file_stem_uses_uuid_timestamp_and_hash_when_slug_is_empty() {
let thread_id = fixed_thread_id();
let memory = stage1_output_with_slug(thread_id, Some(""));
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
}
}

View File

@@ -0,0 +1,281 @@
use anyhow::Result;
use codex_api::MemorySummarizeOutput;
use codex_api::RawMemory as ApiRawMemory;
use codex_api::RawMemoryMetadata as ApiRawMemoryMetadata;
use serde_json::Map;
use serde_json::Value;
use std::path::Path;
use std::path::PathBuf;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuiltMemory {
pub memory_id: String,
pub source_path: PathBuf,
pub raw_memory: String,
pub memory_summary: String,
}
#[derive(Debug, Clone)]
pub struct PreparedTrace {
pub memory_id: String,
pub source_path: PathBuf,
pub payload: ApiRawMemory,
}
pub async fn load_trace_requests(trace_paths: &[PathBuf]) -> Result<Vec<PreparedTrace>> {
let mut prepared = Vec::with_capacity(trace_paths.len());
for (index, path) in trace_paths.iter().enumerate() {
prepared.push(prepare_trace(index + 1, path).await?);
}
Ok(prepared)
}
pub fn build_memories_from_output(
prepared: Vec<PreparedTrace>,
output: Vec<MemorySummarizeOutput>,
) -> Result<Vec<BuiltMemory>> {
if output.len() != prepared.len() {
anyhow::bail!(
"unexpected memory summarize output length: expected {}, got {}",
prepared.len(),
output.len()
);
}
Ok(prepared
.into_iter()
.zip(output)
.map(|(trace, summary)| BuiltMemory {
memory_id: trace.memory_id,
source_path: trace.source_path,
raw_memory: summary.raw_memory,
memory_summary: summary.memory_summary,
})
.collect())
}
async fn prepare_trace(index: usize, path: &Path) -> Result<PreparedTrace> {
let text = load_trace_text(path).await?;
let items = load_trace_items(path, &text)?;
let memory_id = build_memory_id(index, path);
let source_path = path.to_path_buf();
Ok(PreparedTrace {
memory_id: memory_id.clone(),
source_path: source_path.clone(),
payload: ApiRawMemory {
id: memory_id,
metadata: ApiRawMemoryMetadata {
source_path: source_path.display().to_string(),
},
items,
},
})
}
async fn load_trace_text(path: &Path) -> Result<String> {
let raw = tokio::fs::read(path).await?;
Ok(decode_trace_bytes(&raw))
}
fn decode_trace_bytes(raw: &[u8]) -> String {
if let Some(without_bom) = raw.strip_prefix(&[0xEF, 0xBB, 0xBF])
&& let Ok(text) = String::from_utf8(without_bom.to_vec())
{
return text;
}
if let Ok(text) = String::from_utf8(raw.to_vec()) {
return text;
}
raw.iter().map(|b| char::from(*b)).collect()
}
fn load_trace_items(path: &Path, text: &str) -> Result<Vec<Value>> {
if let Ok(Value::Array(items)) = serde_json::from_str::<Value>(text) {
let dict_items = items
.into_iter()
.filter(serde_json::Value::is_object)
.collect::<Vec<_>>();
if dict_items.is_empty() {
anyhow::bail!("no object items found in trace file: {}", path.display());
}
return normalize_trace_items(dict_items, path);
}
let mut parsed_items = Vec::new();
for line in text.lines() {
let line = line.trim();
if line.is_empty() || (!line.starts_with('{') && !line.starts_with('[')) {
continue;
}
let Ok(obj) = serde_json::from_str::<Value>(line) else {
continue;
};
match obj {
Value::Object(_) => parsed_items.push(obj),
Value::Array(inner) => {
parsed_items.extend(inner.into_iter().filter(serde_json::Value::is_object))
}
_ => {}
}
}
if parsed_items.is_empty() {
anyhow::bail!("no JSON items parsed from trace file: {}", path.display());
}
normalize_trace_items(parsed_items, path)
}
fn normalize_trace_items(items: Vec<Value>, path: &Path) -> Result<Vec<Value>> {
let mut normalized = Vec::new();
for item in items {
let Value::Object(obj) = item else {
continue;
};
if let Some(payload) = obj.get("payload") {
if obj.get("type").and_then(Value::as_str) != Some("response_item") {
continue;
}
match payload {
Value::Object(payload_item) => {
if is_allowed_trace_item(payload_item) {
normalized.push(Value::Object(payload_item.clone()));
}
}
Value::Array(payload_items) => {
for payload_item in payload_items {
if let Value::Object(payload_item) = payload_item
&& is_allowed_trace_item(payload_item)
{
normalized.push(Value::Object(payload_item.clone()));
}
}
}
_ => {}
}
continue;
}
if is_allowed_trace_item(&obj) {
normalized.push(Value::Object(obj));
}
}
if normalized.is_empty() {
anyhow::bail!(
"no valid trace items after normalization: {}",
path.display()
);
}
Ok(normalized)
}
fn is_allowed_trace_item(item: &Map<String, Value>) -> bool {
let Some(item_type) = item.get("type").and_then(Value::as_str) else {
return false;
};
if item_type == "message" {
return matches!(
item.get("role").and_then(Value::as_str),
Some("assistant" | "system" | "developer" | "user")
);
}
true
}
fn build_memory_id(index: usize, path: &Path) -> String {
let stem = path
.file_stem()
.map(|stem| stem.to_string_lossy().into_owned())
.filter(|stem| !stem.is_empty())
.unwrap_or_else(|| "memory".to_string());
format!("memory_{index}_{stem}")
}
#[cfg(test)]
mod tests {
use super::load_trace_items;
use super::load_trace_text;
use super::normalize_trace_items;
use pretty_assertions::assert_eq;
use std::path::Path;
use tempfile::tempdir;
#[test]
fn normalize_trace_items_handles_payload_wrapper_and_message_role_filtering() {
let items = vec![
serde_json::json!({
"type": "response_item",
"payload": {"type": "message", "role": "assistant", "content": []}
}),
serde_json::json!({
"type": "response_item",
"payload": [
{"type": "message", "role": "user", "content": []},
{"type": "message", "role": "tool", "content": []},
{"type": "function_call", "name": "shell", "arguments": "{}", "call_id": "c1"}
]
}),
serde_json::json!({
"type": "not_response_item",
"payload": {"type": "message", "role": "assistant", "content": []}
}),
serde_json::json!({
"type": "message",
"role": "developer",
"content": []
}),
];
let normalized = normalize_trace_items(items, Path::new("trace.json")).expect("normalize");
let expected = vec![
serde_json::json!({"type": "message", "role": "assistant", "content": []}),
serde_json::json!({"type": "message", "role": "user", "content": []}),
serde_json::json!({"type": "function_call", "name": "shell", "arguments": "{}", "call_id": "c1"}),
serde_json::json!({"type": "message", "role": "developer", "content": []}),
];
assert_eq!(normalized, expected);
}
#[test]
fn load_trace_items_supports_jsonl_arrays_and_objects() {
let text = r#"
{"type":"response_item","payload":{"type":"message","role":"assistant","content":[]}}
[{"type":"message","role":"user","content":[]},{"type":"message","role":"tool","content":[]}]
"#;
let loaded = load_trace_items(Path::new("trace.jsonl"), text).expect("load");
let expected = vec![
serde_json::json!({"type":"message","role":"assistant","content":[]}),
serde_json::json!({"type":"message","role":"user","content":[]}),
];
assert_eq!(loaded, expected);
}
#[tokio::test]
async fn load_trace_text_decodes_utf8_sig() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("trace.json");
tokio::fs::write(
&path,
[
0xEF, 0xBB, 0xBF, b'[', b'{', b'"', b't', b'y', b'p', b'e', b'"', b':', b'"', b'm',
b'e', b's', b's', b'a', b'g', b'e', b'"', b',', b'"', b'r', b'o', b'l', b'e', b'"',
b':', b'"', b'u', b's', b'e', b'r', b'"', b',', b'"', b'c', b'o', b'n', b't', b'e',
b'n', b't', b'"', b':', b'[', b']', b'}', b']',
],
)
.await
.expect("write");
let text = load_trace_text(&path).await.expect("decode");
assert!(text.starts_with('['));
}
}