save session metadata

This commit is contained in:
Ryan Ragona
2025-04-26 14:25:26 -07:00
parent e782378176
commit 96d8d2a37a
15 changed files with 211 additions and 26 deletions

3
codex-rs/Cargo.lock generated
View File

@@ -579,6 +579,7 @@ dependencies = [
"anyhow",
"clap 4.5.37",
"codex-core",
"serde",
"tokio",
"tracing",
"tracing-subscriber",
@@ -611,6 +612,7 @@ dependencies = [
"anyhow",
"clap 4.5.37",
"codex-core",
"serde",
"tokio",
]
@@ -623,6 +625,7 @@ dependencies = [
"codex-core",
"owo-colors 4.2.0",
"rand 0.9.1",
"serde",
"tokio",
"tracing",
"tracing-subscriber",

View File

@@ -5,8 +5,9 @@ use clap::ValueEnum;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, ValueEnum)]
#[derive(Clone, Debug, ValueEnum, Serialize, Deserialize)]
#[value(rename_all = "kebab-case")]
pub enum ApprovalModeCliArg {
/// Run all commands without asking for user approval.
@@ -24,7 +25,7 @@ pub enum ApprovalModeCliArg {
Never,
}
#[derive(Clone, Debug, ValueEnum)]
#[derive(Clone, Debug, ValueEnum, Serialize, Deserialize)]
#[value(rename_all = "kebab-case")]
pub enum SandboxModeCliArg {
/// Network syscalls will be blocked

View File

@@ -24,3 +24,6 @@ tokio = { version = "1", features = [
] }
tracing = { version = "0.1.41", features = ["log"] }
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
# For serialising the `Cli` struct into the on-disk session metadata.
serde = { version = "1.0", features = ["derive"] }

View File

@@ -1,7 +1,12 @@
use clap::Parser;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
#[derive(Parser, Debug)]
/// Command-line interface for the non-interactive `codex-exec` agent.
///
/// The struct needs to be serialisable so the full invocation can be stored
/// in the on-disk session `meta.json` for later introspection.
#[derive(Parser, Debug, Clone, Serialize, Deserialize)]
#[command(version)]
pub struct Cli {
/// Optional image(s) to attach to the initial prompt.

View File

@@ -15,6 +15,7 @@ path = "src/lib.rs"
anyhow = "1"
clap = { version = "4", features = ["derive"] }
codex-core = { path = "../core", features = ["cli"] }
serde = { version = "1.0", features = ["derive"] }
tokio = { version = "1", features = [
"io-std",
"macros",

View File

@@ -1,9 +1,10 @@
use clap::Parser;
use codex_core::ApprovalModeCliArg;
use codex_core::SandboxModeCliArg;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone, Serialize, Deserialize)]
#[command(version)]
pub struct Cli {
/// Optional image(s) to attach to the initial prompt.

View File

@@ -26,3 +26,6 @@ tokio = { version = "1", features = [
] }
tracing = { version = "0.1.41", features = ["log"] }
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
# For serialising the `Cli` struct into the on-disk session metadata.
serde = { version = "1.0", features = ["derive"] }

View File

@@ -2,10 +2,16 @@ use clap::ArgAction;
use clap::Parser;
use codex_core::ApprovalModeCliArg;
use codex_core::SandboxModeCliArg;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// Commandline arguments.
#[derive(Debug, Parser)]
/// Command-line interface for the interactive `codex-repl` agent.
///
/// Making the struct serialisable allows us to persist the full configuration
/// inside the session metadata so we can inspect the exact flags that were
/// used to launch the session at a later time.
#[derive(Debug, Parser, Clone, Serialize, Deserialize)]
#[command(
author,
version,

19
codex-rs/session/build.rs Normal file
View File

@@ -0,0 +1,19 @@
// build.rs: emit the current git commit so the code can embed it in the
// session metadata file.
/// Ask `git` for the commit hash of `HEAD`.
///
/// Returns `None` when the `git` binary cannot be launched, when it exits
/// with a non-zero status, or when its output is not valid UTF-8.
fn head_commit() -> Option<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "HEAD"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout = String::from_utf8(output.stdout).ok()?;
    // Strip the trailing newline so the value fits on one directive line.
    Some(stdout.trim().to_owned())
}

/// Build-script entry point: exports the commit hash to the crate as the
/// compile-time environment variable `GIT_SHA`.
fn main() {
    // Fall back to "unknown" so the build does not break when the source is
    // not a git repository (e.g., during `cargo publish`).
    let git_sha = match head_commit() {
        Some(sha) => sha,
        None => String::from("unknown"),
    };
    println!("cargo:rustc-env=GIT_SHA={git_sha}");
}

View File

@@ -0,0 +1,10 @@
//! Build-time information helpers (git commit hash, version, …).
/// Return the git commit hash that was recorded at compile time via the
/// `build.rs` build script.
///
/// Yields the static string "unknown" when no hash is available: either the
/// build script could not determine it (e.g. when building from a source
/// tarball without the `.git` directory) or the `GIT_SHA` variable was never
/// set because the build script did not run at all.
pub fn git_sha() -> &'static str {
    // `option_env!` instead of `env!`: a missing `GIT_SHA` then degrades to
    // the documented "unknown" fallback rather than failing the compilation.
    option_env!("GIT_SHA").unwrap_or("unknown")
}

View File

@@ -8,6 +8,7 @@
//! The `create` command therefore has mutually exclusive sub-commands so the appropriate
//! arguments can be forwarded to the underlying agent binaries.
use crate::meta::{AgentCli, SessionMeta};
use crate::spawn;
use crate::store;
use anyhow::Context;
@@ -142,31 +143,44 @@ impl CreateCmd {
// bubbling up the error to the caller.
// -----------------------------------------------------------------
let spawn_result: Result<(u32, Option<String>, store::SessionKind)> = (|| match self.agent {
// Capture the child PID *and* the full CLI config so we can persist it
// in the metadata file.
let spawn_result: Result<(
u32, // pid
Option<String>, // prompt preview
store::SessionKind, // kind
AgentCli, // full CLI config
)> = (|| match self.agent {
AgentKind::Exec(cmd) => {
let args = build_exec_args(&cmd.exec_cli);
let child = spawn::spawn_exec(&paths, &args)?;
let preview = cmd.exec_cli.prompt.as_ref().map(|p| truncate_preview(p));
Ok((
child.id().unwrap_or_default(),
preview,
store::SessionKind::Exec,
AgentCli::Exec(cmd.exec_cli.clone()),
))
}
#[cfg(unix)]
AgentKind::Repl(cmd) => {
let args = build_repl_args(&cmd.repl_cli);
let child = spawn::spawn_repl(&paths, &args)?;
let preview = cmd.repl_cli.prompt.as_ref().map(|p| truncate_preview(p));
Ok((
child.id().unwrap_or_default(),
preview,
store::SessionKind::Repl,
AgentCli::Repl(cmd.repl_cli.clone()),
))
}
})();
let (pid, prompt_preview, kind) = match spawn_result {
let (pid, prompt_preview, kind, cli_cfg) = match spawn_result {
Ok(tuple) => tuple,
Err(err) => {
// Best-effort clean-up; ignore failures so we don't mask the
@@ -177,13 +191,9 @@ impl CreateCmd {
};
// Persist metadata **after** the process has been spawned so we can record its PID.
let meta = store::SessionMeta {
id: id.clone(),
pid,
kind,
created_at: chrono::Utc::now(),
prompt_preview,
};
// Persist metadata **after** the process has been spawned so we can record its PID.
let meta = SessionMeta::new(id.clone(), pid, kind, cli_cfg, prompt_preview);
store::write_meta(&paths, &meta)?;
println!("{id}");

View File

@@ -8,6 +8,8 @@
pub mod cli; // public so main.rs can access it.
mod spawn; // process creation helpers
pub mod store; // on-disk bookkeeping (public for tests)
pub mod meta; // richer on-disk metadata envelope
pub mod build; // build-time information helpers
pub use cli::Cli;

View File

@@ -0,0 +1,89 @@
//! Rich on-disk session metadata envelope.
//!
//! The file is written as `meta.json` inside every session directory so users
//! (and other tools) can inspect how a particular session was started even
//! months later. Keeping the full CLI invocation together with a few extra
//! bits of contextual information (like the git commit of the build) makes
//! debugging and reproducibility significantly easier.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::store::SessionKind;
/// The CLI configuration that was used to launch the underlying agent.
///
/// Depending on the chosen agent flavour (`codex-exec` vs `codex-repl`) the
/// contained configuration differs. The `#[serde(tag = "agent")]` attribute
/// makes this an *internally tagged* enum: the variant name is written
/// (lower-cased via `rename_all`) into an `"agent"` field that sits next to
/// the fields of the wrapped `Cli` struct, so the JSON clearly states which
/// variant was used without adding an extra level of nesting.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "agent", rename_all = "lowercase")]
pub enum AgentCli {
/// Non-interactive batch agent.
Exec(codex_exec::Cli),
/// Interactive REPL agent (only compiled on Unix-like targets).
#[cfg(unix)]
Repl(codex_repl::Cli),
}
/// Versioned envelope that is persisted to disk.
///
/// The `version` field records which schema revision wrote the file so the
/// format can evolve over time.
///
/// NOTE(review): none of the non-optional fields carries `#[serde(default)]`,
/// so a file that lacks e.g. `cli`, `codex_commit` or `version` is presumably
/// rejected on load. Confirm how `meta.json` files written before this
/// envelope existed are meant to be handled.
#[derive(Debug, Serialize, Deserialize)]
pub struct SessionMeta {
/// Unique identifier; also doubles as the directory name.
pub id: String,
/// Process ID of the *leader* process belonging to the session.
pub pid: u32,
/// Whether the session is an `exec` or `repl` one.
pub kind: SessionKind,
/// Complete CLI configuration that was used to spawn the agent.
pub cli: AgentCli,
/// Short preview of the natural-language prompt (if present).
/// Omitted from the serialised output when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_preview: Option<String>,
/// Wall-clock timestamp (UTC) when the session was created.
pub created_at: DateTime<Utc>,
/// Git commit hash of the `codex-rs` build that produced this file.
pub codex_commit: String,
/// Schema version so we can migrate later.
pub version: u8,
}
impl SessionMeta {
    /// Schema version written by this build. Bump this whenever the structure
    /// changes in a backwards-incompatible way.
    pub const CURRENT_VERSION: u8 = 1;

    /// Build the metadata record for a session.
    ///
    /// The caller supplies the session `id`, the `pid` of the spawned
    /// process, the session `kind`, the full `cli` configuration and an
    /// optional `prompt_preview`. The remaining fields are filled in here:
    ///
    /// * `created_at` is the current UTC time at the moment of the call,
    /// * `codex_commit` is the hash reported by `crate::build::git_sha()`,
    /// * `version` is [`Self::CURRENT_VERSION`].
    pub fn new(
        id: String,
        pid: u32,
        kind: SessionKind,
        cli: AgentCli,
        prompt_preview: Option<String>,
    ) -> Self {
        Self {
            id,
            pid,
            kind,
            cli,
            prompt_preview,
            created_at: Utc::now(),
            codex_commit: crate::build::git_sha().to_owned(),
            version: Self::CURRENT_VERSION,
        }
    }
}

View File

@@ -6,8 +6,11 @@
use anyhow::Context;
use anyhow::Result;
use serde::Deserialize;
use serde::Serialize;
// The rich metadata envelope lives in its own module so other parts of the
// crate can import it without pulling in the whole `store` implementation.
use crate::meta::SessionMeta;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
#[derive(Clone, Debug)]
@@ -74,16 +77,9 @@ fn base_dir() -> Result<PathBuf> {
Ok(home.join(".codex").join("sessions"))
}
#[derive(Serialize, Deserialize, Debug)]
pub struct SessionMeta {
pub id: String,
pub pid: u32,
pub created_at: chrono::DateTime<chrono::Utc>,
#[serde(default)]
pub kind: SessionKind,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_preview: Option<String>,
}
// Keep the original `SessionKind` enum here so we don't need a breaking change
// in all call-sites. The enum is re-exported so other modules (e.g. the newly
// added `meta` module) can still rely on the single source of truth.
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]

View File

@@ -0,0 +1,36 @@
//! Simple round-trip test that serialises a freshly constructed `SessionMeta`
//! and deserialises it back to ensure the schema is self-consistent.
use codex_session::meta::{AgentCli, SessionMeta};
use codex_session::store::SessionKind;
/// Serialise a freshly built `SessionMeta` to JSON, parse it back, and check
/// that every caller-visible field survives the round trip.
#[test]
fn meta_round_trip() {
    let exec_cli = codex_exec::Cli {
        images: vec![],
        model: Some("gpt-4o-mini".into()),
        skip_git_repo_check: true,
        disable_response_storage: false,
        prompt: Some("hello world".into()),
    };

    // Take the preview first so the CLI struct can be moved into the enum
    // without cloning the whole configuration.
    let prompt_preview = exec_cli.prompt.clone();
    let meta = SessionMeta::new(
        "test-session".into(),
        42,
        SessionKind::Exec,
        AgentCli::Exec(exec_cli),
        prompt_preview,
    );

    // Serialise with pretty printer so humans can read the file as well.
    let json = serde_json::to_string_pretty(&meta).expect("serialise");

    // … and parse it back.
    let de: SessionMeta = serde_json::from_str(&json).expect("deserialise");

    assert_eq!(de.version, SessionMeta::CURRENT_VERSION);
    assert_eq!(de.id, "test-session");
    assert_eq!(de.pid, 42);
    assert_eq!(de.kind, SessionKind::Exec);
    assert_eq!(de.prompt_preview.as_deref(), Some("hello world"));
    assert_eq!(de.codex_commit, meta.codex_commit);
    assert!(matches!(de.cli, AgentCli::Exec(_)));
}