mirror of
https://github.com/openai/codex.git
synced 2026-02-03 15:33:41 +00:00
Compare commits
38 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce9347388a | ||
|
|
d6515aa010 | ||
|
|
37b3807f96 | ||
|
|
7b8533fdbe | ||
|
|
5f47ab64c4 | ||
|
|
ac2b3ec2bb | ||
|
|
c052b89333 | ||
|
|
b424ca93ab | ||
|
|
32bd302d80 | ||
|
|
39c72b3151 | ||
|
|
2cdfd38c24 | ||
|
|
fc79a46c7a | ||
|
|
010dfa7751 | ||
|
|
54b9436699 | ||
|
|
af3bf801ce | ||
|
|
5fb6cbbcca | ||
|
|
7bdf63a009 | ||
|
|
119dabd272 | ||
|
|
c0baaa171b | ||
|
|
b45c204109 | ||
|
|
b70dcd80a2 | ||
|
|
c0f8a49e3e | ||
|
|
87362d6ebd | ||
|
|
f073bc5ccf | ||
|
|
9320565658 | ||
|
|
4de5b25c52 | ||
|
|
90b2f096c3 | ||
|
|
f3c57ab888 | ||
|
|
43ee0dfd19 | ||
|
|
c9d9a40c98 | ||
|
|
ab3d607be4 | ||
|
|
f7d8e12ae0 | ||
|
|
a8278b5423 | ||
|
|
cb99d71f57 | ||
|
|
f72e9da7c5 | ||
|
|
732c435345 | ||
|
|
f5e055ae36 | ||
|
|
8245a4f53b |
31
codex-rs/Cargo.lock
generated
31
codex-rs/Cargo.lock
generated
@@ -941,6 +941,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
"assert_matches",
|
||||
"chrono",
|
||||
"clap",
|
||||
"clap_complete",
|
||||
"codex-app-server",
|
||||
@@ -951,6 +952,7 @@ dependencies = [
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-exec",
|
||||
"codex-infty",
|
||||
"codex-login",
|
||||
"codex-mcp-server",
|
||||
"codex-process-hardening",
|
||||
@@ -959,14 +961,20 @@ dependencies = [
|
||||
"codex-responses-api-proxy",
|
||||
"codex-rmcp-client",
|
||||
"codex-tui",
|
||||
"crossterm",
|
||||
"ctor 0.5.0",
|
||||
"owo-colors",
|
||||
"predicates",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"supports-color",
|
||||
"tempfile",
|
||||
"textwrap 0.16.2",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-appender",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1076,6 +1084,7 @@ dependencies = [
|
||||
"thiserror 2.0.16",
|
||||
"time",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-test",
|
||||
"tokio-util",
|
||||
"toml",
|
||||
@@ -1188,6 +1197,27 @@ dependencies = [
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-infty"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"codex-core",
|
||||
"codex-protocol",
|
||||
"core_test_support",
|
||||
"dirs",
|
||||
"futures",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-linux-sandbox"
|
||||
version = "0.0.0"
|
||||
@@ -6244,6 +6274,7 @@ dependencies = [
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -6,7 +6,7 @@ members = [
|
||||
"app-server-protocol",
|
||||
"apply-patch",
|
||||
"arg0",
|
||||
"feedback",
|
||||
"codex-infty",
|
||||
"codex-backend-openapi-models",
|
||||
"cloud-tasks",
|
||||
"cloud-tasks-client",
|
||||
@@ -15,6 +15,7 @@ members = [
|
||||
"core",
|
||||
"exec",
|
||||
"execpolicy",
|
||||
"feedback",
|
||||
"file-search",
|
||||
"git-tooling",
|
||||
"linux-sandbox",
|
||||
|
||||
@@ -35,6 +35,7 @@ codex-tui = { workspace = true }
|
||||
codex-rmcp-client = { workspace = true }
|
||||
codex-cloud-tasks = { path = "../cloud-tasks" }
|
||||
ctor = { workspace = true }
|
||||
crossterm = { workspace = true }
|
||||
owo-colors = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
supports-color = { workspace = true }
|
||||
@@ -45,6 +46,13 @@ tokio = { workspace = true, features = [
|
||||
"rt-multi-thread",
|
||||
"signal",
|
||||
] }
|
||||
codex-infty = { path = "../codex-infty" }
|
||||
chrono = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-appender = "0.2.3"
|
||||
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
|
||||
textwrap = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = { workspace = true }
|
||||
|
||||
115
codex-rs/cli/src/infty/args.rs
Normal file
115
codex-rs/cli/src/infty/args.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use clap::Subcommand;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_protocol::config_types::ReasoningEffort;
|
||||
|
||||
use super::commands;
|
||||
|
||||
/// Top-level argument set for the `codex infty` subcommand.
#[derive(Debug, Parser)]
pub struct InftyCli {
    // Shared `-c key=value` configuration overrides, flattened into this parser.
    #[clap(flatten)]
    pub config_overrides: CliConfigOverrides,

    /// Override the default runs root (`~/.codex/infty`).
    #[arg(long = "runs-root", value_name = "DIR")]
    pub runs_root: Option<PathBuf>,

    // The subcommand to execute (create/list/show).
    #[command(subcommand)]
    command: InftyCommand,
}
|
||||
|
||||
/// Subcommands available under `codex infty`.
#[derive(Debug, Subcommand)]
enum InftyCommand {
    /// Create a new run store and spawn solver/director sessions.
    Create(CreateArgs),

    /// List stored runs.
    List(ListArgs),

    /// Show metadata for a stored run.
    Show(ShowArgs),
    // resumable runs are disabled; Drive command removed
}
|
||||
|
||||
/// Arguments for `codex infty create`.
#[derive(Debug, Parser)]
pub(crate) struct CreateArgs {
    /// Explicit run id. If omitted, a timestamp-based id is generated.
    #[arg(long = "run-id", value_name = "RUN_ID")]
    pub run_id: Option<String>,

    /// Optional objective to send to the solver immediately after creation.
    #[arg(long)]
    pub objective: Option<String>,

    /// Timeout in seconds when waiting for the solver reply to --objective.
    #[arg(long = "timeout-secs", default_value_t = super::commands::DEFAULT_TIMEOUT_SECS)]
    pub timeout_secs: u64,

    /// Override only the Director's model (solver and verifiers keep defaults).
    #[arg(long = "director-model", value_name = "MODEL")]
    pub director_model: Option<String>,

    /// Override only the Director's reasoning effort (minimal|low|medium|high).
    #[arg(
        long = "director-effort",
        value_name = "LEVEL",
        value_parser = parse_reasoning_effort
    )]
    pub director_effort: Option<ReasoningEffort>,
}
|
||||
|
||||
/// Arguments for `codex infty list`.
#[derive(Debug, Parser)]
pub(crate) struct ListArgs {
    /// Emit JSON describing the stored runs.
    #[arg(long)]
    pub json: bool,
}
|
||||
|
||||
/// Arguments for `codex infty show`.
#[derive(Debug, Parser)]
pub(crate) struct ShowArgs {
    /// Run id to display.
    #[arg(value_name = "RUN_ID")]
    pub run_id: String,

    /// Emit JSON metadata instead of human-readable text.
    #[arg(long)]
    pub json: bool,
}
|
||||
|
||||
// resumable runs are disabled; DriveArgs removed
|
||||
|
||||
impl InftyCli {
|
||||
pub async fn run(self) -> Result<()> {
|
||||
let InftyCli {
|
||||
config_overrides,
|
||||
runs_root,
|
||||
command,
|
||||
} = self;
|
||||
|
||||
match command {
|
||||
InftyCommand::Create(args) => {
|
||||
commands::run_create(config_overrides, runs_root, args).await?;
|
||||
}
|
||||
InftyCommand::List(args) => commands::run_list(runs_root, args)?,
|
||||
InftyCommand::Show(args) => commands::run_show(runs_root, args)?,
|
||||
// Drive removed
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_reasoning_effort(s: &str) -> Result<ReasoningEffort, String> {
|
||||
match s.trim().to_ascii_lowercase().as_str() {
|
||||
"minimal" => Ok(ReasoningEffort::Minimal),
|
||||
"low" => Ok(ReasoningEffort::Low),
|
||||
"medium" => Ok(ReasoningEffort::Medium),
|
||||
"high" => Ok(ReasoningEffort::High),
|
||||
_ => Err(format!(
|
||||
"invalid reasoning effort: {s}. Expected one of: minimal|low|medium|high"
|
||||
)),
|
||||
}
|
||||
}
|
||||
438
codex-rs/cli/src/infty/commands.rs
Normal file
438
codex-rs/cli/src/infty/commands.rs
Normal file
@@ -0,0 +1,438 @@
|
||||
use std::fs;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use anyhow::bail;
|
||||
use chrono::SecondsFormat;
|
||||
use chrono::Utc;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::auth::read_codex_api_key_from_env;
|
||||
use codex_core::auth::read_openai_api_key_from_env;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_infty::InftyOrchestrator;
|
||||
use codex_infty::RoleConfig;
|
||||
use codex_infty::RunExecutionOptions;
|
||||
use codex_infty::RunParams;
|
||||
use codex_infty::RunStore;
|
||||
use owo_colors::OwoColorize;
|
||||
use serde::Serialize;
|
||||
use std::sync::OnceLock;
|
||||
use supports_color::Stream;
|
||||
use tracing_appender::non_blocking;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
use super::args::CreateArgs;
|
||||
use super::args::ListArgs;
|
||||
use super::args::ShowArgs;
|
||||
use super::progress::TerminalProgressReporter;
|
||||
use super::summary::print_run_summary_box;
|
||||
|
||||
// Role names spawned as verifiers for every new run; tests pin these exact values.
const DEFAULT_VERIFIER_ROLES: [&str; 3] = ["verifier-alpha", "verifier-beta", "verifier-gamma"];

// Default wait (seconds) for director/verifier replies; referenced by clap's
// `default_value_t` in `CreateArgs`.
pub(crate) const DEFAULT_TIMEOUT_SECS: u64 = 6000;

/// Serializable view of one stored run, used for both `list` and `show` output.
#[derive(Debug, Serialize)]
struct RunSummary {
    run_id: String,
    // Display form of the run directory path.
    path: String,
    // RFC 3339 timestamps (seconds precision, UTC "Z" suffix).
    created_at: String,
    updated_at: String,
    roles: Vec<String>,
}
|
||||
|
||||
/// Handle `codex infty create`: build a new run store and, when `--objective`
/// is given, execute the run end-to-end and print a summary box.
///
/// Errors if config/auth loading fails, the run id is invalid, or a run with
/// the same id already exists under the runs root.
pub(crate) async fn run_create(
    config_overrides: CliConfigOverrides,
    runs_root_override: Option<PathBuf>,
    args: CreateArgs,
) -> Result<()> {
    let config = load_config(config_overrides).await?;
    init_infty_logging(&config)?;
    let auth = load_auth(&config)?;
    let runs_root = resolve_runs_root(runs_root_override)?;
    let color_enabled = supports_color::on(Stream::Stdout).is_some();

    // Explicit id wins; otherwise derive one from the current UTC time.
    let mut run_id = if let Some(id) = args.run_id {
        id
    } else {
        generate_run_id()
    };
    run_id = run_id.trim().to_string();
    validate_run_id(&run_id)?;

    // Refuse to clobber an existing run directory.
    let run_path = runs_root.join(&run_id);
    if run_path.exists() {
        bail!("run {run_id} already exists at {}", run_path.display());
    }

    let orchestrator = InftyOrchestrator::with_runs_root(auth, runs_root).with_progress(Arc::new(
        TerminalProgressReporter::with_color(color_enabled),
    ));
    // Verifiers all share the base config; only the director may be customized.
    let verifiers: Vec<RoleConfig> = DEFAULT_VERIFIER_ROLES
        .iter()
        .map(|role| RoleConfig::new(role.to_string(), config.clone()))
        .collect();
    let mut director_config = config.clone();
    if let Some(model) = args.director_model.as_deref() {
        director_config.model = model.to_string();
    }
    if let Some(effort) = args.director_effort {
        director_config.model_reasoning_effort = Some(effort);
    }
    let run_params = RunParams {
        run_id: run_id.clone(),
        run_root: Some(run_path.clone()),
        solver: RoleConfig::new("solver", config.clone()),
        director: RoleConfig::new("director", director_config),
        verifiers,
    };

    if let Some(objective) = args.objective {
        // With an objective we drive the run to completion right away.
        // The same timeout applies to both director and verifier waits.
        let timeout = Duration::from_secs(args.timeout_secs);
        let options = RunExecutionOptions {
            objective: Some(objective),
            director_timeout: timeout,
            verifier_timeout: timeout,
        };

        let start = Instant::now();
        let start_header = format!("Starting run {run_id}");
        if color_enabled {
            println!("{}", start_header.blue().bold());
        } else {
            println!("{start_header}");
        }
        let location_line = format!(" run directory: {}", run_path.display());
        if color_enabled {
            println!("{}", location_line.dimmed());
        } else {
            println!("{location_line}");
        }
        if let Some(objective_text) = options.objective.as_deref()
            && !objective_text.trim().is_empty()
        {
            let objective_line = format!(" objective: {objective_text}");
            if color_enabled {
                println!("{}", objective_line.dimmed());
            } else {
                println!("{objective_line}");
            }
        }
        println!();

        // Snapshot before `options` is moved into the orchestrator call.
        let objective_snapshot = options.objective.clone();
        let outcome = orchestrator
            .execute_new_run(run_params, options)
            .await
            .with_context(|| format!("failed to execute run {run_id}"))?;
        let duration = start.elapsed();
        print_run_summary_box(
            color_enabled,
            &run_id,
            &run_path,
            &outcome.deliverable_path,
            outcome.summary.as_deref(),
            objective_snapshot.as_deref(),
            duration,
        );
    } else {
        // No objective: only materialize the run store and sessions.
        let sessions = orchestrator
            .spawn_run(run_params)
            .await
            .with_context(|| format!("failed to create run {run_id}"))?;

        println!(
            "Created run {run_id} at {}",
            sessions.store.path().display()
        );
    }

    Ok(())
}
|
||||
|
||||
pub(crate) fn run_list(runs_root_override: Option<PathBuf>, args: ListArgs) -> Result<()> {
|
||||
// Initialize logging using default Codex home discovery.
|
||||
let _ = init_infty_logging_from_home();
|
||||
let runs_root = resolve_runs_root(runs_root_override)?;
|
||||
let listings = collect_run_summaries(&runs_root)?;
|
||||
|
||||
if args.json {
|
||||
println!("{}", serde_json::to_string_pretty(&listings)?);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if listings.is_empty() {
|
||||
println!("No runs found under {}", runs_root.display());
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Runs in {}", runs_root.display());
|
||||
for summary in listings {
|
||||
println!(
|
||||
"{}\t{}\t{}",
|
||||
summary.run_id, summary.updated_at, summary.path
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn run_show(runs_root_override: Option<PathBuf>, args: ShowArgs) -> Result<()> {
|
||||
validate_run_id(&args.run_id)?;
|
||||
let _ = init_infty_logging_from_home();
|
||||
let runs_root = resolve_runs_root(runs_root_override)?;
|
||||
let run_path = runs_root.join(&args.run_id);
|
||||
let store =
|
||||
RunStore::load(&run_path).with_context(|| format!("failed to load run {}", args.run_id))?;
|
||||
let metadata = store.metadata();
|
||||
|
||||
let summary = RunSummary {
|
||||
run_id: metadata.run_id.clone(),
|
||||
path: run_path.display().to_string(),
|
||||
created_at: metadata
|
||||
.created_at
|
||||
.to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
updated_at: metadata
|
||||
.updated_at
|
||||
.to_rfc3339_opts(SecondsFormat::Secs, true),
|
||||
roles: metadata
|
||||
.roles
|
||||
.iter()
|
||||
.map(|role| role.role.clone())
|
||||
.collect(),
|
||||
};
|
||||
|
||||
if args.json {
|
||||
println!("{}", serde_json::to_string_pretty(&summary)?);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Run: {}", summary.run_id);
|
||||
println!("Path: {}", summary.path);
|
||||
println!("Created: {}", summary.created_at);
|
||||
println!("Updated: {}", summary.updated_at);
|
||||
println!("Roles: {}", summary.roles.join(", "));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// resumable runs are disabled; run_drive removed
|
||||
|
||||
fn generate_run_id() -> String {
|
||||
let timestamp = Utc::now().format("run-%Y%m%d-%H%M%S");
|
||||
format!("{timestamp}")
|
||||
}
|
||||
|
||||
pub(crate) fn validate_run_id(run_id: &str) -> Result<()> {
|
||||
if run_id.is_empty() {
|
||||
bail!("run id must not be empty");
|
||||
}
|
||||
if run_id.starts_with('.') || run_id.ends_with('.') {
|
||||
bail!("run id must not begin or end with '.'");
|
||||
}
|
||||
if run_id
|
||||
.chars()
|
||||
.any(|c| !(c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.')))
|
||||
{
|
||||
bail!("run id may only contain ASCII alphanumerics, '-', '_', or '.'");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn load_config(cli_overrides: CliConfigOverrides) -> Result<Config> {
|
||||
let overrides = cli_overrides
|
||||
.parse_overrides()
|
||||
.map_err(|err| anyhow!("failed to parse -c overrides: {err}"))?;
|
||||
Config::load_with_cli_overrides(overrides, ConfigOverrides::default())
|
||||
.await
|
||||
.context("failed to load Codex configuration")
|
||||
}
|
||||
|
||||
fn load_auth(config: &Config) -> Result<CodexAuth> {
|
||||
if let Some(auth) =
|
||||
CodexAuth::from_codex_home(&config.codex_home).context("failed to read auth.json")?
|
||||
{
|
||||
return Ok(auth);
|
||||
}
|
||||
if let Some(api_key) = read_codex_api_key_from_env() {
|
||||
return Ok(CodexAuth::from_api_key(&api_key));
|
||||
}
|
||||
if let Some(api_key) = read_openai_api_key_from_env() {
|
||||
return Ok(CodexAuth::from_api_key(&api_key));
|
||||
}
|
||||
bail!("no Codex authentication found. Run `codex login` or set OPENAI_API_KEY.");
|
||||
}
|
||||
|
||||
fn resolve_runs_root(override_path: Option<PathBuf>) -> Result<PathBuf> {
|
||||
if let Some(path) = override_path {
|
||||
return Ok(path);
|
||||
}
|
||||
codex_infty::default_runs_root()
|
||||
}
|
||||
|
||||
/// Scan `root` for run directories and build summaries, newest-updated first.
///
/// A missing root is treated as "no runs" rather than an error; directories
/// whose metadata cannot be loaded are skipped with a warning on stderr so a
/// single corrupt run cannot break `list`.
fn collect_run_summaries(root: &Path) -> Result<Vec<RunSummary>> {
    let mut summaries = Vec::new();
    let iter = match fs::read_dir(root) {
        Ok(read_dir) => read_dir,
        // No runs root yet (nothing created): report an empty listing.
        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(summaries),
        Err(err) => {
            return Err(
                anyhow!(err).context(format!("failed to read runs root {}", root.display()))
            );
        }
    };

    for entry in iter {
        let entry = entry?;
        // Only directories can be run stores; ignore stray files.
        if !entry.file_type()?.is_dir() {
            continue;
        }
        let run_path = entry.path();
        let store = match RunStore::load(&run_path) {
            Ok(store) => store,
            // Best-effort: warn and keep going rather than failing the listing.
            Err(err) => {
                eprintln!(
                    "Skipping {}: failed to load run metadata: {err}",
                    run_path.display()
                );
                continue;
            }
        };
        let metadata = store.metadata();
        summaries.push(RunSummary {
            run_id: metadata.run_id.clone(),
            path: run_path.display().to_string(),
            created_at: metadata
                .created_at
                .to_rfc3339_opts(SecondsFormat::Secs, true),
            updated_at: metadata
                .updated_at
                .to_rfc3339_opts(SecondsFormat::Secs, true),
            roles: metadata
                .roles
                .iter()
                .map(|role| role.role.clone())
                .collect(),
        });
    }

    // Lexicographic sort is chronological here because the timestamps are
    // fixed-width RFC 3339 strings in UTC ("Z"); descending = newest first.
    summaries.sort_by(|a, b| b.updated_at.cmp(&a.updated_at));
    Ok(summaries)
}
|
||||
|
||||
/// Set up file logging (`<log_dir>/codex-infty.log`) using the loaded config.
///
/// Safe to call more than once: the tracing registry is only installed on the
/// first successful `try_init`, and the non-blocking writer guard is stored in
/// a process-wide `OnceLock` so the background writer outlives this call.
/// NOTE(review): this largely duplicates `init_infty_logging_from_home`;
/// consider extracting the shared body.
fn init_infty_logging(config: &codex_core::config::Config) -> std::io::Result<()> {
    let log_dir = codex_core::config::log_dir(config)?;
    std::fs::create_dir_all(&log_dir)?;

    let mut log_file_opts = OpenOptions::new();
    log_file_opts.create(true).append(true);

    // Restrict the log file to the owner on Unix; logs may contain details
    // about the user's session.
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        log_file_opts.mode(0o600);
    }

    let log_file = log_file_opts.open(log_dir.join("codex-infty.log"))?;
    let (non_blocking, guard) = non_blocking(log_file);
    // Keep the worker guard alive for the process lifetime; dropping it would
    // stop the background writer. On repeat calls the new guard is discarded.
    static INFTY_LOG_GUARD: OnceLock<tracing_appender::non_blocking::WorkerGuard> = OnceLock::new();
    let _ = INFTY_LOG_GUARD.set(guard);

    // Use RUST_LOG if set, otherwise default to info for common codex crates
    let env_filter = || {
        EnvFilter::try_from_default_env()
            .unwrap_or_else(|_| EnvFilter::new("codex_core=info,codex_infty=info,codex_cli=info"))
    };

    let file_layer = tracing_subscriber::fmt::layer()
        .with_writer(non_blocking)
        .with_target(false)
        .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
        .with_filter(env_filter());

    // Initialize once; subsequent calls are no‑ops.
    let _ = tracing_subscriber::registry().with(file_layer).try_init();
    Ok(())
}
|
||||
|
||||
/// Variant of `init_infty_logging` for commands that have no loaded `Config`:
/// discovers the Codex home directly and logs to `<home>/log/codex-infty.log`.
///
/// Same idempotence properties as `init_infty_logging`: guard held in a
/// `OnceLock`, registry installed at most once via `try_init`.
fn init_infty_logging_from_home() -> std::io::Result<()> {
    let mut log_dir = codex_core::config::find_codex_home()?;
    log_dir.push("log");
    std::fs::create_dir_all(&log_dir)?;

    let mut log_file_opts = OpenOptions::new();
    log_file_opts.create(true).append(true);

    // Owner-only permissions on Unix, matching `init_infty_logging`.
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        log_file_opts.mode(0o600);
    }

    let log_file = log_file_opts.open(log_dir.join("codex-infty.log"))?;
    let (non_blocking, guard) = non_blocking(log_file);
    // Process-wide guard keeps the background log writer alive.
    static INFTY_LOG_GUARD: OnceLock<tracing_appender::non_blocking::WorkerGuard> = OnceLock::new();
    let _ = INFTY_LOG_GUARD.set(guard);

    // RUST_LOG wins; otherwise default to info for the codex crates.
    let env_filter = || {
        EnvFilter::try_from_default_env()
            .unwrap_or_else(|_| EnvFilter::new("codex_core=info,codex_infty=info,codex_cli=info"))
    };

    let file_layer = tracing_subscriber::fmt::layer()
        .with_writer(non_blocking)
        .with_target(false)
        .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
        .with_filter(env_filter());

    // No-op if a global subscriber is already installed.
    let _ = tracing_subscriber::registry().with(file_layer).try_init();
    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Pins the verifier role names; changing them is a visible behavior change.
    #[test]
    fn default_verifier_roles_are_stable() {
        assert_eq!(
            DEFAULT_VERIFIER_ROLES,
            ["verifier-alpha", "verifier-beta", "verifier-gamma"]
        );
    }

    // Covers accept and reject cases of the run-id character/shape rules.
    #[test]
    fn validates_run_ids() {
        assert!(validate_run_id("run-20241030-123000").is_ok());
        assert!(validate_run_id("run.alpha").is_ok());
        assert!(validate_run_id("").is_err());
        assert!(validate_run_id("..bad").is_err());
        assert!(validate_run_id("bad/value").is_err());
    }

    // Generated ids must keep the fixed `run-YYYYMMDD-HHMMSS` shape.
    #[test]
    fn generates_timestamped_run_id() {
        let run_id = generate_run_id();
        assert!(run_id.starts_with("run-"));
        assert_eq!(run_id.len(), "run-YYYYMMDD-HHMMSS".len());
    }

    // A nonexistent runs root is "no runs", not an error.
    #[test]
    fn collect_summaries_returns_empty_for_missing_root() {
        let temp = TempDir::new().expect("temp dir");
        let missing = temp.path().join("not-present");
        let summaries = collect_run_summaries(&missing).expect("collect");
        assert!(summaries.is_empty());
    }
}
|
||||
6
codex-rs/cli/src/infty/mod.rs
Normal file
6
codex-rs/cli/src/infty/mod.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
//! `codex infty` subcommand: CLI argument parsing, command handlers, terminal
//! progress reporting, and the end-of-run summary box.

mod args;
mod commands;
mod progress;
mod summary;

// Only the clap entry point is exposed to the rest of the CLI crate.
pub use args::InftyCli;
|
||||
194
codex-rs/cli/src/infty/progress.rs
Normal file
194
codex-rs/cli/src/infty/progress.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
use chrono::Local;
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_infty::AggregatedVerifierVerdict;
|
||||
use codex_infty::DirectiveResponse;
|
||||
use codex_infty::ProgressReporter;
|
||||
use codex_infty::VerifierDecision;
|
||||
use codex_infty::VerifierVerdict;
|
||||
use crossterm::style::Stylize;
|
||||
use std::path::Path;
|
||||
use supports_color::Stream;
|
||||
|
||||
/// Zero-sized reporter that prints run progress (role exchanges, verdicts,
/// delivery) to stdout and mirrors raw events into tracing logs.
#[derive(Debug, Default, Clone)]
pub(crate) struct TerminalProgressReporter;
|
||||
|
||||
impl TerminalProgressReporter {
    /// Construct the reporter.
    /// NOTE(review): the `_color_enabled` flag is currently ignored — the
    /// helpers below always apply crossterm styling and separately probe
    /// `supports_color` for timestamps; confirm whether the flag should be
    /// honored throughout.
    pub(crate) fn with_color(_color_enabled: bool) -> Self {
        Self
    }

    // Render a `[role]` tag with a role-specific color (solver=magenta,
    // director=blue, user=cyan, any verifier=green, fallback=magenta).
    fn format_role_label(&self, role: &str) -> String {
        let lower = role.to_ascii_lowercase();
        if lower == "solver" {
            return "[solver]".magenta().bold().to_string();
        }
        if lower == "director" {
            return "[director]".blue().bold().to_string();
        }
        if lower == "user" {
            return "[user]".cyan().bold().to_string();
        }
        if lower.contains("verifier") {
            return format!("[{role}]").green().bold().to_string();
        }
        format!("[{role}]").magenta().bold().to_string()
    }

    // Local wall-clock time as a dimmed `[HH:MM:SS]` prefix (dimming only
    // when stdout supports color).
    fn timestamp(&self) -> String {
        let timestamp = Local::now().format("%H:%M:%S");
        let display = format!("[{timestamp}]");
        if supports_color::on(Stream::Stdout).is_some() {
            format!("{}", display.dim())
        } else {
            display
        }
    }

    // Print one "from ----> to" exchange header followed by its body lines;
    // optionally ends with a blank separator line.
    fn print_exchange(
        &self,
        from_role: &str,
        to_role: &str,
        lines: Vec<String>,
        trailing_blank_line: bool,
    ) {
        let header = format!(
            "{} ----> {}",
            self.format_role_label(from_role),
            self.format_role_label(to_role)
        );
        println!("{} {header}", self.timestamp());
        for line in lines {
            println!("{line}");
        }
        if trailing_blank_line {
            println!();
        }
    }

    // Color a verifier decision: pass=green, fail=red.
    fn format_decision(&self, decision: VerifierDecision) -> String {
        match decision {
            VerifierDecision::Pass => "pass".green().bold().to_string(),
            VerifierDecision::Fail => "fail".red().bold().to_string(),
        }
    }
}
|
||||
|
||||
impl ProgressReporter for TerminalProgressReporter {
    // User's objective was sent to the solver.
    fn objective_posted(&self, objective: &str) {
        let objective_line = format!("{}", format!("→ objective: {objective}").dim());
        self.print_exchange("user", "solver", vec![objective_line], true);
    }

    // Raw solver events go to the tracing log only (not the terminal).
    fn solver_event(&self, event: &EventMsg) {
        match serde_json::to_string_pretty(event) {
            Ok(json) => {
                tracing::debug!("[solver:event]\n{json}");
            }
            Err(err) => {
                tracing::warn!("[solver:event] (failed to serialize: {err}) {event:?}");
            }
        }
    }

    // Same as `solver_event`, for an arbitrary role.
    fn role_event(&self, role: &str, event: &EventMsg) {
        match serde_json::to_string_pretty(event) {
            Ok(json) => {
                tracing::debug!("[{role}:event]\n{json}");
            }
            Err(err) => {
                tracing::warn!("[{role}:event] (failed to serialize: {err}) {event:?}");
            }
        }
    }

    // Plain agent messages are logged at info level.
    fn solver_agent_message(&self, agent_msg: &AgentMessageEvent) {
        tracing::info!("Agent Message: {agent_msg:?}");
    }

    // Solver replied with something that is not the expected JSON signal;
    // surface it on the terminal as a warning.
    fn invalid_solver_signal(&self, raw_message: &str) {
        let heading = "Warning".yellow().bold();
        let body = format!(
            "solver reply did not match expected JSON signal; got: {}",
            raw_message
        );
        println!("{} {} {}", self.timestamp(), heading, body);
    }

    // Solver asked the director for direction.
    fn direction_request(&self, prompt: &str) {
        let prompt_line = format!("{}", prompt.yellow());
        self.print_exchange("solver", "director", vec![prompt_line], true);
    }

    // Director's directive (with optional rationale) relayed back to solver.
    fn director_response(&self, directive: &DirectiveResponse) {
        let suffix = directive
            .rationale
            .as_deref()
            .filter(|rationale| !rationale.is_empty())
            .map(|rationale| format!(" (rationale: {rationale})"))
            .unwrap_or_default();
        let directive_line = format!("{}{}", directive.directive, suffix);
        self.print_exchange("director", "solver", vec![directive_line], true);
    }

    // Solver submitted a claim for verification (path plus optional notes).
    fn verification_request(&self, claim_path: &str, notes: Option<&str>) {
        let mut lines = Vec::new();
        let path_line = format!("→ path: {claim_path}");
        lines.push(format!("{}", path_line.dim()));
        if let Some(notes) = notes.filter(|notes| !notes.is_empty()) {
            let note_line = format!("→ note: {notes}");
            lines.push(format!("{}", note_line.dim()));
        }
        self.print_exchange("solver", "verifier", lines, true);
    }

    // One verifier's verdict, with optional reasons and suggestions.
    // No trailing blank line: verdicts from multiple verifiers group together.
    fn verifier_verdict(&self, role: &str, verdict: &VerifierVerdict) {
        let decision = self.format_decision(verdict.verdict);
        let mut lines = Vec::new();
        lines.push(format!("verdict: {decision}"));
        if !verdict.reasons.is_empty() {
            let reasons = verdict.reasons.join("; ");
            let reason_line = format!("→ reasons: {reasons}");
            lines.push(format!("{}", reason_line.dim()));
        }
        if !verdict.suggestions.is_empty() {
            let suggestions = verdict.suggestions.join("; ");
            let suggestion_line = format!("→ suggestions: {suggestions}");
            lines.push(format!("{}", suggestion_line.dim()));
        }
        self.print_exchange(role, "solver", lines, false);
    }

    // Aggregated pass/fail across all verifiers.
    fn verification_summary(&self, summary: &AggregatedVerifierVerdict) {
        let decision = self.format_decision(summary.overall);
        let heading = "Verification summary".bold();
        let summary_line = format!("{heading}: {decision}");
        self.print_exchange("verifier", "solver", vec![summary_line], true);
    }

    // Final deliverable announced: path plus optional summary text.
    fn final_delivery(&self, deliverable_path: &Path, summary: Option<&str>) {
        let delivery_line = format!(
            "{}",
            format!("→ path: {}", deliverable_path.display()).dim()
        );
        let summary_line = format!(
            "{}",
            format!("→ summary: {}", summary.unwrap_or("<none>")).dim()
        );
        self.print_exchange(
            "solver",
            "verifier",
            vec![delivery_line, summary_line],
            true,
        );
    }

    // Ctrl+C received: tell the user sessions are being shut down.
    fn run_interrupted(&self) {
        println!(
            "{}",
            "Run interrupted by Ctrl+C. Shutting down sessions…"
                .red()
                .bold(),
        );
    }
}
|
||||
123
codex-rs/cli/src/infty/summary.rs
Normal file
123
codex-rs/cli/src/infty/summary.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_common::elapsed::format_duration;
|
||||
use crossterm::terminal;
|
||||
use owo_colors::OwoColorize;
|
||||
use textwrap::Options as WrapOptions;
|
||||
use textwrap::wrap;
|
||||
|
||||
/// Print the end-of-run ASCII summary box (run id, directory, objective,
/// deliverable, total time, and optional summary), wrapped to the terminal
/// width and capped at 84 columns.
///
/// Blank or whitespace-only objective/summary values are omitted entirely.
pub(crate) fn print_run_summary_box(
    color_enabled: bool,
    run_id: &str,
    run_path: &Path,
    deliverable_path: &Path,
    summary: Option<&str>,
    objective: Option<&str>,
    duration: Duration,
) {
    // (label, value) rows in display order.
    let mut items = Vec::new();
    items.push(("Run ID".to_string(), run_id.to_string()));
    items.push(("Run Directory".to_string(), run_path.display().to_string()));
    if let Some(objective) = objective
        && !objective.trim().is_empty()
    {
        items.push(("Objective".to_string(), objective.trim().to_string()));
    }
    items.push((
        "Deliverable".to_string(),
        deliverable_path.display().to_string(),
    ));
    items.push(("Total Time".to_string(), format_duration(duration)));
    if let Some(summary) = summary {
        let trimmed = summary.trim();
        if !trimmed.is_empty() {
            items.push(("Summary".to_string(), trimmed.to_string()));
        }
    }

    // Label column is at least 12 chars wide, or the longest label.
    let label_width = items
        .iter()
        .map(|(label, _)| label.len())
        .max()
        .unwrap_or(0)
        .max(12);

    const DEFAULT_MAX_WIDTH: usize = 84;
    const MIN_VALUE_WIDTH: usize = 20;
    // +7 accounts for the border/separator characters around the two cells:
    // "| " + label + " | " ... " |".
    let label_padding = label_width + 7;
    let min_total_width = label_padding + MIN_VALUE_WIDTH;
    // Fit the terminal (minus a 2-col margin); fall back to the default cap
    // when the size probe fails (e.g. not a tty).
    let available_width = terminal::size()
        .ok()
        .map(|(cols, _)| usize::from(cols).saturating_sub(2))
        .unwrap_or(DEFAULT_MAX_WIDTH);
    let max_width = available_width.min(DEFAULT_MAX_WIDTH);
    let lower_bound = min_total_width.min(available_width);
    let mut total_width = max_width.max(lower_bound).max(label_padding + 1);
    let mut value_width = total_width.saturating_sub(label_padding);
    // Guarantee a usable value column even on very narrow terminals, letting
    // the box overflow the terminal instead of collapsing.
    if value_width < MIN_VALUE_WIDTH {
        value_width = MIN_VALUE_WIDTH;
        total_width = label_padding + value_width;
    }

    let inner_width = total_width.saturating_sub(4);
    let top_border = format!("+{}+", "=".repeat(total_width.saturating_sub(2)));
    let separator = format!("+{}+", "-".repeat(total_width.saturating_sub(2)));
    let title_line = format!(
        "| {:^inner_width$} |",
        "Run Summary",
        inner_width = inner_width
    );

    println!();
    println!("{top_border}");
    if color_enabled {
        println!("{}", title_line.bold());
    } else {
        println!("{title_line}");
    }
    println!("{separator}");

    for (index, (label, value)) in items.iter().enumerate() {
        // Wrap each value paragraph-by-paragraph into rows of `value_width`.
        let mut rows = Vec::new();
        for (idx, paragraph) in value.split('\n').enumerate() {
            let trimmed = paragraph.trim();
            if trimmed.is_empty() {
                // Preserve blank lines between paragraphs (but not a leading one).
                if idx > 0 {
                    rows.push(String::new());
                }
                continue;
            }
            let wrapped = wrap(trimmed, WrapOptions::new(value_width).break_words(false));
            if wrapped.is_empty() {
                rows.push(String::new());
            } else {
                rows.extend(wrapped.into_iter().map(std::borrow::Cow::into_owned));
            }
        }
        if rows.is_empty() {
            rows.push(String::new());
        }

        for (line_idx, line) in rows.iter().enumerate() {
            // Only the first wrapped row carries the label; continuations are blank.
            let label_cell = if line_idx == 0 { label.as_str() } else { "" };
            let row_line = format!("| {label_cell:<label_width$} | {line:<value_width$} |");
            if color_enabled {
                match label.as_str() {
                    "Deliverable" => println!("{}", row_line.green()),
                    "Summary" => println!("{}", row_line.bold()),
                    _ => println!("{row_line}"),
                }
            } else {
                println!("{row_line}");
            }
        }

        if index + 1 < items.len() {
            println!("{separator}");
        }
    }

    println!("{top_border}");
    println!();
}
|
||||
@@ -24,8 +24,10 @@ use owo_colors::OwoColorize;
|
||||
use std::path::PathBuf;
|
||||
use supports_color::Stream;
|
||||
|
||||
mod infty;
|
||||
mod mcp_cmd;
|
||||
|
||||
use crate::infty::InftyCli;
|
||||
use crate::mcp_cmd::McpCli;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
@@ -106,6 +108,10 @@ enum Subcommand {
|
||||
|
||||
/// Inspect feature flags.
|
||||
Features(FeaturesCli),
|
||||
|
||||
/// [experimental] Manage Codex Infty long-running task runs.
|
||||
#[clap(name = "infty")]
|
||||
Infty(InftyCli),
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
@@ -427,6 +433,13 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
|
||||
);
|
||||
codex_cloud_tasks::run_main(cloud_cli, codex_linux_sandbox_exe).await?;
|
||||
}
|
||||
Some(Subcommand::Infty(mut infty_cli)) => {
|
||||
prepend_config_flags(
|
||||
&mut infty_cli.config_overrides,
|
||||
root_config_overrides.clone(),
|
||||
);
|
||||
infty_cli.run().await?;
|
||||
}
|
||||
Some(Subcommand::Sandbox(sandbox_args)) => match sandbox_args.cmd {
|
||||
SandboxCommand::Macos(mut seatbelt_cli) => {
|
||||
prepend_config_flags(
|
||||
|
||||
24
codex-rs/codex-infty/Cargo.toml
Normal file
24
codex-rs/codex-infty/Cargo.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "codex-infty"
|
||||
version = { workspace = true }
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
chrono = { workspace = true, features = ["serde"] }
|
||||
codex-core = { path = "../core" }
|
||||
codex-protocol = { path = "../protocol" }
|
||||
dirs = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "signal"] }
|
||||
tokio-stream = { workspace = true }
|
||||
tokio-util = { workspace = true }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
futures = "0.3"
|
||||
|
||||
[dev-dependencies]
|
||||
core_test_support = { path = "../core/tests/common" }
|
||||
tempfile = { workspace = true }
|
||||
wiremock = { workspace = true }
|
||||
196
codex-rs/codex-infty/README.md
Normal file
196
codex-rs/codex-infty/README.md
Normal file
@@ -0,0 +1,196 @@
|
||||
# Codex Infty
|
||||
|
||||
Codex Infty is a small orchestration layer that coordinates multiple Codex roles (Solver, Director, Verifier(s)) to drive longer, multi‑step objectives with minimal human intervention. It provides:
|
||||
|
||||
- A run orchestrator that routes messages between roles and advances the workflow.
|
||||
- A durable run store on disk with metadata and standard subfolders.
|
||||
- Default role prompts for Solver/Director/Verifier.
|
||||
- A lightweight progress reporting hook for UIs/CLIs.
|
||||
|
||||
The crate is designed to be embedded (via the library API) and also powers the `codex infty` CLI commands.
|
||||
|
||||
## High‑Level Flow
|
||||
|
||||
```
|
||||
objective → Solver
|
||||
Solver → direction_request → Director → directive → Solver
|
||||
… (iterate) …
|
||||
Solver → final_delivery → Orchestrator returns RunOutcome
|
||||
```
|
||||
|
||||
- The Solver always speaks structured JSON. The orchestrator parses those messages and decides the next hop.
|
||||
- The Director provides crisp guidance (also JSON) that is forwarded back to the Solver.
|
||||
- One or more Verifiers may assess the final deliverable; the orchestrator aggregates results and reports a summary to the Solver.
|
||||
- On final_delivery, the orchestrator resolves and validates the deliverable path and returns the `RunOutcome`.
|
||||
|
||||
## Directory Layout (Run Store)
|
||||
|
||||
When a run is created, a directory is initialized with this structure:
|
||||
|
||||
```
|
||||
<runs_root>/<run_id>/
|
||||
artifacts/ # long‑lived artifacts produced by the Solver
|
||||
memory/ # durable notes, claims, context
|
||||
index/ # indexes and caches
|
||||
deliverable/ # final output(s) assembled by the Solver
|
||||
run.json # run metadata (id, timestamps, roles)
|
||||
```
|
||||
|
||||
See: `codex-infty/src/run_store.rs`.
|
||||
|
||||
- The orchestrator persists rollout paths and optional config paths for each role into `run.json`.
|
||||
- Metadata timestamps are updated on significant events (role spawns, handoffs, final delivery).
|
||||
- Final deliverables must remain within the run directory. Paths are canonicalized and validated.
|
||||
|
||||
## Roles and Prompts
|
||||
|
||||
Default base instructions are injected per role if the provided `Config` has none:
|
||||
|
||||
- Solver: `codex-infty/src/prompts/solver.md`
|
||||
- Director: `codex-infty/src/prompts/director.md`
|
||||
- Verifier: `codex-infty/src/prompts/verifier.md`
|
||||
|
||||
You can provide your own instructions by pre‑populating `Config.base_instructions`.
|
||||
|
||||
## Solver Signal Contract
|
||||
|
||||
The Solver communicates intent using JSON messages (possibly wrapped in a fenced block). The orchestrator accepts two shapes:
|
||||
|
||||
- Direction request (sent to Director):
|
||||
|
||||
```json
|
||||
{"type":"direction_request","prompt":"<question or decision>"}
|
||||
```
|
||||
|
||||
- Final delivery (completes the run):
|
||||
|
||||
```json
|
||||
{"type":"final_delivery","deliverable_path":"deliverable/summary.txt","summary":"<short text>"}
|
||||
```
|
||||
|
||||
JSON may be fenced as ```json … ```; the orchestrator will strip the fence.
|
||||
|
||||
## Key Types and Modules
|
||||
|
||||
- Orchestrator: `codex-infty/src/orchestrator.rs`
|
||||
- `InftyOrchestrator`: spawns/resumes role sessions, drives the event loop, and routes signals.
|
||||
- `execute_new_run`: one‑shot helper that spawns and then drives.
|
||||
- `spawn_run`: set up sessions and the run store.
|
||||
- `call_role`, `relay_assistant_to_role`, `post_to_role`, `await_first_assistant`, `stream_events`: utilities when integrating custom flows.
|
||||
|
||||
- Run store: `codex-infty/src/run_store.rs`
|
||||
- `RunStore`, `RunMetadata`, `RoleMetadata`: metadata and persistence helpers.
|
||||
|
||||
- Types: `codex-infty/src/types.rs`
|
||||
- `RoleConfig`: wraps a `Config` and sets sensible defaults for autonomous flows (no approvals, full sandbox access). Also used to persist optional config paths.
|
||||
- `RunParams`: input to spawn runs.
|
||||
- `RunExecutionOptions`: per‑run options (objective, timeouts).
|
||||
- `RunOutcome`: returned on successful final delivery.
|
||||
|
||||
- Signals: `codex-infty/src/signals.rs`
|
||||
- DTOs for director responses and verifier verdicts, and the aggregated summary type.
|
||||
|
||||
- Progress: `codex-infty/src/progress.rs`
|
||||
- `ProgressReporter` trait: hook for UIs/CLIs to observe solver/director/verifier activity.
|
||||
|
||||
## Orchestrator Workflow (Details)
|
||||
|
||||
1. Spawn or resume role sessions (Solver, Director, and zero or more Verifiers). Default prompts are applied if the role’s `Config` has no base instructions.
|
||||
2. Optionally post an `objective` to the Solver. The progress reporter is notified and the orchestrator waits for the first Solver signal.
|
||||
3. On `direction_request`:
|
||||
- Post a structured request to the Director and await the first assistant message.
|
||||
- Parse it into a `DirectiveResponse` and forward the normalized JSON to the Solver.
|
||||
4. On `final_delivery`:
|
||||
- Canonicalize and validate that `deliverable_path` stays within the run directory.
|
||||
- Optionally run a verification pass using configured Verifier(s), aggregate results, and post a summary back to the Solver.
|
||||
- Notify the progress reporter, touch the run store, and return `RunOutcome`.
|
||||
|
||||
## Library Usage
|
||||
|
||||
```rust
|
||||
use std::sync::Arc;
|
||||
use codex_core::{CodexAuth, config::Config};
|
||||
use codex_infty::{InftyOrchestrator, RoleConfig, RunParams, RunExecutionOptions};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
// 1) Load or build a Config for each role
|
||||
let solver_cfg: Config = load_config();
|
||||
let mut director_cfg = solver_cfg.clone();
|
||||
director_cfg.model = "o4-mini".into();
|
||||
|
||||
// 2) Build role configs
|
||||
let solver = RoleConfig::new("solver", solver_cfg.clone());
|
||||
let director = RoleConfig::new("director", director_cfg);
|
||||
let verifiers = vec![RoleConfig::new("verifier-alpha", solver_cfg.clone())];
|
||||
|
||||
// 3) Create an orchestrator (using default runs root)
|
||||
let auth = CodexAuth::from_api_key("sk-…");
|
||||
let orchestrator = InftyOrchestrator::new(auth)?;
|
||||
|
||||
// 4) Execute a new run with an objective
|
||||
let params = RunParams {
|
||||
run_id: "my-run".into(),
|
||||
run_root: None, // use default ~/.codex/infty/<run_id>
|
||||
solver,
|
||||
director,
|
||||
verifiers,
|
||||
};
|
||||
let mut opts = RunExecutionOptions::default();
|
||||
opts.objective = Some("Implement feature X".into());
|
||||
|
||||
let outcome = orchestrator.execute_new_run(params, opts).await?;
|
||||
println!("deliverable: {}", outcome.deliverable_path.display());
|
||||
Ok(())
|
||||
}
|
||||
# fn load_config() -> codex_core::config::Config { codex_core::config::Config::default() }
|
||||
```
|
||||
|
||||
Note: Resuming runs is currently disabled.
|
||||
|
||||
## CLI Quickstart
|
||||
|
||||
The CLI (`codex`) exposes Infty helpers under the `infty` subcommand. Examples:
|
||||
|
||||
```bash
|
||||
# Create a run and immediately drive toward completion
|
||||
codex infty create --run-id demo --objective "Build and test feature"
|
||||
|
||||
# Inspect runs
|
||||
codex infty list
|
||||
codex infty show demo
|
||||
|
||||
# Sending one-off messages to stored runs is currently disabled
|
||||
```
|
||||
|
||||
Flags allow customizing the Director’s model and reasoning effort; see `codex infty create --help`.
|
||||
|
||||
## Progress Reporting
|
||||
|
||||
Integrate your UI by implementing `ProgressReporter` and attaching it with `InftyOrchestrator::with_progress(...)`. You’ll receive callbacks on key milestones (objective posted, solver messages, director response, verification summaries, final delivery, etc.).
|
||||
|
||||
## Safety and Guardrails
|
||||
|
||||
- `RoleConfig::new` sets `SandboxPolicy::DangerFullAccess` and `AskForApproval::Never` to support autonomous flows. Adjust if your environment requires stricter policies.
|
||||
- Deliverable paths are validated to stay inside the run directory and are fully canonicalized.
|
||||
- JSON payloads are schema‑checked where applicable (e.g., solver signals and final delivery shape).
|
||||
|
||||
## Tests
|
||||
|
||||
Run the crate’s tests:
|
||||
|
||||
```bash
|
||||
cargo test -p codex-infty
|
||||
```
|
||||
|
||||
Many tests rely on mocked SSE streams and will auto‑skip in sandboxes where network is disabled.
|
||||
|
||||
## When to Use This Crate
|
||||
|
||||
Use `codex-infty` when you want a minimal, pragmatic multi‑role loop with:
|
||||
|
||||
- Clear role separation and routing.
|
||||
- Durable, restart‑resilient state on disk.
|
||||
- Simple integration points (progress hooks and helper APIs).
|
||||
|
||||
It’s intentionally small and focused so it can be embedded into larger tools or extended to meet your workflows.
|
||||
38
codex-rs/codex-infty/src/lib.rs
Normal file
38
codex-rs/codex-infty/src/lib.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
#![deny(clippy::print_stdout, clippy::print_stderr)]
|
||||
|
||||
mod orchestrator;
|
||||
mod progress;
|
||||
mod prompts;
|
||||
mod roles;
|
||||
mod run_store;
|
||||
mod session;
|
||||
mod signals;
|
||||
mod types;
|
||||
pub(crate) mod utils;
|
||||
|
||||
pub use orchestrator::InftyOrchestrator;
|
||||
pub use progress::ProgressReporter;
|
||||
pub use run_store::RoleMetadata;
|
||||
pub use run_store::RunMetadata;
|
||||
pub use run_store::RunStore;
|
||||
pub use signals::AggregatedVerifierVerdict;
|
||||
pub use signals::DirectiveResponse;
|
||||
pub use signals::VerifierDecision;
|
||||
pub use signals::VerifierReport;
|
||||
pub use signals::VerifierVerdict;
|
||||
pub use types::RoleConfig;
|
||||
pub use types::RoleSession;
|
||||
pub use types::RunExecutionOptions;
|
||||
pub use types::RunOutcome;
|
||||
pub use types::RunParams;
|
||||
pub use types::RunSessions;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use dirs::home_dir;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub fn default_runs_root() -> Result<PathBuf> {
|
||||
let home = home_dir().ok_or_else(|| anyhow!("failed to determine home directory"))?;
|
||||
Ok(home.join(".codex").join("infty"))
|
||||
}
|
||||
552
codex-rs/codex-infty/src/orchestrator.rs
Normal file
552
codex-rs/codex-infty/src/orchestrator.rs
Normal file
@@ -0,0 +1,552 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use anyhow::bail;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_protocol::ConversationId;
|
||||
use tokio::signal;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::progress::ProgressReporter;
|
||||
use crate::prompts;
|
||||
use crate::roles::Role;
|
||||
use crate::roles::director::DirectionRequestPayload;
|
||||
use crate::roles::director::DirectorRole;
|
||||
use crate::roles::solver::SolverRequest;
|
||||
use crate::roles::solver::SolverRole;
|
||||
use crate::roles::solver::SolverSignal;
|
||||
use crate::roles::solver::parse_solver_signal;
|
||||
use crate::roles::verifier::VerificationRequestPayload;
|
||||
use crate::roles::verifier_pool::VerifierPool;
|
||||
use crate::run_store::RoleMetadata;
|
||||
use crate::run_store::RunStore;
|
||||
use crate::session;
|
||||
use crate::signals::AggregatedVerifierVerdict;
|
||||
use crate::types::RoleConfig;
|
||||
use crate::types::RoleSession;
|
||||
use crate::types::RunExecutionOptions;
|
||||
use crate::types::RunOutcome;
|
||||
use crate::types::RunParams;
|
||||
use crate::types::RunSessions;
|
||||
|
||||
/// Mutable bookkeeping for the orchestrator's solver event loop.
#[derive(Default)]
struct LoopState {
    // Set after the objective is posted: the solver owes a structured JSON
    // signal; if its turn completes without one, we explicitly request it.
    waiting_for_signal: bool,
    // Set once a signal has been handled this turn so that the matching
    // TaskComplete event can be consumed without re-prompting the solver.
    pending_solver_turn_completion: bool,
}
|
||||
|
||||
/// Minimal handle retained for tearing down a spawned role session:
/// the conversation id plus a cloned reference to its conversation.
struct SessionCleanup {
    conversation_id: ConversationId,
    conversation: Arc<CodexConversation>,
}
|
||||
|
||||
impl SessionCleanup {
|
||||
fn new(session: &RoleSession) -> Self {
|
||||
Self {
|
||||
conversation_id: session.conversation_id,
|
||||
conversation: Arc::clone(&session.conversation),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Coordinates Solver, Director, and Verifier role sessions for a run:
/// spawns them, routes messages between them, and drives the event loop.
pub struct InftyOrchestrator {
    /// Shared hub through which role sessions exchange messages.
    hub: Arc<CrossSessionHub>,
    /// Owns the underlying Codex conversations for every role session.
    conversation_manager: ConversationManager,
    /// Root directory under which per-run state directories are created.
    runs_root: PathBuf,
    /// Optional observer notified of run milestones; `None` disables reporting.
    progress: Option<Arc<dyn ProgressReporter>>,
}
|
||||
|
||||
impl InftyOrchestrator {
    /// Borrows the progress reporter, if one was attached.
    fn progress_ref(&self) -> Option<&dyn ProgressReporter> {
        self.progress.as_deref()
    }
    /// Creates an orchestrator rooted at the default runs directory
    /// (`~/.codex/infty`).
    pub fn new(auth: CodexAuth) -> Result<Self> {
        let runs_root = crate::default_runs_root()?;
        Ok(Self::with_runs_root(auth, runs_root))
    }

    /// Creates an orchestrator that stores run state under `runs_root`.
    pub fn with_runs_root(auth: CodexAuth, runs_root: impl Into<PathBuf>) -> Self {
        Self {
            hub: Arc::new(CrossSessionHub::new()),
            conversation_manager: ConversationManager::with_auth(auth),
            runs_root: runs_root.into(),
            progress: None,
        }
    }

    /// Root directory under which run directories are created.
    pub fn runs_root(&self) -> &PathBuf {
        &self.runs_root
    }

    /// Shared cross-session hub used by all role sessions.
    pub fn hub(&self) -> Arc<CrossSessionHub> {
        Arc::clone(&self.hub)
    }

    /// Attaches a progress reporter (builder-style) that receives
    /// callbacks on run milestones.
    pub fn with_progress(mut self, reporter: Arc<dyn ProgressReporter>) -> Self {
        self.progress = Some(reporter);
        self
    }

    /// One-shot helper: spawns all role sessions for a new run and then
    /// drives the event loop until final delivery (or failure).
    pub async fn execute_new_run(
        &self,
        params: RunParams,
        options: RunExecutionOptions,
    ) -> Result<RunOutcome> {
        let sessions = self.spawn_run(params).await?;
        self.drive_run(sessions, options).await
    }

    // resumable runs are disabled; execute_existing_run removed

    /// Initializes the on-disk run store and spawns the solver, director,
    /// and verifier sessions. On any spawn failure, already-spawned
    /// sessions are shut down and the run directory is removed.
    pub async fn spawn_run(&self, params: RunParams) -> Result<RunSessions> {
        let RunParams {
            run_id,
            run_root,
            solver,
            director,
            verifiers,
        } = params;

        // Default run location is <runs_root>/<run_id> unless overridden.
        let run_path = run_root.unwrap_or_else(|| self.runs_root.join(&run_id));
        let role_metadata = collect_role_metadata(&solver, &director, &verifiers);
        let mut store = RunStore::initialize(&run_path, &run_id, &role_metadata)?;
        // Tracks every session spawned so far so a later failure can
        // clean all of them up.
        let mut cleanup = Vec::new();

        let solver_session = match self
            .spawn_and_register_role(&run_id, &run_path, &solver, &mut store, &mut cleanup)
            .await
        {
            Ok(session) => session,
            Err(err) => {
                self.cleanup_failed_spawn(cleanup, &run_path).await;
                return Err(err);
            }
        };

        let director_session = match self
            .spawn_and_register_role(&run_id, &run_path, &director, &mut store, &mut cleanup)
            .await
        {
            Ok(session) => session,
            Err(err) => {
                self.cleanup_failed_spawn(cleanup, &run_path).await;
                return Err(err);
            }
        };

        let mut verifier_sessions = Vec::with_capacity(verifiers.len());
        for verifier in verifiers {
            let session = match self
                .spawn_and_register_role(&run_id, &run_path, &verifier, &mut store, &mut cleanup)
                .await
            {
                Ok(session) => session,
                Err(err) => {
                    self.cleanup_failed_spawn(cleanup, &run_path).await;
                    return Err(err);
                }
            };
            verifier_sessions.push(session);
        }

        Ok(RunSessions {
            run_id,
            solver: solver_session,
            director: director_session,
            verifiers: verifier_sessions,
            store,
        })
    }

    // resumable runs are disabled; resume_run removed

    /// Runs the event loop and then unconditionally shuts down every
    /// session, whether the loop succeeded or failed.
    async fn drive_run(
        &self,
        mut sessions: RunSessions,
        options: RunExecutionOptions,
    ) -> Result<RunOutcome> {
        let result = self.inner_drive_run(&mut sessions, &options).await;
        let cleanup = collect_session_cleanup(&sessions);
        self.shutdown_sessions(cleanup).await;
        result
    }

    /// Core event loop: streams solver events, routes `direction_request`
    /// signals to the director, runs verification on `final_delivery`,
    /// and returns the `RunOutcome` once a delivery passes (or there are
    /// no verifiers). Ctrl+C cancels all in-flight role turns.
    async fn inner_drive_run(
        &self,
        sessions: &mut RunSessions,
        options: &RunExecutionOptions,
    ) -> Result<RunOutcome> {
        let solver_role = SolverRole::new(
            Arc::clone(&self.hub),
            sessions.run_id.clone(),
            sessions.solver.role.clone(),
            sessions.solver.conversation_id,
            self.progress.clone(),
        );
        let director_role = DirectorRole::new(
            Arc::clone(&self.hub),
            sessions.run_id.clone(),
            sessions.director.role.clone(),
            options.director_timeout,
            self.progress.clone(),
        );
        let mut verifier_pool = VerifierPool::from_sessions(
            Arc::clone(&self.hub),
            sessions,
            options.verifier_timeout,
            self.progress.clone(),
        );

        let mut solver_events = solver_role.stream_events()?;
        let mut state = LoopState::default();
        self.maybe_post_objective(&solver_role, sessions, &mut state, options)
            .await?;

        // Cancellation token that propagates Ctrl+C to nested awaits
        let cancel = CancellationToken::new();
        let cancel_on_ctrl_c = cancel.clone();
        tokio::spawn(async move {
            let _ = signal::ctrl_c().await;
            cancel_on_ctrl_c.cancel();
        });

        'event_loop: loop {
            tokio::select! {
                maybe_event = solver_events.next() => {
                    // Stream ended without a final delivery: fall through to the
                    // error return below the loop.
                    let Some(event) = maybe_event else {
                        break 'event_loop;
                    };
                    if let Some(p) = self.progress_ref() { p.solver_event(&event.event.msg); }
                    match &event.event.msg {
                        EventMsg::AgentMessage(agent_msg) => {
                            if let Some(p) = self.progress_ref() { p.solver_agent_message(agent_msg); }
                            // Only messages that parse as a structured solver signal
                            // advance the workflow; anything else is ignored here.
                            if let Some(signal) = parse_solver_signal(&agent_msg.message) {
                                state.waiting_for_signal = false;
                                match signal {
                                    SolverSignal::DirectionRequest { prompt } => {
                                        let prompt = crate::utils::required_trimmed(
                                            prompt,
                                            "solver direction_request missing prompt text",
                                        )?;
                                        if let Some(p) = self.progress_ref() { p.direction_request(&prompt); }
                                        self
                                            .handle_direction_request(
                                                &prompt,
                                                options,
                                                &director_role,
                                                &solver_role,
                                                cancel.clone(),
                                            )
                                            .await?;
                                        sessions.store.touch()?;
                                        state.pending_solver_turn_completion = true;
                                    }
                                    SolverSignal::FinalDelivery {
                                        deliverable_path,
                                        summary,
                                    } => {
                                        let deliverable_path = crate::utils::required_trimmed(
                                            deliverable_path,
                                            "solver final_delivery missing deliverable_path",
                                        )?;
                                        if deliverable_path.is_empty() { bail!("solver final_delivery provided empty path"); }

                                        // Minimal behavior: if the provided path cannot be resolved,
                                        // send a placeholder claim so verifiers can fail it.
                                        let resolved = crate::utils::resolve_deliverable_path(
                                            sessions.store.path(),
                                            &deliverable_path,
                                        )
                                        .unwrap_or_else(|_| std::path::PathBuf::from("file not existing"));

                                        let summary_clean = crate::utils::trim_to_non_empty(summary);
                                        let summary_ref = summary_clean.as_deref();
                                        if let Some(p) = self.progress_ref() { p.final_delivery(&resolved, summary_ref); }
                                        let verified = self
                                            .run_final_verification(
                                                sessions,
                                                &mut verifier_pool,
                                                &resolved,
                                                summary_ref,
                                                options,
                                                &solver_role,
                                                cancel.clone(),
                                            )
                                            .await?;
                                        // A failed verification keeps the loop running so the
                                        // solver can react to the verifier summary.
                                        if !verified { state.pending_solver_turn_completion = true; continue; }
                                        sessions.store.touch()?;
                                        return Ok(RunOutcome {
                                            run_id: sessions.run_id.clone(),
                                            deliverable_path: resolved,
                                            summary: summary_clean,
                                            raw_message: agent_msg.message.clone(),
                                        });
                                    }
                                }
                            }
                        }
                        EventMsg::TaskComplete(..) => {
                            if state.waiting_for_signal {
                                // The solver completed its turn without issuing a signal; ask for one now.
                                solver_role.request_finalization_signal().await?;
                            } else if state.pending_solver_turn_completion {
                                // We handled a signal earlier in the loop; this completion corresponds to it.
                                state.pending_solver_turn_completion = false;
                            }
                        }
                        EventMsg::Error(error) => {
                            tracing::error!("Error: {:?}", error);
                        }
                        EventMsg::StreamError(error) => {
                            tracing::error!("Stream error: {:?}", error);
                        }
                        e => {
                            tracing::info!("Unhandled event: {:?}", e); // todo move to trace
                        }
                    }
                }
                _ = cancel.cancelled() => {
                    if let Some(progress) = self.progress.as_ref() { progress.run_interrupted(); }
                    // Proactively interrupt any in-flight role turns for fast shutdown.
                    let _ = sessions.solver.conversation.submit(Op::Interrupt).await;
                    let _ = sessions.director.conversation.submit(Op::Interrupt).await;
                    for v in &sessions.verifiers { let _ = v.conversation.submit(Op::Interrupt).await; }
                    // Cleanup is handled by the caller (drive_run) to avoid double-shutdown
                    bail!("run interrupted by Ctrl+C");
                }
            }
        }

        Err(anyhow!(
            "run {} ended before emitting final_delivery message",
            sessions.run_id
        ))
    }

    /// Posts the objective (if any, non-blank) to the solver and marks the
    /// loop as awaiting the solver's first structured signal.
    async fn maybe_post_objective(
        &self,
        solver: &crate::roles::solver::SolverRole,
        sessions: &mut RunSessions,
        state: &mut LoopState,
        options: &RunExecutionOptions,
    ) -> Result<()> {
        if let Some(objective) = options.objective.as_deref()
            && !objective.trim().is_empty()
        {
            solver
                .post(objective, Some(SolverRole::solver_signal_schema()))
                .await?;
            sessions.store.touch()?;
            state.waiting_for_signal = true;
            if let Some(p) = self.progress_ref() {
                p.objective_posted(objective);
            }
        }
        Ok(())
    }

    /// Routes a solver `direction_request` to the director and forwards
    /// the parsed directive back to the solver. Both awaits race against
    /// `cancel` so Ctrl+C aborts promptly.
    async fn handle_direction_request(
        &self,
        prompt: &str,
        options: &RunExecutionOptions,
        director_role: &DirectorRole,
        solver_role: &SolverRole,
        cancel: CancellationToken,
    ) -> Result<()> {
        let request = DirectionRequestPayload::new(prompt, options.objective.as_deref());
        let directive_payload = tokio::select! {
            r = director_role.call(&request) => {
                r.context("director response was not valid directive JSON")?
            }
            _ = cancel.cancelled() => {
                bail!("interrupted")
            }
        };
        if let Some(progress) = self.progress.as_ref() {
            progress.director_response(&directive_payload);
        }
        let req = SolverRequest::from(directive_payload);
        tokio::select! {
            r = solver_role.call(&req) => { r?; }
            _ = cancel.cancelled() => { bail!("interrupted"); }
        }
        Ok(())
    }

    /// Runs one verification round against the verifier pool for a final
    /// delivery, rotates out passing verifiers, and relays the aggregated
    /// summary to the solver. Returns whether the overall verdict passed
    /// (vacuously true when no verifiers are configured).
    #[allow(clippy::too_many_arguments)]
    async fn run_final_verification(
        &self,
        sessions: &mut RunSessions,
        verifier_pool: &mut VerifierPool,
        deliverable_path: &Path,
        summary: Option<&str>,
        options: &RunExecutionOptions,
        solver_role: &SolverRole,
        cancel: CancellationToken,
    ) -> Result<bool> {
        // Prefer a path relative to the run directory in the claim; fall
        // back to the absolute display form if it isn't inside the run.
        let relative = deliverable_path
            .strip_prefix(sessions.store.path())
            .ok()
            .and_then(|p| p.to_str().map(|s| s.to_string()));
        let claim_path = relative.unwrap_or_else(|| deliverable_path.display().to_string());

        let objective = crate::utils::objective_as_str(options);

        let request = VerificationRequestPayload::new(claim_path.as_str(), summary, objective);
        if verifier_pool.is_empty() {
            return Ok(true);
        }
        let round = tokio::select! {
            r = verifier_pool.collect_round(&request) => { r? }
            _ = cancel.cancelled() => { bail!("interrupted"); }
        };
        verifier_pool
            .rotate_passing(sessions, &self.conversation_manager, &round.passing_roles)
            .await?;
        let summary_result = round.summary;
        self.emit_verification_summary(&summary_result);
        let req = SolverRequest::from(&summary_result);
        tokio::select! {
            r = solver_role.call(&req) => { r?; }
            _ = cancel.cancelled() => { bail!("interrupted"); }
        }
        Ok(summary_result.overall.is_pass())
    }

    /// Forwards the aggregated verifier verdict to the progress reporter.
    fn emit_verification_summary(&self, summary: &AggregatedVerifierVerdict) {
        if let Some(progress) = self.progress.as_ref() {
            progress.verification_summary(summary);
        }
    }

    /// Shuts down any sessions spawned before a failure and removes the
    /// partially-initialized run directory (best effort, logged on error).
    async fn cleanup_failed_spawn(&self, sessions: Vec<SessionCleanup>, run_path: &Path) {
        self.shutdown_sessions(sessions).await;
        if run_path.exists()
            && let Err(err) = fs::remove_dir_all(run_path)
        {
            warn!(
                path = %run_path.display(),
                ?err,
                "failed to remove run directory after spawn failure"
            );
        }
    }

    // resumable runs are disabled; cleanup_failed_resume removed

    /// Submits `Op::Shutdown` to each session and removes it from the
    /// conversation manager. Shutdown errors are logged, not propagated.
    async fn shutdown_sessions(&self, sessions: Vec<SessionCleanup>) {
        for session in sessions {
            if let Err(err) = session.conversation.submit(Op::Shutdown).await {
                warn!(
                    %session.conversation_id,
                    ?err,
                    "failed to shutdown session during cleanup"
                );
            }
            let _ = self
                .conversation_manager
                .remove_conversation(&session.conversation_id)
                .await;
        }
    }

    /// Spawns one role session, records its cleanup handle, and persists
    /// its rollout path (and optional config path) into the run store.
    async fn spawn_and_register_role(
        &self,
        run_id: &str,
        run_path: &Path,
        role_config: &RoleConfig,
        store: &mut RunStore,
        cleanup: &mut Vec<SessionCleanup>,
    ) -> Result<RoleSession> {
        let session = session::spawn_role(
            Arc::clone(&self.hub),
            &self.conversation_manager,
            run_id,
            run_path,
            role_config.clone(),
            prompts::ensure_instructions,
        )
        .await?;
        cleanup.push(SessionCleanup::new(&session));
        store.update_rollout_path(&session.role, session.rollout_path.clone())?;
        if let Some(path) = role_config.config_path.clone() {
            store.set_role_config_path(&session.role, path)?;
        }
        Ok(session)
    }

    // resumable runs are disabled; resume_and_register_role removed
}
|
||||
|
||||
impl InftyOrchestrator {
    /// Test-only helper to run a single verification round against all verifiers,
    /// applying the replacement policy (replace passes, keep failures).
    pub async fn verify_round_for_test(
        &self,
        sessions: &mut RunSessions,
        claim_path: &str,
        options: &RunExecutionOptions,
    ) -> Result<AggregatedVerifierVerdict> {
        // Build a fresh pool from the run's verifier sessions using the
        // configured per-round timeout.
        let mut pool = VerifierPool::from_sessions(
            Arc::clone(&self.hub),
            sessions,
            options.verifier_timeout,
            self.progress.clone(),
        );
        // No summary/objective context: exercise only the claim path.
        let req = VerificationRequestPayload::new(claim_path, None, None);
        let round = pool.collect_round(&req).await?;
        pool.rotate_passing(sessions, &self.conversation_manager, &round.passing_roles)
            .await?;
        Ok(round.summary)
    }
}
|
||||
|
||||
fn collect_role_metadata(
|
||||
solver: &RoleConfig,
|
||||
director: &RoleConfig,
|
||||
verifiers: &[RoleConfig],
|
||||
) -> Vec<RoleMetadata> {
|
||||
solver_and_director_metadata(solver, director)
|
||||
.into_iter()
|
||||
.chain(verifiers.iter().map(|verifier| RoleMetadata {
|
||||
role: verifier.role.clone(),
|
||||
rollout_path: None,
|
||||
config_path: verifier.config_path.clone(),
|
||||
}))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn solver_and_director_metadata(solver: &RoleConfig, director: &RoleConfig) -> Vec<RoleMetadata> {
|
||||
vec![
|
||||
RoleMetadata {
|
||||
role: solver.role.clone(),
|
||||
rollout_path: None,
|
||||
config_path: solver.config_path.clone(),
|
||||
},
|
||||
RoleMetadata {
|
||||
role: director.role.clone(),
|
||||
rollout_path: None,
|
||||
config_path: director.config_path.clone(),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn collect_session_cleanup(sessions: &RunSessions) -> Vec<SessionCleanup> {
|
||||
let mut cleanup = Vec::with_capacity(2 + sessions.verifiers.len());
|
||||
cleanup.push(SessionCleanup::new(&sessions.solver));
|
||||
cleanup.push(SessionCleanup::new(&sessions.director));
|
||||
cleanup.extend(sessions.verifiers.iter().map(SessionCleanup::new));
|
||||
cleanup
|
||||
}
|
||||
25
codex-rs/codex-infty/src/progress.rs
Normal file
25
codex-rs/codex-infty/src/progress.rs
Normal file
@@ -0,0 +1,25 @@
|
||||
use std::path::Path;
|
||||
|
||||
use codex_core::protocol::AgentMessageEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
|
||||
use crate::signals::AggregatedVerifierVerdict;
|
||||
use crate::signals::DirectiveResponse;
|
||||
use crate::signals::VerifierVerdict;
|
||||
|
||||
/// Observer hooks for surfacing run progress (to a UI, logger, etc.).
/// Every method has a no-op default, so implementors override only the
/// callbacks they care about.
pub trait ProgressReporter: Send + Sync {
    /// The run objective was posted to the solver.
    fn objective_posted(&self, _objective: &str) {}
    /// Raw event from the solver session.
    fn solver_event(&self, _event: &EventMsg) {}
    /// Raw event from a named non-solver role (director or a verifier).
    fn role_event(&self, _role: &str, _event: &EventMsg) {}
    /// Assistant message emitted by the solver.
    fn solver_agent_message(&self, _message: &AgentMessageEvent) {}
    /// Called when the solver emits a message that failed to parse as a valid
    /// JSON signal according to the expected `solver_signal_schema`.
    fn invalid_solver_signal(&self, _raw_message: &str) {}
    /// The solver asked the director for direction with this prompt.
    fn direction_request(&self, _prompt: &str) {}
    /// The director replied with a directive.
    fn director_response(&self, _directive: &DirectiveResponse) {}
    /// A verification round was requested for the claim at `_claim_path`.
    fn verification_request(&self, _claim_path: &str, _notes: Option<&str>) {}
    /// A single verifier returned its verdict for the current round.
    fn verifier_verdict(&self, _role: &str, _verdict: &VerifierVerdict) {}
    /// Aggregated verdict for a completed verification round.
    fn verification_summary(&self, _summary: &AggregatedVerifierVerdict) {}
    /// The solver delivered its final result at `_deliverable_path`.
    fn final_delivery(&self, _deliverable_path: &Path, _summary: Option<&str>) {}
    /// The run was interrupted before completion.
    fn run_interrupted(&self) {}
}
|
||||
20
codex-rs/codex-infty/src/prompts/director.md
Normal file
20
codex-rs/codex-infty/src/prompts/director.md
Normal file
@@ -0,0 +1,20 @@
|
||||
You are the **Director**. Your role is to pilot/manage an agent to resolve a given objective in its totality.
|
||||
|
||||
## Guidelines:
|
||||
- The objective needs to be solved in its original format. If the agent proposes a simplification or a partial resolution, this is not sufficient. You must tell the agent to solve the total objective.
|
||||
- The agent will often just report some results before moving to the next step. When that happens, simply encourage it to continue with a short message such as "Go ahead" or "Keep going"; in this case, no rationale is needed.
|
||||
- If the agent proposes multiple approaches, choose the approach that is the most likely to solve the objective.
|
||||
- If the agent is stuck or think he cannot resolve the objective, encourage him and try to find a solution together. Your role is to support the agent in his quest. It's sometimes necessary to slightly cheer him up
|
||||
- No infinite loop!!! If you detect that the agent sends multiple times the exact same message/question, you are probably in an infinite loop. Try to break it by re-focusing on the objective and how to approach it.
|
||||
- You must always be crisp and inflexible. Keep the objective in mind.
|
||||
- Remember that the agent should do the following. If you feel this is not the case, remind them:
|
||||
* Document his work
|
||||
* Have a very rigorous and clean approach
|
||||
* Focus on the total resolution of the objective.
|
||||
- Challenge the Solver whenever they drift toward summarising existing work instead of advancing the concrete proof or solution.
|
||||
|
||||
Respond **only** with JSON in this exact shape:
|
||||
```json
|
||||
{"directive":"<directive or next step>","rationale":"<why this is the right move>"}
|
||||
```
|
||||
Keep `directive` actionable and concise. Use `rationale` for supporting detail. Leave `rationale` empty if it adds no value.
|
||||
80
codex-rs/codex-infty/src/prompts/mod.rs
Normal file
80
codex-rs/codex-infty/src/prompts/mod.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
use codex_core::config::Config;
|
||||
pub(crate) const DIRECTOR_PROMPT: &str = include_str!("director.md");
|
||||
pub(crate) const SOLVER_PROMPT: &str = include_str!("solver.md");
|
||||
pub(crate) const VERIFIER_PROMPT: &str = include_str!("verifier.md");
|
||||
|
||||
/// Injects the role-specific default prompt as base instructions, but only
/// when the config does not already carry custom base instructions.
pub fn ensure_instructions(role: &str, config: &mut Config) {
    if config.base_instructions.is_none()
        && let Some(text) = default_instructions_for_role(role)
    {
        config.base_instructions = Some(text.to_string());
    }
}
|
||||
|
||||
fn default_instructions_for_role(role: &str) -> Option<&'static str> {
|
||||
let normalized = role.to_ascii_lowercase();
|
||||
if normalized == "solver" {
|
||||
Some(SOLVER_PROMPT)
|
||||
} else if normalized == "director" {
|
||||
Some(DIRECTOR_PROMPT)
|
||||
} else if normalized.starts_with("verifier") {
|
||||
Some(VERIFIER_PROMPT)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use core_test_support::load_default_config_for_test;
    use tempfile::TempDir;

    /// Applies `ensure_instructions` for `role` on a fresh default config and
    /// asserts the injected base instructions contain `needle`.
    fn assert_role_prompt_contains(role: &str, needle: &str) {
        let home = TempDir::new().unwrap();
        let mut config = load_default_config_for_test(&home);
        config.base_instructions = None;
        ensure_instructions(role, &mut config);
        assert!(
            config.base_instructions.as_ref().unwrap().contains(needle),
            "expected prompt for role {role} to contain {needle:?}"
        );
    }

    #[test]
    fn provides_prompts_for_known_roles() {
        assert_role_prompt_contains("solver", "You are a brilliant mathematician");
        assert_role_prompt_contains("director", "You are the **Director**");
        assert_role_prompt_contains("verifier-alpha", "You are the **Verifier**");
    }

    #[test]
    fn does_not_override_existing_instructions() {
        let home = TempDir::new().unwrap();
        let mut config = load_default_config_for_test(&home);
        config.base_instructions = Some("custom".to_string());
        ensure_instructions("solver", &mut config);
        assert_eq!(config.base_instructions.as_deref(), Some("custom"));
    }
}
|
||||
40
codex-rs/codex-infty/src/prompts/solver.md
Normal file
40
codex-rs/codex-infty/src/prompts/solver.md
Normal file
@@ -0,0 +1,40 @@
|
||||
You are a brilliant mathematician tasked with producing **new** reasoning, proof, construction, or counterexample that resolves the stated objective. Your goal is to make actual progress in science while being rigorous and innovative.
|
||||
|
||||
You MUST solve the provided objective in its totality. If no known solution exists, it is your job to find a new one or to propose an intelligent approach.
|
||||
A result stating that this is not possible is not acceptable. If the solution does not exist, make it happen.
|
||||
|
||||
## Responsibilities
|
||||
- Understand the objective and break it into a living execution plan.
|
||||
- Produce artifacts under `artifacts/`, durable notes under `memory/`, and supporting indexes under `index/`. Prefer `apply_patch` for text edits and use `shell` for other filesystem work.
|
||||
- When you exit a task or take a dependency on external evidence, write JSON notes in `memory/claims/` that link to the supporting artifacts.
|
||||
- Run verification steps (tests, linters, proofs) under the sandbox before claiming completion.
|
||||
- Every deliverable must include the actual solution or proof (not just a literature review) and enough detail for the Verifier to reproduce or scrutinise it.
|
||||
- Your goal is to find new solutions to problems for which humans do not yet have a solution. So do not focus on searching the internet or the literature; instead, try building your own proofs.
|
||||
- You are very rigorous in your approach.
|
||||
- You do not fear new challenges. If a problem seems to be impossible to solve, try!
|
||||
|
||||
Available Codex tools mirror standard Codex sessions (e.g. `shell`, `apply_patch`). Assume all filesystem paths are relative to the current run store directory unless stated otherwise.
|
||||
|
||||
## Communication contract
|
||||
The orchestrator routes your structured messages to the Director. Respond with **JSON only**—no leading prose or trailing commentary. Wrap JSON in a fenced block only if the agent policy forces it.
|
||||
|
||||
- Every reply must populate the full schema, even when a field does not apply. Set unused string fields to `null`.
|
||||
- Direction request (send to Director):
|
||||
```json
|
||||
{"type":"direction_request","prompt":"<concise question or decision>","claim_path":null,"notes":null,"deliverable_path":null,"summary":null}
|
||||
```
|
||||
- Final delivery (after receiving the finalization instruction):
|
||||
```json
|
||||
{"type":"final_delivery","prompt":null,"claim_path":null,"notes":null,"deliverable_path":"deliverable/summary.txt","summary":"<answer plus supporting context>"}
|
||||
```
|
||||
|
||||
## Operating rhythm
|
||||
- You MUST always address the comments received by the verifiers.
|
||||
- Create `deliverable/summary.txt` before every final delivery. Capture the final answer, how you reached it, and any follow-up instructions. Do not forget it.
|
||||
- When uncertainty remains, prioritise experiments or reasoning steps that move you closer to a finished proof rather than cataloguing known results.
|
||||
- Do not try to version your work or use git! EVER!
|
||||
- If you receive multiple times the same answer, you are probably in an infinite loop. Try a new approach or something else then.
|
||||
- Keep the run resilient to restarts: document intent, intermediate results, and follow-up tasks in `memory/`.
|
||||
- Prefer concrete evidence. Link every claim to artifacts or durable notes so the verifier can reproduce your reasoning.
|
||||
- On failure feedback from a verifier, address his feedback and update/fix your work.
|
||||
- Only a final solution to the objective is an acceptable result to be sent to the verifier. If you do not find any solution, try to create a new one on your own.
|
||||
21
codex-rs/codex-infty/src/prompts/verifier.md
Normal file
21
codex-rs/codex-infty/src/prompts/verifier.md
Normal file
@@ -0,0 +1,21 @@
|
||||
You are the **Verifier**. As a brilliant mathematician, your role is to verify a provided response according to a given objective.
|
||||
|
||||
## Guidelines
|
||||
- You must always be perfectly rigorous when verifying a solution.
|
||||
- The solution MUST solve the objective in its totality. A partial resolution or a summary of why this is not possible is NOT ACCEPTABLE.
|
||||
- Evaluate correctness and completeness.
|
||||
- The solution might try to convince you that a partial resolution is good enough or that a total resolution is not possible. This is NOT ACCEPTABLE and should automatically trigger a `fail`.
|
||||
|
||||
## How to answer
|
||||
When you give the result of your verification:
|
||||
- Be explicit in your conclusion (does the artifact contain everything? is it 100% correct?)
|
||||
- If you are not sure, prefer a `fail`.
|
||||
- If it is a `fail`, try to give a crisp analysis of what is wrong or what is missing.
|
||||
|
||||
Respond **only** with JSON in this form:
|
||||
```json
|
||||
{"verdict":"pass","reasons":[],"suggestions":[]}
|
||||
```
|
||||
Use `"fail"` when the claim is not ready. Populate `reasons` with concrete blocking issues. Provide actionable `suggestions` for remediation. Omit entries when not needed.
|
||||
|
||||
Do not include extra commentary outside the JSON payload.
|
||||
98
codex-rs/codex-infty/src/roles/director.rs
Normal file
98
codex-rs/codex-infty/src/roles/director.rs
Normal file
@@ -0,0 +1,98 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::cross_session::AssistantMessage;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::progress::ProgressReporter;
|
||||
use crate::roles::Role;
|
||||
use crate::roles::parse_json_struct;
|
||||
use crate::session;
|
||||
use crate::signals::DirectiveResponse;
|
||||
|
||||
/// JSON payload sent to the director when the solver asks for direction.
/// Serializes with a fixed `"type": "direction_request"` discriminator.
#[derive(Serialize)]
pub struct DirectionRequestPayload<'a> {
    // Fixed discriminator so the director can recognise the message kind.
    #[serde(rename = "type")]
    kind: &'static str,
    /// The solver's question or decision point.
    pub prompt: &'a str,
    /// Optional restatement of the run objective for context; omitted from
    /// the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub objective: Option<&'a str>,
}
|
||||
|
||||
impl<'a> DirectionRequestPayload<'a> {
    /// Builds a direction request; the `type` discriminator is always
    /// `"direction_request"`.
    pub fn new(prompt: &'a str, objective: Option<&'a str>) -> Self {
        Self {
            kind: "direction_request",
            prompt,
            objective,
        }
    }
}
|
||||
|
||||
/// Handle for talking to the director session of a run through the
/// cross-session hub.
pub struct DirectorRole {
    hub: Arc<CrossSessionHub>,
    // Identifies the run this director belongs to.
    run_id: String,
    // Role name used to address the director's session on the hub.
    role: String,
    // Upper bound on how long a single directive request may take.
    timeout: Duration,
    // Optional progress sink for surfacing session events.
    progress: Option<Arc<dyn ProgressReporter>>,
}
|
||||
|
||||
impl DirectorRole {
    /// Creates a director handle bound to the session addressed by
    /// `run_id` + `role` on the shared hub.
    pub fn new(
        hub: Arc<CrossSessionHub>,
        run_id: impl Into<String>,
        role: impl Into<String>,
        timeout: Duration,
        progress: Option<Arc<dyn ProgressReporter>>,
    ) -> Self {
        Self {
            hub,
            run_id: run_id.into(),
            role: role.into(),
            timeout,
            progress,
        }
    }

    /// JSON schema the director's reply must satisfy: a required `directive`
    /// string plus a nullable `rationale`; no extra fields allowed.
    pub fn response_schema() -> Value {
        serde_json::json!({
            "type": "object",
            "required": ["directive", "rationale"],
            "properties": {
                "directive": { "type": "string" },
                "rationale": { "type": ["string", "null"] }
            },
            "additionalProperties": false
        })
    }
}
|
||||
|
||||
impl Role<DirectionRequestPayload<'_>, DirectiveResponse> for DirectorRole {
    /// Sends a direction request to the director session, waits (up to the
    /// configured timeout) for the session to go idle, and parses the reply
    /// as a `DirectiveResponse`.
    fn call<'a>(
        &'a self,
        req: &'a DirectionRequestPayload<'a>,
    ) -> futures::future::BoxFuture<'a, Result<DirectiveResponse>> {
        Box::pin(async move {
            let request_text = serde_json::to_string_pretty(req)?;
            // Post the turn with the strict response schema so the model is
            // constrained to the directive JSON shape.
            let handle = session::post_turn(
                self.hub.as_ref(),
                &self.run_id,
                &self.role,
                request_text,
                Some(Self::response_schema()),
            )
            .await?;
            // Pair the reporter with this role's name so events are attributed.
            let progress = self
                .progress
                .as_deref()
                .map(|reporter| (reporter, self.role.as_str()));
            let response: AssistantMessage =
                session::await_first_idle(self.hub.as_ref(), &handle, self.timeout, progress)
                    .await?;
            parse_json_struct(&response.message.message)
        })
    }
}
|
||||
49
codex-rs/codex-infty/src/roles/mod.rs
Normal file
49
codex-rs/codex-infty/src/roles/mod.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use anyhow::Result;
|
||||
use futures::future::BoxFuture;
|
||||
|
||||
pub mod director;
|
||||
pub mod solver;
|
||||
pub mod verifier;
|
||||
pub mod verifier_pool;
|
||||
|
||||
/// Common request/response interface implemented by each run role
/// (solver, director, verifier). `call` borrows the request for the
/// lifetime of the returned future.
pub trait Role<Req, Resp> {
    fn call<'a>(&'a self, req: &'a Req) -> BoxFuture<'a, Result<Resp>>;
}
|
||||
|
||||
// Shared helpers used by role implementations
|
||||
use anyhow::Context as _;
|
||||
use anyhow::anyhow;
|
||||
use std::any::type_name;
|
||||
|
||||
/// Strips a Markdown code fence from `text`, returning the trimmed interior.
///
/// A leading language tag is dropped when it is any capitalisation of
/// `json` (```json / ```JSON / ```Json); other fenced content is returned
/// as-is minus the fence. Returns `None` when `text` is not wrapped in a
/// complete ``` … ``` fence. (The original only recognised the exact tags
/// `json` and `JSON`, so e.g. ```Json left the tag glued to the payload.)
pub(crate) fn strip_json_code_fence(text: &str) -> Option<&str> {
    let trimmed = text.trim();
    let body = trimmed.strip_prefix("```")?.strip_suffix("```")?;
    // Drop the language tag when it is (case-insensitively) "json".
    // Indexing at 4 is safe: the matched prefix is four ASCII bytes.
    let body = match body.as_bytes().get(..4) {
        Some(tag) if tag.eq_ignore_ascii_case(b"json") => &body[4..],
        _ => body,
    };
    Some(body.trim())
}
|
||||
|
||||
pub(crate) fn parse_json_struct<T>(message: &str) -> Result<T>
|
||||
where
|
||||
T: serde::de::DeserializeOwned,
|
||||
{
|
||||
let trimmed = message.trim();
|
||||
if trimmed.is_empty() {
|
||||
return Err(anyhow!("message was empty"));
|
||||
}
|
||||
|
||||
serde_json::from_str(trimmed)
|
||||
.or_else(|err| {
|
||||
strip_json_code_fence(trimmed)
|
||||
.map(|inner| serde_json::from_str(inner))
|
||||
.unwrap_or_else(|| Err(err))
|
||||
})
|
||||
.map_err(|err| anyhow!(err))
|
||||
.with_context(|| format!("failed to parse message as {}", type_name::<T>()))
|
||||
}
|
||||
202
codex-rs/codex-infty/src/roles/solver.rs
Normal file
202
codex-rs/codex-infty/src/roles/solver.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::cross_session::AssistantMessage;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use codex_core::cross_session::SessionEventStream;
|
||||
use codex_protocol::ConversationId;
|
||||
use serde::de::Error as _;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::progress::ProgressReporter;
|
||||
use crate::roles::Role;
|
||||
use crate::session;
|
||||
use crate::signals::AggregatedVerifierVerdict;
|
||||
use crate::signals::DirectiveResponse;
|
||||
|
||||
/// Handle for talking to the solver session of a run through the
/// cross-session hub.
pub struct SolverRole {
    hub: Arc<CrossSessionHub>,
    // Identifies the run this solver belongs to.
    run_id: String,
    // Role name used to address the solver's session on the hub.
    role: String,
    // Conversation id, needed to subscribe to the raw event stream.
    conversation_id: ConversationId,
    // Optional progress sink for surfacing session events.
    progress: Option<Arc<dyn ProgressReporter>>,
}
|
||||
|
||||
impl SolverRole {
    /// Creates a solver handle bound to the session addressed by
    /// `run_id` + `role` on the shared hub.
    pub fn new(
        hub: Arc<CrossSessionHub>,
        run_id: impl Into<String>,
        role: impl Into<String>,
        conversation_id: ConversationId,
        progress: Option<Arc<dyn ProgressReporter>>,
    ) -> Self {
        Self {
            hub,
            run_id: run_id.into(),
            role: role.into(),
            conversation_id,
            progress,
        }
    }

    /// Schema constraining solver output to the two allowed signals
    /// (`direction_request` / `final_delivery`); every field is required and
    /// nullable so the model always emits the full shape.
    pub fn solver_signal_schema() -> Value {
        // Only allow asking the director or sending the final result.
        serde_json::json!({
            "type": "object",
            "properties": {
                "type": { "type": "string", "enum": ["direction_request", "final_delivery"] },
                "prompt": { "type": ["string", "null"] },
                "deliverable_path": { "type": ["string", "null"] },
                "summary": { "type": ["string", "null"] }
            },
            "required": ["type", "prompt", "deliverable_path", "summary"],
            "additionalProperties": false
        })
    }

    /// Stricter schema used for the finalization turn: the reply must be a
    /// `final_delivery` with a non-null `deliverable_path`.
    pub fn final_delivery_schema() -> Value {
        serde_json::json!({
            "type": "object",
            "required": ["type", "deliverable_path", "summary"],
            "properties": {
                "type": { "const": "final_delivery" },
                "deliverable_path": { "type": "string" },
                "summary": { "type": ["string", "null"] }
            },
            "additionalProperties": false
        })
    }

    /// Posts a turn to the solver without waiting for the reply; the
    /// returned turn handle is intentionally discarded.
    pub async fn post(
        &self,
        text: impl Into<String>,
        final_output_json_schema: Option<Value>,
    ) -> Result<()> {
        let _ = session::post_turn(
            self.hub.as_ref(),
            &self.run_id,
            &self.role,
            text,
            final_output_json_schema,
        )
        .await?;
        Ok(())
    }

    /// Subscribes to the solver conversation's raw event stream.
    pub fn stream_events(
        &self,
    ) -> Result<SessionEventStream, codex_core::cross_session::CrossSessionError> {
        self.hub.stream_events(self.conversation_id)
    }

    /// Asks the solver to produce its final delivery and waits for the
    /// session to go idle once.
    pub async fn request_finalization_signal(&self) -> Result<()> {
        let handle = session::post_turn(
            self.hub.as_ref(),
            &self.run_id,
            &self.role,
            crate::types::FINALIZATION_PROMPT,
            Some(Self::final_delivery_schema()),
        )
        .await?;
        // Allow more time for the solver to start emitting the
        // finalization signal before timing out as "idle".
        let _ =
            session::await_first_idle(self.hub.as_ref(), &handle, Duration::from_secs(120), None)
                .await?;
        Ok(())
    }
}
|
||||
|
||||
/// A single turn to post to the solver: the message text, an optional schema
/// constraining the reply, and how long to wait for the session to go idle.
pub struct SolverPost {
    pub text: String,
    pub final_output_json_schema: Option<Value>,
    pub timeout: Duration,
}
|
||||
|
||||
/// Messages the orchestrator forwards to the solver: either a director
/// directive or the aggregated outcome of a verification round.
pub enum SolverRequest {
    Directive(DirectiveResponse),
    VerificationSummary(AggregatedVerifierVerdict),
}
|
||||
|
||||
// Wrap a director directive for forwarding to the solver.
impl From<DirectiveResponse> for SolverRequest {
    fn from(d: DirectiveResponse) -> Self {
        SolverRequest::Directive(d)
    }
}
|
||||
|
||||
// Wrap a verification summary (cloned from a borrow) for forwarding to the
// solver.
impl From<&AggregatedVerifierVerdict> for SolverRequest {
    fn from(v: &AggregatedVerifierVerdict) -> Self {
        SolverRequest::VerificationSummary(v.clone())
    }
}
|
||||
|
||||
impl SolverRequest {
|
||||
fn to_text(&self) -> Result<String> {
|
||||
match self {
|
||||
SolverRequest::Directive(d) => Ok(serde_json::to_string_pretty(d)?),
|
||||
SolverRequest::VerificationSummary(s) => Ok(serde_json::to_string_pretty(s)?),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Role<SolverPost, AssistantMessage> for SolverRole {
    /// Posts the turn to the solver session and waits (up to `req.timeout`)
    /// for the session to go idle, returning the assistant's message.
    fn call<'a>(
        &'a self,
        req: &'a SolverPost,
    ) -> futures::future::BoxFuture<'a, Result<AssistantMessage>> {
        Box::pin(async move {
            let handle = session::post_turn(
                self.hub.as_ref(),
                &self.run_id,
                &self.role,
                req.text.clone(),
                req.final_output_json_schema.clone(),
            )
            .await?;
            // Pair the reporter with this role's name so events are attributed.
            let progress = self
                .progress
                .as_deref()
                .map(|reporter| (reporter, self.role.as_str()));
            session::await_first_idle(self.hub.as_ref(), &handle, req.timeout, progress).await
        })
    }
}
|
||||
|
||||
impl Role<SolverRequest, ()> for SolverRole {
    /// Fire-and-forget delivery of a directive or verification summary to
    /// the solver, constrained by the solver signal schema.
    fn call<'a>(&'a self, req: &'a SolverRequest) -> futures::future::BoxFuture<'a, Result<()>> {
        Box::pin(async move {
            let text = req.to_text()?;
            self.post(text, Some(Self::solver_signal_schema())).await
        })
    }
}
|
||||
|
||||
/// Structured signal parsed out of a solver message. The `type` field is the
/// discriminator; all payload fields are optional so partially-filled
/// messages still deserialize.
#[derive(Debug, serde::Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum SolverSignal {
    /// The solver is asking the director a question.
    DirectionRequest {
        #[serde(default)]
        prompt: Option<String>,
    },
    /// The solver is delivering its final result.
    FinalDelivery {
        #[serde(default)]
        deliverable_path: Option<String>,
        #[serde(default)]
        summary: Option<String>,
    },
}
|
||||
|
||||
pub fn parse_solver_signal(message: &str) -> Option<SolverSignal> {
|
||||
let trimmed = message.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
serde_json::from_str(trimmed)
|
||||
.or_else(|_| {
|
||||
crate::roles::strip_json_code_fence(trimmed)
|
||||
.map(|inner| serde_json::from_str(inner.trim()))
|
||||
.unwrap_or_else(|| Err(serde_json::Error::custom("invalid payload")))
|
||||
})
|
||||
.ok()
|
||||
}
|
||||
132
codex-rs/codex-infty/src/roles/verifier.rs
Normal file
132
codex-rs/codex-infty/src/roles/verifier.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::cross_session::AssistantMessage;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::progress::ProgressReporter;
|
||||
use crate::roles::Role;
|
||||
use crate::roles::parse_json_struct;
|
||||
use crate::session;
|
||||
use crate::signals::VerifierVerdict;
|
||||
|
||||
/// JSON payload sent to a verifier asking it to check the claim at
/// `claim_path`. Serializes with a fixed `"type": "verification_request"`
/// discriminator.
#[derive(Serialize)]
pub struct VerificationRequestPayload<'a> {
    // Fixed discriminator so the verifier can recognise the message kind.
    #[serde(rename = "type")]
    kind: &'static str,
    /// Path of the claim/artifact to verify (relative to the run store).
    pub claim_path: &'a str,
    /// Optional free-form notes from the solver; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub notes: Option<&'a str>,
    /// Optional restatement of the run objective; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub objective: Option<&'a str>,
}
|
||||
|
||||
impl<'a> VerificationRequestPayload<'a> {
    /// Builds a verification request; the `type` discriminator is always
    /// `"verification_request"`.
    pub fn new(claim_path: &'a str, notes: Option<&'a str>, objective: Option<&'a str>) -> Self {
        Self {
            kind: "verification_request",
            claim_path,
            notes,
            objective,
        }
    }
}
|
||||
|
||||
/// Handle for talking to one verifier session of a run through the
/// cross-session hub.
pub struct VerifierRole {
    hub: Arc<CrossSessionHub>,
    // Identifies the run this verifier belongs to.
    run_id: String,
    // Role name used to address this verifier's session on the hub.
    role: String,
    // Upper bound on how long a single verification request may take.
    timeout: Duration,
    // Optional progress sink for surfacing session events.
    progress: Option<Arc<dyn ProgressReporter>>,
}
|
||||
|
||||
impl VerifierRole {
    /// Creates a verifier handle bound to the session addressed by
    /// `run_id` + `role` on the shared hub.
    pub fn new(
        hub: Arc<CrossSessionHub>,
        run_id: impl Into<String>,
        role: impl Into<String>,
        timeout: Duration,
        progress: Option<Arc<dyn ProgressReporter>>,
    ) -> Self {
        Self {
            hub,
            run_id: run_id.into(),
            role: role.into(),
            timeout,
            progress,
        }
    }

    /// This verifier's role name.
    pub fn role(&self) -> &str {
        &self.role
    }

    /// JSON schema the verifier's reply must satisfy: a pass/fail `verdict`
    /// plus string arrays of `reasons` and `suggestions`; no extra fields.
    pub fn response_schema() -> Value {
        serde_json::json!({
            "type": "object",
            "required": ["verdict", "reasons", "suggestions"],
            "properties": {
                "verdict": { "type": "string", "enum": ["pass", "fail"] },
                "reasons": { "type": "array", "items": { "type": "string" } },
                "suggestions": { "type": "array", "items": { "type": "string" } }
            },
            "additionalProperties": false
        })
    }
}
|
||||
|
||||
impl Role<VerificationRequestPayload<'_>, VerifierVerdict> for VerifierRole {
    /// Sends the verification request to the verifier session, waits (up to
    /// the configured timeout) for it to go idle, and parses the reply as a
    /// `VerifierVerdict`.
    fn call<'a>(
        &'a self,
        req: &'a VerificationRequestPayload<'a>,
    ) -> futures::future::BoxFuture<'a, Result<VerifierVerdict>> {
        Box::pin(async move {
            let request_text = serde_json::to_string_pretty(req)?;
            // Post the turn with the strict response schema so the model is
            // constrained to the verdict JSON shape.
            let handle = session::post_turn(
                self.hub.as_ref(),
                &self.run_id,
                &self.role,
                request_text,
                Some(Self::response_schema()),
            )
            .await?;
            // Pair the reporter with this role's name so events are attributed.
            let progress = self
                .progress
                .as_deref()
                .map(|reporter| (reporter, self.role.as_str()));
            let response: AssistantMessage =
                session::await_first_idle(self.hub.as_ref(), &handle, self.timeout, progress)
                    .await?;
            parse_json_struct(&response.message.message)
        })
    }
}
|
||||
|
||||
pub fn aggregate_verdicts(items: Vec<(String, VerifierVerdict)>) -> AggregatedVerifierVerdict {
|
||||
let mut overall = VerifierDecision::Pass;
|
||||
let mut verdicts = Vec::with_capacity(items.len());
|
||||
|
||||
for (role, verdict) in items {
|
||||
if !verdict.verdict.is_pass() {
|
||||
overall = VerifierDecision::Fail;
|
||||
}
|
||||
verdicts.push(VerifierReport {
|
||||
role,
|
||||
verdict: verdict.verdict,
|
||||
reasons: verdict.reasons,
|
||||
suggestions: verdict.suggestions,
|
||||
});
|
||||
}
|
||||
|
||||
AggregatedVerifierVerdict {
|
||||
kind: "verification_feedback",
|
||||
overall,
|
||||
verdicts,
|
||||
}
|
||||
}
|
||||
use crate::signals::AggregatedVerifierVerdict;
|
||||
use crate::signals::VerifierDecision;
|
||||
use crate::signals::VerifierReport;
|
||||
153
codex-rs/codex-infty/src/roles/verifier_pool.rs
Normal file
153
codex-rs/codex-infty/src/roles/verifier_pool.rs
Normal file
@@ -0,0 +1,153 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context as _;
|
||||
use anyhow::Result;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use codex_core::protocol::Op;
|
||||
|
||||
use crate::progress::ProgressReporter;
|
||||
use crate::prompts;
|
||||
use crate::roles::Role;
|
||||
use crate::roles::verifier::VerificationRequestPayload;
|
||||
use crate::roles::verifier::VerifierRole;
|
||||
use crate::roles::verifier::aggregate_verdicts;
|
||||
use crate::session;
|
||||
use crate::signals::AggregatedVerifierVerdict;
|
||||
use crate::signals::VerifierVerdict;
|
||||
use crate::types::RoleConfig;
|
||||
use crate::types::RunSessions;
|
||||
|
||||
/// Outcome of one verification round: the aggregated summary plus the names
/// of the verifiers that passed (used by the replacement policy).
pub struct VerificationRound {
    pub summary: AggregatedVerifierVerdict,
    pub passing_roles: Vec<String>,
}
|
||||
|
||||
/// Manages the set of verifier handles for a run and drives verification
/// rounds against them.
pub struct VerifierPool {
    hub: Arc<CrossSessionHub>,
    // Identifies the run the verifiers belong to.
    run_id: String,
    // Per-request timeout applied to every verifier.
    timeout: Duration,
    // Optional progress sink shared by all verifier handles.
    progress: Option<Arc<dyn ProgressReporter>>,
    // One handle per verifier session.
    roles: Vec<VerifierRole>,
}
|
||||
|
||||
impl VerifierPool {
    /// Builds one `VerifierRole` handle per verifier session in `sessions`,
    /// all sharing the same hub, timeout, and optional progress reporter.
    pub fn from_sessions(
        hub: Arc<CrossSessionHub>,
        sessions: &RunSessions,
        timeout: Duration,
        progress: Option<Arc<dyn ProgressReporter>>,
    ) -> Self {
        let roles = sessions
            .verifiers
            .iter()
            .map(|v| {
                VerifierRole::new(
                    Arc::clone(&hub),
                    sessions.run_id.clone(),
                    v.role.clone(),
                    timeout,
                    progress.clone(),
                )
            })
            .collect();
        Self {
            hub,
            run_id: sessions.run_id.clone(),
            timeout,
            progress,
            roles,
        }
    }

    /// True when the run has no verifier sessions.
    pub fn is_empty(&self) -> bool {
        self.roles.is_empty()
    }

    /// Sends `request` to every verifier concurrently, reports each verdict
    /// to the progress sink, and aggregates the results. Errors out if any
    /// verifier returns verdict JSON that fails to parse.
    pub async fn collect_round(
        &self,
        request: &VerificationRequestPayload<'_>,
    ) -> Result<VerificationRound> {
        let futures = self
            .roles
            .iter()
            .map(|role| async {
                let name = role.role().to_string();
                let verdict = role.call(request).await;
                (name, verdict)
            })
            .collect::<Vec<_>>();
        let joined = futures::future::join_all(futures).await;

        let mut results: Vec<(String, VerifierVerdict)> = Vec::with_capacity(joined.len());
        let mut passing_roles: Vec<String> = Vec::new();
        for (name, verdict_res) in joined.into_iter() {
            let verdict = verdict_res
                .with_context(|| format!("verifier {} returned invalid verdict JSON", name))?;
            if let Some(progress) = self.progress.as_ref() {
                progress.verifier_verdict(&name, &verdict);
            }
            if verdict.verdict.is_pass() {
                passing_roles.push(name.clone());
            }
            results.push((name, verdict));
        }
        let summary = aggregate_verdicts(results);
        Ok(VerificationRound {
            summary,
            passing_roles,
        })
    }

    /// Rebuilds the pool-side handle for `role_name` (used after the backing
    /// session is respawned); silently no-ops for unknown roles.
    pub fn replace_role(&mut self, role_name: &str) {
        if let Some(idx) = self.roles.iter().position(|v| v.role() == role_name) {
            self.roles[idx] = VerifierRole::new(
                Arc::clone(&self.hub),
                self.run_id.clone(),
                role_name.to_string(),
                self.timeout,
                self.progress.clone(),
            );
        }
    }

    /// Replacement policy: shut down and respawn each *passing* verifier so
    /// the next round gets a fresh session, leaving failing verifiers (and
    /// their context) in place.
    pub async fn rotate_passing(
        &mut self,
        sessions: &mut RunSessions,
        manager: &ConversationManager,
        passing_roles: &[String],
    ) -> Result<()> {
        for role in passing_roles {
            // find existing index
            let Some(idx) = sessions.verifiers.iter().position(|s| &s.role == role) else {
                continue;
            };
            let old = &sessions.verifiers[idx];
            // best-effort shutdown and unregister
            let _ = old.conversation.submit(Op::Shutdown).await;
            let _ = manager.remove_conversation(&old.conversation_id).await;

            // Reuse the existing verifier's config so overrides (e.g., base_url in tests)
            // are preserved when respawning a passing verifier.
            let config = old.config.clone();
            let role_config = RoleConfig::new(role.to_string(), config);
            let run_path = sessions.store.path();
            let session = session::spawn_role(
                Arc::clone(&self.hub),
                manager,
                &self.run_id,
                run_path,
                role_config,
                prompts::ensure_instructions,
            )
            .await?;
            // Persist the new session's rollout path before swapping it in.
            sessions
                .store
                .update_rollout_path(&session.role, session.rollout_path.clone())?;
            sessions.verifiers[idx] = session;
            self.replace_role(role);
        }
        Ok(())
    }
}
|
||||
211
codex-rs/codex-infty/src/run_store.rs
Normal file
211
codex-rs/codex-infty/src/run_store.rs
Normal file
@@ -0,0 +1,211 @@
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use chrono::DateTime;
|
||||
use chrono::Utc;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
const ARTIFACTS_DIR: &str = "artifacts";
|
||||
const MEMORY_DIR: &str = "memory";
|
||||
const INDEX_DIR: &str = "index";
|
||||
const DELIVERABLE_DIR: &str = "deliverable";
|
||||
const METADATA_FILE: &str = "run.json";
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RoleMetadata {
|
||||
pub role: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub rollout_path: Option<PathBuf>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub config_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RunMetadata {
|
||||
pub run_id: String,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
pub roles: Vec<RoleMetadata>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RunStore {
|
||||
path: PathBuf,
|
||||
metadata: RunMetadata,
|
||||
}
|
||||
|
||||
impl RunStore {
|
||||
pub fn initialize(
|
||||
run_path: impl AsRef<Path>,
|
||||
run_id: &str,
|
||||
roles: &[RoleMetadata],
|
||||
) -> Result<Self> {
|
||||
let run_path = run_path.as_ref().to_path_buf();
|
||||
fs::create_dir_all(&run_path)
|
||||
.with_context(|| format!("failed to create run directory {}", run_path.display()))?;
|
||||
|
||||
for child in [ARTIFACTS_DIR, MEMORY_DIR, INDEX_DIR, DELIVERABLE_DIR] {
|
||||
fs::create_dir_all(run_path.join(child))
|
||||
.with_context(|| format!("failed to create subdirectory {child}"))?;
|
||||
}
|
||||
|
||||
let metadata_path = run_path.join(METADATA_FILE);
|
||||
if metadata_path.exists() {
|
||||
return Err(anyhow!(
|
||||
"run metadata already exists at {}",
|
||||
metadata_path.display()
|
||||
));
|
||||
}
|
||||
|
||||
let now = Utc::now();
|
||||
let metadata = RunMetadata {
|
||||
run_id: run_id.to_string(),
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
roles: roles.to_vec(),
|
||||
};
|
||||
write_metadata(&metadata_path, &metadata)?;
|
||||
|
||||
Ok(Self {
|
||||
path: run_path,
|
||||
metadata,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load(run_path: impl AsRef<Path>) -> Result<Self> {
|
||||
let run_path = run_path.as_ref().to_path_buf();
|
||||
let metadata_path = run_path.join(METADATA_FILE);
|
||||
let metadata: RunMetadata = serde_json::from_slice(
|
||||
&fs::read(&metadata_path)
|
||||
.with_context(|| format!("failed to read {}", metadata_path.display()))?,
|
||||
)
|
||||
.with_context(|| format!("failed to parse {}", metadata_path.display()))?;
|
||||
|
||||
Ok(Self {
|
||||
path: run_path,
|
||||
metadata,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.path
|
||||
}
|
||||
|
||||
pub fn metadata(&self) -> &RunMetadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
pub fn role_metadata(&self, role: &str) -> Option<&RoleMetadata> {
|
||||
self.metadata.roles.iter().find(|meta| meta.role == role)
|
||||
}
|
||||
|
||||
pub fn update_rollout_path(&mut self, role: &str, rollout_path: PathBuf) -> Result<()> {
|
||||
if let Some(meta) = self
|
||||
.metadata
|
||||
.roles
|
||||
.iter_mut()
|
||||
.find(|meta| meta.role == role)
|
||||
{
|
||||
meta.rollout_path = Some(rollout_path);
|
||||
self.commit_metadata()
|
||||
} else {
|
||||
Err(anyhow!("role {role} not found in run store"))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_role_config_path(&mut self, role: &str, path: PathBuf) -> Result<()> {
|
||||
if let Some(meta) = self
|
||||
.metadata
|
||||
.roles
|
||||
.iter_mut()
|
||||
.find(|meta| meta.role == role)
|
||||
{
|
||||
meta.config_path = Some(path);
|
||||
self.commit_metadata()
|
||||
} else {
|
||||
Err(anyhow!("role {role} not found in run store"))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn touch(&mut self) -> Result<()> {
|
||||
self.metadata.updated_at = Utc::now();
|
||||
self.commit_metadata()
|
||||
}
|
||||
|
||||
fn commit_metadata(&mut self) -> Result<()> {
|
||||
self.metadata.updated_at = Utc::now();
|
||||
let metadata_path = self.path.join(METADATA_FILE);
|
||||
write_metadata(&metadata_path, &self.metadata)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_metadata(path: &Path, metadata: &RunMetadata) -> Result<()> {
|
||||
let parent = path
|
||||
.parent()
|
||||
.ok_or_else(|| anyhow!("metadata path must have parent"))?;
|
||||
let mut temp = NamedTempFile::new_in(parent)
|
||||
.with_context(|| format!("failed to create temp file in {}", parent.display()))?;
|
||||
serde_json::to_writer_pretty(&mut temp, metadata)?;
|
||||
temp.flush()?;
|
||||
temp.persist(path)
|
||||
.with_context(|| format!("failed to persist metadata to {}", path.display()))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[test]
|
||||
fn initialize_creates_directories_and_metadata() {
|
||||
let temp = TempDir::new().unwrap();
|
||||
let run_path = temp.path().join("run_1");
|
||||
let roles = vec![
|
||||
RoleMetadata {
|
||||
role: "solver".into(),
|
||||
rollout_path: None,
|
||||
config_path: None,
|
||||
},
|
||||
RoleMetadata {
|
||||
role: "director".into(),
|
||||
rollout_path: None,
|
||||
config_path: None,
|
||||
},
|
||||
];
|
||||
|
||||
let store = RunStore::initialize(&run_path, "run_1", &roles).unwrap();
|
||||
assert!(store.path().join(ARTIFACTS_DIR).is_dir());
|
||||
assert!(store.path().join(MEMORY_DIR).is_dir());
|
||||
assert!(store.path().join(INDEX_DIR).is_dir());
|
||||
assert!(store.path().join(DELIVERABLE_DIR).is_dir());
|
||||
assert_eq!(store.metadata().roles.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn update_rollout_persists_metadata() {
|
||||
let temp = TempDir::new().unwrap();
|
||||
let run_path = temp.path().join("run_2");
|
||||
let roles = vec![RoleMetadata {
|
||||
role: "solver".into(),
|
||||
rollout_path: None,
|
||||
config_path: None,
|
||||
}];
|
||||
let mut store = RunStore::initialize(&run_path, "run_2", &roles).unwrap();
|
||||
let rollout = PathBuf::from("/tmp/rollout.jsonl");
|
||||
store
|
||||
.update_rollout_path("solver", rollout.clone())
|
||||
.unwrap();
|
||||
|
||||
let loaded = RunStore::load(&run_path).unwrap();
|
||||
let solver = loaded.role_metadata("solver").unwrap();
|
||||
assert_eq!(solver.rollout_path.as_ref().unwrap(), &rollout);
|
||||
}
|
||||
}
|
||||
112
codex-rs/codex-infty/src/session.rs
Normal file
112
codex-rs/codex-infty/src/session.rs
Normal file
@@ -0,0 +1,112 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use anyhow::bail;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::CrossSessionSpawnParams;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::cross_session::AssistantMessage;
|
||||
use codex_core::cross_session::CrossSessionError;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use codex_core::cross_session::PostUserTurnRequest;
|
||||
use codex_core::cross_session::RoleOrId;
|
||||
use codex_core::cross_session::TurnHandle;
|
||||
use serde_json::Value;
|
||||
use tokio::time::Instant;
|
||||
use tokio_stream::StreamExt as _;
|
||||
|
||||
use crate::progress::ProgressReporter;
|
||||
use crate::types::RoleConfig;
|
||||
use crate::types::RoleSession;
|
||||
|
||||
pub async fn spawn_role(
|
||||
hub: Arc<CrossSessionHub>,
|
||||
manager: &ConversationManager,
|
||||
run_id: &str,
|
||||
run_path: &Path,
|
||||
role_config: RoleConfig,
|
||||
ensure_instructions: impl FnOnce(&str, &mut Config),
|
||||
) -> Result<RoleSession> {
|
||||
let RoleConfig {
|
||||
role, mut config, ..
|
||||
} = role_config;
|
||||
config.cwd = run_path.to_path_buf();
|
||||
ensure_instructions(&role, &mut config);
|
||||
let cfg_for_session = config.clone();
|
||||
let session = manager
|
||||
.new_conversation_with_cross_session(
|
||||
cfg_for_session,
|
||||
CrossSessionSpawnParams {
|
||||
hub: Arc::clone(&hub),
|
||||
run_id: Some(run_id.to_string()),
|
||||
role: Some(role.clone()),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
// Note: include the final config used to spawn the session
|
||||
Ok(RoleSession::from_new(role, session, config))
|
||||
}
|
||||
|
||||
// resumable runs are disabled for now; resume_role removed
|
||||
|
||||
pub async fn post_turn(
|
||||
hub: &CrossSessionHub,
|
||||
run_id: &str,
|
||||
role: &str,
|
||||
text: impl Into<String>,
|
||||
final_output_json_schema: Option<Value>,
|
||||
) -> Result<TurnHandle, CrossSessionError> {
|
||||
hub.post_user_turn(PostUserTurnRequest {
|
||||
target: RoleOrId::RunRole {
|
||||
run_id: run_id.to_string(),
|
||||
role: role.to_string(),
|
||||
},
|
||||
text: text.into(),
|
||||
final_output_json_schema,
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn await_first_idle(
|
||||
hub: &CrossSessionHub,
|
||||
handle: &TurnHandle,
|
||||
idle_timeout: Duration,
|
||||
progress: Option<(&dyn ProgressReporter, &str)>,
|
||||
) -> Result<AssistantMessage> {
|
||||
let mut events = hub.stream_events(handle.conversation_id())?;
|
||||
let wait_first = hub.await_first_assistant(handle, idle_timeout);
|
||||
tokio::pin!(wait_first);
|
||||
|
||||
let idle = tokio::time::sleep(idle_timeout);
|
||||
tokio::pin!(idle);
|
||||
|
||||
let submission_id = handle.submission_id().to_string();
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
result = &mut wait_first => {
|
||||
return result.map_err(|err| anyhow!(err));
|
||||
}
|
||||
maybe_event = events.next() => {
|
||||
let Some(event) = maybe_event else {
|
||||
bail!(CrossSessionError::SessionClosed);
|
||||
};
|
||||
if event.event.id == submission_id {
|
||||
if let Some((reporter, role)) = progress {
|
||||
reporter.role_event(role, &event.event.msg);
|
||||
}
|
||||
if let codex_core::protocol::EventMsg::Error(err) = &event.event.msg {
|
||||
bail!(anyhow!(err.message.clone()));
|
||||
}
|
||||
idle.as_mut().reset(Instant::now() + idle_timeout);
|
||||
}
|
||||
}
|
||||
_ = &mut idle => {
|
||||
bail!(CrossSessionError::AwaitTimeout(idle_timeout));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
55
codex-rs/codex-infty/src/signals.rs
Normal file
55
codex-rs/codex-infty/src/signals.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct DirectiveResponse {
|
||||
pub directive: String,
|
||||
#[serde(default)]
|
||||
pub rationale: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum VerifierDecision {
|
||||
Pass,
|
||||
Fail,
|
||||
}
|
||||
|
||||
impl VerifierDecision {
|
||||
pub fn is_pass(self) -> bool {
|
||||
matches!(self, VerifierDecision::Pass)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Clone)]
|
||||
pub struct VerifierVerdict {
|
||||
pub verdict: VerifierDecision,
|
||||
#[serde(default)]
|
||||
pub reasons: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub suggestions: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct VerifierReport {
|
||||
pub role: String,
|
||||
pub verdict: VerifierDecision,
|
||||
#[serde(default)]
|
||||
pub reasons: Vec<String>,
|
||||
#[serde(default)]
|
||||
pub suggestions: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct AggregatedVerifierVerdict {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: &'static str,
|
||||
pub overall: VerifierDecision,
|
||||
pub verdicts: Vec<VerifierReport>,
|
||||
}
|
||||
|
||||
impl From<&AggregatedVerifierVerdict> for String {
|
||||
fn from(value: &AggregatedVerifierVerdict) -> Self {
|
||||
serde_json::to_string_pretty(value).unwrap_or_else(|_| "{}".to_string())
|
||||
}
|
||||
}
|
||||
103
codex-rs/codex-infty/src/types.rs
Normal file
103
codex-rs/codex-infty/src/types.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::ConversationId;
|
||||
|
||||
pub(crate) const DEFAULT_DIRECTOR_TIMEOUT: Duration = Duration::from_secs(1200);
|
||||
pub(crate) const DEFAULT_VERIFIER_TIMEOUT: Duration = Duration::from_secs(1800);
|
||||
pub(crate) const FINALIZATION_PROMPT: &str = "Create deliverable/: include compiled artifacts or scripts, usage docs, and tests. Write deliverable/summary.txt capturing the final answer, evidence, and follow-up steps. Also provide deliverable/README.md with overview, manifest (paths and sizes), verification steps, and limitations. Remove scratch files. Reply with JSON: {\"type\":\"final_delivery\",\"deliverable_path\":\"deliverable/summary.txt\",\"summary\":\"<answer plus supporting context>\"}.";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RoleConfig {
|
||||
pub role: String,
|
||||
pub config: Config,
|
||||
pub config_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl RoleConfig {
|
||||
pub fn new(role: impl Into<String>, mut config: Config) -> Self {
|
||||
config.sandbox_policy = SandboxPolicy::DangerFullAccess;
|
||||
config.approval_policy = AskForApproval::Never;
|
||||
Self {
|
||||
role: role.into(),
|
||||
config,
|
||||
config_path: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_path(role: impl Into<String>, config: Config, config_path: PathBuf) -> Self {
|
||||
Self {
|
||||
role: role.into(),
|
||||
config,
|
||||
config_path: Some(config_path),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RunParams {
|
||||
pub run_id: String,
|
||||
pub run_root: Option<PathBuf>,
|
||||
pub solver: RoleConfig,
|
||||
pub director: RoleConfig,
|
||||
pub verifiers: Vec<RoleConfig>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RunExecutionOptions {
|
||||
pub objective: Option<String>,
|
||||
pub director_timeout: Duration,
|
||||
pub verifier_timeout: Duration,
|
||||
}
|
||||
|
||||
impl Default for RunExecutionOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
objective: None,
|
||||
director_timeout: DEFAULT_DIRECTOR_TIMEOUT,
|
||||
verifier_timeout: DEFAULT_VERIFIER_TIMEOUT,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RunOutcome {
|
||||
pub run_id: String,
|
||||
pub deliverable_path: PathBuf,
|
||||
pub summary: Option<String>,
|
||||
pub raw_message: String,
|
||||
}
|
||||
|
||||
pub struct RoleSession {
|
||||
pub role: String,
|
||||
pub conversation_id: ConversationId,
|
||||
pub conversation: Arc<CodexConversation>,
|
||||
pub session_configured: codex_core::protocol::SessionConfiguredEvent,
|
||||
pub rollout_path: PathBuf,
|
||||
pub config: Config,
|
||||
}
|
||||
|
||||
impl RoleSession {
|
||||
pub(crate) fn from_new(role: String, session: NewConversation, config: Config) -> Self {
|
||||
Self {
|
||||
role,
|
||||
conversation_id: session.conversation_id,
|
||||
conversation: session.conversation,
|
||||
session_configured: session.session_configured.clone(),
|
||||
rollout_path: session.session_configured.rollout_path.clone(),
|
||||
config,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RunSessions {
|
||||
pub run_id: String,
|
||||
pub solver: RoleSession,
|
||||
pub director: RoleSession,
|
||||
pub verifiers: Vec<RoleSession>,
|
||||
pub store: crate::RunStore,
|
||||
}
|
||||
91
codex-rs/codex-infty/src/utils.rs
Normal file
91
codex-rs/codex-infty/src/utils.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use anyhow::bail;
|
||||
|
||||
pub fn trim_to_non_empty(opt: Option<String>) -> Option<String> {
|
||||
opt.and_then(|s| {
|
||||
let trimmed = s.trim();
|
||||
if trimmed.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(trimmed.to_string())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn required_trimmed(opt: Option<String>, err_msg: &str) -> Result<String> {
|
||||
trim_to_non_empty(opt).ok_or_else(|| anyhow!(err_msg.to_string()))
|
||||
}
|
||||
|
||||
pub fn resolve_deliverable_path(base: &Path, candidate: &str) -> Result<PathBuf> {
|
||||
let base_abs = base
|
||||
.canonicalize()
|
||||
.with_context(|| format!("failed to canonicalize run store {}", base.display()))?;
|
||||
|
||||
let candidate_path = Path::new(candidate);
|
||||
let joined = if candidate_path.is_absolute() {
|
||||
candidate_path.to_path_buf()
|
||||
} else {
|
||||
base_abs.join(candidate_path)
|
||||
};
|
||||
|
||||
let resolved = joined.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"failed to canonicalize deliverable path {}",
|
||||
joined.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
if !resolved.starts_with(&base_abs) {
|
||||
bail!(
|
||||
"deliverable path {} escapes run store {}",
|
||||
resolved.display(),
|
||||
base_abs.display()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(resolved)
|
||||
}
|
||||
|
||||
pub fn objective_as_str(options: &crate::types::RunExecutionOptions) -> Option<&str> {
|
||||
options
|
||||
.objective
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[test]
|
||||
fn resolve_deliverable_within_base() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let base = tmp.path();
|
||||
std::fs::create_dir_all(base.join("deliverable")).unwrap();
|
||||
std::fs::write(base.join("deliverable").join("a.txt"), "ok").unwrap();
|
||||
let resolved = resolve_deliverable_path(base, "deliverable/a.txt").unwrap();
|
||||
let base_abs = base.canonicalize().unwrap();
|
||||
assert!(resolved.starts_with(&base_abs));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_deliverable_rejects_escape() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let base = tmp.path();
|
||||
// Create a real file outside of base so canonicalization succeeds
|
||||
let outside = TempDir::new().unwrap();
|
||||
let outside_file = outside.path().join("outside.txt");
|
||||
std::fs::write(&outside_file, "nope").unwrap();
|
||||
|
||||
let err = resolve_deliverable_path(base, outside_file.to_str().unwrap()).unwrap_err();
|
||||
let msg = format!("{err}");
|
||||
assert!(msg.contains("escapes run store"));
|
||||
}
|
||||
}
|
||||
327
codex-rs/codex-infty/tests/orchestrator.rs
Normal file
327
codex-rs/codex-infty/tests/orchestrator.rs
Normal file
@@ -0,0 +1,327 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::cross_session::AssistantMessage;
|
||||
use codex_core::cross_session::PostUserTurnRequest;
|
||||
use codex_core::cross_session::RoleOrId;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_infty::InftyOrchestrator;
|
||||
use codex_infty::RoleConfig;
|
||||
use codex_infty::RunExecutionOptions;
|
||||
use codex_infty::RunParams;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn orchestrator_routes_between_roles_and_records_store() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let bodies = vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_assistant_message("solver-msg-1", "Need direction"),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-1"),
|
||||
responses::ev_assistant_message("director-msg-1", "Proceed iteratively"),
|
||||
responses::ev_completed("director-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_assistant_message("solver-msg-2", "Acknowledged"),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]),
|
||||
];
|
||||
let response_mock = responses::mount_sse_sequence(&server, bodies).await;
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-orchestrator".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
|
||||
let sessions = orchestrator
|
||||
.spawn_run(RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(runs_root.path().join("runs").join(&run_id)),
|
||||
solver: RoleConfig::new("solver", solver_config.clone()),
|
||||
director: RoleConfig::new("director", director_config.clone()),
|
||||
verifiers: Vec::new(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
let solver_message = call_role(
|
||||
&orchestrator,
|
||||
&sessions.run_id,
|
||||
"solver",
|
||||
"kick off plan",
|
||||
Duration::from_secs(1),
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(solver_message.message.message, "Need direction");
|
||||
|
||||
let director_message = relay_assistant_to_role(
|
||||
&orchestrator,
|
||||
&sessions.run_id,
|
||||
"director",
|
||||
&solver_message,
|
||||
Duration::from_secs(1),
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(director_message.message.message, "Proceed iteratively");
|
||||
|
||||
let solver_reply = relay_assistant_to_role(
|
||||
&orchestrator,
|
||||
&sessions.run_id,
|
||||
"solver",
|
||||
&director_message,
|
||||
Duration::from_secs(1),
|
||||
)
|
||||
.await?;
|
||||
assert_eq!(solver_reply.message.message, "Acknowledged");
|
||||
|
||||
assert_eq!(response_mock.requests().len(), 3);
|
||||
let first_request = response_mock.requests().first().unwrap().body_json();
|
||||
let instructions = first_request["instructions"]
|
||||
.as_str()
|
||||
.expect("request should set instructions");
|
||||
assert!(
|
||||
instructions.contains("brilliant mathematician"),
|
||||
"missing solver prompt: {instructions}"
|
||||
);
|
||||
assert!(sessions.store.path().is_dir());
|
||||
let solver_meta = sessions.store.role_metadata("solver").unwrap();
|
||||
assert!(solver_meta.rollout_path.is_some());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// resumable runs are disabled; resume test removed
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn execute_new_run_drives_to_completion() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let bodies = vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_assistant_message(
|
||||
"solver-msg-1",
|
||||
r#"{"type":"direction_request","prompt":"Need directive","claim_path":null,"notes":null,"deliverable_path":null,"summary":null}"#,
|
||||
),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-1"),
|
||||
responses::ev_assistant_message(
|
||||
"director-msg-1",
|
||||
r#"{"directive":"Proceed","rationale":"Follow the plan"}"#,
|
||||
),
|
||||
responses::ev_completed("director-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_assistant_message("solver-msg-2", "Acknowledged"),
|
||||
responses::ev_assistant_message(
|
||||
"solver-msg-4",
|
||||
r#"{"type":"final_delivery","prompt":null,"claim_path":null,"notes":null,"deliverable_path":"deliverable","summary":"done"}"#,
|
||||
),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]),
|
||||
// Final verification of the deliverable
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("verifier-resp-3"),
|
||||
responses::ev_assistant_message(
|
||||
"verifier-msg-3",
|
||||
r#"{"verdict":"pass","reasons":[],"suggestions":[]}"#,
|
||||
),
|
||||
responses::ev_completed("verifier-resp-3"),
|
||||
]),
|
||||
// Feedback turn summarizing the verification outcome back to the solver
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-5"),
|
||||
responses::ev_completed("solver-resp-5"),
|
||||
]),
|
||||
];
|
||||
for body in bodies {
|
||||
responses::mount_sse_once(&server, body).await;
|
||||
}
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-auto".to_string();
|
||||
let run_root = runs_root.path().join("runs").join(&run_id);
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
let verifier_config = build_config(&server).await?;
|
||||
|
||||
let options = RunExecutionOptions {
|
||||
objective: Some("Implement feature".to_string()),
|
||||
..RunExecutionOptions::default()
|
||||
};
|
||||
|
||||
let outcome = orchestrator
|
||||
.execute_new_run(
|
||||
RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(run_root.clone()),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: vec![RoleConfig::new("verifier", verifier_config)],
|
||||
},
|
||||
options,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(outcome.run_id, run_id);
|
||||
assert_eq!(outcome.summary.as_deref(), Some("done"));
|
||||
assert!(outcome.raw_message.contains("final_delivery"));
|
||||
let canonical_run_root = std::fs::canonicalize(&run_root)?;
|
||||
let canonical_deliverable = std::fs::canonicalize(&outcome.deliverable_path)?;
|
||||
assert!(canonical_deliverable.starts_with(&canonical_run_root));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn spawn_run_cleans_up_on_failure() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let bodies = vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-1"),
|
||||
responses::ev_completed("director-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("dup-resp"),
|
||||
responses::ev_completed("dup-resp"),
|
||||
]),
|
||||
];
|
||||
for body in bodies {
|
||||
responses::mount_sse_once(&server, body).await;
|
||||
}
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-cleanup".to_string();
|
||||
let run_path = runs_root.path().join("runs").join(&run_id);
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
|
||||
let result = orchestrator
|
||||
.spawn_run(RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(run_path.clone()),
|
||||
solver: RoleConfig::new("solver", solver_config.clone()),
|
||||
director: RoleConfig::new("director", director_config.clone()),
|
||||
verifiers: vec![RoleConfig::new("solver", solver_config.clone())],
|
||||
})
|
||||
.await;
|
||||
assert!(result.is_err());
|
||||
assert!(!run_path.exists(), "failed run should remove run directory");
|
||||
|
||||
let bodies = vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-2"),
|
||||
responses::ev_completed("director-resp-2"),
|
||||
]),
|
||||
];
|
||||
for body in bodies {
|
||||
responses::mount_sse_once(&server, body).await;
|
||||
}
|
||||
|
||||
let sessions = orchestrator
|
||||
.spawn_run(RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(run_path.clone()),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: Vec::new(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
sessions.solver.conversation.submit(Op::Shutdown).await.ok();
|
||||
sessions
|
||||
.director
|
||||
.conversation
|
||||
.submit(Op::Shutdown)
|
||||
.await
|
||||
.ok();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build_config(server: &MockServer) -> anyhow::Result<Config> {
|
||||
let home = TempDir::new()?;
|
||||
let cwd = TempDir::new()?;
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
let mut provider = built_in_model_providers()["openai"].clone();
|
||||
provider.base_url = Some(format!("{}/v1", server.uri()));
|
||||
config.model_provider = provider;
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
async fn call_role(
|
||||
orchestrator: &InftyOrchestrator,
|
||||
run_id: &str,
|
||||
role: &str,
|
||||
text: &str,
|
||||
timeout: Duration,
|
||||
) -> anyhow::Result<AssistantMessage> {
|
||||
let hub = orchestrator.hub();
|
||||
let handle = hub
|
||||
.post_user_turn(PostUserTurnRequest {
|
||||
target: RoleOrId::RunRole {
|
||||
run_id: run_id.to_string(),
|
||||
role: role.to_string(),
|
||||
},
|
||||
text: text.to_string(),
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
let reply = hub.await_first_assistant(&handle, timeout).await?;
|
||||
Ok(reply)
|
||||
}
|
||||
|
||||
async fn relay_assistant_to_role(
|
||||
orchestrator: &InftyOrchestrator,
|
||||
run_id: &str,
|
||||
target_role: &str,
|
||||
assistant: &AssistantMessage,
|
||||
timeout: Duration,
|
||||
) -> anyhow::Result<AssistantMessage> {
|
||||
call_role(
|
||||
orchestrator,
|
||||
run_id,
|
||||
target_role,
|
||||
&assistant.message.message,
|
||||
timeout,
|
||||
)
|
||||
.await
|
||||
}
|
||||
324
codex-rs/codex-infty/tests/schemas.rs
Normal file
324
codex-rs/codex-infty/tests/schemas.rs
Normal file
@@ -0,0 +1,324 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_infty::InftyOrchestrator;
|
||||
use codex_infty::RoleConfig;
|
||||
use codex_infty::RunExecutionOptions;
|
||||
use codex_infty::RunParams;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn director_request_includes_output_schema() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
|
||||
// 1) Solver: emit a direction_request so the orchestrator calls Director.
|
||||
let body_solver = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_assistant_message(
|
||||
"solver-msg-1",
|
||||
r#"{"type":"direction_request","prompt":"Need directive","claim_path":null,"notes":null,"deliverable_path":null,"summary":null}"#,
|
||||
),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]);
|
||||
let _mock_solver = responses::mount_sse_once(&server, body_solver).await;
|
||||
|
||||
// 2) Director: reply with a directive JSON.
|
||||
let body_director = responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-1"),
|
||||
responses::ev_assistant_message(
|
||||
"director-msg-1",
|
||||
r#"{"directive":"Proceed","rationale":"Follow the plan"}"#,
|
||||
),
|
||||
responses::ev_completed("director-resp-1"),
|
||||
]);
|
||||
let mock_director = responses::mount_sse_once(&server, body_director).await;
|
||||
|
||||
// 3) After relaying directive back to Solver, we do not need to continue the run.
|
||||
// Provide a short empty solver completion body to avoid hanging HTTP calls.
|
||||
let body_solver_after = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]);
|
||||
let _mock_solver_after = responses::mount_sse_once(&server, body_solver_after).await;
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-director-schema".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
|
||||
let params = RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(runs_root.path().join("runs").join(&run_id)),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: Vec::new(),
|
||||
};
|
||||
|
||||
let options = RunExecutionOptions {
|
||||
objective: Some("Kick off".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Drive the run in the background; we'll assert the request shape then cancel.
|
||||
let fut = tokio::spawn(async move {
|
||||
let _ = orchestrator.execute_new_run(params, options).await;
|
||||
});
|
||||
|
||||
// Wait until the Director request is captured.
|
||||
wait_for_requests(&mock_director, 1, Duration::from_secs(2)).await;
|
||||
let req = mock_director.single_request();
|
||||
let body = req.body_json();
|
||||
|
||||
// Assert that a JSON schema was sent under text.format.
|
||||
let text = &body["text"]; // Optional; present when using schemas
|
||||
assert!(text.is_object(), "missing text controls in request body");
|
||||
let fmt = &text["format"];
|
||||
assert!(fmt.is_object(), "missing text.format in request body");
|
||||
assert_eq!(fmt["type"], "json_schema");
|
||||
let schema = &fmt["schema"];
|
||||
assert!(schema.is_object(), "missing text.format.schema");
|
||||
assert_eq!(schema["type"], "object");
|
||||
// Ensure the directive property exists and is a string.
|
||||
assert_eq!(schema["properties"]["directive"]["type"], "string");
|
||||
// Enforce strictness: required must include all properties.
|
||||
let required = schema["required"]
|
||||
.as_array()
|
||||
.expect("required must be array");
|
||||
let props = schema["properties"]
|
||||
.as_object()
|
||||
.expect("properties must be object");
|
||||
for key in props.keys() {
|
||||
assert!(
|
||||
required.iter().any(|v| v == key),
|
||||
"missing {key} in required"
|
||||
);
|
||||
}
|
||||
// Ensure the objective text appears in the serialized request body
|
||||
let raw = serde_json::to_string(&body).expect("serialize body");
|
||||
assert!(
|
||||
raw.contains("Kick off"),
|
||||
"objective missing from director request body"
|
||||
);
|
||||
|
||||
// Stop the background task to end the test.
|
||||
fut.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn final_delivery_request_includes_output_schema() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
|
||||
// 1) Solver: emit empty message so orchestrator asks for final_delivery via schema.
|
||||
let body_solver = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
// No signal -> orchestrator will prompt with final_output schema.
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]);
|
||||
let _mock_solver = responses::mount_sse_once(&server, body_solver).await;
|
||||
|
||||
// 2) Capture the schema-bearing request to Solver.
|
||||
let body_solver_prompt = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_assistant_message(
|
||||
"solver-msg-2",
|
||||
r#"{"type":"final_delivery","deliverable_path":"deliverable/summary.txt","summary":null}"#,
|
||||
),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]);
|
||||
let mock_solver_prompt = responses::mount_sse_once(&server, body_solver_prompt).await;
|
||||
|
||||
// 3) Keep any follow-up quiet.
|
||||
let body_solver_done = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-3"),
|
||||
responses::ev_completed("solver-resp-3"),
|
||||
]);
|
||||
let _mock_solver_done = responses::mount_sse_once(&server, body_solver_done).await;
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-final-schema".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
|
||||
let params = RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(runs_root.path().join("runs").join(&run_id)),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: Vec::new(),
|
||||
};
|
||||
|
||||
let options = RunExecutionOptions {
|
||||
objective: Some("Kick off".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let fut = tokio::spawn(async move {
|
||||
let _ = orchestrator.execute_new_run(params, options).await;
|
||||
});
|
||||
|
||||
wait_for_requests(&mock_solver_prompt, 1, Duration::from_secs(2)).await;
|
||||
let req = mock_solver_prompt.single_request();
|
||||
let body = req.body_json();
|
||||
let text = &body["text"];
|
||||
assert!(text.is_object(), "missing text controls in request body");
|
||||
let fmt = &text["format"];
|
||||
assert!(fmt.is_object(), "missing text.format in request body");
|
||||
assert_eq!(fmt["type"], "json_schema");
|
||||
let schema = &fmt["schema"];
|
||||
assert!(schema.is_object(), "missing text.format.schema");
|
||||
let required = schema["required"]
|
||||
.as_array()
|
||||
.expect("required must be array");
|
||||
let props = schema["properties"]
|
||||
.as_object()
|
||||
.expect("properties must be object");
|
||||
for key in props.keys() {
|
||||
assert!(
|
||||
required.iter().any(|v| v == key),
|
||||
"missing {key} in required"
|
||||
);
|
||||
}
|
||||
|
||||
fut.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn verifier_request_includes_output_schema() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
|
||||
// 1) Solver: issue a final_delivery which triggers verifier requests.
|
||||
let body_solver = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_assistant_message(
|
||||
"solver-msg-1",
|
||||
r#"{"type":"final_delivery","deliverable_path":"deliverable/summary.txt","summary":null}"#,
|
||||
),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]);
|
||||
let _mock_solver = responses::mount_sse_once(&server, body_solver).await;
|
||||
|
||||
// 2) Verifier: reply with a verdict JSON.
|
||||
let body_verifier = responses::sse(vec![
|
||||
responses::ev_response_created("verifier-resp-1"),
|
||||
responses::ev_assistant_message(
|
||||
"verifier-msg-1",
|
||||
r#"{"verdict":"pass","reasons":[],"suggestions":[]}"#,
|
||||
),
|
||||
responses::ev_completed("verifier-resp-1"),
|
||||
]);
|
||||
let mock_verifier = responses::mount_sse_once(&server, body_verifier).await;
|
||||
|
||||
// 3) After posting the summary back to Solver, let the request complete.
|
||||
let body_solver_after = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]);
|
||||
let _mock_solver_after = responses::mount_sse_once(&server, body_solver_after).await;
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-verifier-schema".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
let verifier_config = build_config(&server).await?;
|
||||
|
||||
let params = RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(runs_root.path().join("runs").join(&run_id)),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: vec![RoleConfig::new("verifier", verifier_config)],
|
||||
};
|
||||
|
||||
let options = RunExecutionOptions {
|
||||
objective: Some("Kick off".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let fut = tokio::spawn(async move {
|
||||
let _ = orchestrator.execute_new_run(params, options).await;
|
||||
});
|
||||
|
||||
// Wait until the Verifier request is captured.
|
||||
wait_for_requests(&mock_verifier, 1, Duration::from_secs(2)).await;
|
||||
let req = mock_verifier.single_request();
|
||||
let body = req.body_json();
|
||||
|
||||
// Assert that a JSON schema was sent under text.format.
|
||||
let text = &body["text"]; // Optional; present when using schemas
|
||||
assert!(text.is_object(), "missing text controls in request body");
|
||||
let fmt = &text["format"];
|
||||
assert!(fmt.is_object(), "missing text.format in request body");
|
||||
assert_eq!(fmt["type"], "json_schema");
|
||||
let schema = &fmt["schema"];
|
||||
assert!(schema.is_object(), "missing text.format.schema");
|
||||
assert_eq!(schema["type"], "object");
|
||||
// Ensure the verdict property exists and is an enum of pass/fail.
|
||||
assert!(schema["properties"]["verdict"].is_object());
|
||||
// Enforce strictness: required must include all properties.
|
||||
let required = schema["required"]
|
||||
.as_array()
|
||||
.expect("required must be array");
|
||||
let props = schema["properties"]
|
||||
.as_object()
|
||||
.expect("properties must be object");
|
||||
for key in props.keys() {
|
||||
assert!(
|
||||
required.iter().any(|v| v == key),
|
||||
"missing {key} in required"
|
||||
);
|
||||
}
|
||||
|
||||
fut.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build_config(server: &MockServer) -> anyhow::Result<Config> {
|
||||
let home = TempDir::new()?;
|
||||
let cwd = TempDir::new()?;
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
let mut provider = built_in_model_providers()["openai"].clone();
|
||||
provider.base_url = Some(format!("{}/v1", server.uri()));
|
||||
config.model_provider = provider;
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
async fn wait_for_requests(mock: &responses::ResponseMock, min: usize, timeout: Duration) {
|
||||
use tokio::time::Instant;
|
||||
use tokio::time::sleep;
|
||||
let start = Instant::now();
|
||||
loop {
|
||||
if mock.requests().len() >= min {
|
||||
return;
|
||||
}
|
||||
if start.elapsed() > timeout {
|
||||
return;
|
||||
}
|
||||
sleep(Duration::from_millis(25)).await;
|
||||
}
|
||||
}
|
||||
98
codex-rs/codex-infty/tests/timeouts.rs
Normal file
98
codex-rs/codex-infty/tests/timeouts.rs
Normal file
@@ -0,0 +1,98 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_infty::InftyOrchestrator;
|
||||
use codex_infty::RoleConfig;
|
||||
use codex_infty::RunExecutionOptions;
|
||||
use codex_infty::RunParams;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn direction_request_times_out_when_director_is_silent() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
|
||||
// Solver emits a direction_request.
|
||||
let body_solver = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_assistant_message(
|
||||
"solver-msg-1",
|
||||
r#"{"type":"direction_request","prompt":"Need directive","claim_path":null,"notes":null,"deliverable_path":null,"summary":null}"#,
|
||||
),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]);
|
||||
let _mock_solver = responses::mount_sse_once(&server, body_solver).await;
|
||||
|
||||
// Director remains silent (no assistant message); the model completes immediately.
|
||||
let body_director_silent = responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-1"),
|
||||
// intentionally no message
|
||||
responses::ev_completed("director-resp-1"),
|
||||
]);
|
||||
let _mock_director = responses::mount_sse_once(&server, body_director_silent).await;
|
||||
|
||||
// After attempting to relay a directive back to the solver, orchestrator won't proceed
|
||||
// as we will time out waiting for the director; however, the solver will still receive
|
||||
// a follow-up post later in the flow, so we pre-mount an empty completion to satisfy it
|
||||
// if the code ever reaches that point in future changes.
|
||||
let body_solver_after = responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]);
|
||||
let _mock_solver_after = responses::mount_sse_once(&server, body_solver_after).await;
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-director-timeout".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
|
||||
let params = RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(runs_root.path().join("runs").join(&run_id)),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: Vec::new(),
|
||||
};
|
||||
|
||||
let options = RunExecutionOptions {
|
||||
objective: Some("Kick off".to_string()),
|
||||
director_timeout: Duration::from_millis(50),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let err = orchestrator
|
||||
.execute_new_run(params, options)
|
||||
.await
|
||||
.err()
|
||||
.expect("expected timeout error");
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
msg.contains("timed out waiting") || msg.contains("AwaitTimeout"),
|
||||
"unexpected error: {msg}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build_config(server: &MockServer) -> anyhow::Result<Config> {
|
||||
let home = TempDir::new()?;
|
||||
let cwd = TempDir::new()?;
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
let mut provider = built_in_model_providers()["openai"].clone();
|
||||
provider.base_url = Some(format!("{}/v1", server.uri()));
|
||||
config.model_provider = provider;
|
||||
Ok(config)
|
||||
}
|
||||
157
codex-rs/codex-infty/tests/verifier_replacement.rs
Normal file
157
codex-rs/codex-infty/tests/verifier_replacement.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_infty::InftyOrchestrator;
|
||||
use codex_infty::RoleConfig;
|
||||
use codex_infty::RunExecutionOptions;
|
||||
use codex_infty::RunParams;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn replaces_passing_verifiers_and_keeps_failing() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
|
||||
// Round 1: alpha passes, beta fails
|
||||
let body_verifier_alpha_r1 = responses::sse(vec![
|
||||
responses::ev_response_created("verifier-alpha-r1"),
|
||||
responses::ev_assistant_message(
|
||||
"verifier-alpha-msg-r1",
|
||||
r#"{"verdict":"pass","reasons":[],"suggestions":[]}"#,
|
||||
),
|
||||
responses::ev_completed("verifier-alpha-r1"),
|
||||
]);
|
||||
let body_verifier_beta_r1 = responses::sse(vec![
|
||||
responses::ev_response_created("verifier-beta-r1"),
|
||||
responses::ev_assistant_message(
|
||||
"verifier-beta-msg-r1",
|
||||
r#"{"verdict":"fail","reasons":["missing"],"suggestions":[]}"#,
|
||||
),
|
||||
responses::ev_completed("verifier-beta-r1"),
|
||||
]);
|
||||
|
||||
// Round 2: both pass
|
||||
let body_verifier_alpha_r2 = responses::sse(vec![
|
||||
responses::ev_response_created("verifier-alpha-r2"),
|
||||
responses::ev_assistant_message(
|
||||
"verifier-alpha-msg-r2",
|
||||
r#"{"verdict":"pass","reasons":[],"suggestions":[]}"#,
|
||||
),
|
||||
responses::ev_completed("verifier-alpha-r2"),
|
||||
]);
|
||||
let body_verifier_beta_r2 = responses::sse(vec![
|
||||
responses::ev_response_created("verifier-beta-r2"),
|
||||
responses::ev_assistant_message(
|
||||
"verifier-beta-msg-r2",
|
||||
r#"{"verdict":"pass","reasons":[],"suggestions":[]}"#,
|
||||
),
|
||||
responses::ev_completed("verifier-beta-r2"),
|
||||
]);
|
||||
|
||||
// Mount verifier SSE bodies in the exact order collect_verification_summary posts to verifiers.
|
||||
// The implementation posts sequentially in the order of sessions.verifiers.
|
||||
let _m1 = responses::mount_sse_once(&server, body_verifier_alpha_r1).await;
|
||||
let _m2 = responses::mount_sse_once(&server, body_verifier_beta_r1).await;
|
||||
let _m3 = responses::mount_sse_once(&server, body_verifier_alpha_r2).await;
|
||||
let _m4 = responses::mount_sse_once(&server, body_verifier_beta_r2).await;
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-verifier-replacement".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
let verifier_config = build_config(&server).await?;
|
||||
|
||||
// Spawn run with two verifiers in known order.
|
||||
let mut sessions = orchestrator
|
||||
.spawn_run(RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(runs_root.path().join("runs").join(&run_id)),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: vec![
|
||||
RoleConfig::new("verifier-alpha", verifier_config.clone()),
|
||||
RoleConfig::new("verifier-beta", verifier_config),
|
||||
],
|
||||
})
|
||||
.await?;
|
||||
|
||||
let alpha_initial = sessions
|
||||
.store
|
||||
.role_metadata("verifier-alpha")
|
||||
.and_then(|m| m.rollout_path.clone())
|
||||
.expect("alpha initial rollout path");
|
||||
let beta_initial = sessions
|
||||
.store
|
||||
.role_metadata("verifier-beta")
|
||||
.and_then(|m| m.rollout_path.clone())
|
||||
.expect("beta initial rollout path");
|
||||
|
||||
let options = RunExecutionOptions {
|
||||
verifier_timeout: Duration::from_secs(2),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Round 1: alpha pass (should be replaced), beta fail (should be kept)
|
||||
let _summary1 = orchestrator
|
||||
.verify_round_for_test(&mut sessions, "memory/claims/c1.json", &options)
|
||||
.await?;
|
||||
|
||||
let alpha_after_r1 = sessions
|
||||
.store
|
||||
.role_metadata("verifier-alpha")
|
||||
.and_then(|m| m.rollout_path.clone())
|
||||
.expect("alpha rollout after r1");
|
||||
let beta_after_r1 = sessions
|
||||
.store
|
||||
.role_metadata("verifier-beta")
|
||||
.and_then(|m| m.rollout_path.clone())
|
||||
.expect("beta rollout after r1");
|
||||
|
||||
assert_ne!(
|
||||
alpha_initial, alpha_after_r1,
|
||||
"alpha should be replaced after pass"
|
||||
);
|
||||
assert_eq!(
|
||||
beta_initial, beta_after_r1,
|
||||
"beta should be kept after fail"
|
||||
);
|
||||
|
||||
// Round 2: both pass; beta should be replaced now.
|
||||
let _summary2 = orchestrator
|
||||
.verify_round_for_test(&mut sessions, "memory/claims/c2.json", &options)
|
||||
.await?;
|
||||
let beta_after_r2 = sessions
|
||||
.store
|
||||
.role_metadata("verifier-beta")
|
||||
.and_then(|m| m.rollout_path.clone())
|
||||
.expect("beta rollout after r2");
|
||||
assert_ne!(
|
||||
beta_initial, beta_after_r2,
|
||||
"beta should be replaced after pass in r2"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build_config(server: &MockServer) -> anyhow::Result<Config> {
|
||||
let home = TempDir::new()?;
|
||||
let cwd = TempDir::new()?;
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
let mut provider = built_in_model_providers()["openai"].clone();
|
||||
provider.base_url = Some(format!("{}/v1", server.uri()));
|
||||
config.model_provider = provider;
|
||||
Ok(config)
|
||||
}
|
||||
@@ -62,6 +62,7 @@ tokio = { workspace = true, features = [
|
||||
"signal",
|
||||
] }
|
||||
tokio-util = { workspace = true, features = ["rt"] }
|
||||
tokio-stream = { workspace = true, features = ["sync"] }
|
||||
toml = { workspace = true }
|
||||
toml_edit = { workspace = true }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
|
||||
@@ -1034,11 +1034,16 @@ impl Session {
|
||||
}
|
||||
|
||||
async fn notify_stream_error(&self, sub_id: &str, message: impl Into<String>) {
|
||||
let message = message.into();
|
||||
warn!(
|
||||
conversation_id = %self.conversation_id,
|
||||
sub_id = %sub_id,
|
||||
%message,
|
||||
"stream error while streaming model response",
|
||||
);
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::StreamError(StreamErrorEvent {
|
||||
message: message.into(),
|
||||
}),
|
||||
msg: EventMsg::StreamError(StreamErrorEvent { message }),
|
||||
};
|
||||
self.send_event(event).await;
|
||||
}
|
||||
|
||||
@@ -7,10 +7,16 @@ use crate::codex::compact::content_items_to_text;
|
||||
use crate::codex::compact::is_session_prefix_message;
|
||||
use crate::codex_conversation::CodexConversation;
|
||||
use crate::config::Config;
|
||||
use crate::cross_session::CrossSessionError;
|
||||
use crate::cross_session::CrossSessionHub;
|
||||
use crate::cross_session::RegisteredSession;
|
||||
use crate::cross_session::SessionDefaults;
|
||||
use crate::cross_session::SessionRegistration;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::Op;
|
||||
use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::rollout::RolloutRecorder;
|
||||
use codex_protocol::ConversationId;
|
||||
@@ -22,6 +28,7 @@ use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::warn;
|
||||
|
||||
/// Represents a newly created Codex conversation, including the first event
|
||||
/// (which is [`EventMsg::SessionConfigured`]).
|
||||
@@ -31,10 +38,17 @@ pub struct NewConversation {
|
||||
pub session_configured: SessionConfiguredEvent,
|
||||
}
|
||||
|
||||
pub struct CrossSessionSpawnParams {
|
||||
pub hub: Arc<CrossSessionHub>,
|
||||
pub run_id: Option<String>,
|
||||
pub role: Option<String>,
|
||||
}
|
||||
|
||||
/// [`ConversationManager`] is responsible for creating conversations and
|
||||
/// maintaining them in memory.
|
||||
pub struct ConversationManager {
|
||||
conversations: Arc<RwLock<HashMap<ConversationId, Arc<CodexConversation>>>>,
|
||||
cross_session_registrations: Arc<RwLock<HashMap<ConversationId, RegisteredSession>>>,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
session_source: SessionSource,
|
||||
}
|
||||
@@ -43,6 +57,7 @@ impl ConversationManager {
|
||||
pub fn new(auth_manager: Arc<AuthManager>, session_source: SessionSource) -> Self {
|
||||
Self {
|
||||
conversations: Arc::new(RwLock::new(HashMap::new())),
|
||||
cross_session_registrations: Arc::new(RwLock::new(HashMap::new())),
|
||||
auth_manager,
|
||||
session_source,
|
||||
}
|
||||
@@ -58,26 +73,104 @@ impl ConversationManager {
|
||||
}
|
||||
|
||||
pub async fn new_conversation(&self, config: Config) -> CodexResult<NewConversation> {
|
||||
self.spawn_conversation(config, self.auth_manager.clone())
|
||||
.await
|
||||
self.spawn_conversation_with_history(
|
||||
config,
|
||||
self.auth_manager.clone(),
|
||||
InitialHistory::New,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn spawn_conversation(
|
||||
pub async fn new_conversation_with_cross_session(
|
||||
&self,
|
||||
config: Config,
|
||||
params: CrossSessionSpawnParams,
|
||||
) -> CodexResult<NewConversation> {
|
||||
self.spawn_conversation_with_history(
|
||||
config,
|
||||
self.auth_manager.clone(),
|
||||
InitialHistory::New,
|
||||
Some(params),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn spawn_conversation_with_history(
|
||||
&self,
|
||||
config: Config,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
initial_history: InitialHistory,
|
||||
cross_session: Option<CrossSessionSpawnParams>,
|
||||
) -> CodexResult<NewConversation> {
|
||||
let cross_session =
|
||||
cross_session.map(|params| (SessionDefaults::from_config(&config), params));
|
||||
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
conversation_id,
|
||||
} = Codex::spawn(
|
||||
config,
|
||||
auth_manager,
|
||||
InitialHistory::New,
|
||||
self.session_source,
|
||||
)
|
||||
.await?;
|
||||
self.finalize_spawn(codex, conversation_id).await
|
||||
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
|
||||
|
||||
let new_conversation = self.finalize_spawn(codex, conversation_id).await?;
|
||||
|
||||
if let Some((defaults, params)) = cross_session
|
||||
&& let Err(err) = self
|
||||
.register_cross_session(
|
||||
conversation_id,
|
||||
defaults,
|
||||
params,
|
||||
Arc::clone(&new_conversation.conversation),
|
||||
)
|
||||
.await
|
||||
{
|
||||
self.abort_conversation(conversation_id, Arc::clone(&new_conversation.conversation))
|
||||
.await;
|
||||
return Err(CodexErr::Fatal(format!(
|
||||
"failed to register cross-session for conversation {conversation_id}: {err}"
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(new_conversation)
|
||||
}
|
||||
|
||||
async fn register_cross_session(
|
||||
&self,
|
||||
conversation_id: ConversationId,
|
||||
defaults: SessionDefaults,
|
||||
params: CrossSessionSpawnParams,
|
||||
conversation: Arc<CodexConversation>,
|
||||
) -> Result<(), CrossSessionError> {
|
||||
let CrossSessionSpawnParams { hub, run_id, role } = params;
|
||||
|
||||
let registration = SessionRegistration {
|
||||
conversation_id,
|
||||
conversation,
|
||||
defaults,
|
||||
run_id,
|
||||
role,
|
||||
};
|
||||
|
||||
let guard = hub.register_session(registration)?;
|
||||
self.cross_session_registrations
|
||||
.write()
|
||||
.await
|
||||
.insert(conversation_id, guard);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn abort_conversation(
|
||||
&self,
|
||||
conversation_id: ConversationId,
|
||||
conversation: Arc<CodexConversation>,
|
||||
) {
|
||||
let _ = self.remove_conversation(&conversation_id).await;
|
||||
if let Err(err) = conversation.submit(Op::Shutdown).await {
|
||||
warn!(
|
||||
%conversation_id,
|
||||
?err,
|
||||
"failed to shutdown conversation after cross-session registration error"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async fn finalize_spawn(
|
||||
@@ -130,11 +223,35 @@ impl ConversationManager {
|
||||
auth_manager: Arc<AuthManager>,
|
||||
) -> CodexResult<NewConversation> {
|
||||
let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?;
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
conversation_id,
|
||||
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
|
||||
self.finalize_spawn(codex, conversation_id).await
|
||||
self.spawn_conversation_with_history(config, auth_manager, initial_history, None)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn resume_conversation_from_rollout_with_cross_session(
|
||||
&self,
|
||||
config: Config,
|
||||
rollout_path: PathBuf,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
params: CrossSessionSpawnParams,
|
||||
) -> CodexResult<NewConversation> {
|
||||
let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?;
|
||||
self.spawn_conversation_with_history(config, auth_manager, initial_history, Some(params))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn resume_conversation_with_cross_session(
|
||||
&self,
|
||||
config: Config,
|
||||
rollout_path: PathBuf,
|
||||
params: CrossSessionSpawnParams,
|
||||
) -> CodexResult<NewConversation> {
|
||||
self.resume_conversation_from_rollout_with_cross_session(
|
||||
config,
|
||||
rollout_path,
|
||||
self.auth_manager.clone(),
|
||||
params,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Removes the conversation from the manager's internal map, though the
|
||||
@@ -145,6 +262,10 @@ impl ConversationManager {
|
||||
&self,
|
||||
conversation_id: &ConversationId,
|
||||
) -> Option<Arc<CodexConversation>> {
|
||||
self.cross_session_registrations
|
||||
.write()
|
||||
.await
|
||||
.remove(conversation_id);
|
||||
self.conversations.write().await.remove(conversation_id)
|
||||
}
|
||||
|
||||
@@ -164,12 +285,23 @@ impl ConversationManager {
|
||||
|
||||
// Spawn a new conversation with the computed initial history.
|
||||
let auth_manager = self.auth_manager.clone();
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
conversation_id,
|
||||
} = Codex::spawn(config, auth_manager, history, self.session_source).await?;
|
||||
self.spawn_conversation_with_history(config, auth_manager, history, None)
|
||||
.await
|
||||
}
|
||||
|
||||
self.finalize_spawn(codex, conversation_id).await
|
||||
pub async fn fork_conversation_with_cross_session(
|
||||
&self,
|
||||
nth_user_message: usize,
|
||||
config: Config,
|
||||
path: PathBuf,
|
||||
params: CrossSessionSpawnParams,
|
||||
) -> CodexResult<NewConversation> {
|
||||
let history = RolloutRecorder::get_rollout_history(&path).await?;
|
||||
let history = truncate_before_nth_user_message(history, nth_user_message);
|
||||
|
||||
let auth_manager = self.auth_manager.clone();
|
||||
self.spawn_conversation_with_history(config, auth_manager, history, Some(params))
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
607
codex-rs/core/src/cross_session.rs
Normal file
607
codex-rs/core/src/cross_session.rs
Normal file
@@ -0,0 +1,607 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex as StdMutex;
|
||||
use std::sync::RwLock;
|
||||
use std::sync::RwLockReadGuard;
|
||||
use std::sync::RwLockWriteGuard;
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::Stream;
|
||||
use serde_json::Value;
|
||||
use tokio::sync::Mutex as TokioMutex;
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::time;
|
||||
use tokio_stream::wrappers::BroadcastStream;
|
||||
use tokio_stream::wrappers::errors::BroadcastStreamRecvError;
|
||||
use tracing::debug;
|
||||
use tracing::error;
|
||||
|
||||
use crate::codex_conversation::CodexConversation;
|
||||
use crate::config::Config;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::AgentMessageEvent;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::InputItem;
|
||||
use crate::protocol::Op;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol_config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use crate::protocol_config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use codex_protocol::ConversationId;
|
||||
|
||||
/// Default capacity for broadcast channels that fan out session events.
|
||||
const EVENT_BUFFER_LEN: usize = 256;
|
||||
|
||||
/// Encapsulates the defaults needed to submit a new `Op::UserTurn`.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SessionDefaults {
|
||||
pub cwd: PathBuf,
|
||||
pub approval_policy: AskForApproval,
|
||||
pub sandbox_policy: SandboxPolicy,
|
||||
pub model: String,
|
||||
pub effort: Option<ReasoningEffortConfig>,
|
||||
pub summary: ReasoningSummaryConfig,
|
||||
}
|
||||
|
||||
impl SessionDefaults {
|
||||
pub fn from_config(config: &Config) -> Self {
|
||||
Self {
|
||||
cwd: config.cwd.clone(),
|
||||
approval_policy: config.approval_policy,
|
||||
sandbox_policy: config.sandbox_policy.clone(),
|
||||
model: config.model.clone(),
|
||||
effort: config.model_reasoning_effort,
|
||||
summary: config.model_reasoning_summary,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Request payload for posting a user turn to a session.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PostUserTurnRequest {
|
||||
pub target: RoleOrId,
|
||||
pub text: String,
|
||||
pub final_output_json_schema: Option<Value>,
|
||||
}
|
||||
|
||||
/// Identifier used when targeting sessions for cross-session routing.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum RoleOrId {
|
||||
Session(ConversationId),
|
||||
RunRole { run_id: String, role: String },
|
||||
}
|
||||
|
||||
/// Handle returned by [`CrossSessionHub::post_user_turn`].
|
||||
pub struct TurnHandle {
|
||||
conversation_id: ConversationId,
|
||||
submission_id: String,
|
||||
receiver: TokioMutex<Option<oneshot::Receiver<AssistantMessage>>>,
|
||||
}
|
||||
|
||||
impl TurnHandle {
|
||||
pub fn conversation_id(&self) -> ConversationId {
|
||||
self.conversation_id
|
||||
}
|
||||
|
||||
pub fn submission_id(&self) -> &str {
|
||||
&self.submission_id
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for TurnHandle {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("TurnHandle")
|
||||
.field("conversation_id", &self.conversation_id)
|
||||
.field("submission_id", &self.submission_id)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// First assistant message emitted for a bridged turn.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AssistantMessage {
|
||||
pub conversation_id: ConversationId,
|
||||
pub submission_id: String,
|
||||
pub message: AgentMessageEvent,
|
||||
}
|
||||
|
||||
/// Wrapper around a session event tagged with its conversation id.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SessionEvent {
|
||||
pub conversation_id: ConversationId,
|
||||
pub event: Event,
|
||||
}
|
||||
|
||||
/// Stream of [`SessionEvent`] instances for a particular session.
|
||||
pub struct SessionEventStream {
|
||||
inner: BroadcastStream<SessionEvent>,
|
||||
}
|
||||
|
||||
impl SessionEventStream {
|
||||
fn new(receiver: broadcast::Receiver<SessionEvent>) -> Self {
|
||||
Self {
|
||||
inner: BroadcastStream::new(receiver),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for SessionEventStream {
|
||||
type Item = SessionEvent;
|
||||
|
||||
fn poll_next(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
) -> std::task::Poll<Option<Self::Item>> {
|
||||
loop {
|
||||
match Pin::new(&mut self.inner).poll_next(cx) {
|
||||
std::task::Poll::Ready(Some(Ok(event))) => {
|
||||
return std::task::Poll::Ready(Some(event));
|
||||
}
|
||||
std::task::Poll::Ready(Some(Err(BroadcastStreamRecvError::Lagged(_)))) => continue,
|
||||
std::task::Poll::Ready(None) => return std::task::Poll::Ready(None),
|
||||
std::task::Poll::Pending => return std::task::Poll::Pending,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct RoleKey {
|
||||
run_id: Arc<str>,
|
||||
role: Arc<str>,
|
||||
}
|
||||
|
||||
impl RoleKey {
|
||||
fn new(run_id: String, role: String) -> Self {
|
||||
Self {
|
||||
run_id: Arc::<str>::from(run_id),
|
||||
role: Arc::<str>::from(role),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for RoleKey {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.run_id.as_ref() == other.run_id.as_ref() && self.role.as_ref() == other.role.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for RoleKey {}
|
||||
|
||||
impl std::hash::Hash for RoleKey {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
std::hash::Hash::hash(self.run_id.as_ref(), state);
|
||||
std::hash::Hash::hash(self.role.as_ref(), state);
|
||||
}
|
||||
}
|
||||
|
||||
struct SessionEntry {
|
||||
conversation_id: ConversationId,
|
||||
conversation: Arc<CodexConversation>,
|
||||
defaults: SessionDefaults,
|
||||
role_key: Option<RoleKey>,
|
||||
event_tx: broadcast::Sender<SessionEvent>,
|
||||
turn_watchers: TokioMutex<HashMap<String, oneshot::Sender<AssistantMessage>>>,
|
||||
pending_messages: TokioMutex<HashMap<String, AssistantMessage>>,
|
||||
shutdown_tx: StdMutex<Option<oneshot::Sender<()>>>,
|
||||
}
|
||||
|
||||
impl SessionEntry {
|
||||
fn new(
|
||||
conversation_id: ConversationId,
|
||||
conversation: Arc<CodexConversation>,
|
||||
defaults: SessionDefaults,
|
||||
role_key: Option<RoleKey>,
|
||||
event_tx: broadcast::Sender<SessionEvent>,
|
||||
shutdown_tx: oneshot::Sender<()>,
|
||||
) -> Self {
|
||||
Self {
|
||||
conversation_id,
|
||||
conversation,
|
||||
defaults,
|
||||
role_key,
|
||||
event_tx,
|
||||
turn_watchers: TokioMutex::new(HashMap::new()),
|
||||
pending_messages: TokioMutex::new(HashMap::new()),
|
||||
shutdown_tx: StdMutex::new(Some(shutdown_tx)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn register_waiter(
|
||||
&self,
|
||||
submission_id: String,
|
||||
sender: oneshot::Sender<AssistantMessage>,
|
||||
) {
|
||||
{
|
||||
let mut watchers = self.turn_watchers.lock().await;
|
||||
if let Some(message) = {
|
||||
let mut pending = self.pending_messages.lock().await;
|
||||
pending.remove(&submission_id)
|
||||
} {
|
||||
drop(watchers);
|
||||
let _ = sender.send(message);
|
||||
return;
|
||||
}
|
||||
watchers.insert(submission_id, sender);
|
||||
}
|
||||
}
|
||||
|
||||
async fn notify_assistant_message(&self, message: AssistantMessage) {
|
||||
let submission_id = message.submission_id.clone();
|
||||
let sender_opt = {
|
||||
let mut watchers = self.turn_watchers.lock().await;
|
||||
watchers.remove(&submission_id)
|
||||
};
|
||||
|
||||
if let Some(sender) = sender_opt {
|
||||
let _ = sender.send(message);
|
||||
} else {
|
||||
let mut pending = self.pending_messages.lock().await;
|
||||
pending.entry(submission_id).or_insert(message);
|
||||
}
|
||||
}
|
||||
|
||||
fn subscribe(&self) -> broadcast::Receiver<SessionEvent> {
|
||||
self.event_tx.subscribe()
|
||||
}
|
||||
|
||||
fn close(&self) {
|
||||
if let Ok(mut guard) = self.shutdown_tx.lock()
|
||||
&& let Some(tx) = guard.take()
|
||||
{
|
||||
let _ = tx.send(());
|
||||
}
|
||||
}
|
||||
|
||||
fn role_key(&self) -> Option<RoleKey> {
|
||||
self.role_key.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Input for registering a session with the hub.
|
||||
pub struct SessionRegistration {
|
||||
pub conversation_id: ConversationId,
|
||||
pub conversation: Arc<CodexConversation>,
|
||||
pub defaults: SessionDefaults,
|
||||
pub run_id: Option<String>,
|
||||
pub role: Option<String>,
|
||||
}
|
||||
|
||||
/// Guard that unregisters the session on drop.
|
||||
pub struct RegisteredSession {
|
||||
inner: Arc<Inner>,
|
||||
conversation_id: ConversationId,
|
||||
}
|
||||
|
||||
impl RegisteredSession {
|
||||
pub fn conversation_id(&self) -> ConversationId {
|
||||
self.conversation_id
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for RegisteredSession {
|
||||
fn drop(&mut self) {
|
||||
self.inner.unregister(self.conversation_id);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct Inner {
|
||||
sessions: RwLock<HashMap<ConversationId, Arc<SessionEntry>>>,
|
||||
roles: RwLock<HashMap<RoleKey, ConversationId>>,
|
||||
}
|
||||
|
||||
impl Inner {
|
||||
fn sessions_read(
|
||||
&self,
|
||||
) -> Result<RwLockReadGuard<'_, HashMap<ConversationId, Arc<SessionEntry>>>, CrossSessionError>
|
||||
{
|
||||
self.sessions
|
||||
.read()
|
||||
.map_err(|_| CrossSessionError::LockPoisoned("sessions"))
|
||||
}
|
||||
|
||||
fn sessions_write(
|
||||
&self,
|
||||
) -> Result<RwLockWriteGuard<'_, HashMap<ConversationId, Arc<SessionEntry>>>, CrossSessionError>
|
||||
{
|
||||
self.sessions
|
||||
.write()
|
||||
.map_err(|_| CrossSessionError::LockPoisoned("sessions"))
|
||||
}
|
||||
|
||||
fn roles_read(
|
||||
&self,
|
||||
) -> Result<RwLockReadGuard<'_, HashMap<RoleKey, ConversationId>>, CrossSessionError> {
|
||||
self.roles
|
||||
.read()
|
||||
.map_err(|_| CrossSessionError::LockPoisoned("roles"))
|
||||
}
|
||||
|
||||
fn roles_write(
|
||||
&self,
|
||||
) -> Result<RwLockWriteGuard<'_, HashMap<RoleKey, ConversationId>>, CrossSessionError> {
|
||||
self.roles
|
||||
.write()
|
||||
.map_err(|_| CrossSessionError::LockPoisoned("roles"))
|
||||
}
|
||||
|
||||
fn insert(&self, entry: Arc<SessionEntry>) -> Result<(), CrossSessionError> {
|
||||
{
|
||||
let mut sessions = self.sessions_write()?;
|
||||
if sessions
|
||||
.insert(entry.conversation_id, entry.clone())
|
||||
.is_some()
|
||||
{
|
||||
return Err(CrossSessionError::SessionAlreadyRegistered(
|
||||
entry.conversation_id,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(role_key) = entry.role_key() {
|
||||
let mut roles = self.roles_write()?;
|
||||
if roles.contains_key(&role_key) {
|
||||
self.sessions_write()?.remove(&entry.conversation_id);
|
||||
return Err(CrossSessionError::RoleAlreadyRegistered {
|
||||
run_id: role_key.run_id.to_string(),
|
||||
role: role_key.role.to_string(),
|
||||
});
|
||||
}
|
||||
roles.insert(role_key, entry.conversation_id);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn unregister(&self, conversation_id: ConversationId) {
|
||||
if let Some(entry) = self.remove_internal(conversation_id) {
|
||||
entry.close();
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_internal(&self, conversation_id: ConversationId) -> Option<Arc<SessionEntry>> {
|
||||
let (entry, role_key) = {
|
||||
let mut sessions = self.sessions.write().ok()?;
|
||||
let entry = sessions.remove(&conversation_id)?;
|
||||
let role_key = entry.role_key();
|
||||
(entry, role_key)
|
||||
};
|
||||
|
||||
if let Some(role_key) = role_key
|
||||
&& let Ok(mut roles) = self.roles.write()
|
||||
{
|
||||
roles.remove(&role_key);
|
||||
}
|
||||
|
||||
Some(entry)
|
||||
}
|
||||
|
||||
fn resolve_session(
|
||||
&self,
|
||||
conversation_id: ConversationId,
|
||||
) -> Result<Arc<SessionEntry>, CrossSessionError> {
|
||||
self.sessions_read()?
|
||||
.get(&conversation_id)
|
||||
.cloned()
|
||||
.ok_or(CrossSessionError::SessionNotFound(conversation_id))
|
||||
}
|
||||
|
||||
fn resolve_target(&self, target: &RoleOrId) -> Result<Arc<SessionEntry>, CrossSessionError> {
|
||||
match target {
|
||||
RoleOrId::Session(id) => self.resolve_session(*id),
|
||||
RoleOrId::RunRole { run_id, role } => {
|
||||
let conversation_id = {
|
||||
let roles = self.roles_read()?;
|
||||
let key = RoleKey::new(run_id.clone(), role.clone());
|
||||
roles
|
||||
.get(&key)
|
||||
.copied()
|
||||
.ok_or_else(|| CrossSessionError::RoleNotFound {
|
||||
run_id: run_id.clone(),
|
||||
role: role.clone(),
|
||||
})?
|
||||
};
|
||||
self.resolve_session(conversation_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Cross-session coordination hub.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct CrossSessionHub {
|
||||
inner: Arc<Inner>,
|
||||
}
|
||||
|
||||
impl CrossSessionHub {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn register_session(
|
||||
&self,
|
||||
registration: SessionRegistration,
|
||||
) -> Result<RegisteredSession, CrossSessionError> {
|
||||
let SessionRegistration {
|
||||
conversation_id,
|
||||
conversation,
|
||||
defaults,
|
||||
run_id,
|
||||
role,
|
||||
} = registration;
|
||||
|
||||
let role_key = match (run_id, role) {
|
||||
(Some(run_id), Some(role)) => Some(RoleKey::new(run_id, role)),
|
||||
(None, None) => None,
|
||||
_ => {
|
||||
return Err(CrossSessionError::IncompleteRoleRegistration);
|
||||
}
|
||||
};
|
||||
|
||||
let (event_tx, _) = broadcast::channel(EVENT_BUFFER_LEN);
|
||||
let (shutdown_tx, shutdown_rx) = oneshot::channel();
|
||||
let entry = Arc::new(SessionEntry::new(
|
||||
conversation_id,
|
||||
Arc::clone(&conversation),
|
||||
defaults,
|
||||
role_key,
|
||||
event_tx,
|
||||
shutdown_tx,
|
||||
));
|
||||
|
||||
self.inner.insert(entry.clone())?;
|
||||
|
||||
self.spawn_event_forwarder(entry, conversation, shutdown_rx);
|
||||
|
||||
Ok(RegisteredSession {
|
||||
inner: Arc::clone(&self.inner),
|
||||
conversation_id,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn post_user_turn(
|
||||
&self,
|
||||
request: PostUserTurnRequest,
|
||||
) -> Result<TurnHandle, CrossSessionError> {
|
||||
let entry = self.inner.resolve_target(&request.target)?;
|
||||
|
||||
let items = vec![InputItem::Text { text: request.text }];
|
||||
|
||||
let defaults = &entry.defaults;
|
||||
let submission_id = entry
|
||||
.conversation
|
||||
.submit(Op::UserTurn {
|
||||
items,
|
||||
cwd: defaults.cwd.clone(),
|
||||
approval_policy: defaults.approval_policy,
|
||||
sandbox_policy: defaults.sandbox_policy.clone(),
|
||||
model: defaults.model.clone(),
|
||||
effort: defaults.effort,
|
||||
summary: defaults.summary,
|
||||
final_output_json_schema: request.final_output_json_schema,
|
||||
})
|
||||
.await
|
||||
.map_err(CrossSessionError::from)?;
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
entry.register_waiter(submission_id.clone(), tx).await;
|
||||
|
||||
Ok(TurnHandle {
|
||||
conversation_id: entry.conversation_id,
|
||||
submission_id,
|
||||
receiver: TokioMutex::new(Some(rx)),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn await_first_assistant(
|
||||
&self,
|
||||
handle: &TurnHandle,
|
||||
timeout: Duration,
|
||||
) -> Result<AssistantMessage, CrossSessionError> {
|
||||
let receiver = {
|
||||
let mut guard = handle.receiver.lock().await;
|
||||
guard.take().ok_or(CrossSessionError::TurnHandleConsumed)?
|
||||
};
|
||||
|
||||
match time::timeout(timeout, receiver).await {
|
||||
Ok(Ok(message)) => Ok(message),
|
||||
Ok(Err(_)) => Err(CrossSessionError::SessionClosed),
|
||||
Err(_) => Err(CrossSessionError::AwaitTimeout(timeout)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stream_events(
|
||||
&self,
|
||||
conversation_id: ConversationId,
|
||||
) -> Result<SessionEventStream, CrossSessionError> {
|
||||
let entry = self.inner.resolve_session(conversation_id)?;
|
||||
Ok(SessionEventStream::new(entry.subscribe()))
|
||||
}
|
||||
|
||||
fn spawn_event_forwarder(
|
||||
&self,
|
||||
entry: Arc<SessionEntry>,
|
||||
conversation: Arc<CodexConversation>,
|
||||
mut shutdown_rx: oneshot::Receiver<()>,
|
||||
) {
|
||||
let conversation_id = entry.conversation_id;
|
||||
let event_tx = entry.event_tx.clone();
|
||||
let inner = Arc::clone(&self.inner);
|
||||
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = &mut shutdown_rx => {
|
||||
debug!("CrossSessionHub received shutdown for session {conversation_id}");
|
||||
break;
|
||||
}
|
||||
event = conversation.next_event() => {
|
||||
match event {
|
||||
Ok(event) => {
|
||||
if let EventMsg::AgentMessage(agent_message) = &event.msg {
|
||||
let message = AssistantMessage {
|
||||
conversation_id,
|
||||
submission_id: event.id.clone(),
|
||||
message: agent_message.clone(),
|
||||
};
|
||||
entry.notify_assistant_message(message).await;
|
||||
}
|
||||
|
||||
if let Err(err) = event_tx.send(SessionEvent {
|
||||
conversation_id,
|
||||
event: event.clone(),
|
||||
}) {
|
||||
debug!(
|
||||
"CrossSessionHub dropped event for session {conversation_id}: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
if matches!(event.msg, EventMsg::ShutdownComplete) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
error!("CrossSessionHub event loop error for session {conversation_id}: {err:#?}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inner.unregister(conversation_id);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors surfaced by cross-session orchestration.
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum CrossSessionError {
|
||||
#[error("session {0} is already registered with the hub")]
|
||||
SessionAlreadyRegistered(ConversationId),
|
||||
#[error("run {run_id} already has a {role} session registered")]
|
||||
RoleAlreadyRegistered { run_id: String, role: String },
|
||||
#[error("session {0} does not exist")]
|
||||
SessionNotFound(ConversationId),
|
||||
#[error("no session registered for run {run_id} role {role}")]
|
||||
RoleNotFound { run_id: String, role: String },
|
||||
#[error("session role registration must set both run_id and role")]
|
||||
IncompleteRoleRegistration,
|
||||
#[error("turn handle has already been awaited")]
|
||||
TurnHandleConsumed,
|
||||
#[error("session closed before an assistant message was emitted")]
|
||||
SessionClosed,
|
||||
#[error("timed out waiting {0:?} for assistant response")]
|
||||
AwaitTimeout(Duration),
|
||||
#[error("internal lock poisoned: {0}")]
|
||||
LockPoisoned(&'static str),
|
||||
#[error("submit failed: {0}")]
|
||||
SubmitFailed(#[from] CodexErr),
|
||||
}
|
||||
@@ -13,6 +13,7 @@ mod client;
|
||||
mod client_common;
|
||||
pub mod codex;
|
||||
mod codex_conversation;
|
||||
pub mod cross_session;
|
||||
pub mod token_data;
|
||||
pub use codex_conversation::CodexConversation;
|
||||
mod command_safety;
|
||||
@@ -52,6 +53,7 @@ mod event_mapping;
|
||||
pub mod review_format;
|
||||
pub use codex_protocol::protocol::InitialHistory;
|
||||
pub use conversation_manager::ConversationManager;
|
||||
pub use conversation_manager::CrossSessionSpawnParams;
|
||||
pub use conversation_manager::NewConversation;
|
||||
// Re-export common auth types for workspace consumers
|
||||
pub use auth::AuthManager;
|
||||
|
||||
@@ -68,7 +68,11 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
|
||||
auto_compact_token_limit: Some(350_000),
|
||||
}),
|
||||
|
||||
_ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),
|
||||
_ if slug.starts_with("gpt-5") => Some(ModelInfo {
|
||||
context_window: 272_000,
|
||||
max_output_tokens: 128_000,
|
||||
auto_compact_token_limit: Some(250_000),
|
||||
}),
|
||||
|
||||
_ if slug.starts_with("codex-") => Some(ModelInfo::new(272_000, 128_000)),
|
||||
|
||||
|
||||
171
codex-rs/core/tests/suite/cross_session.rs
Normal file
171
codex-rs/core/tests/suite/cross_session.rs
Normal file
@@ -0,0 +1,171 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::CrossSessionSpawnParams;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::cross_session::AssistantMessage;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use codex_core::cross_session::PostUserTurnRequest;
|
||||
use codex_core::cross_session::RoleOrId;
|
||||
use codex_core::cross_session::SessionEventStream;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use tempfile::TempDir;
|
||||
use tokio_stream::StreamExt;
|
||||
use wiremock::MockServer;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn cross_session_hub_routes_between_roles() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let bodies = vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_assistant_message("solver-msg-1", "Need direction"),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-1"),
|
||||
responses::ev_assistant_message("director-msg-1", "Proceed iteratively"),
|
||||
responses::ev_completed("director-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_assistant_message("solver-msg-2", "Acknowledged"),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]),
|
||||
];
|
||||
let response_mock = responses::mount_sse_sequence(&server, bodies).await;
|
||||
|
||||
let hub = Arc::new(CrossSessionHub::new());
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy-key"));
|
||||
let run_id = "run-cross-session".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let solver = conversation_manager
|
||||
.new_conversation_with_cross_session(
|
||||
solver_config,
|
||||
CrossSessionSpawnParams {
|
||||
hub: Arc::clone(&hub),
|
||||
run_id: Some(run_id.clone()),
|
||||
role: Some("solver".to_string()),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let director_config = build_config(&server).await?;
|
||||
let director = conversation_manager
|
||||
.new_conversation_with_cross_session(
|
||||
director_config,
|
||||
CrossSessionSpawnParams {
|
||||
hub: Arc::clone(&hub),
|
||||
run_id: Some(run_id.clone()),
|
||||
role: Some("director".to_string()),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut solver_events = hub.stream_events(solver.conversation_id)?;
|
||||
let mut director_events = hub.stream_events(director.conversation_id)?;
|
||||
|
||||
let solver_handle = hub
|
||||
.post_user_turn(PostUserTurnRequest {
|
||||
target: RoleOrId::RunRole {
|
||||
run_id: run_id.clone(),
|
||||
role: "solver".to_string(),
|
||||
},
|
||||
text: "kick off plan".to_string(),
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
let solver_first = expect_message(&hub, &solver_handle, "Need direction").await?;
|
||||
|
||||
let director_handle = hub
|
||||
.post_user_turn(PostUserTurnRequest {
|
||||
target: RoleOrId::RunRole {
|
||||
run_id: run_id.clone(),
|
||||
role: "director".to_string(),
|
||||
},
|
||||
text: solver_first.message.message.clone(),
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
let director_first = expect_message(&hub, &director_handle, "Proceed iteratively").await?;
|
||||
|
||||
let solver_followup = hub
|
||||
.post_user_turn(PostUserTurnRequest {
|
||||
target: RoleOrId::Session(solver.conversation_id),
|
||||
text: director_first.message.message.clone(),
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
let solver_reply = expect_message(&hub, &solver_followup, "Acknowledged").await?;
|
||||
|
||||
let solver_event = expect_agent_event(&mut solver_events).await;
|
||||
match solver_event {
|
||||
EventMsg::AgentMessage(msg) => assert_eq!(msg.message, "Need direction"),
|
||||
_ => panic!("expected solver agent message"),
|
||||
}
|
||||
|
||||
let director_event = expect_agent_event(&mut director_events).await;
|
||||
match director_event {
|
||||
EventMsg::AgentMessage(msg) => assert_eq!(msg.message, "Proceed iteratively"),
|
||||
_ => panic!("expected director agent message"),
|
||||
}
|
||||
|
||||
assert_eq!(solver_first.message.message, "Need direction");
|
||||
assert_eq!(director_first.message.message, "Proceed iteratively");
|
||||
assert_eq!(solver_reply.message.message, "Acknowledged");
|
||||
assert_eq!(response_mock.requests().len(), 3);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn build_config(server: &MockServer) -> anyhow::Result<Config> {
|
||||
let home = TempDir::new()?;
|
||||
let cwd = TempDir::new()?;
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.cwd = cwd.path().to_path_buf();
|
||||
let mut provider = built_in_model_providers()["openai"].clone();
|
||||
provider.base_url = Some(format!("{}/v1", server.uri()));
|
||||
config.model_provider = provider;
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
async fn expect_message(
|
||||
hub: &CrossSessionHub,
|
||||
handle: &codex_core::cross_session::TurnHandle,
|
||||
expected: &str,
|
||||
) -> anyhow::Result<AssistantMessage> {
|
||||
let message = hub
|
||||
.await_first_assistant(handle, Duration::from_secs(1))
|
||||
.await?;
|
||||
assert_eq!(message.message.message, expected);
|
||||
Ok(message)
|
||||
}
|
||||
|
||||
async fn expect_agent_event(stream: &mut SessionEventStream) -> EventMsg {
|
||||
loop {
|
||||
let maybe_event = match tokio::time::timeout(Duration::from_secs(1), stream.next()).await {
|
||||
Ok(event) => event,
|
||||
Err(_) => panic!("event timeout"),
|
||||
};
|
||||
|
||||
if let Some(event) = maybe_event {
|
||||
let msg = event.event.msg;
|
||||
if matches!(msg, EventMsg::AgentMessage(_)) {
|
||||
return msg;
|
||||
}
|
||||
} else {
|
||||
panic!("stream ended before agent message");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ mod cli_stream;
|
||||
mod client;
|
||||
mod compact;
|
||||
mod compact_resume_fork;
|
||||
mod cross_session;
|
||||
mod exec;
|
||||
mod exec_stream_events;
|
||||
mod fork_conversation;
|
||||
|
||||
189
codex-rs/infty.md
Normal file
189
codex-rs/infty.md
Normal file
@@ -0,0 +1,189 @@
|
||||
# Codex Infty: Ultra‑Long Task Orchestration
|
||||
|
||||
Design a clean, extensible way to run arbitrarily long tasks (hours–days) with bounded model context, autonomous continuation, and robust correctness review. Works for code and non‑code.
|
||||
|
||||
Status: Proposed • Scope: New crates using `codex-core` • Compatibility: Non‑breaking
|
||||
|
||||
---
|
||||
|
||||
## 1) Motivation
|
||||
- Context windows are limited → we must compact and retrieve.
|
||||
- Models pause/ask for permission → we must self‑direct.
|
||||
- No systematic review → we must verify before returning.
|
||||
|
||||
## 2) Approach (High‑Level)
|
||||
Run three coordinated roles as independent `codex-core` sessions. Reuse existing tools (shell, apply_patch, read_file, list_dir, grep_files) for persistence and retrieval. Add one clean, first-class cross-session facility in core for direction/verification — orchestrator-driven, no model-visible tool. The CLI currently spawns a solver, a director, and three verifiers (`verifier-alpha`, `verifier-beta`, `verifier-gamma`) by default.
|
||||
|
||||
- Solver (Model A): executes plan; writes all results to memory/artifacts; never asks humans to continue.
|
||||
- Director (Model B): answers Solver’s direction questions and re‑plans when needed.
|
||||
- Verifier (Model C…Cₙ): evaluates completion claims; returns pass/fail with structured feedback.
|
||||
|
||||
Inter‑role coordination uses a built‑in CrossSessionHub in core. The orchestrator watches assistant messages and bridges them as user turns to the peer role.
|
||||
|
||||
## 3) Architecture
|
||||
```
|
||||
┌────────────────────────────┐
|
||||
│ codex-infty │
|
||||
│ Orchestrator + CLI/Lib │
|
||||
│ - spawns 3 codex sessions │
|
||||
│ - supervises long runs │
|
||||
│ - configures Run/Role │
|
||||
└────────────┬───────────────┘
|
||||
│
|
||||
┌─────────▼─────────┐ ┌─────────▼─────────┐
|
||||
│ Solver (A) │ │ Director (B) │
|
||||
│ codex-core session│ │ codex-core session│
|
||||
└─────────┬─────────┘ └─────────┬─────────┘
|
||||
│ │
|
||||
└──────────┬──────────────┘
|
||||
│
|
||||
┌───────▼────────┐
|
||||
│ Verifier(s) (C)│
|
||||
│ codex-core sess │
|
||||
└───────┬─────────┘
|
||||
│
|
||||
CrossSessionHub (core, orchestrator‑driven)
|
||||
JSONL rollouts, auto‑compaction
|
||||
```
|
||||
|
||||
### Components
|
||||
- codex-infty (new crate)
|
||||
  - Spawns/owns one `codex-core` session per role — Solver (A), Director (B), and one or more Verifiers (C) — with role‑specific base instructions.
|
||||
- Supervises progress over very long runs.
|
||||
- Defines a simple on‑disk Run Store that the models write to using existing tools.
|
||||
- Configures sessions with Run/Role metadata (for cross‑session routing).
|
||||
- codex-core (existing, with one addition)
|
||||
- Reuse streaming, tool routing, JSONL rollouts with resume, auto‑compaction, and existing tools:
|
||||
- `apply_patch`, `shell`/`exec_command`/`write_stdin`
|
||||
- `grep_files`, `read_file`, `list_dir` (enable via model family/experimental tools)
|
||||
- New: built‑in `CrossSessionHub` for intra‑process routing (§5). No new model tool is exposed.
|
||||
|
||||
## 4) Data Model (Durable) and Filesystem Layout
|
||||
Persist everything in a Run Store directory; models read/write using existing tools.
|
||||
|
||||
- Run Store layout (example under `~/.codex/infty/<run-id>/`):
|
||||
- `artifacts/` – blobs and text outputs (models can create via `apply_patch` for text; `shell` for binary moves/copies).
|
||||
- `memory/` – JSON/Markdown notes: facts, hypotheses, plans, decisions, claims, evidence, evaluations.
|
||||
- `index/` – optional search/index artifacts (built out‑of‑band by orchestrator jobs; models can still use `grep_files`).
|
||||
|
||||
Data is append‑only by convention; items link to each other via ids/paths stored in JSON.
|
||||
|
||||
## 5) New Core API: CrossSessionHub (no model tool)
|
||||
Add a core facility that lets the orchestrator bridge assistant messages between sessions by posting user turns.
|
||||
|
||||
### 5.1 Hub API
|
||||
- Registry that maps `{ run_id, role } -> session handle` and `{ session_id } -> session handle`.
|
||||
- Sessions register on spawn with `run_id` and `role`; unregister on drop.
|
||||
- Expose async methods for the orchestrator:
|
||||
- `post_user_turn(to: RoleOrId, text: String) -> TurnHandle` – inject a `UserTurn` as if typed by a user.
|
||||
- `await_first_assistant(turn: &TurnHandle, timeout: Duration) -> AssistantMessage` – wait until the first assistant message for that turn.
|
||||
- `stream_events(session_id) -> impl Stream<Item = Event>` – optional subscription for higher‑level orchestration.
|
||||
|
||||
### 5.2 Orchestrator Bridge Logic
|
||||
- Direction: when the Solver emits an assistant message asking for permission/direction, the orchestrator forwards that assistant text verbatim as a user turn to the Director and waits for the Director’s first assistant reply; it then posts that reply as a user turn to Solver.
|
||||
- Verification: when Solver requests verification, orchestrator forwards request to Verifier(s); structured verdicts (pass/fail/reasons/suggestions) flow back.
|
||||
- Persistence: Each session persists its own events to rollout; the orchestrator just routes.
|
||||
|
||||
## 6) Run Store Facilities
|
||||
- Memory notes follow JSON schemas per role (plans, claims, evidence).
|
||||
- Artifacts include code patches, logs, compiled binaries, docs. Use naming convention `<timestamp>-<summary>.<ext>`.
|
||||
- Orchestrator can create `index/` entries (e.g., embeddings) offline; models still access via standard tools.
|
||||
|
||||
## 7) Orchestrator Flow
|
||||
1. Initialize Run Store + metadata (objective, roles, options).
|
||||
2. Spawn Solver, Director, Verifier sessions via `CrossSessionHub`.
|
||||
3. Seed objective as Solver user turn; monitor outputs.
|
||||
4. Relay direction/verification messages automatically between roles.
|
||||
5. Trigger periodic checkpoints (copy artifacts/memory to dated snapshots).
|
||||
6. On completion, ensure Verifier returns pass, then emit final deliverable path.
|
||||
7. Support resume: reload Run Store, respawn sessions with `InitialHistory::Resumed`.
|
||||
|
||||
## 8) Context Management
|
||||
- Conversational context: rely on `codex-core` auto‑compaction.
|
||||
- Long‑term memory: persist facts/results as files; retrieve with `grep_files`/`read_file`/`list_dir`.
|
||||
- Run Store snapshots allow cold resume even after orchestrator restart.
|
||||
|
||||
## 9) Verification Strategies
|
||||
- Code: tests, linters, type checks via `shell` under sandbox.
|
||||
- Text: grader rubrics, citation/contradiction checks.
|
||||
- Math/research: multi‑verifier consensus, self‑consistency, proof‑sketch validation.
|
||||
|
||||
## 10) Security & Policy
|
||||
- All execution stays under `codex-core` sandbox/approval.
|
||||
- Memory/Artifact tools are pure data I/O (no code execution).
|
||||
- Inter‑role calls run in isolated sessions.
|
||||
|
||||
## 11) MVP (Phased)
|
||||
1. codex-core
|
||||
- Add `CrossSessionHub` with registration and post/await APIs.
|
||||
- Add `run_id` and `role` registration on session spawn (optional fields).
|
||||
- Tests: two sessions in a run; orchestrator posts user text to Director and bridges reply to Solver.
|
||||
2. codex-infty
|
||||
- Orchestrator lib + CLI: create Run Store directories, spawn A/B/C sessions with `run_id`/`role`, run loop; ship role prompts. Enable `grep_files`/`read_file`/`list_dir`.
|
||||
3. Verification
|
||||
- Use `shell` to run checks/tests when applicable; use Verifier sessions for rubric‑based judgments.
|
||||
|
||||
## 12) Finalization & Extensibility
|
||||
- Finalization workflow (after `verdict == pass`): the orchestrator issues a final `UserTurn` to the Solver instructing:
|
||||
- Create a clean `deliverable/` folder under the Run Store.
|
||||
- Copy/transform only the necessary end results; remove scratch artifacts.
|
||||
- Write a `deliverable/README.md` including: overview, contents manifest with paths and sizes, verification steps (how to run tests), and any limitations.
|
||||
- Summarize the work in the final assistant message and return the path to `deliverable/`.
|
||||
|
||||
- Extensibility:
|
||||
- Pluggable `IndexStrategy` (keyword/embeddings/hybrid) built by the orchestrator (models still query via `grep_files`).
|
||||
- Multiple Verifiers with majority/weighted consensus.
|
||||
- Future: broadcast/multicast cross‑session calls (e.g., ask three verifiers and aggregate).
|
||||
|
||||
## 13) Why This Solves The Three Problems
|
||||
- Context: conversational compaction + durable memory with retrieval.
|
||||
- Pauses: assistant questions are bridged to a Director; the orchestrator backstops.
|
||||
- Review: Solver’s verification request is bridged to Verifier(s) with structured verdicts and remediation.
|
||||
|
||||
This keeps `codex-core` focused and leverages its strengths (streaming, tools, compaction, rollouts) while adding a small, clean cross‑session primitive to enable arbitrarily long, autonomous runs across domains.
|
||||
|
||||
---
|
||||
|
||||
## 14) End‑to‑End Example (Minimal)
|
||||
|
||||
Assume a run folder at `~/.codex/infty/run_123/`.
|
||||
|
||||
1) User objective → Solver (UserTurn)
|
||||
- User: "Write a tiny CLI that prints Fibonacci numbers and provide usage docs."
|
||||
|
||||
2) Solver starts
|
||||
- Tool: `update_plan` → steps: parse request; scaffold CLI; implement logic; write docs; verify; finalize deliverable.
|
||||
- Tool: `grep_files` → searches `artifacts/` and repo for prior art.
|
||||
|
||||
3) Solver seeks direction
|
||||
- Solver’s assistant message: “Confirm plan: binary in ./fib, args: N, output first N Fibonacci numbers; docs in memory/docs.md?”
|
||||
- Orchestrator posts a UserTurn to Director with that question and sets `final_output_json_schema` to the Director schema (strict).
|
||||
- Director’s first assistant message:
|
||||
```json
|
||||
{ "directive": "Proceed. Add tests under memory/tests.md; prefer iterative impl; expose --limit flag.", "rationale": "Keeps stack small; eases verification." }
|
||||
```
|
||||
- Orchestrator posts that reply as a UserTurn to Solver; Solver continues.
|
||||
|
||||
4) Solver implements
|
||||
- Tool: `apply_patch` → creates `artifacts/fib.rs` and a small Cargo bin, or shell scaffolding.
|
||||
- Tool: `shell` → `cargo run -- 10` to sanity check (under sandbox).
|
||||
- Writes `memory/docs.md` and `memory/tests.md`.
|
||||
|
||||
5) Solver claims completion
|
||||
- Writes `memory/claims/cli.json` (per template) referencing artifacts and docs.
|
||||
- Solver’s assistant message: “Please verify claim in memory/claims/cli.json with artifacts/fib.rs; run cargo test if present.”
|
||||
- Orchestrator posts a UserTurn to Verifier with `final_output_json_schema` set to the Verifier schema (strict). Verifier runs checks (via `shell`), returns:
|
||||
```json
|
||||
{ "verdict": "fail", "reasons": ["No tests"], "suggestions": ["Add tests covering N=1,2,10"] }
|
||||
```
|
||||
- Orchestrator posts that reply as a UserTurn to Solver; Solver iterates (adds tests, fixes issues).
|
||||
|
||||
6) Pass and finalize
|
||||
- Verifier returns `{ "verdict": "pass", … }`.
|
||||
- Orchestrator issues finalization UserTurn to Solver:
|
||||
- "Create deliverable/: include compiled bin or script, usage docs, and tests; write deliverable/README.md with run instructions; remove scratch files."
|
||||
- Solver:
|
||||
- Tool: `shell`/`apply_patch` → builds `deliverable/` with README and artifacts.
|
||||
- Assistant message: "Deliverable ready at ~/.codex/infty/run_123/deliverable/."
|
||||
|
||||
7) Orchestrator returns the final path to the user.
|
||||
325
codex-rs/infty2.md
Normal file
325
codex-rs/infty2.md
Normal file
@@ -0,0 +1,325 @@
|
||||
# Infty v2 - Minimal Cross-Session Loop
|
||||
|
||||
Goal: collapse the orchestration to three composable primitives while preserving the existing flow.
|
||||
|
||||
- spawn: create a role session with base instructions + config
|
||||
- await: wait for the first assistant message produced for a user turn
|
||||
- forward: inject an assistant message as a user message in another session
|
||||
|
||||
The rest of the orchestrator becomes a tiny router that parses the Solver's signal and calls these helpers.
|
||||
|
||||
---
|
||||
|
||||
## Design Overview
|
||||
|
||||
We build a thin, reusable facade over `codex-core`'s cross-session utilities. This facade is role- and run-aware so callers don't need to handle `ConversationId` bookkeeping.
|
||||
|
||||
Key types from `codex-core::cross_session` that we lean on:
|
||||
|
||||
- `CrossSessionHub` - registers sessions and routes messages across them
|
||||
- `PostUserTurnRequest` - payload to submit text to a session
|
||||
- `TurnHandle` - handle for a turn (used to await the assistant)
|
||||
- `AssistantMessage` - the first assistant message for a turn
|
||||
- `SessionEventStream` - event stream for activity/idle timeouts
|
||||
|
||||
In `codex-infty`, we expose tiny helpers that wrap these primitives in a role-centric API.
|
||||
For an example of the role prompts these sessions are spawned with, see [director.md](codex-infty/src/prompts/director.md).
|
||||
---
|
||||
|
||||
## Minimal API (Facade)
|
||||
|
||||
Proposed module: `codex-infty/src/session.rs` (or fold into `orchestrator.rs` if preferred). Names shown here as free functions; methods on a small struct are also fine.
|
||||
|
||||
```rust
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use anyhow::Result;
|
||||
use serde_json::Value;
|
||||
use codex_core::{ConversationManager, NewConversation};
|
||||
use codex_core::config::Config;
|
||||
use codex_core::cross_session::{
|
||||
CrossSessionHub, PostUserTurnRequest, RoleOrId, TurnHandle, AssistantMessage,
|
||||
};
|
||||
use codex_protocol::ConversationId;
|
||||
|
||||
/// Opaque role session reference used by the orchestrator.
|
||||
#[derive(Clone)]
|
||||
pub struct RoleSession {
|
||||
pub role: String,
|
||||
pub conversation_id: ConversationId,
|
||||
pub conversation: Arc<codex_core::CodexConversation>,
|
||||
}
|
||||
|
||||
/// 1) Spawn a role session with base instructions applied.
|
||||
pub async fn spawn(
|
||||
hub: Arc<CrossSessionHub>,
|
||||
manager: &ConversationManager,
|
||||
run_id: &str,
|
||||
role: &str,
|
||||
mut config: Config,
|
||||
rollout_dir: impl Into<std::path::PathBuf>,
|
||||
ensure_instructions: impl FnOnce(&str, &mut Config),
|
||||
) -> Result<RoleSession> {
|
||||
config.cwd = rollout_dir.into();
|
||||
ensure_instructions(role, &mut config);
|
||||
let created: NewConversation = manager
|
||||
.new_conversation_with_cross_session(
|
||||
config,
|
||||
codex_core::CrossSessionSpawnParams {
|
||||
hub: Arc::clone(&hub),
|
||||
run_id: Some(run_id.to_string()),
|
||||
role: Some(role.to_string()),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
Ok(RoleSession {
|
||||
role: role.to_string(),
|
||||
conversation_id: created.conversation_id,
|
||||
conversation: created.conversation,
|
||||
})
|
||||
}
|
||||
|
||||
/// 2a) Post a user turn to a role.
|
||||
pub async fn post(
|
||||
hub: &CrossSessionHub,
|
||||
run_id: &str,
|
||||
role: &str,
|
||||
text: impl Into<String>,
|
||||
final_output_json_schema: Option<Value>,
|
||||
) -> Result<TurnHandle, codex_core::cross_session::CrossSessionError> {
|
||||
hub.post_user_turn(PostUserTurnRequest {
|
||||
target: RoleOrId::RunRole { run_id: run_id.to_string(), role: role.to_string() },
|
||||
text: text.into(),
|
||||
final_output_json_schema,
|
||||
}).await
|
||||
}
|
||||
|
||||
/// 2b) Await the first assistant message for this turn.
|
||||
pub async fn await_first(
|
||||
hub: &CrossSessionHub,
|
||||
handle: &TurnHandle,
|
||||
timeout: Duration,
|
||||
) -> Result<AssistantMessage, codex_core::cross_session::CrossSessionError> {
|
||||
hub.await_first_assistant(handle, timeout).await
|
||||
}
|
||||
|
||||
/// 2c) Await with idle timeout that resets on activity for this submission id.
|
||||
/// (Move the existing codex-infty implementation here verbatim.)
|
||||
```
|
||||
|
||||
```rust
|
||||
pub async fn await_first_idle(
|
||||
hub: &CrossSessionHub,
|
||||
handle: &TurnHandle,
|
||||
idle_timeout: Duration,
|
||||
) -> Result<AssistantMessage> {
|
||||
use anyhow::{anyhow, bail};
|
||||
use codex_core::protocol::EventMsg;
|
||||
use tokio::time::Instant;
|
||||
use tokio_stream::StreamExt as _;
|
||||
|
||||
let mut events = hub.stream_events(handle.conversation_id())?;
|
||||
let wait_first = hub.await_first_assistant(handle, idle_timeout);
|
||||
tokio::pin!(wait_first);
|
||||
|
||||
let idle = tokio::time::sleep(idle_timeout);
|
||||
tokio::pin!(idle);
|
||||
|
||||
let sub_id = handle.submission_id().to_string();
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
res = &mut wait_first => { return res.map_err(|e| anyhow!(e)); }
|
||||
maybe_event = events.next() => {
|
||||
let Some(ev) = maybe_event else { bail!(codex_core::cross_session::CrossSessionError::SessionClosed); };
|
||||
if ev.event.id == sub_id {
|
||||
if let EventMsg::Error(err) = &ev.event.msg { bail!(anyhow!(err.message.clone())); }
|
||||
idle.as_mut().reset(Instant::now() + idle_timeout);
|
||||
}
|
||||
}
|
||||
_ = &mut idle => { bail!(codex_core::cross_session::CrossSessionError::AwaitTimeout(idle_timeout)); }
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```rust
|
||||
/// 3) Forward an assistant's content as a user message to another role.
|
||||
pub async fn forward_assistant(
|
||||
hub: &CrossSessionHub,
|
||||
run_id: &str,
|
||||
target_role: &str,
|
||||
assistant: &AssistantMessage,
|
||||
timeout: Duration,
|
||||
final_output_json_schema: Option<Value>,
|
||||
) -> Result<AssistantMessage> {
|
||||
let handle = post(
|
||||
hub,
|
||||
run_id,
|
||||
target_role,
|
||||
assistant.message.message.clone(),
|
||||
final_output_json_schema,
|
||||
).await?;
|
||||
Ok(await_first(hub, &handle, timeout).await?)
|
||||
}
|
||||
|
||||
/// Convenience: do both post + await in one call.
|
||||
pub async fn call(
|
||||
hub: &CrossSessionHub,
|
||||
run_id: &str,
|
||||
role: &str,
|
||||
text: impl Into<String>,
|
||||
timeout: Duration,
|
||||
final_output_json_schema: Option<Value>,
|
||||
) -> Result<AssistantMessage> {
|
||||
let handle = post(hub, run_id, role, text, final_output_json_schema).await?;
|
||||
Ok(await_first(hub, &handle, timeout).await?)
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
- `await_first_idle` is the ergonomic default in Infty because it handles streaming with activity-based resets.
|
||||
- The facade leaves JSON schema optional and role-addressing consistent with `RunRole { run_id, role }`.
|
||||
|
||||
---
|
||||
|
||||
## Orchestrator Main Loop Becomes Tiny
|
||||
|
||||
Once the three operations exist, the loop reduces to routing:
|
||||
|
||||
```rust
|
||||
// Pseudocode using the facade
|
||||
let mut solver_ev = hub.stream_events(sessions.solver.conversation_id)?;
|
||||
|
||||
if let Some(objective) = options.objective.as_deref() {
|
||||
post(&hub, &run_id, &sessions.solver.role, objective, Some(solver_signal_schema())).await?;
|
||||
}
|
||||
|
||||
loop {
|
||||
let ev = solver_ev.next().await.ok_or_else(|| anyhow::anyhow!("solver closed"))?;
|
||||
if let EventMsg::AgentMessage(agent) = &ev.event.msg {
|
||||
if let Some(signal) = parse_solver_signal(&agent.message) {
|
||||
match signal {
|
||||
SolverSignal::DirectionRequest { prompt: Some(p) } => {
|
||||
let req = serde_json::to_string(&DirectionRequestPayload {
|
||||
kind: "direction_request",
|
||||
prompt: &p,
|
||||
objective: options.objective.as_deref(),
|
||||
})?;
|
||||
let directive = call(&hub, &run_id, &sessions.director.role, req, options.director_timeout, Some(directive_response_schema())).await?;
|
||||
let _ = forward_assistant(&hub, &run_id, &sessions.solver.role, &directive, std::time::Duration::from_secs(5), Some(solver_signal_schema())).await?;
|
||||
}
|
||||
SolverSignal::VerificationRequest { claim_path: Some(path), notes } => {
|
||||
let req = serde_json::to_string(&VerificationRequestPayload {
|
||||
kind: "verification_request",
|
||||
claim_path: &path,
|
||||
notes: notes.as_deref(),
|
||||
objective: options.objective.as_deref(),
|
||||
})?;
|
||||
let mut verdicts = Vec::new();
|
||||
for v in &sessions.verifiers {
|
||||
let verdict = call(&hub, &run_id, &v.role, &req, options.verifier_timeout, Some(verifier_verdict_schema())).await?;
|
||||
verdicts.push((v.role.clone(), parse_json_struct::<VerifierVerdict>(&verdict.message.message)?));
|
||||
}
|
||||
let summary = aggregate_verdicts(verdicts);
|
||||
let _ = post(&hub, &run_id, &sessions.solver.role, serde_json::to_string(&summary)?, Some(solver_signal_schema())).await?;
|
||||
}
|
||||
SolverSignal::FinalDelivery { deliverable_path: Some(path), summary } => {
|
||||
let deliverable = resolve_deliverable_path(sessions.store.path(), &path)?;
|
||||
return Ok(RunOutcome { run_id, deliverable_path: deliverable, summary, raw_message: agent.message.clone() });
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Everything above already exists in `codex-infty` today; the facade simply standardizes the small operations so the loop reads linearly.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1) Extract helpers
|
||||
- Add `session.rs` with `spawn`, `post`, `await_first`, `await_first_idle`, `forward_assistant`, `call`.
|
||||
- Move the existing `await_first_assistant_idle` body from `orchestrator.rs` to this module (exported).
|
||||
- Re-export from `lib.rs` if desirable for external callers.
|
||||
|
||||
2) Adopt helpers in `orchestrator.rs`
|
||||
- Replace `post_to_role`, `await_first_assistant`, `relay_assistant_to_role`, and `call_role` with the facade functions.
|
||||
- Keep signal parsing and run-store logic; delete glue code that becomes redundant.
|
||||
|
||||
3) Keep role spawn/resume minimal
|
||||
- Inline `spawn_role_session` and `resume_role_session` to call `session::spawn(...)` with `prompts::ensure_instructions`.
|
||||
- Preserve persistence of rollout/config paths via `RunStore`.
|
||||
|
||||
4) Preserve JSON schema guarantees
|
||||
- Pass schemas through `post`/`call`/`forward_assistant` exactly as today:
|
||||
- Solver outbound: `solver_signal_schema()`
|
||||
- Director outbound: `directive_response_schema()`
|
||||
- Verifier outbound: `verifier_verdict_schema()`
|
||||
- Finalization: `final_delivery_schema()` for the last probe
|
||||
|
||||
5) Progress reporting stays orthogonal
|
||||
- Where the orchestrator previously called `progress.*`, keep those calls around the facade usage (no change to the trait).
|
||||
|
||||
6) Tests and docs
|
||||
- Unit-test the facade with a tiny harness that posts to a mock/run role and awaits the first assistant.
|
||||
- Update README examples to use `call` and `forward_assistant` for clarity.
|
||||
|
||||
---
|
||||
|
||||
## Snippets to Drop In
|
||||
|
||||
- Posting user input and awaiting the assistant with idle timeout:
|
||||
|
||||
```rust
|
||||
let handle = session::post(hub, &run_id, &role, user_text, schema).await?;
|
||||
let assistant = session::await_first_idle(hub, &handle, std::time::Duration::from_secs(120)).await?;
|
||||
```
|
||||
|
||||
- Forwarding an assistant to another role:
|
||||
|
||||
```rust
|
||||
let reply = session::forward_assistant(hub, &run_id, &target_role, &assistant, std::time::Duration::from_secs(60), target_schema).await?;
|
||||
```
|
||||
|
||||
- Spawning a session with base instructions:
|
||||
|
||||
```rust
|
||||
let solver = session::spawn(
|
||||
Arc::clone(&hub),
|
||||
&conversation_manager,
|
||||
&run_id,
|
||||
"solver",
|
||||
solver_cfg.clone(),
|
||||
run_path, // becomes cfg.cwd
|
||||
|role, cfg| prompts::ensure_instructions(role, cfg),
|
||||
).await?;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Why This Simplifies Things
|
||||
|
||||
- One mental model: "post -> await -> forward" across roles.
|
||||
- Orchestrator logic is a small, readable router.
|
||||
- Cross-session reliability remains in one place (the hub).
|
||||
- Tests become surgical: assert an assistant message is forwarded or a schema is respected.
|
||||
|
||||
---
|
||||
|
||||
## Backward Compatibility
|
||||
|
||||
- All current public behavior stays the same.
|
||||
- `InftyOrchestrator` public methods keep signatures; they are implemented in terms of the facade.
|
||||
- No changes to `codex-core` types or wire protocol.
|
||||
|
||||
---
|
||||
|
||||
## Optional Follow-Ups
|
||||
|
||||
- Consider upstreaming `await_first_idle` into `codex-core` so others can reuse it outside Infty.
|
||||
- Add typed wrappers for JSON payloads (newtypes) to reduce `serde_json::Value` usage at call sites.
|
||||
- Provide a tiny `SessionRouter` example crate to demonstrate building custom flows with these primitives.
|
||||
Reference in New Issue
Block a user