mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
R1
This commit is contained in:
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1194,6 +1194,7 @@ dependencies = [
|
||||
"codex-protocol",
|
||||
"core_test_support",
|
||||
"dirs",
|
||||
"futures",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
|
||||
@@ -31,9 +31,7 @@ enum InftyCommand {
|
||||
|
||||
/// Show metadata for a stored run.
|
||||
Show(ShowArgs),
|
||||
|
||||
/// Send a message to a role within a run and print the first reply.
|
||||
Drive(DriveArgs),
|
||||
// resumable runs are disabled; Drive command removed
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
@@ -81,21 +79,7 @@ pub(crate) struct ShowArgs {
|
||||
pub json: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
pub(crate) struct DriveArgs {
|
||||
/// Run id to resume.
|
||||
pub run_id: String,
|
||||
|
||||
/// Role to address (e.g. solver, director).
|
||||
pub role: String,
|
||||
|
||||
/// Message to send to the role.
|
||||
pub message: String,
|
||||
|
||||
/// Timeout in seconds to await the first assistant message.
|
||||
#[arg(long = "timeout-secs", default_value_t = super::commands::DEFAULT_TIMEOUT_SECS)]
|
||||
pub timeout_secs: u64,
|
||||
}
|
||||
// resumable runs are disabled; DriveArgs removed
|
||||
|
||||
impl InftyCli {
|
||||
pub async fn run(self) -> Result<()> {
|
||||
@@ -111,9 +95,7 @@ impl InftyCli {
|
||||
}
|
||||
InftyCommand::List(args) => commands::run_list(runs_root, args)?,
|
||||
InftyCommand::Show(args) => commands::run_show(runs_root, args)?,
|
||||
InftyCommand::Drive(args) => {
|
||||
commands::run_drive(config_overrides, runs_root, args).await?;
|
||||
}
|
||||
// Drive removed
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -19,7 +19,6 @@ use codex_core::auth::read_openai_api_key_from_env;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_infty::InftyOrchestrator;
|
||||
use codex_infty::ResumeParams;
|
||||
use codex_infty::RoleConfig;
|
||||
use codex_infty::RunExecutionOptions;
|
||||
use codex_infty::RunParams;
|
||||
@@ -29,7 +28,6 @@ use serde::Serialize;
|
||||
use supports_color::Stream;
|
||||
|
||||
use super::args::CreateArgs;
|
||||
use super::args::DriveArgs;
|
||||
use super::args::ListArgs;
|
||||
use super::args::ShowArgs;
|
||||
use super::progress::TerminalProgressReporter;
|
||||
@@ -37,7 +35,7 @@ use super::summary::print_run_summary_box;
|
||||
|
||||
const DEFAULT_VERIFIER_ROLES: [&str; 3] = ["verifier-alpha", "verifier-beta", "verifier-gamma"];
|
||||
|
||||
pub(crate) const DEFAULT_TIMEOUT_SECS: u64 = 60;
|
||||
pub(crate) const DEFAULT_TIMEOUT_SECS: u64 = 6000;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct RunSummary {
|
||||
@@ -218,60 +216,7 @@ pub(crate) fn run_show(runs_root_override: Option<PathBuf>, args: ShowArgs) -> R
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn run_drive(
|
||||
config_overrides: CliConfigOverrides,
|
||||
runs_root_override: Option<PathBuf>,
|
||||
args: DriveArgs,
|
||||
) -> Result<()> {
|
||||
validate_run_id(&args.run_id)?;
|
||||
let config = load_config(config_overrides).await?;
|
||||
let auth = load_auth(&config)?;
|
||||
let runs_root = resolve_runs_root(runs_root_override)?;
|
||||
let run_path = runs_root.join(&args.run_id);
|
||||
let store =
|
||||
RunStore::load(&run_path).with_context(|| format!("failed to load run {}", args.run_id))?;
|
||||
|
||||
let solver_role = store
|
||||
.role_metadata("solver")
|
||||
.ok_or_else(|| anyhow!("run {} is missing solver role", args.run_id))?;
|
||||
let director_role = store
|
||||
.role_metadata("director")
|
||||
.ok_or_else(|| anyhow!("run {} is missing director role", args.run_id))?;
|
||||
|
||||
let verifiers: Vec<_> = store
|
||||
.metadata()
|
||||
.roles
|
||||
.iter()
|
||||
.filter(|role| role.role != solver_role.role && role.role != director_role.role)
|
||||
.map(|role| RoleConfig::new(role.role.clone(), config.clone()))
|
||||
.collect();
|
||||
|
||||
let orchestrator = InftyOrchestrator::with_runs_root(auth, runs_root)
|
||||
.with_progress(Arc::new(TerminalProgressReporter::default()));
|
||||
let sessions = orchestrator
|
||||
.resume_run(ResumeParams {
|
||||
run_path: run_path.clone(),
|
||||
solver: RoleConfig::new(solver_role.role.clone(), config.clone()),
|
||||
director: RoleConfig::new(director_role.role.clone(), config.clone()),
|
||||
verifiers,
|
||||
})
|
||||
.await
|
||||
.with_context(|| format!("failed to resume run {}", args.run_id))?;
|
||||
|
||||
let timeout = Duration::from_secs(args.timeout_secs);
|
||||
let reply = orchestrator
|
||||
.call_role(&sessions.run_id, &args.role, args.message, timeout, None)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to deliver message to role {} in run {}",
|
||||
args.role, sessions.run_id
|
||||
)
|
||||
})?;
|
||||
|
||||
println!("{}", reply.message.message);
|
||||
Ok(())
|
||||
}
|
||||
// resumable runs are disabled; run_drive removed
|
||||
|
||||
fn generate_run_id() -> String {
|
||||
let timestamp = Utc::now().format("run-%Y%m%d-%H%M%S");
|
||||
|
||||
@@ -15,6 +15,7 @@ tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "signal"] }
|
||||
tokio-stream = { workspace = true }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
futures = "0.3"
|
||||
|
||||
[dev-dependencies]
|
||||
core_test_support = { path = "../core/tests/common" }
|
||||
|
||||
@@ -81,8 +81,8 @@ JSON may be fenced as ```json … ```; the orchestrator will strip the fence.
|
||||
|
||||
- Orchestrator: `codex-infty/src/orchestrator.rs`
|
||||
- `InftyOrchestrator`: spawns/resumes role sessions, drives the event loop, and routes signals.
|
||||
- `execute_new_run` / `execute_existing_run`: one‑shot helpers that spawn/resume and then drive.
|
||||
- `spawn_run` / `resume_run`: set up sessions and the run store.
|
||||
- `execute_new_run`: one‑shot helper that spawns and then drives.
|
||||
- `spawn_run`: sets up the sessions and the run store.
|
||||
- `call_role`, `relay_assistant_to_role`, `post_to_role`, `await_first_assistant`, `stream_events`: utilities when integrating custom flows.
|
||||
|
||||
- Run store: `codex-infty/src/run_store.rs`
|
||||
@@ -90,7 +90,7 @@ JSON may be fenced as ```json … ```; the orchestrator will strip the fence.
|
||||
|
||||
- Types: `codex-infty/src/types.rs`
|
||||
- `RoleConfig`: wraps a `Config` and sets sensible defaults for autonomous flows (no approvals, full sandbox access). Also used to persist optional config paths.
|
||||
- `RunParams`, `ResumeParams`: input to spawn/resume runs.
|
||||
- `RunParams`: input to spawn runs.
|
||||
- `RunExecutionOptions`: per‑run options (objective, timeouts).
|
||||
- `RunOutcome`: returned on successful final delivery.
|
||||
|
||||
@@ -157,28 +157,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
# fn load_config() -> codex_core::config::Config { codex_core::config::Config::default() }
|
||||
```
|
||||
|
||||
Resuming an existing run:
|
||||
|
||||
```rust
|
||||
use codex_infty::{InftyOrchestrator, ResumeParams, RoleConfig};
|
||||
|
||||
async fn resume_example(orchestrator: &InftyOrchestrator) -> anyhow::Result<()> {
|
||||
let solver = RoleConfig::new("solver", load_config());
|
||||
let director = RoleConfig::new("director", load_config());
|
||||
let verifiers = vec![];
|
||||
|
||||
let resume = ResumeParams {
|
||||
run_path: std::path::PathBuf::from("/path/to/run"),
|
||||
solver,
|
||||
director,
|
||||
verifiers,
|
||||
};
|
||||
let outcome = orchestrator.execute_existing_run(resume, Default::default()).await?;
|
||||
println!("{}", outcome.run_id);
|
||||
Ok(())
|
||||
}
|
||||
# fn load_config() -> codex_core::config::Config { codex_core::config::Config::default() }
|
||||
```
|
||||
Note: Resuming runs is currently disabled.
|
||||
|
||||
## CLI Quickstart
|
||||
|
||||
@@ -192,8 +171,7 @@ codex infty create --run-id demo --objective "Build and test feature"
|
||||
codex infty list
|
||||
codex infty show demo
|
||||
|
||||
# Send a one-off message to a role in a running/resumable run
|
||||
codex infty drive demo solver "Summarize progress"
|
||||
# Sending one-off messages to stored runs is currently disabled
|
||||
```
|
||||
|
||||
Flags allow customizing the Director’s model and reasoning effort; see `codex infty create --help`.
|
||||
|
||||
@@ -18,7 +18,6 @@ pub use signals::DirectiveResponse;
|
||||
pub use signals::VerifierDecision;
|
||||
pub use signals::VerifierReport;
|
||||
pub use signals::VerifierVerdict;
|
||||
pub use types::ResumeParams;
|
||||
pub use types::RoleConfig;
|
||||
pub use types::RoleSession;
|
||||
pub use types::RunExecutionOptions;
|
||||
|
||||
@@ -12,6 +12,8 @@ use anyhow::bail;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::cross_session::AssistantMessage;
|
||||
use codex_core::cross_session::CrossSessionHub;
|
||||
use codex_core::cross_session::SessionEventStream;
|
||||
@@ -39,7 +41,6 @@ use crate::signals::VerifierDecision;
|
||||
use crate::signals::VerifierReport;
|
||||
use crate::signals::VerifierVerdict;
|
||||
use crate::types::FINALIZATION_PROMPT;
|
||||
use crate::types::ResumeParams;
|
||||
use crate::types::RoleConfig;
|
||||
use crate::types::RoleSession;
|
||||
use crate::types::RunExecutionOptions;
|
||||
@@ -146,14 +147,7 @@ impl InftyOrchestrator {
|
||||
self.drive_run(sessions, options).await
|
||||
}
|
||||
|
||||
pub async fn execute_existing_run(
|
||||
&self,
|
||||
params: ResumeParams,
|
||||
options: RunExecutionOptions,
|
||||
) -> Result<RunOutcome> {
|
||||
let sessions = self.resume_run(params).await?;
|
||||
self.drive_run(sessions, options).await
|
||||
}
|
||||
// resumable runs are disabled; execute_existing_run removed
|
||||
|
||||
pub async fn spawn_run(&self, params: RunParams) -> Result<RunSessions> {
|
||||
let RunParams {
|
||||
@@ -215,67 +209,7 @@ impl InftyOrchestrator {
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn resume_run(&self, params: ResumeParams) -> Result<RunSessions> {
|
||||
let ResumeParams {
|
||||
run_path,
|
||||
solver,
|
||||
director,
|
||||
verifiers,
|
||||
} = params;
|
||||
|
||||
let mut store = RunStore::load(&run_path)?;
|
||||
let run_id = store.metadata().run_id.clone();
|
||||
let mut cleanup = Vec::new();
|
||||
|
||||
let run_path = store.path().to_path_buf();
|
||||
|
||||
let solver_session = match self
|
||||
.resume_and_register_role(&run_id, &run_path, &solver, &mut store, &mut cleanup)
|
||||
.await
|
||||
{
|
||||
Ok(session) => session,
|
||||
Err(err) => {
|
||||
self.cleanup_failed_resume(cleanup).await;
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
let director_session = match self
|
||||
.resume_and_register_role(&run_id, &run_path, &director, &mut store, &mut cleanup)
|
||||
.await
|
||||
{
|
||||
Ok(session) => session,
|
||||
Err(err) => {
|
||||
self.cleanup_failed_resume(cleanup).await;
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
let mut verifier_sessions = Vec::with_capacity(verifiers.len());
|
||||
for verifier in verifiers.iter() {
|
||||
let session = match self
|
||||
.resume_and_register_role(&run_id, &run_path, verifier, &mut store, &mut cleanup)
|
||||
.await
|
||||
{
|
||||
Ok(session) => session,
|
||||
Err(err) => {
|
||||
self.cleanup_failed_resume(cleanup).await;
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
verifier_sessions.push(session);
|
||||
}
|
||||
|
||||
store.touch()?;
|
||||
|
||||
Ok(RunSessions {
|
||||
run_id,
|
||||
solver: solver_session,
|
||||
director: director_session,
|
||||
verifiers: verifier_sessions,
|
||||
store,
|
||||
})
|
||||
}
|
||||
// resumable runs are disabled; resume_run removed
|
||||
|
||||
async fn drive_run(
|
||||
&self,
|
||||
@@ -534,7 +468,7 @@ impl InftyOrchestrator {
|
||||
|
||||
async fn handle_verification_request(
|
||||
&self,
|
||||
sessions: &RunSessions,
|
||||
sessions: &mut RunSessions,
|
||||
claim_path: &str,
|
||||
notes: Option<&str>,
|
||||
options: &RunExecutionOptions,
|
||||
@@ -556,7 +490,7 @@ impl InftyOrchestrator {
|
||||
|
||||
async fn run_final_verification(
|
||||
&self,
|
||||
sessions: &RunSessions,
|
||||
sessions: &mut RunSessions,
|
||||
deliverable_path: &Path,
|
||||
summary: Option<&str>,
|
||||
options: &RunExecutionOptions,
|
||||
@@ -604,7 +538,7 @@ impl InftyOrchestrator {
|
||||
|
||||
async fn collect_verification_summary(
|
||||
&self,
|
||||
sessions: &RunSessions,
|
||||
sessions: &mut RunSessions,
|
||||
claim_path: &str,
|
||||
notes: Option<&str>,
|
||||
objective: Option<&str>,
|
||||
@@ -621,7 +555,8 @@ impl InftyOrchestrator {
|
||||
objective,
|
||||
};
|
||||
let request_text = serde_json::to_string_pretty(&request)?;
|
||||
let mut collected = Vec::with_capacity(sessions.verifiers.len());
|
||||
let mut results: Vec<(String, VerifierVerdict)> =
|
||||
Vec::with_capacity(sessions.verifiers.len());
|
||||
for verifier in &sessions.verifiers {
|
||||
let handle = session::post_turn(
|
||||
self.hub.as_ref(),
|
||||
@@ -649,10 +584,70 @@ impl InftyOrchestrator {
|
||||
if let Some(progress) = self.progress.as_ref() {
|
||||
progress.verifier_verdict(&verifier.role, &verdict);
|
||||
}
|
||||
collected.push((verifier.role.clone(), verdict));
|
||||
results.push((verifier.role.clone(), verdict));
|
||||
}
|
||||
|
||||
Ok(aggregate_verdicts(collected))
|
||||
// Replace any verifier that passed with a fresh session; keep failures.
|
||||
// Build a set of roles to replace to avoid borrowing issues while mutating.
|
||||
let to_replace: Vec<String> = results
|
||||
.iter()
|
||||
.filter_map(|(role, verdict)| {
|
||||
if verdict.verdict.is_pass() {
|
||||
Some(role.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
for role in to_replace {
|
||||
if let Err(err) = self.replace_verifier_session(sessions, &role).await {
|
||||
warn!(role = %role, ?err, "failed to replace verifier session; keeping existing");
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate directly from the collected results
|
||||
Ok(aggregate_verdicts(results))
|
||||
}
|
||||
|
||||
async fn replace_verifier_session(&self, sessions: &mut RunSessions, role: &str) -> Result<()> {
|
||||
// Find the existing verifier session index by role
|
||||
let idx = sessions
|
||||
.verifiers
|
||||
.iter()
|
||||
.position(|s| s.role == role)
|
||||
.ok_or_else(|| anyhow!(format!("verifier role {role} not found")))?;
|
||||
|
||||
// Shut down the old session and unregister it from the hub
|
||||
let old = &sessions.verifiers[idx];
|
||||
// best-effort shutdown; ignore errors but proceed to unregister
|
||||
let _ = old.conversation.submit(Op::Shutdown).await;
|
||||
let _ = self
|
||||
.conversation_manager
|
||||
.remove_conversation(&old.conversation_id)
|
||||
.await;
|
||||
|
||||
// Prepare a fresh Config using current user defaults, then apply our autonomous policies
|
||||
let config = Config::load_with_cli_overrides(Vec::new(), ConfigOverrides::default())
|
||||
.await
|
||||
.context("failed to load Codex config for verifier respawn")?;
|
||||
// RoleConfig::new applies sandbox + approval; mimic that here via the constructor
|
||||
let role_config = crate::types::RoleConfig::new(role.to_string(), config);
|
||||
|
||||
// Spawn a new verifier session and register it
|
||||
let mut dummy = Vec::new();
|
||||
let run_path = sessions.store.path().to_path_buf();
|
||||
let new_session = self
|
||||
.spawn_and_register_role(
|
||||
&sessions.run_id,
|
||||
&run_path,
|
||||
&role_config,
|
||||
&mut sessions.store,
|
||||
&mut dummy,
|
||||
)
|
||||
.await?;
|
||||
|
||||
sessions.verifiers[idx] = new_session;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn emit_verification_summary(&self, summary: &AggregatedVerifierVerdict) {
|
||||
@@ -691,9 +686,7 @@ impl InftyOrchestrator {
|
||||
}
|
||||
}
|
||||
|
||||
async fn cleanup_failed_resume(&self, sessions: Vec<SessionCleanup>) {
|
||||
self.shutdown_sessions(sessions).await;
|
||||
}
|
||||
// resumable runs are disabled; cleanup_failed_resume removed
|
||||
|
||||
async fn shutdown_sessions(&self, sessions: Vec<SessionCleanup>) {
|
||||
for session in sessions {
|
||||
@@ -786,38 +779,20 @@ impl InftyOrchestrator {
|
||||
Ok(session)
|
||||
}
|
||||
|
||||
async fn resume_and_register_role(
|
||||
&self,
|
||||
run_id: &str,
|
||||
run_path: &Path,
|
||||
role_config: &RoleConfig,
|
||||
store: &mut RunStore,
|
||||
cleanup: &mut Vec<SessionCleanup>,
|
||||
) -> Result<RoleSession> {
|
||||
let metadata = store
|
||||
.role_metadata(&role_config.role)
|
||||
.ok_or_else(|| anyhow!("role {} not found in run metadata", role_config.role))?;
|
||||
let rollout_path = metadata
|
||||
.rollout_path
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("missing rollout path for role {}", role_config.role))?;
|
||||
// resumable runs are disabled; resume_and_register_role removed
|
||||
}
|
||||
|
||||
let session = session::resume_role(
|
||||
Arc::clone(&self.hub),
|
||||
&self.conversation_manager,
|
||||
run_id,
|
||||
run_path,
|
||||
role_config,
|
||||
rollout_path,
|
||||
prompts::ensure_instructions,
|
||||
)
|
||||
.await?;
|
||||
cleanup.push(SessionCleanup::new(&session));
|
||||
store.update_rollout_path(&session.role, session.rollout_path.clone())?;
|
||||
if let Some(path) = role_config.config_path.clone() {
|
||||
store.set_role_config_path(&session.role, path)?;
|
||||
}
|
||||
Ok(session)
|
||||
impl InftyOrchestrator {
|
||||
/// Test-only helper to run a single verification round against all verifiers,
|
||||
/// applying the replacement policy (replace passes, keep failures).
///
/// `sessions` is borrowed mutably because the underlying verification round
/// may swap verifier sessions in place. `claim_path` and `options` are
/// forwarded unchanged.
///
/// Returns whatever `collect_verification_summary` returns: the aggregated
/// verdict on success, or its error.
|
||||
pub async fn verify_round_for_test(
|
||||
&self,
|
||||
sessions: &mut RunSessions,
|
||||
claim_path: &str,
|
||||
options: &RunExecutionOptions,
|
||||
) -> Result<AggregatedVerifierVerdict> {
|
||||
// The two `None`s fill the optional `notes` and `objective` arguments.
self.collect_verification_summary(sessions, claim_path, None, None, options)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ Responsibilities:
|
||||
- When you exit a task or take a dependency on external evidence, write JSON notes in `memory/claims/` that link to the supporting artifacts.
|
||||
- Run verification steps (tests, linters, proofs) under the sandbox before claiming completion.
|
||||
- Every deliverable must include the actual solution or proof (not just a literature review) and enough detail for the Verifier to reproduce or scrutinise it.
|
||||
- Your goal is to find new solutions to problems for which humans do not yet have a solution. Do not focus on searching the internet or the literature; try building your own proofs instead.
|
||||
|
||||
Available Codex tools mirror standard Codex sessions (e.g. `shell`, `apply_patch`). Assume all filesystem paths are relative to the current run store directory unless stated otherwise.
|
||||
|
||||
@@ -23,7 +24,7 @@ The orchestrator routes your structured messages to the Director or Verifier rol
|
||||
```json
|
||||
{"type":"direction_request","prompt":"<concise question or decision>","claim_path":null,"notes":null,"deliverable_path":null,"summary":null}
|
||||
```
|
||||
- Verification request (send to Verifier):
|
||||
- Verification request (send to Verifier). Only request verification once you have the final answer; the Verifier is not meant for intermediate checks:
|
||||
```json
|
||||
{"type":"verification_request","prompt":null,"claim_path":"memory/claims/<file>.json","notes":null,"deliverable_path":null,"summary":null}
|
||||
```
|
||||
|
||||
@@ -35,9 +35,10 @@ pub async fn spawn_role(
|
||||
} = role_config;
|
||||
config.cwd = run_path.to_path_buf();
|
||||
ensure_instructions(&role, &mut config);
|
||||
let cfg_for_session = config.clone();
|
||||
let session = manager
|
||||
.new_conversation_with_cross_session(
|
||||
config,
|
||||
cfg_for_session,
|
||||
CrossSessionSpawnParams {
|
||||
hub: Arc::clone(&hub),
|
||||
run_id: Some(run_id.to_string()),
|
||||
@@ -45,34 +46,11 @@ pub async fn spawn_role(
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
Ok(RoleSession::from_new(role, session))
|
||||
// Note: include the final config used to spawn the session
|
||||
Ok(RoleSession::from_new(role, session, config))
|
||||
}
|
||||
|
||||
pub async fn resume_role(
|
||||
hub: Arc<CrossSessionHub>,
|
||||
manager: &ConversationManager,
|
||||
run_id: &str,
|
||||
run_path: &Path,
|
||||
role_config: &RoleConfig,
|
||||
rollout_path: &Path,
|
||||
ensure_instructions: impl FnOnce(&str, &mut Config),
|
||||
) -> Result<RoleSession> {
|
||||
let mut config = role_config.config.clone();
|
||||
config.cwd = run_path.to_path_buf();
|
||||
ensure_instructions(&role_config.role, &mut config);
|
||||
let session = manager
|
||||
.resume_conversation_with_cross_session(
|
||||
config,
|
||||
rollout_path.to_path_buf(),
|
||||
CrossSessionSpawnParams {
|
||||
hub: Arc::clone(&hub),
|
||||
run_id: Some(run_id.to_string()),
|
||||
role: Some(role_config.role.clone()),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
Ok(RoleSession::from_new(role_config.role.clone(), session))
|
||||
}
|
||||
// resumable runs are disabled for now; resume_role removed
|
||||
|
||||
pub async fn post_turn(
|
||||
hub: &CrossSessionHub,
|
||||
|
||||
@@ -9,8 +9,8 @@ use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::ConversationId;
|
||||
|
||||
pub(crate) const DEFAULT_DIRECTOR_TIMEOUT: Duration = Duration::from_secs(120);
|
||||
pub(crate) const DEFAULT_VERIFIER_TIMEOUT: Duration = Duration::from_secs(180);
|
||||
pub(crate) const DEFAULT_DIRECTOR_TIMEOUT: Duration = Duration::from_secs(1200);
|
||||
pub(crate) const DEFAULT_VERIFIER_TIMEOUT: Duration = Duration::from_secs(1800);
|
||||
pub(crate) const FINALIZATION_PROMPT: &str = "Create deliverable/: include compiled artifacts or scripts, usage docs, and tests. Write deliverable/summary.txt capturing the final answer, evidence, and follow-up steps. Also provide deliverable/README.md with overview, manifest (paths and sizes), verification steps, and limitations. Remove scratch files. Reply with JSON: {\"type\":\"final_delivery\",\"deliverable_path\":\"deliverable/summary.txt\",\"summary\":\"<answer plus supporting context>\"}.";
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -48,13 +48,6 @@ pub struct RunParams {
|
||||
pub verifiers: Vec<RoleConfig>,
|
||||
}
|
||||
|
||||
pub struct ResumeParams {
|
||||
pub run_path: PathBuf,
|
||||
pub solver: RoleConfig,
|
||||
pub director: RoleConfig,
|
||||
pub verifiers: Vec<RoleConfig>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RunExecutionOptions {
|
||||
pub objective: Option<String>,
|
||||
@@ -85,16 +78,18 @@ pub struct RoleSession {
|
||||
pub conversation: Arc<CodexConversation>,
|
||||
pub session_configured: codex_core::protocol::SessionConfiguredEvent,
|
||||
pub rollout_path: PathBuf,
|
||||
pub config: Config,
|
||||
}
|
||||
|
||||
impl RoleSession {
|
||||
pub(crate) fn from_new(role: String, session: NewConversation) -> Self {
|
||||
pub(crate) fn from_new(role: String, session: NewConversation, config: Config) -> Self {
|
||||
Self {
|
||||
role,
|
||||
conversation_id: session.conversation_id,
|
||||
conversation: session.conversation,
|
||||
session_configured: session.session_configured.clone(),
|
||||
rollout_path: session.session_configured.rollout_path.clone(),
|
||||
config,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_infty::InftyOrchestrator;
|
||||
use codex_infty::ResumeParams;
|
||||
use codex_infty::RoleConfig;
|
||||
use codex_infty::RunExecutionOptions;
|
||||
use codex_infty::RunParams;
|
||||
@@ -108,79 +107,7 @@ async fn orchestrator_routes_between_roles_and_records_store() -> anyhow::Result
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn orchestrator_resumes_existing_run() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let bodies = vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-1"),
|
||||
responses::ev_assistant_message("solver-msg-1", "Need direction"),
|
||||
responses::ev_completed("solver-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("director-resp-1"),
|
||||
responses::ev_assistant_message("director-msg-1", "Proceed iteratively"),
|
||||
responses::ev_completed("director-resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("solver-resp-2"),
|
||||
responses::ev_assistant_message("solver-msg-2", "Acknowledged"),
|
||||
responses::ev_completed("solver-resp-2"),
|
||||
]),
|
||||
];
|
||||
for body in bodies {
|
||||
responses::mount_sse_once(&server, body).await;
|
||||
}
|
||||
|
||||
let runs_root = TempDir::new()?;
|
||||
let orchestrator =
|
||||
InftyOrchestrator::with_runs_root(CodexAuth::from_api_key("dummy-key"), runs_root.path());
|
||||
let run_id = "run-resume".to_string();
|
||||
|
||||
let solver_config = build_config(&server).await?;
|
||||
let director_config = build_config(&server).await?;
|
||||
|
||||
let sessions = orchestrator
|
||||
.spawn_run(RunParams {
|
||||
run_id: run_id.clone(),
|
||||
run_root: Some(runs_root.path().join("runs").join(&run_id)),
|
||||
solver: RoleConfig::new("solver", solver_config.clone()),
|
||||
director: RoleConfig::new("director", director_config.clone()),
|
||||
verifiers: Vec::new(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
sessions.solver.conversation.submit(Op::Shutdown).await.ok();
|
||||
sessions
|
||||
.director
|
||||
.conversation
|
||||
.submit(Op::Shutdown)
|
||||
.await
|
||||
.ok();
|
||||
drop(sessions);
|
||||
|
||||
let resume = orchestrator
|
||||
.resume_run(ResumeParams {
|
||||
run_path: runs_root.path().join("runs").join(&run_id),
|
||||
solver: RoleConfig::new("solver", solver_config),
|
||||
director: RoleConfig::new("director", director_config),
|
||||
verifiers: Vec::new(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
assert_eq!(resume.run_id, run_id);
|
||||
assert!(
|
||||
resume
|
||||
.store
|
||||
.role_metadata("solver")
|
||||
.unwrap()
|
||||
.rollout_path
|
||||
.is_some()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
// resumable runs are disabled; resume test removed
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn execute_new_run_drives_to_completion() -> anyhow::Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user