mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
Add goal model tools (3 / 5) (#18075)
Adds the model-facing goal tools on top of the app-server API from PR 2. ## Why Once goals are persisted and exposed to clients, the model needs a small, constrained tool surface for goal workflows. The tool contract should let the model inspect goals, create them only when explicitly requested, and mark them complete without giving it broad control over user/runtime-owned state. ## What changed - Added `get_goal`, `create_goal`, and `update_goal` tool specs behind the `goals` feature flag. - Added core goal tool handlers that validate objectives and token budgets before mutating persisted state. - Constrained `create_goal` to create only when no goal exists, with optional `token_budget` only when a budget is explicitly provided. - Tightened the `create_goal` instructions so the model does not infer goals from ordinary task requests. - Constrained `update_goal` to expose only goal completion; pause, resume, clear, and budget-limited transitions remain user- or runtime-controlled. - Registered the goal tools in the tool registry and kept them out of review contexts where they should not appear. ## Verification - Added tool-registry coverage for feature gating and tool availability. - Added core session tests for create/get/update behavior, duplicate goal rejection, budget validation, and completion-only updates.
This commit is contained in:
259
codex-rs/core/src/goals.rs
Normal file
259
codex-rs/core/src/goals.rs
Normal file
@@ -0,0 +1,259 @@
|
||||
//! Core support for persisted thread goals.
|
||||
//!
|
||||
//! This module bridges core sessions and the state-db goal table. It validates
|
||||
//! goal mutations, converts between state and protocol shapes, emits goal-update
|
||||
//! events, and owns helper hooks used by goal lifecycle behavior.
|
||||
|
||||
use crate::StateDbHandle;
|
||||
use crate::session::session::Session;
|
||||
use crate::session::turn_context::TurnContext;
|
||||
use anyhow::Context;
|
||||
use codex_features::Feature;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ThreadGoal;
|
||||
use codex_protocol::protocol::ThreadGoalStatus;
|
||||
use codex_protocol::protocol::ThreadGoalUpdatedEvent;
|
||||
use codex_rollout::state_db::reconcile_rollout;
|
||||
use codex_thread_store::LocalThreadStore;
|
||||
|
||||
pub(crate) struct SetGoalRequest {
|
||||
pub(crate) objective: Option<String>,
|
||||
pub(crate) status: Option<ThreadGoalStatus>,
|
||||
pub(crate) token_budget: Option<Option<i64>>,
|
||||
}
|
||||
|
||||
pub(crate) struct CreateGoalRequest {
|
||||
pub(crate) objective: String,
|
||||
pub(crate) token_budget: Option<i64>,
|
||||
}
|
||||
|
||||
impl Session {
|
||||
pub(crate) async fn get_thread_goal(&self) -> anyhow::Result<Option<ThreadGoal>> {
|
||||
if !self.enabled(Feature::Goals) {
|
||||
anyhow::bail!("goals feature is disabled");
|
||||
}
|
||||
|
||||
let state_db = self.state_db_for_thread_goals().await?;
|
||||
state_db
|
||||
.get_thread_goal(self.conversation_id)
|
||||
.await
|
||||
.map(|goal| goal.map(protocol_goal_from_state))
|
||||
}
|
||||
|
||||
pub(crate) async fn set_thread_goal(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
request: SetGoalRequest,
|
||||
) -> anyhow::Result<ThreadGoal> {
|
||||
if !self.enabled(Feature::Goals) {
|
||||
anyhow::bail!("goals feature is disabled");
|
||||
}
|
||||
|
||||
validate_goal_budget(request.token_budget.flatten())?;
|
||||
let state_db = self.state_db_for_thread_goals().await?;
|
||||
let goal = if let Some(objective) = request.objective {
|
||||
let objective = objective.trim();
|
||||
if objective.is_empty() {
|
||||
anyhow::bail!("goal objective must not be empty");
|
||||
}
|
||||
state_db
|
||||
.replace_thread_goal(
|
||||
self.conversation_id,
|
||||
objective,
|
||||
request
|
||||
.status
|
||||
.map(state_goal_status_from_protocol)
|
||||
.unwrap_or(codex_state::ThreadGoalStatus::Active),
|
||||
request.token_budget.flatten(),
|
||||
)
|
||||
.await?
|
||||
} else {
|
||||
let status = request.status.map(state_goal_status_from_protocol);
|
||||
state_db
|
||||
.update_thread_goal(
|
||||
self.conversation_id,
|
||||
codex_state::ThreadGoalUpdate {
|
||||
status,
|
||||
token_budget: request.token_budget,
|
||||
expected_goal_id: None,
|
||||
},
|
||||
)
|
||||
.await?
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"cannot update goal for thread {}: no goal exists",
|
||||
self.conversation_id
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
let goal = protocol_goal_from_state(goal);
|
||||
self.send_event(
|
||||
turn_context,
|
||||
EventMsg::ThreadGoalUpdated(ThreadGoalUpdatedEvent {
|
||||
thread_id: self.conversation_id,
|
||||
turn_id: Some(turn_context.sub_id.clone()),
|
||||
goal: goal.clone(),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
Ok(goal)
|
||||
}
|
||||
|
||||
pub(crate) async fn create_thread_goal(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
request: CreateGoalRequest,
|
||||
) -> anyhow::Result<ThreadGoal> {
|
||||
if !self.enabled(Feature::Goals) {
|
||||
anyhow::bail!("goals feature is disabled");
|
||||
}
|
||||
|
||||
let CreateGoalRequest {
|
||||
objective,
|
||||
token_budget,
|
||||
} = request;
|
||||
validate_goal_budget(token_budget)?;
|
||||
let objective = objective.trim();
|
||||
if objective.is_empty() {
|
||||
anyhow::bail!("goal objective must not be empty");
|
||||
}
|
||||
|
||||
let state_db = self.state_db_for_thread_goals().await?;
|
||||
let goal = state_db
|
||||
.insert_thread_goal(
|
||||
self.conversation_id,
|
||||
objective,
|
||||
codex_state::ThreadGoalStatus::Active,
|
||||
token_budget,
|
||||
)
|
||||
.await?
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"cannot create a new goal because thread {} already has a goal",
|
||||
self.conversation_id
|
||||
)
|
||||
})?;
|
||||
|
||||
let goal = protocol_goal_from_state(goal);
|
||||
self.send_event(
|
||||
turn_context,
|
||||
EventMsg::ThreadGoalUpdated(ThreadGoalUpdatedEvent {
|
||||
thread_id: self.conversation_id,
|
||||
turn_id: Some(turn_context.sub_id.clone()),
|
||||
goal: goal.clone(),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
Ok(goal)
|
||||
}
|
||||
}
|
||||
|
||||
impl Session {
|
||||
async fn state_db_for_thread_goals(&self) -> anyhow::Result<StateDbHandle> {
|
||||
let config = self.get_config().await;
|
||||
if config.ephemeral {
|
||||
anyhow::bail!("thread goals require a persisted thread; this thread is ephemeral");
|
||||
}
|
||||
|
||||
self.try_ensure_rollout_materialized()
|
||||
.await
|
||||
.context("failed to materialize rollout before opening state db for thread goals")?;
|
||||
|
||||
let state_db = if let Some(state_db) = self.state_db() {
|
||||
state_db
|
||||
} else if let Some(local_store) = self
|
||||
.services
|
||||
.thread_store
|
||||
.as_any()
|
||||
.downcast_ref::<LocalThreadStore>()
|
||||
{
|
||||
local_store.state_db().await.ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"thread goals require a local persisted thread with a state database"
|
||||
)
|
||||
})?
|
||||
} else {
|
||||
anyhow::bail!("thread goals require a local persisted thread with a state database");
|
||||
};
|
||||
|
||||
let thread_metadata_present = state_db
|
||||
.get_thread(self.conversation_id)
|
||||
.await
|
||||
.context("failed to read thread metadata before reconciling thread goals")?
|
||||
.is_some();
|
||||
if !thread_metadata_present {
|
||||
let rollout_path = self
|
||||
.current_rollout_path()
|
||||
.await
|
||||
.context("failed to locate rollout before reconciling thread goals")?
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!("thread goals require materialized thread metadata")
|
||||
})?;
|
||||
reconcile_rollout(
|
||||
Some(&state_db),
|
||||
rollout_path.as_path(),
|
||||
config.model_provider_id.as_str(),
|
||||
/*builder*/ None,
|
||||
&[],
|
||||
/*archived_only*/ None,
|
||||
/*new_thread_memory_mode*/ None,
|
||||
)
|
||||
.await;
|
||||
let thread_metadata_present = state_db
|
||||
.get_thread(self.conversation_id)
|
||||
.await
|
||||
.context("failed to read thread metadata after reconciling thread goals")?
|
||||
.is_some();
|
||||
if !thread_metadata_present {
|
||||
anyhow::bail!("thread metadata is unavailable after reconciling thread goals");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(state_db)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn protocol_goal_from_state(goal: codex_state::ThreadGoal) -> ThreadGoal {
|
||||
ThreadGoal {
|
||||
thread_id: goal.thread_id,
|
||||
objective: goal.objective,
|
||||
status: protocol_goal_status_from_state(goal.status),
|
||||
token_budget: goal.token_budget,
|
||||
tokens_used: goal.tokens_used,
|
||||
time_used_seconds: goal.time_used_seconds,
|
||||
created_at: goal.created_at.timestamp(),
|
||||
updated_at: goal.updated_at.timestamp(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn protocol_goal_status_from_state(
|
||||
status: codex_state::ThreadGoalStatus,
|
||||
) -> ThreadGoalStatus {
|
||||
match status {
|
||||
codex_state::ThreadGoalStatus::Active => ThreadGoalStatus::Active,
|
||||
codex_state::ThreadGoalStatus::Paused => ThreadGoalStatus::Paused,
|
||||
codex_state::ThreadGoalStatus::BudgetLimited => ThreadGoalStatus::BudgetLimited,
|
||||
codex_state::ThreadGoalStatus::Complete => ThreadGoalStatus::Complete,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn state_goal_status_from_protocol(
|
||||
status: ThreadGoalStatus,
|
||||
) -> codex_state::ThreadGoalStatus {
|
||||
match status {
|
||||
ThreadGoalStatus::Active => codex_state::ThreadGoalStatus::Active,
|
||||
ThreadGoalStatus::Paused => codex_state::ThreadGoalStatus::Paused,
|
||||
ThreadGoalStatus::BudgetLimited => codex_state::ThreadGoalStatus::BudgetLimited,
|
||||
ThreadGoalStatus::Complete => codex_state::ThreadGoalStatus::Complete,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn validate_goal_budget(value: Option<i64>) -> anyhow::Result<()> {
|
||||
if let Some(value) = value
|
||||
&& value <= 0
|
||||
{
|
||||
anyhow::bail!("goal budgets must be positive when provided");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -37,6 +37,7 @@ pub mod file_watcher;
|
||||
mod flags;
|
||||
#[cfg(test)]
|
||||
mod git_info_tests;
|
||||
mod goals;
|
||||
mod guardian;
|
||||
mod hook_runtime;
|
||||
mod installation_id;
|
||||
|
||||
@@ -24,6 +24,7 @@ pub(super) async fn spawn_review_thread(
|
||||
let _ = review_features.disable(Feature::WebSearchRequest);
|
||||
let _ = review_features.disable(Feature::WebSearchCached);
|
||||
let review_web_search_mode = WebSearchMode::Disabled;
|
||||
let goal_tools_supported = !config.ephemeral && parent_turn_context.tools_config.goal_tools;
|
||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_info: &review_model_info,
|
||||
available_models: &sess
|
||||
@@ -51,6 +52,7 @@ pub(super) async fn spawn_review_thread(
|
||||
.with_spawn_agent_usage_hint(config.multi_agent_v2.usage_hint_enabled)
|
||||
.with_spawn_agent_usage_hint_text(config.multi_agent_v2.usage_hint_text.clone())
|
||||
.with_hide_spawn_agent_metadata(config.multi_agent_v2.hide_spawn_agent_metadata)
|
||||
.with_goal_tools_allowed(goal_tools_supported)
|
||||
.with_max_concurrent_threads_per_session(config.agent_max_threads)
|
||||
.with_agent_type_description(crate::agent::role::spawn_tool_spec::build(
|
||||
&config.agent_roles,
|
||||
|
||||
@@ -57,6 +57,7 @@ use crate::tasks::execute_user_shell_command;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::GoalHandler;
|
||||
use crate::tools::handlers::ShellHandler;
|
||||
use crate::tools::handlers::UnifiedExecHandler;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
@@ -101,6 +102,7 @@ use codex_protocol::protocol::ResumedHistory;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::SkillScope;
|
||||
use codex_protocol::protocol::Submission;
|
||||
use codex_protocol::protocol::ThreadGoalStatus;
|
||||
use codex_protocol::protocol::ThreadRolledBackEvent;
|
||||
use codex_protocol::protocol::TokenCountEvent;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
@@ -3348,6 +3350,7 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
|
||||
session_configuration.cwd.clone(),
|
||||
"turn_id".to_string(),
|
||||
skills_outcome,
|
||||
/*goal_tools_supported*/ true,
|
||||
);
|
||||
|
||||
let (mailbox, mailbox_rx) = crate::agent::Mailbox::new();
|
||||
@@ -4703,6 +4706,7 @@ pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx(
|
||||
session_configuration.cwd.clone(),
|
||||
"turn_id".to_string(),
|
||||
skills_outcome,
|
||||
/*goal_tools_supported*/ true,
|
||||
));
|
||||
|
||||
let (mailbox, mailbox_rx) = crate::agent::Mailbox::new();
|
||||
@@ -6852,6 +6856,222 @@ async fn sample_rollout(
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_goal_tool_rejects_existing_goal() {
|
||||
let (mut session, turn_context) = make_session_and_context().await;
|
||||
let _ = session.features.enable(Feature::Goals);
|
||||
let session = Arc::new(session);
|
||||
upsert_goal_tool_test_thread(session.as_ref()).await;
|
||||
let turn_context = Arc::new(turn_context);
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let handler = GoalHandler;
|
||||
|
||||
handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
cancellation_token: CancellationToken::new(),
|
||||
tracker: Arc::clone(&tracker),
|
||||
call_id: "create-goal-1".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("create_goal"),
|
||||
source: ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"objective": "Keep the watcher alive",
|
||||
"token_budget": 123,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.expect("initial create_goal should succeed");
|
||||
|
||||
let response = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
cancellation_token: CancellationToken::new(),
|
||||
tracker,
|
||||
call_id: "create-goal-2".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("create_goal"),
|
||||
source: ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"objective": "Replace the watcher",
|
||||
"token_budget": 456,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let Err(FunctionCallError::RespondToModel(output)) = response else {
|
||||
panic!("expected create_goal to reject an existing goal");
|
||||
};
|
||||
assert_eq!(
|
||||
output,
|
||||
"cannot create a new goal because this thread already has a goal; use update_goal only when the existing goal is complete"
|
||||
);
|
||||
|
||||
let goal = session
|
||||
.get_thread_goal()
|
||||
.await
|
||||
.expect("read thread goal")
|
||||
.expect("goal should still exist");
|
||||
assert_eq!(goal.objective, "Keep the watcher alive");
|
||||
assert_eq!(goal.token_budget, Some(123));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn update_goal_tool_rejects_pausing_goal() {
|
||||
let (mut session, turn_context) = make_session_and_context().await;
|
||||
let _ = session.features.enable(Feature::Goals);
|
||||
let session = Arc::new(session);
|
||||
upsert_goal_tool_test_thread(session.as_ref()).await;
|
||||
let turn_context = Arc::new(turn_context);
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let handler = GoalHandler;
|
||||
|
||||
handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
cancellation_token: CancellationToken::new(),
|
||||
tracker: Arc::clone(&tracker),
|
||||
call_id: "create-goal".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("create_goal"),
|
||||
source: ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"objective": "Keep the watcher alive",
|
||||
"token_budget": 123,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.expect("initial create_goal should succeed");
|
||||
|
||||
let response = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
cancellation_token: CancellationToken::new(),
|
||||
tracker,
|
||||
call_id: "pause-goal".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("update_goal"),
|
||||
source: ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"status": "paused",
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let Err(FunctionCallError::RespondToModel(output)) = response else {
|
||||
panic!("expected update_goal to reject pausing a goal");
|
||||
};
|
||||
assert_eq!(
|
||||
output,
|
||||
"update_goal can only mark the existing goal complete; pause, resume, and budget-limited status changes are controlled by the user or system"
|
||||
);
|
||||
|
||||
let goal = session
|
||||
.get_thread_goal()
|
||||
.await
|
||||
.expect("read thread goal")
|
||||
.expect("goal should still exist");
|
||||
assert_eq!(goal.status, ThreadGoalStatus::Active);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn update_goal_tool_marks_goal_complete() {
|
||||
let (mut session, turn_context) = make_session_and_context().await;
|
||||
let _ = session.features.enable(Feature::Goals);
|
||||
let session = Arc::new(session);
|
||||
upsert_goal_tool_test_thread(session.as_ref()).await;
|
||||
let turn_context = Arc::new(turn_context);
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let handler = GoalHandler;
|
||||
|
||||
handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
cancellation_token: CancellationToken::new(),
|
||||
tracker: Arc::clone(&tracker),
|
||||
call_id: "create-goal".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("create_goal"),
|
||||
source: ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"objective": "Keep the watcher alive",
|
||||
"token_budget": 123,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.expect("initial create_goal should succeed");
|
||||
|
||||
handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
cancellation_token: CancellationToken::new(),
|
||||
tracker,
|
||||
call_id: "complete-goal".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("update_goal"),
|
||||
source: ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"status": "complete",
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
.expect("update_goal should mark the goal complete");
|
||||
|
||||
let goal = session
|
||||
.get_thread_goal()
|
||||
.await
|
||||
.expect("read thread goal")
|
||||
.expect("goal should still exist");
|
||||
assert_eq!(goal.status, ThreadGoalStatus::Complete);
|
||||
}
|
||||
|
||||
async fn upsert_goal_tool_test_thread(session: &Session) {
|
||||
let config = session.get_config().await;
|
||||
let state_db = codex_state::StateRuntime::init(
|
||||
config.sqlite_home.clone(),
|
||||
config.model_provider_id.clone(),
|
||||
)
|
||||
.await
|
||||
.expect("state db should initialize");
|
||||
let mut builder = codex_state::ThreadMetadataBuilder::new(
|
||||
session.conversation_id,
|
||||
config
|
||||
.codex_home
|
||||
.join("goal-tool-test-rollout.jsonl")
|
||||
.to_path_buf(),
|
||||
chrono::Utc::now(),
|
||||
SessionSource::Exec,
|
||||
);
|
||||
builder.cwd = config.cwd.to_path_buf();
|
||||
builder.model_provider = Some(config.model_provider_id.clone());
|
||||
builder.cli_version = Some(env!("CARGO_PKG_VERSION").to_string());
|
||||
builder.sandbox_policy = config.permissions.sandbox_policy.get().clone();
|
||||
builder.approval_mode = config.permissions.approval_policy.value();
|
||||
let metadata = builder.build(config.model_provider_id.as_str());
|
||||
state_db
|
||||
.upsert_thread(&metadata)
|
||||
.await
|
||||
.expect("thread metadata should be upserted");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
use crate::exec::ExecParams;
|
||||
|
||||
@@ -180,6 +180,7 @@ impl TurnContext {
|
||||
.with_spawn_agent_usage_hint(config.multi_agent_v2.usage_hint_enabled)
|
||||
.with_spawn_agent_usage_hint_text(config.multi_agent_v2.usage_hint_text.clone())
|
||||
.with_hide_spawn_agent_metadata(config.multi_agent_v2.hide_spawn_agent_metadata)
|
||||
.with_goal_tools_allowed(self.tools_config.goal_tools)
|
||||
.with_max_concurrent_threads_per_session(config.agent_max_threads)
|
||||
.with_agent_type_description(crate::agent::role::spawn_tool_spec::build(
|
||||
&config.agent_roles,
|
||||
@@ -405,6 +406,7 @@ impl Session {
|
||||
cwd: AbsolutePathBuf,
|
||||
sub_id: String,
|
||||
skills_outcome: Arc<SkillLoadOutcome>,
|
||||
goal_tools_supported: bool,
|
||||
) -> TurnContext {
|
||||
let reasoning_effort = session_configuration.collaboration_mode.reasoning_effort();
|
||||
let reasoning_summary = session_configuration
|
||||
@@ -441,6 +443,7 @@ impl Session {
|
||||
.with_spawn_agent_usage_hint(per_turn_config.multi_agent_v2.usage_hint_enabled)
|
||||
.with_spawn_agent_usage_hint_text(per_turn_config.multi_agent_v2.usage_hint_text.clone())
|
||||
.with_hide_spawn_agent_metadata(per_turn_config.multi_agent_v2.hide_spawn_agent_metadata)
|
||||
.with_goal_tools_allowed(goal_tools_supported)
|
||||
.with_max_concurrent_threads_per_session(per_turn_config.agent_max_threads)
|
||||
.with_agent_type_description(crate::agent::role::spawn_tool_spec::build(
|
||||
&per_turn_config.agent_roles,
|
||||
@@ -653,6 +656,7 @@ impl Session {
|
||||
.skills_for_config(&skills_input, fs)
|
||||
.await,
|
||||
);
|
||||
let goal_tools_supported = !per_turn_config.ephemeral && self.state_db().is_some();
|
||||
let mut turn_context: TurnContext = Self::make_turn_context(
|
||||
self.conversation_id,
|
||||
Some(Arc::clone(&self.services.auth_manager)),
|
||||
@@ -679,6 +683,7 @@ impl Session {
|
||||
cwd,
|
||||
sub_id,
|
||||
skills_outcome,
|
||||
goal_tools_supported,
|
||||
);
|
||||
turn_context.realtime_active = self.conversation.running_state().await.is_some();
|
||||
|
||||
|
||||
276
codex-rs/core/src/tools/handlers/goal.rs
Normal file
276
codex-rs/core/src/tools/handlers/goal.rs
Normal file
@@ -0,0 +1,276 @@
|
||||
//! Built-in model tool handlers for persisted thread goals.
|
||||
//!
|
||||
//! The public tool contract intentionally splits goal creation from completion:
|
||||
//! `create_goal` starts an active objective, while `update_goal` can only mark
|
||||
//! the existing goal complete.
|
||||
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::goals::CreateGoalRequest;
|
||||
use crate::goals::SetGoalRequest;
|
||||
use crate::session::session::Session;
|
||||
use crate::session::turn_context::TurnContext;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::parse_arguments;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use codex_protocol::protocol::ThreadGoal;
|
||||
use codex_protocol::protocol::ThreadGoalStatus;
|
||||
use codex_tools::CREATE_GOAL_TOOL_NAME;
|
||||
use codex_tools::GET_GOAL_TOOL_NAME;
|
||||
use codex_tools::UPDATE_GOAL_TOOL_NAME;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::fmt::Write as _;
|
||||
|
||||
pub struct GoalHandler;
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
struct CreateGoalArgs {
|
||||
objective: String,
|
||||
token_budget: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
struct UpdateGoalArgs {
|
||||
status: ThreadGoalStatus,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct GoalToolResponse {
|
||||
goal: Option<ThreadGoal>,
|
||||
remaining_tokens: Option<i64>,
|
||||
completion_budget_report: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum CompletionBudgetReport {
|
||||
Include,
|
||||
Omit,
|
||||
}
|
||||
|
||||
impl GoalToolResponse {
|
||||
fn new(goal: Option<ThreadGoal>, report_mode: CompletionBudgetReport) -> Self {
|
||||
let remaining_tokens = goal.as_ref().and_then(|goal| {
|
||||
goal.token_budget
|
||||
.map(|budget| (budget - goal.tokens_used).max(0))
|
||||
});
|
||||
let completion_budget_report = match report_mode {
|
||||
CompletionBudgetReport::Include => goal
|
||||
.as_ref()
|
||||
.filter(|goal| goal.status == ThreadGoalStatus::Complete)
|
||||
.and_then(completion_budget_report),
|
||||
CompletionBudgetReport::Omit => None,
|
||||
};
|
||||
Self {
|
||||
goal,
|
||||
remaining_tokens,
|
||||
completion_budget_report,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolHandler for GoalHandler {
|
||||
type Output = FunctionToolOutput;
|
||||
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
turn,
|
||||
payload,
|
||||
tool_name,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let arguments = match payload {
|
||||
ToolPayload::Function { arguments } => arguments,
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"goal handler received unsupported payload".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
match tool_name.name.as_str() {
|
||||
GET_GOAL_TOOL_NAME => handle_get_goal(session.as_ref()).await,
|
||||
CREATE_GOAL_TOOL_NAME => {
|
||||
handle_create_goal(session.as_ref(), turn.as_ref(), &arguments).await
|
||||
}
|
||||
UPDATE_GOAL_TOOL_NAME => {
|
||||
handle_update_goal(session.as_ref(), turn.as_ref(), &arguments).await
|
||||
}
|
||||
other => Err(FunctionCallError::Fatal(format!(
|
||||
"goal handler received unsupported tool: {other}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_get_goal(session: &Session) -> Result<FunctionToolOutput, FunctionCallError> {
|
||||
let goal = session
|
||||
.get_thread_goal()
|
||||
.await
|
||||
.map_err(|err| FunctionCallError::RespondToModel(format_goal_error(err)))?;
|
||||
goal_response(goal, CompletionBudgetReport::Omit)
|
||||
}
|
||||
|
||||
async fn handle_create_goal(
|
||||
session: &Session,
|
||||
turn_context: &TurnContext,
|
||||
arguments: &str,
|
||||
) -> Result<FunctionToolOutput, FunctionCallError> {
|
||||
let args: CreateGoalArgs = parse_arguments(arguments)?;
|
||||
let goal = session
|
||||
.create_thread_goal(
|
||||
turn_context,
|
||||
CreateGoalRequest {
|
||||
objective: args.objective,
|
||||
token_budget: args.token_budget,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
if err
|
||||
.chain()
|
||||
.any(|cause| cause.to_string().contains("already has a goal"))
|
||||
{
|
||||
FunctionCallError::RespondToModel(
|
||||
"cannot create a new goal because this thread already has a goal; use update_goal only when the existing goal is complete"
|
||||
.to_string(),
|
||||
)
|
||||
} else {
|
||||
FunctionCallError::RespondToModel(format_goal_error(err))
|
||||
}
|
||||
})?;
|
||||
goal_response(Some(goal), CompletionBudgetReport::Omit)
|
||||
}
|
||||
|
||||
async fn handle_update_goal(
|
||||
session: &Session,
|
||||
turn_context: &TurnContext,
|
||||
arguments: &str,
|
||||
) -> Result<FunctionToolOutput, FunctionCallError> {
|
||||
let args: UpdateGoalArgs = parse_arguments(arguments)?;
|
||||
if args.status != ThreadGoalStatus::Complete {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"update_goal can only mark the existing goal complete; pause, resume, and budget-limited status changes are controlled by the user or system"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
let goal = session
|
||||
.set_thread_goal(
|
||||
turn_context,
|
||||
SetGoalRequest {
|
||||
objective: None,
|
||||
status: Some(ThreadGoalStatus::Complete),
|
||||
token_budget: None,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|err| FunctionCallError::RespondToModel(format_goal_error(err)))?;
|
||||
goal_response(Some(goal), CompletionBudgetReport::Include)
|
||||
}
|
||||
|
||||
fn format_goal_error(err: anyhow::Error) -> String {
|
||||
let mut message = err.to_string();
|
||||
for cause in err.chain().skip(1) {
|
||||
let _ = write!(message, ": {cause}");
|
||||
}
|
||||
message
|
||||
}
|
||||
|
||||
fn goal_response(
|
||||
goal: Option<ThreadGoal>,
|
||||
completion_budget_report: CompletionBudgetReport,
|
||||
) -> Result<FunctionToolOutput, FunctionCallError> {
|
||||
let response =
|
||||
serde_json::to_string_pretty(&GoalToolResponse::new(goal, completion_budget_report))
|
||||
.map_err(|err| FunctionCallError::Fatal(err.to_string()))?;
|
||||
Ok(FunctionToolOutput::from_text(response, Some(true)))
|
||||
}
|
||||
|
||||
fn completion_budget_report(goal: &ThreadGoal) -> Option<String> {
|
||||
let mut parts = Vec::new();
|
||||
if let Some(budget) = goal.token_budget {
|
||||
parts.push(format!("tokens used: {} of {budget}", goal.tokens_used));
|
||||
}
|
||||
if goal.time_used_seconds > 0 {
|
||||
parts.push(format!("time used: {} seconds", goal.time_used_seconds));
|
||||
}
|
||||
if parts.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(format!(
|
||||
"Goal achieved. Report final budget usage to the user: {}.",
|
||||
parts.join("; ")
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use codex_protocol::ThreadId;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn completed_budgeted_goal_response_reports_final_usage() {
|
||||
let goal = ThreadGoal {
|
||||
thread_id: ThreadId::new(),
|
||||
objective: "Keep optimizing".to_string(),
|
||||
status: ThreadGoalStatus::Complete,
|
||||
token_budget: Some(10_000),
|
||||
tokens_used: 3_250,
|
||||
time_used_seconds: 75,
|
||||
created_at: 1,
|
||||
updated_at: 2,
|
||||
};
|
||||
|
||||
let response = GoalToolResponse::new(Some(goal.clone()), CompletionBudgetReport::Include);
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
GoalToolResponse {
|
||||
goal: Some(goal),
|
||||
remaining_tokens: Some(6_750),
|
||||
completion_budget_report: Some(
|
||||
"Goal achieved. Report final budget usage to the user: tokens used: 3250 of 10000; time used: 75 seconds."
|
||||
.to_string()
|
||||
),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn completed_unbudgeted_goal_response_omits_budget_report() {
|
||||
let goal = ThreadGoal {
|
||||
thread_id: ThreadId::new(),
|
||||
objective: "Write a poem".to_string(),
|
||||
status: ThreadGoalStatus::Complete,
|
||||
token_budget: None,
|
||||
tokens_used: 120,
|
||||
time_used_seconds: 0,
|
||||
created_at: 1,
|
||||
updated_at: 2,
|
||||
};
|
||||
|
||||
let response = GoalToolResponse::new(Some(goal.clone()), CompletionBudgetReport::Include);
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
GoalToolResponse {
|
||||
goal: Some(goal),
|
||||
remaining_tokens: None,
|
||||
completion_budget_report: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
pub(crate) mod agent_jobs;
|
||||
pub(crate) mod apply_patch;
|
||||
mod dynamic;
|
||||
mod goal;
|
||||
mod list_dir;
|
||||
mod mcp;
|
||||
mod mcp_resource;
|
||||
@@ -36,6 +37,7 @@ pub use apply_patch::ApplyPatchHandler;
|
||||
use codex_protocol::models::AdditionalPermissionProfile;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
pub use dynamic::DynamicToolHandler;
|
||||
pub use goal::GoalHandler;
|
||||
pub use list_dir::ListDirHandler;
|
||||
pub use mcp::McpHandler;
|
||||
pub use mcp_resource::McpResourceHandler;
|
||||
|
||||
@@ -80,6 +80,7 @@ pub(crate) fn build_specs_with_discoverable_tools(
|
||||
use crate::tools::handlers::CodeModeExecuteHandler;
|
||||
use crate::tools::handlers::CodeModeWaitHandler;
|
||||
use crate::tools::handlers::DynamicToolHandler;
|
||||
use crate::tools::handlers::GoalHandler;
|
||||
use crate::tools::handlers::ListDirHandler;
|
||||
use crate::tools::handlers::McpHandler;
|
||||
use crate::tools::handlers::McpResourceHandler;
|
||||
@@ -148,6 +149,7 @@ pub(crate) fn build_specs_with_discoverable_tools(
|
||||
let plan_handler = Arc::new(PlanHandler);
|
||||
let apply_patch_handler = Arc::new(ApplyPatchHandler);
|
||||
let dynamic_tool_handler = Arc::new(DynamicToolHandler);
|
||||
let goal_handler = Arc::new(GoalHandler);
|
||||
let view_image_handler = Arc::new(ViewImageHandler);
|
||||
let mcp_handler = Arc::new(McpHandler);
|
||||
let mcp_resource_handler = Arc::new(McpResourceHandler);
|
||||
@@ -208,6 +210,9 @@ pub(crate) fn build_specs_with_discoverable_tools(
|
||||
ToolHandlerKind::FollowupTaskV2 => {
|
||||
builder.register_handler(handler.name, Arc::new(FollowupTaskHandlerV2));
|
||||
}
|
||||
ToolHandlerKind::Goal => {
|
||||
builder.register_handler(handler.name, goal_handler.clone());
|
||||
}
|
||||
ToolHandlerKind::ListAgentsV2 => {
|
||||
builder.register_handler(handler.name, Arc::new(ListAgentsHandlerV2));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user