chore: goal ext skeleton (#23288)

Skeleton of `/goal` in extension
Lot's of follow-ups coming
This commit is contained in:
jif-oai
2026-05-18 13:32:21 +02:00
committed by GitHub
parent da14dd2add
commit a80f07ec4a
9 changed files with 735 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
load("//:defs.bzl", "codex_rust_crate")
codex_rust_crate(
name = "goal",
crate_name = "codex_goal_extension",
)

View File

@@ -0,0 +1,22 @@
[package]
edition.workspace = true
license.workspace = true
name = "codex-goal-extension"
version.workspace = true
[lib]
name = "codex_goal_extension"
path = "src/lib.rs"
test = false
doctest = false
[lints]
workspace = true
[dependencies]
async-trait = { workspace = true }
codex-extension-api = { workspace = true }
codex-protocol = { workspace = true }
codex-tools = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }

View File

@@ -0,0 +1,73 @@
use codex_protocol::protocol::TokenUsage;
use std::collections::HashMap;
use std::sync::Mutex;
use std::sync::PoisonError;
#[derive(Debug, Default)]
pub(crate) struct GoalAccountingState {
inner: Mutex<GoalAccountingInner>,
}
#[derive(Debug, Default)]
struct GoalAccountingInner {
turns: HashMap<String, GoalTurnAccounting>,
unflushed_token_delta: i64,
}
#[derive(Debug, Default)]
struct GoalTurnAccounting {
token_delta: i64,
stopped: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct RecordedTokenDelta {
pub(crate) turn_delta: i64,
pub(crate) thread_unflushed_delta: i64,
}
impl GoalAccountingState {
pub(crate) fn start_turn(&self, turn_id: impl Into<String>) {
let turn_id = turn_id.into();
self.inner().turns.entry(turn_id).or_default().stopped = false;
}
pub(crate) fn record_token_usage(
&self,
turn_id: impl Into<String>,
usage: &TokenUsage,
) -> Option<RecordedTokenDelta> {
let delta = goal_token_delta_for_usage(usage);
if delta <= 0 {
return None;
}
let turn_id = turn_id.into();
let mut inner = self.inner();
let turn = inner.turns.entry(turn_id).or_default();
turn.token_delta = turn.token_delta.saturating_add(delta);
let turn_delta = turn.token_delta;
inner.unflushed_token_delta = inner.unflushed_token_delta.saturating_add(delta);
Some(RecordedTokenDelta {
turn_delta,
thread_unflushed_delta: inner.unflushed_token_delta,
})
}
pub(crate) fn stop_turn(&self, turn_id: &str) {
if let Some(turn) = self.inner().turns.get_mut(turn_id) {
turn.stopped = true;
}
}
fn inner(&self) -> std::sync::MutexGuard<'_, GoalAccountingInner> {
self.inner.lock().unwrap_or_else(PoisonError::into_inner)
}
}
pub(crate) fn goal_token_delta_for_usage(usage: &TokenUsage) -> i64 {
usage
.input_tokens
.saturating_sub(usage.cached_input_tokens)
.saturating_add(usage.output_tokens.max(0))
}

View File

@@ -0,0 +1,278 @@
use std::sync::Arc;
use async_trait::async_trait;
use codex_extension_api::ConfigContributor;
use codex_extension_api::ExtensionData;
use codex_extension_api::ExtensionRegistryBuilder;
use codex_extension_api::ThreadLifecycleContributor;
use codex_extension_api::ThreadStartInput;
use codex_extension_api::TokenUsageContributor;
use codex_extension_api::ToolContributor;
use codex_extension_api::TurnAbortInput;
use codex_extension_api::TurnLifecycleContributor;
use codex_extension_api::TurnStartInput;
use codex_extension_api::TurnStopInput;
use codex_protocol::ThreadId;
use codex_protocol::protocol::ThreadGoal;
use codex_protocol::protocol::TokenUsageInfo;
use codex_protocol::protocol::TurnAbortReason;
use crate::accounting::GoalAccountingState;
use crate::tool::CreateGoalRequest;
use crate::tool::GoalToolExecutor;
#[derive(Clone, Debug)]
pub struct GoalExtensionConfig {
pub enabled: bool,
}
impl GoalExtensionConfig {
fn from_enabled(enabled: bool) -> Self {
Self { enabled }
}
}
#[derive(Clone)]
pub struct GoalExtension<C> {
backend: Arc<dyn GoalToolBackend>,
goals_enabled: Arc<dyn Fn(&C) -> bool + Send + Sync>,
}
impl<C> std::fmt::Debug for GoalExtension<C> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GoalExtension").finish_non_exhaustive()
}
}
impl<C> GoalExtension<C> {
pub fn new(
backend: Arc<dyn GoalToolBackend>,
goals_enabled: impl Fn(&C) -> bool + Send + Sync + 'static,
) -> Self {
Self {
backend,
goals_enabled: Arc::new(goals_enabled),
}
}
pub fn without_backend(goals_enabled: impl Fn(&C) -> bool + Send + Sync + 'static) -> Self {
Self::new(Arc::new(NoGoalToolBackend), goals_enabled)
}
}
#[async_trait]
pub trait GoalToolBackend: Send + Sync {
async fn get_goal(&self, thread_id: ThreadId) -> Result<Option<ThreadGoal>, String>;
async fn create_goal(
&self,
thread_id: ThreadId,
request: CreateGoalRequest,
) -> Result<ThreadGoal, String>;
async fn complete_goal(&self, thread_id: ThreadId) -> Result<ThreadGoal, String>;
}
#[derive(Clone, Copy, Debug, Default)]
pub struct NoGoalToolBackend;
#[async_trait]
impl GoalToolBackend for NoGoalToolBackend {
async fn get_goal(&self, _thread_id: ThreadId) -> Result<Option<ThreadGoal>, String> {
Err(missing_backend_message())
}
async fn create_goal(
&self,
_thread_id: ThreadId,
_request: CreateGoalRequest,
) -> Result<ThreadGoal, String> {
Err(missing_backend_message())
}
async fn complete_goal(&self, _thread_id: ThreadId) -> Result<ThreadGoal, String> {
Err(missing_backend_message())
}
}
fn missing_backend_message() -> String {
// TODO: replace this fallback with a host-provided goal backend once
// ToolContributor invocations can reach thread-scoped goal persistence and
// the current turn context.
"goal tools are not connected to host goal persistence yet".to_string()
}
impl<C> ThreadLifecycleContributor<C> for GoalExtension<C>
where
C: Send + Sync + 'static,
{
fn on_thread_start(&self, input: ThreadStartInput<'_, C>) {
input
.thread_store
.insert(GoalExtensionConfig::from_enabled((self.goals_enabled)(
input.config,
)));
input
.thread_store
.get_or_init::<GoalAccountingState>(GoalAccountingState::default);
}
}
impl<C> ConfigContributor<C> for GoalExtension<C>
where
C: Send + Sync + 'static,
{
fn on_config_changed(
&self,
_session_store: &ExtensionData,
thread_store: &ExtensionData,
_previous_config: &C,
new_config: &C,
) {
thread_store.insert(GoalExtensionConfig::from_enabled((self.goals_enabled)(
new_config,
)));
}
}
impl<C> TurnLifecycleContributor for GoalExtension<C>
where
C: Send + Sync + 'static,
{
fn on_turn_start(&self, input: TurnStartInput<'_>) {
if !goal_enabled(input.thread_store) {
return;
}
// TODO: TurnStartInput should expose collaboration mode and token usage
// at turn start. Goals need mode to suppress plan-mode accounting and
// the token baseline to account deltas exactly.
accounting_state(input.thread_store).start_turn(input.turn_store.level_id());
}
fn on_turn_stop(&self, input: TurnStopInput<'_>) {
if !goal_enabled(input.thread_store) {
return;
}
// TODO: this should flush wall-clock and any unflushed token usage to
// persisted goal storage, emit ThreadGoalUpdated, and optionally inject
// budget-limit steering through a host event/input capability.
// TODO: the host also needs an idle/next-turn wake capability so an
// active goal can enqueue continuation context after the turn is fully
// cleared, only when there is no pending user or mailbox work.
accounting_state(input.thread_store).stop_turn(input.turn_store.level_id());
}
fn on_turn_abort(&self, input: TurnAbortInput<'_>) {
if !goal_enabled(input.thread_store) {
return;
}
accounting_state(input.thread_store).stop_turn(input.turn_store.level_id());
if input.reason == TurnAbortReason::Interrupted {
// TODO: interrupted turns should pause the active goal via persisted
// goal storage and emit ThreadGoalUpdated with turn_id None.
}
}
}
impl<C> TokenUsageContributor for GoalExtension<C>
where
C: Send + Sync + 'static,
{
fn on_token_usage(
&self,
_session_store: &ExtensionData,
thread_store: &ExtensionData,
turn_store: &ExtensionData,
token_usage: &TokenUsageInfo,
) {
if !goal_enabled(thread_store) {
return;
}
let Some(_recorded) = accounting_state(thread_store)
.record_token_usage(turn_store.level_id(), &token_usage.last_token_usage)
else {
return;
};
// TODO: TokenUsageContributor needs a host goal storage capability so
// this recorded delta can be committed to the active persisted goal.
// It also needs an event/input capability to emit ThreadGoalUpdated and
// inject budget-limit steering when accounting changes goal status.
// TODO: if the storage/event path must await, TokenUsageContributor
// either needs to become async or receive a fire-and-forget host sink.
}
}
// TODO: app-server initiated goal set/clear operations need a contributor or
// backend callback here. They currently happen outside thread/turn/token
// lifecycle, but the goal extension must observe them to account before
// mutation, refresh active-goal accounting, emit objective-update steering, and
// clear runtime state when a goal is removed.
impl<C> ToolContributor for GoalExtension<C>
where
C: Send + Sync + 'static,
{
fn tools(
&self,
_session_store: &ExtensionData,
thread_store: &ExtensionData,
) -> Vec<Arc<dyn codex_extension_api::ToolExecutor<codex_extension_api::ToolCall>>> {
if !goal_enabled(thread_store) {
return Vec::new();
}
let Ok(thread_id) = ThreadId::from_string(thread_store.level_id()) else {
return Vec::new();
};
vec![
Arc::new(GoalToolExecutor::get(thread_id, Arc::clone(&self.backend))),
Arc::new(GoalToolExecutor::create(
thread_id,
Arc::clone(&self.backend),
)),
Arc::new(GoalToolExecutor::update(
thread_id,
Arc::clone(&self.backend),
)),
]
}
}
pub fn install<C>(
registry: &mut ExtensionRegistryBuilder<C>,
goals_enabled: impl Fn(&C) -> bool + Send + Sync + 'static,
) where
C: Send + Sync + 'static,
{
install_with_backend(registry, Arc::new(NoGoalToolBackend), goals_enabled);
}
pub fn install_with_backend<C>(
registry: &mut ExtensionRegistryBuilder<C>,
backend: Arc<dyn GoalToolBackend>,
goals_enabled: impl Fn(&C) -> bool + Send + Sync + 'static,
) where
C: Send + Sync + 'static,
{
let extension = Arc::new(GoalExtension::new(backend, goals_enabled));
registry.thread_lifecycle_contributor(extension.clone());
registry.config_contributor(extension.clone());
registry.turn_lifecycle_contributor(extension.clone());
registry.token_usage_contributor(extension.clone());
registry.tool_contributor(extension);
}
fn goal_enabled(thread_store: &ExtensionData) -> bool {
thread_store
.get::<GoalExtensionConfig>()
.is_some_and(|config| config.enabled)
}
fn accounting_state(thread_store: &ExtensionData) -> Arc<GoalAccountingState> {
thread_store.get_or_init::<GoalAccountingState>(GoalAccountingState::default)
}

View File

@@ -0,0 +1,21 @@
//! Extension crate sketch for the `/goal` feature.
//!
//! This crate is intentionally not wired into the host yet. It contains the
//! goal tool specs, extension registration shape, and the parts of runtime
//! accounting that can be represented with today's extension API.
mod accounting;
mod extension;
mod spec;
mod tool;
pub use extension::GoalExtension;
pub use extension::GoalExtensionConfig;
pub use extension::GoalToolBackend;
pub use extension::NoGoalToolBackend;
pub use extension::install;
pub use extension::install_with_backend;
pub use spec::CREATE_GOAL_TOOL_NAME;
pub use spec::GET_GOAL_TOOL_NAME;
pub use spec::UPDATE_GOAL_TOOL_NAME;
pub use tool::CreateGoalRequest;

View File

@@ -0,0 +1,89 @@
//! Responses API tool definitions for persisted thread goals.
use codex_tools::JsonSchema;
use codex_tools::ResponsesApiTool;
use codex_tools::ToolSpec;
use serde_json::json;
use std::collections::BTreeMap;
pub const GET_GOAL_TOOL_NAME: &str = "get_goal";
pub const CREATE_GOAL_TOOL_NAME: &str = "create_goal";
pub const UPDATE_GOAL_TOOL_NAME: &str = "update_goal";
pub fn create_get_goal_tool() -> ToolSpec {
ToolSpec::Function(ResponsesApiTool {
name: GET_GOAL_TOOL_NAME.to_string(),
description: "Get the current goal for this thread, including status, budgets, token and elapsed-time usage, and remaining token budget."
.to_string(),
strict: false,
defer_loading: None,
parameters: JsonSchema::object(BTreeMap::new(), Some(Vec::new()), Some(false.into())),
output_schema: None,
})
}
pub fn create_create_goal_tool() -> ToolSpec {
let properties = BTreeMap::from([
(
"objective".to_string(),
JsonSchema::string(Some(
"Required. The concrete objective to start pursuing. This starts a new active goal only when no goal is currently defined; if a goal already exists, this tool fails."
.to_string(),
)),
),
(
"token_budget".to_string(),
JsonSchema::integer(Some(
"Optional positive token budget for the new active goal.".to_string(),
)),
),
]);
ToolSpec::Function(ResponsesApiTool {
name: CREATE_GOAL_TOOL_NAME.to_string(),
description: format!(
r#"Create a goal only when explicitly requested by the user or system/developer instructions; do not infer goals from ordinary tasks.
Set token_budget only when an explicit token budget is requested. Fails if a goal exists; use {UPDATE_GOAL_TOOL_NAME} only for status."#
),
strict: false,
defer_loading: None,
parameters: JsonSchema::object(
properties,
/*required*/ Some(vec!["objective".to_string()]),
Some(false.into()),
),
output_schema: None,
})
}
pub fn create_update_goal_tool() -> ToolSpec {
let properties = BTreeMap::from([(
"status".to_string(),
JsonSchema::string_enum(
vec![json!("complete")],
Some(
"Required. Set to complete only when the objective is achieved and no required work remains."
.to_string(),
),
),
)]);
ToolSpec::Function(ResponsesApiTool {
name: UPDATE_GOAL_TOOL_NAME.to_string(),
description: r#"Update the existing goal.
Use this tool only to mark the goal achieved.
Set status to `complete` only when the objective has actually been achieved and no required work remains.
Do not mark a goal complete merely because its budget is nearly exhausted or because you are stopping work.
You cannot use this tool to pause, resume, or budget-limit a goal; those status changes are controlled by the user or system.
When marking a budgeted goal achieved with status `complete`, report the final token usage from the tool result to the user."#
.to_string(),
strict: false,
defer_loading: None,
parameters: JsonSchema::object(
properties,
/*required*/ Some(vec!["status".to_string()]),
Some(false.into()),
),
output_schema: None,
})
}

View File

@@ -0,0 +1,231 @@
use std::sync::Arc;
use async_trait::async_trait;
use codex_extension_api::FunctionCallError;
use codex_extension_api::JsonToolOutput;
use codex_extension_api::ToolCall;
use codex_extension_api::ToolExecutor;
use codex_extension_api::ToolName;
use codex_extension_api::ToolOutput;
use codex_extension_api::ToolSpec;
use codex_protocol::ThreadId;
use codex_protocol::protocol::ThreadGoal;
use codex_protocol::protocol::ThreadGoalStatus;
use codex_protocol::protocol::validate_thread_goal_objective;
use serde::Deserialize;
use serde::Serialize;
use crate::extension::GoalToolBackend;
use crate::spec::CREATE_GOAL_TOOL_NAME;
use crate::spec::GET_GOAL_TOOL_NAME;
use crate::spec::UPDATE_GOAL_TOOL_NAME;
use crate::spec::create_create_goal_tool;
use crate::spec::create_get_goal_tool;
use crate::spec::create_update_goal_tool;
#[derive(Clone)]
pub(crate) struct GoalToolExecutor {
kind: GoalToolKind,
thread_id: ThreadId,
backend: Arc<dyn GoalToolBackend>,
}
#[derive(Clone, Copy)]
enum GoalToolKind {
Get,
Create,
Update,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct CreateGoalRequest {
pub objective: String,
pub token_budget: Option<i64>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
struct UpdateGoalArgs {
status: ThreadGoalStatus,
}
#[derive(Debug, PartialEq, Serialize)]
#[serde(rename_all = "camelCase")]
struct GoalToolResponse {
goal: Option<ThreadGoal>,
remaining_tokens: Option<i64>,
completion_budget_report: Option<String>,
}
#[derive(Clone, Copy)]
enum CompletionBudgetReport {
Include,
Omit,
}
impl GoalToolExecutor {
pub(crate) fn get(thread_id: ThreadId, backend: Arc<dyn GoalToolBackend>) -> Self {
Self {
kind: GoalToolKind::Get,
thread_id,
backend,
}
}
pub(crate) fn create(thread_id: ThreadId, backend: Arc<dyn GoalToolBackend>) -> Self {
Self {
kind: GoalToolKind::Create,
thread_id,
backend,
}
}
pub(crate) fn update(thread_id: ThreadId, backend: Arc<dyn GoalToolBackend>) -> Self {
Self {
kind: GoalToolKind::Update,
thread_id,
backend,
}
}
}
#[async_trait]
impl ToolExecutor<ToolCall> for GoalToolExecutor {
fn tool_name(&self) -> ToolName {
ToolName::plain(match self.kind {
GoalToolKind::Get => GET_GOAL_TOOL_NAME,
GoalToolKind::Create => CREATE_GOAL_TOOL_NAME,
GoalToolKind::Update => UPDATE_GOAL_TOOL_NAME,
})
}
fn spec(&self) -> Option<ToolSpec> {
Some(match self.kind {
GoalToolKind::Get => create_get_goal_tool(),
GoalToolKind::Create => create_create_goal_tool(),
GoalToolKind::Update => create_update_goal_tool(),
})
}
async fn handle(&self, invocation: ToolCall) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
match self.kind {
GoalToolKind::Get => self.handle_get(invocation).await,
GoalToolKind::Create => self.handle_create(invocation).await,
GoalToolKind::Update => self.handle_update(invocation).await,
}
}
}
impl GoalToolExecutor {
async fn handle_get(
&self,
invocation: ToolCall,
) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
let _ = invocation.function_arguments()?;
let goal = self
.backend
.get_goal(self.thread_id)
.await
.map_err(FunctionCallError::RespondToModel)?;
goal_response(goal, CompletionBudgetReport::Omit)
}
async fn handle_create(
&self,
invocation: ToolCall,
) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
let mut request: CreateGoalRequest = parse_arguments(invocation.function_arguments()?)?;
request.objective = request.objective.trim().to_string();
validate_thread_goal_objective(&request.objective)
.map_err(FunctionCallError::RespondToModel)?;
validate_goal_budget(request.token_budget).map_err(FunctionCallError::RespondToModel)?;
let goal = self
.backend
.create_goal(self.thread_id, request)
.await
.map_err(FunctionCallError::RespondToModel)?;
goal_response(Some(goal), CompletionBudgetReport::Omit)
}
async fn handle_update(
&self,
invocation: ToolCall,
) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
let args: UpdateGoalArgs = parse_arguments(invocation.function_arguments()?)?;
if args.status != ThreadGoalStatus::Complete {
return Err(FunctionCallError::RespondToModel(
"update_goal can only mark the existing goal complete; pause, resume, and budget-limited status changes are controlled by the user or system"
.to_string(),
));
}
// TODO: update_goal needs a host callback before completion to flush
// final active-turn accounting with budget steering suppressed.
let goal = self
.backend
.complete_goal(self.thread_id)
.await
.map_err(FunctionCallError::RespondToModel)?;
goal_response(Some(goal), CompletionBudgetReport::Include)
}
}
fn parse_arguments<T>(arguments: &str) -> Result<T, FunctionCallError>
where
T: for<'de> Deserialize<'de>,
{
serde_json::from_str(arguments)
.map_err(|err| FunctionCallError::RespondToModel(err.to_string()))
}
fn validate_goal_budget(value: Option<i64>) -> Result<(), String> {
if let Some(value) = value
&& value <= 0
{
return Err("goal budgets must be positive when provided".to_string());
}
Ok(())
}
fn goal_response(
goal: Option<ThreadGoal>,
completion_budget_report: CompletionBudgetReport,
) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
let value = serde_json::to_value(GoalToolResponse::new(goal, completion_budget_report))
.map_err(|err| FunctionCallError::Fatal(err.to_string()))?;
Ok(Box::new(JsonToolOutput::new(value)))
}
impl GoalToolResponse {
fn new(goal: Option<ThreadGoal>, report_mode: CompletionBudgetReport) -> Self {
let remaining_tokens = goal.as_ref().and_then(|goal| {
goal.token_budget
.map(|budget| (budget - goal.tokens_used).max(0))
});
let completion_budget_report = match report_mode {
CompletionBudgetReport::Include => goal
.as_ref()
.filter(|goal| goal.status == ThreadGoalStatus::Complete)
.and_then(completion_budget_report),
CompletionBudgetReport::Omit => None,
};
Self {
goal,
remaining_tokens,
completion_budget_report,
}
}
}
fn completion_budget_report(goal: &ThreadGoal) -> Option<String> {
if goal.token_budget.is_none() && goal.time_used_seconds <= 0 {
None
} else {
Some(
"Goal achieved. Report final usage from this tool result's structured goal fields. If `goal.tokenBudget` is present, include token usage from `goal.tokensUsed` and `goal.tokenBudget`. If `goal.timeUsedSeconds` is greater than 0, summarize elapsed time in a concise, human-friendly form appropriate to the response language."
.to_string(),
)
}
}