Gate goal tools by thread eligibility (#24925)

## Why

Goal tools create and update goal state for a persistent thread. The
extension was only checking whether goals were enabled before
advertising those tools, which meant they could be surfaced in contexts
that should not receive thread goal controls: ephemeral threads without
persistent thread state and review subagents.

Those sessions can still run the goal extension lifecycle, but the
thread tools should only be visible when the current thread can safely
use them.

## What changed

- Adds a `GoalRuntimeConfig` that separates goal enablement from whether
goal tools are available for the current thread.
- Computes tool eligibility on thread start from
`persistent_thread_state_available` and `SessionSource`, hiding tools
for review subagents.
- Uses `GoalRuntimeHandle::tools_visible()` when contributing thread
tools so enabled runtime state does not automatically imply tool
exposure.
- Adds backend coverage for hiding goal tools on ephemeral threads and
review subagents.

## Testing

- Added `goal_tools_hidden_for_ephemeral_threads`.
- Added `goal_tools_hidden_for_review_subagents`.
This commit is contained in:
jif-oai
2026-05-28 17:47:17 +02:00
committed by GitHub
parent bd2a732923
commit e426d48f6d
3 changed files with 79 additions and 6 deletions

View File

@@ -22,6 +22,8 @@ use codex_extension_api::TurnStartInput;
use codex_extension_api::TurnStopInput;
use codex_otel::MetricsClient;
use codex_protocol::ThreadId;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::ThreadGoalStatus;
use codex_protocol::protocol::TokenUsageInfo;
@@ -29,6 +31,7 @@ use crate::accounting::BudgetLimitedGoalDisposition;
use crate::accounting::GoalAccountingState;
use crate::events::GoalEventEmitter;
use crate::metrics::GoalMetrics;
use crate::runtime::GoalRuntimeConfig;
use crate::runtime::GoalRuntimeHandle;
use crate::spec::UPDATE_GOAL_TOOL_NAME;
use crate::steering::budget_limit_steering_item;
@@ -85,6 +88,11 @@ where
{
async fn on_thread_start(&self, input: ThreadStartInput<'_, C>) {
let enabled = (self.goals_enabled)(input.config);
let tools_available_for_thread = input.persistent_thread_state_available
&& !matches!(
input.session_source,
SessionSource::SubAgent(SubAgentSource::Review)
);
input
.thread_store
.insert(GoalExtensionConfig::from_enabled(enabled));
@@ -102,7 +110,10 @@ where
self.metrics.clone(),
self.thread_manager.clone(),
accounting_state,
enabled,
GoalRuntimeConfig {
enabled,
tools_available_for_thread,
},
)
});
runtime.set_enabled(enabled);
@@ -330,7 +341,7 @@ where
let Some(runtime) = goal_runtime_handle(thread_store) else {
return Vec::new();
};
if !runtime.is_enabled() {
if !runtime.tools_visible() {
return Vec::new();
}

View File

@@ -20,6 +20,11 @@ pub struct GoalRuntimeHandle {
inner: Arc<GoalRuntimeInner>,
}
pub(crate) struct GoalRuntimeConfig {
pub(crate) enabled: bool,
pub(crate) tools_available_for_thread: bool,
}
struct GoalRuntimeInner {
thread_id: ThreadId,
state_dbs: Arc<codex_state::StateRuntime>,
@@ -28,6 +33,7 @@ struct GoalRuntimeInner {
thread_manager: Weak<ThreadManager>,
accounting_state: Arc<GoalAccountingState>,
enabled: AtomicBool,
tools_available_for_thread: bool,
}
pub(crate) struct AccountedGoalProgress {
@@ -66,7 +72,7 @@ impl GoalRuntimeHandle {
metrics: GoalMetrics,
thread_manager: Weak<ThreadManager>,
accounting_state: Arc<GoalAccountingState>,
enabled: bool,
config: GoalRuntimeConfig,
) -> Self {
Self {
inner: Arc::new(GoalRuntimeInner {
@@ -76,7 +82,8 @@ impl GoalRuntimeHandle {
metrics,
thread_manager,
accounting_state,
enabled: AtomicBool::new(enabled),
enabled: AtomicBool::new(config.enabled),
tools_available_for_thread: config.tools_available_for_thread,
}),
}
}
@@ -89,6 +96,10 @@ impl GoalRuntimeHandle {
self.inner.enabled.load(Ordering::Relaxed)
}
pub(crate) fn tools_visible(&self) -> bool {
self.is_enabled() && self.inner.tools_available_for_thread
}
pub(crate) fn thread_id(&self) -> ThreadId {
self.inner.thread_id
}

View File

@@ -28,6 +28,7 @@ use codex_protocol::config_types::Settings;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::ThreadGoalStatus;
use codex_protocol::protocol::TokenUsage;
use codex_protocol::protocol::TokenUsageInfo;
@@ -83,6 +84,38 @@ async fn installed_goal_tools_create_goal_and_fill_empty_preview() -> anyhow::Re
Ok(())
}
#[tokio::test]
async fn goal_tools_hidden_for_ephemeral_threads() -> anyhow::Result<()> {
let runtime = test_runtime().await?;
let thread_id = test_thread_id()?;
let tools = installed_tools_with_start(
runtime,
thread_id,
SessionSource::Cli,
/*persistent_thread_state_available*/ false,
)
.await;
assert_eq!(Vec::<String>::new(), tool_names(&tools));
Ok(())
}
#[tokio::test]
async fn goal_tools_hidden_for_review_subagents() -> anyhow::Result<()> {
let runtime = test_runtime().await?;
let thread_id = test_thread_id()?;
let tools = installed_tools_with_start(
runtime,
thread_id,
SessionSource::SubAgent(SubAgentSource::Review),
/*persistent_thread_state_available*/ true,
)
.await;
assert_eq!(Vec::<String>::new(), tool_names(&tools));
Ok(())
}
#[tokio::test]
async fn installed_goal_tools_reject_duplicate_goal_creation() -> anyhow::Result<()> {
let runtime = test_runtime().await?;
@@ -877,6 +910,21 @@ async fn thread_resume_rehydrates_active_goal_idle_accounting() -> anyhow::Resul
async fn installed_tools(
runtime: Arc<codex_state::StateRuntime>,
thread_id: ThreadId,
) -> Vec<Arc<dyn ToolExecutor<ToolCall>>> {
installed_tools_with_start(
runtime,
thread_id,
SessionSource::Cli,
/*persistent_thread_state_available*/ true,
)
.await
}
async fn installed_tools_with_start(
runtime: Arc<codex_state::StateRuntime>,
thread_id: ThreadId,
session_source: SessionSource,
persistent_thread_state_available: bool,
) -> Vec<Arc<dyn ToolExecutor<ToolCall>>> {
let mut builder = ExtensionRegistryBuilder::<()>::new();
install_with_backend(
@@ -889,13 +937,12 @@ async fn installed_tools(
let registry = builder.build();
let session_store = ExtensionData::new("session-1");
let thread_store = ExtensionData::new(thread_id.to_string());
let session_source = SessionSource::Cli;
for contributor in registry.thread_lifecycle_contributors() {
contributor
.on_thread_start(ThreadStartInput {
config: &(),
session_source: &session_source,
persistent_thread_state_available: true,
persistent_thread_state_available,
session_store: &session_store,
thread_store: &thread_store,
})
@@ -909,6 +956,10 @@ async fn installed_tools(
.collect()
}
fn tool_names(tools: &[Arc<dyn ToolExecutor<ToolCall>>]) -> Vec<String> {
tools.iter().map(|tool| tool.tool_name().name).collect()
}
struct GoalExtensionHarness {
registry: codex_extension_api::ExtensionRegistry<()>,
session_store: ExtensionData,