mirror of
https://github.com/openai/codex.git
synced 2026-05-29 15:30:22 +00:00
Gate goal tools by thread eligibility (#24925)
## Why Goal tools create and update goal state for a persistent thread. The extension was only checking whether goals were enabled before advertising those tools, which meant they could be surfaced in contexts that should not receive thread goal controls: ephemeral threads without persistent thread state and review subagents. Those sessions can still run the goal extension lifecycle, but the thread tools should only be visible when the current thread can safely use them. ## What changed - Adds a `GoalRuntimeConfig` that separates goal enablement from whether goal tools are available for the current thread. - Computes tool eligibility on thread start from `persistent_thread_state_available` and `SessionSource`, hiding tools for review subagents. - Uses `GoalRuntimeHandle::tools_visible()` when contributing thread tools so enabled runtime state does not automatically imply tool exposure. - Adds backend coverage for hiding goal tools on ephemeral threads and review subagents. ## Testing - Added `goal_tools_hidden_for_ephemeral_threads`. - Added `goal_tools_hidden_for_review_subagents`.
This commit is contained in:
@@ -22,6 +22,8 @@ use codex_extension_api::TurnStartInput;
|
||||
use codex_extension_api::TurnStopInput;
|
||||
use codex_otel::MetricsClient;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_protocol::protocol::ThreadGoalStatus;
|
||||
use codex_protocol::protocol::TokenUsageInfo;
|
||||
|
||||
@@ -29,6 +31,7 @@ use crate::accounting::BudgetLimitedGoalDisposition;
|
||||
use crate::accounting::GoalAccountingState;
|
||||
use crate::events::GoalEventEmitter;
|
||||
use crate::metrics::GoalMetrics;
|
||||
use crate::runtime::GoalRuntimeConfig;
|
||||
use crate::runtime::GoalRuntimeHandle;
|
||||
use crate::spec::UPDATE_GOAL_TOOL_NAME;
|
||||
use crate::steering::budget_limit_steering_item;
|
||||
@@ -85,6 +88,11 @@ where
|
||||
{
|
||||
async fn on_thread_start(&self, input: ThreadStartInput<'_, C>) {
|
||||
let enabled = (self.goals_enabled)(input.config);
|
||||
let tools_available_for_thread = input.persistent_thread_state_available
|
||||
&& !matches!(
|
||||
input.session_source,
|
||||
SessionSource::SubAgent(SubAgentSource::Review)
|
||||
);
|
||||
input
|
||||
.thread_store
|
||||
.insert(GoalExtensionConfig::from_enabled(enabled));
|
||||
@@ -102,7 +110,10 @@ where
|
||||
self.metrics.clone(),
|
||||
self.thread_manager.clone(),
|
||||
accounting_state,
|
||||
enabled,
|
||||
GoalRuntimeConfig {
|
||||
enabled,
|
||||
tools_available_for_thread,
|
||||
},
|
||||
)
|
||||
});
|
||||
runtime.set_enabled(enabled);
|
||||
@@ -330,7 +341,7 @@ where
|
||||
let Some(runtime) = goal_runtime_handle(thread_store) else {
|
||||
return Vec::new();
|
||||
};
|
||||
if !runtime.is_enabled() {
|
||||
if !runtime.tools_visible() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,11 @@ pub struct GoalRuntimeHandle {
|
||||
inner: Arc<GoalRuntimeInner>,
|
||||
}
|
||||
|
||||
pub(crate) struct GoalRuntimeConfig {
|
||||
pub(crate) enabled: bool,
|
||||
pub(crate) tools_available_for_thread: bool,
|
||||
}
|
||||
|
||||
struct GoalRuntimeInner {
|
||||
thread_id: ThreadId,
|
||||
state_dbs: Arc<codex_state::StateRuntime>,
|
||||
@@ -28,6 +33,7 @@ struct GoalRuntimeInner {
|
||||
thread_manager: Weak<ThreadManager>,
|
||||
accounting_state: Arc<GoalAccountingState>,
|
||||
enabled: AtomicBool,
|
||||
tools_available_for_thread: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct AccountedGoalProgress {
|
||||
@@ -66,7 +72,7 @@ impl GoalRuntimeHandle {
|
||||
metrics: GoalMetrics,
|
||||
thread_manager: Weak<ThreadManager>,
|
||||
accounting_state: Arc<GoalAccountingState>,
|
||||
enabled: bool,
|
||||
config: GoalRuntimeConfig,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(GoalRuntimeInner {
|
||||
@@ -76,7 +82,8 @@ impl GoalRuntimeHandle {
|
||||
metrics,
|
||||
thread_manager,
|
||||
accounting_state,
|
||||
enabled: AtomicBool::new(enabled),
|
||||
enabled: AtomicBool::new(config.enabled),
|
||||
tools_available_for_thread: config.tools_available_for_thread,
|
||||
}),
|
||||
}
|
||||
}
|
||||
@@ -89,6 +96,10 @@ impl GoalRuntimeHandle {
|
||||
self.inner.enabled.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub(crate) fn tools_visible(&self) -> bool {
|
||||
self.is_enabled() && self.inner.tools_available_for_thread
|
||||
}
|
||||
|
||||
pub(crate) fn thread_id(&self) -> ThreadId {
|
||||
self.inner.thread_id
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ use codex_protocol::config_types::Settings;
|
||||
use codex_protocol::protocol::Event;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_protocol::protocol::ThreadGoalStatus;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use codex_protocol::protocol::TokenUsageInfo;
|
||||
@@ -83,6 +84,38 @@ async fn installed_goal_tools_create_goal_and_fill_empty_preview() -> anyhow::Re
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn goal_tools_hidden_for_ephemeral_threads() -> anyhow::Result<()> {
|
||||
let runtime = test_runtime().await?;
|
||||
let thread_id = test_thread_id()?;
|
||||
let tools = installed_tools_with_start(
|
||||
runtime,
|
||||
thread_id,
|
||||
SessionSource::Cli,
|
||||
/*persistent_thread_state_available*/ false,
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(Vec::<String>::new(), tool_names(&tools));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn goal_tools_hidden_for_review_subagents() -> anyhow::Result<()> {
|
||||
let runtime = test_runtime().await?;
|
||||
let thread_id = test_thread_id()?;
|
||||
let tools = installed_tools_with_start(
|
||||
runtime,
|
||||
thread_id,
|
||||
SessionSource::SubAgent(SubAgentSource::Review),
|
||||
/*persistent_thread_state_available*/ true,
|
||||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(Vec::<String>::new(), tool_names(&tools));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn installed_goal_tools_reject_duplicate_goal_creation() -> anyhow::Result<()> {
|
||||
let runtime = test_runtime().await?;
|
||||
@@ -877,6 +910,21 @@ async fn thread_resume_rehydrates_active_goal_idle_accounting() -> anyhow::Resul
|
||||
async fn installed_tools(
|
||||
runtime: Arc<codex_state::StateRuntime>,
|
||||
thread_id: ThreadId,
|
||||
) -> Vec<Arc<dyn ToolExecutor<ToolCall>>> {
|
||||
installed_tools_with_start(
|
||||
runtime,
|
||||
thread_id,
|
||||
SessionSource::Cli,
|
||||
/*persistent_thread_state_available*/ true,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn installed_tools_with_start(
|
||||
runtime: Arc<codex_state::StateRuntime>,
|
||||
thread_id: ThreadId,
|
||||
session_source: SessionSource,
|
||||
persistent_thread_state_available: bool,
|
||||
) -> Vec<Arc<dyn ToolExecutor<ToolCall>>> {
|
||||
let mut builder = ExtensionRegistryBuilder::<()>::new();
|
||||
install_with_backend(
|
||||
@@ -889,13 +937,12 @@ async fn installed_tools(
|
||||
let registry = builder.build();
|
||||
let session_store = ExtensionData::new("session-1");
|
||||
let thread_store = ExtensionData::new(thread_id.to_string());
|
||||
let session_source = SessionSource::Cli;
|
||||
for contributor in registry.thread_lifecycle_contributors() {
|
||||
contributor
|
||||
.on_thread_start(ThreadStartInput {
|
||||
config: &(),
|
||||
session_source: &session_source,
|
||||
persistent_thread_state_available: true,
|
||||
persistent_thread_state_available,
|
||||
session_store: &session_store,
|
||||
thread_store: &thread_store,
|
||||
})
|
||||
@@ -909,6 +956,10 @@ async fn installed_tools(
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn tool_names(tools: &[Arc<dyn ToolExecutor<ToolCall>>]) -> Vec<String> {
|
||||
tools.iter().map(|tool| tool.tool_name().name).collect()
|
||||
}
|
||||
|
||||
struct GoalExtensionHarness {
|
||||
registry: codex_extension_api::ExtensionRegistry<()>,
|
||||
session_store: ExtensionData,
|
||||
|
||||
Reference in New Issue
Block a user