diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 4acaeb7c27..174bcf2559 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1859,6 +1859,7 @@ dependencies = [ "pretty_assertions", "regex-lite", "reqwest", + "schemars 0.8.22", "serde", "serde_json", "tempfile", @@ -1927,6 +1928,7 @@ dependencies = [ "codex-utils-cli", "codex-utils-json-to-toml", "codex-utils-pty", + "codex-web-search-extension", "core_test_support", "flate2", "futures", @@ -4125,6 +4127,28 @@ dependencies = [ "v8", ] +[[package]] +name = "codex-web-search-extension" +version = "0.0.0" +dependencies = [ + "async-trait", + "codex-api", + "codex-core", + "codex-extension-api", + "codex-features", + "codex-login", + "codex-model-provider", + "codex-model-provider-info", + "codex-protocol", + "codex-thread-store", + "codex-tools", + "codex-utils-output-truncation", + "http 1.4.0", + "pretty_assertions", + "schemars 0.8.22", + "serde_json", +] + [[package]] name = "codex-windows-sandbox" version = "0.0.0" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index df169504f0..8713f1c123 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -48,6 +48,7 @@ members = [ "ext/goal", "ext/guardian", "ext/memories", + "ext/web-search", "external-agent-migration", "external-agent-sessions", "keyring-store", @@ -181,6 +182,7 @@ codex-lmstudio = { path = "lmstudio" } codex-login = { path = "login" } codex-message-history = { path = "message-history" } codex-memories-extension = { path = "ext/memories" } +codex-web-search-extension = { path = "ext/web-search" } codex-memories-read = { path = "memories/read" } codex-memories-write = { path = "memories/write" } codex-mcp = { path = "codex-mcp" } diff --git a/codex-rs/app-server/Cargo.toml b/codex-rs/app-server/Cargo.toml index 95baac4e9e..17ecd7dc3f 100644 --- a/codex-rs/app-server/Cargo.toml +++ b/codex-rs/app-server/Cargo.toml @@ -55,6 +55,7 @@ codex-file-search = { workspace = true } codex-chatgpt = { workspace = true } codex-login = 
{ workspace = true } codex-memories-extension = { workspace = true } +codex-web-search-extension = { workspace = true } codex-memories-write = { workspace = true } codex-mcp = { workspace = true } codex-model-provider = { workspace = true } diff --git a/codex-rs/app-server/src/extensions.rs b/codex-rs/app-server/src/extensions.rs index 689eae2e9d..8edac9b5d6 100644 --- a/codex-rs/app-server/src/extensions.rs +++ b/codex-rs/app-server/src/extensions.rs @@ -9,16 +9,23 @@ use codex_extension_api::AgentSpawnFuture; use codex_extension_api::AgentSpawner; use codex_extension_api::ExtensionRegistry; use codex_extension_api::ExtensionRegistryBuilder; +use codex_login::AuthManager; use codex_protocol::ThreadId; use codex_protocol::error::CodexErr; +use codex_thread_store::ThreadStore; -pub(crate) fn thread_extensions(guardian_agent_spawner: S) -> Arc> +pub(crate) fn thread_extensions( + guardian_agent_spawner: S, + auth_manager: Arc, + thread_store: Arc, +) -> Arc> where S: AgentSpawner + 'static, { let mut builder = ExtensionRegistryBuilder::::new(); codex_guardian::install(&mut builder, guardian_agent_spawner); codex_memories_extension::install(&mut builder); + codex_web_search_extension::install(&mut builder, auth_manager, thread_store); Arc::new(builder.build()) } diff --git a/codex-rs/app-server/src/mcp_refresh.rs b/codex-rs/app-server/src/mcp_refresh.rs index f7d32b2ea8..d1e52275a3 100644 --- a/codex-rs/app-server/src/mcp_refresh.rs +++ b/codex-rs/app-server/src/mcp_refresh.rs @@ -183,12 +183,16 @@ mod tests { let thread_manager = Arc::new_cyclic(|thread_manager| { ThreadManager::new( &good_config, - auth_manager, + auth_manager.clone(), SessionSource::Exec, Arc::new(EnvironmentManager::default_for_tests()), - thread_extensions(guardian_agent_spawner(thread_manager.clone())), + thread_extensions( + guardian_agent_spawner(thread_manager.clone()), + auth_manager.clone(), + Arc::clone(&thread_store), + ), /*analytics_events_client*/ None, - thread_store, + 
Arc::clone(&thread_store), Some(state_db.clone()), "11111111-1111-4111-8111-111111111111".to_string(), /*attestation_provider*/ None, diff --git a/codex-rs/app-server/src/message_processor.rs b/codex-rs/app-server/src/message_processor.rs index 0e1f019de9..601c429d0f 100644 --- a/codex-rs/app-server/src/message_processor.rs +++ b/codex-rs/app-server/src/message_processor.rs @@ -310,7 +310,11 @@ impl MessageProcessor { auth_manager.clone(), session_source, environment_manager, - thread_extensions(guardian_agent_spawner(thread_manager.clone())), + thread_extensions( + guardian_agent_spawner(thread_manager.clone()), + auth_manager.clone(), + Arc::clone(&thread_store), + ), Some(analytics_events_client.clone()), Arc::clone(&thread_store), state_db.clone(), diff --git a/codex-rs/codex-api/Cargo.toml b/codex-rs/codex-api/Cargo.toml index 08f70cf33c..07d855725a 100644 --- a/codex-rs/codex-api/Cargo.toml +++ b/codex-rs/codex-api/Cargo.toml @@ -16,6 +16,7 @@ codex-utils-rustls-provider = { workspace = true } futures = { workspace = true } http = { workspace = true } reqwest = { workspace = true, features = ["json", "stream"] } +schemars = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/codex-rs/codex-api/src/search.rs b/codex-rs/codex-api/src/search.rs index b841d06a30..8328415f83 100644 --- a/codex-rs/codex-api/src/search.rs +++ b/codex-rs/codex-api/src/search.rs @@ -1,5 +1,6 @@ use crate::common::Reasoning; use codex_protocol::models::ResponseItem; +use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -27,75 +28,100 @@ pub enum SearchInput { Items(Vec), } -#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, JsonSchema)] pub struct SearchCommands { + /// Query the internet search engine for a given list of queries. 
#[serde(skip_serializing_if = "Option::is_none")] pub search_query: Option>, + /// Query the image search engine for a given list of queries. #[serde(skip_serializing_if = "Option::is_none")] pub image_query: Option>, + /// Open pages by reference id or URL. #[serde(skip_serializing_if = "Option::is_none")] pub open: Option>, + /// Open links from previously opened pages. #[serde(skip_serializing_if = "Option::is_none")] pub click: Option>, + /// Find text patterns in pages. #[serde(skip_serializing_if = "Option::is_none")] pub find: Option>, + /// Take screenshots of PDF pages. #[serde(skip_serializing_if = "Option::is_none")] pub screenshot: Option>, + /// Look up prices for the given stock symbols. #[serde(skip_serializing_if = "Option::is_none")] pub finance: Option>, + /// Look up weather forecasts. #[serde(skip_serializing_if = "Option::is_none")] pub weather: Option>, + /// Look up sports schedules and standings. #[serde(skip_serializing_if = "Option::is_none")] pub sports: Option>, + /// Get time for the given UTC offsets. #[serde(skip_serializing_if = "Option::is_none")] pub time: Option>, + /// Set the length of the response to be returned. #[serde(skip_serializing_if = "Option::is_none")] pub response_length: Option, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct SearchQuery { + /// Search query. pub q: String, + /// Whether to filter by recency, as a number of recent days. #[serde(skip_serializing_if = "Option::is_none")] pub recency: Option, + /// Whether to filter by a specific list of domains. #[serde(skip_serializing_if = "Option::is_none")] pub domains: Option>, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct OpenOperation { + /// Reference id or URL to open. pub ref_id: String, + /// Line number to position the page at. 
#[serde(skip_serializing_if = "Option::is_none")] pub lineno: Option, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct ClickOperation { + /// Reference id containing the numbered link. pub ref_id: String, + /// Numbered link id to open. pub id: u64, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct FindOperation { + /// Reference id or URL to search within. pub ref_id: String, + /// Text pattern to find. pub pattern: String, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct ScreenshotOperation { + /// Reference id or URL to screenshot. pub ref_id: String, + /// Zero-indexed PDF page number. pub pageno: u64, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct FinanceOperation { + /// Ticker symbol to look up. pub ticker: String, + /// Asset type to look up. pub r#type: FinanceAssetType, + /// ISO 3166-1 alpha-3 country code, "OTC", or "" for cryptocurrency. #[serde(skip_serializing_if = "Option::is_none")] pub market: Option, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum FinanceAssetType { Equity, @@ -104,49 +130,61 @@ pub enum FinanceAssetType { Index, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct WeatherOperation { + /// Location in "Country, Area, City" format. pub location: String, + /// Start date in YYYY-MM-DD format. Defaults to today. 
#[serde(skip_serializing_if = "Option::is_none")] pub start: Option, + /// Number of days to return. Defaults to 7. #[serde(skip_serializing_if = "Option::is_none")] pub duration: Option, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema)] pub struct SportsOperation { + /// Tool name for sports requests. #[serde(skip_serializing_if = "Option::is_none")] pub tool: Option, + /// Sports function to call. pub r#fn: SportsFunction, + /// League to look up. pub league: SportsLeague, + /// Team to look up, using the common 3 or 4 letter alias used in broadcasts. #[serde(skip_serializing_if = "Option::is_none")] pub team: Option, + /// Opponent to use with `team` when narrowing the lookup. #[serde(skip_serializing_if = "Option::is_none")] pub opponent: Option, + /// Start date in YYYY-MM-DD format. #[serde(skip_serializing_if = "Option::is_none")] pub date_from: Option, + /// End date in YYYY-MM-DD format. #[serde(skip_serializing_if = "Option::is_none")] pub date_to: Option, + /// Number of games to return. #[serde(skip_serializing_if = "Option::is_none")] pub num_games: Option, + /// Locale for the lookup. 
#[serde(skip_serializing_if = "Option::is_none")] pub locale: Option, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SportsToolName { Sports, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SportsFunction { Schedule, Standings, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SportsLeague { Nba, @@ -160,12 +198,13 @@ pub enum SportsLeague { Ipl, } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub struct TimeOperation { + /// UTC offset formatted like "+03:00". 
pub utc_offset: String, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] #[serde(rename_all = "lowercase")] pub enum SearchResponseLength { Short, diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 6f85785f2e..57e1e66574 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -572,6 +572,9 @@ "sqlite": { "type": "boolean" }, + "standalone_web_search": { + "type": "boolean" + }, "steer": { "type": "boolean" }, @@ -4503,6 +4506,9 @@ "sqlite": { "type": "boolean" }, + "standalone_web_search": { + "type": "boolean" + }, "steer": { "type": "boolean" }, diff --git a/codex-rs/core/src/tools/spec_plan.rs b/codex-rs/core/src/tools/spec_plan.rs index c3656f2434..15257f57e5 100644 --- a/codex-rs/core/src/tools/spec_plan.rs +++ b/codex-rs/core/src/tools/spec_plan.rs @@ -241,8 +241,12 @@ fn spec_for_model_request( pub(crate) fn hosted_model_tool_specs(turn_context: &TurnContext) -> Vec { let mut specs = Vec::new(); let provider_capabilities = turn_context.provider.capabilities(); - let web_search_mode = provider_capabilities - .web_search + let standalone_web_search = turn_context + .config + .features + .enabled(Feature::StandaloneWebSearch) + && turn_context.provider.info().is_openai(); + let web_search_mode = (!standalone_web_search && provider_capabilities.web_search) .then_some(turn_context.config.web_search_mode.value()); let web_search_config = if provider_capabilities.web_search { turn_context.config.web_search_config.as_ref() diff --git a/codex-rs/core/src/tools/spec_plan_tests.rs b/codex-rs/core/src/tools/spec_plan_tests.rs index 32913bf2e7..f8a7e6aed3 100644 --- a/codex-rs/core/src/tools/spec_plan_tests.rs +++ b/codex-rs/core/src/tools/spec_plan_tests.rs @@ -948,6 +948,13 @@ async fn hosted_tools_follow_provider_auth_model_and_config_gates() { } ); + let standalone_web_search = probe(|turn| { + 
set_feature(turn, Feature::StandaloneWebSearch, /*enabled*/ true); + set_web_search_mode(turn, WebSearchMode::Live); + }) + .await; + standalone_web_search.assert_visible_lacks(&["web_search"]); + let unsupported_provider = probe(|turn| { set_web_search_mode(turn, WebSearchMode::Live); use_bedrock_provider(turn); diff --git a/codex-rs/ext/web-search/BUILD.bazel b/codex-rs/ext/web-search/BUILD.bazel new file mode 100644 index 0000000000..e8c26644f6 --- /dev/null +++ b/codex-rs/ext/web-search/BUILD.bazel @@ -0,0 +1,9 @@ +load("//:defs.bzl", "codex_rust_crate") + +codex_rust_crate( + name = "web-search", + crate_name = "codex_web_search_extension", + compile_data = [ + "web_run_description.md", + ], +) diff --git a/codex-rs/ext/web-search/Cargo.toml b/codex-rs/ext/web-search/Cargo.toml new file mode 100644 index 0000000000..79d6415436 --- /dev/null +++ b/codex-rs/ext/web-search/Cargo.toml @@ -0,0 +1,33 @@ +[package] +edition.workspace = true +license.workspace = true +name = "codex-web-search-extension" +version.workspace = true + +[lib] +name = "codex_web_search_extension" +path = "src/lib.rs" +doctest = false + +[lints] +workspace = true + +[dependencies] +async-trait = { workspace = true } +codex-api = { workspace = true } +codex-core = { workspace = true } +codex-extension-api = { workspace = true } +codex-features = { workspace = true } +codex-login = { workspace = true } +codex-model-provider = { workspace = true } +codex-model-provider-info = { workspace = true } +codex-protocol = { workspace = true } +codex-thread-store = { workspace = true } +codex-tools = { workspace = true } +codex-utils-output-truncation = { workspace = true } +http = { workspace = true } +schemars = { workspace = true } +serde_json = { workspace = true } + +[dev-dependencies] +pretty_assertions = { workspace = true } diff --git a/codex-rs/ext/web-search/src/extension.rs b/codex-rs/ext/web-search/src/extension.rs new file mode 100644 index 0000000000..591db8d8f9 --- /dev/null +++ 
b/codex-rs/ext/web-search/src/extension.rs @@ -0,0 +1,146 @@ +use std::sync::Arc; + +use codex_core::config::Config; +use codex_extension_api::ConfigContributor; +use codex_extension_api::ExtensionData; +use codex_extension_api::ExtensionRegistryBuilder; +use codex_extension_api::ThreadLifecycleContributor; +use codex_extension_api::ThreadStartInput; +use codex_extension_api::ToolContributor; +use codex_features::Feature; +use codex_login::AuthManager; +use codex_model_provider::create_model_provider; +use codex_model_provider_info::ModelProviderInfo; +use codex_protocol::ThreadId; +use codex_thread_store::ThreadStore; + +use crate::tool::WebSearchTool; + +#[derive(Clone)] +struct WebSearchExtension { + auth_manager: Arc, + thread_store: Arc, +} + +#[derive(Clone)] +struct WebSearchExtensionConfig { + enabled: bool, + provider: ModelProviderInfo, +} + +impl WebSearchExtensionConfig { + fn from_config(config: &Config) -> Self { + Self { + enabled: config.features.enabled(Feature::StandaloneWebSearch) + && config.model_provider.is_openai(), + provider: config.model_provider.clone(), + } + } +} + +#[async_trait::async_trait] +impl ThreadLifecycleContributor for WebSearchExtension { + async fn on_thread_start(&self, input: ThreadStartInput<'_, Config>) { + input + .thread_store + .insert(WebSearchExtensionConfig::from_config(input.config)); + } +} + +impl ConfigContributor for WebSearchExtension { + fn on_config_changed( + &self, + _session_store: &ExtensionData, + thread_store: &ExtensionData, + _previous_config: &Config, + new_config: &Config, + ) { + thread_store.insert(WebSearchExtensionConfig::from_config(new_config)); + } +} + +impl ToolContributor for WebSearchExtension { + fn tools( + &self, + session_store: &ExtensionData, + thread_store: &ExtensionData, + ) -> Vec>> { + let Some(config) = thread_store.get::() else { + return Vec::new(); + }; + if !config.enabled { + return Vec::new(); + } + let Ok(thread_id) = ThreadId::from_string(thread_store.level_id()) 
else { + return Vec::new(); + }; + + vec![Arc::new(WebSearchTool { + session_id: session_store.level_id().to_string(), + thread_id, + thread_store: Arc::clone(&self.thread_store), + provider: create_model_provider( + config.provider.clone(), + Some(self.auth_manager.clone()), + ), + })] + } +} + +pub fn install( + registry: &mut ExtensionRegistryBuilder, + auth_manager: Arc, + thread_store: Arc, +) { + let extension = Arc::new(WebSearchExtension { + auth_manager, + thread_store, + }); + registry.thread_lifecycle_contributor(extension.clone()); + registry.config_contributor(extension.clone()); + registry.tool_contributor(extension); +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use codex_extension_api::ExtensionData; + use codex_extension_api::ExtensionRegistryBuilder; + use codex_extension_api::ToolName; + use codex_login::CodexAuth; + use codex_model_provider_info::ModelProviderInfo; + use codex_thread_store::InMemoryThreadStore; + use pretty_assertions::assert_eq; + + use super::AuthManager; + use super::Config; + use super::WebSearchExtensionConfig; + use super::install; + + #[test] + fn installed_extension_contributes_web_run_when_enabled() { + let mut builder = ExtensionRegistryBuilder::::new(); + install( + &mut builder, + AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy")), + Arc::new(InMemoryThreadStore::default()), + ); + let registry = builder.build(); + let session_store = ExtensionData::new("session"); + let thread_store = ExtensionData::new("11111111-1111-4111-8111-111111111111"); + thread_store.insert(WebSearchExtensionConfig { + enabled: true, + provider: ModelProviderInfo::create_openai_provider(/*base_url*/ None), + }); + + let tool_names = registry + .tool_contributors() + .iter() + .flat_map(|contributor| contributor.tools(&session_store, &thread_store)) + .map(|tool| tool.tool_name()) + .collect::>(); + + assert_eq!(tool_names, vec![ToolName::namespaced("web", "run")]); + } +} diff --git 
a/codex-rs/ext/web-search/src/history.rs b/codex-rs/ext/web-search/src/history.rs new file mode 100644 index 0000000000..ef33a8c7b2 --- /dev/null +++ b/codex-rs/ext/web-search/src/history.rs @@ -0,0 +1,265 @@ +use codex_api::SearchInput; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::RolloutItem; +use codex_utils_output_truncation::TruncationPolicy; +use codex_utils_output_truncation::approx_token_count; +use codex_utils_output_truncation::truncate_text; + +const ASSISTANT_CONTEXT_TOKEN_LIMIT: usize = 1_000; + +/// Builds the persisted conversation tail for standalone web search. +/// +/// The tail keeps the previous user message, up to 1k tokens of assistant text +/// that followed it, and the current user message. +pub(crate) fn recent_input(items: &[RolloutItem]) -> Option { + let messages = recent_messages(items); + (!messages.is_empty()).then_some(SearchInput::Items(messages)) +} + +fn recent_messages(items: &[RolloutItem]) -> Vec { + let mut messages = Vec::new(); + for item in items { + match item { + RolloutItem::ResponseItem(item) => push_visible_message(&mut messages, item), + RolloutItem::Compacted(compacted) => { + if let Some(replacement_history) = &compacted.replacement_history { + messages.clear(); + for item in replacement_history { + push_visible_message(&mut messages, item); + } + } + } + RolloutItem::EventMsg(EventMsg::ThreadRolledBack(rollback)) => { + drop_last_user_turns(&mut messages, rollback.num_turns); + } + RolloutItem::SessionMeta(_) + | RolloutItem::TurnContext(_) + | RolloutItem::EventMsg(_) => {} + } + } + + let mut messages = keep_current_and_previous_turn(messages); + cap_assistant_text(&mut messages); + messages +} + +fn push_visible_message(messages: &mut Vec, item: &ResponseItem) { + if matches!( + item, + ResponseItem::Message { role, .. 
} if role == "user" || role == "assistant" + ) { + messages.push(item.clone()); + } +} + +fn drop_last_user_turns(messages: &mut Vec, count: u32) { + for _ in 0..count { + let Some(user_idx) = messages.iter().rposition(is_user_message) else { + messages.clear(); + return; + }; + messages.truncate(user_idx); + } +} + +fn is_user_message(item: &ResponseItem) -> bool { + matches!(item, ResponseItem::Message { role, .. } if role == "user") +} + +fn keep_current_and_previous_turn(mut messages: Vec) -> Vec { + let Some(current_user_idx) = messages.iter().rposition(is_user_message) else { + return Vec::new(); + }; + messages.truncate(current_user_idx + 1); + let previous_user_idx = messages[..current_user_idx] + .iter() + .rposition(is_user_message) + .unwrap_or(current_user_idx); + + messages.drain(..previous_user_idx); + messages +} + +fn cap_assistant_text(messages: &mut Vec) { + let mut remaining_budget = ASSISTANT_CONTEXT_TOKEN_LIMIT; + + messages.retain_mut(|item| { + let ResponseItem::Message { role, content, .. 
} = item else { + return true; + }; + if role != "assistant" { + return true; + } + + content.retain_mut(|content_item| { + let ContentItem::OutputText { text } = content_item else { + return true; + }; + if remaining_budget == 0 { + return false; + } + + let token_count = approx_token_count(text); + if token_count <= remaining_budget { + remaining_budget = remaining_budget.saturating_sub(token_count); + return true; + } + + *text = truncate_text(text, TruncationPolicy::Tokens(remaining_budget)); + remaining_budget = 0; + true + }); + !content.is_empty() + }); +} + +#[cfg(test)] +mod tests { + use codex_api::SearchInput; + use codex_protocol::models::ContentItem; + use codex_protocol::models::ImageDetail; + use codex_protocol::models::ResponseItem; + use codex_protocol::protocol::CompactedItem; + use codex_protocol::protocol::RolloutItem; + use codex_utils_output_truncation::TruncationPolicy; + use codex_utils_output_truncation::truncate_text; + use pretty_assertions::assert_eq; + + use super::ASSISTANT_CONTEXT_TOKEN_LIMIT; + use super::recent_input; + + fn message(role: &str, text: &str) -> ResponseItem { + ResponseItem::Message { + id: None, + role: role.to_string(), + content: vec![if role == "assistant" { + ContentItem::OutputText { + text: text.to_string(), + } + } else { + ContentItem::InputText { + text: text.to_string(), + } + }], + phase: None, + } + } + + fn rollout_message(role: &str, text: &str) -> RolloutItem { + RolloutItem::ResponseItem(message(role, text)) + } + + #[test] + fn keeps_current_user_and_previous_visible_turn() { + let items = vec![ + rollout_message("system", "system"), + rollout_message("user", "old user"), + rollout_message("assistant", "old assistant"), + rollout_message("user", "previous user"), + RolloutItem::ResponseItem(ResponseItem::FunctionCall { + id: None, + name: "tool".to_string(), + namespace: None, + arguments: "{}".to_string(), + call_id: "call-1".to_string(), + }), + rollout_message("assistant", "previous assistant"), + 
rollout_message("developer", "developer"), + rollout_message("user", "current user"), + rollout_message("assistant", "current commentary"), + ]; + + assert_eq!( + recent_input(&items), + Some(SearchInput::Items(vec![ + message("user", "previous user"), + message("assistant", "previous assistant"), + message("user", "current user"), + ])) + ); + } + + #[test] + fn uses_compaction_replacement_history() { + let items = vec![ + rollout_message("user", "stale user"), + RolloutItem::Compacted(CompactedItem { + message: "compacted".to_string(), + replacement_history: Some(vec![ + message("user", "previous user"), + message("assistant", "previous assistant"), + ]), + }), + rollout_message("user", "current user"), + ]; + + assert_eq!( + recent_input(&items), + Some(SearchInput::Items(vec![ + message("user", "previous user"), + message("assistant", "previous assistant"), + message("user", "current user"), + ])) + ); + } + + #[test] + fn preserves_image_content_from_recent_user_messages() { + let previous_user = ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ + ContentItem::InputText { + text: "previous user".to_string(), + }, + ContentItem::InputImage { + image_url: "data:image/png;base64,image".to_string(), + detail: Some(ImageDetail::High), + }, + ], + phase: None, + }; + let items = vec![ + RolloutItem::ResponseItem(previous_user.clone()), + rollout_message("assistant", "previous assistant"), + rollout_message("user", "current user"), + ]; + + assert_eq!( + recent_input(&items), + Some(SearchInput::Items(vec![ + previous_user, + message("assistant", "previous assistant"), + message("user", "current user"), + ])) + ); + } + + #[test] + fn caps_assistant_text_in_recent_tail() { + let long_assistant = "a".repeat(4_100); + let items = vec![ + rollout_message("user", "previous user"), + rollout_message("assistant", &long_assistant), + rollout_message("assistant", "after the assistant budget"), + rollout_message("user", "current user"), + ]; + + 
assert_eq!( + recent_input(&items), + Some(SearchInput::Items(vec![ + message("user", "previous user"), + message( + "assistant", + &truncate_text( + &long_assistant, + TruncationPolicy::Tokens(ASSISTANT_CONTEXT_TOKEN_LIMIT) + ), + ), + message("user", "current user"), + ])) + ); + } +} diff --git a/codex-rs/ext/web-search/src/lib.rs b/codex-rs/ext/web-search/src/lib.rs new file mode 100644 index 0000000000..1b18039e49 --- /dev/null +++ b/codex-rs/ext/web-search/src/lib.rs @@ -0,0 +1,7 @@ +mod extension; +mod history; +mod output; +mod schema; +mod tool; + +pub use extension::install; diff --git a/codex-rs/ext/web-search/src/output.rs b/codex-rs/ext/web-search/src/output.rs new file mode 100644 index 0000000000..96c9bc54d4 --- /dev/null +++ b/codex-rs/ext/web-search/src/output.rs @@ -0,0 +1,70 @@ +use codex_extension_api::ToolOutput; +use codex_extension_api::ToolPayload; +use codex_protocol::models::FunctionCallOutputContentItem; +use codex_protocol::models::FunctionCallOutputPayload; +use codex_protocol::models::ResponseInputItem; + +pub(crate) struct EncryptedSearchOutput { + encrypted_output: String, +} + +impl EncryptedSearchOutput { + pub(crate) fn new(encrypted_output: String) -> Self { + Self { encrypted_output } + } +} + +impl ToolOutput for EncryptedSearchOutput { + fn log_preview(&self) -> String { + "[encrypted standalone web search output]".to_string() + } + + fn success_for_logging(&self) -> bool { + true + } + + fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem { + ResponseInputItem::FunctionCallOutput { + call_id: call_id.to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::EncryptedContent { + encrypted_content: self.encrypted_output.clone(), + }, + ]), + } + } +} + +#[cfg(test)] +mod tests { + use codex_extension_api::ToolPayload; + use codex_protocol::models::FunctionCallOutputContentItem; + use codex_protocol::models::FunctionCallOutputPayload; + use 
codex_protocol::models::ResponseInputItem; + use pretty_assertions::assert_eq; + + use super::EncryptedSearchOutput; + use super::ToolOutput; + + #[test] + fn emits_encrypted_function_call_output() { + let output = EncryptedSearchOutput::new("encrypted-search-output".to_string()); + + assert_eq!( + output.to_response_item( + "call-1", + &ToolPayload::Function { + arguments: "{}".to_string(), + }, + ), + ResponseInputItem::FunctionCallOutput { + call_id: "call-1".to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::EncryptedContent { + encrypted_content: "encrypted-search-output".to_string(), + }, + ]), + } + ); + } +} diff --git a/codex-rs/ext/web-search/src/schema.rs b/codex-rs/ext/web-search/src/schema.rs new file mode 100644 index 0000000000..2f71f1595c --- /dev/null +++ b/codex-rs/ext/web-search/src/schema.rs @@ -0,0 +1,36 @@ +use codex_api::SearchCommands; +use schemars::r#gen::SchemaSettings; +use serde_json::Map; +use serde_json::Value; + +pub(crate) fn commands_schema() -> Value { + let schema = SchemaSettings::draft2019_09() + .with(|settings| { + settings.inline_subschemas = true; + settings.option_add_null_type = false; + }) + .into_generator() + .into_root_schema_for::(); + let schema = match serde_json::to_value(schema) { + Ok(schema) => schema, + Err(err) => panic!("search commands schema should serialize: {err}"), + }; + let Value::Object(mut schema) = schema else { + unreachable!("search commands schema must be an object"); + }; + + let mut tool_schema = Map::new(); + for key in [ + "properties", + "required", + "type", + "additionalProperties", + "$defs", + "definitions", + ] { + if let Some(value) = schema.remove(key) { + tool_schema.insert(key.to_string(), value); + } + } + Value::Object(tool_schema) +} diff --git a/codex-rs/ext/web-search/src/tool.rs b/codex-rs/ext/web-search/src/tool.rs new file mode 100644 index 0000000000..4932a47cab --- /dev/null +++ b/codex-rs/ext/web-search/src/tool.rs 
@@ -0,0 +1,121 @@ +use std::sync::Arc; + +use codex_api::ReqwestTransport; +use codex_api::SearchClient; +use codex_api::SearchCommands; +use codex_api::SearchRequest; +use codex_extension_api::FunctionCallError; +use codex_extension_api::ResponsesApiTool; +use codex_extension_api::ToolCall; +use codex_extension_api::ToolExecutor; +use codex_extension_api::ToolName; +use codex_extension_api::ToolOutput; +use codex_extension_api::ToolSpec; +use codex_extension_api::parse_tool_input_schema; +use codex_login::default_client::build_reqwest_client; +use codex_model_provider::SharedModelProvider; +use codex_protocol::ThreadId; +use codex_thread_store::LoadThreadHistoryParams; +use codex_thread_store::ThreadStore; +use codex_tools::ResponsesApiNamespace; +use codex_tools::ResponsesApiNamespaceTool; +use codex_tools::default_namespace_description; +use http::HeaderMap; + +use crate::history::recent_input; +use crate::output::EncryptedSearchOutput; +use crate::schema::commands_schema; + +const WEB_NAMESPACE: &str = "web"; +const RUN_TOOL_NAME: &str = "run"; +const WEB_RUN_DESCRIPTION: &str = include_str!("../web_run_description.md"); + +pub(crate) struct WebSearchTool { + pub(crate) session_id: String, + pub(crate) thread_id: ThreadId, + pub(crate) thread_store: Arc, + pub(crate) provider: SharedModelProvider, +} + +#[async_trait::async_trait] +impl ToolExecutor for WebSearchTool { + fn tool_name(&self) -> ToolName { + ToolName::namespaced(WEB_NAMESPACE, RUN_TOOL_NAME) + } + + fn spec(&self) -> Option { + let parameters = match parse_tool_input_schema(&commands_schema()) { + Ok(parameters) => parameters, + Err(err) => panic!("search command schema should parse: {err}"), + }; + + Some(ToolSpec::Namespace(ResponsesApiNamespace { + name: WEB_NAMESPACE.to_string(), + description: default_namespace_description(WEB_NAMESPACE), + tools: vec![ResponsesApiNamespaceTool::Function(ResponsesApiTool { + name: RUN_TOOL_NAME.to_string(), + description: WEB_RUN_DESCRIPTION.to_string(), + 
strict: false, + parameters, + output_schema: None, + defer_loading: None, + })], + })) + } + + async fn handle(&self, call: ToolCall) -> Result, FunctionCallError> { + let commands = parse_commands(&call)?; + let history = self + .thread_store + .load_history(LoadThreadHistoryParams { + thread_id: self.thread_id, + include_archived: false, + }) + .await + .map_err(|err| FunctionCallError::Fatal(err.to_string()))?; + let provider = self + .provider + .api_provider() + .await + .map_err(|err| FunctionCallError::Fatal(err.to_string()))?; + let auth = self + .provider + .api_auth() + .await + .map_err(|err| FunctionCallError::Fatal(err.to_string()))?; + let client = SearchClient::new( + ReqwestTransport::new(build_reqwest_client()), + provider, + auth, + ); + let request = SearchRequest { + id: self.session_id.clone(), + model: None, + reasoning: None, + input: recent_input(&history.items), + commands: Some(commands), + settings: None, + max_output_tokens: Some( + u64::try_from(call.truncation_policy.token_budget()).unwrap_or(u64::MAX), + ), + }; + let response = client + .search(&request, HeaderMap::new()) + .await + .map_err(|err| FunctionCallError::Fatal(err.to_string()))?; + + Ok(Box::new(EncryptedSearchOutput::new( + response.encrypted_output, + ))) + } +} + +fn parse_commands(call: &ToolCall) -> Result { + let arguments = call.function_arguments()?; + if arguments.trim().is_empty() { + return Ok(SearchCommands::default()); + } + + serde_json::from_str(arguments) + .map_err(|err| FunctionCallError::RespondToModel(err.to_string())) +} diff --git a/codex-rs/ext/web-search/web_run_description.md b/codex-rs/ext/web-search/web_run_description.md new file mode 100644 index 0000000000..bccc3d81f6 --- /dev/null +++ b/codex-rs/ext/web-search/web_run_description.md @@ -0,0 +1,80 @@ +Tool for accessing the internet. 
+ + +--- + +## Examples of different commands available in this tool + +Examples of different commands available in this tool: +* `search_query`: {"search_query": [{"q": "What is the capital of France?"}, {"q": "What is the capital of Belgium?"}]}. Searches the internet for a given query (and optionally with a domain or recency filter). +* `image_query`: {"image_query":[{"q": "waterfalls"}]}. +* `open`: {"open": [{"ref_id": "turn0search0"}, {"ref_id": "https://www.openai.com", "lineno": 120}]} +* `click`: {"click": [{"ref_id": "turn0fetch3", "id": 17}]} +* `find`: {"find": [{"ref_id": "turn0fetch3", "pattern": "Annie Case"}]} +* `screenshot`: {"screenshot": [{"ref_id": "turn1view0", "pageno": 0}, {"ref_id": "turn1view0", "pageno": 3}]} +* `finance`: {"finance":[{"ticker":"AMD","type":"equity","market":"USA"}]}, {"finance":[{"ticker":"BTC","type":"crypto","market":""}]} +* `weather`: {"weather":[{"location":"San Francisco, CA"}]} +* `sports`: {"sports":[{"fn":"standings","league":"nfl"}, {"fn":"schedule","league":"nba","team":"GSW","date_from":"2025-02-24"}]} +* `time`: {"time":[{"utc_offset":"+03:00"}]} + +--- + +## Usage hints +To use this tool efficiently: +* Use multiple commands and queries in one call to get more results faster; e.g. {"search_query": [{"q": "bitcoin news"}], "finance":[{"ticker":"BTC","type":"crypto","market":""}], "find": [{"ref_id": "turn0search0", "pattern": "Annie Case"}, {"ref_id": "turn0search1", "pattern": "John Smith"}]} +* Use "response_length" to control the number of results returned by this tool; omit it if you intend to pass "short". +* Only write required parameters; do not write empty lists or nulls where they could be omitted. +* `search_query` must have length at most 4 in each call. If it has length > 3, response_length must be medium or long. +* If you find yourself in a situation where you accidentally call the `web.run` tool, it's best just to send an empty query: {"search_query": [{"q": ""}]}. 
+ +--- + +## Decision boundary + +If the user makes an explicit request to search the internet, find the latest information, look something up, etc. (or to not do so), you must obey their request. +When you make an assumption, always consider whether it is temporally stable; i.e. whether there's even a small (>10%) chance it has changed. If it is unstable, you must verify it by browsing the internet. + + +Below is a list of scenarios where browsing the internet MUST be used. PAY CLOSE ATTENTION: you MUST browse the internet in these cases. If you're unsure or on the fence, you MUST bias towards browsing the internet. +- The information could have changed recently: for example news; prices; laws; schedules; product specs; sports scores; economic indicators; political/public/company figures (e.g. the question relates to 'the president of country A' or 'the CEO of company B', which might change over time); rules; regulations; standards; software libraries that could be updated; exchange rates; recommendations (i.e., recommendations about various topics or things might be informed by what currently exists / is popular / is safe / is unsafe / is in the zeitgeist / etc.); and many many many more categories -- again, if you're on the fence, you MUST browse the internet! + - For news queries, prioritize more recent events, ensuring you compare publish dates and the date that the event happened. +- The user is seeking recommendations that could lead them to spend substantial time or money -- researching products, restaurants, travel plans, etc. +- The user wants (or would benefit from) direct quotes, links, or precise source attribution. +- A specific page, paper, dataset, PDF, or site is referenced and you haven't been given its contents. +- You're unsure about a fact, the topic is niche or emerging, or you suspect there's at least a 10% chance you will incorrectly recall it. +- High-stakes accuracy matters (medical, legal, financial guidance). 
For these you generally should search by default because this information is highly temporally unstable. +- The user explicitly says to search, browse, verify, or look it up. + + +--- + +## Special cases +If these conflict with any other instructions, these should take precedence. + + +- When the user asks for information about how to use OpenAI products (ChatGPT, the OpenAI API, etc.), you should check the code in the local env and only browse as a fallback; when you browse, restrict your sources to official OpenAI websites using the domains filter, unless otherwise requested. +- When using search to answer technical questions, you must only rely on primary sources (research papers, official documentation, etc.). +- Clearly indicate when you are making an inference from sources. + + +--- + +## Word limits +Responses may not excessively quote or draw on a specific source. There are several limits here: +- **Limit on verbatim quotes:** + - You may not quote more than 25 words verbatim from any single non-lyrical source, unless the source is reddit. + - For song lyrics, verbatim quotes must be limited to at most 10 words. + - Long quotes from reddit are allowed, as long as you indicate that those are direct quotes via a markdown blockquote starting with ">", copy verbatim, and link the source. +- **Word limits:** + - Each webpage source in the sources has a word limit label formatted like "[wordlim N]", in which N is the maximum number of words in the whole response that are attributed to that source. If omitted, the word limit is 200 words. + - Non-contiguous words derived from a given source must be counted toward the word limit. + - The summarization limit N is a maximum for each source. + - When using multiple sources, their summarization limits add together. However, each article used must be relevant to the response. +- **Copyright compliance:** + - You must avoid providing full articles, long verbatim passages, or extensive direct quotes due to copyright concerns. 
+ - If the user asked for a verbatim quote, the response should provide a short compliant excerpt and then answer with paraphrases and summaries. + - Again, this limit does not apply to reddit content, as long as it's appropriately indicated that those are direct quotes and you link to the source. + +--- + +Make sure to provide links to the sources you used in your response. diff --git a/codex-rs/features/src/lib.rs b/codex-rs/features/src/lib.rs index 4db7b56d25..79ed392df3 100644 --- a/codex-rs/features/src/lib.rs +++ b/codex-rs/features/src/lib.rs @@ -103,6 +103,8 @@ pub enum Feature { /// Allow the model to request web searches that fetch cached content. /// Takes precedence over `WebSearchRequest`. WebSearchCached, + /// Expose the extension-backed standalone web search tool. + StandaloneWebSearch, /// Use the legacy Landlock Linux sandbox fallback instead of the default /// bubblewrap pipeline. UseLegacyLandlock, @@ -785,6 +787,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Deprecated, default_enabled: false, }, + FeatureSpec { + id: Feature::StandaloneWebSearch, + key: "standalone_web_search", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::SearchTool, key: "search_tool",