Compare commits

...

5 Commits

Author SHA1 Message Date
won
2455a3b0ff Restore thread ids in extension tool call fixtures 2026-06-01 11:44:44 -07:00
won
831758a2c0 Revert "Defer standalone image save-path hint"
This reverts commit 2e9606e804.
2026-06-01 11:43:31 -07:00
won
53a566d88a Remove stale extension tool call thread ids 2026-06-01 11:43:05 -07:00
won
2e9606e804 Defer standalone image save-path hint 2026-06-01 11:05:29 -07:00
won
bf947390ba draft 2026-05-29 16:37:35 -07:00
17 changed files with 445 additions and 153 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -3042,6 +3042,7 @@ dependencies = [
"codex-model-provider-info",
"codex-protocol",
"codex-tools",
"codex-utils-absolute-path",
"http 1.4.0",
"pretty_assertions",
"schemars 0.8.22",

View File

@@ -0,0 +1,220 @@
use std::path::Path;
use std::time::Duration;
use anyhow::Context;
use anyhow::Result;
use app_test_support::ChatGptAuthFixture;
use app_test_support::McpProcess;
use app_test_support::to_response;
use app_test_support::write_chatgpt_auth;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_config::types::AuthCredentialsStoreMode;
use core_test_support::responses;
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use tokio::time::timeout;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
// Base64 payload returned by the mocked images endpoint; it encodes the three
// bytes `png`, which is what the test expects to find in the saved artifact.
const RESULT: &str = "cG5n";
// macOS and Windows Bazel CI can spend tens of seconds starting app-server
// subprocesses or processing test RPCs under load.
#[cfg(any(target_os = "macos", windows))]
const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(60);
// Every other target uses a shorter 10 second limit per awaited RPC step.
#[cfg(not(any(target_os = "macos", windows)))]
const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10);
// End-to-end test: the model calls the `imagegen` tool, the generated image is
// saved to disk, the completed thread item reports the saved path, and the
// follow-up model request carries the path hint inside the function-call
// output rather than in a developer message.
#[tokio::test]
async fn standalone_image_generation_returns_saved_path_hint_to_model() -> Result<()> {
let call_id = "image-run-1";
let server = responses::start_mock_server().await;
// The images endpoint mock expects exactly one POST (see `mount_image_response`).
mount_image_response(&server).await;
// Two scripted model responses: the first issues the image-generation
// function call, the second ends the turn with a plain assistant message.
let response_mock = responses::mount_sse_sequence(
&server,
vec![
responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call_with_namespace(
call_id,
"image_gen",
"imagegen",
&json!({
"action": "generate",
"prompt": "paint a blue whale",
})
.to_string(),
),
responses::ev_completed("resp-1"),
]),
responses::sse(vec![
responses::ev_assistant_message("msg-1", "Done"),
responses::ev_completed("resp-2"),
]),
],
)
.await;
// Fresh codex home with a config pointing at the mock server and file-backed
// ChatGPT credentials.
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
write_chatgpt_auth(
codex_home.path(),
ChatGptAuthFixture::new("access-chatgpt"),
AuthCredentialsStoreMode::File,
)?;
// NOTE(review): `("OPENAI_API_KEY", None)` presumably removes the variable from
// the child environment so the ChatGPT auth fixture is used — confirm against
// `McpProcess::new_with_env`.
let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
// Start a thread, then a turn on that thread with a single text input.
let thread_req = mcp
.send_thread_start_request(ThreadStartParams::default())
.await?;
let thread_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(thread_req)),
)
.await??;
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(thread_resp)?;
let turn_req = mcp
.send_turn_start_request(TurnStartParams {
thread_id: thread.id,
client_user_message_id: None,
input: vec![V2UserInput::Text {
text: "Generate an image".to_string(),
text_elements: Vec::new(),
}],
..Default::default()
})
.await?;
let turn_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
)
.await??;
let _turn: TurnStartResponse = to_response::<TurnStartResponse>(turn_resp)?;
// Wait for the image-generation item to complete, then for the turn itself to
// finish before inspecting what was sent to the model.
let completed = timeout(
DEFAULT_READ_TIMEOUT,
wait_for_image_generation_completed(&mut mcp),
)
.await??;
timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("turn/completed"),
)
.await??;
// The completed item must be an image generation that carries a saved path.
let ThreadItem::ImageGeneration {
status,
revised_prompt,
result,
saved_path: Some(saved_path),
..
} = completed.item
else {
panic!("expected completed image generation item with saved path");
};
assert_eq!(status, "completed");
assert_eq!(revised_prompt.as_deref(), Some("paint a blue whale"));
assert_eq!(result, RESULT);
// The file at the reported path holds the decoded image bytes.
assert_eq!(std::fs::read(&saved_path)?, b"png");
// The second model request carries the tool output for `call_id`.
let requests = response_mock.requests();
assert_eq!(requests.len(), 2);
let output = requests[1].function_call_output(call_id);
// First output element: the generated image as a data URL.
assert_eq!(
output["output"][0],
json!({
"type": "input_image",
"image_url": format!("data:image/png;base64,{RESULT}"),
"detail": "high",
})
);
// Second output element: the text hint, which must mention the saved path.
let output_hint = output["output"][1]["text"]
.as_str()
.context("image output should include model-visible path hint")?;
assert!(
output_hint.contains(&saved_path.display().to_string()),
"output hint should identify the path core saved"
);
// The hint must not also appear as a developer message in that request.
assert!(
!requests[1]
.message_input_texts("developer")
.iter()
.any(|text| text.contains("Generated images are saved to")),
"standalone image generation should not emit the legacy developer-message hint"
);
Ok(())
}
/// Reads `item/completed` notifications from `mcp` until one describes an
/// image-generation item, and returns that notification.
///
/// Notifications for other item kinds are discarded. Fails if reading the
/// stream fails, if a notification has no params, or if the params do not
/// deserialize into an `ItemCompletedNotification`.
async fn wait_for_image_generation_completed(
    mcp: &mut McpProcess,
) -> Result<ItemCompletedNotification> {
    loop {
        let params = mcp
            .read_stream_until_notification_message("item/completed")
            .await?
            .params
            .context("item/completed notification should include params")?;
        let candidate: ItemCompletedNotification = serde_json::from_value(params)?;
        let is_image_generation = matches!(&candidate.item, ThreadItem::ImageGeneration { .. });
        if !is_image_generation {
            // Some other item finished; keep waiting.
            continue;
        }
        return Ok(candidate);
    }
}
/// Mounts a mock for `POST /api/codex/images/generations` on `server` that
/// answers 200 with a single base64 image (`RESULT`) and expects to be hit
/// exactly once.
async fn mount_image_response(server: &MockServer) {
    let body = json!({
        "created": 1,
        "data": [{"b64_json": RESULT}],
    });
    let template = ResponseTemplate::new(200).set_body_json(body);
    let mock = Mock::given(method("POST"))
        .and(path("/api/codex/images/generations"))
        .respond_with(template)
        .expect(1);
    mock.mount(server).await;
}
/// Writes the test `config.toml` into `codex_home`.
///
/// The config selects the `openai-custom` provider, points both
/// `chatgpt_base_url` and the provider's `base_url` at `server_uri`, enables
/// the `imagegenext` feature, and disables request/stream retries.
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
    let config_path = codex_home.join("config.toml");
    let contents = format!(
        r#"
model = "mock-model"
approval_policy = "never"
sandbox_mode = "read-only"
model_provider = "openai-custom"
chatgpt_base_url = "{server_uri}"
[features]
imagegenext = true
[model_providers.openai-custom]
name = "OpenAI"
base_url = "{server_uri}/api/codex"
wire_api = "responses"
request_max_retries = 0
stream_max_retries = 0
supports_websockets = false
requires_openai_auth = true
"#
    );
    std::fs::write(config_path, contents)
}

View File

@@ -17,6 +17,7 @@ mod experimental_feature_list;
mod external_agent_config;
mod fs;
mod hooks_list;
mod image_generation;
mod initialize;
mod marketplace_add;
mod marketplace_remove;

View File

@@ -1,6 +1,16 @@
use super::ContextualUserFragment;
use std::fmt::Display;
/// Builds the model-facing hint describing where the host saved a generated image.
///
/// `image_output_dir` and `image_output_path` are interpolated via their
/// `Display` implementations into the first sentence; the second sentence is
/// fixed guidance about copying rather than moving the artifact.
pub fn image_generation_output_hint(
    image_output_dir: impl Display,
    image_output_path: impl Display,
) -> String {
    const COPY_GUIDANCE: &str = "If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.";

    let mut hint = format!(
        "Generated images are saved to {image_output_dir} as {image_output_path} by default.\n"
    );
    hint.push_str(COPY_GUIDANCE);
    hint
}
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ImageGenerationInstructions {
image_output_dir: String,
@@ -30,9 +40,6 @@ impl ContextualUserFragment for ImageGenerationInstructions {
}
fn body(&self) -> String {
format!(
"Generated images are saved to {} as {} by default.\nIf you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
self.image_output_dir, self.image_output_path
)
image_generation_output_hint(&self.image_output_dir, &self.image_output_path)
}
}

View File

@@ -46,6 +46,7 @@ pub(crate) use fragments::AdditionalContextUserFragment;
pub(crate) use guardian_followup_review_reminder::GuardianFollowupReviewReminder;
pub(crate) use hook_additional_context::HookAdditionalContext;
pub(crate) use image_generation_instructions::ImageGenerationInstructions;
pub use image_generation_instructions::image_generation_output_hint;
pub use internal_model_context::InternalContextSource;
pub use internal_model_context::InternalModelContextFragment;
pub use internal_model_context::InvalidInternalContextSource;

View File

@@ -97,6 +97,7 @@ pub(crate) use skills::manager;
pub(crate) use skills::maybe_emit_implicit_skill_invocation;
pub(crate) use skills::skills_load_input_from_config;
mod stream_events_utils;
pub use stream_events_utils::image_generation_artifact_path;
pub mod test_support;
mod unified_exec;
pub mod windows_sandbox;

View File

@@ -38,7 +38,8 @@ use tracing::warn;
const GENERATED_IMAGE_ARTIFACTS_DIR: &str = "generated_images";
pub(crate) fn image_generation_artifact_path(
/// Returns the host-owned default artifact path for a generated image.
pub fn image_generation_artifact_path(
codex_home: &AbsolutePathBuf,
session_id: &str,
call_id: &str,
@@ -131,6 +132,7 @@ pub(crate) async fn persist_image_generation_item(
turn_context: &TurnContext,
image_item: &mut ImageGenerationItem,
) -> Option<AbsolutePathBuf> {
image_item.saved_path = None;
let session_id = sess.conversation_id.to_string();
match save_image_generation_result(
&turn_context.config.codex_home,
@@ -163,15 +165,12 @@ pub(crate) async fn persist_image_generation_item(
}
}
pub(crate) async fn finalize_image_generation_item(
async fn record_image_generation_instructions(
sess: &Session,
turn_context: &TurnContext,
image_item: &mut ImageGenerationItem,
image_item: &ImageGenerationItem,
) {
if persist_image_generation_item(sess, turn_context, image_item)
.await
.is_none()
{
if image_item.saved_path.is_none() {
return;
}
let session_id = sess.conversation_id.to_string();
@@ -530,27 +529,16 @@ pub(crate) async fn handle_non_tool_response_item(
| ResponseItem::WebSearchCall { .. }
| ResponseItem::ImageGenerationCall { .. } => {
let mut turn_item = parse_turn_item(item)?;
if let TurnItemContributorPolicy::Run(turn_store) = contributor_policy {
apply_turn_item_contributors(sess, turn_store, &mut turn_item).await;
}
if let TurnItem::AgentMessage(agent_message) = &mut turn_item {
let combined = agent_message
.content
.iter()
.map(|entry| match entry {
codex_protocol::items::AgentMessageContent::Text { text } => text.as_str(),
})
.collect::<String>();
let (stripped, memory_citation) =
strip_hidden_assistant_markup_and_parse_memory_citation(&combined, plan_mode);
agent_message.content =
vec![codex_protocol::items::AgentMessageContent::Text { text: stripped }];
if agent_message.memory_citation.is_none() {
agent_message.memory_citation = memory_citation;
}
}
if let TurnItem::ImageGeneration(image_item) = &mut turn_item {
finalize_image_generation_item(sess, turn_context, image_item).await;
finalize_turn_item(
sess,
turn_context,
contributor_policy,
&mut turn_item,
plan_mode,
)
.await;
if let TurnItem::ImageGeneration(image_item) = &turn_item {
record_image_generation_instructions(sess, turn_context, image_item).await;
}
Some(turn_item)
}
@@ -564,6 +552,37 @@ pub(crate) async fn handle_non_tool_response_item(
}
}
/// Finalizes `turn_item` in place before it is emitted as completed.
///
/// Steps, in order:
/// 1. When `contributor_policy` is `Run`, the turn-item contributors are
///    applied with the supplied turn store.
/// 2. For agent messages, all text entries are concatenated, passed through
///    `strip_hidden_assistant_markup_and_parse_memory_citation` (with
///    `plan_mode`), and replaced by one text entry holding the stripped
///    result; a parsed memory citation is stored only if none is set yet.
/// 3. For image-generation items, `persist_image_generation_item` is invoked
///    and its return value is ignored here.
///
/// Other item kinds are left as the contributors produced them.
pub(crate) async fn finalize_turn_item(
    sess: &Session,
    turn_context: &TurnContext,
    contributor_policy: TurnItemContributorPolicy<'_>,
    turn_item: &mut TurnItem,
    plan_mode: bool,
) {
    // Contributors run before the built-in finalization below.
    if let TurnItemContributorPolicy::Run(turn_store) = contributor_policy {
        apply_turn_item_contributors(sess, turn_store, turn_item).await;
    }

    match &mut *turn_item {
        TurnItem::AgentMessage(agent_message) => {
            let mut combined = String::new();
            for entry in &agent_message.content {
                match entry {
                    codex_protocol::items::AgentMessageContent::Text { text } => {
                        combined.push_str(text.as_str());
                    }
                }
            }
            let (stripped, memory_citation) =
                strip_hidden_assistant_markup_and_parse_memory_citation(&combined, plan_mode);
            agent_message.content =
                vec![codex_protocol::items::AgentMessageContent::Text { text: stripped }];
            // Keep a citation that is already present on the message.
            if agent_message.memory_citation.is_none() {
                agent_message.memory_citation = memory_citation;
            }
        }
        TurnItem::ImageGeneration(image_item) => {
            persist_image_generation_item(sess, turn_context, image_item).await;
        }
        _ => {}
    }
}
pub(crate) fn last_assistant_message_from_item(
item: &ResponseItem,
plan_mode: bool,

View File

@@ -4,19 +4,17 @@ use std::sync::Weak;
use codex_protocol::items::TurnItem;
use codex_tools::ConversationHistory;
use codex_tools::ExtensionTurnItem;
use codex_tools::ImageGenerationCompletionFuture;
use codex_tools::ToolCall as ExtensionToolCall;
use codex_tools::ToolName;
use codex_tools::ToolSpec;
use codex_tools::TurnItemEmissionFuture;
use codex_tools::TurnItemEmitter;
use crate::context::ContextualUserFragment;
use crate::context::ImageGenerationInstructions;
use crate::function_tool::FunctionCallError;
use crate::session::session::Session;
use crate::session::turn_context::TurnContext;
use crate::stream_events_utils::persist_image_generation_item;
use crate::stream_events_utils::TurnItemContributorPolicy;
use crate::stream_events_utils::finalize_turn_item;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -71,6 +69,10 @@ struct CoreTurnItemEmitter {
fn extension_turn_item(item: ExtensionTurnItem) -> TurnItem {
match item {
ExtensionTurnItem::WebSearch(item) => TurnItem::WebSearch(item),
ExtensionTurnItem::ImageGeneration(mut item) => {
item.saved_path = None;
TurnItem::ImageGeneration(item)
}
}
}
@@ -80,8 +82,9 @@ impl TurnItemEmitter for CoreTurnItemEmitter {
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
return;
};
let item = extension_turn_item(item);
session.emit_turn_item_started(turn.as_ref(), &item).await;
session
.emit_turn_item_started(turn.as_ref(), &extension_turn_item(item))
.await;
})
}
@@ -90,60 +93,25 @@ impl TurnItemEmitter for CoreTurnItemEmitter {
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
return;
};
let item = extension_turn_item(item);
let mut item = extension_turn_item(item);
finalize_turn_item(
session.as_ref(),
turn.as_ref(),
TurnItemContributorPolicy::Run(turn.extension_data.as_ref()),
&mut item,
turn.collaboration_mode.mode == codex_protocol::config_types::ModeKind::Plan,
)
.await;
session.emit_turn_item_completed(turn.as_ref(), item).await;
})
}
fn image_generation_completed<'a>(
&'a self,
call_id: String,
prompt: String,
result: String,
) -> ImageGenerationCompletionFuture<'a> {
Box::pin(async move {
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
return None;
};
let mut item = codex_protocol::items::ImageGenerationItem {
id: call_id,
status: "completed".to_string(),
revised_prompt: Some(prompt),
result,
saved_path: None,
};
let output_hint =
persist_image_generation_item(session.as_ref(), turn.as_ref(), &mut item)
.await
.map(|saved_path| {
let output_dir = saved_path
.parent()
.unwrap_or_else(|| turn.config.codex_home.clone());
ImageGenerationInstructions::new(output_dir.display(), saved_path.display())
.body()
});
let started_item = codex_protocol::items::ImageGenerationItem {
id: item.id.clone(),
status: "in_progress".to_string(),
revised_prompt: None,
result: String::new(),
saved_path: None,
};
session
.emit_turn_item_started(turn.as_ref(), &TurnItem::ImageGeneration(started_item))
.await;
session
.emit_turn_item_completed(turn.as_ref(), TurnItem::ImageGeneration(item))
.await;
output_hint
})
}
}
async fn to_extension_call(invocation: &ToolInvocation) -> ExtensionToolCall {
let conversation_history =
ConversationHistory::new(invocation.session.clone_history().await.into_raw_items());
ExtensionToolCall {
thread_id: invocation.session.conversation_id.to_string(),
turn_id: invocation.turn.sub_id.clone(),
call_id: invocation.call_id.clone(),
tool_name: invocation.tool_name.clone(),
@@ -161,6 +129,8 @@ async fn to_extension_call(invocation: &ToolInvocation) -> ExtensionToolCall {
mod tests {
use std::sync::Arc;
use codex_extension_api::ExtensionData;
use codex_extension_api::TurnItemContributor;
use codex_protocol::items::TurnItem;
use codex_protocol::items::WebSearchItem;
use codex_protocol::models::ContentItem;
@@ -168,10 +138,13 @@ mod tests {
use codex_protocol::models::WebSearchAction;
use codex_protocol::protocol::EventMsg;
use codex_tools::ExtensionTurnItem;
use codex_utils_absolute_path::test_support::PathExt;
use codex_utils_absolute_path::test_support::test_path_buf;
use pretty_assertions::assert_eq;
use serde_json::json;
use tokio::sync::Mutex;
use super::CoreTurnItemEmitter;
use super::ExtensionToolAdapter;
use crate::tools::context::ToolCallSource;
use crate::tools::context::ToolInvocation;
@@ -306,6 +279,7 @@ mod tests {
let (session, turn, rx) = crate::session::tests::make_session_and_context_with_rx().await;
let weak_session = Arc::downgrade(&session);
let weak_turn = Arc::downgrade(&turn);
let thread_id = session.conversation_id.to_string();
let turn_id = turn.sub_id.clone();
let truncation_policy = turn.truncation_policy;
let history_item = ResponseItem::Message {
@@ -344,6 +318,7 @@ mod tests {
let captured_call = captured_call.lock().await.clone().expect("captured call");
assert!(weak_session.upgrade().is_none());
assert!(weak_turn.upgrade().is_none());
assert_eq!(captured_call.thread_id, thread_id);
assert_eq!(captured_call.turn_id, turn_id);
assert_eq!(captured_call.call_id, "call-extension");
assert_eq!(
@@ -401,8 +376,54 @@ mod tests {
assert_eq!(end.action, expected.action);
}
struct ImageGenerationExtensionExecutor {
output_hint: Arc<Mutex<Option<String>>>,
struct ImageGenerationExtensionExecutor;
// Marker value stored in the turn-scoped extension data once the contributor
// below has been invoked.
#[derive(Debug)]
struct ExtensionTurnItemContributorRan;
// Test-only contributor whose sole effect is recording that it ran.
struct RecordExtensionTurnItemContributor;
#[async_trait::async_trait]
impl TurnItemContributor for RecordExtensionTurnItemContributor {
// Inserts the marker into `turn_store`; the thread store and the item itself
// are not touched. Always returns `Ok(())`.
async fn contribute(
&self,
_thread_store: &ExtensionData,
turn_store: &ExtensionData,
_item: &mut TurnItem,
) -> Result<(), String> {
turn_store.insert(ExtensionTurnItemContributorRan);
Ok(())
}
}
/// Emitting a completed item through `CoreTurnItemEmitter` must run the
/// registered turn-item contributors: the recording contributor leaves its
/// marker in the turn's extension data.
#[tokio::test]
async fn extension_completion_runs_turn_item_contributors() {
    let (mut session, turn) = crate::session::tests::make_session_and_context().await;

    // Register the recording contributor on the session's extension registry.
    let mut registry = codex_extension_api::ExtensionRegistryBuilder::new();
    registry.turn_item_contributor(Arc::new(RecordExtensionTurnItemContributor));
    session.services.extensions = Arc::new(registry.build());

    let session = Arc::new(session);
    let turn = Arc::new(turn);
    let emitter = CoreTurnItemEmitter {
        session: Arc::downgrade(&session),
        turn: Arc::downgrade(&turn),
    };

    let completed_item = ExtensionTurnItem::WebSearch(WebSearchItem {
        id: "search-1".to_string(),
        query: "contributors".to_string(),
        action: WebSearchAction::Other,
    });
    codex_tools::TurnItemEmitter::emit_completed(&emitter, completed_item).await;

    let contributor_ran = turn
        .extension_data
        .get::<ExtensionTurnItemContributorRan>()
        .is_some();
    assert!(contributor_ran);
}
#[async_trait::async_trait]
@@ -426,15 +447,28 @@ mod tests {
&self,
call: codex_tools::ToolCall,
) -> Result<Box<dyn codex_tools::ToolOutput>, codex_tools::FunctionCallError> {
let output_hint = call
.turn_item_emitter
.image_generation_completed(
call.call_id,
"A tiny blue square".to_string(),
"cG5n".to_string(),
)
call.turn_item_emitter
.emit_started(ExtensionTurnItem::ImageGeneration(
codex_protocol::items::ImageGenerationItem {
id: call.call_id.clone(),
status: "in_progress".to_string(),
revised_prompt: None,
result: String::new(),
saved_path: None,
},
))
.await;
call.turn_item_emitter
.emit_completed(ExtensionTurnItem::ImageGeneration(
codex_protocol::items::ImageGenerationItem {
id: call.call_id,
status: "completed".to_string(),
revised_prompt: Some("A tiny blue square".to_string()),
result: "cG5n".to_string(),
saved_path: Some(test_path_buf("/tmp/extension-claimed.png").abs()),
},
))
.await;
*self.output_hint.lock().await = output_hint;
Ok(Box::new(codex_tools::JsonToolOutput::new(
json!({ "ok": true }),
)))
@@ -443,10 +477,7 @@ mod tests {
#[tokio::test]
async fn image_generation_publication_is_finalized_by_core() {
let output_hint = Arc::new(Mutex::new(None));
let handler = ExtensionToolAdapter::new(Arc::new(ImageGenerationExtensionExecutor {
output_hint: Arc::clone(&output_hint),
}));
let handler = ExtensionToolAdapter::new(Arc::new(ImageGenerationExtensionExecutor));
let (session, turn, rx) = crate::session::tests::make_session_and_context_with_rx().await;
let expected_path = crate::stream_events_utils::image_generation_artifact_path(
&turn.config.codex_home,
@@ -513,17 +544,5 @@ mod tests {
std::fs::read(&expected_path).expect("generated artifact should be saved"),
b"png"
);
assert_eq!(
*output_hint.lock().await,
Some(format!(
"Generated images are saved to {} as {} by default.\n\
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
expected_path
.parent()
.expect("generated image path should have a parent")
.display(),
expected_path.display(),
))
);
}
}

View File

@@ -13,7 +13,6 @@ pub use capabilities::ResponseItemInjector;
pub use codex_tools::ConversationHistory;
pub use codex_tools::ExtensionTurnItem;
pub use codex_tools::FunctionCallError;
pub use codex_tools::ImageGenerationCompletionFuture;
pub use codex_tools::JsonToolOutput;
pub use codex_tools::NoopTurnItemEmitter;
pub use codex_tools::ResponsesApiTool;

View File

@@ -1126,6 +1126,7 @@ fn tool_by_name<'a>(
fn tool_call(tool_name: &str, call_id: &str, arguments: serde_json::Value) -> ToolCall {
ToolCall {
thread_id: "thread-1".to_string(),
turn_id: "turn-1".to_string(),
call_id: call_id.to_string(),
tool_name: codex_extension_api::ToolName::plain(tool_name),

View File

@@ -23,6 +23,7 @@ codex-model-provider = { workspace = true }
codex-model-provider-info = { workspace = true }
codex-protocol = { workspace = true }
codex-tools = { workspace = true }
codex-utils-absolute-path = { workspace = true }
http = { workspace = true }
schemars = { workspace = true }
serde = { workspace = true, features = ["derive"] }

View File

@@ -13,6 +13,7 @@ use codex_features::Feature;
use codex_login::AuthManager;
use codex_model_provider::create_model_provider;
use codex_model_provider_info::ModelProviderInfo;
use codex_utils_absolute_path::AbsolutePathBuf;
use crate::backend::CodexImagesBackend;
use crate::tool::ImageGenerationTool;
@@ -26,6 +27,7 @@ struct ImageGenerationExtension {
struct ImageGenerationExtensionConfig {
enabled: bool,
provider: ModelProviderInfo,
codex_home: AbsolutePathBuf,
}
impl From<&Config> for ImageGenerationExtensionConfig {
@@ -35,6 +37,7 @@ impl From<&Config> for ImageGenerationExtensionConfig {
enabled: config.features.enabled(Feature::ImageGenExt)
&& config.model_provider.is_openai(),
provider: config.model_provider.clone(),
codex_home: config.codex_home.clone(),
}
}
}
@@ -76,9 +79,13 @@ impl ToolContributor for ImageGenerationExtension {
return Vec::new();
}
vec![Arc::new(ImageGenerationTool::new(CodexImagesBackend::new(
create_model_provider(config.provider.clone(), Some(self.auth_manager.clone())),
)))]
vec![Arc::new(ImageGenerationTool::new(
CodexImagesBackend::new(create_model_provider(
config.provider.clone(),
Some(self.auth_manager.clone()),
)),
config.codex_home.clone(),
))]
}
}

View File

@@ -3,6 +3,7 @@ use codex_api::ImageEditRequest;
use codex_api::ImageGenerationRequest;
use codex_api::ImageQuality;
use codex_api::ImageUrl;
use codex_core::context::image_generation_output_hint;
use codex_extension_api::ToolOutput;
use codex_extension_api::ToolPayload;
use codex_extension_api::ToolSpec;
@@ -26,7 +27,6 @@ use crate::IMAGE_GEN_NAMESPACE;
use crate::IMAGEGEN_TOOL_NAME;
const RESULT: &str = "cG5n";
const OUTPUT_HINT: &str = "Generated images are saved to /tmp as /tmp/call-1.png by default.";
#[test]
fn uses_reserved_image_gen_namespace() {
@@ -56,9 +56,10 @@ fn generate_uses_fixed_request_defaults() {
#[test]
fn generated_output_returns_image_input_and_output_hint() {
let output_hint = image_generation_output_hint("/tmp", "/tmp/call-1.png");
let output = GeneratedImageOutput {
result: RESULT.to_string(),
output_hint: Some(OUTPUT_HINT.to_string()),
output_hint: output_hint.clone(),
};
let ResponseInputItem::FunctionCallOutput {
@@ -78,9 +79,7 @@ fn generated_output_returns_image_input_and_output_hint() {
image_url: format!("data:image/png;base64,{RESULT}"),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: OUTPUT_HINT.to_string(),
},
FunctionCallOutputContentItem::InputText { text: output_hint },
]
);
}

View File

@@ -3,6 +3,9 @@ use codex_api::ImageEditRequest;
use codex_api::ImageGenerationRequest;
use codex_api::ImageQuality;
use codex_api::ImageUrl;
use codex_core::context::image_generation_output_hint;
use codex_core::image_generation_artifact_path;
use codex_extension_api::ExtensionTurnItem;
use codex_extension_api::FunctionCallError;
use codex_extension_api::ToolCall;
use codex_extension_api::ToolExecutor;
@@ -11,6 +14,7 @@ use codex_extension_api::ToolOutput;
use codex_extension_api::ToolPayload;
use codex_extension_api::ToolSpec;
use codex_extension_api::parse_tool_input_schema;
use codex_protocol::items::ImageGenerationItem;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
@@ -23,6 +27,7 @@ use codex_tools::ResponsesApiNamespaceTool;
use codex_tools::ResponsesApiTool;
use codex_tools::ToolExposure;
use codex_tools::default_namespace_description;
use codex_utils_absolute_path::AbsolutePathBuf;
use schemars::JsonSchema;
use schemars::r#gen::SchemaSettings;
use serde::Deserialize;
@@ -40,12 +45,16 @@ const IMAGEGEN_DESCRIPTION: &str = include_str!("../imagegen_description.md");
#[derive(Clone)]
pub(crate) struct ImageGenerationTool {
backend: CodexImagesBackend,
codex_home: AbsolutePathBuf,
}
impl ImageGenerationTool {
/// Creates an image-generation tool backed by an image API executor.
pub(crate) fn new(backend: CodexImagesBackend) -> Self {
Self { backend }
pub(crate) fn new(backend: CodexImagesBackend, codex_home: AbsolutePathBuf) -> Self {
Self {
backend,
codex_home,
}
}
}
@@ -84,6 +93,15 @@ impl ToolExecutor<ToolCall> for ImageGenerationTool {
async fn handle(&self, call: ToolCall) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
let args = parse_args(&call)?;
let request = request_for_action(&args, call.conversation_history.items())?;
call.turn_item_emitter
.emit_started(ExtensionTurnItem::ImageGeneration(ImageGenerationItem {
id: call.call_id.clone(),
status: "in_progress".to_string(),
revised_prompt: None,
result: String::new(),
saved_path: None,
}))
.await;
let response = match request {
ImageRequest::Generate(request) => self.backend.generate(request).await,
ImageRequest::Edit(request) => self.backend.edit(request).await,
@@ -96,10 +114,21 @@ impl ToolExecutor<ToolCall> for ImageGenerationTool {
"image generation returned no image data".to_string(),
));
};
let output_hint = call
.turn_item_emitter
.image_generation_completed(call.call_id.clone(), args.prompt, result.clone())
call.turn_item_emitter
.emit_completed(ExtensionTurnItem::ImageGeneration(ImageGenerationItem {
id: call.call_id.clone(),
status: "completed".to_string(),
revised_prompt: Some(args.prompt),
result: result.clone(),
saved_path: None,
}))
.await;
let output_path =
image_generation_artifact_path(&self.codex_home, &call.thread_id, &call.call_id);
let output_dir = output_path
.parent()
.unwrap_or_else(|| self.codex_home.clone());
let output_hint = image_generation_output_hint(output_dir.display(), output_path.display());
Ok(Box::new(GeneratedImageOutput {
result,
output_hint,
@@ -284,7 +313,7 @@ fn imagegen_tool_spec() -> ToolSpec {
struct GeneratedImageOutput {
result: String,
output_hint: Option<String>,
output_hint: String,
}
impl ToolOutput for GeneratedImageOutput {
@@ -304,11 +333,9 @@ impl ToolOutput for GeneratedImageOutput {
image_url: format!("data:image/png;base64,{}", self.result),
detail: Some(DEFAULT_IMAGE_DETAIL),
}];
if let Some(output_hint) = &self.output_hint {
content.push(FunctionCallOutputContentItem::InputText {
text: output_hint.clone(),
});
}
content.push(FunctionCallOutputContentItem::InputText {
text: self.output_hint.clone(),
});
ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload {

View File

@@ -208,6 +208,7 @@ async fn add_ad_hoc_note_tool_creates_note_file() {
let output = tool
.handle(ToolCall {
thread_id: "thread-1".to_string(),
turn_id: "turn-1".to_string(),
call_id: "call-1".to_string(),
tool_name: memory_tool_name(crate::ADD_AD_HOC_NOTE_TOOL_NAME),
@@ -250,6 +251,7 @@ async fn add_ad_hoc_note_tool_rejects_paths_as_filenames() {
let result = tool
.handle(ToolCall {
thread_id: "thread-1".to_string(),
turn_id: "turn-1".to_string(),
call_id: "call-1".to_string(),
tool_name: memory_tool_name(crate::ADD_AD_HOC_NOTE_TOOL_NAME),
@@ -293,6 +295,7 @@ async fn read_tool_reads_memory_file() {
let output = tool
.handle(ToolCall {
thread_id: "thread-1".to_string(),
turn_id: "turn-1".to_string(),
call_id: "call-1".to_string(),
tool_name: memory_tool_name(crate::READ_TOOL_NAME),
@@ -339,6 +342,7 @@ async fn search_tool_accepts_multiple_queries() {
let output = tool
.handle(ToolCall {
thread_id: "thread-1".to_string(),
turn_id: "turn-1".to_string(),
call_id: "call-1".to_string(),
tool_name: memory_tool_name(crate::SEARCH_TOOL_NAME),
@@ -411,6 +415,7 @@ async fn search_tool_accepts_windowed_all_match_mode() {
let output = tool
.handle(ToolCall {
thread_id: "thread-1".to_string(),
turn_id: "turn-1".to_string(),
call_id: "call-1".to_string(),
tool_name: memory_tool_name(crate::SEARCH_TOOL_NAME),
@@ -463,6 +468,7 @@ async fn search_tool_rejects_legacy_single_query() {
let result = tool
.handle(ToolCall {
thread_id: "thread-1".to_string(),
turn_id: "turn-1".to_string(),
call_id: "call-1".to_string(),
tool_name: memory_tool_name(crate::SEARCH_TOOL_NAME),

View File

@@ -63,7 +63,6 @@ pub use responses_api::mcp_tool_to_responses_api_tool;
pub use responses_api::tool_definition_to_responses_api_tool;
pub use tool_call::ConversationHistory;
pub use tool_call::ExtensionTurnItem;
pub use tool_call::ImageGenerationCompletionFuture;
pub use tool_call::NoopTurnItemEmitter;
pub use tool_call::ToolCall;
pub use tool_call::TurnItemEmissionFuture;

View File

@@ -1,6 +1,7 @@
use crate::FunctionCallError;
use crate::ToolName;
use crate::ToolPayload;
use codex_protocol::items::ImageGenerationItem;
use codex_protocol::items::WebSearchItem;
use codex_protocol::models::ResponseItem;
use codex_utils_output_truncation::TruncationPolicy;
@@ -29,20 +30,14 @@ impl ConversationHistory {
/// Future returned when an extension tool emits a visible turn-item lifecycle event.
pub type TurnItemEmissionFuture<'a> = Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
/// Future returned when an image-generation extension publishes completed image bytes.
pub type ImageGenerationCompletionFuture<'a> =
Pin<Box<dyn Future<Output = Option<String>> + Send + 'a>>;
/// Visible turn items that an extension fully owns and may emit as-is.
///
/// Add only item kinds that require no additional host finalization before
/// persistence or client delivery. Richer items need a host-owned publish path.
/// Visible turn items that an extension may publish into the host lifecycle.
#[derive(Clone, Debug, PartialEq)]
pub enum ExtensionTurnItem {
WebSearch(WebSearchItem),
ImageGeneration(ImageGenerationItem),
}
/// Host-provided capability for extension tools to emit finalized visible turn items.
/// Host-provided capability for extension tools to emit visible turn items.
///
/// Implementations route lifecycle events through the host's normal item event
/// pipeline, including any persistence and client delivery owned by the host.
@@ -50,21 +45,8 @@ pub trait TurnItemEmitter: Send + Sync {
/// Emits the beginning of one visible turn item.
fn emit_started<'a>(&'a self, item: ExtensionTurnItem) -> TurnItemEmissionFuture<'a>;
/// Emits the completion of one visible turn item.
/// Emits one visible turn item after host-owned finalization.
fn emit_completed<'a>(&'a self, item: ExtensionTurnItem) -> TurnItemEmissionFuture<'a>;
/// Publishes image bytes for host persistence and visible completion.
///
/// Returns persisted-artifact context for the extension's model-facing
/// function output when the host saves the generated image successfully.
fn image_generation_completed<'a>(
&'a self,
_call_id: String,
_prompt: String,
_result: String,
) -> ImageGenerationCompletionFuture<'a> {
Box::pin(std::future::ready(None))
}
}
/// Turn-item emitter used when a caller does not expose visible item emission.
@@ -84,6 +66,7 @@ impl TurnItemEmitter for NoopTurnItemEmitter {
// TODO: this is temporary and will disappear in the next PR (as we make codex-extension-api generic on Invocation).
#[derive(Clone)]
pub struct ToolCall {
pub thread_id: String,
pub turn_id: String,
pub call_id: String,
pub tool_name: ToolName,
@@ -96,6 +79,7 @@ pub struct ToolCall {
impl std::fmt::Debug for ToolCall {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ToolCall")
.field("thread_id", &self.thread_id)
.field("turn_id", &self.turn_id)
.field("call_id", &self.call_id)
.field("tool_name", &self.tool_name)