mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
Route extension image generation through the native image completion pipeline (#24972)
## Why

The standalone `image_gen.imagegen` extension should behave like native image generation for artifact persistence and UI completion, while returning its save-location guidance as part of the tool result instead of injecting a developer message.

## What Changed

- Added an image-generation completion hook for extension tools so core can persist generated images and emit the existing `ImageGeneration` lifecycle events.
- Reused core image artifact persistence for extension output and removed extension-local save-path/file-writing logic.
- Split shared image persistence from built-in finalization so native image generation keeps its existing developer-message instruction behavior.
- Returned the generated image save-location instruction through the extension `FunctionCallOutput`, alongside the generated image input for model follow-up.
- Preserved the existing image-generation event shape for current UI and replay compatibility.
- Avoided cloning the full generated-image base64 payload when emitting the in-progress image item.
- Removed dependencies no longer needed after moving persistence out of the extension crate.

## Fast Follow

- Adjust the existing Extension API and add a general `TurnItem` finalization path for re-usability of code.

## Validation

- Ran `just fmt`.
- Ran `just bazel-lock-update`.
- Ran `just bazel-lock-check`.
- Ran `just test -p codex-tools -p codex-extension-api -p codex-image-generation-extension`.
- Ran `just test -p codex-core image_generation_publication_is_finalized_by_core`.
- Ran `just test -p codex-core handle_output_item_done_records_image_save_history_message`.
- Ran `just fix -p codex-tools -p codex-extension-api -p codex-core -p codex-image-generation-extension`.
This commit is contained in:
4
codex-rs/Cargo.lock
generated
4
codex-rs/Cargo.lock
generated
@@ -3033,7 +3033,6 @@ name = "codex-image-generation-extension"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.22.1",
|
||||
"codex-api",
|
||||
"codex-core",
|
||||
"codex-extension-api",
|
||||
@@ -3048,9 +3047,6 @@ dependencies = [
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -5,6 +5,7 @@ use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_extension_api::ExtensionData;
|
||||
use codex_protocol::config_types::ModeKind;
|
||||
use codex_protocol::items::ImageGenerationItem;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_utils_stream_parser::strip_citations;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -125,6 +126,68 @@ async fn save_image_generation_result(
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
pub(crate) async fn persist_image_generation_item(
|
||||
sess: &Session,
|
||||
turn_context: &TurnContext,
|
||||
image_item: &mut ImageGenerationItem,
|
||||
) -> Option<AbsolutePathBuf> {
|
||||
let session_id = sess.conversation_id.to_string();
|
||||
match save_image_generation_result(
|
||||
&turn_context.config.codex_home,
|
||||
&session_id,
|
||||
&image_item.id,
|
||||
&image_item.result,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(path) => {
|
||||
image_item.saved_path = Some(path.clone());
|
||||
Some(path)
|
||||
}
|
||||
Err(err) => {
|
||||
let output_path = image_generation_artifact_path(
|
||||
&turn_context.config.codex_home,
|
||||
&session_id,
|
||||
&image_item.id,
|
||||
);
|
||||
let output_dir = output_path
|
||||
.parent()
|
||||
.unwrap_or_else(|| turn_context.config.codex_home.clone());
|
||||
tracing::warn!(
|
||||
call_id = %image_item.id,
|
||||
output_dir = %output_dir.display(),
|
||||
"failed to save generated image: {err}"
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn finalize_image_generation_item(
|
||||
sess: &Session,
|
||||
turn_context: &TurnContext,
|
||||
image_item: &mut ImageGenerationItem,
|
||||
) {
|
||||
if persist_image_generation_item(sess, turn_context, image_item)
|
||||
.await
|
||||
.is_none()
|
||||
{
|
||||
return;
|
||||
}
|
||||
let session_id = sess.conversation_id.to_string();
|
||||
let image_output_path =
|
||||
image_generation_artifact_path(&turn_context.config.codex_home, &session_id, "<image_id>");
|
||||
let image_output_dir = image_output_path
|
||||
.parent()
|
||||
.unwrap_or_else(|| turn_context.config.codex_home.clone());
|
||||
let message: ResponseItem = ContextualUserFragment::into(ImageGenerationInstructions::new(
|
||||
image_output_dir.display(),
|
||||
image_output_path.display(),
|
||||
));
|
||||
sess.record_conversation_items(turn_context, &[message])
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Persist a completed model response item and record any cited memory usage.
|
||||
pub(crate) async fn record_completed_response_item(
|
||||
sess: &Session,
|
||||
@@ -487,49 +550,7 @@ pub(crate) async fn handle_non_tool_response_item(
|
||||
}
|
||||
}
|
||||
if let TurnItem::ImageGeneration(image_item) = &mut turn_item {
|
||||
let session_id = sess.conversation_id.to_string();
|
||||
match save_image_generation_result(
|
||||
&turn_context.config.codex_home,
|
||||
&session_id,
|
||||
&image_item.id,
|
||||
&image_item.result,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(path) => {
|
||||
image_item.saved_path = Some(path);
|
||||
let image_output_path = image_generation_artifact_path(
|
||||
&turn_context.config.codex_home,
|
||||
&session_id,
|
||||
"<image_id>",
|
||||
);
|
||||
let image_output_dir = image_output_path
|
||||
.parent()
|
||||
.unwrap_or_else(|| turn_context.config.codex_home.clone());
|
||||
let message: ResponseItem =
|
||||
ContextualUserFragment::into(ImageGenerationInstructions::new(
|
||||
image_output_dir.display(),
|
||||
image_output_path.display(),
|
||||
));
|
||||
sess.record_conversation_items(turn_context, &[message])
|
||||
.await;
|
||||
}
|
||||
Err(err) => {
|
||||
let output_path = image_generation_artifact_path(
|
||||
&turn_context.config.codex_home,
|
||||
&session_id,
|
||||
&image_item.id,
|
||||
);
|
||||
let output_dir = output_path
|
||||
.parent()
|
||||
.unwrap_or_else(|| turn_context.config.codex_home.clone());
|
||||
tracing::warn!(
|
||||
call_id = %image_item.id,
|
||||
output_dir = %output_dir.display(),
|
||||
"failed to save generated image: {err}"
|
||||
);
|
||||
}
|
||||
}
|
||||
finalize_image_generation_item(sess, turn_context, image_item).await;
|
||||
}
|
||||
Some(turn_item)
|
||||
}
|
||||
|
||||
@@ -4,15 +4,19 @@ use std::sync::Weak;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_tools::ConversationHistory;
|
||||
use codex_tools::ExtensionTurnItem;
|
||||
use codex_tools::ImageGenerationCompletionFuture;
|
||||
use codex_tools::ToolCall as ExtensionToolCall;
|
||||
use codex_tools::ToolName;
|
||||
use codex_tools::ToolSpec;
|
||||
use codex_tools::TurnItemEmissionFuture;
|
||||
use codex_tools::TurnItemEmitter;
|
||||
|
||||
use crate::context::ContextualUserFragment;
|
||||
use crate::context::ImageGenerationInstructions;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::session::session::Session;
|
||||
use crate::session::turn_context::TurnContext;
|
||||
use crate::stream_events_utils::persist_image_generation_item;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -90,6 +94,50 @@ impl TurnItemEmitter for CoreTurnItemEmitter {
|
||||
session.emit_turn_item_completed(turn.as_ref(), item).await;
|
||||
})
|
||||
}
|
||||
|
||||
fn image_generation_completed<'a>(
|
||||
&'a self,
|
||||
call_id: String,
|
||||
prompt: String,
|
||||
result: String,
|
||||
) -> ImageGenerationCompletionFuture<'a> {
|
||||
Box::pin(async move {
|
||||
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
|
||||
return None;
|
||||
};
|
||||
let mut item = codex_protocol::items::ImageGenerationItem {
|
||||
id: call_id,
|
||||
status: "completed".to_string(),
|
||||
revised_prompt: Some(prompt),
|
||||
result,
|
||||
saved_path: None,
|
||||
};
|
||||
let output_hint =
|
||||
persist_image_generation_item(session.as_ref(), turn.as_ref(), &mut item)
|
||||
.await
|
||||
.map(|saved_path| {
|
||||
let output_dir = saved_path
|
||||
.parent()
|
||||
.unwrap_or_else(|| turn.config.codex_home.clone());
|
||||
ImageGenerationInstructions::new(output_dir.display(), saved_path.display())
|
||||
.body()
|
||||
});
|
||||
let started_item = codex_protocol::items::ImageGenerationItem {
|
||||
id: item.id.clone(),
|
||||
status: "in_progress".to_string(),
|
||||
revised_prompt: None,
|
||||
result: String::new(),
|
||||
saved_path: None,
|
||||
};
|
||||
session
|
||||
.emit_turn_item_started(turn.as_ref(), &TurnItem::ImageGeneration(started_item))
|
||||
.await;
|
||||
session
|
||||
.emit_turn_item_completed(turn.as_ref(), TurnItem::ImageGeneration(item))
|
||||
.await;
|
||||
output_hint
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn to_extension_call(invocation: &ToolInvocation) -> ExtensionToolCall {
|
||||
@@ -352,4 +400,130 @@ mod tests {
|
||||
assert_eq!(end.query, expected.query);
|
||||
assert_eq!(end.action, expected.action);
|
||||
}
|
||||
|
||||
struct ImageGenerationExtensionExecutor {
|
||||
output_hint: Arc<Mutex<Option<String>>>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl codex_extension_api::ToolExecutor<codex_tools::ToolCall> for ImageGenerationExtensionExecutor {
|
||||
fn tool_name(&self) -> codex_tools::ToolName {
|
||||
codex_tools::ToolName::namespaced("image_gen", "imagegen")
|
||||
}
|
||||
|
||||
fn spec(&self) -> codex_tools::ToolSpec {
|
||||
codex_tools::ToolSpec::Function(codex_tools::ResponsesApiTool {
|
||||
name: "imagegen".to_string(),
|
||||
description: "Generates an image.".to_string(),
|
||||
strict: false,
|
||||
parameters: codex_tools::JsonSchema::default(),
|
||||
output_schema: None,
|
||||
defer_loading: None,
|
||||
})
|
||||
}
|
||||
|
||||
async fn handle(
|
||||
&self,
|
||||
call: codex_tools::ToolCall,
|
||||
) -> Result<Box<dyn codex_tools::ToolOutput>, codex_tools::FunctionCallError> {
|
||||
let output_hint = call
|
||||
.turn_item_emitter
|
||||
.image_generation_completed(
|
||||
call.call_id,
|
||||
"A tiny blue square".to_string(),
|
||||
"cG5n".to_string(),
|
||||
)
|
||||
.await;
|
||||
*self.output_hint.lock().await = output_hint;
|
||||
Ok(Box::new(codex_tools::JsonToolOutput::new(
|
||||
json!({ "ok": true }),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_generation_publication_is_finalized_by_core() {
|
||||
let output_hint = Arc::new(Mutex::new(None));
|
||||
let handler = ExtensionToolAdapter::new(Arc::new(ImageGenerationExtensionExecutor {
|
||||
output_hint: Arc::clone(&output_hint),
|
||||
}));
|
||||
let (session, turn, rx) = crate::session::tests::make_session_and_context_with_rx().await;
|
||||
let expected_path = crate::stream_events_utils::image_generation_artifact_path(
|
||||
&turn.config.codex_home,
|
||||
&session.conversation_id.to_string(),
|
||||
"call-image",
|
||||
);
|
||||
let invocation = ToolInvocation {
|
||||
session,
|
||||
turn,
|
||||
cancellation_token: tokio_util::sync::CancellationToken::new(),
|
||||
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "call-image".to_string(),
|
||||
tool_name: codex_tools::ToolName::namespaced("image_gen", "imagegen"),
|
||||
source: ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: "{}".to_string(),
|
||||
},
|
||||
};
|
||||
|
||||
crate::tools::registry::ToolExecutor::handle(&handler, invocation)
|
||||
.await
|
||||
.expect("extension call should succeed");
|
||||
|
||||
let started = rx.recv().await.expect("item started event");
|
||||
let EventMsg::ItemStarted(started) = started.msg else {
|
||||
panic!("expected item started event");
|
||||
};
|
||||
let TurnItem::ImageGeneration(started_item) = started.item else {
|
||||
panic!("expected image generation item");
|
||||
};
|
||||
let begin = rx.recv().await.expect("legacy image start event");
|
||||
assert!(matches!(begin.msg, EventMsg::ImageGenerationBegin(_)));
|
||||
let completed = rx.recv().await.expect("item completed event");
|
||||
let EventMsg::ItemCompleted(completed) = completed.msg else {
|
||||
panic!("expected item completed event");
|
||||
};
|
||||
let TurnItem::ImageGeneration(completed_item) = completed.item else {
|
||||
panic!("expected image generation item");
|
||||
};
|
||||
let end = rx.recv().await.expect("legacy image end event");
|
||||
assert!(matches!(end.msg, EventMsg::ImageGenerationEnd(_)));
|
||||
|
||||
assert_eq!(
|
||||
started_item,
|
||||
codex_protocol::items::ImageGenerationItem {
|
||||
id: "call-image".to_string(),
|
||||
status: "in_progress".to_string(),
|
||||
revised_prompt: None,
|
||||
result: String::new(),
|
||||
saved_path: None,
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
completed_item,
|
||||
codex_protocol::items::ImageGenerationItem {
|
||||
id: "call-image".to_string(),
|
||||
status: "completed".to_string(),
|
||||
revised_prompt: Some("A tiny blue square".to_string()),
|
||||
result: "cG5n".to_string(),
|
||||
saved_path: Some(expected_path.clone()),
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
std::fs::read(&expected_path).expect("generated artifact should be saved"),
|
||||
b"png"
|
||||
);
|
||||
assert_eq!(
|
||||
*output_hint.lock().await,
|
||||
Some(format!(
|
||||
"Generated images are saved to {} as {} by default.\n\
|
||||
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
|
||||
expected_path
|
||||
.parent()
|
||||
.expect("generated image path should have a parent")
|
||||
.display(),
|
||||
expected_path.display(),
|
||||
))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ pub use capabilities::ResponseItemInjector;
|
||||
pub use codex_tools::ConversationHistory;
|
||||
pub use codex_tools::ExtensionTurnItem;
|
||||
pub use codex_tools::FunctionCallError;
|
||||
pub use codex_tools::ImageGenerationCompletionFuture;
|
||||
pub use codex_tools::JsonToolOutput;
|
||||
pub use codex_tools::NoopTurnItemEmitter;
|
||||
pub use codex_tools::ResponsesApiTool;
|
||||
|
||||
@@ -14,7 +14,6 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
codex-api = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-extension-api = { workspace = true }
|
||||
@@ -28,10 +27,6 @@ http = { workspace = true }
|
||||
schemars = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs"] }
|
||||
tracing = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_core::config::Config;
|
||||
@@ -17,7 +16,6 @@ use codex_model_provider_info::ModelProviderInfo;
|
||||
|
||||
use crate::backend::CodexImagesBackend;
|
||||
use crate::tool::ImageGenerationTool;
|
||||
use crate::tool::generated_image_output_dir;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ImageGenerationExtension {
|
||||
@@ -28,7 +26,6 @@ struct ImageGenerationExtension {
|
||||
struct ImageGenerationExtensionConfig {
|
||||
enabled: bool,
|
||||
provider: ModelProviderInfo,
|
||||
codex_home: PathBuf,
|
||||
}
|
||||
|
||||
impl From<&Config> for ImageGenerationExtensionConfig {
|
||||
@@ -38,7 +35,6 @@ impl From<&Config> for ImageGenerationExtensionConfig {
|
||||
enabled: config.features.enabled(Feature::ImageGenExt)
|
||||
&& config.model_provider.is_openai(),
|
||||
provider: config.model_provider.clone(),
|
||||
codex_home: config.codex_home.to_path_buf(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -80,13 +76,9 @@ impl ToolContributor for ImageGenerationExtension {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
vec![Arc::new(ImageGenerationTool::new(
|
||||
CodexImagesBackend::new(create_model_provider(
|
||||
config.provider.clone(),
|
||||
Some(self.auth_manager.clone()),
|
||||
)),
|
||||
generated_image_output_dir(&config.codex_home, thread_store.level_id()),
|
||||
))]
|
||||
vec![Arc::new(ImageGenerationTool::new(CodexImagesBackend::new(
|
||||
create_model_provider(config.provider.clone(), Some(self.auth_manager.clone())),
|
||||
)))]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,14 +20,13 @@ use super::GeneratedImageOutput;
|
||||
use super::ImageRequest;
|
||||
use super::ImagegenAction;
|
||||
use super::ImagegenArgs;
|
||||
use super::generated_image_output_dir;
|
||||
use super::imagegen_tool_spec;
|
||||
use super::persist_generated_image;
|
||||
use super::request_for_action;
|
||||
use crate::IMAGE_GEN_NAMESPACE;
|
||||
use crate::IMAGEGEN_TOOL_NAME;
|
||||
|
||||
const RESULT: &str = "cG5n";
|
||||
const OUTPUT_HINT: &str = "Generated images are saved to /tmp as /tmp/call-1.png by default.";
|
||||
|
||||
#[test]
|
||||
fn uses_reserved_image_gen_namespace() {
|
||||
@@ -55,15 +54,11 @@ fn generate_uses_fixed_request_defaults() {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn generated_output_returns_image_input_and_persists_artifact() {
|
||||
let tempdir = tempfile::tempdir().expect("tempdir");
|
||||
let output_hint = persist_generated_image(tempdir.path(), "call-1", RESULT)
|
||||
.await
|
||||
.expect("generated image should persist");
|
||||
#[test]
|
||||
fn generated_output_returns_image_input_and_output_hint() {
|
||||
let output = GeneratedImageOutput {
|
||||
result: RESULT.to_string(),
|
||||
output_hint: Some(output_hint),
|
||||
output_hint: Some(OUTPUT_HINT.to_string()),
|
||||
};
|
||||
|
||||
let ResponseInputItem::FunctionCallOutput {
|
||||
@@ -84,19 +79,10 @@ async fn generated_output_returns_image_input_and_persists_artifact() {
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: format!(
|
||||
"Generated images are saved to {} as {} by default.\n\
|
||||
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
|
||||
tempdir.path().display(),
|
||||
tempdir.path().join("call-1.png").display(),
|
||||
),
|
||||
text: OUTPUT_HINT.to_string(),
|
||||
},
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
std::fs::read(tempdir.path().join("call-1.png")).expect("saved generated image"),
|
||||
b"png"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -265,14 +251,6 @@ fn edit_without_image_history_returns_tool_error() {
|
||||
);
|
||||
}
|
||||
|
||||
/// A thread id containing `/` is sanitized before it becomes the per-thread directory
/// name under `generated_images`.
#[test]
fn generated_image_output_dir_is_scoped_to_sanitized_thread_id() {
    let codex_home = std::path::Path::new("/tmp/codex-home");
    let actual = generated_image_output_dir(codex_home, "thread/1");
    let expected = std::path::PathBuf::from("/tmp/codex-home/generated_images/thread_1");
    assert_eq!(actual, expected);
}
|
||||
|
||||
fn args(action: ImagegenAction, prompt: &str) -> ImagegenArgs {
|
||||
ImagegenArgs {
|
||||
prompt: prompt.to_string(),
|
||||
|
||||
@@ -1,8 +1,3 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_api::ImageBackground;
|
||||
use codex_api::ImageEditRequest;
|
||||
use codex_api::ImageGenerationRequest;
|
||||
@@ -41,21 +36,16 @@ use crate::backend::CodexImagesBackend;
|
||||
const IMAGE_MODEL: &str = "gpt-image-2";
|
||||
const MAX_EDIT_IMAGES: usize = 5;
|
||||
const IMAGEGEN_DESCRIPTION: &str = include_str!("../imagegen_description.md");
|
||||
const GENERATED_IMAGE_ARTIFACTS_DIR: &str = "generated_images";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ImageGenerationTool {
|
||||
backend: CodexImagesBackend,
|
||||
output_dir: PathBuf,
|
||||
}
|
||||
|
||||
impl ImageGenerationTool {
|
||||
/// Creates an image-generation tool backed by an image API executor.
|
||||
pub(crate) fn new(backend: CodexImagesBackend, output_dir: PathBuf) -> Self {
|
||||
Self {
|
||||
backend,
|
||||
output_dir,
|
||||
}
|
||||
pub(crate) fn new(backend: CodexImagesBackend) -> Self {
|
||||
Self { backend }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,7 +84,6 @@ impl ToolExecutor<ToolCall> for ImageGenerationTool {
|
||||
async fn handle(&self, call: ToolCall) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
|
||||
let args = parse_args(&call)?;
|
||||
let request = request_for_action(&args, call.conversation_history.items())?;
|
||||
|
||||
let response = match request {
|
||||
ImageRequest::Generate(request) => self.backend.generate(request).await,
|
||||
ImageRequest::Edit(request) => self.backend.edit(request).await,
|
||||
@@ -107,18 +96,10 @@ impl ToolExecutor<ToolCall> for ImageGenerationTool {
|
||||
"image generation returned no image data".to_string(),
|
||||
));
|
||||
};
|
||||
let output_hint =
|
||||
match persist_generated_image(&self.output_dir, &call.call_id, &result).await {
|
||||
Ok(output_hint) => Some(output_hint),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
call_id = %call.call_id,
|
||||
output_dir = %self.output_dir.display(),
|
||||
"failed to save generated image: {err}"
|
||||
);
|
||||
None
|
||||
}
|
||||
};
|
||||
let output_hint = call
|
||||
.turn_item_emitter
|
||||
.image_generation_completed(call.call_id.clone(), args.prompt, result.clone())
|
||||
.await;
|
||||
Ok(Box::new(GeneratedImageOutput {
|
||||
result,
|
||||
output_hint,
|
||||
@@ -268,58 +249,6 @@ fn parse_args(call: &ToolCall) -> Result<ImagegenArgs, FunctionCallError> {
|
||||
.map_err(|err| FunctionCallError::RespondToModel(err.to_string()))
|
||||
}
|
||||
|
||||
/// Resolves where generated images for one thread are persisted by the extension.
|
||||
pub(crate) fn generated_image_output_dir(codex_home: &Path, thread_id: &str) -> PathBuf {
|
||||
codex_home
|
||||
.join(GENERATED_IMAGE_ARTIFACTS_DIR)
|
||||
.join(sanitize_path_component(thread_id))
|
||||
}
|
||||
|
||||
fn generated_image_output_path(output_dir: &Path, call_id: &str) -> PathBuf {
|
||||
output_dir.join(format!("{}.png", sanitize_path_component(call_id)))
|
||||
}
|
||||
|
||||
/// Rewrites `value` so it can be used as a single path component.
///
/// ASCII letters, ASCII digits, `-`, and `_` are kept; every other character becomes `_`.
/// An empty input yields the fixed name `generated_image`.
fn sanitize_path_component(value: &str) -> String {
    // Each input character maps to exactly one output character, so the result is empty
    // only when the input is empty.
    if value.is_empty() {
        return "generated_image".to_string();
    }

    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        sanitized.push(if keep { ch } else { '_' });
    }
    sanitized
}
|
||||
|
||||
async fn persist_generated_image(
|
||||
output_dir: &Path,
|
||||
call_id: &str,
|
||||
result: &str,
|
||||
) -> Result<String, String> {
|
||||
let bytes = BASE64_STANDARD
|
||||
.decode(result.trim().as_bytes())
|
||||
.map_err(|err| format!("invalid image generation payload: {err}"))?;
|
||||
tokio::fs::create_dir_all(output_dir)
|
||||
.await
|
||||
.map_err(|err| err.to_string())?;
|
||||
tokio::fs::write(generated_image_output_path(output_dir, call_id), bytes)
|
||||
.await
|
||||
.map_err(|err| err.to_string())?;
|
||||
|
||||
Ok(format!(
|
||||
"Generated images are saved to {} as {} by default.\n\
|
||||
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
|
||||
output_dir.display(),
|
||||
generated_image_output_path(output_dir, call_id).display(),
|
||||
))
|
||||
}
|
||||
|
||||
/// Builds the namespace function schema exposed to the model.
|
||||
fn imagegen_tool_spec() -> ToolSpec {
|
||||
let mut schema_value = serde_json::to_value(
|
||||
@@ -369,7 +298,7 @@ impl ToolOutput for GeneratedImageOutput {
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns generated bytes and persisted-artifact context for the model's follow-up response.
|
||||
/// Returns generated bytes and persisted-artifact context for model follow-up.
|
||||
fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
|
||||
let mut content = vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: format!("data:image/png;base64,{}", self.result),
|
||||
|
||||
@@ -63,6 +63,7 @@ pub use responses_api::mcp_tool_to_responses_api_tool;
|
||||
pub use responses_api::tool_definition_to_responses_api_tool;
|
||||
pub use tool_call::ConversationHistory;
|
||||
pub use tool_call::ExtensionTurnItem;
|
||||
pub use tool_call::ImageGenerationCompletionFuture;
|
||||
pub use tool_call::NoopTurnItemEmitter;
|
||||
pub use tool_call::ToolCall;
|
||||
pub use tool_call::TurnItemEmissionFuture;
|
||||
|
||||
@@ -29,6 +29,10 @@ impl ConversationHistory {
|
||||
/// Future returned when an extension tool emits a visible turn-item lifecycle event.
|
||||
pub type TurnItemEmissionFuture<'a> = Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
|
||||
|
||||
/// Future returned when an image-generation extension publishes completed image bytes.
///
/// The future is boxed, pinned, and `Send`, and may borrow for `'a`. It resolves to an
/// `Option<String>`.
|
||||
pub type ImageGenerationCompletionFuture<'a> =
|
||||
Pin<Box<dyn Future<Output = Option<String>> + Send + 'a>>;
|
||||
|
||||
/// Visible turn items that an extension fully owns and may emit as-is.
|
||||
///
|
||||
/// Add only item kinds that require no additional host finalization before
|
||||
@@ -48,6 +52,19 @@ pub trait TurnItemEmitter: Send + Sync {
|
||||
|
||||
/// Emits the completion of one visible turn item.
|
||||
fn emit_completed<'a>(&'a self, item: ExtensionTurnItem) -> TurnItemEmissionFuture<'a>;
|
||||
|
||||
/// Publishes image bytes for host persistence and visible completion.
|
||||
///
|
||||
/// Returns persisted-artifact context for the extension's model-facing
|
||||
/// function output when the host saves the generated image successfully.
|
||||
fn image_generation_completed<'a>(
|
||||
&'a self,
|
||||
_call_id: String,
|
||||
_prompt: String,
|
||||
_result: String,
|
||||
) -> ImageGenerationCompletionFuture<'a> {
|
||||
Box::pin(std::future::ready(None))
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn-item emitter used when a caller does not expose visible item emission.
|
||||
|
||||
Reference in New Issue
Block a user