plumbing image save hint

This commit is contained in:
won
2026-05-28 16:48:24 -07:00
parent 03e3f56a50
commit f596fbdf0c
7 changed files with 115 additions and 45 deletions

View File

@@ -126,11 +126,11 @@ async fn save_image_generation_result(
Ok(path)
}
pub(crate) async fn finalize_image_generation_item(
pub(crate) async fn persist_image_generation_item(
sess: &Session,
turn_context: &TurnContext,
image_item: &mut ImageGenerationItem,
) {
) -> Option<AbsolutePathBuf> {
let session_id = sess.conversation_id.to_string();
match save_image_generation_result(
&turn_context.config.codex_home,
@@ -141,22 +141,8 @@ pub(crate) async fn finalize_image_generation_item(
.await
{
Ok(path) => {
image_item.saved_path = Some(path);
let image_output_path = image_generation_artifact_path(
&turn_context.config.codex_home,
&session_id,
"<image_id>",
);
let image_output_dir = image_output_path
.parent()
.unwrap_or_else(|| turn_context.config.codex_home.clone());
let message: ResponseItem =
ContextualUserFragment::into(ImageGenerationInstructions::new(
image_output_dir.display(),
image_output_path.display(),
));
sess.record_conversation_items(turn_context, &[message])
.await;
image_item.saved_path = Some(path.clone());
Some(path)
}
Err(err) => {
let output_path = image_generation_artifact_path(
@@ -172,10 +158,36 @@ pub(crate) async fn finalize_image_generation_item(
output_dir = %output_dir.display(),
"failed to save generated image: {err}"
);
None
}
}
}
/// Persists a generated image and, on success, records the image-path
/// instructions into the conversation.
///
/// Persistence is delegated to `persist_image_generation_item`. When that
/// returns `None`, this function returns without recording anything.
/// Otherwise it builds an `ImageGenerationInstructions` fragment from the
/// artifact path and its parent directory, and records it via
/// `Session::record_conversation_items`.
pub(crate) async fn finalize_image_generation_item(
sess: &Session,
turn_context: &TurnContext,
image_item: &mut ImageGenerationItem,
) {
// Nothing to announce if the image was not persisted.
if persist_image_generation_item(sess, turn_context, image_item)
.await
.is_none()
{
return;
}
let session_id = sess.conversation_id.to_string();
// The path is built with the literal "<image_id>" placeholder rather than
// this item's id. NOTE(review): presumably so the instructions describe the
// general naming pattern instead of one concrete file; confirm.
let image_output_path =
image_generation_artifact_path(&turn_context.config.codex_home, &session_id, "<image_id>");
// Fall back to `codex_home` when the artifact path has no parent.
let image_output_dir = image_output_path
.parent()
.unwrap_or_else(|| turn_context.config.codex_home.clone());
let message: ResponseItem = ContextualUserFragment::into(ImageGenerationInstructions::new(
image_output_dir.display(),
image_output_path.display(),
));
sess.record_conversation_items(turn_context, &[message])
.await;
}
/// Persist a completed model response item and record any cited memory usage.
pub(crate) async fn record_completed_response_item(
sess: &Session,

View File

@@ -4,16 +4,19 @@ use std::sync::Weak;
use codex_protocol::items::TurnItem;
use codex_tools::ConversationHistory;
use codex_tools::ExtensionTurnItem;
use codex_tools::ImageGenerationCompletionFuture;
use codex_tools::ToolCall as ExtensionToolCall;
use codex_tools::ToolName;
use codex_tools::ToolSpec;
use codex_tools::TurnItemEmissionFuture;
use codex_tools::TurnItemEmitter;
use crate::context::ContextualUserFragment;
use crate::context::ImageGenerationInstructions;
use crate::function_tool::FunctionCallError;
use crate::session::session::Session;
use crate::session::turn_context::TurnContext;
use crate::stream_events_utils::finalize_image_generation_item;
use crate::stream_events_utils::persist_image_generation_item;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -97,10 +100,10 @@ impl TurnItemEmitter for CoreTurnItemEmitter {
call_id: String,
prompt: String,
result: String,
) -> TurnItemEmissionFuture<'a> {
) -> ImageGenerationCompletionFuture<'a> {
Box::pin(async move {
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
return;
return None;
};
let mut item = codex_protocol::items::ImageGenerationItem {
id: call_id,
@@ -109,18 +112,30 @@ impl TurnItemEmitter for CoreTurnItemEmitter {
result,
saved_path: None,
};
finalize_image_generation_item(session.as_ref(), turn.as_ref(), &mut item).await;
let mut started_item = item.clone();
started_item.status = "in_progress".to_string();
started_item.revised_prompt = None;
started_item.result.clear();
started_item.saved_path = None;
let output_hint =
persist_image_generation_item(session.as_ref(), turn.as_ref(), &mut item)
.await
.map(|saved_path| {
let output_dir = saved_path
.parent()
.unwrap_or_else(|| turn.config.codex_home.clone());
ImageGenerationInstructions::new(output_dir.display(), saved_path.display())
.body()
});
let started_item = codex_protocol::items::ImageGenerationItem {
id: item.id.clone(),
status: "in_progress".to_string(),
revised_prompt: None,
result: String::new(),
saved_path: None,
};
session
.emit_turn_item_started(turn.as_ref(), &TurnItem::ImageGeneration(started_item))
.await;
session
.emit_turn_item_completed(turn.as_ref(), TurnItem::ImageGeneration(item))
.await;
output_hint
})
}
}
@@ -386,7 +401,9 @@ mod tests {
assert_eq!(end.action, expected.action);
}
struct ImageGenerationExtensionExecutor;
struct ImageGenerationExtensionExecutor {
output_hint: Arc<Mutex<Option<String>>>,
}
#[async_trait::async_trait]
impl codex_extension_api::ToolExecutor<codex_tools::ToolCall> for ImageGenerationExtensionExecutor {
@@ -409,13 +426,15 @@ mod tests {
&self,
call: codex_tools::ToolCall,
) -> Result<Box<dyn codex_tools::ToolOutput>, codex_tools::FunctionCallError> {
call.turn_item_emitter
let output_hint = call
.turn_item_emitter
.image_generation_completed(
call.call_id,
"A tiny blue square".to_string(),
"cG5n".to_string(),
)
.await;
*self.output_hint.lock().await = output_hint;
Ok(Box::new(codex_tools::JsonToolOutput::new(
json!({ "ok": true }),
)))
@@ -424,7 +443,10 @@ mod tests {
#[tokio::test]
async fn image_generation_publication_is_finalized_by_core() {
let handler = ExtensionToolAdapter::new(Arc::new(ImageGenerationExtensionExecutor));
let output_hint = Arc::new(Mutex::new(None));
let handler = ExtensionToolAdapter::new(Arc::new(ImageGenerationExtensionExecutor {
output_hint: Arc::clone(&output_hint),
}));
let (session, turn, rx) = crate::session::tests::make_session_and_context_with_rx().await;
let expected_path = crate::stream_events_utils::image_generation_artifact_path(
&turn.config.codex_home,
@@ -448,8 +470,6 @@ mod tests {
.await
.expect("extension call should succeed");
let instructions = rx.recv().await.expect("image path instructions event");
assert!(matches!(instructions.msg, EventMsg::RawResponseItem(_)));
let started = rx.recv().await.expect("item started event");
let EventMsg::ItemStarted(started) = started.msg else {
panic!("expected item started event");
@@ -490,8 +510,20 @@ mod tests {
}
);
assert_eq!(
std::fs::read(expected_path).expect("generated artifact should be saved"),
std::fs::read(&expected_path).expect("generated artifact should be saved"),
b"png"
);
assert_eq!(
*output_hint.lock().await,
Some(format!(
"Generated images are saved to {} as {} by default.\n\
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
expected_path
.parent()
.expect("generated image path should have a parent")
.display(),
expected_path.display(),
))
);
}
}

View File

@@ -13,6 +13,7 @@ pub use capabilities::ResponseItemInjector;
pub use codex_tools::ConversationHistory;
pub use codex_tools::ExtensionTurnItem;
pub use codex_tools::FunctionCallError;
pub use codex_tools::ImageGenerationCompletionFuture;
pub use codex_tools::JsonToolOutput;
pub use codex_tools::NoopTurnItemEmitter;
pub use codex_tools::ResponsesApiTool;

View File

@@ -26,6 +26,7 @@ use crate::IMAGE_GEN_NAMESPACE;
use crate::IMAGEGEN_TOOL_NAME;
const RESULT: &str = "cG5n";
const OUTPUT_HINT: &str = "Generated images are saved to /tmp as /tmp/call-1.png by default.";
#[test]
fn uses_reserved_image_gen_namespace() {
@@ -54,9 +55,10 @@ fn generate_uses_fixed_request_defaults() {
}
#[test]
fn generated_output_returns_image_input() {
fn generated_output_returns_image_input_and_output_hint() {
let output = GeneratedImageOutput {
result: RESULT.to_string(),
output_hint: Some(OUTPUT_HINT.to_string()),
};
let ResponseInputItem::FunctionCallOutput {
@@ -71,10 +73,15 @@ fn generated_output_returns_image_input() {
};
assert_eq!(
content_items,
vec![FunctionCallOutputContentItem::InputImage {
image_url: format!("data:image/png;base64,{RESULT}"),
detail: Some(DEFAULT_IMAGE_DETAIL),
}]
vec![
FunctionCallOutputContentItem::InputImage {
image_url: format!("data:image/png;base64,{RESULT}"),
detail: Some(DEFAULT_IMAGE_DETAIL),
},
FunctionCallOutputContentItem::InputText {
text: OUTPUT_HINT.to_string(),
},
]
);
}

View File

@@ -96,10 +96,14 @@ impl ToolExecutor<ToolCall> for ImageGenerationTool {
"image generation returned no image data".to_string(),
));
};
call.turn_item_emitter
let output_hint = call
.turn_item_emitter
.image_generation_completed(call.call_id.clone(), args.prompt, result.clone())
.await;
Ok(Box::new(GeneratedImageOutput { result }))
Ok(Box::new(GeneratedImageOutput {
result,
output_hint,
}))
}
}
@@ -280,6 +284,7 @@ fn imagegen_tool_spec() -> ToolSpec {
/// Tool output for a single generated image, with an optional text hint.
struct GeneratedImageOutput {
// Image payload, embedded verbatim after `data:image/png;base64,` when the
// response item is built. NOTE(review): presumably base64-encoded PNG bytes.
result: String,
// Optional text returned by `image_generation_completed`; when present it is
// appended as an `InputText` content item after the image.
output_hint: Option<String>,
}
impl ToolOutput for GeneratedImageOutput {
@@ -293,12 +298,17 @@ impl ToolOutput for GeneratedImageOutput {
true
}
/// Returns generated bytes for model follow-up and later image edits.
/// Returns generated bytes and persisted-artifact context for model follow-up.
fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
let content = vec![FunctionCallOutputContentItem::InputImage {
let mut content = vec![FunctionCallOutputContentItem::InputImage {
image_url: format!("data:image/png;base64,{}", self.result),
detail: Some(DEFAULT_IMAGE_DETAIL),
}];
if let Some(output_hint) = &self.output_hint {
content.push(FunctionCallOutputContentItem::InputText {
text: output_hint.clone(),
});
}
ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload {

View File

@@ -63,6 +63,7 @@ pub use responses_api::mcp_tool_to_responses_api_tool;
pub use responses_api::tool_definition_to_responses_api_tool;
pub use tool_call::ConversationHistory;
pub use tool_call::ExtensionTurnItem;
pub use tool_call::ImageGenerationCompletionFuture;
pub use tool_call::NoopTurnItemEmitter;
pub use tool_call::ToolCall;
pub use tool_call::TurnItemEmissionFuture;

View File

@@ -29,6 +29,10 @@ impl ConversationHistory {
/// Future returned when an extension tool emits a visible turn-item lifecycle event.
pub type TurnItemEmissionFuture<'a> = Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
/// Future returned when an image-generation extension publishes completed image bytes.
///
/// The future resolves to an `Option<String>`: `Some` carries text that the
/// extension can include in its model-facing function output, and `None`
/// means no such text is available.
pub type ImageGenerationCompletionFuture<'a> =
Pin<Box<dyn Future<Output = Option<String>> + Send + 'a>>;
/// Visible turn items that an extension fully owns and may emit as-is.
///
/// Add only item kinds that require no additional host finalization before
@@ -49,14 +53,17 @@ pub trait TurnItemEmitter: Send + Sync {
/// Emits the completion of one visible turn item.
fn emit_completed<'a>(&'a self, item: ExtensionTurnItem) -> TurnItemEmissionFuture<'a>;
/// Publishes image bytes for host finalization and visible completion.
/// Publishes image bytes for host persistence and visible completion.
///
/// Returns persisted-artifact context for the extension's model-facing
/// function output when the host saves the generated image successfully.
fn image_generation_completed<'a>(
&'a self,
_call_id: String,
_prompt: String,
_result: String,
) -> TurnItemEmissionFuture<'a> {
Box::pin(std::future::ready(()))
) -> ImageGenerationCompletionFuture<'a> {
Box::pin(std::future::ready(None))
}
}