mirror of
https://github.com/openai/codex.git
synced 2026-05-29 23:40:29 +00:00
Add feature-gated standalone image generation extension (#24723)
## Why Add a standalone image generation path that can be exercised independently of hosted Responses image generation, while retaining the hosted tool as fallback unless the extension is actually available to the model. ## What changed - Added the `codex-image-generation-extension` crate with standalone generate/edit execution, prior-image selection for edits, model-visible image output, and local generated-image persistence. - Installed the extension in app-server behind the disabled-by-default `imagegenext` feature and backend eligibility checks. - Updated core tool planning so eligible `image_gen.imagegen` exposure replaces hosted `image_generation`, while unavailable configurations retain hosted fallback. - Added coverage for extension behavior, edit history reuse, feature gating, auth eligibility, and hosted-tool replacement. - The extension is installed through app-server only in this PR; other execution paths retain hosted image generation because hosted replacement occurs only when the standalone executor is actually registered and model-visible. - The initial extension contract intentionally fixes the image model to `gpt-image-2` and uses automatic image parameters. - Native generated-image history/card parity and rollout persistence cleanup are intentionally deferred follow-up work. ## Validation - `just test -p codex-image-generation-extension` - `just test -p codex-features` - `just test -p codex-core hosted_tools_follow_provider_auth_model_and_config_gates` - `just test -p codex-app-server` - `just fix -p codex-image-generation-extension -p codex-features -p codex-core -p codex-app-server` - `just fmt` - `just bazel-lock-update` - `just bazel-lock-check` --------- Co-authored-by: jif-oai <jif@openai.com>
This commit is contained in:
26
codex-rs/Cargo.lock
generated
26
codex-rs/Cargo.lock
generated
@@ -1928,6 +1928,7 @@ dependencies = [
|
||||
"codex-git-utils",
|
||||
"codex-guardian",
|
||||
"codex-hooks",
|
||||
"codex-image-generation-extension",
|
||||
"codex-login",
|
||||
"codex-mcp",
|
||||
"codex-memories-extension",
|
||||
@@ -3039,6 +3040,31 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-image-generation-extension"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.22.1",
|
||||
"codex-api",
|
||||
"codex-core",
|
||||
"codex-extension-api",
|
||||
"codex-features",
|
||||
"codex-login",
|
||||
"codex-model-provider",
|
||||
"codex-model-provider-info",
|
||||
"codex-protocol",
|
||||
"codex-tools",
|
||||
"http 1.4.0",
|
||||
"pretty_assertions",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-install-context"
|
||||
version = "0.0.0"
|
||||
|
||||
@@ -47,6 +47,7 @@ members = [
|
||||
"ext/extension-api",
|
||||
"ext/goal",
|
||||
"ext/guardian",
|
||||
"ext/image-generation",
|
||||
"ext/memories",
|
||||
"ext/web-search",
|
||||
"external-agent-migration",
|
||||
@@ -165,6 +166,7 @@ codex-execpolicy = { path = "execpolicy" }
|
||||
codex-extension-api = { path = "ext/extension-api" }
|
||||
codex-goal-extension = { path = "ext/goal" }
|
||||
codex-guardian = { path = "ext/guardian" }
|
||||
codex-image-generation-extension = { path = "ext/image-generation" }
|
||||
codex-external-agent-migration = { path = "external-agent-migration" }
|
||||
codex-external-agent-sessions = { path = "external-agent-sessions" }
|
||||
codex-experimental-api-macros = { path = "codex-experimental-api-macros" }
|
||||
|
||||
@@ -54,6 +54,7 @@ codex-backend-client = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-chatgpt = { workspace = true }
|
||||
codex-login = { workspace = true }
|
||||
codex-image-generation-extension = { workspace = true }
|
||||
codex-memories-extension = { workspace = true }
|
||||
codex-web-search-extension = { workspace = true }
|
||||
codex-memories-write = { workspace = true }
|
||||
|
||||
@@ -31,7 +31,8 @@ where
|
||||
let mut builder = ExtensionRegistryBuilder::<Config>::with_event_sink(event_sink);
|
||||
codex_guardian::install(&mut builder, guardian_agent_spawner);
|
||||
codex_memories_extension::install(&mut builder, codex_otel::global());
|
||||
codex_web_search_extension::install(&mut builder, auth_manager);
|
||||
codex_web_search_extension::install(&mut builder, auth_manager.clone());
|
||||
codex_image_generation_extension::install(&mut builder, auth_manager);
|
||||
Arc::new(builder.build())
|
||||
}
|
||||
|
||||
|
||||
@@ -473,6 +473,9 @@
|
||||
"image_generation": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"imagegenext": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"in_app_browser": {
|
||||
"type": "boolean"
|
||||
},
|
||||
@@ -4564,6 +4567,9 @@
|
||||
"image_generation": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"imagegenext": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"in_app_browser": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
||||
@@ -85,6 +85,8 @@ use std::sync::Arc;
|
||||
use tracing::warn;
|
||||
|
||||
const MULTI_AGENT_V2_NAMESPACE_DESCRIPTION: &str = "Tools for spawning and managing sub-agents.";
|
||||
const IMAGE_GEN_NAMESPACE: &str = "image_gen";
|
||||
const IMAGEGEN_TOOL_NAME: &str = "imagegen";
|
||||
|
||||
type PlannedRuntime = Arc<dyn CoreToolRuntime>;
|
||||
|
||||
@@ -257,7 +259,9 @@ fn hosted_model_tool_specs(context: &CoreToolPlanContext<'_>) -> Vec<ToolSpec> {
|
||||
}) {
|
||||
specs.push(web_search_tool);
|
||||
}
|
||||
if image_generation_tool_enabled(turn_context) {
|
||||
if image_generation_tool_enabled(turn_context)
|
||||
&& !standalone_image_generation_available(turn_context, context.extension_tool_executors)
|
||||
{
|
||||
specs.push(create_image_generation_tool("png"));
|
||||
}
|
||||
specs
|
||||
@@ -316,21 +320,41 @@ fn agent_jobs_worker_tools_enabled(turn_context: &TurnContext) -> bool {
|
||||
}
|
||||
|
||||
fn image_generation_tool_enabled(turn_context: &TurnContext) -> bool {
|
||||
image_generation_runtime_enabled(turn_context)
|
||||
&& turn_context
|
||||
.features
|
||||
.get()
|
||||
.enabled(Feature::ImageGeneration)
|
||||
}
|
||||
|
||||
fn image_generation_runtime_enabled(turn_context: &TurnContext) -> bool {
|
||||
turn_context
|
||||
.auth_manager
|
||||
.as_deref()
|
||||
.is_some_and(AuthManager::current_auth_uses_codex_backend)
|
||||
&& turn_context.provider.capabilities().image_generation
|
||||
&& turn_context
|
||||
.features
|
||||
.get()
|
||||
.enabled(Feature::ImageGeneration)
|
||||
&& turn_context
|
||||
.model_info
|
||||
.input_modalities
|
||||
.contains(&InputModality::Image)
|
||||
}
|
||||
|
||||
fn standalone_image_generation_model_visible(turn_context: &TurnContext) -> bool {
|
||||
image_generation_runtime_enabled(turn_context)
|
||||
&& turn_context.features.get().enabled(Feature::ImageGenExt)
|
||||
&& namespace_tools_enabled(turn_context)
|
||||
}
|
||||
|
||||
fn standalone_image_generation_available(
|
||||
turn_context: &TurnContext,
|
||||
extension_tools: &[Arc<dyn ToolExecutor<ExtensionToolCall>>],
|
||||
) -> bool {
|
||||
standalone_image_generation_model_visible(turn_context)
|
||||
&& extension_tools.iter().any(|executor| {
|
||||
executor.tool_name() == ToolName::namespaced(IMAGE_GEN_NAMESPACE, IMAGEGEN_TOOL_NAME)
|
||||
})
|
||||
}
|
||||
|
||||
fn wait_agent_timeout_options(turn_context: &TurnContext) -> WaitAgentTimeoutOptions {
|
||||
if multi_agent_v2_enabled(turn_context) {
|
||||
return WaitAgentTimeoutOptions {
|
||||
@@ -839,6 +863,11 @@ fn append_extension_tool_executors(
|
||||
|
||||
for executor in executors.iter().cloned() {
|
||||
let tool_name = executor.tool_name();
|
||||
if tool_name == ToolName::namespaced(IMAGE_GEN_NAMESPACE, IMAGEGEN_TOOL_NAME)
|
||||
&& !standalone_image_generation_model_visible(turn_context)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if !reserved_tool_names.insert(tool_name.clone()) {
|
||||
warn!("Skipping extension tool `{tool_name}`: tool already registered");
|
||||
continue;
|
||||
|
||||
@@ -960,6 +960,16 @@ async fn hosted_tools_follow_provider_auth_model_and_config_gates() {
|
||||
.await;
|
||||
image_generation.assert_visible_contains(&["image_generation"]);
|
||||
|
||||
let extension_flag_without_imagegen_tool = probe(|turn| {
|
||||
use_chatgpt_auth(turn);
|
||||
set_feature(turn, Feature::ImageGeneration, /*enabled*/ true);
|
||||
set_feature(turn, Feature::ImageGenExt, /*enabled*/ true);
|
||||
turn.model_info.input_modalities = vec![InputModality::Image];
|
||||
})
|
||||
.await;
|
||||
extension_flag_without_imagegen_tool.assert_visible_contains(&["image_generation"]);
|
||||
extension_flag_without_imagegen_tool.assert_visible_lacks(&["image_gen"]);
|
||||
|
||||
let live_web_search = probe(|turn| {
|
||||
set_web_search_mode(turn, WebSearchMode::Live);
|
||||
turn.model_info.web_search_tool_type = WebSearchToolType::TextAndImage;
|
||||
|
||||
9
codex-rs/ext/image-generation/BUILD.bazel
Normal file
9
codex-rs/ext/image-generation/BUILD.bazel
Normal file
@@ -0,0 +1,9 @@
|
||||
load("//:defs.bzl", "codex_rust_crate")
|
||||
|
||||
codex_rust_crate(
|
||||
name = "image-generation",
|
||||
crate_name = "codex_image_generation_extension",
|
||||
compile_data = [
|
||||
"imagegen_description.md",
|
||||
],
|
||||
)
|
||||
37
codex-rs/ext/image-generation/Cargo.toml
Normal file
37
codex-rs/ext/image-generation/Cargo.toml
Normal file
@@ -0,0 +1,37 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "codex-image-generation-extension"
|
||||
version.workspace = true
|
||||
|
||||
[lib]
|
||||
name = "codex_image_generation_extension"
|
||||
path = "src/lib.rs"
|
||||
doctest = false
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
codex-api = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-extension-api = { workspace = true }
|
||||
codex-features = { workspace = true }
|
||||
codex-login = { workspace = true }
|
||||
codex-model-provider = { workspace = true }
|
||||
codex-model-provider-info = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-tools = { workspace = true }
|
||||
http = { workspace = true }
|
||||
schemars = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs"] }
|
||||
tracing = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||
11
codex-rs/ext/image-generation/imagegen_description.md
Normal file
11
codex-rs/ext/image-generation/imagegen_description.md
Normal file
@@ -0,0 +1,11 @@
|
||||
The `image_gen.imagegen` tool enables image generation from descriptions and editing of existing images based on specific instructions. Use it when:
|
||||
|
||||
- The user requests an image based on a scene description, such as a diagram, portrait, comic, meme, or any other visual.
|
||||
- The user wants to modify an attached or previously generated image with specific changes, including adding or removing elements, altering colors, improving quality/resolution, or transforming the style (e.g., cartoon, oil painting).
|
||||
|
||||
Guidelines:
|
||||
- Set `action` to `generate` when the user asks for a brand new image.
|
||||
- Set `action` to `edit` when the user asks to modify an existing image from the conversation history.
|
||||
- Directly generate the image without reconfirmation or clarification.
|
||||
- After each image generation, do not mention anything related to downloading. Do not summarize the image. Do not ask follow-up questions. Do not say ANYTHING after you generate an image.
|
||||
- Always use this tool for image editing unless the user explicitly requests otherwise. Do not use the `python` tool for image editing unless specifically instructed.
|
||||
60
codex-rs/ext/image-generation/src/backend.rs
Normal file
60
codex-rs/ext/image-generation/src/backend.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
use codex_api::ImageEditRequest;
|
||||
use codex_api::ImageGenerationRequest;
|
||||
use codex_api::ImageResponse;
|
||||
use codex_api::ImagesClient;
|
||||
use codex_api::ReqwestTransport;
|
||||
use codex_login::default_client::build_reqwest_client;
|
||||
use codex_model_provider::SharedModelProvider;
|
||||
use http::HeaderMap;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct CodexImagesBackend {
|
||||
provider: SharedModelProvider,
|
||||
}
|
||||
|
||||
impl CodexImagesBackend {
|
||||
/// Creates a backend that sends image requests through the active model provider.
|
||||
pub(crate) fn new(provider: SharedModelProvider) -> Self {
|
||||
Self { provider }
|
||||
}
|
||||
|
||||
/// Resolves the provider and auth required for the current image API request.
|
||||
async fn client(&self) -> Result<ImagesClient<ReqwestTransport>, String> {
|
||||
let provider = self
|
||||
.provider
|
||||
.api_provider()
|
||||
.await
|
||||
.map_err(|err| err.to_string())?;
|
||||
let auth = self
|
||||
.provider
|
||||
.api_auth()
|
||||
.await
|
||||
.map_err(|err| err.to_string())?;
|
||||
Ok(ImagesClient::new(
|
||||
ReqwestTransport::new(build_reqwest_client()),
|
||||
provider,
|
||||
auth,
|
||||
))
|
||||
}
|
||||
|
||||
/// Sends a standalone image generation request through the configured Images client.
|
||||
pub(crate) async fn generate(
|
||||
&self,
|
||||
request: ImageGenerationRequest,
|
||||
) -> Result<ImageResponse, String> {
|
||||
self.client()
|
||||
.await?
|
||||
.generate(&request, HeaderMap::new())
|
||||
.await
|
||||
.map_err(|err| err.to_string())
|
||||
}
|
||||
|
||||
/// Sends a standalone image edit request through the configured Images client.
|
||||
pub(crate) async fn edit(&self, request: ImageEditRequest) -> Result<ImageResponse, String> {
|
||||
self.client()
|
||||
.await?
|
||||
.edit(&request, HeaderMap::new())
|
||||
.await
|
||||
.map_err(|err| err.to_string())
|
||||
}
|
||||
}
|
||||
99
codex-rs/ext/image-generation/src/extension.rs
Normal file
99
codex-rs/ext/image-generation/src/extension.rs
Normal file
@@ -0,0 +1,99 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_core::config::Config;
|
||||
use codex_extension_api::ConfigContributor;
|
||||
use codex_extension_api::ExtensionData;
|
||||
use codex_extension_api::ExtensionRegistryBuilder;
|
||||
use codex_extension_api::ThreadLifecycleContributor;
|
||||
use codex_extension_api::ThreadStartInput;
|
||||
use codex_extension_api::ToolCall;
|
||||
use codex_extension_api::ToolContributor;
|
||||
use codex_extension_api::ToolExecutor;
|
||||
use codex_features::Feature;
|
||||
use codex_login::AuthManager;
|
||||
use codex_model_provider::create_model_provider;
|
||||
use codex_model_provider_info::ModelProviderInfo;
|
||||
|
||||
use crate::backend::CodexImagesBackend;
|
||||
use crate::tool::ImageGenerationTool;
|
||||
use crate::tool::generated_image_output_dir;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ImageGenerationExtension {
|
||||
auth_manager: Arc<AuthManager>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ImageGenerationExtensionConfig {
|
||||
enabled: bool,
|
||||
provider: ModelProviderInfo,
|
||||
codex_home: PathBuf,
|
||||
}
|
||||
|
||||
impl From<&Config> for ImageGenerationExtensionConfig {
|
||||
/// Resolves whether standalone image generation should be available for a thread.
|
||||
fn from(config: &Config) -> Self {
|
||||
Self {
|
||||
enabled: config.features.enabled(Feature::ImageGenExt)
|
||||
&& config.model_provider.is_openai(),
|
||||
provider: config.model_provider.clone(),
|
||||
codex_home: config.codex_home.to_path_buf(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ThreadLifecycleContributor<Config> for ImageGenerationExtension {
|
||||
/// Seeds image-generation availability when a thread begins.
|
||||
async fn on_thread_start(&self, input: ThreadStartInput<'_, Config>) {
|
||||
input
|
||||
.thread_store
|
||||
.insert(ImageGenerationExtensionConfig::from(input.config));
|
||||
}
|
||||
}
|
||||
|
||||
impl ConfigContributor<Config> for ImageGenerationExtension {
|
||||
/// Refreshes image-generation availability after thread configuration changes.
|
||||
fn on_config_changed(
|
||||
&self,
|
||||
_session_store: &ExtensionData,
|
||||
thread_store: &ExtensionData,
|
||||
_previous_config: &Config,
|
||||
new_config: &Config,
|
||||
) {
|
||||
thread_store.insert(ImageGenerationExtensionConfig::from(new_config));
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolContributor for ImageGenerationExtension {
|
||||
/// Creates the image-generation tool exposed by this installed extension.
|
||||
fn tools(
|
||||
&self,
|
||||
_session_store: &ExtensionData,
|
||||
thread_store: &ExtensionData,
|
||||
) -> Vec<Arc<dyn ToolExecutor<ToolCall>>> {
|
||||
let Some(config) = thread_store.get::<ImageGenerationExtensionConfig>() else {
|
||||
return Vec::new();
|
||||
};
|
||||
if !config.enabled || !self.auth_manager.current_auth_uses_codex_backend() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
vec![Arc::new(ImageGenerationTool::new(
|
||||
CodexImagesBackend::new(create_model_provider(
|
||||
config.provider.clone(),
|
||||
Some(self.auth_manager.clone()),
|
||||
)),
|
||||
generated_image_output_dir(&config.codex_home, thread_store.level_id()),
|
||||
))]
|
||||
}
|
||||
}
|
||||
|
||||
/// Installs the feature-gated standalone image-generation extension contributors.
|
||||
pub fn install(registry: &mut ExtensionRegistryBuilder<Config>, auth_manager: Arc<AuthManager>) {
|
||||
let extension = Arc::new(ImageGenerationExtension { auth_manager });
|
||||
registry.thread_lifecycle_contributor(extension.clone());
|
||||
registry.config_contributor(extension.clone());
|
||||
registry.tool_contributor(extension);
|
||||
}
|
||||
8
codex-rs/ext/image-generation/src/lib.rs
Normal file
8
codex-rs/ext/image-generation/src/lib.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
mod backend;
|
||||
mod extension;
|
||||
mod tool;
|
||||
|
||||
pub use extension::install;
|
||||
|
||||
/// Namespace under which the standalone image tool is advertised.
pub(crate) const IMAGE_GEN_NAMESPACE: &str = "image_gen";

/// Name of the image tool inside `IMAGE_GEN_NAMESPACE`.
pub(crate) const IMAGEGEN_TOOL_NAME: &str = "imagegen";
|
||||
341
codex-rs/ext/image-generation/src/tests.rs
Normal file
341
codex-rs/ext/image-generation/src/tests.rs
Normal file
@@ -0,0 +1,341 @@
|
||||
use codex_api::ImageBackground;
|
||||
use codex_api::ImageEditRequest;
|
||||
use codex_api::ImageGenerationRequest;
|
||||
use codex_api::ImageQuality;
|
||||
use codex_api::ImageUrl;
|
||||
use codex_extension_api::ToolOutput;
|
||||
use codex_extension_api::ToolPayload;
|
||||
use codex_extension_api::ToolSpec;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_tools::ResponsesApiNamespaceTool;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::GeneratedImageOutput;
|
||||
use super::ImageRequest;
|
||||
use super::ImagegenAction;
|
||||
use super::ImagegenArgs;
|
||||
use super::generated_image_output_dir;
|
||||
use super::imagegen_tool_spec;
|
||||
use super::persist_generated_image;
|
||||
use super::request_for_action;
|
||||
use crate::IMAGE_GEN_NAMESPACE;
|
||||
use crate::IMAGEGEN_TOOL_NAME;
|
||||
|
||||
const RESULT: &str = "cG5n";
|
||||
|
||||
/// The advertised spec must be a namespace tool named after the crate constants.
#[test]
fn uses_reserved_image_gen_namespace() {
    let ToolSpec::Namespace(namespace) = imagegen_tool_spec() else {
        panic!("imagegen should advertise a namespace tool");
    };
    assert_eq!(namespace.name, IMAGE_GEN_NAMESPACE);

    // Only the first tool in the namespace is inspected.
    let first_tool = &namespace.tools[0];
    let ResponsesApiNamespaceTool::Function(function) = first_tool;
    assert_eq!(function.name, IMAGEGEN_TOOL_NAME);
}
|
||||
|
||||
/// A generate action builds a request with the fixed model and automatic parameters.
#[test]
fn generate_uses_fixed_request_defaults() {
    let built = request_for_action(&args(ImagegenAction::Generate, "paint a moonlit lake"), &[])
        .expect("generation request should build");

    let expected = ImageRequest::Generate(ImageGenerationRequest {
        prompt: "paint a moonlit lake".to_string(),
        background: Some(ImageBackground::Auto),
        model: "gpt-image-2".to_string(),
        n: None,
        quality: Some(ImageQuality::Auto),
        size: Some("auto".to_string()),
    });
    assert_eq!(built, expected);
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn generated_output_returns_image_input_and_persists_artifact() {
|
||||
let tempdir = tempfile::tempdir().expect("tempdir");
|
||||
let output_hint = persist_generated_image(tempdir.path(), "call-1", RESULT)
|
||||
.await
|
||||
.expect("generated image should persist");
|
||||
let output = GeneratedImageOutput {
|
||||
result: RESULT.to_string(),
|
||||
output_hint: Some(output_hint),
|
||||
};
|
||||
|
||||
let ResponseInputItem::FunctionCallOutput {
|
||||
output: response_output,
|
||||
..
|
||||
} = output.to_response_item("call-1", &function_payload())
|
||||
else {
|
||||
panic!("imagegen should return function tool output");
|
||||
};
|
||||
let FunctionCallOutputBody::ContentItems(content_items) = response_output.body else {
|
||||
panic!("imagegen output should contain generated image bytes");
|
||||
};
|
||||
assert_eq!(
|
||||
content_items,
|
||||
vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: format!("data:image/png;base64,{RESULT}"),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: format!(
|
||||
"Generated images are saved to {} as {} by default.\n\
|
||||
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
|
||||
tempdir.path().display(),
|
||||
tempdir.path().join("call-1.png").display(),
|
||||
),
|
||||
},
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
std::fs::read(tempdir.path().join("call-1.png")).expect("saved generated image"),
|
||||
b"png"
|
||||
);
|
||||
}
|
||||
|
||||
/// With user uploads in the middle of the history, the edit request carries the
/// two uploads followed by the three newest generated images after them.
#[test]
fn edit_matches_context_selector_for_generated_images_after_latest_user_anchor() {
    // The only user message in the history; it carries two uploaded images.
    let user_uploads = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![
            ContentItem::InputImage {
                image_url: "data:image/png;base64,u1".to_string(),
                detail: None,
            },
            ContentItem::InputImage {
                image_url: "data:image/png;base64,u2".to_string(),
                detail: None,
            },
        ],
        phase: None,
    };
    let history = vec![
        generated_item("g1"),
        generated_item("g2"),
        generated_item("g3"),
        user_uploads,
        generated_item("g4"),
        generated_item("g5"),
        generated_item("g6"),
        generated_item("g7"),
    ];

    let expected = expected_edit_request(
        "change the lighting",
        &[
            "data:image/png;base64,u1",
            "data:image/png;base64,u2",
            "data:image/png;base64,g5",
            "data:image/png;base64,g6",
            "data:image/png;base64,g7",
        ],
    );
    assert_eq!(edit_request("change the lighting", &history), expected);
}
|
||||
|
||||
/// When the user message alone holds five images, the oldest upload is dropped
/// so the later generated image still appears in the edit request.
#[test]
fn edit_preserves_a_generated_image_when_user_anchor_fills_the_limit() {
    let mut uploads = Vec::new();
    for image in ["a", "b", "c", "d", "e"] {
        uploads.push(ContentItem::InputImage {
            image_url: format!("data:image/png;base64,{image}"),
            detail: None,
        });
    }
    let history = vec![
        ResponseItem::Message {
            id: None,
            role: "user".to_string(),
            content: uploads,
            phase: None,
        },
        generated_item("generated"),
    ];

    let expected = expected_edit_request(
        "edit the last generated image",
        &[
            "data:image/png;base64,b",
            "data:image/png;base64,c",
            "data:image/png;base64,d",
            "data:image/png;base64,e",
            "data:image/png;base64,generated",
        ],
    );
    assert_eq!(
        edit_request("edit the last generated image", &history),
        expected
    );
}
|
||||
|
||||
/// A text-only user follow-up does not hide the image uploaded in the earlier
/// user message.
#[test]
fn edit_uses_latest_user_upload_before_a_text_only_follow_up() {
    let upload_message = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputImage {
            image_url: "data:image/png;base64,user".to_string(),
            detail: None,
        }],
        phase: None,
    };
    let text_follow_up = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "edit this image".to_string(),
        }],
        phase: None,
    };
    let history = vec![upload_message, text_follow_up];

    let expected = expected_edit_request("change the lighting", &["data:image/png;base64,user"]);
    assert_eq!(edit_request("change the lighting", &history), expected);
}
|
||||
|
||||
/// The image returned by an earlier `image_gen.imagegen` function call is
/// reused as edit input.
#[test]
fn edit_reuses_images_from_prior_standalone_imagegen_calls() {
    let prior_call = ResponseItem::FunctionCall {
        id: None,
        name: IMAGEGEN_TOOL_NAME.to_string(),
        namespace: Some(IMAGE_GEN_NAMESPACE.to_string()),
        arguments: "{}".to_string(),
        call_id: "imagegen-1".to_string(),
    };
    let prior_output = generated_function_output("imagegen-1", "standalone");
    let history = vec![prior_call, prior_output];

    let expected =
        expected_edit_request("change the lighting", &["data:image/png;base64,standalone"]);
    assert_eq!(edit_request("change the lighting", &history), expected);
}
|
||||
|
||||
/// With six prior standalone calls, only the images from the five newest calls
/// are sent with the edit request.
#[test]
fn edit_keeps_newest_standalone_generated_images_when_over_limit() {
    // Each iteration appends one call/output pair: imagegen-1 .. imagegen-6.
    let mut history = Vec::new();
    for index in 1..=6 {
        let call_id = format!("imagegen-{index}");
        history.push(ResponseItem::FunctionCall {
            id: None,
            name: IMAGEGEN_TOOL_NAME.to_string(),
            namespace: Some(IMAGE_GEN_NAMESPACE.to_string()),
            arguments: "{}".to_string(),
            call_id: call_id.clone(),
        });
        history.push(generated_function_output(&call_id, &index.to_string()));
    }

    let expected = expected_edit_request(
        "change the lighting",
        &[
            "data:image/png;base64,2",
            "data:image/png;base64,3",
            "data:image/png;base64,4",
            "data:image/png;base64,5",
            "data:image/png;base64,6",
        ],
    );
    assert_eq!(edit_request("change the lighting", &history), expected);
}
|
||||
|
||||
/// An edit action with an empty history fails with a descriptive error.
#[test]
fn edit_without_image_history_returns_tool_error() {
    let edit_args = args(ImagegenAction::Edit, "change the lighting");
    let failure =
        request_for_action(&edit_args, &[]).expect_err("edit should require image context");

    assert_eq!(
        failure.to_string(),
        "image edit requested without any usable image in conversation history"
    );
}
|
||||
|
||||
/// The output directory sits under `generated_images` in the Codex home, with
/// the `/` in the thread id replaced by `_`.
#[test]
fn generated_image_output_dir_is_scoped_to_sanitized_thread_id() {
    let codex_home = std::path::Path::new("/tmp/codex-home");
    let expected = std::path::PathBuf::from("/tmp/codex-home/generated_images/thread_1");

    assert_eq!(generated_image_output_dir(codex_home, "thread/1"), expected);
}
|
||||
|
||||
fn args(action: ImagegenAction, prompt: &str) -> ImagegenArgs {
|
||||
ImagegenArgs {
|
||||
prompt: prompt.to_string(),
|
||||
action,
|
||||
}
|
||||
}
|
||||
|
||||
fn edit_request(prompt: &str, history: &[ResponseItem]) -> ImageEditRequest {
|
||||
let ImageRequest::Edit(request) =
|
||||
request_for_action(&args(ImagegenAction::Edit, prompt), history)
|
||||
.expect("edit request should build")
|
||||
else {
|
||||
panic!("expected edit request");
|
||||
};
|
||||
request
|
||||
}
|
||||
|
||||
fn expected_edit_request(prompt: &str, images: &[&str]) -> ImageEditRequest {
|
||||
ImageEditRequest {
|
||||
images: images
|
||||
.iter()
|
||||
.map(|image_url| ImageUrl {
|
||||
image_url: (*image_url).to_string(),
|
||||
})
|
||||
.collect(),
|
||||
prompt: prompt.to_string(),
|
||||
background: Some(ImageBackground::Auto),
|
||||
model: "gpt-image-2".to_string(),
|
||||
n: None,
|
||||
quality: Some(ImageQuality::Auto),
|
||||
size: Some("auto".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn generated_item(result: &str) -> ResponseItem {
|
||||
ResponseItem::ImageGenerationCall {
|
||||
id: format!("id-{result}"),
|
||||
status: "completed".to_string(),
|
||||
revised_prompt: None,
|
||||
result: result.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn generated_function_output(call_id: &str, result: &str) -> ResponseItem {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: format!("data:image/png;base64,{result}"),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: "generated image save hint".to_string(),
|
||||
},
|
||||
]),
|
||||
success: Some(true),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn function_payload() -> ToolPayload {
|
||||
ToolPayload::Function {
|
||||
arguments: "{}".to_string(),
|
||||
}
|
||||
}
|
||||
395
codex-rs/ext/image-generation/src/tool.rs
Normal file
395
codex-rs/ext/image-generation/src/tool.rs
Normal file
@@ -0,0 +1,395 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_api::ImageBackground;
|
||||
use codex_api::ImageEditRequest;
|
||||
use codex_api::ImageGenerationRequest;
|
||||
use codex_api::ImageQuality;
|
||||
use codex_api::ImageUrl;
|
||||
use codex_extension_api::FunctionCallError;
|
||||
use codex_extension_api::ToolCall;
|
||||
use codex_extension_api::ToolExecutor;
|
||||
use codex_extension_api::ToolName;
|
||||
use codex_extension_api::ToolOutput;
|
||||
use codex_extension_api::ToolPayload;
|
||||
use codex_extension_api::ToolSpec;
|
||||
use codex_extension_api::parse_tool_input_schema;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_tools::ResponsesApiNamespace;
|
||||
use codex_tools::ResponsesApiNamespaceTool;
|
||||
use codex_tools::ResponsesApiTool;
|
||||
use codex_tools::ToolExposure;
|
||||
use codex_tools::default_namespace_description;
|
||||
use schemars::JsonSchema;
|
||||
use schemars::r#gen::SchemaSettings;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Map;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::IMAGE_GEN_NAMESPACE;
|
||||
use crate::IMAGEGEN_TOOL_NAME;
|
||||
use crate::backend::CodexImagesBackend;
|
||||
|
||||
const IMAGE_MODEL: &str = "gpt-image-2";
|
||||
const MAX_EDIT_IMAGES: usize = 5;
|
||||
const IMAGEGEN_DESCRIPTION: &str = include_str!("../imagegen_description.md");
|
||||
const GENERATED_IMAGE_ARTIFACTS_DIR: &str = "generated_images";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ImageGenerationTool {
|
||||
backend: CodexImagesBackend,
|
||||
output_dir: PathBuf,
|
||||
}
|
||||
|
||||
impl ImageGenerationTool {
|
||||
/// Creates an image-generation tool backed by an image API executor.
|
||||
pub(crate) fn new(backend: CodexImagesBackend, output_dir: PathBuf) -> Self {
|
||||
Self {
|
||||
backend,
|
||||
output_dir,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Arguments the model supplies for one `imagegen` call; `parse_args` deserializes
// them from the call's JSON argument string.
//
// NOTE: plain `//` comments are used on purpose. This type derives `JsonSchema`,
// and schemars copies `///` doc comments into the generated schema as
// descriptions, which would change the schema built by `imagegen_tool_spec`.
#[derive(Debug, Deserialize, JsonSchema)]
|
||||
// Reject any key other than `prompt` and `action` during deserialization.
#[serde(deny_unknown_fields)]
|
||||
struct ImagegenArgs {
|
||||
// Prompt text copied unchanged into the generate or edit request.
prompt: String,
|
||||
// Which image operation to run; see `ImagegenAction`.
action: ImagegenAction,
|
||||
}
|
||||
|
||||
// Operation selected by the model for an `imagegen` call.
//
// NOTE: plain `//` comments are used on purpose. This type derives `JsonSchema`,
// and schemars copies `///` doc comments into the generated schema as descriptions.
#[derive(Debug, Deserialize, JsonSchema)]
|
||||
// Variants are spelled in lowercase on the wire: "generate" and "edit".
#[serde(rename_all = "lowercase")]
|
||||
enum ImagegenAction {
|
||||
// Create a new image from the prompt alone.
Generate,
|
||||
// Edit images selected from the conversation history (see `edit_images`).
Edit,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ToolExecutor<ToolCall> for ImageGenerationTool {
|
||||
/// Keeps the tool in the existing image-generation Responses namespace.
|
||||
fn tool_name(&self) -> ToolName {
|
||||
ToolName::namespaced(IMAGE_GEN_NAMESPACE, IMAGEGEN_TOOL_NAME)
|
||||
}
|
||||
|
||||
/// Advertises the model contract: a rewritten prompt and semantic action.
|
||||
fn spec(&self) -> ToolSpec {
|
||||
imagegen_tool_spec()
|
||||
}
|
||||
|
||||
/// Keeps this model-facing tool out of the nested code-mode tool surface.
|
||||
fn exposure(&self) -> ToolExposure {
|
||||
ToolExposure::DirectModelOnly
|
||||
}
|
||||
|
||||
/// Executes the selected image operation and returns the completed image result.
|
||||
async fn handle(&self, call: ToolCall) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
|
||||
let args = parse_args(&call)?;
|
||||
let request = request_for_action(&args, call.conversation_history.items())?;
|
||||
|
||||
let response = match request {
|
||||
ImageRequest::Generate(request) => self.backend.generate(request).await,
|
||||
ImageRequest::Edit(request) => self.backend.edit(request).await,
|
||||
}
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("image generation failed: {err}"))
|
||||
})?;
|
||||
let Some(result) = response.data.into_iter().next().map(|data| data.b64_json) else {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"image generation returned no image data".to_string(),
|
||||
));
|
||||
};
|
||||
let output_hint =
|
||||
match persist_generated_image(&self.output_dir, &call.call_id, &result).await {
|
||||
Ok(output_hint) => Some(output_hint),
|
||||
Err(err) => {
|
||||
tracing::warn!(
|
||||
call_id = %call.call_id,
|
||||
output_dir = %self.output_dir.display(),
|
||||
"failed to save generated image: {err}"
|
||||
);
|
||||
None
|
||||
}
|
||||
};
|
||||
Ok(Box::new(GeneratedImageOutput {
|
||||
result,
|
||||
output_hint,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Fully built backend request produced by `request_for_action`, one variant per
/// backend operation dispatched in `handle`.
#[derive(Debug, PartialEq)]
|
||||
enum ImageRequest {
|
||||
/// Request passed to the backend's `generate` call.
Generate(ImageGenerationRequest),
|
||||
/// Request passed to the backend's `edit` call.
Edit(ImageEditRequest),
|
||||
}
|
||||
|
||||
/// Maps the model-selected action to the fixed image API request parameters.
|
||||
fn request_for_action(
|
||||
args: &ImagegenArgs,
|
||||
history: &[ResponseItem],
|
||||
) -> Result<ImageRequest, FunctionCallError> {
|
||||
match args.action {
|
||||
ImagegenAction::Generate => Ok(ImageRequest::Generate(ImageGenerationRequest {
|
||||
prompt: args.prompt.clone(),
|
||||
background: Some(ImageBackground::Auto),
|
||||
model: IMAGE_MODEL.to_string(),
|
||||
n: None,
|
||||
quality: Some(ImageQuality::Auto),
|
||||
size: Some("auto".to_string()),
|
||||
})),
|
||||
ImagegenAction::Edit => {
|
||||
let images = edit_images(history);
|
||||
if images.is_empty() {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"image edit requested without any usable image in conversation history"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
Ok(ImageRequest::Edit(ImageEditRequest {
|
||||
images,
|
||||
prompt: args.prompt.clone(),
|
||||
background: Some(ImageBackground::Auto),
|
||||
model: IMAGE_MODEL.to_string(),
|
||||
n: None,
|
||||
quality: Some(ImageQuality::Auto),
|
||||
size: Some("auto".to_string()),
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Selects edit context using the hosted imagegen anchor and truncation behavior.
|
||||
fn edit_images(history: &[ResponseItem]) -> Vec<ImageUrl> {
|
||||
let latest_uploaded_images = history.iter().enumerate().rev().find_map(|(index, item)| {
|
||||
let ResponseItem::Message { role, content, .. } = item else {
|
||||
return None;
|
||||
};
|
||||
if role != "user" {
|
||||
return None;
|
||||
}
|
||||
let images = content
|
||||
.iter()
|
||||
.filter_map(|item| match item {
|
||||
ContentItem::InputImage { image_url, .. } => Some(ImageUrl {
|
||||
image_url: image_url.clone(),
|
||||
}),
|
||||
ContentItem::InputText { .. } | ContentItem::OutputText { .. } => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
(!images.is_empty()).then_some((index, images))
|
||||
});
|
||||
let (user_images, follow_up_start) = latest_uploaded_images
|
||||
.map_or_else(|| (Vec::new(), 0), |(index, images)| (images, index + 1));
|
||||
let mut generated_images = Vec::new();
|
||||
for item in &history[follow_up_start..] {
|
||||
match item {
|
||||
ResponseItem::ImageGenerationCall { result, .. } if !result.is_empty() => {
|
||||
generated_images.push(ImageUrl {
|
||||
image_url: format!("data:image/png;base64,{result}"),
|
||||
});
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { call_id, output }
|
||||
if history.iter().any(|item| {
|
||||
matches!(
|
||||
item,
|
||||
ResponseItem::FunctionCall {
|
||||
name,
|
||||
namespace: Some(namespace),
|
||||
call_id: function_call_id,
|
||||
..
|
||||
} if function_call_id == call_id
|
||||
&& name == IMAGEGEN_TOOL_NAME
|
||||
&& namespace == IMAGE_GEN_NAMESPACE
|
||||
)
|
||||
}) =>
|
||||
{
|
||||
generated_images.extend(output.content_items().into_iter().flatten().filter_map(
|
||||
|item| match item {
|
||||
FunctionCallOutputContentItem::InputImage { image_url, .. } => {
|
||||
Some(ImageUrl {
|
||||
image_url: image_url.clone(),
|
||||
})
|
||||
}
|
||||
FunctionCallOutputContentItem::InputText { .. }
|
||||
| FunctionCallOutputContentItem::EncryptedContent { .. } => None,
|
||||
},
|
||||
));
|
||||
}
|
||||
ResponseItem::Message { .. }
|
||||
| ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::ToolSearchCall { .. }
|
||||
| ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::ToolSearchOutput { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::ImageGenerationCall { .. }
|
||||
| ResponseItem::Compaction { .. }
|
||||
| ResponseItem::CompactionTrigger
|
||||
| ResponseItem::ContextCompaction { .. }
|
||||
| ResponseItem::Other => {}
|
||||
}
|
||||
}
|
||||
truncate_images(user_images, generated_images)
|
||||
}
|
||||
|
||||
/// Truncates edit inputs while preserving the newest generated image when possible.
|
||||
fn truncate_images(
|
||||
mut user_images: Vec<ImageUrl>,
|
||||
mut generated_images: Vec<ImageUrl>,
|
||||
) -> Vec<ImageUrl> {
|
||||
let mut excess = (user_images.len() + generated_images.len()).saturating_sub(MAX_EDIT_IMAGES);
|
||||
let drop_generated = excess.min(generated_images.len().saturating_sub(1));
|
||||
generated_images.drain(..drop_generated);
|
||||
excess -= drop_generated;
|
||||
let drop_user = excess.min(user_images.len());
|
||||
user_images.drain(..drop_user);
|
||||
excess -= drop_user;
|
||||
generated_images.drain(..excess);
|
||||
|
||||
user_images.extend(generated_images);
|
||||
user_images
|
||||
}
|
||||
|
||||
/// Parses the strict model-facing arguments for an image-generation call.
|
||||
fn parse_args(call: &ToolCall) -> Result<ImagegenArgs, FunctionCallError> {
|
||||
serde_json::from_str(call.function_arguments()?)
|
||||
.map_err(|err| FunctionCallError::RespondToModel(err.to_string()))
|
||||
}
|
||||
|
||||
/// Resolves where generated images for one thread are persisted by the extension.
|
||||
pub(crate) fn generated_image_output_dir(codex_home: &Path, thread_id: &str) -> PathBuf {
|
||||
codex_home
|
||||
.join(GENERATED_IMAGE_ARTIFACTS_DIR)
|
||||
.join(sanitize_path_component(thread_id))
|
||||
}
|
||||
|
||||
fn generated_image_output_path(output_dir: &Path, call_id: &str) -> PathBuf {
|
||||
output_dir.join(format!("{}.png", sanitize_path_component(call_id)))
|
||||
}
|
||||
|
||||
/// Makes `value` safe to use as a single path component.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept; every other character is
/// replaced by `_`. An empty input yields the placeholder `generated_image`.
fn sanitize_path_component(value: &str) -> String {
    // Each input character maps to exactly one output character, so the output is
    // empty only when the input is.
    if value.is_empty() {
        return "generated_image".to_string();
    }

    let is_safe = |ch: char| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        sanitized.push(if is_safe(ch) { ch } else { '_' });
    }
    sanitized
}
|
||||
|
||||
async fn persist_generated_image(
|
||||
output_dir: &Path,
|
||||
call_id: &str,
|
||||
result: &str,
|
||||
) -> Result<String, String> {
|
||||
let bytes = BASE64_STANDARD
|
||||
.decode(result.trim().as_bytes())
|
||||
.map_err(|err| format!("invalid image generation payload: {err}"))?;
|
||||
tokio::fs::create_dir_all(output_dir)
|
||||
.await
|
||||
.map_err(|err| err.to_string())?;
|
||||
tokio::fs::write(generated_image_output_path(output_dir, call_id), bytes)
|
||||
.await
|
||||
.map_err(|err| err.to_string())?;
|
||||
|
||||
Ok(format!(
|
||||
"Generated images are saved to {} as {} by default.\n\
|
||||
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
|
||||
output_dir.display(),
|
||||
generated_image_output_path(output_dir, call_id).display(),
|
||||
))
|
||||
}
|
||||
|
||||
/// Builds the namespace function schema exposed to the model.
|
||||
fn imagegen_tool_spec() -> ToolSpec {
|
||||
let mut schema_value = serde_json::to_value(
|
||||
SchemaSettings::draft2019_09()
|
||||
.with(|settings| settings.inline_subschemas = true)
|
||||
.into_generator()
|
||||
.into_root_schema_for::<ImagegenArgs>(),
|
||||
)
|
||||
.unwrap_or_else(|err| panic!("imagegen schema should serialize: {err}"));
|
||||
let Value::Object(ref mut schema) = schema_value else {
|
||||
unreachable!("imagegen root schema must be an object");
|
||||
};
|
||||
let mut input_schema = Map::new();
|
||||
for key in ["properties", "required", "type", "additionalProperties"] {
|
||||
if let Some(value) = schema.remove(key) {
|
||||
input_schema.insert(key.to_string(), value);
|
||||
}
|
||||
}
|
||||
ToolSpec::Namespace(ResponsesApiNamespace {
|
||||
name: IMAGE_GEN_NAMESPACE.to_string(),
|
||||
description: default_namespace_description(IMAGE_GEN_NAMESPACE),
|
||||
tools: vec![ResponsesApiNamespaceTool::Function(ResponsesApiTool {
|
||||
name: IMAGEGEN_TOOL_NAME.to_string(),
|
||||
description: IMAGEGEN_DESCRIPTION.to_string(),
|
||||
strict: false,
|
||||
parameters: parse_tool_input_schema(&Value::Object(input_schema))
|
||||
.unwrap_or_else(|err| panic!("imagegen input schema should parse: {err}")),
|
||||
output_schema: None,
|
||||
defer_loading: None,
|
||||
})],
|
||||
})
|
||||
}
|
||||
|
||||
struct GeneratedImageOutput {
|
||||
result: String,
|
||||
output_hint: Option<String>,
|
||||
}
|
||||
|
||||
impl ToolOutput for GeneratedImageOutput {
|
||||
/// Avoids copying image bytes into tool-call telemetry.
|
||||
fn log_preview(&self) -> String {
|
||||
"[generated image]".to_string()
|
||||
}
|
||||
|
||||
/// Reports a completed images request as successful tool execution.
|
||||
fn success_for_logging(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns generated bytes and persisted-artifact context for the model's follow-up response.
|
||||
fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
|
||||
let mut content = vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: format!("data:image/png;base64,{}", self.result),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}];
|
||||
if let Some(output_hint) = &self.output_hint {
|
||||
content.push(FunctionCallOutputContentItem::InputText {
|
||||
text: output_hint.clone(),
|
||||
});
|
||||
}
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
body: FunctionCallOutputBody::ContentItems(content),
|
||||
success: Some(true),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "tests.rs"]
|
||||
mod tests;
|
||||
@@ -170,6 +170,8 @@ pub enum Feature {
|
||||
ExternalMigration,
|
||||
/// Allow the model to invoke the built-in image generation tool.
|
||||
ImageGeneration,
|
||||
/// Replace hosted image generation with the standalone image-generation extension.
|
||||
ImageGenExt,
|
||||
/// Allow prompting and installing missing MCP dependencies.
|
||||
SkillMcpDependencyInstall,
|
||||
/// Removed compatibility flag for deleted skill env var dependency prompting.
|
||||
@@ -1053,6 +1055,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: true,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ImageGenExt,
|
||||
key: "imagegenext",
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SkillMcpDependencyInstall,
|
||||
key: "skill_mcp_dependency_install",
|
||||
|
||||
@@ -244,6 +244,13 @@ fn image_generation_is_stable_and_enabled_by_default() {
|
||||
assert_eq!(Feature::ImageGeneration.default_enabled(), true);
|
||||
}
|
||||
|
||||
/// Pins the `imagegenext` feature metadata: under-development stage, disabled by
/// default, and resolvable from its config key.
#[test]
fn image_generation_extension_is_under_development_and_disabled_by_default() {
    let stage = Feature::ImageGenExt.stage();
    assert_eq!(stage, Stage::UnderDevelopment);

    let enabled_by_default = Feature::ImageGenExt.default_enabled();
    assert_eq!(enabled_by_default, false);

    let resolved = feature_for_key("imagegenext");
    assert_eq!(resolved, Some(Feature::ImageGenExt));
}
|
||||
|
||||
#[test]
|
||||
fn use_legacy_landlock_config_records_deprecation_notice() {
|
||||
let mut entries = BTreeMap::new();
|
||||
|
||||
Reference in New Issue
Block a user