Add feature-gated standalone image generation extension (#24723)

## Why

Add a standalone image generation path that can be exercised
independently of hosted Responses image generation, while retaining the
hosted tool as fallback unless the extension is actually available to
the model.

## What changed

- Added the `codex-image-generation-extension` crate with standalone
generate/edit execution, prior-image selection for edits, model-visible
image output, and local generated-image persistence.
- Installed the extension in app-server behind the disabled-by-default
`imagegenext` feature and backend eligibility checks.
- Updated core tool planning so eligible `image_gen.imagegen` exposure
replaces hosted `image_generation`, while unavailable configurations
retain hosted fallback.
- Added coverage for extension behavior, edit history reuse, feature
gating, auth eligibility, and hosted-tool replacement.
- The extension is installed through app-server only in this PR; other
execution paths retain hosted image generation because hosted
replacement occurs only when the standalone executor is actually
registered and model-visible.
- The initial extension contract intentionally fixes the image model to
`gpt-image-2` and uses automatic image parameters.
- Native generated-image history/card parity and rollout persistence
cleanup are intentionally deferred follow-up work.

## Validation

- `just test -p codex-image-generation-extension`
- `just test -p codex-features`
- `just test -p codex-core
hosted_tools_follow_provider_auth_model_and_config_gates`
- `just test -p codex-app-server`
- `just fix -p codex-image-generation-extension -p codex-features -p
codex-core -p codex-app-server`
- `just fmt`
- `just bazel-lock-update`
- `just bazel-lock-check`

---------

Co-authored-by: jif-oai <jif@openai.com>
This commit is contained in:
Won Park
2026-05-28 11:44:55 -07:00
committed by GitHub
parent 462deb0426
commit ecb41fcb64
17 changed files with 1056 additions and 6 deletions

26
codex-rs/Cargo.lock generated
View File

@@ -1928,6 +1928,7 @@ dependencies = [
"codex-git-utils",
"codex-guardian",
"codex-hooks",
"codex-image-generation-extension",
"codex-login",
"codex-mcp",
"codex-memories-extension",
@@ -3039,6 +3040,31 @@ dependencies = [
"uuid",
]
[[package]]
name = "codex-image-generation-extension"
version = "0.0.0"
dependencies = [
"async-trait",
"base64 0.22.1",
"codex-api",
"codex-core",
"codex-extension-api",
"codex-features",
"codex-login",
"codex-model-provider",
"codex-model-provider-info",
"codex-protocol",
"codex-tools",
"http 1.4.0",
"pretty_assertions",
"schemars 0.8.22",
"serde",
"serde_json",
"tempfile",
"tokio",
"tracing",
]
[[package]]
name = "codex-install-context"
version = "0.0.0"

View File

@@ -47,6 +47,7 @@ members = [
"ext/extension-api",
"ext/goal",
"ext/guardian",
"ext/image-generation",
"ext/memories",
"ext/web-search",
"external-agent-migration",
@@ -165,6 +166,7 @@ codex-execpolicy = { path = "execpolicy" }
codex-extension-api = { path = "ext/extension-api" }
codex-goal-extension = { path = "ext/goal" }
codex-guardian = { path = "ext/guardian" }
codex-image-generation-extension = { path = "ext/image-generation" }
codex-external-agent-migration = { path = "external-agent-migration" }
codex-external-agent-sessions = { path = "external-agent-sessions" }
codex-experimental-api-macros = { path = "codex-experimental-api-macros" }

View File

@@ -54,6 +54,7 @@ codex-backend-client = { workspace = true }
codex-file-search = { workspace = true }
codex-chatgpt = { workspace = true }
codex-login = { workspace = true }
codex-image-generation-extension = { workspace = true }
codex-memories-extension = { workspace = true }
codex-web-search-extension = { workspace = true }
codex-memories-write = { workspace = true }

View File

@@ -31,7 +31,8 @@ where
let mut builder = ExtensionRegistryBuilder::<Config>::with_event_sink(event_sink);
codex_guardian::install(&mut builder, guardian_agent_spawner);
codex_memories_extension::install(&mut builder, codex_otel::global());
codex_web_search_extension::install(&mut builder, auth_manager);
codex_web_search_extension::install(&mut builder, auth_manager.clone());
codex_image_generation_extension::install(&mut builder, auth_manager);
Arc::new(builder.build())
}

View File

@@ -473,6 +473,9 @@
"image_generation": {
"type": "boolean"
},
"imagegenext": {
"type": "boolean"
},
"in_app_browser": {
"type": "boolean"
},
@@ -4564,6 +4567,9 @@
"image_generation": {
"type": "boolean"
},
"imagegenext": {
"type": "boolean"
},
"in_app_browser": {
"type": "boolean"
},

View File

@@ -85,6 +85,8 @@ use std::sync::Arc;
use tracing::warn;
const MULTI_AGENT_V2_NAMESPACE_DESCRIPTION: &str = "Tools for spawning and managing sub-agents.";
// Namespace and tool name of the standalone image-generation extension tool.
// NOTE(review): the extension crate declares the same two string values; they
// must stay in sync for the name comparisons in this file to match.
const IMAGE_GEN_NAMESPACE: &str = "image_gen";
const IMAGEGEN_TOOL_NAME: &str = "imagegen";
type PlannedRuntime = Arc<dyn CoreToolRuntime>;
@@ -257,7 +259,9 @@ fn hosted_model_tool_specs(context: &CoreToolPlanContext<'_>) -> Vec<ToolSpec> {
}) {
specs.push(web_search_tool);
}
if image_generation_tool_enabled(turn_context) {
if image_generation_tool_enabled(turn_context)
&& !standalone_image_generation_available(turn_context, context.extension_tool_executors)
{
specs.push(create_image_generation_tool("png"));
}
specs
@@ -316,21 +320,41 @@ fn agent_jobs_worker_tools_enabled(turn_context: &TurnContext) -> bool {
}
/// Reports whether the hosted `image_generation` tool may be offered this turn.
///
/// Both the runtime prerequisites checked by
/// `image_generation_runtime_enabled` and the `Feature::ImageGeneration` flag
/// must hold.
fn image_generation_tool_enabled(turn_context: &TurnContext) -> bool {
    if !image_generation_runtime_enabled(turn_context) {
        return false;
    }
    turn_context.features.get().enabled(Feature::ImageGeneration)
}
/// Checks the runtime prerequisites for image generation on this turn.
///
/// Returns `true` only when an auth manager is present and its current auth
/// uses the Codex backend, the provider capabilities report
/// `image_generation`, `Feature::ImageGeneration` is enabled, and the model's
/// input modalities include `InputModality::Image`.
// NOTE(review): `image_generation_tool_enabled` also tests
// `Feature::ImageGeneration`; confirm whether that clause is meant to live
// here as well, since it also gates the standalone `Feature::ImageGenExt` path.
fn image_generation_runtime_enabled(turn_context: &TurnContext) -> bool {
turn_context
.auth_manager
.as_deref()
.is_some_and(AuthManager::current_auth_uses_codex_backend)
&& turn_context.provider.capabilities().image_generation
&& turn_context
.features
.get()
.enabled(Feature::ImageGeneration)
&& turn_context
.model_info
.input_modalities
.contains(&InputModality::Image)
}
/// Reports whether the standalone `image_gen.imagegen` tool may be shown to the model.
///
/// Requires the shared runtime prerequisites, the `Feature::ImageGenExt` flag,
/// and namespace tools being enabled for the turn.
fn standalone_image_generation_model_visible(turn_context: &TurnContext) -> bool {
    if !image_generation_runtime_enabled(turn_context) {
        return false;
    }
    let extension_flag_on = turn_context.features.get().enabled(Feature::ImageGenExt);
    extension_flag_on && namespace_tools_enabled(turn_context)
}
/// Reports whether the standalone image tool is both model-visible and registered.
///
/// The hosted tool is only replaced when one of `extension_tools` actually
/// carries the `image_gen.imagegen` tool name.
fn standalone_image_generation_available(
    turn_context: &TurnContext,
    extension_tools: &[Arc<dyn ToolExecutor<ExtensionToolCall>>],
) -> bool {
    if !standalone_image_generation_model_visible(turn_context) {
        return false;
    }
    let imagegen_name = ToolName::namespaced(IMAGE_GEN_NAMESPACE, IMAGEGEN_TOOL_NAME);
    extension_tools
        .iter()
        .any(|executor| executor.tool_name() == imagegen_name)
}
fn wait_agent_timeout_options(turn_context: &TurnContext) -> WaitAgentTimeoutOptions {
if multi_agent_v2_enabled(turn_context) {
return WaitAgentTimeoutOptions {
@@ -839,6 +863,11 @@ fn append_extension_tool_executors(
for executor in executors.iter().cloned() {
let tool_name = executor.tool_name();
if tool_name == ToolName::namespaced(IMAGE_GEN_NAMESPACE, IMAGEGEN_TOOL_NAME)
&& !standalone_image_generation_model_visible(turn_context)
{
continue;
}
if !reserved_tool_names.insert(tool_name.clone()) {
warn!("Skipping extension tool `{tool_name}`: tool already registered");
continue;

View File

@@ -960,6 +960,16 @@ async fn hosted_tools_follow_provider_auth_model_and_config_gates() {
.await;
image_generation.assert_visible_contains(&["image_generation"]);
let extension_flag_without_imagegen_tool = probe(|turn| {
use_chatgpt_auth(turn);
set_feature(turn, Feature::ImageGeneration, /*enabled*/ true);
set_feature(turn, Feature::ImageGenExt, /*enabled*/ true);
turn.model_info.input_modalities = vec![InputModality::Image];
})
.await;
extension_flag_without_imagegen_tool.assert_visible_contains(&["image_generation"]);
extension_flag_without_imagegen_tool.assert_visible_lacks(&["image_gen"]);
let live_web_search = probe(|turn| {
set_web_search_mode(turn, WebSearchMode::Live);
turn.model_info.web_search_tool_type = WebSearchToolType::TextAndImage;

View File

@@ -0,0 +1,9 @@
load("//:defs.bzl", "codex_rust_crate")
# Bazel target for the `codex_image_generation_extension` crate.
codex_rust_crate(
name = "image-generation",
crate_name = "codex_image_generation_extension",
# The tool description file is read with `include_str!` in the crate sources,
# so it is listed as a compile-time input.
compile_data = [
"imagegen_description.md",
],
)

View File

@@ -0,0 +1,37 @@
[package]
edition.workspace = true
license.workspace = true
name = "codex-image-generation-extension"
version.workspace = true
[lib]
name = "codex_image_generation_extension"
path = "src/lib.rs"
doctest = false
[lints]
workspace = true
[dependencies]
async-trait = { workspace = true }
base64 = { workspace = true }
codex-api = { workspace = true }
codex-core = { workspace = true }
codex-extension-api = { workspace = true }
codex-features = { workspace = true }
codex-login = { workspace = true }
codex-model-provider = { workspace = true }
codex-model-provider-info = { workspace = true }
codex-protocol = { workspace = true }
codex-tools = { workspace = true }
http = { workspace = true }
schemars = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["fs"] }
tracing = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

View File

@@ -0,0 +1,11 @@
The `image_gen.imagegen` tool enables image generation from descriptions and editing of existing images based on specific instructions. Use it when:
- The user requests an image based on a scene description, such as a diagram, portrait, comic, meme, or any other visual.
- The user wants to modify an attached or previously generated image with specific changes, including adding or removing elements, altering colors, improving quality/resolution, or transforming the style (e.g., cartoon, oil painting).
Guidelines:
- Set `action` to `generate` when the user asks for a brand new image.
- Set `action` to `edit` when the user asks to modify an existing image from the conversation history.
- Directly generate the image without reconfirmation or clarification.
- After each image generation, do not mention anything related to download. Do not summarize the image. Do not ask follow-up questions. Do not say ANYTHING after you generate an image.
- Always use this tool for image editing unless the user explicitly requests otherwise. Do not use the `python` tool for image editing unless specifically instructed.

View File

@@ -0,0 +1,60 @@
use codex_api::ImageEditRequest;
use codex_api::ImageGenerationRequest;
use codex_api::ImageResponse;
use codex_api::ImagesClient;
use codex_api::ReqwestTransport;
use codex_login::default_client::build_reqwest_client;
use codex_model_provider::SharedModelProvider;
use http::HeaderMap;
/// Images API backend that resolves its API provider and auth from a shared model provider.
#[derive(Clone)]
pub(crate) struct CodexImagesBackend {
// Queried for API provider settings and auth each time a client is built.
provider: SharedModelProvider,
}
impl CodexImagesBackend {
    /// Wraps `provider` so image requests are issued through it.
    pub(crate) fn new(provider: SharedModelProvider) -> Self {
        Self { provider }
    }

    /// Builds an Images client from the provider's current API settings and auth.
    ///
    /// Lookup failures are flattened to their display strings.
    async fn client(&self) -> Result<ImagesClient<ReqwestTransport>, String> {
        let api_provider = self.provider.api_provider().await;
        let api_provider = api_provider.map_err(|err| err.to_string())?;

        let api_auth = self.provider.api_auth().await;
        let api_auth = api_auth.map_err(|err| err.to_string())?;

        let transport = ReqwestTransport::new(build_reqwest_client());
        Ok(ImagesClient::new(transport, api_provider, api_auth))
    }

    /// Issues an image generation request with no extra headers.
    pub(crate) async fn generate(
        &self,
        request: ImageGenerationRequest,
    ) -> Result<ImageResponse, String> {
        let client = self.client().await?;
        let outcome = client.generate(&request, HeaderMap::new()).await;
        outcome.map_err(|err| err.to_string())
    }

    /// Issues an image edit request with no extra headers.
    pub(crate) async fn edit(&self, request: ImageEditRequest) -> Result<ImageResponse, String> {
        let client = self.client().await?;
        let outcome = client.edit(&request, HeaderMap::new()).await;
        outcome.map_err(|err| err.to_string())
    }
}

View File

@@ -0,0 +1,99 @@
use std::path::PathBuf;
use std::sync::Arc;
use codex_core::config::Config;
use codex_extension_api::ConfigContributor;
use codex_extension_api::ExtensionData;
use codex_extension_api::ExtensionRegistryBuilder;
use codex_extension_api::ThreadLifecycleContributor;
use codex_extension_api::ThreadStartInput;
use codex_extension_api::ToolCall;
use codex_extension_api::ToolContributor;
use codex_extension_api::ToolExecutor;
use codex_features::Feature;
use codex_login::AuthManager;
use codex_model_provider::create_model_provider;
use codex_model_provider_info::ModelProviderInfo;
use crate::backend::CodexImagesBackend;
use crate::tool::ImageGenerationTool;
use crate::tool::generated_image_output_dir;
/// Extension object registered by `install` as lifecycle, config, and tool contributor.
#[derive(Clone)]
struct ImageGenerationExtension {
// Consulted in `tools` to check that the current auth uses the Codex backend,
// and handed to the model provider created for the image backend.
auth_manager: Arc<AuthManager>,
}
/// Per-thread snapshot of the `Config` values this extension needs; stored in the thread store.
#[derive(Clone)]
struct ImageGenerationExtensionConfig {
// True when `Feature::ImageGenExt` is enabled and the model provider is OpenAI.
enabled: bool,
// Provider info used to create the model provider behind the image backend.
provider: ModelProviderInfo,
// Root under which the generated-image output directory is resolved.
codex_home: PathBuf,
}
impl From<&Config> for ImageGenerationExtensionConfig {
    /// Snapshots the configuration that decides standalone image-generation availability.
    fn from(config: &Config) -> Self {
        let feature_on = config.features.enabled(Feature::ImageGenExt);
        let enabled = feature_on && config.model_provider.is_openai();
        let provider = config.model_provider.clone();
        let codex_home = config.codex_home.to_path_buf();
        Self {
            enabled,
            provider,
            codex_home,
        }
    }
}
#[async_trait::async_trait]
impl ThreadLifecycleContributor<Config> for ImageGenerationExtension {
    /// Stores the initial image-generation config snapshot when a thread starts.
    async fn on_thread_start(&self, input: ThreadStartInput<'_, Config>) {
        let snapshot = ImageGenerationExtensionConfig::from(input.config);
        input.thread_store.insert(snapshot);
    }
}
impl ConfigContributor<Config> for ImageGenerationExtension {
    /// Replaces the thread's config snapshot with one built from the new configuration.
    fn on_config_changed(
        &self,
        _session_store: &ExtensionData,
        thread_store: &ExtensionData,
        _previous_config: &Config,
        new_config: &Config,
    ) {
        let refreshed = ImageGenerationExtensionConfig::from(new_config);
        thread_store.insert(refreshed);
    }
}
impl ToolContributor for ImageGenerationExtension {
    /// Returns the image-generation tool for this thread, or nothing when unavailable.
    ///
    /// The tool is only contributed when a config snapshot exists, it is
    /// enabled, and the current auth uses the Codex backend.
    fn tools(
        &self,
        _session_store: &ExtensionData,
        thread_store: &ExtensionData,
    ) -> Vec<Arc<dyn ToolExecutor<ToolCall>>> {
        let Some(config) = thread_store.get::<ImageGenerationExtensionConfig>() else {
            return Vec::new();
        };
        let usable = config.enabled && self.auth_manager.current_auth_uses_codex_backend();
        if !usable {
            return Vec::new();
        }

        let provider =
            create_model_provider(config.provider.clone(), Some(self.auth_manager.clone()));
        let output_dir = generated_image_output_dir(&config.codex_home, thread_store.level_id());
        let tool: Arc<dyn ToolExecutor<ToolCall>> = Arc::new(ImageGenerationTool::new(
            CodexImagesBackend::new(provider),
            output_dir,
        ));
        vec![tool]
    }
}
/// Registers the standalone image-generation extension with `registry`.
///
/// One shared extension instance is registered as thread-lifecycle, config,
/// and tool contributor.
pub fn install(registry: &mut ExtensionRegistryBuilder<Config>, auth_manager: Arc<AuthManager>) {
    let extension = Arc::new(ImageGenerationExtension { auth_manager });
    let lifecycle_handle = extension.clone();
    let config_handle = extension.clone();
    registry.thread_lifecycle_contributor(lifecycle_handle);
    registry.config_contributor(config_handle);
    registry.tool_contributor(extension);
}

View File

@@ -0,0 +1,8 @@
mod backend;
mod extension;
mod tool;
pub use extension::install;
/// Namespace under which the image-generation tool is advertised.
pub(crate) const IMAGE_GEN_NAMESPACE: &str = "image_gen";
/// Function name of the image-generation tool inside its namespace.
pub(crate) const IMAGEGEN_TOOL_NAME: &str = "imagegen";

View File

@@ -0,0 +1,341 @@
use codex_api::ImageBackground;
use codex_api::ImageEditRequest;
use codex_api::ImageGenerationRequest;
use codex_api::ImageQuality;
use codex_api::ImageUrl;
use codex_extension_api::ToolOutput;
use codex_extension_api::ToolPayload;
use codex_extension_api::ToolSpec;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_tools::ResponsesApiNamespaceTool;
use pretty_assertions::assert_eq;
use super::GeneratedImageOutput;
use super::ImageRequest;
use super::ImagegenAction;
use super::ImagegenArgs;
use super::generated_image_output_dir;
use super::imagegen_tool_spec;
use super::persist_generated_image;
use super::request_for_action;
use crate::IMAGE_GEN_NAMESPACE;
use crate::IMAGEGEN_TOOL_NAME;
// Base64 payload used as the fake image result; it decodes to the bytes `png`.
const RESULT: &str = "cG5n";
/// The tool spec must be a namespace named `image_gen` whose first tool is `imagegen`.
#[test]
fn uses_reserved_image_gen_namespace() {
    let tool_spec = imagegen_tool_spec();
    let ToolSpec::Namespace(namespace) = tool_spec else {
        panic!("imagegen should advertise a namespace tool");
    };
    assert_eq!(namespace.name, IMAGE_GEN_NAMESPACE);

    let first_tool = &namespace.tools[0];
    let ResponsesApiNamespaceTool::Function(function) = first_tool;
    assert_eq!(function.name, IMAGEGEN_TOOL_NAME);
}
/// A generate action yields the fixed model and automatic image parameters.
#[test]
fn generate_uses_fixed_request_defaults() {
    let actual = request_for_action(&args(ImagegenAction::Generate, "paint a moonlit lake"), &[])
        .expect("generation request should build");

    let expected = ImageRequest::Generate(ImageGenerationRequest {
        prompt: "paint a moonlit lake".to_string(),
        background: Some(ImageBackground::Auto),
        model: "gpt-image-2".to_string(),
        n: None,
        quality: Some(ImageQuality::Auto),
        size: Some("auto".to_string()),
    });

    assert_eq!(actual, expected);
}
/// Persisting writes the decoded image, and the tool output carries the image plus the save hint.
#[tokio::test]
async fn generated_output_returns_image_input_and_persists_artifact() {
    let tempdir = tempfile::tempdir().expect("tempdir");
    let saved_path = tempdir.path().join("call-1.png");

    let output_hint = persist_generated_image(tempdir.path(), "call-1", RESULT)
        .await
        .expect("generated image should persist");
    let output = GeneratedImageOutput {
        result: RESULT.to_string(),
        output_hint: Some(output_hint),
    };

    let response_item = output.to_response_item("call-1", &function_payload());
    let ResponseInputItem::FunctionCallOutput {
        output: response_output,
        ..
    } = response_item
    else {
        panic!("imagegen should return function tool output");
    };
    let FunctionCallOutputBody::ContentItems(content_items) = response_output.body else {
        panic!("imagegen output should contain generated image bytes");
    };

    let expected_image = FunctionCallOutputContentItem::InputImage {
        image_url: format!("data:image/png;base64,{RESULT}"),
        detail: Some(DEFAULT_IMAGE_DETAIL),
    };
    let expected_hint = FunctionCallOutputContentItem::InputText {
        text: format!(
            "Generated images are saved to {} as {} by default.\n\
             If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
            tempdir.path().display(),
            saved_path.display(),
        ),
    };
    assert_eq!(content_items, vec![expected_image, expected_hint]);

    let saved_bytes = std::fs::read(&saved_path).expect("saved generated image");
    assert_eq!(saved_bytes, b"png");
}
/// Only images generated after the latest user upload are kept, trimmed to the newest ones.
#[test]
fn edit_matches_context_selector_for_generated_images_after_latest_user_anchor() {
    let user_upload = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![
            ContentItem::InputImage {
                image_url: "data:image/png;base64,u1".to_string(),
                detail: None,
            },
            ContentItem::InputImage {
                image_url: "data:image/png;base64,u2".to_string(),
                detail: None,
            },
        ],
        phase: None,
    };

    // Generated images before the upload are stale; those after it are candidates.
    let mut history = Vec::new();
    for stale in ["g1", "g2", "g3"] {
        history.push(generated_item(stale));
    }
    history.push(user_upload);
    for fresh in ["g4", "g5", "g6", "g7"] {
        history.push(generated_item(fresh));
    }

    let expected = expected_edit_request(
        "change the lighting",
        &[
            "data:image/png;base64,u1",
            "data:image/png;base64,u2",
            "data:image/png;base64,g5",
            "data:image/png;base64,g6",
            "data:image/png;base64,g7",
        ],
    );
    assert_eq!(edit_request("change the lighting", &history), expected);
}
/// When user uploads alone reach the limit, the oldest upload is dropped to keep the generated image.
#[test]
fn edit_preserves_a_generated_image_when_user_anchor_fills_the_limit() {
    let uploads = ["a", "b", "c", "d", "e"]
        .into_iter()
        .map(|image| ContentItem::InputImage {
            image_url: format!("data:image/png;base64,{image}"),
            detail: None,
        })
        .collect();
    let user_message = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: uploads,
        phase: None,
    };
    let history = vec![user_message, generated_item("generated")];

    let expected = expected_edit_request(
        "edit the last generated image",
        &[
            "data:image/png;base64,b",
            "data:image/png;base64,c",
            "data:image/png;base64,d",
            "data:image/png;base64,e",
            "data:image/png;base64,generated",
        ],
    );
    assert_eq!(
        edit_request("edit the last generated image", &history),
        expected
    );
}
/// A later text-only user message does not displace the earlier image upload as anchor.
#[test]
fn edit_uses_latest_user_upload_before_a_text_only_follow_up() {
    let upload = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputImage {
            image_url: "data:image/png;base64,user".to_string(),
            detail: None,
        }],
        phase: None,
    };
    let text_follow_up = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "edit this image".to_string(),
        }],
        phase: None,
    };
    let history = vec![upload, text_follow_up];

    let expected = expected_edit_request("change the lighting", &["data:image/png;base64,user"]);
    assert_eq!(edit_request("change the lighting", &history), expected);
}
/// Image output from an earlier standalone imagegen call is reused as edit input.
#[test]
fn edit_reuses_images_from_prior_standalone_imagegen_calls() {
    let imagegen_call = ResponseItem::FunctionCall {
        id: None,
        name: IMAGEGEN_TOOL_NAME.to_string(),
        namespace: Some(IMAGE_GEN_NAMESPACE.to_string()),
        arguments: "{}".to_string(),
        call_id: "imagegen-1".to_string(),
    };
    let imagegen_output = generated_function_output("imagegen-1", "standalone");
    let history = vec![imagegen_call, imagegen_output];

    let expected =
        expected_edit_request("change the lighting", &["data:image/png;base64,standalone"]);
    assert_eq!(edit_request("change the lighting", &history), expected);
}
/// With six standalone outputs, only the five newest images are sent.
#[test]
fn edit_keeps_newest_standalone_generated_images_when_over_limit() {
    let mut history = Vec::new();
    for index in 1..=6 {
        let call_id = format!("imagegen-{index}");
        history.push(ResponseItem::FunctionCall {
            id: None,
            name: IMAGEGEN_TOOL_NAME.to_string(),
            namespace: Some(IMAGE_GEN_NAMESPACE.to_string()),
            arguments: "{}".to_string(),
            call_id: call_id.clone(),
        });
        history.push(generated_function_output(&call_id, &index.to_string()));
    }

    let expected = expected_edit_request(
        "change the lighting",
        &[
            "data:image/png;base64,2",
            "data:image/png;base64,3",
            "data:image/png;base64,4",
            "data:image/png;base64,5",
            "data:image/png;base64,6",
        ],
    );
    assert_eq!(edit_request("change the lighting", &history), expected);
}
/// An edit with no image anywhere in history is rejected with a model-facing error.
#[test]
fn edit_without_image_history_returns_tool_error() {
    let edit_args = args(ImagegenAction::Edit, "change the lighting");
    let error =
        request_for_action(&edit_args, &[]).expect_err("edit should require image context");

    assert_eq!(
        error.to_string(),
        "image edit requested without any usable image in conversation history"
    );
}
/// The output directory nests under `generated_images` with the thread id sanitized.
#[test]
fn generated_image_output_dir_is_scoped_to_sanitized_thread_id() {
    let codex_home = std::path::Path::new("/tmp/codex-home");
    let actual = generated_image_output_dir(codex_home, "thread/1");
    let expected = std::path::PathBuf::from("/tmp/codex-home/generated_images/thread_1");
    assert_eq!(actual, expected);
}
/// Builds tool arguments for `action` with an owned copy of `prompt`.
fn args(action: ImagegenAction, prompt: &str) -> ImagegenArgs {
    let prompt = prompt.to_string();
    ImagegenArgs { prompt, action }
}
/// Builds an edit request for `prompt` over `history`, panicking if it is not an edit.
fn edit_request(prompt: &str, history: &[ResponseItem]) -> ImageEditRequest {
    let built = request_for_action(&args(ImagegenAction::Edit, prompt), history)
        .expect("edit request should build");
    match built {
        ImageRequest::Edit(request) => request,
        ImageRequest::Generate(_) => panic!("expected edit request"),
    }
}
/// Builds the edit request expected for `prompt` with `images` in order and fixed defaults.
fn expected_edit_request(prompt: &str, images: &[&str]) -> ImageEditRequest {
    let mut image_urls = Vec::with_capacity(images.len());
    for image_url in images {
        image_urls.push(ImageUrl {
            image_url: (*image_url).to_string(),
        });
    }

    ImageEditRequest {
        images: image_urls,
        prompt: prompt.to_string(),
        background: Some(ImageBackground::Auto),
        model: "gpt-image-2".to_string(),
        n: None,
        quality: Some(ImageQuality::Auto),
        size: Some("auto".to_string()),
    }
}
/// Builds a completed hosted image-generation history item carrying `result`.
fn generated_item(result: &str) -> ResponseItem {
    let id = format!("id-{result}");
    ResponseItem::ImageGenerationCall {
        id,
        status: String::from("completed"),
        revised_prompt: None,
        result: String::from(result),
    }
}
/// Builds a successful function output for `call_id` holding one image and a text hint.
fn generated_function_output(call_id: &str, result: &str) -> ResponseItem {
    let image = FunctionCallOutputContentItem::InputImage {
        image_url: format!("data:image/png;base64,{result}"),
        detail: Some(DEFAULT_IMAGE_DETAIL),
    };
    let hint = FunctionCallOutputContentItem::InputText {
        text: "generated image save hint".to_string(),
    };
    let output = FunctionCallOutputPayload {
        body: FunctionCallOutputBody::ContentItems(vec![image, hint]),
        success: Some(true),
    };
    ResponseItem::FunctionCallOutput {
        call_id: call_id.to_string(),
        output,
    }
}
/// Builds a function payload with empty JSON-object arguments.
fn function_payload() -> ToolPayload {
    let arguments = String::from("{}");
    ToolPayload::Function { arguments }
}

View File

@@ -0,0 +1,395 @@
use std::path::Path;
use std::path::PathBuf;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_api::ImageBackground;
use codex_api::ImageEditRequest;
use codex_api::ImageGenerationRequest;
use codex_api::ImageQuality;
use codex_api::ImageUrl;
use codex_extension_api::FunctionCallError;
use codex_extension_api::ToolCall;
use codex_extension_api::ToolExecutor;
use codex_extension_api::ToolName;
use codex_extension_api::ToolOutput;
use codex_extension_api::ToolPayload;
use codex_extension_api::ToolSpec;
use codex_extension_api::parse_tool_input_schema;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_tools::ResponsesApiNamespace;
use codex_tools::ResponsesApiNamespaceTool;
use codex_tools::ResponsesApiTool;
use codex_tools::ToolExposure;
use codex_tools::default_namespace_description;
use schemars::JsonSchema;
use schemars::r#gen::SchemaSettings;
use serde::Deserialize;
use serde_json::Map;
use serde_json::Value;
use crate::IMAGE_GEN_NAMESPACE;
use crate::IMAGEGEN_TOOL_NAME;
use crate::backend::CodexImagesBackend;
// Model name placed in every generate and edit request.
const IMAGE_MODEL: &str = "gpt-image-2";
// Upper bound on the number of images sent with an edit request.
const MAX_EDIT_IMAGES: usize = 5;
// Contents of `imagegen_description.md`, embedded at compile time.
const IMAGEGEN_DESCRIPTION: &str = include_str!("../imagegen_description.md");
// Directory name under the Codex home where generated images are stored.
const GENERATED_IMAGE_ARTIFACTS_DIR: &str = "generated_images";
/// Tool executor for `image_gen.imagegen`: sends image requests and persists the results.
#[derive(Clone)]
pub(crate) struct ImageGenerationTool {
// Backend that performs the generate/edit API calls.
backend: CodexImagesBackend,
// Directory the generated images are written to.
output_dir: PathBuf,
}
impl ImageGenerationTool {
    /// Builds a tool that sends requests through `backend` and saves images under `output_dir`.
    pub(crate) fn new(backend: CodexImagesBackend, output_dir: PathBuf) -> Self {
        Self { backend, output_dir }
    }
}
// Model-facing arguments of the imagegen tool; unknown fields are rejected.
// NOTE(review): plain `//` comments are used on purpose here, since `///` docs
// on a `JsonSchema` derive may surface in the schema sent to the model.
#[derive(Debug, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
struct ImagegenArgs {
// Prompt text forwarded unchanged to the image request.
prompt: String,
// Selects between generating a new image and editing existing ones.
action: ImagegenAction,
}
// Operation requested by the model; serialized in lowercase (`generate`, `edit`).
// NOTE(review): plain `//` comments are used on purpose here, since `///` docs
// on a `JsonSchema` derive may surface in the schema sent to the model.
#[derive(Debug, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
enum ImagegenAction {
// Create a new image from the prompt alone.
Generate,
// Modify images taken from the conversation history.
Edit,
}
#[async_trait::async_trait]
impl ToolExecutor<ToolCall> for ImageGenerationTool {
    /// Names the tool `imagegen` inside the `image_gen` namespace.
    fn tool_name(&self) -> ToolName {
        ToolName::namespaced(IMAGE_GEN_NAMESPACE, IMAGEGEN_TOOL_NAME)
    }

    /// Returns the namespace tool spec advertised to the model.
    fn spec(&self) -> ToolSpec {
        imagegen_tool_spec()
    }

    /// Exposes the tool to the model directly only.
    fn exposure(&self) -> ToolExposure {
        ToolExposure::DirectModelOnly
    }

    /// Runs the requested generate or edit operation and returns the first image.
    ///
    /// Backend failures and empty responses are reported back to the model.
    /// A failure to save the image locally is only logged; the image is still
    /// returned, without a save hint.
    async fn handle(&self, call: ToolCall) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
        let args = parse_args(&call)?;
        let request = request_for_action(&args, call.conversation_history.items())?;

        let outcome = match request {
            ImageRequest::Generate(request) => self.backend.generate(request).await,
            ImageRequest::Edit(request) => self.backend.edit(request).await,
        };
        let response = outcome.map_err(|err| {
            FunctionCallError::RespondToModel(format!("image generation failed: {err}"))
        })?;

        let first_image = response.data.into_iter().next().map(|data| data.b64_json);
        let result = first_image.ok_or_else(|| {
            FunctionCallError::RespondToModel("image generation returned no image data".to_string())
        })?;

        let persisted = persist_generated_image(&self.output_dir, &call.call_id, &result).await;
        let output_hint = match persisted {
            Ok(output_hint) => Some(output_hint),
            Err(err) => {
                tracing::warn!(
                    call_id = %call.call_id,
                    output_dir = %self.output_dir.display(),
                    "failed to save generated image: {err}"
                );
                None
            }
        };

        Ok(Box::new(GeneratedImageOutput {
            result,
            output_hint,
        }))
    }
}
/// Image API request selected for a tool call.
#[derive(Debug, PartialEq)]
enum ImageRequest {
/// Create a new image from a prompt.
Generate(ImageGenerationRequest),
/// Edit images taken from the conversation history.
Edit(ImageEditRequest),
}
/// Builds the image API request for the requested action.
///
/// Both actions use the fixed image model and automatic background, quality,
/// and size. An edit additionally takes its input images from `history`.
///
/// # Errors
///
/// Returns `FunctionCallError::RespondToModel` when an edit is requested but
/// `history` holds no usable image.
fn request_for_action(
    args: &ImagegenArgs,
    history: &[ResponseItem],
) -> Result<ImageRequest, FunctionCallError> {
    let request = match args.action {
        ImagegenAction::Generate => ImageRequest::Generate(ImageGenerationRequest {
            prompt: args.prompt.clone(),
            background: Some(ImageBackground::Auto),
            model: IMAGE_MODEL.to_string(),
            n: None,
            quality: Some(ImageQuality::Auto),
            size: Some("auto".to_string()),
        }),
        ImagegenAction::Edit => {
            let images = edit_images(history);
            if images.is_empty() {
                let message =
                    "image edit requested without any usable image in conversation history";
                return Err(FunctionCallError::RespondToModel(message.to_string()));
            }
            ImageRequest::Edit(ImageEditRequest {
                images,
                prompt: args.prompt.clone(),
                background: Some(ImageBackground::Auto),
                model: IMAGE_MODEL.to_string(),
                n: None,
                quality: Some(ImageQuality::Auto),
                size: Some("auto".to_string()),
            })
        }
    };
    Ok(request)
}
/// Selects edit context using the hosted imagegen anchor and truncation behavior.
fn edit_images(history: &[ResponseItem]) -> Vec<ImageUrl> {
let latest_uploaded_images = history.iter().enumerate().rev().find_map(|(index, item)| {
let ResponseItem::Message { role, content, .. } = item else {
return None;
};
if role != "user" {
return None;
}
let images = content
.iter()
.filter_map(|item| match item {
ContentItem::InputImage { image_url, .. } => Some(ImageUrl {
image_url: image_url.clone(),
}),
ContentItem::InputText { .. } | ContentItem::OutputText { .. } => None,
})
.collect::<Vec<_>>();
(!images.is_empty()).then_some((index, images))
});
let (user_images, follow_up_start) = latest_uploaded_images
.map_or_else(|| (Vec::new(), 0), |(index, images)| (images, index + 1));
let mut generated_images = Vec::new();
for item in &history[follow_up_start..] {
match item {
ResponseItem::ImageGenerationCall { result, .. } if !result.is_empty() => {
generated_images.push(ImageUrl {
image_url: format!("data:image/png;base64,{result}"),
});
}
ResponseItem::FunctionCallOutput { call_id, output }
if history.iter().any(|item| {
matches!(
item,
ResponseItem::FunctionCall {
name,
namespace: Some(namespace),
call_id: function_call_id,
..
} if function_call_id == call_id
&& name == IMAGEGEN_TOOL_NAME
&& namespace == IMAGE_GEN_NAMESPACE
)
}) =>
{
generated_images.extend(output.content_items().into_iter().flatten().filter_map(
|item| match item {
FunctionCallOutputContentItem::InputImage { image_url, .. } => {
Some(ImageUrl {
image_url: image_url.clone(),
})
}
FunctionCallOutputContentItem::InputText { .. }
| FunctionCallOutputContentItem::EncryptedContent { .. } => None,
},
));
}
ResponseItem::Message { .. }
| ResponseItem::Reasoning { .. }
| ResponseItem::LocalShellCall { .. }
| ResponseItem::FunctionCall { .. }
| ResponseItem::ToolSearchCall { .. }
| ResponseItem::FunctionCallOutput { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::CustomToolCallOutput { .. }
| ResponseItem::ToolSearchOutput { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::ImageGenerationCall { .. }
| ResponseItem::Compaction { .. }
| ResponseItem::CompactionTrigger
| ResponseItem::ContextCompaction { .. }
| ResponseItem::Other => {}
}
}
truncate_images(user_images, generated_images)
}
/// Caps the combined edit inputs at `MAX_EDIT_IMAGES`, user images first.
///
/// Images are dropped oldest first in this order: generated images down to a
/// single survivor, then user images, then whatever generated image remains.
fn truncate_images(
    mut user_images: Vec<ImageUrl>,
    mut generated_images: Vec<ImageUrl>,
) -> Vec<ImageUrl> {
    let total = user_images.len() + generated_images.len();
    let mut still_to_drop = total.saturating_sub(MAX_EDIT_IMAGES);

    // Trim old generated images first, always sparing the newest one.
    let spare_generated = generated_images.len().saturating_sub(1);
    let from_generated = still_to_drop.min(spare_generated);
    generated_images.drain(..from_generated);
    still_to_drop -= from_generated;

    // Then trim the oldest user images.
    let from_user = still_to_drop.min(user_images.len());
    user_images.drain(..from_user);
    still_to_drop -= from_user;

    // Only now may the last generated image be given up.
    generated_images.drain(..still_to_drop);

    let mut kept = user_images;
    kept.append(&mut generated_images);
    kept
}
/// Deserializes the call's function arguments into `ImagegenArgs`.
///
/// JSON errors are returned to the model as their display text.
fn parse_args(call: &ToolCall) -> Result<ImagegenArgs, FunctionCallError> {
    let raw_arguments = call.function_arguments()?;
    let parsed = serde_json::from_str::<ImagegenArgs>(raw_arguments);
    parsed.map_err(|err| FunctionCallError::RespondToModel(err.to_string()))
}
/// Returns `<codex_home>/generated_images/<sanitized thread id>`.
pub(crate) fn generated_image_output_dir(codex_home: &Path, thread_id: &str) -> PathBuf {
    let mut dir = codex_home.join(GENERATED_IMAGE_ARTIFACTS_DIR);
    dir.push(sanitize_path_component(thread_id));
    dir
}
/// Returns the file path for one call's image: `<output_dir>/<sanitized call_id>.png`.
fn generated_image_output_path(output_dir: &Path, call_id: &str) -> PathBuf {
    let mut file_name = sanitize_path_component(call_id);
    file_name.push_str(".png");
    output_dir.join(file_name)
}
/// Turns an arbitrary identifier into a string usable as a single path component.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept as-is; every other character
/// (including `.`, `/` and non-ASCII characters) becomes `_`. An empty input yields
/// the placeholder `"generated_image"`.
fn sanitize_path_component(value: &str) -> String {
    // Each input character maps to exactly one output character, so the result
    // is empty only when the input is.
    if value.is_empty() {
        return "generated_image".to_string();
    }

    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        let allowed = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        cleaned.push(if allowed { ch } else { '_' });
    }
    cleaned
}
/// Decodes a base64 image payload and writes it to `<output_dir>/<sanitized call_id>.png`.
///
/// `result` is trimmed before decoding. `output_dir` is created (with parents) if needed.
///
/// Returns a hint string naming the directory and file where the image was saved.
///
/// # Errors
///
/// Returns a message string when the payload is not valid base64, or when creating the
/// directory or writing the file fails (the I/O error rendered with `to_string`).
async fn persist_generated_image(
    output_dir: &Path,
    call_id: &str,
    result: &str,
) -> Result<String, String> {
    let payload = result.trim();
    let image_bytes = match BASE64_STANDARD.decode(payload.as_bytes()) {
        Ok(decoded) => decoded,
        Err(err) => return Err(format!("invalid image generation payload: {err}")),
    };

    if let Err(err) = tokio::fs::create_dir_all(output_dir).await {
        return Err(err.to_string());
    }

    // Resolve the destination once; it is used for both the write and the hint text.
    let image_path = generated_image_output_path(output_dir, call_id);
    if let Err(err) = tokio::fs::write(&image_path, image_bytes).await {
        return Err(err.to_string());
    }

    Ok(format!(
        "Generated images are saved to {} as {} by default.\n\
         If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
        output_dir.display(),
        image_path.display(),
    ))
}
/// Builds the `ToolSpec::Namespace` entry that exposes the image-generation function.
///
/// The parameter schema is derived from `ImagegenArgs` with subschemas inlined, then
/// reduced to the `properties`, `required`, `type` and `additionalProperties` keys
/// before being parsed into the tool's input schema. The tool is declared non-strict.
///
/// # Panics
///
/// Panics if the generated schema cannot be serialized, is not a JSON object, or the
/// reduced schema is rejected by `parse_tool_input_schema`.
fn imagegen_tool_spec() -> ToolSpec {
    let generator = SchemaSettings::draft2019_09()
        .with(|settings| settings.inline_subschemas = true)
        .into_generator();
    let root_schema = generator.into_root_schema_for::<ImagegenArgs>();
    let serialized = serde_json::to_value(root_schema)
        .unwrap_or_else(|err| panic!("imagegen schema should serialize: {err}"));
    let Value::Object(mut root) = serialized else {
        unreachable!("imagegen root schema must be an object");
    };

    // Carry over only the keys that describe the function's input; any other
    // top-level keys of the generated schema are dropped.
    let mut input_schema = Map::new();
    input_schema.extend(
        ["properties", "required", "type", "additionalProperties"]
            .into_iter()
            .filter_map(|key| root.remove(key).map(|value| (key.to_string(), value))),
    );

    let parameters = parse_tool_input_schema(&Value::Object(input_schema))
        .unwrap_or_else(|err| panic!("imagegen input schema should parse: {err}"));
    let imagegen_tool = ResponsesApiTool {
        name: IMAGEGEN_TOOL_NAME.to_string(),
        description: IMAGEGEN_DESCRIPTION.to_string(),
        strict: false,
        parameters,
        output_schema: None,
        defer_loading: None,
    };

    ToolSpec::Namespace(ResponsesApiNamespace {
        name: IMAGE_GEN_NAMESPACE.to_string(),
        description: default_namespace_description(IMAGE_GEN_NAMESPACE),
        tools: vec![ResponsesApiNamespaceTool::Function(imagegen_tool)],
    })
}
/// Tool output produced for one image-generation call.
struct GeneratedImageOutput {
    /// Image payload, embedded after the `data:image/png;base64,` prefix when the
    /// response item is built.
    result: String,
    /// Optional text appended after the image in the response content items.
    output_hint: Option<String>,
}
impl ToolOutput for GeneratedImageOutput {
/// Avoids copying image bytes into tool-call telemetry.
fn log_preview(&self) -> String {
"[generated image]".to_string()
}
/// Reports a completed images request as successful tool execution.
fn success_for_logging(&self) -> bool {
true
}
/// Returns generated bytes and persisted-artifact context for the model's follow-up response.
fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
let mut content = vec![FunctionCallOutputContentItem::InputImage {
image_url: format!("data:image/png;base64,{}", self.result),
detail: Some(DEFAULT_IMAGE_DETAIL),
}];
if let Some(output_hint) = &self.output_hint {
content.push(FunctionCallOutputContentItem::InputText {
text: output_hint.clone(),
});
}
ResponseInputItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload {
body: FunctionCallOutputBody::ContentItems(content),
success: Some(true),
},
}
}
}
// Unit tests for this module live in `tests.rs` (located via `#[path]`) and are
// compiled only for test builds.
#[cfg(test)]
#[path = "tests.rs"]
mod tests;

View File

@@ -170,6 +170,8 @@ pub enum Feature {
ExternalMigration,
/// Allow the model to invoke the built-in image generation tool.
ImageGeneration,
/// Replace hosted image generation with the standalone image-generation extension.
ImageGenExt,
/// Allow prompting and installing missing MCP dependencies.
SkillMcpDependencyInstall,
/// Removed compatibility flag for deleted skill env var dependency prompting.
@@ -1053,6 +1055,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: true,
},
FeatureSpec {
id: Feature::ImageGenExt,
key: "imagegenext",
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::SkillMcpDependencyInstall,
key: "skill_mcp_dependency_install",

View File

@@ -244,6 +244,13 @@ fn image_generation_is_stable_and_enabled_by_default() {
assert_eq!(Feature::ImageGeneration.default_enabled(), true);
}
/// Pins the rollout state of the standalone image-generation extension flag:
/// under development, off by default, and reachable through the `imagegenext` key.
#[test]
fn image_generation_extension_is_under_development_and_disabled_by_default() {
    let stage = Feature::ImageGenExt.stage();
    let enabled_by_default = Feature::ImageGenExt.default_enabled();
    let resolved_from_key = feature_for_key("imagegenext");

    assert_eq!(stage, Stage::UnderDevelopment);
    assert_eq!(enabled_by_default, false);
    assert_eq!(resolved_from_key, Some(Feature::ImageGenExt));
}
#[test]
fn use_legacy_landlock_config_records_deprecation_notice() {
let mut entries = BTreeMap::new();