mirror of
https://github.com/openai/codex.git
synced 2026-06-02 11:22:01 +00:00
Compare commits
5 Commits
xli-codex/
...
won-standa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2455a3b0ff | ||
|
|
831758a2c0 | ||
|
|
53a566d88a | ||
|
|
2e9606e804 | ||
|
|
bf947390ba |
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -3042,6 +3042,7 @@ dependencies = [
|
||||
"codex-model-provider-info",
|
||||
"codex-protocol",
|
||||
"codex-tools",
|
||||
"codex-utils-absolute-path",
|
||||
"http 1.4.0",
|
||||
"pretty_assertions",
|
||||
"schemars 0.8.22",
|
||||
|
||||
220
codex-rs/app-server/tests/suite/v2/image_generation.rs
Normal file
220
codex-rs/app-server/tests/suite/v2/image_generation.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use app_test_support::ChatGptAuthFixture;
|
||||
use app_test_support::McpProcess;
|
||||
use app_test_support::to_response;
|
||||
use app_test_support::write_chatgpt_auth;
|
||||
use codex_app_server_protocol::ItemCompletedNotification;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::ThreadItem;
|
||||
use codex_app_server_protocol::ThreadStartParams;
|
||||
use codex_app_server_protocol::ThreadStartResponse;
|
||||
use codex_app_server_protocol::TurnStartParams;
|
||||
use codex_app_server_protocol::TurnStartResponse;
|
||||
use codex_app_server_protocol::UserInput as V2UserInput;
|
||||
use codex_config::types::AuthCredentialsStoreMode;
|
||||
use core_test_support::responses;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::timeout;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Base64 payload returned by the mocked image endpoint; it decodes to the
/// bytes `png`, which the test expects to find in the saved artifact.
const RESULT: &str = "cG5n";

/// Upper bound for each awaited app-server interaction in this test.
///
/// macOS and Windows Bazel CI can spend tens of seconds starting app-server
/// subprocesses or processing test RPCs under load, so those targets get a
/// longer budget than the rest.
const DEFAULT_READ_TIMEOUT: Duration =
    Duration::from_secs(if cfg!(any(target_os = "macos", windows)) {
        60
    } else {
        10
    });
|
||||
|
||||
#[tokio::test]
|
||||
async fn standalone_image_generation_returns_saved_path_hint_to_model() -> Result<()> {
|
||||
let call_id = "image-run-1";
|
||||
let server = responses::start_mock_server().await;
|
||||
mount_image_response(&server).await;
|
||||
|
||||
let response_mock = responses::mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call_with_namespace(
|
||||
call_id,
|
||||
"image_gen",
|
||||
"imagegen",
|
||||
&json!({
|
||||
"action": "generate",
|
||||
"prompt": "paint a blue whale",
|
||||
})
|
||||
.to_string(),
|
||||
),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
responses::sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "Done"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), &server.uri())?;
|
||||
write_chatgpt_auth(
|
||||
codex_home.path(),
|
||||
ChatGptAuthFixture::new("access-chatgpt"),
|
||||
AuthCredentialsStoreMode::File,
|
||||
)?;
|
||||
|
||||
let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let thread_req = mcp
|
||||
.send_thread_start_request(ThreadStartParams::default())
|
||||
.await?;
|
||||
let thread_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(thread_req)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(thread_resp)?;
|
||||
|
||||
let turn_req = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id,
|
||||
client_user_message_id: None,
|
||||
input: vec![V2UserInput::Text {
|
||||
text: "Generate an image".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let turn_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
|
||||
)
|
||||
.await??;
|
||||
let _turn: TurnStartResponse = to_response::<TurnStartResponse>(turn_resp)?;
|
||||
|
||||
let completed = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
wait_for_image_generation_completed(&mut mcp),
|
||||
)
|
||||
.await??;
|
||||
timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("turn/completed"),
|
||||
)
|
||||
.await??;
|
||||
|
||||
let ThreadItem::ImageGeneration {
|
||||
status,
|
||||
revised_prompt,
|
||||
result,
|
||||
saved_path: Some(saved_path),
|
||||
..
|
||||
} = completed.item
|
||||
else {
|
||||
panic!("expected completed image generation item with saved path");
|
||||
};
|
||||
assert_eq!(status, "completed");
|
||||
assert_eq!(revised_prompt.as_deref(), Some("paint a blue whale"));
|
||||
assert_eq!(result, RESULT);
|
||||
assert_eq!(std::fs::read(&saved_path)?, b"png");
|
||||
|
||||
let requests = response_mock.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let output = requests[1].function_call_output(call_id);
|
||||
assert_eq!(
|
||||
output["output"][0],
|
||||
json!({
|
||||
"type": "input_image",
|
||||
"image_url": format!("data:image/png;base64,{RESULT}"),
|
||||
"detail": "high",
|
||||
})
|
||||
);
|
||||
let output_hint = output["output"][1]["text"]
|
||||
.as_str()
|
||||
.context("image output should include model-visible path hint")?;
|
||||
assert!(
|
||||
output_hint.contains(&saved_path.display().to_string()),
|
||||
"output hint should identify the path core saved"
|
||||
);
|
||||
assert!(
|
||||
!requests[1]
|
||||
.message_input_texts("developer")
|
||||
.iter()
|
||||
.any(|text| text.contains("Generated images are saved to")),
|
||||
"standalone image generation should not emit the legacy developer-message hint"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_for_image_generation_completed(
|
||||
mcp: &mut McpProcess,
|
||||
) -> Result<ItemCompletedNotification> {
|
||||
loop {
|
||||
let notification = mcp
|
||||
.read_stream_until_notification_message("item/completed")
|
||||
.await?;
|
||||
let completed: ItemCompletedNotification = serde_json::from_value(
|
||||
notification
|
||||
.params
|
||||
.context("item/completed notification should include params")?,
|
||||
)?;
|
||||
if matches!(&completed.item, ThreadItem::ImageGeneration { .. }) {
|
||||
return Ok(completed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn mount_image_response(server: &MockServer) {
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/api/codex/images/generations"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(json!({
|
||||
"created": 1,
|
||||
"data": [{"b64_json": RESULT}],
|
||||
})))
|
||||
.expect(1)
|
||||
.mount(server)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Writes `config.toml` into `codex_home`, pointing both the ChatGPT base URL
/// and a custom OpenAI-style provider at `server_uri`, with the `imagegenext`
/// feature switched on and retries disabled.
///
/// # Errors
/// Returns the I/O error from writing the file.
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
    let config_path = codex_home.join("config.toml");
    let contents = format!(
        r#"
model = "mock-model"
approval_policy = "never"
sandbox_mode = "read-only"
model_provider = "openai-custom"
chatgpt_base_url = "{server_uri}"

[features]
imagegenext = true

[model_providers.openai-custom]
name = "OpenAI"
base_url = "{server_uri}/api/codex"
wire_api = "responses"
request_max_retries = 0
stream_max_retries = 0
supports_websockets = false
requires_openai_auth = true
"#
    );
    std::fs::write(config_path, contents)
}
|
||||
@@ -17,6 +17,7 @@ mod experimental_feature_list;
|
||||
mod external_agent_config;
|
||||
mod fs;
|
||||
mod hooks_list;
|
||||
mod image_generation;
|
||||
mod initialize;
|
||||
mod marketplace_add;
|
||||
mod marketplace_remove;
|
||||
|
||||
@@ -1,6 +1,16 @@
|
||||
use super::ContextualUserFragment;
|
||||
use std::fmt::Display;
|
||||
|
||||
/// Builds the model-facing hint describing where a generated image was saved.
///
/// The first line names the output directory and the full artifact path; the
/// second line tells the model to copy the image rather than move or delete
/// the original unless the user explicitly asks for deletion.
pub fn image_generation_output_hint(
    image_output_dir: impl Display,
    image_output_path: impl Display,
) -> String {
    const COPY_GUIDANCE: &str = "If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.";

    let location = format!(
        "Generated images are saved to {image_output_dir} as {image_output_path} by default."
    );
    [location.as_str(), COPY_GUIDANCE].join("\n")
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub(crate) struct ImageGenerationInstructions {
|
||||
image_output_dir: String,
|
||||
@@ -30,9 +40,6 @@ impl ContextualUserFragment for ImageGenerationInstructions {
|
||||
}
|
||||
|
||||
fn body(&self) -> String {
|
||||
format!(
|
||||
"Generated images are saved to {} as {} by default.\nIf you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
|
||||
self.image_output_dir, self.image_output_path
|
||||
)
|
||||
image_generation_output_hint(&self.image_output_dir, &self.image_output_path)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@ pub(crate) use fragments::AdditionalContextUserFragment;
|
||||
pub(crate) use guardian_followup_review_reminder::GuardianFollowupReviewReminder;
|
||||
pub(crate) use hook_additional_context::HookAdditionalContext;
|
||||
pub(crate) use image_generation_instructions::ImageGenerationInstructions;
|
||||
pub use image_generation_instructions::image_generation_output_hint;
|
||||
pub use internal_model_context::InternalContextSource;
|
||||
pub use internal_model_context::InternalModelContextFragment;
|
||||
pub use internal_model_context::InvalidInternalContextSource;
|
||||
|
||||
@@ -97,6 +97,7 @@ pub(crate) use skills::manager;
|
||||
pub(crate) use skills::maybe_emit_implicit_skill_invocation;
|
||||
pub(crate) use skills::skills_load_input_from_config;
|
||||
mod stream_events_utils;
|
||||
pub use stream_events_utils::image_generation_artifact_path;
|
||||
pub mod test_support;
|
||||
mod unified_exec;
|
||||
pub mod windows_sandbox;
|
||||
|
||||
@@ -38,7 +38,8 @@ use tracing::warn;
|
||||
|
||||
const GENERATED_IMAGE_ARTIFACTS_DIR: &str = "generated_images";
|
||||
|
||||
pub(crate) fn image_generation_artifact_path(
|
||||
/// Returns the host-owned default artifact path for a generated image.
|
||||
pub fn image_generation_artifact_path(
|
||||
codex_home: &AbsolutePathBuf,
|
||||
session_id: &str,
|
||||
call_id: &str,
|
||||
@@ -131,6 +132,7 @@ pub(crate) async fn persist_image_generation_item(
|
||||
turn_context: &TurnContext,
|
||||
image_item: &mut ImageGenerationItem,
|
||||
) -> Option<AbsolutePathBuf> {
|
||||
image_item.saved_path = None;
|
||||
let session_id = sess.conversation_id.to_string();
|
||||
match save_image_generation_result(
|
||||
&turn_context.config.codex_home,
|
||||
@@ -163,15 +165,12 @@ pub(crate) async fn persist_image_generation_item(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn finalize_image_generation_item(
|
||||
async fn record_image_generation_instructions(
|
||||
sess: &Session,
|
||||
turn_context: &TurnContext,
|
||||
image_item: &mut ImageGenerationItem,
|
||||
image_item: &ImageGenerationItem,
|
||||
) {
|
||||
if persist_image_generation_item(sess, turn_context, image_item)
|
||||
.await
|
||||
.is_none()
|
||||
{
|
||||
if image_item.saved_path.is_none() {
|
||||
return;
|
||||
}
|
||||
let session_id = sess.conversation_id.to_string();
|
||||
@@ -530,27 +529,16 @@ pub(crate) async fn handle_non_tool_response_item(
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::ImageGenerationCall { .. } => {
|
||||
let mut turn_item = parse_turn_item(item)?;
|
||||
if let TurnItemContributorPolicy::Run(turn_store) = contributor_policy {
|
||||
apply_turn_item_contributors(sess, turn_store, &mut turn_item).await;
|
||||
}
|
||||
if let TurnItem::AgentMessage(agent_message) = &mut turn_item {
|
||||
let combined = agent_message
|
||||
.content
|
||||
.iter()
|
||||
.map(|entry| match entry {
|
||||
codex_protocol::items::AgentMessageContent::Text { text } => text.as_str(),
|
||||
})
|
||||
.collect::<String>();
|
||||
let (stripped, memory_citation) =
|
||||
strip_hidden_assistant_markup_and_parse_memory_citation(&combined, plan_mode);
|
||||
agent_message.content =
|
||||
vec![codex_protocol::items::AgentMessageContent::Text { text: stripped }];
|
||||
if agent_message.memory_citation.is_none() {
|
||||
agent_message.memory_citation = memory_citation;
|
||||
}
|
||||
}
|
||||
if let TurnItem::ImageGeneration(image_item) = &mut turn_item {
|
||||
finalize_image_generation_item(sess, turn_context, image_item).await;
|
||||
finalize_turn_item(
|
||||
sess,
|
||||
turn_context,
|
||||
contributor_policy,
|
||||
&mut turn_item,
|
||||
plan_mode,
|
||||
)
|
||||
.await;
|
||||
if let TurnItem::ImageGeneration(image_item) = &turn_item {
|
||||
record_image_generation_instructions(sess, turn_context, image_item).await;
|
||||
}
|
||||
Some(turn_item)
|
||||
}
|
||||
@@ -564,6 +552,37 @@ pub(crate) async fn handle_non_tool_response_item(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn finalize_turn_item(
|
||||
sess: &Session,
|
||||
turn_context: &TurnContext,
|
||||
contributor_policy: TurnItemContributorPolicy<'_>,
|
||||
turn_item: &mut TurnItem,
|
||||
plan_mode: bool,
|
||||
) {
|
||||
if let TurnItemContributorPolicy::Run(turn_store) = contributor_policy {
|
||||
apply_turn_item_contributors(sess, turn_store, turn_item).await;
|
||||
}
|
||||
if let TurnItem::AgentMessage(agent_message) = &mut *turn_item {
|
||||
let combined = agent_message
|
||||
.content
|
||||
.iter()
|
||||
.map(|entry| match entry {
|
||||
codex_protocol::items::AgentMessageContent::Text { text } => text.as_str(),
|
||||
})
|
||||
.collect::<String>();
|
||||
let (stripped, memory_citation) =
|
||||
strip_hidden_assistant_markup_and_parse_memory_citation(&combined, plan_mode);
|
||||
agent_message.content =
|
||||
vec![codex_protocol::items::AgentMessageContent::Text { text: stripped }];
|
||||
if agent_message.memory_citation.is_none() {
|
||||
agent_message.memory_citation = memory_citation;
|
||||
}
|
||||
}
|
||||
if let TurnItem::ImageGeneration(image_item) = &mut *turn_item {
|
||||
persist_image_generation_item(sess, turn_context, image_item).await;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn last_assistant_message_from_item(
|
||||
item: &ResponseItem,
|
||||
plan_mode: bool,
|
||||
|
||||
@@ -4,19 +4,17 @@ use std::sync::Weak;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_tools::ConversationHistory;
|
||||
use codex_tools::ExtensionTurnItem;
|
||||
use codex_tools::ImageGenerationCompletionFuture;
|
||||
use codex_tools::ToolCall as ExtensionToolCall;
|
||||
use codex_tools::ToolName;
|
||||
use codex_tools::ToolSpec;
|
||||
use codex_tools::TurnItemEmissionFuture;
|
||||
use codex_tools::TurnItemEmitter;
|
||||
|
||||
use crate::context::ContextualUserFragment;
|
||||
use crate::context::ImageGenerationInstructions;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::session::session::Session;
|
||||
use crate::session::turn_context::TurnContext;
|
||||
use crate::stream_events_utils::persist_image_generation_item;
|
||||
use crate::stream_events_utils::TurnItemContributorPolicy;
|
||||
use crate::stream_events_utils::finalize_turn_item;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -71,6 +69,10 @@ struct CoreTurnItemEmitter {
|
||||
fn extension_turn_item(item: ExtensionTurnItem) -> TurnItem {
|
||||
match item {
|
||||
ExtensionTurnItem::WebSearch(item) => TurnItem::WebSearch(item),
|
||||
ExtensionTurnItem::ImageGeneration(mut item) => {
|
||||
item.saved_path = None;
|
||||
TurnItem::ImageGeneration(item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,8 +82,9 @@ impl TurnItemEmitter for CoreTurnItemEmitter {
|
||||
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
|
||||
return;
|
||||
};
|
||||
let item = extension_turn_item(item);
|
||||
session.emit_turn_item_started(turn.as_ref(), &item).await;
|
||||
session
|
||||
.emit_turn_item_started(turn.as_ref(), &extension_turn_item(item))
|
||||
.await;
|
||||
})
|
||||
}
|
||||
|
||||
@@ -90,60 +93,25 @@ impl TurnItemEmitter for CoreTurnItemEmitter {
|
||||
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
|
||||
return;
|
||||
};
|
||||
let item = extension_turn_item(item);
|
||||
let mut item = extension_turn_item(item);
|
||||
finalize_turn_item(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
TurnItemContributorPolicy::Run(turn.extension_data.as_ref()),
|
||||
&mut item,
|
||||
turn.collaboration_mode.mode == codex_protocol::config_types::ModeKind::Plan,
|
||||
)
|
||||
.await;
|
||||
session.emit_turn_item_completed(turn.as_ref(), item).await;
|
||||
})
|
||||
}
|
||||
|
||||
fn image_generation_completed<'a>(
|
||||
&'a self,
|
||||
call_id: String,
|
||||
prompt: String,
|
||||
result: String,
|
||||
) -> ImageGenerationCompletionFuture<'a> {
|
||||
Box::pin(async move {
|
||||
let (Some(session), Some(turn)) = (self.session.upgrade(), self.turn.upgrade()) else {
|
||||
return None;
|
||||
};
|
||||
let mut item = codex_protocol::items::ImageGenerationItem {
|
||||
id: call_id,
|
||||
status: "completed".to_string(),
|
||||
revised_prompt: Some(prompt),
|
||||
result,
|
||||
saved_path: None,
|
||||
};
|
||||
let output_hint =
|
||||
persist_image_generation_item(session.as_ref(), turn.as_ref(), &mut item)
|
||||
.await
|
||||
.map(|saved_path| {
|
||||
let output_dir = saved_path
|
||||
.parent()
|
||||
.unwrap_or_else(|| turn.config.codex_home.clone());
|
||||
ImageGenerationInstructions::new(output_dir.display(), saved_path.display())
|
||||
.body()
|
||||
});
|
||||
let started_item = codex_protocol::items::ImageGenerationItem {
|
||||
id: item.id.clone(),
|
||||
status: "in_progress".to_string(),
|
||||
revised_prompt: None,
|
||||
result: String::new(),
|
||||
saved_path: None,
|
||||
};
|
||||
session
|
||||
.emit_turn_item_started(turn.as_ref(), &TurnItem::ImageGeneration(started_item))
|
||||
.await;
|
||||
session
|
||||
.emit_turn_item_completed(turn.as_ref(), TurnItem::ImageGeneration(item))
|
||||
.await;
|
||||
output_hint
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn to_extension_call(invocation: &ToolInvocation) -> ExtensionToolCall {
|
||||
let conversation_history =
|
||||
ConversationHistory::new(invocation.session.clone_history().await.into_raw_items());
|
||||
ExtensionToolCall {
|
||||
thread_id: invocation.session.conversation_id.to_string(),
|
||||
turn_id: invocation.turn.sub_id.clone(),
|
||||
call_id: invocation.call_id.clone(),
|
||||
tool_name: invocation.tool_name.clone(),
|
||||
@@ -161,6 +129,8 @@ async fn to_extension_call(invocation: &ToolInvocation) -> ExtensionToolCall {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_extension_api::ExtensionData;
|
||||
use codex_extension_api::TurnItemContributor;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::items::WebSearchItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
@@ -168,10 +138,13 @@ mod tests {
|
||||
use codex_protocol::models::WebSearchAction;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_tools::ExtensionTurnItem;
|
||||
use codex_utils_absolute_path::test_support::PathExt;
|
||||
use codex_utils_absolute_path::test_support::test_path_buf;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::CoreTurnItemEmitter;
|
||||
use super::ExtensionToolAdapter;
|
||||
use crate::tools::context::ToolCallSource;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
@@ -306,6 +279,7 @@ mod tests {
|
||||
let (session, turn, rx) = crate::session::tests::make_session_and_context_with_rx().await;
|
||||
let weak_session = Arc::downgrade(&session);
|
||||
let weak_turn = Arc::downgrade(&turn);
|
||||
let thread_id = session.conversation_id.to_string();
|
||||
let turn_id = turn.sub_id.clone();
|
||||
let truncation_policy = turn.truncation_policy;
|
||||
let history_item = ResponseItem::Message {
|
||||
@@ -344,6 +318,7 @@ mod tests {
|
||||
let captured_call = captured_call.lock().await.clone().expect("captured call");
|
||||
assert!(weak_session.upgrade().is_none());
|
||||
assert!(weak_turn.upgrade().is_none());
|
||||
assert_eq!(captured_call.thread_id, thread_id);
|
||||
assert_eq!(captured_call.turn_id, turn_id);
|
||||
assert_eq!(captured_call.call_id, "call-extension");
|
||||
assert_eq!(
|
||||
@@ -401,8 +376,54 @@ mod tests {
|
||||
assert_eq!(end.action, expected.action);
|
||||
}
|
||||
|
||||
struct ImageGenerationExtensionExecutor {
|
||||
output_hint: Arc<Mutex<Option<String>>>,
|
||||
struct ImageGenerationExtensionExecutor;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ExtensionTurnItemContributorRan;
|
||||
|
||||
struct RecordExtensionTurnItemContributor;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl TurnItemContributor for RecordExtensionTurnItemContributor {
|
||||
async fn contribute(
|
||||
&self,
|
||||
_thread_store: &ExtensionData,
|
||||
turn_store: &ExtensionData,
|
||||
_item: &mut TurnItem,
|
||||
) -> Result<(), String> {
|
||||
turn_store.insert(ExtensionTurnItemContributorRan);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn extension_completion_runs_turn_item_contributors() {
|
||||
let (mut session, turn) = crate::session::tests::make_session_and_context().await;
|
||||
let mut builder = codex_extension_api::ExtensionRegistryBuilder::new();
|
||||
builder.turn_item_contributor(Arc::new(RecordExtensionTurnItemContributor));
|
||||
session.services.extensions = Arc::new(builder.build());
|
||||
let session = Arc::new(session);
|
||||
let turn = Arc::new(turn);
|
||||
let emitter = CoreTurnItemEmitter {
|
||||
session: Arc::downgrade(&session),
|
||||
turn: Arc::downgrade(&turn),
|
||||
};
|
||||
|
||||
codex_tools::TurnItemEmitter::emit_completed(
|
||||
&emitter,
|
||||
ExtensionTurnItem::WebSearch(WebSearchItem {
|
||||
id: "search-1".to_string(),
|
||||
query: "contributors".to_string(),
|
||||
action: WebSearchAction::Other,
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
turn.extension_data
|
||||
.get::<ExtensionTurnItemContributorRan>()
|
||||
.is_some()
|
||||
);
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -426,15 +447,28 @@ mod tests {
|
||||
&self,
|
||||
call: codex_tools::ToolCall,
|
||||
) -> Result<Box<dyn codex_tools::ToolOutput>, codex_tools::FunctionCallError> {
|
||||
let output_hint = call
|
||||
.turn_item_emitter
|
||||
.image_generation_completed(
|
||||
call.call_id,
|
||||
"A tiny blue square".to_string(),
|
||||
"cG5n".to_string(),
|
||||
)
|
||||
call.turn_item_emitter
|
||||
.emit_started(ExtensionTurnItem::ImageGeneration(
|
||||
codex_protocol::items::ImageGenerationItem {
|
||||
id: call.call_id.clone(),
|
||||
status: "in_progress".to_string(),
|
||||
revised_prompt: None,
|
||||
result: String::new(),
|
||||
saved_path: None,
|
||||
},
|
||||
))
|
||||
.await;
|
||||
call.turn_item_emitter
|
||||
.emit_completed(ExtensionTurnItem::ImageGeneration(
|
||||
codex_protocol::items::ImageGenerationItem {
|
||||
id: call.call_id,
|
||||
status: "completed".to_string(),
|
||||
revised_prompt: Some("A tiny blue square".to_string()),
|
||||
result: "cG5n".to_string(),
|
||||
saved_path: Some(test_path_buf("/tmp/extension-claimed.png").abs()),
|
||||
},
|
||||
))
|
||||
.await;
|
||||
*self.output_hint.lock().await = output_hint;
|
||||
Ok(Box::new(codex_tools::JsonToolOutput::new(
|
||||
json!({ "ok": true }),
|
||||
)))
|
||||
@@ -443,10 +477,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_generation_publication_is_finalized_by_core() {
|
||||
let output_hint = Arc::new(Mutex::new(None));
|
||||
let handler = ExtensionToolAdapter::new(Arc::new(ImageGenerationExtensionExecutor {
|
||||
output_hint: Arc::clone(&output_hint),
|
||||
}));
|
||||
let handler = ExtensionToolAdapter::new(Arc::new(ImageGenerationExtensionExecutor));
|
||||
let (session, turn, rx) = crate::session::tests::make_session_and_context_with_rx().await;
|
||||
let expected_path = crate::stream_events_utils::image_generation_artifact_path(
|
||||
&turn.config.codex_home,
|
||||
@@ -513,17 +544,5 @@ mod tests {
|
||||
std::fs::read(&expected_path).expect("generated artifact should be saved"),
|
||||
b"png"
|
||||
);
|
||||
assert_eq!(
|
||||
*output_hint.lock().await,
|
||||
Some(format!(
|
||||
"Generated images are saved to {} as {} by default.\n\
|
||||
If you need to use a generated image at another path, copy it and leave the original in place unless the user explicitly asks you to delete it.",
|
||||
expected_path
|
||||
.parent()
|
||||
.expect("generated image path should have a parent")
|
||||
.display(),
|
||||
expected_path.display(),
|
||||
))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@ pub use capabilities::ResponseItemInjector;
|
||||
pub use codex_tools::ConversationHistory;
|
||||
pub use codex_tools::ExtensionTurnItem;
|
||||
pub use codex_tools::FunctionCallError;
|
||||
pub use codex_tools::ImageGenerationCompletionFuture;
|
||||
pub use codex_tools::JsonToolOutput;
|
||||
pub use codex_tools::NoopTurnItemEmitter;
|
||||
pub use codex_tools::ResponsesApiTool;
|
||||
|
||||
@@ -1126,6 +1126,7 @@ fn tool_by_name<'a>(
|
||||
|
||||
fn tool_call(tool_name: &str, call_id: &str, arguments: serde_json::Value) -> ToolCall {
|
||||
ToolCall {
|
||||
thread_id: "thread-1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
call_id: call_id.to_string(),
|
||||
tool_name: codex_extension_api::ToolName::plain(tool_name),
|
||||
|
||||
@@ -23,6 +23,7 @@ codex-model-provider = { workspace = true }
|
||||
codex-model-provider-info = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-tools = { workspace = true }
|
||||
codex-utils-absolute-path = { workspace = true }
|
||||
http = { workspace = true }
|
||||
schemars = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
|
||||
@@ -13,6 +13,7 @@ use codex_features::Feature;
|
||||
use codex_login::AuthManager;
|
||||
use codex_model_provider::create_model_provider;
|
||||
use codex_model_provider_info::ModelProviderInfo;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
|
||||
use crate::backend::CodexImagesBackend;
|
||||
use crate::tool::ImageGenerationTool;
|
||||
@@ -26,6 +27,7 @@ struct ImageGenerationExtension {
|
||||
struct ImageGenerationExtensionConfig {
|
||||
enabled: bool,
|
||||
provider: ModelProviderInfo,
|
||||
codex_home: AbsolutePathBuf,
|
||||
}
|
||||
|
||||
impl From<&Config> for ImageGenerationExtensionConfig {
|
||||
@@ -35,6 +37,7 @@ impl From<&Config> for ImageGenerationExtensionConfig {
|
||||
enabled: config.features.enabled(Feature::ImageGenExt)
|
||||
&& config.model_provider.is_openai(),
|
||||
provider: config.model_provider.clone(),
|
||||
codex_home: config.codex_home.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -76,9 +79,13 @@ impl ToolContributor for ImageGenerationExtension {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
vec![Arc::new(ImageGenerationTool::new(CodexImagesBackend::new(
|
||||
create_model_provider(config.provider.clone(), Some(self.auth_manager.clone())),
|
||||
)))]
|
||||
vec![Arc::new(ImageGenerationTool::new(
|
||||
CodexImagesBackend::new(create_model_provider(
|
||||
config.provider.clone(),
|
||||
Some(self.auth_manager.clone()),
|
||||
)),
|
||||
config.codex_home.clone(),
|
||||
))]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ use codex_api::ImageEditRequest;
|
||||
use codex_api::ImageGenerationRequest;
|
||||
use codex_api::ImageQuality;
|
||||
use codex_api::ImageUrl;
|
||||
use codex_core::context::image_generation_output_hint;
|
||||
use codex_extension_api::ToolOutput;
|
||||
use codex_extension_api::ToolPayload;
|
||||
use codex_extension_api::ToolSpec;
|
||||
@@ -26,7 +27,6 @@ use crate::IMAGE_GEN_NAMESPACE;
|
||||
use crate::IMAGEGEN_TOOL_NAME;
|
||||
|
||||
const RESULT: &str = "cG5n";
|
||||
const OUTPUT_HINT: &str = "Generated images are saved to /tmp as /tmp/call-1.png by default.";
|
||||
|
||||
#[test]
|
||||
fn uses_reserved_image_gen_namespace() {
|
||||
@@ -56,9 +56,10 @@ fn generate_uses_fixed_request_defaults() {
|
||||
|
||||
#[test]
|
||||
fn generated_output_returns_image_input_and_output_hint() {
|
||||
let output_hint = image_generation_output_hint("/tmp", "/tmp/call-1.png");
|
||||
let output = GeneratedImageOutput {
|
||||
result: RESULT.to_string(),
|
||||
output_hint: Some(OUTPUT_HINT.to_string()),
|
||||
output_hint: output_hint.clone(),
|
||||
};
|
||||
|
||||
let ResponseInputItem::FunctionCallOutput {
|
||||
@@ -78,9 +79,7 @@ fn generated_output_returns_image_input_and_output_hint() {
|
||||
image_url: format!("data:image/png;base64,{RESULT}"),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: OUTPUT_HINT.to_string(),
|
||||
},
|
||||
FunctionCallOutputContentItem::InputText { text: output_hint },
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -3,6 +3,9 @@ use codex_api::ImageEditRequest;
|
||||
use codex_api::ImageGenerationRequest;
|
||||
use codex_api::ImageQuality;
|
||||
use codex_api::ImageUrl;
|
||||
use codex_core::context::image_generation_output_hint;
|
||||
use codex_core::image_generation_artifact_path;
|
||||
use codex_extension_api::ExtensionTurnItem;
|
||||
use codex_extension_api::FunctionCallError;
|
||||
use codex_extension_api::ToolCall;
|
||||
use codex_extension_api::ToolExecutor;
|
||||
@@ -11,6 +14,7 @@ use codex_extension_api::ToolOutput;
|
||||
use codex_extension_api::ToolPayload;
|
||||
use codex_extension_api::ToolSpec;
|
||||
use codex_extension_api::parse_tool_input_schema;
|
||||
use codex_protocol::items::ImageGenerationItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DEFAULT_IMAGE_DETAIL;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
@@ -23,6 +27,7 @@ use codex_tools::ResponsesApiNamespaceTool;
|
||||
use codex_tools::ResponsesApiTool;
|
||||
use codex_tools::ToolExposure;
|
||||
use codex_tools::default_namespace_description;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use schemars::JsonSchema;
|
||||
use schemars::r#gen::SchemaSettings;
|
||||
use serde::Deserialize;
|
||||
@@ -40,12 +45,16 @@ const IMAGEGEN_DESCRIPTION: &str = include_str!("../imagegen_description.md");
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ImageGenerationTool {
|
||||
backend: CodexImagesBackend,
|
||||
codex_home: AbsolutePathBuf,
|
||||
}
|
||||
|
||||
impl ImageGenerationTool {
|
||||
/// Creates an image-generation tool backed by an image API executor.
|
||||
pub(crate) fn new(backend: CodexImagesBackend) -> Self {
|
||||
Self { backend }
|
||||
pub(crate) fn new(backend: CodexImagesBackend, codex_home: AbsolutePathBuf) -> Self {
|
||||
Self {
|
||||
backend,
|
||||
codex_home,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -84,6 +93,15 @@ impl ToolExecutor<ToolCall> for ImageGenerationTool {
|
||||
async fn handle(&self, call: ToolCall) -> Result<Box<dyn ToolOutput>, FunctionCallError> {
|
||||
let args = parse_args(&call)?;
|
||||
let request = request_for_action(&args, call.conversation_history.items())?;
|
||||
call.turn_item_emitter
|
||||
.emit_started(ExtensionTurnItem::ImageGeneration(ImageGenerationItem {
|
||||
id: call.call_id.clone(),
|
||||
status: "in_progress".to_string(),
|
||||
revised_prompt: None,
|
||||
result: String::new(),
|
||||
saved_path: None,
|
||||
}))
|
||||
.await;
|
||||
let response = match request {
|
||||
ImageRequest::Generate(request) => self.backend.generate(request).await,
|
||||
ImageRequest::Edit(request) => self.backend.edit(request).await,
|
||||
@@ -96,10 +114,21 @@ impl ToolExecutor<ToolCall> for ImageGenerationTool {
|
||||
"image generation returned no image data".to_string(),
|
||||
));
|
||||
};
|
||||
let output_hint = call
|
||||
.turn_item_emitter
|
||||
.image_generation_completed(call.call_id.clone(), args.prompt, result.clone())
|
||||
call.turn_item_emitter
|
||||
.emit_completed(ExtensionTurnItem::ImageGeneration(ImageGenerationItem {
|
||||
id: call.call_id.clone(),
|
||||
status: "completed".to_string(),
|
||||
revised_prompt: Some(args.prompt),
|
||||
result: result.clone(),
|
||||
saved_path: None,
|
||||
}))
|
||||
.await;
|
||||
let output_path =
|
||||
image_generation_artifact_path(&self.codex_home, &call.thread_id, &call.call_id);
|
||||
let output_dir = output_path
|
||||
.parent()
|
||||
.unwrap_or_else(|| self.codex_home.clone());
|
||||
let output_hint = image_generation_output_hint(output_dir.display(), output_path.display());
|
||||
Ok(Box::new(GeneratedImageOutput {
|
||||
result,
|
||||
output_hint,
|
||||
@@ -284,7 +313,7 @@ fn imagegen_tool_spec() -> ToolSpec {
|
||||
|
||||
struct GeneratedImageOutput {
|
||||
result: String,
|
||||
output_hint: Option<String>,
|
||||
output_hint: String,
|
||||
}
|
||||
|
||||
impl ToolOutput for GeneratedImageOutput {
|
||||
@@ -304,11 +333,9 @@ impl ToolOutput for GeneratedImageOutput {
|
||||
image_url: format!("data:image/png;base64,{}", self.result),
|
||||
detail: Some(DEFAULT_IMAGE_DETAIL),
|
||||
}];
|
||||
if let Some(output_hint) = &self.output_hint {
|
||||
content.push(FunctionCallOutputContentItem::InputText {
|
||||
text: output_hint.clone(),
|
||||
});
|
||||
}
|
||||
content.push(FunctionCallOutputContentItem::InputText {
|
||||
text: self.output_hint.clone(),
|
||||
});
|
||||
ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
|
||||
@@ -208,6 +208,7 @@ async fn add_ad_hoc_note_tool_creates_note_file() {
|
||||
|
||||
let output = tool
|
||||
.handle(ToolCall {
|
||||
thread_id: "thread-1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: memory_tool_name(crate::ADD_AD_HOC_NOTE_TOOL_NAME),
|
||||
@@ -250,6 +251,7 @@ async fn add_ad_hoc_note_tool_rejects_paths_as_filenames() {
|
||||
|
||||
let result = tool
|
||||
.handle(ToolCall {
|
||||
thread_id: "thread-1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: memory_tool_name(crate::ADD_AD_HOC_NOTE_TOOL_NAME),
|
||||
@@ -293,6 +295,7 @@ async fn read_tool_reads_memory_file() {
|
||||
|
||||
let output = tool
|
||||
.handle(ToolCall {
|
||||
thread_id: "thread-1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: memory_tool_name(crate::READ_TOOL_NAME),
|
||||
@@ -339,6 +342,7 @@ async fn search_tool_accepts_multiple_queries() {
|
||||
|
||||
let output = tool
|
||||
.handle(ToolCall {
|
||||
thread_id: "thread-1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: memory_tool_name(crate::SEARCH_TOOL_NAME),
|
||||
@@ -411,6 +415,7 @@ async fn search_tool_accepts_windowed_all_match_mode() {
|
||||
|
||||
let output = tool
|
||||
.handle(ToolCall {
|
||||
thread_id: "thread-1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: memory_tool_name(crate::SEARCH_TOOL_NAME),
|
||||
@@ -463,6 +468,7 @@ async fn search_tool_rejects_legacy_single_query() {
|
||||
|
||||
let result = tool
|
||||
.handle(ToolCall {
|
||||
thread_id: "thread-1".to_string(),
|
||||
turn_id: "turn-1".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: memory_tool_name(crate::SEARCH_TOOL_NAME),
|
||||
|
||||
@@ -63,7 +63,6 @@ pub use responses_api::mcp_tool_to_responses_api_tool;
|
||||
pub use responses_api::tool_definition_to_responses_api_tool;
|
||||
pub use tool_call::ConversationHistory;
|
||||
pub use tool_call::ExtensionTurnItem;
|
||||
pub use tool_call::ImageGenerationCompletionFuture;
|
||||
pub use tool_call::NoopTurnItemEmitter;
|
||||
pub use tool_call::ToolCall;
|
||||
pub use tool_call::TurnItemEmissionFuture;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use crate::FunctionCallError;
|
||||
use crate::ToolName;
|
||||
use crate::ToolPayload;
|
||||
use codex_protocol::items::ImageGenerationItem;
|
||||
use codex_protocol::items::WebSearchItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_utils_output_truncation::TruncationPolicy;
|
||||
@@ -29,20 +30,14 @@ impl ConversationHistory {
|
||||
/// Future returned when an extension tool emits a visible turn-item lifecycle event.
|
||||
pub type TurnItemEmissionFuture<'a> = Pin<Box<dyn Future<Output = ()> + Send + 'a>>;
|
||||
|
||||
/// Future returned when an image-generation extension publishes completed image bytes.
|
||||
pub type ImageGenerationCompletionFuture<'a> =
|
||||
Pin<Box<dyn Future<Output = Option<String>> + Send + 'a>>;
|
||||
|
||||
/// Visible turn items that an extension fully owns and may emit as-is.
|
||||
///
|
||||
/// Add only item kinds that require no additional host finalization before
|
||||
/// persistence or client delivery. Richer items need a host-owned publish path.
|
||||
/// Visible turn items that an extension may publish into the host lifecycle.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum ExtensionTurnItem {
|
||||
WebSearch(WebSearchItem),
|
||||
ImageGeneration(ImageGenerationItem),
|
||||
}
|
||||
|
||||
/// Host-provided capability for extension tools to emit finalized visible turn items.
|
||||
/// Host-provided capability for extension tools to emit visible turn items.
|
||||
///
|
||||
/// Implementations route lifecycle events through the host's normal item event
|
||||
/// pipeline, including any persistence and client delivery owned by the host.
|
||||
@@ -50,21 +45,8 @@ pub trait TurnItemEmitter: Send + Sync {
|
||||
/// Emits the beginning of one visible turn item.
|
||||
fn emit_started<'a>(&'a self, item: ExtensionTurnItem) -> TurnItemEmissionFuture<'a>;
|
||||
|
||||
/// Emits the completion of one visible turn item.
|
||||
/// Emits one visible turn item after host-owned finalization.
|
||||
fn emit_completed<'a>(&'a self, item: ExtensionTurnItem) -> TurnItemEmissionFuture<'a>;
|
||||
|
||||
/// Publishes image bytes for host persistence and visible completion.
|
||||
///
|
||||
/// Returns persisted-artifact context for the extension's model-facing
|
||||
/// function output when the host saves the generated image successfully.
|
||||
fn image_generation_completed<'a>(
|
||||
&'a self,
|
||||
_call_id: String,
|
||||
_prompt: String,
|
||||
_result: String,
|
||||
) -> ImageGenerationCompletionFuture<'a> {
|
||||
Box::pin(std::future::ready(None))
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn-item emitter used when a caller does not expose visible item emission.
|
||||
@@ -84,6 +66,7 @@ impl TurnItemEmitter for NoopTurnItemEmitter {
|
||||
// TODO: this is temporary and will disappear in the next PR (as we make codex-extension-api generic on Invocation).
|
||||
#[derive(Clone)]
|
||||
pub struct ToolCall {
|
||||
pub thread_id: String,
|
||||
pub turn_id: String,
|
||||
pub call_id: String,
|
||||
pub tool_name: ToolName,
|
||||
@@ -96,6 +79,7 @@ pub struct ToolCall {
|
||||
impl std::fmt::Debug for ToolCall {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("ToolCall")
|
||||
.field("thread_id", &self.thread_id)
|
||||
.field("turn_id", &self.turn_id)
|
||||
.field("call_id", &self.call_id)
|
||||
.field("tool_name", &self.tool_name)
|
||||
|
||||
Reference in New Issue
Block a user