diff --git a/codex-rs/rmcp-client/src/bin/test_stdio_server.rs b/codex-rs/rmcp-client/src/bin/test_stdio_server.rs index 7805a7de9a..3719766acb 100644 --- a/codex-rs/rmcp-client/src/bin/test_stdio_server.rs +++ b/codex-rs/rmcp-client/src/bin/test_stdio_server.rs @@ -35,12 +35,19 @@ struct TestToolServer { const MEMO_URI: &str = "memo://codex/example-note"; const MEMO_CONTENT: &str = "This is a sample MCP resource served by the rmcp test server."; +const SMALL_PNG_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="; + pub fn stdio() -> (tokio::io::Stdin, tokio::io::Stdout) { (tokio::io::stdin(), tokio::io::stdout()) } + impl TestToolServer { fn new() -> Self { - let tools = vec![Self::echo_tool(), Self::image_tool()]; + let tools = vec![ + Self::echo_tool(), + Self::image_tool(), + Self::image_scenario_tool(), + ]; let resources = vec![Self::memo_resource()]; let resource_templates = vec![Self::memo_template()]; Self { @@ -86,6 +93,61 @@ impl TestToolServer { ) } + /// Tool intended for manual testing of Codex TUI rendering for MCP image tool results. + /// + /// This exists to exercise edge cases where a `CallToolResult.content` includes image blocks + /// that aren't the first item (or includes invalid image blocks before a valid image). + /// + /// Manual testing approach (Codex TUI): + /// - Build this binary: `cargo build -p codex-rmcp-client --bin test_stdio_server` + /// - Register it: + /// - `codex mcp add mcpimg -- /abs/path/to/test_stdio_server` + /// - Then in Codex TUI, ask it to call: + /// - `mcpimg.image_scenario({"scenario":"image_only"})` + /// - `mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"})` + /// - `mcpimg.image_scenario({"scenario":"invalid_base64_then_image"})` + /// - `mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"})` + /// - `mcpimg.image_scenario({"scenario":"multiple_valid_images"})` + /// - `mcpimg.image_scenario({"scenario":"image_then_text","caption":"Here is the image:"})` + /// - `mcpimg.image_scenario({"scenario":"text_only","caption":"Here is the image:"})` + /// - You should see an extra history cell: `tool result (image output)`. + fn image_scenario_tool() -> Tool { + #[expect(clippy::expect_used)] + let schema: JsonObject = serde_json::from_value(serde_json::json!({ + "type": "object", + "properties": { + "scenario": { + "type": "string", + "enum": [ + "image_only", + "text_then_image", + "invalid_base64_then_image", + "invalid_image_bytes_then_image", + "multiple_valid_images", + "image_then_text", + "text_only" + ] + }, + "caption": { "type": "string" }, + "data_url": { + "type": "string", + "description": "Optional data URL like ...; if omitted, uses a built-in tiny PNG." + } + }, + "required": ["scenario"], + "additionalProperties": false + })) + .expect("image_scenario tool schema should deserialize"); + + Tool::new( + Cow::Borrowed("image_scenario"), + Cow::Borrowed( + "Return content blocks for manual testing of MCP image rendering scenarios.", + ), + Arc::new(schema), + ) + } + fn memo_resource() -> Resource { let raw = RawResource { uri: MEMO_URI.to_string(), @@ -125,6 +187,32 @@ struct EchoArgs { env_var: Option, } +#[derive(Deserialize, Debug)] +#[serde(rename_all = "snake_case")] +/// Scenarios for `image_scenario`, intended to exercise Codex TUI handling of MCP image outputs. +/// +/// The key behavior under test is that the TUI should render an image output cell if *any* +/// decodable image block exists in the tool result content, even if the first block is text or an +/// invalid image. +enum ImageScenario { + ImageOnly, + TextThenImage, + InvalidBase64ThenImage, + InvalidImageBytesThenImage, + MultipleValidImages, + ImageThenText, + TextOnly, +} + +#[derive(Deserialize, Debug)] +struct ImageScenarioArgs { + scenario: ImageScenario, + #[serde(default)] + caption: Option, + #[serde(default)] + data_url: Option, +} + impl ServerHandler for TestToolServer { fn get_info(&self) -> ServerInfo { ServerInfo { @@ -244,14 +332,6 @@ impl ServerHandler for TestToolServer { ) })?; - fn parse_data_url(url: &str) -> Option<(String, String)> { - let rest = url.strip_prefix("data:")?; - let (mime_and_opts, data) = rest.split_once(',')?; - let (mime, _opts) = - mime_and_opts.split_once(';').unwrap_or((mime_and_opts, "")); - Some((mime.to_string(), data.to_string())) - } - let (mime_type, data_b64) = parse_data_url(&data_url).ok_or_else(|| { McpError::invalid_params( format!("invalid data URL for image tool: {data_url}"), @@ -263,6 +343,10 @@ impl ServerHandler for TestToolServer { data_b64, mime_type, )])) } + "image_scenario" => { + let args = Self::parse_call_args::(&request, "image_scenario")?; + Self::image_scenario_result(args) + } other => Err(McpError::invalid_params( format!("unknown tool: {other}"), None, @@ -271,6 +355,89 @@ impl ServerHandler for TestToolServer { } } +impl TestToolServer { + fn parse_call_args Deserialize<'de>>( + request: &CallToolRequestParam, + tool_name: &'static str, + ) -> Result { + match request.arguments.as_ref() { + Some(arguments) => serde_json::from_value(serde_json::Value::Object( + arguments.clone().into_iter().collect(), + )) + .map_err(|err| McpError::invalid_params(err.to_string(), None)), + None => Err(McpError::invalid_params( + format!("missing arguments for {tool_name} tool"), + None, + )), + } + } + + fn image_scenario_result(args: ImageScenarioArgs) -> Result { + let (mime_type, valid_data_b64) = if let Some(data_url) = &args.data_url { + parse_data_url(data_url).ok_or_else(|| { + McpError::invalid_params( + format!("invalid data_url for image_scenario tool: {data_url}"), + None, + ) + })? + } else { + ("image/png".to_string(), SMALL_PNG_BASE64.to_string()) + }; + + let caption = args + .caption + .unwrap_or_else(|| "Here is the image:".to_string()); + + let mut content = Vec::new(); + match args.scenario { + ImageScenario::ImageOnly => { + content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); + } + ImageScenario::TextThenImage => { + content.push(rmcp::model::Content::text(caption)); + content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); + } + ImageScenario::InvalidBase64ThenImage => { + content.push(rmcp::model::Content::image( + "not-base64".to_string(), + "image/png".to_string(), + )); + content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); + } + ImageScenario::InvalidImageBytesThenImage => { + content.push(rmcp::model::Content::image( + "bm90IGFuIGltYWdl".to_string(), + "image/png".to_string(), + )); + content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); + } + ImageScenario::MultipleValidImages => { + content.push(rmcp::model::Content::image( + valid_data_b64.clone(), + mime_type.clone(), + )); + content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); + } + ImageScenario::ImageThenText => { + content.push(rmcp::model::Content::image(valid_data_b64, mime_type)); + content.push(rmcp::model::Content::text(caption)); + } + ImageScenario::TextOnly => { + content.push(rmcp::model::Content::text(caption)); + } + } + + Ok(CallToolResult::success(content)) + } +} + +fn parse_data_url(url: &str) -> Option<(String, String)> { + let rest = url.strip_prefix("data:")?; + let (mime_and_opts, data) = rest.split_once(',')?; + let (mime, _opts) = mime_and_opts.split_once(';').unwrap_or((mime_and_opts, "")); + Some((mime.to_string(), data.to_string())) +} + #[tokio::main] async fn main() -> Result<(), Box> { eprintln!("starting rmcp test server"); diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index 02a148df4c..687d18fa6a 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -1429,44 +1429,62 @@ pub(crate) fn new_web_search_call( cell } -/// If the first content is an image, return a new cell with the image. -/// TODO(rgwood-dd): Handle images properly even if they're not the first result. +/// Returns an additional history cell if an MCP tool result includes a decodable image. +/// +/// This intentionally returns at most one cell: the first image in `CallToolResult.content` that +/// successfully base64-decodes and parses as an image. This is used as a lightweight “image output +/// exists” affordance separate from the main MCP tool call cell. +/// +/// Manual testing tip: +/// - Run the rmcp stdio test server (`codex-rs/rmcp-client/src/bin/test_stdio_server.rs`) and +/// register it as an MCP server via `codex mcp add`. +/// - Use its `image_scenario` tool with cases like `text_then_image`, +/// `invalid_base64_then_image`, or `invalid_image_bytes_then_image` to ensure this path triggers +/// even when the first block is not a valid image. fn try_new_completed_mcp_tool_call_with_image_output( result: &Result, ) -> Option { - match result { - Ok(mcp_types::CallToolResult { content, .. }) => { - if let Some(mcp_types::ContentBlock::ImageContent(image)) = content.first() { - let raw_data = match base64::engine::general_purpose::STANDARD.decode(&image.data) { - Ok(data) => data, - Err(e) => { - error!("Failed to decode image data: {e}"); - return None; - } - }; - let reader = match ImageReader::new(Cursor::new(raw_data)).with_guessed_format() { - Ok(reader) => reader, - Err(e) => { - error!("Failed to guess image format: {e}"); - return None; - } - }; + let image = result + .as_ref() + .ok()? + .content + .iter() + .find_map(decode_mcp_image)?; - let image = match reader.decode() { - Ok(image) => image, - Err(e) => { - error!("Image decoding failed: {e}"); - return None; - } - }; + Some(CompletedMcpToolCallWithImageOutput { _image: image }) +} - Some(CompletedMcpToolCallWithImageOutput { _image: image }) - } else { - None - } - } - _ => None, - } +/// Decodes an MCP `ImageContent` block into an in-memory image. +/// +/// Returns `None` when the block is not an image, when base64 decoding fails, when the format +/// cannot be inferred, or when the image decoder rejects the bytes. +fn decode_mcp_image(block: &mcp_types::ContentBlock) -> Option { + let image = match block { + mcp_types::ContentBlock::ImageContent(image) => image, + _ => return None, + }; + let raw_data = base64::engine::general_purpose::STANDARD + .decode(&image.data) + .map_err(|e| { + error!("Failed to decode image data: {e}"); + e + }) + .ok()?; + let reader = ImageReader::new(Cursor::new(raw_data)) + .with_guessed_format() + .map_err(|e| { + error!("Failed to guess image format: {e}"); + e + }) + .ok()?; + + reader + .decode() + .map_err(|e| { + error!("Image decoding failed: {e}"); + e + }) + .ok() } #[allow(clippy::disallowed_methods)] @@ -1929,9 +1947,12 @@ mod tests { use codex_core::protocol::ExecCommandSource; use mcp_types::CallToolResult; use mcp_types::ContentBlock; + use mcp_types::ImageContent; use mcp_types::TextContent; use mcp_types::Tool; use mcp_types::ToolInputSchema; + + const SMALL_PNG_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="; async fn test_config() -> Config { let codex_home = std::env::temp_dir(); ConfigBuilder::default() @@ -1957,6 +1978,15 @@ mod tests { render_lines(&cell.transcript_lines(u16::MAX)) } + fn image_block(data: &str) -> ContentBlock { + ContentBlock::ImageContent(ImageContent { + annotations: None, + data: data.to_string(), + mime_type: "image/png".into(), + r#type: "image".into(), + }) + } + #[test] fn unified_exec_interaction_cell_renders_input() { let cell = @@ -2251,6 +2281,63 @@ mod tests { insta::assert_snapshot!(rendered); } + #[test] + fn completed_mcp_tool_call_image_after_text_returns_extra_cell() { + let invocation = McpInvocation { + server: "image".into(), + tool: "generate".into(), + arguments: Some(json!({ + "prompt": "tiny image", + })), + }; + + let result = CallToolResult { + content: vec![ + ContentBlock::TextContent(TextContent { + annotations: None, + text: "Here is the image:".into(), + r#type: "text".into(), + }), + image_block(SMALL_PNG_BASE64), + ], + is_error: None, + structured_content: None, + }; + + let mut cell = new_active_mcp_tool_call("call-image".into(), invocation, true); + let extra_cell = cell + .complete(Duration::from_millis(25), Ok(result)) + .expect("expected image cell"); + + let rendered = render_lines(&extra_cell.display_lines(80)); + assert_eq!(rendered, vec!["tool result (image output)"]); + } + + #[test] + fn completed_mcp_tool_call_skips_invalid_image_blocks() { + let invocation = McpInvocation { + server: "image".into(), + tool: "generate".into(), + arguments: Some(json!({ + "prompt": "tiny image", + })), + }; + + let result = CallToolResult { + content: vec![image_block("not-base64"), image_block(SMALL_PNG_BASE64)], + is_error: None, + structured_content: None, + }; + + let mut cell = new_active_mcp_tool_call("call-image-2".into(), invocation, true); + let extra_cell = cell + .complete(Duration::from_millis(25), Ok(result)) + .expect("expected image cell"); + + let rendered = render_lines(&extra_cell.display_lines(80)); + assert_eq!(rendered, vec!["tool result (image output)"]); + } + #[test] fn completed_mcp_tool_call_error_snapshot() { let invocation = McpInvocation {