From 8543e39885956b5d38b416acd1868b5f643cc0ed Mon Sep 17 00:00:00 2001 From: Curtis 'Fjord' Hawthorne Date: Fri, 15 May 2026 15:04:04 -0700 Subject: [PATCH] Preserve image detail in app-server inputs (#20693) ## Summary - Add optional image detail to user image inputs across core, app-server v2, thread history/event mapping, and the generated app-server schemas/types. - Preserve requested detail when serializing Responses image inputs: omitted detail stays on the existing `high` default, while explicit `original` keeps local images on the original-resolution path. - Support `high`/`original` consistently for tool image outputs, including MCP `codex/imageDetail`, code-mode image helpers, and `view_image`. --- .../analytics/src/analytics_client_tests.rs | 2 + .../schema/json/ClientRequest.json | 24 ++- .../schema/json/ServerNotification.json | 29 +++ .../codex_app_server_protocol.schemas.json | 24 ++- .../codex_app_server_protocol.v2.schemas.json | 24 ++- .../json/v2/ItemCompletedNotification.json | 29 +++ .../json/v2/ItemStartedNotification.json | 29 +++ .../RawResponseItemCompletedNotification.json | 2 - .../schema/json/v2/ReviewStartResponse.json | 29 +++ .../schema/json/v2/ThreadForkResponse.json | 29 +++ .../schema/json/v2/ThreadListResponse.json | 29 +++ .../json/v2/ThreadMetadataUpdateResponse.json | 29 +++ .../schema/json/v2/ThreadReadResponse.json | 29 +++ .../schema/json/v2/ThreadResumeParams.json | 2 - .../schema/json/v2/ThreadResumeResponse.json | 29 +++ .../json/v2/ThreadRollbackResponse.json | 29 +++ .../schema/json/v2/ThreadStartResponse.json | 29 +++ .../json/v2/ThreadStartedNotification.json | 29 +++ .../json/v2/ThreadUnarchiveResponse.json | 29 +++ .../json/v2/TurnCompletedNotification.json | 29 +++ .../schema/json/v2/TurnStartParams.json | 29 +++ .../schema/json/v2/TurnStartResponse.json | 29 +++ .../json/v2/TurnStartedNotification.json | 29 +++ .../schema/json/v2/TurnSteerParams.json | 29 +++ .../schema/typescript/ImageDetail.ts | 2 +- 
.../schema/typescript/v2/UserInput.ts | 3 +- .../src/protocol/thread_history.rs | 91 ++++++++- .../src/protocol/v2/tests.rs | 32 ++++ .../src/protocol/v2/turn.rs | 21 +- .../app-server/src/bespoke_event_handling.rs | 2 + .../thread_processor_tests.rs | 1 + .../request_processors/token_usage_replay.rs | 2 + .../app-server/tests/suite/v2/thread_read.rs | 1 + .../app-server/tests/suite/v2/turn_start.rs | 180 ++++++++++++------ codex-rs/cli/src/main.rs | 2 +- codex-rs/code-mode/src/description.rs | 2 +- codex-rs/code-mode/src/response.rs | 2 - codex-rs/code-mode/src/runtime/value.rs | 8 +- codex-rs/code-mode/src/service.rs | 36 +++- codex-rs/core/src/agent/control.rs | 4 +- codex-rs/core/src/event_mapping.rs | 3 +- codex-rs/core/src/event_mapping_tests.rs | 20 +- .../core/src/personality_migration_tests.rs | 1 + .../session/rollout_reconstruction_tests.rs | 23 +++ codex-rs/core/src/session/tests.rs | 10 + codex-rs/core/src/thread_manager_tests.rs | 2 + .../src/tools/code_mode/response_adapter.rs | 2 - .../core/src/tools/handlers/view_image.rs | 84 +++++++- .../src/tools/handlers/view_image_spec.rs | 14 +- codex-rs/core/src/tools/spec_plan_tests.rs | 17 +- codex-rs/core/tests/suite/code_mode.rs | 2 +- codex-rs/core/tests/suite/compact.rs | 1 + codex-rs/core/tests/suite/compact_remote.rs | 5 +- .../core/tests/suite/compact_remote_parity.rs | 1 + codex-rs/core/tests/suite/image_rollout.rs | 2 + codex-rs/core/tests/suite/model_switching.rs | 1 + .../core/tests/suite/personality_migration.rs | 1 + codex-rs/core/tests/suite/resume_warning.rs | 1 + codex-rs/core/tests/suite/sqlite_state.rs | 1 + codex-rs/core/tests/suite/view_image.rs | 4 +- codex-rs/exec/src/lib.rs | 4 +- .../external-agent-sessions/src/export.rs | 1 + .../tests/suite/otel_export_routing_policy.rs | 2 + codex-rs/protocol/src/items.rs | 40 +++- codex-rs/protocol/src/models.rs | 100 ++++++++-- codex-rs/protocol/src/protocol.rs | 68 ++++++- codex-rs/protocol/src/user_input.rs | 16 +- 
codex-rs/rollout/src/recorder_tests.rs | 2 + codex-rs/rollout/src/tests.rs | 1 + codex-rs/state/src/extract.rs | 4 + codex-rs/thread-store/src/local/mod.rs | 1 + .../thread-store/src/thread_metadata_sync.rs | 1 + codex-rs/tools/src/image_detail.rs | 2 +- codex-rs/tools/src/image_detail_tests.rs | 8 +- codex-rs/tui/src/app/tests.rs | 2 +- .../tui/src/chatwidget/input_submission.rs | 2 + .../chatwidget/tests/composer_submission.rs | 9 +- .../src/chatwidget/tests/history_replay.rs | 4 + .../tui/src/chatwidget/tests/review_mode.rs | 1 + codex-rs/tui/src/chatwidget/user_messages.rs | 4 +- codex-rs/tui/src/ide_context/prompt.rs | 2 + 81 files changed, 1302 insertions(+), 156 deletions(-) diff --git a/codex-rs/analytics/src/analytics_client_tests.rs b/codex-rs/analytics/src/analytics_client_tests.rs index ed626699d3..f3c56bfb3d 100644 --- a/codex-rs/analytics/src/analytics_client_tests.rs +++ b/codex-rs/analytics/src/analytics_client_tests.rs @@ -279,6 +279,7 @@ fn sample_turn_start_request(thread_id: &str, request_id: i64) -> ClientRequest }, UserInput::Image { url: "https://example.com/a.png".to_string(), + detail: None, }, ], ..Default::default() @@ -399,6 +400,7 @@ fn sample_turn_steer_request( }, UserInput::LocalImage { path: "/tmp/a.png".into(), + detail: None, }, ], responsesapi_client_metadata: None, diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index fffc610128..5fc44d4730 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -1235,8 +1235,6 @@ }, "ImageDetail": { "enum": [ - "auto", - "low", "high", "original" ], @@ -4250,6 +4248,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -4270,6 +4279,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + 
"$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/ServerNotification.json b/codex-rs/app-server-protocol/schema/json/ServerNotification.json index a5fce14a4f..6e8b5d8a04 100644 --- a/codex-rs/app-server-protocol/schema/json/ServerNotification.json +++ b/codex-rs/app-server-protocol/schema/json/ServerNotification.json @@ -1932,6 +1932,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "ItemCompletedNotification": { "properties": { "completedAtMs": { @@ -4593,6 +4600,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -4613,6 +4631,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index d1b318889b..5813afcdb2 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -9920,8 +9920,6 @@ }, "ImageDetail": { "enum": [ - "auto", - "low", "high", "original" ], @@ -18134,6 +18132,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/v2/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -18154,6 +18163,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/v2/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git 
a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json index bb926075ee..d14acc7a1b 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json @@ -6420,8 +6420,6 @@ }, "ImageDetail": { "enum": [ - "auto", - "low", "high", "original" ], @@ -15958,6 +15956,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -15978,6 +15987,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ItemCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/ItemCompletedNotification.json index 6909415c2a..7aed9d9722 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ItemCompletedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ItemCompletedNotification.json @@ -285,6 +285,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1179,6 +1186,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1199,6 +1217,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ItemStartedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/ItemStartedNotification.json index 
758ceba32d..80a3cd2425 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ItemStartedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ItemStartedNotification.json @@ -285,6 +285,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1179,6 +1186,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1199,6 +1217,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json index bd88872fb6..74420ea57e 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json @@ -145,8 +145,6 @@ }, "ImageDetail": { "enum": [ - "auto", - "low", "high", "original" ], diff --git a/codex-rs/app-server-protocol/schema/json/v2/ReviewStartResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ReviewStartResponse.json index 9afd1ae514..882fa1f598 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ReviewStartResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ReviewStartResponse.json @@ -422,6 +422,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1452,6 +1459,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1472,6 +1490,17 
@@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadForkResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadForkResponse.json index 1608b2f48c..3d66152a60 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadForkResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadForkResponse.json @@ -527,6 +527,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -2012,6 +2019,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -2032,6 +2050,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadListResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadListResponse.json index f78fbaf27e..0693b19028 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadListResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadListResponse.json @@ -448,6 +448,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1827,6 +1834,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1847,6 +1865,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": 
"string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadMetadataUpdateResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadMetadataUpdateResponse.json index 4268ad203a..d2e4ca6728 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadMetadataUpdateResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadMetadataUpdateResponse.json @@ -448,6 +448,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1827,6 +1834,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1847,6 +1865,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadReadResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadReadResponse.json index fb0d80a047..431ca759de 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadReadResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadReadResponse.json @@ -448,6 +448,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1827,6 +1834,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1847,6 +1865,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json 
b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json index e8a437f003..253ebf4572 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json @@ -204,8 +204,6 @@ }, "ImageDetail": { "enum": [ - "auto", - "low", "high", "original" ], diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeResponse.json index 302c2e1069..f379455b59 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeResponse.json @@ -527,6 +527,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -2012,6 +2019,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -2032,6 +2050,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadRollbackResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadRollbackResponse.json index 204828c732..8d807d76cb 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadRollbackResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadRollbackResponse.json @@ -448,6 +448,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1827,6 +1834,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ 
-1847,6 +1865,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadStartResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadStartResponse.json index 9dc08614cf..7d31c52f1c 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadStartResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadStartResponse.json @@ -527,6 +527,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -2012,6 +2019,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -2032,6 +2050,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadStartedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadStartedNotification.json index 759b5990be..3ec843e817 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadStartedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadStartedNotification.json @@ -448,6 +448,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1827,6 +1834,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1847,6 +1865,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + 
], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadUnarchiveResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadUnarchiveResponse.json index f64400129a..ea48e34180 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadUnarchiveResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadUnarchiveResponse.json @@ -448,6 +448,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1827,6 +1834,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1847,6 +1865,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/TurnCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/TurnCompletedNotification.json index e5e2558e9c..40b81e5a34 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/TurnCompletedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/TurnCompletedNotification.json @@ -422,6 +422,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1452,6 +1459,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1472,6 +1490,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git 
a/codex-rs/app-server-protocol/schema/json/v2/TurnStartParams.json b/codex-rs/app-server-protocol/schema/json/v2/TurnStartParams.json index 086d3c6ed1..ecea8f1997 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/TurnStartParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/TurnStartParams.json @@ -99,6 +99,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "ModeKind": { "description": "Initial collaboration mode to use when the TUI starts.", "enum": [ @@ -351,6 +358,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -371,6 +389,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/TurnStartResponse.json b/codex-rs/app-server-protocol/schema/json/v2/TurnStartResponse.json index a2eff7fdd8..ffe0a28d15 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/TurnStartResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/TurnStartResponse.json @@ -422,6 +422,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1452,6 +1459,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1472,6 +1490,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/TurnStartedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/TurnStartedNotification.json 
index 0952db2aca..cbde541e52 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/TurnStartedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/TurnStartedNotification.json @@ -422,6 +422,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "McpToolCallError": { "properties": { "message": { @@ -1452,6 +1459,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -1472,6 +1490,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/TurnSteerParams.json b/codex-rs/app-server-protocol/schema/json/v2/TurnSteerParams.json index a064d9e7e3..1b7cfbf400 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/TurnSteerParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/TurnSteerParams.json @@ -20,6 +20,13 @@ ], "type": "object" }, + "ImageDetail": { + "enum": [ + "high", + "original" + ], + "type": "string" + }, "TextElement": { "properties": { "byteRange": { @@ -75,6 +82,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "type": { "enum": [ "image" @@ -95,6 +113,17 @@ }, { "properties": { + "detail": { + "anyOf": [ + { + "$ref": "#/definitions/ImageDetail" + }, + { + "type": "null" + } + ], + "default": null + }, "path": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/typescript/ImageDetail.ts b/codex-rs/app-server-protocol/schema/typescript/ImageDetail.ts index a48f07c088..5a62cc32f1 100644 --- a/codex-rs/app-server-protocol/schema/typescript/ImageDetail.ts +++ b/codex-rs/app-server-protocol/schema/typescript/ImageDetail.ts @@ -2,4 +2,4 @@ // This 
file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. -export type ImageDetail = "auto" | "low" | "high" | "original"; +export type ImageDetail = "high" | "original"; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/UserInput.ts b/codex-rs/app-server-protocol/schema/typescript/v2/UserInput.ts index 38abc2338b..2ac37c5228 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/UserInput.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/UserInput.ts @@ -1,10 +1,11 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { ImageDetail } from "../ImageDetail"; import type { TextElement } from "./TextElement"; export type UserInput = { "type": "text", text: string, /** * UI-defined spans within `text` used to render or persist special elements. */ -text_elements: Array<TextElement>, } | { "type": "image", url: string, } | { "type": "localImage", path: string, } | { "type": "skill", name: string, path: string, } | { "type": "mention", name: string, path: string, }; +text_elements: Array<TextElement>, } | { "type": "image", detail?: ImageDetail, url: string, } | { "type": "localImage", detail?: ImageDetail, path: string, } | { "type": "skill", name: string, path: string, } | { "type": "mention", name: string, path: string, }; diff --git a/codex-rs/app-server-protocol/src/protocol/thread_history.rs b/codex-rs/app-server-protocol/src/protocol/thread_history.rs index 1121d3a35b..3918e4a9bd 100644 --- a/codex-rs/app-server-protocol/src/protocol/thread_history.rs +++ b/codex-rs/app-server-protocol/src/protocol/thread_history.rs @@ -1077,12 +1077,18 @@ impl ThreadHistoryBuilder { }); } if let Some(images) = &payload.images { - for image in images { - content.push(UserInput::Image { url: image.clone() }); + for (idx, image) in images.iter().enumerate() { + content.push(UserInput::Image { + url: image.clone(), + detail: 
payload.image_details.get(idx).copied().flatten(), + }); } } - for path in &payload.local_images { - content.push(UserInput::LocalImage { path: path.clone() }); + for (idx, path) in payload.local_images.iter().enumerate() { + content.push(UserInput::LocalImage { + path: path.clone(), + detail: payload.local_image_details.get(idx).copied().flatten(), + }); } content } @@ -1203,6 +1209,7 @@ mod tests { use codex_protocol::items::UserMessageItem as CoreUserMessageItem; use codex_protocol::items::build_hook_prompt_message; use codex_protocol::mcp::CallToolResult; + use codex_protocol::models::ImageDetail; use codex_protocol::models::MessagePhase as CoreMessagePhase; use codex_protocol::models::WebSearchAction as CoreWebSearchAction; use codex_protocol::parse_command::ParsedCommand; @@ -1241,6 +1248,7 @@ mod tests { images: Some(vec!["https://example.com/one.png".into()]), text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "Hi there".into(), @@ -1258,6 +1266,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "Reply two".into(), @@ -1288,6 +1297,7 @@ mod tests { }, UserInput::Image { url: "https://example.com/one.png".into(), + detail: None, } ], } @@ -1335,6 +1345,45 @@ mod tests { ); } + #[test] + fn rebuilds_user_message_image_details_from_legacy_events() { + let local_path = PathBuf::from("/tmp/local.png"); + let events = vec![RolloutItem::EventMsg(EventMsg::UserMessage( + UserMessageEvent { + message: "inspect these".into(), + images: Some(vec!["https://example.com/image.png".into()]), + image_details: vec![Some(ImageDetail::Original)], + local_images: vec![local_path.clone()], + local_image_details: vec![Some(ImageDetail::Original)], + text_elements: Vec::new(), + }, + ))]; + + let turns = build_turns_from_rollout_items(&events); + + assert_eq!(turns.len(), 1); + assert_eq!( 
+ turns[0].items[0], + ThreadItem::UserMessage { + id: "item-1".into(), + content: vec![ + UserInput::Text { + text: "inspect these".into(), + text_elements: Vec::new(), + }, + UserInput::Image { + url: "https://example.com/image.png".into(), + detail: Some(ImageDetail::Original), + }, + UserInput::LocalImage { + path: local_path, + detail: Some(ImageDetail::Original), + }, + ], + } + ); + } + #[test] fn ignores_non_plan_item_lifecycle_events() { let turn_id = "turn-1"; @@ -1351,6 +1400,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::ItemStarted(ItemStartedEvent { thread_id, @@ -1428,6 +1478,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() })), RolloutItem::EventMsg(EventMsg::ImageGenerationEnd(ImageGenerationEndEvent { call_id: "ig_123".into(), @@ -1485,6 +1536,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentReasoning(AgentReasoningEvent { text: "first summary".into(), @@ -1537,6 +1589,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "Working...".into(), @@ -1554,6 +1607,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "Second attempt complete.".into(), @@ -1624,6 +1678,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "A1".into(), @@ -1635,6 +1690,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "A2".into(), @@ -1647,6 +1703,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + 
..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "A3".into(), @@ -1712,6 +1769,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "A1".into(), @@ -1723,6 +1781,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "A2".into(), @@ -1754,12 +1813,14 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::UserMessage(UserMessageEvent { message: "Steer".into(), images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-a".into(), @@ -1812,6 +1873,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::WebSearchEnd(WebSearchEndEvent { call_id: "search-1".into(), @@ -1984,6 +2046,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::DynamicToolCallRequest( codex_protocol::dynamic_tools::DynamicToolCallRequest { @@ -2049,6 +2112,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::ExecCommandEnd(ExecCommandEndEvent { call_id: "exec-declined".into(), @@ -2138,6 +2202,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::GuardianAssessment(GuardianAssessmentEvent { id: "review-guardian-exec".into(), @@ -2221,6 +2286,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::GuardianAssessment(GuardianAssessmentEvent { id: "review-guardian-execve".into(), @@ -2284,6 +2350,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), 
+ ..Default::default() }), EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-a".into(), @@ -2303,6 +2370,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::ExecCommandEnd(ExecCommandEndEvent { call_id: "exec-late".into(), @@ -2376,6 +2444,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-a".into(), @@ -2395,6 +2464,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::ExecCommandEnd(ExecCommandEndEvent { call_id: "exec-unknown-turn".into(), @@ -2463,6 +2533,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::PatchApplyBegin(PatchApplyBeginEvent { call_id: "patch-call".into(), @@ -2527,6 +2598,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent { call_id: "patch-call".into(), @@ -2591,6 +2663,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-a".into(), @@ -2610,6 +2683,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-a".into(), @@ -2657,6 +2731,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-a".into(), @@ -2676,6 +2751,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::TurnAborted(TurnAbortedEvent { turn_id: Some("turn-a".into()), @@ -2748,6 +2824,7 @@ mod tests { images: None, text_elements: 
Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::CollabResumeEnd(codex_protocol::protocol::CollabResumeEndEvent { call_id: "resume-1".into(), @@ -2805,6 +2882,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::CollabAgentSpawnEnd(codex_protocol::protocol::CollabAgentSpawnEndEvent { call_id: "spawn-1".into(), @@ -2866,6 +2944,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::CollabAgentInteractionBegin( codex_protocol::protocol::CollabAgentInteractionBeginEvent { @@ -2929,6 +3008,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::AgentMessage(AgentMessageEvent { message: "done".into(), @@ -2965,6 +3045,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::TurnComplete(TurnCompleteEvent { turn_id: "turn-a".into(), @@ -3020,6 +3101,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() }), EventMsg::Error(ErrorEvent { message: "stream failure".into(), @@ -3077,6 +3159,7 @@ mod tests { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() })), RolloutItem::ResponseItem(hook_prompt), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { diff --git a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs index d17a3dd988..526cc22131 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs @@ -15,6 +15,7 @@ use codex_protocol::memory_citation::MemoryCitation as CoreMemoryCitation; use codex_protocol::memory_citation::MemoryCitationEntry as CoreMemoryCitationEntry; use codex_protocol::models::AdditionalPermissionProfile as CoreAdditionalPermissionProfile; 
use codex_protocol::models::FileSystemPermissions as CoreFileSystemPermissions; +use codex_protocol::models::ImageDetail; use codex_protocol::models::ManagedFileSystemPermissions as CoreManagedFileSystemPermissions; use codex_protocol::models::MessagePhase; use codex_protocol::models::NetworkPermissions as CoreNetworkPermissions; @@ -2319,9 +2320,11 @@ fn core_turn_item_into_thread_item_converts_supported_variants() { }, CoreUserInput::Image { image_url: "https://example.com/image.png".to_string(), + detail: Some(ImageDetail::Original), }, CoreUserInput::LocalImage { path: PathBuf::from("local/image.png"), + detail: Some(ImageDetail::Original), }, CoreUserInput::Skill { name: "skill-creator".to_string(), @@ -2345,9 +2348,11 @@ fn core_turn_item_into_thread_item_converts_supported_variants() { }, UserInput::Image { url: "https://example.com/image.png".to_string(), + detail: Some(ImageDetail::Original), }, UserInput::LocalImage { path: PathBuf::from("local/image.png"), + detail: Some(ImageDetail::Original), }, UserInput::Skill { name: "skill-creator".to_string(), @@ -2562,6 +2567,33 @@ fn core_turn_item_into_thread_item_converts_supported_variants() { ); } +#[test] +fn user_input_into_core_preserves_image_detail() { + assert_eq!( + UserInput::Image { + url: "https://example.com/image.png".to_string(), + detail: Some(ImageDetail::Original), + } + .into_core(), + CoreUserInput::Image { + image_url: "https://example.com/image.png".to_string(), + detail: Some(ImageDetail::Original), + } + ); + + assert_eq!( + UserInput::LocalImage { + path: PathBuf::from("local/image.png"), + detail: Some(ImageDetail::Original), + } + .into_core(), + CoreUserInput::LocalImage { + path: PathBuf::from("local/image.png"), + detail: Some(ImageDetail::Original), + } + ); +} + #[test] fn skills_list_params_serialization_uses_force_reload() { assert_eq!( diff --git a/codex-rs/app-server-protocol/src/protocol/v2/turn.rs b/codex-rs/app-server-protocol/src/protocol/v2/turn.rs index 
8f4cd04e2d..8781dd2bd5 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2/turn.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2/turn.rs @@ -7,6 +7,7 @@ use codex_experimental_api_macros::ExperimentalApi; use codex_protocol::config_types::CollaborationMode; use codex_protocol::config_types::Personality; use codex_protocol::config_types::ReasoningSummary; +use codex_protocol::models::ImageDetail; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::plan_tool::PlanItemArg as CorePlanItemArg; use codex_protocol::plan_tool::StepStatus as CorePlanStepStatus; @@ -249,9 +250,15 @@ pub enum UserInput { text_elements: Vec, }, Image { + #[serde(default)] + #[ts(optional)] + detail: Option, url: String, }, LocalImage { + #[serde(default)] + #[ts(optional)] + detail: Option, path: PathBuf, }, Skill { @@ -274,8 +281,11 @@ impl UserInput { text, text_elements: text_elements.into_iter().map(Into::into).collect(), }, - UserInput::Image { url } => CoreUserInput::Image { image_url: url }, - UserInput::LocalImage { path } => CoreUserInput::LocalImage { path }, + UserInput::Image { url, detail } => CoreUserInput::Image { + image_url: url, + detail, + }, + UserInput::LocalImage { path, detail } => CoreUserInput::LocalImage { path, detail }, UserInput::Skill { name, path } => CoreUserInput::Skill { name, path }, UserInput::Mention { name, path } => CoreUserInput::Mention { name, path }, } @@ -292,8 +302,11 @@ impl From for UserInput { text, text_elements: text_elements.into_iter().map(Into::into).collect(), }, - CoreUserInput::Image { image_url } => UserInput::Image { url: image_url }, - CoreUserInput::LocalImage { path } => UserInput::LocalImage { path }, + CoreUserInput::Image { image_url, detail } => UserInput::Image { + url: image_url, + detail, + }, + CoreUserInput::LocalImage { path, detail } => UserInput::LocalImage { path, detail }, CoreUserInput::Skill { name, path } => UserInput::Skill { name, path }, CoreUserInput::Mention { name, path } => 
UserInput::Mention { name, path }, _ => unreachable!("unsupported user input variant"), diff --git a/codex-rs/app-server/src/bespoke_event_handling.rs b/codex-rs/app-server/src/bespoke_event_handling.rs index ddd7bf1a38..5b137c1091 100644 --- a/codex-rs/app-server/src/bespoke_event_handling.rs +++ b/codex-rs/app-server/src/bespoke_event_handling.rs @@ -2137,6 +2137,7 @@ mod tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), RolloutItem::EventMsg(EventMsg::AgentMessage(AgentMessageEvent { message: "after rollback".to_string(), @@ -3210,6 +3211,7 @@ mod tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }), ); } diff --git a/codex-rs/app-server/src/request_processors/thread_processor_tests.rs b/codex-rs/app-server/src/request_processors/thread_processor_tests.rs index c2e6b9a55e..f9841421ca 100644 --- a/codex-rs/app-server/src/request_processors/thread_processor_tests.rs +++ b/codex-rs/app-server/src/request_processors/thread_processor_tests.rs @@ -214,6 +214,7 @@ mod thread_processor_behavior_tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, ))]; let active_turn = Turn { diff --git a/codex-rs/app-server/src/request_processors/token_usage_replay.rs b/codex-rs/app-server/src/request_processors/token_usage_replay.rs index b19c4a61a0..b8c65645fc 100644 --- a/codex-rs/app-server/src/request_processors/token_usage_replay.rs +++ b/codex-rs/app-server/src/request_processors/token_usage_replay.rs @@ -152,6 +152,7 @@ mod tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), RolloutItem::EventMsg(EventMsg::AgentMessage(AgentMessageEvent { message: "first answer".to_string(), @@ -167,6 +168,7 @@ mod tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), ] } diff --git a/codex-rs/app-server/tests/suite/v2/thread_read.rs 
b/codex-rs/app-server/tests/suite/v2/thread_read.rs index fa143254a5..c8e3e179fb 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_read.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_read.rs @@ -1323,6 +1323,7 @@ fn store_history_items() -> Vec { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, ))] } diff --git a/codex-rs/app-server/tests/suite/v2/turn_start.rs b/codex-rs/app-server/tests/suite/v2/turn_start.rs index afc37d73c4..844d9ab1bb 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs @@ -1,3 +1,4 @@ +use anyhow::Context; use anyhow::Result; use app_test_support::DEFAULT_CLIENT_NAME; use app_test_support::McpProcess; @@ -62,11 +63,13 @@ use codex_protocol::config_types::Personality; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::Settings; use codex_protocol::models::BUILT_IN_PERMISSION_PROFILE_DANGER_FULL_ACCESS; +use codex_protocol::models::ImageDetail; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::user_input::MAX_USER_INPUT_TEXT_CHARS; use core_test_support::responses; use core_test_support::skip_if_no_network; use pretty_assertions::assert_eq; +use serde_json::Value; use serde_json::json; use std::collections::BTreeMap; use std::collections::HashMap; @@ -84,6 +87,11 @@ const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs const TEST_ORIGINATOR: &str = "codex_vscode"; const LOCAL_PRAGMATIC_TEMPLATE: &str = "You are a deeply pragmatic, effective software engineer."; const INVALID_REQUEST_ERROR_CODE: i64 = -32600; +const TINY_PNG_BYTES: &[u8] = &[ + 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 73, 72, 68, 82, 0, 0, 0, 1, 0, 0, 0, 1, 8, 6, 0, + 0, 0, 31, 21, 196, 137, 0, 0, 0, 11, 73, 68, 65, 84, 120, 156, 99, 96, 0, 2, 0, 0, 5, 0, 1, + 122, 94, 171, 63, 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130, +]; fn body_contains(req: &wiremock::Request, text: 
&str) -> bool { String::from_utf8(req.body.clone()) @@ -91,6 +99,107 @@ fn body_contains(req: &wiremock::Request, text: &str) -> bool { .is_some_and(|body| body.contains(text)) } +async fn run_local_image_turn(detail: Option) -> Result> { + // Two Codex turns hit the mock model (session start + turn/start). + let responses = vec![ + create_final_assistant_message_sse_response("Done")?, + create_final_assistant_message_sse_response("Done")?, + ]; + // Use the unchecked variant because the strict matcher does not currently + // cover image-bearing request payloads. + let server = create_mock_responses_server_sequence_unchecked(responses).await; + + let codex_home = TempDir::new()?; + create_config_toml( + codex_home.path(), + &server.uri(), + "never", + &BTreeMap::default(), + )?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. 
} = to_response::(thread_resp)?; + + let image_path = codex_home.path().join("image.png"); + std::fs::write(&image_path, TINY_PNG_BYTES)?; + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::LocalImage { + path: image_path, + detail, + }], + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + let TurnStartResponse { turn } = to_response::(turn_resp)?; + assert!(!turn.id.is_empty()); + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + received_response_input_images(&server).await +} + +async fn received_response_input_images(server: &wiremock::MockServer) -> Result> { + let requests = server + .received_requests() + .await + .context("failed to fetch received requests")?; + let mut input_images = Vec::new(); + + for request in requests { + if !request.url.path().ends_with("/responses") { + continue; + } + let body = request + .body_json::() + .context("request body should be JSON")?; + let Some(input) = body.get("input").and_then(Value::as_array) else { + continue; + }; + + for item in input { + if item.get("type").and_then(Value::as_str) != Some("message") { + continue; + } + let Some(content) = item.get("content").and_then(Value::as_array) else { + continue; + }; + input_images.extend( + content + .iter() + .filter(|span| span.get("type").and_then(Value::as_str) == Some("input_image")) + .cloned(), + ); + } + } + + Ok(input_images) +} + #[tokio::test] async fn turn_start_sends_originator_header() -> Result<()> { let responses = vec![create_final_assistant_message_sse_response("Done")?]; @@ -554,6 +663,7 @@ async fn turn_start_tracks_turn_event_analytics() -> Result<()> { thread_id: thread.id.clone(), input: vec![V2UserInput::Image { url: "https://example.com/a.png".to_string(), + 
detail: None, }], ..Default::default() }) @@ -1465,63 +1575,27 @@ async fn turn_start_uses_migrated_pragmatic_personality_without_override_v2() -> } #[tokio::test] -async fn turn_start_accepts_local_image_input() -> Result<()> { - // Two Codex turns hit the mock model (session start + turn/start). - let responses = vec![ - create_final_assistant_message_sse_response("Done")?, - create_final_assistant_message_sse_response("Done")?, - ]; - // Use the unchecked variant because the request payload includes a LocalImage - // which the strict matcher does not currently cover. - let server = create_mock_responses_server_sequence_unchecked(responses).await; +async fn turn_start_defaults_local_image_detail_to_high() -> Result<()> { + let input_images = run_local_image_turn(/*detail*/ None).await?; - let codex_home = TempDir::new()?; - create_config_toml( - codex_home.path(), - &server.uri(), - "never", - &BTreeMap::default(), - )?; + assert_eq!(input_images.len(), 1); + assert_eq!( + input_images[0].get("detail").and_then(Value::as_str), + Some("high") + ); - let mut mcp = McpProcess::new(codex_home.path()).await?; - timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + Ok(()) +} - let thread_req = mcp - .send_thread_start_request(ThreadStartParams { - model: Some("mock-model".to_string()), - ..Default::default() - }) - .await?; - let thread_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), - ) - .await??; - let ThreadStartResponse { thread, .. } = to_response::(thread_resp)?; +#[tokio::test] +async fn turn_start_forwards_custom_local_image_detail() -> Result<()> { + let input_images = run_local_image_turn(Some(ImageDetail::Original)).await?; - let image_path = codex_home.path().join("image.png"); - // No need to actually write the file; we just exercise the input path. 
- - let turn_req = mcp - .send_turn_start_request(TurnStartParams { - thread_id: thread.id.clone(), - input: vec![V2UserInput::LocalImage { path: image_path }], - ..Default::default() - }) - .await?; - let turn_resp: JSONRPCResponse = timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), - ) - .await??; - let TurnStartResponse { turn } = to_response::(turn_resp)?; - assert!(!turn.id.is_empty()); - - timeout( - DEFAULT_READ_TIMEOUT, - mcp.read_stream_until_notification_message("turn/completed"), - ) - .await??; + assert_eq!(input_images.len(), 1); + assert_eq!( + input_images[0].get("detail").and_then(Value::as_str), + Some("original") + ); Ok(()) } diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs index 87ac5d6676..7ffdff0011 100644 --- a/codex-rs/cli/src/main.rs +++ b/codex-rs/cli/src/main.rs @@ -1646,7 +1646,7 @@ async fn run_debug_prompt_input_command( .images .into_iter() .chain(cmd.images) - .map(|path| UserInput::LocalImage { path }) + .map(|path| UserInput::LocalImage { path, detail: None }) .collect::>(); if let Some(prompt) = cmd.prompt.or(interactive.prompt) { input.push(UserInput::Text { diff --git a/codex-rs/code-mode/src/description.rs b/codex-rs/code-mode/src/description.rs index 0c2813e51a..18d3c4555c 100644 --- a/codex-rs/code-mode/src/description.rs +++ b/codex-rs/code-mode/src/description.rs @@ -24,7 +24,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"Run JavaScript code to orchestrate/co - Global helpers: - `exit()`: Immediately ends the current script successfully (like an early return from the top level). - `text(value: string | number | boolean | undefined | null)`: Appends a text item. Non-string values are stringified with `JSON.stringify(...)` when possible. -- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. 
`image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument. +- `image(imageUrlOrItem: string | { image_url: string; detail?: "high" | "original" | null } | ImageContent, detail?: "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument. - `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session. - `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing. - `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`. 
diff --git a/codex-rs/code-mode/src/response.rs b/codex-rs/code-mode/src/response.rs index 0ac3a03770..ae92639cc0 100644 --- a/codex-rs/code-mode/src/response.rs +++ b/codex-rs/code-mode/src/response.rs @@ -4,8 +4,6 @@ use serde::Serialize; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum ImageDetail { - Auto, - Low, High, Original, } diff --git a/codex-rs/code-mode/src/runtime/value.rs b/codex-rs/code-mode/src/runtime/value.rs index 8d76a832d3..865b5a569d 100644 --- a/codex-rs/code-mode/src/runtime/value.rs +++ b/codex-rs/code-mode/src/runtime/value.rs @@ -71,14 +71,10 @@ pub(super) fn normalize_output_image( Some(detail) => { let normalized = detail.to_ascii_lowercase(); Some(match normalized.as_str() { - "auto" => ImageDetail::Auto, - "low" => ImageDetail::Low, "high" => ImageDetail::High, "original" => ImageDetail::Original, _ => { - return Err( - "image detail must be one of: auto, low, high, original".to_string() - ); + return Err("image detail must be one of: high, original".to_string()); } }) } @@ -160,7 +156,7 @@ fn parse_mcp_output_image( .and_then(JsonValue::as_object) .and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY)) .and_then(JsonValue::as_str) - .filter(|detail| matches!(*detail, "auto" | "low" | "high" | "original")) + .filter(|detail| matches!(*detail, "high" | "original")) .map(str::to_string); Ok((image_url, detail)) } diff --git a/codex-rs/code-mode/src/service.rs b/codex-rs/code-mode/src/service.rs index 44e4be4939..1544f84f36 100644 --- a/codex-rs/code-mode/src/service.rs +++ b/codex-rs/code-mode/src/service.rs @@ -1308,7 +1308,7 @@ image({ image( { image_url: "https://example.com/image.jpg", - detail: "low", + detail: "high", }, "original", ); @@ -1348,7 +1348,7 @@ image( mimeType: "image/png", _meta: { "codex/imageDetail": "original" }, }, - "low", + "high", ); "# .to_string(), @@ -1364,7 +1364,7 @@ image( cell_id: "1".to_string(), content_items: 
vec![FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(), - detail: Some(crate::ImageDetail::Low), + detail: Some(crate::ImageDetail::High), }], stored_values: HashMap::new(), error_text: None, @@ -1372,6 +1372,36 @@ image( ); } + #[tokio::test] + async fn image_helper_rejects_unsupported_detail() { + let service = CodeModeService::new(); + + let response = service + .execute(ExecuteRequest { + source: r#" +image({ + image_url: "https://example.com/image.jpg", + detail: "low", +}); +"# + .to_string(), + yield_time_ms: None, + ..execute_request("") + }) + .await + .unwrap(); + + assert_eq!( + response, + RuntimeResponse::Result { + cell_id: "1".to_string(), + content_items: Vec::new(), + stored_values: HashMap::new(), + error_text: Some("image detail must be one of: high, original".to_string()), + } + ); + } + #[tokio::test] async fn image_helper_rejects_raw_mcp_result_container() { let service = CodeModeService::new(); diff --git a/codex-rs/core/src/agent/control.rs b/codex-rs/core/src/agent/control.rs index 329c6048b0..71bc026a13 100644 --- a/codex-rs/core/src/agent/control.rs +++ b/codex-rs/core/src/agent/control.rs @@ -1235,7 +1235,9 @@ pub(crate) fn render_input_preview(initial_operation: &Op) -> String { .map(|item| match item { UserInput::Text { text, .. } => text.clone(), UserInput::Image { .. } => "[image]".to_string(), - UserInput::LocalImage { path } => format!("[local_image:{}]", path.display()), + UserInput::LocalImage { path, .. 
} => { + format!("[local_image:{}]", path.display()) + } UserInput::Skill { name, path } => format!("[skill:${name}]({})", path.display()), UserInput::Mention { name, path } => format!("[mention:${name}]({path})"), _ => "[input]".to_string(), diff --git a/codex-rs/core/src/event_mapping.rs b/codex-rs/core/src/event_mapping.rs index e7c79e6dd2..d3cae7feed 100644 --- a/codex-rs/core/src/event_mapping.rs +++ b/codex-rs/core/src/event_mapping.rs @@ -90,9 +90,10 @@ fn parse_user_message(message: &[ContentItem]) -> Option { text_elements: Vec::new(), }); } - ContentItem::InputImage { image_url, .. } => { + ContentItem::InputImage { image_url, detail } => { content.push(UserInput::Image { image_url: image_url.clone(), + detail: *detail, }); } ContentItem::OutputText { text } => { diff --git a/codex-rs/core/src/event_mapping_tests.rs b/codex-rs/core/src/event_mapping_tests.rs index a70b2a69b0..c8311440eb 100644 --- a/codex-rs/core/src/event_mapping_tests.rs +++ b/codex-rs/core/src/event_mapping_tests.rs @@ -48,8 +48,14 @@ fn parses_user_message_with_text_and_two_images() { text: "Hello world".to_string(), text_elements: Vec::new(), }, - UserInput::Image { image_url: img1 }, - UserInput::Image { image_url: img2 }, + UserInput::Image { + image_url: img1, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, + UserInput::Image { + image_url: img2, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, ]; assert_eq!(user.content, expected_content); } @@ -87,7 +93,10 @@ fn skips_local_image_label_text() { match turn_item { TurnItem::UserMessage(user) => { let expected_content = vec![ - UserInput::Image { image_url }, + UserInput::Image { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, UserInput::Text { text: user_text, text_elements: Vec::new(), @@ -165,7 +174,10 @@ fn skips_unnamed_image_label_text() { match turn_item { TurnItem::UserMessage(user) => { let expected_content = vec![ - UserInput::Image { image_url }, + UserInput::Image { + image_url, + detail: Some(DEFAULT_IMAGE_DETAIL), + }, 
UserInput::Text { text: user_text, text_elements: Vec::new(), diff --git a/codex-rs/core/src/personality_migration_tests.rs b/codex-rs/core/src/personality_migration_tests.rs index 699e06fe67..b84d6dd3eb 100644 --- a/codex-rs/core/src/personality_migration_tests.rs +++ b/codex-rs/core/src/personality_migration_tests.rs @@ -72,6 +72,7 @@ async fn write_rollout_with_user_event(dir: &Path, thread_id: ThreadId) -> io::R images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), }; diff --git a/codex-rs/core/src/session/rollout_reconstruction_tests.rs b/codex-rs/core/src/session/rollout_reconstruction_tests.rs index 143b23d3a3..aa403316e0 100644 --- a/codex-rs/core/src/session/rollout_reconstruction_tests.rs +++ b/codex-rs/core/src/session/rollout_reconstruction_tests.rs @@ -143,6 +143,7 @@ async fn record_initial_history_resumed_hydrates_previous_turn_settings_from_lif images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item), @@ -209,6 +210,7 @@ async fn reconstruct_history_rollback_keeps_history_and_metadata_in_sync_for_com images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(first_context_item.clone()), @@ -237,6 +239,7 @@ async fn reconstruct_history_rollback_keeps_history_and_metadata_in_sync_for_com images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(rolled_back_context_item), @@ -307,6 +310,7 @@ async fn reconstruct_history_rollback_keeps_history_and_metadata_in_sync_for_inc images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(first_context_item.clone()), @@ -335,6 +339,7 @@ async fn reconstruct_history_rollback_keeps_history_and_metadata_in_sync_for_inc images: None, local_images: Vec::new(), text_elements: Vec::new(), + 
..Default::default() }, )), RolloutItem::ResponseItem(turn_two_user), @@ -397,6 +402,7 @@ async fn reconstruct_history_rollback_skips_non_user_turns_for_history_and_metad images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(first_context_item.clone()), @@ -425,6 +431,7 @@ async fn reconstruct_history_rollback_skips_non_user_turns_for_history_and_metad images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::ResponseItem(turn_two_user), @@ -515,6 +522,7 @@ async fn reconstruct_history_rollback_counts_inter_agent_assistant_turns() { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(first_context_item.clone()), @@ -603,6 +611,7 @@ async fn reconstruct_history_rollback_clears_history_and_metadata_when_exceeding images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(only_context_item), @@ -655,6 +664,7 @@ async fn record_initial_history_resumed_rollback_skips_only_user_turns() { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item), @@ -727,6 +737,7 @@ async fn record_initial_history_resumed_rollback_drops_incomplete_user_turn_comp images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item.clone()), @@ -753,6 +764,7 @@ async fn record_initial_history_resumed_rollback_drops_incomplete_user_turn_comp images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::Compacted(CompactedItem { @@ -884,6 +896,7 @@ async fn reconstruct_history_legacy_compaction_without_replacement_history_clear images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), 
RolloutItem::TurnContext(current_context_item), @@ -952,6 +965,7 @@ async fn record_initial_history_resumed_turn_context_after_compaction_reestablis images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), // Compaction clears baseline until a later TurnContextItem re-establishes it. @@ -1065,6 +1079,7 @@ async fn record_initial_history_resumed_aborted_turn_without_id_clears_active_tu images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item), @@ -1091,6 +1106,7 @@ async fn record_initial_history_resumed_aborted_turn_without_id_clears_active_tu images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::EventMsg(EventMsg::TurnAborted( @@ -1176,6 +1192,7 @@ async fn record_initial_history_resumed_unmatched_abort_preserves_active_turn_fo images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item), @@ -1202,6 +1219,7 @@ async fn record_initial_history_resumed_unmatched_abort_preserves_active_turn_fo images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::EventMsg(EventMsg::TurnAborted( @@ -1296,6 +1314,7 @@ async fn record_initial_history_resumed_trailing_incomplete_turn_compaction_clea images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item), @@ -1322,6 +1341,7 @@ async fn record_initial_history_resumed_trailing_incomplete_turn_compaction_clea images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::Compacted(CompactedItem { @@ -1372,6 +1392,7 @@ async fn record_initial_history_resumed_trailing_incomplete_turn_preserves_turn_ images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, 
)), RolloutItem::TurnContext(current_context_item.clone()), @@ -1450,6 +1471,7 @@ async fn record_initial_history_resumed_replaced_incomplete_compacted_turn_clear images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item), @@ -1476,6 +1498,7 @@ async fn record_initial_history_resumed_replaced_incomplete_compacted_turn_clear images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::Compacted(CompactedItem { diff --git a/codex-rs/core/src/session/tests.rs b/codex-rs/core/src/session/tests.rs index aa89efe1d4..cad702bca7 100644 --- a/codex-rs/core/src/session/tests.rs +++ b/codex-rs/core/src/session/tests.rs @@ -2364,6 +2364,7 @@ async fn record_initial_history_forked_hydrates_previous_turn_settings() { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(previous_context_item.clone()), @@ -2558,6 +2559,7 @@ async fn thread_rollback_recomputes_previous_turn_settings_and_reference_context images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(first_context_item.clone()), @@ -2584,6 +2586,7 @@ async fn thread_rollback_recomputes_previous_turn_settings_and_reference_context images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, )), RolloutItem::TurnContext(rolled_back_context_item), @@ -2666,6 +2669,7 @@ async fn thread_rollback_restores_cleared_reference_context_item_after_compactio images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), RolloutItem::TurnContext(first_context_item.clone()), RolloutItem::ResponseItem(user_message("turn 1 user")), @@ -2709,6 +2713,7 @@ async fn thread_rollback_restores_cleared_reference_context_item_after_compactio images: None, local_images: Vec::new(), text_elements: Vec::new(), + 
..Default::default() })), RolloutItem::TurnContext(TurnContextItem { turn_id: Some(rolled_back_turn_id.clone()), @@ -2763,6 +2768,7 @@ async fn thread_rollback_persists_marker_and_replays_cumulatively() { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), RolloutItem::TurnContext(turn_context_item.clone()), RolloutItem::ResponseItem(user_message("turn 1 user")), @@ -2787,6 +2793,7 @@ async fn thread_rollback_persists_marker_and_replays_cumulatively() { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), RolloutItem::TurnContext(turn_context_item.clone()), RolloutItem::ResponseItem(user_message("turn 2 user")), @@ -2811,6 +2818,7 @@ async fn thread_rollback_persists_marker_and_replays_cumulatively() { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), RolloutItem::TurnContext(turn_context_item), RolloutItem::ResponseItem(user_message("turn 3 user")), @@ -7521,6 +7529,7 @@ async fn record_context_updates_and_set_reference_context_item_persists_full_rei images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, ))]) .await; @@ -7946,6 +7955,7 @@ async fn task_finish_emits_turn_item_lifecycle_for_leftover_pending_user_input() images, text_elements, local_images, + .. 
}) if message == "late pending input" && images == Some(Vec::new()) && text_elements.is_empty() diff --git a/codex-rs/core/src/thread_manager_tests.rs b/codex-rs/core/src/thread_manager_tests.rs index 9fedff9470..956486f182 100644 --- a/codex-rs/core/src/thread_manager_tests.rs +++ b/codex-rs/core/src/thread_manager_tests.rs @@ -1127,6 +1127,7 @@ fn completed_legacy_event_history_is_not_mid_turn() { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() })), RolloutItem::EventMsg(EventMsg::AgentMessage(AgentMessageEvent { message: "done".to_string(), @@ -1154,6 +1155,7 @@ fn mixed_response_and_legacy_user_event_history_is_mid_turn() { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() })), ]); diff --git a/codex-rs/core/src/tools/code_mode/response_adapter.rs b/codex-rs/core/src/tools/code_mode/response_adapter.rs index e20cf6a071..133d6e2fc1 100644 --- a/codex-rs/core/src/tools/code_mode/response_adapter.rs +++ b/codex-rs/core/src/tools/code_mode/response_adapter.rs @@ -17,8 +17,6 @@ impl IntoProtocol for CodeModeImageDetail { fn into_protocol(self) -> ImageDetail { let value = self; match value { - CodeModeImageDetail::Auto => ImageDetail::Auto, - CodeModeImageDetail::Low => ImageDetail::Low, CodeModeImageDetail::High => ImageDetail::High, CodeModeImageDetail::Original => ImageDetail::Original, } diff --git a/codex-rs/core/src/tools/handlers/view_image.rs b/codex-rs/core/src/tools/handlers/view_image.rs index 7ad6a79829..c3e6792c06 100644 --- a/codex-rs/core/src/tools/handlers/view_image.rs +++ b/codex-rs/core/src/tools/handlers/view_image.rs @@ -60,6 +60,7 @@ struct ViewImageArgs { #[derive(Clone, Copy, Eq, PartialEq)] enum ViewImageDetail { + High, Original, } @@ -114,16 +115,15 @@ impl ToolExecutor for ViewImageHandler { environment_id, detail, } = parse_arguments(&arguments)?; - // `view_image` accepts only its documented detail values: omit - // `detail` for the default path or set it 
to `original`. - // Other string values remain invalid rather than being silently - // reinterpreted. + // `high` is the explicit spelling of the default resized path. + // Other string values remain invalid rather than being silently reinterpreted. let detail = match detail.as_deref() { None => None, + Some("high") => Some(ViewImageDetail::High), Some("original") => Some(ViewImageDetail::Original), Some(detail) => { return Err(FunctionCallError::RespondToModel(format!( - "view_image.detail only supports `original`; omit `detail` for default resized behavior, got `{detail}`" + "view_image.detail only supports `high` or `original`; omit `detail` for default high resized behavior, got `{detail}`" ))); } }; @@ -175,11 +175,11 @@ impl ToolExecutor for ViewImageHandler { } else { PromptImageMode::ResizeToFit }; - let image_detail = Some(if use_original_detail { + let image_detail = if use_original_detail { ImageDetail::Original } else { DEFAULT_IMAGE_DETAIL - }); + }; let image = load_for_prompt_bytes(abs_path.as_path(), file_bytes, image_mode).map_err(|error| { @@ -208,7 +208,7 @@ impl CoreToolRuntime for ViewImageHandler {} pub struct ViewImageOutput { image_url: String, - image_detail: Option, + image_detail: ImageDetail, } impl ToolOutput for ViewImageOutput { @@ -224,7 +224,7 @@ impl ToolOutput for ViewImageOutput { let body = FunctionCallOutputBody::ContentItems(vec![FunctionCallOutputContentItem::InputImage { image_url: self.image_url.clone(), - detail: self.image_detail, + detail: Some(self.image_detail), }]); let output = FunctionCallOutputPayload { body, @@ -263,7 +263,7 @@ mod tests { fn code_mode_result_returns_image_url_object() { let output = ViewImageOutput { image_url: "data:image/png;base64,AAA".to_string(), - image_detail: Some(DEFAULT_IMAGE_DETAIL), + image_detail: DEFAULT_IMAGE_DETAIL, }; let result = output.code_mode_result(&ToolPayload::Function { @@ -317,4 +317,68 @@ mod tests { "{message}" ); } + + #[tokio::test] + async fn 
handle_rejects_unsupported_detail() { + let (session, turn) = make_session_and_context().await; + + let result = ViewImageHandler::default() + .handle(ToolInvocation { + session: Arc::new(session), + turn: Arc::new(turn), + cancellation_token: tokio_util::sync::CancellationToken::new(), + tracker: Arc::new(Mutex::new(TurnDiffTracker::new())), + call_id: "call-view-image".to_string(), + tool_name: codex_tools::ToolName::plain("view_image"), + source: ToolCallSource::Direct, + payload: ToolPayload::Function { + arguments: json!({ "path": "image.png", "detail": "low" }).to_string(), + }, + }) + .await; + + let Err(FunctionCallError::RespondToModel(message)) = result else { + panic!("expected unsupported detail error"); + }; + assert_eq!( + message, + "view_image.detail only supports `high` or `original`; omit `detail` for default high resized behavior, got `low`" + ); + } + + #[tokio::test(flavor = "multi_thread")] + async fn handle_accepts_explicit_high_detail() { + let (session, mut turn) = make_session_and_context().await; + let image_dir = tempfile::tempdir().expect("create image temp dir"); + let image_cwd = image_dir.abs(); + + turn.environments + .turn_environments + .first_mut() + .expect("default local turn environment") + .cwd = image_cwd.clone(); + let image_path = image_cwd.join("image.png"); + std::fs::write(image_path.as_path(), b"not a real image").expect("write test image"); + turn.permission_profile = PermissionProfile::Disabled; + + let result = ViewImageHandler::default() + .handle(ToolInvocation { + session: Arc::new(session), + turn: Arc::new(turn), + cancellation_token: tokio_util::sync::CancellationToken::new(), + tracker: Arc::new(Mutex::new(TurnDiffTracker::new())), + call_id: "call-view-image".to_string(), + tool_name: codex_tools::ToolName::plain("view_image"), + source: ToolCallSource::Direct, + payload: ToolPayload::Function { + arguments: json!({ "path": "image.png", "detail": "high" }).to_string(), + }, + }) + .await; + + let 
Err(FunctionCallError::RespondToModel(message)) = result else { + panic!("expected image processing error"); + }; + assert!(message.contains("unable to process image"), "{message}"); + } } diff --git a/codex-rs/core/src/tools/handlers/view_image_spec.rs b/codex-rs/core/src/tools/handlers/view_image_spec.rs index 7d1422a037..662cd639a6 100644 --- a/codex-rs/core/src/tools/handlers/view_image_spec.rs +++ b/codex-rs/core/src/tools/handlers/view_image_spec.rs @@ -20,9 +20,12 @@ pub fn create_view_image_tool(options: ViewImageToolOptions) -> ToolSpec { if options.can_request_original_image_detail { properties.insert( "detail".to_string(), - JsonSchema::string(Some( - "Optional detail override. The only supported value is `original`; omit this field for default resized behavior. Use `original` to preserve the file's original resolution instead of resizing to fit. This is important when high-fidelity image perception or precise localization is needed, especially for CUA agents.".to_string(), - )), + JsonSchema::string_enum( + vec![json!("high"), json!("original")], + Some( + "Optional detail override. Supported values are `high` and `original`; omit this field for default high resized behavior. Use `original` to preserve the file's original resolution instead of resizing to fit. This is important when high-fidelity image perception or precise localization is needed, especially for CUA agents.".to_string(), + ), + ), ); } if options.include_environment_id { @@ -55,8 +58,9 @@ fn view_image_output_schema() -> Value { "description": "Data URL for the loaded image." }, "detail": { - "type": ["string", "null"], - "description": "Image detail hint returned by view_image. Returns `original` when original resolution is preserved, otherwise `null`." + "type": "string", + "enum": ["high", "original"], + "description": "Image detail hint returned by view_image. Returns `high` for default resized behavior or `original` when original resolution is preserved." 
} }, "required": ["image_url", "detail"], diff --git a/codex-rs/core/src/tools/spec_plan_tests.rs b/codex-rs/core/src/tools/spec_plan_tests.rs index 063d75e6e8..333efe6f06 100644 --- a/codex-rs/core/src/tools/spec_plan_tests.rs +++ b/codex-rs/core/src/tools/spec_plan_tests.rs @@ -711,13 +711,14 @@ fn view_image_tool_includes_detail_with_original_detail_support() { }; let (properties, _) = expect_object_schema(parameters); assert!(properties.contains_key("detail")); - let description = expect_string_description( - properties - .get("detail") - .expect("view_image detail should include a description"), - ); - assert!(description.contains("only supported value is `original`")); - assert!(description.contains("omit this field for default resized behavior")); + let detail_schema = properties + .get("detail") + .expect("view_image detail should include a description"); + let description = expect_string_description(detail_schema); + let expected = vec![json!("high"), json!("original")]; + assert_eq!(detail_schema.enum_values.as_ref(), Some(&expected)); + assert!(description.contains("Supported values are `high` and `original`")); + assert!(description.contains("omit this field for default high resized behavior")); } #[test] @@ -2239,7 +2240,7 @@ fn code_mode_augments_builtin_tool_descriptions_with_typed_sample() { assert_eq!( description, - "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: {\n // Local filesystem path to an image file\n path: string;\n}): Promise<{\n // Image detail hint returned by view_image. 
Returns `original` when original resolution is preserved, otherwise `null`.\n detail: string | null;\n // Data URL for the loaded image.\n image_url: string;\n}>; };\n```" + "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: {\n // Local filesystem path to an image file\n path: string;\n}): Promise<{\n // Image detail hint returned by view_image. Returns `high` for default resized behavior or `original` when original resolution is preserved.\n detail: \"high\" | \"original\";\n // Data URL for the loaded image.\n image_url: string;\n}>; };\n```" ); } diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index ff7640f05e..f117631c4a 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -2466,7 +2466,7 @@ text(JSON.stringify(tool)); parsed, serde_json::json!({ "name": "view_image", - "description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: {\n // Local filesystem path to an image file\n path: string;\n}): Promise<{\n // Image detail hint returned by view_image. Returns `original` when original resolution is preserved, otherwise `null`.\n detail: string | null;\n // Data URL for the loaded image.\n image_url: string;\n}>; };\n```", + "description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: {\n // Local filesystem path to an image file\n path: string;\n}): Promise<{\n // Image detail hint returned by view_image. 
Returns `high` for default resized behavior or `original` when original resolution is preserved.\n detail: \"high\" | \"original\";\n // Data URL for the loaded image.\n image_url: string;\n}>; };\n```", }) ); diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 68ddf1691b..be3423a85e 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -3272,6 +3272,7 @@ async fn snapshot_request_shape_pre_turn_compaction_including_incoming_user_mess items: vec![ UserInput::Image { image_url: image_url.clone(), + detail: None, }, UserInput::Text { text: "USER_THREE".to_string(), diff --git a/codex-rs/core/tests/suite/compact_remote.rs b/codex-rs/core/tests/suite/compact_remote.rs index 720ae10599..15197042fa 100644 --- a/codex-rs/core/tests/suite/compact_remote.rs +++ b/codex-rs/core/tests/suite/compact_remote.rs @@ -556,7 +556,10 @@ async fn assert_remote_manual_compact_request_parity( .submit(Op::UserInput { environments: None, items: vec![ - UserInput::Image { image_url }, + UserInput::Image { + image_url, + detail: None, + }, UserInput::Text { text: "TURN_FOUR_IMAGE_USER".to_string(), text_elements: Vec::new(), diff --git a/codex-rs/core/tests/suite/compact_remote_parity.rs b/codex-rs/core/tests/suite/compact_remote_parity.rs index 96616fa9cb..32c13b0b27 100644 --- a/codex-rs/core/tests/suite/compact_remote_parity.rs +++ b/codex-rs/core/tests/suite/compact_remote_parity.rs @@ -621,6 +621,7 @@ fn user_input_for_step(scenario_name: &str, idx: usize, step: Step) -> Vec anyhow::Resu items: vec![ UserInput::LocalImage { path: abs_path.clone(), + detail: None, }, UserInput::Text { text: "pasted image".to_string(), @@ -208,6 +209,7 @@ async fn drag_drop_image_persists_rollout_request_shape() -> anyhow::Result<()> items: vec![ UserInput::Image { image_url: image_url.clone(), + detail: None, }, UserInput::Text { text: "dropped image".to_string(), diff --git 
a/codex-rs/core/tests/suite/model_switching.rs b/codex-rs/core/tests/suite/model_switching.rs index 21ab43f10d..adce6e36eb 100644 --- a/codex-rs/core/tests/suite/model_switching.rs +++ b/codex-rs/core/tests/suite/model_switching.rs @@ -439,6 +439,7 @@ async fn model_change_from_image_to_text_strips_prior_image_content() -> Result< vec![ UserInput::Image { image_url: image_url.clone(), + detail: None, }, UserInput::Text { text: "first turn".to_string(), diff --git a/codex-rs/core/tests/suite/personality_migration.rs b/codex-rs/core/tests/suite/personality_migration.rs index 0b89a9cfba..5151d4e55d 100644 --- a/codex-rs/core/tests/suite/personality_migration.rs +++ b/codex-rs/core/tests/suite/personality_migration.rs @@ -88,6 +88,7 @@ async fn write_rollout_with_user_event(dir: &Path, thread_id: ThreadId) -> io::R images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), }; diff --git a/codex-rs/core/tests/suite/resume_warning.rs b/codex-rs/core/tests/suite/resume_warning.rs index cb545df351..ac916ba66d 100644 --- a/codex-rs/core/tests/suite/resume_warning.rs +++ b/codex-rs/core/tests/suite/resume_warning.rs @@ -64,6 +64,7 @@ fn resume_history( images: None, local_images: vec![], text_elements: vec![], + ..Default::default() })), RolloutItem::TurnContext(turn_ctx), RolloutItem::EventMsg(EventMsg::TurnComplete(TurnCompleteEvent { diff --git a/codex-rs/core/tests/suite/sqlite_state.rs b/codex-rs/core/tests/suite/sqlite_state.rs index 0cc7d07b16..ab0b4c27ea 100644 --- a/codex-rs/core/tests/suite/sqlite_state.rs +++ b/codex-rs/core/tests/suite/sqlite_state.rs @@ -168,6 +168,7 @@ async fn backfill_scans_existing_rollouts() -> Result<()> { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), }, ]; diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 5d5673b011..d78e8761fd 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ 
b/codex-rs/core/tests/suite/view_image.rs @@ -201,6 +201,7 @@ async fn assert_user_turn_local_image_resizes_to( &test, vec![UserInput::LocalImage { path: abs_path.clone(), + detail: None, }], session_model, )) @@ -814,7 +815,7 @@ async fn view_image_tool_errors_clearly_for_unsupported_detail_values() -> anyho .expect("output text present"); assert_eq!( output_text, - "view_image.detail only supports `original`; omit `detail` for default resized behavior, got `low`" + "view_image.detail only supports `high` or `original`; omit `detail` for default high resized behavior, got `low`" ); assert!( @@ -1487,6 +1488,7 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()> &test, vec![UserInput::LocalImage { path: abs_path.clone(), + detail: None, }], session_model, )) diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 1004124ba5..3e2cf21051 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -637,7 +637,7 @@ async fn run_exec_session(args: ExecRunArgs) -> anyhow::Result<()> { let mut items: Vec = imgs .into_iter() .chain(args.images.iter().cloned()) - .map(|path| UserInput::LocalImage { path }) + .map(|path| UserInput::LocalImage { path, detail: None }) .collect(); items.push(UserInput::Text { text: prompt_text.clone(), @@ -657,7 +657,7 @@ async fn run_exec_session(args: ExecRunArgs) -> anyhow::Result<()> { let prompt_text = resolve_root_prompt(root_prompt); let mut items: Vec = imgs .into_iter() - .map(|path| UserInput::LocalImage { path }) + .map(|path| UserInput::LocalImage { path, detail: None }) .collect(); items.push(UserInput::Text { text: prompt_text.clone(), diff --git a/codex-rs/external-agent-sessions/src/export.rs b/codex-rs/external-agent-sessions/src/export.rs index 8f34f6ba5a..3682a4f7f8 100644 --- a/codex-rs/external-agent-sessions/src/export.rs +++ b/codex-rs/external-agent-sessions/src/export.rs @@ -81,6 +81,7 @@ fn rollout_items_from_messages(messages: &[ConversationMessage]) -> Vec 
Some(image_url.clone()), + UserInput::Image { image_url, .. } => Some(image_url.clone()), _ => None, }) .collect() } + pub fn image_details(&self) -> Vec> { + trim_trailing_default_image_details( + self.content + .iter() + .filter_map(|c| match c { + UserInput::Image { detail, .. } => Some(*detail), + _ => None, + }) + .collect(), + ) + } + pub fn local_image_paths(&self) -> Vec { self.content .iter() .filter_map(|c| match c { - UserInput::LocalImage { path } => Some(path.clone()), + UserInput::LocalImage { path, .. } => Some(path.clone()), _ => None, }) .collect() } + + pub fn local_image_details(&self) -> Vec> { + trim_trailing_default_image_details( + self.content + .iter() + .filter_map(|c| match c { + UserInput::LocalImage { detail, .. } => Some(*detail), + _ => None, + }) + .collect(), + ) + } +} + +fn trim_trailing_default_image_details( + mut details: Vec>, +) -> Vec> { + while matches!(details.last(), Some(None)) { + details.pop(); + } + details } impl HookPromptItem { diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 2706a47c82..48a18a5cd9 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -721,8 +721,6 @@ pub enum ContentItem { #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "lowercase")] pub enum ImageDetail { - Auto, - Low, High, Original, } @@ -1064,8 +1062,13 @@ pub fn local_image_content_items_with_label_number( path: &std::path::Path, file_bytes: Vec, label_number: Option, - mode: PromptImageMode, + detail: ImageDetail, ) -> Vec { + let mode = match detail { + ImageDetail::Original => PromptImageMode::Original, + ImageDetail::High => PromptImageMode::ResizeToFit, + }; + match load_for_prompt_bytes(path, file_bytes, mode) { Ok(image) => { let mut items = Vec::with_capacity(3); @@ -1076,7 +1079,7 @@ pub fn local_image_content_items_with_label_number( } items.push(ContentItem::InputImage { image_url: image.into_data_url(), - 
detail: Some(DEFAULT_IMAGE_DETAIL), + detail: Some(detail), }); if label_number.is_some() { items.push(ContentItem::InputText { @@ -1221,29 +1224,31 @@ impl From> for ResponseInputItem { .into_iter() .flat_map(|c| match c { UserInput::Text { text, .. } => vec![ContentItem::InputText { text }], - UserInput::Image { image_url } => { + UserInput::Image { image_url, detail } => { image_index += 1; + let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL); vec![ ContentItem::InputText { text: image_open_tag_text(), }, ContentItem::InputImage { image_url, - detail: Some(DEFAULT_IMAGE_DETAIL), + detail: Some(detail), }, ContentItem::InputText { text: image_close_tag_text(), }, ] } - UserInput::LocalImage { path } => { + UserInput::LocalImage { path, detail } => { image_index += 1; + let detail = detail.unwrap_or(DEFAULT_IMAGE_DETAIL); match std::fs::read(&path) { Ok(file_bytes) => local_image_content_items_with_label_number( &path, file_bytes, Some(image_index), - PromptImageMode::ResizeToFit, + detail, ), Err(err) => vec![local_image_error_placeholder(&path, err)], } @@ -1587,8 +1592,6 @@ fn convert_mcp_content_to_items( .and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY)) .and_then(serde_json::Value::as_str) .and_then(|detail| match detail { - "auto" => Some(ImageDetail::Auto), - "low" => Some(ImageDetail::Low), "high" => Some(ImageDetail::High), "original" => Some(ImageDetail::Original), _ => None, @@ -1633,6 +1636,14 @@ mod tests { use std::path::PathBuf; use tempfile::tempdir; + // A tiny valid PNG (1x1) so image conversion tests don't depend on cross-crate + // file paths, which break under Bazel sandboxing. 
+ const TINY_PNG_BYTES: &[u8] = &[ + 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 73, 72, 68, 82, 0, 0, 0, 1, 0, 0, 0, 1, 8, 6, + 0, 0, 0, 31, 21, 196, 137, 0, 0, 0, 11, 73, 68, 65, 84, 120, 156, 99, 96, 0, 2, 0, 0, 5, 0, + 1, 122, 94, 171, 63, 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130, + ]; + #[test] fn response_input_message_conversion_preserves_phase() { let item = ResponseItem::from(ResponseInputItem::Message { @@ -2545,6 +2556,7 @@ mod tests { let item = ResponseInputItem::from(vec![UserInput::Image { image_url: image_url.clone(), + detail: None, }]); match item { @@ -2569,6 +2581,31 @@ mod tests { Ok(()) } + #[test] + fn image_user_input_preserves_requested_detail() -> Result<()> { + let image_url = "data:image/png;base64,abc".to_string(); + + let item = ResponseInputItem::from(vec![UserInput::Image { + image_url: image_url.clone(), + detail: Some(ImageDetail::Original), + }]); + + match item { + ResponseInputItem::Message { content, .. } => { + assert_eq!( + content.get(1), + Some(&ContentItem::InputImage { + image_url, + detail: Some(ImageDetail::Original), + }) + ); + } + other => panic!("expected message response but got {other:?}"), + } + + Ok(()) + } + #[test] fn tool_search_call_roundtrips() -> Result<()> { let parsed: ResponseItem = serde_json::from_str( @@ -2737,20 +2774,17 @@ mod tests { let image_url = "data:image/png;base64,abc".to_string(); let dir = tempdir()?; let local_path = dir.path().join("local.png"); - // A tiny valid PNG (1x1) so this test doesn't depend on cross-crate file paths, which - // break under Bazel sandboxing. 
- const TINY_PNG_BYTES: &[u8] = &[ - 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 73, 72, 68, 82, 0, 0, 0, 1, 0, 0, 0, 1, - 8, 6, 0, 0, 0, 31, 21, 196, 137, 0, 0, 0, 11, 73, 68, 65, 84, 120, 156, 99, 96, 0, 2, - 0, 0, 5, 0, 1, 122, 94, 171, 63, 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130, - ]; std::fs::write(&local_path, TINY_PNG_BYTES)?; let item = ResponseInputItem::from(vec![ UserInput::Image { image_url: image_url.clone(), + detail: None, + }, + UserInput::LocalImage { + path: local_path, + detail: None, }, - UserInput::LocalImage { path: local_path }, ]); match item { @@ -2797,6 +2831,33 @@ mod tests { Ok(()) } + #[test] + fn local_image_user_input_preserves_requested_detail() -> Result<()> { + let dir = tempdir()?; + let local_path = dir.path().join("local.png"); + std::fs::write(&local_path, TINY_PNG_BYTES)?; + + let item = ResponseInputItem::from(vec![UserInput::LocalImage { + path: local_path, + detail: Some(ImageDetail::Original), + }]); + + match item { + ResponseInputItem::Message { content, .. } => { + assert!(matches!( + content.get(1), + Some(ContentItem::InputImage { + detail: Some(ImageDetail::Original), + .. 
+ }) + )); + } + other => panic!("expected message response but got {other:?}"), + } + + Ok(()) + } + #[test] fn local_image_read_error_adds_placeholder() -> Result<()> { let dir = tempdir()?; @@ -2804,6 +2865,7 @@ mod tests { let item = ResponseInputItem::from(vec![UserInput::LocalImage { path: missing_path.clone(), + detail: None, }]); match item { @@ -2838,6 +2900,7 @@ mod tests { let item = ResponseInputItem::from(vec![UserInput::LocalImage { path: json_path.clone(), + detail: None, }]); match item { @@ -2875,6 +2938,7 @@ mod tests { let item = ResponseInputItem::from(vec![UserInput::LocalImage { path: svg_path.clone(), + detail: None, }]); match item { diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index fac2616d04..94753780cb 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -34,6 +34,7 @@ use crate::memory_citation::MemoryCitation; use crate::models::ActivePermissionProfile; use crate::models::BaseInstructions; use crate::models::ContentItem; +use crate::models::ImageDetail; use crate::models::MessagePhase; use crate::models::PermissionProfile; use crate::models::ResponseInputItem; @@ -2225,7 +2226,7 @@ pub struct AgentMessageEvent { pub memory_citation: Option, } -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, JsonSchema, TS)] pub struct UserMessageEvent { pub message: String, /// Image URLs sourced from `UserInput::Image`. These are safe @@ -2233,11 +2234,19 @@ pub struct UserMessageEvent { /// the model. #[serde(skip_serializing_if = "Option::is_none")] pub images: Option>, + /// Detail hints for `images`, indexed in parallel. Missing entries imply + /// default image detail behavior. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub image_details: Vec>, /// Local file paths sourced from `UserInput::LocalImage`. 
These are kept so /// the UI can reattach images when editing history, and should not be sent /// to the model or treated as API-ready URLs. #[serde(default)] pub local_images: Vec, + /// Detail hints for `local_images`, indexed in parallel. Missing entries + /// imply default image detail behavior. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub local_image_details: Vec>, /// UI-defined spans within `message` used to render or persist special elements. #[serde(default)] pub text_elements: Vec, @@ -5133,6 +5142,7 @@ mod tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }; let json_event = serde_json::to_value(event)?; @@ -5148,6 +5158,62 @@ mod tests { Ok(()) } + #[test] + fn user_message_event_deserializes_without_image_detail_fields() -> Result<()> { + let event: UserMessageEvent = serde_json::from_value(json!({ + "message": "hello", + "images": ["https://example.com/image.png"], + "local_images": ["/tmp/local.png"], + "text_elements": [], + }))?; + + assert_eq!(event.message, "hello"); + assert_eq!( + event.images, + Some(vec!["https://example.com/image.png".to_string()]) + ); + assert_eq!(event.image_details, Vec::>::new()); + assert_eq!(event.local_images, vec![PathBuf::from("/tmp/local.png")]); + assert_eq!(event.local_image_details, Vec::>::new()); + assert_eq!(event.text_elements, Vec::new()); + + Ok(()) + } + + #[test] + fn user_message_item_legacy_event_preserves_image_details() { + let local_path = PathBuf::from("/tmp/local.png"); + let item = UserMessageItem::new(&[ + crate::user_input::UserInput::Image { + image_url: "https://example.com/first.png".to_string(), + detail: Some(ImageDetail::Original), + }, + crate::user_input::UserInput::Image { + image_url: "https://example.com/second.png".to_string(), + detail: None, + }, + crate::user_input::UserInput::LocalImage { + path: local_path.clone(), + detail: Some(ImageDetail::Original), + }, + ]); + + let EventMsg::UserMessage(event) = 
item.as_legacy_event() else { + panic!("expected user message event"); + }; + + assert_eq!( + event.images, + Some(vec![ + "https://example.com/first.png".to_string(), + "https://example.com/second.png".to_string(), + ]) + ); + assert_eq!(event.image_details, vec![Some(ImageDetail::Original)]); + assert_eq!(event.local_images, vec![local_path]); + assert_eq!(event.local_image_details, vec![Some(ImageDetail::Original)]); + } + #[test] fn turn_aborted_event_deserializes_without_turn_id() -> Result<()> { let event: EventMsg = serde_json::from_value(json!({ diff --git a/codex-rs/protocol/src/user_input.rs b/codex-rs/protocol/src/user_input.rs index 4ed112df8d..ce4cf99eba 100644 --- a/codex-rs/protocol/src/user_input.rs +++ b/codex-rs/protocol/src/user_input.rs @@ -3,6 +3,8 @@ use serde::Deserialize; use serde::Serialize; use ts_rs::TS; +use crate::models::ImageDetail; + /// Conservative cap so one user message cannot monopolize a large context window. pub const MAX_USER_INPUT_TEXT_CHARS: usize = 1 << 20; @@ -21,11 +23,21 @@ pub enum UserInput { text_elements: Vec, }, /// Pre‑encoded data: URI image. - Image { image_url: String }, + Image { + image_url: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + detail: Option, + }, /// Local image path provided by the user. This will be converted to an /// `Image` variant (base64 data URL) during request serialization. - LocalImage { path: std::path::PathBuf }, + LocalImage { + path: std::path::PathBuf, + #[serde(default, skip_serializing_if = "Option::is_none")] + #[ts(optional)] + detail: Option, + }, /// Skill selected by the user (name + path to SKILL.md). 
Skill { diff --git a/codex-rs/rollout/src/recorder_tests.rs b/codex-rs/rollout/src/recorder_tests.rs index ede7f720a9..8199f290d8 100644 --- a/codex-rs/rollout/src/recorder_tests.rs +++ b/codex-rs/rollout/src/recorder_tests.rs @@ -114,6 +114,7 @@ async fn state_db_init_backfills_before_returning() -> anyhow::Result<()> { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })), }, ]; @@ -404,6 +405,7 @@ async fn recorder_materializes_on_flush_with_pending_items() -> std::io::Result< images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() }, ))]) .await?; diff --git a/codex-rs/rollout/src/tests.rs b/codex-rs/rollout/src/tests.rs index 22f9a73a4e..bcd395d820 100644 --- a/codex-rs/rollout/src/tests.rs +++ b/codex-rs/rollout/src/tests.rs @@ -1404,6 +1404,7 @@ async fn test_updated_at_uses_file_mtime() -> Result<()> { images: None, text_elements: Vec::new(), local_images: Vec::new(), + ..Default::default() })), }; writeln!(file, "{}", serde_json::to_string(&user_event_line)?)?; diff --git a/codex-rs/state/src/extract.rs b/codex-rs/state/src/extract.rs index d815d444ce..886d267e4f 100644 --- a/codex-rs/state/src/extract.rs +++ b/codex-rs/state/src/extract.rs @@ -205,6 +205,7 @@ mod tests { images: Some(vec![]), local_images: vec![], text_elements: vec![], + ..Default::default() })); apply_rollout_item(&mut metadata, &item, "test-provider"); @@ -225,6 +226,7 @@ mod tests { images: Some(vec!["https://example.com/image.png".to_string()]), local_images: vec![], text_elements: vec![], + ..Default::default() })); apply_rollout_item(&mut metadata, &item, "test-provider"); @@ -248,6 +250,7 @@ mod tests { images: Some(vec![]), local_images: vec![], text_elements: vec![], + ..Default::default() })); apply_rollout_item(&mut metadata, &item, "test-provider"); @@ -287,6 +290,7 @@ mod tests { images: Some(vec![]), local_images: vec![], text_elements: vec![], + ..Default::default() })); apply_rollout_item(&mut 
metadata, &user_item, "test-provider"); diff --git a/codex-rs/thread-store/src/local/mod.rs b/codex-rs/thread-store/src/local/mod.rs index ab53e12bbe..f6a48a400b 100644 --- a/codex-rs/thread-store/src/local/mod.rs +++ b/codex-rs/thread-store/src/local/mod.rs @@ -1036,6 +1036,7 @@ mod tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() })) } diff --git a/codex-rs/thread-store/src/thread_metadata_sync.rs b/codex-rs/thread-store/src/thread_metadata_sync.rs index 37afda8bc2..edc78a36c2 100644 --- a/codex-rs/thread-store/src/thread_metadata_sync.rs +++ b/codex-rs/thread-store/src/thread_metadata_sync.rs @@ -546,6 +546,7 @@ mod tests { images: None, local_images: Vec::new(), text_elements: Vec::new(), + ..Default::default() } } diff --git a/codex-rs/tools/src/image_detail.rs b/codex-rs/tools/src/image_detail.rs index 37086f691d..145cda663c 100644 --- a/codex-rs/tools/src/image_detail.rs +++ b/codex-rs/tools/src/image_detail.rs @@ -16,7 +16,7 @@ pub fn normalize_output_image_detail( Some(ImageDetail::Original) } Some(ImageDetail::Original) | None => None, - Some(ImageDetail::Auto | ImageDetail::Low | ImageDetail::High) => detail, + Some(ImageDetail::High) => Some(ImageDetail::High), } } diff --git a/codex-rs/tools/src/image_detail_tests.rs b/codex-rs/tools/src/image_detail_tests.rs index 393a962ac4..919537acf7 100644 --- a/codex-rs/tools/src/image_detail_tests.rs +++ b/codex-rs/tools/src/image_detail_tests.rs @@ -70,10 +70,6 @@ fn explicit_original_is_dropped_without_model_support() { fn explicit_non_original_detail_is_preserved() { let model_info = model_info(); - assert_eq!( - normalize_output_image_detail(&model_info, Some(ImageDetail::Low)), - Some(ImageDetail::Low) - ); assert_eq!( normalize_output_image_detail(&model_info, Some(ImageDetail::High)), Some(ImageDetail::High) @@ -92,7 +88,7 @@ fn sanitize_original_falls_back_to_high_without_support() { }, FunctionCallOutputContentItem::InputImage { image_url: 
"data:image/png;base64,BBB".to_string(), - detail: Some(ImageDetail::Low), + detail: Some(ImageDetail::High), }, ]; @@ -110,7 +106,7 @@ fn sanitize_original_falls_back_to_high_without_support() { }, FunctionCallOutputContentItem::InputImage { image_url: "data:image/png;base64,BBB".to_string(), - detail: Some(ImageDetail::Low), + detail: Some(ImageDetail::High), }, ] ); diff --git a/codex-rs/tui/src/app/tests.rs b/codex-rs/tui/src/app/tests.rs index bbc1802d17..c107bc777f 100644 --- a/codex-rs/tui/src/app/tests.rs +++ b/codex-rs/tui/src/app/tests.rs @@ -4727,7 +4727,7 @@ async fn backtrack_resubmit_preserves_data_image_urls_in_user_turn() { assert!(items.iter().any(|item| { matches!( item, - UserInput::Image { url } if url == &data_image_url + UserInput::Image { url, .. } if url == &data_image_url ) })); } diff --git a/codex-rs/tui/src/chatwidget/input_submission.rs b/codex-rs/tui/src/chatwidget/input_submission.rs index 6e9860bb77..2ead61f3dc 100644 --- a/codex-rs/tui/src/chatwidget/input_submission.rs +++ b/codex-rs/tui/src/chatwidget/input_submission.rs @@ -164,12 +164,14 @@ impl ChatWidget { for image_url in &remote_image_urls { items.push(UserInput::Image { url: image_url.clone(), + detail: None, }); } for image in &local_images { items.push(UserInput::LocalImage { path: image.path.clone(), + detail: None, }); } diff --git a/codex-rs/tui/src/chatwidget/tests/composer_submission.rs b/codex-rs/tui/src/chatwidget/tests/composer_submission.rs index 96477c8d38..3e8635c46a 100644 --- a/codex-rs/tui/src/chatwidget/tests/composer_submission.rs +++ b/codex-rs/tui/src/chatwidget/tests/composer_submission.rs @@ -58,7 +58,8 @@ async fn submission_preserves_text_elements_and_local_images() { assert_eq!( items[0], UserInput::LocalImage { - path: local_images[0].clone() + path: local_images[0].clone(), + detail: None, } ); assert_eq!( @@ -259,12 +260,14 @@ async fn submission_with_remote_and_local_images_keeps_local_placeholder_numberi items[0], UserInput::Image { url: 
remote_url.clone(), + detail: None, } ); assert_eq!( items[1], UserInput::LocalImage { path: local_images[0].clone(), + detail: None, } ); assert_eq!( @@ -342,6 +345,7 @@ async fn enter_with_only_remote_images_submits_user_turn() { items, vec![UserInput::Image { url: remote_url.clone(), + detail: None, }] ); assert_eq!(summary, None); @@ -1157,9 +1161,11 @@ fn user_message_display_from_inputs_matches_flattened_user_message_shape() { }, UserInput::Image { url: "https://example.com/remote.png".to_string(), + detail: None, }, UserInput::LocalImage { path: local_image.clone(), + detail: None, }, UserInput::Skill { name: "demo".to_string(), @@ -1232,6 +1238,7 @@ async fn committed_user_message_with_hidden_prompt_context_renders_local_images( }, UserInput::LocalImage { path: local_image.clone(), + detail: None, }, ], ); diff --git a/codex-rs/tui/src/chatwidget/tests/history_replay.rs b/codex-rs/tui/src/chatwidget/tests/history_replay.rs index 8282430306..1d1b19fdf5 100644 --- a/codex-rs/tui/src/chatwidget/tests/history_replay.rs +++ b/codex-rs/tui/src/chatwidget/tests/history_replay.rs @@ -119,6 +119,7 @@ async fn replayed_user_message_preserves_text_elements_and_local_images() { }, AppServerUserInput::LocalImage { path: local_images[0].clone(), + detail: None, }, ], ReplayKind::ResumeInitialMessages, @@ -188,6 +189,7 @@ async fn replayed_user_message_preserves_remote_image_urls() { }, AppServerUserInput::Image { url: remote_image_urls[0].clone(), + detail: None, }, ], ReplayKind::ResumeInitialMessages, @@ -441,6 +443,7 @@ async fn replayed_user_message_with_only_remote_images_renders_history_cell() { "user-1", vec![AppServerUserInput::Image { url: remote_image_urls[0].clone(), + detail: None, }], ReplayKind::ResumeInitialMessages, ); @@ -496,6 +499,7 @@ async fn replayed_user_message_with_only_local_images_renders_history_cell() { "user-1", vec![AppServerUserInput::LocalImage { path: local_images[0].clone(), + detail: None, }], ReplayKind::ResumeInitialMessages, ); diff 
--git a/codex-rs/tui/src/chatwidget/tests/review_mode.rs b/codex-rs/tui/src/chatwidget/tests/review_mode.rs index 8c16e0b929..48bc341f6b 100644 --- a/codex-rs/tui/src/chatwidget/tests/review_mode.rs +++ b/codex-rs/tui/src/chatwidget/tests/review_mode.rs @@ -597,6 +597,7 @@ async fn item_completed_pops_pending_steer_with_local_image_and_text_elements() vec![ UserInput::Image { url: "data:image/png;base64,placeholder".to_string(), + detail: None, }, UserInput::Text { text, diff --git a/codex-rs/tui/src/chatwidget/user_messages.rs b/codex-rs/tui/src/chatwidget/user_messages.rs index bf3f9e8fe6..cdcf650cb1 100644 --- a/codex-rs/tui/src/chatwidget/user_messages.rs +++ b/codex-rs/tui/src/chatwidget/user_messages.rs @@ -593,8 +593,8 @@ impl ChatWidget { ) }), ), - UserInput::Image { url } => remote_image_urls.push(url.clone()), - UserInput::LocalImage { path } => local_images.push(path.clone()), + UserInput::Image { url, .. } => remote_image_urls.push(url.clone()), + UserInput::LocalImage { path, .. } => local_images.push(path.clone()), UserInput::Skill { .. } | UserInput::Mention { .. } => {} } } diff --git a/codex-rs/tui/src/ide_context/prompt.rs b/codex-rs/tui/src/ide_context/prompt.rs index ec7e165ba8..884ebfb646 100644 --- a/codex-rs/tui/src/ide_context/prompt.rs +++ b/codex-rs/tui/src/ide_context/prompt.rs @@ -269,6 +269,7 @@ mod tests { let mut items = vec![ UserInput::LocalImage { path: PathBuf::from("/tmp/screenshot.png"), + detail: None, }, UserInput::Text { text, @@ -288,6 +289,7 @@ mod tests { vec![ UserInput::LocalImage { path: PathBuf::from("/tmp/screenshot.png"), + detail: None, }, UserInput::Text { text: format!("{expected_prefix}Ask $figma"),