mirror of
https://github.com/openai/codex.git
synced 2026-03-04 13:43:19 +00:00
Compare commits
2 Commits
fix/notify
...
ccy/input-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1eb12b30be | ||
|
|
0b996252d8 |
@@ -3791,6 +3791,25 @@
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"ItemCompletedNotification": {
|
||||
"properties": {
|
||||
"item": {
|
||||
@@ -5980,6 +5999,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -14316,6 +14316,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/v2/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -382,6 +382,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -701,6 +720,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -369,6 +369,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -574,6 +593,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -369,6 +369,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -574,6 +593,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -382,6 +382,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -701,6 +720,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -369,6 +369,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -574,6 +593,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -382,6 +382,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -701,6 +720,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -369,6 +369,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -574,6 +593,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -369,6 +369,25 @@
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"InputModality": {
|
||||
"description": "Canonical user-input modality tags advertised by a model.",
|
||||
"oneOf": [
|
||||
{
|
||||
"description": "Plain text turns and tool payloads.",
|
||||
"enum": [
|
||||
"text"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"description": "Image attachments included in user turns.",
|
||||
"enum": [
|
||||
"image"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"McpToolCallError": {
|
||||
"properties": {
|
||||
"message": {
|
||||
@@ -574,6 +593,17 @@
|
||||
"description": "Version of the CLI that created the thread.",
|
||||
"type": "string"
|
||||
},
|
||||
"conversationModalities": {
|
||||
"default": null,
|
||||
"description": "Tri-state conversation modalities signal: - `None`: unknown / not yet determined - `Some([Text])`: known to be text-only - `Some([Text, Image])`: images are known to exist in context",
|
||||
"items": {
|
||||
"$ref": "#/definitions/InputModality"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"createdAt": {
|
||||
"description": "Unix timestamp (in seconds) when the thread was created.",
|
||||
"format": "int64",
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// GENERATED CODE! DO NOT MODIFY BY HAND!
|
||||
|
||||
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
|
||||
import type { InputModality } from "../InputModality";
|
||||
import type { GitInfo } from "./GitInfo";
|
||||
import type { SessionSource } from "./SessionSource";
|
||||
import type { Turn } from "./Turn";
|
||||
@@ -10,6 +11,13 @@ export type Thread = { id: string,
|
||||
* Usually the first user message in the thread, if available.
|
||||
*/
|
||||
preview: string,
|
||||
/**
|
||||
* Tri-state conversation modalities signal:
|
||||
* - `None`: unknown / not yet determined
|
||||
* - `Some([Text])`: known to be text-only
|
||||
* - `Some([Text, Image])`: images are known to exist in context
|
||||
*/
|
||||
conversationModalities: Array<InputModality> | null,
|
||||
/**
|
||||
* Model provider used for this thread (for example, 'openai').
|
||||
*/
|
||||
|
||||
@@ -1942,6 +1942,12 @@ pub struct Thread {
|
||||
pub id: String,
|
||||
/// Usually the first user message in the thread, if available.
|
||||
pub preview: String,
|
||||
/// Tri-state conversation modalities signal:
|
||||
/// - `None`: unknown / not yet determined
|
||||
/// - `Some([Text])`: known to be text-only
|
||||
/// - `Some([Text, Image])`: images are known to exist in context
|
||||
#[serde(default)]
|
||||
pub conversation_modalities: Option<Vec<InputModality>>,
|
||||
/// Model provider used for this thread (for example, 'openai').
|
||||
pub model_provider: String,
|
||||
/// Unix timestamp (in seconds) when the thread was created.
|
||||
|
||||
@@ -5,6 +5,7 @@ use crate::codex_message_processor::TurnSummary;
|
||||
use crate::codex_message_processor::TurnSummaryStore;
|
||||
use crate::codex_message_processor::read_event_msgs_from_rollout;
|
||||
use crate::codex_message_processor::read_summary_from_rollout;
|
||||
use crate::codex_message_processor::resolve_conversation_modalities;
|
||||
use crate::codex_message_processor::summary_to_thread;
|
||||
use crate::error_code::INTERNAL_ERROR_CODE;
|
||||
use crate::error_code::INVALID_REQUEST_ERROR_CODE;
|
||||
@@ -46,6 +47,8 @@ use codex_app_server_protocol::PatchApplyStatus;
|
||||
use codex_app_server_protocol::PatchChangeKind as V2PatchChangeKind;
|
||||
use codex_app_server_protocol::PlanDeltaNotification;
|
||||
use codex_app_server_protocol::RawResponseItemCompletedNotification;
|
||||
use codex_protocol::openai_models::InputModality;
|
||||
use codex_protocol::openai_models::input_modalities_to_mask;
|
||||
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
|
||||
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
|
||||
use codex_app_server_protocol::ReasoningTextDeltaNotification;
|
||||
@@ -100,6 +103,7 @@ use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::oneshot;
|
||||
use tracing::error;
|
||||
use tracing::warn;
|
||||
|
||||
type JsonValue = serde_json::Value;
|
||||
|
||||
@@ -852,6 +856,21 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
.await;
|
||||
}
|
||||
EventMsg::RawResponseItem(raw_response_item_event) => {
|
||||
if raw_response_item_event.item.has_input_image() {
|
||||
let modalities = [InputModality::Text, InputModality::Image];
|
||||
if let Some(ctx) = conversation.state_db()
|
||||
&& let Err(err) = ctx
|
||||
.set_thread_conversation_modalities(
|
||||
conversation_id,
|
||||
input_modalities_to_mask(&modalities),
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!(
|
||||
"failed to persist conversation modalities for thread {conversation_id}: {err}"
|
||||
);
|
||||
}
|
||||
}
|
||||
maybe_emit_raw_response_item_completed(
|
||||
api_version,
|
||||
conversation_id,
|
||||
@@ -1100,7 +1119,14 @@ pub(crate) async fn apply_bespoke_event_handling(
|
||||
.await
|
||||
{
|
||||
Ok(summary) => {
|
||||
let mut thread = summary_to_thread(summary);
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
conversation.state_db().as_ref(),
|
||||
conversation_id,
|
||||
None,
|
||||
Some(rollout_path.as_path()),
|
||||
)
|
||||
.await;
|
||||
let mut thread = summary_to_thread(summary, conversation_modalities);
|
||||
match read_event_msgs_from_rollout(rollout_path.as_path()).await {
|
||||
Ok(events) => {
|
||||
thread.turns = build_turns_from_event_msgs(&events);
|
||||
|
||||
@@ -201,7 +201,7 @@ use codex_core::rollout_date_parts;
|
||||
use codex_core::sandboxing::SandboxPermissions;
|
||||
use codex_core::skills::remote::download_remote_skill;
|
||||
use codex_core::skills::remote::list_remote_skills;
|
||||
use codex_core::state_db::StateDbHandle;
|
||||
use codex_core::state_db::{StateDbHandle, get_state_db};
|
||||
use codex_core::state_db::open_if_present;
|
||||
use codex_core::token_data::parse_id_token;
|
||||
use codex_core::windows_sandbox::WindowsSandboxLevelExt;
|
||||
@@ -216,7 +216,11 @@ use codex_protocol::config_types::Personality;
|
||||
use codex_protocol::config_types::WindowsSandboxLevel;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec as CoreDynamicToolSpec;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::openai_models::InputModality;
|
||||
use codex_protocol::openai_models::input_modalities_from_mask;
|
||||
use codex_protocol::openai_models::input_modalities_to_mask;
|
||||
use codex_protocol::protocol::AgentStatus;
|
||||
use codex_protocol::protocol::GitInfo as CoreGitInfo;
|
||||
use codex_protocol::protocol::McpAuthStatus as CoreMcpAuthStatus;
|
||||
@@ -1896,11 +1900,41 @@ impl CodexMessageProcessor {
|
||||
..
|
||||
} = new_conv;
|
||||
let config_snapshot = thread.config_snapshot().await;
|
||||
let thread = build_thread_from_snapshot(
|
||||
thread_id,
|
||||
&config_snapshot,
|
||||
session_configured.rollout_path.clone(),
|
||||
);
|
||||
let fallback_provider = self.config.model_provider_id.as_str();
|
||||
let state_db_ctx = get_state_db(&self.config, None).await;
|
||||
|
||||
// A bit hacky, but the summary contains a lot of useful information for the thread
|
||||
// that unfortunately does not get returned from thread_manager.start_thread().
|
||||
let thread = match session_configured.rollout_path.as_ref() {
|
||||
Some(rollout_path) => {
|
||||
match read_summary_from_rollout(rollout_path.as_path(), fallback_provider)
|
||||
.await
|
||||
{
|
||||
Ok(summary) => {
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
thread_id,
|
||||
None,
|
||||
Some(rollout_path.as_path()),
|
||||
)
|
||||
.await;
|
||||
summary_to_thread(summary, conversation_modalities)
|
||||
}
|
||||
Err(err) => {
|
||||
self.send_internal_error(
|
||||
request_id,
|
||||
format!(
|
||||
"failed to load rollout `{}` for thread {thread_id}: {err}",
|
||||
rollout_path.display()
|
||||
),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
None => build_ephemeral_thread(thread_id, &config_snapshot),
|
||||
};
|
||||
|
||||
let response = ThreadStartResponse {
|
||||
thread: thread.clone(),
|
||||
@@ -2211,7 +2245,7 @@ impl CodexMessageProcessor {
|
||||
message: format!("failed to update unarchived thread timestamp: {err}"),
|
||||
data: None,
|
||||
})?;
|
||||
if let Some(ctx) = state_db_ctx {
|
||||
if let Some(ctx) = state_db_ctx.as_ref() {
|
||||
let _ = ctx
|
||||
.mark_unarchived(thread_id, restored_path.as_path())
|
||||
.await;
|
||||
@@ -2224,7 +2258,14 @@ impl CodexMessageProcessor {
|
||||
message: format!("failed to read unarchived thread: {err}"),
|
||||
data: None,
|
||||
})?;
|
||||
Ok(summary_to_thread(summary))
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
summary.conversation_id,
|
||||
None,
|
||||
Some(summary.path.as_path()),
|
||||
)
|
||||
.await;
|
||||
Ok(summary_to_thread(summary, conversation_modalities))
|
||||
}
|
||||
.await;
|
||||
|
||||
@@ -2385,7 +2426,18 @@ impl CodexMessageProcessor {
|
||||
}
|
||||
};
|
||||
|
||||
let data = summaries.into_iter().map(summary_to_thread).collect();
|
||||
let state_db_ctx = get_state_db(&self.config, None).await;
|
||||
let mut data = Vec::with_capacity(summaries.len());
|
||||
for summary in summaries {
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
summary.conversation_id,
|
||||
None,
|
||||
Some(summary.path.as_path()),
|
||||
)
|
||||
.await;
|
||||
data.push(summary_to_thread(summary, conversation_modalities));
|
||||
}
|
||||
let response = ThreadListResponse { data, next_cursor };
|
||||
self.outgoing.send_response(request_id, response).await;
|
||||
}
|
||||
@@ -2499,12 +2551,30 @@ impl CodexMessageProcessor {
|
||||
return;
|
||||
}
|
||||
|
||||
let state_db_ctx = get_state_db(&self.config, None).await;
|
||||
|
||||
let mut thread = if let Some(summary) = db_summary {
|
||||
summary_to_thread(summary)
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
thread_uuid,
|
||||
None,
|
||||
rollout_path.as_deref(),
|
||||
)
|
||||
.await;
|
||||
summary_to_thread(summary, conversation_modalities)
|
||||
} else if let Some(rollout_path) = rollout_path.as_ref() {
|
||||
let fallback_provider = self.config.model_provider_id.as_str();
|
||||
match read_summary_from_rollout(rollout_path, fallback_provider).await {
|
||||
Ok(summary) => summary_to_thread(summary),
|
||||
Ok(summary) => {
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
thread_uuid,
|
||||
None,
|
||||
Some(rollout_path.as_path()),
|
||||
)
|
||||
.await;
|
||||
summary_to_thread(summary, conversation_modalities)
|
||||
}
|
||||
Err(err) => {
|
||||
self.send_internal_error(
|
||||
request_id,
|
||||
@@ -2539,7 +2609,17 @@ impl CodexMessageProcessor {
|
||||
if include_turns {
|
||||
rollout_path = loaded_rollout_path.clone();
|
||||
}
|
||||
build_thread_from_snapshot(thread_uuid, &config_snapshot, loaded_rollout_path)
|
||||
let mut thread = if loaded_rollout_path.is_some() {
|
||||
build_thread_from_snapshot(thread_uuid, &config_snapshot, loaded_rollout_path)
|
||||
} else {
|
||||
build_ephemeral_thread(thread_uuid, &config_snapshot)
|
||||
};
|
||||
if let Some(modalities) =
|
||||
fetch_state_db_conversation_modalities(state_db_ctx.as_ref(), thread_uuid).await
|
||||
{
|
||||
thread.conversation_modalities = Some(modalities);
|
||||
}
|
||||
thread
|
||||
};
|
||||
|
||||
if include_turns && let Some(rollout_path) = rollout_path.as_ref() {
|
||||
@@ -2726,6 +2806,7 @@ impl CodexMessageProcessor {
|
||||
};
|
||||
|
||||
let fallback_model_provider = config.model_provider_id.clone();
|
||||
let state_db_ctx = get_state_db(&config, None).await;
|
||||
|
||||
match self
|
||||
.thread_manager
|
||||
@@ -2762,13 +2843,13 @@ impl CodexMessageProcessor {
|
||||
);
|
||||
}
|
||||
|
||||
let mut thread = match read_summary_from_rollout(
|
||||
let summary = match read_summary_from_rollout(
|
||||
rollout_path.as_path(),
|
||||
fallback_model_provider.as_str(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(summary) => summary_to_thread(summary),
|
||||
Ok(summary) => summary,
|
||||
Err(err) => {
|
||||
self.send_internal_error(
|
||||
request_id,
|
||||
@@ -2781,6 +2862,14 @@ impl CodexMessageProcessor {
|
||||
return;
|
||||
}
|
||||
};
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
thread_id,
|
||||
None,
|
||||
Some(rollout_path.as_path()),
|
||||
)
|
||||
.await;
|
||||
let mut thread = summary_to_thread(summary, conversation_modalities);
|
||||
thread.turns = initial_messages
|
||||
.as_deref()
|
||||
.map_or_else(Vec::new, build_turns_from_event_msgs);
|
||||
@@ -2970,13 +3059,13 @@ impl CodexMessageProcessor {
|
||||
);
|
||||
}
|
||||
|
||||
let mut thread = match read_summary_from_rollout(
|
||||
let summary = match read_summary_from_rollout(
|
||||
rollout_path.as_path(),
|
||||
fallback_model_provider.as_str(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(summary) => summary_to_thread(summary),
|
||||
Ok(summary) => summary,
|
||||
Err(err) => {
|
||||
self.send_internal_error(
|
||||
request_id,
|
||||
@@ -2989,6 +3078,15 @@ impl CodexMessageProcessor {
|
||||
return;
|
||||
}
|
||||
};
|
||||
let state_db_ctx = get_state_db(&self.config, None).await;
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
thread_id,
|
||||
None,
|
||||
Some(rollout_path.as_path()),
|
||||
)
|
||||
.await;
|
||||
let mut thread = summary_to_thread(summary, conversation_modalities);
|
||||
thread.turns = initial_messages
|
||||
.as_deref()
|
||||
.map_or_else(Vec::new, build_turns_from_event_msgs);
|
||||
@@ -4285,6 +4383,10 @@ impl CodexMessageProcessor {
|
||||
return;
|
||||
};
|
||||
|
||||
let has_image_input = items.iter().any(|item| {
|
||||
matches!(item, WireInputItem::Image { .. } | WireInputItem::LocalImage { .. })
|
||||
});
|
||||
|
||||
let mapped_items: Vec<CoreInputItem> = items
|
||||
.into_iter()
|
||||
.map(|item| match item {
|
||||
@@ -4300,6 +4402,16 @@ impl CodexMessageProcessor {
|
||||
})
|
||||
.collect();
|
||||
|
||||
if has_image_input {
|
||||
let modalities = conversation_modalities_for_has_image(true);
|
||||
persist_conversation_modalities(
|
||||
conversation.state_db().as_ref(),
|
||||
conversation_id,
|
||||
&modalities,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// Submit user input to the conversation.
|
||||
let _ = conversation
|
||||
.submit(Op::UserInput {
|
||||
@@ -4337,6 +4449,10 @@ impl CodexMessageProcessor {
|
||||
return;
|
||||
};
|
||||
|
||||
let has_image_input = items.iter().any(|item| {
|
||||
matches!(item, WireInputItem::Image { .. } | WireInputItem::LocalImage { .. })
|
||||
});
|
||||
|
||||
let mapped_items: Vec<CoreInputItem> = items
|
||||
.into_iter()
|
||||
.map(|item| match item {
|
||||
@@ -4352,6 +4468,16 @@ impl CodexMessageProcessor {
|
||||
})
|
||||
.collect();
|
||||
|
||||
if has_image_input {
|
||||
let modalities = conversation_modalities_for_has_image(true);
|
||||
persist_conversation_modalities(
|
||||
conversation.state_db().as_ref(),
|
||||
conversation_id,
|
||||
&modalities,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let _ = conversation
|
||||
.submit(Op::UserTurn {
|
||||
items: mapped_items,
|
||||
@@ -5001,7 +5127,15 @@ impl CodexMessageProcessor {
|
||||
if let Some(rollout_path) = review_thread.rollout_path() {
|
||||
match read_summary_from_rollout(rollout_path.as_path(), fallback_provider).await {
|
||||
Ok(summary) => {
|
||||
let thread = summary_to_thread(summary);
|
||||
let state_db_ctx = get_state_db(&self.config, None).await;
|
||||
let conversation_modalities = resolve_conversation_modalities(
|
||||
state_db_ctx.as_ref(),
|
||||
summary.conversation_id,
|
||||
None,
|
||||
Some(rollout_path.as_path()),
|
||||
)
|
||||
.await;
|
||||
let thread = summary_to_thread(summary, conversation_modalities);
|
||||
let notif = ThreadStartedNotification { thread };
|
||||
self.outgoing
|
||||
.send_server_notification(ServerNotification::ThreadStarted(notif))
|
||||
@@ -5795,7 +5929,6 @@ pub(crate) async fn read_summary_from_rollout(
|
||||
fallback_provider: &str,
|
||||
) -> std::io::Result<ConversationSummary> {
|
||||
let head = read_head_for_summary(path).await?;
|
||||
|
||||
let Some(first) = head.first() else {
|
||||
return Err(IoError::other(format!(
|
||||
"rollout at {} is empty",
|
||||
@@ -5876,6 +6009,143 @@ pub(crate) async fn read_event_msgs_from_rollout(
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn fetch_state_db_conversation_modalities(
|
||||
state_db_ctx: Option<&StateDbHandle>,
|
||||
thread_id: ThreadId,
|
||||
) -> Option<Vec<InputModality>> {
|
||||
let ctx = state_db_ctx?;
|
||||
match ctx.get_thread(thread_id).await {
|
||||
Ok(Some(metadata)) => metadata
|
||||
.conversation_modalities
|
||||
.map(input_modalities_from_mask),
|
||||
Ok(None) => None,
|
||||
Err(err) => {
|
||||
warn!("failed to read conversation modalities for thread {thread_id}: {err}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn persist_conversation_modalities(
|
||||
state_db_ctx: Option<&StateDbHandle>,
|
||||
thread_id: ThreadId,
|
||||
conversation_modalities: &[InputModality],
|
||||
) {
|
||||
let Some(ctx) = state_db_ctx else {
|
||||
return;
|
||||
};
|
||||
if let Err(err) = ctx
|
||||
.set_thread_conversation_modalities(
|
||||
thread_id,
|
||||
input_modalities_to_mask(conversation_modalities),
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!(
|
||||
"failed to persist conversation modalities for thread {thread_id}: {err}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn resolve_conversation_modalities(
|
||||
state_db_ctx: Option<&StateDbHandle>,
|
||||
thread_id: ThreadId,
|
||||
current_modalities: Option<Vec<InputModality>>,
|
||||
rollout_path: Option<&Path>,
|
||||
) -> Option<Vec<InputModality>> {
|
||||
if let Some(modalities) = current_modalities {
|
||||
if modalities.contains(&InputModality::Image) {
|
||||
persist_conversation_modalities(state_db_ctx, thread_id, &modalities).await;
|
||||
}
|
||||
return Some(modalities);
|
||||
}
|
||||
|
||||
if let Some(value) = fetch_state_db_conversation_modalities(state_db_ctx, thread_id).await {
|
||||
return Some(value);
|
||||
}
|
||||
|
||||
let Some(rollout_path) = rollout_path else {
|
||||
return None;
|
||||
};
|
||||
|
||||
match read_rollout_has_image_context(rollout_path).await {
|
||||
Ok(Some(has_image_context)) => {
|
||||
let modalities = conversation_modalities_for_has_image(has_image_context);
|
||||
persist_conversation_modalities(state_db_ctx, thread_id, &modalities).await;
|
||||
Some(modalities)
|
||||
}
|
||||
Ok(None) => None,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"failed to determine conversation modalities for rollout {}: {err}",
|
||||
rollout_path.display()
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn read_rollout_has_image_context(
|
||||
path: &Path,
|
||||
) -> std::io::Result<Option<bool>> {
|
||||
let items = match RolloutRecorder::get_rollout_history(path).await? {
|
||||
InitialHistory::New => Vec::new(),
|
||||
InitialHistory::Forked(items) => items,
|
||||
InitialHistory::Resumed(resumed) => resumed.history,
|
||||
};
|
||||
|
||||
let mut saw_input = false;
|
||||
for item in items.into_iter().rev() {
|
||||
match item {
|
||||
RolloutItem::ResponseItem(response_item) => {
|
||||
if response_item.has_input_image() {
|
||||
return Ok(Some(true));
|
||||
}
|
||||
if response_item_has_input_text(&response_item) {
|
||||
saw_input = true;
|
||||
}
|
||||
}
|
||||
RolloutItem::EventMsg(EventMsg::UserMessage(user_message)) => {
|
||||
if user_message
|
||||
.images
|
||||
.as_ref()
|
||||
.is_some_and(|images| !images.is_empty())
|
||||
|| !user_message.local_images.is_empty()
|
||||
{
|
||||
return Ok(Some(true));
|
||||
}
|
||||
saw_input = true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if saw_input {
|
||||
Ok(Some(false))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn response_item_has_input_text(item: &ResponseItem) -> bool {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
content
|
||||
.iter()
|
||||
.any(|item| matches!(item, ContentItem::InputText { .. } | ContentItem::OutputText { .. }))
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn conversation_modalities_for_has_image(has_image_context: bool) -> Vec<InputModality> {
|
||||
if has_image_context {
|
||||
vec![InputModality::Text, InputModality::Image]
|
||||
} else {
|
||||
vec![InputModality::Text]
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_conversation_summary(
|
||||
path: PathBuf,
|
||||
head: &[serde_json::Value],
|
||||
@@ -5909,7 +6179,6 @@ fn extract_conversation_summary(
|
||||
.unwrap_or_else(|| fallback_provider.to_string());
|
||||
let git_info = git.map(map_git_info);
|
||||
let updated_at = updated_at.or_else(|| timestamp.clone());
|
||||
|
||||
Some(ConversationSummary {
|
||||
conversation_id,
|
||||
timestamp,
|
||||
@@ -5961,6 +6230,7 @@ fn build_thread_from_snapshot(
|
||||
Thread {
|
||||
id: thread_id.to_string(),
|
||||
preview: String::new(),
|
||||
conversation_modalities: None,
|
||||
model_provider: config_snapshot.model_provider_id.clone(),
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
@@ -5973,7 +6243,10 @@ fn build_thread_from_snapshot(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn summary_to_thread(summary: ConversationSummary) -> Thread {
|
||||
pub(crate) fn summary_to_thread(
|
||||
summary: ConversationSummary,
|
||||
resolved_conversation_modalities: Option<Vec<InputModality>>,
|
||||
) -> Thread {
|
||||
let ConversationSummary {
|
||||
conversation_id,
|
||||
path,
|
||||
@@ -5994,10 +6267,12 @@ pub(crate) fn summary_to_thread(summary: ConversationSummary) -> Thread {
|
||||
branch: info.branch,
|
||||
origin_url: info.origin_url,
|
||||
});
|
||||
let conversation_modalities = resolved_conversation_modalities;
|
||||
|
||||
Thread {
|
||||
id: conversation_id.to_string(),
|
||||
preview,
|
||||
conversation_modalities,
|
||||
model_provider,
|
||||
created_at: created_at.map(|dt| dt.timestamp()).unwrap_or(0),
|
||||
updated_at: updated_at.map(|dt| dt.timestamp()).unwrap_or(0),
|
||||
@@ -6158,4 +6433,59 @@ mod tests {
|
||||
assert_eq!(summary, expected);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_rollout_has_image_context_detects_images() -> Result<()> {
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::SessionMetaLine;
|
||||
use codex_protocol::protocol::UserMessageEvent;
|
||||
use std::fs;
|
||||
|
||||
let temp_dir = TempDir::new()?;
|
||||
let path = temp_dir.path().join("rollout.jsonl");
|
||||
|
||||
let conversation_id = ThreadId::from_string("f3225d70-c282-4eaf-bb39-c474f8194bcb")?;
|
||||
let timestamp = "2025-09-06T10:10:10.000Z".to_string();
|
||||
|
||||
let session_meta = SessionMeta {
|
||||
id: conversation_id,
|
||||
timestamp: timestamp.clone(),
|
||||
model_provider: None,
|
||||
..SessionMeta::default()
|
||||
};
|
||||
|
||||
let lines = vec![
|
||||
RolloutLine {
|
||||
timestamp: timestamp.clone(),
|
||||
item: RolloutItem::SessionMeta(SessionMetaLine {
|
||||
meta: session_meta,
|
||||
git: None,
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: timestamp.clone(),
|
||||
item: RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
|
||||
message: "legacy image event".to_string(),
|
||||
images: Some(vec!["data:image/png;base64,abc123".to_string()]),
|
||||
local_images: Vec::new(),
|
||||
text_elements: Vec::new(),
|
||||
})),
|
||||
},
|
||||
];
|
||||
|
||||
let mut contents = String::new();
|
||||
for line in lines {
|
||||
contents.push_str(&serde_json::to_string(&line)?);
|
||||
contents.push('\n');
|
||||
}
|
||||
fs::write(&path, contents)?;
|
||||
|
||||
assert_eq!(
|
||||
read_rollout_has_image_context(path.as_path()).await?,
|
||||
Some(true)
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,8 +222,10 @@ use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use codex_protocol::config_types::WindowsSandboxLevel;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DeveloperInstructions;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::openai_models::InputModality;
|
||||
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::protocol::CodexErrorInfo;
|
||||
use codex_protocol::protocol::InitialHistory;
|
||||
@@ -232,6 +234,8 @@ use codex_utils_readiness::Readiness;
|
||||
use codex_utils_readiness::ReadinessFlag;
|
||||
use tokio::sync::watch;
|
||||
|
||||
/// Text that `substitute_blind_images` puts in place of each image content item.
const BLIND_IMAGE_PLACEHOLDER_TEXT: &str = "Placeholder for what used to be an image.";
|
||||
|
||||
/// The high-level interface to the Codex system.
|
||||
/// It operates as a queue pair where you send submissions and receive events.
|
||||
pub struct Codex {
|
||||
@@ -3859,7 +3863,19 @@ pub(crate) async fn run_turn(
|
||||
}
|
||||
|
||||
// Construct the input that we will send to the model.
|
||||
let sampling_request_input: Vec<ResponseItem> = { sess.clone_history().await.for_prompt() };
|
||||
let mut sampling_request_input: Vec<ResponseItem> = {
|
||||
sess.clone_history().await.for_prompt()
|
||||
};
|
||||
if !turn_context
|
||||
.model_info
|
||||
.input_modalities
|
||||
.contains(&InputModality::Image)
|
||||
{
|
||||
let (sanitized, replaced) = substitute_blind_images(sampling_request_input);
|
||||
if replaced {
|
||||
sampling_request_input = sanitized;
|
||||
}
|
||||
}
|
||||
|
||||
let sampling_request_input_messages = sampling_request_input
|
||||
.iter()
|
||||
@@ -4070,6 +4086,46 @@ fn codex_apps_connector_id(tool: &crate::mcp_connection_manager::ToolInfo) -> Op
|
||||
tool.connector_id.as_deref()
|
||||
}
|
||||
|
||||
fn substitute_blind_images(mut items: Vec<ResponseItem>) -> (Vec<ResponseItem>, bool) {
|
||||
let mut replaced = false;
|
||||
for item in &mut items {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
let mut new_content = Vec::with_capacity(content.len());
|
||||
for content_item in content.drain(..) {
|
||||
match content_item {
|
||||
ContentItem::InputImage { .. } => {
|
||||
replaced = true;
|
||||
new_content.push(ContentItem::InputText {
|
||||
text: BLIND_IMAGE_PLACEHOLDER_TEXT.to_string(),
|
||||
});
|
||||
}
|
||||
other => new_content.push(other),
|
||||
}
|
||||
}
|
||||
*content = new_content;
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { output, .. } => {
|
||||
if let Some(content_items) = output.content_items_mut() {
|
||||
for content_item in content_items.iter_mut() {
|
||||
if matches!(
|
||||
content_item,
|
||||
FunctionCallOutputContentItem::InputImage { .. }
|
||||
) {
|
||||
*content_item = FunctionCallOutputContentItem::InputText {
|
||||
text: BLIND_IMAGE_PLACEHOLDER_TEXT.to_string(),
|
||||
};
|
||||
replaced = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
(items, replaced)
|
||||
}
|
||||
|
||||
struct SamplingRequestToolSelection<'a> {
|
||||
explicit_app_paths: &'a [String],
|
||||
skill_name_counts_lower: &'a HashMap<String, usize>,
|
||||
|
||||
@@ -72,6 +72,12 @@ pub enum ContentItem {
|
||||
OutputText { text: String },
|
||||
}
|
||||
|
||||
impl ContentItem {
|
||||
/// Returns `true` when this content item is the `InputImage` variant.
pub fn is_input_image(&self) -> bool {
|
||||
matches!(self, ContentItem::InputImage { .. })
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
/// Classifies an assistant message as interim commentary or final answer text.
|
||||
@@ -195,6 +201,24 @@ pub enum ResponseItem {
|
||||
Other,
|
||||
}
|
||||
|
||||
impl ResponseItem {
|
||||
pub fn has_input_image(&self) -> bool {
|
||||
match self {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
content.iter().any(ContentItem::is_input_image)
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { output, .. } => {
|
||||
output.content_items().is_some_and(|items| {
|
||||
items
|
||||
.iter()
|
||||
.any(FunctionCallOutputContentItem::is_input_image)
|
||||
})
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const BASE_INSTRUCTIONS_DEFAULT: &str = include_str!("prompts/base_instructions/default.md");
|
||||
|
||||
/// Base instructions for the model in a thread. Corresponds to the `instructions` field in the ResponsesAPI.
|
||||
@@ -795,6 +819,12 @@ pub enum FunctionCallOutputContentItem {
|
||||
InputImage { image_url: String },
|
||||
}
|
||||
|
||||
impl FunctionCallOutputContentItem {
|
||||
/// Returns `true` when this function-call output item is the `InputImage` variant.
pub fn is_input_image(&self) -> bool {
|
||||
matches!(self, FunctionCallOutputContentItem::InputImage { .. })
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts structured function-call output content into plain text for
|
||||
/// human-readable surfaces.
|
||||
///
|
||||
|
||||
@@ -72,6 +72,35 @@ pub enum InputModality {
|
||||
Image,
|
||||
}
|
||||
|
||||
/// Bit used for `InputModality::Text` in a modalities bitmask (bit 0).
pub const INPUT_MODALITY_TEXT_MASK: i64 = 1;
|
||||
/// Bit used for `InputModality::Image` in a modalities bitmask (bit 1).
pub const INPUT_MODALITY_IMAGE_MASK: i64 = 1 << 1;
|
||||
|
||||
/// Returns the single-bit mask constant that represents `modality`.
///
/// The match has no wildcard arm, so every `InputModality` variant must be listed here.
pub fn input_modality_to_mask(modality: InputModality) -> i64 {
|
||||
match modality {
|
||||
InputModality::Text => INPUT_MODALITY_TEXT_MASK,
|
||||
InputModality::Image => INPUT_MODALITY_IMAGE_MASK,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn input_modalities_to_mask(modalities: &[InputModality]) -> i64 {
|
||||
modalities
|
||||
.iter()
|
||||
.copied()
|
||||
.map(input_modality_to_mask)
|
||||
.fold(0, |mask, bit| mask | bit)
|
||||
}
|
||||
|
||||
pub fn input_modalities_from_mask(mask: i64) -> Vec<InputModality> {
|
||||
let mut modalities = Vec::new();
|
||||
if mask & INPUT_MODALITY_TEXT_MASK != 0 {
|
||||
modalities.push(InputModality::Text);
|
||||
}
|
||||
if mask & INPUT_MODALITY_IMAGE_MASK != 0 {
|
||||
modalities.push(InputModality::Image);
|
||||
}
|
||||
modalities
|
||||
}
|
||||
|
||||
/// Backward-compatible default when `input_modalities` is omitted on the wire.
|
||||
///
|
||||
/// Legacy payloads predate modality metadata, so we conservatively assume both text and images are
|
||||
|
||||
2
codex-rs/state/migrations/0009_thread_model_history.sql
Normal file
2
codex-rs/state/migrations/0009_thread_model_history.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
ALTER TABLE threads ADD COLUMN initial_model TEXT;
|
||||
ALTER TABLE threads ADD COLUMN model_history TEXT;
|
||||
@@ -0,0 +1 @@
|
||||
ALTER TABLE threads ADD COLUMN conversation_modalities INTEGER;
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::model::ThreadMetadata;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
@@ -10,6 +11,7 @@ use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
||||
const IMAGE_ONLY_USER_MESSAGE_PLACEHOLDER: &str = "[Image]";
|
||||
/// Maximum number of model slugs `update_model_history` keeps in `model_history`.
const MODEL_HISTORY_LIMIT: usize = 10;
|
||||
|
||||
/// Apply a rollout item to the metadata structure.
|
||||
pub fn apply_rollout_item(
|
||||
@@ -57,6 +59,7 @@ fn apply_turn_context(metadata: &mut ThreadMetadata, turn_ctx: &TurnContextItem)
|
||||
metadata.cwd = turn_ctx.cwd.clone();
|
||||
metadata.sandbox_policy = enum_to_string(&turn_ctx.sandbox_policy);
|
||||
metadata.approval_mode = enum_to_string(&turn_ctx.approval_policy);
|
||||
update_model_history(metadata, turn_ctx.model.as_str());
|
||||
}
|
||||
|
||||
fn apply_event_msg(metadata: &mut ThreadMetadata, event: &EventMsg) {
|
||||
@@ -76,15 +79,69 @@ fn apply_event_msg(metadata: &mut ThreadMetadata, event: &EventMsg) {
|
||||
metadata.title = title.to_string();
|
||||
}
|
||||
}
|
||||
if user
|
||||
.images
|
||||
.as_ref()
|
||||
.is_some_and(|images| !images.is_empty())
|
||||
|| !user.local_images.is_empty()
|
||||
{
|
||||
metadata.conversation_modalities = Some(
|
||||
codex_protocol::openai_models::INPUT_MODALITY_TEXT_MASK
|
||||
| codex_protocol::openai_models::INPUT_MODALITY_IMAGE_MASK,
|
||||
);
|
||||
} else if metadata.conversation_modalities.is_none() {
|
||||
metadata.conversation_modalities =
|
||||
Some(codex_protocol::openai_models::INPUT_MODALITY_TEXT_MASK);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_response_item(_metadata: &mut ThreadMetadata, _item: &ResponseItem) {
|
||||
fn apply_response_item(metadata: &mut ThreadMetadata, item: &ResponseItem) {
|
||||
if item.has_input_image() {
|
||||
metadata.conversation_modalities = Some(
|
||||
codex_protocol::openai_models::INPUT_MODALITY_TEXT_MASK
|
||||
| codex_protocol::openai_models::INPUT_MODALITY_IMAGE_MASK,
|
||||
);
|
||||
} else if metadata.conversation_modalities.is_none() && response_item_has_input_text(item) {
|
||||
metadata.conversation_modalities =
|
||||
Some(codex_protocol::openai_models::INPUT_MODALITY_TEXT_MASK);
|
||||
}
|
||||
// Title and first_user_message are derived from EventMsg::UserMessage only.
|
||||
}
|
||||
|
||||
fn response_item_has_input_text(item: &ResponseItem) -> bool {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
content
|
||||
.iter()
|
||||
.any(|item| matches!(item, ContentItem::InputText { .. } | ContentItem::OutputText { .. }))
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn update_model_history(metadata: &mut ThreadMetadata, model: &str) {
|
||||
if model.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
if metadata.initial_model.is_none() {
|
||||
metadata.initial_model = Some(model.to_string());
|
||||
}
|
||||
|
||||
let history = metadata.model_history.get_or_insert_with(Vec::new);
|
||||
if let Some(pos) = history.iter().position(|slug| slug == model) {
|
||||
history.remove(pos);
|
||||
}
|
||||
history.push(model.to_string());
|
||||
if history.len() > MODEL_HISTORY_LIMIT {
|
||||
let overflow = history.len() - MODEL_HISTORY_LIMIT;
|
||||
history.drain(0..overflow);
|
||||
}
|
||||
}
|
||||
|
||||
fn strip_user_message_prefix(text: &str) -> &str {
|
||||
match text.find(USER_MESSAGE_BEGIN) {
|
||||
Some(idx) => text[idx + USER_MESSAGE_BEGIN.len()..].trim(),
|
||||
@@ -224,6 +281,9 @@ mod tests {
|
||||
approval_mode: "on-request".to_string(),
|
||||
tokens_used: 1,
|
||||
first_user_message: None,
|
||||
conversation_modalities: None,
|
||||
initial_model: None,
|
||||
model_history: None,
|
||||
archived_at: None,
|
||||
git_sha: None,
|
||||
git_branch: None,
|
||||
|
||||
@@ -6,6 +6,7 @@ use codex_protocol::ThreadId;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use serde_json::Value;
|
||||
use sqlx::Row;
|
||||
use sqlx::sqlite::SqliteRow;
|
||||
use std::path::PathBuf;
|
||||
@@ -78,6 +79,12 @@ pub struct ThreadMetadata {
|
||||
pub tokens_used: i64,
|
||||
/// First user message observed for this thread, if any.
|
||||
pub first_user_message: Option<String>,
|
||||
/// Tri-state conversation modalities bitmask (NULL means unknown).
|
||||
pub conversation_modalities: Option<i64>,
|
||||
/// First model selected in this thread, if known.
|
||||
pub initial_model: Option<String>,
|
||||
/// Bounded history of model slugs in recency order (JSON array).
|
||||
pub model_history: Option<Vec<String>>,
|
||||
/// The archive timestamp, if the thread is archived.
|
||||
pub archived_at: Option<DateTime<Utc>>,
|
||||
/// The git commit SHA, if known.
|
||||
@@ -119,6 +126,12 @@ pub struct ThreadMetadataBuilder {
|
||||
pub git_branch: Option<String>,
|
||||
/// The git origin URL, if known.
|
||||
pub git_origin_url: Option<String>,
|
||||
/// Tri-state conversation modalities bitmask, if known.
|
||||
pub conversation_modalities: Option<i64>,
|
||||
/// First model selected in this thread, if known.
|
||||
pub initial_model: Option<String>,
|
||||
/// Bounded history of model slugs in recency order.
|
||||
pub model_history: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl ThreadMetadataBuilder {
|
||||
@@ -144,6 +157,9 @@ impl ThreadMetadataBuilder {
|
||||
git_sha: None,
|
||||
git_branch: None,
|
||||
git_origin_url: None,
|
||||
conversation_modalities: None,
|
||||
initial_model: None,
|
||||
model_history: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,6 +190,9 @@ impl ThreadMetadataBuilder {
|
||||
approval_mode,
|
||||
tokens_used: 0,
|
||||
first_user_message: None,
|
||||
conversation_modalities: self.conversation_modalities,
|
||||
initial_model: self.initial_model.clone(),
|
||||
model_history: self.model_history.clone(),
|
||||
archived_at: self.archived_at.map(canonicalize_datetime),
|
||||
git_sha: self.git_sha.clone(),
|
||||
git_branch: self.git_branch.clone(),
|
||||
@@ -225,6 +244,15 @@ impl ThreadMetadata {
|
||||
if self.first_user_message != other.first_user_message {
|
||||
diffs.push("first_user_message");
|
||||
}
|
||||
if self.conversation_modalities != other.conversation_modalities {
|
||||
diffs.push("conversation_modalities");
|
||||
}
|
||||
if self.initial_model != other.initial_model {
|
||||
diffs.push("initial_model");
|
||||
}
|
||||
if self.model_history != other.model_history {
|
||||
diffs.push("model_history");
|
||||
}
|
||||
if self.archived_at != other.archived_at {
|
||||
diffs.push("archived_at");
|
||||
}
|
||||
@@ -260,6 +288,9 @@ pub(crate) struct ThreadRow {
|
||||
approval_mode: String,
|
||||
tokens_used: i64,
|
||||
first_user_message: String,
|
||||
conversation_modalities: Option<i64>,
|
||||
initial_model: Option<String>,
|
||||
model_history: Option<Vec<String>>,
|
||||
archived_at: Option<i64>,
|
||||
git_sha: Option<String>,
|
||||
git_branch: Option<String>,
|
||||
@@ -282,6 +313,9 @@ impl ThreadRow {
|
||||
approval_mode: row.try_get("approval_mode")?,
|
||||
tokens_used: row.try_get("tokens_used")?,
|
||||
first_user_message: row.try_get("first_user_message")?,
|
||||
conversation_modalities: row.try_get("conversation_modalities")?,
|
||||
initial_model: row.try_get("initial_model")?,
|
||||
model_history: parse_model_history(row.try_get("model_history")?)?,
|
||||
archived_at: row.try_get("archived_at")?,
|
||||
git_sha: row.try_get("git_sha")?,
|
||||
git_branch: row.try_get("git_branch")?,
|
||||
@@ -308,6 +342,9 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
first_user_message,
|
||||
conversation_modalities,
|
||||
initial_model,
|
||||
model_history,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
@@ -327,6 +364,9 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
first_user_message: (!first_user_message.is_empty()).then_some(first_user_message),
|
||||
conversation_modalities,
|
||||
initial_model,
|
||||
model_history,
|
||||
archived_at: archived_at.map(epoch_seconds_to_datetime).transpose()?,
|
||||
git_sha,
|
||||
git_branch,
|
||||
@@ -335,6 +375,26 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_model_history(raw: Option<String>) -> Result<Option<Vec<String>>> {
|
||||
let Some(raw) = raw else {
|
||||
return Ok(None);
|
||||
};
|
||||
if raw.trim().is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let value: Value = serde_json::from_str(&raw)?;
|
||||
match value {
|
||||
Value::Array(items) => Ok(Some(
|
||||
items
|
||||
.into_iter()
|
||||
.filter_map(|item| item.as_str().map(|text| text.to_string()))
|
||||
.collect(),
|
||||
)),
|
||||
Value::Null => Ok(None),
|
||||
_ => Err(anyhow::anyhow!("invalid model_history payload")),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn anchor_from_item(item: &ThreadMetadata, sort_key: SortKey) -> Option<Anchor> {
|
||||
let id = Uuid::parse_str(&item.id.to_string()).ok()?;
|
||||
let ts = match sort_key {
|
||||
|
||||
@@ -183,6 +183,9 @@ SELECT
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
first_user_message,
|
||||
conversation_modalities,
|
||||
initial_model,
|
||||
model_history,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
@@ -302,6 +305,9 @@ SELECT
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
first_user_message,
|
||||
conversation_modalities,
|
||||
initial_model,
|
||||
model_history,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
@@ -441,6 +447,11 @@ FROM threads
|
||||
|
||||
/// Insert or replace thread metadata directly.
|
||||
pub async fn upsert_thread(&self, metadata: &crate::ThreadMetadata) -> anyhow::Result<()> {
|
||||
let model_history = metadata
|
||||
.model_history
|
||||
.as_ref()
|
||||
.map(serde_json::to_string)
|
||||
.transpose()?;
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO threads (
|
||||
@@ -457,12 +468,15 @@ INSERT INTO threads (
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
first_user_message,
|
||||
conversation_modalities,
|
||||
initial_model,
|
||||
model_history,
|
||||
archived,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
git_origin_url
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
rollout_path = excluded.rollout_path,
|
||||
created_at = excluded.created_at,
|
||||
@@ -476,6 +490,9 @@ ON CONFLICT(id) DO UPDATE SET
|
||||
approval_mode = excluded.approval_mode,
|
||||
tokens_used = excluded.tokens_used,
|
||||
first_user_message = excluded.first_user_message,
|
||||
conversation_modalities = excluded.conversation_modalities,
|
||||
initial_model = excluded.initial_model,
|
||||
model_history = excluded.model_history,
|
||||
archived = excluded.archived,
|
||||
archived_at = excluded.archived_at,
|
||||
git_sha = excluded.git_sha,
|
||||
@@ -496,6 +513,9 @@ ON CONFLICT(id) DO UPDATE SET
|
||||
.bind(metadata.approval_mode.as_str())
|
||||
.bind(metadata.tokens_used)
|
||||
.bind(metadata.first_user_message.as_deref().unwrap_or_default())
|
||||
.bind(metadata.conversation_modalities)
|
||||
.bind(metadata.initial_model.as_deref())
|
||||
.bind(model_history)
|
||||
.bind(metadata.archived_at.is_some())
|
||||
.bind(metadata.archived_at.map(datetime_to_epoch_seconds))
|
||||
.bind(metadata.git_sha.as_deref())
|
||||
@@ -506,6 +526,26 @@ ON CONFLICT(id) DO UPDATE SET
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update the persisted conversation modalities mask for a thread.
///
/// Issues a single `UPDATE` against the `threads` table, writing
/// `conversation_modalities` into the row whose `id` equals `thread_id` rendered as a
/// string.
///
/// The result of `execute` is discarded once it succeeds, so this function returns
/// `Ok(())` without checking whether any row was actually updated.
///
/// # Errors
///
/// Returns the error produced when executing the statement fails.
|
||||
pub async fn set_thread_conversation_modalities(
|
||||
&self,
|
||||
thread_id: ThreadId,
|
||||
conversation_modalities: i64,
|
||||
) -> anyhow::Result<()> {
|
||||
sqlx::query(
|
||||
r#"
|
||||
UPDATE threads
|
||||
SET conversation_modalities = ?
|
||||
WHERE id = ?
|
||||
"#,
|
||||
)
|
||||
// Bind order follows the `?` placeholders: the new mask first, then the thread id.
.bind(conversation_modalities)
|
||||
.bind(thread_id.to_string())
|
||||
.execute(self.pool.as_ref())
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert or update memory summaries for a thread.
|
||||
///
|
||||
/// This method always advances `updated_at`, even if summaries are unchanged.
|
||||
@@ -1402,6 +1442,11 @@ mod tests {
|
||||
approval_mode: crate::extract::enum_to_string(&AskForApproval::OnRequest),
|
||||
tokens_used: 0,
|
||||
first_user_message: Some("hello".to_string()),
|
||||
conversation_modalities: Some(
|
||||
codex_protocol::openai_models::INPUT_MODALITY_TEXT_MASK,
|
||||
),
|
||||
initial_model: Some("gpt-5.1-codex".to_string()),
|
||||
model_history: Some(vec!["gpt-5.1-codex".to_string()]),
|
||||
archived_at: None,
|
||||
git_sha: None,
|
||||
git_branch: None,
|
||||
|
||||
@@ -3152,13 +3152,6 @@ impl ChatWidget {
|
||||
/// When the model does not advertise image support, we keep the draft unchanged and surface a
|
||||
/// warning event so users can switch models or remove attachments.
|
||||
pub(crate) fn attach_image(&mut self, path: PathBuf) {
|
||||
if !self.current_model_supports_images() {
|
||||
self.add_to_history(history_cell::new_warning_event(
|
||||
self.image_inputs_not_supported_message(),
|
||||
));
|
||||
self.request_redraw();
|
||||
return;
|
||||
}
|
||||
tracing::info!("attach_image path={path:?}");
|
||||
self.bottom_pane.attach_image(path);
|
||||
self.request_redraw();
|
||||
@@ -3646,16 +3639,6 @@ impl ChatWidget {
|
||||
if text.is_empty() && local_images.is_empty() {
|
||||
return;
|
||||
}
|
||||
if !local_images.is_empty() && !self.current_model_supports_images() {
|
||||
self.restore_blocked_image_submission(
|
||||
text,
|
||||
text_elements,
|
||||
local_images,
|
||||
mention_bindings,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let mut items: Vec<UserInput> = Vec::new();
|
||||
|
||||
// Special-case: "!cmd" executes a local shell command instead of sending to the model.
|
||||
@@ -6075,34 +6058,8 @@ impl ChatWidget {
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Return whether the effective model currently advertises image-input support.
|
||||
///
|
||||
/// We intentionally default to `true` when model metadata cannot be read so transient catalog
|
||||
/// failures do not hard-block user input in the UI.
|
||||
fn current_model_supports_images(&self) -> bool {
|
||||
let model = self.current_model();
|
||||
self.models_manager
|
||||
.try_list_models(&self.config)
|
||||
.ok()
|
||||
.and_then(|models| {
|
||||
models
|
||||
.into_iter()
|
||||
.find(|preset| preset.model == model)
|
||||
.map(|preset| preset.input_modalities.contains(&InputModality::Image))
|
||||
})
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
fn sync_image_paste_enabled(&mut self) {
|
||||
let enabled = self.current_model_supports_images();
|
||||
self.bottom_pane.set_image_paste_enabled(enabled);
|
||||
}
|
||||
|
||||
fn image_inputs_not_supported_message(&self) -> String {
|
||||
format!(
|
||||
"Model {} does not support image inputs. Remove images or switch models.",
|
||||
self.current_model()
|
||||
)
|
||||
self.bottom_pane.set_image_paste_enabled(true);
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // Used in tests
|
||||
|
||||
Reference in New Issue
Block a user