mirror of
https://github.com/openai/codex.git
synced 2026-05-26 14:04:48 +00:00
## Summary Persist Stop-hook continuation prompts as `user` messages instead of hidden `developer` messages + some requested integration tests This is a follow-up to @pakrym's comment in https://github.com/openai/codex/pull/14532 to make sure stop-block continuation prompts match training for turn loops - Stop continuation now writes `<hook_prompt hook_run_id="...">stop hook's user prompt</hook_prompt>` - Introduces quick-xml dependency, though we already indirectly depended on it anyway via syntect - This PR only has about 500 lines of actual logic changes, the rest is tests/schema ## Testing Example run (with a sessionstart hook and 3 stop hooks) - this shows context added by session start, then two stop hooks sending their own additional prompts in a new turn. The model responds with a single message addressing both. Then when that turn ends, the hooks detect that they just ran using `stop_hook_active` and decide not to loop infinitely. Test files for this (unzip, move codex -> .codex): [codex.zip](https://github.com/user-attachments/files/26075806/codex.zip) ``` › cats • Running SessionStart hook: lighting the observatory SessionStart hook (completed) warning: Hi, I'm a session start hook for wizard-tower (startup). hook context: A wimboltine stonpet is an exotic cuisine from hyperspace • Cats are tiny zen wizards, my friend: equal parts nap, mystery, and chaos. If you want, we can talk cat facts, cat breeds, cat names, or build something cat-themed in this repo. • Running Stop hook: checking the tower wards • Running Stop hook: sacking the guards • Running Stop hook: hiring the guards Stop hook (completed) warning: Wizard Tower Stop hook reviewed the completed reply (177 chars). 
Stop hook (blocked) warning: Wizard Tower Stop hook continuing conversation feedback: cook the stonpet Stop hook (blocked) warning: Wizard Tower Stop hook continuing conversation feedback: eat the cooked stonpet • Stonpet’s cooked, aloha style: flash-seared over a blue quasiflame, glazed with nebula salt, and rested until the hyperspace juices settle. Now we eat with gratitude, my friend. One mindful bite in, and the flavor is pure cosmic surf: smoky, bright, and totally out of this dimension. • Running Stop hook: checking the tower wards • Running Stop hook: sacking the guards • Running Stop hook: hiring the guards Stop hook (completed) warning: Wizard Tower Stop hook reviewed the completed reply (285 chars). Stop hook (completed) warning: Wizard Tower Stop hook saw a second pass and stayed calm to avoid a loop. Stop hook (completed) warning: Wizard Tower Stop hook saw a second pass and stayed calm to avoid a loop. ```
301 lines
11 KiB
Rust
301 lines
11 KiB
Rust
use std::collections::HashSet;
|
|
use std::sync::Arc;
|
|
|
|
use crate::Prompt;
|
|
use crate::codex::Session;
|
|
use crate::codex::TurnContext;
|
|
use crate::codex::built_tools;
|
|
use crate::compact::InitialContextInjection;
|
|
use crate::compact::insert_initial_context_before_last_real_user_or_summary;
|
|
use crate::context_manager::ContextManager;
|
|
use crate::context_manager::TotalTokenUsageBreakdown;
|
|
use crate::context_manager::estimate_response_item_model_visible_bytes;
|
|
use crate::context_manager::is_codex_generated_item;
|
|
use crate::error::CodexErr;
|
|
use crate::error::Result as CodexResult;
|
|
use crate::protocol::CompactedItem;
|
|
use crate::protocol::EventMsg;
|
|
use crate::protocol::TurnStartedEvent;
|
|
use codex_protocol::items::ContextCompactionItem;
|
|
use codex_protocol::items::TurnItem;
|
|
use codex_protocol::models::BaseInstructions;
|
|
use codex_protocol::models::ResponseItem;
|
|
use futures::TryFutureExt;
|
|
use tokio_util::sync::CancellationToken;
|
|
use tracing::error;
|
|
use tracing::info;
|
|
|
|
pub(crate) async fn run_inline_remote_auto_compact_task(
|
|
sess: Arc<Session>,
|
|
turn_context: Arc<TurnContext>,
|
|
initial_context_injection: InitialContextInjection,
|
|
) -> CodexResult<()> {
|
|
run_remote_compact_task_inner(&sess, &turn_context, initial_context_injection).await?;
|
|
Ok(())
|
|
}
|
|
|
|
pub(crate) async fn run_remote_compact_task(
|
|
sess: Arc<Session>,
|
|
turn_context: Arc<TurnContext>,
|
|
) -> CodexResult<()> {
|
|
let start_event = EventMsg::TurnStarted(TurnStartedEvent {
|
|
turn_id: turn_context.sub_id.clone(),
|
|
model_context_window: turn_context.model_context_window(),
|
|
collaboration_mode_kind: turn_context.collaboration_mode.mode,
|
|
});
|
|
sess.send_event(&turn_context, start_event).await;
|
|
|
|
run_remote_compact_task_inner(&sess, &turn_context, InitialContextInjection::DoNotInject).await
|
|
}
|
|
|
|
async fn run_remote_compact_task_inner(
|
|
sess: &Arc<Session>,
|
|
turn_context: &Arc<TurnContext>,
|
|
initial_context_injection: InitialContextInjection,
|
|
) -> CodexResult<()> {
|
|
if let Err(err) =
|
|
run_remote_compact_task_inner_impl(sess, turn_context, initial_context_injection).await
|
|
{
|
|
let event = EventMsg::Error(
|
|
err.to_error_event(Some("Error running remote compact task".to_string())),
|
|
);
|
|
sess.send_event(turn_context, event).await;
|
|
return Err(err);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
async fn run_remote_compact_task_inner_impl(
|
|
sess: &Arc<Session>,
|
|
turn_context: &Arc<TurnContext>,
|
|
initial_context_injection: InitialContextInjection,
|
|
) -> CodexResult<()> {
|
|
let compaction_item = TurnItem::ContextCompaction(ContextCompactionItem::new());
|
|
sess.emit_turn_item_started(turn_context, &compaction_item)
|
|
.await;
|
|
let mut history = sess.clone_history().await;
|
|
let base_instructions = sess.get_base_instructions().await;
|
|
let deleted_items = trim_function_call_history_to_fit_context_window(
|
|
&mut history,
|
|
turn_context.as_ref(),
|
|
&base_instructions,
|
|
);
|
|
if deleted_items > 0 {
|
|
info!(
|
|
turn_id = %turn_context.sub_id,
|
|
deleted_items,
|
|
"trimmed history items before remote compaction"
|
|
);
|
|
}
|
|
// Required to keep `/undo` available after compaction
|
|
let ghost_snapshots: Vec<ResponseItem> = history
|
|
.raw_items()
|
|
.iter()
|
|
.filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. }))
|
|
.cloned()
|
|
.collect();
|
|
|
|
let prompt_input = history.for_prompt(&turn_context.model_info.input_modalities);
|
|
let tool_router = built_tools(
|
|
sess.as_ref(),
|
|
turn_context.as_ref(),
|
|
&prompt_input,
|
|
&HashSet::new(),
|
|
/*skills_outcome*/ None,
|
|
&CancellationToken::new(),
|
|
)
|
|
.await?;
|
|
let prompt = Prompt {
|
|
input: prompt_input,
|
|
tools: tool_router.model_visible_specs(),
|
|
parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls,
|
|
base_instructions,
|
|
personality: turn_context.personality,
|
|
output_schema: None,
|
|
};
|
|
|
|
let mut new_history = sess
|
|
.services
|
|
.model_client
|
|
.compact_conversation_history(
|
|
&prompt,
|
|
&turn_context.model_info,
|
|
turn_context.reasoning_effort,
|
|
turn_context.reasoning_summary,
|
|
&turn_context.session_telemetry,
|
|
)
|
|
.or_else(|err| async {
|
|
let total_usage_breakdown = sess.get_total_token_usage_breakdown().await;
|
|
let compact_request_log_data =
|
|
build_compact_request_log_data(&prompt.input, &prompt.base_instructions.text);
|
|
log_remote_compact_failure(
|
|
turn_context,
|
|
&compact_request_log_data,
|
|
total_usage_breakdown,
|
|
&err,
|
|
);
|
|
Err(err)
|
|
})
|
|
.await?;
|
|
new_history = process_compacted_history(
|
|
sess.as_ref(),
|
|
turn_context.as_ref(),
|
|
new_history,
|
|
initial_context_injection,
|
|
)
|
|
.await;
|
|
|
|
if !ghost_snapshots.is_empty() {
|
|
new_history.extend(ghost_snapshots);
|
|
}
|
|
let reference_context_item = match initial_context_injection {
|
|
InitialContextInjection::DoNotInject => None,
|
|
InitialContextInjection::BeforeLastUserMessage => Some(turn_context.to_turn_context_item()),
|
|
};
|
|
let compacted_item = CompactedItem {
|
|
message: String::new(),
|
|
replacement_history: Some(new_history.clone()),
|
|
};
|
|
sess.replace_compacted_history(new_history, reference_context_item, compacted_item)
|
|
.await;
|
|
sess.recompute_token_usage(turn_context).await;
|
|
|
|
sess.emit_turn_item_completed(turn_context, compaction_item)
|
|
.await;
|
|
Ok(())
|
|
}
|
|
|
|
pub(crate) async fn process_compacted_history(
|
|
sess: &Session,
|
|
turn_context: &TurnContext,
|
|
mut compacted_history: Vec<ResponseItem>,
|
|
initial_context_injection: InitialContextInjection,
|
|
) -> Vec<ResponseItem> {
|
|
// Mid-turn compaction is the only path that must inject initial context above the last user
|
|
// message in the replacement history. Pre-turn compaction instead injects context after the
|
|
// compaction item, but mid-turn compaction keeps the compaction item last for model training.
|
|
let initial_context = if matches!(
|
|
initial_context_injection,
|
|
InitialContextInjection::BeforeLastUserMessage
|
|
) {
|
|
sess.build_initial_context(turn_context).await
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
|
|
compacted_history.retain(should_keep_compacted_history_item);
|
|
insert_initial_context_before_last_real_user_or_summary(compacted_history, initial_context)
|
|
}
|
|
|
|
/// Returns whether an item from remote compaction output should be preserved.
|
|
///
|
|
/// Called while processing the model-provided compacted transcript, before we
|
|
/// append fresh canonical context from the current session.
|
|
///
|
|
/// We drop:
|
|
/// - `developer` messages because remote output can include stale/duplicated
|
|
/// instruction content.
|
|
/// - non-user-content `user` messages (session prefix/instruction wrappers),
|
|
/// while preserving real user messages and persisted hook prompts.
|
|
///
|
|
/// This intentionally keeps:
|
|
/// - `assistant` messages (future remote compaction models may emit them)
|
|
/// - `user`-role warnings and compaction-generated summary messages because
|
|
/// they parse as `TurnItem::UserMessage`.
|
|
fn should_keep_compacted_history_item(item: &ResponseItem) -> bool {
|
|
match item {
|
|
ResponseItem::Message { role, .. } if role == "developer" => false,
|
|
ResponseItem::Message { role, .. } if role == "user" => {
|
|
matches!(
|
|
crate::event_mapping::parse_turn_item(item),
|
|
Some(TurnItem::UserMessage(_) | TurnItem::HookPrompt(_))
|
|
)
|
|
}
|
|
ResponseItem::Message { role, .. } if role == "assistant" => true,
|
|
ResponseItem::Message { .. } => false,
|
|
ResponseItem::Compaction { .. } => true,
|
|
ResponseItem::Reasoning { .. }
|
|
| ResponseItem::LocalShellCall { .. }
|
|
| ResponseItem::FunctionCall { .. }
|
|
| ResponseItem::ToolSearchCall { .. }
|
|
| ResponseItem::FunctionCallOutput { .. }
|
|
| ResponseItem::ToolSearchOutput { .. }
|
|
| ResponseItem::CustomToolCall { .. }
|
|
| ResponseItem::CustomToolCallOutput { .. }
|
|
| ResponseItem::WebSearchCall { .. }
|
|
| ResponseItem::ImageGenerationCall { .. }
|
|
| ResponseItem::GhostSnapshot { .. }
|
|
| ResponseItem::Other => false,
|
|
}
|
|
}
|
|
|
|
/// Diagnostic figures recorded when a remote compaction request fails.
#[derive(Debug)]
struct CompactRequestLogData {
    /// Estimated model-visible size of the failing request: the byte length
    /// of the instructions plus the per-item estimates, saturating at
    /// `i64::MAX`.
    failing_compaction_request_model_visible_bytes: i64,
}
|
|
|
|
fn build_compact_request_log_data(
|
|
input: &[ResponseItem],
|
|
instructions: &str,
|
|
) -> CompactRequestLogData {
|
|
let failing_compaction_request_model_visible_bytes = input
|
|
.iter()
|
|
.map(estimate_response_item_model_visible_bytes)
|
|
.fold(
|
|
i64::try_from(instructions.len()).unwrap_or(i64::MAX),
|
|
i64::saturating_add,
|
|
);
|
|
|
|
CompactRequestLogData {
|
|
failing_compaction_request_model_visible_bytes,
|
|
}
|
|
}
|
|
|
|
fn log_remote_compact_failure(
|
|
turn_context: &TurnContext,
|
|
log_data: &CompactRequestLogData,
|
|
total_usage_breakdown: TotalTokenUsageBreakdown,
|
|
err: &CodexErr,
|
|
) {
|
|
error!(
|
|
turn_id = %turn_context.sub_id,
|
|
last_api_response_total_tokens = total_usage_breakdown.last_api_response_total_tokens,
|
|
all_history_items_model_visible_bytes = total_usage_breakdown.all_history_items_model_visible_bytes,
|
|
estimated_tokens_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_tokens_of_items_added_since_last_successful_api_response,
|
|
estimated_bytes_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_bytes_of_items_added_since_last_successful_api_response,
|
|
model_context_window_tokens = ?turn_context.model_context_window(),
|
|
failing_compaction_request_model_visible_bytes = log_data.failing_compaction_request_model_visible_bytes,
|
|
compact_error = %err,
|
|
"remote compaction failed"
|
|
);
|
|
}
|
|
|
|
fn trim_function_call_history_to_fit_context_window(
|
|
history: &mut ContextManager,
|
|
turn_context: &TurnContext,
|
|
base_instructions: &BaseInstructions,
|
|
) -> usize {
|
|
let mut deleted_items = 0usize;
|
|
let Some(context_window) = turn_context.model_context_window() else {
|
|
return deleted_items;
|
|
};
|
|
|
|
while history
|
|
.estimate_token_count_with_base_instructions(base_instructions)
|
|
.is_some_and(|estimated_tokens| estimated_tokens > context_window)
|
|
{
|
|
let Some(last_item) = history.raw_items().last() else {
|
|
break;
|
|
};
|
|
if !is_codex_generated_item(last_item) {
|
|
break;
|
|
}
|
|
if !history.remove_last_item() {
|
|
break;
|
|
}
|
|
deleted_items += 1;
|
|
}
|
|
|
|
deleted_items
|
|
}
|