Pass more params to compaction (#14247)

Pass more params to /compact. This should give us parity with the
/responses endpoint to improve caching.

I'm torn about awaiting MCP startup. Blocking would give us parity, but it seems
like we explicitly chose not to block on MCPs elsewhere. I'm happy either way.
This commit is contained in:
Rasmus Rygaard
2026-03-10 16:39:57 -07:00
committed by GitHub
parent de2a73cd91
commit f8ef154a6b
5 changed files with 88 additions and 5 deletions

View File

@@ -21,6 +21,12 @@ pub struct CompactionInput<'a> {
pub model: &'a str,
pub input: &'a [ResponseItem],
pub instructions: &'a str,
pub tools: Vec<Value>,
pub parallel_tool_calls: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning: Option<Reasoning>,
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<TextControls>,
}
/// Canonical input payload for the memory summarize endpoint.

View File

@@ -281,6 +281,8 @@ impl ModelClient {
&self,
prompt: &Prompt,
model_info: &ModelInfo,
effort: Option<ReasoningEffortConfig>,
summary: ReasoningSummaryConfig,
session_telemetry: &SessionTelemetry,
) -> Result<Vec<ResponseItem>> {
if prompt.input.is_empty() {
@@ -294,10 +296,29 @@ impl ModelClient {
.with_telemetry(Some(request_telemetry));
let instructions = prompt.base_instructions.text.clone();
let input = prompt.get_formatted_input();
let tools = create_tools_json_for_responses_api(&prompt.tools)?;
let reasoning = Self::build_reasoning(model_info, effort, summary);
let verbosity = if model_info.support_verbosity {
self.state.model_verbosity.or(model_info.default_verbosity)
} else {
if self.state.model_verbosity.is_some() {
warn!(
"model_verbosity is set but ignored as the model does not support verbosity: {}",
model_info.slug
);
}
None
};
let text = create_text_param_for_request(verbosity, &prompt.output_schema);
let payload = ApiCompactionInput {
model: &model_info.slug,
input: &prompt.input,
input: &input,
instructions: &instructions,
tools,
parallel_tool_calls: prompt.parallel_tool_calls,
reasoning,
text,
};
let mut extra_headers = self.build_subagent_headers();
@@ -375,6 +396,25 @@ impl ModelClient {
request_telemetry
}
/// Builds the optional `reasoning` payload for an API request.
///
/// Returns `None` when the model does not support reasoning summaries.
/// Otherwise the requested effort falls back to the model's default
/// reasoning level, and a summary configured as
/// `ReasoningSummaryConfig::None` is omitted from the payload.
fn build_reasoning(
    model_info: &ModelInfo,
    effort: Option<ReasoningEffortConfig>,
    summary: ReasoningSummaryConfig,
) -> Option<Reasoning> {
    // Guard clause: models without summary support send no reasoning block.
    if !model_info.supports_reasoning_summaries {
        return None;
    }
    // Translate the "none" sentinel into an absent field rather than
    // serializing it explicitly.
    let summary_field = if summary == ReasoningSummaryConfig::None {
        None
    } else {
        Some(summary)
    };
    Some(Reasoning {
        effort: effort.or(model_info.default_reasoning_level),
        summary: summary_field,
    })
}
/// Returns whether the Responses-over-WebSocket transport is active for this session.
///
/// This combines provider capability and feature gating; both must be true for websocket paths

View File

@@ -6227,7 +6227,7 @@ async fn run_sampling_request(
}
}
async fn built_tools(
pub(crate) async fn built_tools(
sess: &Session,
turn_context: &TurnContext,
input: &[ResponseItem],

View File

@@ -1,8 +1,10 @@
use std::collections::HashSet;
use std::sync::Arc;
use crate::Prompt;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::codex::built_tools;
use crate::compact::InitialContextInjection;
use crate::compact::insert_initial_context_before_last_real_user_or_summary;
use crate::context_manager::ContextManager;
@@ -19,6 +21,7 @@ use codex_protocol::items::TurnItem;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ResponseItem;
use futures::TryFutureExt;
use tokio_util::sync::CancellationToken;
use tracing::error;
use tracing::info;
@@ -92,10 +95,20 @@ async fn run_remote_compact_task_inner_impl(
.cloned()
.collect();
let prompt_input = history.for_prompt(&turn_context.model_info.input_modalities);
let tool_router = built_tools(
sess.as_ref(),
turn_context.as_ref(),
&prompt_input,
&HashSet::new(),
None,
&CancellationToken::new(),
)
.await?;
let prompt = Prompt {
input: history.for_prompt(&turn_context.model_info.input_modalities),
tools: vec![],
parallel_tool_calls: false,
input: prompt_input,
tools: tool_router.specs(),
parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls,
base_instructions,
personality: turn_context.personality,
output_schema: None,
@@ -107,6 +120,8 @@ async fn run_remote_compact_task_inner_impl(
.compact_conversation_history(
&prompt,
&turn_context.model_info,
turn_context.reasoning_effort,
turn_context.reasoning_summary,
&turn_context.session_telemetry,
)
.or_else(|err| async {

View File

@@ -252,6 +252,28 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
compact_body.get("model").and_then(|v| v.as_str()),
Some(harness.test().session_configured.model.as_str())
);
let response_requests = responses_mock.requests();
let first_response_request = response_requests.first().expect("initial request missing");
assert_eq!(
compact_body["tools"],
first_response_request.body_json()["tools"],
"compact requests should send the same tools payload as /v1/responses"
);
assert_eq!(
compact_body["parallel_tool_calls"],
first_response_request.body_json()["parallel_tool_calls"],
"compact requests should match /v1/responses parallel_tool_calls"
);
assert_eq!(
compact_body["reasoning"],
first_response_request.body_json()["reasoning"],
"compact requests should match /v1/responses reasoning"
);
assert_eq!(
compact_body["text"],
first_response_request.body_json()["text"],
"compact requests should match /v1/responses text controls"
);
let compact_body_text = compact_body.to_string();
assert!(
compact_body_text.contains("hello remote compact"),