mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
Add reasoning effort to turn tracing spans (#20060)
## Why

#19432 added token usage to the turn and response spans. This follow-up adds the configured reasoning effort so performance traces can be filtered by model effort.

[example trace](https://openai.datadoghq.com/apm/trace/1ff708a87159ff4898bdc8bd6091ec18?graphType=waterfall&shouldShowLegend=true&spanID=6596351544047485652&traceQuery=)

<img width="533" height="434" alt="Screenshot 2026-04-28 at 3 52 12 PM" src="https://github.com/user-attachments/assets/77ef32fc-d7cd-4eec-87b4-26c6798f1af8" />

## What Changed

- Adds `codex.turn.reasoning_effort` to the turn span.
- Adds `codex.request.reasoning_effort` to `handle_responses`.
- Extends the span test to cover explicit `high` effort with token usage.

## Testing

- `cargo test -p codex-core turn_and_completed_response_spans_record_token_usage`
- `cargo test -p codex-otel`
- `just fmt`
- `just fix -p codex-core`
- `just fix -p codex-otel`
This commit is contained in:
@@ -1885,6 +1885,7 @@ async fn try_run_sampling_request(
|
||||
Box<dyn ToolArgumentDiffConsumer>,
|
||||
)> = None;
|
||||
let mut should_emit_turn_diff = false;
|
||||
let reasoning_effort = turn_context.effective_reasoning_effort_for_tracing();
|
||||
let plan_mode = turn_context.collaboration_mode.mode == ModeKind::Plan;
|
||||
let mut assistant_message_stream_parsers = AssistantMessageStreamParsers::new(plan_mode);
|
||||
let mut plan_mode_state = plan_mode.then(|| PlanModeStreamState::new(&turn_context.sub_id));
|
||||
@@ -1896,6 +1897,7 @@ async fn try_run_sampling_request(
|
||||
otel.name = field::Empty,
|
||||
tool_name = field::Empty,
|
||||
from = field::Empty,
|
||||
codex.request.reasoning_effort = %reasoning_effort,
|
||||
gen_ai.usage.input_tokens = field::Empty,
|
||||
gen_ai.usage.cache_read.input_tokens = field::Empty,
|
||||
gen_ai.usage.output_tokens = field::Empty,
|
||||
|
||||
@@ -118,6 +118,20 @@ impl TurnContext {
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn effective_reasoning_effort_for_tracing(&self) -> String {
|
||||
if self.model_info.supports_reasoning_summaries {
|
||||
match self
|
||||
.reasoning_effort
|
||||
.or(self.model_info.default_reasoning_level)
|
||||
{
|
||||
Some(effort) => effort.to_string(),
|
||||
None => "default".to_string(),
|
||||
}
|
||||
} else {
|
||||
"default".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn model_context_window(&self) -> Option<i64> {
|
||||
let effective_context_window_percent = self.model_info.effective_context_window_percent;
|
||||
self.model_info
|
||||
|
||||
@@ -366,12 +366,14 @@ impl Session {
|
||||
let task_cancellation_token = cancellation_token.child_token();
|
||||
// Task-owned turn spans keep a core-owned span open for the
|
||||
// full task lifecycle after the submission dispatch span ends.
|
||||
let reasoning_effort = turn_context.effective_reasoning_effort_for_tracing();
|
||||
let task_span = info_span!(
|
||||
"turn",
|
||||
otel.name = span_name,
|
||||
thread.id = %self.conversation_id,
|
||||
turn.id = %turn_context.sub_id,
|
||||
model = %turn_context.model_info.slug,
|
||||
codex.turn.reasoning_effort = %reasoning_effort,
|
||||
codex.turn.token_usage.input_tokens = field::Empty,
|
||||
codex.turn.token_usage.cached_input_tokens = field::Empty,
|
||||
codex.turn.token_usage.non_cached_input_tokens = field::Empty,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use codex_core::config::Constrained;
|
||||
use codex_features::Feature;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
use codex_protocol::openai_models::ReasoningEffort;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::Op;
|
||||
@@ -595,8 +596,9 @@ async fn turn_and_completed_response_spans_record_token_usage() {
|
||||
)
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
let test = test_codex()
|
||||
.with_config(|config| {
|
||||
config.model_reasoning_effort = Some(ReasoningEffort::High);
|
||||
config
|
||||
.features
|
||||
.disable(Feature::GhostCommit)
|
||||
@@ -606,6 +608,8 @@ async fn turn_and_completed_response_spans_record_token_usage() {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let TestCodex { codex, .. } = test;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
environments: None,
|
||||
@@ -625,7 +629,9 @@ async fn turn_and_completed_response_spans_record_token_usage() {
|
||||
|
||||
assert!(
|
||||
logs.lines().any(|line| {
|
||||
line.contains("handle_responses{otel.name=\"completed\"")
|
||||
line.contains("handle_responses{")
|
||||
&& line.contains("otel.name=\"completed\"")
|
||||
&& line.contains("codex.request.reasoning_effort=high")
|
||||
&& line.contains("gen_ai.usage.input_tokens=3")
|
||||
&& line.contains("gen_ai.usage.cache_read.input_tokens=1")
|
||||
&& line.contains("gen_ai.usage.output_tokens=5")
|
||||
@@ -637,6 +643,7 @@ async fn turn_and_completed_response_spans_record_token_usage() {
|
||||
assert!(
|
||||
logs.lines().any(|line| {
|
||||
line.contains("turn{otel.name=\"session_task.turn\"")
|
||||
&& line.contains("codex.turn.reasoning_effort=high")
|
||||
&& line.contains("codex.turn.token_usage.input_tokens=3")
|
||||
&& line.contains("codex.turn.token_usage.cached_input_tokens=1")
|
||||
&& line.contains("codex.turn.token_usage.non_cached_input_tokens=2")
|
||||
@@ -708,13 +715,18 @@ async fn handle_responses_span_records_response_kind_and_tool_name() {
|
||||
let logs = String::from_utf8(buffer.lock().unwrap().clone()).unwrap();
|
||||
|
||||
assert!(
|
||||
logs.contains("handle_responses{otel.name=\"function_call\"")
|
||||
&& logs.contains("tool_name=\"nonexistent\"")
|
||||
&& logs.contains("from=\"output_item_done\""),
|
||||
logs.lines().any(|line| {
|
||||
line.contains("handle_responses{")
|
||||
&& line.contains("otel.name=\"function_call\"")
|
||||
&& line.contains("tool_name=\"nonexistent\"")
|
||||
&& line.contains("from=\"output_item_done\"")
|
||||
}),
|
||||
"missing handle_responses span with function call metadata\nlogs:\n{logs}"
|
||||
);
|
||||
assert!(
|
||||
logs.contains("handle_responses{otel.name=\"completed\""),
|
||||
logs.lines().any(|line| {
|
||||
line.contains("handle_responses{") && line.contains("otel.name=\"completed\"")
|
||||
}),
|
||||
"missing handle_responses span for completion\nlogs:\n{logs}"
|
||||
);
|
||||
}
|
||||
@@ -766,7 +778,9 @@ async fn record_responses_sets_span_fields_for_response_events() {
|
||||
.await;
|
||||
|
||||
let TestCodex { codex, .. } = test_codex()
|
||||
.with_model("gpt-5.4")
|
||||
.with_config(|config| {
|
||||
config.model_reasoning_effort = Some(ReasoningEffort::High);
|
||||
config
|
||||
.features
|
||||
.disable(Feature::GhostCommit)
|
||||
@@ -806,22 +820,24 @@ async fn record_responses_sets_span_fields_for_response_events() {
|
||||
];
|
||||
|
||||
for (name, from, tool_name) in expected {
|
||||
let otel_name = format!("otel.name=\"{name}\"");
|
||||
let from_field = from.map(|from| format!("from=\"{from}\""));
|
||||
let tool_name_field = tool_name.map(|tool_name| format!("tool_name=\"{tool_name}\""));
|
||||
|
||||
assert!(
|
||||
logs.contains(&format!("handle_responses{{otel.name=\"{name}\"")),
|
||||
"missing otel.name={name}\nlogs:\n{logs}"
|
||||
logs.lines().any(|line| {
|
||||
line.contains("handle_responses{")
|
||||
&& line.contains(&otel_name)
|
||||
&& line.contains("codex.request.reasoning_effort=high")
|
||||
&& from_field
|
||||
.as_ref()
|
||||
.is_none_or(|from_field| line.contains(from_field))
|
||||
&& tool_name_field
|
||||
.as_ref()
|
||||
.is_none_or(|tool_name_field| line.contains(tool_name_field))
|
||||
}),
|
||||
"missing span fields for {name}\nlogs:\n{logs}"
|
||||
);
|
||||
if let Some(from) = from {
|
||||
assert!(
|
||||
logs.contains(&format!("from=\"{from}\"")),
|
||||
"missing from={from} for {name}\nlogs:\n{logs}"
|
||||
);
|
||||
}
|
||||
if let Some(tool_name) = tool_name {
|
||||
assert!(
|
||||
logs.contains(&format!("tool_name=\"{tool_name}\"")),
|
||||
"missing tool_name={tool_name} for {name}\nlogs:\n{logs}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user