Add reasoning effort to turn tracing spans (#20060)

Why
#19432 added token usage to the turn and response spans. This follow-up
adds the configured reasoning effort so performance traces can be
filtered by model effort.

[example
trace](https://openai.datadoghq.com/apm/trace/1ff708a87159ff4898bdc8bd6091ec18?graphType=waterfall&shouldShowLegend=true&spanID=6596351544047485652&traceQuery=)
<img width="533" height="434" alt="Screenshot 2026-04-28 at 3 52 12 PM"
src="https://github.com/user-attachments/assets/77ef32fc-d7cd-4eec-87b4-26c6798f1af8"
/>


What Changed
- Adds `codex.turn.reasoning_effort` to the turn span.
- Adds `codex.request.reasoning_effort` to the `handle_responses` span.
- Extends the span test to cover explicit `high` effort with token
usage.

Testing
- `cargo test -p codex-core
turn_and_completed_response_spans_record_token_usage`
- `cargo test -p codex-otel`
- `just fmt`
- `just fix -p codex-core`
- `just fix -p codex-otel`
This commit is contained in:
charley-openai
2026-05-04 12:57:05 -07:00
committed by GitHub
parent 229b40aa21
commit a6599b8202
4 changed files with 54 additions and 20 deletions

View File

@@ -1885,6 +1885,7 @@ async fn try_run_sampling_request(
Box<dyn ToolArgumentDiffConsumer>,
)> = None;
let mut should_emit_turn_diff = false;
let reasoning_effort = turn_context.effective_reasoning_effort_for_tracing();
let plan_mode = turn_context.collaboration_mode.mode == ModeKind::Plan;
let mut assistant_message_stream_parsers = AssistantMessageStreamParsers::new(plan_mode);
let mut plan_mode_state = plan_mode.then(|| PlanModeStreamState::new(&turn_context.sub_id));
@@ -1896,6 +1897,7 @@ async fn try_run_sampling_request(
otel.name = field::Empty,
tool_name = field::Empty,
from = field::Empty,
codex.request.reasoning_effort = %reasoning_effort,
gen_ai.usage.input_tokens = field::Empty,
gen_ai.usage.cache_read.input_tokens = field::Empty,
gen_ai.usage.output_tokens = field::Empty,

View File

@@ -118,6 +118,20 @@ impl TurnContext {
)
}
/// Returns the reasoning-effort label to attach to tracing spans.
///
/// Yields the literal `"default"` when the model does not report
/// `supports_reasoning_summaries`, or when neither this turn's
/// `reasoning_effort` nor the model's `default_reasoning_level` is set.
/// Otherwise the turn-level effort wins over the model default and is
/// rendered through its `to_string()` form.
pub(crate) fn effective_reasoning_effort_for_tracing(&self) -> String {
    // Models without reasoning-summary support always report "default".
    if !self.model_info.supports_reasoning_summaries {
        return "default".to_string();
    }

    // Prefer the turn's explicit effort, then the model-level default.
    self.reasoning_effort
        .or(self.model_info.default_reasoning_level)
        .map_or_else(|| "default".to_string(), |effort| effort.to_string())
}
pub(crate) fn model_context_window(&self) -> Option<i64> {
let effective_context_window_percent = self.model_info.effective_context_window_percent;
self.model_info

View File

@@ -366,12 +366,14 @@ impl Session {
let task_cancellation_token = cancellation_token.child_token();
// Task-owned turn spans keep a core-owned span open for the
// full task lifecycle after the submission dispatch span ends.
let reasoning_effort = turn_context.effective_reasoning_effort_for_tracing();
let task_span = info_span!(
"turn",
otel.name = span_name,
thread.id = %self.conversation_id,
turn.id = %turn_context.sub_id,
model = %turn_context.model_info.slug,
codex.turn.reasoning_effort = %reasoning_effort,
codex.turn.token_usage.input_tokens = field::Empty,
codex.turn.token_usage.cached_input_tokens = field::Empty,
codex.turn.token_usage.non_cached_input_tokens = field::Empty,

View File

@@ -1,6 +1,7 @@
use codex_core::config::Constrained;
use codex_features::Feature;
use codex_protocol::models::PermissionProfile;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
@@ -595,8 +596,9 @@ async fn turn_and_completed_response_spans_record_token_usage() {
)
.await;
let TestCodex { codex, .. } = test_codex()
let test = test_codex()
.with_config(|config| {
config.model_reasoning_effort = Some(ReasoningEffort::High);
config
.features
.disable(Feature::GhostCommit)
@@ -606,6 +608,8 @@ async fn turn_and_completed_response_spans_record_token_usage() {
.await
.unwrap();
let TestCodex { codex, .. } = test;
codex
.submit(Op::UserInput {
environments: None,
@@ -625,7 +629,9 @@ async fn turn_and_completed_response_spans_record_token_usage() {
assert!(
logs.lines().any(|line| {
line.contains("handle_responses{otel.name=\"completed\"")
line.contains("handle_responses{")
&& line.contains("otel.name=\"completed\"")
&& line.contains("codex.request.reasoning_effort=high")
&& line.contains("gen_ai.usage.input_tokens=3")
&& line.contains("gen_ai.usage.cache_read.input_tokens=1")
&& line.contains("gen_ai.usage.output_tokens=5")
@@ -637,6 +643,7 @@ async fn turn_and_completed_response_spans_record_token_usage() {
assert!(
logs.lines().any(|line| {
line.contains("turn{otel.name=\"session_task.turn\"")
&& line.contains("codex.turn.reasoning_effort=high")
&& line.contains("codex.turn.token_usage.input_tokens=3")
&& line.contains("codex.turn.token_usage.cached_input_tokens=1")
&& line.contains("codex.turn.token_usage.non_cached_input_tokens=2")
@@ -708,13 +715,18 @@ async fn handle_responses_span_records_response_kind_and_tool_name() {
let logs = String::from_utf8(buffer.lock().unwrap().clone()).unwrap();
assert!(
logs.contains("handle_responses{otel.name=\"function_call\"")
&& logs.contains("tool_name=\"nonexistent\"")
&& logs.contains("from=\"output_item_done\""),
logs.lines().any(|line| {
line.contains("handle_responses{")
&& line.contains("otel.name=\"function_call\"")
&& line.contains("tool_name=\"nonexistent\"")
&& line.contains("from=\"output_item_done\"")
}),
"missing handle_responses span with function call metadata\nlogs:\n{logs}"
);
assert!(
logs.contains("handle_responses{otel.name=\"completed\""),
logs.lines().any(|line| {
line.contains("handle_responses{") && line.contains("otel.name=\"completed\"")
}),
"missing handle_responses span for completion\nlogs:\n{logs}"
);
}
@@ -766,7 +778,9 @@ async fn record_responses_sets_span_fields_for_response_events() {
.await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.4")
.with_config(|config| {
config.model_reasoning_effort = Some(ReasoningEffort::High);
config
.features
.disable(Feature::GhostCommit)
@@ -806,22 +820,24 @@ async fn record_responses_sets_span_fields_for_response_events() {
];
for (name, from, tool_name) in expected {
let otel_name = format!("otel.name=\"{name}\"");
let from_field = from.map(|from| format!("from=\"{from}\""));
let tool_name_field = tool_name.map(|tool_name| format!("tool_name=\"{tool_name}\""));
assert!(
logs.contains(&format!("handle_responses{{otel.name=\"{name}\"")),
"missing otel.name={name}\nlogs:\n{logs}"
logs.lines().any(|line| {
line.contains("handle_responses{")
&& line.contains(&otel_name)
&& line.contains("codex.request.reasoning_effort=high")
&& from_field
.as_ref()
.is_none_or(|from_field| line.contains(from_field))
&& tool_name_field
.as_ref()
.is_none_or(|tool_name_field| line.contains(tool_name_field))
}),
"missing span fields for {name}\nlogs:\n{logs}"
);
if let Some(from) = from {
assert!(
logs.contains(&format!("from=\"{from}\"")),
"missing from={from} for {name}\nlogs:\n{logs}"
);
}
if let Some(tool_name) = tool_name {
assert!(
logs.contains(&format!("tool_name=\"{tool_name}\"")),
"missing tool_name={tool_name} for {name}\nlogs:\n{logs}"
);
}
}
}