mirror of
https://github.com/openai/codex.git
synced 2026-05-28 15:00:16 +00:00
## Summary
Adds experimental `additionalContext` support to `turn/start` and
`turn/steer` so clients can provide ephemeral external context, such as
browser or automation state, without turning that plumbing into a
visible user prompt or triggering user-prompt lifecycle behavior.
## API Shape
The parameter shape is:
```ts
additionalContext?: Record<string, {
value: string
kind: "untrusted" | "application"
}> | null
```
Example:
```json
{
"additionalContext": {
"browser_info": {
"value": "Active tab is CI failures.",
"kind": "untrusted"
},
"automation_info": {
"value": "CI rerun is in progress.",
"kind": "application"
}
}
}
```
The keys are opaque and caller-defined.
## Context Injection
When provided, accepted entries are inserted into model context as
hidden contextual message items, not as visible thread user-message
items.
`kind: "untrusted"` entries are inserted with role `user`:
```text
<external_${key}>${value}</external_${key}>
```
`kind: "application"` entries are inserted with role `developer`:
```text
<${key}>${value}</${key}>
```
Values are not escaped. Each value is truncated to approximately 1k
tokens before it is wrapped.
For `turn/start`, accepted additional context is inserted before normal
user input. For `turn/steer`, additional context is merged only when the
steer includes non-empty user input; context-only steers are still
rejected as empty input.
## Dedupe Strategy
`AdditionalContextStore` lives on session state and stores the latest
complete additional-context map.
Each `turn/start` or non-empty `turn/steer` treats its
`additionalContext` as the current complete set of values. Entries are
injected only when the key is new or the exact entry for that key
changed, including `value` or `kind`. After merging, the store is
replaced with the provided map, so omitted keys are removed from the
retained set and can be injected again later if reintroduced.
Omitting `additionalContext`, passing `null`, or passing an empty object
resets the store to empty and injects nothing.
## What Changed
- Threads experimental v2 `additionalContext` through app-server into
core turn start and steer handling.
- Adds separate contextual fragment types for untrusted user-role
context and application developer-role context.
- Uses pending response input items so additional context can be
combined with normal user input without treating it as prompt text.
- Adds integration coverage for start/steer flow, role routing,
dedupe/reset behavior, deletion/re-add behavior, hook-blocked input
behavior, empty context-only steer rejection, external-fragment marker
matching, and truncation.
582 lines
20 KiB
Rust
582 lines
20 KiB
Rust
use anyhow::Result;
|
|
use codex_protocol::items::TurnItem;
|
|
use codex_protocol::protocol::AdditionalContextEntry;
|
|
use codex_protocol::protocol::AdditionalContextKind;
|
|
use codex_protocol::protocol::EventMsg;
|
|
use codex_protocol::protocol::ItemCompletedEvent;
|
|
use codex_protocol::protocol::Op;
|
|
use codex_protocol::user_input::UserInput;
|
|
use core_test_support::context_snapshot;
|
|
use core_test_support::context_snapshot::ContextSnapshotOptions;
|
|
use core_test_support::context_snapshot::ContextSnapshotRenderMode;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::mount_sse_once;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::responses::start_mock_server;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::test_codex::test_codex;
|
|
use core_test_support::wait_for_event_match;
|
|
use pretty_assertions::assert_eq;
|
|
use std::collections::BTreeMap;
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn additional_context_is_model_visible_but_not_a_user_message_item() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
|
)
|
|
.await;
|
|
let test = test_codex()
|
|
.with_config(|config| config.include_environment_context = false)
|
|
.build(&server)
|
|
.await?;
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "inspect the active tab".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: BTreeMap::from([
|
|
(
|
|
"browser_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "tab one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
(
|
|
"automation_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "run one".to_string(),
|
|
kind: AdditionalContextKind::Application,
|
|
},
|
|
),
|
|
]),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
|
|
let user_item = wait_for_event_match(&test.codex, |event| match event {
|
|
EventMsg::ItemCompleted(ItemCompletedEvent {
|
|
item: TurnItem::UserMessage(item),
|
|
..
|
|
}) => Some(item.clone()),
|
|
_ => None,
|
|
})
|
|
.await;
|
|
assert_eq!(
|
|
user_item.content,
|
|
vec![UserInput::Text {
|
|
text: "inspect the active tab".to_string(),
|
|
text_elements: Vec::new(),
|
|
}]
|
|
);
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
let request = request.single_request();
|
|
insta::assert_snapshot!(
|
|
"additional_context_simple_input",
|
|
context_snapshot::format_labeled_requests_snapshot(
|
|
"additional context is inserted before the user turn input.",
|
|
&[("Request", &request)],
|
|
&ContextSnapshotOptions::default()
|
|
.strip_capability_instructions()
|
|
.render_mode(ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 160 }),
|
|
)
|
|
);
|
|
let developer_context_texts = request
|
|
.message_input_texts("developer")
|
|
.into_iter()
|
|
.filter(|text| text.starts_with("<automation_info>"))
|
|
.collect::<Vec<_>>();
|
|
assert_eq!(
|
|
developer_context_texts,
|
|
vec!["<automation_info>run one</automation_info>"]
|
|
);
|
|
assert_eq!(
|
|
request.message_input_texts("user"),
|
|
vec![
|
|
"<external_browser_info>tab one</external_browser_info>",
|
|
"inspect the active tab",
|
|
]
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn external_context_like_user_text_remains_a_user_message_item() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
|
)
|
|
.await;
|
|
let test = test_codex()
|
|
.with_config(|config| config.include_environment_context = false)
|
|
.build(&server)
|
|
.await?;
|
|
let user_input = UserInput::Text {
|
|
text: "<external_api>".to_string(),
|
|
text_elements: Vec::new(),
|
|
};
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![user_input.clone()],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: BTreeMap::new(),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
|
|
let user_item = wait_for_event_match(&test.codex, |event| match event {
|
|
EventMsg::ItemCompleted(ItemCompletedEvent {
|
|
item: TurnItem::UserMessage(item),
|
|
..
|
|
}) => Some(item.clone()),
|
|
_ => None,
|
|
})
|
|
.await;
|
|
assert_eq!(user_item.content, vec![user_input]);
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
let request = request.single_request();
|
|
assert_eq!(request.message_input_texts("user"), vec!["<external_api>"]);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn additional_context_trust_controls_message_role() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
|
)
|
|
.await;
|
|
let test = test_codex()
|
|
.with_config(|config| config.include_environment_context = false)
|
|
.build(&server)
|
|
.await?;
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "inspect context".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: BTreeMap::from([
|
|
(
|
|
"browser_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "tab one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
(
|
|
"automation_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "run one".to_string(),
|
|
kind: AdditionalContextKind::Application,
|
|
},
|
|
),
|
|
]),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
let request = request.single_request();
|
|
let developer_context_texts = request
|
|
.message_input_texts("developer")
|
|
.into_iter()
|
|
.filter(|text| text.starts_with("<automation_info>"))
|
|
.collect::<Vec<_>>();
|
|
assert_eq!(
|
|
developer_context_texts,
|
|
vec!["<automation_info>run one</automation_info>"]
|
|
);
|
|
assert_eq!(
|
|
request.message_input_texts("user"),
|
|
vec![
|
|
"<external_browser_info>tab one</external_browser_info>",
|
|
"inspect context",
|
|
]
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn additional_context_is_deduplicated_between_turns_while_retained() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let first_request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
|
)
|
|
.await;
|
|
let second_request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]),
|
|
)
|
|
.await;
|
|
let test = test_codex()
|
|
.with_config(|config| config.include_environment_context = false)
|
|
.build(&server)
|
|
.await?;
|
|
let additional_context = BTreeMap::from([(
|
|
"browser_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "same tab".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
)]);
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "first turn".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: additional_context.clone(),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "second turn".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context,
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
assert_eq!(
|
|
first_request.single_request().message_input_texts("user"),
|
|
vec![
|
|
"<external_browser_info>same tab</external_browser_info>",
|
|
"first turn",
|
|
]
|
|
);
|
|
assert_eq!(
|
|
second_request.single_request().message_input_texts("user"),
|
|
vec![
|
|
"<external_browser_info>same tab</external_browser_info>",
|
|
"first turn",
|
|
"second turn",
|
|
]
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn additional_context_removes_one_value_while_adding_another() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let first_request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
|
)
|
|
.await;
|
|
let second_request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]),
|
|
)
|
|
.await;
|
|
let third_request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-3"), ev_completed("resp-3")]),
|
|
)
|
|
.await;
|
|
let test = test_codex()
|
|
.with_config(|config| config.include_environment_context = false)
|
|
.build(&server)
|
|
.await?;
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "first turn".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: BTreeMap::from([
|
|
(
|
|
"automation_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "run one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
(
|
|
"browser_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "tab one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
]),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "second turn".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: BTreeMap::from([
|
|
(
|
|
"automation_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "run one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
(
|
|
"terminal_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "pty one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
]),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "third turn".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: BTreeMap::from([
|
|
(
|
|
"automation_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "run one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
(
|
|
"browser_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "tab one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
(
|
|
"terminal_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: "pty one".to_string(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
]),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
assert_eq!(
|
|
first_request.single_request().message_input_texts("user"),
|
|
vec![
|
|
"<external_automation_info>run one</external_automation_info>",
|
|
"<external_browser_info>tab one</external_browser_info>",
|
|
"first turn",
|
|
]
|
|
);
|
|
assert_eq!(
|
|
second_request.single_request().message_input_texts("user"),
|
|
vec![
|
|
"<external_automation_info>run one</external_automation_info>",
|
|
"<external_browser_info>tab one</external_browser_info>",
|
|
"first turn",
|
|
"<external_terminal_info>pty one</external_terminal_info>",
|
|
"second turn",
|
|
]
|
|
);
|
|
assert_eq!(
|
|
third_request.single_request().message_input_texts("user"),
|
|
vec![
|
|
"<external_automation_info>run one</external_automation_info>",
|
|
"<external_browser_info>tab one</external_browser_info>",
|
|
"first turn",
|
|
"<external_terminal_info>pty one</external_terminal_info>",
|
|
"second turn",
|
|
"<external_browser_info>tab one</external_browser_info>",
|
|
"third turn",
|
|
]
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn additional_context_values_are_truncated_before_model_input() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
const MAX_EXPECTED_EXTERNAL_CONTEXT_TEXT_BYTES: usize = 5 * 1024;
|
|
|
|
let server = start_mock_server().await;
|
|
let request = mount_sse_once(
|
|
&server,
|
|
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
|
|
)
|
|
.await;
|
|
let test = test_codex()
|
|
.with_config(|config| config.include_environment_context = false)
|
|
.build(&server)
|
|
.await?;
|
|
let long_browser_value = format!("browser-head-{}browser-tail", "b".repeat(40_000));
|
|
let long_automation_value = format!("automation-head-{}automation-tail", "a".repeat(40_000));
|
|
let untruncated_browser_fragment =
|
|
format!("<external_browser_info>{long_browser_value}</external_browser_info>");
|
|
let untruncated_automation_fragment =
|
|
format!("<automation_info>{long_automation_value}</automation_info>");
|
|
|
|
test.codex
|
|
.submit(Op::UserInput {
|
|
environments: None,
|
|
items: vec![UserInput::Text {
|
|
text: "summarize context".to_string(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
responsesapi_client_metadata: None,
|
|
additional_context: BTreeMap::from([
|
|
(
|
|
"automation_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: long_automation_value.clone(),
|
|
kind: AdditionalContextKind::Application,
|
|
},
|
|
),
|
|
(
|
|
"browser_info".to_string(),
|
|
AdditionalContextEntry {
|
|
value: long_browser_value.clone(),
|
|
kind: AdditionalContextKind::Untrusted,
|
|
},
|
|
),
|
|
]),
|
|
thread_settings: Default::default(),
|
|
})
|
|
.await?;
|
|
wait_for_event_match(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_)).then_some(())
|
|
})
|
|
.await;
|
|
|
|
let request = request.single_request();
|
|
let developer_texts = request
|
|
.message_input_texts("developer")
|
|
.into_iter()
|
|
.filter(|text| text.starts_with("<automation_info>"))
|
|
.collect::<Vec<_>>();
|
|
let [automation_text] = developer_texts.as_slice() else {
|
|
panic!("expected application additional context, got {developer_texts:?}");
|
|
};
|
|
assert!(automation_text.starts_with(&format!(
|
|
"<automation_info>automation-head-{}",
|
|
"a".repeat(1024)
|
|
)));
|
|
assert!(automation_text.contains("tokens truncated"));
|
|
assert!(automation_text.ends_with("automation-tail</automation_info>"));
|
|
assert!(automation_text.len() < untruncated_automation_fragment.len());
|
|
assert!(
|
|
automation_text.len() <= MAX_EXPECTED_EXTERNAL_CONTEXT_TEXT_BYTES,
|
|
"application additional context was not capped before model input: {} bytes",
|
|
automation_text.len()
|
|
);
|
|
|
|
let user_texts = request.message_input_texts("user");
|
|
let [external_text, user_text] = user_texts.as_slice() else {
|
|
panic!("expected external context plus user input, got {user_texts:?}");
|
|
};
|
|
assert_eq!(user_text, "summarize context");
|
|
assert!(external_text.starts_with(&format!(
|
|
"<external_browser_info>browser-head-{}",
|
|
"b".repeat(1024)
|
|
)));
|
|
assert!(external_text.contains("tokens truncated"));
|
|
assert!(external_text.ends_with("browser-tail</external_browser_info>"));
|
|
assert!(external_text.len() < untruncated_browser_fragment.len());
|
|
assert!(
|
|
external_text.len() <= MAX_EXPECTED_EXTERNAL_CONTEXT_TEXT_BYTES,
|
|
"untrusted additional context was not capped before model input: {} bytes",
|
|
external_text.len()
|
|
);
|
|
|
|
Ok(())
|
|
}
|