mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
Enterprises want to restrict the MCP servers their users can use.
Admins can now specify an allowlist of MCPs in `requirements.toml`. The
MCP servers are matched on both Name and Transport (local path or HTTP
URL) -- both must match to allow the MCP server. This prevents
circumventing the allowlist by renaming MCP servers in user config. (It
is still possible to replace the binary at the local path, e.g. swap
`/usr/local/github-mcp` out for a nefarious MCP. We could allow hash
pinning in the future, but that would break updates. I also think this
represents a broader, out-of-scope problem.)
We introduce a new field to Constrained: "normalizer". In general, it is
a fn(T) -> T and applies when `Constrained<T>.set()` is called. In this
particular case, it disables MCP servers which do not match the
allowlist. An alternative solution would remove this and instead throw a
ConstraintError. That would stop Codex launching if any MCP server was
configured which didn't match. I think this is bad.
We currently reuse the enabled flag on MCP servers to disable them, but
don't propagate any information about why they are disabled. I'd like to
add that in a follow-up PR, possibly by replacing `enabled` with an
enum.
In action:
```
# MCP server config has two MCPs. We are going to allowlist one of them.
➜ codex git:(gt/restrict-mcps) ✗ cat ~/.codex/config.toml | grep mcp_servers -A1
[mcp_servers.hello_world]
command = "hello-world-mcp"
--
[mcp_servers.docs]
command = "docs-mcp"
# Restrict the MCPs to the hello_world MCP.
➜ codex git:(gt/restrict-mcps) ✗ defaults read com.openai.codex requirements_toml_base64 | base64 -d
[mcp_server_allowlist.hello_world]
command = "hello-world-mcp"
# List the MCPs, observe hello_world is enabled and docs is disabled.
➜ codex git:(gt/restrict-mcps) ✗ just codex mcp list
cargo run --bin codex -- "$@"
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.25s
Running `target/debug/codex mcp list`
Name Command Args Env Cwd Status Auth
docs docs-mcp - - - disabled Unsupported
hello_world hello-world-mcp - - - enabled Unsupported
# Remove the restrictions.
➜ codex git:(gt/restrict-mcps) ✗ defaults delete com.openai.codex requirements_toml_base64
# Observe both MCPs are enabled.
➜ codex git:(gt/restrict-mcps) ✗ just codex mcp list
cargo run --bin codex -- "$@"
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.25s
Running `target/debug/codex mcp list`
Name Command Args Env Cwd Status Auth
docs docs-mcp - - - enabled Unsupported
hello_world hello-world-mcp - - - enabled Unsupported
# A new requirements that updates the command to one that does not match.
➜ codex git:(gt/restrict-mcps) ✗ cat ~/requirements.toml
[mcp_server_allowlist.hello_world]
command = "hello-world-mcp-v2"
# Use those requirements.
➜ codex git:(gt/restrict-mcps) ✗ defaults write com.openai.codex requirements_toml_base64 "$(base64 -i /Users/gt/requirements.toml)"
# Observe both MCPs are disabled.
➜ codex git:(gt/restrict-mcps) ✗ just codex mcp list
cargo run --bin codex -- "$@"
Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.75s
Running `target/debug/codex mcp list`
Name Command Args Env Cwd Status Auth
docs docs-mcp - - - disabled Unsupported
hello_world hello-world-mcp - - - disabled Unsupported
```
827 lines
27 KiB
Rust
827 lines
27 KiB
Rust
#![cfg(not(target_os = "windows"))]
|
|
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
|
|
|
use anyhow::Context;
|
|
use anyhow::Result;
|
|
use codex_core::config::types::McpServerConfig;
|
|
use codex_core::config::types::McpServerTransportConfig;
|
|
use codex_core::protocol::AskForApproval;
|
|
use codex_core::protocol::EventMsg;
|
|
use codex_core::protocol::Op;
|
|
use codex_core::protocol::SandboxPolicy;
|
|
use codex_protocol::config_types::ReasoningSummary;
|
|
use codex_protocol::user_input::UserInput;
|
|
use core_test_support::assert_regex_match;
|
|
use core_test_support::responses;
|
|
use core_test_support::responses::ev_assistant_message;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_function_call;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::mount_sse_once;
|
|
use core_test_support::responses::mount_sse_sequence;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::responses::start_mock_server;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::stdio_server_bin;
|
|
use core_test_support::test_codex::test_codex;
|
|
use core_test_support::wait_for_event;
|
|
use serde_json::Value;
|
|
use serde_json::json;
|
|
use std::collections::HashMap;
|
|
use std::time::Duration;
|
|
|
|
// Verifies byte-truncation formatting for function error output (RespondToModel errors)
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex().with_model("test-gpt-5.1-codex");
|
|
let test = builder.build(&server).await?;
|
|
|
|
// Construct a very long, non-existent path to force a RespondToModel error with a large message
|
|
let long_path = "long path text should trigger truncation".repeat(8_000);
|
|
let call_id = "grep-huge-error";
|
|
let args = json!({
|
|
"pattern": "alpha",
|
|
"path": long_path,
|
|
"limit": 10
|
|
});
|
|
let responses = vec![
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
];
|
|
let mock = mount_sse_sequence(&server, responses).await;
|
|
|
|
test.submit_turn_with_policy(
|
|
"trigger grep_files with long path to test truncation",
|
|
SandboxPolicy::DangerFullAccess,
|
|
)
|
|
.await?;
|
|
|
|
let output = mock
|
|
.function_call_output_text(call_id)
|
|
.context("function error output present")?;
|
|
|
|
tracing::debug!(output = %output, "truncated function error output");
|
|
|
|
// Expect plaintext with token-based truncation marker and no omitted-lines marker
|
|
assert!(
|
|
serde_json::from_str::<serde_json::Value>(&output).is_err(),
|
|
"expected error output to be plain text",
|
|
);
|
|
assert!(
|
|
!output.contains("Total output lines:"),
|
|
"error output should not include line-based truncation header: {output}",
|
|
);
|
|
let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$";
|
|
assert_regex_match(truncated_pattern, &output);
|
|
assert!(
|
|
!output.contains("omitted"),
|
|
"line omission marker should not appear when no lines were dropped: {output}"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// Verifies that a configured `tool_output_token_limit` is honored for a standard
// tool call (shell_command) and that, on this model, truncation uses the
// char-based policy: no "tokens truncated" marker appears and the payload size
// tracks the configured token budget (~4 chars/token, per the length assert below).
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

    // Use a model that exposes the shell_command tool.
    let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
        // Large explicit budget; output below is ~590k chars so it still truncates.
        config.tool_output_token_limit = Some(100_000);
    });

    let fixture = builder.build(&server).await?;

    let call_id = "shell-too-large";
    // Emit 100k numbered lines (platform-appropriate command).
    let command = if cfg!(windows) {
        "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
    } else {
        "seq 1 100000"
    };
    let args = serde_json::json!({
        "command": command,
        "timeout_ms": 5_000,
    });

    // First response: model tells us to run the tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            responses::ev_completed("resp-1"),
        ]),
    )
    .await;
    let mock2 = mount_sse_once(
        &server,
        sse(vec![
            responses::ev_assistant_message("msg-1", "done"),
            responses::ev_completed("resp-2"),
        ]),
    )
    .await;

    fixture
        .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
        .await?;

    // Inspect what we sent back to the model; it should contain a truncated
    // function_call_output for the shell call.
    let output = mock2
        .single_request()
        .function_call_output_text(call_id)
        .context("function_call_output present for shell call")?;
    // Normalize Windows line endings so length/marker checks are platform-stable.
    let output = output.replace("\r\n", "\n");

    // Expect plain text (not JSON).
    assert!(
        serde_json::from_str::<Value>(&output).is_err(),
        "expected truncated shell output to be plain text"
    );

    // 100_000 tokens * ~4 chars/token => ~400k chars kept after truncation.
    assert!(
        (400000..=401000).contains(&output.len()),
        "we should be almost 100k tokens"
    );

    // Char-policy models must not emit the token-based truncation marker.
    assert!(
        !output.contains("tokens truncated"),
        "shell output should not contain tokens truncated marker: {output}"
    );

    Ok(())
}
|
|
|
|
// Verifies that a standard tool call (shell_command) exceeding the model formatting
// limits is truncated before being sent back to the model, using the default
// (unconfigured) limit and a char-based truncation marker on this model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

    // Use a model that exposes the shell_command tool. Note: no
    // tool_output_token_limit override, so the default limit applies.
    let mut builder = test_codex().with_model("gpt-5.1");

    let fixture = builder.build(&server).await?;

    let call_id = "shell-too-large";
    // Emit 100k numbered lines (platform-appropriate command).
    let command = if cfg!(windows) {
        "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
    } else {
        "seq 1 100000"
    };
    let args = serde_json::json!({
        "command": command,
        "timeout_ms": 5_000,
    });

    // First response: model tells us to run the tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            responses::ev_completed("resp-1"),
        ]),
    )
    .await;
    let mock2 = mount_sse_once(
        &server,
        sse(vec![
            responses::ev_assistant_message("msg-1", "done"),
            responses::ev_completed("resp-2"),
        ]),
    )
    .await;

    fixture
        .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
        .await?;

    // Inspect what we sent back to the model; it should contain a truncated
    // function_call_output for the shell call.
    let output = mock2
        .single_request()
        .function_call_output_text(call_id)
        .context("function_call_output present for shell call")?;
    // Normalize Windows line endings so the pattern/length checks are platform-stable.
    let output = output.replace("\r\n", "\n");

    // Expect plain text (not JSON).
    assert!(
        serde_json::from_str::<Value>(&output).is_err(),
        "expected truncated shell output to be plain text"
    );

    // Header (exit code, wall time, total lines) must survive, with a char-count
    // truncation marker (`…N chars truncated…`) somewhere in the body.
    let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 100000\nOutput:\n.*?…\d+ chars truncated….*$"#;

    assert_regex_match(truncated_pattern, &output);

    // Default char budget keeps roughly 10k chars — TODO confirm the default if it changes.
    let len = output.len();
    assert!(
        (9_900..=10_100).contains(&len),
        "expected ~10k chars after truncation, got {len}"
    );

    Ok(())
}
|
|
|
|
// Verifies that a standard tool call (shell_command) exceeding the model formatting
// limits is truncated before being sent back to the model. On this (codex) model
// the truncation marker is token-based and the head/tail of the output is preserved.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

    // Use a model that exposes the shell_command tool.
    let mut builder = test_codex().with_model("gpt-5.1-codex");
    let fixture = builder.build(&server).await?;

    let call_id = "shell-too-large";
    // Emit 100k numbered lines (platform-appropriate command).
    let command = if cfg!(windows) {
        "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
    } else {
        "seq 1 100000"
    };
    let args = serde_json::json!({
        "command": command,
        "timeout_ms": 5_000,
    });

    // First response: model tells us to run the tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            responses::ev_completed("resp-1"),
        ]),
    )
    .await;
    let mock2 = mount_sse_once(
        &server,
        sse(vec![
            responses::ev_assistant_message("msg-1", "done"),
            responses::ev_completed("resp-2"),
        ]),
    )
    .await;

    fixture
        .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
        .await?;

    // Inspect what we sent back to the model; it should contain a truncated
    // function_call_output for the shell call.
    let output = mock2
        .single_request()
        .function_call_output_text(call_id)
        .context("function_call_output present for shell call")?;
    // Normalize Windows line endings so the multi-line pattern matches on all platforms.
    let output = output.replace("\r\n", "\n");

    // Expect plain text (not JSON).
    assert!(
        serde_json::from_str::<Value>(&output).is_err(),
        "expected truncated shell output to be plain text"
    );
    // Head lines 1..6 and tail lines 99999..100000 are kept; the middle is
    // replaced by an exact token-count marker. NOTE(review): the 137224 figure
    // is tied to the current tokenizer/limit — update if either changes.
    let truncated_pattern = r#"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Total output lines: 100000
Output:
1
2
3
4
5
6
.*…137224 tokens truncated.*
99999
100000
$"#;
    assert_regex_match(truncated_pattern, &output);

    Ok(())
}
|
|
|
|
// Ensures shell_command outputs that exceed the line limit are truncated only once.
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn tool_call_output_truncated_only_once() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
|
|
let mut builder = test_codex().with_model("gpt-5.1-codex");
|
|
let fixture = builder.build(&server).await?;
|
|
let call_id = "shell-single-truncation";
|
|
let command = if cfg!(windows) {
|
|
"for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
|
|
} else {
|
|
"seq 1 10000"
|
|
};
|
|
let args = serde_json::json!({
|
|
"command": command,
|
|
"timeout_ms": 5_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
responses::ev_response_created("resp-1"),
|
|
responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
|
responses::ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let mock2 = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
responses::ev_assistant_message("msg-1", "done"),
|
|
responses::ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
fixture
|
|
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
|
|
.await?;
|
|
|
|
let output = mock2
|
|
.single_request()
|
|
.function_call_output_text(call_id)
|
|
.context("function_call_output present for shell call")?;
|
|
|
|
let truncation_markers = output.matches("tokens truncated").count();
|
|
|
|
assert_eq!(
|
|
truncation_markers, 1,
|
|
"shell output should carry only one truncation marker: {output}"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// Verifies that an MCP tool call result exceeding the model formatting limits
// is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

    let call_id = "rmcp-truncated";
    let server_name = "rmcp";
    // Fully-qualified MCP tool name: mcp__<server>__<tool>.
    let tool_name = format!("mcp__{server_name}__echo");

    // Build a very large message to exceed 10KiB once serialized.
    let large_msg = "long-message-with-newlines-".repeat(6000);
    let args_json = serde_json::json!({ "message": large_msg });

    // First response: model calls the MCP echo tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
            responses::ev_completed("resp-1"),
        ]),
    )
    .await;
    let mock2 = mount_sse_once(
        &server,
        sse(vec![
            responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
            responses::ev_completed("resp-2"),
        ]),
    )
    .await;

    // Compile the rmcp stdio test server and configure it.
    let rmcp_test_server_bin = stdio_server_bin()?;

    // Register the stdio MCP server and shrink the tool output budget so the
    // echo result is guaranteed to be truncated.
    let mut builder = test_codex().with_config(move |config| {
        let mut servers = config.mcp_servers.get().clone();
        servers.insert(
            server_name.to_string(),
            codex_core::config::types::McpServerConfig {
                transport: codex_core::config::types::McpServerTransportConfig::Stdio {
                    command: rmcp_test_server_bin,
                    args: Vec::new(),
                    env: None,
                    env_vars: Vec::new(),
                    cwd: None,
                },
                enabled: true,
                // Generous startup timeout: the test binary may be cold-started.
                startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
                tool_timeout_sec: None,
                enabled_tools: None,
                disabled_tools: None,
            },
        );
        // mcp_servers is a Constrained field; .set() validates the new map.
        config
            .mcp_servers
            .set(servers)
            .expect("test mcp servers should accept any configuration");
        config.tool_output_token_limit = Some(500);
    });
    let fixture = builder.build(&server).await?;

    fixture
        .submit_turn_with_policy(
            "call the rmcp echo tool with a very large message",
            SandboxPolicy::ReadOnly,
        )
        .await?;

    // The MCP tool call output is converted to a function_call_output for the model.
    let output = mock2
        .single_request()
        .function_call_output_text(call_id)
        .context("function_call_output present for rmcp call")?;

    // MCP results must never get the line-based truncation header.
    assert!(
        !output.contains("Total output lines:"),
        "MCP output should not include line-based truncation header: {output}"
    );

    // JSON head and tail survive around a token-based truncation marker.
    let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
    assert_regex_match(truncated_pattern, &output);
    // 500 tokens ≈ a couple of thousand chars; anything larger means truncation failed.
    assert!(output.len() < 2500, "{}", output.len());

    Ok(())
}
|
|
|
|
// Verifies that an MCP image tool output is serialized as content_items array with
// the image preserved and no truncation summary appended (since there are no text items).
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

    let call_id = "rmcp-image-no-trunc";
    let server_name = "rmcp";
    // Fully-qualified MCP tool name: mcp__<server>__<tool>.
    let tool_name = format!("mcp__{server_name}__image");

    // First response: model calls the MCP image tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, &tool_name, "{}"),
            ev_completed("resp-1"),
        ]),
    )
    .await;
    let final_mock = mount_sse_once(
        &server,
        sse(vec![
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    )
    .await;

    // Build the stdio rmcp server and pass a tiny PNG via data URL so it can construct ImageContent.
    let rmcp_test_server_bin = stdio_server_bin()?;

    // 1x1 PNG data URL
    let openai_png = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMB/ee9bQAAAABJRU5ErkJggg==";

    // Register the stdio MCP server; the image payload is injected via env var.
    let mut builder = test_codex().with_config(move |config| {
        let mut servers = config.mcp_servers.get().clone();
        servers.insert(
            server_name.to_string(),
            McpServerConfig {
                transport: McpServerTransportConfig::Stdio {
                    command: rmcp_test_server_bin,
                    args: Vec::new(),
                    env: Some(HashMap::from([(
                        "MCP_TEST_IMAGE_DATA_URL".to_string(),
                        openai_png.to_string(),
                    )])),
                    env_vars: Vec::new(),
                    cwd: None,
                },
                enabled: true,
                // Generous startup timeout: the test binary may be cold-started.
                startup_timeout_sec: Some(Duration::from_secs(10)),
                tool_timeout_sec: None,
                enabled_tools: None,
                disabled_tools: None,
            },
        );
        // mcp_servers is a Constrained field; .set() validates the new map.
        config
            .mcp_servers
            .set(servers)
            .expect("test mcp servers should accept any configuration");
    });
    let fixture = builder.build(&server).await?;
    // Reuse the model the session was configured with for the UserTurn below.
    let session_model = fixture.session_configured.model.clone();

    // Submit a full UserTurn op directly (rather than submit_turn_with_policy)
    // so every turn parameter is explicit.
    fixture
        .codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "call the rmcp image tool".into(),
            }],
            final_output_json_schema: None,
            cwd: fixture.cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::ReadOnly,
            model: session_model,
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;

    // Wait for completion to ensure the outbound request is captured.
    wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
    let output_item = final_mock.single_request().function_call_output(call_id);
    // Expect exactly one array element: the image item; and no trailing summary text.
    let output = output_item.get("output").expect("output");
    assert!(output.is_array(), "expected array output");
    let arr = output.as_array().unwrap();
    assert_eq!(arr.len(), 1, "no truncation summary should be appended");
    // The image data URL must round-trip unchanged into an input_image item.
    assert_eq!(
        arr[0],
        json!({"type": "input_image", "image_url": openai_png})
    );

    Ok(())
}
|
|
|
|
// Token-based policy should report token counts even when truncation is byte-estimated.
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn token_policy_marker_reports_tokens() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex()
|
|
.with_model("gpt-5.1-codex")
|
|
.with_config(|config| {
|
|
config.tool_output_token_limit = Some(50); // small budget to force truncation
|
|
});
|
|
let fixture = builder.build(&server).await?;
|
|
|
|
let call_id = "shell-token-marker";
|
|
let args = json!({
|
|
"command": "seq 1 150",
|
|
"timeout_ms": 5_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let done_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
fixture
|
|
.submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
|
|
.await?;
|
|
|
|
let output = done_mock
|
|
.single_request()
|
|
.function_call_output_text(call_id)
|
|
.context("shell output present")?;
|
|
|
|
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*tokens truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
|
|
|
|
assert_regex_match(pattern, &output);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// Byte-based policy should report bytes removed.
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn byte_policy_marker_reports_bytes() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
|
|
config.tool_output_token_limit = Some(50); // ~200 byte cap
|
|
});
|
|
let fixture = builder.build(&server).await?;
|
|
|
|
let call_id = "shell-byte-marker";
|
|
let args = json!({
|
|
"command": "seq 1 150",
|
|
"timeout_ms": 5_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let done_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
fixture
|
|
.submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
|
|
.await?;
|
|
|
|
let output = done_mock
|
|
.single_request()
|
|
.function_call_output_text(call_id)
|
|
.context("shell output present")?;
|
|
|
|
let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*chars truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";
|
|
|
|
assert_regex_match(pattern, &output);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// shell_command output should remain intact when the config opts into a large token budget.
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn shell_command_output_not_truncated_with_custom_limit() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex()
|
|
.with_model("gpt-5.1-codex")
|
|
.with_config(|config| {
|
|
config.tool_output_token_limit = Some(50_000); // ample budget
|
|
});
|
|
let fixture = builder.build(&server).await?;
|
|
|
|
let call_id = "shell-no-trunc";
|
|
let args = json!({
|
|
"command": "seq 1 1000",
|
|
"timeout_ms": 5_000,
|
|
});
|
|
let expected_body: String = (1..=1000).map(|i| format!("{i}\n")).collect();
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let done_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
fixture
|
|
.submit_turn_with_policy(
|
|
"run big output without truncation",
|
|
SandboxPolicy::DangerFullAccess,
|
|
)
|
|
.await?;
|
|
|
|
let output = done_mock
|
|
.single_request()
|
|
.function_call_output_text(call_id)
|
|
.context("shell output present")?;
|
|
|
|
assert!(
|
|
output.ends_with(&expected_body),
|
|
"expected entire shell output when budget increased: {output}"
|
|
);
|
|
assert!(
|
|
!output.contains("truncated"),
|
|
"output should remain untruncated with ample budget"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// MCP server output should also remain intact when the config increases the token limit.
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn mcp_tool_call_output_not_truncated_with_custom_limit() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

    let call_id = "rmcp-untruncated";
    let server_name = "rmcp";
    // Fully-qualified MCP tool name: mcp__<server>__<tool>.
    let tool_name = format!("mcp__{server_name}__echo");
    // 80k chars: large, but well within the raised 50k-token budget below.
    let large_msg = "a".repeat(80_000);
    let args_json = serde_json::json!({ "message": large_msg });

    // First response: model calls the MCP echo tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
            responses::ev_completed("resp-1"),
        ]),
    )
    .await;
    let mock2 = mount_sse_once(
        &server,
        sse(vec![
            responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
            responses::ev_completed("resp-2"),
        ]),
    )
    .await;

    // Compile the rmcp stdio test server.
    let rmcp_test_server_bin = stdio_server_bin()?;

    // Register the stdio MCP server and raise the tool output budget so the
    // echo result survives untruncated.
    let mut builder = test_codex().with_config(move |config| {
        config.tool_output_token_limit = Some(50_000);
        let mut servers = config.mcp_servers.get().clone();
        servers.insert(
            server_name.to_string(),
            codex_core::config::types::McpServerConfig {
                transport: codex_core::config::types::McpServerTransportConfig::Stdio {
                    command: rmcp_test_server_bin,
                    args: Vec::new(),
                    env: None,
                    env_vars: Vec::new(),
                    cwd: None,
                },
                enabled: true,
                // Generous startup timeout: the test binary may be cold-started.
                startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
                tool_timeout_sec: None,
                enabled_tools: None,
                disabled_tools: None,
            },
        );
        // mcp_servers is a Constrained field; .set() validates the new map.
        config
            .mcp_servers
            .set(servers)
            .expect("test mcp servers should accept any configuration");
    });
    let fixture = builder.build(&server).await?;

    fixture
        .submit_turn_with_policy(
            "call the rmcp echo tool with a very large message",
            SandboxPolicy::ReadOnly,
        )
        .await?;

    let output = mock2
        .single_request()
        .function_call_output_text(call_id)
        .context("function_call_output present for rmcp call")?;

    // Untruncated output must still be valid JSON.
    let parsed: Value = serde_json::from_str(&output)?;
    // 80031 is the expected serialized length of the echo JSON for an 80_000-char
    // message — presumably JSON framing plus the "ECHOING: " prefix; update if
    // the rmcp test server's serialization changes.
    assert_eq!(
        output.len(),
        80031,
        "parsed MCP output should retain its serialized length"
    );
    let expected_echo = format!("ECHOING: {large_msg}");
    let echo_str = parsed["echo"]
        .as_str()
        .context("echo field should be a string in rmcp echo output")?;
    // Compare lengths first for a readable failure before the full-content check.
    assert_eq!(
        echo_str.len(),
        expected_echo.len(),
        "echo length should match"
    );
    assert_eq!(echo_str, expected_echo);
    assert!(
        !output.contains("truncated"),
        "output should not include truncation markers when limit is raised: {output}"
    );

    Ok(())
}
|