Reapply "Add app-server transport layer with websocket support" (#11370)

Reapply "Add app-server transport layer with websocket support" with
additional fixes from https://github.com/openai/codex/pull/11313/changes
to avoid deadlocking.

This reverts commit 47356ff83c.

## Summary

To avoid deadlocking when queues are full, we maintain separate tokio
tasks dedicated to incoming versus outgoing event handling:
- split the app-server main loop into two tasks in
`run_main_with_transport`
   - inbound handling (`transport_event_rx`)
   - outbound handling (`outgoing_rx` + `thread_created_rx`)
- separate incoming and outgoing websocket tasks

## Validation

Integration tests, plus thorough end-to-end testing in the Codex app with
more than 10 concurrent requests.

<img width="1365" height="979" alt="Screenshot 2026-02-10 at 2 54 22 PM"
src="https://github.com/user-attachments/assets/47ca2c13-f322-4e5c-bedd-25859cbdc45f"
/>

---------

Co-authored-by: jif-oai <jif@openai.com>
This commit is contained in:
Max Johnson
2026-02-11 10:13:39 -08:00
committed by GitHub
parent 577a416f9a
commit 7053aa5457
19 changed files with 1940 additions and 388 deletions

View File

@@ -36,8 +36,9 @@ async fn app_server_default_analytics_disabled_without_flag() -> Result<()> {
.map_err(|err| anyhow::anyhow!(err.to_string()))?;
// With analytics unset in the config and the default flag is false, metrics are disabled.
// No provider is built.
assert_eq!(provider.is_none(), true);
// A provider may still exist for non-metrics telemetry, so check metrics specifically.
let has_metrics = provider.as_ref().and_then(|otel| otel.metrics()).is_some();
assert_eq!(has_metrics, false);
Ok(())
}

View File

@@ -560,9 +560,22 @@ fn assert_layers_user_then_optional_system(
layers: &[codex_app_server_protocol::ConfigLayer],
user_file: AbsolutePathBuf,
) -> Result<()> {
assert_eq!(layers.len(), 2);
assert_eq!(layers[0].name, ConfigLayerSource::User { file: user_file });
assert!(matches!(layers[1].name, ConfigLayerSource::System { .. }));
let mut first_index = 0;
if matches!(
layers.first().map(|layer| &layer.name),
Some(ConfigLayerSource::LegacyManagedConfigTomlFromMdm)
) {
first_index = 1;
}
assert_eq!(layers.len(), first_index + 2);
assert_eq!(
layers[first_index].name,
ConfigLayerSource::User { file: user_file }
);
assert!(matches!(
layers[first_index + 1].name,
ConfigLayerSource::System { .. }
));
Ok(())
}
@@ -571,12 +584,25 @@ fn assert_layers_managed_user_then_optional_system(
managed_file: AbsolutePathBuf,
user_file: AbsolutePathBuf,
) -> Result<()> {
assert_eq!(layers.len(), 3);
let mut first_index = 0;
if matches!(
layers.first().map(|layer| &layer.name),
Some(ConfigLayerSource::LegacyManagedConfigTomlFromMdm)
) {
first_index = 1;
}
assert_eq!(layers.len(), first_index + 3);
assert_eq!(
layers[0].name,
layers[first_index].name,
ConfigLayerSource::LegacyManagedConfigTomlFromFile { file: managed_file }
);
assert_eq!(layers[1].name, ConfigLayerSource::User { file: user_file });
assert!(matches!(layers[2].name, ConfigLayerSource::System { .. }));
assert_eq!(
layers[first_index + 1].name,
ConfigLayerSource::User { file: user_file }
);
assert!(matches!(
layers[first_index + 2].name,
ConfigLayerSource::System { .. }
));
Ok(())
}

View File

@@ -0,0 +1,263 @@
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use app_test_support::create_mock_responses_server_sequence_unchecked;
use codex_app_server_protocol::ClientInfo;
use codex_app_server_protocol::InitializeParams;
use codex_app_server_protocol::JSONRPCError;
use codex_app_server_protocol::JSONRPCMessage;
use codex_app_server_protocol::JSONRPCRequest;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use futures::SinkExt;
use futures::StreamExt;
use serde_json::json;
use std::net::SocketAddr;
use std::path::Path;
use std::process::Stdio;
use tempfile::TempDir;
use tokio::io::AsyncBufReadExt;
use tokio::process::Child;
use tokio::process::Command;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::sleep;
use tokio::time::timeout;
use tokio_tungstenite::MaybeTlsStream;
use tokio_tungstenite::WebSocketStream;
use tokio_tungstenite::connect_async;
use tokio_tungstenite::tungstenite::Message as WebSocketMessage;
// Upper bound on how long a single websocket frame read may take before the
// test helper `read_jsonrpc_message` gives up.
const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(5);
// Client-side websocket stream type produced by `connect_websocket`.
type WsClient = WebSocketStream<MaybeTlsStream<tokio::net::TcpStream>>;
/// Starts the app-server in websocket mode, attaches two clients, and checks
/// that initialization state and responses are tracked per connection rather
/// than shared between them.
#[tokio::test]
async fn websocket_transport_routes_per_connection_handshake_and_responses() -> Result<()> {
    let mock_server = create_mock_responses_server_sequence_unchecked(Vec::new()).await;
    let home_dir = TempDir::new()?;
    create_config_toml(home_dir.path(), &mock_server.uri(), "never")?;

    let listen_addr = reserve_local_addr()?;
    let mut app_server = spawn_websocket_server(home_dir.path(), listen_addr).await?;

    let mut client_one = connect_websocket(listen_addr).await?;
    let mut client_two = connect_websocket(listen_addr).await?;

    // Only the first client performs the handshake at this point.
    send_initialize_request(&mut client_one, 1, "ws_client_one").await?;
    let init_one = read_response_for_id(&mut client_one, 1).await?;
    assert_eq!(init_one.id, RequestId::Integer(1));

    // The initialize response belongs to the requesting connection only, so
    // the second client must stay silent.
    assert_no_message(&mut client_two, Duration::from_millis(250)).await?;

    // The second client has not initialized yet, so its request is rejected.
    send_config_read_request(&mut client_two, 2).await?;
    let rejection = read_error_for_id(&mut client_two, 2).await?;
    assert_eq!(rejection.error.message, "Not initialized");

    send_initialize_request(&mut client_two, 3, "ws_client_two").await?;
    let init_two = read_response_for_id(&mut client_two, 3).await?;
    assert_eq!(init_two.id, RequestId::Integer(3));

    // Both clients reuse request id 77; each must receive its own reply.
    send_config_read_request(&mut client_one, 77).await?;
    send_config_read_request(&mut client_two, 77).await?;
    let config_one = read_response_for_id(&mut client_one, 77).await?;
    let config_two = read_response_for_id(&mut client_two, 77).await?;
    assert_eq!(config_one.id, RequestId::Integer(77));
    assert_eq!(config_two.id, RequestId::Integer(77));
    assert!(config_one.result.get("config").is_some());
    assert!(config_two.result.get("config").is_some());

    app_server
        .kill()
        .await
        .context("failed to stop websocket app-server process")
}
/// Launches the `codex-app-server` binary listening on `ws://{bind_addr}`.
///
/// The child runs with `CODEX_HOME` pointing at `codex_home` and
/// `RUST_LOG=debug`, is killed when the returned handle is dropped, and has its
/// stderr forwarded line by line to this process's stderr by a background task.
///
/// # Errors
///
/// Fails when the binary cannot be located or the process cannot be spawned.
async fn spawn_websocket_server(codex_home: &Path, bind_addr: SocketAddr) -> Result<Child> {
    let binary = codex_utils_cargo_bin::cargo_bin("codex-app-server")
        .context("should find app-server binary")?;
    let listen_url = format!("ws://{bind_addr}");

    let mut command = Command::new(binary);
    command
        .arg("--listen")
        .arg(listen_url)
        .env("CODEX_HOME", codex_home)
        .env("RUST_LOG", "debug")
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::piped())
        .kill_on_drop(true);

    let mut child = command
        .spawn()
        .context("failed to spawn websocket app-server process")?;

    if let Some(child_stderr) = child.stderr.take() {
        let mut lines = tokio::io::BufReader::new(child_stderr).lines();
        // Forward the child's stderr until it ends or a read fails.
        tokio::spawn(async move {
            loop {
                match lines.next_line().await {
                    Ok(Some(line)) => eprintln!("[websocket app-server stderr] {line}"),
                    Ok(None) | Err(_) => break,
                }
            }
        });
    }

    Ok(child)
}
/// Picks a free loopback address by binding to port 0 and reading back the
/// address the OS assigned.
///
/// The temporary listener is closed before returning, so the port is only
/// free at the time of the call; nothing here keeps it reserved afterwards.
fn reserve_local_addr() -> Result<SocketAddr> {
    // The listener is a temporary and is dropped at the end of this statement.
    let reserved = std::net::TcpListener::bind("127.0.0.1:0")?.local_addr()?;
    Ok(reserved)
}
/// Connects a websocket client to `ws://{bind_addr}`, retrying every 50ms
/// until a connection succeeds.
///
/// # Errors
///
/// Returns the last connection error once an attempt fails at or after the
/// 10 second deadline.
async fn connect_websocket(bind_addr: SocketAddr) -> Result<WsClient> {
    let url = format!("ws://{bind_addr}");
    let give_up_at = Instant::now() + Duration::from_secs(10);
    loop {
        let failure = match connect_async(&url).await {
            Ok((client, _handshake)) => return Ok(client),
            Err(failure) => failure,
        };
        if Instant::now() >= give_up_at {
            bail!("failed to connect websocket to {url}: {failure}");
        }
        sleep(Duration::from_millis(50)).await;
    }
}
/// Sends an `initialize` JSON-RPC request with the given request `id`,
/// identifying the client as `client_name` (version `0.1.0`, no capabilities).
async fn send_initialize_request(stream: &mut WsClient, id: i64, client_name: &str) -> Result<()> {
    let client_info = ClientInfo {
        name: client_name.to_owned(),
        title: Some(String::from("WebSocket Test Client")),
        version: String::from("0.1.0"),
    };
    let payload = serde_json::to_value(InitializeParams {
        client_info,
        capabilities: None,
    })?;
    send_request(stream, "initialize", id, Some(payload)).await
}
/// Sends a `config/read` JSON-RPC request with the given request `id`, asking
/// the server not to include config layers in the reply.
async fn send_config_read_request(stream: &mut WsClient, id: i64) -> Result<()> {
    let params = json!({ "includeLayers": false });
    send_request(stream, "config/read", id, Some(params)).await
}
/// Wraps `method`, `id` and optional `params` in a JSON-RPC request and sends
/// it over the websocket.
async fn send_request(
    stream: &mut WsClient,
    method: &str,
    id: i64,
    params: Option<serde_json::Value>,
) -> Result<()> {
    let request = JSONRPCRequest {
        id: RequestId::Integer(id),
        method: method.to_owned(),
        params,
    };
    send_jsonrpc(stream, JSONRPCMessage::Request(request)).await
}
/// Serializes `message` to JSON and sends it as a single websocket text frame.
///
/// # Errors
///
/// Fails when serialization fails or the frame cannot be sent.
async fn send_jsonrpc(stream: &mut WsClient, message: JSONRPCMessage) -> Result<()> {
    let encoded = serde_json::to_string(&message)?;
    let frame = WebSocketMessage::Text(encoded.into());
    stream
        .send(frame)
        .await
        .context("failed to send websocket frame")
}
async fn read_response_for_id(stream: &mut WsClient, id: i64) -> Result<JSONRPCResponse> {
let target_id = RequestId::Integer(id);
loop {
let message = read_jsonrpc_message(stream).await?;
if let JSONRPCMessage::Response(response) = message
&& response.id == target_id
{
return Ok(response);
}
}
}
async fn read_error_for_id(stream: &mut WsClient, id: i64) -> Result<JSONRPCError> {
let target_id = RequestId::Integer(id);
loop {
let message = read_jsonrpc_message(stream).await?;
if let JSONRPCMessage::Error(err) = message
&& err.id == target_id
{
return Ok(err);
}
}
}
/// Reads websocket frames until a text frame arrives and parses it as a
/// JSON-RPC message.
///
/// Ping frames are answered with a pong carrying the same payload; pong and
/// raw frames are ignored. Each individual frame read is bounded by
/// `DEFAULT_READ_TIMEOUT`.
///
/// # Errors
///
/// Fails on a read timeout, end of stream, transport error, close frame,
/// binary frame, or a text frame that is not a valid JSON-RPC message.
async fn read_jsonrpc_message(stream: &mut WsClient) -> Result<JSONRPCMessage> {
    loop {
        let next = timeout(DEFAULT_READ_TIMEOUT, stream.next())
            .await
            .context("timed out waiting for websocket frame")?;
        let frame = next
            .context("websocket stream ended unexpectedly")?
            .context("failed to read websocket frame")?;

        match frame {
            WebSocketMessage::Text(text) => {
                let parsed = serde_json::from_str(text.as_ref())?;
                return Ok(parsed);
            }
            WebSocketMessage::Binary(_) => bail!("unexpected binary websocket frame"),
            WebSocketMessage::Close(frame) => {
                bail!("websocket closed unexpectedly: {frame:?}")
            }
            WebSocketMessage::Ping(payload) => {
                stream.send(WebSocketMessage::Pong(payload)).await?;
            }
            // Nothing to do for these; keep waiting for a text frame.
            WebSocketMessage::Pong(_) | WebSocketMessage::Frame(_) => {}
        }
    }
}
async fn assert_no_message(stream: &mut WsClient, wait_for: Duration) -> Result<()> {
match timeout(wait_for, stream.next()).await {
Ok(Some(Ok(frame))) => bail!("unexpected frame while waiting for silence: {frame:?}"),
Ok(Some(Err(err))) => bail!("unexpected websocket read error: {err}"),
Ok(None) => bail!("websocket closed unexpectedly while waiting for silence"),
Err(_) => Ok(()),
}
}
/// Writes `config.toml` into `codex_home`, selecting a mock model provider
/// whose base URL is `{server_uri}/v1` and using the given `approval_policy`.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be written.
fn create_config_toml(
    codex_home: &Path,
    server_uri: &str,
    approval_policy: &str,
) -> std::io::Result<()> {
    let contents = format!(
        r#"
model = "mock-model"
approval_policy = "{approval_policy}"
sandbox_mode = "read-only"
model_provider = "mock_provider"
[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "responses"
request_max_retries = 0
stream_max_retries = 0
"#
    );
    std::fs::write(codex_home.join("config.toml"), contents)
}

View File

@@ -4,6 +4,7 @@ mod app_list;
mod collaboration_mode_list;
mod compaction;
mod config_rpc;
mod connection_handling_websocket;
mod dynamic_tools;
mod experimental_api;
mod experimental_feature_list;

View File

@@ -5,8 +5,6 @@ use app_test_support::create_mock_responses_server_repeating_assistant;
use app_test_support::create_mock_responses_server_sequence;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::CommandExecutionApprovalDecision;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::JSONRPCError;
@@ -211,9 +209,7 @@ async fn review_start_exec_approval_item_id_matches_command_execution_item() ->
mcp.send_response(
request_id,
serde_json::to_value(CommandExecutionRequestApprovalResponse {
decision: CommandExecutionApprovalDecision::Accept,
})?,
serde_json::json!({ "decision": codex_core::protocol::ReviewDecision::Approved }),
)
.await?;
timeout(