mirror of
https://github.com/openai/codex.git
synced 2026-05-29 15:30:22 +00:00
Revert "Add app-server startup benchmark crate" (#24937)
Reverts openai/codex#24651, broke musl job https://github.com/openai/codex/actions/runs/26585495205/job/78330166927
This commit is contained in:
committed by
GitHub
parent
6c1215dac6
commit
c2508db60d
7
.github/workflows/rust-ci-full.yml
vendored
7
.github/workflows/rust-ci-full.yml
vendored
@@ -402,13 +402,6 @@ jobs:
|
||||
- name: cargo clippy
|
||||
run: cargo clippy --target ${{ matrix.target }} --tests --profile ${{ matrix.profile }} --timings -- -D warnings
|
||||
|
||||
- uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49
|
||||
with:
|
||||
tool: just
|
||||
|
||||
- name: End-to-end benchmark smoke test
|
||||
run: just bench-e2e-smoke
|
||||
|
||||
- name: Upload Cargo timings (clippy)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
|
||||
11
codex-rs/Cargo.lock
generated
11
codex-rs/Cargo.lock
generated
@@ -2058,17 +2058,6 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-app-server-start-bench"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"codex-app-server-protocol",
|
||||
"divan",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-app-server-test-client"
|
||||
version = "0.0.0"
|
||||
|
||||
@@ -14,7 +14,6 @@ members = [
|
||||
"app-server-client",
|
||||
"app-server-protocol",
|
||||
"app-server-test-client",
|
||||
"benchmarks/app-server-start",
|
||||
"debug-client",
|
||||
"apply-patch",
|
||||
"arg0",
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
load("//:defs.bzl", "codex_rust_crate", "workspace_root_test")
|
||||
|
||||
codex_rust_crate(
|
||||
name = "app-server-start-bench",
|
||||
crate_name = "codex_app_server_start_bench",
|
||||
)
|
||||
|
||||
workspace_root_test(
|
||||
name = "startup-bench",
|
||||
# Include the Bazel-built CLI in this target's runfiles and expose its
|
||||
# resolved executable path through the environment consumed by the runner.
|
||||
runfile_env = {
|
||||
"//codex-rs/cli:codex": "CODEX_BIN",
|
||||
},
|
||||
size = "large",
|
||||
tags = ["manual"],
|
||||
test_bin = ":codex-app-server-start-bench",
|
||||
timeout = "long",
|
||||
workspace_root_marker = "//codex-rs/utils/cargo-bin:repo_root.marker",
|
||||
)
|
||||
@@ -1,16 +0,0 @@
|
||||
[package]
|
||||
name = "codex-app-server-start-bench"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
divan = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
@@ -1,174 +0,0 @@
|
||||
use std::io::BufRead;
|
||||
use std::io::BufReader;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Child;
|
||||
use std::process::ChildStdin;
|
||||
use std::process::ChildStdout;
|
||||
use std::process::Command;
|
||||
use std::process::Stdio;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use codex_app_server_protocol::ClientInfo;
|
||||
use codex_app_server_protocol::ClientRequest;
|
||||
use codex_app_server_protocol::InitializeCapabilities;
|
||||
use codex_app_server_protocol::InitializeParams;
|
||||
use codex_app_server_protocol::InitializeResponse;
|
||||
use codex_app_server_protocol::JSONRPCMessage;
|
||||
use codex_app_server_protocol::JSONRPCNotification;
|
||||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use divan::Bencher;
|
||||
|
||||
fn main() {
|
||||
divan::main();
|
||||
}
|
||||
|
||||
// Process startup is slow enough that 30 samples keep runs practical, and
|
||||
// sample size 1 lets each measured server be reaped before the next starts.
|
||||
// The benchmark warms one CODEX_HOME before timing to measure normal restarts.
|
||||
// This e2e runner receives its separately built Codex binary from just or Bazel.
|
||||
#[divan::bench(sample_count = 30, sample_size = 1, skip_ext_time)]
|
||||
#[allow(clippy::expect_used)]
|
||||
fn initialize_response(bencher: Bencher) {
|
||||
let codex_bin = std::env::var_os("CODEX_BIN")
|
||||
.map(PathBuf::from)
|
||||
.expect("CODEX_BIN must point to the codex binary; run via `just bench-e2e` or Bazel");
|
||||
let codex_home = tempfile::tempdir().expect("benchmark CODEX_HOME should be created");
|
||||
drop(
|
||||
start_until_initialize_response(&codex_bin, codex_home.path())
|
||||
.expect("benchmark CODEX_HOME should be initialized"),
|
||||
);
|
||||
|
||||
bencher.bench_local(|| {
|
||||
start_until_initialize_response(&codex_bin, codex_home.path())
|
||||
.expect("codex app-server should return an initialize response")
|
||||
});
|
||||
}
|
||||
|
||||
/// A running app-server that has returned a valid `initialize` response.
|
||||
///
|
||||
/// Divan drops benchmark outputs after it stops each timer interval. Returning
|
||||
/// this value from the measured closure keeps the `initialized` notification
|
||||
/// and forced process reaping outside startup latency.
|
||||
struct InitializedAppServer {
|
||||
child: Child,
|
||||
stdin: Option<ChildStdin>,
|
||||
stdout: Option<BufReader<ChildStdout>>,
|
||||
acknowledge_on_drop: bool,
|
||||
}
|
||||
|
||||
/// Spawn a stdio app-server and return once it responds successfully to `initialize`.
|
||||
fn start_until_initialize_response(
|
||||
codex_bin: &Path,
|
||||
codex_home: &Path,
|
||||
) -> Result<InitializedAppServer> {
|
||||
let request_id = RequestId::Integer(0);
|
||||
let child = Command::new(codex_bin)
|
||||
.arg("app-server")
|
||||
.arg("--listen")
|
||||
.arg("stdio://")
|
||||
.env("CODEX_HOME", codex_home)
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::null())
|
||||
.spawn()
|
||||
.with_context(|| format!("failed to spawn `{}` app-server", codex_bin.display()))?;
|
||||
let mut server = InitializedAppServer {
|
||||
child,
|
||||
stdin: None,
|
||||
stdout: None,
|
||||
acknowledge_on_drop: false,
|
||||
};
|
||||
server.stdin = Some(
|
||||
server
|
||||
.child
|
||||
.stdin
|
||||
.take()
|
||||
.context("app-server stdin unavailable")?,
|
||||
);
|
||||
server.stdout = Some(BufReader::new(
|
||||
server
|
||||
.child
|
||||
.stdout
|
||||
.take()
|
||||
.context("app-server stdout unavailable")?,
|
||||
));
|
||||
|
||||
let request = ClientRequest::Initialize {
|
||||
request_id: request_id.clone(),
|
||||
params: InitializeParams {
|
||||
client_info: ClientInfo {
|
||||
name: "codex-app-server-start-bench".to_string(),
|
||||
title: Some("Codex App Server Start Benchmark".to_string()),
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
},
|
||||
capabilities: Some(InitializeCapabilities {
|
||||
experimental_api: false,
|
||||
request_attestation: false,
|
||||
opt_out_notification_methods: None,
|
||||
}),
|
||||
},
|
||||
};
|
||||
let stdin = server
|
||||
.stdin
|
||||
.as_mut()
|
||||
.context("app-server stdin unavailable")?;
|
||||
writeln!(stdin, "{}", serde_json::to_string(&request)?)?;
|
||||
stdin
|
||||
.flush()
|
||||
.context("failed to flush initialize request")?;
|
||||
|
||||
let mut line = String::new();
|
||||
loop {
|
||||
line.clear();
|
||||
if server
|
||||
.stdout
|
||||
.as_mut()
|
||||
.context("app-server stdout unavailable")?
|
||||
.read_line(&mut line)?
|
||||
== 0
|
||||
{
|
||||
bail!("app-server closed stdout before returning initialize response");
|
||||
}
|
||||
|
||||
match serde_json::from_str::<JSONRPCMessage>(line.trim())? {
|
||||
JSONRPCMessage::Response(JSONRPCResponse { id, result }) if id == request_id => {
|
||||
let _: InitializeResponse = serde_json::from_value(result)
|
||||
.context("initialize response missing expected payload")?;
|
||||
server.acknowledge_on_drop = true;
|
||||
return Ok(server);
|
||||
}
|
||||
JSONRPCMessage::Error(error) if error.id == request_id => {
|
||||
bail!("initialize failed: {error:?}");
|
||||
}
|
||||
JSONRPCMessage::Request(_)
|
||||
| JSONRPCMessage::Response(_)
|
||||
| JSONRPCMessage::Notification(_)
|
||||
| JSONRPCMessage::Error(_) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for InitializedAppServer {
|
||||
fn drop(&mut self) {
|
||||
if self.acknowledge_on_drop
|
||||
&& let Some(stdin) = self.stdin.as_mut()
|
||||
{
|
||||
let initialized = JSONRPCMessage::Notification(JSONRPCNotification {
|
||||
method: "initialized".to_string(),
|
||||
params: None,
|
||||
});
|
||||
if let Ok(payload) = serde_json::to_string(&initialized) {
|
||||
let _ = writeln!(stdin, "{payload}");
|
||||
let _ = stdin.flush();
|
||||
}
|
||||
}
|
||||
let _ = self.stdin.take();
|
||||
let _ = self.child.kill();
|
||||
let _ = self.child.wait();
|
||||
}
|
||||
}
|
||||
22
justfile
22
justfile
@@ -2,9 +2,6 @@ set working-directory := "codex-rs"
|
||||
set positional-arguments
|
||||
|
||||
rust_min_stack := "8388608" # 8 MiB
|
||||
e2e_benchmark_packages := "codex-app-server-start-bench"
|
||||
e2e_codex_bin := if os() == "windows" { "./target/release/codex.exe" } else { "./target/release/codex" }
|
||||
e2e_smoke_codex_bin := if os() == "windows" { "./target/debug/codex.exe" } else { "./target/debug/codex" }
|
||||
|
||||
# Display help
|
||||
help:
|
||||
@@ -33,11 +30,6 @@ app-server-test-client *args:
|
||||
cargo build -p codex-cli
|
||||
cargo run -p codex-app-server-test-client -- --codex-bin ./target/debug/codex "$@"
|
||||
|
||||
# Run end-to-end performance benchmarks that require a built Codex binary.
|
||||
bench-e2e *args:
|
||||
cargo build --release -p codex-cli --bin codex
|
||||
for package in {{ e2e_benchmark_packages }}; do CODEX_BIN="{{ e2e_codex_bin }}" cargo run --release -p "$package" -- --bench "$@"; done
|
||||
|
||||
# Format Rust and Python SDK code.
|
||||
fmt:
|
||||
cargo fmt -- --config imports_granularity=Item 2>/dev/null
|
||||
@@ -63,19 +55,13 @@ test *args:
|
||||
RUST_MIN_STACK={{ rust_min_stack }} cargo nextest run --no-fail-fast "$@"
|
||||
just bench-smoke
|
||||
|
||||
# Run unit-test-style benchmark targets managed entirely by Cargo.
|
||||
bench-unit *args:
|
||||
# Run explicit workspace benchmark targets.
|
||||
bench *args:
|
||||
cargo bench --workspace --bench '*' "$@"
|
||||
|
||||
# Smoke Cargo-managed benchmarks and compile e2e runners without measured binaries.
|
||||
# Run benchmark targets once to ensure they start successfully.
|
||||
bench-smoke:
|
||||
just bench-unit -- --test
|
||||
for package in {{ e2e_benchmark_packages }}; do cargo build -p "$package" --bin "$package"; done
|
||||
|
||||
# Run end-to-end performance benchmark targets once.
|
||||
bench-e2e-smoke:
|
||||
cargo build -p codex-cli --bin codex
|
||||
for package in {{ e2e_benchmark_packages }}; do CODEX_BIN="{{ e2e_smoke_codex_bin }}" cargo run -p "$package" -- --test; done
|
||||
just bench -- --test
|
||||
|
||||
# Build and run Codex from source using Bazel.
|
||||
# Note we have to use the combination of `[no-cd]` and `--run_under="cd $PWD &&"`
|
||||
|
||||
Reference in New Issue
Block a user