Compare commits

...

5 Commits

Author SHA1 Message Date
easong-openai
ae53c67805 import 2025-08-02 10:20:42 -07:00
easong-openai
5644ae97ee import path 2025-08-01 22:34:53 -07:00
pap-openai
5ec060320f Merge branch 'main' into initial-context-tool 2025-08-01 00:19:54 +01:00
easong-openai
2fd7c93f07 more test, fmt 2025-07-31 01:42:01 -07:00
easong-openai
fb51476d05 initial environment context feeding 2025-07-31 01:25:22 -07:00
2 changed files with 328 additions and 1 deletions

View File

@@ -4,6 +4,7 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
@@ -91,6 +92,7 @@ use crate::safety::assess_safety_for_untrusted_command;
use crate::shell;
use crate::user_notification::UserNotification;
use crate::util::backoff;
use tokio::fs;
/// The high-level interface to the Codex system.
/// It operates as a queue pair where you send submissions and receive events.
@@ -945,7 +947,21 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
return;
}
let initial_input_for_turn = ResponseInputItem::from(input);
let mut effective_input = input;
let is_first_turn = {
let st = sess.state.lock().unwrap();
st.history.contents().is_empty()
};
if is_first_turn {
if let Some(env_text) = get_initial_env_context(&sess.cwd).await {
let mut prefixed = Vec::with_capacity(effective_input.len() + 1);
prefixed.push(InputItem::Text { text: env_text });
prefixed.extend(effective_input.into_iter());
effective_input = prefixed;
}
}
let initial_input_for_turn = ResponseInputItem::from(effective_input);
sess.record_conversation_items(&[initial_input_for_turn.clone().into()])
.await;
@@ -1106,6 +1122,128 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
sess.tx_event.send(event).await.ok();
}
async fn get_initial_env_context(cwd: &Path) -> Option<String> {
const MAX_LINES: usize = 50;
let mut dir = match fs::read_dir(cwd).await {
Ok(d) => d,
Err(_) => return None,
};
let mut lines = Vec::with_capacity(MAX_LINES);
let mut seen = 0usize;
let mut truncated = false;
loop {
match dir.next_entry().await {
Ok(Some(entry)) => {
let name = entry.file_name();
let name = name.to_string_lossy();
if name.starts_with('.') {
continue;
}
seen += 1;
if lines.len() < MAX_LINES {
lines.push(name.to_string());
} else {
truncated = true;
break;
}
}
Ok(None) => break,
Err(_) => return None,
}
}
if lines.is_empty() {
return None;
}
let mut out = String::new();
out.push_str("Environment snapshot (output of `ls | head -n 50` in cwd):\n");
out.push_str(&format!("{}\n\n", cwd.display()));
for line in &lines {
out.push_str(line);
out.push('\n');
}
if truncated || seen > lines.len() {
out.push_str("… (truncated)\n");
}
Some(out)
}
#[cfg(test)]
mod tests {
#![allow(clippy::expect_used, clippy::unwrap_used)]
use super::get_initial_env_context;
use std::fs;
use tempfile::TempDir;
#[tokio::test]
async fn env_context_truncates_after_50_visible_entries() {
let tmp = TempDir::new().unwrap();
// Create 65 non-hidden files and a few hidden ones. Dotfiles should be skipped.
for i in 0..65u32 {
fs::write(tmp.path().join(format!("file_{i:03}.txt")), b"x").unwrap();
}
fs::write(tmp.path().join(".hidden_a"), b"x").unwrap();
fs::write(tmp.path().join(".hidden_b"), b"x").unwrap();
let ctx = get_initial_env_context(tmp.path())
.await
.expect("should produce context");
// Header and cwd should be present.
assert!(ctx.starts_with("Environment snapshot (output of `ls | head -n 50` in cwd):\n"));
assert!(ctx.contains(&format!("{}\n\n", tmp.path().display())));
// Count listed entries (exclude header/cwd/blank and possible truncation marker)
let mut count = 0usize;
let mut saw_truncated = false;
for line in ctx.lines().skip(2) {
if line.is_empty() {
continue;
}
if line == "… (truncated)" {
saw_truncated = true;
break;
}
count += 1;
}
assert_eq!(count, 50, "should list exactly 50 visible entries");
assert!(
saw_truncated,
"should indicate truncation when more than 50 entries exist"
);
}
#[tokio::test]
async fn env_context_no_truncation_when_fewer_than_50() {
let tmp = TempDir::new().unwrap();
// 3 visible + 1 hidden
for name in ["a.txt", "b.txt", "c.txt", ".secret"] {
fs::write(tmp.path().join(name), b"x").unwrap();
}
let ctx = get_initial_env_context(tmp.path())
.await
.expect("should produce context");
let mut lines = ctx.lines().skip(2).filter(|l| !l.is_empty());
let mut names = Vec::new();
for l in &mut lines {
if l == "… (truncated)" {
panic!("should not be truncated");
}
names.push(l.to_string());
}
// Hidden should be excluded; only 3 visible entries.
assert_eq!(names.len(), 3);
assert!(names.iter().all(|n| n != ".secret"));
}
}
async fn run_turn(
sess: &Session,
sub_id: String,

View File

@@ -0,0 +1,189 @@
#![allow(clippy::expect_used, clippy::unwrap_used)]
use codex_core::Codex;
use codex_core::CodexSpawnOk;
use codex_core::ModelProviderInfo;
use codex_core::built_in_model_providers;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_login::CodexAuth;
use core_test_support::load_default_config_for_test;
use core_test_support::load_sse_fixture_with_id;
use core_test_support::wait_for_event;
use tempfile::TempDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn first_turn_includes_environment_snapshot() {
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!(
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
return;
}
// Create a temporary working directory with a few files (including a hidden one).
let cwd = TempDir::new().unwrap();
std::fs::write(cwd.path().join("a.txt"), b"x").unwrap();
std::fs::write(cwd.path().join("b.txt"), b"x").unwrap();
std::fs::write(cwd.path().join(".hidden"), b"x").unwrap();
// Mock Responses API server that immediately completes the turn.
let server = MockServer::start().await;
let sse = load_sse_fixture_with_id("tests/fixtures/completed_template.json", "resp1");
let first = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse, "text/event-stream");
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(first)
.mount(&server)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
// Initialize session using the temp cwd and the mock provider.
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
config.cwd = cwd.path().to_path_buf();
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
let CodexSpawnOk { codex, .. } = Codex::spawn(
config,
Some(CodexAuth::from_api_key("Test API Key".to_string())),
ctrl_c.clone(),
)
.await
.unwrap();
// Submit a simple user message the agent should inject the environment snapshot as
// an additional content item at the start of the first user message.
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "hello".into(),
}],
})
.await
.unwrap();
// Wait for the task to complete so the request is dispatched.
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Read the captured request and verify the first message content includes the snapshot.
let request = &server.received_requests().await.unwrap()[0];
let body = request.body_json::<serde_json::Value>().unwrap();
// We expect the first (and only) input item to be a user message with multiple content entries.
let first_input = &body["input"][0];
assert_eq!(first_input["role"], "user");
// The first content item should be the injected environment snapshot.
let first_text = first_input["content"][0]["text"].as_str().unwrap();
assert!(first_text.starts_with("Environment snapshot (output of `ls | head -n 50` in cwd):"));
// It should reference the cwd and include visible files, but not hidden ones.
assert!(first_text.contains(&cwd.path().display().to_string()));
assert!(first_text.contains("a.txt"));
assert!(first_text.contains("b.txt"));
assert!(!first_text.contains(".hidden"));
// The user's original message should appear in the second content item.
let second_text = first_input["content"][1]["text"].as_str().unwrap();
assert_eq!(second_text, "hello");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn snapshot_is_not_injected_on_second_turn() {
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!(
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
return;
}
// Prepare cwd with a couple of files (and a hidden one).
let cwd = TempDir::new().unwrap();
std::fs::write(cwd.path().join("first.txt"), b"x").unwrap();
std::fs::write(cwd.path().join("second.txt"), b"x").unwrap();
std::fs::write(cwd.path().join(".dot"), b"x").unwrap();
// Mock server that accepts two requests and completes both.
let server = MockServer::start().await;
let sse = load_sse_fixture_with_id("tests/fixtures/completed_template.json", "resp1");
let responder = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse, "text/event-stream");
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(responder)
.expect(2)
.mount(&server)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
config.cwd = cwd.path().to_path_buf();
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
let CodexSpawnOk { codex, .. } = Codex::spawn(
config,
Some(CodexAuth::from_api_key("Test API Key".to_string())),
ctrl_c.clone(),
)
.await
.unwrap();
// First user message.
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "first".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Second user message.
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "second".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Verify the second request's last user message does not include the environment snapshot.
let requests = server.received_requests().await.unwrap();
assert!(
requests.len() >= 2,
"expected two requests to the mock server"
);
let second_req = &requests[1];
let body = second_req.body_json::<serde_json::Value>().unwrap();
let input = body["input"].as_array().expect("input array");
let last = input.last().expect("at least one input item");
assert_eq!(last["role"], "user");
let last_text = last["content"][0]["text"].as_str().unwrap();
// Should be exactly the submitted text, without the snapshot header prefix.
assert_eq!(last_text, "second");
}