Compare commits

...

2 Commits

Author SHA1 Message Date
Dylan Hurd
ecba6be8b6 use git status 2025-08-19 19:07:08 -07:00
Dylan Hurd
b0dbe712d1 [context] Add env context to every turn with gitinfo 2025-08-19 18:51:16 -07:00
5 changed files with 303 additions and 26 deletions

View File

@@ -134,6 +134,13 @@ If completing the user's task requires writing or modifying files, your code and
- Do not use one-letter variable names unless explicitly requested.
- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor.
## Git awareness
Before starting a task and while making changes, read the `environment_context` provided by the harness and use it to guide your actions:
- Git info: review `Current git info` (`commit`, `branch`, `remote`) to align with the users context and check if the user has made other edits.
## Testing your work
If the codebase has tests or the ability to build or run, you should use them to verify that your work is complete. Generally, your testing philosophy should be to start as specific as possible to the code you changed so that you can catch issues efficiently, then make your way to broader tests as you build confidence. If there's no test for the code you changed, and if the adjacent patterns in the codebases show that there's a logical place for you to add a test, you may do so. However, do not add tests to codebases with no tests, or where the patterns don't indicate so.

View File

@@ -53,6 +53,7 @@ use crate::exec::StdoutStream;
use crate::exec::StreamOutput;
use crate::exec::process_exec_tool_call;
use crate::exec_env::create_env;
use crate::git_info::collect_git_info;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::mcp_tool_call::handle_mcp_tool_call;
use crate::model_family::find_family_for_model;
@@ -267,6 +268,7 @@ pub(crate) struct TurnContext {
pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
pub(crate) disable_response_storage: bool,
pub(crate) tools_config: ToolsConfig,
pub(crate) git_info: Option<crate::git_info::GitInfo>,
}
impl TurnContext {
@@ -472,6 +474,9 @@ impl Session {
model_reasoning_summary,
session_id,
);
// Collect git info for the initial cwd
let git_info_val = collect_git_info(&cwd).await;
let turn_context = TurnContext {
client,
tools_config: ToolsConfig::new(
@@ -488,6 +493,7 @@ impl Session {
shell_environment_policy: config.shell_environment_policy.clone(),
cwd,
disable_response_storage,
git_info: git_info_val,
};
let sess = Arc::new(Session {
session_id,
@@ -501,17 +507,11 @@ impl Session {
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
});
// record the initial user instructions and environment context,
// regardless of whether we restored items.
let mut conversation_items = Vec::<ResponseItem>::with_capacity(2);
// record the initial user instructions (environment context is recorded per-turn)
let mut conversation_items = Vec::<ResponseItem>::with_capacity(1);
if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
conversation_items.push(Prompt::format_user_instructions_message(user_instructions));
}
conversation_items.push(ResponseItem::from(EnvironmentContext::new(
turn_context.cwd.to_path_buf(),
turn_context.approval_policy,
turn_context.sandbox_policy.clone(),
)));
sess.record_conversation_items(&conversation_items).await;
// Dispatch the SessionConfiguredEvent first and then report any errors.
@@ -1051,6 +1051,13 @@ async fn submission_loop(
config.include_apply_patch_tool,
);
// Recompute git info if cwd changed
let new_git_info = if cwd.is_some() {
collect_git_info(&new_cwd).await
} else {
prev.git_info.clone()
};
let new_turn_context = TurnContext {
client,
tools_config,
@@ -1061,18 +1068,12 @@ async fn submission_loop(
shell_environment_policy: prev.shell_environment_policy.clone(),
cwd: new_cwd.clone(),
disable_response_storage: prev.disable_response_storage,
git_info: new_git_info,
};
// Install the new persistent context for subsequent tasks/turns.
turn_context = Arc::new(new_turn_context);
if cwd.is_some() || approval_policy.is_some() || sandbox_policy.is_some() {
sess.record_conversation_items(&[ResponseItem::from(EnvironmentContext::new(
new_cwd,
new_approval_policy,
new_sandbox_policy,
))])
.await;
}
// Environment context is recorded at the start of each turn.
}
Op::UserInput { items } => {
// attempt to inject input into current task
@@ -1117,6 +1118,9 @@ async fn submission_loop(
sess.session_id,
);
// Collect git info for the per-turn cwd
let per_turn_git_info = collect_git_info(&cwd).await;
let fresh_turn_context = TurnContext {
client,
tools_config: ToolsConfig::new(
@@ -1133,8 +1137,8 @@ async fn submission_loop(
shell_environment_policy: turn_context.shell_environment_policy.clone(),
cwd,
disable_response_storage: turn_context.disable_response_storage,
git_info: per_turn_git_info,
};
// TODO: record the new environment context in the conversation history
// no current task, spawn a new one with the perturn context
let task =
AgentTask::spawn(sess.clone(), Arc::new(fresh_turn_context), sub.id, items);
@@ -1302,8 +1306,17 @@ async fn run_task(
}
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
sess.record_conversation_items(&[initial_input_for_turn.clone().into()])
.await;
// Record environment context first, then the user's initial input for this turn.
sess.record_conversation_items(&[
ResponseItem::from(EnvironmentContext::new(
turn_context.cwd.clone(),
turn_context.approval_policy,
turn_context.sandbox_policy.clone(),
turn_context.git_info.clone(),
)),
initial_input_for_turn.clone().into(),
])
.await;
let mut last_agent_message: Option<String> = None;
// Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains

View File

@@ -2,6 +2,8 @@ use serde::Deserialize;
use serde::Serialize;
use strum_macros::Display as DeriveDisplay;
use crate::git_info::GitInfo;
use crate::git_info::fmt_for_env_context;
use crate::models::ContentItem;
use crate::models::ResponseItem;
use crate::protocol::AskForApproval;
@@ -28,6 +30,7 @@ pub(crate) struct EnvironmentContext {
pub approval_policy: AskForApproval,
pub sandbox_mode: SandboxMode,
pub network_access: NetworkAccess,
pub git_info: Option<GitInfo>,
}
impl EnvironmentContext {
@@ -35,6 +38,7 @@ impl EnvironmentContext {
cwd: PathBuf,
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
git_info: Option<GitInfo>,
) -> Self {
Self {
cwd,
@@ -55,6 +59,7 @@ impl EnvironmentContext {
}
}
},
git_info,
}
}
}
@@ -69,6 +74,7 @@ impl Display for EnvironmentContext {
writeln!(f, "Approval policy: {}", self.approval_policy)?;
writeln!(f, "Sandbox mode: {}", self.sandbox_mode)?;
writeln!(f, "Network access: {}", self.network_access)?;
writeln!(f, "{}", fmt_for_env_context(self.git_info.as_ref()))?;
Ok(())
}
}

View File

@@ -9,7 +9,7 @@ use tokio::time::timeout;
/// Timeout for git commands to prevent freezing on large repositories
const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
#[derive(Serialize, Deserialize, Clone, Debug)]
#[derive(Serialize, Deserialize, Clone, Debug, Eq, PartialEq)]
pub struct GitInfo {
/// Current commit hash (SHA)
#[serde(skip_serializing_if = "Option::is_none")]
@@ -20,6 +20,9 @@ pub struct GitInfo {
/// Repository URL (if available from remote)
#[serde(skip_serializing_if = "Option::is_none")]
pub repository_url: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub diff_stat: Option<String>,
}
/// Collect git repository information from the given working directory using command-line git.
@@ -38,16 +41,18 @@ pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
}
// Run all git info collection commands in parallel
let (commit_result, branch_result, url_result) = tokio::join!(
let (commit_result, branch_result, url_result, diff_result) = tokio::join!(
run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd),
run_git_command_with_timeout(&["status", "-unormal", "--porcelain"], cwd),
);
let mut git_info = GitInfo {
commit_hash: None,
branch: None,
repository_url: None,
diff_stat: None,
};
// Process commit hash
@@ -77,9 +82,41 @@ pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
git_info.repository_url = Some(url.trim().to_string());
}
// Process diff stat
if let Some(output) = diff_result
&& output.status.success()
&& let Ok(diff_stat) = String::from_utf8(output.stdout) {
git_info.diff_stat = Some(diff_stat.trim().to_string());
}
Some(git_info)
}
/// Format Git information for inclusion in the environment context block.
/// When `info` is `None`, it returns a simple "Current git info:\nnone" string.
pub fn fmt_for_env_context(info: Option<&GitInfo>) -> String {
let mut parts: Vec<String> = vec!["Current git info:".to_string()];
if let Some(info) = info {
if let Some(hash) = &info.commit_hash {
let short = &hash[..std::cmp::min(7, hash.len())];
parts.push(format!("commit {short}"));
}
if let Some(branch) = &info.branch {
parts.push(format!("branch {branch}"));
}
if let Some(url) = &info.repository_url {
parts.push(format!("remote {url}"));
}
if let Some(diff_stat) = &info.diff_stat {
parts.push("diff stat:".to_string());
parts.push(diff_stat.clone());
}
} else {
parts.push("none".to_string());
}
parts.join("\n")
}
/// Run a git command with a timeout to prevent blocking on large repositories
async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
let result = timeout(
@@ -278,6 +315,7 @@ mod tests {
commit_hash: Some("abc123def456".to_string()),
branch: Some("main".to_string()),
repository_url: Some("https://github.com/example/repo.git".to_string()),
diff_stat: Some("1 file changed, 1 insertion(+), 1 deletion(-)".to_string()),
};
let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
@@ -289,6 +327,10 @@ mod tests {
parsed["repository_url"],
"https://github.com/example/repo.git"
);
assert_eq!(
parsed["diff_stat"],
"1 file changed, 1 insertion(+), 1 deletion(-)"
);
}
#[test]
@@ -297,6 +339,7 @@ mod tests {
commit_hash: None,
branch: None,
repository_url: None,
diff_stat: None,
};
let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
@@ -306,5 +349,6 @@ mod tests {
assert!(!parsed.as_object().unwrap().contains_key("commit_hash"));
assert!(!parsed.as_object().unwrap().contains_key("branch"));
assert!(!parsed.as_object().unwrap().contains_key("repository_url"));
assert!(!parsed.as_object().unwrap().contains_key("diff_stat"));
}
}

View File

@@ -19,6 +19,196 @@ use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
/// Ensure previous environment_context messages remain stable, even if git status changes.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn environment_context_is_stable_across_git_changes() {
use pretty_assertions::assert_eq;
use std::process::Command;
// Start a mock server that will accept two streaming responses.
let server = MockServer::start().await;
let sse = sse_completed("resp");
let template = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse, "text/event-stream");
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(template)
.expect(2)
.mount(&server)
.await;
// Create a temp cwd and initialize a git repo with a known branch.
let cwd = TempDir::new().unwrap();
let repo = cwd.path();
let envs = vec![
("GIT_CONFIG_GLOBAL", "/dev/null"),
("GIT_CONFIG_NOSYSTEM", "1"),
];
Command::new("git")
.envs(envs.clone())
.arg("init")
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.envs(envs.clone())
.args(["config", "user.name", "Test User"])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.envs(envs.clone())
.args(["config", "user.email", "test@example.com"])
.current_dir(repo)
.output()
.unwrap();
// Create a branch to stabilize branch name expectation.
Command::new("git")
.envs(envs.clone())
.args(["checkout", "-b", "envctx-test-branch"])
.current_dir(repo)
.output()
.unwrap();
std::fs::write(repo.join("a.txt"), "one").unwrap();
Command::new("git")
.envs(envs.clone())
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.envs(envs.clone())
.args(["commit", "-m", "first"])
.current_dir(repo)
.output()
.unwrap();
let commit1 = String::from_utf8(
Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(repo)
.output()
.unwrap()
.stdout,
)
.unwrap();
let commit1_short = &commit1.trim()[..7];
// Configure Codex to use this cwd and our mock provider.
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.cwd = repo.to_path_buf();
config.model_provider = model_provider;
config.user_instructions = Some("be consistent and helpful".to_string());
let conversation_manager = ConversationManager::default();
let codex = conversation_manager
.new_conversation_with_auth(config, Some(CodexAuth::from_api_key("Test API Key")))
.await
.expect("create new conversation")
.conversation;
// First turn
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "hello 1".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Change the git state to a new commit between turns
std::fs::write(repo.join("b.txt"), "two").unwrap();
Command::new("git")
.envs(envs.clone())
.args(["add", "."])
.current_dir(repo)
.output()
.unwrap();
Command::new("git")
.envs(envs)
.args(["commit", "-m", "second"])
.current_dir(repo)
.output()
.unwrap();
let commit2 = String::from_utf8(
Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(repo)
.output()
.unwrap()
.stdout,
)
.unwrap();
let commit2_short = &commit2.trim()[..7];
// Second turn
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "hello 2".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Verify two requests, and that the first env_context remains stable in the second request.
let requests = server.received_requests().await.unwrap();
assert_eq!(requests.len(), 2, "expected two POST requests");
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
let expected_env_text_1 = format!(
"<environment_context>\nCurrent working directory: {}\nApproval policy: on-request\nSandbox mode: read-only\nNetwork access: restricted\nGit info: commit {}, branch envctx-test-branch\n</environment_context>",
repo.to_string_lossy(),
commit1_short,
);
let expected_env_msg_1 = serde_json::json!({
"type": "message",
"id": serde_json::Value::Null,
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text_1 } ]
});
// body1 must include the env context as the second item (after user_instructions)
assert_eq!(body1["input"][1], expected_env_msg_1);
let expected_env_text_2 = format!(
"<environment_context>\nCurrent working directory: {}\nApproval policy: on-request\nSandbox mode: read-only\nNetwork access: restricted\nGit info: commit {}, branch envctx-test-branch\n</environment_context>",
repo.to_string_lossy(),
commit2_short,
);
let expected_env_msg_2 = serde_json::json!({
"type": "message",
"id": serde_json::Value::Null,
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text_2 } ]
});
let expected_user_message_2 = serde_json::json!({
"type": "message",
"id": serde_json::Value::Null,
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
// body2 should equal body1's full input concatenated with the new env ctx and user message
let expected_body2 = serde_json::json!(
[
body1["input"].as_array().unwrap().as_slice(),
[expected_env_msg_2, expected_user_message_2].as_slice(),
]
.concat()
);
assert_eq!(body2["input"], expected_body2);
}
/// Build minimal SSE stream with completed marker using the JSON fixture.
fn sse_completed(id: &str) -> String {
load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
@@ -86,7 +276,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
assert_eq!(requests.len(), 2, "expected two POST requests");
let expected_env_text = format!(
"<environment_context>\nCurrent working directory: {}\nApproval policy: on-request\nSandbox mode: read-only\nNetwork access: restricted\n</environment_context>",
"<environment_context>\nCurrent working directory: {}\nApproval policy: on-request\nSandbox mode: read-only\nNetwork access: restricted\nGit info: none\n</environment_context>",
cwd.path().to_string_lossy()
);
let expected_ui_text =
@@ -123,11 +313,17 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
let expected_env_msg_2 = serde_json::json!({
"type": "message",
"id": serde_json::Value::Null,
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text } ]
});
let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
let expected_body2 = serde_json::json!(
[
body1["input"].as_array().unwrap().as_slice(),
[expected_user_message_2].as_slice(),
[expected_env_msg_2, expected_user_message_2].as_slice(),
]
.concat()
);
@@ -238,7 +434,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
// After overriding the turn context, the environment context should be emitted again
// reflecting the new cwd, approval policy and sandbox settings.
let expected_env_text_2 = format!(
"<environment_context>\nCurrent working directory: {}\nApproval policy: never\nSandbox mode: workspace-write\nNetwork access: enabled\n</environment_context>",
"<environment_context>\nCurrent working directory: {}\nApproval policy: never\nSandbox mode: workspace-write\nNetwork access: enabled\nGit info: none\n</environment_context>",
new_cwd.path().to_string_lossy()
);
let expected_env_msg_2 = serde_json::json!({
@@ -351,10 +547,21 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
// After per-turn overrides, environment context is emitted again
let expected_env_text_2 = format!(
"<environment_context>\nCurrent working directory: {}\nApproval policy: never\nSandbox mode: workspace-write\nNetwork access: enabled\nGit info: none\n</environment_context>",
new_cwd.path().to_string_lossy()
);
let expected_env_msg_2 = serde_json::json!({
"type": "message",
"id": serde_json::Value::Null,
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text_2 } ]
});
let expected_body2 = serde_json::json!(
[
body1["input"].as_array().unwrap().as_slice(),
[expected_user_message_2].as_slice(),
[expected_env_msg_2, expected_user_message_2].as_slice(),
]
.concat()
);