Compare commits

...

4 Commits

Author SHA1 Message Date
pakrym-oai
e05f333e55 Python sdk 2025-09-30 14:35:22 -07:00
pakrym-oai
c89b0e1235 [SDK] Test that a thread can be continued with extra params (#4508) 2025-09-30 17:22:14 +00:00
jif-oai
f6a152848a chore: prompt update to enforce good usage of apply_patch (#3846)
Update the prompt to prevent Codex from using Python scripts or fancy commands to
edit files.

## Testing:
3 scenarios have been considered:
1. Rename codex to meca_code. Proceed to the whole refactor file by
file. Don't ask for approval at each step
2. Add a description to every single function you can find in the repo
3. Rewrite codex.rs in a more idiomatic way. Make sure to touch ONLY
this file and that clippy does not complain at the end

Before this update, roughly 22% (an estimate, as it is sometimes hard to find all
the creative ways the model finds to edit files) of the file edits
were made using something other than a raw `apply_patch`.

After this update, not a single edit made without `apply_patch` was found.

[EDIT]
I still observed a few `["bash", "-lc", "apply_patch"]` invocations when reaching <
10% context left
2025-09-30 10:18:59 -07:00
dedrisian-oai
3592ecb23c Named args for custom prompts (#4474)
Here's the logic:

1. If text is empty and selector is open:
- Enter on a prompt without args should autosubmit the prompt
- Enter on a prompt with numeric args should add `/prompts:name ` to the
text input
- Enter on a prompt with named args should add `/prompts:name ARG1=""
ARG2=""` to the text input
2. If text is not empty but no args are passed:
- For prompts with numeric args -> we allow it to submit (params are
optional)
- For prompts with named args -> we throw an error (all params should
have values)

<img width="454" height="246" alt="Screenshot 2025-09-23 at 2 23 21 PM"
src="https://github.com/user-attachments/assets/fd180a1b-7d17-42ec-b231-8da48828b811"
/>
2025-09-30 10:06:41 -07:00
24 changed files with 1813 additions and 47 deletions

View File

@@ -5,6 +5,7 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL
- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
- When editing or creating files, you MUST use apply_patch as a standalone tool without going through ["bash", "-lc"], `Python`, `cat`, `sed`, ... Example: functions.shell({"command":["apply_patch","*** Begin Patch\n*** Add File: hello.txt\n+Hello, world!\n*** End Patch"]}).
## Editing constraints

View File

@@ -32,6 +32,8 @@ use crate::bottom_pane::paste_burst::FlushResult;
use crate::bottom_pane::prompt_args::expand_custom_prompt;
use crate::bottom_pane::prompt_args::expand_if_numeric_with_positional_args;
use crate::bottom_pane::prompt_args::parse_slash_name;
use crate::bottom_pane::prompt_args::prompt_argument_names;
use crate::bottom_pane::prompt_args::prompt_command_with_arg_placeholders;
use crate::bottom_pane::prompt_args::prompt_has_numeric_placeholders;
use crate::slash_command::SlashCommand;
use crate::style::user_message_style;
@@ -45,6 +47,7 @@ use crate::bottom_pane::textarea::TextArea;
use crate::bottom_pane::textarea::TextAreaState;
use crate::clipboard_paste::normalize_pasted_path;
use crate::clipboard_paste::pasted_image_format;
use crate::history_cell;
use crate::ui_consts::LIVE_PREFIX_COLS;
use codex_file_search::FileMatch;
use std::cell::RefCell;
@@ -72,6 +75,16 @@ struct AttachedImage {
path: PathBuf,
}
/// Context in which a custom prompt is resolved by `prompt_selection_action`.
enum PromptSelectionMode {
/// Completion only: `prompt_selection_action` always answers with an
/// `Insert` action in this mode and never submits.
Completion,
/// Submission attempt: the prompt is submitted when it can be resolved
/// immediately; otherwise command text is inserted so arguments can be typed.
Submit,
}
/// Outcome computed by `prompt_selection_action` for a selected custom prompt.
enum PromptSelectionAction {
/// Replace the composer text with `text`. `cursor` is the position to move
/// the cursor to; `None` means the end of `text` (callers resolve it with
/// `cursor.unwrap_or(text.len())`).
Insert { text: String, cursor: Option<usize> },
/// Submit `text` as the message.
Submit { text: String },
}
pub(crate) struct ChatComposer {
textarea: TextArea,
textarea_state: RefCell<TextAreaState>,
@@ -449,17 +462,17 @@ impl ChatComposer {
}
CommandItem::UserPrompt(idx) => {
if let Some(prompt) = popup.prompt(idx) {
let name = prompt.name.clone();
let starts_with_cmd = first_line
.trim_start()
.starts_with(format!("/{PROMPTS_CMD_PREFIX}:{name}").as_str());
if !starts_with_cmd {
self.textarea.set_text(
format!("/{PROMPTS_CMD_PREFIX}:{name} ").as_str(),
);
}
if !self.textarea.text().is_empty() {
cursor_target = Some(self.textarea.text().len());
match prompt_selection_action(
prompt,
first_line,
PromptSelectionMode::Completion,
) {
PromptSelectionAction::Insert { text, cursor } => {
let target = cursor.unwrap_or(text.len());
self.textarea.set_text(&text);
cursor_target = Some(target);
}
PromptSelectionAction::Submit { .. } => {}
}
}
}
@@ -497,28 +510,21 @@ impl ChatComposer {
}
CommandItem::UserPrompt(idx) => {
if let Some(prompt) = popup.prompt(idx) {
let has_numeric = prompt_has_numeric_placeholders(&prompt.content);
if !has_numeric {
// No placeholders at all: auto-submit the literal content
self.textarea.set_text("");
return (InputResult::Submitted(prompt.content.clone()), true);
}
// Numeric placeholders present.
// If the user already typed positional args on the first line,
// expand immediately and submit; otherwise insert "/name " so
// they can type args.
let first_line = self.textarea.text().lines().next().unwrap_or("");
if let Some(expanded) =
expand_if_numeric_with_positional_args(prompt, first_line)
{
self.textarea.set_text("");
return (InputResult::Submitted(expanded), true);
} else {
let name = prompt.name.clone();
let text = format!("/{PROMPTS_CMD_PREFIX}:{name} ");
self.textarea.set_text(&text);
self.textarea.set_cursor(self.textarea.text().len());
match prompt_selection_action(
prompt,
first_line,
PromptSelectionMode::Submit,
) {
PromptSelectionAction::Submit { text } => {
self.textarea.set_text("");
return (InputResult::Submitted(text), true);
}
PromptSelectionAction::Insert { text, cursor } => {
let target = cursor.unwrap_or(text.len());
self.textarea.set_text(&text);
self.textarea.set_cursor(target);
return (InputResult::None, true);
}
}
}
return (InputResult::None, true);
@@ -932,6 +938,7 @@ impl ChatComposer {
return (InputResult::None, true);
}
let mut text = self.textarea.text().to_string();
let original_input = text.clone();
self.textarea.set_text("");
// Replace all pending pastes in the text
@@ -945,13 +952,20 @@ impl ChatComposer {
// If there is neither text nor attachments, suppress submission entirely.
let has_attachments = !self.attached_images.is_empty();
text = text.trim().to_string();
if let Some(expanded) =
expand_custom_prompt(&text, &self.custom_prompts).unwrap_or_default()
{
let expanded_prompt = match expand_custom_prompt(&text, &self.custom_prompts) {
Ok(expanded) => expanded,
Err(err) => {
self.app_event_tx.send(AppEvent::InsertHistoryCell(Box::new(
history_cell::new_error_event(err.user_message()),
)));
self.textarea.set_text(&original_input);
self.textarea.set_cursor(original_input.len());
return (InputResult::None, true);
}
};
if let Some(expanded) = expanded_prompt {
text = expanded;
}
if text.is_empty() && !has_attachments {
return (InputResult::None, true);
}
@@ -1513,6 +1527,54 @@ impl WidgetRef for ChatComposer {
}
}
/// Decides what selecting `prompt` from the popup should do to the composer.
///
/// * Named placeholders (`$USER`, ...): always insert the
///   `/prompts:name KEY="" ...` skeleton with the cursor inside the first pair
///   of quotes, whatever the `mode`.
/// * Numeric placeholders only: `Completion` inserts `/prompts:name ` (with a
///   trailing space). `Submit` submits the expansion when
///   `expand_if_numeric_with_positional_args` produces one from `first_line`,
///   and otherwise inserts the same `/prompts:name ` text.
/// * No placeholders: `Completion` inserts the bare `/prompts:name` command;
///   `Submit` submits the prompt content verbatim.
fn prompt_selection_action(
    prompt: &CustomPrompt,
    first_line: &str,
    mode: PromptSelectionMode,
) -> PromptSelectionAction {
    // Named arguments behave the same in both modes, so settle them first.
    let named_args = prompt_argument_names(&prompt.content);
    if !named_args.is_empty() {
        let (text, cursor) = prompt_command_with_arg_placeholders(&prompt.name, &named_args);
        return PromptSelectionAction::Insert {
            text,
            cursor: Some(cursor),
        };
    }

    let has_numeric = prompt_has_numeric_placeholders(&prompt.content);
    let command = format!("/{PROMPTS_CMD_PREFIX}:{}", prompt.name);

    match (mode, has_numeric) {
        (PromptSelectionMode::Completion, true) => PromptSelectionAction::Insert {
            text: format!("{command} "),
            cursor: None,
        },
        (PromptSelectionMode::Completion, false) => PromptSelectionAction::Insert {
            text: command,
            cursor: None,
        },
        (PromptSelectionMode::Submit, true) => {
            match expand_if_numeric_with_positional_args(prompt, first_line) {
                Some(expanded) => PromptSelectionAction::Submit { text: expanded },
                None => PromptSelectionAction::Insert {
                    text: format!("{command} "),
                    cursor: None,
                },
            }
        }
        (PromptSelectionMode::Submit, false) => PromptSelectionAction::Submit {
            text: prompt.content.clone(),
        },
    }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1528,7 +1590,6 @@ mod tests {
use crate::bottom_pane::InputResult;
use crate::bottom_pane::chat_composer::AttachedImage;
use crate::bottom_pane::chat_composer::LARGE_PASTE_CHAR_THRESHOLD;
use crate::bottom_pane::footer::footer_height;
use crate::bottom_pane::prompt_args::extract_positional_args_for_prompt_line;
use crate::bottom_pane::textarea::TextArea;
use tokio::sync::mpsc::unbounded_channel;
@@ -2666,6 +2727,174 @@ mod tests {
assert!(composer.textarea.is_empty());
}
#[test]
fn custom_prompt_submission_expands_arguments() {
    // Pressing Enter on a fully specified `/prompts:` command submits the
    // template with every named argument substituted and clears the composer.
    let (event_tx, _event_rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(event_tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    let prompt = CustomPrompt {
        name: "my-prompt".to_string(),
        path: "/tmp/my-prompt.md".to_string().into(),
        content: "Review $USER changes on $BRANCH".to_string(),
        description: None,
        argument_hint: None,
    };
    composer.set_custom_prompts(vec![prompt]);
    composer
        .textarea
        .set_text("/prompts:my-prompt USER=Alice BRANCH=main");

    let enter = KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE);
    let (result, _needs_redraw) = composer.handle_key_event(enter);

    assert_eq!(
        InputResult::Submitted("Review Alice changes on main".to_string()),
        result
    );
    assert!(composer.textarea.is_empty());
}
#[test]
fn custom_prompt_submission_accepts_quoted_values() {
    // A double-quoted value may contain spaces; the quotes themselves are not
    // part of the substituted text.
    let (event_tx, _event_rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(event_tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    let prompt = CustomPrompt {
        name: "my-prompt".to_string(),
        path: "/tmp/my-prompt.md".to_string().into(),
        content: "Pair $USER with $BRANCH".to_string(),
        description: None,
        argument_hint: None,
    };
    composer.set_custom_prompts(vec![prompt]);
    composer
        .textarea
        .set_text("/prompts:my-prompt USER=\"Alice Smith\" BRANCH=dev-main");

    let enter = KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE);
    let (result, _needs_redraw) = composer.handle_key_event(enter);

    assert_eq!(
        InputResult::Submitted("Pair Alice Smith with dev-main".to_string()),
        result
    );
    assert!(composer.textarea.is_empty());
}
#[test]
fn custom_prompt_invalid_args_reports_error() {
    // A token without `=` must block submission, leave the typed text in the
    // composer, and surface a parse error as a history cell.
    let (event_tx, mut event_rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(event_tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_custom_prompts(vec![CustomPrompt {
        name: "my-prompt".to_string(),
        path: "/tmp/my-prompt.md".to_string().into(),
        content: "Review $USER changes".to_string(),
        description: None,
        argument_hint: None,
    }]);
    let typed = "/prompts:my-prompt USER=Alice stray";
    composer.textarea.set_text(typed);

    let enter = KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE);
    let (result, _needs_redraw) = composer.handle_key_event(enter);

    assert_eq!(InputResult::None, result);
    assert_eq!(typed, composer.textarea.text());

    // Only the first history cell that was queued is inspected.
    let first_history_cell = std::iter::from_fn(|| event_rx.try_recv().ok()).find_map(|event| {
        if let AppEvent::InsertHistoryCell(cell) = event {
            Some(cell)
        } else {
            None
        }
    });
    let Some(cell) = first_history_cell else {
        panic!("expected error history cell to be sent");
    };
    let message = cell
        .display_lines(80)
        .into_iter()
        .map(|line| line.to_string())
        .collect::<Vec<_>>()
        .join("\n");
    assert!(message.contains("expected key=value"));
}
#[test]
fn custom_prompt_missing_required_args_reports_error() {
    // Leaving out one of the named arguments must block submission, keep the
    // typed text, and report which argument is missing.
    let (event_tx, mut event_rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(event_tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_custom_prompts(vec![CustomPrompt {
        name: "my-prompt".to_string(),
        path: "/tmp/my-prompt.md".to_string().into(),
        content: "Review $USER changes on $BRANCH".to_string(),
        description: None,
        argument_hint: None,
    }]);

    // Only USER is supplied; BRANCH is required as well.
    let typed = "/prompts:my-prompt USER=Alice";
    composer.textarea.set_text(typed);

    let enter = KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE);
    let (result, _needs_redraw) = composer.handle_key_event(enter);

    assert_eq!(InputResult::None, result);
    assert_eq!(typed, composer.textarea.text());

    // Only the first history cell that was queued is inspected.
    let first_history_cell = std::iter::from_fn(|| event_rx.try_recv().ok()).find_map(|event| {
        if let AppEvent::InsertHistoryCell(cell) = event {
            Some(cell)
        } else {
            None
        }
    });
    let Some(cell) = first_history_cell else {
        panic!("expected missing args error history cell to be sent");
    };
    let message = cell
        .display_lines(80)
        .into_iter()
        .map(|line| line.to_string())
        .collect::<Vec<_>>()
        .join("\n");
    assert!(message.to_lowercase().contains("missing required args"));
    assert!(message.contains("BRANCH"));
}
#[test]
fn selecting_custom_prompt_with_args_expands_placeholders() {
// Support $1..$9 and $ARGUMENTS in prompt content.
@@ -2704,6 +2933,37 @@ mod tests {
assert_eq!(InputResult::Submitted(expected), result);
}
#[test]
fn numeric_prompt_positional_args_does_not_error() {
    // A prompt that only uses positional placeholders must not be routed
    // through key=value parsing: plain positional input expands and submits.
    let (event_tx, _event_rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(event_tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    let prompt = CustomPrompt {
        name: "elegant".to_string(),
        path: "/tmp/elegant.md".to_string().into(),
        content: "Echo: $ARGUMENTS".to_string(),
        description: None,
        argument_hint: None,
    };
    composer.set_custom_prompts(vec![prompt]);

    // Positional argument only, no `key=value` pairs.
    composer.textarea.set_text("/prompts:elegant hi");

    let enter = KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE);
    let (result, _needs_redraw) = composer.handle_key_event(enter);

    assert_eq!(InputResult::Submitted("Echo: hi".to_string()), result);
    assert!(composer.textarea.is_empty());
}
#[test]
fn selecting_custom_prompt_with_no_args_inserts_template() {
let prompt_text = "X:$1 Y:$2 All:[$ARGUMENTS]";

View File

@@ -1,6 +1,60 @@
use codex_protocol::custom_prompts::CustomPrompt;
use codex_protocol::custom_prompts::PROMPTS_CMD_PREFIX;
use lazy_static::lazy_static;
use regex_lite::Regex;
use shlex::Shlex;
use std::collections::HashMap;
use std::collections::HashSet;
lazy_static! {
// Matches `$NAME`-style placeholders: a `$`, then an uppercase ASCII letter,
// then any run of uppercase letters, digits, or underscores.
// If the pattern ever failed to compile, the fallback closure aborts the
// process instead of panicking.
static ref PROMPT_ARG_REGEX: Regex =
Regex::new(r"\$[A-Z][A-Z0-9_]*").unwrap_or_else(|_| std::process::abort());
}
/// Reasons a `key=value` argument token could not be parsed.
#[derive(Debug)]
pub enum PromptArgsError {
    /// The token contains no `=` at all.
    MissingAssignment { token: String },
    /// The token starts with `=`, i.e. the key is empty.
    MissingKey { token: String },
}

impl PromptArgsError {
    /// Renders a user-facing message for this error, naming the `command`
    /// whose arguments failed to parse.
    fn describe(&self, command: &str) -> String {
        let detail = match self {
            Self::MissingAssignment { token } => format!(
                "expected key=value but found '{token}'. Wrap values in double quotes if they contain spaces."
            ),
            Self::MissingKey { token } => {
                format!("expected a name before '=' in '{token}'.")
            }
        };
        format!("Could not parse {command}: {detail}")
    }
}
/// Failure modes when expanding a `/prompts:name ...` command.
#[derive(Debug)]
pub enum PromptExpansionError {
    /// The argument list of `command` could not be parsed.
    Args {
        command: String,
        error: PromptArgsError,
    },
    /// Parsing succeeded but `command` lacks values for the `missing` names.
    MissingArgs {
        command: String,
        missing: Vec<String>,
    },
}

impl PromptExpansionError {
    /// Returns the message to show to the user for this error.
    pub fn user_message(&self) -> String {
        match self {
            Self::Args { command, error } => error.describe(command),
            Self::MissingArgs { command, missing } => format!(
                "Missing required args for {command}: {}. Provide as key=value (quote values with spaces).",
                missing.join(", ")
            ),
        }
    }
}
/// Parse a first-line slash command of the form `/name <rest>`.
/// Returns `(name, rest_after_name)` if the line begins with `/` and contains
@@ -27,6 +81,54 @@ pub fn parse_positional_args(rest: &str) -> Vec<String> {
Shlex::new(rest).collect()
}
/// Lists the named placeholders used by a prompt template.
///
/// A placeholder is any match of `$[A-Z][A-Z0-9_]*` (for example `$USER`).
/// Names are returned without the leading `$`, each one once, ordered by
/// first appearance. Two kinds of match are left out:
///
/// * matches directly preceded by another `$` (`$$USER` is an escaped literal);
/// * `$ARGUMENTS`, the positional aggregate token.
pub fn prompt_argument_names(content: &str) -> Vec<String> {
    let bytes = content.as_bytes();
    let mut seen: HashSet<&str> = HashSet::new();

    PROMPT_ARG_REGEX
        .find_iter(content)
        // Skip escaped placeholders such as `$$USER`.
        .filter(|found| found.start() == 0 || bytes[found.start() - 1] != b'$')
        // Drop the leading `$`.
        .map(|found| &content[found.start() + 1..found.end()])
        .filter(|name| *name != "ARGUMENTS")
        // `insert` is false for repeats, which keeps first-appearance order.
        .filter(|name| seen.insert(*name))
        .map(str::to_string)
        .collect()
}
/// Parses the `key=value` pairs typed after a custom prompt name.
///
/// `rest` is tokenised with shlex rules, so quoted values such as
/// `USER="Alice Smith"` are supported. Each token is split at its first `=`.
/// When a key appears more than once, the last value wins.
///
/// # Errors
///
/// Stops at the first offending token and returns
/// [`PromptArgsError::MissingAssignment`] when it has no `=`, or
/// [`PromptArgsError::MissingKey`] when nothing precedes the `=`.
pub fn parse_prompt_inputs(rest: &str) -> Result<HashMap<String, String>, PromptArgsError> {
    if rest.trim().is_empty() {
        return Ok(HashMap::new());
    }

    Shlex::new(rest)
        .map(|token| {
            let Some(eq_at) = token.find('=') else {
                return Err(PromptArgsError::MissingAssignment { token });
            };
            if eq_at == 0 {
                return Err(PromptArgsError::MissingKey { token });
            }
            Ok((token[..eq_at].to_string(), token[eq_at + 1..].to_string()))
        })
        .collect()
}
/// Expands a message of the form `/prompts:name [value] [value] …` using a matching saved prompt.
///
/// If the text does not start with `/prompts:`, or if no prompt named `name` exists,
@@ -35,7 +137,7 @@ pub fn parse_positional_args(rest: &str) -> Vec<String> {
pub fn expand_custom_prompt(
text: &str,
custom_prompts: &[CustomPrompt],
) -> Result<Option<String>, ()> {
) -> Result<Option<String>, PromptExpansionError> {
let Some((name, rest)) = parse_slash_name(text) else {
return Ok(None);
};
@@ -49,14 +151,45 @@ pub fn expand_custom_prompt(
Some(prompt) => prompt,
None => return Ok(None),
};
// Only support numeric placeholders ($1..$9) and $ARGUMENTS.
if prompt_has_numeric_placeholders(&prompt.content) {
let pos_args: Vec<String> = Shlex::new(rest).collect();
let expanded = expand_numeric_placeholders(&prompt.content, &pos_args);
return Ok(Some(expanded));
// If there are named placeholders, expect key=value inputs.
let required = prompt_argument_names(&prompt.content);
if !required.is_empty() {
let inputs = parse_prompt_inputs(rest).map_err(|error| PromptExpansionError::Args {
command: format!("/{name}"),
error,
})?;
let missing: Vec<String> = required
.into_iter()
.filter(|k| !inputs.contains_key(k))
.collect();
if !missing.is_empty() {
return Err(PromptExpansionError::MissingArgs {
command: format!("/{name}"),
missing,
});
}
let content = &prompt.content;
let replaced = PROMPT_ARG_REGEX.replace_all(content, |caps: &regex_lite::Captures<'_>| {
if let Some(matched) = caps.get(0)
&& matched.start() > 0
&& content.as_bytes()[matched.start() - 1] == b'$'
{
return matched.as_str().to_string();
}
let whole = &caps[0];
let key = &whole[1..];
inputs
.get(key)
.cloned()
.unwrap_or_else(|| whole.to_string())
});
return Ok(Some(replaced.into_owned()));
}
// No recognized placeholders: return the literal content.
Ok(Some(prompt.content.clone()))
// Otherwise, treat it as numeric/positional placeholder prompt (or none).
let pos_args: Vec<String> = Shlex::new(rest).collect();
let expanded = expand_numeric_placeholders(&prompt.content, &pos_args);
Ok(Some(expanded))
}
/// Detect whether `content` contains numeric placeholders ($1..$9) or `$ARGUMENTS`.
@@ -107,6 +240,9 @@ pub fn expand_if_numeric_with_positional_args(
prompt: &CustomPrompt,
first_line: &str,
) -> Option<String> {
if !prompt_argument_names(&prompt.content).is_empty() {
return None;
}
if !prompt_has_numeric_placeholders(&prompt.content) {
return None;
}
@@ -159,3 +295,112 @@ pub fn expand_numeric_placeholders(content: &str, args: &[String]) -> String {
out.push_str(&content[i..]);
out
}
/// Builds the `/prompts:name ARG1="" ARG2="" ...` text for a prompt that
/// takes named arguments.
///
/// Returns the text together with a byte offset for the cursor: just inside
/// the first pair of double quotes, or the end of the text when `args` is
/// empty.
pub fn prompt_command_with_arg_placeholders(name: &str, args: &[String]) -> (String, usize) {
    let mut command = format!("/{PROMPTS_CMD_PREFIX}:{name}");
    let prefix_len = command.len();

    for arg in args {
        command.push(' ');
        command.push_str(arg);
        command.push_str("=\"\"");
    }

    // The first value starts after the prefix, one space, the argument name,
    // and the `="` pair.
    let cursor = args
        .first()
        .map_or(prefix_len, |first| prefix_len + 1 + first.len() + 2);

    (command, cursor)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a one-element prompt list named `my-prompt` whose template body
    /// is `content`.
    fn prompts_with_content(content: &str) -> Vec<CustomPrompt> {
        vec![CustomPrompt {
            name: "my-prompt".to_string(),
            path: "/tmp/my-prompt.md".to_string().into(),
            content: content.to_string(),
            description: None,
            argument_hint: None,
        }]
    }

    #[test]
    fn expand_arguments_basic() {
        let prompts = prompts_with_content("Review $USER changes on $BRANCH");

        let out =
            expand_custom_prompt("/prompts:my-prompt USER=Alice BRANCH=main", &prompts).unwrap();

        assert_eq!(out, Some("Review Alice changes on main".to_string()));
    }

    #[test]
    fn quoted_values_ok() {
        let prompts = prompts_with_content("Pair $USER with $BRANCH");

        let out = expand_custom_prompt(
            "/prompts:my-prompt USER=\"Alice Smith\" BRANCH=dev-main",
            &prompts,
        )
        .unwrap();

        assert_eq!(out, Some("Pair Alice Smith with dev-main".to_string()));
    }

    #[test]
    fn invalid_arg_token_reports_error() {
        let prompts = prompts_with_content("Review $USER changes");

        let message = expand_custom_prompt("/prompts:my-prompt USER=Alice stray", &prompts)
            .unwrap_err()
            .user_message();

        assert!(message.contains("expected key=value"));
    }

    #[test]
    fn missing_required_args_reports_error() {
        let prompts = prompts_with_content("Review $USER changes on $BRANCH");

        let message = expand_custom_prompt("/prompts:my-prompt USER=Alice", &prompts)
            .unwrap_err()
            .user_message();

        assert!(message.to_lowercase().contains("missing required args"));
        assert!(message.contains("BRANCH"));
    }

    #[test]
    fn escaped_placeholder_is_ignored() {
        // `$$NAME` is an escape and must not be reported as an argument.
        assert_eq!(
            prompt_argument_names("literal $$USER"),
            Vec::<String>::new()
        );
        assert_eq!(
            prompt_argument_names("literal $$USER and $REAL"),
            vec!["REAL".to_string()]
        );
    }

    #[test]
    fn escaped_placeholder_remains_literal() {
        let prompts = prompts_with_content("literal $$USER");

        let out = expand_custom_prompt("/prompts:my-prompt", &prompts).unwrap();

        assert_eq!(out, Some("literal $$USER".to_string()));
    }
}

13
sdk/python/.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
__pycache__/
*.py[cod]
*.so
*.dylib
*.egg-info/
build/
dist/
.venv/
.pytest_cache/
.ruff_cache/
.mypy_cache/
.coverage
htmlcov/

43
sdk/python/README.md Normal file
View File

@@ -0,0 +1,43 @@
# openai-codex-sdk
A modern, minimalistic Python library project scaffold.
## Features
- PEP 621 `pyproject.toml` with `hatchling` build backend
- `src/` layout for package code
- Preconfigured tooling: Ruff, MyPy, and Pytest
- Ready for publishing to PyPI and local development
## Getting Started
```bash
python -m venv .venv
source .venv/bin/activate
pip install -U pip
pip install -e .[dev]
```
## Running Tests
```bash
pytest
```
## Linting & Formatting
```bash
ruff check src tests
ruff format src tests
mypy src
```
## Releasing
Update the version in `src/openai_codex_sdk/__about__.py` and `pyproject.toml`, then build and publish:
```bash
rm -rf dist
python -m build
python -m twine upload dist/*
```

64
sdk/python/pyproject.toml Normal file
View File

@@ -0,0 +1,64 @@
[build-system]
requires = ["hatchling>=1.25"]
build-backend = "hatchling.build"
[project]
name = "openai-codex-sdk"
version = "0.1.0"
description = "Modern minimalistic Python SDK scaffold."
readme = "README.md"
requires-python = ">=3.11"
license = {text = "MIT"}
authors = [{name = "Codex Team"}]
keywords = ["codex", "sdk", "template"]
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dependencies = []
[project.urls]
Homepage = "https://example.com/openai-codex-sdk"
Repository = "https://example.com/openai-codex-sdk.git"
[project.optional-dependencies]
dev = [
"mypy>=1.12",
"pytest>=8.3",
"pytest-cov>=5.0",
"pytest-asyncio>=0.24",
"ruff>=0.7",
"pyright>=1.1.379",
]
[tool.hatch.metadata]
allow-direct-references = true
[tool.hatch.build.targets.wheel]
packages = ["src/openai_codex_sdk"]
[tool.ruff]
line-length = 88
target-version = "py311"
[tool.ruff.lint]
select = ["E", "F", "I", "UP", "B", "A"]
[tool.ruff.format]
docstring-code-format = true
indent-style = "space"
[tool.pytest.ini_options]
minversion = "8.0"
addopts = "-ra --strict-markers"
testpaths = ["tests"]
[tool.mypy]
python_version = "3.11"
packages = ["openai_codex_sdk"]
strict = true
warn_unused_configs = true

View File

@@ -0,0 +1,7 @@
{
"$schema": "https://json.schemastore.org/pyrightconfig.json",
"include": ["src", "tests"],
"typeCheckingMode": "strict",
"venvPath": ".",
"venv": ".venv"
}

View File

@@ -0,0 +1,5 @@
"""Package metadata."""
__all__ = ["__version__"]
__version__ = "0.1.0"

View File

@@ -0,0 +1,66 @@
"""openai-codex-sdk public API."""
from .__about__ import __version__
from .codex import Codex
from .codex_options import CodexOptions
from .events import (
ItemCompletedEvent,
ItemStartedEvent,
ItemUpdatedEvent,
ThreadError,
ThreadErrorEvent,
ThreadEvent,
ThreadStartedEvent,
TurnCompletedEvent,
TurnFailedEvent,
TurnStartedEvent,
Usage,
)
from .items import (
AssistantMessageItem,
CommandExecutionItem,
ErrorItem,
FileChangeItem,
McpToolCallItem,
ReasoningItem,
ThreadItem,
TodoItem,
TodoListItem,
WebSearchItem,
)
from .thread import Input, RunResult, RunStreamedResult, Thread
from .turn_options import ApprovalMode, SandboxMode, TurnOptions
__all__ = [
"__version__",
"Codex",
"CodexOptions",
"Thread",
"RunResult",
"RunStreamedResult",
"Input",
"TurnOptions",
"ApprovalMode",
"SandboxMode",
"ThreadEvent",
"ThreadStartedEvent",
"TurnStartedEvent",
"TurnCompletedEvent",
"TurnFailedEvent",
"ItemStartedEvent",
"ItemUpdatedEvent",
"ItemCompletedEvent",
"ThreadError",
"ThreadErrorEvent",
"Usage",
"ThreadItem",
"AssistantMessageItem",
"ReasoningItem",
"CommandExecutionItem",
"FileChangeItem",
"McpToolCallItem",
"WebSearchItem",
"TodoListItem",
"TodoItem",
"ErrorItem",
]

View File

@@ -0,0 +1,20 @@
from __future__ import annotations
from .codex_options import CodexOptions
from .exec import CodexExec
from .thread import Thread
class Codex:
    """Entry point of the SDK: creates and resumes threads.

    Every thread created from one ``Codex`` instance shares the same
    ``CodexExec`` runner and the same ``CodexOptions``.
    """

    def __init__(self, options: CodexOptions) -> None:
        """Create a client.

        Raises:
            ValueError: if ``options.executable_path`` is empty.
        """
        if not options.executable_path:
            raise ValueError("executable_path is required")

        self._exec = CodexExec(options.executable_path)
        self._options = options

    def start_thread(self) -> Thread:
        """Return a new thread that has no thread id yet."""
        return Thread(self._exec, self._options)

    def resume_thread(self, thread_id: str) -> Thread:
        """Return a thread bound to the existing ``thread_id``."""
        return Thread(self._exec, self._options, thread_id)

View File

@@ -0,0 +1,12 @@
from __future__ import annotations
from dataclasses import dataclass
@dataclass(slots=True)
class CodexOptions:
"""Configuration for creating a ``Codex`` client."""
executable_path: str
base_url: str | None = None
api_key: str | None = None

View File

@@ -0,0 +1,66 @@
from __future__ import annotations
from typing import Literal, TypedDict
from .items import ThreadItem
class ThreadStartedEvent(TypedDict):
type: Literal["thread.started"]
thread_id: str
class TurnStartedEvent(TypedDict):
type: Literal["turn.started"]
class Usage(TypedDict):
input_tokens: int
cached_input_tokens: int
output_tokens: int
class TurnCompletedEvent(TypedDict):
type: Literal["turn.completed"]
usage: Usage
class ThreadError(TypedDict):
message: str
class TurnFailedEvent(TypedDict):
type: Literal["turn.failed"]
error: ThreadError
class ItemStartedEvent(TypedDict):
type: Literal["item.started"]
item: ThreadItem
class ItemUpdatedEvent(TypedDict):
type: Literal["item.updated"]
item: ThreadItem
class ItemCompletedEvent(TypedDict):
type: Literal["item.completed"]
item: ThreadItem
class ThreadErrorEvent(TypedDict):
type: Literal["error"]
message: str
ThreadEvent = (
ThreadStartedEvent
| TurnStartedEvent
| TurnCompletedEvent
| TurnFailedEvent
| ItemStartedEvent
| ItemUpdatedEvent
| ItemCompletedEvent
| ThreadErrorEvent
)

View File

@@ -0,0 +1,80 @@
from __future__ import annotations
import asyncio
import os
from dataclasses import dataclass
from typing import AsyncGenerator
from .turn_options import SandboxMode
@dataclass(slots=True)
class CodexExecArgs:
input: str
base_url: str | None = None
api_key: str | None = None
thread_id: str | None = None
model: str | None = None
sandbox_mode: SandboxMode | None = None
class CodexExec:
    """Runs the codex executable as a child process and streams its output."""

    def __init__(self, executable_path: str) -> None:
        self._executable_path = executable_path

    async def run(self, args: CodexExecArgs) -> AsyncGenerator[str, None]:
        """Run ``exec --experimental-json`` and yield stdout line by line.

        Each yielded string is one UTF-8 decoded stdout line without its
        trailing newline. The child inherits the current environment, with
        ``OPENAI_BASE_URL`` / ``OPENAI_API_KEY`` overridden when the matching
        argument is set.

        Raises:
            RuntimeError: if the process cannot be started, exposes no stdout,
                or exits with a non-zero code (the message then includes the
                captured stderr, if any).
        """
        command_args: list[str] = ["exec", "--experimental-json"]
        if args.model:
            command_args.extend(["--model", args.model])
        if args.sandbox_mode:
            command_args.extend(["--sandbox", args.sandbox_mode])
        if args.thread_id:
            command_args.extend(["resume", args.thread_id, args.input])
        else:
            command_args.append(args.input)

        env = dict(os.environ)
        if args.base_url:
            env["OPENAI_BASE_URL"] = args.base_url
        if args.api_key:
            env["OPENAI_API_KEY"] = args.api_key

        try:
            process = await asyncio.create_subprocess_exec(
                self._executable_path,
                *command_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                env=env,
            )
        except Exception as exc:  # pragma: no cover - passthrough for caller
            raise RuntimeError("Failed to start codex executable") from exc

        if not process.stdout:
            process.kill()
            await process.wait()
            raise RuntimeError("Child process has no stdout")

        # Drain stderr while stdout is being consumed. Reading it only after
        # stdout hits EOF lets a chatty child fill the stderr pipe buffer and
        # block forever, which in turn stalls this generator.
        stderr_task: asyncio.Task[bytes] | None = None
        if process.stderr:
            stderr_task = asyncio.create_task(process.stderr.read())

        try:
            while True:
                line = await process.stdout.readline()
                if not line:
                    break
                yield line.decode("utf-8").rstrip("\n")

            return_code = await process.wait()
            if return_code != 0:
                stderr_output = b""
                if stderr_task is not None:
                    stderr_output = await stderr_task
                message = stderr_output.decode("utf-8", errors="ignore").strip()
                raise RuntimeError(
                    f"Codex Exec exited with code {return_code}"
                    + (f": {message}" if message else "")
                )
        finally:
            # Also reached when the consumer stops iterating early: make sure
            # the child does not outlive the generator.
            if process.returncode is None:
                process.kill()
                await process.wait()
            if stderr_task is not None and not stderr_task.done():
                stderr_task.cancel()

View File

@@ -0,0 +1,85 @@
from __future__ import annotations
from typing import Literal, NotRequired, TypedDict
class CommandExecutionItem(TypedDict):
id: str
item_type: Literal["command_execution"]
command: str
aggregated_output: str
status: Literal["in_progress", "completed", "failed"]
exit_code: NotRequired[int]
class FileUpdateChange(TypedDict):
path: str
kind: Literal["add", "delete", "update"]
class FileChangeItem(TypedDict):
id: str
item_type: Literal["file_change"]
changes: list[FileUpdateChange]
status: Literal["completed", "failed"]
class McpToolCallItem(TypedDict):
id: str
item_type: Literal["mcp_tool_call"]
server: str
tool: str
status: Literal["in_progress", "completed", "failed"]
class AssistantMessageItem(TypedDict):
id: str
item_type: Literal["assistant_message"]
text: str
class ReasoningItem(TypedDict):
id: str
item_type: Literal["reasoning"]
text: str
class WebSearchItem(TypedDict):
id: str
item_type: Literal["web_search"]
query: str
class ErrorItem(TypedDict):
id: str
item_type: Literal["error"]
message: str
class TodoItem(TypedDict):
text: str
completed: bool
class TodoListItem(TypedDict):
id: str
item_type: Literal["todo_list"]
items: list[TodoItem]
class SessionItem(TypedDict):
    """Item whose ``item_type`` is ``"session"``.

    Note: this type is not a member of the ``ThreadItem`` union defined in this module.
    """

    id: str
    # Discriminator: always the literal "session" for this shape.
    item_type: Literal["session"]
    session_id: str
# Union of the item shapes declared above. Every member carries an ``item_type``
# literal, so a value can be narrowed by comparing that key.
# ``SessionItem`` is deliberately or accidentally left out of this union.
# NOTE(review): confirm whether SessionItem should be included.
ThreadItem = (
    AssistantMessageItem
    | ReasoningItem
    | CommandExecutionItem
    | FileChangeItem
    | McpToolCallItem
    | WebSearchItem
    | TodoListItem
    | ErrorItem
)

View File

@@ -0,0 +1,70 @@
from __future__ import annotations
import json
from dataclasses import dataclass
from typing import AsyncGenerator, cast
from .codex_options import CodexOptions
from .exec import CodexExec, CodexExecArgs
from .events import ItemCompletedEvent, ThreadEvent, ThreadStartedEvent
from .items import AssistantMessageItem, ThreadItem
from .turn_options import TurnOptions
# Type of the ``input`` argument accepted by ``Thread.run`` and ``Thread.run_streamed``.
Input = str
@dataclass(slots=True)
class RunResult:
    """Value returned by ``Thread.run`` once the event stream is exhausted."""

    # Items taken from every "item.completed" event, in the order they arrived.
    items: list[ThreadItem]
    # Text of the last completed "assistant_message" item; "" when there was none.
    final_response: str
@dataclass(slots=True)
class RunStreamedResult:
    """Value returned by ``Thread.run_streamed``."""

    # Async generator yielding each parsed event; nothing runs until it is iterated.
    events: AsyncGenerator[ThreadEvent, None]
class Thread:
    """A conversation thread executed through a ``CodexExec`` runner.

    ``id`` starts as the ``thread_id`` passed to the constructor (``None`` by
    default) and is replaced whenever a ``"thread.started"`` event is observed
    while a turn's events are being consumed.
    """

    def __init__(self, codex_exec: CodexExec, options: CodexOptions, thread_id: str | None = None) -> None:
        self.id = thread_id
        self._options = options
        self._exec = codex_exec

    async def run_streamed(self, input: Input, options: TurnOptions | None = None) -> RunStreamedResult:
        """Start a turn and hand back its events as a lazy async generator.

        Args:
            input: Prompt text for this turn.
            options: Optional per-turn overrides (model, sandbox mode).

        Returns:
            A ``RunStreamedResult`` wrapping the not-yet-started event generator.
        """
        event_stream = self._run_streamed_internal(input, options)
        return RunStreamedResult(events=event_stream)

    async def run(self, input: Input, options: TurnOptions | None = None) -> RunResult:
        """Run a turn to completion and collect its completed items.

        Args:
            input: Prompt text for this turn.
            options: Optional per-turn overrides (model, sandbox mode).

        Returns:
            A ``RunResult`` holding every completed item and the text of the
            last completed assistant message ("" if none was seen).
        """
        collected: list[ThreadItem] = []
        last_message = ""
        async for event in self._run_streamed_internal(input, options):
            # Only "item.completed" events contribute to the result.
            if event["type"] == "item.completed":
                finished_item = cast(ItemCompletedEvent, event)["item"]
                collected.append(finished_item)
                if finished_item["item_type"] == "assistant_message":
                    # Later assistant messages overwrite earlier ones.
                    last_message = cast(AssistantMessageItem, finished_item)["text"]
        return RunResult(items=collected, final_response=last_message)

    async def _run_streamed_internal(
        self, input: Input, options: TurnOptions | None
    ) -> AsyncGenerator[ThreadEvent, None]:
        """Invoke the runner and yield each output line parsed as JSON.

        Records the thread id from a ``"thread.started"`` event before that
        event is yielded to the caller.
        """
        # Per-turn overrides are only read when options were supplied.
        model, sandbox_mode = (options.model, options.sandbox_mode) if options else (None, None)
        exec_args = CodexExecArgs(
            input=input,
            base_url=self._options.base_url,
            api_key=self._options.api_key,
            thread_id=self.id,
            model=model,
            sandbox_mode=sandbox_mode,
        )
        async for raw_line in self._exec.run(exec_args):
            event = cast(ThreadEvent, json.loads(raw_line))
            if event["type"] == "thread.started":
                self.id = cast(ThreadStartedEvent, event)["thread_id"]
            yield event

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Literal
# Allowed approval-mode strings. NOTE(review): not referenced by TurnOptions below;
# presumably used elsewhere in the package - confirm.
ApprovalMode = Literal["never", "on-request", "on-failure", "untrusted"]
# Allowed sandbox-mode strings; this is the type of ``TurnOptions.sandbox_mode``.
SandboxMode = Literal["read-only", "workspace-write", "danger-full-access"]
@dataclass(slots=True)
class TurnOptions:
    """Optional per-turn settings; both fields default to ``None``."""

    # Model name for the turn, or None when no override is requested.
    model: str | None = None
    # One of the SandboxMode literals, or None when no override is requested.
    sandbox_mode: SandboxMode | None = None

View File

View File

@@ -0,0 +1,27 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Callable
from pytest import MonkeyPatch
from openai_codex_sdk.exec import CodexExecArgs
from .responses_proxy import FakeExec, ResponsesProxy
@dataclass(slots=True)
class CodexExecSpyResult:
    """Handle returned by ``install_codex_exec_spy``."""

    # The list handed to each FakeExec built by the spy's factory.
    args: list[CodexExecArgs]
    # Callable that undoes the monkeypatching (bound to ``monkeypatch.undo``).
    restore: Callable[[], None]
def install_codex_exec_spy(monkeypatch: MonkeyPatch, proxy: ResponsesProxy) -> CodexExecSpyResult:
    """Replace ``openai_codex_sdk.codex.CodexExec`` with a ``FakeExec`` factory.

    Args:
        monkeypatch: pytest's monkeypatch fixture, used to install the replacement.
        proxy: Scripted proxy that every created ``FakeExec`` will consult.

    Returns:
        A ``CodexExecSpyResult`` exposing the shared call list and an undo callable.
    """
    recorded_calls: list[CodexExecArgs] = []

    def _build_fake_exec(path: str) -> FakeExec:
        # Every fake shares the same call list so the test can inspect it later.
        return FakeExec(path, proxy, recorded_calls)

    monkeypatch.setattr("openai_codex_sdk.codex.CodexExec", _build_fake_exec)
    spy_result = CodexExecSpyResult(args=recorded_calls, restore=monkeypatch.undo)
    return spy_result

View File

@@ -0,0 +1,37 @@
from __future__ import annotations
from collections.abc import AsyncIterator, Awaitable, Callable
import pytest
import pytest_asyncio
from pytest import MonkeyPatch
from .codex_exec_spy import CodexExecSpyResult, install_codex_exec_spy
from .responses_proxy import ResponsesProxy, ResponsesProxyOptions, start_responses_test_proxy
# Signature of the async factory yielded by the ``make_responses_proxy`` fixture.
ProxyFactory = Callable[[ResponsesProxyOptions], Awaitable[ResponsesProxy]]
# Signature of the installer returned by the ``codex_exec_spy`` fixture.
SpyFactory = Callable[[ResponsesProxy], CodexExecSpyResult]
@pytest_asyncio.fixture
async def make_responses_proxy() -> AsyncIterator[ProxyFactory]:
    """Yield an async factory for test proxies and close them all on teardown."""
    started: list[ResponsesProxy] = []

    async def _start(options: ResponsesProxyOptions) -> ResponsesProxy:
        created = await start_responses_test_proxy(options)
        # Remember the proxy so teardown can close it.
        started.append(created)
        return created

    try:
        yield _start
    finally:
        # Close in creation order, whether or not the test passed.
        for created_proxy in started:
            await created_proxy.close()
@pytest.fixture
def codex_exec_spy(monkeypatch: MonkeyPatch) -> SpyFactory:
    """Return a callable that installs the CodexExec spy for a given proxy."""
    # The closure binds this test's monkeypatch, so the patch is scoped to the test.
    return lambda proxy: install_codex_exec_spy(monkeypatch, proxy)

View File

@@ -0,0 +1,210 @@
from __future__ import annotations
import asyncio
import itertools
import json
from dataclasses import dataclass, field
from typing import Any, AsyncGenerator, TypedDict
from openai_codex_sdk.exec import CodexExecArgs
# Default response id used by ``response_started`` and ``response_completed``.
DEFAULT_RESPONSE_ID = "resp_mock"
# Default item id used by ``assistant_message``.
DEFAULT_MESSAGE_ID = "msg_mock"
class SseEvent(TypedDict, total=False):
    """One scripted event; ``total=False`` makes every key optional."""

    # Event name, e.g. "response.created", "response.output_item.done", "response.completed".
    type: str
    # Present on the events built by ``assistant_message``.
    item: dict[str, Any]
    # Present on the events built by ``response_started`` / ``response_completed``.
    response: dict[str, Any]
class SseResponseBody(TypedDict):
    """A scripted response: a kind tag plus the ordered events it contains."""

    # The ``sse`` helper in this module always sets this to "sse".
    kind: str
    events: list[SseEvent]
class ResponsesProxyOptions(TypedDict, total=False):
    """Options accepted by ``start_responses_test_proxy``; every key is optional at type level."""

    # Effectively required: start_responses_test_proxy raises ValueError when it is missing.
    response_bodies: list[SseResponseBody]
    # start_responses_test_proxy falls back to 200 when this key is absent.
    status_code: int
class RecordedRequest(TypedDict):
    """A request captured by ``ResponsesProxy``: the same payload as text and as a dict."""

    # ``json.dumps`` of the payload stored under ``json``.
    body: str
    json: dict[str, Any]
@dataclass(slots=True)
class ResponsesProxy:
response_bodies: list[SseResponseBody]
status_code: int
requests: list[RecordedRequest]
_response_index: int = field(init=False, default=0)
_thread_counter: itertools.count = field(init=False, default_factory=lambda: itertools.count(1))
_thread_histories: dict[str, list[str]] = field(init=False, default_factory=dict)
def __post_init__(self) -> None:
if not self.response_bodies:
raise ValueError("response_bodies is required")
async def close(self) -> None:
await asyncio.sleep(0)
def _next_thread_id(self) -> str:
return f"thread_{next(self._thread_counter)}"
def _next_response(self) -> SseResponseBody:
index = min(self._response_index, len(self.response_bodies) - 1)
self._response_index += 1
return self.response_bodies[index]
def _build_request(self, args: CodexExecArgs, thread_id: str) -> RecordedRequest:
history = self._thread_histories.get(thread_id, [])
input_entries: list[dict[str, Any]] = []
for text in history:
input_entries.append(
{
"role": "assistant",
"content": [
{
"type": "output_text",
"text": text,
}
],
}
)
input_entries.append(
{
"role": "user",
"content": [
{
"type": "input_text",
"text": args.input,
}
],
}
)
request_json: dict[str, Any] = {"input": input_entries}
if args.model is not None:
request_json["model"] = args.model
recorded = RecordedRequest(body=json.dumps(request_json), json=request_json)
self.requests.append(recorded)
return recorded
def record_run(self, args: CodexExecArgs) -> tuple[str, RecordedRequest, bool]:
if args.thread_id:
thread_id = args.thread_id
new_thread = False
else:
thread_id = self._next_thread_id()
new_thread = True
request = self._build_request(args, thread_id)
return thread_id, request, new_thread
def add_history(self, thread_id: str, text: str) -> None:
self._thread_histories.setdefault(thread_id, []).append(text)
def _convert_events(
self, response_body: SseResponseBody, thread_id: str, new_thread: bool
) -> list[dict[str, Any]]:
events: list[dict[str, Any]] = []
if new_thread:
events.append({"type": "thread.started", "thread_id": thread_id})
for event in response_body["events"]:
if event["type"] == "response.created":
events.append({"type": "turn.started"})
elif event["type"] == "response.output_item.done":
item = event["item"]
text = item["content"][0]["text"]
events.append(
{
"type": "item.completed",
"item": {
"id": item["id"],
"item_type": "assistant_message",
"text": text,
},
}
)
elif event["type"] == "response.completed":
events.append(
{
"type": "turn.completed",
"usage": {
"input_tokens": 0,
"cached_input_tokens": 0,
"output_tokens": 0,
},
}
)
return events
def next_events(self, thread_id: str, new_thread: bool) -> list[dict[str, Any]]:
response_body = self._next_response()
return self._convert_events(response_body, thread_id, new_thread)
class FakeExec:
    """Stand-in runner that replays a proxy's scripted events instead of spawning a process."""

    def __init__(self, _path: str, proxy: ResponsesProxy, calls: list[CodexExecArgs]) -> None:
        # ``_path`` is accepted for signature compatibility and never used.
        self.calls = calls
        self._proxy = proxy

    async def run(self, args: CodexExecArgs) -> AsyncGenerator[str, None]:
        """Record ``args`` and yield each scripted event as a JSON string.

        Non-empty text of every completed item is added to the proxy's thread
        history before that event is yielded.
        """
        self.calls.append(args)
        thread_id, _request, new_thread = self._proxy.record_run(args)
        for event in self._proxy.next_events(thread_id, new_thread):
            text = event["item"].get("text") if event["type"] == "item.completed" else None
            if text:
                self._proxy.add_history(thread_id, text)
            # Give the event loop a turn between events.
            await asyncio.sleep(0)
            yield json.dumps(event)
async def start_responses_test_proxy(options: ResponsesProxyOptions) -> ResponsesProxy:
    """Create a ``ResponsesProxy`` from an options dict.

    Args:
        options: Must contain ``response_bodies``; ``status_code`` defaults to 200.

    Returns:
        A proxy with an empty list of recorded requests.

    Raises:
        ValueError: If ``response_bodies`` is missing or ``None``.
    """
    if (scripted_bodies := options.get("response_bodies")) is None:
        raise ValueError("response_bodies is required")
    return ResponsesProxy(scripted_bodies, options.get("status_code", 200), requests=[])
def sse(*events: SseEvent) -> SseResponseBody:
    """Wrap the given events, in order, into a response body of kind ``"sse"``."""
    body: SseResponseBody = {"kind": "sse", "events": [*events]}
    return body
def response_started(response_id: str = DEFAULT_RESPONSE_ID) -> SseEvent:
    """Build a ``response.created`` event carrying ``response_id``."""
    event: SseEvent = {"type": "response.created"}
    event["response"] = {"id": response_id}
    return event
def assistant_message(text: str, item_id: str = DEFAULT_MESSAGE_ID) -> SseEvent:
    """Build a ``response.output_item.done`` event for an assistant message.

    The item has a single ``output_text`` content part holding ``text``.
    """
    text_part = {"type": "output_text", "text": text}
    message_item = {
        "type": "message",
        "role": "assistant",
        "id": item_id,
        "content": [text_part],
    }
    return {"type": "response.output_item.done", "item": message_item}
def response_completed(response_id: str = DEFAULT_RESPONSE_ID) -> SseEvent:
    """Build a ``response.completed`` event carrying ``response_id``."""
    event: SseEvent = {"type": "response.completed"}
    event["response"] = {"id": response_id}
    return event

View File

@@ -0,0 +1,172 @@
from __future__ import annotations
from pathlib import Path
from typing import AsyncGenerator, Callable
import pytest
from openai_codex_sdk import Codex, CodexOptions
from openai_codex_sdk.events import ThreadEvent
from .codex_exec_spy import CodexExecSpyResult
from .responses_proxy import (
ResponsesProxy,
assistant_message,
response_completed,
response_started,
sse,
)
# Location of a debug ``codex`` binary under ``codex-rs/target/debug``, resolved from
# the third ancestor directory of this file. The tests pass it as ``executable_path``.
CODEX_EXEC_PATH = Path(__file__).resolve().parents[2] / "codex-rs" / "target" / "debug" / "codex"
@pytest.mark.asyncio
async def test_returns_thread_events(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """A streamed run yields thread/turn/item events in order and sets the thread id."""
    scripted_turn = sse(
        response_started(),
        assistant_message("Hi!"),
        response_completed(),
    )
    proxy = await make_responses_proxy({"status_code": 200, "response_bodies": [scripted_turn]})
    codex_exec_spy(proxy)

    options = CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test")
    thread = Codex(options).start_thread()
    streamed = await thread.run_streamed("Hello, world!")
    received: list[ThreadEvent] = [event async for event in streamed.events]

    thread_started = {"type": "thread.started", "thread_id": "thread_1"}
    turn_started = {"type": "turn.started"}
    item_completed = {
        "type": "item.completed",
        "item": {
            "id": "msg_mock",
            "item_type": "assistant_message",
            "text": "Hi!",
        },
    }
    turn_completed = {
        "type": "turn.completed",
        "usage": {
            "input_tokens": 0,
            "cached_input_tokens": 0,
            "output_tokens": 0,
        },
    }
    assert received == [thread_started, turn_started, item_completed, turn_completed]
    assert thread.id == "thread_1"
@pytest.mark.asyncio
async def test_sends_previous_items_when_run_streamed_called_twice(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """The second streamed run on a thread carries the first assistant reply as input."""
    first_turn = sse(
        response_started("response_1"),
        assistant_message("First response", "item_1"),
        response_completed("response_1"),
    )
    second_turn = sse(
        response_started("response_2"),
        assistant_message("Second response", "item_2"),
        response_completed("response_2"),
    )
    proxy = await make_responses_proxy({"status_code": 200, "response_bodies": [first_turn, second_turn]})
    codex_exec_spy(proxy)

    options = CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test")
    thread = Codex(options).start_thread()
    for prompt in ("first input", "second input"):
        streamed = await thread.run_streamed(prompt)
        await _drain_events(streamed.events)

    assert len(proxy.requests) >= 2
    payload = proxy.requests[1]["json"]

    # First assistant entry in the second request's input, if any.
    assistant_entry = None
    for entry in payload["input"]:
        if entry["role"] == "assistant":
            assistant_entry = entry
            break
    assert assistant_entry is not None

    # First output_text part of that entry, if any.
    assistant_text = None
    for part in assistant_entry.get("content", []):
        if part.get("type") == "output_text":
            assistant_text = part["text"]
            break
    assert assistant_text == "First response"
@pytest.mark.asyncio
async def test_resumes_thread_by_id_when_streaming(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """A thread resumed by id continues the original conversation when streaming.

    The second request must contain the assistant reply from the first turn,
    and the resumed thread must keep the original thread id.
    """
    proxy = await make_responses_proxy(
        {
            "status_code": 200,
            "response_bodies": [
                sse(
                    response_started("response_1"),
                    assistant_message("First response", "item_1"),
                    response_completed("response_1"),
                ),
                sse(
                    response_started("response_2"),
                    assistant_message("Second response", "item_2"),
                    response_completed("response_2"),
                ),
            ],
        }
    )
    codex_exec_spy(proxy)
    client = Codex(CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test"))

    original_thread = client.start_thread()
    first = await original_thread.run_streamed("first input")
    await _drain_events(first.events)

    # Fail here, at the cause, if the first turn never assigned an id. Falling
    # back to "" would silently resume a different thread instead.
    assert original_thread.id is not None
    resumed_thread = client.resume_thread(original_thread.id)
    second = await resumed_thread.run_streamed("second input")
    await _drain_events(second.events)

    assert resumed_thread.id == original_thread.id
    assert len(proxy.requests) >= 2
    second_request = proxy.requests[1]
    payload = second_request["json"]
    assistant_entry = next((entry for entry in payload["input"] if entry["role"] == "assistant"), None)
    assert assistant_entry is not None
    assistant_text = next(
        (item["text"] for item in assistant_entry.get("content", []) if item.get("type") == "output_text"),
        None,
    )
    assert assistant_text == "First response"
async def _drain_events(events: AsyncGenerator[ThreadEvent, None]) -> None:
async for _ in events:
pass

View File

@@ -0,0 +1,223 @@
from __future__ import annotations
from pathlib import Path
from typing import Callable
import pytest
from openai_codex_sdk import Codex, CodexOptions
from openai_codex_sdk.turn_options import TurnOptions
from .codex_exec_spy import CodexExecSpyResult
from .responses_proxy import (
ResponsesProxy,
assistant_message,
response_completed,
response_started,
sse,
)
# Location of a debug ``codex`` binary under ``codex-rs/target/debug``, resolved from
# the third ancestor directory of this file. The tests pass it as ``executable_path``.
CODEX_EXEC_PATH = Path(__file__).resolve().parents[2] / "codex-rs" / "target" / "debug" / "codex"
@pytest.mark.asyncio
async def test_returns_thread_events(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """``Thread.run`` returns the completed items, the final response and a thread id."""
    proxy = await make_responses_proxy(
        {
            "status_code": 200,
            "response_bodies": [
                sse(
                    response_started(),
                    assistant_message("Hi!"),
                    response_completed(),
                )
            ],
        }
    )
    # Only the patching side effect is needed here; the returned handle is unused.
    codex_exec_spy(proxy)
    client = Codex(CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test"))
    thread = client.start_thread()
    result = await thread.run("Hello, world!")

    expected_items = [
        {
            "id": "msg_mock",
            "item_type": "assistant_message",
            "text": "Hi!",
        }
    ]
    assert result.items == expected_items
    # The final response is the text of the last completed assistant message.
    assert result.final_response == "Hi!"
    assert thread.id is not None
@pytest.mark.asyncio
async def test_sends_previous_items_when_run_called_twice(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """The second ``run`` on a thread carries the first assistant reply as input."""
    first_turn = sse(
        response_started("response_1"),
        assistant_message("First response", "item_1"),
        response_completed("response_1"),
    )
    second_turn = sse(
        response_started("response_2"),
        assistant_message("Second response", "item_2"),
        response_completed("response_2"),
    )
    proxy = await make_responses_proxy({"status_code": 200, "response_bodies": [first_turn, second_turn]})
    codex_exec_spy(proxy)

    options = CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test")
    thread = Codex(options).start_thread()
    for prompt in ("first input", "second input"):
        await thread.run(prompt)

    assert len(proxy.requests) >= 2
    payload = proxy.requests[1]["json"]

    # First assistant entry in the second request's input, if any.
    assistant_entry = None
    for entry in payload["input"]:
        if entry["role"] == "assistant":
            assistant_entry = entry
            break
    assert assistant_entry is not None

    # First output_text part of that entry, if any.
    assistant_text = None
    for part in assistant_entry.get("content", []):
        if part.get("type") == "output_text":
            assistant_text = part["text"]
            break
    assert assistant_text == "First response"
@pytest.mark.asyncio
async def test_continues_thread_with_options(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """Passing turn options on the second ``run`` still continues the same thread."""
    first_turn = sse(
        response_started("response_1"),
        assistant_message("First response", "item_1"),
        response_completed("response_1"),
    )
    second_turn = sse(
        response_started("response_2"),
        assistant_message("Second response", "item_2"),
        response_completed("response_2"),
    )
    proxy = await make_responses_proxy({"status_code": 200, "response_bodies": [first_turn, second_turn]})
    codex_exec_spy(proxy)

    options = CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test")
    thread = Codex(options).start_thread()
    await thread.run("first input")
    model_override = TurnOptions(model="gpt-test-1")
    await thread.run("second input", model_override)

    assert len(proxy.requests) >= 2
    payload = proxy.requests[1]["json"]
    # The per-turn model must show up in the recorded request.
    assert payload.get("model") == "gpt-test-1"

    # First assistant entry in the second request's input, if any.
    assistant_entry = None
    for entry in payload["input"]:
        if entry["role"] == "assistant":
            assistant_entry = entry
            break
    assert assistant_entry is not None

    # First output_text part of that entry, if any.
    assistant_text = None
    for part in assistant_entry.get("content", []):
        if part.get("type") == "output_text":
            assistant_text = part["text"]
            break
    assert assistant_text == "First response"
@pytest.mark.asyncio
async def test_resumes_thread_by_id(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """A thread resumed by id continues the original conversation.

    The resumed run must return the second scripted reply, keep the original
    thread id, and send the first assistant reply as part of its input.
    """
    proxy = await make_responses_proxy(
        {
            "status_code": 200,
            "response_bodies": [
                sse(
                    response_started("response_1"),
                    assistant_message("First response", "item_1"),
                    response_completed("response_1"),
                ),
                sse(
                    response_started("response_2"),
                    assistant_message("Second response", "item_2"),
                    response_completed("response_2"),
                ),
            ],
        }
    )
    codex_exec_spy(proxy)
    client = Codex(CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test"))

    original_thread = client.start_thread()
    await original_thread.run("first input")

    # Fail here, at the cause, if the first turn never assigned an id. Falling
    # back to "" would silently resume a different thread instead.
    assert original_thread.id is not None
    resumed_thread = client.resume_thread(original_thread.id)
    result = await resumed_thread.run("second input")

    assert resumed_thread.id == original_thread.id
    assert result.final_response == "Second response"
    assert len(proxy.requests) >= 2
    second_request = proxy.requests[1]
    payload = second_request["json"]
    assistant_entry = next((entry for entry in payload["input"] if entry["role"] == "assistant"), None)
    assert assistant_entry is not None
    assistant_text = next(
        (item["text"] for item in assistant_entry.get("content", []) if item.get("type") == "output_text"),
        None,
    )
    assert assistant_text == "First response"
@pytest.mark.asyncio
async def test_passes_turn_options_to_exec(
    make_responses_proxy, codex_exec_spy: Callable[[ResponsesProxy], CodexExecSpyResult]
) -> None:
    """Turn options reach both the recorded request and the exec arguments."""
    scripted_turn = sse(
        response_started("response_1"),
        assistant_message("Turn options applied", "item_1"),
        response_completed("response_1"),
    )
    proxy = await make_responses_proxy({"status_code": 200, "response_bodies": [scripted_turn]})
    spy = codex_exec_spy(proxy)

    options = CodexOptions(executable_path=str(CODEX_EXEC_PATH), base_url="http://proxy", api_key="test")
    thread = Codex(options).start_thread()
    turn_options = TurnOptions(model="gpt-test-1", sandbox_mode="workspace-write")
    await thread.run("apply options", turn_options)

    # The model is visible in the request the proxy recorded.
    assert proxy.requests
    assert proxy.requests[0]["json"].get("model") == "gpt-test-1"

    # Both options are visible in the arguments captured by the spy.
    assert spy.args
    recorded_args = spy.args[0]
    assert recorded_args.sandbox_mode == "workspace-write"
    assert recorded_args.model == "gpt-test-1"

View File

@@ -25,6 +25,7 @@ export type ResponsesProxy = {
};
export type ResponsesApiRequest = {
model?: string;
input: Array<{
role: string;
content?: Array<{ type: string; text: string }>;

View File

@@ -85,6 +85,52 @@ describe("Codex", () => {
}
});
it("continues the thread when run is called twice with options", async () => {
  // Two scripted turns: one reply per run() call.
  const firstTurn = sse(
    responseStarted("response_1"),
    assistantMessage("First response", "item_1"),
    responseCompleted("response_1"),
  );
  const secondTurn = sse(
    responseStarted("response_2"),
    assistantMessage("Second response", "item_2"),
    responseCompleted("response_2"),
  );
  const { url, close, requests } = await startResponsesTestProxy({
    statusCode: 200,
    responseBodies: [firstTurn, secondTurn],
  });

  try {
    const thread = new Codex({
      executablePath: codexExecPath,
      baseUrl: url,
      apiKey: "test",
    }).startThread();
    await thread.run("first input");
    await thread.run("second input", { model: "gpt-test-1" });

    // The second request must carry the model override and the earlier reply.
    expect(requests.length).toBeGreaterThanOrEqual(2);
    const secondRequest = requests[1];
    expect(secondRequest).toBeDefined();
    const { model, input } = secondRequest!.json;
    expect(model).toBe("gpt-test-1");

    const assistantEntry = input.find((entry: { role: string }) => entry.role === "assistant");
    expect(assistantEntry).toBeDefined();
    const outputTextPart = assistantEntry?.content?.find(
      (item: { type: string; text: string }) => item.type === "output_text",
    );
    expect(outputTextPart?.text).toBe("First response");
  } finally {
    await close();
  }
});
it("resumes thread by id", async () => {
const { url, close, requests } = await startResponsesTestProxy({
statusCode: 200,