From 7f4d7ae3a45473cfe1e6e1e497a51f34f7b089d4 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Tue, 19 May 2026 12:54:03 -0700 Subject: [PATCH] build: add Codex package builder (#23513) ## Why Codex CLI packaging is currently split across npm staging, standalone installers, and release bundle creation, which makes it hard to define and validate a single valid package directory. This adds the first standalone package builder so later release paths can converge on the same canonical layout. ## What changed - Added `scripts/build_codex_package.py` as the stable executable wrapper around `scripts/codex_package`. - Added modules for CLI parsing, target metadata, grouped cargo builds, package layout validation, and archive writing. - The builder creates a package directory with `codex-package.json`, `bin/`, `codex-resources/`, and `codex-path`, and can serialize it as `.tar.gz`, `.tar.zst`, or `.zip`. - Source-built artifacts are built by one grouped `cargo build`: `codex` for all targets, `bwrap` for Linux, and the Windows sandbox helpers for Windows. `rg` remains an input because it is vendored from upstream rather than built from this repo. - Added `scripts/codex_package/README.md` to document the package layout, source-built artifacts, and cargo profile behavior. ## Verification - Ran wrapper/module syntax compilation. - Ran `scripts/build_codex_package.py --help` from `/private/tmp`. - Ran fake-cargo package/archive builds for macOS, Linux, and Windows target layouts, including an assertion that generated tar archives contain no duplicate member names. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/23513). 
* #23526 * __->__ #23513 --- scripts/build_codex_package.py | 16 ++++ scripts/codex_package/README.md | 39 ++++++++ scripts/codex_package/__init__.py | 1 + scripts/codex_package/archive.py | 87 +++++++++++++++++ scripts/codex_package/cargo.py | 106 +++++++++++++++++++++ scripts/codex_package/cli.py | 105 ++++++++++++++++++++ scripts/codex_package/layout.py | 153 ++++++++++++++++++++++++++++++ scripts/codex_package/targets.py | 114 ++++++++++++++++++++++ 8 files changed, 621 insertions(+) create mode 100755 scripts/build_codex_package.py create mode 100644 scripts/codex_package/README.md create mode 100644 scripts/codex_package/__init__.py create mode 100644 scripts/codex_package/archive.py create mode 100644 scripts/codex_package/cargo.py create mode 100644 scripts/codex_package/cli.py create mode 100644 scripts/codex_package/layout.py create mode 100644 scripts/codex_package/targets.py diff --git a/scripts/build_codex_package.py b/scripts/build_codex_package.py new file mode 100755 index 0000000000..8e1097c488 --- /dev/null +++ b/scripts/build_codex_package.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +"""Build a canonical Codex package directory and optional archive.""" + +from pathlib import Path +import sys + + +# Some developer environments set PYTHONSAFEPATH=1, which prevents Python from +# adding the script directory to sys.path. Add it explicitly so the local helper +# package remains importable when this executable is launched from any cwd. +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from codex_package.cli import main + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/codex_package/README.md b/scripts/codex_package/README.md new file mode 100644 index 0000000000..8c3c7ac58b --- /dev/null +++ b/scripts/codex_package/README.md @@ -0,0 +1,39 @@ +# Codex package builder + +This package contains the implementation behind `scripts/build_codex_package.py`. 
+The top-level script is the stable executable entry point; these modules keep the +package-building logic split by responsibility. + +The builder creates a canonical Codex package directory: + +```text +. +├── codex-package.json +├── bin +│ └── codex[.exe] +├── codex-resources +│ ├── bwrap # Linux only +│ ├── codex-command-runner.exe # Windows only +│ └── codex-windows-sandbox-setup.exe # Windows only +└── codex-path + └── rg[.exe] +``` + +The package directory is the primary artifact. Archive formats such as +`.tar.gz`, `.tar.zst`, and `.zip` are serializations of that directory. + +## Source-built artifacts + +Artifacts built from this repository are always built by the package builder in +one grouped `cargo build` command per package: + +- all targets: `codex` +- Linux targets: `bwrap` +- Windows targets: `codex-command-runner` and `codex-windows-sandbox-setup` + +The default cargo profile is `dev-small` because local iteration should favor +fast, small builds. Release jobs should pass `--cargo-profile release`. + +`rg` is not built from this repository, so it remains an input. If `--rg-bin` is +omitted, the builder looks in the existing `codex-cli/vendor//path/` +location. 
diff --git a/scripts/codex_package/__init__.py b/scripts/codex_package/__init__.py
new file mode 100644
index 0000000000..bf8a68ab78
--- /dev/null
+++ b/scripts/codex_package/__init__.py
@@ -0,0 +1 @@
+"""Helpers for building canonical Codex package archives."""
diff --git a/scripts/codex_package/archive.py b/scripts/codex_package/archive.py
new file mode 100644
index 0000000000..fe09c0a4f7
--- /dev/null
+++ b/scripts/codex_package/archive.py
@@ -0,0 +1,114 @@
+"""Archive writers for canonical Codex package directories."""
+
+import shutil
+import subprocess
+import tarfile
+import tempfile
+import zipfile
+from pathlib import Path
+
+
+def write_archive(package_dir: Path, archive_path: Path, *, force: bool) -> None:
+    """Serialize ``package_dir`` into the archive file ``archive_path``.
+
+    The archive format is chosen from the suffix of ``archive_path``.
+
+    Raises:
+        RuntimeError: if ``archive_path`` is inside ``package_dir``, has an
+            unsupported suffix, or already exists while ``force`` is false.
+    """
+    if is_relative_to(archive_path, package_dir):
+        raise RuntimeError(
+            f"Archive output must be outside the package directory: {archive_path}"
+        )
+
+    # Resolve the format before touching the filesystem so that an unsupported
+    # suffix cannot delete an existing file when ``force`` is set.
+    archive_format = archive_format_for_path(archive_path)
+
+    archive_path.parent.mkdir(parents=True, exist_ok=True)
+    if archive_path.exists():
+        if not force:
+            raise RuntimeError(f"Archive output already exists: {archive_path}")
+        archive_path.unlink()
+
+    if archive_format == "tar.gz":
+        write_tar_archive(package_dir, archive_path, mode="w:gz")
+    elif archive_format == "tar.zst":
+        write_tar_zst_archive(package_dir, archive_path)
+    elif archive_format == "zip":
+        write_zip_archive(package_dir, archive_path)
+    else:
+        raise AssertionError(f"unexpected archive format: {archive_format}")
+
+
+def is_relative_to(path: Path, parent: Path) -> bool:
+    """Return whether ``path`` equals ``parent`` or lies beneath it."""
+    try:
+        path.relative_to(parent)
+        return True
+    except ValueError:
+        return False
+
+
+def archive_format_for_path(path: Path) -> str:
+    """Return ``"tar.gz"``, ``"tar.zst"``, or ``"zip"`` for an archive file name.
+
+    Raises:
+        RuntimeError: if the suffix is not .tar.gz, .tgz, .tar.zst, or .zip.
+    """
+    suffixes = path.suffixes
+    if suffixes[-2:] == [".tar", ".gz"] or path.suffix == ".tgz":
+        return "tar.gz"
+    if suffixes[-2:] == [".tar", ".zst"]:
+        return "tar.zst"
+    if path.suffix == ".zip":
+        return "zip"
+    raise RuntimeError(
+        f"Unsupported archive suffix for {path}. Use .tar.gz, .tgz, .tar.zst, or .zip."
+    )
+
+
+def write_tar_archive(package_dir: Path, archive_path: Path, *, mode: str) -> None:
+    """Write every entry of ``package_dir`` to a tar archive opened with ``mode``."""
+    with tarfile.open(archive_path, mode) as archive:
+        for path in package_entries(package_dir):
+            # package_entries() already lists nested paths, so letting tarfile
+            # recurse into directories would add members more than once.
+            archive.add(
+                path,
+                arcname=path.relative_to(package_dir),
+                recursive=False,
+            )
+
+
+def write_tar_zst_archive(package_dir: Path, archive_path: Path) -> None:
+    """Write a ``.tar.zst`` archive by compressing a temporary tar with ``zstd``.
+
+    Raises:
+        RuntimeError: if no ``zstd`` executable is found on ``PATH``.
+    """
+    zstd = shutil.which("zstd")
+    if zstd is None:
+        raise RuntimeError("zstd is required to write .tar.zst archives.")
+
+    with tempfile.TemporaryDirectory(prefix="codex-package-archive-") as temp_dir_str:
+        tar_path = Path(temp_dir_str) / "package.tar"
+        write_tar_archive(package_dir, tar_path, mode="w")
+        subprocess.check_call([zstd, "-T0", "-19", "-f", str(tar_path), "-o", str(archive_path)])
+
+
+def write_zip_archive(package_dir: Path, archive_path: Path) -> None:
+    """Write every entry of ``package_dir`` to a deflate-compressed zip archive."""
+    with zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
+        for path in package_entries(package_dir):
+            relative_path = path.relative_to(package_dir)
+            if path.is_dir():
+                archive.write(path, f"{relative_path}/")
+            else:
+                archive.write(path, relative_path)
+
+
+def package_entries(package_dir: Path) -> list[Path]:
+    """Return every path under ``package_dir``, sorted by relative POSIX path."""
+    return sorted(package_dir.rglob("*"), key=lambda path: path.relative_to(package_dir).as_posix())
diff --git a/scripts/codex_package/cargo.py b/scripts/codex_package/cargo.py
new file mode 100644
index 0000000000..9ee2d2f832
--- /dev/null
+++ b/scripts/codex_package/cargo.py
@@ -0,0 +1,132 @@
+"""Cargo builds for source-built Codex package artifacts."""
+
+import os
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+from .targets import REPO_ROOT
+from .targets import TargetSpec
+
+
+CODEX_RS_ROOT = REPO_ROOT / "codex-rs"
+
+
+@dataclass(frozen=True)
+class SourceBuildOutputs:
+    """Paths of the binaries produced by one grouped ``cargo build``."""
+
+    codex_bin: Path
+    # Populated only for Linux targets.
+    bwrap_bin: Path | None
+    # Populated only for Windows targets.
+    codex_command_runner_bin: Path | None
+    codex_windows_sandbox_setup_bin: Path | None
+
+
+def build_source_binaries(
+    spec: TargetSpec,
+    *,
+    cargo: str,
+    profile: str,
+) -> SourceBuildOutputs:
+    """Build all source-built binaries for ``spec`` with one ``cargo build``.
+
+    Runs ``cargo`` in ``CODEX_RS_ROOT`` and returns the expected output paths.
+
+    Raises:
+        subprocess.CalledProcessError: if cargo exits with a non-zero status.
+        RuntimeError: if an expected binary is missing after the build.
+    """
+    binaries = source_binaries_for_target(spec)
+    cmd = [
+        cargo,
+        "build",
+        "--target",
+        spec.target,
+        "--profile",
+        profile,
+    ]
+    for binary in binaries:
+        cmd.extend(["--bin", binary])
+
+    print("+", " ".join(cmd))
+    subprocess.run(cmd, cwd=CODEX_RS_ROOT, check=True)
+
+    output_dir = cargo_profile_output_dir(spec, profile)
+    outputs = SourceBuildOutputs(
+        codex_bin=output_dir / spec.codex_name,
+        bwrap_bin=output_dir / "bwrap" if spec.is_linux else None,
+        codex_command_runner_bin=(
+            output_dir / "codex-command-runner.exe" if spec.is_windows else None
+        ),
+        codex_windows_sandbox_setup_bin=(
+            output_dir / "codex-windows-sandbox-setup.exe" if spec.is_windows else None
+        ),
+    )
+    validate_source_outputs(outputs)
+    return outputs
+
+
+def source_binaries_for_target(spec: TargetSpec) -> list[str]:
+    """Return the cargo ``--bin`` names to build for ``spec``."""
+    binaries = ["codex"]
+    if spec.is_linux:
+        binaries.append("bwrap")
+    if spec.is_windows:
+        binaries.extend(
+            [
+                "codex-command-runner",
+                "codex-windows-sandbox-setup",
+            ]
+        )
+    return binaries
+
+
+def cargo_profile_output_dir(spec: TargetSpec, profile: str) -> Path:
+    """Return the directory cargo writes binaries to for ``spec`` and ``profile``."""
+    target_dir = cargo_target_dir()
+    return target_dir / spec.target / cargo_profile_dirname(profile)
+
+
+def cargo_target_dir() -> Path:
+    """Return the cargo target directory used for builds run in ``CODEX_RS_ROOT``.
+
+    ``CARGO_TARGET_DIR`` wins when set; a relative value is joined onto
+    ``CODEX_RS_ROOT``, which is the working directory cargo is run from.
+    """
+    target_dir = os.environ.get("CARGO_TARGET_DIR")
+    if target_dir is None:
+        return CODEX_RS_ROOT / "target"
+
+    path = Path(target_dir)
+    if path.is_absolute():
+        return path
+
+    return CODEX_RS_ROOT / path
+
+
+def cargo_profile_dirname(profile: str) -> str:
+    """Return the output directory name cargo uses for ``profile``.
+
+    Cargo writes the built-in ``dev`` and ``test`` profiles to ``debug`` and the
+    built-in ``release`` and ``bench`` profiles to ``release``; a custom profile
+    uses a directory named after itself.
+    """
+    if profile in ("dev", "test"):
+        return "debug"
+    if profile in ("release", "bench"):
+        return "release"
+    return profile
+
+
+def validate_source_outputs(outputs: SourceBuildOutputs) -> None:
+    """Raise ``RuntimeError`` if any expected build output is not a file."""
+    for path in [
+        outputs.codex_bin,
+        outputs.bwrap_bin,
+        outputs.codex_command_runner_bin,
+        outputs.codex_windows_sandbox_setup_bin,
+    ]:
+        if path is not None and not path.is_file():
+            raise RuntimeError(f"cargo build did not produce expected binary: {path}")
diff --git a/scripts/codex_package/cli.py b/scripts/codex_package/cli.py
new file mode 100644
index 0000000000..beddffa3b9
--- /dev/null
+++ b/scripts/codex_package/cli.py
@@ -0,0 +1,117 @@
+"""Command-line interface for building Codex package directories."""
+
+import argparse
+from pathlib import Path
+
+from .archive import archive_format_for_path
+from .archive import write_archive
+from .cargo import build_source_binaries
+from .layout import build_package_dir
+from .layout import prepare_package_dir
+from .layout import validate_package_dir
+from .targets import TARGET_SPECS
+from .targets import PackageInputs
+from .targets import resolve_rg_bin
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the package builder's command-line options from ``sys.argv``."""
+    parser = argparse.ArgumentParser(
+        description="Build a canonical Codex package directory and optional archive.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--target",
+        required=True,
+        choices=sorted(TARGET_SPECS),
+        help="Rust target triple for the package.",
+    )
+    parser.add_argument(
+        "--version",
+        default="0.0.0-dev",
+        help="Codex version to record in codex-package.json.",
+    )
+    parser.add_argument(
+        "--variant",
+        default="codex",
+        help="Package variant to record in codex-package.json.",
+    )
+    parser.add_argument(
+        "--package-dir",
+        type=Path,
+        required=True,
+        help="Output directory to create as the package root.",
+    )
+    parser.add_argument(
+        "--archive-output",
+        type=Path,
+        help=(
+            "Optional archive output path. Supported suffixes: .tar.gz, .tgz, "
+            ".tar.zst, .zip."
+        ),
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Replace an existing package directory or archive output.",
+    )
+    parser.add_argument(
+        "--cargo",
+        default="cargo",
+        help="Cargo executable to use for source-built package artifacts.",
+    )
+    parser.add_argument(
+        "--cargo-profile",
+        default="dev-small",
+        help=(
+            "Cargo profile for source-built package artifacts. Use release for "
+            "release packages."
+        ),
+    )
+    parser.add_argument(
+        "--rg-bin",
+        type=Path,
+        help="Path to the ripgrep executable to place in codex-path/.",
+    )
+    return parser.parse_args()
+
+
+def main() -> int:
+    """Build the package directory and optional archive, then return ``0``.
+
+    Errors from the helper modules propagate as exceptions.
+    """
+    args = parse_args()
+    spec = TARGET_SPECS[args.target]
+    package_dir = args.package_dir.resolve()
+
+    # Check the cheap inputs before the cargo build so that a missing ripgrep
+    # binary or an unsupported archive suffix fails immediately.
+    rg_bin = resolve_rg_bin(spec, args.rg_bin)
+    archive_path = None
+    if args.archive_output is not None:
+        archive_path = args.archive_output.resolve()
+        archive_format_for_path(archive_path)
+
+    source_outputs = build_source_binaries(
+        spec,
+        cargo=args.cargo,
+        profile=args.cargo_profile,
+    )
+    inputs = PackageInputs(
+        codex_bin=source_outputs.codex_bin,
+        rg_bin=rg_bin,
+        bwrap_bin=source_outputs.bwrap_bin,
+        codex_command_runner_bin=source_outputs.codex_command_runner_bin,
+        codex_windows_sandbox_setup_bin=source_outputs.codex_windows_sandbox_setup_bin,
+    )
+    prepare_package_dir(package_dir, force=args.force)
+    build_package_dir(package_dir, args.version, args.variant, spec, inputs)
+    validate_package_dir(package_dir, spec)
+
+    if archive_path is not None:
+        write_archive(package_dir, archive_path, force=args.force)
+        print(f"Built Codex package archive at {archive_path}")
+
+    print(f"Built Codex package directory at {package_dir}")
+    return 0
diff --git a/scripts/codex_package/layout.py b/scripts/codex_package/layout.py
new file mode 100644
index 0000000000..faf24ef810
--- /dev/null
+++ b/scripts/codex_package/layout.py
@@ -0,0 +1,177 @@
+"""Canonical Codex package directory layout."""
+
+import json
+import shutil
+import stat
+from pathlib import Path
+
+from .targets import PackageInputs
+from .targets import TargetSpec
+
+
+LAYOUT_VERSION = 1
+
+
+def prepare_package_dir(package_dir: Path, *, force: bool) -> None:
+    """Ensure ``package_dir`` exists as an empty directory.
+
+    A non-empty directory is removed and recreated when ``force`` is true.
+
+    Raises:
+        RuntimeError: if ``package_dir`` exists and is not a directory, or is a
+            non-empty directory while ``force`` is false.
+    """
+    if package_dir.exists():
+        if not package_dir.is_dir():
+            raise RuntimeError(f"Package output exists and is not a directory: {package_dir}")
+        if any(package_dir.iterdir()):
+            if not force:
+                raise RuntimeError(
+                    f"Package output directory is not empty: {package_dir}. "
+                    "Pass --force to replace it."
+                )
+            shutil.rmtree(package_dir)
+
+    package_dir.mkdir(parents=True, exist_ok=True)
+
+
+def build_package_dir(
+    package_dir: Path,
+    version: str,
+    variant: str,
+    spec: TargetSpec,
+    inputs: PackageInputs,
+) -> None:
+    """Populate an empty ``package_dir`` with binaries and ``codex-package.json``.
+
+    ``package_dir`` must exist and must not yet contain ``bin``,
+    ``codex-resources``, or ``codex-path``.
+    """
+    bin_dir = package_dir / "bin"
+    resources_dir = package_dir / "codex-resources"
+    path_dir = package_dir / "codex-path"
+    bin_dir.mkdir()
+    resources_dir.mkdir()
+    path_dir.mkdir()
+
+    copy_executable(inputs.codex_bin, bin_dir / spec.codex_name, is_windows=spec.is_windows)
+    copy_executable(inputs.rg_bin, path_dir / spec.rg_name, is_windows=spec.is_windows)
+
+    if inputs.bwrap_bin is not None:
+        copy_executable(inputs.bwrap_bin, resources_dir / "bwrap", is_windows=False)
+
+    if inputs.codex_command_runner_bin is not None:
+        copy_executable(
+            inputs.codex_command_runner_bin,
+            resources_dir / "codex-command-runner.exe",
+            is_windows=True,
+        )
+
+    if inputs.codex_windows_sandbox_setup_bin is not None:
+        copy_executable(
+            inputs.codex_windows_sandbox_setup_bin,
+            resources_dir / "codex-windows-sandbox-setup.exe",
+            is_windows=True,
+        )
+
+    metadata = {
+        "layoutVersion": LAYOUT_VERSION,
+        "version": version,
+        "target": spec.target,
+        "variant": variant,
+        "entrypoint": f"bin/{spec.codex_name}",
+        "resourcesDir": "codex-resources",
+        "pathDir": "codex-path",
+    }
+    write_json(package_dir / "codex-package.json", metadata)
+
+
+def validate_package_dir(package_dir: Path, spec: TargetSpec) -> None:
+    """Check that ``package_dir`` has the canonical layout for ``spec``.
+
+    Raises:
+        RuntimeError: if a required directory or file is missing, a metadata
+            field has an unexpected value, or (for non-Windows targets) a
+            packaged binary is not executable.
+    """
+    required_dirs = [
+        Path("bin"),
+        Path("codex-resources"),
+        Path("codex-path"),
+    ]
+    for relative_dir in required_dirs:
+        path = package_dir / relative_dir
+        if not path.is_dir():
+            raise RuntimeError(f"Missing package directory: {relative_dir}")
+
+    metadata_path = package_dir / "codex-package.json"
+    if not metadata_path.is_file():
+        raise RuntimeError("Missing package metadata: codex-package.json")
+
+    with open(metadata_path, encoding="utf-8") as fh:
+        metadata = json.load(fh)
+
+    expected_metadata = {
+        "layoutVersion": LAYOUT_VERSION,
+        "target": spec.target,
+        "entrypoint": f"bin/{spec.codex_name}",
+        "resourcesDir": "codex-resources",
+        "pathDir": "codex-path",
+    }
+    for key, expected in expected_metadata.items():
+        actual = metadata.get(key)
+        if actual != expected:
+            raise RuntimeError(
+                f"Invalid package metadata field {key!r}: expected {expected!r}, got {actual!r}"
+            )
+
+    required_files = [
+        Path("bin") / spec.codex_name,
+        Path("codex-path") / spec.rg_name,
+    ]
+    # Copy the list so files that are only required are not mode-checked below.
+    executable_files = list(required_files)
+
+    if spec.is_linux:
+        required_files.append(Path("codex-resources") / "bwrap")
+        executable_files.append(Path("codex-resources") / "bwrap")
+
+    if spec.is_windows:
+        required_files.extend(
+            [
+                Path("codex-resources") / "codex-command-runner.exe",
+                Path("codex-resources") / "codex-windows-sandbox-setup.exe",
+            ]
+        )
+
+    for relative_file in required_files:
+        path = package_dir / relative_file
+        if not path.is_file():
+            raise RuntimeError(f"Missing package file: {relative_file}")
+
+    if not spec.is_windows:
+        for relative_file in executable_files:
+            path = package_dir / relative_file
+            if not is_executable(path):
+                raise RuntimeError(f"Package file is not executable: {relative_file}")
+
+
+def copy_executable(src: Path, dest: Path, *, is_windows: bool) -> None:
+    """Copy ``src`` to ``dest`` and, for non-Windows packages, mark it executable."""
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    shutil.copy2(src, dest)
+    if not is_windows:
+        mode = dest.stat().st_mode
+        dest.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+
+
+def write_json(path: Path, value: object) -> None:
+    """Write ``value`` to ``path`` as indented JSON followed by a newline."""
+    with open(path, "w", encoding="utf-8") as out:
+        json.dump(value, out, indent=2)
+        out.write("\n")
+
+
+def is_executable(path: Path) -> bool:
+    """Return whether the owner-execute bit is set on ``path``."""
+    return bool(path.stat().st_mode & stat.S_IXUSR)
diff --git a/scripts/codex_package/targets.py b/scripts/codex_package/targets.py
new file mode 100644
index 0000000000..bdba8bd766
--- /dev/null
+++ b/scripts/codex_package/targets.py
@@ -0,0 +1,134 @@
+"""Supported package targets and default binary discovery."""
+
+import stat
+from dataclasses import dataclass
+from pathlib import Path
+
+
+# This module is scripts/codex_package/targets.py, so parents[1] is scripts/.
+SCRIPT_DIR = Path(__file__).resolve().parents[1]
+REPO_ROOT = SCRIPT_DIR.parent
+
+
+@dataclass(frozen=True)
+class TargetSpec:
+    """Platform flags and binary names for one supported Rust target triple."""
+
+    target: str
+    is_windows: bool
+    is_linux: bool
+
+    @property
+    def exe_suffix(self) -> str:
+        """Return ``".exe"`` for Windows targets and ``""`` otherwise."""
+        return ".exe" if self.is_windows else ""
+
+    @property
+    def codex_name(self) -> str:
+        """Return the file name of the ``codex`` binary for this target."""
+        return f"codex{self.exe_suffix}"
+
+    @property
+    def rg_name(self) -> str:
+        """Return the file name of the ``rg`` binary for this target."""
+        return f"rg{self.exe_suffix}"
+
+
+@dataclass(frozen=True)
+class PackageInputs:
+    """Source paths of the binaries that are copied into a package directory."""
+
+    codex_bin: Path
+    rg_bin: Path
+    bwrap_bin: Path | None
+    codex_command_runner_bin: Path | None
+    codex_windows_sandbox_setup_bin: Path | None
+
+
+TARGET_SPECS: dict[str, TargetSpec] = {
+    "x86_64-unknown-linux-musl": TargetSpec(
+        target="x86_64-unknown-linux-musl",
+        is_windows=False,
+        is_linux=True,
+    ),
+    "aarch64-unknown-linux-musl": TargetSpec(
+        target="aarch64-unknown-linux-musl",
+        is_windows=False,
+        is_linux=True,
+    ),
+    "x86_64-apple-darwin": TargetSpec(
+        target="x86_64-apple-darwin",
+        is_windows=False,
+        is_linux=False,
+    ),
+    "aarch64-apple-darwin": TargetSpec(
+        target="aarch64-apple-darwin",
+        is_windows=False,
+        is_linux=False,
+    ),
+    "x86_64-pc-windows-msvc": TargetSpec(
+        target="x86_64-pc-windows-msvc",
+        is_windows=True,
+        is_linux=False,
+    ),
+    "aarch64-pc-windows-msvc": TargetSpec(
+        target="aarch64-pc-windows-msvc",
+        is_windows=True,
+        is_linux=False,
+    ),
+}
+
+
+def resolve_rg_bin(spec: TargetSpec, rg_bin: Path | None) -> Path:
+    """Resolve the ripgrep binary: ``rg_bin`` if given, else the default for ``spec``."""
+    return resolve_input_path(
+        rg_bin,
+        default_rg_candidates(spec),
+        "ripgrep executable",
+        "--rg-bin",
+    )
+
+
+def default_rg_candidates(spec: TargetSpec) -> list[Path]:
+    """Return the in-repo locations searched for ripgrep when none is given."""
+    return [
+        REPO_ROOT / "codex-cli" / "vendor" / spec.target / "path" / spec.rg_name,
+    ]
+
+
+def resolve_input_path(
+    explicit_path: Path | None,
+    default_candidates: list[Path],
+    description: str,
+    flag_name: str,
+) -> Path:
+    """Return the resolved path of an input file.
+
+    An explicit path must exist and be executable. Without one, the first
+    default candidate that exists is used.
+
+    Raises:
+        RuntimeError: if the explicit path is missing or not executable, or if
+            no explicit path is given and no default candidate exists.
+    """
+    if explicit_path is not None:
+        path = explicit_path.resolve()
+        if not path.is_file():
+            raise RuntimeError(f"{description} does not exist: {path}")
+        if not is_executable(path):
+            raise RuntimeError(f"{description} is not executable: {path}")
+        return path
+
+    for candidate in default_candidates:
+        if candidate.is_file():
+            return candidate.resolve()
+
+    candidates = "\n".join(f" - {candidate}" for candidate in default_candidates)
+    raise RuntimeError(
+        f"Could not find {description}. Pass {flag_name}, or create one of:\n{candidates}"
+    )
+
+
+def is_executable(path: Path) -> bool:
+    """Return whether any execute bit (user, group, or other) is set on ``path``."""
+    return bool(path.stat().st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))