From 85d16b5d6efd0b0304206b1b22afbe1be2fa92e3 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Thu, 7 May 2026 23:47:15 -0700 Subject: [PATCH] scripts: add Makeself Codex dev builder --- scripts/build_makeself_codex.py | 434 ++++++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100755 scripts/build_makeself_codex.py diff --git a/scripts/build_makeself_codex.py b/scripts/build_makeself_codex.py new file mode 100755 index 0000000000..d95b76d371 --- /dev/null +++ b/scripts/build_makeself_codex.py @@ -0,0 +1,434 @@ +#!/usr/bin/env python3 +"""Build a self-extracting Codex dev artifact with Makeself. + +This script is intentionally local-dev tooling, not release packaging. The +initial workflow it supports is: build a host-platform `codex` binary, wrap it +in one executable-looking `.run` file, copy that file into a container or other +test environment, and run it there without needing a separate install step. + +The runtime extraction location is the most important design constraint. Do not +let generated archives extract into `/tmp`, `/private/tmp`, `$TMPDIR`, or other +system temp locations. Codex treats running from temp directories as unsafe +because an agent with write access to the workspace could modify the executable +that is currently running. Generated archives should instead extract under +`$HOME/.cache/codex-dev`, which is deliberately separate from the normal +`$HOME/.cache/codex` cache used elsewhere by Codex. + +The extraction directory is content-addressed from the staged payload. That +means the same `.run` file extracts once and then reuses the cached tree, while +each changed local build gets a new directory. Prefer hashing the staged bytes +over adding random UUIDs or trying to infer repository state; the payload is +the thing that must be isolated. The `.run` file itself does not need to be +bit-for-bit reproducible for this cache key to be stable. + +The script patches the installed Makeself header at build time rather than +vendoring Makeself into this repository. The patch preserves Makeself maintenance +operations, but routes normal Codex CLI flags such as `--help` and `--version` +to the embedded `codex` binary so the generated file behaves like the tool it +wraps. Makeself help is moved to `--makeself-help`. + +Compression defaults to `none` (`makeself --nocomp`). This keeps the generated +file directly executable without requiring a decompressor in the target +environment and leaves room for external transport compression. Callers can opt +into Makeself-managed compression with `--compression gzip`, `--compression zstd`, +or another supported mode when that target-side dependency is acceptable. + +Build-time staging may use temporary directories. That is fine: only the +runtime extraction cache must avoid temp locations. +""" + +from __future__ import annotations + +import argparse +import hashlib +import os +import platform +import shutil +import stat +import subprocess +import sys +import tempfile +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parent.parent +CODEX_RS = REPO_ROOT / "codex-rs" +DEFAULT_PROFILE = "dev-small" +DEFAULT_COMPRESSION = "none" +DEFAULT_CACHE_ROOT = "$HOME/.cache/codex-dev" +COMPRESSION_CHOICES = ( + "none", + "gzip", + "pigz", + "zstd", + "bzip2", + "pbzip2", + "bzip3", + "xz", + "lzo", + "lz4", + "compress", +) +COMPLETE_SENTINEL = ".codex-makeself-complete" +RUNNER_NAME = "run-codex" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + parser.add_argument( + "--profile", + default=DEFAULT_PROFILE, + help=f"Cargo profile to build with. Default: {DEFAULT_PROFILE}.", + ) + parser.add_argument( + "--output", + type=Path, + default=None, + help="Path to write the generated Makeself archive. Default: dist/codex-dev/codex-dev.run.", + ) + parser.add_argument( + "--compression", + choices=COMPRESSION_CHOICES, + default=DEFAULT_COMPRESSION, + help=( + "Compression mode for the embedded Makeself payload. " + f"Default: {DEFAULT_COMPRESSION}." + ), + ) + parser.add_argument( + "--cache-root", + default=DEFAULT_CACHE_ROOT, + help=( + "Runtime cache root for extracted builds. Shell variables are preserved " + f"in the generated artifact. Default: {DEFAULT_CACHE_ROOT}." + ), + ) + parser.add_argument( + "--include-bwrap", + choices=("auto", "always", "never"), + default="auto", + help="Whether to build and bundle bwrap. Default: auto, which includes it on Linux.", + ) + parser.add_argument( + "--skip-cargo-build", + action="store_true", + help="Use existing Cargo build outputs instead of invoking cargo build.", + ) + parser.add_argument( + "--keep-staging-dir", + action="store_true", + help="Keep the temporary staged payload directory for inspection.", + ) + parser.add_argument( + "--makeself", + default="makeself", + help="Path to the makeself executable. Default: makeself from PATH.", + ) + parser.add_argument( + "--makeself-header", + type=Path, + default=None, + help="Path to makeself-header.sh. Default: infer from the makeself installation.", + ) + return parser.parse_args() + + +def run_command(cmd: list[str], cwd: Path) -> None: + print("+", " ".join(cmd), flush=True) + subprocess.run(cmd, cwd=cwd, check=True) + + +def cargo_profile_output_dir(profile_name: str) -> Path: + match profile_name: + case "dev": + profile_dir = "debug" + case "release": + profile_dir = "release" + case _: + profile_dir = profile_name + return CODEX_RS / "target" / profile_dir + + +def host_executable_name(name: str) -> str: + if os.name == "nt": + return f"{name}.exe" + return name + + +def should_include_bwrap(mode: str) -> bool: + match mode: + case "always": + return True + case "never": + return False + case "auto": + return platform.system() == "Linux" + case _: + raise ValueError(f"unexpected bwrap mode: {mode}") + + +def makeself_compression_arg(compression: str) -> str: + if compression == "none": + return "--nocomp" + return f"--{compression}" + + +def validate_cache_root(cache_root: str) -> None: + normalized = cache_root.rstrip("/") + forbidden_roots = { + "/tmp", + "/private/tmp", + "/var/tmp", + "/var/folders", + "/private/var/folders", + "$TMPDIR", + "${TMPDIR}", + "${TMPDIR:-/tmp}", + "${TMPDIR-/tmp}", + } + system_temp = Path(tempfile.gettempdir()).resolve() + forbidden_roots.add(str(system_temp)) + if normalized in forbidden_roots: + raise RuntimeError(f"Refusing to use temp directory as cache root: {cache_root}") + + forbidden_prefixes = tuple(f"{root}/" for root in sorted(forbidden_roots)) + if normalized.startswith(forbidden_prefixes): + raise RuntimeError(f"Refusing to use temp directory as cache root: {cache_root}") + + +def build_binaries(profile_name: str, include_bwrap: bool, skip_cargo_build: bool) -> None: + if skip_cargo_build: + return + + cmd = ["cargo", "build", "--profile", profile_name, "--bin", "codex"] + if include_bwrap: + cmd.extend(["--bin", "bwrap"]) + run_command(cmd, cwd=CODEX_RS) + + +def require_file(path: Path, description: str) -> None: + if not path.is_file(): + raise RuntimeError(f"Missing {description}: {path}") + + +def stage_payload(build_dir: Path, staging_dir: Path, include_bwrap: bool) -> None: + codex_name = host_executable_name("codex") + codex_src = build_dir / codex_name + require_file(codex_src, "codex binary") + shutil.copy2(codex_src, staging_dir / codex_name) + + if include_bwrap: + bwrap_src = build_dir / host_executable_name("bwrap") + require_file(bwrap_src, "bwrap binary") + resources_dir = staging_dir / "codex-resources" + resources_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(bwrap_src, resources_dir / host_executable_name("bwrap")) + + runner = staging_dir / RUNNER_NAME + runner.write_text( + "\n".join( + [ + "#!/bin/sh", + "set -eu", + f": > {COMPLETE_SENTINEL}", + f'exec ./{codex_name} "$@"', + "", + ] + ), + encoding="utf-8", + ) + runner.chmod(0o755) + + +def iter_staged_files(staging_dir: Path) -> list[Path]: + return sorted(path for path in staging_dir.rglob("*") if path.is_file()) + + +def hash_staged_tree(staging_dir: Path) -> str: + digest = hashlib.sha256() + for path in iter_staged_files(staging_dir): + relative_path = path.relative_to(staging_dir).as_posix() + mode = stat.S_IMODE(path.stat().st_mode) + digest.update(relative_path.encode("utf-8")) + digest.update(b"\0") + digest.update(f"{mode:o}".encode("ascii")) + digest.update(b"\0") + with path.open("rb") as file: + for chunk in iter(lambda: file.read(1024 * 1024), b""): + digest.update(chunk) + digest.update(b"\0") + return digest.hexdigest() + + +def infer_makeself_header(makeself: str) -> Path: + makeself_path = shutil.which(makeself) + if makeself_path is None: + candidate = Path(makeself) + if candidate.is_file(): + makeself_path = str(candidate) + else: + raise RuntimeError(f"Unable to find makeself executable: {makeself}") + + resolved = Path(makeself_path).resolve() + candidates = [ + resolved.parent / "makeself-header.sh", + resolved.parent.parent / "libexec" / "makeself-header.sh", + resolved.parent.parent / "share" / "makeself" / "makeself-header.sh", + Path("/usr/libexec/makeself-header.sh"), + Path("/usr/share/makeself/makeself-header.sh"), + Path("/usr/lib/makeself/makeself-header.sh"), + ] + for candidate in candidates: + if candidate.is_file(): + return candidate + + raise RuntimeError( + "Unable to infer makeself-header.sh. Pass --makeself-header with its path." + ) + + +def write_cached_makeself_header(source_header: Path, output_header: Path) -> None: + header = source_header.read_text(encoding="utf-8") + marker = 'if test x"\\$targetdir" = x.; then' + if marker not in header: + raise RuntimeError(f"Unable to patch Makeself header; marker not found in {source_header}") + header = pass_codex_options_through(header, source_header) + + cache_fast_path = f""" +# Codex dev artifacts use content-addressed --target directories. On cache hits, +# run the existing extraction instead of unpacking the payload again. +if test x"\\$keep" = xy -a x"\\$script" != x -a -f "\\$targetdir/{COMPLETE_SENTINEL}"; then + cd "\\$targetdir" || {{ + echo "Cannot enter cached target directory \\$targetdir" >&2 + exit 1 + }} + if test x"\\$quiet" = xn; then + echo "Using cached extraction in \\$targetdir" + fi + res=0 + if test x"\\$verbose" = xy; then + MS_Printf "OK to execute: \\$script \\$scriptargs \\$* ? [Y/n] " + read yn + if test x"\\$yn" = x -o x"\\$yn" = xy -o x"\\$yn" = xY; then + eval "\\"\\$script\\" \\$scriptargs "\\\\\\$@""; res=\\$? + fi + else + eval "\\"\\$script\\" \\$scriptargs "\\\\\\$@""; res=\\$? + fi + exit \\$res +fi + +""" + output_header.write_text(header.replace(marker, cache_fast_path + marker), encoding="utf-8") + + +def pass_codex_options_through(header: str, source_header: Path) -> str: + header = header.replace("-h | --help)", "--makeself-help)") + header = header.replace( + "\\$0 --help Print this message", + "\\$0 --makeself-help Print this message", + ) + + unrecognized_flag_block = """ -*) +\techo Unrecognized flag : "\\$1" >&2 +\tMS_Help +\texit 1 +\t;;""" + if unrecognized_flag_block not in header: + raise RuntimeError( + f"Unable to patch Makeself option parser; marker not found in {source_header}" + ) + + return header.replace( + unrecognized_flag_block, + """ -*) +\tbreak +\t;;""", + ) + + +def build_archive( + makeself: str, + compression: str, + header: Path, + staging_dir: Path, + output_path: Path, + target_dir: str, + tree_hash: str, +) -> None: + output_path.parent.mkdir(parents=True, exist_ok=True) + label = f"Codex dev build {tree_hash[:12]}" + cmd = [ + makeself, + makeself_compression_arg(compression), + "--sha256", + "--packaging-date", + f"content-sha256:{tree_hash}", + "--header", + str(header), + "--target", + target_dir, + str(staging_dir), + str(output_path), + label, + f"./{RUNNER_NAME}", + ] + run_command(cmd, cwd=REPO_ROOT) + + +def default_output_path() -> Path: + return REPO_ROOT / "dist" / "codex-dev" / "codex-dev.run" + + +def main() -> int: + args = parse_args() + validate_cache_root(args.cache_root) + include_bwrap = should_include_bwrap(args.include_bwrap) + build_binaries(args.profile, include_bwrap, args.skip_cargo_build) + + build_dir = cargo_profile_output_dir(args.profile) + output_path = args.output or default_output_path() + makeself_header = args.makeself_header or infer_makeself_header(args.makeself) + require_file(makeself_header, "makeself header") + + with tempfile.TemporaryDirectory(prefix="codex-makeself-") as temp_root_name: + temp_root = Path(temp_root_name) + staging_dir = temp_root / "payload" + staging_dir.mkdir() + patched_header = temp_root / "makeself-header-codex-cache.sh" + + stage_payload(build_dir, staging_dir, include_bwrap) + tree_hash = hash_staged_tree(staging_dir) + target_dir = f"{args.cache_root.rstrip('/')}/sha256-{tree_hash}" + write_cached_makeself_header(makeself_header, patched_header) + build_archive( + args.makeself, + args.compression, + patched_header, + staging_dir, + output_path, + target_dir, + tree_hash, + ) + + if args.keep_staging_dir: + kept_staging_dir = output_path.parent / f"payload-sha256-{tree_hash[:12]}" + if kept_staging_dir.exists(): + shutil.rmtree(kept_staging_dir) + shutil.copytree(staging_dir, kept_staging_dir, copy_function=shutil.copy2) + print(f"Kept staged payload at {kept_staging_dir}") + + print(f"Wrote {output_path}") + print(f"Payload sha256: {tree_hash}") + print(f"Runtime cache target: {target_dir}") + if not include_bwrap: + print("bwrap was not bundled; pass --include-bwrap=always to require it.") + return 0 + + +if __name__ == "__main__": + try: + sys.exit(main()) + except RuntimeError as exc: + print(f"error: {exc}", file=sys.stderr) + sys.exit(1)