mirror of
https://github.com/openai/codex.git
synced 2026-05-21 11:42:55 +00:00
435 lines
14 KiB
Python
Executable File
435 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Build a self-extracting Codex dev artifact with Makeself.
|
|
|
|
This script is intentionally local-dev tooling, not release packaging. The
|
|
initial workflow it supports is: build a host-platform `codex` binary, wrap it
|
|
in one executable-looking `.run` file, copy that file into a container or other
|
|
test environment, and run it there without needing a separate install step.
|
|
|
|
The runtime extraction location is the most important design constraint. Do not
|
|
let generated archives extract into `/tmp`, `/private/tmp`, `$TMPDIR`, or other
|
|
system temp locations. Codex treats running from temp directories as unsafe
|
|
because an agent with write access to the workspace could modify the executable
|
|
that is currently running. Generated archives should instead extract under
|
|
`$HOME/.cache/codex-dev`, which is deliberately separate from the normal
|
|
`$HOME/.cache/codex` cache used elsewhere by Codex.
|
|
|
|
The extraction directory is content-addressed from the staged payload. That
|
|
means the same `.run` file extracts once and then reuses the cached tree, while
|
|
each changed local build gets a new directory. Prefer hashing the staged bytes
|
|
over adding random UUIDs or trying to infer repository state; the payload is
|
|
the thing that must be isolated. The `.run` file itself does not need to be
|
|
bit-for-bit reproducible for this cache key to be stable.
|
|
|
|
The script patches the installed Makeself header at build time rather than
|
|
vendoring Makeself into this repository. The patch preserves Makeself maintenance
|
|
operations, but routes normal Codex CLI flags such as `--help` and `--version`
|
|
to the embedded `codex` binary so the generated file behaves like the tool it
|
|
wraps. Makeself help is moved to `--makeself-help`.
|
|
|
|
Compression defaults to `none` (`makeself --nocomp`). This keeps the generated
|
|
file directly executable without requiring a decompressor in the target
|
|
environment and leaves room for external transport compression. Callers can opt
|
|
into Makeself-managed compression with `--compression gzip`, `--compression zstd`,
|
|
or another supported mode when that target-side dependency is acceptable.
|
|
|
|
Build-time staging may use temporary directories. That is fine: only the
|
|
runtime extraction cache must avoid temp locations.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import hashlib
|
|
import os
|
|
import platform
|
|
import shutil
|
|
import stat
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
|
|
# Repository layout: the repo root is the parent of the directory holding this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
CODEX_RS = REPO_ROOT.joinpath("codex-rs")

# Defaults surfaced through the command-line flags.
DEFAULT_PROFILE = "dev-small"
DEFAULT_COMPRESSION = "none"
DEFAULT_CACHE_ROOT = "$HOME/.cache/codex-dev"

# Accepted --compression values. "none" becomes `--nocomp`; any other name is
# forwarded to makeself as `--<name>`.
COMPRESSION_CHOICES = (
    "none",
    "gzip",
    "pigz",
    "zstd",
    "bzip2",
    "pbzip2",
    "bzip3",
    "xz",
    "lzo",
    "lz4",
    "compress",
)

# Marker file the staged runner creates in its working directory; the patched
# Makeself header treats its presence in the target directory as a cache hit.
COMPLETE_SENTINEL = ".codex-makeself-complete"

# File name of the shell wrapper staged next to the codex binary.
RUNNER_NAME = "run-codex"
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse this script's command-line flags.

    The parser description is the first line of the module docstring. Options
    are registered in the order listed below, which is also the order shown in
    ``--help`` output.

    Returns:
        The populated ``argparse.Namespace``.
    """
    option_table = (
        (
            "--profile",
            {
                "default": DEFAULT_PROFILE,
                "help": f"Cargo profile to build with. Default: {DEFAULT_PROFILE}.",
            },
        ),
        (
            "--output",
            {
                "type": Path,
                "default": None,
                "help": "Path to write the generated Makeself archive. Default: dist/codex-dev/codex-dev.run.",
            },
        ),
        (
            "--compression",
            {
                "choices": COMPRESSION_CHOICES,
                "default": DEFAULT_COMPRESSION,
                "help": (
                    "Compression mode for the embedded Makeself payload. "
                    f"Default: {DEFAULT_COMPRESSION}."
                ),
            },
        ),
        (
            "--cache-root",
            {
                "default": DEFAULT_CACHE_ROOT,
                "help": (
                    "Runtime cache root for extracted builds. Shell variables are preserved "
                    f"in the generated artifact. Default: {DEFAULT_CACHE_ROOT}."
                ),
            },
        ),
        (
            "--include-bwrap",
            {
                "choices": ("auto", "always", "never"),
                "default": "auto",
                "help": "Whether to build and bundle bwrap. Default: auto, which includes it on Linux.",
            },
        ),
        (
            "--skip-cargo-build",
            {
                "action": "store_true",
                "help": "Use existing Cargo build outputs instead of invoking cargo build.",
            },
        ),
        (
            "--keep-staging-dir",
            {
                "action": "store_true",
                "help": "Keep the temporary staged payload directory for inspection.",
            },
        ),
        (
            "--makeself",
            {
                "default": "makeself",
                "help": "Path to the makeself executable. Default: makeself from PATH.",
            },
        ),
        (
            "--makeself-header",
            {
                "type": Path,
                "default": None,
                "help": "Path to makeself-header.sh. Default: infer from the makeself installation.",
            },
        ),
    )

    cli = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
|
|
|
|
|
def run_command(cmd: list[str], cwd: Path) -> None:
    """Echo ``cmd`` and run it in ``cwd``, failing if it exits non-zero.

    The echoed line is shell-quoted so that arguments containing spaces (the
    archive label, for example) stay unambiguous and the line can be pasted
    back into a shell as-is.

    Args:
        cmd: Program and arguments, passed to the OS without a shell.
        cwd: Working directory for the child process.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    import shlex  # Local import: only this helper needs shell quoting.

    print("+", shlex.join(cmd), flush=True)
    subprocess.run(cmd, cwd=cwd, check=True)
|
|
|
|
|
|
def cargo_profile_output_dir(profile_name: str) -> Path:
    """Return the Cargo output directory for ``profile_name`` under ``codex-rs/target``.

    Cargo names the output directory after the profile, except for its four
    built-in profiles: ``dev`` and ``test`` are written to ``debug``, while
    ``release`` and ``bench`` are written to ``release``.

    Args:
        profile_name: Value passed to ``cargo build --profile``.

    Returns:
        The directory in which Cargo places binaries for that profile.
    """
    builtin_profile_dirs = {
        "dev": "debug",
        "test": "debug",
        "release": "release",
        "bench": "release",
    }
    # Custom profiles (such as the default "dev-small") use their own name.
    profile_dir = builtin_profile_dirs.get(profile_name, profile_name)
    return CODEX_RS / "target" / profile_dir
|
|
|
|
|
|
def host_executable_name(name: str) -> str:
    """Return ``name`` as an executable file name for the host.

    ``.exe`` is appended when ``os.name`` is ``"nt"``; otherwise ``name`` is
    returned unchanged.
    """
    suffix = ".exe" if os.name == "nt" else ""
    return f"{name}{suffix}"
|
|
|
|
|
|
def should_include_bwrap(mode: str) -> bool:
    """Translate the ``--include-bwrap`` mode into a yes/no decision.

    Args:
        mode: ``"always"``, ``"never"``, or ``"auto"``. ``"auto"`` answers yes
            only when ``platform.system()`` reports ``"Linux"``.

    Raises:
        ValueError: If ``mode`` is not one of the three known values.
    """
    if mode == "auto":
        return platform.system() == "Linux"
    if mode in ("always", "never"):
        return mode == "always"
    raise ValueError(f"unexpected bwrap mode: {mode}")
|
|
|
|
|
|
def makeself_compression_arg(compression: str) -> str:
    """Return the makeself flag for ``compression``.

    ``"none"`` maps to ``--nocomp``; every other value is passed through as
    ``--<compression>``.
    """
    return "--nocomp" if compression == "none" else f"--{compression}"
|
|
|
|
|
|
def validate_cache_root(cache_root: str) -> None:
    """Reject cache roots that are, or live under, a temp directory.

    The check is purely textual (shell variables in ``cache_root`` are not
    expanded): trailing slashes are stripped, then the value is compared with
    a list of well-known temp locations, common ``$TMPDIR`` spellings, and the
    host's own temp directory.

    Args:
        cache_root: Runtime extraction root as given on the command line.

    Raises:
        RuntimeError: If ``cache_root`` equals a forbidden root or is nested
            below one.
    """
    normalized = cache_root.rstrip("/")
    system_temp = tempfile.gettempdir()
    forbidden_roots = {
        "/tmp",
        "/private/tmp",
        "/var/tmp",
        "/var/folders",
        "/private/var/folders",
        "$TMPDIR",
        "${TMPDIR}",
        "${TMPDIR:-/tmp}",
        "${TMPDIR-/tmp}",
        # Forbid the host temp dir under both spellings: as configured (it may
        # be a symlink or otherwise non-canonical) and fully resolved. A user
        # is most likely to type the configured spelling.
        system_temp.rstrip("/") or system_temp,
        str(Path(system_temp).resolve()),
    }

    is_temp_location = any(
        normalized == root or normalized.startswith(f"{root}/")
        for root in forbidden_roots
    )
    if is_temp_location:
        raise RuntimeError(f"Refusing to use temp directory as cache root: {cache_root}")
|
|
|
|
|
|
def build_binaries(profile_name: str, include_bwrap: bool, skip_cargo_build: bool) -> None:
    """Run ``cargo build`` for the binaries that go into the payload.

    Args:
        profile_name: Cargo profile passed via ``--profile``.
        include_bwrap: Also build the ``bwrap`` binary when true.
        skip_cargo_build: When true, do nothing and rely on existing outputs.
    """
    if not skip_cargo_build:
        binaries = ["codex", "bwrap"] if include_bwrap else ["codex"]
        cargo_cmd = ["cargo", "build", "--profile", profile_name]
        for binary in binaries:
            cargo_cmd += ["--bin", binary]
        run_command(cargo_cmd, cwd=CODEX_RS)
|
|
|
|
|
|
def require_file(path: Path, description: str) -> None:
    """Ensure ``path`` is an existing regular file.

    Args:
        path: Location to check.
        description: Human-readable name used in the error message.

    Raises:
        RuntimeError: If ``path`` is missing or is not a file.
    """
    if path.is_file():
        return
    raise RuntimeError(f"Missing {description}: {path}")
|
|
|
|
|
|
def stage_payload(build_dir: Path, staging_dir: Path, include_bwrap: bool) -> None:
    """Populate ``staging_dir`` with everything the archive should contain.

    The codex binary is copied to the top of the staging tree, bwrap
    (optionally) into ``codex-resources/``, and a small ``/bin/sh`` runner is
    written that creates the completion sentinel and then execs codex with the
    caller's arguments.

    Args:
        build_dir: Cargo output directory holding the built binaries.
        staging_dir: Existing directory that receives the payload.
        include_bwrap: Whether to bundle the bwrap binary as well.

    Raises:
        RuntimeError: If a required binary is missing from ``build_dir``.
    """
    codex_name = host_executable_name("codex")
    codex_binary = build_dir / codex_name
    require_file(codex_binary, "codex binary")
    shutil.copy2(codex_binary, staging_dir / codex_name)

    if include_bwrap:
        bwrap_name = host_executable_name("bwrap")
        bwrap_binary = build_dir / bwrap_name
        require_file(bwrap_binary, "bwrap binary")
        resources = staging_dir / "codex-resources"
        resources.mkdir(parents=True, exist_ok=True)
        shutil.copy2(bwrap_binary, resources / bwrap_name)

    # The runner touches the sentinel before handing control to codex.
    runner_script = (
        "#!/bin/sh\n"
        "set -eu\n"
        f": > {COMPLETE_SENTINEL}\n"
        f'exec ./{codex_name} "$@"\n'
    )
    runner_path = staging_dir / RUNNER_NAME
    runner_path.write_text(runner_script, encoding="utf-8")
    runner_path.chmod(0o755)
|
|
|
|
|
|
def iter_staged_files(staging_dir: Path) -> list[Path]:
    """Return every file below ``staging_dir``, recursively, in sorted order.

    Directories are skipped; only entries for which ``Path.is_file()`` is true
    are returned.
    """
    files = [entry for entry in staging_dir.rglob("*") if entry.is_file()]
    files.sort()
    return files
|
|
|
|
|
|
def hash_staged_tree(staging_dir: Path) -> str:
    """Compute a SHA-256 digest over the staged payload.

    For each staged file, in sorted order, the digest absorbs the POSIX-style
    relative path, the permission bits in octal, and the file contents, each
    followed by a NUL byte.

    Returns:
        The hexadecimal digest string.
    """
    hasher = hashlib.sha256()
    chunk_size = 1024 * 1024
    for staged in iter_staged_files(staging_dir):
        name = staged.relative_to(staging_dir).as_posix()
        permissions = stat.S_IMODE(staged.stat().st_mode)
        # Path and mode header; the octal digits are ASCII, so one UTF-8
        # encode yields the same bytes as encoding the parts separately.
        hasher.update(f"{name}\0{permissions:o}\0".encode("utf-8"))
        with staged.open("rb") as stream:
            while chunk := stream.read(chunk_size):
                hasher.update(chunk)
        hasher.update(b"\0")
    return hasher.hexdigest()
|
|
|
|
|
|
def infer_makeself_header(makeself: str) -> Path:
    """Locate ``makeself-header.sh`` for the given makeself executable.

    ``makeself`` is looked up with ``shutil.which`` first and then as a plain
    file path. The header is searched for next to the resolved executable,
    under the sibling ``libexec`` and ``share/makeself`` directories of its
    install prefix, and finally in a few fixed system locations.

    Raises:
        RuntimeError: If the executable or the header cannot be found.
    """
    located = shutil.which(makeself)
    if located is None:
        fallback = Path(makeself)
        if not fallback.is_file():
            raise RuntimeError(f"Unable to find makeself executable: {makeself}")
        located = str(fallback)

    install_dir = Path(located).resolve().parent
    prefix = install_dir.parent
    search_order = (
        install_dir / "makeself-header.sh",
        prefix / "libexec" / "makeself-header.sh",
        prefix / "share" / "makeself" / "makeself-header.sh",
        Path("/usr/libexec/makeself-header.sh"),
        Path("/usr/share/makeself/makeself-header.sh"),
        Path("/usr/lib/makeself/makeself-header.sh"),
    )
    found = next((option for option in search_order if option.is_file()), None)
    if found is None:
        raise RuntimeError(
            "Unable to infer makeself-header.sh. Pass --makeself-header with its path."
        )
    return found
|
|
|
|
|
|
def write_cached_makeself_header(source_header: Path, output_header: Path) -> None:
    """Write a patched copy of the Makeself header that reuses cached extractions.

    The installed header is read, its option parser is adjusted by
    ``pass_codex_options_through``, and a shell fast path is inserted directly
    before the ``if test x"\\$targetdir" = x.; then`` line. When the archive
    was built with a kept target directory, has an embedded script, and the
    completion sentinel already exists in the target directory, the fast path
    runs the embedded script from there and exits instead of unpacking again.

    Args:
        source_header: Path to the installed ``makeself-header.sh``.
        output_header: Destination for the patched header.

    Raises:
        RuntimeError: If the insertion marker (or the option-parser marker
            checked by ``pass_codex_options_through``) is not found.
    """
    header = source_header.read_text(encoding="utf-8")
    marker = 'if test x"\\$targetdir" = x.; then'
    if marker not in header:
        raise RuntimeError(f"Unable to patch Makeself header; marker not found in {source_header}")
    header = pass_codex_options_through(header, source_header)

    # The header is itself a shell here-document template, hence the escaped
    # `\$`. The `eval` lines wrap the arguments in escaped quotes
    # (`\"\\\$@\"`), matching Makeself's own invocation. Without those quotes
    # `$@` is expanded unquoted inside `eval`, so arguments containing spaces
    # or glob characters would be re-split on cached runs only.
    cache_fast_path = f"""
# Codex dev artifacts use content-addressed --target directories. On cache hits,
# run the existing extraction instead of unpacking the payload again.
if test x"\\$keep" = xy -a x"\\$script" != x -a -f "\\$targetdir/{COMPLETE_SENTINEL}"; then
    cd "\\$targetdir" || {{
        echo "Cannot enter cached target directory \\$targetdir" >&2
        exit 1
    }}
    if test x"\\$quiet" = xn; then
        echo "Using cached extraction in \\$targetdir"
    fi
    res=0
    if test x"\\$verbose" = xy; then
        MS_Printf "OK to execute: \\$script \\$scriptargs \\$* ? [Y/n] "
        read yn
        if test x"\\$yn" = x -o x"\\$yn" = xy -o x"\\$yn" = xY; then
            eval "\\"\\$script\\" \\$scriptargs \\"\\\\\\$@\\""; res=\\$?
        fi
    else
        eval "\\"\\$script\\" \\$scriptargs \\"\\\\\\$@\\""; res=\\$?
    fi
    exit \\$res
fi

"""
    output_header.write_text(header.replace(marker, cache_fast_path + marker), encoding="utf-8")
|
|
|
|
|
|
def pass_codex_options_through(header: str, source_header: Path) -> str:
    """Patch the Makeself option parser so unknown flags reach the wrapped binary.

    Three edits are applied to the header text:

    * the ``-h | --help)`` case label becomes ``--makeself-help)``;
    * the matching usage line is renamed to ``--makeself-help``, keeping
      whatever column padding the installed header uses;
    * the "Unrecognized flag" case (``-*)``) is replaced with a plain
      ``break`` so option parsing stops and the remaining arguments are left
      for the embedded script.

    Matching tolerates differences in horizontal whitespace, so the patch does
    not depend on the exact indentation of the installed header.

    Args:
        header: Contents of ``makeself-header.sh``.
        source_header: Path the contents came from; used in error messages only.

    Returns:
        The patched header text.

    Raises:
        RuntimeError: If the "Unrecognized flag" case cannot be located.
    """
    import re  # Local import: only this helper needs pattern matching.

    header = header.replace("-h | --help)", "--makeself-help)")
    header = re.sub(
        r"\\\$0 --help([ \t]+)Print this message",
        lambda found: "\\$0 --makeself-help" + found.group(1) + "Print this message",
        header,
    )

    unrecognized_flag_block = re.compile(
        r"^(?P<indent>[ \t]*)-\*\)[ \t]*\n"
        r'[ \t]*echo Unrecognized flag : "\\\$1" >&2[ \t]*\n'
        r"[ \t]*MS_Help[ \t]*\n"
        r"[ \t]*exit 1[ \t]*\n"
        r"[ \t]*;;",
        re.MULTILINE,
    )
    # Keep the case label at its original indentation; only the body changes.
    patched, replacements = unrecognized_flag_block.subn(
        lambda found: f"{found.group('indent')}-*)\n\tbreak\n\t;;",
        header,
    )
    if replacements == 0:
        raise RuntimeError(
            f"Unable to patch Makeself option parser; marker not found in {source_header}"
        )
    return patched
|
|
|
|
|
|
def build_archive(
    makeself: str,
    compression: str,
    header: Path,
    staging_dir: Path,
    output_path: Path,
    target_dir: str,
    tree_hash: str,
) -> None:
    """Invoke makeself to wrap ``staging_dir`` into a self-extracting archive.

    Args:
        makeself: makeself executable name or path.
        compression: One of the supported compression modes.
        header: Patched Makeself header to embed.
        staging_dir: Directory whose contents become the payload.
        output_path: Where the ``.run`` file is written; parent directories
            are created as needed.
        target_dir: Extraction directory passed to makeself via ``--target``.
        tree_hash: Payload digest, used for the label and the packaging-date
            field.

    Raises:
        subprocess.CalledProcessError: If makeself exits with a non-zero status.
    """
    # makeself is launched with cwd=REPO_ROOT, so every path is anchored to
    # the caller's working directory first. Otherwise a relative --output
    # would have its directory created here but be written relative to the
    # repository root.
    output_path = output_path.resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    label = f"Codex dev build {tree_hash[:12]}"
    cmd = [
        makeself,
        makeself_compression_arg(compression),
        "--sha256",
        # Use the content hash in place of a wall-clock packaging date.
        "--packaging-date",
        f"content-sha256:{tree_hash}",
        "--header",
        str(header.resolve()),
        "--target",
        target_dir,
        str(staging_dir.resolve()),
        str(output_path),
        label,
        f"./{RUNNER_NAME}",
    ]
    run_command(cmd, cwd=REPO_ROOT)
|
|
|
|
|
|
def default_output_path() -> Path:
    """Return the archive path used when ``--output`` is not given.

    This is ``dist/codex-dev/codex-dev.run`` under the repository root.
    """
    return REPO_ROOT.joinpath("dist", "codex-dev", "codex-dev.run")
|
|
|
|
|
|
def main() -> int:
    """Build the binaries, stage the payload, and write the Makeself archive.

    Steps, in order: parse flags, validate the cache root, build with Cargo
    (unless skipped), locate the Makeself header, stage and hash the payload in
    a temporary directory, patch the header, run makeself, and optionally copy
    the staged payload next to the output for inspection.

    Returns:
        ``0`` on success. Failures surface as exceptions.
    """
    options = parse_args()
    validate_cache_root(options.cache_root)
    bundle_bwrap = should_include_bwrap(options.include_bwrap)
    build_binaries(options.profile, bundle_bwrap, options.skip_cargo_build)

    build_dir = cargo_profile_output_dir(options.profile)
    output_path = options.output or default_output_path()
    source_header = options.makeself_header or infer_makeself_header(options.makeself)
    require_file(source_header, "makeself header")

    # Build-time staging may live in a temp dir; only the runtime cache may not.
    with tempfile.TemporaryDirectory(prefix="codex-makeself-") as scratch_name:
        scratch = Path(scratch_name)
        payload_dir = scratch / "payload"
        payload_dir.mkdir()
        patched_header = scratch / "makeself-header-codex-cache.sh"

        stage_payload(build_dir, payload_dir, bundle_bwrap)
        tree_hash = hash_staged_tree(payload_dir)
        # Content-addressed extraction directory below the cache root.
        target_dir = f"{options.cache_root.rstrip('/')}/sha256-{tree_hash}"
        write_cached_makeself_header(source_header, patched_header)
        build_archive(
            options.makeself,
            options.compression,
            patched_header,
            payload_dir,
            output_path,
            target_dir,
            tree_hash,
        )

        if options.keep_staging_dir:
            kept_copy = output_path.parent / f"payload-sha256-{tree_hash[:12]}"
            # Replace any copy left behind by an earlier build of the same payload.
            if kept_copy.exists():
                shutil.rmtree(kept_copy)
            shutil.copytree(payload_dir, kept_copy, copy_function=shutil.copy2)
            print(f"Kept staged payload at {kept_copy}")

    print(f"Wrote {output_path}")
    print(f"Payload sha256: {tree_hash}")
    print(f"Runtime cache target: {target_dir}")
    if not bundle_bwrap:
        print("bwrap was not bundled; pass --include-bwrap=always to require it.")
    return 0
|
|
|
|
|
|
# Script entry point: report RuntimeError as a one-line message on stderr and
# exit with status 1; otherwise exit with whatever main() returns.
if __name__ == "__main__":
    try:
        exit_code = main()
    except RuntimeError as exc:
        print(f"error: {exc}", file=sys.stderr)
        exit_code = 1
    sys.exit(exit_code)
|