mirror of
https://github.com/openai/codex.git
synced 2026-05-23 04:24:21 +00:00
## Why Installing `@openai/codex` currently places a Dotslash `rg` manifest at `node_modules/@openai/codex/bin/rg`, even though the native optional dependency already ships the actual helper under `vendor/<target>/codex-path/rg`. The launcher prepends that `codex-path` directory, so the top-level `bin/rg` file is redundant in the npm install. The remaining direct consumers of the manifest are package-building paths: `scripts/codex_package/ripgrep.py` and `codex-cli/scripts/install_native_deps.py`. Keeping the manifest under `codex-cli/bin` makes it look like a shipped npm binary, so this moves it next to the package-builder code that owns it. The checked-in `@openai/codex` package metadata should likewise describe only the meta package payload; generated platform packages continue to publish `vendor`. ## What Changed - Moved the Dotslash ripgrep manifest from `codex-cli/bin/rg` to `scripts/codex_package/rg`. - Updated the package builder, npm native-artifact hydrator, README, and CLI help text to reference the new manifest location. - Stopped `codex-cli/scripts/build_npm_package.py` from copying `rg` into the `@openai/codex` meta package. - Narrowed the checked-in meta package `files` whitelist to `bin/codex.js`. ## Verification - `python3 -m unittest discover -s scripts/codex_package -p "test_*.py"` - `python3 -m unittest discover -s codex-cli/scripts -p "test_*.py"` - `python3 -m py_compile codex-cli/scripts/build_npm_package.py codex-cli/scripts/install_native_deps.py scripts/codex_package/ripgrep.py scripts/codex_package/cli.py scripts/stage_npm_packages.py` - `codex-cli/scripts/build_npm_package.py --package codex --version 0.0.0-test --pack-output <tmp>/codex-meta-no-vendor.tgz` - `tar -tf <tmp>/codex-meta-no-vendor.tgz` showed only `package/bin/codex.js`, `package/package.json`, and `package/README.md`. - Direct staging check showed `codex` uses `files: ["bin/codex.js"]` while `codex-darwin-arm64` still uses `files: ["vendor"]`. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/23833). * #23836 * __->__ #23833
196 lines
6.1 KiB
Python
196 lines
6.1 KiB
Python
"""Fetch ripgrep from the DotSlash manifest used by the package builder."""
|
|
|
|
import hashlib
|
|
import json
|
|
import shutil
|
|
import stat
|
|
import tarfile
|
|
import tempfile
|
|
import zipfile
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
from urllib.request import urlopen
|
|
|
|
from .targets import REPO_ROOT
|
|
from .targets import TargetSpec
|
|
from .targets import resolve_input_path
|
|
|
|
|
|
RG_MANIFEST = REPO_ROOT / "scripts" / "codex_package" / "rg"
|
|
DOWNLOAD_TIMEOUT_SECS = 60
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class RgArtifact:
|
|
size: int
|
|
digest: str
|
|
archive_format: str
|
|
archive_member: str
|
|
url: str
|
|
|
|
|
|
def resolve_rg_bin(spec: TargetSpec, rg_bin: Path | None) -> Path:
|
|
if rg_bin is not None:
|
|
return resolve_input_path(rg_bin, "ripgrep executable", "--rg-bin")
|
|
|
|
return fetch_rg(spec)
|
|
|
|
|
|
def fetch_rg(
|
|
spec: TargetSpec,
|
|
*,
|
|
manifest_path: Path = RG_MANIFEST,
|
|
cache_root: Path | None = None,
|
|
) -> Path:
|
|
artifact = artifact_for_target(spec, manifest_path)
|
|
cache_dir = (cache_root or default_cache_root()) / f"{spec.target}-rg"
|
|
archive_path = cache_dir / archive_filename(artifact.url)
|
|
|
|
if not archive_is_valid(archive_path, artifact):
|
|
download_archive(artifact.url, archive_path)
|
|
try:
|
|
verify_archive(archive_path, artifact)
|
|
except RuntimeError:
|
|
archive_path.unlink(missing_ok=True)
|
|
raise
|
|
|
|
dest = cache_dir / spec.rg_name
|
|
extract_rg(archive_path, artifact, dest)
|
|
if not spec.is_windows:
|
|
mode = dest.stat().st_mode
|
|
dest.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
|
return dest
|
|
|
|
|
|
def artifact_for_target(spec: TargetSpec, manifest_path: Path) -> RgArtifact:
|
|
manifest = load_manifest(manifest_path)
|
|
try:
|
|
platform_info = manifest["platforms"][spec.dotslash_platform]
|
|
except KeyError as exc:
|
|
raise RuntimeError(
|
|
f"ripgrep manifest {manifest_path} is missing platform {spec.dotslash_platform!r}"
|
|
) from exc
|
|
|
|
providers = platform_info.get("providers")
|
|
if not providers:
|
|
raise RuntimeError(
|
|
f"ripgrep manifest {manifest_path} has no providers for {spec.dotslash_platform!r}"
|
|
)
|
|
|
|
hash_name = platform_info.get("hash")
|
|
if hash_name != "sha256":
|
|
raise RuntimeError(
|
|
f"Unsupported ripgrep hash {hash_name!r} for "
|
|
f"{spec.dotslash_platform!r}; expected sha256"
|
|
)
|
|
|
|
return RgArtifact(
|
|
size=int(platform_info["size"]),
|
|
digest=str(platform_info["digest"]),
|
|
archive_format=str(platform_info["format"]),
|
|
archive_member=str(platform_info["path"]),
|
|
url=str(providers[0]["url"]),
|
|
)
|
|
|
|
|
|
def load_manifest(manifest_path: Path) -> dict:
|
|
text = manifest_path.read_text(encoding="utf-8")
|
|
if text.startswith("#!"):
|
|
text = "\n".join(text.splitlines()[1:])
|
|
return json.loads(text)
|
|
|
|
|
|
def default_cache_root() -> Path:
|
|
return Path(tempfile.gettempdir()) / "codex-package"
|
|
|
|
|
|
def archive_filename(url: str) -> str:
|
|
filename = Path(urlparse(url).path).name
|
|
if not filename:
|
|
raise RuntimeError(f"Unable to determine archive filename from {url}")
|
|
return filename
|
|
|
|
|
|
def archive_is_valid(archive_path: Path, artifact: RgArtifact) -> bool:
|
|
if not archive_path.is_file():
|
|
return False
|
|
try:
|
|
verify_archive(archive_path, artifact)
|
|
except RuntimeError:
|
|
archive_path.unlink(missing_ok=True)
|
|
return False
|
|
return True
|
|
|
|
|
|
def verify_archive(archive_path: Path, artifact: RgArtifact) -> None:
|
|
actual_size = archive_path.stat().st_size
|
|
if actual_size != artifact.size:
|
|
raise RuntimeError(
|
|
f"ripgrep archive {archive_path} has size {actual_size}, expected {artifact.size}"
|
|
)
|
|
|
|
digest = hashlib.sha256()
|
|
with open(archive_path, "rb") as fh:
|
|
for chunk in iter(lambda: fh.read(1024 * 1024), b""):
|
|
digest.update(chunk)
|
|
|
|
actual_digest = digest.hexdigest()
|
|
if actual_digest != artifact.digest:
|
|
raise RuntimeError(
|
|
f"ripgrep archive {archive_path} has sha256 {actual_digest}, "
|
|
f"expected {artifact.digest}"
|
|
)
|
|
|
|
|
|
def download_archive(url: str, archive_path: Path) -> None:
|
|
archive_path.parent.mkdir(parents=True, exist_ok=True)
|
|
temp_path = archive_path.with_suffix(f"{archive_path.suffix}.tmp")
|
|
temp_path.unlink(missing_ok=True)
|
|
try:
|
|
with urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECS) as response:
|
|
with open(temp_path, "wb") as out:
|
|
shutil.copyfileobj(response, out)
|
|
temp_path.replace(archive_path)
|
|
finally:
|
|
temp_path.unlink(missing_ok=True)
|
|
|
|
|
|
def extract_rg(archive_path: Path, artifact: RgArtifact, dest: Path) -> None:
|
|
dest.parent.mkdir(parents=True, exist_ok=True)
|
|
dest.unlink(missing_ok=True)
|
|
|
|
if artifact.archive_format == "tar.gz":
|
|
with tarfile.open(archive_path, "r:gz") as archive:
|
|
try:
|
|
member = archive.getmember(artifact.archive_member)
|
|
except KeyError as exc:
|
|
raise RuntimeError(
|
|
f"ripgrep archive {archive_path} is missing {artifact.archive_member!r}"
|
|
) from exc
|
|
|
|
extracted = archive.extractfile(member)
|
|
if extracted is None:
|
|
raise RuntimeError(
|
|
f"ripgrep archive member {artifact.archive_member!r} is not a file"
|
|
)
|
|
with extracted, open(dest, "wb") as out:
|
|
shutil.copyfileobj(extracted, out)
|
|
return
|
|
|
|
if artifact.archive_format == "zip":
|
|
with zipfile.ZipFile(archive_path) as archive:
|
|
try:
|
|
with archive.open(artifact.archive_member) as extracted:
|
|
with open(dest, "wb") as out:
|
|
shutil.copyfileobj(extracted, out)
|
|
except KeyError as exc:
|
|
raise RuntimeError(
|
|
f"ripgrep archive {archive_path} is missing {artifact.archive_member!r}"
|
|
) from exc
|
|
return
|
|
|
|
raise RuntimeError(
|
|
f"Unsupported ripgrep archive format {artifact.archive_format!r}; expected tar.gz or zip"
|
|
)
|