Files
codex/scripts/codex_package/ripgrep.py
Michael Bolin c07f66c9ec packaging: move rg manifest out of npm bin (#23833)
## Why

Installing `@openai/codex` currently places a DotSlash `rg` manifest at
`node_modules/@openai/codex/bin/rg`, even though the native optional
dependency already ships the actual helper under
`vendor/<target>/codex-path/rg`. The launcher prepends that `codex-path`
directory, so the top-level `bin/rg` file is redundant in the npm
install.

The remaining direct consumers of the manifest are package-building
paths: `scripts/codex_package/ripgrep.py` and
`codex-cli/scripts/install_native_deps.py`. Keeping the manifest under
`codex-cli/bin` makes it look like a shipped npm binary, so this moves
it next to the package-builder code that owns it. The checked-in
`@openai/codex` package metadata should likewise describe only the meta
package payload; generated platform packages continue to publish
`vendor`.

## What Changed

- Moved the DotSlash ripgrep manifest from `codex-cli/bin/rg` to
`scripts/codex_package/rg`.
- Updated the package builder, npm native-artifact hydrator, README, and
CLI help text to reference the new manifest location.
- Stopped `codex-cli/scripts/build_npm_package.py` from copying `rg`
into the `@openai/codex` meta package.
- Narrowed the checked-in meta package `files` whitelist to
`bin/codex.js`.

## Verification

- `python3 -m unittest discover -s scripts/codex_package -p "test_*.py"`
- `python3 -m unittest discover -s codex-cli/scripts -p "test_*.py"`
- `python3 -m py_compile codex-cli/scripts/build_npm_package.py
codex-cli/scripts/install_native_deps.py
scripts/codex_package/ripgrep.py scripts/codex_package/cli.py
scripts/stage_npm_packages.py`
- `codex-cli/scripts/build_npm_package.py --package codex --version
0.0.0-test --pack-output <tmp>/codex-meta-no-vendor.tgz`
- `tar -tf <tmp>/codex-meta-no-vendor.tgz` showed only
`package/bin/codex.js`, `package/package.json`, and `package/README.md`.
- Direct staging check showed `codex` uses `files: ["bin/codex.js"]`
while `codex-darwin-arm64` still uses `files: ["vendor"]`.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/23833).
* #23836
* __->__ #23833
2026-05-21 15:48:42 +00:00

196 lines
6.1 KiB
Python

"""Fetch ripgrep from the DotSlash manifest used by the package builder."""
import hashlib
import json
import shutil
import stat
import tarfile
import tempfile
import zipfile
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse
from urllib.request import urlopen
from .targets import REPO_ROOT
from .targets import TargetSpec
from .targets import resolve_input_path
# DotSlash manifest that fetch_rg() reads by default to locate the ripgrep
# archive for a target platform.
RG_MANIFEST = REPO_ROOT / "scripts" / "codex_package" / "rg"
# Value passed as the `timeout` argument of urlopen() in download_archive().
DOWNLOAD_TIMEOUT_SECS = 60
@dataclass(frozen=True)
class RgArtifact:
    """Description of one downloadable ripgrep archive taken from the manifest.

    Instances are built by artifact_for_target() from a single platform entry
    and are immutable (frozen dataclass).
    """

    # Expected archive size in bytes; compared against st_size in verify_archive().
    size: int
    # Expected SHA-256 hex digest of the archive; compared against hexdigest().
    digest: str
    # Archive container type; extract_rg() accepts "tar.gz" and "zip".
    archive_format: str
    # Name of the member inside the archive that is extracted by extract_rg().
    archive_member: str
    # Download URL (artifact_for_target() uses the first provider's "url").
    url: str
def resolve_rg_bin(spec: TargetSpec, rg_bin: Path | None) -> Path:
    """Return the ripgrep executable to package for ``spec``.

    When ``rg_bin`` is ``None`` the executable is obtained through
    ``fetch_rg``; otherwise the supplied path is passed through
    ``resolve_input_path`` and that result is returned.
    """
    if rg_bin is None:
        return fetch_rg(spec)
    return resolve_input_path(rg_bin, "ripgrep executable", "--rg-bin")
def fetch_rg(
    spec: TargetSpec,
    *,
    manifest_path: Path = RG_MANIFEST,
    cache_root: Path | None = None,
) -> Path:
    """Download (if needed), verify, and extract ripgrep for ``spec``.

    Args:
        spec: Target whose ``target``, ``rg_name`` and ``is_windows``
            attributes select the cache directory, output filename, and
            whether executable bits are set.
        manifest_path: Manifest to read the artifact description from.
        cache_root: Directory holding per-target caches; falls back to
            ``default_cache_root()`` when not given.

    Returns:
        Path of the extracted ripgrep executable inside the cache directory.

    Raises:
        RuntimeError: Propagated from the manifest lookup, from verification
            of a freshly downloaded archive (the archive is deleted first),
            or from extraction.
    """
    artifact = artifact_for_target(spec, manifest_path)

    base_dir = cache_root if cache_root else default_cache_root()
    cache_dir = base_dir / f"{spec.target}-rg"
    archive_path = cache_dir / archive_filename(artifact.url)

    cached_ok = archive_is_valid(archive_path, artifact)
    if not cached_ok:
        download_archive(artifact.url, archive_path)
        try:
            verify_archive(archive_path, artifact)
        except RuntimeError:
            # Do not leave a bad download behind in the cache.
            archive_path.unlink(missing_ok=True)
            raise

    rg_path = cache_dir / spec.rg_name
    extract_rg(archive_path, artifact, rg_path)
    if spec.is_windows:
        return rg_path

    # Add execute permission for user, group, and others on top of the
    # mode the extracted file already has.
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    current_mode = rg_path.stat().st_mode
    rg_path.chmod(current_mode | exec_bits)
    return rg_path
def artifact_for_target(spec: TargetSpec, manifest_path: Path) -> RgArtifact:
    """Build the ``RgArtifact`` for ``spec`` from the manifest at ``manifest_path``.

    The platform entry is looked up under ``manifest["platforms"]`` using
    ``spec.dotslash_platform``. Only ``sha256`` digests are accepted, and the
    download URL is taken from the first listed provider.

    Args:
        spec: Target whose ``dotslash_platform`` selects the manifest entry.
        manifest_path: Path of the manifest file to load.

    Returns:
        The artifact description for the requested platform.

    Raises:
        RuntimeError: If the platform is missing, has no providers, uses a
            hash other than sha256, or lacks / has malformed ``size``,
            ``digest``, ``format``, ``path`` or provider ``url`` fields.
    """
    manifest = load_manifest(manifest_path)
    platform = spec.dotslash_platform
    try:
        platform_info = manifest["platforms"][platform]
    except KeyError as exc:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} is missing platform {platform!r}"
        ) from exc

    providers = platform_info.get("providers")
    if not providers:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} has no providers for {platform!r}"
        )

    hash_name = platform_info.get("hash")
    if hash_name != "sha256":
        raise RuntimeError(
            f"Unsupported ripgrep hash {hash_name!r} for "
            f"{platform!r}; expected sha256"
        )

    # Report missing or malformed fields the same way as every other manifest
    # problem in this function, instead of leaking a bare KeyError/ValueError
    # that carries no manifest or platform context.
    try:
        return RgArtifact(
            size=int(platform_info["size"]),
            digest=str(platform_info["digest"]),
            archive_format=str(platform_info["format"]),
            archive_member=str(platform_info["path"]),
            url=str(providers[0]["url"]),
        )
    except (KeyError, TypeError, ValueError) as exc:
        raise RuntimeError(
            f"ripgrep manifest {manifest_path} has a malformed entry for "
            f"{platform!r}: {exc!r}"
        ) from exc
def load_manifest(manifest_path: Path) -> dict:
    """Read the manifest at ``manifest_path`` and return its parsed JSON.

    If the file starts with a ``#!`` line, that first line is dropped before
    parsing; the remainder must be valid JSON.

    Args:
        manifest_path: Path of the UTF-8 encoded manifest file.

    Returns:
        The value produced by ``json.loads`` for the manifest body.

    Raises:
        json.JSONDecodeError: If the body is not valid JSON (including a file
            that contains only a ``#!`` line).
        OSError: If the file cannot be read.
    """
    text = manifest_path.read_text(encoding="utf-8")
    if text.startswith("#!"):
        # Drop only the first newline-terminated line. str.splitlines() would
        # also split on Unicode line boundaries such as U+2028 or form feed,
        # and re-joining with "\n" would inject raw newlines into JSON string
        # values and make an otherwise valid manifest unparsable.
        _shebang, _newline, text = text.partition("\n")
    return json.loads(text)
def default_cache_root() -> Path:
    """Return the ``codex-package`` directory under the system temp directory."""
    temp_dir = tempfile.gettempdir()
    return Path(temp_dir, "codex-package")
def archive_filename(url: str) -> str:
    """Return the final path component of ``url`` to use as the archive name.

    Query string and fragment are ignored because only the parsed URL path
    is inspected.

    Raises:
        RuntimeError: If the URL path has no final component.
    """
    url_path = urlparse(url).path
    name = Path(url_path).name
    if name:
        return name
    raise RuntimeError(f"Unable to determine archive filename from {url}")
def archive_is_valid(archive_path: Path, artifact: RgArtifact) -> bool:
    """Report whether a cached archive exists and passes ``verify_archive``.

    A file that exists but fails verification is deleted so the caller can
    download a fresh copy.

    Returns:
        ``True`` only when ``archive_path`` is a regular file that verifies
        against ``artifact``; ``False`` otherwise.
    """
    if archive_path.is_file():
        try:
            verify_archive(archive_path, artifact)
        except RuntimeError:
            # Stale or corrupt cache entry: remove it and report invalid.
            archive_path.unlink(missing_ok=True)
        else:
            return True
    return False
def verify_archive(archive_path: Path, artifact: RgArtifact) -> None:
    """Check ``archive_path`` against the size and SHA-256 digest in ``artifact``.

    The size is checked first so a mismatch is reported without hashing the
    file. The file is hashed in 1 MiB reads.

    Raises:
        RuntimeError: If the file size or its SHA-256 hex digest differs from
            the values recorded in ``artifact``.
    """
    size_on_disk = archive_path.stat().st_size
    if size_on_disk != artifact.size:
        raise RuntimeError(
            f"ripgrep archive {archive_path} has size {size_on_disk}, expected {artifact.size}"
        )

    read_size = 1024 * 1024
    hasher = hashlib.sha256()
    with open(archive_path, "rb") as stream:
        while block := stream.read(read_size):
            hasher.update(block)

    computed = hasher.hexdigest()
    if computed != artifact.digest:
        raise RuntimeError(
            f"ripgrep archive {archive_path} has sha256 {computed}, "
            f"expected {artifact.digest}"
        )
def download_archive(url: str, archive_path: Path) -> None:
    """Download ``url`` to ``archive_path`` via a sibling ``.tmp`` file.

    The response is streamed into ``<archive name>.tmp`` and then moved over
    ``archive_path`` with ``Path.replace``. The temporary file is removed on
    exit whether or not the download succeeded.
    """
    archive_path.parent.mkdir(parents=True, exist_ok=True)

    tmp_suffix = f"{archive_path.suffix}.tmp"
    partial_path = archive_path.with_suffix(tmp_suffix)
    # Remove any leftover partial file from an earlier attempt.
    partial_path.unlink(missing_ok=True)

    try:
        with urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECS) as response, open(
            partial_path, "wb"
        ) as sink:
            shutil.copyfileobj(response, sink)
        partial_path.replace(archive_path)
    finally:
        # No-op after a successful replace; cleans up after a failed download.
        partial_path.unlink(missing_ok=True)
def _extract_rg_from_tar(archive_path: Path, member_name: str, dest: Path) -> None:
    """Copy ``member_name`` out of the gzip-compressed tar at ``archive_path``."""
    with tarfile.open(archive_path, "r:gz") as tar:
        try:
            info = tar.getmember(member_name)
        except KeyError as exc:
            raise RuntimeError(
                f"ripgrep archive {archive_path} is missing {member_name!r}"
            ) from exc

        source = tar.extractfile(info)
        if source is None:
            # extractfile() returned no file object for this member.
            raise RuntimeError(
                f"ripgrep archive member {member_name!r} is not a file"
            )
        with source, open(dest, "wb") as sink:
            shutil.copyfileobj(source, sink)


def _extract_rg_from_zip(archive_path: Path, member_name: str, dest: Path) -> None:
    """Copy ``member_name`` out of the zip file at ``archive_path``."""
    with zipfile.ZipFile(archive_path) as bundle:
        try:
            with bundle.open(member_name) as source:
                with open(dest, "wb") as sink:
                    shutil.copyfileobj(source, sink)
        except KeyError as exc:
            raise RuntimeError(
                f"ripgrep archive {archive_path} is missing {member_name!r}"
            ) from exc


def extract_rg(archive_path: Path, artifact: RgArtifact, dest: Path) -> None:
    """Extract the ripgrep binary named by ``artifact`` to ``dest``.

    The parent directory of ``dest`` is created and any existing ``dest`` is
    removed before the archive format is examined, so both happen even when
    the format turns out to be unsupported.

    Raises:
        RuntimeError: If the archive lacks ``artifact.archive_member``, the
            tar member is not a file, or ``artifact.archive_format`` is
            neither ``tar.gz`` nor ``zip``.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.unlink(missing_ok=True)

    archive_format = artifact.archive_format
    if archive_format == "tar.gz":
        _extract_rg_from_tar(archive_path, artifact.archive_member, dest)
    elif archive_format == "zip":
        _extract_rg_from_zip(archive_path, artifact.archive_member, dest)
    else:
        raise RuntimeError(
            f"Unsupported ripgrep archive format {artifact.archive_format!r}; expected tar.gz or zip"
        )