mirror of
https://github.com/openai/codex.git
synced 2026-05-23 12:34:25 +00:00
package: factor DotSlash executable fetching
Summary:
- move the shared DotSlash archive download/cache/verify logic into scripts/codex_package/dotslash.py
- update ripgrep packaging to use the shared helper while preserving existing cache keys and validation behavior

Test Plan:
- python3 -m py_compile scripts/codex_package/dotslash.py scripts/codex_package/ripgrep.py
- python3 -m unittest discover scripts/codex_package
This commit is contained in:
223
scripts/codex_package/dotslash.py
Normal file
223
scripts/codex_package/dotslash.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""Fetch executable artifacts from checked-in DotSlash manifests."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import shutil
|
||||
import stat
|
||||
import tarfile
|
||||
import tempfile
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from urllib.request import urlopen
|
||||
|
||||
from .targets import TargetSpec
|
||||
|
||||
|
||||
DOWNLOAD_TIMEOUT_SECS = 60
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DotSlashArtifact:
    """Immutable description of one archive listed in a DotSlash manifest.

    The values are copied from a single platform entry of the manifest and
    drive the download, verification and extraction steps in this module.
    """

    # Expected size of the archive in bytes; checked against the file on disk.
    size: int
    # Expected SHA-256 of the archive, as a hex string.
    digest: str
    # Archive container type; the extractor understands "tar.gz" and "zip".
    archive_format: str
    # Name of the member inside the archive that gets extracted.
    archive_member: str
    # Location the archive is downloaded from.
    url: str
|
||||
|
||||
|
||||
def fetch_dotslash_executable(
    spec: TargetSpec,
    *,
    manifest_path: Path,
    artifact_label: str,
    cache_key: str,
    dest_name: str,
    executable: bool,
    missing_ok: bool = False,
) -> Path | None:
    """Download (or reuse), verify and unpack the manifest artifact for ``spec``.

    Args:
        spec: Target whose ``dotslash_platform`` selects the manifest entry.
        manifest_path: DotSlash manifest to read.
        artifact_label: Human-readable artifact name used in error messages.
        cache_key: Sub-directory of the cache root used for this artifact.
        dest_name: File name given to the extracted member.
        executable: When true, add the execute bits for user, group and other.
        missing_ok: When true, a manifest without an entry for the platform
            yields ``None`` instead of an error.

    Returns:
        Path of the extracted file inside the cache directory, or ``None``
        when the platform is absent and ``missing_ok`` is set.

    Raises:
        RuntimeError: If the downloaded archive fails verification; the bad
            archive is removed before the error propagates.
    """
    resolved = artifact_for_target(
        spec,
        manifest_path,
        artifact_label=artifact_label,
        missing_ok=missing_ok,
    )
    if resolved is None:
        return None

    workdir = default_cache_root() / cache_key
    cached_archive = workdir / archive_filename(resolved.url)

    cache_hit = archive_is_valid(cached_archive, resolved, artifact_label)
    if not cache_hit:
        download_archive(resolved.url, cached_archive)
        try:
            verify_archive(cached_archive, resolved, artifact_label)
        except RuntimeError:
            # Never leave an unverified download behind in the cache.
            cached_archive.unlink(missing_ok=True)
            raise

    output = workdir / dest_name
    extract_archive_member(cached_archive, resolved, output, artifact_label)
    if executable:
        current_mode = output.stat().st_mode
        output.chmod(current_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
    return output
|
||||
|
||||
|
||||
def artifact_for_target(
    spec: TargetSpec,
    manifest_path: Path,
    *,
    artifact_label: str,
    missing_ok: bool = False,
) -> DotSlashArtifact | None:
    """Look up the manifest entry for ``spec`` and describe its archive.

    Args:
        spec: Target whose ``dotslash_platform`` is the key into the
            manifest's ``platforms`` table.
        manifest_path: DotSlash manifest to read.
        artifact_label: Human-readable artifact name used in error messages.
        missing_ok: When true, return ``None`` if the platform has no entry.

    Returns:
        The artifact built from the platform entry and its first provider,
        or ``None`` when the platform is absent and ``missing_ok`` is set.

    Raises:
        RuntimeError: If the platform is missing (and ``missing_ok`` is
            false), has no providers, or declares a hash other than sha256.
    """
    platforms = load_manifest(manifest_path).get("platforms", {})
    platform = spec.dotslash_platform
    entry = platforms.get(platform)

    if entry is None:
        if not missing_ok:
            raise RuntimeError(
                f"{artifact_label} manifest {manifest_path} is missing platform "
                f"{platform!r}"
            )
        return None

    providers = entry.get("providers")
    if not providers:
        raise RuntimeError(
            f"{artifact_label} manifest {manifest_path} has no providers for "
            f"{platform!r}"
        )

    hash_name = entry.get("hash")
    if hash_name != "sha256":
        raise RuntimeError(
            f"Unsupported {artifact_label} hash {hash_name!r} for "
            f"{platform!r}; expected sha256"
        )

    # Only the first listed provider is used as the download source.
    return DotSlashArtifact(
        size=int(entry["size"]),
        digest=str(entry["digest"]),
        archive_format=str(entry["format"]),
        archive_member=str(entry["path"]),
        url=str(providers[0]["url"]),
    )
|
||||
|
||||
|
||||
def load_manifest(manifest_path: Path) -> dict:
    """Read a DotSlash manifest and return its parsed JSON payload.

    A leading ``#!`` line, if present, is skipped before the rest of the
    file is handed to the JSON parser.

    Args:
        manifest_path: Path of the manifest file, read as UTF-8.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the remaining text is not valid JSON.
    """
    text = manifest_path.read_text(encoding="utf-8")
    if text.startswith("#!"):
        # Drop only the first line. Splitting the whole document with
        # str.splitlines() and re-joining would also rewrite Unicode line
        # boundaries (U+2028, U+2029, U+0085, ...) that are legal inside
        # JSON strings into raw newlines, which the JSON parser rejects.
        text = text.partition("\n")[2]
    return json.loads(text)
|
||||
|
||||
|
||||
def default_cache_root() -> Path:
    """Return the ``codex-package`` directory under the system temp directory."""
    return Path(tempfile.gettempdir(), "codex-package")
|
||||
|
||||
|
||||
def archive_filename(url: str) -> str:
    """Return the final path component of ``url`` for use as a cache file name.

    Raises:
        RuntimeError: If the URL path has no final component.
    """
    url_path = urlparse(url).path
    if name := Path(url_path).name:
        return name
    raise RuntimeError(f"Unable to determine archive filename from {url}")
|
||||
|
||||
|
||||
def archive_is_valid(
    archive_path: Path,
    artifact: DotSlashArtifact,
    artifact_label: str,
) -> bool:
    """Report whether a cached archive exists and passes verification.

    A cached file that fails verification is deleted so that the caller can
    download a fresh copy.

    Returns:
        ``True`` only when ``archive_path`` is a regular file that
        ``verify_archive`` accepts; ``False`` otherwise.
    """
    if archive_path.is_file():
        try:
            verify_archive(archive_path, artifact, artifact_label)
        except RuntimeError:
            # Stale or corrupt cache entry: remove it and report a miss.
            archive_path.unlink(missing_ok=True)
        else:
            return True
    return False
|
||||
|
||||
|
||||
def verify_archive(
    archive_path: Path,
    artifact: DotSlashArtifact,
    artifact_label: str,
) -> None:
    """Check an archive on disk against the manifest's size and SHA-256.

    The size is compared first so that an obviously wrong file is rejected
    without hashing it.

    Raises:
        RuntimeError: If the file size or the SHA-256 hex digest differs
            from the value recorded in ``artifact``.
    """
    size_on_disk = archive_path.stat().st_size
    if size_on_disk != artifact.size:
        raise RuntimeError(
            f"{artifact_label} archive {archive_path} has size {size_on_disk}, "
            f"expected {artifact.size}"
        )

    hasher = hashlib.sha256()
    with open(archive_path, "rb") as stream:
        # Hash in 1 MiB blocks so the archive is never held in memory whole.
        while block := stream.read(1024 * 1024):
            hasher.update(block)

    computed = hasher.hexdigest()
    if computed == artifact.digest:
        return
    raise RuntimeError(
        f"{artifact_label} archive {archive_path} has sha256 {computed}, "
        f"expected {artifact.digest}"
    )
|
||||
|
||||
|
||||
def download_archive(url: str, archive_path: Path) -> None:
    """Download ``url`` to ``archive_path`` via a temporary sibling file.

    The response is streamed into ``<archive name>.tmp`` next to the target
    and renamed into place once the copy has finished, so ``archive_path``
    is only ever replaced by a completely written file. The temporary file
    is removed whether or not the download succeeds.
    """
    archive_path.parent.mkdir(parents=True, exist_ok=True)
    staging = archive_path.with_suffix(f"{archive_path.suffix}.tmp")
    # Clear any leftover from an earlier interrupted run.
    staging.unlink(missing_ok=True)
    try:
        with urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECS) as response, open(
            staging, "wb"
        ) as sink:
            shutil.copyfileobj(response, sink)
        staging.replace(archive_path)
    finally:
        staging.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def extract_archive_member(
    archive_path: Path,
    artifact: DotSlashArtifact,
    dest: Path,
    artifact_label: str,
) -> None:
    """Extract one member of a ``tar.gz`` or ``zip`` archive to ``dest``.

    Any existing ``dest`` is removed first. The member is streamed into a
    temporary sibling file and renamed into place only after the copy has
    finished, so a failure part-way through never leaves an empty or
    truncated file at ``dest``.

    Args:
        archive_path: Archive to read.
        artifact: Supplies ``archive_format`` and ``archive_member``.
        dest: Path the member's contents are written to; parent directories
            are created as needed.
        artifact_label: Human-readable artifact name used in error messages.

    Raises:
        RuntimeError: If the member is missing, is not a regular file (tar
            only), or the archive format is neither ``tar.gz`` nor ``zip``.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.unlink(missing_ok=True)

    member_name = artifact.archive_member

    if artifact.archive_format == "tar.gz":
        with tarfile.open(archive_path, "r:gz") as archive:
            try:
                member = archive.getmember(member_name)
            except KeyError as exc:
                raise RuntimeError(
                    f"{artifact_label} archive {archive_path} is missing "
                    f"{member_name!r}"
                ) from exc

            extracted = archive.extractfile(member)
            if extracted is None:
                raise RuntimeError(
                    f"{artifact_label} archive member {member_name!r} is not a file"
                )
            with extracted:
                _write_stream_atomically(extracted, dest)
        return

    if artifact.archive_format == "zip":
        with zipfile.ZipFile(archive_path) as archive:
            # Keep the try narrow: only a failed member lookup means
            # "missing"; errors raised while copying must surface unchanged.
            try:
                extracted = archive.open(member_name)
            except KeyError as exc:
                raise RuntimeError(
                    f"{artifact_label} archive {archive_path} is missing "
                    f"{member_name!r}"
                ) from exc
            with extracted:
                _write_stream_atomically(extracted, dest)
        return

    raise RuntimeError(
        f"Unsupported {artifact_label} archive format {artifact.archive_format!r}; "
        "expected tar.gz or zip"
    )


def _write_stream_atomically(stream, dest: Path) -> None:
    """Copy ``stream`` into a temporary sibling of ``dest`` and rename it into place."""
    partial = dest.with_name(f"{dest.name}.partial")
    partial.unlink(missing_ok=True)
    try:
        with open(partial, "wb") as out:
            shutil.copyfileobj(stream, out)
        partial.replace(dest)
    finally:
        # After a successful rename this is a no-op; after a failure it
        # removes the incomplete copy.
        partial.unlink(missing_ok=True)
|
||||
@@ -1,33 +1,14 @@
|
||||
"""Fetch ripgrep from the DotSlash manifest used by the package builder."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import shutil
|
||||
import stat
|
||||
import tarfile
|
||||
import tempfile
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from urllib.request import urlopen
|
||||
|
||||
from .dotslash import fetch_dotslash_executable
|
||||
from .targets import REPO_ROOT
|
||||
from .targets import TargetSpec
|
||||
from .targets import resolve_input_path
|
||||
|
||||
|
||||
RG_MANIFEST = REPO_ROOT / "scripts" / "codex_package" / "rg"
|
||||
DOWNLOAD_TIMEOUT_SECS = 60
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RgArtifact:
|
||||
size: int
|
||||
digest: str
|
||||
archive_format: str
|
||||
archive_member: str
|
||||
url: str
|
||||
|
||||
|
||||
def resolve_rg_bin(spec: TargetSpec, rg_bin: Path | None) -> Path:
|
||||
@@ -41,155 +22,15 @@ def fetch_rg(
|
||||
spec: TargetSpec,
|
||||
*,
|
||||
manifest_path: Path = RG_MANIFEST,
|
||||
cache_root: Path | None = None,
|
||||
) -> Path:
|
||||
artifact = artifact_for_target(spec, manifest_path)
|
||||
cache_dir = (cache_root or default_cache_root()) / f"{spec.target}-rg"
|
||||
archive_path = cache_dir / archive_filename(artifact.url)
|
||||
|
||||
if not archive_is_valid(archive_path, artifact):
|
||||
download_archive(artifact.url, archive_path)
|
||||
try:
|
||||
verify_archive(archive_path, artifact)
|
||||
except RuntimeError:
|
||||
archive_path.unlink(missing_ok=True)
|
||||
raise
|
||||
|
||||
dest = cache_dir / spec.rg_name
|
||||
extract_rg(archive_path, artifact, dest)
|
||||
if not spec.is_windows:
|
||||
mode = dest.stat().st_mode
|
||||
dest.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
||||
return dest
|
||||
|
||||
|
||||
def artifact_for_target(spec: TargetSpec, manifest_path: Path) -> RgArtifact:
|
||||
manifest = load_manifest(manifest_path)
|
||||
try:
|
||||
platform_info = manifest["platforms"][spec.dotslash_platform]
|
||||
except KeyError as exc:
|
||||
raise RuntimeError(
|
||||
f"ripgrep manifest {manifest_path} is missing platform {spec.dotslash_platform!r}"
|
||||
) from exc
|
||||
|
||||
providers = platform_info.get("providers")
|
||||
if not providers:
|
||||
raise RuntimeError(
|
||||
f"ripgrep manifest {manifest_path} has no providers for {spec.dotslash_platform!r}"
|
||||
)
|
||||
|
||||
hash_name = platform_info.get("hash")
|
||||
if hash_name != "sha256":
|
||||
raise RuntimeError(
|
||||
f"Unsupported ripgrep hash {hash_name!r} for "
|
||||
f"{spec.dotslash_platform!r}; expected sha256"
|
||||
)
|
||||
|
||||
return RgArtifact(
|
||||
size=int(platform_info["size"]),
|
||||
digest=str(platform_info["digest"]),
|
||||
archive_format=str(platform_info["format"]),
|
||||
archive_member=str(platform_info["path"]),
|
||||
url=str(providers[0]["url"]),
|
||||
)
|
||||
|
||||
|
||||
def load_manifest(manifest_path: Path) -> dict:
|
||||
text = manifest_path.read_text(encoding="utf-8")
|
||||
if text.startswith("#!"):
|
||||
text = "\n".join(text.splitlines()[1:])
|
||||
return json.loads(text)
|
||||
|
||||
|
||||
def default_cache_root() -> Path:
|
||||
return Path(tempfile.gettempdir()) / "codex-package"
|
||||
|
||||
|
||||
def archive_filename(url: str) -> str:
|
||||
filename = Path(urlparse(url).path).name
|
||||
if not filename:
|
||||
raise RuntimeError(f"Unable to determine archive filename from {url}")
|
||||
return filename
|
||||
|
||||
|
||||
def archive_is_valid(archive_path: Path, artifact: RgArtifact) -> bool:
|
||||
if not archive_path.is_file():
|
||||
return False
|
||||
try:
|
||||
verify_archive(archive_path, artifact)
|
||||
except RuntimeError:
|
||||
archive_path.unlink(missing_ok=True)
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def verify_archive(archive_path: Path, artifact: RgArtifact) -> None:
|
||||
actual_size = archive_path.stat().st_size
|
||||
if actual_size != artifact.size:
|
||||
raise RuntimeError(
|
||||
f"ripgrep archive {archive_path} has size {actual_size}, expected {artifact.size}"
|
||||
)
|
||||
|
||||
digest = hashlib.sha256()
|
||||
with open(archive_path, "rb") as fh:
|
||||
for chunk in iter(lambda: fh.read(1024 * 1024), b""):
|
||||
digest.update(chunk)
|
||||
|
||||
actual_digest = digest.hexdigest()
|
||||
if actual_digest != artifact.digest:
|
||||
raise RuntimeError(
|
||||
f"ripgrep archive {archive_path} has sha256 {actual_digest}, "
|
||||
f"expected {artifact.digest}"
|
||||
)
|
||||
|
||||
|
||||
def download_archive(url: str, archive_path: Path) -> None:
|
||||
archive_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
temp_path = archive_path.with_suffix(f"{archive_path.suffix}.tmp")
|
||||
temp_path.unlink(missing_ok=True)
|
||||
try:
|
||||
with urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECS) as response:
|
||||
with open(temp_path, "wb") as out:
|
||||
shutil.copyfileobj(response, out)
|
||||
temp_path.replace(archive_path)
|
||||
finally:
|
||||
temp_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def extract_rg(archive_path: Path, artifact: RgArtifact, dest: Path) -> None:
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
dest.unlink(missing_ok=True)
|
||||
|
||||
if artifact.archive_format == "tar.gz":
|
||||
with tarfile.open(archive_path, "r:gz") as archive:
|
||||
try:
|
||||
member = archive.getmember(artifact.archive_member)
|
||||
except KeyError as exc:
|
||||
raise RuntimeError(
|
||||
f"ripgrep archive {archive_path} is missing {artifact.archive_member!r}"
|
||||
) from exc
|
||||
|
||||
extracted = archive.extractfile(member)
|
||||
if extracted is None:
|
||||
raise RuntimeError(
|
||||
f"ripgrep archive member {artifact.archive_member!r} is not a file"
|
||||
)
|
||||
with extracted, open(dest, "wb") as out:
|
||||
shutil.copyfileobj(extracted, out)
|
||||
return
|
||||
|
||||
if artifact.archive_format == "zip":
|
||||
with zipfile.ZipFile(archive_path) as archive:
|
||||
try:
|
||||
with archive.open(artifact.archive_member) as extracted:
|
||||
with open(dest, "wb") as out:
|
||||
shutil.copyfileobj(extracted, out)
|
||||
except KeyError as exc:
|
||||
raise RuntimeError(
|
||||
f"ripgrep archive {archive_path} is missing {artifact.archive_member!r}"
|
||||
) from exc
|
||||
return
|
||||
|
||||
raise RuntimeError(
|
||||
f"Unsupported ripgrep archive format {artifact.archive_format!r}; expected tar.gz or zip"
|
||||
rg_bin = fetch_dotslash_executable(
|
||||
spec,
|
||||
manifest_path=manifest_path,
|
||||
artifact_label="ripgrep",
|
||||
cache_key=f"{spec.target}-rg",
|
||||
dest_name=spec.rg_name,
|
||||
executable=not spec.is_windows,
|
||||
)
|
||||
if rg_bin is None:
|
||||
raise AssertionError("ripgrep is required for all package targets")
|
||||
return rg_bin
|
||||
|
||||
Reference in New Issue
Block a user