feat(linux-sandbox): add bwrap support (#9938)

## Summary
This PR introduces a gated Bubblewrap (bwrap) Linux sandbox path. The
curent Linux sandbox path relies on in-process restrictions (including
Landlock). Bubblewrap gives us a more uniform filesystem isolation
model, especially explicit writable roots with the option to make some
directories read-only and granular network controls.

This is behind a feature flag so we can validate behavior safely before
making it the default.

- Added temporary rollout flag:
  - `features.use_linux_sandbox_bwrap`
- Preserved existing default path when the flag is off.
- In Bubblewrap mode:
- Added internal retry without /proc when /proc mount is not permitted
by the host/container.
This commit is contained in:
viyatb-oai
2026-02-04 11:13:17 -08:00
committed by GitHub
parent 95269ce88b
commit ae4de43ccc
31 changed files with 607 additions and 517 deletions

View File

@@ -1,3 +1,7 @@
//! In-process Linux sandbox primitives: `no_new_privs` and seccomp.
//!
//! Filesystem restrictions are enforced by bubblewrap in `linux_run_main`.
//! Landlock helpers remain available here as legacy/backup utilities.
use std::collections::BTreeMap;
use std::path::Path;
@@ -8,6 +12,7 @@ use codex_core::protocol::SandboxPolicy;
use codex_utils_absolute_path::AbsolutePathBuf;
use landlock::ABI;
#[allow(unused_imports)]
use landlock::Access;
use landlock::AccessFs;
use landlock::CompatLevel;
@@ -27,11 +32,24 @@ use seccompiler::apply_filter;
/// Apply sandbox policies inside this thread so only the child inherits
/// them, not the entire CLI process.
///
/// This function is responsible for:
/// - enabling `PR_SET_NO_NEW_PRIVS` when restrictions apply, and
/// - installing the network seccomp filter when network access is disabled.
///
/// Filesystem restrictions are intentionally handled by bubblewrap.
pub(crate) fn apply_sandbox_policy_to_current_thread(
sandbox_policy: &SandboxPolicy,
cwd: &Path,
apply_landlock_fs: bool,
) -> Result<()> {
if !sandbox_policy.has_full_disk_write_access() || !sandbox_policy.has_full_network_access() {
// `PR_SET_NO_NEW_PRIVS` is required for seccomp, but it also prevents
// setuid privilege elevation. Many `bwrap` deployments rely on setuid, so
// we avoid this unless we need seccomp or we are explicitly using the
// legacy Landlock filesystem pipeline.
if !sandbox_policy.has_full_network_access()
|| (apply_landlock_fs && !sandbox_policy.has_full_disk_write_access())
{
set_no_new_privs()?;
}
@@ -39,7 +57,7 @@ pub(crate) fn apply_sandbox_policy_to_current_thread(
install_network_seccomp_filter_on_current_thread()?;
}
if !sandbox_policy.has_full_disk_write_access() {
if apply_landlock_fs && !sandbox_policy.has_full_disk_write_access() {
let writable_roots = sandbox_policy
.get_writable_roots_with_cwd(cwd)
.into_iter()
@@ -54,6 +72,7 @@ pub(crate) fn apply_sandbox_policy_to_current_thread(
Ok(())
}
/// Enable `PR_SET_NO_NEW_PRIVS` so seccomp can be applied safely.
fn set_no_new_privs() -> Result<()> {
let result = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
if result != 0 {
@@ -68,6 +87,9 @@ fn set_no_new_privs() -> Result<()> {
///
/// # Errors
/// Returns [`CodexErr::Sandbox`] variants when the ruleset fails to apply.
///
/// Note: this is currently unused because filesystem sandboxing is performed
/// via bubblewrap. It is kept for reference and potential fallback use.
fn install_filesystem_landlock_rules_on_current_thread(
writable_roots: Vec<AbsolutePathBuf>,
) -> Result<()> {
@@ -98,6 +120,9 @@ fn install_filesystem_landlock_rules_on_current_thread(
/// Installs a seccomp filter that blocks outbound network access except for
/// AF_UNIX domain sockets.
///
/// The filter is applied to the current thread so only the sandboxed child
/// inherits it.
fn install_network_seccomp_filter_on_current_thread() -> std::result::Result<(), SandboxErr> {
// Build rule map.
let mut rules: BTreeMap<i64, Vec<SeccompRule>> = BTreeMap::new();