feat(network-proxy): add embedded OTEL policy audit logging (#12046)

**PR Summary**

This PR adds embedded-only OTEL policy audit logging for
`codex-network-proxy` and threads audit metadata from `codex-core` into
managed proxy startup.

### What changed
- Added structured audit event emission in `network_policy.rs` with
target `codex_otel.network_proxy`.
- Emitted:
- `codex.network_proxy.domain_policy_decision` once per domain-policy
evaluation.
  - `codex.network_proxy.block_decision` for non-domain denies.
- Added required policy/network fields, RFC3339 UTC millisecond
`event.timestamp`, and fallback defaults (`http.request.method="none"`,
`client.address="unknown"`).
- Added non-domain deny audit emission in HTTP/SOCKS handlers for
mode-guard and proxy-state denies, including unix-socket deny paths.
- Added `REASON_UNIX_SOCKET_UNSUPPORTED` and used it for unsupported
unix-socket auditing.
- Added `NetworkProxyAuditMetadata` to runtime/state, re-exported from
`lib.rs` and `state.rs`.
- Added `start_proxy_with_audit_metadata(...)` in core config, with
`start_proxy()` delegating to default metadata.
- Wired metadata construction in `codex.rs` from session/auth context,
including originator sanitization for OTEL-safe tagging.
- Updated `network-proxy/README.md` with embedded-mode audit schema and
behavior notes.
- Refactored HTTP block-audit emission to a small local helper to reduce
duplication.
- Preserved existing unix-socket proxy-disabled host/path behavior for
responses and blocked history while using an audit-only endpoint
override (`server.address="unix-socket"`, `server.port=0`).

### Explicit exclusions
- No standalone proxy OTEL startup work.
- No `main.rs` binary wiring.
- No `standalone_otel.rs`.
- No standalone docs/tests.

### Tests
- Extended `network_policy.rs` tests for event mapping, metadata
propagation, fallbacks, timestamp format, and target prefix.
- Extended HTTP tests to assert unix-socket deny block audit events.
- Extended SOCKS tests to cover deny emission from handler deny
branches.
- Added/updated core tests to verify audit metadata threading into
managed proxy state.

### Validation run
- `just fmt`
- `cargo test -p codex-network-proxy` 
- `cargo test -p codex-core` ran with one unrelated flaky timeout
(`shell_snapshot::tests::snapshot_shell_does_not_inherit_stdin`), and
the test passed when rerun directly 

---------

Co-authored-by: viyatb-oai <viyatb@openai.com>
This commit is contained in:
mcgrew-oai
2026-02-25 11:46:37 -05:00
committed by GitHub
parent 8362b79cb4
commit 9a393c9b6f
16 changed files with 1592 additions and 657 deletions

View File

@@ -38,6 +38,19 @@ const MAX_BLOCKED_EVENTS: usize = 200;
const DNS_LOOKUP_TIMEOUT: Duration = Duration::from_secs(2);
const NETWORK_POLICY_VIOLATION_PREFIX: &str = "CODEX_NETWORK_POLICY_VIOLATION";
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct NetworkProxyAuditMetadata {
pub conversation_id: Option<String>,
pub app_version: Option<String>,
pub user_account_id: Option<String>,
pub auth_mode: Option<String>,
pub originator: Option<String>,
pub user_email: Option<String>,
pub terminal_type: Option<String>,
pub model: Option<String>,
pub slug: Option<String>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HostBlockReason {
Denied,
@@ -187,6 +200,7 @@ pub struct NetworkProxyState {
state: Arc<RwLock<ConfigState>>,
reloader: Arc<dyn ConfigReloader>,
blocked_request_observer: Arc<RwLock<Option<Arc<dyn BlockedRequestObserver>>>>,
audit_metadata: NetworkProxyAuditMetadata,
}
impl std::fmt::Debug for NetworkProxyState {
@@ -203,24 +217,57 @@ impl Clone for NetworkProxyState {
state: self.state.clone(),
reloader: self.reloader.clone(),
blocked_request_observer: self.blocked_request_observer.clone(),
audit_metadata: self.audit_metadata.clone(),
}
}
}
impl NetworkProxyState {
pub fn with_reloader(state: ConfigState, reloader: Arc<dyn ConfigReloader>) -> Self {
Self::with_reloader_and_blocked_observer(state, reloader, None)
Self::with_reloader_and_audit_metadata(
state,
reloader,
NetworkProxyAuditMetadata::default(),
)
}
pub fn with_reloader_and_blocked_observer(
state: ConfigState,
reloader: Arc<dyn ConfigReloader>,
blocked_request_observer: Option<Arc<dyn BlockedRequestObserver>>,
) -> Self {
Self::with_reloader_and_audit_metadata_and_blocked_observer(
state,
reloader,
NetworkProxyAuditMetadata::default(),
blocked_request_observer,
)
}
pub fn with_reloader_and_audit_metadata(
state: ConfigState,
reloader: Arc<dyn ConfigReloader>,
audit_metadata: NetworkProxyAuditMetadata,
) -> Self {
Self::with_reloader_and_audit_metadata_and_blocked_observer(
state,
reloader,
audit_metadata,
None,
)
}
pub fn with_reloader_and_audit_metadata_and_blocked_observer(
state: ConfigState,
reloader: Arc<dyn ConfigReloader>,
audit_metadata: NetworkProxyAuditMetadata,
blocked_request_observer: Option<Arc<dyn BlockedRequestObserver>>,
) -> Self {
Self {
state: Arc::new(RwLock::new(state)),
reloader,
blocked_request_observer: Arc::new(RwLock::new(blocked_request_observer)),
audit_metadata,
}
}
@@ -232,6 +279,10 @@ impl NetworkProxyState {
*observer = blocked_request_observer;
}
pub fn audit_metadata(&self) -> &NetworkProxyAuditMetadata {
&self.audit_metadata
}
pub async fn current_cfg(&self) -> Result<NetworkProxyConfig> {
// Callers treat `NetworkProxyState` as a live view of policy. We reload-on-demand so edits to
// `config.toml` (including Codex-managed writes) take effect without a restart.