feat: add AWS SigV4 auth for OpenAI-compatible model providers (#17820)

## Summary

Add first-class Amazon Bedrock Mantle provider support so Codex can keep
using its existing Responses API transport with OpenAI-compatible
AWS-hosted endpoints such as AOA/Mantle.

This is needed for the AWS launch path, where provider traffic should
authenticate with AWS credentials instead of OpenAI bearer credentials.
Requests are authenticated immediately before transport send, so SigV4
signs the final method, URL, headers, and body bytes that `reqwest` will
send.

## What Changed

- Added a new `codex-aws-auth` crate for loading AWS SDK config,
resolving credentials, and signing finalized HTTP requests with AWS
SigV4.
- Added a built-in `amazon-bedrock` provider that targets Bedrock Mantle
Responses endpoints, defaults to `us-east-1`, supports region/profile
overrides, disables WebSockets, and does not require OpenAI auth.
- Added Amazon Bedrock auth resolution in `codex-model-provider`: prefer
`AWS_BEARER_TOKEN_BEDROCK` when set, otherwise use AWS SDK credentials
and SigV4 signing.
- Added `AuthProvider::apply_auth` and `Request::prepare_body_for_send`
so request-signing providers can sign the exact outbound request after
JSON serialization/compression.
- Determine the region from the `aws.region` config first (required for
the bearer-token code path), falling back to the SDK default region.

## Testing
Amazon Bedrock Mantle Responses paths:

- Built the local Codex binary with `cargo build`.
- Verified that the custom proxy-backed `aws` provider, using `env_key =
"AWS_BEARER_TOKEN_BEDROCK"`, streamed raw `responses` output with
`response.output_text.delta`, `response.completed`, and `mantle-env-ok`.
- Verified a full `codex exec --profile aws` turn returned
`mantle-env-ok`.
- Confirmed the custom provider used the bearer env var, not AWS profile
auth: bogus `AWS_PROFILE` still passed, empty env var failed locally,
and malformed env var reached Mantle and failed with `401
invalid_api_key`.
- Verified built-in `amazon-bedrock` with `AWS_BEARER_TOKEN_BEDROCK` set
passed despite bogus AWS profiles, returning `amazon-bedrock-env-ok`.
- Verified built-in `amazon-bedrock` SDK/SigV4 auth passed with
`AWS_BEARER_TOKEN_BEDROCK` unset and temporary AWS session env
credentials, returning `amazon-bedrock-sdk-env-ok`.
This commit is contained in:
Celia Chen
2026-04-21 18:11:17 -07:00
committed by GitHub
parent e18fe7a07f
commit 1cd3ad1f49
25 changed files with 1676 additions and 94 deletions

View File

@@ -5,6 +5,8 @@ use std::time::Duration;
use anyhow::Result;
use async_trait::async_trait;
use bytes::Bytes;
use codex_api::ApiError;
use codex_api::AuthError;
use codex_api::AuthProvider;
use codex_api::Compression;
use codex_api::Provider;
@@ -164,6 +166,59 @@ impl FlakyTransport {
}
}
/// Test double for `AuthProvider` whose first `apply_auth` call fails with a
/// preconfigured `AuthError`; every later call returns the request unchanged.
///
/// Both fields are `Arc`-wrapped, so clones produced by the derived `Clone`
/// share the same call counter and the same stored error.
#[derive(Clone)]
struct FailsOnceAuth {
// Number of `apply_auth` calls made so far, shared across clones.
attempts: Arc<Mutex<i64>>,
// Error template reproduced on the first `apply_auth` call.
// NOTE(review): presumably held by reference because `AuthError` is not
// `Clone` — confirm against the `codex_api` definition.
error: Arc<AuthError>,
}
impl FailsOnceAuth {
fn transient() -> Self {
Self {
attempts: Arc::new(Mutex::new(0)),
error: Arc::new(AuthError::Transient(
"sts temporarily unavailable".to_string(),
)),
}
}
fn build() -> Self {
Self {
attempts: Arc::new(Mutex::new(0)),
error: Arc::new(AuthError::Build("invalid auth configuration".to_string())),
}
}
fn attempts(&self) -> i64 {
*self
.attempts
.lock()
.unwrap_or_else(|err| panic!("mutex poisoned: {err}"))
}
}
#[async_trait]
impl AuthProvider for FailsOnceAuth {
    /// No-op: this test double never contributes static auth headers.
    fn add_auth_headers(&self, _headers: &mut HeaderMap) {}

    /// Counts the call, fails the very first one with a copy of the stored
    /// error, and passes the request through untouched on every later call.
    ///
    /// # Panics
    ///
    /// Panics if the counter mutex has been poisoned.
    async fn apply_auth(&self, request: Request) -> Result<Request, AuthError> {
        // Bump the shared counter and remember which call this is.
        let call_number = {
            let mut counter = self
                .attempts
                .lock()
                .unwrap_or_else(|err| panic!("mutex poisoned: {err}"));
            *counter += 1;
            *counter
        };

        if call_number != 1 {
            return Ok(request);
        }

        // Rebuild the stored error variant by variant from the shared template.
        Err(match self.error.as_ref() {
            AuthError::Build(message) => AuthError::Build(message.clone()),
            AuthError::Transient(message) => AuthError::Transient(message.clone()),
        })
    }
}
#[async_trait]
impl HttpTransport for FlakyTransport {
async fn execute(&self, _req: Request) -> Result<Response, TransportError> {
@@ -296,6 +351,65 @@ async fn streaming_client_retries_on_transport_error() -> Result<()> {
Ok(())
}
/// With `max_attempts = 2`, a transient auth failure on the first attempt
/// should lead to a second `apply_auth` call and exactly one recorded stream
/// request.
#[tokio::test]
async fn streaming_client_retries_on_transient_auth_error() -> Result<()> {
    let recorded = RecordingState::default();
    let auth = FailsOnceAuth::transient();

    let mut retrying_provider = provider("openai");
    retrying_provider.retry.max_attempts = 2;

    let client = ResponsesClient::new(
        RecordingTransport::new(recorded.clone()),
        retrying_provider,
        Arc::new(auth.clone()),
    );

    let _stream = client
        .stream(
            serde_json::json!({ "model": "gpt-test" }),
            HeaderMap::new(),
            Compression::None,
            /*turn_state*/ None,
        )
        .await?;

    // Two auth attempts, but only the second one produced a transport request.
    assert_eq!(auth.attempts(), 2);
    assert_eq!(recorded.take_stream_requests().len(), 1);
    Ok(())
}
/// An `AuthError::Build` failure should surface as
/// `ApiError::Transport(TransportError::Build(..))` after a single auth
/// attempt, with no stream request recorded, even though `max_attempts = 2`.
#[tokio::test]
async fn streaming_client_does_not_retry_auth_build_error() -> Result<()> {
    let recorded = RecordingState::default();
    let auth = FailsOnceAuth::build();

    let mut retrying_provider = provider("openai");
    retrying_provider.retry.max_attempts = 2;

    let client = ResponsesClient::new(
        RecordingTransport::new(recorded.clone()),
        retrying_provider,
        Arc::new(auth.clone()),
    );

    let outcome = client
        .stream(
            serde_json::json!({ "model": "gpt-test" }),
            HeaderMap::new(),
            Compression::None,
            /*turn_state*/ None,
        )
        .await;

    let Err(err) = outcome else {
        panic!("auth build errors should fail without retry")
    };

    assert!(matches!(
        err,
        ApiError::Transport(TransportError::Build(message))
            if message == "invalid auth configuration"
    ));
    // Exactly one auth attempt and nothing handed to the transport.
    assert_eq!(auth.attempts(), 1);
    assert_eq!(recorded.take_stream_requests().len(), 0);
    Ok(())
}
#[tokio::test]
async fn azure_default_store_attaches_ids_and_headers() -> Result<()> {
let state = RecordingState::default();