mirror of
https://github.com/openai/codex.git
synced 2026-03-03 13:13:18 +00:00
Compare commits
1 Commits
fix/notify
...
codex/real
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7063b84ff3 |
@@ -277,6 +277,32 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"RealtimeAudioToml": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"microphone": {
|
||||
"type": "string"
|
||||
},
|
||||
"speaker": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"RealtimeToml": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"audio": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/RealtimeAudioToml"
|
||||
}
|
||||
],
|
||||
"default": null
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"ConfigProfile": {
|
||||
"additionalProperties": false,
|
||||
"description": "Collection of common configuration options that a user can define as a unit in `config.toml`.",
|
||||
@@ -1541,6 +1567,15 @@
|
||||
"minimum": 0.0,
|
||||
"type": "integer"
|
||||
},
|
||||
"realtime": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/RealtimeToml"
|
||||
}
|
||||
],
|
||||
"default": null,
|
||||
"description": "Machine-local realtime audio device preferences used by realtime voice."
|
||||
},
|
||||
"chatgpt_base_url": {
|
||||
"description": "Base URL for requests to ChatGPT (as opposed to the OpenAI API).",
|
||||
"type": "string"
|
||||
|
||||
@@ -839,6 +839,38 @@ impl ConfigEditsBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_realtime_microphone(mut self, microphone: Option<&str>) -> Self {
|
||||
let segments = vec![
|
||||
"realtime".to_string(),
|
||||
"audio".to_string(),
|
||||
"microphone".to_string(),
|
||||
];
|
||||
match microphone {
|
||||
Some(microphone) => self.edits.push(ConfigEdit::SetPath {
|
||||
segments,
|
||||
value: value(microphone),
|
||||
}),
|
||||
None => self.edits.push(ConfigEdit::ClearPath { segments }),
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_realtime_speaker(mut self, speaker: Option<&str>) -> Self {
|
||||
let segments = vec![
|
||||
"realtime".to_string(),
|
||||
"audio".to_string(),
|
||||
"speaker".to_string(),
|
||||
];
|
||||
match speaker {
|
||||
Some(speaker) => self.edits.push(ConfigEdit::SetPath {
|
||||
segments,
|
||||
value: value(speaker),
|
||||
}),
|
||||
None => self.edits.push(ConfigEdit::ClearPath { segments }),
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn clear_legacy_windows_sandbox_keys(mut self) -> Self {
|
||||
for key in [
|
||||
"experimental_windows_sandbox",
|
||||
@@ -1804,6 +1836,54 @@ model_reasoning_effort = "high"
|
||||
assert_eq!(notice, Some(true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn blocking_builder_set_realtime_audio_persists_and_clears() {
|
||||
let tmp = tempdir().expect("tmpdir");
|
||||
let codex_home = tmp.path();
|
||||
|
||||
ConfigEditsBuilder::new(codex_home)
|
||||
.set_realtime_microphone(Some("USB Mic"))
|
||||
.set_realtime_speaker(Some("Desk Speakers"))
|
||||
.apply_blocking()
|
||||
.expect("persist realtime audio");
|
||||
|
||||
let raw = std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
let config: TomlValue = toml::from_str(&raw).expect("parse config");
|
||||
let realtime_audio = config
|
||||
.get("realtime")
|
||||
.and_then(TomlValue::as_table)
|
||||
.and_then(|realtime| realtime.get("audio"))
|
||||
.and_then(TomlValue::as_table)
|
||||
.expect("realtime.audio table should exist");
|
||||
assert_eq!(
|
||||
realtime_audio.get("microphone").and_then(TomlValue::as_str),
|
||||
Some("USB Mic")
|
||||
);
|
||||
assert_eq!(
|
||||
realtime_audio.get("speaker").and_then(TomlValue::as_str),
|
||||
Some("Desk Speakers")
|
||||
);
|
||||
|
||||
ConfigEditsBuilder::new(codex_home)
|
||||
.set_realtime_microphone(None)
|
||||
.apply_blocking()
|
||||
.expect("clear realtime microphone");
|
||||
|
||||
let raw = std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
|
||||
let config: TomlValue = toml::from_str(&raw).expect("parse config");
|
||||
let realtime_audio = config
|
||||
.get("realtime")
|
||||
.and_then(TomlValue::as_table)
|
||||
.and_then(|realtime| realtime.get("audio"))
|
||||
.and_then(TomlValue::as_table)
|
||||
.expect("realtime.audio table should exist");
|
||||
assert_eq!(realtime_audio.get("microphone"), None);
|
||||
assert_eq!(
|
||||
realtime_audio.get("speaker").and_then(TomlValue::as_str),
|
||||
Some("Desk Speakers")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_mcp_servers_blocking_clears_table_when_empty() {
|
||||
let tmp = tempdir().expect("tmpdir");
|
||||
|
||||
@@ -429,6 +429,9 @@ pub struct Config {
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: String,
|
||||
|
||||
/// Machine-local realtime audio device preferences used by realtime voice.
|
||||
pub realtime_audio: RealtimeAudioConfig,
|
||||
|
||||
/// Experimental / do not use. Overrides only the realtime conversation
|
||||
/// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
|
||||
/// connection) without changing normal provider HTTP requests.
|
||||
@@ -1178,6 +1181,10 @@ pub struct ConfigToml {
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: Option<String>,
|
||||
|
||||
/// Machine-local realtime audio device preferences used by realtime voice.
|
||||
#[serde(default)]
|
||||
pub realtime: Option<RealtimeToml>,
|
||||
|
||||
/// Experimental / do not use. Overrides only the realtime conversation
|
||||
/// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
|
||||
/// connection) without changing normal provider HTTP requests.
|
||||
@@ -1309,6 +1316,26 @@ impl ProjectConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
pub struct RealtimeAudioConfig {
|
||||
pub microphone: Option<String>,
|
||||
pub speaker: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
|
||||
#[schemars(deny_unknown_fields)]
|
||||
pub struct RealtimeToml {
|
||||
#[serde(default)]
|
||||
pub audio: Option<RealtimeAudioToml>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
|
||||
#[schemars(deny_unknown_fields)]
|
||||
pub struct RealtimeAudioToml {
|
||||
pub microphone: Option<String>,
|
||||
pub speaker: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, JsonSchema)]
|
||||
#[schemars(deny_unknown_fields)]
|
||||
pub struct ToolsToml {
|
||||
@@ -2150,6 +2177,13 @@ impl Config {
|
||||
.chatgpt_base_url
|
||||
.or(cfg.chatgpt_base_url)
|
||||
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
||||
realtime_audio: cfg.realtime.and_then(|realtime| realtime.audio).map_or_else(
|
||||
RealtimeAudioConfig::default,
|
||||
|audio| RealtimeAudioConfig {
|
||||
microphone: audio.microphone,
|
||||
speaker: audio.speaker,
|
||||
},
|
||||
),
|
||||
experimental_realtime_ws_base_url: cfg.experimental_realtime_ws_base_url,
|
||||
experimental_realtime_ws_backend_prompt: cfg.experimental_realtime_ws_backend_prompt,
|
||||
forced_chatgpt_workspace_id,
|
||||
@@ -4767,6 +4801,7 @@ model_verbosity = "high"
|
||||
model_verbosity: None,
|
||||
personality: Some(Personality::Pragmatic),
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
realtime_audio: RealtimeAudioConfig::default(),
|
||||
experimental_realtime_ws_base_url: None,
|
||||
experimental_realtime_ws_backend_prompt: None,
|
||||
base_instructions: None,
|
||||
@@ -4893,6 +4928,7 @@ model_verbosity = "high"
|
||||
model_verbosity: None,
|
||||
personality: Some(Personality::Pragmatic),
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
realtime_audio: RealtimeAudioConfig::default(),
|
||||
experimental_realtime_ws_base_url: None,
|
||||
experimental_realtime_ws_backend_prompt: None,
|
||||
base_instructions: None,
|
||||
@@ -5017,6 +5053,7 @@ model_verbosity = "high"
|
||||
model_verbosity: None,
|
||||
personality: Some(Personality::Pragmatic),
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
realtime_audio: RealtimeAudioConfig::default(),
|
||||
experimental_realtime_ws_base_url: None,
|
||||
experimental_realtime_ws_backend_prompt: None,
|
||||
base_instructions: None,
|
||||
@@ -5127,6 +5164,7 @@ model_verbosity = "high"
|
||||
model_verbosity: Some(Verbosity::High),
|
||||
personality: Some(Personality::Pragmatic),
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
realtime_audio: RealtimeAudioConfig::default(),
|
||||
experimental_realtime_ws_base_url: None,
|
||||
experimental_realtime_ws_backend_prompt: None,
|
||||
base_instructions: None,
|
||||
@@ -5971,6 +6009,37 @@ experimental_realtime_ws_backend_prompt = "prompt from config"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn realtime_audio_loads_from_config_toml() -> std::io::Result<()> {
|
||||
let cfg: ConfigToml = toml::from_str(
|
||||
r#"
|
||||
[realtime.audio]
|
||||
microphone = "USB Mic"
|
||||
speaker = "Desk Speakers"
|
||||
"#,
|
||||
)
|
||||
.expect("TOML deserialization should succeed");
|
||||
|
||||
let realtime_audio = cfg
|
||||
.realtime
|
||||
.as_ref()
|
||||
.and_then(|realtime| realtime.audio.as_ref())
|
||||
.expect("realtime audio config should be present");
|
||||
assert_eq!(realtime_audio.microphone.as_deref(), Some("USB Mic"));
|
||||
assert_eq!(realtime_audio.speaker.as_deref(), Some("Desk Speakers"));
|
||||
|
||||
let codex_home = TempDir::new()?;
|
||||
let config = Config::load_from_base_config_with_overrides(
|
||||
cfg,
|
||||
ConfigOverrides::default(),
|
||||
codex_home.path().to_path_buf(),
|
||||
)?;
|
||||
|
||||
assert_eq!(config.realtime_audio.microphone.as_deref(), Some("USB Mic"));
|
||||
assert_eq!(config.realtime_audio.speaker.as_deref(), Some("Desk Speakers"));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
122
codex-rs/tui/src/audio_device.rs
Normal file
122
codex-rs/tui/src/audio_device.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
use codex_core::config::Config;
|
||||
use cpal::traits::DeviceTrait;
|
||||
use cpal::traits::HostTrait;
|
||||
use tracing::warn;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
enum AudioDeviceKind {
|
||||
Input,
|
||||
Output,
|
||||
}
|
||||
|
||||
impl AudioDeviceKind {
|
||||
fn noun(self) -> &'static str {
|
||||
match self {
|
||||
Self::Input => "input",
|
||||
Self::Output => "output",
|
||||
}
|
||||
}
|
||||
|
||||
fn configured_name(self, config: &Config) -> Option<&str> {
|
||||
match self {
|
||||
Self::Input => config.realtime_audio.microphone.as_deref(),
|
||||
Self::Output => config.realtime_audio.speaker.as_deref(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn select_configured_input_device_and_config(
|
||||
config: &Config,
|
||||
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
||||
select_device_and_config(AudioDeviceKind::Input, config)
|
||||
}
|
||||
|
||||
pub(crate) fn select_configured_output_device_and_config(
|
||||
config: &Config,
|
||||
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
||||
select_device_and_config(AudioDeviceKind::Output, config)
|
||||
}
|
||||
|
||||
fn select_device_and_config(
|
||||
kind: AudioDeviceKind,
|
||||
config: &Config,
|
||||
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
||||
let host = cpal::default_host();
|
||||
let configured_name = kind.configured_name(config);
|
||||
let selected = configured_name
|
||||
.and_then(|name| find_device_by_name(&host, kind, name))
|
||||
.or_else(|| {
|
||||
let default_device = default_device(&host, kind);
|
||||
if let Some(name) = configured_name
|
||||
&& default_device.is_some()
|
||||
{
|
||||
warn!(
|
||||
"configured {} audio device `{name}` was unavailable; falling back to system default",
|
||||
kind.noun()
|
||||
);
|
||||
}
|
||||
default_device
|
||||
})
|
||||
.ok_or_else(|| missing_device_error(kind, configured_name))?;
|
||||
|
||||
let stream_config = default_config(&selected, kind)?;
|
||||
Ok((selected, stream_config))
|
||||
}
|
||||
|
||||
fn find_device_by_name(
|
||||
host: &cpal::Host,
|
||||
kind: AudioDeviceKind,
|
||||
name: &str,
|
||||
) -> Option<cpal::Device> {
|
||||
let devices = devices(host, kind).ok()?;
|
||||
devices
|
||||
.into_iter()
|
||||
.find(|device| device.name().ok().as_deref() == Some(name))
|
||||
}
|
||||
|
||||
fn devices(host: &cpal::Host, kind: AudioDeviceKind) -> Result<Vec<cpal::Device>, String> {
|
||||
match kind {
|
||||
AudioDeviceKind::Input => host
|
||||
.input_devices()
|
||||
.map(|devices| devices.collect())
|
||||
.map_err(|err| format!("failed to enumerate input audio devices: {err}")),
|
||||
AudioDeviceKind::Output => host
|
||||
.output_devices()
|
||||
.map(|devices| devices.collect())
|
||||
.map_err(|err| format!("failed to enumerate output audio devices: {err}")),
|
||||
}
|
||||
}
|
||||
|
||||
fn default_device(host: &cpal::Host, kind: AudioDeviceKind) -> Option<cpal::Device> {
|
||||
match kind {
|
||||
AudioDeviceKind::Input => host.default_input_device(),
|
||||
AudioDeviceKind::Output => host.default_output_device(),
|
||||
}
|
||||
}
|
||||
|
||||
fn default_config(
|
||||
device: &cpal::Device,
|
||||
kind: AudioDeviceKind,
|
||||
) -> Result<cpal::SupportedStreamConfig, String> {
|
||||
match kind {
|
||||
AudioDeviceKind::Input => device
|
||||
.default_input_config()
|
||||
.map_err(|err| format!("failed to get default input config: {err}")),
|
||||
AudioDeviceKind::Output => device
|
||||
.default_output_config()
|
||||
.map_err(|err| format!("failed to get default output config: {err}")),
|
||||
}
|
||||
}
|
||||
|
||||
fn missing_device_error(kind: AudioDeviceKind, configured_name: Option<&str>) -> String {
|
||||
match (kind, configured_name) {
|
||||
(AudioDeviceKind::Input, Some(name)) => format!(
|
||||
"configured input audio device `{name}` was unavailable and no default input audio device was found"
|
||||
),
|
||||
(AudioDeviceKind::Output, Some(name)) => format!(
|
||||
"configured output audio device `{name}` was unavailable and no default output audio device was found"
|
||||
),
|
||||
(AudioDeviceKind::Input, None) => "no input audio device available".to_string(),
|
||||
(AudioDeviceKind::Output, None) => "no output audio device available".to_string(),
|
||||
}
|
||||
}
|
||||
@@ -207,7 +207,7 @@ impl ChatWidget {
|
||||
{
|
||||
if self.realtime_conversation.audio_player.is_none() {
|
||||
self.realtime_conversation.audio_player =
|
||||
crate::voice::RealtimeAudioPlayer::start().ok();
|
||||
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
|
||||
}
|
||||
if let Some(player) = &self.realtime_conversation.audio_player
|
||||
&& let Err(err) = player.enqueue_frame(frame)
|
||||
@@ -231,7 +231,10 @@ impl ChatWidget {
|
||||
self.realtime_conversation.meter_placeholder_id = Some(placeholder_id.clone());
|
||||
self.request_redraw();
|
||||
|
||||
let capture = match crate::voice::VoiceCapture::start_realtime(self.app_event_tx.clone()) {
|
||||
let capture = match crate::voice::VoiceCapture::start_realtime(
|
||||
&self.config,
|
||||
self.app_event_tx.clone(),
|
||||
) {
|
||||
Ok(capture) => capture,
|
||||
Err(err) => {
|
||||
self.remove_transcription_placeholder(&placeholder_id);
|
||||
@@ -250,7 +253,7 @@ impl ChatWidget {
|
||||
self.realtime_conversation.capture = Some(capture);
|
||||
if self.realtime_conversation.audio_player.is_none() {
|
||||
self.realtime_conversation.audio_player =
|
||||
crate::voice::RealtimeAudioPlayer::start().ok();
|
||||
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
|
||||
}
|
||||
|
||||
std::thread::spawn(move || {
|
||||
|
||||
@@ -61,6 +61,8 @@ mod app_backtrack;
|
||||
mod app_event;
|
||||
mod app_event_sender;
|
||||
mod ascii_animation;
|
||||
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
|
||||
mod audio_device;
|
||||
mod bottom_pane;
|
||||
mod chatwidget;
|
||||
mod cli;
|
||||
@@ -121,6 +123,7 @@ mod voice;
|
||||
mod voice {
|
||||
use crate::app_event::AppEvent;
|
||||
use crate::app_event_sender::AppEventSender;
|
||||
use codex_core::config::Config;
|
||||
use codex_protocol::protocol::RealtimeAudioFrame;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
@@ -144,7 +147,7 @@ mod voice {
|
||||
Err("voice input is unavailable in this build".to_string())
|
||||
}
|
||||
|
||||
pub fn start_realtime(_tx: AppEventSender) -> Result<Self, String> {
|
||||
pub fn start_realtime(_config: &Config, _tx: AppEventSender) -> Result<Self, String> {
|
||||
Err("voice input is unavailable in this build".to_string())
|
||||
}
|
||||
|
||||
@@ -184,7 +187,7 @@ mod voice {
|
||||
}
|
||||
|
||||
impl RealtimeAudioPlayer {
|
||||
pub(crate) fn start() -> Result<Self, String> {
|
||||
pub(crate) fn start(_config: &Config) -> Result<Self, String> {
|
||||
Err("voice output is unavailable in this build".to_string())
|
||||
}
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ pub struct VoiceCapture {
|
||||
|
||||
impl VoiceCapture {
|
||||
pub fn start() -> Result<Self, String> {
|
||||
let (device, config) = select_input_device_and_config()?;
|
||||
let (device, config) = select_default_input_device_and_config()?;
|
||||
|
||||
let sample_rate = config.sample_rate().0;
|
||||
let channels = config.channels();
|
||||
@@ -74,8 +74,8 @@ impl VoiceCapture {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn start_realtime(tx: AppEventSender) -> Result<Self, String> {
|
||||
let (device, config) = select_input_device_and_config()?;
|
||||
pub fn start_realtime(config: &Config, tx: AppEventSender) -> Result<Self, String> {
|
||||
let (device, config) = select_realtime_input_device_and_config(config)?;
|
||||
|
||||
let sample_rate = config.sample_rate().0;
|
||||
let channels = config.channels();
|
||||
@@ -262,7 +262,8 @@ pub fn transcribe_async(
|
||||
// Voice input helpers
|
||||
// -------------------------
|
||||
|
||||
fn select_input_device_and_config() -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
||||
fn select_default_input_device_and_config()
|
||||
-> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
||||
let host = cpal::default_host();
|
||||
let device = host
|
||||
.default_input_device()
|
||||
@@ -273,6 +274,12 @@ fn select_input_device_and_config() -> Result<(cpal::Device, cpal::SupportedStre
|
||||
Ok((device, config))
|
||||
}
|
||||
|
||||
fn select_realtime_input_device_and_config(
|
||||
config: &Config,
|
||||
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
||||
crate::audio_device::select_configured_input_device_and_config(config)
|
||||
}
|
||||
|
||||
fn build_input_stream(
|
||||
device: &cpal::Device,
|
||||
config: &cpal::SupportedStreamConfig,
|
||||
@@ -466,14 +473,9 @@ pub(crate) struct RealtimeAudioPlayer {
|
||||
}
|
||||
|
||||
impl RealtimeAudioPlayer {
|
||||
pub(crate) fn start() -> Result<Self, String> {
|
||||
let host = cpal::default_host();
|
||||
let device = host
|
||||
.default_output_device()
|
||||
.ok_or_else(|| "no output audio device available".to_string())?;
|
||||
let config = device
|
||||
.default_output_config()
|
||||
.map_err(|e| format!("failed to get default output config: {e}"))?;
|
||||
pub(crate) fn start(config: &Config) -> Result<Self, String> {
|
||||
let (device, config) =
|
||||
crate::audio_device::select_configured_output_device_and_config(config)?;
|
||||
let output_sample_rate = config.sample_rate().0;
|
||||
let output_channels = config.channels();
|
||||
let queue = Arc::new(Mutex::new(VecDeque::new()));
|
||||
|
||||
Reference in New Issue
Block a user