Compare commits

...

2 Commits

Author SHA1 Message Date
Ahmed Ibrahim
bd37f95e43 Add realtime audio device picker
Add a TUI picker for realtime microphone and speaker selection.

Co-authored-by: Codex <noreply@openai.com>
2026-02-25 17:56:08 -08:00
Ahmed Ibrahim
7063b84ff3 Add realtime audio device config
Add microphone and speaker selection config for realtime audio.

Co-authored-by: Codex <noreply@openai.com>
2026-02-25 17:53:29 -08:00
18 changed files with 843 additions and 25 deletions

View File

@@ -277,6 +277,32 @@
}
]
},
"RealtimeAudioToml": {
"additionalProperties": false,
"properties": {
"microphone": {
"type": "string"
},
"speaker": {
"type": "string"
}
},
"type": "object"
},
"RealtimeToml": {
"additionalProperties": false,
"properties": {
"audio": {
"allOf": [
{
"$ref": "#/definitions/RealtimeAudioToml"
}
],
"default": null
}
},
"type": "object"
},
"ConfigProfile": {
"additionalProperties": false,
"description": "Collection of common configuration options that a user can define as a unit in `config.toml`.",
@@ -1541,6 +1567,15 @@
"minimum": 0.0,
"type": "integer"
},
"realtime": {
"allOf": [
{
"$ref": "#/definitions/RealtimeToml"
}
],
"default": null,
"description": "Machine-local realtime audio device preferences used by realtime voice."
},
"chatgpt_base_url": {
"description": "Base URL for requests to ChatGPT (as opposed to the OpenAI API).",
"type": "string"

View File

@@ -839,6 +839,38 @@ impl ConfigEditsBuilder {
self
}
/// Queue an edit that stores (or clears) the realtime microphone name under
/// `realtime.audio.microphone` in `config.toml`.
pub fn set_realtime_microphone(mut self, microphone: Option<&str>) -> Self {
    let segments: Vec<String> = ["realtime", "audio", "microphone"]
        .iter()
        .map(|segment| segment.to_string())
        .collect();
    let edit = match microphone {
        Some(device) => ConfigEdit::SetPath {
            segments,
            value: value(device),
        },
        // `None` removes the key so the system default applies again.
        None => ConfigEdit::ClearPath { segments },
    };
    self.edits.push(edit);
    self
}
/// Queue an edit that stores (or clears) the realtime speaker name under
/// `realtime.audio.speaker` in `config.toml`.
pub fn set_realtime_speaker(mut self, speaker: Option<&str>) -> Self {
    let segments: Vec<String> = ["realtime", "audio", "speaker"]
        .iter()
        .map(|segment| segment.to_string())
        .collect();
    let edit = match speaker {
        Some(device) => ConfigEdit::SetPath {
            segments,
            value: value(device),
        },
        // `None` removes the key so the system default applies again.
        None => ConfigEdit::ClearPath { segments },
    };
    self.edits.push(edit);
    self
}
pub fn clear_legacy_windows_sandbox_keys(mut self) -> Self {
for key in [
"experimental_windows_sandbox",
@@ -1804,6 +1836,54 @@ model_reasoning_effort = "high"
assert_eq!(notice, Some(true));
}
#[test]
fn blocking_builder_set_realtime_audio_persists_and_clears() {
    let tmp = tempdir().expect("tmpdir");
    let codex_home = tmp.path();
    // Helper: re-read config.toml and return the `[realtime.audio]` table.
    fn read_realtime_audio(codex_home: &std::path::Path) -> toml::value::Table {
        let raw =
            std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
        let config: TomlValue = toml::from_str(&raw).expect("parse config");
        config
            .get("realtime")
            .and_then(TomlValue::as_table)
            .and_then(|realtime| realtime.get("audio"))
            .and_then(TomlValue::as_table)
            .cloned()
            .expect("realtime.audio table should exist")
    }
    // Persist both devices in one builder run.
    ConfigEditsBuilder::new(codex_home)
        .set_realtime_microphone(Some("USB Mic"))
        .set_realtime_speaker(Some("Desk Speakers"))
        .apply_blocking()
        .expect("persist realtime audio");
    let audio = read_realtime_audio(codex_home);
    assert_eq!(
        audio.get("microphone").and_then(TomlValue::as_str),
        Some("USB Mic")
    );
    assert_eq!(
        audio.get("speaker").and_then(TomlValue::as_str),
        Some("Desk Speakers")
    );
    // Clearing only the microphone must leave the speaker untouched.
    ConfigEditsBuilder::new(codex_home)
        .set_realtime_microphone(None)
        .apply_blocking()
        .expect("clear realtime microphone");
    let audio = read_realtime_audio(codex_home);
    assert_eq!(audio.get("microphone"), None);
    assert_eq!(
        audio.get("speaker").and_then(TomlValue::as_str),
        Some("Desk Speakers")
    );
}
#[test]
fn replace_mcp_servers_blocking_clears_table_when_empty() {
let tmp = tempdir().expect("tmpdir");

View File

@@ -429,6 +429,9 @@ pub struct Config {
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
pub chatgpt_base_url: String,
/// Machine-local realtime audio device preferences used by realtime voice.
pub realtime_audio: RealtimeAudioConfig,
/// Experimental / do not use. Overrides only the realtime conversation
/// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
/// connection) without changing normal provider HTTP requests.
@@ -1178,6 +1181,10 @@ pub struct ConfigToml {
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
pub chatgpt_base_url: Option<String>,
/// Machine-local realtime audio device preferences used by realtime voice.
#[serde(default)]
pub realtime: Option<RealtimeToml>,
/// Experimental / do not use. Overrides only the realtime conversation
/// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
/// connection) without changing normal provider HTTP requests.
@@ -1309,6 +1316,26 @@ impl ProjectConfig {
}
}
/// Effective realtime audio device selection after config resolution.
///
/// `None` for either field means "use the operating-system default device".
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RealtimeAudioConfig {
    // Preferred input (microphone) device name, if one was configured.
    pub microphone: Option<String>,
    // Preferred output (speaker) device name, if one was configured.
    pub speaker: Option<String>,
}
/// `[realtime]` table as written in `config.toml`.
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeToml {
    // Optional `[realtime.audio]` sub-table with device preferences.
    #[serde(default)]
    pub audio: Option<RealtimeAudioToml>,
}
/// `[realtime.audio]` table as written in `config.toml`; both keys are
/// optional, and an absent key means "system default".
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeAudioToml {
    pub microphone: Option<String>,
    pub speaker: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct ToolsToml {
@@ -2150,6 +2177,13 @@ impl Config {
.chatgpt_base_url
.or(cfg.chatgpt_base_url)
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
realtime_audio: cfg.realtime.and_then(|realtime| realtime.audio).map_or_else(
RealtimeAudioConfig::default,
|audio| RealtimeAudioConfig {
microphone: audio.microphone,
speaker: audio.speaker,
},
),
experimental_realtime_ws_base_url: cfg.experimental_realtime_ws_base_url,
experimental_realtime_ws_backend_prompt: cfg.experimental_realtime_ws_backend_prompt,
forced_chatgpt_workspace_id,
@@ -4767,6 +4801,7 @@ model_verbosity = "high"
model_verbosity: None,
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -4893,6 +4928,7 @@ model_verbosity = "high"
model_verbosity: None,
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -5017,6 +5053,7 @@ model_verbosity = "high"
model_verbosity: None,
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -5127,6 +5164,7 @@ model_verbosity = "high"
model_verbosity: Some(Verbosity::High),
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -5971,6 +6009,37 @@ experimental_realtime_ws_backend_prompt = "prompt from config"
);
Ok(())
}
#[test]
fn realtime_audio_loads_from_config_toml() -> std::io::Result<()> {
    // Deserialize a config that pins both realtime audio devices.
    let toml_src = r#"
[realtime.audio]
microphone = "USB Mic"
speaker = "Desk Speakers"
"#;
    let cfg: ConfigToml = toml::from_str(toml_src).expect("TOML deserialization should succeed");
    let audio = cfg
        .realtime
        .as_ref()
        .and_then(|realtime| realtime.audio.as_ref())
        .expect("realtime audio config should be present");
    assert_eq!(audio.microphone.as_deref(), Some("USB Mic"));
    assert_eq!(audio.speaker.as_deref(), Some("Desk Speakers"));
    // The toml-level values must survive full config resolution.
    let codex_home = TempDir::new()?;
    let config = Config::load_from_base_config_with_overrides(
        cfg,
        ConfigOverrides::default(),
        codex_home.path().to_path_buf(),
    )?;
    assert_eq!(config.realtime_audio.microphone.as_deref(), Some("USB Mic"));
    assert_eq!(
        config.realtime_audio.speaker.as_deref(),
        Some("Desk Speakers")
    );
    Ok(())
}
}
#[cfg(test)]

View File

@@ -1,5 +1,7 @@
use crate::app_backtrack::BacktrackState;
use crate::app_event::AppEvent;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
use crate::app_event::RealtimeAudioDeviceKind;
use crate::app_event::ExitMode;
#[cfg(target_os = "windows")]
use crate::app_event::WindowsSandboxEnableMode;
@@ -2013,6 +2015,9 @@ impl App {
AppEvent::UpdatePersonality(personality) => {
self.on_update_personality(personality);
}
AppEvent::OpenRealtimeAudioDeviceSelection { kind } => {
self.chat_widget.open_realtime_audio_device_selection(kind);
}
AppEvent::OpenReasoningPopup { model } => {
self.chat_widget.open_reasoning_popup(model);
}
@@ -2438,6 +2443,57 @@ impl App {
}
}
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
AppEvent::PersistRealtimeAudioDeviceSelection { kind, name } => {
let builder = match kind {
RealtimeAudioDeviceKind::Microphone => {
ConfigEditsBuilder::new(&self.config.codex_home)
.set_realtime_microphone(name.as_deref())
}
RealtimeAudioDeviceKind::Speaker => {
ConfigEditsBuilder::new(&self.config.codex_home)
.set_realtime_speaker(name.as_deref())
}
};
match builder.apply().await {
Ok(()) => {
match kind {
RealtimeAudioDeviceKind::Microphone => {
self.config.realtime_audio.microphone = name.clone();
}
RealtimeAudioDeviceKind::Speaker => {
self.config.realtime_audio.speaker = name.clone();
}
}
self.chat_widget.set_realtime_audio_device(kind, name.clone());
if self.chat_widget.realtime_conversation_is_live() {
self.chat_widget.open_realtime_audio_restart_prompt(kind);
} else {
let selection = name.unwrap_or_else(|| "System default".to_string());
self.chat_widget.add_info_message(
format!("Realtime {} set to {selection}", kind.noun()),
None,
);
}
}
Err(err) => {
tracing::error!(
error = %err,
"failed to persist realtime audio selection"
);
self.chat_widget.add_error_message(format!(
"Failed to save realtime {}: {err}",
kind.noun()
));
}
}
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
AppEvent::RestartRealtimeAudioDevice { kind } => {
self.chat_widget.restart_realtime_audio_device(kind);
}
AppEvent::UpdateAskForApprovalPolicy(policy) => {
self.runtime_approval_policy_override = Some(policy);
if let Err(err) = self.config.permissions.approval_policy.set(policy) {

View File

@@ -29,6 +29,29 @@ use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
/// Which realtime audio endpoint a picker or event refers to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RealtimeAudioDeviceKind {
    Microphone,
    Speaker,
}

impl RealtimeAudioDeviceKind {
    /// Capitalized label used in popup titles.
    pub(crate) fn title(self) -> &'static str {
        match self {
            RealtimeAudioDeviceKind::Speaker => "Speaker",
            RealtimeAudioDeviceKind::Microphone => "Microphone",
        }
    }

    /// Lowercase noun for inline user-facing messages.
    #[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
    pub(crate) fn noun(self) -> &'static str {
        match self {
            RealtimeAudioDeviceKind::Speaker => "speaker",
            RealtimeAudioDeviceKind::Microphone => "microphone",
        }
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(not(target_os = "windows"), allow(dead_code))]
pub(crate) enum WindowsSandboxEnableMode {
@@ -166,6 +189,24 @@ pub(crate) enum AppEvent {
personality: Personality,
},
/// Open the device picker for a realtime microphone or speaker.
OpenRealtimeAudioDeviceSelection {
kind: RealtimeAudioDeviceKind,
},
/// Persist the selected realtime microphone or speaker to top-level config.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
PersistRealtimeAudioDeviceSelection {
kind: RealtimeAudioDeviceKind,
name: Option<String>,
},
/// Restart the selected realtime microphone or speaker locally.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
RestartRealtimeAudioDevice {
kind: RealtimeAudioDeviceKind,
},
/// Open the reasoning selection popup after picking a model.
OpenReasoningPopup {
model: ModelPreset,

View File

@@ -0,0 +1,123 @@
use codex_core::config::Config;
use cpal::traits::DeviceTrait;
use cpal::traits::HostTrait;
use tracing::warn;
use crate::app_event::RealtimeAudioDeviceKind;
/// List the unique, human-readable names of all devices of `kind`,
/// preserving enumeration order.
pub(crate) fn list_realtime_audio_device_names(
    kind: RealtimeAudioDeviceKind,
) -> Result<Vec<String>, String> {
    let host = cpal::default_host();
    let mut names: Vec<String> = Vec::new();
    // Devices may fail to report a name; skip those. Some backends expose the
    // same device twice, so drop duplicates while keeping first-seen order.
    for name in devices(&host, kind)?
        .into_iter()
        .filter_map(|device| device.name().ok())
    {
        if !names.contains(&name) {
            names.push(name);
        }
    }
    Ok(names)
}
/// Resolve the input (microphone) device and stream config for realtime
/// capture, honoring `config.realtime_audio.microphone` when it is set and
/// the device is available.
pub(crate) fn select_configured_input_device_and_config(
    config: &Config,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
    select_device_and_config(RealtimeAudioDeviceKind::Microphone, config)
}
/// Resolve the output (speaker) device and stream config for realtime
/// playback, honoring `config.realtime_audio.speaker` when it is set and
/// the device is available.
pub(crate) fn select_configured_output_device_and_config(
    config: &Config,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
    select_device_and_config(RealtimeAudioDeviceKind::Speaker, config)
}
/// Pick the device for `kind`: the configured one when present and available,
/// otherwise the system default. Errors when neither can be resolved.
fn select_device_and_config(
    kind: RealtimeAudioDeviceKind,
    config: &Config,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
    let host = cpal::default_host();
    let configured_name = configured_name(kind, config);
    let selected = configured_name
        .and_then(|name| find_device_by_name(&host, kind, name))
        .or_else(|| {
            let default_device = default_device(&host, kind);
            // Warn only when we are actually falling back: a configured name
            // existed and a usable default is taking its place. If there is
            // no default either, the error below covers it instead.
            if let Some(name) = configured_name && default_device.is_some() {
                warn!(
                    "configured {} audio device `{name}` was unavailable; falling back to system default",
                    kind.noun()
                );
            }
            default_device
        })
        .ok_or_else(|| missing_device_error(kind, configured_name))?;
    // The device's default stream config is used as-is for the chosen device.
    let stream_config = default_config(&selected, kind)?;
    Ok((selected, stream_config))
}
/// The device name the user configured for `kind`, if any.
fn configured_name(kind: RealtimeAudioDeviceKind, config: &Config) -> Option<&str> {
    let audio = &config.realtime_audio;
    match kind {
        RealtimeAudioDeviceKind::Microphone => audio.microphone.as_deref(),
        RealtimeAudioDeviceKind::Speaker => audio.speaker.as_deref(),
    }
}
/// Find a device of `kind` whose reported name matches `name` exactly.
/// Enumeration failures are treated as "not found".
fn find_device_by_name(
    host: &cpal::Host,
    kind: RealtimeAudioDeviceKind,
    name: &str,
) -> Option<cpal::Device> {
    devices(host, kind)
        .ok()?
        .into_iter()
        .find(|device| matches!(device.name(), Ok(device_name) if device_name == name))
}
/// Enumerate every device of `kind` on `host`.
fn devices(host: &cpal::Host, kind: RealtimeAudioDeviceKind) -> Result<Vec<cpal::Device>, String> {
    match kind {
        RealtimeAudioDeviceKind::Microphone => match host.input_devices() {
            Ok(devices) => Ok(devices.collect()),
            Err(err) => Err(format!("failed to enumerate input audio devices: {err}")),
        },
        RealtimeAudioDeviceKind::Speaker => match host.output_devices() {
            Ok(devices) => Ok(devices.collect()),
            Err(err) => Err(format!("failed to enumerate output audio devices: {err}")),
        },
    }
}
/// The host's default device for the given direction, if one exists.
fn default_device(host: &cpal::Host, kind: RealtimeAudioDeviceKind) -> Option<cpal::Device> {
    match kind {
        RealtimeAudioDeviceKind::Microphone => host.default_input_device(),
        RealtimeAudioDeviceKind::Speaker => host.default_output_device(),
    }
}
/// The device's default stream configuration for the given direction.
fn default_config(
    device: &cpal::Device,
    kind: RealtimeAudioDeviceKind,
) -> Result<cpal::SupportedStreamConfig, String> {
    match kind {
        RealtimeAudioDeviceKind::Microphone => match device.default_input_config() {
            Ok(config) => Ok(config),
            Err(err) => Err(format!("failed to get default input config: {err}")),
        },
        RealtimeAudioDeviceKind::Speaker => match device.default_output_config() {
            Ok(config) => Ok(config),
            Err(err) => Err(format!("failed to get default output config: {err}")),
        },
    }
}
/// Build the user-facing error for when no device could be resolved at all,
/// mentioning the configured name when one was set.
fn missing_device_error(kind: RealtimeAudioDeviceKind, configured_name: Option<&str>) -> String {
    match kind {
        RealtimeAudioDeviceKind::Microphone => match configured_name {
            Some(name) => format!(
                "configured microphone `{name}` was unavailable and no default input audio device was found"
            ),
            None => "no input audio device available".to_string(),
        },
        RealtimeAudioDeviceKind::Speaker => match configured_name {
            Some(name) => format!(
                "configured speaker `{name}` was unavailable and no default output audio device was found"
            ),
            None => "no output audio device available".to_string(),
        },
    }
}

View File

@@ -390,6 +390,7 @@ pub(crate) struct ChatComposer {
connectors_enabled: bool,
personality_command_enabled: bool,
realtime_conversation_enabled: bool,
audio_device_selection_enabled: bool,
windows_degraded_sandbox_active: bool,
status_line_value: Option<Line<'static>>,
status_line_enabled: bool,
@@ -496,6 +497,7 @@ impl ChatComposer {
connectors_enabled: false,
personality_command_enabled: false,
realtime_conversation_enabled: false,
audio_device_selection_enabled: false,
windows_degraded_sandbox_active: false,
status_line_value: None,
status_line_enabled: false,
@@ -584,6 +586,10 @@ impl ChatComposer {
self.realtime_conversation_enabled = enabled;
}
pub fn set_audio_device_selection_enabled(&mut self, enabled: bool) {
self.audio_device_selection_enabled = enabled;
}
pub fn set_voice_transcription_enabled(&mut self, enabled: bool) {
self.voice_state.transcription_enabled = enabled;
if !enabled {
@@ -2267,6 +2273,7 @@ impl ChatComposer {
self.connectors_enabled,
self.personality_command_enabled,
self.realtime_conversation_enabled,
self.audio_device_selection_enabled,
self.windows_degraded_sandbox_active,
)
.is_some();
@@ -2467,6 +2474,7 @@ impl ChatComposer {
self.connectors_enabled,
self.personality_command_enabled,
self.realtime_conversation_enabled,
self.audio_device_selection_enabled,
self.windows_degraded_sandbox_active,
)
{
@@ -2502,6 +2510,7 @@ impl ChatComposer {
self.connectors_enabled,
self.personality_command_enabled,
self.realtime_conversation_enabled,
self.audio_device_selection_enabled,
self.windows_degraded_sandbox_active,
)?;
@@ -3335,6 +3344,7 @@ impl ChatComposer {
self.connectors_enabled,
self.personality_command_enabled,
self.realtime_conversation_enabled,
self.audio_device_selection_enabled,
self.windows_degraded_sandbox_active,
)
.is_some();
@@ -3397,6 +3407,7 @@ impl ChatComposer {
self.connectors_enabled,
self.personality_command_enabled,
self.realtime_conversation_enabled,
self.audio_device_selection_enabled,
self.windows_degraded_sandbox_active,
) {
return true;
@@ -3451,6 +3462,7 @@ impl ChatComposer {
let connectors_enabled = self.connectors_enabled;
let personality_command_enabled = self.personality_command_enabled;
let realtime_conversation_enabled = self.realtime_conversation_enabled;
let audio_device_selection_enabled = self.audio_device_selection_enabled;
let mut command_popup = CommandPopup::new(
self.custom_prompts.clone(),
CommandPopupFlags {
@@ -3458,6 +3470,7 @@ impl ChatComposer {
connectors_enabled,
personality_command_enabled,
realtime_conversation_enabled,
audio_device_selection_enabled,
windows_degraded_sandbox_active: self.windows_degraded_sandbox_active,
},
);

View File

@@ -40,6 +40,7 @@ pub(crate) struct CommandPopupFlags {
pub(crate) connectors_enabled: bool,
pub(crate) personality_command_enabled: bool,
pub(crate) realtime_conversation_enabled: bool,
pub(crate) audio_device_selection_enabled: bool,
pub(crate) windows_degraded_sandbox_active: bool,
}
@@ -51,6 +52,7 @@ impl CommandPopup {
flags.connectors_enabled,
flags.personality_command_enabled,
flags.realtime_conversation_enabled,
flags.audio_device_selection_enabled,
flags.windows_degraded_sandbox_active,
)
.into_iter()
@@ -498,6 +500,7 @@ mod tests {
connectors_enabled: false,
personality_command_enabled: true,
realtime_conversation_enabled: false,
audio_device_selection_enabled: false,
windows_degraded_sandbox_active: false,
},
);
@@ -518,6 +521,7 @@ mod tests {
connectors_enabled: false,
personality_command_enabled: true,
realtime_conversation_enabled: false,
audio_device_selection_enabled: false,
windows_degraded_sandbox_active: false,
},
);
@@ -538,6 +542,7 @@ mod tests {
connectors_enabled: false,
personality_command_enabled: false,
realtime_conversation_enabled: false,
audio_device_selection_enabled: false,
windows_degraded_sandbox_active: false,
},
);
@@ -566,6 +571,7 @@ mod tests {
connectors_enabled: false,
personality_command_enabled: true,
realtime_conversation_enabled: false,
audio_device_selection_enabled: false,
windows_degraded_sandbox_active: false,
},
);
@@ -577,6 +583,36 @@ mod tests {
}
}
#[test]
fn audio_command_hidden_when_audio_device_selection_is_disabled() {
    // `/audio` requires its own capability flag: even with realtime voice
    // enabled, the popup must not offer it when device selection is off.
    let mut popup = CommandPopup::new(
        Vec::new(),
        CommandPopupFlags {
            collaboration_modes_enabled: false,
            connectors_enabled: false,
            personality_command_enabled: true,
            realtime_conversation_enabled: true,
            audio_device_selection_enabled: false,
            windows_degraded_sandbox_active: false,
        },
    );
    popup.on_composer_text_change("/aud".to_string());
    // Collect only built-in command names; user prompts are irrelevant here.
    let cmds: Vec<&str> = popup
        .filtered_items()
        .into_iter()
        .filter_map(|item| match item {
            CommandItem::Builtin(cmd) => Some(cmd.command()),
            CommandItem::UserPrompt(_) => None,
        })
        .collect();
    assert!(
        !cmds.contains(&"audio"),
        "expected '/audio' to be hidden when audio device selection is disabled, got {cmds:?}"
    );
}
#[test]
fn debug_commands_are_hidden_from_popup() {
let popup = CommandPopup::new(Vec::new(), CommandPopupFlags::default());

View File

@@ -302,6 +302,11 @@ impl BottomPane {
self.request_redraw();
}
pub fn set_audio_device_selection_enabled(&mut self, enabled: bool) {
self.composer.set_audio_device_selection_enabled(enabled);
self.request_redraw();
}
pub fn set_voice_transcription_enabled(&mut self, enabled: bool) {
self.composer.set_voice_transcription_enabled(enabled);
self.request_redraw();

View File

@@ -14,6 +14,7 @@ pub(crate) fn builtins_for_input(
connectors_enabled: bool,
personality_command_enabled: bool,
realtime_conversation_enabled: bool,
audio_device_selection_enabled: bool,
allow_elevate_sandbox: bool,
) -> Vec<(&'static str, SlashCommand)> {
built_in_slash_commands()
@@ -26,6 +27,7 @@ pub(crate) fn builtins_for_input(
.filter(|(_, cmd)| connectors_enabled || *cmd != SlashCommand::Apps)
.filter(|(_, cmd)| personality_command_enabled || *cmd != SlashCommand::Personality)
.filter(|(_, cmd)| realtime_conversation_enabled || *cmd != SlashCommand::Realtime)
.filter(|(_, cmd)| audio_device_selection_enabled || *cmd != SlashCommand::Audio)
.collect()
}
@@ -36,6 +38,7 @@ pub(crate) fn find_builtin_command(
connectors_enabled: bool,
personality_command_enabled: bool,
realtime_conversation_enabled: bool,
audio_device_selection_enabled: bool,
allow_elevate_sandbox: bool,
) -> Option<SlashCommand> {
builtins_for_input(
@@ -43,6 +46,7 @@ pub(crate) fn find_builtin_command(
connectors_enabled,
personality_command_enabled,
realtime_conversation_enabled,
audio_device_selection_enabled,
allow_elevate_sandbox,
)
.into_iter()
@@ -57,6 +61,7 @@ pub(crate) fn has_builtin_prefix(
connectors_enabled: bool,
personality_command_enabled: bool,
realtime_conversation_enabled: bool,
audio_device_selection_enabled: bool,
allow_elevate_sandbox: bool,
) -> bool {
builtins_for_input(
@@ -64,6 +69,7 @@ pub(crate) fn has_builtin_prefix(
connectors_enabled,
personality_command_enabled,
realtime_conversation_enabled,
audio_device_selection_enabled,
allow_elevate_sandbox,
)
.into_iter()
@@ -77,14 +83,14 @@ mod tests {
#[test]
fn debug_command_still_resolves_for_dispatch() {
let cmd = find_builtin_command("debug-config", true, true, true, false, false);
let cmd = find_builtin_command("debug-config", true, true, true, false, false, false);
assert_eq!(cmd, Some(SlashCommand::DebugConfig));
}
#[test]
fn clear_command_resolves_for_dispatch() {
assert_eq!(
find_builtin_command("clear", true, true, true, false, false),
find_builtin_command("clear", true, true, true, false, false, false),
Some(SlashCommand::Clear)
);
}
@@ -92,7 +98,23 @@ mod tests {
#[test]
fn realtime_command_is_hidden_when_realtime_is_disabled() {
assert_eq!(
find_builtin_command("realtime", true, true, true, false, false),
find_builtin_command("realtime", true, true, true, false, true, false),
None
);
}
#[test]
fn audio_command_is_hidden_when_realtime_is_disabled() {
    // NOTE(review): both the realtime flag and the audio-device-selection
    // flag are false here, so hiding follows from either; the test name
    // emphasizes the realtime flag — confirm intended coverage.
    assert_eq!(
        find_builtin_command("audio", true, true, true, false, false, false),
        None
    );
}
#[test]
fn audio_command_is_hidden_when_audio_device_selection_is_disabled() {
    // Realtime is enabled but the audio-device-selection flag is false,
    // so `/audio` must not resolve.
    assert_eq!(
        find_builtin_command("audio", true, true, true, true, false, false),
        None
    );
}

View File

@@ -37,6 +37,9 @@ use std::sync::atomic::Ordering;
use std::time::Duration;
use std::time::Instant;
use crate::app_event::RealtimeAudioDeviceKind;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
use crate::audio_device::list_realtime_audio_device_names;
use crate::bottom_pane::StatusLineItem;
use crate::bottom_pane::StatusLineSetupView;
use crate::status::RateLimitWindowDisplay;
@@ -855,6 +858,10 @@ impl ChatWidget {
&& cfg!(not(target_os = "linux"))
}
/// Whether the `/audio` device picker should be offered: realtime voice must
/// be enabled and the binary built with the `voice-input` feature.
/// (Platform gating — e.g. excluding Linux — appears to be handled inside
/// `realtime_conversation_enabled()`; confirm if that upstream check changes.)
fn realtime_audio_device_selection_enabled(&self) -> bool {
    self.realtime_conversation_enabled() && cfg!(feature = "voice-input")
}
/// Synchronize the bottom-pane "task running" indicator with the current lifecycles.
///
/// The bottom pane only has one running flag, but this module treats it as a derived state of
@@ -2885,6 +2892,9 @@ impl ChatWidget {
widget
.bottom_pane
.set_realtime_conversation_enabled(widget.realtime_conversation_enabled());
widget
.bottom_pane
.set_audio_device_selection_enabled(widget.realtime_audio_device_selection_enabled());
widget
.bottom_pane
.set_status_line_enabled(!widget.configured_status_line_items().is_empty());
@@ -3062,6 +3072,9 @@ impl ChatWidget {
widget
.bottom_pane
.set_realtime_conversation_enabled(widget.realtime_conversation_enabled());
widget
.bottom_pane
.set_audio_device_selection_enabled(widget.realtime_audio_device_selection_enabled());
widget
.bottom_pane
.set_status_line_enabled(!widget.configured_status_line_items().is_empty());
@@ -3228,6 +3241,9 @@ impl ChatWidget {
widget
.bottom_pane
.set_realtime_conversation_enabled(widget.realtime_conversation_enabled());
widget
.bottom_pane
.set_audio_device_selection_enabled(widget.realtime_audio_device_selection_enabled());
widget
.bottom_pane
.set_status_line_enabled(!widget.configured_status_line_items().is_empty());
@@ -3540,6 +3556,12 @@ impl ChatWidget {
self.start_realtime_conversation();
}
}
SlashCommand::Audio => {
if !self.realtime_audio_device_selection_enabled() {
return;
}
self.open_realtime_audio_popup();
}
SlashCommand::Personality => {
self.open_personality_popup();
}
@@ -5279,6 +5301,167 @@ impl ChatWidget {
});
}
/// Show the top-level "Realtime audio" popup with one row per device kind
/// (microphone, speaker); selecting a row opens the per-kind device picker.
pub(crate) fn open_realtime_audio_popup(&mut self) {
    let items = [
        RealtimeAudioDeviceKind::Microphone,
        RealtimeAudioDeviceKind::Speaker,
    ]
    .into_iter()
    .map(|kind| {
        // Surface the persisted selection (or "System default") inline.
        let description = Some(format!(
            "Current: {}",
            self.current_realtime_audio_selection_label(kind)
        ));
        // Selecting a row emits an event that opens the device list for it.
        let actions: Vec<SelectionAction> = vec![Box::new(move |tx| {
            tx.send(AppEvent::OpenRealtimeAudioDeviceSelection { kind });
        })];
        SelectionItem {
            name: kind.title().to_string(),
            description,
            actions,
            dismiss_on_select: true,
            ..Default::default()
        }
    })
    .collect();
    let mut header = ColumnRenderable::new();
    header.push(Line::from("Realtime audio".bold()));
    header.push(Line::from(
        "Choose microphone and speaker for realtime voice.".dim(),
    ));
    self.bottom_pane.show_selection_view(SelectionViewParams {
        header: Box::new(header),
        footer_hint: Some(standard_popup_hint_line()),
        items,
        ..Default::default()
    });
}
/// Open the device picker for `kind`, or surface an error when device
/// enumeration fails.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
pub(crate) fn open_realtime_audio_device_selection(&mut self, kind: RealtimeAudioDeviceKind) {
    let device_names = match list_realtime_audio_device_names(kind) {
        Ok(device_names) => device_names,
        Err(err) => {
            self.add_error_message(format!(
                "Failed to load realtime {} devices: {err}",
                kind.noun()
            ));
            return;
        }
    };
    self.open_realtime_audio_device_selection_with_names(kind, device_names);
}
/// No-op fallback when realtime audio device selection is not compiled in
/// (Linux, or builds without the `voice-input` feature).
#[cfg(any(target_os = "linux", not(feature = "voice-input")))]
pub(crate) fn open_realtime_audio_device_selection(&mut self, kind: RealtimeAudioDeviceKind) {
    // Consume the argument so the signature matches the enabled variant
    // without triggering an unused-variable warning.
    let _ = kind;
}
/// Build and show the per-kind device picker from an already-enumerated
/// list of device names.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
fn open_realtime_audio_device_selection_with_names(
    &mut self,
    kind: RealtimeAudioDeviceKind,
    device_names: Vec<String>,
) {
    let current_selection = self.current_realtime_audio_device_name(kind);
    // Whether the persisted device also appears in the live enumeration.
    let current_available = current_selection
        .as_deref()
        .is_some_and(|name| device_names.iter().any(|device_name| device_name == name));
    // First row always offers the OS default (i.e. clearing the setting).
    let mut items = vec![SelectionItem {
        name: "System default".to_string(),
        description: Some("Use your operating system default device.".to_string()),
        is_current: current_selection.is_none(),
        actions: vec![Box::new(move |tx| {
            tx.send(AppEvent::PersistRealtimeAudioDeviceSelection { kind, name: None });
        })],
        dismiss_on_select: true,
        ..Default::default()
    }];
    // Show a disabled marker row when the configured device is not currently
    // enumerable, so the user can see why their saved choice has no effect.
    if let Some(selection) = current_selection.as_deref()
        && !current_available
    {
        items.push(SelectionItem {
            name: format!("Unavailable: {selection}"),
            description: Some("Configured device is not currently available.".to_string()),
            is_current: true,
            is_disabled: true,
            disabled_reason: Some("Reconnect the device or choose another one.".to_string()),
            ..Default::default()
        });
    }
    // One selectable row per enumerated device; selecting persists its name.
    items.extend(device_names.into_iter().map(|device_name| {
        let persisted_name = device_name.clone();
        let actions: Vec<SelectionAction> = vec![Box::new(move |tx| {
            tx.send(AppEvent::PersistRealtimeAudioDeviceSelection {
                kind,
                name: Some(persisted_name.clone()),
            });
        })];
        SelectionItem {
            is_current: current_selection.as_deref() == Some(device_name.as_str()),
            name: device_name,
            actions,
            dismiss_on_select: true,
            ..Default::default()
        }
    }));
    let mut header = ColumnRenderable::new();
    header.push(Line::from(format!("Select {}", kind.title()).bold()));
    header.push(Line::from(
        "Saved devices apply to realtime voice only.".dim(),
    ));
    self.bottom_pane.show_selection_view(SelectionViewParams {
        header: Box::new(header),
        footer_hint: Some(standard_popup_hint_line()),
        items,
        ..Default::default()
    });
}
/// After a device change during a live session, ask whether to restart the
/// local audio stream now or keep the current device until the next start.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
pub(crate) fn open_realtime_audio_restart_prompt(&mut self, kind: RealtimeAudioDeviceKind) {
    // "Restart now" emits an event handled by the app loop; "Apply later"
    // has no actions and simply dismisses the prompt.
    let restart_actions: Vec<SelectionAction> = vec![Box::new(move |tx| {
        tx.send(AppEvent::RestartRealtimeAudioDevice { kind });
    })];
    let items = vec![
        SelectionItem {
            name: "Restart now".to_string(),
            description: Some(format!("Restart local {} audio now.", kind.noun())),
            actions: restart_actions,
            dismiss_on_select: true,
            ..Default::default()
        },
        SelectionItem {
            name: "Apply later".to_string(),
            description: Some(format!(
                "Keep the current {} until local audio starts again.",
                kind.noun()
            )),
            dismiss_on_select: true,
            ..Default::default()
        },
    ];
    let mut header = ColumnRenderable::new();
    header.push(Line::from(format!("Restart {} now?", kind.title()).bold()));
    header.push(Line::from(
        "Configuration is saved. Restart local audio to use it immediately.".dim(),
    ));
    self.bottom_pane.show_selection_view(SelectionViewParams {
        header: Box::new(header),
        footer_hint: Some(standard_popup_hint_line()),
        items,
        ..Default::default()
    });
}
fn model_menu_header(&self, title: &str, subtitle: &str) -> Box<dyn Renderable> {
let title = title.to_string();
let subtitle = subtitle.to_string();
@@ -6535,6 +6718,8 @@ impl ChatWidget {
let realtime_conversation_enabled = self.realtime_conversation_enabled();
self.bottom_pane
.set_realtime_conversation_enabled(realtime_conversation_enabled);
self.bottom_pane
.set_audio_device_selection_enabled(self.realtime_audio_device_selection_enabled());
if !realtime_conversation_enabled && self.realtime_conversation.is_live() {
self.request_realtime_conversation_close(Some(
"Realtime voice mode was closed because the feature was disabled.".to_string(),
@@ -6624,6 +6809,18 @@ impl ChatWidget {
self.config.personality = Some(personality);
}
/// Mirror a persisted realtime device choice into this widget's config copy.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
pub(crate) fn set_realtime_audio_device(
    &mut self,
    kind: RealtimeAudioDeviceKind,
    name: Option<String>,
) {
    let audio = &mut self.config.realtime_audio;
    match kind {
        RealtimeAudioDeviceKind::Microphone => audio.microphone = name,
        RealtimeAudioDeviceKind::Speaker => audio.speaker = name,
    }
}
/// Set the syntax theme override in the widget's config copy.
pub(crate) fn set_tui_theme(&mut self, theme: Option<String>) {
self.config.tui_theme = theme;
@@ -6652,6 +6849,23 @@ impl ChatWidget {
.unwrap_or_else(|| self.current_collaboration_mode.model())
}
/// Whether a realtime voice conversation is currently active.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
pub(crate) fn realtime_conversation_is_live(&self) -> bool {
    self.realtime_conversation.is_live()
}
/// Return the configured device name for `kind`, if the user has saved one.
/// `None` means no override is set and the system default is in effect.
fn current_realtime_audio_device_name(&self, kind: RealtimeAudioDeviceKind) -> Option<String> {
    let audio = &self.config.realtime_audio;
    let configured = match kind {
        RealtimeAudioDeviceKind::Microphone => &audio.microphone,
        RealtimeAudioDeviceKind::Speaker => &audio.speaker,
    };
    configured.clone()
}
/// Human-readable label for the current device selection, used by the
/// audio picker UI. Falls back to "System default" when nothing is saved.
fn current_realtime_audio_selection_label(&self, kind: RealtimeAudioDeviceKind) -> String {
    match self.current_realtime_audio_device_name(kind) {
        Some(name) => name,
        None => "System default".to_string(),
    }
}
fn sync_personality_command_enabled(&mut self) {
self.bottom_pane
.set_personality_command_enabled(self.config.features.enabled(Feature::Personality));

View File

@@ -207,7 +207,7 @@ impl ChatWidget {
{
if self.realtime_conversation.audio_player.is_none() {
self.realtime_conversation.audio_player =
crate::voice::RealtimeAudioPlayer::start().ok();
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
}
if let Some(player) = &self.realtime_conversation.audio_player
&& let Err(err) = player.enqueue_frame(frame)
@@ -231,7 +231,10 @@ impl ChatWidget {
self.realtime_conversation.meter_placeholder_id = Some(placeholder_id.clone());
self.request_redraw();
let capture = match crate::voice::VoiceCapture::start_realtime(self.app_event_tx.clone()) {
let capture = match crate::voice::VoiceCapture::start_realtime(
&self.config,
self.app_event_tx.clone(),
) {
Ok(capture) => capture,
Err(err) => {
self.remove_transcription_placeholder(&placeholder_id);
@@ -250,7 +253,7 @@ impl ChatWidget {
self.realtime_conversation.capture = Some(capture);
if self.realtime_conversation.audio_player.is_none() {
self.realtime_conversation.audio_player =
crate::voice::RealtimeAudioPlayer::start().ok();
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
}
std::thread::spawn(move || {
@@ -275,8 +278,45 @@ impl ChatWidget {
// Linux builds ship without local realtime audio support, so starting it
// is a deliberate no-op.
#[cfg(target_os = "linux")]
fn start_realtime_local_audio(&mut self) {}
/// Restart one half of local realtime audio so a newly saved device
/// preference takes effect immediately.
///
/// No-op unless a realtime conversation is live. For the microphone we tear
/// down capture and re-run the local-audio startup path; for the speaker we
/// drop the current player and start a fresh one from the updated config,
/// surfacing an error message in the chat if that fails.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
pub(crate) fn restart_realtime_audio_device(&mut self, kind: RealtimeAudioDeviceKind) {
    // Restarting only makes sense while a conversation is active.
    if !self.realtime_conversation.is_live() {
        return;
    }
    match kind {
        RealtimeAudioDeviceKind::Microphone => {
            // Stop capture first, then re-open the mic via the normal
            // local-audio startup path (which reads the saved config).
            self.stop_realtime_microphone();
            self.start_realtime_local_audio();
        }
        RealtimeAudioDeviceKind::Speaker => {
            self.stop_realtime_speaker();
            let restarted = crate::voice::RealtimeAudioPlayer::start(&self.config);
            match restarted {
                Ok(player) => {
                    self.realtime_conversation.audio_player = Some(player);
                }
                Err(err) => {
                    self.add_error_message(format!("Failed to start speaker output: {err}"));
                }
            }
        }
    }
    self.request_redraw();
}
// Tear down both halves of local realtime audio: microphone capture and
// speaker playback.
#[cfg(not(target_os = "linux"))]
fn stop_realtime_local_audio(&mut self) {
    self.stop_realtime_microphone();
    self.stop_realtime_speaker();
}
// Linux has no local audio pipeline to stop; just drop the meter
// placeholder so the UI stops showing it.
#[cfg(target_os = "linux")]
fn stop_realtime_local_audio(&mut self) {
    self.realtime_conversation.meter_placeholder_id = None;
}
#[cfg(not(target_os = "linux"))]
fn stop_realtime_microphone(&mut self) {
if let Some(flag) = self.realtime_conversation.capture_stop_flag.take() {
flag.store(true, Ordering::Relaxed);
}
@@ -286,13 +326,12 @@ impl ChatWidget {
if let Some(id) = self.realtime_conversation.meter_placeholder_id.take() {
self.remove_transcription_placeholder(&id);
}
}
// Stop speaker playback: take ownership of the active player (if any),
// clear its queued audio, and let it drop.
#[cfg(not(target_os = "linux"))]
fn stop_realtime_speaker(&mut self) {
    if let Some(player) = self.realtime_conversation.audio_player.take() {
        player.clear();
    }
}
// Linux has no local audio pipeline to stop; just drop the meter
// placeholder so the UI stops showing it.
#[cfg(target_os = "linux")]
fn stop_realtime_local_audio(&mut self) {
    self.realtime_conversation.meter_placeholder_id = None;
}
}

View File

@@ -0,0 +1,11 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Realtime audio
Choose microphone and speaker for realtime voice.
1. Microphone Current: System default
2. Speaker Current: System default
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,18 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Select Microphone
Saved devices apply to realtime voice only.
1. System default Use your operating system
default device.
2. Unavailable: Studio Mic (current) (disabled) Configured device is not
currently available.
(disabled: Reconnect the
device or choose another
one.)
3. Built-in Mic
4. USB Mic
Press enter to confirm or esc to go back

View File

@@ -6,6 +6,8 @@
use super::*;
use crate::app_event::AppEvent;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
use crate::app_event::RealtimeAudioDeviceKind;
use crate::app_event::ExitMode;
use crate::app_event_sender::AppEventSender;
use crate::bottom_pane::FeedbackAudience;
@@ -5990,6 +5992,52 @@ async fn personality_selection_popup_snapshot() {
assert_snapshot!("personality_selection_popup", popup);
}
// Snapshot of the top-level realtime audio menu (microphone + speaker rows)
// with no devices configured, so both rows show "System default".
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[tokio::test]
async fn realtime_audio_selection_popup_snapshot() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.2-codex")).await;
    chat.open_realtime_audio_popup();
    let popup = render_bottom_popup(&chat, 80);
    assert_snapshot!("realtime_audio_selection_popup", popup);
}
// Snapshot of the microphone picker when the configured device ("Studio
// Mic") is not in the list of available device names, so the picker renders
// it as an unavailable/disabled entry alongside the detected devices.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[tokio::test]
async fn realtime_microphone_picker_popup_snapshot() {
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.2-codex")).await;
    // Configure a device that is absent from the injected device list below.
    chat.config.realtime_audio.microphone = Some("Studio Mic".to_string());
    chat.open_realtime_audio_device_selection_with_names(
        RealtimeAudioDeviceKind::Microphone,
        vec!["Built-in Mic".to_string(), "USB Mic".to_string()],
    );
    let popup = render_bottom_popup(&chat, 80);
    assert_snapshot!("realtime_microphone_picker_popup", popup);
}
// Selecting a concrete speaker in the picker should emit a
// PersistRealtimeAudioDeviceSelection app event carrying that device name.
// Two Down presses move past the leading "System default" entry and the
// first named device, landing on "Headphones" before Enter confirms.
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[tokio::test]
async fn realtime_audio_picker_emits_persist_event() {
    let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.2-codex")).await;
    chat.open_realtime_audio_device_selection_with_names(
        RealtimeAudioDeviceKind::Speaker,
        vec!["Desk Speakers".to_string(), "Headphones".to_string()],
    );
    chat.handle_key_event(KeyEvent::new(KeyCode::Down, KeyModifiers::NONE));
    chat.handle_key_event(KeyEvent::new(KeyCode::Down, KeyModifiers::NONE));
    chat.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE));
    assert_matches!(
        rx.try_recv(),
        Ok(AppEvent::PersistRealtimeAudioDeviceSelection {
            kind: RealtimeAudioDeviceKind::Speaker,
            name: Some(name),
        }) if name == "Headphones"
    );
}
#[tokio::test]
async fn model_picker_hides_show_in_picker_false_models_from_cache() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("test-visible-model")).await;

View File

@@ -61,6 +61,8 @@ mod app_backtrack;
mod app_event;
mod app_event_sender;
mod ascii_animation;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
mod audio_device;
mod bottom_pane;
mod chatwidget;
mod cli;
@@ -121,6 +123,7 @@ mod voice;
mod voice {
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
use codex_core::config::Config;
use codex_protocol::protocol::RealtimeAudioFrame;
use std::sync::Arc;
use std::sync::Mutex;
@@ -144,7 +147,7 @@ mod voice {
Err("voice input is unavailable in this build".to_string())
}
pub fn start_realtime(_tx: AppEventSender) -> Result<Self, String> {
pub fn start_realtime(_config: &Config, _tx: AppEventSender) -> Result<Self, String> {
Err("voice input is unavailable in this build".to_string())
}
@@ -184,7 +187,7 @@ mod voice {
}
impl RealtimeAudioPlayer {
pub(crate) fn start() -> Result<Self, String> {
pub(crate) fn start(_config: &Config) -> Result<Self, String> {
Err("voice output is unavailable in this build".to_string())
}

View File

@@ -51,6 +51,7 @@ pub enum SlashCommand {
Clear,
Personality,
Realtime,
Audio,
TestApproval,
// Debugging commands.
#[strum(serialize = "debug-m-drop")]
@@ -89,6 +90,7 @@ impl SlashCommand {
SlashCommand::Model => "choose what model and reasoning effort to use",
SlashCommand::Personality => "choose a communication style for Codex",
SlashCommand::Realtime => "toggle realtime voice mode (experimental)",
SlashCommand::Audio => "select microphone/speaker for realtime voice",
SlashCommand::Plan => "switch to Plan mode",
SlashCommand::Collab => "change collaboration mode (experimental)",
SlashCommand::Agent => "switch the active agent thread",
@@ -163,6 +165,7 @@ impl SlashCommand {
SlashCommand::Rollout => true,
SlashCommand::TestApproval => true,
SlashCommand::Realtime => true,
SlashCommand::Audio => true,
SlashCommand::Collab => true,
SlashCommand::Agent => true,
SlashCommand::Statusline => false,

View File

@@ -51,7 +51,7 @@ pub struct VoiceCapture {
impl VoiceCapture {
pub fn start() -> Result<Self, String> {
let (device, config) = select_input_device_and_config()?;
let (device, config) = select_default_input_device_and_config()?;
let sample_rate = config.sample_rate().0;
let channels = config.channels();
@@ -74,8 +74,8 @@ impl VoiceCapture {
})
}
pub fn start_realtime(tx: AppEventSender) -> Result<Self, String> {
let (device, config) = select_input_device_and_config()?;
pub fn start_realtime(config: &Config, tx: AppEventSender) -> Result<Self, String> {
let (device, config) = select_realtime_input_device_and_config(config)?;
let sample_rate = config.sample_rate().0;
let channels = config.channels();
@@ -262,7 +262,8 @@ pub fn transcribe_async(
// Voice input helpers
// -------------------------
fn select_input_device_and_config() -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
fn select_default_input_device_and_config()
-> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
let host = cpal::default_host();
let device = host
.default_input_device()
@@ -273,6 +274,12 @@ fn select_input_device_and_config() -> Result<(cpal::Device, cpal::SupportedStre
Ok((device, config))
}
/// Resolve the input device and stream config for realtime capture.
///
/// Delegates to the shared `audio_device` helper, which reads the saved
/// device preference from `config` — presumably falling back to the system
/// default when no device is configured or the configured one is missing
/// (helper body not visible here; confirm in `audio_device`).
fn select_realtime_input_device_and_config(
    config: &Config,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
    crate::audio_device::select_configured_input_device_and_config(config)
}
fn build_input_stream(
device: &cpal::Device,
config: &cpal::SupportedStreamConfig,
@@ -466,14 +473,9 @@ pub(crate) struct RealtimeAudioPlayer {
}
impl RealtimeAudioPlayer {
pub(crate) fn start() -> Result<Self, String> {
let host = cpal::default_host();
let device = host
.default_output_device()
.ok_or_else(|| "no output audio device available".to_string())?;
let config = device
.default_output_config()
.map_err(|e| format!("failed to get default output config: {e}"))?;
pub(crate) fn start(config: &Config) -> Result<Self, String> {
let (device, config) =
crate::audio_device::select_configured_output_device_and_config(config)?;
let output_sample_rate = config.sample_rate().0;
let output_channels = config.channels();
let queue = Arc::new(Mutex::new(VecDeque::new()));