Compare commits

...

6 Commits

Author SHA1 Message Date
Ahmed Ibrahim
6cdc5d450b codex: fix CI failure on PR #13193
Co-authored-by: Codex <noreply@openai.com>
2026-03-01 17:19:08 -07:00
Ahmed Ibrahim
88f4a24dfa codex: fix CI failure on PR #13193
Co-authored-by: Codex <noreply@openai.com>
2026-03-01 17:13:18 -07:00
Ahmed Ibrahim
f574ec3213 Persist stable voice device ids
Co-authored-by: Codex <noreply@openai.com>
2026-03-01 17:09:03 -07:00
Ahmed Ibrahim
c7e3d9affb Document Linux ALSA build requirements
Co-authored-by: Codex <noreply@openai.com>
2026-03-01 17:08:56 -07:00
Ahmed Ibrahim
73178a7c3f codex: fix CI failure on PR #13192
Co-authored-by: Codex <noreply@openai.com>
2026-03-01 16:50:09 -07:00
Ahmed Ibrahim
b337aa00ce Upgrade cpal and enable cross-platform tui voice
Co-authored-by: Codex <noreply@openai.com>
2026-03-01 16:16:15 -07:00
25 changed files with 432 additions and 573 deletions

View File

@@ -55,6 +55,14 @@ jobs:
with:
node-version-file: codex-rs/node-version.txt
- name: Install Linux build dependencies
if: ${{ runner.os == 'Linux' }}
shell: bash
run: |
set -euo pipefail
sudo apt-get update -y
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libasound2-dev pkg-config libcap-dev
# Some integration tests rely on DotSlash being installed.
# See https://github.com/openai/codex/pull/7617.
- name: Install DotSlash

View File

@@ -190,7 +190,7 @@ jobs:
set -euo pipefail
if command -v apt-get >/dev/null 2>&1; then
sudo apt-get update -y
packages=(pkg-config libcap-dev)
packages=(libasound2-dev pkg-config libcap-dev)
if [[ "${{ matrix.target }}" == 'x86_64-unknown-linux-musl' || "${{ matrix.target }}" == 'aarch64-unknown-linux-musl' ]]; then
packages+=(libubsan1)
fi
@@ -510,7 +510,7 @@ jobs:
set -euo pipefail
if command -v apt-get >/dev/null 2>&1; then
sudo apt-get update -y
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends pkg-config libcap-dev
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libasound2-dev pkg-config libcap-dev
fi
# Some integration tests rely on DotSlash being installed.
# See https://github.com/openai/codex/pull/7617.

View File

@@ -107,13 +107,13 @@ jobs:
echo "Total RAM: ${total_ram}"
echo "Disk usage:"
df -h .
- name: Install Linux bwrap build dependencies
- name: Install Linux build dependencies
if: ${{ runner.os == 'Linux' }}
shell: bash
run: |
set -euo pipefail
sudo apt-get update -y
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends pkg-config libcap-dev
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libasound2-dev pkg-config libcap-dev
- name: Install UBSan runtime (musl)
if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl' }}
shell: bash

View File

@@ -18,7 +18,7 @@ jobs:
run: |
set -euo pipefail
sudo apt-get update -y
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends pkg-config libcap-dev
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libasound2-dev pkg-config libcap-dev
- name: Setup pnpm
uses: pnpm/action-setup@v4

29
MODULE.bazel.lock generated

File diff suppressed because one or more lines are too long

View File

@@ -42,6 +42,8 @@ Each GitHub Release contains many executables, but in practice, you likely want
Each archive contains a single entry with the platform baked into the name (e.g., `codex-x86_64-unknown-linux-musl`), so you likely want to rename it to `codex` after extracting it.
On Linux, these prebuilt binaries are the recommended install path and should work on most mainstream desktops with ALSA available. If you build Codex from source on Linux instead, install the ALSA development headers first (for example, `libasound2-dev` on Debian/Ubuntu).
</details>
### Using Codex with your ChatGPT plan

237
codex-rs/Cargo.lock generated
View File

@@ -293,9 +293,9 @@ checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "alsa"
version = "0.9.1"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed7572b7ba83a31e20d1b48970ee402d2e3e0537dcfe0a3ff4d6eb7508617d43"
checksum = "812947049edcd670a82cd5c73c3661d2e58468577ba8489de58e1a73c04cbd5d"
dependencies = [
"alsa-sys",
"bitflags 2.10.0",
@@ -305,9 +305,9 @@ dependencies = [
[[package]]
name = "alsa-sys"
version = "0.3.1"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db8fee663d06c4e303404ef5f40488a53e062f89ba8bfed81f42325aafad1527"
checksum = "ad7569085a265dd3f607ebecce7458eaab2132a84393534c95b18dcbc3f31e04"
dependencies = [
"libc",
"pkg-config",
@@ -380,7 +380,7 @@ version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
"windows-sys 0.61.2",
"windows-sys 0.60.2",
]
[[package]]
@@ -391,7 +391,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys 0.61.2",
"windows-sys 0.60.2",
]
[[package]]
@@ -935,24 +935,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bindgen"
version = "0.72.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
dependencies = [
"bitflags 2.10.0",
"cexpr",
"clang-sys",
"itertools 0.13.0",
"proc-macro2",
"quote",
"regex",
"rustc-hash 2.1.1",
"shlex",
"syn 2.0.114",
]
[[package]]
name = "bit-set"
version = "0.5.3"
@@ -1190,15 +1172,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom 7.1.3",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
@@ -1283,17 +1256,6 @@ dependencies = [
"zeroize",
]
[[package]]
name = "clang-sys"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "clap"
version = "4.5.58"
@@ -2856,32 +2818,25 @@ dependencies = [
[[package]]
name = "coreaudio-rs"
version = "0.11.3"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "321077172d79c662f64f5071a03120748d5bb652f5231570141be24cfcd2bace"
checksum = "d15c3c3cee7c087938f7ad1c3098840b3ef1f1bdc7f6e496336c3b1e7a6f3914"
dependencies = [
"bitflags 1.3.2",
"core-foundation-sys",
"coreaudio-sys",
]
[[package]]
name = "coreaudio-sys"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ceec7a6067e62d6f931a2baf6f3a751f4a892595bcec1461a3c94ef9949864b6"
dependencies = [
"bindgen",
"bitflags 2.10.0",
"libc",
"objc2-audio-toolbox",
"objc2-core-audio",
"objc2-core-audio-types",
"objc2-core-foundation",
]
[[package]]
name = "cpal"
version = "0.15.3"
version = "0.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "873dab07c8f743075e57f524c583985fbaf745602acbe916a01539364369a779"
checksum = "d8942da362c0f0d895d7cac616263f2f9424edc5687364dfd1d25ef7eba506d7"
dependencies = [
"alsa",
"core-foundation-sys",
"coreaudio-rs",
"dasp_sample",
"jni",
@@ -2890,11 +2845,19 @@ dependencies = [
"mach2",
"ndk",
"ndk-context",
"oboe",
"num-derive",
"num-traits",
"objc2",
"objc2-audio-toolbox",
"objc2-avf-audio",
"objc2-core-audio",
"objc2-core-audio-types",
"objc2-core-foundation",
"objc2-foundation",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"windows 0.54.0",
"windows 0.62.2",
]
[[package]]
@@ -3452,7 +3415,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users 0.5.2",
"windows-sys 0.61.2",
"windows-sys 0.59.0",
]
[[package]]
@@ -3697,7 +3660,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.61.2",
"windows-sys 0.52.0",
]
[[package]]
@@ -4228,12 +4191,6 @@ version = "0.32.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "globset"
version = "0.4.18"
@@ -4692,7 +4649,7 @@ dependencies = [
"js-sys",
"log",
"wasm-bindgen",
"windows-core 0.62.2",
"windows-core 0.58.0",
]
[[package]]
@@ -5090,7 +5047,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
dependencies = [
"hermit-abi",
"libc",
"windows-sys 0.61.2",
"windows-sys 0.52.0",
]
[[package]]
@@ -5314,16 +5271,6 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if",
"windows-link",
]
[[package]]
name = "libm"
version = "0.2.16"
@@ -5517,9 +5464,9 @@ dependencies = [
[[package]]
name = "mach2"
version = "0.4.3"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d640282b302c0bb0a2a8e0233ead9035e3bed871f0b7e81fe4a1ec829765db44"
checksum = "6a1b95cd5421ec55b445b5ae102f5ea0e768de1f82bd3001e11f426c269c3aea"
dependencies = [
"libc",
]
@@ -5709,9 +5656,9 @@ dependencies = [
[[package]]
name = "ndk"
version = "0.8.0"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7"
checksum = "c3f42e7bbe13d351b6bead8286a43aac9534b82bd3cc43e47037f012ebfd62d4"
dependencies = [
"bitflags 2.10.0",
"jni-sys",
@@ -5729,9 +5676,9 @@ checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b"
[[package]]
name = "ndk-sys"
version = "0.5.0+25.2.9519653"
version = "0.6.0+11769913"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691"
checksum = "ee6cda3051665f1fb8d9e08fc35c96d5a244fb1be711a03b71118828afc9a873"
dependencies = [
"jni-sys",
]
@@ -5846,7 +5793,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
"windows-sys 0.61.2",
"windows-sys 0.59.0",
]
[[package]]
@@ -6057,6 +6004,31 @@ dependencies = [
"objc2-foundation",
]
[[package]]
name = "objc2-audio-toolbox"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6948501a91121d6399b79abaa33a8aa4ea7857fe019f341b8c23ad6e81b79b08"
dependencies = [
"bitflags 2.10.0",
"libc",
"objc2",
"objc2-core-audio",
"objc2-core-audio-types",
"objc2-core-foundation",
"objc2-foundation",
]
[[package]]
name = "objc2-avf-audio"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13a380031deed8e99db00065c45937da434ca987c034e13b87e4441f9e4090be"
dependencies = [
"objc2",
"objc2-foundation",
]
[[package]]
name = "objc2-cloud-kit"
version = "0.3.2"
@@ -6068,6 +6040,29 @@ dependencies = [
"objc2-foundation",
]
[[package]]
name = "objc2-core-audio"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1eebcea8b0dbff5f7c8504f3107c68fc061a3eb44932051c8cf8a68d969c3b2"
dependencies = [
"dispatch2",
"objc2",
"objc2-core-audio-types",
"objc2-core-foundation",
"objc2-foundation",
]
[[package]]
name = "objc2-core-audio-types"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a89f2ec274a0cf4a32642b2991e8b351a404d290da87bb6a9a9d8632490bd1c"
dependencies = [
"bitflags 2.10.0",
"objc2",
]
[[package]]
name = "objc2-core-data"
version = "0.3.2"
@@ -6085,7 +6080,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
dependencies = [
"bitflags 2.10.0",
"block2",
"dispatch2",
"libc",
"objc2",
]
@@ -6216,29 +6213,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "oboe"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb"
dependencies = [
"jni",
"ndk",
"ndk-context",
"num-derive",
"num-traits",
"oboe-sys",
]
[[package]]
name = "oboe-sys"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8bb09a4a2b1d668170cfe0a7d5bc103f8999fb316c98099b6a9939c9f2e79d"
dependencies = [
"cc",
]
[[package]]
name = "oid-registry"
version = "0.8.1"
@@ -6490,7 +6464,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967"
dependencies = [
"libc",
"windows-sys 0.61.2",
"windows-sys 0.48.0",
]
[[package]]
@@ -7891,7 +7865,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys 0.11.0",
"windows-sys 0.61.2",
"windows-sys 0.52.0",
]
[[package]]
@@ -9289,7 +9263,7 @@ dependencies = [
"getrandom 0.3.4",
"once_cell",
"rustix 1.1.3",
"windows-sys 0.61.2",
"windows-sys 0.52.0",
]
[[package]]
@@ -10661,7 +10635,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.61.2",
"windows-sys 0.48.0",
]
[[package]]
@@ -10670,16 +10644,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
dependencies = [
"windows-core 0.54.0",
"windows-targets 0.52.6",
]
[[package]]
name = "windows"
version = "0.58.0"
@@ -10711,16 +10675,6 @@ dependencies = [
"windows-core 0.62.2",
]
[[package]]
name = "windows-core"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65"
dependencies = [
"windows-result 0.1.2",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-core"
version = "0.58.0"
@@ -10829,15 +10783,6 @@ dependencies = [
"windows-strings 0.5.1",
]
[[package]]
name = "windows-result"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-result"
version = "0.2.0"

View File

@@ -1,6 +1,6 @@
# Codex CLI (Rust Implementation)
We provide Codex CLI as a standalone, native executable to ensure a zero-dependency install.
We provide Codex CLI as a standalone, native executable. For most users, the simplest install path is a prebuilt package or release binary.
## Installing Codex
@@ -13,6 +13,8 @@ codex
You can also install via Homebrew (`brew install --cask codex`) or download a platform-specific release directly from our [GitHub Releases](https://github.com/openai/codex/releases).
On Linux, the prebuilt release binaries are the recommended path. If you build from source on Linux, install ALSA development headers first (for example, `libasound2-dev` on Debian/Ubuntu).
## Documentation quickstart
- First run with Codex? Start with [`docs/getting-started.md`](../docs/getting-started.md) (links to the walkthrough for prompts, keyboard shortcuts, and session management).

View File

@@ -1588,7 +1588,7 @@
}
],
"default": null,
"description": "Machine-local realtime audio device preferences used by realtime voice."
"description": "Machine-local audio device preferences. `audio.microphone` applies to transcription and realtime voice input. `audio.speaker` applies to realtime voice output."
},
"background_terminal_max_timeout": {
"description": "Maximum poll window for background terminal output (`write_stdin`), in milliseconds. Default: `300000` (5 minutes).",

View File

@@ -872,7 +872,7 @@ impl ConfigEditsBuilder {
self
}
pub fn set_realtime_microphone(mut self, microphone: Option<&str>) -> Self {
pub fn set_audio_microphone(mut self, microphone: Option<&str>) -> Self {
let segments = vec!["audio".to_string(), "microphone".to_string()];
match microphone {
Some(microphone) => self.edits.push(ConfigEdit::SetPath {
@@ -884,7 +884,7 @@ impl ConfigEditsBuilder {
self
}
pub fn set_realtime_speaker(mut self, speaker: Option<&str>) -> Self {
pub fn set_audio_speaker(mut self, speaker: Option<&str>) -> Self {
let segments = vec!["audio".to_string(), "speaker".to_string()];
match speaker {
Some(speaker) => self.edits.push(ConfigEdit::SetPath {
@@ -1893,8 +1893,8 @@ model_reasoning_effort = "high"
let codex_home = tmp.path();
ConfigEditsBuilder::new(codex_home)
.set_realtime_microphone(Some("USB Mic"))
.set_realtime_speaker(Some("Desk Speakers"))
.set_audio_microphone(Some("USB Mic"))
.set_audio_speaker(Some("Desk Speakers"))
.apply_blocking()
.expect("persist realtime audio");
@@ -1914,7 +1914,7 @@ model_reasoning_effort = "high"
);
ConfigEditsBuilder::new(codex_home)
.set_realtime_microphone(None)
.set_audio_microphone(None)
.apply_blocking()
.expect("clear realtime microphone");

View File

@@ -1182,7 +1182,9 @@ pub struct ConfigToml {
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
pub chatgpt_base_url: Option<String>,
/// Machine-local realtime audio device preferences used by realtime voice.
/// Machine-local audio device preferences.
/// `audio.microphone` applies to transcription and realtime voice input.
/// `audio.speaker` applies to realtime voice output.
#[serde(default)]
pub audio: Option<RealtimeAudioToml>,
@@ -6132,8 +6134,8 @@ experimental_realtime_ws_backend_prompt = "prompt from config"
let cfg: ConfigToml = toml::from_str(
r#"
[audio]
microphone = "USB Mic"
speaker = "Desk Speakers"
microphone = "input-device-id"
speaker = "output-device-id"
"#,
)
.expect("TOML deserialization should succeed");
@@ -6142,8 +6144,11 @@ speaker = "Desk Speakers"
.audio
.as_ref()
.expect("realtime audio config should be present");
assert_eq!(realtime_audio.microphone.as_deref(), Some("USB Mic"));
assert_eq!(realtime_audio.speaker.as_deref(), Some("Desk Speakers"));
assert_eq!(
realtime_audio.microphone.as_deref(),
Some("input-device-id")
);
assert_eq!(realtime_audio.speaker.as_deref(), Some("output-device-id"));
let codex_home = TempDir::new()?;
let config = Config::load_from_base_config_with_overrides(
@@ -6152,10 +6157,13 @@ speaker = "Desk Speakers"
codex_home.path().to_path_buf(),
)?;
assert_eq!(config.realtime_audio.microphone.as_deref(), Some("USB Mic"));
assert_eq!(
config.realtime_audio.microphone.as_deref(),
Some("input-device-id")
);
assert_eq!(
config.realtime_audio.speaker.as_deref(),
Some("Desk Speakers")
Some("output-device-id")
);
Ok(())
}

View File

@@ -13,12 +13,11 @@ name = "codex_tui"
path = "src/lib.rs"
[features]
default = ["voice-input"]
default = []
# Enable vt100-based tests (emulator) when running with `--features vt100-tests`.
vt100-tests = []
# Gate verbose debug logging inside the TUI implementation.
debug-logs = []
voice-input = ["dep:cpal", "dep:hound"]
[lints]
workspace = true
@@ -103,14 +102,12 @@ unicode-width = { workspace = true }
url = { workspace = true }
webbrowser = { workspace = true }
uuid = { workspace = true }
cpal = { version = "0.17.3" }
hound = { version = "3.5" }
codex-windows-sandbox = { workspace = true }
tokio-util = { workspace = true, features = ["time"] }
[target.'cfg(not(target_os = "linux"))'.dependencies]
cpal = { version = "0.15", optional = true }
hound = { version = "3.5", optional = true }
[target.'cfg(unix)'.dependencies]
libc = { workspace = true }

View File

@@ -1,7 +1,7 @@
use crate::app_backtrack::BacktrackState;
use crate::app_event::AppEvent;
use crate::app_event::ExitMode;
use crate::app_event::RealtimeAudioDeviceKind;
use crate::app_event::VoiceAudioDeviceKind;
#[cfg(target_os = "windows")]
use crate::app_event::WindowsSandboxEnableMode;
use crate::app_event_sender::AppEventSender;
@@ -2228,8 +2228,8 @@ impl App {
AppEvent::UpdatePersonality(personality) => {
self.on_update_personality(personality);
}
AppEvent::OpenRealtimeAudioDeviceSelection { kind } => {
self.chat_widget.open_realtime_audio_device_selection(kind);
AppEvent::OpenVoiceAudioDeviceSelection { kind } => {
self.chat_widget.open_voice_audio_device_selection(kind);
}
AppEvent::OpenReasoningPopup { model } => {
self.chat_widget.open_reasoning_popup(model);
@@ -2656,37 +2656,38 @@ impl App {
}
}
}
AppEvent::PersistRealtimeAudioDeviceSelection { kind, name } => {
AppEvent::PersistVoiceAudioDeviceSelection { kind, device_id } => {
let builder = match kind {
RealtimeAudioDeviceKind::Microphone => {
VoiceAudioDeviceKind::Microphone => {
ConfigEditsBuilder::new(&self.config.codex_home)
.set_realtime_microphone(name.as_deref())
.set_audio_microphone(device_id.as_deref())
}
RealtimeAudioDeviceKind::Speaker => {
VoiceAudioDeviceKind::Speaker => {
ConfigEditsBuilder::new(&self.config.codex_home)
.set_realtime_speaker(name.as_deref())
.set_audio_speaker(device_id.as_deref())
}
};
match builder.apply().await {
Ok(()) => {
match kind {
RealtimeAudioDeviceKind::Microphone => {
self.config.realtime_audio.microphone = name.clone();
VoiceAudioDeviceKind::Microphone => {
self.config.realtime_audio.microphone = device_id.clone();
}
RealtimeAudioDeviceKind::Speaker => {
self.config.realtime_audio.speaker = name.clone();
VoiceAudioDeviceKind::Speaker => {
self.config.realtime_audio.speaker = device_id.clone();
}
}
self.chat_widget
.set_realtime_audio_device(kind, name.clone());
.set_voice_audio_device(kind, device_id.clone());
if self.chat_widget.realtime_conversation_is_live() {
self.chat_widget.open_realtime_audio_restart_prompt(kind);
self.chat_widget.open_voice_audio_restart_prompt(kind);
} else {
let selection = name.unwrap_or_else(|| "System default".to_string());
let selection =
device_id.unwrap_or_else(|| "System default".to_string());
self.chat_widget.add_info_message(
format!("Realtime {} set to {selection}", kind.noun()),
format!("Audio {} set to {selection}", kind.noun()),
None,
);
}
@@ -2694,17 +2695,15 @@ impl App {
Err(err) => {
tracing::error!(
error = %err,
"failed to persist realtime audio selection"
"failed to persist audio device selection"
);
self.chat_widget.add_error_message(format!(
"Failed to save realtime {}: {err}",
kind.noun()
));
self.chat_widget
.add_error_message(format!("Failed to save {}: {err}", kind.noun()));
}
}
}
AppEvent::RestartRealtimeAudioDevice { kind } => {
self.chat_widget.restart_realtime_audio_device(kind);
AppEvent::RestartVoiceAudioDevice { kind } => {
self.chat_widget.restart_voice_audio_device(kind);
}
AppEvent::UpdateAskForApprovalPolicy(policy) => {
self.runtime_approval_policy_override = Some(policy);
@@ -3099,15 +3098,13 @@ impl App {
));
}
},
#[cfg(not(target_os = "linux"))]
AppEvent::TranscriptionComplete { id, text } => {
self.chat_widget.replace_transcription(&id, &text);
}
#[cfg(not(target_os = "linux"))]
AppEvent::TranscriptionFailed { id, error: _ } => {
AppEvent::TranscriptionFailed { id, error } => {
tracing::error!("voice transcription failed: {error}");
self.chat_widget.remove_transcription_placeholder(&id);
}
#[cfg(not(target_os = "linux"))]
AppEvent::UpdateRecordingMeter { id, text } => {
// Update in place to preserve the element id for subsequent frames.
let updated = self.chat_widget.update_transcription_in_place(&id, &text);

View File

@@ -30,12 +30,12 @@ use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum RealtimeAudioDeviceKind {
pub(crate) enum VoiceAudioDeviceKind {
Microphone,
Speaker,
}
impl RealtimeAudioDeviceKind {
impl VoiceAudioDeviceKind {
pub(crate) fn title(self) -> &'static str {
match self {
Self::Microphone => "Microphone",
@@ -198,23 +198,19 @@ pub(crate) enum AppEvent {
},
/// Open the device picker for a voice microphone or speaker.
OpenRealtimeAudioDeviceSelection {
kind: RealtimeAudioDeviceKind,
OpenVoiceAudioDeviceSelection {
kind: VoiceAudioDeviceKind,
},
/// Persist the selected realtime microphone or speaker to top-level config.
#[cfg_attr(
any(target_os = "linux", not(feature = "voice-input")),
allow(dead_code)
)]
PersistRealtimeAudioDeviceSelection {
kind: RealtimeAudioDeviceKind,
name: Option<String>,
/// Persist the selected microphone or speaker device id to top-level config.
PersistVoiceAudioDeviceSelection {
kind: VoiceAudioDeviceKind,
device_id: Option<String>,
},
/// Restart the selected realtime microphone or speaker locally.
RestartRealtimeAudioDevice {
kind: RealtimeAudioDeviceKind,
/// Restart the selected microphone or speaker locally.
RestartVoiceAudioDevice {
kind: VoiceAudioDeviceKind,
},
/// Open the reasoning selection popup after picking a model.
@@ -377,21 +373,18 @@ pub(crate) enum AppEvent {
/// Live update for the in-progress voice recording placeholder. Carries
/// the placeholder `id` and the text to display (e.g., an ASCII meter).
#[cfg(not(target_os = "linux"))]
UpdateRecordingMeter {
id: String,
text: String,
},
/// Voice transcription finished for the given placeholder id.
#[cfg(not(target_os = "linux"))]
TranscriptionComplete {
id: String,
text: String,
},
/// Voice transcription failed; remove the placeholder identified by `id`.
#[cfg(not(target_os = "linux"))]
TranscriptionFailed {
id: String,
#[allow(dead_code)]

View File

@@ -3,37 +3,53 @@ use cpal::traits::DeviceTrait;
use cpal::traits::HostTrait;
use tracing::warn;
use crate::app_event::RealtimeAudioDeviceKind;
use crate::app_event::VoiceAudioDeviceKind;
const PREFERRED_INPUT_SAMPLE_RATE: u32 = 24_000;
const PREFERRED_INPUT_CHANNELS: u16 = 1;
pub(crate) fn list_realtime_audio_device_names(
kind: RealtimeAudioDeviceKind,
) -> Result<Vec<String>, String> {
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct VoiceAudioDevice {
pub(crate) id: String,
pub(crate) label: String,
}
pub(crate) fn list_voice_audio_devices(
kind: VoiceAudioDeviceKind,
) -> Result<Vec<VoiceAudioDevice>, String> {
let host = cpal::default_host();
let mut device_names = Vec::new();
for device in devices(&host, kind)? {
let Ok(name) = device.name() else {
continue;
};
if !device_names.contains(&name) {
device_names.push(name);
}
}
Ok(device_names)
Ok(devices(&host, kind)?
.into_iter()
.filter_map(|device| {
let label = device.description().ok()?.to_string();
let id = device.id().ok()?.to_string();
Some(VoiceAudioDevice { id, label })
})
.collect())
}
pub(crate) fn select_configured_input_device_and_config(
config: &Config,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
select_device_and_config(RealtimeAudioDeviceKind::Microphone, config)
select_device_and_config(
VoiceAudioDeviceKind::Microphone,
config.realtime_audio.microphone.as_deref(),
)
}
pub(crate) fn select_configured_output_device_and_config(
config: &Config,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
select_device_and_config(RealtimeAudioDeviceKind::Speaker, config)
select_device_and_config(
VoiceAudioDeviceKind::Speaker,
config.realtime_audio.speaker.as_deref(),
)
}
pub(crate) fn select_input_device_and_config(
device_id: Option<&str>,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
select_device_and_config(VoiceAudioDeviceKind::Microphone, device_id)
}
pub(crate) fn preferred_input_config(
@@ -52,7 +68,7 @@ pub(crate) fn preferred_input_config(
_ => return None,
};
let sample_rate = preferred_input_sample_rate(&range);
let sample_rate_penalty = sample_rate.0.abs_diff(PREFERRED_INPUT_SAMPLE_RATE);
let sample_rate_penalty = sample_rate.abs_diff(PREFERRED_INPUT_SAMPLE_RATE);
let channel_penalty = range.channels().abs_diff(PREFERRED_INPUT_CHANNELS);
Some((
(sample_rate_penalty, channel_penalty, sample_format_rank),
@@ -66,111 +82,104 @@ pub(crate) fn preferred_input_config(
}
fn select_device_and_config(
kind: RealtimeAudioDeviceKind,
config: &Config,
kind: VoiceAudioDeviceKind,
configured_device_id: Option<&str>,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
let host = cpal::default_host();
let configured_name = configured_name(kind, config);
let selected = configured_name
.and_then(|name| find_device_by_name(&host, kind, name))
let selected = configured_device_id
.and_then(|device_id| configured_device(&host, kind, device_id))
.or_else(|| {
let default_device = default_device(&host, kind);
if let Some(name) = configured_name && default_device.is_some() {
if let Some(device_id) = configured_device_id && default_device.is_some() {
warn!(
"configured {} audio device `{name}` was unavailable; falling back to system default",
"configured {} audio device `{device_id}` was unavailable; falling back to system default",
kind.noun()
);
}
default_device
})
.ok_or_else(|| missing_device_error(kind, configured_name))?;
.ok_or_else(|| missing_device_error(kind, configured_device_id))?;
let stream_config = match kind {
RealtimeAudioDeviceKind::Microphone => preferred_input_config(&selected)?,
RealtimeAudioDeviceKind::Speaker => default_config(&selected, kind)?,
VoiceAudioDeviceKind::Microphone => preferred_input_config(&selected)?,
VoiceAudioDeviceKind::Speaker => default_config(&selected, kind)?,
};
Ok((selected, stream_config))
}
fn configured_name(kind: RealtimeAudioDeviceKind, config: &Config) -> Option<&str> {
fn configured_device(
host: &cpal::Host,
kind: VoiceAudioDeviceKind,
device_id: &str,
) -> Option<cpal::Device> {
let parsed_id = device_id.parse().ok()?;
let device = host.device_by_id(&parsed_id)?;
match kind {
RealtimeAudioDeviceKind::Microphone => config.realtime_audio.microphone.as_deref(),
RealtimeAudioDeviceKind::Speaker => config.realtime_audio.speaker.as_deref(),
VoiceAudioDeviceKind::Microphone => device.default_input_config().ok().map(|_| device),
VoiceAudioDeviceKind::Speaker => device.default_output_config().ok().map(|_| device),
}
}
fn find_device_by_name(
host: &cpal::Host,
kind: RealtimeAudioDeviceKind,
name: &str,
) -> Option<cpal::Device> {
let devices = devices(host, kind).ok()?;
devices
.into_iter()
.find(|device| device.name().ok().as_deref() == Some(name))
}
fn devices(host: &cpal::Host, kind: RealtimeAudioDeviceKind) -> Result<Vec<cpal::Device>, String> {
fn devices(host: &cpal::Host, kind: VoiceAudioDeviceKind) -> Result<Vec<cpal::Device>, String> {
match kind {
RealtimeAudioDeviceKind::Microphone => host
VoiceAudioDeviceKind::Microphone => host
.input_devices()
.map(|devices| devices.collect())
.map_err(|err| format!("failed to enumerate input audio devices: {err}")),
RealtimeAudioDeviceKind::Speaker => host
VoiceAudioDeviceKind::Speaker => host
.output_devices()
.map(|devices| devices.collect())
.map_err(|err| format!("failed to enumerate output audio devices: {err}")),
}
}
fn default_device(host: &cpal::Host, kind: RealtimeAudioDeviceKind) -> Option<cpal::Device> {
fn default_device(host: &cpal::Host, kind: VoiceAudioDeviceKind) -> Option<cpal::Device> {
match kind {
RealtimeAudioDeviceKind::Microphone => host.default_input_device(),
RealtimeAudioDeviceKind::Speaker => host.default_output_device(),
VoiceAudioDeviceKind::Microphone => host.default_input_device(),
VoiceAudioDeviceKind::Speaker => host.default_output_device(),
}
}
fn default_config(
device: &cpal::Device,
kind: RealtimeAudioDeviceKind,
kind: VoiceAudioDeviceKind,
) -> Result<cpal::SupportedStreamConfig, String> {
match kind {
RealtimeAudioDeviceKind::Microphone => device
VoiceAudioDeviceKind::Microphone => device
.default_input_config()
.map_err(|err| format!("failed to get default input config: {err}")),
RealtimeAudioDeviceKind::Speaker => device
VoiceAudioDeviceKind::Speaker => device
.default_output_config()
.map_err(|err| format!("failed to get default output config: {err}")),
}
}
fn preferred_input_sample_rate(range: &cpal::SupportedStreamConfigRange) -> cpal::SampleRate {
let min = range.min_sample_rate().0;
let max = range.max_sample_rate().0;
let min = range.min_sample_rate();
let max = range.max_sample_rate();
if (min..=max).contains(&PREFERRED_INPUT_SAMPLE_RATE) {
cpal::SampleRate(PREFERRED_INPUT_SAMPLE_RATE)
PREFERRED_INPUT_SAMPLE_RATE
} else if PREFERRED_INPUT_SAMPLE_RATE < min {
cpal::SampleRate(min)
min
} else {
cpal::SampleRate(max)
max
}
}
fn missing_device_error(kind: RealtimeAudioDeviceKind, configured_name: Option<&str>) -> String {
match (kind, configured_name) {
(RealtimeAudioDeviceKind::Microphone, Some(name)) => {
fn missing_device_error(kind: VoiceAudioDeviceKind, configured_device_id: Option<&str>) -> String {
match (kind, configured_device_id) {
(VoiceAudioDeviceKind::Microphone, Some(device_id)) => {
format!(
"configured microphone `{name}` was unavailable and no default input audio device was found"
"configured microphone `{device_id}` was unavailable and no default input audio device was found"
)
}
(RealtimeAudioDeviceKind::Speaker, Some(name)) => {
(VoiceAudioDeviceKind::Speaker, Some(device_id)) => {
format!(
"configured speaker `{name}` was unavailable and no default output audio device was found"
"configured speaker `{device_id}` was unavailable and no default output audio device was found"
)
}
(RealtimeAudioDeviceKind::Microphone, None) => {
"no input audio device available".to_string()
}
(RealtimeAudioDeviceKind::Speaker, None) => "no output audio device available".to_string(),
(VoiceAudioDeviceKind::Microphone, None) => "no input audio device available".to_string(),
(VoiceAudioDeviceKind::Speaker, None) => "no output audio device available".to_string(),
}
}

View File

@@ -216,15 +216,12 @@ use std::collections::VecDeque;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::Arc;
#[cfg(not(target_os = "linux"))]
use std::sync::Mutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
#[cfg(not(target_os = "linux"))]
use std::thread;
use std::time::Duration;
use std::time::Instant;
#[cfg(not(target_os = "linux"))]
use tokio::runtime::Handle;
/// If the pasted content exceeds this number of characters, replace it with a
/// placeholder in the UI.
@@ -321,13 +318,9 @@ struct VoiceState {
space_hold_trigger: Option<Arc<AtomicBool>>,
key_release_supported: bool,
space_hold_repeat_seen: bool,
#[cfg(not(target_os = "linux"))]
voice: Option<crate::voice::VoiceCapture>,
#[cfg(not(target_os = "linux"))]
recording_placeholder_id: Option<String>,
#[cfg(not(target_os = "linux"))]
space_recording_started_at: Option<Instant>,
#[cfg(not(target_os = "linux"))]
space_recording_last_repeat_at: Option<Instant>,
}
@@ -381,7 +374,6 @@ pub(crate) struct ChatComposer {
footer_flash: Option<FooterFlash>,
context_window_percent: Option<i64>,
// Monotonically increasing identifier for textarea elements we insert.
#[cfg(not(target_os = "linux"))]
next_element_id: u64,
context_window_used_tokens: Option<i64>,
skills: Option<Vec<SkillMetadata>>,
@@ -396,6 +388,7 @@ pub(crate) struct ChatComposer {
personality_command_enabled: bool,
realtime_conversation_enabled: bool,
audio_device_selection_enabled: bool,
audio_microphone: Option<String>,
windows_degraded_sandbox_active: bool,
status_line_value: Option<Line<'static>>,
status_line_enabled: bool,
@@ -487,7 +480,6 @@ impl ChatComposer {
selected_remote_image_index: None,
footer_flash: None,
context_window_percent: None,
#[cfg(not(target_os = "linux"))]
next_element_id: 0,
context_window_used_tokens: None,
skills: None,
@@ -502,6 +494,7 @@ impl ChatComposer {
personality_command_enabled: false,
realtime_conversation_enabled: false,
audio_device_selection_enabled: false,
audio_microphone: None,
windows_degraded_sandbox_active: false,
status_line_value: None,
status_line_enabled: false,
@@ -511,7 +504,6 @@ impl ChatComposer {
this
}
#[cfg(not(target_os = "linux"))]
fn next_id(&mut self) -> String {
let id = self.next_element_id;
self.next_element_id = self.next_element_id.wrapping_add(1);
@@ -583,6 +575,10 @@ impl ChatComposer {
self.audio_device_selection_enabled = enabled;
}
pub fn set_audio_microphone(&mut self, microphone: Option<String>) {
self.audio_microphone = microphone;
}
/// Compatibility shim for tests that still toggle the removed steer mode flag.
#[cfg(test)]
pub fn set_steer_enabled(&mut self, _enabled: bool) {}
@@ -598,9 +594,8 @@ impl ChatComposer {
}
}
#[cfg(not(target_os = "linux"))]
fn voice_transcription_enabled(&self) -> bool {
self.voice_state.transcription_enabled && cfg!(not(target_os = "linux"))
self.voice_state.transcription_enabled
}
/// Centralized feature gating keeps config checks out of call sites.
fn popups_enabled(&self) -> bool {
@@ -671,7 +666,6 @@ impl ChatComposer {
}
// Hide the cursor while recording voice input.
#[cfg(not(target_os = "linux"))]
if self.voice_state.voice.is_some() {
return None;
}
@@ -732,7 +726,6 @@ impl ChatComposer {
/// In all cases, clears any paste-burst Enter suppression state so a real paste cannot affect
/// the next user Enter key, then syncs popup state.
pub fn handle_paste(&mut self, pasted: String) -> bool {
#[cfg(not(target_os = "linux"))]
if self.voice_state.voice.is_some() {
return false;
}
@@ -980,7 +973,6 @@ impl ChatComposer {
local_image_paths: Vec<PathBuf>,
mention_bindings: Vec<MentionBinding>,
) {
#[cfg(not(target_os = "linux"))]
self.stop_all_transcription_spinners();
// Clear any existing content, placeholders, and attachments first.
@@ -2760,15 +2752,6 @@ impl ChatComposer {
}
}
#[cfg(target_os = "linux")]
fn handle_voice_space_key_event(
&mut self,
_key_event: &KeyEvent,
) -> Option<(InputResult, bool)> {
None
}
#[cfg(not(target_os = "linux"))]
fn handle_voice_space_key_event(
&mut self,
key_event: &KeyEvent,
@@ -2843,15 +2826,6 @@ impl ChatComposer {
}
}
#[cfg(target_os = "linux")]
fn handle_key_event_while_recording(
&mut self,
_key_event: KeyEvent,
) -> Option<(InputResult, bool)> {
None
}
#[cfg(not(target_os = "linux"))]
fn handle_key_event_while_recording(
&mut self,
key_event: KeyEvent,
@@ -3624,7 +3598,6 @@ impl ChatComposer {
self.has_focus = has_focus;
}
#[cfg(not(target_os = "linux"))]
pub(crate) fn is_recording(&self) -> bool {
self.voice_state.voice.is_some()
}
@@ -3662,7 +3635,6 @@ impl ChatComposer {
}
}
#[cfg(not(target_os = "linux"))]
fn schedule_space_hold_timer(flag: Arc<AtomicBool>, frame: Option<FrameRequester>) {
const HOLD_DELAY_MILLIS: u64 = 500;
if let Ok(handle) = Handle::try_current() {
@@ -3680,7 +3652,6 @@ impl ChatComposer {
}
}
#[cfg(not(target_os = "linux"))]
fn complete_space_hold_timer(flag: Arc<AtomicBool>, frame: Option<FrameRequester>) {
flag.store(true, Ordering::Relaxed);
if let Some(frame) = frame {
@@ -3705,7 +3676,6 @@ impl ChatComposer {
}
}
#[cfg(not(target_os = "linux"))]
impl ChatComposer {
pub(crate) fn process_space_hold_trigger(&mut self) {
if self.voice_transcription_enabled()
@@ -3849,7 +3819,7 @@ impl ChatComposer {
/// Start voice capture and insert a placeholder element for the live meter.
/// Returns true if recording began and UI should redraw; false on failure.
fn start_recording_with_placeholder(&mut self) -> bool {
match crate::voice::VoiceCapture::start() {
match crate::voice::VoiceCapture::start(self.audio_microphone.as_deref()) {
Ok(vc) => {
self.voice_state.voice = Some(vc);
if self.voice_state.key_release_supported {
@@ -3973,7 +3943,6 @@ impl ChatComposer {
self.textarea.update_named_element_by_id(id, text)
}
#[cfg(not(target_os = "linux"))]
pub fn insert_transcription_placeholder(&mut self, text: &str) -> String {
let id = self.next_id();
self.textarea.insert_named_element(text, id.clone());
@@ -6461,7 +6430,6 @@ mod tests {
assert!(!composer.voice_state.space_hold_repeat_seen);
}
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_without_release_or_repeat_keeps_typed_space() {
let (tx, _rx) = unbounded_channel::<AppEvent>();
@@ -6493,7 +6461,6 @@ mod tests {
assert!(!composer.voice_state.space_hold_repeat_seen);
}
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_with_repeat_uses_hold_path_without_release() {
let (tx, _rx) = unbounded_channel::<AppEvent>();
@@ -6527,7 +6494,6 @@ mod tests {
}
}
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_with_repeat_does_not_duplicate_existing_space() {
let (tx, _rx) = unbounded_channel::<AppEvent>();
@@ -6561,7 +6527,6 @@ mod tests {
}
}
#[cfg(not(target_os = "linux"))]
#[test]
fn replace_transcription_stops_spinner_for_placeholder() {
let (tx, _rx) = unbounded_channel::<AppEvent>();
@@ -6588,7 +6553,6 @@ mod tests {
assert_eq!(composer.textarea.text(), "transcribed text");
}
#[cfg(not(target_os = "linux"))]
#[test]
fn set_text_content_stops_all_transcription_spinners() {
let (tx, _rx) = unbounded_channel::<AppEvent>();

View File

@@ -303,6 +303,11 @@ impl BottomPane {
self.request_redraw();
}
pub fn set_audio_microphone(&mut self, microphone: Option<String>) {
self.composer.set_audio_microphone(microphone);
self.request_redraw();
}
pub fn set_voice_transcription_enabled(&mut self, enabled: bool) {
self.composer.set_voice_transcription_enabled(enabled);
self.request_redraw();
@@ -346,7 +351,6 @@ impl BottomPane {
pub fn handle_key_event(&mut self, key_event: KeyEvent) -> InputResult {
// Do not globally intercept space; only composer handles hold-to-talk.
// While recording, route all keys to the composer so it can stop on release or next key.
#[cfg(not(target_os = "linux"))]
if self.composer.is_recording() {
let (_ir, needs_redraw) = self.composer.handle_key_event(key_event);
if needs_redraw {
@@ -480,7 +484,6 @@ impl BottomPane {
// Space hold timeout is handled inside ChatComposer via an internal timer.
pub(crate) fn pre_draw_tick(&mut self) {
// Allow composer to process any time-based transitions before drawing
#[cfg(not(target_os = "linux"))]
self.composer.process_space_hold_trigger();
self.composer.sync_popups();
}
@@ -1039,7 +1042,6 @@ impl BottomPane {
}
}
#[cfg(not(target_os = "linux"))]
impl BottomPane {
pub(crate) fn insert_transcription_placeholder(&mut self, text: &str) -> String {
let id = self.composer.insert_transcription_placeholder(text);

View File

@@ -894,7 +894,6 @@ impl TextArea {
id
}
#[cfg(not(target_os = "linux"))]
pub fn insert_named_element(&mut self, text: &str, id: String) {
let start = self.clamp_pos_for_insertion(self.cursor_pos);
self.insert_str_at(start, text);

View File

@@ -37,9 +37,9 @@ use std::sync::atomic::Ordering;
use std::time::Duration;
use std::time::Instant;
use crate::app_event::RealtimeAudioDeviceKind;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
use crate::audio_device::list_realtime_audio_device_names;
use crate::app_event::VoiceAudioDeviceKind;
use crate::audio_device::VoiceAudioDevice;
use crate::audio_device::list_voice_audio_devices;
use crate::bottom_pane::StatusLineItem;
use crate::bottom_pane::StatusLineSetupView;
use crate::status::RateLimitWindowDisplay;
@@ -858,11 +858,10 @@ enum ReplayKind {
impl ChatWidget {
fn realtime_conversation_enabled(&self) -> bool {
self.config.features.enabled(Feature::RealtimeConversation)
&& cfg!(not(target_os = "linux"))
}
fn realtime_audio_device_selection_enabled(&self) -> bool {
self.realtime_conversation_enabled() && cfg!(feature = "voice-input")
self.realtime_conversation_enabled()
}
/// Synchronize the bottom-pane "task running" indicator with the current lifecycles.
@@ -2905,6 +2904,9 @@ impl ChatWidget {
widget.bottom_pane.set_voice_transcription_enabled(
widget.config.features.enabled(Feature::VoiceTranscription),
);
widget
.bottom_pane
.set_audio_microphone(widget.config.realtime_audio.microphone.clone());
widget
.bottom_pane
.set_realtime_conversation_enabled(widget.realtime_conversation_enabled());
@@ -3084,6 +3086,9 @@ impl ChatWidget {
widget.bottom_pane.set_voice_transcription_enabled(
widget.config.features.enabled(Feature::VoiceTranscription),
);
widget
.bottom_pane
.set_audio_microphone(widget.config.realtime_audio.microphone.clone());
widget
.bottom_pane
.set_realtime_conversation_enabled(widget.realtime_conversation_enabled());
@@ -3252,6 +3257,9 @@ impl ChatWidget {
widget.bottom_pane.set_voice_transcription_enabled(
widget.config.features.enabled(Feature::VoiceTranscription),
);
widget
.bottom_pane
.set_audio_microphone(widget.config.realtime_audio.microphone.clone());
widget
.bottom_pane
.set_realtime_conversation_enabled(widget.realtime_conversation_enabled());
@@ -5317,17 +5325,17 @@ impl ChatWidget {
pub(crate) fn open_realtime_audio_popup(&mut self) {
let items = [
RealtimeAudioDeviceKind::Microphone,
RealtimeAudioDeviceKind::Speaker,
VoiceAudioDeviceKind::Microphone,
VoiceAudioDeviceKind::Speaker,
]
.into_iter()
.map(|kind| {
let description = Some(format!(
"Current: {}",
self.current_realtime_audio_selection_label(kind)
self.current_voice_audio_selection_label(kind)
));
let actions: Vec<SelectionAction> = vec![Box::new(move |tx| {
tx.send(AppEvent::OpenRealtimeAudioDeviceSelection { kind });
tx.send(AppEvent::OpenVoiceAudioDeviceSelection { kind });
})];
SelectionItem {
name: kind.title().to_string(),
@@ -5348,52 +5356,43 @@ impl ChatWidget {
});
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
pub(crate) fn open_realtime_audio_device_selection(&mut self, kind: RealtimeAudioDeviceKind) {
match list_realtime_audio_device_names(kind) {
Ok(device_names) => {
self.open_realtime_audio_device_selection_with_names(kind, device_names);
pub(crate) fn open_voice_audio_device_selection(&mut self, kind: VoiceAudioDeviceKind) {
match list_voice_audio_devices(kind) {
Ok(devices) => {
self.open_voice_audio_device_selection_with_devices(kind, devices);
}
Err(err) => {
self.add_error_message(format!(
"Failed to load realtime {} devices: {err}",
kind.noun()
));
self.add_error_message(format!("Failed to load {} devices: {err}", kind.noun()));
}
}
}
#[cfg(any(target_os = "linux", not(feature = "voice-input")))]
pub(crate) fn open_realtime_audio_device_selection(&mut self, kind: RealtimeAudioDeviceKind) {
let _ = kind;
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
fn open_realtime_audio_device_selection_with_names(
fn open_voice_audio_device_selection_with_devices(
&mut self,
kind: RealtimeAudioDeviceKind,
device_names: Vec<String>,
kind: VoiceAudioDeviceKind,
devices: Vec<VoiceAudioDevice>,
) {
let current_selection = self.current_realtime_audio_device_name(kind);
let current_selection = self.current_voice_audio_device_id(kind);
let current_available = current_selection
.as_deref()
.is_some_and(|name| device_names.iter().any(|device_name| device_name == name));
.is_some_and(|device_id| devices.iter().any(|device| device.id == device_id));
let mut items = vec![SelectionItem {
name: "System default".to_string(),
description: Some("Use your operating system default device.".to_string()),
is_current: current_selection.is_none(),
actions: vec![Box::new(move |tx| {
tx.send(AppEvent::PersistRealtimeAudioDeviceSelection { kind, name: None });
tx.send(AppEvent::PersistVoiceAudioDeviceSelection {
kind,
device_id: None,
});
})],
dismiss_on_select: true,
..Default::default()
}];
if let Some(selection) = current_selection.as_deref()
&& !current_available
{
if current_selection.is_some() && !current_available {
items.push(SelectionItem {
name: format!("Unavailable: {selection}"),
name: "Unavailable device".to_string(),
description: Some("Configured device is not currently available.".to_string()),
is_current: true,
is_disabled: true,
@@ -5402,17 +5401,32 @@ impl ChatWidget {
});
}
items.extend(device_names.into_iter().map(|device_name| {
let persisted_name = device_name.clone();
let duplicate_counts =
devices
.iter()
.fold(HashMap::<String, usize>::new(), |mut counts, device| {
*counts.entry(device.label.clone()).or_default() += 1;
counts
});
items.extend(devices.into_iter().map(|device| {
let persisted_device_id = device.id.clone();
let description = (duplicate_counts
.get(&device.label)
.copied()
.unwrap_or_default()
> 1)
.then(|| format!("Device ID: {}", Self::compact_device_id(&device.id)));
let actions: Vec<SelectionAction> = vec![Box::new(move |tx| {
tx.send(AppEvent::PersistRealtimeAudioDeviceSelection {
tx.send(AppEvent::PersistVoiceAudioDeviceSelection {
kind,
name: Some(persisted_name.clone()),
device_id: Some(persisted_device_id.clone()),
});
})];
SelectionItem {
is_current: current_selection.as_deref() == Some(device_name.as_str()),
name: device_name,
is_current: current_selection.as_deref() == Some(device.id.as_str()),
name: device.label,
description,
actions,
dismiss_on_select: true,
..Default::default()
@@ -5421,9 +5435,15 @@ impl ChatWidget {
let mut header = ColumnRenderable::new();
header.push(Line::from(format!("Select {}", kind.title()).bold()));
header.push(Line::from(
"Saved devices apply to realtime voice only.".dim(),
));
let subtitle = match kind {
VoiceAudioDeviceKind::Microphone => {
"Saved microphone applies to transcription and realtime voice.".dim()
}
VoiceAudioDeviceKind::Speaker => {
"Saved speaker applies to realtime voice output.".dim()
}
};
header.push(Line::from(subtitle));
self.bottom_pane.show_selection_view(SelectionViewParams {
header: Box::new(header),
@@ -5433,9 +5453,9 @@ impl ChatWidget {
});
}
pub(crate) fn open_realtime_audio_restart_prompt(&mut self, kind: RealtimeAudioDeviceKind) {
pub(crate) fn open_voice_audio_restart_prompt(&mut self, kind: VoiceAudioDeviceKind) {
let restart_actions: Vec<SelectionAction> = vec![Box::new(move |tx| {
tx.send(AppEvent::RestartRealtimeAudioDevice { kind });
tx.send(AppEvent::RestartVoiceAudioDevice { kind });
})];
let items = vec![
SelectionItem {
@@ -6814,14 +6834,17 @@ impl ChatWidget {
self.config.personality = Some(personality);
}
pub(crate) fn set_realtime_audio_device(
pub(crate) fn set_voice_audio_device(
&mut self,
kind: RealtimeAudioDeviceKind,
name: Option<String>,
kind: VoiceAudioDeviceKind,
device_id: Option<String>,
) {
match kind {
RealtimeAudioDeviceKind::Microphone => self.config.realtime_audio.microphone = name,
RealtimeAudioDeviceKind::Speaker => self.config.realtime_audio.speaker = name,
VoiceAudioDeviceKind::Microphone => {
self.config.realtime_audio.microphone = device_id.clone();
self.bottom_pane.set_audio_microphone(device_id);
}
VoiceAudioDeviceKind::Speaker => self.config.realtime_audio.speaker = device_id,
}
}
@@ -6857,16 +6880,38 @@ impl ChatWidget {
self.realtime_conversation.is_active()
}
fn current_realtime_audio_device_name(&self, kind: RealtimeAudioDeviceKind) -> Option<String> {
fn current_voice_audio_device_id(&self, kind: VoiceAudioDeviceKind) -> Option<String> {
match kind {
RealtimeAudioDeviceKind::Microphone => self.config.realtime_audio.microphone.clone(),
RealtimeAudioDeviceKind::Speaker => self.config.realtime_audio.speaker.clone(),
VoiceAudioDeviceKind::Microphone => self.config.realtime_audio.microphone.clone(),
VoiceAudioDeviceKind::Speaker => self.config.realtime_audio.speaker.clone(),
}
}
fn current_realtime_audio_selection_label(&self, kind: RealtimeAudioDeviceKind) -> String {
self.current_realtime_audio_device_name(kind)
.unwrap_or_else(|| "System default".to_string())
fn current_voice_audio_selection_label(&self, kind: VoiceAudioDeviceKind) -> String {
let Some(device_id) = self.current_voice_audio_device_id(kind) else {
return "System default".to_string();
};
list_voice_audio_devices(kind)
.ok()
.and_then(|devices| {
devices
.into_iter()
.find(|device| device.id == device_id)
.map(|device| device.label)
})
.unwrap_or_else(|| "Unavailable device".to_string())
}
fn compact_device_id(device_id: &str) -> String {
const MAX_LEN: usize = 18;
if device_id.len() <= MAX_LEN {
return device_id.to_string();
}
let start = &device_id[..8];
let end = &device_id[device_id.len() - 8..];
format!("{start}...{end}")
}
fn sync_personality_command_enabled(&mut self) {
@@ -7922,7 +7967,6 @@ impl ChatWidget {
}
}
#[cfg(not(target_os = "linux"))]
impl ChatWidget {
pub(crate) fn replace_transcription(&mut self, id: &str, text: &str) {
self.bottom_pane.replace_transcription(id, text);

View File

@@ -24,11 +24,8 @@ pub(super) struct RealtimeConversationUiState {
session_id: Option<String>,
warned_audio_only_submission: bool,
meter_placeholder_id: Option<String>,
#[cfg(not(target_os = "linux"))]
capture_stop_flag: Option<Arc<AtomicBool>>,
#[cfg(not(target_os = "linux"))]
capture: Option<crate::voice::VoiceCapture>,
#[cfg(not(target_os = "linux"))]
audio_player: Option<crate::voice::RealtimeAudioPlayer>,
}
@@ -207,25 +204,17 @@ impl ChatWidget {
}
fn enqueue_realtime_audio_out(&mut self, frame: &RealtimeAudioFrame) {
#[cfg(not(target_os = "linux"))]
{
if self.realtime_conversation.audio_player.is_none() {
self.realtime_conversation.audio_player =
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
}
if let Some(player) = &self.realtime_conversation.audio_player
&& let Err(err) = player.enqueue_frame(frame)
{
warn!("failed to play realtime audio: {err}");
}
if self.realtime_conversation.audio_player.is_none() {
self.realtime_conversation.audio_player =
crate::voice::RealtimeAudioPlayer::start(&self.config).ok();
}
#[cfg(target_os = "linux")]
if let Some(player) = &self.realtime_conversation.audio_player
&& let Err(err) = player.enqueue_frame(frame)
{
let _ = frame;
warn!("failed to play realtime audio: {err}");
}
}
#[cfg(not(target_os = "linux"))]
fn start_realtime_local_audio(&mut self) {
if self.realtime_conversation.capture_stop_flag.is_some() {
return;
@@ -279,21 +268,17 @@ impl ChatWidget {
});
}
#[cfg(target_os = "linux")]
fn start_realtime_local_audio(&mut self) {}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
pub(crate) fn restart_realtime_audio_device(&mut self, kind: RealtimeAudioDeviceKind) {
pub(crate) fn restart_voice_audio_device(&mut self, kind: VoiceAudioDeviceKind) {
if !self.realtime_conversation.is_active() {
return;
}
match kind {
RealtimeAudioDeviceKind::Microphone => {
VoiceAudioDeviceKind::Microphone => {
self.stop_realtime_microphone();
self.start_realtime_local_audio();
}
RealtimeAudioDeviceKind::Speaker => {
VoiceAudioDeviceKind::Speaker => {
self.stop_realtime_speaker();
match crate::voice::RealtimeAudioPlayer::start(&self.config) {
Ok(player) => {
@@ -308,23 +293,11 @@ impl ChatWidget {
self.request_redraw();
}
#[cfg(any(target_os = "linux", not(feature = "voice-input")))]
pub(crate) fn restart_realtime_audio_device(&mut self, kind: RealtimeAudioDeviceKind) {
let _ = kind;
}
#[cfg(not(target_os = "linux"))]
fn stop_realtime_local_audio(&mut self) {
self.stop_realtime_microphone();
self.stop_realtime_speaker();
}
#[cfg(target_os = "linux")]
fn stop_realtime_local_audio(&mut self) {
self.realtime_conversation.meter_placeholder_id = None;
}
#[cfg(not(target_os = "linux"))]
fn stop_realtime_microphone(&mut self) {
if let Some(flag) = self.realtime_conversation.capture_stop_flag.take() {
flag.store(true, Ordering::Relaxed);
@@ -337,7 +310,6 @@ impl ChatWidget {
}
}
#[cfg(not(target_os = "linux"))]
fn stop_realtime_speaker(&mut self) {
if let Some(player) = self.realtime_conversation.audio_player.take() {
player.clear();

View File

@@ -3,15 +3,14 @@ source: tui/src/chatwidget/tests.rs
expression: popup
---
Select Microphone
Saved devices apply to realtime voice only.
Saved microphone applies to transcription and realtime voice.
1. System default Use your operating system
default device.
2. Unavailable: Studio Mic (current) (disabled) Configured device is not
currently available.
(disabled: Reconnect the
device or choose another
one.)
1. System default Use your operating system
default device.
2. Unavailable device (current) (disabled) Configured device is not
currently available. (disabled:
Reconnect the device or choose
another one.)
3. Built-in Mic
4. USB Mic

View File

@@ -7,9 +7,9 @@
use super::*;
use crate::app_event::AppEvent;
use crate::app_event::ExitMode;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
use crate::app_event::RealtimeAudioDeviceKind;
use crate::app_event::VoiceAudioDeviceKind;
use crate::app_event_sender::AppEventSender;
use crate::audio_device::VoiceAudioDevice;
use crate::bottom_pane::FeedbackAudience;
use crate::bottom_pane::LocalImageAttachment;
use crate::bottom_pane::MentionBinding;
@@ -6011,7 +6011,6 @@ async fn personality_selection_popup_snapshot() {
assert_snapshot!("personality_selection_popup", popup);
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[tokio::test]
async fn realtime_audio_selection_popup_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.2-codex")).await;
@@ -6021,7 +6020,6 @@ async fn realtime_audio_selection_popup_snapshot() {
assert_snapshot!("realtime_audio_selection_popup", popup);
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[tokio::test]
async fn realtime_audio_selection_popup_narrow_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.2-codex")).await;
@@ -6031,27 +6029,43 @@ async fn realtime_audio_selection_popup_narrow_snapshot() {
assert_snapshot!("realtime_audio_selection_popup_narrow", popup);
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[tokio::test]
async fn realtime_microphone_picker_popup_snapshot() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.2-codex")).await;
chat.config.realtime_audio.microphone = Some("Studio Mic".to_string());
chat.open_realtime_audio_device_selection_with_names(
RealtimeAudioDeviceKind::Microphone,
vec!["Built-in Mic".to_string(), "USB Mic".to_string()],
chat.config.realtime_audio.microphone = Some("mic-unavailable".to_string());
chat.open_voice_audio_device_selection_with_devices(
VoiceAudioDeviceKind::Microphone,
vec![
VoiceAudioDevice {
id: "mic-1".to_string(),
label: "Built-in Mic".to_string(),
},
VoiceAudioDevice {
id: "mic-2".to_string(),
label: "USB Mic".to_string(),
},
],
);
let popup = render_bottom_popup(&chat, 80);
assert_snapshot!("realtime_microphone_picker_popup", popup);
}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[tokio::test]
async fn realtime_audio_picker_emits_persist_event() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.2-codex")).await;
chat.open_realtime_audio_device_selection_with_names(
RealtimeAudioDeviceKind::Speaker,
vec!["Desk Speakers".to_string(), "Headphones".to_string()],
chat.open_voice_audio_device_selection_with_devices(
VoiceAudioDeviceKind::Speaker,
vec![
VoiceAudioDevice {
id: "speaker-1".to_string(),
label: "Desk Speakers".to_string(),
},
VoiceAudioDevice {
id: "speaker-2".to_string(),
label: "Headphones".to_string(),
},
],
);
chat.handle_key_event(KeyEvent::new(KeyCode::Down, KeyModifiers::NONE));
@@ -6060,10 +6074,10 @@ async fn realtime_audio_picker_emits_persist_event() {
assert_matches!(
rx.try_recv(),
Ok(AppEvent::PersistRealtimeAudioDeviceSelection {
kind: RealtimeAudioDeviceKind::Speaker,
name: Some(name),
}) if name == "Headphones"
Ok(AppEvent::PersistVoiceAudioDeviceSelection {
kind: VoiceAudioDeviceKind::Speaker,
device_id: Some(device_id),
}) if device_id == "speaker-2"
);
}

View File

@@ -63,7 +63,6 @@ mod app_backtrack;
mod app_event;
mod app_event_sender;
mod ascii_animation;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
mod audio_device;
mod bottom_pane;
mod chatwidget;
@@ -119,99 +118,7 @@ pub mod update_action;
mod update_prompt;
mod updates;
mod version;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
mod voice;
#[cfg(all(not(target_os = "linux"), not(feature = "voice-input")))]
mod voice {
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
use codex_core::config::Config;
use codex_protocol::protocol::RealtimeAudioFrame;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU16;
pub struct RecordedAudio {
pub data: Vec<i16>,
pub sample_rate: u32,
pub channels: u16,
}
pub struct VoiceCapture;
pub(crate) struct RecordingMeterState;
pub(crate) struct RealtimeAudioPlayer;
impl VoiceCapture {
pub fn start() -> Result<Self, String> {
Err("voice input is unavailable in this build".to_string())
}
pub fn start_realtime(_config: &Config, _tx: AppEventSender) -> Result<Self, String> {
Err("voice input is unavailable in this build".to_string())
}
pub fn stop(self) -> Result<RecordedAudio, String> {
Err("voice input is unavailable in this build".to_string())
}
pub fn data_arc(&self) -> Arc<Mutex<Vec<i16>>> {
Arc::new(Mutex::new(Vec::new()))
}
pub fn stopped_flag(&self) -> Arc<AtomicBool> {
Arc::new(AtomicBool::new(true))
}
pub fn sample_rate(&self) -> u32 {
0
}
pub fn channels(&self) -> u16 {
0
}
pub fn last_peak_arc(&self) -> Arc<AtomicU16> {
Arc::new(AtomicU16::new(0))
}
}
impl RecordingMeterState {
pub(crate) fn new() -> Self {
Self
}
pub(crate) fn next_text(&mut self, _peak: u16) -> String {
"⠤⠤⠤⠤".to_string()
}
}
impl RealtimeAudioPlayer {
pub(crate) fn start(_config: &Config) -> Result<Self, String> {
Err("voice output is unavailable in this build".to_string())
}
pub(crate) fn enqueue_frame(&self, _frame: &RealtimeAudioFrame) -> Result<(), String> {
Err("voice output is unavailable in this build".to_string())
}
pub(crate) fn clear(&self) {}
}
pub fn transcribe_async(
id: String,
_audio: RecordedAudio,
_context: Option<String>,
tx: AppEventSender,
) {
tx.send(AppEvent::TranscriptionFailed {
id,
error: "voice input is unavailable in this build".to_string(),
});
}
}
mod wrapping;
#[cfg(test)]

View File

@@ -11,7 +11,6 @@ use codex_protocol::protocol::ConversationAudioParams;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::RealtimeAudioFrame;
use cpal::traits::DeviceTrait;
use cpal::traits::HostTrait;
use cpal::traits::StreamTrait;
use hound::SampleFormat;
use hound::WavSpec;
@@ -54,10 +53,10 @@ pub struct VoiceCapture {
}
impl VoiceCapture {
pub fn start() -> Result<Self, String> {
let (device, config) = select_default_input_device_and_config()?;
pub fn start(device_id: Option<&str>) -> Result<Self, String> {
let (device, config) = select_input_device_and_config(device_id)?;
let sample_rate = config.sample_rate().0;
let sample_rate = config.sample_rate();
let channels = config.channels();
let data: Arc<Mutex<Vec<i16>>> = Arc::new(Mutex::new(Vec::new()));
let stopped = Arc::new(AtomicBool::new(false));
@@ -81,7 +80,7 @@ impl VoiceCapture {
pub fn start_realtime(config: &Config, tx: AppEventSender) -> Result<Self, String> {
let (device, config) = select_realtime_input_device_and_config(config)?;
let sample_rate = config.sample_rate().0;
let sample_rate = config.sample_rate();
let channels = config.channels();
let data: Arc<Mutex<Vec<i16>>> = Arc::new(Mutex::new(Vec::new()));
let stopped = Arc::new(AtomicBool::new(false));
@@ -266,14 +265,10 @@ pub fn transcribe_async(
// Voice input helpers
// -------------------------
fn select_default_input_device_and_config()
-> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
let host = cpal::default_host();
let device = host
.default_input_device()
.ok_or_else(|| "no input audio device available".to_string())?;
let config = crate::audio_device::preferred_input_config(&device)?;
Ok((device, config))
fn select_input_device_and_config(
device_id: Option<&str>,
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
crate::audio_device::select_input_device_and_config(device_id)
}
fn select_realtime_input_device_and_config(
@@ -493,7 +488,7 @@ impl RealtimeAudioPlayer {
pub(crate) fn start(config: &Config) -> Result<Self, String> {
let (device, config) =
crate::audio_device::select_configured_output_device_and_config(config)?;
let output_sample_rate = config.sample_rate().0;
let output_sample_rate = config.sample_rate();
let output_channels = config.channels();
let queue = Arc::new(Mutex::new(VecDeque::new()));
let stream = build_output_stream(&device, &config, Arc::clone(&queue))?;

View File

@@ -49,6 +49,15 @@ just test
cargo test --all-features
```
On Linux, building from source currently requires ALSA development headers because local voice support is built with `cpal`. On Debian/Ubuntu, install them with:
```bash
sudo apt-get update
sudo apt-get install -y libasound2-dev
```
If you just want to run Codex on Linux, prefer the prebuilt GitHub Release binaries instead of building from source.
## Tracing / verbose logging
Codex is written in Rust, so it honors the `RUST_LOG` environment variable to configure its logging behavior.