tui: make voice hold delay configurable

Add a config value (`voice_transcription_space_hold_delay_ms`) for the Space-hold threshold, wire it into both TUI composer implementations, and document the default behavior.

Also mirror the voice transcription feature key and Space-hold delay in config-schema, and keep placeholder replacement available on Linux for the voice path.

(cherry picked from commit 9fa36434d566153cc69d2fabac095e313d93be51)

(cherry picked from commit 749a0f42619c8cb66514c0bce220b775c10916e3)
This commit is contained in:
dank-openai
2026-04-23 17:20:10 -04:00
parent a7c5f106cb
commit be5f34a898
18 changed files with 2295 additions and 78 deletions

View File

@@ -590,6 +590,9 @@
"use_linux_sandbox_bwrap": {
"type": "boolean"
},
"voice_transcription": {
"type": "boolean"
},
"web_search": {
"type": "boolean"
},
@@ -3633,6 +3636,9 @@
"use_linux_sandbox_bwrap": {
"type": "boolean"
},
"voice_transcription": {
"type": "boolean"
},
"web_search": {
"type": "boolean"
},
@@ -4043,6 +4049,12 @@
],
"description": "Collection of settings that are specific to the TUI."
},
"voice_transcription_space_hold_delay_ms": {
"description": "Delay before holding Space on a non-empty composer switches into voice transcription instead of inserting a literal space.",
"format": "uint64",
"minimum": 0.0,
"type": "integer"
},
"watchdog_interval_s": {
"description": "Watchdog polling interval in seconds.",
"format": "int64",

View File

@@ -6045,6 +6045,8 @@ async fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> {
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
voice_transcription_space_hold_delay_ms:
DEFAULT_VOICE_TRANSCRIPTION_SPACE_HOLD_DELAY_MS,
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
@@ -6244,6 +6246,7 @@ async fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> {
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
voice_transcription_space_hold_delay_ms: DEFAULT_VOICE_TRANSCRIPTION_SPACE_HOLD_DELAY_MS,
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
@@ -6397,6 +6400,7 @@ async fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> {
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
voice_transcription_space_hold_delay_ms: DEFAULT_VOICE_TRANSCRIPTION_SPACE_HOLD_DELAY_MS,
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
@@ -6535,6 +6539,7 @@ async fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> {
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
voice_transcription_space_hold_delay_ms: DEFAULT_VOICE_TRANSCRIPTION_SPACE_HOLD_DELAY_MS,
experimental_realtime_start_instructions: None,
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_model: None,
@@ -8677,6 +8682,29 @@ speaker = "Desk Speakers"
Ok(())
}
/// Checks that a `voice_transcription_space_hold_delay_ms` value written in TOML is parsed into
/// `ConfigToml` and survives into the `Config` built from that base config.
#[tokio::test]
async fn voice_transcription_space_hold_delay_loads_from_config_toml() -> std::io::Result<()> {
    // Raw TOML fixture; the lines inside the literal stay at column 0 so the parsed text is
    // unchanged.
    const TOML_SOURCE: &str = r#"
voice_transcription_space_hold_delay_ms = 250
"#;

    // Stage 1: the raw TOML layer exposes the value as an optional field.
    let parsed: ConfigToml =
        toml::from_str(TOML_SOURCE).expect("TOML deserialization should succeed");
    assert_eq!(parsed.voice_transcription_space_hold_delay_ms, Some(250));

    // Stage 2: the loaded config carries the explicit value.
    let home_dir = TempDir::new()?;
    let overrides = ConfigOverrides::default();
    let loaded =
        Config::load_from_base_config_with_overrides(parsed, overrides, home_dir.abs()).await?;
    assert_eq!(loaded.voice_transcription_space_hold_delay_ms, 250);

    Ok(())
}
#[derive(Deserialize, Debug, PartialEq)]
struct TuiTomlTest {
#[serde(default, flatten)]

View File

@@ -168,6 +168,8 @@ pub(crate) const DEFAULT_AGENT_MAX_DEPTH: i32 = 1;
pub(crate) const DEFAULT_AGENT_JOB_MAX_RUNTIME_SECONDS: Option<u64> = None;
const LOCAL_DEV_BUILD_VERSION: &str = "0.0.0";
pub(crate) const DEFAULT_WATCHDOG_INTERVAL_S: i64 = 10;
/// Default delay, in milliseconds, that Space must be held on a non-empty composer before it
/// switches into voice transcription.
///
/// Used as the fallback for `Config::voice_transcription_space_hold_delay_ms` when the loaded
/// configuration does not set `voice_transcription_space_hold_delay_ms`.
pub const DEFAULT_VOICE_TRANSCRIPTION_SPACE_HOLD_DELAY_MS: u64 = 1_000;
pub const CONFIG_TOML_FILE: &str = "config.toml";
@@ -672,6 +674,10 @@ pub struct Config {
/// Machine-local realtime audio device preferences used by realtime voice.
pub realtime_audio: RealtimeAudioConfig,
/// Delay before holding Space on a non-empty composer switches into voice
/// transcription instead of inserting a literal space.
pub voice_transcription_space_hold_delay_ms: u64,
/// Experimental / do not use. Overrides only the realtime conversation
/// websocket transport base URL (the `Op::RealtimeConversation`
/// `/v1/realtime`
@@ -2804,6 +2810,9 @@ impl Config {
microphone: audio.microphone,
speaker: audio.speaker,
}),
voice_transcription_space_hold_delay_ms: cfg
.voice_transcription_space_hold_delay_ms
.unwrap_or(DEFAULT_VOICE_TRANSCRIPTION_SPACE_HOLD_DELAY_MS),
experimental_realtime_ws_base_url: cfg.experimental_realtime_ws_base_url,
experimental_realtime_ws_model: cfg.experimental_realtime_ws_model,
realtime: cfg