voice transcription (#3381)

Adds voice transcription on press-and-hold of spacebar.


https://github.com/user-attachments/assets/85039314-26f3-46d1-a83b-8c4a4a1ecc21

---------

Co-authored-by: Codex <199175422+chatgpt-codex-connector[bot]@users.noreply.github.com>
Co-authored-by: David Zbarsky <zbarsky@openai.com>
This commit is contained in:
Jeremy Rose
2026-02-23 14:15:18 -08:00
committed by GitHub
parent 50953ea39a
commit 855e275591
17 changed files with 2538 additions and 446 deletions

View File

@@ -109,6 +109,17 @@
//! edits and renders a placeholder prompt instead of the editable textarea. This is part of the
//! overall state machine, since it affects which transitions are even possible from a given UI
//! state.
//!
//! # Voice Hold-To-Talk Without Key Release
//!
//! On terminals that do not report `KeyEventKind::Release`, space hold-to-talk uses repeated
//! space key events as "still held" evidence:
//!
//! - For pending holds (non-empty composer), if timeout elapses without any repeated space event,
//! we treat the key as a normal typed space.
//! - If repeated space events are seen before timeout, we proceed with hold-to-talk.
//! - While recording, repeated space events keep the recording alive; if they stop for a short
//! window, we stop and transcribe.
use crate::bottom_pane::footer::mode_indicator_line;
use crate::key_hint;
use crate::key_hint::KeyBinding;
@@ -191,6 +202,7 @@ use crate::bottom_pane::textarea::TextAreaState;
use crate::clipboard_paste::normalize_pasted_path;
use crate::clipboard_paste::pasted_image_format;
use crate::history_cell;
use crate::tui::FrameRequester;
use crate::ui_consts::LIVE_PREFIX_COLS;
use codex_chatgpt::connectors;
use codex_chatgpt::connectors::AppInfo;
@@ -202,9 +214,17 @@ use std::collections::HashSet;
use std::collections::VecDeque;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::Arc;
#[cfg(not(target_os = "linux"))]
use std::sync::Mutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
#[cfg(not(target_os = "linux"))]
use std::thread;
use std::time::Duration;
use std::time::Instant;
#[cfg(not(target_os = "linux"))]
use tokio::runtime::Handle;
/// If the pasted content exceeds this number of characters, replace it with a
/// placeholder in the UI.
const LARGE_PASTE_CHAR_THRESHOLD: usize = 1000;
@@ -284,6 +304,35 @@ impl ChatComposerConfig {
}
}
}
/// Bookkeeping for spacebar hold-to-talk voice transcription.
#[derive(Default)]
struct VoiceState {
    /// Whether voice transcription is currently switched on.
    transcription_enabled: bool,
    /// When the pending Space hold began; `None` when no hold is pending.
    space_hold_started_at: Option<Instant>,
    /// Id of the temporary textarea element inserted for the pending Space press.
    space_hold_element_id: Option<String>,
    /// Flag the hold timer sets to `true` once the hold delay has elapsed.
    space_hold_trigger: Option<Arc<AtomicBool>>,
    /// Whether the terminal is treated as reporting key-release events.
    key_release_supported: bool,
    /// Set when another Space event arrives during a pending hold on a terminal
    /// without key-release reporting.
    space_hold_repeat_seen: bool,
    /// Active voice capture; `Some` while recording.
    #[cfg(not(target_os = "linux"))]
    voice: Option<crate::voice::VoiceCapture>,
    /// Id of the textarea element inserted when recording starts.
    #[cfg(not(target_os = "linux"))]
    recording_placeholder_id: Option<String>,
    /// Start of the current recording; only set when key release is not supported.
    #[cfg(not(target_os = "linux"))]
    space_recording_started_at: Option<Instant>,
    /// Time of the most recent Space press/repeat seen while recording without
    /// key-release support.
    #[cfg(not(target_os = "linux"))]
    space_recording_last_repeat_at: Option<Instant>,
}

impl VoiceState {
    /// Builds a state with every field at its default except `key_release_supported`.
    fn new(key_release_supported: bool) -> Self {
        Self {
            key_release_supported,
            ..Self::default()
        }
    }
}
pub(crate) struct ChatComposer {
textarea: TextArea,
textarea_state: RefCell<TextAreaState>,
@@ -299,10 +348,14 @@ pub(crate) struct ChatComposer {
pending_pastes: Vec<(String, String)>,
large_paste_counters: HashMap<usize, usize>,
has_focus: bool,
frame_requester: Option<FrameRequester>,
/// Invariant: attached images are labeled in vec order as
/// `[Image #M+1]..[Image #N]`, where `M` is the number of remote images.
attached_images: Vec<AttachedImage>,
placeholder_text: String,
voice_state: VoiceState,
// Spinner control flags keyed by placeholder id; set to true to stop.
spinner_stop_flags: HashMap<String, Arc<AtomicBool>>,
is_task_running: bool,
/// When false, the composer is temporarily read-only (e.g. during sandbox setup).
input_enabled: bool,
@@ -320,6 +373,9 @@ pub(crate) struct ChatComposer {
selected_remote_image_index: Option<usize>,
footer_flash: Option<FooterFlash>,
context_window_percent: Option<i64>,
// Monotonically increasing identifier for textarea elements we insert.
#[cfg(not(target_os = "linux"))]
next_element_id: u64,
context_window_used_tokens: Option<i64>,
skills: Option<Vec<SkillMetadata>>,
connectors_snapshot: Option<ConnectorsSnapshot>,
@@ -407,8 +463,11 @@ impl ChatComposer {
pending_pastes: Vec::new(),
large_paste_counters: HashMap::new(),
has_focus: has_input_focus,
frame_requester: None,
attached_images: Vec::new(),
placeholder_text,
voice_state: VoiceState::new(enhanced_keys_supported),
spinner_stop_flags: HashMap::new(),
is_task_running: false,
input_enabled: true,
input_disabled_placeholder: None,
@@ -421,6 +480,8 @@ impl ChatComposer {
selected_remote_image_index: None,
footer_flash: None,
context_window_percent: None,
#[cfg(not(target_os = "linux"))]
next_element_id: 0,
context_window_used_tokens: None,
skills: None,
connectors_snapshot: None,
@@ -442,6 +503,17 @@ impl ChatComposer {
this
}
/// Hands out the next textarea element id as a decimal string and advances the
/// counter (wrapping on overflow).
#[cfg(not(target_os = "linux"))]
fn next_id(&mut self) -> String {
    let current = self.next_element_id;
    self.next_element_id = current.wrapping_add(1);
    current.to_string()
}
/// Stores the frame requester; the space-hold timer receives a clone of it so it
/// can schedule a frame once the hold delay has elapsed.
pub(crate) fn set_frame_requester(&mut self, frame_requester: FrameRequester) {
self.frame_requester = Some(frame_requester);
}
/// Replaces the stored skill metadata (`None` clears it).
pub fn set_skill_mentions(&mut self, skills: Option<Vec<SkillMetadata>>) {
self.skills = skills;
}
@@ -505,6 +577,23 @@ impl ChatComposer {
/// Sets the `personality_command_enabled` flag.
pub fn set_personality_command_enabled(&mut self, enabled: bool) {
self.personality_command_enabled = enabled;
}
/// Turns voice transcription on or off.
///
/// Disabling also abandons any pending Space hold: the temporary hold element is
/// converted into a plain space and the hold bookkeeping is reset.
pub fn set_voice_transcription_enabled(&mut self, enabled: bool) {
    self.voice_state.transcription_enabled = enabled;
    if enabled {
        return;
    }

    let pending_element = self.voice_state.space_hold_element_id.take();
    self.voice_state.space_hold_started_at = None;
    self.voice_state.space_hold_trigger = None;
    self.voice_state.space_hold_repeat_seen = false;
    if let Some(element_id) = pending_element {
        let _ = self.textarea.replace_element_by_id(&element_id, " ");
    }
}
/// Reports whether voice transcription is switched on.
///
/// This function is only compiled on non-Linux targets, so the enabled flag is
/// the whole answer.
#[cfg(not(target_os = "linux"))]
fn voice_transcription_enabled(&self) -> bool {
    self.voice_state.transcription_enabled
}
/// Centralized feature gating keeps config checks out of call sites.
fn popups_enabled(&self) -> bool {
self.config.popups_enabled
@@ -568,6 +657,20 @@ impl ChatComposer {
}
}
/// Computes where the cursor should be drawn inside `area`.
///
/// Returns `None` (cursor hidden) when input is disabled or, on non-Linux
/// targets, while a voice recording is in progress.
pub fn cursor_pos(&self, area: Rect) -> Option<(u16, u16)> {
    if !self.input_enabled {
        return None;
    }

    // No cursor while voice input is being captured.
    #[cfg(not(target_os = "linux"))]
    if self.voice_state.voice.is_some() {
        return None;
    }

    let [_, _, textarea_rect, _] = self.layout_areas(area);
    let textarea_state = *self.textarea_state.borrow();
    self.textarea
        .cursor_pos_with_state(textarea_rect, textarea_state)
}
/// Returns true if the composer currently contains no user-entered input.
pub(crate) fn is_empty(&self) -> bool {
self.textarea.is_empty()
@@ -621,6 +724,10 @@ impl ChatComposer {
/// In all cases, clears any paste-burst Enter suppression state so a real paste cannot affect
/// the next user Enter key, then syncs popup state.
pub fn handle_paste(&mut self, pasted: String) -> bool {
#[cfg(not(target_os = "linux"))]
if self.voice_state.voice.is_some() {
return false;
}
let pasted = pasted.replace("\r\n", "\n").replace('\r', "\n");
let char_count = pasted.chars().count();
if char_count > LARGE_PASTE_CHAR_THRESHOLD {
@@ -633,9 +740,8 @@ impl ChatComposer {
{
self.textarea.insert_str(" ");
} else {
self.textarea.insert_str(&pasted);
self.insert_str(&pasted);
}
// Explicit paste events should not trigger Enter suppression.
self.paste_burst.clear_after_explicit_paste();
self.sync_popups();
true
@@ -866,6 +972,9 @@ impl ChatComposer {
local_image_paths: Vec<PathBuf>,
mention_bindings: Vec<MentionBinding>,
) {
#[cfg(not(target_os = "linux"))]
self.stop_all_transcription_spinners();
// Clear any existing content, placeholders, and attachments first.
self.textarea.set_text_clearing_elements("");
self.pending_pastes.clear();
@@ -1124,20 +1233,56 @@ impl ChatComposer {
/// Entry point for key events delivered by the main UI.
///
/// Returns the input result together with a flag telling the caller whether a
/// redraw is needed.
pub fn handle_key_event(&mut self, key_event: KeyEvent) -> (InputResult, bool) {
    let is_release = matches!(key_event.kind, KeyEventKind::Release);
    let is_space = matches!(key_event.code, KeyCode::Char(' '));

    // Any release event proves that this terminal reports key releases.
    if is_release {
        self.voice_state.key_release_supported = true;
    }

    // While recording, the recording handler owns every key. It stops on Space
    // release when releases are reported; otherwise Space repeats count as
    // "still held" and the stop is driven by the timeout in
    // `process_space_hold_trigger`.
    if let Some(outcome) = self.handle_key_event_while_recording(key_event) {
        return outcome;
    }

    // Nothing to do when input is disabled. Outside of recording, the only
    // release event of interest is Space (hold-to-talk, handled below).
    if !self.input_enabled || (is_release && !is_space) {
        return (InputResult::None, false);
    }

    // A different key arriving during a pending Space hold cancels the hold and
    // turns the temporary element into a plain space; the key itself is then
    // processed normally.
    if !is_space && self.voice_state.space_hold_started_at.take().is_some() {
        if let Some(element_id) = self.voice_state.space_hold_element_id.take() {
            let _ = self.textarea.replace_element_by_id(&element_id, " ");
        }
        self.voice_state.space_hold_trigger = None;
        self.voice_state.space_hold_repeat_seen = false;
    }

    if let Some(outcome) = self.handle_voice_space_key_event(&key_event) {
        return outcome;
    }

    let outcome = match &mut self.active_popup {
        ActivePopup::Command(_) => self.handle_key_event_with_slash_popup(key_event),
        ActivePopup::File(_) => self.handle_key_event_with_file_popup(key_event),
        ActivePopup::Skill(_) => self.handle_key_event_with_skill_popup(key_event),
        ActivePopup::None => self.handle_key_event_without_popup(key_event),
    };

    // Show, hide, or refresh the popup now that the key has been applied.
    self.sync_popups();
    outcome
}
@@ -2535,6 +2680,7 @@ impl ChatComposer {
// -------------------------------------------------------------
KeyEvent {
code: KeyCode::Up | KeyCode::Down,
kind: KeyEventKind::Press | KeyEventKind::Repeat,
..
}
| KeyEvent {
@@ -2588,6 +2734,136 @@ impl ChatComposer {
}
}
/// Linux build of the Space hold-to-talk handler: always returns `None`, so the
/// caller continues with its regular key handling.
#[cfg(target_os = "linux")]
fn handle_voice_space_key_event(
&mut self,
_key_event: &KeyEvent,
) -> Option<(InputResult, bool)> {
None
}
/// Space hold-to-talk handling for a single key event.
///
/// Returns `None` when the event should continue through the normal key
/// handling, or `Some((result, needs_redraw))` when it was consumed here.
#[cfg(not(target_os = "linux"))]
fn handle_voice_space_key_event(
    &mut self,
    key_event: &KeyEvent,
) -> Option<(InputResult, bool)> {
    let is_space = matches!(key_event.code, KeyCode::Char(' '));
    if !self.voice_transcription_enabled() || !is_space {
        return None;
    }

    match key_event.kind {
        KeyEventKind::Press => {
            if self.paste_burst.is_active() {
                return None;
            }

            // Empty composer: begin recording right away, no space is inserted.
            // If recording cannot start, the key falls through as normal input.
            if self.textarea.text().is_empty() {
                return self
                    .start_recording_with_placeholder()
                    .then_some((InputResult::None, true));
            }

            // A hold is already pending: swallow the press so key repeat neither
            // inserts more spaces nor restarts the timer. Without key-release
            // reporting, the extra press is evidence the key is still held.
            if self.voice_state.space_hold_started_at.is_some() {
                self.voice_state.space_hold_repeat_seen |=
                    !self.voice_state.key_release_supported;
                return Some((InputResult::None, false));
            }

            // Insert a named element rendered as a space; it is later replaced
            // (with a space or with nothing) once the hold is resolved.
            let hold_element_id = self.next_id();
            self.textarea
                .insert_named_element(" ", hold_element_id.clone());

            self.voice_state.space_hold_started_at = Some(Instant::now());
            self.voice_state.space_hold_element_id = Some(hold_element_id);
            self.voice_state.space_hold_repeat_seen = false;

            // The timer flips this flag after the hold delay and schedules a
            // frame; `process_space_hold_trigger` reads the flag.
            let timer_fired = Arc::new(AtomicBool::new(false));
            Self::schedule_space_hold_timer(
                Arc::clone(&timer_fired),
                self.frame_requester.clone(),
            );
            self.voice_state.space_hold_trigger = Some(timer_fired);

            Some((InputResult::None, true))
        }
        KeyEventKind::Repeat => {
            // No pending hold: let the repeat be handled as normal input.
            if self.voice_state.space_hold_started_at.is_none() {
                return None;
            }
            // Pending hold: swallow the repeat and, without key-release
            // reporting, remember it as "still held" evidence.
            self.voice_state.space_hold_repeat_seen |= !self.voice_state.key_release_supported;
            Some((InputResult::None, false))
        }
        KeyEventKind::Release => {
            // Released before the hold resolved: turn any pending element into a
            // plain space and reset the hold state. The release event is
            // consumed here whether or not a hold was pending.
            self.voice_state.space_hold_started_at = None;
            if let Some(element_id) = self.voice_state.space_hold_element_id.take() {
                let _ = self.textarea.replace_element_by_id(&element_id, " ");
            }
            self.voice_state.space_hold_trigger = None;
            self.voice_state.space_hold_repeat_seen = false;
            Some((InputResult::None, true))
        }
    }
}
/// Linux build of the while-recording key handler: always returns `None`, so the
/// caller continues with its regular key handling.
#[cfg(target_os = "linux")]
fn handle_key_event_while_recording(
&mut self,
_key_event: KeyEvent,
) -> Option<(InputResult, bool)> {
None
}
/// Key handling while a voice recording is active.
///
/// Returns `None` when nothing is being recorded. Otherwise the key is always
/// consumed: releasing Space, or pressing/repeating any other key, stops the
/// recording and starts transcription; everything else is swallowed.
#[cfg(not(target_os = "linux"))]
fn handle_key_event_while_recording(
    &mut self,
    key_event: KeyEvent,
) -> Option<(InputResult, bool)> {
    if self.voice_state.voice.is_none() {
        return None;
    }

    let is_space = matches!(key_event.code, KeyCode::Char(' '));
    let should_stop = match key_event.kind {
        KeyEventKind::Release => is_space,
        KeyEventKind::Press | KeyEventKind::Repeat => {
            // Without key-release reporting, each Space press/repeat refreshes
            // the "still held" timestamp.
            if is_space && !self.voice_state.key_release_supported {
                self.voice_state.space_recording_last_repeat_at = Some(Instant::now());
            }
            !is_space
        }
    };

    if !should_stop {
        // Swallow keys that do not end the recording.
        return Some((InputResult::None, false));
    }

    let needs_redraw = self.stop_recording_and_start_transcription();
    Some((InputResult::None, needs_redraw))
}
/// True when the composer text, ignoring leading whitespace, begins with `!`.
fn is_bang_shell_command(&self) -> bool {
    let text = self.textarea.text();
    text.trim_start().starts_with('!')
}
@@ -2607,8 +2883,6 @@ impl ChatComposer {
true
}
FlushResult::Typed(ch) => {
// Mirror insert_str() behavior so popups stay in sync when a
// pending fast char flushes as normal typed input.
self.textarea.insert_str(ch.to_string().as_str());
self.sync_popups();
true
@@ -2632,6 +2906,12 @@ impl ChatComposer {
/// otherwise `clear_window_after_non_char()` can leave buffered text waiting without a
/// timestamp to time out against.
fn handle_input_basic(&mut self, input: KeyEvent) -> (InputResult, bool) {
    match input.kind {
        KeyEventKind::Press | KeyEventKind::Repeat => {
            self.handle_input_basic_with_time(input, Instant::now())
        }
        // A release must not count as extra input (for example, the paste-burst
        // logic could otherwise append the same character twice).
        KeyEventKind::Release => (InputResult::None, false),
    }
}
@@ -2897,7 +3177,7 @@ impl ChatComposer {
.map(|items| if items.is_empty() { 0 } else { 1 })
}
fn sync_popups(&mut self) {
pub(crate) fn sync_popups(&mut self) {
self.sync_slash_command_elements();
if !self.popups_enabled() {
self.active_popup = ActivePopup::None;
@@ -3311,6 +3591,11 @@ impl ChatComposer {
self.has_focus = has_focus;
}
/// Returns `true` while a voice capture is held in `voice_state.voice`.
#[cfg(not(target_os = "linux"))]
pub(crate) fn is_recording(&self) -> bool {
self.voice_state.voice.is_some()
}
#[allow(dead_code)]
pub(crate) fn set_input_enabled(&mut self, enabled: bool, placeholder: Option<String>) {
self.input_enabled = enabled;
@@ -3344,6 +3629,32 @@ impl ChatComposer {
}
}
/// Arms the Space hold timer.
///
/// After the hold delay, `complete_space_hold_timer` is called with `flag` and
/// `frame`. The wait runs as a task on the current Tokio runtime when there is
/// one, and on a dedicated thread otherwise.
#[cfg(not(target_os = "linux"))]
fn schedule_space_hold_timer(flag: Arc<AtomicBool>, frame: Option<FrameRequester>) {
    const HOLD_DELAY: Duration = Duration::from_millis(500);

    match Handle::try_current() {
        Ok(runtime) => {
            runtime.spawn(async move {
                tokio::time::sleep(HOLD_DELAY).await;
                Self::complete_space_hold_timer(flag, frame);
            });
        }
        Err(_) => {
            thread::spawn(move || {
                thread::sleep(HOLD_DELAY);
                Self::complete_space_hold_timer(flag, frame);
            });
        }
    }
}
/// Marks the hold timer as elapsed and, when a frame requester was supplied,
/// schedules a frame.
#[cfg(not(target_os = "linux"))]
fn complete_space_hold_timer(flag: Arc<AtomicBool>, frame: Option<FrameRequester>) {
    flag.store(true, Ordering::Relaxed);
    if let Some(requester) = frame {
        requester.schedule_frame();
    }
}
pub(crate) fn set_status_line(&mut self, status_line: Option<Line<'static>>) -> bool {
if self.status_line_value == status_line {
return false;
@@ -3361,6 +3672,280 @@ impl ChatComposer {
}
}
#[cfg(not(target_os = "linux"))]
impl ChatComposer {
pub(crate) fn process_space_hold_trigger(&mut self) {
if self.voice_transcription_enabled()
&& let Some(flag) = self.voice_state.space_hold_trigger.as_ref()
&& flag.load(Ordering::Relaxed)
&& self.voice_state.space_hold_started_at.is_some()
&& self.voice_state.voice.is_none()
{
let _ = self.on_space_hold_timeout();
}
const SPACE_REPEAT_INITIAL_GRACE_MILLIS: u64 = 700;
const SPACE_REPEAT_IDLE_TIMEOUT_MILLIS: u64 = 250;
if !self.voice_state.key_release_supported && self.voice_state.voice.is_some() {
let now = Instant::now();
let initial_grace = Duration::from_millis(SPACE_REPEAT_INITIAL_GRACE_MILLIS);
let repeat_idle_timeout = Duration::from_millis(SPACE_REPEAT_IDLE_TIMEOUT_MILLIS);
if let Some(started_at) = self.voice_state.space_recording_started_at
&& now.saturating_duration_since(started_at) >= initial_grace
{
let should_stop = match self.voice_state.space_recording_last_repeat_at {
Some(last_repeat_at) => {
now.saturating_duration_since(last_repeat_at) >= repeat_idle_timeout
}
None => true,
};
if should_stop {
let _ = self.stop_recording_and_start_transcription();
}
}
}
}
/// Resolves a pending Space hold once the 500ms hold delay has elapsed.
///
/// On terminals without key-release reporting, voice capture only starts if a
/// repeated Space event was observed while the hold was pending; otherwise the
/// key press is kept as an ordinary typed space.
///
/// Returns `true` when the composer changed, `false` when transcription is
/// disabled, a recording is already active, no hold is pending, or recording
/// could not be started.
pub(crate) fn on_space_hold_timeout(&mut self) -> bool {
    if !self.voice_transcription_enabled()
        || self.voice_state.voice.is_some()
        || self.voice_state.space_hold_started_at.is_none()
    {
        return false;
    }

    let lacks_hold_evidence =
        !self.voice_state.key_release_supported && !self.voice_state.space_hold_repeat_seen;

    // The hold is resolved either way, so reset the pending state up front.
    let hold_element_id = self.voice_state.space_hold_element_id.take();
    self.voice_state.space_hold_started_at = None;
    self.voice_state.space_hold_trigger = None;
    self.voice_state.space_hold_repeat_seen = false;

    if lacks_hold_evidence {
        // Treat the press as a normal typed space.
        if let Some(element_id) = hold_element_id {
            let _ = self.textarea.replace_element_by_id(&element_id, " ");
        }
        return true;
    }

    // Entering voice capture: keep the typed space unless the text already has a
    // space right before the temporary element, then drop the element before the
    // recording placeholder is inserted.
    if let Some(element_id) = hold_element_id {
        let preceded_by_space = self
            .textarea
            .named_element_range(&element_id)
            .is_some_and(|range| self.textarea.text()[..range.start].ends_with(' '));
        let replacement = if preceded_by_space { "" } else { " " };
        let _ = self.textarea.replace_element_by_id(&element_id, replacement);
    }

    self.start_recording_with_placeholder()
}
/// Stop recording if active, update the placeholder, and spawn background transcription.
///
/// Returns `true` if the UI should redraw and `false` when no recording was in
/// progress.
///
/// The recording placeholder is removed without transcribing when the capture is
/// shorter than `MIN_DURATION_SECONDS`, and also when stopping the capture fails,
/// so a failed stop does not leave a stale placeholder element in the composer.
fn stop_recording_and_start_transcription(&mut self) -> bool {
    const MIN_DURATION_SECONDS: f32 = 1.0;

    let Some(vc) = self.voice_state.voice.take() else {
        return false;
    };
    self.voice_state.space_recording_started_at = None;
    self.voice_state.space_recording_last_repeat_at = None;

    let audio = match vc.stop() {
        Ok(audio) => audio,
        Err(e) => {
            tracing::error!("failed to stop voice capture: {e}");
            // There is no audio to transcribe, so drop the placeholder too.
            if let Some(id) = self.voice_state.recording_placeholder_id.take() {
                let _ = self.textarea.replace_element_by_id(&id, "");
            }
            return true;
        }
    };

    // Duration = samples / (sample rate * channels); a zero rate counts as an
    // empty recording.
    let total_samples = audio.data.len() as f32;
    let samples_per_second = (audio.sample_rate as f32) * (audio.channels as f32);
    let duration_seconds = if samples_per_second > 0.0 {
        total_samples / samples_per_second
    } else {
        0.0
    };

    // Too short: remove the placeholder immediately and skip the transcribing
    // state entirely.
    if duration_seconds < MIN_DURATION_SECONDS {
        if let Some(id) = self.voice_state.recording_placeholder_id.take() {
            let _ = self.textarea.replace_element_by_id(&id, "");
        }
        return true;
    }

    // Reuse the recording placeholder for the transcribing state, or allocate a
    // fresh id when none was recorded.
    let id = match self.voice_state.recording_placeholder_id.take() {
        Some(id) => id,
        None => self.next_id(),
    };

    // The text before the placeholder (or the whole text when the placeholder
    // cannot be found) is handed to the transcriber as its prompt.
    let prompt_source = match self.textarea.named_element_range(&id) {
        Some(range) => self.textarea.text()[..range.start].to_string(),
        None => self.textarea.text().to_string(),
    };

    // Reset the placeholder text right away; the spinner task started below
    // keeps updating it until transcription finishes (or the spinner times out).
    let _ = self.textarea.update_named_element_by_id(&id, "");
    self.spawn_transcribing_spinner(id.clone());

    let tx = self.app_event_tx.clone();
    crate::voice::transcribe_async(id, audio, Some(prompt_source), tx);
    true
}
/// Begins voice capture and inserts the placeholder element that the live meter
/// updates.
///
/// Returns `true` if recording began and the UI should redraw; `false` if the
/// capture could not be started (the failure is logged).
fn start_recording_with_placeholder(&mut self) -> bool {
    let capture = match crate::voice::VoiceCapture::start() {
        Ok(capture) => capture,
        Err(e) => {
            self.voice_state.space_recording_started_at = None;
            self.voice_state.space_recording_last_repeat_at = None;
            tracing::error!("failed to start voice capture: {e}");
            return false;
        }
    };

    self.voice_state.voice = Some(capture);
    // The start time is only tracked when key releases are not reported; it
    // feeds the repeat-based stop in `process_space_hold_trigger`.
    self.voice_state.space_recording_started_at =
        (!self.voice_state.key_release_supported).then(Instant::now);
    self.voice_state.space_recording_last_repeat_at = None;

    // Placeholder element for the meter (no label).
    let placeholder_id = self.next_id();
    self.textarea
        .insert_named_element("", placeholder_id.clone());
    self.voice_state.recording_placeholder_id = Some(placeholder_id.clone());

    // Start the metering animation from the capture's shared handles.
    if let Some(active) = self.voice_state.voice.as_ref() {
        let samples = active.data_arc();
        let stopped = active.stopped_flag();
        let sample_rate = active.sample_rate();
        let channels = active.channels();
        let last_peak = active.last_peak_arc();
        self.spawn_recording_meter(
            placeholder_id,
            sample_rate,
            channels,
            samples,
            last_peak,
            stopped,
        );
    }
    true
}
fn spawn_recording_meter(
&self,
id: String,
_sample_rate: u32,
_channels: u16,
_data: Arc<Mutex<Vec<i16>>>,
last_peak: Arc<std::sync::atomic::AtomicU16>,
stop: Arc<std::sync::atomic::AtomicBool>,
) {
let tx = self.app_event_tx.clone();
let task = move || {
use std::time::Duration;
let mut meter = crate::voice::RecordingMeterState::new();
loop {
if stop.load(Ordering::Relaxed) {
break;
}
let text = meter.next_text(last_peak.load(Ordering::Relaxed));
tx.send(crate::app_event::AppEvent::UpdateRecordingMeter {
id: id.clone(),
text,
});
thread::sleep(Duration::from_millis(100));
}
};
if let Ok(handle) = Handle::try_current() {
handle.spawn_blocking(task);
} else {
thread::spawn(task);
}
}
fn spawn_transcribing_spinner(&mut self, id: String) {
self.stop_transcription_spinner(&id);
let stop = Arc::new(AtomicBool::new(false));
self.spinner_stop_flags
.insert(id.clone(), Arc::clone(&stop));
let tx = self.app_event_tx.clone();
let task = move || {
use std::time::Duration;
let frames: Vec<&'static str> = vec!["", "", "", "", "", "", "", "", "", ""];
let mut i: usize = 0;
// Safety stop after ~60s to avoid a runaway task if events are lost.
let max_ticks = 600usize; // 600 * 100ms = 60s
for _ in 0..max_ticks {
if stop.load(Ordering::Relaxed) {
break;
}
let text = frames[i % frames.len()].to_string();
tx.send(crate::app_event::AppEvent::UpdateRecordingMeter {
id: id.clone(),
text,
});
i = i.wrapping_add(1);
thread::sleep(Duration::from_millis(100));
}
};
if let Ok(handle) = Handle::try_current() {
handle.spawn_blocking(task);
} else {
thread::spawn(task);
}
}
/// Signals the spinner registered for `id` (if any) to stop and forgets its flag.
fn stop_transcription_spinner(&mut self, id: &str) {
    let Some(stop_flag) = self.spinner_stop_flags.remove(id) else {
        return;
    };
    stop_flag.store(true, Ordering::Relaxed);
}
/// Signals every registered spinner to stop and empties the registry.
fn stop_all_transcription_spinners(&mut self) {
    self.spinner_stop_flags
        .drain()
        .for_each(|(_, stop_flag)| stop_flag.store(true, Ordering::Relaxed));
}
/// Stops the spinner for `id` and replaces the placeholder element with `text`.
/// The result of the replacement is ignored.
pub fn replace_transcription(&mut self, id: &str, text: &str) {
self.stop_transcription_spinner(id);
let _ = self.textarea.replace_element_by_id(id, text);
}
/// Updates the text of the named element `id` in place and returns the
/// textarea's result for that update.
pub fn update_transcription_in_place(&mut self, id: &str, text: &str) -> bool {
self.textarea.update_named_element_by_id(id, text)
}
/// Stops the spinner for `id` and removes its placeholder element by replacing
/// it with empty text.
pub fn remove_transcription_placeholder(&mut self, id: &str) {
    self.replace_transcription(id, "");
}
}
fn skill_display_name(skill: &SkillMetadata) -> &str {
skill
.interface
@@ -3787,6 +4372,15 @@ fn prompt_selection_action(
}
}
impl Drop for ChatComposer {
    /// Signals every spinner task that is still registered to stop when the
    /// composer goes away.
    fn drop(&mut self) {
        self.spinner_stop_flags
            .drain()
            .for_each(|(_, stop_flag)| stop_flag.store(true, Ordering::Relaxed));
    }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -5527,11 +6121,19 @@ mod tests {
/// Types `chars` one at a time, pausing for the recommended paste-flush delay
/// after each key so the input is not treated as a paste burst. Each Space press
/// is followed by a matching Space release.
fn type_chars_humanlike(composer: &mut ChatComposer, chars: &[char]) {
    use crossterm::event::KeyCode;
    use crossterm::event::KeyEvent;
    use crossterm::event::KeyEventKind;
    use crossterm::event::KeyModifiers;

    for ch in chars.iter().copied() {
        let press = KeyEvent::new(KeyCode::Char(ch), KeyModifiers::NONE);
        let _ = composer.handle_key_event(press);
        std::thread::sleep(ChatComposer::recommended_paste_flush_delay());
        let _ = composer.flush_paste_burst_if_due();

        if ch != ' ' {
            continue;
        }
        let release = KeyEvent::new_with_kind(
            KeyCode::Char(' '),
            KeyModifiers::NONE,
            KeyEventKind::Release,
        );
        let _ = composer.handle_key_event(release);
    }
}
@@ -5621,6 +6223,195 @@ mod tests {
assert!(found_error, "expected error history cell to be sent");
}
/// With transcription left disabled, a Space press followed by its release just
/// types a space and leaves no hold state behind.
#[test]
fn voice_transcription_disabled_treats_space_as_normal_input() {
    use crossterm::event::KeyCode;
    use crossterm::event::KeyEvent;
    use crossterm::event::KeyEventKind;
    use crossterm::event::KeyModifiers;

    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(tx),
        false,
        "Ask Codex to do anything".to_string(),
        true,
    );
    composer.set_text_content("x".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();

    let space_press = KeyEvent::new(KeyCode::Char(' '), KeyModifiers::NONE);
    let space_release = KeyEvent::new_with_kind(
        KeyCode::Char(' '),
        KeyModifiers::NONE,
        KeyEventKind::Release,
    );
    let _ = composer.handle_key_event(space_press);
    let _ = composer.handle_key_event(space_release);

    assert_eq!("x ", composer.textarea.text());
    let voice = &composer.voice_state;
    assert!(voice.space_hold_started_at.is_none());
    assert!(voice.space_hold_element_id.is_none());
    assert!(voice.space_hold_trigger.is_none());
    assert!(!voice.space_hold_repeat_seen);
}
/// Without key-release reporting and without any repeat, a fired hold timer
/// leaves the pending element as a plain typed space.
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_without_release_or_repeat_keeps_typed_space() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_voice_transcription_enabled(true);
    composer.set_text_content("x".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();

    // Hand-build a pending hold whose timer has already fired.
    let hold_id = "space-hold".to_string();
    composer.textarea.insert_named_element(" ", hold_id.clone());
    composer.voice_state.space_hold_started_at = Some(Instant::now());
    composer.voice_state.space_hold_element_id = Some(hold_id);
    composer.voice_state.space_hold_trigger = Some(Arc::new(AtomicBool::new(true)));
    composer.voice_state.key_release_supported = false;
    composer.voice_state.space_hold_repeat_seen = false;
    assert_eq!("x ", composer.textarea.text());

    composer.process_space_hold_trigger();

    assert_eq!("x ", composer.textarea.text());
    assert!(composer.voice_state.space_hold_started_at.is_none());
    assert!(!composer.voice_state.space_hold_repeat_seen);
}
/// Without key-release reporting but with a repeat seen, a fired hold timer
/// takes the hold-to-talk path: the typed space stays and the pending state is
/// cleared.
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_with_repeat_uses_hold_path_without_release() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_voice_transcription_enabled(true);
    composer.set_text_content("x".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();

    // Hand-build a pending hold whose timer has fired and which saw a repeat.
    let hold_id = "space-hold".to_string();
    composer.textarea.insert_named_element(" ", hold_id.clone());
    composer.voice_state.space_hold_started_at = Some(Instant::now());
    composer.voice_state.space_hold_element_id = Some(hold_id);
    composer.voice_state.space_hold_trigger = Some(Arc::new(AtomicBool::new(true)));
    composer.voice_state.key_release_supported = false;
    composer.voice_state.space_hold_repeat_seen = true;

    composer.process_space_hold_trigger();

    assert_eq!("x ", composer.textarea.text());
    assert!(composer.voice_state.space_hold_started_at.is_none());
    assert!(!composer.voice_state.space_hold_repeat_seen);

    // If a capture actually started, stop it again before the test ends.
    if composer.is_recording() {
        let _ = composer.stop_recording_and_start_transcription();
    }
}
/// When the text already ends in a space before the pending element, resolving
/// the hold must not leave a second space behind.
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_with_repeat_does_not_duplicate_existing_space() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_voice_transcription_enabled(true);
    composer.set_text_content("x ".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();

    // Hand-build a pending hold whose timer has fired and which saw a repeat.
    let hold_id = "space-hold".to_string();
    composer.textarea.insert_named_element(" ", hold_id.clone());
    composer.voice_state.space_hold_started_at = Some(Instant::now());
    composer.voice_state.space_hold_element_id = Some(hold_id);
    composer.voice_state.space_hold_trigger = Some(Arc::new(AtomicBool::new(true)));
    composer.voice_state.key_release_supported = false;
    composer.voice_state.space_hold_repeat_seen = true;

    composer.process_space_hold_trigger();

    assert_eq!("x ", composer.textarea.text());
    assert!(composer.voice_state.space_hold_started_at.is_none());
    assert!(!composer.voice_state.space_hold_repeat_seen);

    // If a capture actually started, stop it again before the test ends.
    if composer.is_recording() {
        let _ = composer.stop_recording_and_start_transcription();
    }
}
/// `replace_transcription` must set the placeholder's stop flag, drop it from the
/// registry, and swap in the transcribed text.
#[cfg(not(target_os = "linux"))]
#[test]
fn replace_transcription_stops_spinner_for_placeholder() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );

    let placeholder_id = "voice-placeholder".to_string();
    composer
        .textarea
        .insert_named_element("", placeholder_id.clone());
    let stop_flag = Arc::new(AtomicBool::new(false));
    composer
        .spinner_stop_flags
        .insert(placeholder_id.clone(), Arc::clone(&stop_flag));

    composer.replace_transcription(&placeholder_id, "transcribed text");

    assert!(stop_flag.load(Ordering::Relaxed));
    assert!(!composer.spinner_stop_flags.contains_key(&placeholder_id));
    assert_eq!(composer.textarea.text(), "transcribed text");
}
/// Replacing the composer text must stop every registered spinner and leave the
/// registry empty.
#[cfg(not(target_os = "linux"))]
#[test]
fn set_text_content_stops_all_transcription_spinners() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let mut composer = ChatComposer::new(
        true,
        AppEventSender::new(tx),
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );

    let first_flag = Arc::new(AtomicBool::new(false));
    let second_flag = Arc::new(AtomicBool::new(false));
    composer
        .spinner_stop_flags
        .insert("voice-1".to_string(), Arc::clone(&first_flag));
    composer
        .spinner_stop_flags
        .insert("voice-2".to_string(), Arc::clone(&second_flag));

    composer.set_text_content("draft".to_string(), Vec::new(), Vec::new());

    assert!(first_flag.load(Ordering::Relaxed));
    assert!(second_flag.load(Ordering::Relaxed));
    assert!(composer.spinner_stop_flags.is_empty());
}
#[test]
fn extract_args_supports_quoted_paths_single_arg() {
let args = extract_positional_args_for_prompt_line(

View File

@@ -33,6 +33,7 @@ use codex_protocol::request_user_input::RequestUserInputEvent;
use codex_protocol::user_input::TextElement;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyEventKind;
use ratatui::buffer::Buffer;
use ratatui::layout::Rect;
use ratatui::text::Line;
@@ -204,8 +205,8 @@ impl BottomPane {
placeholder_text,
disable_paste_burst,
);
composer.set_frame_requester(frame_requester.clone());
composer.set_skill_mentions(skills);
Self {
composer,
view_stack: Vec::new(),
@@ -291,6 +292,11 @@ impl BottomPane {
self.request_redraw();
}
/// Forwards the voice-transcription `enabled` flag to the composer and
/// requests a redraw.
pub fn set_voice_transcription_enabled(&mut self, enabled: bool) {
self.composer.set_voice_transcription_enabled(enabled);
self.request_redraw();
}
/// Update the key hint shown next to queued messages so it matches the
/// binding that `ChatWidget` actually listens for.
pub(crate) fn set_queued_message_edit_binding(&mut self, binding: KeyBinding) {
@@ -327,8 +333,23 @@ impl BottomPane {
/// Forward a key event to the active view or the composer.
pub fn handle_key_event(&mut self, key_event: KeyEvent) -> InputResult {
// Do not globally intercept space; only composer handles hold-to-talk.
// While recording, route all keys to the composer so it can stop on release or next key.
#[cfg(not(target_os = "linux"))]
if self.composer.is_recording() {
let (_ir, needs_redraw) = self.composer.handle_key_event(key_event);
if needs_redraw {
self.request_redraw();
}
return InputResult::None;
}
// If a modal/view is active, handle it here; otherwise forward to composer.
if !self.view_stack.is_empty() {
if key_event.kind == KeyEventKind::Release {
return InputResult::None;
}
// We need three pieces of information after routing the key:
// whether Esc completed the view, whether the view finished for any
// reason, and whether a paste-burst timer should be scheduled.
@@ -432,6 +453,7 @@ impl BottomPane {
}
} else {
let needs_redraw = self.composer.handle_paste(pasted);
self.composer.sync_popups();
if needs_redraw {
self.request_redraw();
}
@@ -440,9 +462,18 @@ impl BottomPane {
/// Inserts `text` through the composer's `insert_str`, then resyncs the
/// composer popups and requests a redraw.
pub(crate) fn insert_str(&mut self, text: &str) {
self.composer.insert_str(text);
self.composer.sync_popups();
self.request_redraw();
}
// Space hold timeout is handled inside ChatComposer via an internal timer.
/// Pre-draw hook: on non-Linux targets it calls the composer's
/// `process_space_hold_trigger`; on every target it then resyncs the
/// composer popups.
pub(crate) fn pre_draw_tick(&mut self) {
// Allow composer to process any time-based transitions before drawing
// (the `cfg` attribute below gates only the single statement that follows it).
#[cfg(not(target_os = "linux"))]
self.composer.process_space_hold_trigger();
self.composer.sync_popups();
}
/// Replace the composer text with `text`.
///
/// This is intended for fresh input where mention linkage does not need to
@@ -895,6 +926,7 @@ impl BottomPane {
.on_history_entry_response(log_id, offset, entry);
if updated {
self.composer.sync_popups();
self.request_redraw();
}
}
@@ -973,6 +1005,30 @@ impl BottomPane {
}
}
#[cfg(not(target_os = "linux"))]
impl BottomPane {
    /// Delegates to the composer's `replace_transcription` for the element
    /// identified by `id`, then resyncs popups and requests a redraw.
    pub(crate) fn replace_transcription(&mut self, id: &str, text: &str) {
        self.composer.replace_transcription(id, text);
        self.composer.sync_popups();
        self.request_redraw();
    }

    /// Delegates to the composer's `update_transcription_in_place` and returns
    /// its result. Popups are resynced and a redraw is requested only when the
    /// composer reports that something was updated.
    pub(crate) fn update_transcription_in_place(&mut self, id: &str, text: &str) -> bool {
        if !self.composer.update_transcription_in_place(id, text) {
            return false;
        }
        self.composer.sync_popups();
        self.request_redraw();
        true
    }

    /// Delegates to the composer's `remove_transcription_placeholder` for
    /// `id`, then resyncs popups and requests a redraw.
    pub(crate) fn remove_transcription_placeholder(&mut self, id: &str) {
        self.composer.remove_transcription_placeholder(id);
        self.composer.sync_popups();
        self.request_redraw();
    }
}
impl Renderable for BottomPane {
fn render(&self, area: Rect, buf: &mut Buffer) {
self.as_renderable().render(area, buf);
@@ -993,6 +1049,7 @@ mod tests {
use crate::status_indicator_widget::StatusDetailsCapitalization;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SkillScope;
use crossterm::event::KeyEventKind;
use crossterm::event::KeyModifiers;
use insta::assert_snapshot;
use ratatui::buffer::Buffer;
@@ -1571,4 +1628,58 @@ mod tests {
assert_eq!(on_ctrl_c_calls.get(), 0);
assert_eq!(handle_calls.get(), 1);
}
#[test]
fn release_events_are_ignored_for_active_view() {
    /// Minimal view that counts how many key events reach it.
    #[derive(Default)]
    struct CountingView {
        handle_calls: Rc<Cell<usize>>,
    }

    impl Renderable for CountingView {
        fn render(&self, _area: Rect, _buf: &mut Buffer) {}

        fn desired_height(&self, _width: u16) -> u16 {
            0
        }
    }

    impl BottomPaneView for CountingView {
        fn handle_key_event(&mut self, _key_event: KeyEvent) {
            let seen_so_far = self.handle_calls.get();
            self.handle_calls.set(seen_so_far.saturating_add(1));
        }
    }

    let (event_tx, _event_rx) = unbounded_channel::<AppEvent>();
    let mut pane = BottomPane::new(BottomPaneParams {
        app_event_tx: AppEventSender::new(event_tx),
        frame_requester: FrameRequester::test_dummy(),
        has_input_focus: true,
        enhanced_keys_supported: false,
        placeholder_text: "Ask Codex to do anything".to_string(),
        disable_paste_burst: false,
        animations_enabled: true,
        skills: Some(Vec::new()),
    });

    // Share the counter with the view so the test can observe it afterwards.
    let counter = Rc::new(Cell::new(0));
    pane.push_view(Box::new(CountingView {
        handle_calls: Rc::clone(&counter),
    }));

    // Send a press followed by a release of the same key; only the press is
    // expected to reach the active view.
    for kind in [KeyEventKind::Press, KeyEventKind::Release] {
        pane.handle_key_event(KeyEvent::new_with_kind(
            KeyCode::Down,
            KeyModifiers::NONE,
            kind,
        ));
    }

    assert_eq!(counter.get(), 1);
}
}

View File

@@ -3,6 +3,7 @@ use codex_protocol::user_input::ByteRange;
use codex_protocol::user_input::TextElement as UserTextElement;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyEventKind;
use crossterm::event::KeyModifiers;
use ratatui::buffer::Buffer;
use ratatui::layout::Rect;
@@ -27,6 +28,7 @@ fn is_word_separator(ch: char) -> bool {
/// A span of the text area's content that is tracked as a single element.
struct TextElement {
// Numeric id handed out by `next_element_id`.
id: u64,
// Span of the element within the text; ends are computed with `str::len`,
// so these are byte offsets.
range: Range<usize>,
// Optional caller-supplied string id; the `*_by_id` / `named_*` helpers look
// elements up by this value. `None` for unnamed elements.
name: Option<String>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -101,6 +103,7 @@ impl TextArea {
self.elements.push(TextElement {
id,
range: start..end,
name: None,
});
}
self.elements.sort_by_key(|e| e.range.start);
@@ -256,6 +259,11 @@ impl TextArea {
}
pub fn input(&mut self, event: KeyEvent) {
// Only process key presses or repeats; ignore releases to avoid inserting
// characters on key-up events when modifiers are no longer reported.
if !matches!(event.kind, KeyEventKind::Press | KeyEventKind::Repeat) {
return;
}
match event {
// Some terminals (or configurations) send Control key chords as
// C0 control characters without reporting the CONTROL modifier.
@@ -886,6 +894,73 @@ impl TextArea {
id
}
/// Inserts `text` at the position `clamp_pos_for_insertion` returns for the
/// current cursor, registers the inserted span as an element named `id`, and
/// leaves the cursor at the end of that span.
#[cfg(not(target_os = "linux"))]
pub fn insert_named_element(&mut self, text: &str, id: String) {
    let insert_at = self.clamp_pos_for_insertion(self.cursor_pos);
    self.insert_str_at(insert_at, text);
    // `str::len` is a byte count, so the element span is in byte offsets.
    let span = insert_at..insert_at + text.len();
    let cursor_target = span.end;
    self.add_element_with_id(span, Some(id));
    // Place the cursor just past the inserted element.
    self.set_cursor(cursor_target);
}
/// Replaces the text of the first element named `id` with `text` and then
/// drops every element carrying that name.
///
/// Returns `true` if such an element existed, `false` (with no changes made)
/// otherwise.
pub fn replace_element_by_id(&mut self, id: &str, text: &str) -> bool {
    // Copy the range out first so the borrow of `self.elements` ends before
    // the text is mutated.
    let target = self
        .elements
        .iter()
        .find(|e| e.name.as_deref() == Some(id))
        .map(|e| e.range.clone());
    match target {
        Some(range) => {
            self.replace_range_raw(range, text);
            self.elements.retain(|e| e.name.as_deref() != Some(id));
            true
        }
        None => false,
    }
}
/// Update the text of the element named `id` in place, keeping it registered
/// under the same name so callers can update it again later
/// (e.g. recording -> transcribing -> final).
///
/// The element is re-registered after the text is replaced, so it keeps its
/// name but receives a freshly allocated numeric id.
///
/// Returns `true` if an element named `id` existed and was rewritten, `false`
/// (with no changes made) otherwise.
#[allow(dead_code)]
pub fn update_named_element_by_id(&mut self, id: &str, text: &str) -> bool {
    // Copy the range out first so the borrow of `self.elements` ends before
    // the text is mutated.
    let found = self
        .elements
        .iter()
        .find(|e| e.name.as_deref() == Some(id))
        .map(|e| e.range.clone());
    if let Some(old_range) = found {
        let start = old_range.start;
        self.replace_range_raw(old_range, text);
        // Do not rely on `replace_range_raw` having dropped the old entry:
        // remove any entry still carrying this name (mirroring
        // `replace_element_by_id`) so re-adding below can never leave two
        // elements with the same name.
        self.elements.retain(|e| e.name.as_deref() != Some(id));
        // Re-add the element under the same name with its new range.
        let new_end = start + text.len();
        self.add_element_with_id(start..new_end, Some(id.to_string()));
        true
    } else {
        false
    }
}
/// Returns a copy of the range of the first element named `id`, or `None`
/// when no element carries that name.
#[allow(dead_code)]
pub fn named_element_range(&self, id: &str) -> Option<std::ops::Range<usize>> {
    self.elements
        .iter()
        .find_map(|e| (e.name.as_deref() == Some(id)).then(|| e.range.clone()))
}
/// Registers a new element covering `range` with the optional `name`.
///
/// Allocates a numeric id via `next_element_id`, appends the element, keeps
/// `elements` ordered by range start, and returns the allocated id.
fn add_element_with_id(&mut self, range: Range<usize>, name: Option<String>) -> u64 {
    let id = self.next_element_id();
    self.elements.push(TextElement { id, range, name });
    self.elements.sort_by_key(|e| e.range.start);
    id
}
/// Registers `range` as an unnamed element (name `None`) via
/// `add_element_with_id` and returns the numeric id it allocates.
fn add_element(&mut self, range: Range<usize>) -> u64 {
self.add_element_with_id(range, None)
}
/// Mark an existing text range as an atomic element without changing the text.
///
/// This is used to convert already-typed tokens (like `/plan`) into elements
@@ -910,12 +985,7 @@ impl TextArea {
{
return None;
}
let id = self.next_element_id();
self.elements.push(TextElement {
id,
range: start..end,
});
self.elements.sort_by_key(|e| e.range.start);
let id = self.add_element(start..end);
Some(id)
}
@@ -931,20 +1001,11 @@ impl TextArea {
len_before != self.elements.len()
}
/// Pushes a new element covering `range` with a freshly allocated numeric id,
/// re-sorts the elements by range start, and returns that id.
// NOTE(review): this literal sets only `id` and `range`, while `TextElement`
// also declares `name`, and another `add_element` that delegates to
// `add_element_with_id` exists — presumably this is the superseded copy; confirm.
fn add_element(&mut self, range: Range<usize>) -> u64 {
let id = self.next_element_id();
let elem = TextElement { id, range };
self.elements.push(elem);
self.elements.sort_by_key(|e| e.range.start);
id
}
/// Hands out the current value of the element-id counter and advances the
/// counter by one, saturating at `u64::MAX` instead of overflowing.
fn next_element_id(&mut self) -> u64 {
    let allocated = self.next_element_id;
    self.next_element_id = allocated.saturating_add(1);
    allocated
}
fn find_element_containing(&self, pos: usize) -> Option<usize> {
self.elements
.iter()