Files
codex/prs/bolinfest/study/PR-1467-study.md
2025-09-02 15:17:45 -07:00

3.8 KiB
Raw Blame History

DOs

  • Compute Byte Offsets From Char Cursor: Convert a char-based cursor column to a safe byte offset before slicing.
let cursor_byte = line.chars().take(col).map(|c| c.len_utf8()).sum::<usize>();
let before = &line[..cursor_byte];
let after  = &line[cursor_byte..];
  • Find Boundaries With char_indices + is_whitespace: Use Unicode-aware whitespace detection and advance by the found char's byte length.
let start = before
    .char_indices()
    .rfind(|(_, c)| c.is_whitespace())
    .map(|(i, c)| i + c.len_utf8())
    .unwrap_or(0);

let end_rel = after
    .char_indices()
    .find(|(_, c)| c.is_whitespace())
    .map(|(i, _)| i)
    .unwrap_or(after.len());

let end = cursor_byte + end_rel;
  • Honor All Unicode Whitespace: Treat tabs and full-width spaces as token boundaries.
let full_width_space = '\u{3000}'; // IDEOGRAPHIC SPACE
assert!(full_width_space.is_whitespace());
  • Early-Return On Invalid Spans: Bail out if the computed range is empty or not an @-token.
if start >= end { return None; }
let token = &line[start..end];
if !token.starts_with('@') || token == "@" { return None; }
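  • Replace Using Byte Slices And Inline Formatting: Build the new line with the computed byte indices and captured identifiers where possible (inline capture only accepts plain identifiers, so slice expressions such as &line[..start] are still passed as positional arguments).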
let new_line = format!("{}{} {}", &line[..start], replacement, &line[end..]);
  • Use Captured Identifiers In Messages: Prefer inline names in assert!/format! rather than positional args.
assert_eq!(result, expected, "Failed: {description} - input: '{input}', cursor: {cursor_pos}");
  • Test ASCII + Unicode Thoroughly: Cover mixed scripts, emoji, tabs, and full-width spaces.
let cases = vec![
    ("@İstanbul", 3, Some("İstanbul".to_string())),
    ("test\u{3000}@file", 6, Some("file".to_string())),
    ("aaa@aaa", 4, None),
];
for (input, cursor_pos, expected) in cases {
    let mut ta = tui_textarea::TextArea::default();
    ta.insert_str(input);
    ta.move_cursor(tui_textarea::CursorMove::Jump(0, cursor_pos));
    let result = ChatComposer::current_at_token(&ta);
    assert_eq!(result, expected, "input='{input}', cursor={cursor_pos}");
}

DONTs

  • Don't Mix Char Columns With Byte Indices: Never clamp a char column with line.len() (bytes).
// wrong
let col = col.min(line.len());

// right
let cursor_byte = line.chars().take(col).map(|c| c.len_utf8()).sum::<usize>();
  • Don't Add A Fixed +1 For Multibyte Chars: Advance by the matched char's byte length.
// wrong
let start = before.rfind(|c: char| c.is_whitespace()).map(|i| i + 1).unwrap_or(0);

// right
let start = before.char_indices().rfind(|(_, c)| c.is_whitespace())
    .map(|(i, c)| i + c.len_utf8()).unwrap_or(0);
  • Don't Assume ASCII Space Only: Searching for ' ' misses tabs and CJK spaces.
// wrong
let end_rel = after.find(' ').unwrap_or(after.len());

// right
let end_rel = after.char_indices()
    .find(|(_, c)| c.is_whitespace())
    .map(|(i, _)| i)
    .unwrap_or(after.len());
  • Don't Slice With A Char Index: Convert to a byte offset first.
// wrong
let before = &line[..col]; // col is chars, not bytes

// right
let cursor_byte = line.chars().take(col).map(|c| c.len_utf8()).sum::<usize>();
let before = &line[..cursor_byte];
  • Don't Treat Connected Tokens As Mentions: Require a boundary (whitespace or start of line) before @.
use tui_textarea::{TextArea, CursorMove};
let mut ta = TextArea::default();
ta.insert_str("aaa@aaa");
ta.move_cursor(CursorMove::Jump(0, 4));
assert_eq!(ChatComposer::current_at_token(&ta), None);
  • Don't Use Positional Formatting When Capture Works: Prefer {name} over {} with args.
// wrong
assert_eq!(result, expected, "Failed: {} - '{}'", description, input);

// right
assert_eq!(result, expected, "Failed: {description} - '{input}'");