mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
feat(tui): add multi-language syntax highlighting via syntect
Replace tree-sitter-bash and tree-sitter-highlight with syntect for code highlighting. Syntect's HighlightLines API resolves overlapping spans internally, significantly simplifying the implementation.

- Add syntect workspace dependency; remove tree-sitter-bash/highlight from tui
- Rewrite highlight.rs with syntect singletons (SyntaxSet + base16-ocean.dark theme)
- Normalize language aliases (js→javascript, py→python, rs→rust, etc.)
- Multi-strategy syntax lookup: token, name, case-insensitive name, extension
- Add guardrails: skip highlighting for inputs >512KB or >10K lines
- Add comprehensive tests for style conversion, language resolution, and content preservation
This commit is contained in:
95
codex-rs/Cargo.lock
generated
95
codex-rs/Cargo.lock
generated
@@ -852,6 +852,15 @@ version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.5.3"
|
||||
@@ -2297,6 +2306,7 @@ dependencies = [
|
||||
"strum 0.27.2",
|
||||
"strum_macros 0.27.2",
|
||||
"supports-color 3.0.2",
|
||||
"syntect",
|
||||
"tempfile",
|
||||
"textwrap 0.16.2",
|
||||
"thiserror 2.0.18",
|
||||
@@ -2307,8 +2317,6 @@ dependencies = [
|
||||
"tracing",
|
||||
"tracing-appender",
|
||||
"tracing-subscriber",
|
||||
"tree-sitter-bash",
|
||||
"tree-sitter-highlight",
|
||||
"unicode-segmentation",
|
||||
"unicode-width 0.2.1",
|
||||
"url",
|
||||
@@ -5157,6 +5165,12 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linked-hash-map"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
|
||||
|
||||
[[package]]
|
||||
name = "linux-keyutils"
|
||||
version = "0.2.4"
|
||||
@@ -5964,6 +5978,28 @@ version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "onig"
|
||||
version = "6.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"onig_sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "onig_sys"
|
||||
version = "69.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
version = "0.3.1"
|
||||
@@ -6381,6 +6417,19 @@ version = "0.3.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||
|
||||
[[package]]
|
||||
name = "plist"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"indexmap 2.13.0",
|
||||
"quick-xml",
|
||||
"serde",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "png"
|
||||
version = "0.18.0"
|
||||
@@ -8886,6 +8935,27 @@ dependencies = [
|
||||
"syn 2.0.114",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syntect"
|
||||
version = "5.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "656b45c05d95a5704399aeef6bd0ddec7b2b3531b7c9e900abbf7c4d2190c925"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"flate2",
|
||||
"fnv",
|
||||
"once_cell",
|
||||
"onig",
|
||||
"plist",
|
||||
"regex-syntax 0.8.8",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
"walkdir",
|
||||
"yaml-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sys-locale"
|
||||
version = "0.3.2"
|
||||
@@ -9622,18 +9692,6 @@ dependencies = [
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-highlight"
|
||||
version = "0.25.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adc5f880ad8d8f94e88cb81c3557024cf1a8b75e3b504c50481ed4f5a6006ff3"
|
||||
dependencies = [
|
||||
"regex",
|
||||
"streaming-iterator",
|
||||
"thiserror 2.0.18",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-language"
|
||||
version = "0.1.7"
|
||||
@@ -10965,6 +11023,15 @@ dependencies = [
|
||||
"lzma-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yaml-rust"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
|
||||
dependencies = [
|
||||
"linked-hash-map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "1.0.1"
|
||||
|
||||
@@ -277,6 +277,7 @@ tracing-test = "0.2.5"
|
||||
tree-sitter = "0.25.10"
|
||||
tree-sitter-bash = "0.25"
|
||||
tree-sitter-highlight = "0.25.10"
|
||||
syntect = "5"
|
||||
ts-rs = "11"
|
||||
tungstenite = { version = "0.27.0", features = ["deflate", "proxy"] }
|
||||
uds_windows = "1.1.0"
|
||||
|
||||
@@ -93,8 +93,7 @@ toml = { workspace = true }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
tracing-appender = { workspace = true }
|
||||
tracing-subscriber = { workspace = true, features = ["env-filter"] }
|
||||
tree-sitter-bash = { workspace = true }
|
||||
tree-sitter-highlight = { workspace = true }
|
||||
syntect = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
unicode-width = { workspace = true }
|
||||
url = { workspace = true }
|
||||
|
||||
@@ -1,154 +1,216 @@
|
||||
use ratatui::style::Color as RtColor;
|
||||
use ratatui::style::Modifier;
|
||||
use ratatui::style::Style;
|
||||
use ratatui::style::Stylize;
|
||||
use ratatui::text::Line;
|
||||
use ratatui::text::Span;
|
||||
use std::sync::OnceLock;
|
||||
use tree_sitter_highlight::Highlight;
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
use tree_sitter_highlight::HighlightEvent;
|
||||
use tree_sitter_highlight::Highlighter;
|
||||
use syntect::easy::HighlightLines;
|
||||
use syntect::highlighting::FontStyle;
|
||||
use syntect::highlighting::Style as SyntectStyle;
|
||||
use syntect::highlighting::Theme;
|
||||
use syntect::highlighting::ThemeSet;
|
||||
use syntect::parsing::SyntaxReference;
|
||||
use syntect::parsing::SyntaxSet;
|
||||
use syntect::util::LinesWithEndings;
|
||||
|
||||
// Ref: https://github.com/tree-sitter/tree-sitter-bash/blob/master/queries/highlights.scm
|
||||
#[derive(Copy, Clone)]
|
||||
enum BashHighlight {
|
||||
Comment,
|
||||
Constant,
|
||||
Embedded,
|
||||
Function,
|
||||
Keyword,
|
||||
Number,
|
||||
Operator,
|
||||
Property,
|
||||
String,
|
||||
// -- Global singletons -------------------------------------------------------
|
||||
|
||||
static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
|
||||
static THEME: OnceLock<Theme> = OnceLock::new();
|
||||
|
||||
/// Returns the shared `SyntaxSet` stored in the `SYNTAX_SET` static.
///
/// The set is built with `SyntaxSet::load_defaults_newlines` the first time
/// this function is called; later calls return the same cached reference.
fn syntax_set() -> &'static SyntaxSet {
|
||||
SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
|
||||
}
|
||||
|
||||
impl BashHighlight {
|
||||
const ALL: [Self; 9] = [
|
||||
Self::Comment,
|
||||
Self::Constant,
|
||||
Self::Embedded,
|
||||
Self::Function,
|
||||
Self::Keyword,
|
||||
Self::Number,
|
||||
Self::Operator,
|
||||
Self::Property,
|
||||
Self::String,
|
||||
];
|
||||
|
||||
const fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Comment => "comment",
|
||||
Self::Constant => "constant",
|
||||
Self::Embedded => "embedded",
|
||||
Self::Function => "function",
|
||||
Self::Keyword => "keyword",
|
||||
Self::Number => "number",
|
||||
Self::Operator => "operator",
|
||||
Self::Property => "property",
|
||||
Self::String => "string",
|
||||
}
|
||||
}
|
||||
|
||||
fn style(self) -> Style {
|
||||
match self {
|
||||
Self::Comment | Self::Operator | Self::String => Style::default().dim(),
|
||||
_ => Style::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static HIGHLIGHT_CONFIG: OnceLock<HighlightConfiguration> = OnceLock::new();
|
||||
|
||||
fn highlight_names() -> &'static [&'static str] {
|
||||
static NAMES: OnceLock<[&'static str; BashHighlight::ALL.len()]> = OnceLock::new();
|
||||
NAMES
|
||||
.get_or_init(|| BashHighlight::ALL.map(BashHighlight::as_str))
|
||||
.as_slice()
|
||||
}
|
||||
|
||||
fn highlight_config() -> &'static HighlightConfiguration {
|
||||
HIGHLIGHT_CONFIG.get_or_init(|| {
|
||||
let language = tree_sitter_bash::LANGUAGE.into();
|
||||
#[expect(clippy::expect_used)]
|
||||
let mut config = HighlightConfiguration::new(
|
||||
language,
|
||||
"bash",
|
||||
tree_sitter_bash::HIGHLIGHT_QUERY,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
.expect("load bash highlight query");
|
||||
config.configure(highlight_names());
|
||||
config
|
||||
/// Returns the shared highlighting `Theme` stored in the `THEME` static.
///
/// On first use the default syntect theme set is loaded and the
/// "base16-ocean.dark" entry is cloned into the static; later calls return
/// the cached reference.
fn theme() -> &'static Theme {
|
||||
THEME.get_or_init(|| {
|
||||
let ts = ThemeSet::load_defaults();
|
||||
// NOTE(review): indexing by name presumably panics if the bundled theme
// set ever stops shipping "base16-ocean.dark" — confirm against syntect.
ts.themes["base16-ocean.dark"].clone()
|
||||
})
|
||||
}
|
||||
|
||||
fn highlight_for(highlight: Highlight) -> BashHighlight {
|
||||
BashHighlight::ALL[highlight.0]
|
||||
}
|
||||
// -- Language normalization ---------------------------------------------------
|
||||
|
||||
fn push_segment(lines: &mut Vec<Line<'static>>, segment: &str, style: Option<Style>) {
|
||||
for (i, part) in segment.split('\n').enumerate() {
|
||||
if i > 0 {
|
||||
lines.push(Line::from(""));
|
||||
}
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let span = match style {
|
||||
Some(style) => Span::styled(part.to_string(), style),
|
||||
None => part.to_string().into(),
|
||||
};
|
||||
if let Some(last) = lines.last_mut() {
|
||||
last.spans.push(span);
|
||||
}
|
||||
/// Normalize common language aliases to canonical names that syntect can
/// resolve via name or extension lookup.
///
/// Aliases are matched ASCII-case-insensitively, so fence tags such as
/// `JS`, `Shell` or `Golang` normalize the same way as their lowercase
/// forms. Identifiers that are not a known alias (including names that are
/// already canonical, e.g. `rust`) are returned unchanged.
fn normalize_lang(lang: &str) -> &str {
    /// `(alias, canonical)` pairs. Only entries that actually rewrite the
    /// input are listed; everything else passes through untouched.
    const ALIASES: &[(&str, &str)] = &[
        ("js", "javascript"),
        ("jsx", "javascript"),
        ("ts", "typescript"),
        ("py", "python"),
        ("python3", "python"),
        ("rb", "ruby"),
        ("rs", "rust"),
        ("golang", "go"),
        ("h", "c"),
        ("c++", "cpp"),
        ("cc", "cpp"),
        ("cxx", "cpp"),
        ("hpp", "cpp"),
        ("hxx", "cpp"),
        ("hh", "cpp"),
        ("yml", "yaml"),
        ("sh", "bash"),
        ("zsh", "bash"),
        ("shell", "bash"),
        ("kt", "kotlin"),
        ("md", "markdown"),
    ];

    let hit = ALIASES
        .iter()
        .find(|(alias, _)| alias.eq_ignore_ascii_case(lang));
    match hit {
        Some(&(_, canonical)) => canonical,
        None => lang,
    }
}
|
||||
|
||||
/// Convert a bash script into per-line styled content using tree-sitter's
|
||||
/// bash highlight query. The highlighter is streamed so multi-line content is
|
||||
/// split into `Line`s while preserving style boundaries.
|
||||
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
|
||||
let mut highlighter = Highlighter::new();
|
||||
let iterator =
|
||||
match highlighter.highlight(highlight_config(), script.as_bytes(), None, |_| None) {
|
||||
Ok(iter) => iter,
|
||||
Err(_) => return vec![script.to_string().into()],
|
||||
};
|
||||
// -- Style conversion (syntect -> ratatui) ------------------------------------
|
||||
|
||||
let mut lines: Vec<Line<'static>> = vec![Line::from("")];
|
||||
let mut highlight_stack: Vec<Highlight> = Vec::new();
|
||||
/// Convert a syntect `Style` to a ratatui `Style`.
|
||||
///
|
||||
/// Syntax highlighting themes inherently produce RGB colors, so we allow
|
||||
/// `Color::Rgb` here despite the project-wide preference for ANSI colors.
|
||||
#[allow(clippy::disallowed_methods)]
|
||||
fn convert_style(syn_style: SyntectStyle) -> Style {
|
||||
let mut rt_style = Style::default();
|
||||
|
||||
for event in iterator {
|
||||
match event {
|
||||
Ok(HighlightEvent::HighlightStart(highlight)) => highlight_stack.push(highlight),
|
||||
Ok(HighlightEvent::HighlightEnd) => {
|
||||
highlight_stack.pop();
|
||||
}
|
||||
Ok(HighlightEvent::Source { start, end }) => {
|
||||
if start == end {
|
||||
continue;
|
||||
}
|
||||
let style = highlight_stack.last().map(|h| highlight_for(*h).style());
|
||||
push_segment(&mut lines, &script[start..end], style);
|
||||
}
|
||||
Err(_) => return vec![script.to_string().into()],
|
||||
}
|
||||
// Map foreground color when visible.
|
||||
let fg = syn_style.foreground;
|
||||
if fg.a > 0 {
|
||||
rt_style = rt_style.fg(RtColor::Rgb(fg.r, fg.g, fg.b));
|
||||
}
|
||||
// Intentionally skip background to avoid overwriting terminal bg.
|
||||
|
||||
if syn_style.font_style.contains(FontStyle::BOLD) {
|
||||
rt_style.add_modifier |= Modifier::BOLD;
|
||||
}
|
||||
if syn_style.font_style.contains(FontStyle::ITALIC) {
|
||||
rt_style.add_modifier |= Modifier::ITALIC;
|
||||
}
|
||||
if syn_style.font_style.contains(FontStyle::UNDERLINE) {
|
||||
rt_style.add_modifier |= Modifier::UNDERLINED;
|
||||
}
|
||||
|
||||
if lines.is_empty() {
|
||||
vec![Line::from("")]
|
||||
rt_style
|
||||
}
|
||||
|
||||
// -- Syntax lookup ------------------------------------------------------------
|
||||
|
||||
/// Try to find a syntect `SyntaxReference` for the given language identifier.
|
||||
///
|
||||
/// Resolution order:
|
||||
/// 1. By token (matches against file_extensions case-insensitively).
|
||||
/// 2. By exact syntax name (e.g. "Rust", "Python").
|
||||
/// 3. By case-insensitive syntax name (e.g. "rust" -> "Rust").
|
||||
/// 4. By raw (un-normalized) input as file extension.
|
||||
fn find_syntax(lang: &str) -> Option<&'static SyntaxReference> {
|
||||
let ss = syntax_set();
|
||||
let normalized = normalize_lang(lang);
|
||||
|
||||
// Try by token (matches against file_extensions case-insensitively).
|
||||
if let Some(s) = ss.find_syntax_by_token(normalized) {
|
||||
return Some(s);
|
||||
}
|
||||
// Try by exact syntax name (e.g. "Rust", "Python").
|
||||
if let Some(s) = ss.find_syntax_by_name(normalized) {
|
||||
return Some(s);
|
||||
}
|
||||
// Try case-insensitive name match (e.g. "rust" -> "Rust").
|
||||
let lower = normalized.to_ascii_lowercase();
|
||||
if let Some(s) = ss
|
||||
.syntaxes()
|
||||
.iter()
|
||||
.find(|s| s.name.to_ascii_lowercase() == lower)
|
||||
{
|
||||
return Some(s);
|
||||
}
|
||||
// Try raw (un-normalized) input as file extension.
|
||||
if let Some(s) = ss.find_syntax_by_extension(lang) {
|
||||
return Some(s);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
// -- Guardrail constants ------------------------------------------------------
|
||||
|
||||
/// Skip highlighting for inputs larger than 512 KB to avoid excessive memory
|
||||
/// and CPU usage. Callers fall back to plain unstyled text.
|
||||
const MAX_HIGHLIGHT_BYTES: usize = 512 * 1024;
|
||||
|
||||
/// Skip highlighting for inputs with more than 10,000 lines.
|
||||
const MAX_HIGHLIGHT_LINES: usize = 10_000;
|
||||
|
||||
// -- Core highlighting --------------------------------------------------------
|
||||
|
||||
/// Parse `code` using syntect for `lang` and return per-line styled spans.
|
||||
/// Each inner Vec represents one source line. Returns None when the language
|
||||
/// is not recognized or the input exceeds safety limits.
|
||||
fn highlight_to_line_spans(code: &str, lang: &str) -> Option<Vec<Vec<Span<'static>>>> {
|
||||
// Empty input has nothing to highlight; fall back to the plain text path
|
||||
// which correctly produces a single empty Line.
|
||||
if code.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Bail out early for oversized inputs to avoid excessive resource usage.
|
||||
if code.len() > MAX_HIGHLIGHT_BYTES
|
||||
|| code.as_bytes().iter().filter(|&&b| b == b'\n').count() > MAX_HIGHLIGHT_LINES
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let syntax = find_syntax(lang)?;
|
||||
let mut h = HighlightLines::new(syntax, theme());
|
||||
let mut lines: Vec<Vec<Span<'static>>> = Vec::new();
|
||||
|
||||
for line in LinesWithEndings::from(code) {
|
||||
let ranges = h.highlight_line(line, syntax_set()).ok()?;
|
||||
let mut spans: Vec<Span<'static>> = Vec::new();
|
||||
for (style, text) in ranges {
|
||||
// Strip trailing newline since we handle line breaks ourselves.
|
||||
let text = text.trim_end_matches('\n');
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
spans.push(Span::styled(text.to_string(), convert_style(style)));
|
||||
}
|
||||
if spans.is_empty() {
|
||||
spans.push(Span::raw(String::new()));
|
||||
}
|
||||
lines.push(spans);
|
||||
}
|
||||
|
||||
Some(lines)
|
||||
}
|
||||
|
||||
// -- Public API ---------------------------------------------------------------
|
||||
|
||||
/// Highlight code in any supported language, returning styled ratatui Lines.
|
||||
/// Falls back to plain unstyled text when the language is not recognized.
|
||||
pub(crate) fn highlight_code_to_lines(code: &str, lang: &str) -> Vec<Line<'static>> {
|
||||
if let Some(line_spans) = highlight_to_line_spans(code, lang) {
|
||||
line_spans.into_iter().map(Line::from).collect()
|
||||
} else {
|
||||
lines
|
||||
// Fallback: plain text, one Line per source line.
|
||||
code.split('\n')
|
||||
.map(|l| Line::from(l.to_string()))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Backward-compatible wrapper for bash highlighting used by exec cells.
///
/// Forwards `script` to `highlight_code_to_lines` with the language fixed to
/// "bash" and returns its result unchanged.
|
||||
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
|
||||
highlight_code_to_lines(script, "bash")
|
||||
}
|
||||
|
||||
/// Highlight code and return per-line styled spans for diff integration.
|
||||
/// Thin crate-visible wrapper around `highlight_to_line_spans`; returns
/// whatever that function returns, including `None` when the language is
/// unsupported, the input is empty, or the input exceeds the size limits.
|
||||
pub(crate) fn highlight_code_to_styled_spans(
|
||||
code: &str,
|
||||
lang: &str,
|
||||
) -> Option<Vec<Vec<Span<'static>>>> {
|
||||
highlight_to_line_spans(code, lang)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
use ratatui::style::Modifier;
|
||||
|
||||
/// Reconstruct plain text from highlighted Lines.
|
||||
fn reconstructed(lines: &[Line<'static>]) -> String {
|
||||
lines
|
||||
.iter()
|
||||
@@ -162,75 +224,207 @@ mod tests {
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
fn dimmed_tokens(lines: &[Line<'static>]) -> Vec<String> {
|
||||
lines
|
||||
.iter()
|
||||
.flat_map(|l| l.spans.iter())
|
||||
.filter(|sp| sp.style.add_modifier.contains(Modifier::DIM))
|
||||
.map(|sp| sp.content.clone().into_owned())
|
||||
.map(|token| token.trim().to_string())
|
||||
.filter(|token| !token.is_empty())
|
||||
.collect()
|
||||
#[test]
|
||||
fn highlight_rust_has_keyword_style() {
|
||||
let code = "fn main() {}";
|
||||
let lines = highlight_code_to_lines(code, "rust");
|
||||
assert_eq!(reconstructed(&lines), code);
|
||||
|
||||
// The `fn` keyword should have a non-default style (some color).
|
||||
let fn_span = lines[0].spans.iter().find(|sp| sp.content.as_ref() == "fn");
|
||||
assert!(fn_span.is_some(), "expected a span containing 'fn'");
|
||||
let style = fn_span.map(|s| s.style).unwrap_or_default();
|
||||
assert!(
|
||||
style.fg.is_some() || style.add_modifier != Modifier::empty(),
|
||||
"expected fn keyword to have non-default style, got {style:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dims_expected_bash_operators() {
|
||||
let s = "echo foo && bar || baz | qux & (echo hi)";
|
||||
let lines = highlight_bash_to_lines(s);
|
||||
assert_eq!(reconstructed(&lines), s);
|
||||
|
||||
let dimmed = dimmed_tokens(&lines);
|
||||
assert!(dimmed.contains(&"&&".to_string()));
|
||||
assert!(dimmed.contains(&"|".to_string()));
|
||||
assert!(!dimmed.contains(&"echo".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dims_redirects_and_strings() {
|
||||
let s = "echo \"hi\" > out.txt; echo 'ok'";
|
||||
let lines = highlight_bash_to_lines(s);
|
||||
assert_eq!(reconstructed(&lines), s);
|
||||
|
||||
let dimmed = dimmed_tokens(&lines);
|
||||
assert!(dimmed.contains(&">".to_string()));
|
||||
assert!(dimmed.contains(&"\"hi\"".to_string()));
|
||||
assert!(dimmed.contains(&"'ok'".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlights_command_and_strings() {
|
||||
let s = "echo \"hi\"";
|
||||
let lines = highlight_bash_to_lines(s);
|
||||
let mut echo_style = None;
|
||||
let mut string_style = None;
|
||||
for span in &lines[0].spans {
|
||||
let text = span.content.as_ref();
|
||||
if text == "echo" {
|
||||
echo_style = Some(span.style);
|
||||
}
|
||||
if text == "\"hi\"" {
|
||||
string_style = Some(span.style);
|
||||
fn highlight_unknown_lang_falls_back() {
|
||||
let code = "some random text";
|
||||
let lines = highlight_code_to_lines(code, "xyzlang");
|
||||
assert_eq!(reconstructed(&lines), code);
|
||||
// Should be plain text with no styling.
|
||||
for line in &lines {
|
||||
for span in &line.spans {
|
||||
assert_eq!(
|
||||
span.style,
|
||||
Style::default(),
|
||||
"expected default style for unknown language"
|
||||
);
|
||||
}
|
||||
}
|
||||
let echo_style = echo_style.expect("echo span missing");
|
||||
let string_style = string_style.expect("string span missing");
|
||||
assert!(echo_style.fg.is_none());
|
||||
assert!(!echo_style.add_modifier.contains(Modifier::DIM));
|
||||
assert!(string_style.add_modifier.contains(Modifier::DIM));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlights_heredoc_body_as_string() {
|
||||
let s = "cat <<EOF\nheredoc body\nEOF";
|
||||
let lines = highlight_bash_to_lines(s);
|
||||
let body_line = &lines[1];
|
||||
let mut body_style = None;
|
||||
for span in &body_line.spans {
|
||||
if span.content.as_ref() == "heredoc body" {
|
||||
body_style = Some(span.style);
|
||||
}
|
||||
fn highlight_empty_string() {
|
||||
let lines = highlight_code_to_lines("", "rust");
|
||||
assert_eq!(lines.len(), 1);
|
||||
assert_eq!(reconstructed(&lines), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_bash_preserves_content() {
|
||||
let script = "echo \"hello world\" && ls -la | grep foo";
|
||||
let lines = highlight_bash_to_lines(script);
|
||||
assert_eq!(reconstructed(&lines), script);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_lang_aliases() {
|
||||
assert_eq!(normalize_lang("js"), "javascript");
|
||||
assert_eq!(normalize_lang("jsx"), "javascript");
|
||||
assert_eq!(normalize_lang("ts"), "typescript");
|
||||
assert_eq!(normalize_lang("py"), "python");
|
||||
assert_eq!(normalize_lang("rb"), "ruby");
|
||||
assert_eq!(normalize_lang("rs"), "rust");
|
||||
assert_eq!(normalize_lang("c++"), "cpp");
|
||||
assert_eq!(normalize_lang("cc"), "cpp");
|
||||
assert_eq!(normalize_lang("yml"), "yaml");
|
||||
assert_eq!(normalize_lang("sh"), "bash");
|
||||
assert_eq!(normalize_lang("zsh"), "bash");
|
||||
assert_eq!(normalize_lang("shell"), "bash");
|
||||
assert_eq!(normalize_lang("kt"), "kotlin");
|
||||
assert_eq!(normalize_lang("md"), "markdown");
|
||||
assert_eq!(normalize_lang("rust"), "rust");
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::disallowed_methods)]
|
||||
fn style_conversion_correctness() {
|
||||
let syn = SyntectStyle {
|
||||
foreground: syntect::highlighting::Color {
|
||||
r: 255,
|
||||
g: 128,
|
||||
b: 0,
|
||||
a: 255,
|
||||
},
|
||||
background: syntect::highlighting::Color {
|
||||
r: 0,
|
||||
g: 0,
|
||||
b: 0,
|
||||
a: 255,
|
||||
},
|
||||
font_style: FontStyle::BOLD | FontStyle::ITALIC,
|
||||
};
|
||||
let rt = convert_style(syn);
|
||||
assert_eq!(rt.fg, Some(RtColor::Rgb(255, 128, 0)));
|
||||
// Background is intentionally skipped.
|
||||
assert_eq!(rt.bg, None);
|
||||
assert!(rt.add_modifier.contains(Modifier::BOLD));
|
||||
assert!(rt.add_modifier.contains(Modifier::ITALIC));
|
||||
assert!(!rt.add_modifier.contains(Modifier::UNDERLINED));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_multiline_python() {
|
||||
let code = "def hello():\n print(\"hi\")\n return 42";
|
||||
let lines = highlight_code_to_lines(code, "python");
|
||||
assert_eq!(reconstructed(&lines), code);
|
||||
assert_eq!(lines.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_code_to_styled_spans_returns_none_for_unknown() {
|
||||
assert!(highlight_code_to_styled_spans("x", "xyzlang").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_code_to_styled_spans_returns_some_for_known() {
|
||||
let result = highlight_code_to_styled_spans("let x = 1;", "rust");
|
||||
assert!(result.is_some());
|
||||
let spans = result.unwrap_or_default();
|
||||
assert!(!spans.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_markdown_preserves_content() {
|
||||
let code = "```sh\nprintf 'fenced within fenced\\n'\n```";
|
||||
let lines = highlight_code_to_lines(code, "markdown");
|
||||
let result = reconstructed(&lines);
|
||||
assert_eq!(
|
||||
result, code,
|
||||
"markdown highlighting must preserve content exactly"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_large_input_falls_back() {
|
||||
// Input exceeding MAX_HIGHLIGHT_BYTES should return None (plain text
|
||||
// fallback) rather than attempting to parse.
|
||||
let big = "x".repeat(MAX_HIGHLIGHT_BYTES + 1);
|
||||
let result = highlight_code_to_styled_spans(&big, "rust");
|
||||
assert!(result.is_none(), "oversized input should fall back to None");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_many_lines_falls_back() {
|
||||
// Input exceeding MAX_HIGHLIGHT_LINES should return None.
|
||||
let many_lines = "let x = 1;\n".repeat(MAX_HIGHLIGHT_LINES + 1);
|
||||
let result = highlight_code_to_styled_spans(&many_lines, "rust");
|
||||
assert!(result.is_none(), "too many lines should fall back to None");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_lang_new_aliases() {
|
||||
assert_eq!(normalize_lang("go"), "go");
|
||||
assert_eq!(normalize_lang("golang"), "go");
|
||||
assert_eq!(normalize_lang("c"), "c");
|
||||
assert_eq!(normalize_lang("h"), "c");
|
||||
assert_eq!(normalize_lang("hpp"), "cpp");
|
||||
assert_eq!(normalize_lang("hxx"), "cpp");
|
||||
assert_eq!(normalize_lang("hh"), "cpp");
|
||||
assert_eq!(normalize_lang("tsx"), "tsx");
|
||||
assert_eq!(normalize_lang("sql"), "sql");
|
||||
assert_eq!(normalize_lang("lua"), "lua");
|
||||
assert_eq!(normalize_lang("zig"), "zig");
|
||||
assert_eq!(normalize_lang("swift"), "swift");
|
||||
assert_eq!(normalize_lang("java"), "java");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn find_syntax_resolves_all_canonical_languages() {
|
||||
// Every canonical name that normalize_lang produces AND that syntect's
|
||||
// default syntax set supports must resolve. Note: syntect's defaults
|
||||
// do NOT include TypeScript, TSX, Kotlin, Swift, or Zig, so those are
|
||||
// intentionally omitted here (they gracefully fall back to plain text).
|
||||
let canonical = [
|
||||
"javascript",
|
||||
"python",
|
||||
"ruby",
|
||||
"rust",
|
||||
"go",
|
||||
"c",
|
||||
"cpp",
|
||||
"yaml",
|
||||
"bash",
|
||||
"markdown",
|
||||
"sql",
|
||||
"lua",
|
||||
"java",
|
||||
];
|
||||
for lang in canonical {
|
||||
assert!(
|
||||
find_syntax(lang).is_some(),
|
||||
"find_syntax({lang:?}) returned None — syntect cannot resolve this canonical name"
|
||||
);
|
||||
}
|
||||
// Also verify common raw extensions resolve.
|
||||
let extensions = ["rs", "py", "js", "rb", "go", "sh", "md", "yml"];
|
||||
for ext in extensions {
|
||||
assert!(
|
||||
find_syntax(ext).is_some(),
|
||||
"find_syntax({ext:?}) returned None — extension lookup failed"
|
||||
);
|
||||
}
|
||||
// Unsupported languages should return None (graceful fallback).
|
||||
let unsupported = ["typescript", "tsx", "kotlin", "swift", "zig"];
|
||||
for lang in unsupported {
|
||||
assert!(
|
||||
find_syntax(lang).is_none(),
|
||||
"find_syntax({lang:?}) unexpectedly returned Some — update test if syntect added support"
|
||||
);
|
||||
}
|
||||
let body_style = body_style.expect("missing heredoc span");
|
||||
assert!(body_style.add_modifier.contains(Modifier::DIM));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user