feat(tui): add multi-language syntax highlighting via syntect

Replace tree-sitter-bash and tree-sitter-highlight with syntect for
code highlighting. Syntect's HighlightLines API resolves overlapping
spans internally, significantly simplifying the implementation.

- Add syntect workspace dependency, remove tree-sitter-bash/highlight from tui
- Rewrite highlight.rs with syntect singletons (SyntaxSet + base16-ocean.dark theme)
- Normalize language aliases (js→javascript, py→python, rs→rust, etc.)
- Multi-strategy syntax lookup: token, name, case-insensitive name, extension
- Add guardrails: skip highlighting for inputs >512KB or >10K lines
- Comprehensive tests for style conversion, language resolution, content preservation
This commit is contained in:
Felipe Coury
2026-02-08 21:05:49 -03:00
parent b17148f13a
commit 776e4b0aa8
4 changed files with 462 additions and 201 deletions

95
codex-rs/Cargo.lock generated
View File

@@ -852,6 +852,15 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]]
name = "bit-set"
version = "0.5.3"
@@ -2297,6 +2306,7 @@ dependencies = [
"strum 0.27.2",
"strum_macros 0.27.2",
"supports-color 3.0.2",
"syntect",
"tempfile",
"textwrap 0.16.2",
"thiserror 2.0.18",
@@ -2307,8 +2317,6 @@ dependencies = [
"tracing",
"tracing-appender",
"tracing-subscriber",
"tree-sitter-bash",
"tree-sitter-highlight",
"unicode-segmentation",
"unicode-width 0.2.1",
"url",
@@ -5157,6 +5165,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-keyutils"
version = "0.2.4"
@@ -5964,6 +5978,28 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "onig"
version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags 2.10.0",
"libc",
"once_cell",
"onig_sys",
]
[[package]]
name = "onig_sys"
version = "69.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "opaque-debug"
version = "0.3.1"
@@ -6381,6 +6417,19 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "plist"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07"
dependencies = [
"base64 0.22.1",
"indexmap 2.13.0",
"quick-xml",
"serde",
"time",
]
[[package]]
name = "png"
version = "0.18.0"
@@ -8886,6 +8935,27 @@ dependencies = [
"syn 2.0.114",
]
[[package]]
name = "syntect"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "656b45c05d95a5704399aeef6bd0ddec7b2b3531b7c9e900abbf7c4d2190c925"
dependencies = [
"bincode",
"flate2",
"fnv",
"once_cell",
"onig",
"plist",
"regex-syntax 0.8.8",
"serde",
"serde_derive",
"serde_json",
"thiserror 2.0.18",
"walkdir",
"yaml-rust",
]
[[package]]
name = "sys-locale"
version = "0.3.2"
@@ -9622,18 +9692,6 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-highlight"
version = "0.25.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc5f880ad8d8f94e88cb81c3557024cf1a8b75e3b504c50481ed4f5a6006ff3"
dependencies = [
"regex",
"streaming-iterator",
"thiserror 2.0.18",
"tree-sitter",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.7"
@@ -10965,6 +11023,15 @@ dependencies = [
"lzma-sys",
]
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]
[[package]]
name = "yansi"
version = "1.0.1"

View File

@@ -277,6 +277,7 @@ tracing-test = "0.2.5"
tree-sitter = "0.25.10"
tree-sitter-bash = "0.25"
tree-sitter-highlight = "0.25.10"
syntect = "5"
ts-rs = "11"
tungstenite = { version = "0.27.0", features = ["deflate", "proxy"] }
uds_windows = "1.1.0"

View File

@@ -93,8 +93,7 @@ toml = { workspace = true }
tracing = { workspace = true, features = ["log"] }
tracing-appender = { workspace = true }
tracing-subscriber = { workspace = true, features = ["env-filter"] }
tree-sitter-bash = { workspace = true }
tree-sitter-highlight = { workspace = true }
syntect = { workspace = true }
unicode-segmentation = { workspace = true }
unicode-width = { workspace = true }
url = { workspace = true }

View File

@@ -1,154 +1,216 @@
use ratatui::style::Color as RtColor;
use ratatui::style::Modifier;
use ratatui::style::Style;
use ratatui::style::Stylize;
use ratatui::text::Line;
use ratatui::text::Span;
use std::sync::OnceLock;
use tree_sitter_highlight::Highlight;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_highlight::HighlightEvent;
use tree_sitter_highlight::Highlighter;
use syntect::easy::HighlightLines;
use syntect::highlighting::FontStyle;
use syntect::highlighting::Style as SyntectStyle;
use syntect::highlighting::Theme;
use syntect::highlighting::ThemeSet;
use syntect::parsing::SyntaxReference;
use syntect::parsing::SyntaxSet;
use syntect::util::LinesWithEndings;
// Ref: https://github.com/tree-sitter/tree-sitter-bash/blob/master/queries/highlights.scm
#[derive(Copy, Clone)]
enum BashHighlight {
Comment,
Constant,
Embedded,
Function,
Keyword,
Number,
Operator,
Property,
String,
// -- Global singletons -------------------------------------------------------
static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
static THEME: OnceLock<Theme> = OnceLock::new();
/// Returns the shared `SyntaxSet`, initializing the `SYNTAX_SET` singleton
/// with `SyntaxSet::load_defaults_newlines` on the first call.
fn syntax_set() -> &'static SyntaxSet {
SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
}
impl BashHighlight {
// Every variant, in declaration order; `highlight_for` indexes into this
// array and `highlight_names` maps it to the capture-name list.
const ALL: [Self; 9] = [
Self::Comment,
Self::Constant,
Self::Embedded,
Self::Function,
Self::Keyword,
Self::Number,
Self::Operator,
Self::Property,
Self::String,
];
/// Lower-case name string for this highlight kind.
const fn as_str(self) -> &'static str {
match self {
Self::Comment => "comment",
Self::Constant => "constant",
Self::Embedded => "embedded",
Self::Function => "function",
Self::Keyword => "keyword",
Self::Number => "number",
Self::Operator => "operator",
Self::Property => "property",
Self::String => "string",
}
}
/// Style for this highlight kind: comments, operators and strings are
/// dimmed; every other kind uses the default style.
fn style(self) -> Style {
match self {
Self::Comment | Self::Operator | Self::String => Style::default().dim(),
_ => Style::default(),
}
}
}
static HIGHLIGHT_CONFIG: OnceLock<HighlightConfiguration> = OnceLock::new();
/// Returns the highlight names (one per `BashHighlight::ALL` entry, in the
/// same order), computed once and cached in a function-local `OnceLock`.
fn highlight_names() -> &'static [&'static str] {
static NAMES: OnceLock<[&'static str; BashHighlight::ALL.len()]> = OnceLock::new();
NAMES
.get_or_init(|| BashHighlight::ALL.map(BashHighlight::as_str))
.as_slice()
}
fn highlight_config() -> &'static HighlightConfiguration {
HIGHLIGHT_CONFIG.get_or_init(|| {
let language = tree_sitter_bash::LANGUAGE.into();
#[expect(clippy::expect_used)]
let mut config = HighlightConfiguration::new(
language,
"bash",
tree_sitter_bash::HIGHLIGHT_QUERY,
"",
"",
)
.expect("load bash highlight query");
config.configure(highlight_names());
config
/// Returns the shared highlighting theme.
///
/// The first call loads syntect's default theme set and stores a copy of
/// its `base16-ocean.dark` entry in the `THEME` singleton; later calls
/// reuse that stored value.
fn theme() -> &'static Theme {
    THEME.get_or_init(|| ThemeSet::load_defaults().themes["base16-ocean.dark"].clone())
}
/// Maps a tree-sitter `Highlight` to the `BashHighlight` at the same position
/// in `BashHighlight::ALL`, using the highlight's inner index.
fn highlight_for(highlight: Highlight) -> BashHighlight {
BashHighlight::ALL[highlight.0]
}
// -- Language normalization ---------------------------------------------------
fn push_segment(lines: &mut Vec<Line<'static>>, segment: &str, style: Option<Style>) {
for (i, part) in segment.split('\n').enumerate() {
if i > 0 {
lines.push(Line::from(""));
}
if part.is_empty() {
continue;
}
let span = match style {
Some(style) => Span::styled(part.to_string(), style),
None => part.to_string().into(),
};
if let Some(last) = lines.last_mut() {
last.spans.push(span);
}
/// Map a language alias (as typically written after a code fence) to the
/// canonical identifier used for syntect lookups.
///
/// The match is exact and case-sensitive. Anything that is not a known
/// alias, including names that are already canonical such as `"rust"` or
/// `"go"`, is returned unchanged.
fn normalize_lang(lang: &str) -> &str {
    // (aliases, canonical name). Canonical names map to themselves through
    // the passthrough below, so they need no entry here.
    const ALIASES: &[(&[&str], &str)] = &[
        (&["js", "jsx"], "javascript"),
        (&["ts"], "typescript"),
        (&["py", "python3"], "python"),
        (&["rb"], "ruby"),
        (&["rs"], "rust"),
        (&["golang"], "go"),
        (&["h"], "c"),
        (&["c++", "cc", "cxx", "hpp", "hxx", "hh"], "cpp"),
        (&["yml"], "yaml"),
        (&["sh", "zsh", "shell"], "bash"),
        (&["kt"], "kotlin"),
        (&["md"], "markdown"),
    ];

    ALIASES
        .iter()
        .find(|(aliases, _)| aliases.iter().any(|alias| *alias == lang))
        .map_or(lang, |&(_, canonical)| canonical)
}
/// Convert a bash script into per-line styled content using tree-sitter's
/// bash highlight query. The highlighter is streamed so multi-line content is
/// split into `Line`s while preserving style boundaries.
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
let mut highlighter = Highlighter::new();
let iterator =
match highlighter.highlight(highlight_config(), script.as_bytes(), None, |_| None) {
Ok(iter) => iter,
Err(_) => return vec![script.to_string().into()],
};
// -- Style conversion (syntect -> ratatui) ------------------------------------
let mut lines: Vec<Line<'static>> = vec![Line::from("")];
let mut highlight_stack: Vec<Highlight> = Vec::new();
/// Convert a syntect `Style` to a ratatui `Style`.
///
/// Syntax highlighting themes inherently produce RGB colors, so we allow
/// `Color::Rgb` here despite the project-wide preference for ANSI colors.
///
/// Only the foreground color (when its alpha is non-zero) and the BOLD,
/// ITALIC and UNDERLINE font-style flags are carried over; the syntect
/// background color is never applied.
#[allow(clippy::disallowed_methods)]
fn convert_style(syn_style: SyntectStyle) -> Style {
let mut rt_style = Style::default();
for event in iterator {
match event {
Ok(HighlightEvent::HighlightStart(highlight)) => highlight_stack.push(highlight),
Ok(HighlightEvent::HighlightEnd) => {
highlight_stack.pop();
}
Ok(HighlightEvent::Source { start, end }) => {
if start == end {
continue;
}
let style = highlight_stack.last().map(|h| highlight_for(*h).style());
push_segment(&mut lines, &script[start..end], style);
}
Err(_) => return vec![script.to_string().into()],
}
// Map foreground color when visible.
let fg = syn_style.foreground;
if fg.a > 0 {
rt_style = rt_style.fg(RtColor::Rgb(fg.r, fg.g, fg.b));
}
// Intentionally skip background to avoid overwriting terminal bg.
if syn_style.font_style.contains(FontStyle::BOLD) {
rt_style.add_modifier |= Modifier::BOLD;
}
if syn_style.font_style.contains(FontStyle::ITALIC) {
rt_style.add_modifier |= Modifier::ITALIC;
}
if syn_style.font_style.contains(FontStyle::UNDERLINE) {
rt_style.add_modifier |= Modifier::UNDERLINED;
}
if lines.is_empty() {
vec![Line::from("")]
rt_style
}
// -- Syntax lookup ------------------------------------------------------------
/// Try to find a syntect `SyntaxReference` for the given language identifier.
///
/// `lang` is first passed through `normalize_lang`; the strategies below are
/// then tried in order and the first hit wins:
/// 1. By token, using the normalized name.
/// 2. By exact syntax name (e.g. "Rust", "Python").
/// 3. By ASCII-case-insensitive syntax name (e.g. "rust" -> "Rust").
/// 4. By the raw (un-normalized) input as a file extension.
///
/// Returns `None` when no strategy matches.
fn find_syntax(lang: &str) -> Option<&'static SyntaxReference> {
    let ss = syntax_set();
    let normalized = normalize_lang(lang);

    ss.find_syntax_by_token(normalized)
        .or_else(|| ss.find_syntax_by_name(normalized))
        .or_else(|| {
            // Compare in place rather than lower-casing every syntax name
            // into a fresh `String` on each lookup.
            ss.syntaxes()
                .iter()
                .find(|syntax| syntax.name.eq_ignore_ascii_case(normalized))
        })
        .or_else(|| ss.find_syntax_by_extension(lang))
}
// -- Guardrail constants ------------------------------------------------------
/// Skip highlighting for inputs larger than 512 KB to avoid excessive memory
/// and CPU usage. Callers fall back to plain unstyled text.
const MAX_HIGHLIGHT_BYTES: usize = 512 * 1024;
/// Skip highlighting for inputs with more than 10,000 lines.
const MAX_HIGHLIGHT_LINES: usize = 10_000;
// -- Core highlighting --------------------------------------------------------
/// Parse `code` using syntect for `lang` and return per-line styled spans.
///
/// Each inner `Vec` represents one source line and always contains at least
/// one span (an empty raw span for blank lines).
///
/// Returns `None` when:
/// - `code` is empty (callers fall back to the plain-text path),
/// - `code` is larger than `MAX_HIGHLIGHT_BYTES` or has more than
///   `MAX_HIGHLIGHT_LINES` lines,
/// - no syntax can be resolved for `lang`, or
/// - syntect reports an error while highlighting a line.
fn highlight_to_line_spans(code: &str, lang: &str) -> Option<Vec<Vec<Span<'static>>>> {
    // Empty input has nothing to highlight; fall back to the plain text path
    // which correctly produces a single empty Line.
    if code.is_empty() {
        return None;
    }

    // Bail out early for oversized inputs to avoid excessive resource usage.
    // Count actual lines rather than '\n' bytes: a final line without a
    // trailing newline is still a line, so counting newlines alone lets an
    // input of MAX_HIGHLIGHT_LINES + 1 lines through.
    if code.len() > MAX_HIGHLIGHT_BYTES || code.lines().count() > MAX_HIGHLIGHT_LINES {
        return None;
    }

    let syntax = find_syntax(lang)?;
    let syntaxes = syntax_set();
    let mut highlighter = HighlightLines::new(syntax, theme());
    let mut lines: Vec<Vec<Span<'static>>> = Vec::new();

    for line in LinesWithEndings::from(code) {
        let ranges = highlighter.highlight_line(line, syntaxes).ok()?;
        let mut spans: Vec<Span<'static>> = ranges
            .into_iter()
            .filter_map(|(style, text)| {
                // Strip trailing newline since we handle line breaks ourselves.
                let text = text.trim_end_matches('\n');
                (!text.is_empty()).then(|| Span::styled(text.to_string(), convert_style(style)))
            })
            .collect();
        // Keep blank source lines representable as a line with one empty span.
        if spans.is_empty() {
            spans.push(Span::raw(String::new()));
        }
        lines.push(spans);
    }

    Some(lines)
}
// -- Public API ---------------------------------------------------------------
/// Highlight code in any supported language, returning styled ratatui Lines.
///
/// Falls back to plain unstyled text, one `Line` per source line, whenever
/// `highlight_to_line_spans` returns `None` (unrecognized language, empty
/// input, or input over the size limits). Empty input yields a single empty
/// `Line`.
pub(crate) fn highlight_code_to_lines(code: &str, lang: &str) -> Vec<Line<'static>> {
    match highlight_to_line_spans(code, lang) {
        Some(line_spans) => line_spans.into_iter().map(Line::from).collect(),
        None => {
            // A trailing '\n' terminates the last line; it does not start a
            // new one. Drop it before splitting so the fallback produces the
            // same number of lines as the highlighted path instead of a
            // phantom empty line at the end.
            let body = code.strip_suffix('\n').unwrap_or(code);
            body.split('\n')
                .map(|l| Line::from(l.to_string()))
                .collect()
        }
    }
}
/// Backward-compatible wrapper for bash highlighting used by exec cells.
///
/// Equivalent to calling `highlight_code_to_lines` with the language fixed
/// to `"bash"`.
pub(crate) fn highlight_bash_to_lines(script: &str) -> Vec<Line<'static>> {
highlight_code_to_lines(script, "bash")
}
/// Highlight code and return per-line styled spans for diff integration.
///
/// Thin wrapper over `highlight_to_line_spans`, so it returns `None` in the
/// same cases: the language is unsupported, `code` is empty, `code` exceeds
/// the size or line-count limits, or highlighting a line fails.
pub(crate) fn highlight_code_to_styled_spans(
code: &str,
lang: &str,
) -> Option<Vec<Vec<Span<'static>>>> {
highlight_to_line_spans(code, lang)
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use ratatui::style::Modifier;
/// Reconstruct plain text from highlighted Lines.
fn reconstructed(lines: &[Line<'static>]) -> String {
lines
.iter()
@@ -162,75 +224,207 @@ mod tests {
.join("\n")
}
fn dimmed_tokens(lines: &[Line<'static>]) -> Vec<String> {
lines
.iter()
.flat_map(|l| l.spans.iter())
.filter(|sp| sp.style.add_modifier.contains(Modifier::DIM))
.map(|sp| sp.content.clone().into_owned())
.map(|token| token.trim().to_string())
.filter(|token| !token.is_empty())
.collect()
/// The `fn` keyword in a Rust snippet must come back as its own span with
/// some styling (a foreground color or a modifier) applied.
#[test]
fn highlight_rust_has_keyword_style() {
    let source = "fn main() {}";
    let highlighted = highlight_code_to_lines(source, "rust");
    assert_eq!(reconstructed(&highlighted), source);

    // Locate the span holding exactly `fn` and keep only its style.
    let keyword_style = highlighted[0]
        .spans
        .iter()
        .find(|span| span.content.as_ref() == "fn")
        .map(|span| span.style);
    assert!(keyword_style.is_some(), "expected a span containing 'fn'");

    let style = keyword_style.unwrap_or_default();
    assert!(
        style.fg.is_some() || !style.add_modifier.is_empty(),
        "expected fn keyword to have non-default style, got {style:?}"
    );
}
// Checks that highlighting preserves the script text and that `&&` and `|`
// are among the dimmed tokens while `echo` is not.
#[test]
fn dims_expected_bash_operators() {
let s = "echo foo && bar || baz | qux & (echo hi)";
let lines = highlight_bash_to_lines(s);
assert_eq!(reconstructed(&lines), s);
let dimmed = dimmed_tokens(&lines);
assert!(dimmed.contains(&"&&".to_string()));
assert!(dimmed.contains(&"|".to_string()));
assert!(!dimmed.contains(&"echo".to_string()));
}
// Checks that highlighting preserves the script text and that the `>`
// redirect and both quoted strings are among the dimmed tokens.
#[test]
fn dims_redirects_and_strings() {
let s = "echo \"hi\" > out.txt; echo 'ok'";
let lines = highlight_bash_to_lines(s);
assert_eq!(reconstructed(&lines), s);
let dimmed = dimmed_tokens(&lines);
assert!(dimmed.contains(&">".to_string()));
assert!(dimmed.contains(&"\"hi\"".to_string()));
assert!(dimmed.contains(&"'ok'".to_string()));
}
#[test]
fn highlights_command_and_strings() {
let s = "echo \"hi\"";
let lines = highlight_bash_to_lines(s);
let mut echo_style = None;
let mut string_style = None;
for span in &lines[0].spans {
let text = span.content.as_ref();
if text == "echo" {
echo_style = Some(span.style);
}
if text == "\"hi\"" {
string_style = Some(span.style);
fn highlight_unknown_lang_falls_back() {
let code = "some random text";
let lines = highlight_code_to_lines(code, "xyzlang");
assert_eq!(reconstructed(&lines), code);
// Should be plain text with no styling.
for line in &lines {
for span in &line.spans {
assert_eq!(
span.style,
Style::default(),
"expected default style for unknown language"
);
}
}
let echo_style = echo_style.expect("echo span missing");
let string_style = string_style.expect("string span missing");
assert!(echo_style.fg.is_none());
assert!(!echo_style.add_modifier.contains(Modifier::DIM));
assert!(string_style.add_modifier.contains(Modifier::DIM));
}
#[test]
fn highlights_heredoc_body_as_string() {
let s = "cat <<EOF\nheredoc body\nEOF";
let lines = highlight_bash_to_lines(s);
let body_line = &lines[1];
let mut body_style = None;
for span in &body_line.spans {
if span.content.as_ref() == "heredoc body" {
body_style = Some(span.style);
}
fn highlight_empty_string() {
let lines = highlight_code_to_lines("", "rust");
assert_eq!(lines.len(), 1);
assert_eq!(reconstructed(&lines), "");
}
/// Highlighting a bash one-liner must not add, drop, or reorder any text.
#[test]
fn highlight_bash_preserves_content() {
    let script = "echo \"hello world\" && ls -la | grep foo";
    let highlighted = highlight_bash_to_lines(script);
    let round_tripped = reconstructed(&highlighted);
    assert_eq!(round_tripped, script);
}
/// Each common alias maps to its canonical name, and a name that is already
/// canonical passes through unchanged.
#[test]
fn normalize_lang_aliases() {
    let cases = [
        ("js", "javascript"),
        ("jsx", "javascript"),
        ("ts", "typescript"),
        ("py", "python"),
        ("rb", "ruby"),
        ("rs", "rust"),
        ("c++", "cpp"),
        ("cc", "cpp"),
        ("yml", "yaml"),
        ("sh", "bash"),
        ("zsh", "bash"),
        ("shell", "bash"),
        ("kt", "kotlin"),
        ("md", "markdown"),
        ("rust", "rust"),
    ];
    for (alias, canonical) in cases {
        assert_eq!(normalize_lang(alias), canonical);
    }
}
/// `convert_style` keeps the foreground color and the bold/italic flags,
/// ignores the background, and does not invent an underline.
#[test]
#[allow(clippy::disallowed_methods)]
fn style_conversion_correctness() {
    use syntect::highlighting::Color as SynColor;

    let orange = SynColor {
        r: 255,
        g: 128,
        b: 0,
        a: 255,
    };
    let black = SynColor {
        r: 0,
        g: 0,
        b: 0,
        a: 255,
    };
    let converted = convert_style(SyntectStyle {
        foreground: orange,
        background: black,
        font_style: FontStyle::BOLD | FontStyle::ITALIC,
    });

    assert_eq!(converted.fg, Some(RtColor::Rgb(255, 128, 0)));
    // Background is intentionally skipped.
    assert_eq!(converted.bg, None);

    let modifiers = converted.add_modifier;
    assert!(modifiers.contains(Modifier::BOLD));
    assert!(modifiers.contains(Modifier::ITALIC));
    assert!(!modifiers.contains(Modifier::UNDERLINED));
}
/// A three-line Python snippet round-trips exactly and yields three Lines.
#[test]
fn highlight_multiline_python() {
    let source = "def hello():\n    print(\"hi\")\n    return 42";
    let highlighted = highlight_code_to_lines(source, "python");
    let round_tripped = reconstructed(&highlighted);
    assert_eq!(round_tripped, source);
    assert_eq!(highlighted.len(), 3);
}
/// An unrecognized language identifier produces `None`.
#[test]
fn highlight_code_to_styled_spans_returns_none_for_unknown() {
    let result = highlight_code_to_styled_spans("x", "xyzlang");
    assert!(result.is_none());
}
/// A recognized language produces `Some` with at least one line of spans.
#[test]
fn highlight_code_to_styled_spans_returns_some_for_known() {
    let highlighted = highlight_code_to_styled_spans("let x = 1;", "rust");
    assert!(highlighted.is_some());
    assert!(highlighted.is_some_and(|line_spans| !line_spans.is_empty()));
}
/// Markdown containing a fenced block must round-trip without any change to
/// its text.
#[test]
fn highlight_markdown_preserves_content() {
    let source = "```sh\nprintf 'fenced within fenced\\n'\n```";
    let round_tripped = reconstructed(&highlight_code_to_lines(source, "markdown"));
    assert_eq!(
        round_tripped, source,
        "markdown highlighting must preserve content exactly"
    );
}
/// One byte over `MAX_HIGHLIGHT_BYTES` is enough to skip highlighting: the
/// span API returns `None` so callers use the plain-text fallback.
#[test]
fn highlight_large_input_falls_back() {
    let oversized_len = MAX_HIGHLIGHT_BYTES + 1;
    let oversized = "x".repeat(oversized_len);
    assert!(
        highlight_code_to_styled_spans(&oversized, "rust").is_none(),
        "oversized input should fall back to None"
    );
}
/// One line over `MAX_HIGHLIGHT_LINES` is enough to skip highlighting.
#[test]
fn highlight_many_lines_falls_back() {
    let line_count = MAX_HIGHLIGHT_LINES + 1;
    let oversized = "let x = 1;\n".repeat(line_count);
    assert!(
        highlight_code_to_styled_spans(&oversized, "rust").is_none(),
        "too many lines should fall back to None"
    );
}
/// The later-added aliases and canonical passthroughs normalize as expected.
#[test]
fn normalize_lang_new_aliases() {
    let cases = [
        ("go", "go"),
        ("golang", "go"),
        ("c", "c"),
        ("h", "c"),
        ("hpp", "cpp"),
        ("hxx", "cpp"),
        ("hh", "cpp"),
        ("tsx", "tsx"),
        ("sql", "sql"),
        ("lua", "lua"),
        ("zig", "zig"),
        ("swift", "swift"),
        ("java", "java"),
    ];
    for (input, expected) in cases {
        assert_eq!(normalize_lang(input), expected);
    }
}
// Exercises `find_syntax` three ways: canonical names that must resolve, raw
// extensions that must resolve, and names that are expected to return `None`.
#[test]
fn find_syntax_resolves_all_canonical_languages() {
// Every canonical name that normalize_lang produces AND that syntect's
// default syntax set supports must resolve. Note: syntect's defaults
// do NOT include TypeScript, TSX, Kotlin, Swift, or Zig, so those are
// intentionally omitted here (they gracefully fall back to plain text).
let canonical = [
"javascript",
"python",
"ruby",
"rust",
"go",
"c",
"cpp",
"yaml",
"bash",
"markdown",
"sql",
"lua",
"java",
];
for lang in canonical {
assert!(
find_syntax(lang).is_some(),
"find_syntax({lang:?}) returned None — syntect cannot resolve this canonical name"
);
}
// Also verify common raw extensions resolve.
let extensions = ["rs", "py", "js", "rb", "go", "sh", "md", "yml"];
for ext in extensions {
assert!(
find_syntax(ext).is_some(),
"find_syntax({ext:?}) returned None — extension lookup failed"
);
}
// Unsupported languages should return None (graceful fallback).
let unsupported = ["typescript", "tsx", "kotlin", "swift", "zig"];
for lang in unsupported {
assert!(
find_syntax(lang).is_none(),
"find_syntax({lang:?}) unexpectedly returned Some — update test if syntect added support"
);
}
let body_style = body_style.expect("missing heredoc span");
assert!(body_style.add_modifier.contains(Modifier::DIM));
}
}