mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
refactor(tui): remove normalize_lang, delegate to two-face directly
two-face's ~250 language syntax set resolves almost all names and extensions on its own. Remove the redundant normalize_lang layer and keep only 3 patched aliases (golang, python3, shell) that two-face cannot resolve. This enables highlighting for any language in bat's syntax set (elixir, haskell, scala, dart, php, html, css, etc.) without needing explicit entries.
This commit is contained in:
@@ -34,34 +34,6 @@ fn theme() -> &'static Theme {
|
||||
})
|
||||
}
|
||||
|
||||
// -- Language normalization ---------------------------------------------------
|
||||
|
||||
/// Normalize common language aliases to canonical names that syntect can
|
||||
/// resolve via name or extension lookup.
|
||||
fn normalize_lang(lang: &str) -> &str {
|
||||
match lang {
|
||||
"js" | "jsx" => "javascript",
|
||||
"ts" => "typescript",
|
||||
"tsx" => "tsx",
|
||||
"py" | "python3" => "python",
|
||||
"rb" => "ruby",
|
||||
"rs" => "rust",
|
||||
"go" | "golang" => "go",
|
||||
"c" | "h" => "c",
|
||||
"c++" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => "cpp",
|
||||
"yml" => "yaml",
|
||||
"sh" | "zsh" | "shell" | "bash" => "bash",
|
||||
"kt" => "kotlin",
|
||||
"md" => "markdown",
|
||||
"sql" => "sql",
|
||||
"lua" => "lua",
|
||||
"zig" => "zig",
|
||||
"swift" => "swift",
|
||||
"java" => "java",
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
// -- Style conversion (syntect -> ratatui) ------------------------------------
|
||||
|
||||
/// Convert a syntect `Style` to a ratatui `Style`.
|
||||
@@ -94,25 +66,29 @@ fn convert_style(syn_style: SyntectStyle) -> Style {
|
||||
|
||||
/// Try to find a syntect `SyntaxReference` for the given language identifier.
|
||||
///
|
||||
/// Resolution order:
|
||||
/// 1. By token (matches against file_extensions case-insensitively).
|
||||
/// 2. By exact syntax name (e.g. "Rust", "Python").
|
||||
/// 3. By case-insensitive syntax name (e.g. "rust" -> "Rust").
|
||||
/// 4. By raw (un-normalized) input as file extension.
|
||||
/// two-face's extended syntax set (~250 languages) resolves most names and
|
||||
/// extensions directly. We only patch the few aliases it cannot handle.
|
||||
fn find_syntax(lang: &str) -> Option<&'static SyntaxReference> {
|
||||
let ss = syntax_set();
|
||||
let normalized = normalize_lang(lang);
|
||||
|
||||
// Try by token (matches against file_extensions case-insensitively).
|
||||
if let Some(s) = ss.find_syntax_by_token(normalized) {
|
||||
// Aliases that two-face does not resolve on its own.
|
||||
let patched = match lang {
|
||||
"golang" => "go",
|
||||
"python3" => "python",
|
||||
"shell" => "bash",
|
||||
_ => lang,
|
||||
};
|
||||
|
||||
// Try by token (matches file_extensions case-insensitively).
|
||||
if let Some(s) = ss.find_syntax_by_token(patched) {
|
||||
return Some(s);
|
||||
}
|
||||
// Try by exact syntax name (e.g. "Rust", "Python").
|
||||
if let Some(s) = ss.find_syntax_by_name(normalized) {
|
||||
if let Some(s) = ss.find_syntax_by_name(patched) {
|
||||
return Some(s);
|
||||
}
|
||||
// Try case-insensitive name match (e.g. "rust" -> "Rust").
|
||||
let lower = normalized.to_ascii_lowercase();
|
||||
let lower = patched.to_ascii_lowercase();
|
||||
if let Some(s) = ss
|
||||
.syntaxes()
|
||||
.iter()
|
||||
@@ -120,7 +96,7 @@ fn find_syntax(lang: &str) -> Option<&'static SyntaxReference> {
|
||||
{
|
||||
return Some(s);
|
||||
}
|
||||
// Try raw (un-normalized) input as file extension.
|
||||
// Try raw input as file extension.
|
||||
if let Some(s) = ss.find_syntax_by_extension(lang) {
|
||||
return Some(s);
|
||||
}
|
||||
@@ -274,25 +250,6 @@ mod tests {
|
||||
assert_eq!(reconstructed(&lines), script);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_lang_aliases() {
|
||||
assert_eq!(normalize_lang("js"), "javascript");
|
||||
assert_eq!(normalize_lang("jsx"), "javascript");
|
||||
assert_eq!(normalize_lang("ts"), "typescript");
|
||||
assert_eq!(normalize_lang("py"), "python");
|
||||
assert_eq!(normalize_lang("rb"), "ruby");
|
||||
assert_eq!(normalize_lang("rs"), "rust");
|
||||
assert_eq!(normalize_lang("c++"), "cpp");
|
||||
assert_eq!(normalize_lang("cc"), "cpp");
|
||||
assert_eq!(normalize_lang("yml"), "yaml");
|
||||
assert_eq!(normalize_lang("sh"), "bash");
|
||||
assert_eq!(normalize_lang("zsh"), "bash");
|
||||
assert_eq!(normalize_lang("shell"), "bash");
|
||||
assert_eq!(normalize_lang("kt"), "kotlin");
|
||||
assert_eq!(normalize_lang("md"), "markdown");
|
||||
assert_eq!(normalize_lang("rust"), "rust");
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::disallowed_methods)]
|
||||
fn style_conversion_correctness() {
|
||||
@@ -371,25 +328,9 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_lang_new_aliases() {
|
||||
assert_eq!(normalize_lang("go"), "go");
|
||||
assert_eq!(normalize_lang("golang"), "go");
|
||||
assert_eq!(normalize_lang("c"), "c");
|
||||
assert_eq!(normalize_lang("h"), "c");
|
||||
assert_eq!(normalize_lang("hpp"), "cpp");
|
||||
assert_eq!(normalize_lang("hxx"), "cpp");
|
||||
assert_eq!(normalize_lang("hh"), "cpp");
|
||||
assert_eq!(normalize_lang("tsx"), "tsx");
|
||||
assert_eq!(normalize_lang("sql"), "sql");
|
||||
assert_eq!(normalize_lang("lua"), "lua");
|
||||
assert_eq!(normalize_lang("zig"), "zig");
|
||||
assert_eq!(normalize_lang("swift"), "swift");
|
||||
assert_eq!(normalize_lang("java"), "java");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn find_syntax_resolves_all_canonical_languages() {
|
||||
let canonical = [
|
||||
fn find_syntax_resolves_languages_and_aliases() {
|
||||
// Languages resolved directly by two-face's extended syntax set.
|
||||
let languages = [
|
||||
"javascript",
|
||||
"typescript",
|
||||
"tsx",
|
||||
@@ -408,19 +349,43 @@ mod tests {
|
||||
"zig",
|
||||
"swift",
|
||||
"java",
|
||||
"elixir",
|
||||
"haskell",
|
||||
"scala",
|
||||
"dart",
|
||||
"r",
|
||||
"perl",
|
||||
"php",
|
||||
"html",
|
||||
"css",
|
||||
"json",
|
||||
"toml",
|
||||
"xml",
|
||||
"dockerfile",
|
||||
];
|
||||
for lang in canonical {
|
||||
for lang in languages {
|
||||
assert!(
|
||||
find_syntax(lang).is_some(),
|
||||
"find_syntax({lang:?}) returned None"
|
||||
);
|
||||
}
|
||||
let extensions = ["rs", "py", "js", "ts", "rb", "go", "sh", "md", "yml"];
|
||||
// Common file extensions.
|
||||
let extensions = [
|
||||
"rs", "py", "js", "ts", "rb", "go", "sh", "md", "yml", "kt", "ex", "hs", "pl", "php",
|
||||
"css", "html",
|
||||
];
|
||||
for ext in extensions {
|
||||
assert!(
|
||||
find_syntax(ext).is_some(),
|
||||
"find_syntax({ext:?}) returned None"
|
||||
);
|
||||
}
|
||||
// Patched aliases that two-face cannot resolve on its own.
|
||||
for alias in ["golang", "python3", "shell"] {
|
||||
assert!(
|
||||
find_syntax(alias).is_some(),
|
||||
"find_syntax({alias:?}) returned None — patched alias broken"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user