mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
## 🐛 Problem Users running commands with non-ASCII characters (like Russian text "пример") in Windows/WSL environments experience garbled text in VSCode's shell preview window, with Unicode replacement characters (�) appearing instead of the actual text. **Issue**: https://github.com/openai/codex/issues/6178 ## 🔧 Root Cause The issue was in `StreamOutput<Vec<u8>>::from_utf8_lossy()` method in `codex-rs/core/src/exec.rs`, which used `String::from_utf8_lossy()` to convert shell output bytes to strings. This function immediately replaces any invalid UTF-8 byte sequences with replacement characters, without attempting to decode using other common encodings. In Windows/WSL environments, shell output often uses encodings like: - Windows-1252 (common Windows encoding) - Latin-1/ISO-8859-1 (extended ASCII) ## 🛠️ Solution Replaced the simple `String::from_utf8_lossy()` call with intelligent encoding detection via a new `bytes_to_string_smart()` function that tries multiple encoding strategies: 1. **UTF-8** (fast path for valid UTF-8) 2. **Windows-1252** (handles Windows-specific characters in 0x80-0x9F range) 3. **Latin-1** (fallback for extended ASCII) 4. **Lossy UTF-8** (final fallback, same as before) ## 📁 Changes ### New Files - `codex-rs/core/src/text_encoding.rs` - Smart encoding detection module - `codex-rs/core/tests/suite/text_encoding_fix.rs` - Integration tests ### Modified Files - `codex-rs/core/src/lib.rs` - Added text_encoding module - `codex-rs/core/src/exec.rs` - Updated StreamOutput::from_utf8_lossy() - `codex-rs/core/tests/suite/mod.rs` - Registered new test module ## ✅ Testing - **5 unit tests** covering UTF-8, Windows-1252, Latin-1, and fallback scenarios - **2 integration tests** simulating the exact Issue #6178 scenario - **Demonstrates improvement** over the previous `String::from_utf8_lossy()` approach All tests pass: ```bash cargo test -p codex-core text_encoding cargo test -p codex-core test_shell_output_encoding_issue_6178 ``` ## 🎯 Impact - ✅ **Eliminates garbled text** in VSCode shell preview for non-ASCII content - ✅ **Supports Windows/WSL environments** with proper encoding detection - ✅ **Zero performance impact** for UTF-8 text (fast path) - ✅ **Backward compatible** - UTF-8 content works exactly as before - ✅ **Handles edge cases** with robust fallback mechanism ## 🧪 Test Scenarios The fix has been tested with: - Russian text ("пример") - Windows-1252 quotation marks (""test") - Latin-1 accented characters ("café") - Mixed encoding content - Invalid byte sequences (graceful fallback) ## 📋 Checklist - [X] Addresses the reported issue - [X] Includes comprehensive tests - [X] Maintains backward compatibility - [X] Follows project coding conventions - [X] No breaking changes --------- Co-authored-by: Josh McKinney <joshka@openai.com>
131 lines
4.1 KiB
TOML
131 lines
4.1 KiB
TOML
[package]
|
|
edition = "2024"
|
|
name = "codex-core"
|
|
version = { workspace = true }
|
|
|
|
[lib]
|
|
doctest = false
|
|
name = "codex_core"
|
|
path = "src/lib.rs"
|
|
|
|
[lints]
|
|
workspace = true
|
|
|
|
[dependencies]
|
|
anyhow = { workspace = true }
|
|
askama = { workspace = true }
|
|
async-channel = { workspace = true }
|
|
async-trait = { workspace = true }
|
|
base64 = { workspace = true }
|
|
bytes = { workspace = true }
|
|
chrono = { workspace = true, features = ["serde"] }
|
|
chardetng = { workspace = true }
|
|
codex-app-server-protocol = { workspace = true }
|
|
codex-apply-patch = { workspace = true }
|
|
codex-async-utils = { workspace = true }
|
|
codex-execpolicy = { workspace = true }
|
|
codex-file-search = { workspace = true }
|
|
codex-git = { workspace = true }
|
|
codex-keyring-store = { workspace = true }
|
|
codex-otel = { workspace = true, features = ["otel"] }
|
|
codex-protocol = { workspace = true }
|
|
codex-rmcp-client = { workspace = true }
|
|
codex-utils-pty = { workspace = true }
|
|
codex-utils-readiness = { workspace = true }
|
|
codex-utils-string = { workspace = true }
|
|
codex-utils-tokenizer = { workspace = true }
|
|
codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
|
|
dirs = { workspace = true }
|
|
dunce = { workspace = true }
|
|
env-flags = { workspace = true }
|
|
encoding_rs = { workspace = true }
|
|
eventsource-stream = { workspace = true }
|
|
futures = { workspace = true }
|
|
http = { workspace = true }
|
|
indexmap = { workspace = true }
|
|
keyring = { workspace = true, features = ["crypto-rust"] }
|
|
libc = { workspace = true }
|
|
mcp-types = { workspace = true }
|
|
os_info = { workspace = true }
|
|
rand = { workspace = true }
|
|
regex-lite = { workspace = true }
|
|
reqwest = { workspace = true, features = ["json", "stream"] }
|
|
serde = { workspace = true, features = ["derive"] }
|
|
serde_json = { workspace = true }
|
|
sha1 = { workspace = true }
|
|
sha2 = { workspace = true }
|
|
shlex = { workspace = true }
|
|
similar = { workspace = true }
|
|
strum_macros = { workspace = true }
|
|
tempfile = { workspace = true }
|
|
test-case = "3.3.1"
|
|
test-log = { workspace = true }
|
|
thiserror = { workspace = true }
|
|
time = { workspace = true, features = [
|
|
"formatting",
|
|
"parsing",
|
|
"local-offset",
|
|
"macros",
|
|
] }
|
|
tokio = { workspace = true, features = [
|
|
"io-std",
|
|
"macros",
|
|
"process",
|
|
"rt-multi-thread",
|
|
"signal",
|
|
] }
|
|
tokio-util = { workspace = true, features = ["rt"] }
|
|
toml = { workspace = true }
|
|
toml_edit = { workspace = true }
|
|
tracing = { workspace = true, features = ["log"] }
|
|
tree-sitter = { workspace = true }
|
|
tree-sitter-bash = { workspace = true }
|
|
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
|
|
which = { workspace = true }
|
|
wildmatch = { workspace = true }
|
|
|
|
|
|
[target.'cfg(target_os = "linux")'.dependencies]
|
|
landlock = { workspace = true }
|
|
seccompiler = { workspace = true }
|
|
keyring = { workspace = true, features = ["linux-native-async-persistent"] }
|
|
|
|
[target.'cfg(target_os = "macos")'.dependencies]
|
|
core-foundation = "0.9"
|
|
keyring = { workspace = true, features = ["apple-native"] }
|
|
|
|
# Build OpenSSL from source for musl builds.
|
|
[target.x86_64-unknown-linux-musl.dependencies]
|
|
openssl-sys = { workspace = true, features = ["vendored"] }
|
|
|
|
# Build OpenSSL from source for musl builds.
|
|
[target.aarch64-unknown-linux-musl.dependencies]
|
|
openssl-sys = { workspace = true, features = ["vendored"] }
|
|
|
|
[target.'cfg(target_os = "windows")'.dependencies]
|
|
keyring = { workspace = true, features = ["windows-native"] }
|
|
|
|
[target.'cfg(any(target_os = "freebsd", target_os = "openbsd"))'.dependencies]
|
|
keyring = { workspace = true, features = ["sync-secret-service"] }
|
|
|
|
[dev-dependencies]
|
|
assert_cmd = { workspace = true }
|
|
assert_matches = { workspace = true }
|
|
codex-arg0 = { workspace = true }
|
|
core_test_support = { workspace = true }
|
|
ctor = { workspace = true }
|
|
escargot = { workspace = true }
|
|
image = { workspace = true, features = ["jpeg", "png"] }
|
|
maplit = { workspace = true }
|
|
predicates = { workspace = true }
|
|
pretty_assertions = { workspace = true }
|
|
serial_test = { workspace = true }
|
|
tempfile = { workspace = true }
|
|
tokio-test = { workspace = true }
|
|
tracing-test = { workspace = true, features = ["no-env-filter"] }
|
|
walkdir = { workspace = true }
|
|
wiremock = { workspace = true }
|
|
|
|
[package.metadata.cargo-shear]
|
|
ignored = ["openssl-sys"]
|