mirror of
https://github.com/TiddlyWiki/TiddlyWiki5.git
synced 2026-04-30 17:37:10 +00:00
Fix incorrect base64 encoding of astral-plane text (#4813)
Most astral-plane text is emojis like U+1F4DA BOOKS (📚), but some
languages like Osage have their alphabet entirely in the supplementary
multilingual plane as well. For proper support of languages like Osage,
and newer emojis, the UTF-8 decode and encode functions need to properly
handle codepoints above U+FFFF, each of which is represented by a
surrogate pair in JavaScript strings.
This commit is contained in:
@@ -25,6 +25,19 @@ describe("Utility tests", function() {
|
||||
expect(psa(" [[Tidd\u00a0ler8]] two ")).toEqual(["Tidd\u00a0ler8","two"]);
|
||||
});
|
||||
|
||||
it("should handle base64 encoding emojis", function() {
	// 📚 is U+1F4DA BOOKS, which lies above U+FFFF and is therefore stored in a
	// JavaScript string as the surrogate pair 0xD83D 0xDCDA (two UTF-16 code units).
	var booksEmoji = "📚";
	// Compare the literal against its escaped form: comparing the variable with
	// itself could never fail, whereas this detects a mis-decoded source file.
	expect(booksEmoji).toBe("\uD83D\uDCDA");
	expect(booksEmoji.length).toBe(2);
	expect(booksEmoji.charCodeAt(0)).toBe(55357); // 0xD83D
	expect(booksEmoji.charCodeAt(1)).toBe(56538); // 0xDCDA
	// Encoding must treat the surrogate pair as one code point (UTF-8 bytes F0 9F 93 9A),
	// not encode each surrogate on its own (bytes ED A0 BD ED B3 9A).
	expect($tw.utils.base64Encode(booksEmoji)).not.toBe("7aC97bOa", "if base64 is 7aC97bOa then surrogate pairs were incorrectly treated as codepoints");
	expect($tw.utils.base64Encode(booksEmoji)).toBe("8J+Tmg==", "if surrogate pairs are correctly treated as a single code point then base64 should be 8J+Tmg==");
	// Decoding must rebuild the surrogate pair, and encode followed by decode must be lossless.
	expect($tw.utils.base64Decode("8J+Tmg==")).toBe(booksEmoji);
	expect($tw.utils.base64Decode($tw.utils.base64Encode(booksEmoji))).toBe(booksEmoji, "should round-trip correctly");
});
|
||||
|
||||
it("should handle stringifying a string array", function() {
|
||||
var str = $tw.utils.stringifyList;
|
||||
expect(str([])).toEqual("");
|
||||
|
||||
Reference in New Issue
Block a user