From 326ae61929a9a271a45f4863e7e4d13a33adbbe7 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 18 Oct 2023 22:08:56 +0700 Subject: [PATCH] Fix encodebase64 and decodebase64 filters (#7683) * Fix encodebase64 and decodebase64 filters The documentation for encodebase64 says that the input is treated as binary data, but in fact the input is being treated as text data, with an extra UTF-8 encoding step being performed first. Likewise, the decodebase64 documentation says that it outputs binary data, but in fact it will do a UTF-8 decoding step before producing output, which will in fact garble binary data. This commit changes the behavior of encodebase64 and decodebase64 to match what the documentation says they do. It also adds an optional `text` suffix to both filters to keep the current behavior. Finally, an optional `urlsafe` suffix is added to both filters to allow them to use the "URL-safe" variant of base64 (using `-` instead of `+` and `_` instead of `/`). * Try to fix failing test Turns out a little more than this is going to be needed. * Fix binary base64 encoding, including unit tests * Update base64 filter documentation * Can't use replaceAll, too new Have to use String.replace with a global regex instead * Replace uses of window.btoa() in rest of code Since window.btoa() is not available under Node.js, we'll replace all uses of it with the $tw.utils.base64encode() function that now works correctly for binary data. 
* Add link to UTF-8 glossary definition at MDN --- core/modules/filters/encodings.js | 8 +++-- core/modules/savers/github.js | 2 +- core/modules/utils/dom/http.js | 2 +- core/modules/utils/utils.js | 31 ++++++++++++++++--- editions/test/tiddlers/tests/test-utils.js | 23 ++++++++++++++ .../filters/decodebase64 Operator.tid | 7 ++++- .../filters/encodebase64 Operator.tid | 7 ++++- 7 files changed, 70 insertions(+), 10 deletions(-) diff --git a/core/modules/filters/encodings.js b/core/modules/filters/encodings.js index 23c743a03e..a43a15f76e 100644 --- a/core/modules/filters/encodings.js +++ b/core/modules/filters/encodings.js @@ -18,16 +18,20 @@ Export our filter functions exports.decodebase64 = function(source,operator,options) { var results = []; + var binary = operator.suffixes && operator.suffixes.indexOf("binary") !== -1; + var urlsafe = operator.suffixes && operator.suffixes.indexOf("urlsafe") !== -1; source(function(tiddler,title) { - results.push($tw.utils.base64Decode(title)); + results.push($tw.utils.base64Decode(title,binary,urlsafe)); }); return results; }; exports.encodebase64 = function(source,operator,options) { var results = []; + var binary = operator.suffixes && operator.suffixes.indexOf("binary") !== -1; + var urlsafe = operator.suffixes && operator.suffixes.indexOf("urlsafe") !== -1; source(function(tiddler,title) { - results.push($tw.utils.base64Encode(title)); + results.push($tw.utils.base64Encode(title,binary,urlsafe)); }); return results; }; diff --git a/core/modules/savers/github.js b/core/modules/savers/github.js index f9b87263d4..c0a34f2d64 100644 --- a/core/modules/savers/github.js +++ b/core/modules/savers/github.js @@ -31,7 +31,7 @@ GitHubSaver.prototype.save = function(text,method,callback) { headers = { "Accept": "application/vnd.github.v3+json", "Content-Type": "application/json;charset=UTF-8", - "Authorization": "Basic " + window.btoa(username + ":" + password), + "Authorization": "Basic " + $tw.utils.base64Encode(username + ":" + 
password), "If-None-Match": "" }; // Bail if we don't have everything we need diff --git a/core/modules/utils/dom/http.js b/core/modules/utils/dom/http.js index 083381d8dd..27c3e65d65 100644 --- a/core/modules/utils/dom/http.js +++ b/core/modules/utils/dom/http.js @@ -187,7 +187,7 @@ HttpClientRequest.prototype.send = function(callback) { for (var i=0; i> operator produces), so only certain byte sequences in the input are valid. If the input is binary data encoded in base64 format (such as an image, audio file, video file, etc.), then use the optional `binary` suffix, which will allow all byte sequences. Note that the output will then be binary, ''not'' text, and should probably not be passed into further filter operators. + +The optional `urlsafe` suffix, if present, causes the decoder to assume that the base64 input uses `-` and `_` instead of `+` and `/` for the 62nd and 63rd characters of the base64 "alphabet", which is usually referred to as "URL-safe base64" or "base64url". <<.operator-examples "decodebase64">> diff --git a/editions/tw5.com/tiddlers/filters/encodebase64 Operator.tid b/editions/tw5.com/tiddlers/filters/encodebase64 Operator.tid index ddbc730785..a7943d726c 100644 --- a/editions/tw5.com/tiddlers/filters/encodebase64 Operator.tid +++ b/editions/tw5.com/tiddlers/filters/encodebase64 Operator.tid @@ -1,6 +1,7 @@ caption: encodebase64 op-input: a [[selection of titles|Title Selection]] op-output: the input with base 64 encoding applied +op-suffix: optional: `binary` to treat input as binary data, `urlsafe` for URL-safe output op-parameter: op-parameter-name: op-purpose: apply base 64 encoding to a string @@ -11,6 +12,10 @@ from-version: 5.2.6 See Mozilla Developer Network for details of [[base 64 encoding|https://developer.mozilla.org/en-US/docs/Glossary/Base64]]. TiddlyWiki uses [[library code from @nijikokun|https://gist.github.com/Nijikokun/5192472]] to handle the conversion. -The input strings are interpreted as binary data. 
The output strings are base64 encoded. +The input strings are interpreted as [[UTF-8 encoded|https://developer.mozilla.org/en-US/docs/Glossary/UTF-8]] text (or binary data instead if the `binary` suffix is present). The output strings are base64 encoded. + +The optional `binary` suffix, if present, causes the input string to be interpreted as binary data instead of text. Normally, an extra UTF-8 encoding step will be added before the base64 output is produced, so that emojis and other Unicode characters will be encoded correctly. If the input is binary data, such as an image, audio file, video, etc., then the UTF-8 encoding step would produce incorrect results, so using the `binary` suffix causes the UTF-8 encoding step to be skipped. + +The optional `urlsafe` suffix, if present, will use the alternate "URL-safe" base64 encoding, where `-` and `_` are used instead of `+` and `/` respectively, allowing the result to be used in URL query parameters or filenames. <<.operator-examples "encodebase64">>