diff --git a/deps/common/src/logseq/common/util.cljs b/deps/common/src/logseq/common/util.cljs index 558b0e8829..4f4eb26c34 100644 --- a/deps/common/src/logseq/common/util.cljs +++ b/deps/common/src/logseq/common/util.cljs @@ -18,12 +18,6 @@ (log/error :decode-uri-component-failed uri) uri))) -(defn safe-url-decode - [string] - (if (string/includes? string "%") - (some-> string str safe-decode-uri-component) - string)) - (defn path-normalize "Normalize file path (for reading paths from FS, not required by writing) Keep capitalization senstivity" @@ -129,12 +123,7 @@ result)) (map string/trim)))) -(defn decode-namespace-underlines - "Decode namespace underlines to slashed; - If continuous underlines, only decode at start; - Having empty namespace is invalid." - [string] - (string/replace string "___" "/")) +(def url-encoded-pattern #"(?i)%[0-9a-f]{2}") ;; (?i) for case-insensitive mode (defn page-name-sanity "Sanitize the page-name. Unify different diacritics and other visual differences. @@ -146,24 +135,6 @@ (remove-boundary-slashes) (path-normalize))) -(defn make-valid-namespaces - "Remove those empty namespaces from title to make it a valid page name." - [title] - (->> (string/split title "/") - (remove empty?) - (string/join "/"))) - -(def url-encoded-pattern #"(?i)%[0-9a-f]{2}") ;; (?i) for case-insensitive mode - -(defn- tri-lb-title-parsing - "Parsing file name under the new file name format - Avoid calling directly" - [file-name] - (some-> file-name - (decode-namespace-underlines) - (string/replace url-encoded-pattern safe-url-decode) - (make-valid-namespaces))) - (defn page-name-sanity-lc "Sanitize the query string for a page name (mandate for :block/name)" [s] @@ -202,22 +173,6 @@ ;; default (keyword format))) -(defn path->file-name - ;; Only for internal paths, as they are converted to POXIS already - ;; https://github.com/logseq/logseq/blob/48b8e54e0fdd8fbd2c5d25b7f1912efef8814714/deps/graph-parser/src/logseq/graph_parser/extract.cljc#L32 - ;; Should be converted to POXIS first for external paths - [path] - (if (string/includes? path "/") - (last (split-last "/" path)) - path)) - -(defn path->file-body - [path] - (when-let [file-name (path->file-name path)] - (if (string/includes? file-name ".") - (first (split-last "." file-name)) - file-name))) - (defn path->file-ext [path-or-file-name] (second (re-find #"(?:\.)(\w+)[^.]*$" path-or-file-name))) @@ -245,27 +200,6 @@ (catch :default _ false))) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Keep for backward compatibility ;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Rule of dir-ver 0 -;; Source: https://github.com/logseq/logseq/blob/e7110eea6790eda5861fdedb6b02c2a78b504cd9/deps/graph-parser/src/logseq/graph_parser/extract.cljc#L35 -(defn legacy-title-parsing - [file-name-body] - (let [title (string/replace file-name-body "." "/")] - (or (safe-decode-uri-component title) title))) - -;; Register sanitization / parsing fns in: -;; logseq.common.util (parsing only) -;; frontend.util.fs (sanitization only) -(defn title-parsing - "Convert file name in the given file name format to page title" - [file-name-body filename-format] - (case filename-format - :triple-lowbar (tri-lb-title-parsing file-name-body) - (legacy-title-parsing file-name-body))) - (defn safe-read-string ([content] (safe-read-string {} content)) diff --git a/deps/common/test/logseq/common/util_test.cljs b/deps/common/test/logseq/common/util_test.cljs index fc42fdf22a..98d97df6af 100644 --- a/deps/common/test/logseq/common/util_test.cljs +++ b/deps/common/test/logseq/common/util_test.cljs @@ -27,7 +27,9 @@ "C:\\Users\\foo\\Documents\\audio.mp3" "mp3" "/root/Documents/audio" nil "/root/Documents/audio." nil - "special/characters/aäääöüß.7z" "7z")) + "special/characters/aäääöüß.7z" "7z" + "asldk lakls .lsad" "lsad" + "中文asldk lakls .lsad" "lsad")) (deftest url? (are [x y] diff --git a/deps/graph-parser/src/logseq/graph_parser/extract.cljc b/deps/graph-parser/src/logseq/graph_parser/extract.cljc index 011af5bf11..d34d887785 100644 --- a/deps/graph-parser/src/logseq/graph_parser/extract.cljc +++ b/deps/graph-parser/src/logseq/graph_parser/extract.cljc @@ -27,6 +27,66 @@ (common-util/safe-decode-uri-component (string/replace result "." "/")) result)))) +(defn- path->file-name + ;; Only for internal paths, as they are converted to POXIS already + ;; https://github.com/logseq/logseq/blob/48b8e54e0fdd8fbd2c5d25b7f1912efef8814714/deps/graph-parser/src/logseq/graph_parser/extract.cljc#L32 + ;; Should be converted to POXIS first for external paths + [path] + (if (string/includes? path "/") + (last (common-util/split-last "/" path)) + path)) + +(defn- path->file-body + [path] + (when-let [file-name (path->file-name path)] + (if (string/includes? file-name ".") + (first (common-util/split-last "." file-name)) + file-name))) + +(defn- safe-url-decode + [string] + (if (string/includes? string "%") + (some-> string str common-util/safe-decode-uri-component) + string)) + +(defn- decode-namespace-underlines + "Decode namespace underlines to slashed; + If continuous underlines, only decode at start; + Having empty namespace is invalid." + [string] + (string/replace string "___" "/")) + +(defn- make-valid-namespaces + "Remove those empty namespaces from title to make it a valid page name." + [title] + (->> (string/split title "/") + (remove empty?) + (string/join "/"))) + +(defn- tri-lb-title-parsing + "Parsing file name under the new file name format + Avoid calling directly" + [file-name] + (some-> file-name + (decode-namespace-underlines) + (string/replace common-util/url-encoded-pattern safe-url-decode) + (make-valid-namespaces))) + +;; Keep for backward compatibility +;; Rule of dir-ver 0 +;; Source: https://github.com/logseq/logseq/blob/e7110eea6790eda5861fdedb6b02c2a78b504cd9/deps/graph-parser/src/logseq/graph_parser/extract.cljc#L35 +(defn- legacy-title-parsing + [file-name-body] + (let [title (string/replace file-name-body "." "/")] + (or (common-util/safe-decode-uri-component title) title))) + +(defn title-parsing + "Convert file name in the given file name format to page title" + [file-name-body filename-format] + (case filename-format + :triple-lowbar (tri-lb-title-parsing file-name-body) + (legacy-title-parsing file-name-body))) + (defn- get-page-name "Get page name with overridden order of `title::` property @@ -54,9 +114,9 @@ (and first-block (string? title) title)) - file-name (when-let [result (common-util/path->file-body file)] + file-name (when-let [result (path->file-body file)] (if (common-config/mldoc-support? (common-util/get-file-ext file)) - (common-util/title-parsing result filename-format) + (title-parsing result filename-format) result))] (or property-name file-name diff --git a/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs b/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs index dca75a5d26..704bfb56b3 100644 --- a/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs +++ b/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs @@ -3,6 +3,44 @@ [logseq.graph-parser.extract :as extract] [clojure.pprint :as pprint])) +;; This is a copy of frontend.util.fs/multiplatform-reserved-chars for reserved chars testing +(def multiplatform-reserved-chars ":\\*\\?\"<>|\\#\\\\") + +;; Stuffs should be parsable (don't crash) when users dump some random files +(deftest page-name-parsing-tests + (is (string? (#'extract/tri-lb-title-parsing "___-_-_-_---___----"))) + (is (string? (#'extract/tri-lb-title-parsing "_____///____---___----"))) + (is (string? (#'extract/tri-lb-title-parsing "/_/////---/_----"))) + (is (string? (#'extract/tri-lb-title-parsing "/\\#*%lasdf\\//__--dsll_____----....-._0x2B"))) + (is (string? (#'extract/tri-lb-title-parsing "/\\#*%l;;&&;&\\//__--dsll_____----....-._0x2B"))) + (is (string? (#'extract/tri-lb-title-parsing multiplatform-reserved-chars))) + (is (string? (#'extract/tri-lb-title-parsing "dsa&;l dsalfjk jkl")))) + +(deftest uri-decoding-tests + (is (= (#'extract/safe-url-decode "%*-sd%%%saf%=lks") "%*-sd%%%saf%=lks")) ;; Contains %, but invalid + (is (= (#'extract/safe-url-decode "%2FDownloads%2FCNN%3AIs%5CAll%3AYou%20Need.pdf") "/Downloads/CNN:Is\\All:You Need.pdf")) + (is (= (#'extract/safe-url-decode "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费;dla") "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费;dla"))) + +(deftest page-name-sanitization-backward-tests + (is (= "abc.def.ghi.jkl" (#'extract/tri-lb-title-parsing "abc.def.ghi.jkl"))) + (is (= "abc/def/ghi/jkl" (#'extract/tri-lb-title-parsing "abc%2Fdef%2Fghi%2Fjkl"))) + (is (= "abc%/def/ghi/jkl" (#'extract/tri-lb-title-parsing "abc%25%2Fdef%2Fghi%2Fjkl"))) + (is (= "abc%2——ef/ghi/jkl" (#'extract/tri-lb-title-parsing "abc%2——ef%2Fghi%2Fjkl"))) + (is (= "abc&2Fghi/jkl" (#'extract/tri-lb-title-parsing "abc&2Fghi%2Fjkl"))) + (is (= "abc<2Fghi/jkl" (#'extract/tri-lb-title-parsing "abc<2Fghi%2Fjkl"))) + (is (= "abc%2Fghi/jkl" (#'extract/tri-lb-title-parsing "abc%2Fghi%2Fjkl"))) + (is (= "abc;&;2Fghi/jkl" (#'extract/tri-lb-title-parsing "abc;&;2Fghi%2Fjkl"))) + ;; happens when importing some compatible files on *nix / macOS + (is (= multiplatform-reserved-chars (#'extract/tri-lb-title-parsing multiplatform-reserved-chars)))) + +(deftest path-utils-tests + (is (= "asldk lakls " (#'extract/path->file-body "/data/app/asldk lakls .lsad"))) + (is (= "asldk lakls " (#'extract/path->file-body "asldk lakls .lsad"))) + (is (= "asldk lakls" (#'extract/path->file-body "asldk lakls"))) + (is (= "asldk lakls" (#'extract/path->file-body "/data/app/asldk lakls"))) + (is (= "asldk lakls" (#'extract/path->file-body "file://data/app/asldk lakls.as"))) + (is (= "中文asldk lakls" (#'extract/path->file-body "file://中文data/app/中文asldk lakls.as")))) + (defn- extract [text] (let [{:keys [blocks]} (extract/extract "a.md" text {:block-pattern "-"}) diff --git a/deps/graph-parser/test/logseq/graph_parser/util/file_name_test.cljs b/deps/graph-parser/test/logseq/graph_parser/util/file_name_test.cljs deleted file mode 100644 index d628a006e1..0000000000 --- a/deps/graph-parser/test/logseq/graph_parser/util/file_name_test.cljs +++ /dev/null @@ -1,43 +0,0 @@ -(ns logseq.graph-parser.util.file-name-test - (:require [logseq.common.util :as common-util] - [cljs.test :refer [is deftest]])) - -;; This is a copy of frontend.util.fs/multiplatform-reserved-chars for reserved chars testing -(def multiplatform-reserved-chars ":\\*\\?\"<>|\\#\\\\") - -;; Stuffs should be parsable (don't crash) when users dump some random files -(deftest page-name-parsing-tests - (is (string? (#'common-util/tri-lb-title-parsing "___-_-_-_---___----"))) - (is (string? (#'common-util/tri-lb-title-parsing "_____///____---___----"))) - (is (string? (#'common-util/tri-lb-title-parsing "/_/////---/_----"))) - (is (string? (#'common-util/tri-lb-title-parsing "/\\#*%lasdf\\//__--dsll_____----....-._0x2B"))) - (is (string? (#'common-util/tri-lb-title-parsing "/\\#*%l;;&&;&\\//__--dsll_____----....-._0x2B"))) - (is (string? (#'common-util/tri-lb-title-parsing multiplatform-reserved-chars))) - (is (string? (#'common-util/tri-lb-title-parsing "dsa&;l dsalfjk jkl")))) - -(deftest uri-decoding-tests - (is (= (common-util/safe-url-decode "%*-sd%%%saf%=lks") "%*-sd%%%saf%=lks")) ;; Contains %, but invalid - (is (= (common-util/safe-url-decode "%2FDownloads%2FCNN%3AIs%5CAll%3AYou%20Need.pdf") "/Downloads/CNN:Is\\All:You Need.pdf")) - (is (= (common-util/safe-url-decode "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费;dla") "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费;dla"))) - -(deftest page-name-sanitization-backward-tests - (is (= "abc.def.ghi.jkl" (#'common-util/tri-lb-title-parsing "abc.def.ghi.jkl"))) - (is (= "abc/def/ghi/jkl" (#'common-util/tri-lb-title-parsing "abc%2Fdef%2Fghi%2Fjkl"))) - (is (= "abc%/def/ghi/jkl" (#'common-util/tri-lb-title-parsing "abc%25%2Fdef%2Fghi%2Fjkl"))) - (is (= "abc%2——ef/ghi/jkl" (#'common-util/tri-lb-title-parsing "abc%2——ef%2Fghi%2Fjkl"))) - (is (= "abc&2Fghi/jkl" (#'common-util/tri-lb-title-parsing "abc&2Fghi%2Fjkl"))) - (is (= "abc<2Fghi/jkl" (#'common-util/tri-lb-title-parsing "abc<2Fghi%2Fjkl"))) - (is (= "abc%2Fghi/jkl" (#'common-util/tri-lb-title-parsing "abc%2Fghi%2Fjkl"))) - (is (= "abc;&;2Fghi/jkl" (#'common-util/tri-lb-title-parsing "abc;&;2Fghi%2Fjkl"))) - ;; happens when importing some compatible files on *nix / macOS - (is (= multiplatform-reserved-chars (#'common-util/tri-lb-title-parsing multiplatform-reserved-chars)))) - -(deftest path-utils-tests - (is (= "asldk lakls " (common-util/path->file-body "/data/app/asldk lakls .lsad"))) - (is (= "asldk lakls " (common-util/path->file-body "asldk lakls .lsad"))) - (is (= "asldk lakls" (common-util/path->file-body "asldk lakls"))) - (is (= "asldk lakls" (common-util/path->file-body "/data/app/asldk lakls"))) - (is (= "asldk lakls" (common-util/path->file-body "file://data/app/asldk lakls.as"))) - (is (= "中文asldk lakls" (common-util/path->file-body "file://中文data/app/中文asldk lakls.as"))) - (is (= "lsad" (common-util/path->file-ext "asldk lakls .lsad"))) - (is (= "lsad" (common-util/path->file-ext "中文asldk lakls .lsad")))) diff --git a/src/test/frontend/db/name_sanity_test.cljs b/src/test/frontend/db/name_sanity_test.cljs index e81171c6d0..7d34763fae 100644 --- a/src/test/frontend/db/name_sanity_test.cljs +++ b/src/test/frontend/db/name_sanity_test.cljs @@ -1,7 +1,7 @@ (ns frontend.db.name-sanity-test (:require [cljs.test :refer [deftest testing is]] [clojure.string :as string] - [logseq.common.util :as common-util] + [logseq.graph-parser.extract :as extract] [frontend.worker.handler.page.rename :as worker-page-rename] [frontend.util.fs :as fs-util] [frontend.worker.file.util :as wfu])) @@ -11,7 +11,7 @@ [page-name] (testing (str "Test sanitization page-name: " page-name) (let [file-name (#'wfu/tri-lb-file-name-sanity page-name) - page-name' (#'common-util/tri-lb-title-parsing file-name) + page-name' (#'extract/tri-lb-title-parsing file-name) url-single (js/encodeURIComponent file-name) url-double (js/encodeURIComponent url-single) file-name' (js/decodeURIComponent url-single)