Files
logseq/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs
Gabriel Horner 538d10a20a fix: rm whiteboard and tldraw from graph-parser
Also disable test block which is causing an intermittent ref issue
to investigate later
2026-01-22 13:19:58 -05:00

131 lines
5.1 KiB
Clojure
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
(ns logseq.graph-parser.extract-test
(:require [cljs.test :refer [deftest is are]]
[datascript.core :as d]
[logseq.graph-parser.extract :as extract]))
;; Copy of frontend.components.repo/multiplatform-reserved-chars, kept here
;; for the reserved-character test cases.
(def multiplatform-reserved-chars
  "Reserved-character string used as input by the title-parsing tests."
  ":\\*\\?\"<>|\\#\\\\")
;; Arbitrary file names dumped into a graph by users must still be parsable:
;; the only requirement here is that parsing yields a string and does not crash.
(deftest page-name-parsing-tests
  (are [raw-name] (string? (#'extract/tri-lb-title-parsing raw-name))
    "___-_-_-_---___----"
    "_____///____---___----"
    "/_/////---/_----"
    "/\\#*%lasdf\\//__--dsll_____----....-._0x2B"
    "/\\#*%l;;&&;&\\//__--dsll_____----....-._0x2B"
    multiplatform-reserved-chars
    "dsa&amp&semi;l dsalfjk jkl"))
;; Table of [encoded decoded] pairs for the private safe-url-decode helper.
(deftest uri-decoding-tests
  (are [encoded decoded] (= (#'extract/safe-url-decode encoded) decoded)
    ;; Contains %, but invalid: expected to come back unchanged
    "%*-sd%%%saf%=lks"
    "%*-sd%%%saf%=lks"

    "%2FDownloads%2FCNN%3AIs%5CAll%3AYou%20Need.pdf"
    "/Downloads/CNN:Is\\All:You Need.pdf"

    ;; Non-ASCII text without escapes: expected to come back unchanged
    "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费dla"
    "asldkflksdaf啦放假啦睡觉啦啊啥的都撒娇浪费dla"))
;; Table of [expected-title file-name] pairs for the private
;; tri-lb-title-parsing helper.
(deftest page-name-sanitization-backward-tests
  (are [expected raw-name] (= expected (#'extract/tri-lb-title-parsing raw-name))
    "abc.def.ghi.jkl"       "abc.def.ghi.jkl"
    "abc/def/ghi/jkl"       "abc%2Fdef%2Fghi%2Fjkl"
    "abc%/def/ghi/jkl"      "abc%25%2Fdef%2Fghi%2Fjkl"
    "abc%2——ef/ghi/jkl"     "abc%2——ef%2Fghi%2Fjkl"
    "abc&amp;2Fghi/jkl"     "abc&amp;2Fghi%2Fjkl"
    "abc&lt;2Fghi/jkl"      "abc&lt;2Fghi%2Fjkl"
    "abc&percnt;2Fghi/jkl"  "abc&percnt;2Fghi%2Fjkl"
    "abc&semi;&;2Fghi/jkl"  "abc&semi;&;2Fghi%2Fjkl"
    ;; happens when importing some compatible files on *nix / macOS
    multiplatform-reserved-chars multiplatform-reserved-chars))
;; Table of [expected-body path] pairs for the private path->file-body helper.
(deftest path-utils-tests
  (are [expected path] (= expected (#'extract/path->file-body path))
    "asldk lakls "    "/data/app/asldk lakls .lsad"
    "asldk lakls "    "asldk lakls .lsad"
    "asldk lakls"     "asldk lakls"
    "asldk lakls"     "/data/app/asldk lakls"
    "asldk lakls"     "file://data/app/asldk lakls.as"
    "中文asldk lakls" "file://中文data/app/中文asldk lakls.as"))
;; Bare minimum schema to test extract
(def file-schema
  "DataScript schema marking :block/uuid and :block/name as unique identities."
  (let [unique-identity {:db/unique :db.unique/identity}]
    {:block/uuid unique-identity
     :block/name unique-identity}))
(defn- extract
  "Calls extract/extract for `file` and `content` with test defaults:
  a \"-\" block pattern, a fresh empty db built from file-schema and
  verbose output off. Keys in the optional `options` map override the defaults."
  [file content & [options]]
  (let [defaults {:block-pattern "-"
                  :db (d/empty-db file-schema)
                  :verbose false}]
    (extract/extract file content (merge defaults options))))
(defn- extract-block-content
  "Extracts `text` as the file \"a.md\" and returns a vector holding the
  :block/title of every resulting block."
  [text]
  (->> (extract "a.md" text)
       :blocks
       (mapv :block/title)))
(defn- extract-title
  "Extracts `text` as `file` and returns the :title property of the first
  page in the result, or nil when there is none."
  [file text]
  (let [first-page (first (:pages (extract file text)))]
    (get-in first-page [:block/properties :title])))
;; Checks the block titles produced for bullet items and markdown headings.
;; Each assertion compares the ordered vector returned by
;; extract-block-content against the expected titles.
;; NOTE(review): the fixture lines inside the string literals carry no leading
;; indentation as shown here; if nested bullets were intended, confirm the
;; whitespace inside the literals before editing them.
(deftest extract-blocks-for-headings
;; Three bullet items yield three titles with the "- " marker removed.
(is (= ["a" "b" "c"]
(extract-block-content
"- a
- b
- c")))
;; A leading heading line is expected to keep its "## " prefix in the title,
;; while the bullet items after it are expected without their marker.
(is (= ["## hello" "world" "nice" "nice" "bingo" "world"]
(extract-block-content "## hello
- world
- nice
- nice
- bingo
- world")))
;; Headings of several levels followed by bullet items: every heading is
;; expected as its own block, prefix included, in document order.
(is (= ["# a" "## b" "### c" "#### d" "### e" "f" "g" "h" "i" "j"]
(extract-block-content "# a
## b
### c
#### d
### e
- f
- g
- h
- i
- j"))))
;; Table of [expected-title org-content] pairs; every row is extracted as the
;; file "foo.org" and the first page's :title property is compared.
(deftest parse-page-title
  (are [expected content] (= expected (extract-title "foo.org" content))
    ;; Empty file: no title property
    nil ""
    ;; The #+title keyword is matched regardless of letter case
    "Howdy" "#+title: Howdy"
    "Howdy" "#+TITLE: Howdy"
    "Howdy" "#+TiTlE: Howdy"
    ;; Title keyword placed after a property drawer
    "diagram/abcdef"
    ":PROPERTIES:\n:ID: 72289d9a-eb2f-427b-ad97-b605a4b8c59b\n:END:\n#+TITLE: diagram/abcdef"
    "diagram/abcdef"
    ":PROPERTIES:\n:ID: 72289d9a-eb2f-427b-ad97-b605a4b8c59b\n:END:\n#+title: diagram/abcdef"))
;; For each row, extracts with the given arguments and compares the set of
;; :block/name values found across every block's :block/refs.
(deftest extract-blocks-with-property-pages-config
  (are [call-args ref-names]
       (= ref-names
          (into #{}
                (comp (mapcat :block/refs)
                      (map :block/name))
                (:blocks (apply extract call-args))))

    ;; Property pages enabled: property keys are expected among the refs
    ["a.md" "foo:: #bar\nbaz:: #bing" {:user-config {:property-pages/enabled? true}}]
    #{"bar" "bing" "foo" "baz"}

    ;; Property pages disabled: only the tagged values are expected
    ["a.md" "foo:: #bar\nbaz:: #bing" {:user-config {:property-pages/enabled? false}}]
    #{"bar" "bing"}))
;; Regression test: four bullet items must come back as four block titles,
;; in order, with the "- " marker removed.
;; NOTE(review): the fixture lines inside the string literal show no leading
;; indentation here; confirm whether the original regression case relied on
;; nested bullets before touching the literal.
(deftest test-regression-1902
(is (= ["line1" "line2" "line3" "line4"]
(extract-block-content
"- line1
- line2
- line3
- line4"))))