Fix issues related to importing file to db (#12353)

* fix multiline blocks containing attributes being truncated and losing data after importing

* fix issues with block attribute names that contain "/" after importing

* standardize eol, all files except .bat files use lf

* compatible with windows path

* fix multiple line importing issues
This commit is contained in:
megayu
2026-01-30 17:36:12 +08:00
committed by GitHub
parent 220a0549fc
commit 60297778a8
7 changed files with 143 additions and 49 deletions

3
.gitattributes vendored Normal file
View File

@@ -0,0 +1,3 @@
* text=auto eol=lf
*.bat text eol=crlf

View File

@@ -226,6 +226,7 @@
(subs (str k) 1)
k)
k (-> (string/lower-case k)
(string/replace "/" "-")
(string/replace " " "-")
(string/replace "_" "-"))]
(if (gp-property/valid-property-name? (str ":" k))
@@ -644,7 +645,7 @@
properties))
(defn- construct-block
[block properties* timestamps body encoded-content format pos-meta {:keys [block-pattern db date-formatter remove-properties? db-graph-mode? export-to-db-graph?]}]
[block properties* timestamps body encoded-content format pos-meta {:keys [block-pattern db date-formatter remove-properties? remove-logbook? remove-deadline-scheduled? db-graph-mode? export-to-db-graph?]}]
(let [id (get-custom-id-or-new-id properties*)
block-tags (and export-to-db-graph? (get-in properties* [:properties :tags]))
;; For export, remove tags from properties as they are being converted to classes
@@ -686,7 +687,11 @@
block)
title (cond->> (get-block-content encoded-content block format pos-meta block-pattern)
remove-properties?
(gp-property/remove-properties (get block :format :markdown)))
(gp-property/remove-properties (get block :format :markdown))
remove-logbook?
(gp-property/remove-logbook)
remove-deadline-scheduled?
(gp-property/remove-deadline-scheduled))
block (assoc block :block/title title)
block (if (seq timestamps)
(merge block (timestamps->scheduled-and-deadline timestamps))
@@ -756,28 +761,23 @@
block-idx 0
timestamps {}
properties {}
body []]
body []
prev-block-num 0]
(if (seq ast-blocks)
(let [[ast-block pos-meta] (first ast-blocks)]
(cond
(paragraph-timestamp-block? ast-block)
(let [timestamps (extract-timestamps ast-block)
timestamps' (merge timestamps timestamps)]
(recur headings (rest ast-blocks) (inc block-idx) timestamps' properties body))
(let [ts (extract-timestamps ast-block)
timestamps' (merge timestamps ts)]
(recur headings (rest ast-blocks) (inc block-idx) timestamps' properties body (inc prev-block-num)))
(gp-property/properties-ast? ast-block)
(let [properties (extract-properties (second ast-block) (assoc user-config :format format))]
(recur headings (rest ast-blocks) (inc block-idx) timestamps properties body))
(recur headings (rest ast-blocks) (inc block-idx) timestamps properties body (inc prev-block-num)))
(heading-block? ast-block)
;; for db-graphs cut multi-line when there is property, deadline/scheduled or logbook text in :block/title
(let [cut-multiline? (and export-to-db-graph?
(when-let [prev-block (first (get all-blocks (dec block-idx)))]
(or (and (gp-property/properties-ast? prev-block)
(not= "Custom" (ffirst (get all-blocks (- block-idx 2)))))
(= ["Drawer" "logbook"] (take 2 prev-block))
(and (= "Paragraph" (first prev-block))
(seq (set/intersection (set (flatten prev-block)) #{"Deadline" "Scheduled"}))))))
(let [cut-multiline? (and export-to-db-graph? (= prev-block-num 0))
prev-blocks (map first (subvec all-blocks (max 0 (- block-idx prev-block-num)) block-idx))
pos-meta' (if cut-multiline?
pos-meta
;; fix start_pos
@@ -785,12 +785,14 @@
(if (seq headings)
(get-in (last headings) [:meta :start_pos])
nil)))
;; Remove properties text from custom queries in db graphs
;; Remove properties, deadline/scheduled and logbook text from title in db graphs
options' (assoc options
:remove-properties?
(and export-to-db-graph?
(and (gp-property/properties-ast? (first (get all-blocks (dec block-idx))))
(= "Custom" (ffirst (get all-blocks (- block-idx 2)))))))
(and export-to-db-graph? (some gp-property/properties-ast? prev-blocks))
:remove-logbook?
(and export-to-db-graph? (some #(= ["Drawer" "logbook"] (take 2 %)) prev-blocks))
:remove-deadline-scheduled?
(and export-to-db-graph? (some #(seq (set/intersection (set (flatten %)) #{"Deadline" "Scheduled"})) prev-blocks)))
block' (construct-block ast-block properties timestamps body encoded-content format pos-meta' options')
block'' (cond
db-graph-mode?
@@ -799,11 +801,10 @@
(assoc block' :block.temp/ast-blocks (cons ast-block body))
:else
(assoc block' :macros (extract-macros-from-ast (cons ast-block body))))]
(recur (conj headings block'') (rest ast-blocks) (inc block-idx) {} {} []))
(recur (conj headings block'') (rest ast-blocks) (inc block-idx) {} {} [] 0))
:else
(recur headings (rest ast-blocks) (inc block-idx) timestamps properties (conj body ast-block))))
(recur headings (rest ast-blocks) (inc block-idx) timestamps properties (conj body ast-block) (inc prev-block-num))))
[(-> (reverse headings)
sanity-blocks-data)
body

View File

@@ -1229,9 +1229,10 @@
(defn- build-pdf-annotations-tx
"Builds tx for pdf annotations when a pdf has an annotations EDN file under assets/"
[parent-asset-path assets parent-asset pdf-annotation-pages opts]
(let [asset-edn-path (node-path/join common-config/local-assets-dir
(safe-sanitize-file-name
(node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".edn"))))
(let [asset-edn-path (path/path-normalize
(node-path/join common-config/local-assets-dir
(safe-sanitize-file-name
(node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".edn")))))
asset-md-name (str "hls__" (safe-sanitize-file-name
(node-path/basename (string/replace-first parent-asset-path #"(?i)\.pdf$" ".md"))))]
(when-let [asset-edn-map (get @assets asset-edn-path)]
@@ -2168,8 +2169,10 @@
(-> (select-keys options [:notify-user :default-config :<save-config-file])
(set/rename-keys {:<save-config-file :<save-file})))]
(let [files (common-config/remove-hidden-files *files config rpath-key)
logseq-file? #(string/starts-with? (get % rpath-key) "logseq/")
asset-file? #(string/starts-with? (get % rpath-key) "assets/")
normalized-rpath (fn [f]
(some-> (get f rpath-key) path/path-normalize))
logseq-file? #(string/starts-with? (normalized-rpath %) "logseq/")
asset-file? #(string/starts-with? (normalized-rpath %) "assets/")
doc-files (->> files
(remove #(or (logseq-file? %) (asset-file? %)))
(filter #(contains? #{"md" "org" "markdown" "edn"} (path/file-ext (:path %)))))

View File

@@ -176,3 +176,46 @@
:else
content))
(defn remove-logbook
  "Removes logbook drawers from `content`.

  A drawer starts at a line whose trimmed text begins with :LOGBOOK: and ends
  at the next line whose trimmed text begins with :END: (both matched
  case-insensitively). The delimiter lines and everything between them are
  dropped; the remaining lines are joined with a newline.

  A :LOGBOOK: line that is never followed by an :END: line is not treated as a
  drawer: it and the lines after it are kept, so a stray marker cannot silently
  swallow the rest of the block.

  Returns nil when `content` is not a string."
  [content]
  (when (string? content)
    (let [;; `pending` is nil outside a drawer; inside one it buffers the drawer's
          ;; lines (opening marker included) so they can be restored if the
          ;; drawer is never closed.
          [kept pending]
          (reduce (fn [[kept pending] line]
                    (let [marker (string/upper-case (string/trim line))]
                      (cond
                        (some? pending)
                        (if (string/starts-with? marker ":END:")
                          ;; drawer closed: discard everything buffered
                          [kept nil]
                          [kept (conj pending line)])

                        (string/starts-with? marker ":LOGBOOK:")
                        [kept [line]]

                        :else
                        [(conj kept line) nil])))
                  [[] nil]
                  (string/split-lines content))]
      ;; a non-nil `pending` here means the drawer was never terminated
      (string/join "\n" (into kept pending)))))
(defn remove-deadline-scheduled
  "Strips DEADLINE/SCHEDULED timestamps from every line of `content` except
  the first.

  Only lines whose left-trimmed text starts (case-insensitively) with
  DEADLINE: or SCHEDULED: followed by a space are touched. Each keyword plus
  its <...> timestamp is removed and the line is trimmed; a line left blank is
  dropped entirely, otherwise the remaining text is kept. Content that splits
  into exactly one line is returned unchanged.

  Returns nil when `content` is not a string."
  [content]
  (when (string? content)
    (let [[head & tail :as all-lines] (string/split-lines content)]
      (if (= 1 (count all-lines))
        content
        (letfn [(timestamp-line? [line]
                  (let [prefix (string/upper-case (string/triml line))]
                    (or (string/starts-with? prefix "DEADLINE: ")
                        (string/starts-with? prefix "SCHEDULED: "))))
                ;; returns the text left after removing timestamps, or nil when
                ;; nothing but whitespace remains
                (strip-timestamps [line]
                  (let [cleaned (string/trim
                                 (string/replace line #"(?i)(?:^|\s)(DEADLINE|SCHEDULED):\s+<[^>]*>" ""))]
                    (when-not (string/blank? cleaned)
                      cleaned)))]
          (->> tail
               (keep (fn [line]
                       (if (timestamp-line? line)
                         (strip-timestamps line)
                         line)))
               (cons head)
               (string/join "\n")))))))

View File

@@ -213,7 +213,7 @@
(is (= 32 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Journal]] @conn))))
(is (= 5 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Asset]] @conn))))
(is (= 4 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Task]] @conn))))
(is (= 5 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Task]] @conn))))
(is (= 4 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Query]] @conn))))
(is (= 2 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Card]] @conn))))
(is (= 5 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Quote-block]] @conn))))
@@ -589,8 +589,36 @@
(testing "multiline blocks"
(is (= "|markdown| table|\n|some|thing|" (:block/title (db-test/find-block-by-content @conn #"markdown.*table"))))
(is (= "multiline block\na 2nd\nand a 3rd" (:block/title (db-test/find-block-by-content @conn #"multiline block"))))
(is (= "logbook block" (:block/title (db-test/find-block-by-content @conn #"logbook block")))))
(is (= "normal multiline block\na 2nd\nand a 3rd" (:block/title (db-test/find-block-by-content @conn #"normal multiline block"))))
(is (= "colored multiline block\nlast line" (:block/title (db-test/find-block-by-content @conn #"colored multiline block"))))
(let [block (db-test/find-block-by-content @conn #"multiline block with prop and deadline")]
(is (= "multiline block with prop and deadline\nlast line" (:block/title block)))
(is (= 20221126
(-> (db-test/readable-properties block)
:logseq.property/deadline
date-time-util/ms->journal-day))
"multiline block has correct journal as property value")
(is (= "red"
(-> (db-test/readable-properties block)
:logseq.property/background-color))
"multiline block has correct background color as property value"))
(let [block (db-test/find-block-by-content @conn #"multiline block with deadline and scheduled in 1 line and sth else")]
(is (= "multiline block with deadline and scheduled in 1 line and sth else\nsomething else\nlast line" (:block/title block)))
(is (= 20221126
(-> (db-test/readable-properties block)
:logseq.property/deadline
date-time-util/ms->journal-day))
"multiline block with deadline and scheduled has correct deadline journal as property value")
(is (= 20221126
(-> (db-test/readable-properties block)
:logseq.property/scheduled
date-time-util/ms->journal-day))
"multiline block with deadline and scheduled has correct scheduled journal as property value"))
(is (= "logbook block" (:block/title (db-test/find-block-by-content @conn #"^logbook block"))))
(is (= "multiline logbook block\nlast line" (:block/title (db-test/find-block-by-content @conn #"multiline logbook block")))))
(testing ":block/refs"
(let [page (db-test/find-page-by-title @conn "chat-gpt")]
@@ -623,7 +651,7 @@
count))
"Correct number of user classes")
(is (= 4 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Task]] @conn))))
(is (= 5 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Task]] @conn))))
(is (= 4 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Query]] @conn))))
(is (= 2 (count (d/q '[:find ?b :where [?b :block/tags :logseq.class/Card]] @conn))))

View File

@@ -5,9 +5,25 @@
|some|thing|
- block with props
prop-num:: 10
- multiline block
- normal multiline block
a 2nd
and a 3rd
- colored multiline block
background-color:: red
last line
- multiline block with prop and deadline
background-color:: red
DEADLINE: <2022-11-26 Sat>
last line
- multiline block with deadline and scheduled in 1 line and sth else
DEADLINE: <2022-11-26 Sat> SCHEDULED: <2022-11-26 Sat> something else
last line
- DONE multiline logbook block
:LOGBOOK:
CLOCK: [2024-08-07 Wed 11:47:50]
CLOCK: [2024-08-07 Wed 11:47:53]
:END:
last line
- DOING logbook block
:LOGBOOK:
CLOCK: [2024-08-07 Wed 11:47:50]