enhance: speed up import of doc files and make them scriptable

Refactored the largest part of the import to used promesa which made it
nbb compatible and made large imports like docs 10-25% faster.  Also
updated db-import script to use this work
This commit is contained in:
Gabriel Horner
2024-02-26 16:20:03 -05:00
parent 5d915de2e2
commit e3cc4bad36
3 changed files with 136 additions and 88 deletions

View File

@@ -3,43 +3,70 @@
developing the import feature and for engineers who want to customize
the import process"
(:require [clojure.string :as string]
[clojure.edn :as edn]
[datascript.core :as d]
["path" :as node-path]
["os" :as os]
["fs" :as fs]
["fs/promises" :as fsp]
[nbb.core :as nbb]
[babashka.cli :as cli]
[logseq.common.config :as common-config]
[logseq.graph-parser.exporter :as gp-exporter]
[logseq.tasks.db-graph.create-graph :as create-graph]))
[logseq.common.graph :as common-graph]
[logseq.common.config :as common-config]
[logseq.tasks.db-graph.create-graph :as create-graph]
[promesa.core :as p]))
(defn- setup-import-options
[db config user-options]
{:extract-options {:date-formatter (common-config/get-date-formatter config)
:user-config config
:filename-format (or (:file/name-format config) :legacy)}
:user-options user-options
:page-tags-uuid (:block/uuid (d/entity db [:block/name "pagetags"]))
:import-state (gp-exporter/new-import-state)
:macros (:macros config)})
(defn- remove-hidden-files [dir config files]
(if (seq (:hidden config))
(->> files
(map #(assoc % ::rel-path (node-path/relative dir (:rpath %))))
((fn [files] (common-config/remove-hidden-files files config ::rel-path)))
(map #(dissoc % ::rel-path)))
files))
(defn- import-file-graph-to-db [file-graph conn db-name]
;; TODO: Read in repo config
(let [import-options (setup-import-options @conn
{:file/name-format :triple-lowbar}
{:graph-name db-name})
;; TODO: Read files dir and port more from import
file file-graph
m {:file/path file
:file/content (str (fs/readFileSync file))}]
(gp-exporter/add-file-to-db-graph conn (:file/path m) (:file/content m) import-options)))
(defn- build-graph-files
"Given a graph directory, return absolute, allowed file paths and their contents in preparation
for parsing"
[dir* config]
(let [dir (node-path/resolve dir*)]
(->> (common-graph/get-files dir)
(mapv #(hash-map :rpath %))
(remove-hidden-files dir config))))
(defn- read-config
"Reads repo-specific config from logseq/config.edn"
[dir]
(let [config-file (str dir "/" common-config/app-name "/config.edn")]
(if (fs/existsSync config-file)
(-> config-file fs/readFileSync str edn/read-string)
{})))
(defn- import-file-graph-to-db [file-graph-dir conn user-options]
(let [config (read-config file-graph-dir)
import-options (gp-exporter/setup-import-options
@conn
config
user-options
{:notify-user prn})
;; TODO: Remove logseq/ filter when higher-level import fn is available
files (remove #(re-find #"logseq/" (:rpath %)) (build-graph-files file-graph-dir config))]
;; (prn :files (count files) files)
(gp-exporter/import-from-doc-files!
conn files #(p/let [s (fsp/readFile (:rpath %))] (str s)) import-options)))
(def spec
"Options spec"
{:help {:alias :h
:desc "Print help"}
:verbose {:alias :v
:desc "Verbose mode"}})
:desc "Verbose mode"}
:tag-classes {:alias :t
:coerce []
:desc "List of tags to convert to classes"}
:property-classes {:alias :p
:coerce []
:desc "List of properties whose values convert to classes"}})
(defn -main [args]
(let [[file-graph db-graph-dir] args
@@ -55,9 +82,10 @@
[(node-path/join (os/homedir) "logseq" "graphs") db-graph-dir])
file-graph' (node-path/join (or js/process.env.ORIGINAL_PWD ".") file-graph)
conn (create-graph/init-conn dir db-name)]
(import-file-graph-to-db file-graph' conn db-name)
(when (:verbose options) (println "Transacted" (count (d/datoms @conn :eavt)) "datoms"))
(println "Created graph" (str db-name "!"))))
(p/do!
(import-file-graph-to-db file-graph' conn (merge options {:graph-name db-name}))
(when (:verbose options) (println "Transacted" (count (d/datoms @conn :eavt)) "datoms"))
(println "Created graph" (str db-name "!")))))
(when (= nbb/*file* (:file (meta #'-main)))
(-main *command-line-args*))