Merge branch 'master' into refactor/tech-stack-upgrade

This commit is contained in:
Mega Yu
2026-03-12 17:10:35 +08:00
31 changed files with 1326 additions and 172 deletions

View File

@@ -9,41 +9,80 @@
[nbb.classpath :as cp]
[nbb.core :as nbb]))
(def *ids
  "Atom holding the set of every uuid already returned by `get-next-id`."
  (atom #{}))

(defn get-next-id
  "Returns a random uuid that has not been returned before and records it in
  `*ids`. A candidate already present in `*ids` is discarded and redrawn."
  []
  (loop [candidate (random-uuid)]
    (if (contains? @*ids candidate)
      (recur (random-uuid))
      (do
        (swap! *ids conj candidate)
        candidate))))
;; Title given to every block generated by the two-arg build-blocks
(def ^:private default-block-title "Block")
;; Entity budget for one batch; default-batch-pages divides it by
;; (inc blocks-per-page), i.e. one page plus its blocks
(def ^:private target-entities-per-batch 25000)
;; Upper bound on the batch size returned by default-batch-pages
(def ^:private max-pages-per-batch 1000)
(defn build-pages
  "Returns a lazy seq of `n` page maps. Each page gets a fresh uuid from
  `get-next-id` and the title \"Page-<i>\", numbered upwards from `start-idx`."
  [start-idx n]
  (map (fn [offset page-id]
         {:block/title (str "Page-" (+ start-idx offset))
          :block/uuid page-id})
       (range)
       (repeatedly n get-next-id)))
(defn- parse-long-option
  "Parses a string option value as a base-10 integer with `js/parseInt`.
  Any non-string value is returned unchanged."
  [value]
  (cond-> value
    (string? value) (js/parseInt 10)))
(defn build-blocks
  "Returns a vector of `size` block maps. Each block gets a fresh uuid from
  `get-next-id` and uses that uuid's string form as its title."
  [size]
  (into []
        (map (fn [block-id]
               {:block/uuid block-id
                :block/title (str block-id)}))
        (repeatedly size get-next-id)))
(defn- create-init-data
(defn- normalize-options
[options]
(let [pages (build-pages 0 (:pages options))]
{:pages-and-blocks
(mapv #(hash-map :page % :blocks (build-blocks (:blocks options)))
pages)
;; Custom id fn because transaction chunks may separate blocks and pages from each other
:page-id-fn (fn [b] [:block/uuid (:block/uuid b)])}))
(update-vals options parse-long-option))
(defn default-batch-pages
  "Returns the default number of pages per batch for pages holding
  `blocks-per-page` blocks each: `target-entities-per-batch` divided by the
  entities in one page (the page plus its blocks), kept within
  1..`max-pages-per-batch`."
  [blocks-per-page]
  (let [entities-per-page (max 1 (inc blocks-per-page))
        pages-for-target (quot target-entities-per-batch entities-per-page)
        at-least-one (max pages-for-target 1)]
    (min at-least-one max-pages-per-batch)))
(defn- build-blocks
  "Returns a vector of block maps for one page.

  `blocks-per-page` is the number of blocks to build. `next-id` is a zero-arg
  fn called once per block to produce its `:block/uuid`. Every block is titled
  `default-block-title`.

  Returns an empty vector when `blocks-per-page` is zero, negative, nil or NaN."
  [blocks-per-page next-id]
  (loop [block-idx 0
         blocks (transient [])]
    ;; Bound the loop with `<` rather than stopping on `=`: an equality check is
    ;; never satisfied for negative, fractional or NaN counts, which would grow
    ;; the transient until the process runs out of memory.
    (if (< block-idx blocks-per-page)
      (recur (inc block-idx)
             (conj! blocks
                    {:block/uuid (next-id)
                     :block/title default-block-title}))
      (persistent! blocks))))
(defn build-page-and-blocks-batch
  "Returns a vector of `page-count` maps of the form
  `{:page <page-map> :blocks <vector of block maps>}`.

  Pages are titled \"Page-<i>\" with `i` counting up from `start-idx`, and each
  page gets `blocks-per-page` blocks from `build-blocks`. `next-id` is a
  zero-arg fn producing uuids; it defaults to `random-uuid`. For each page it
  is called for the page first and then for that page's blocks.

  Returns an empty vector when `page-count` is zero, negative, nil or NaN."
  ([start-idx page-count blocks-per-page]
   (build-page-and-blocks-batch start-idx page-count blocks-per-page random-uuid))
  ([start-idx page-count blocks-per-page next-id]
   (loop [page-idx 0
          pages-and-blocks (transient [])]
     ;; Bound the loop with `<` rather than stopping on `=`, so a negative,
     ;; fractional or NaN page-count cannot loop forever.
     (if (< page-idx page-count)
       ;; Bind the page before its blocks so the order of next-id calls does
       ;; not depend on how the map literal evaluates its values.
       (let [page {:block/uuid (next-id)
                   :block/title (str "Page-" (+ start-idx page-idx))}
             blocks (build-blocks blocks-per-page next-id)]
         (recur (inc page-idx)
                (conj! pages-and-blocks {:page page :blocks blocks})))
       (persistent! pages-and-blocks)))))
(defn page-and-block-batches
  "Returns a lazy seq of batches, each built by `build-page-and-blocks-batch`.
  Batches are only built as the seq is consumed.

  Options:
  * `:pages`       - total number of pages across all batches
  * `:blocks`      - number of blocks per page
  * `:batch-pages` - pages per batch; when missing, NaN or not positive,
                     `(default-batch-pages blocks)` is used instead

  `next-id` is a zero-arg fn producing uuids; it defaults to `random-uuid`."
  ([options]
   (page-and-block-batches options random-uuid))
  ([{:keys [pages blocks batch-pages]} next-id]
   ;; A batch size of 0 (truthy, so `or` would accept it) or a negative one
   ;; would never advance start-idx and yield an endless seq, so fall back to
   ;; the default size for anything that is not positive.
   (let [batch-pages' (if (and (some? batch-pages) (pos? batch-pages))
                        batch-pages
                        (default-batch-pages blocks))]
     (letfn [(step [start-idx]
               (lazy-seq
                (when (< start-idx pages)
                  (cons (build-page-and-blocks-batch start-idx
                                                     ;; the last batch may be smaller
                                                     (min batch-pages' (- pages start-idx))
                                                     blocks
                                                     next-id)
                        (step (+ start-idx batch-pages'))))))]
       (step 0)))))
(defn- transact-batch!
  "Builds tx data for `pages-and-blocks` with `outliner-cli/build-blocks-tx`
  and transacts it into `conn`: `:init-tx` first, then `:block-props-tx` only
  when it is non-empty."
  [conn pages-and-blocks]
  (let [tx-data (outliner-cli/build-blocks-tx {:pages-and-blocks pages-and-blocks})
        props-tx (:block-props-tx tx-data)]
    (d/transact! conn (:init-tx tx-data))
    (when (seq props-tx)
      (d/transact! conn props-tx))))
(defn- total-batches
  "Returns the number of batches needed for `:pages` pages: pages divided by
  the batch size, rounded up. The batch size is `:batch-pages` when given,
  otherwise `(default-batch-pages blocks)`."
  [options]
  (let [{:keys [pages blocks batch-pages]} options
        pages-per-batch (or batch-pages (default-batch-pages blocks))]
    (-> pages
        (/ pages-per-batch)
        js/Math.ceil)))
(def spec
"Options spec"
@@ -54,34 +93,36 @@
:desc "Number of pages to create"}
:blocks {:alias :b
:default 20
:desc "Number of blocks to create"}})
:desc "Number of blocks to create per page"}
:batch-pages {:alias :t
:desc "Number of pages to build and transact per batch"}})
(defn parse-args
  "Splits command line `args` into `:graph-dir` (the first arg) and `:options`
  (the remaining args parsed against `spec`, then passed through
  `normalize-options`)."
  [args]
  (let [graph-dir (first args)
        option-args (rest args)
        parsed (cli/parse-opts option-args {:spec spec})]
    {:graph-dir graph-dir
     :options (normalize-options parsed)}))
(defn -main [args]
(let [graph-dir (first args)
options (cli/parse-opts args {:spec spec})
(let [{:keys [graph-dir options]} (parse-args args)
_ (when (or (nil? graph-dir) (:help options))
(println (str "Usage: $0 GRAPH-NAME [OPTIONS]\nOptions:\n"
(cli/format-opts {:spec spec})))
(js/process.exit 1))
{:keys [pages blocks batch-pages]} options
[dir db-name] (if (string/includes? graph-dir "/")
((juxt node-path/dirname node-path/basename) graph-dir)
[(node-path/join (os/homedir) "logseq" "graphs") graph-dir])
conn (outliner-cli/init-conn dir db-name {:classpath (cp/get-classpath)})
_ (println "Building tx ...")
{:keys [init-tx]} (outliner-cli/build-blocks-tx (create-init-data options))]
(println "Built" (count init-tx) "tx," (count (filter :block/title init-tx)) "pages and"
(count (filter :block/title init-tx)) "blocks ...")
;; Vary the chunking with page size up to a max to avoid OOM
(let [tx-chunks (partition-all (min (:pages options) 30000) init-tx)]
(loop [chunks tx-chunks
chunk-num 1]
(when-let [chunk (first chunks)]
(println "Transacting chunk" chunk-num "of" (count tx-chunks)
"starting with block:" (pr-str (select-keys (first chunk) [:block/title :block/title])))
(d/transact! conn chunk)
(recur (rest chunks) (inc chunk-num)))))
#_(d/transact! conn blocks-tx)
(println "Created graph" (str db-name " with " (count (d/datoms @conn :eavt)) " datoms!"))))
total-batches' (total-batches options)
pages-per-batch (or batch-pages (default-batch-pages blocks))
total-blocks (* pages blocks)]
(println "Creating graph with" pages "pages and" total-blocks "blocks"
"using" total-batches' "batch(es) of up to" pages-per-batch "pages ...")
(doseq [[batch-num pages-and-blocks] (map-indexed vector (page-and-block-batches options))]
(println "Transacting batch" (inc batch-num) "of" total-batches'
"with" (count pages-and-blocks) "pages")
(transact-batch! conn pages-and-blocks))
(println "Created graph" db-name "with" pages "pages and" total-blocks "blocks.")))
;; Call -main with the command line args only when this file is the one nbb
;; was invoked with
(when (= nbb/*file* (nbb/invoked-file))
(-main *command-line-args*))

View File

@@ -0,0 +1,69 @@
(ns logseq.tasks.db-graph.create-graph-with-large-sizes-test
(:require [cljs.test :refer [deftest is testing]]
[logseq.tasks.db-graph.create-graph-with-large-sizes :as sut]))
(deftest build-page-and-blocks-batch-builds-the-requested-graph-slice
  ;; next-id hands out "id-0", "id-1", ... in call order, so the expected uuids
  ;; below pin the order in which pages and blocks request their ids
  (let [issued (atom -1)
        next-id (fn [] (str "id-" (swap! issued inc)))
        batch (#'sut/build-page-and-blocks-batch 10 2 3 next-id)
        pages (map :page batch)
        block-attrs (fn [attr]
                      (map #(mapv attr (:blocks %)) batch))]
    (is (= 2 (count batch)))
    (is (= ["Page-10" "Page-11"]
           (map :block/title pages)))
    (is (= ["id-0" "id-4"]
           (map :block/uuid pages)))
    (is (= [["Block" "Block" "Block"]
            ["Block" "Block" "Block"]]
           (block-attrs :block/title)))
    (is (= [["id-1" "id-2" "id-3"]
            ["id-5" "id-6" "id-7"]]
           (block-attrs :block/uuid)))))
(deftest page-and-block-batches-only-realize-requested-batches
  ;; `calls` counts next-id invocations, which shows how much of the lazy seq
  ;; was actually built
  (let [calls (atom 0)
        next-id #(str "id-" (swap! calls inc))
        options {:pages 50000
                 :blocks 50
                 :batch-pages 100}
        first-batch (first (#'sut/page-and-block-batches options next-id))
        page-title-at (fn [idx]
                        (get-in first-batch [idx :page :block/title]))]
    (is (= 100 (count first-batch)))
    ;; 100 pages, each needing 1 page id + 50 block ids
    (is (= (* 100 51) @calls)
        "Only the first batch should be realized")
    (is (= "Page-0" (page-title-at 0)))
    (is (= "Page-99" (page-title-at 99)))))
(deftest default-batching-keeps-large-graphs-bounded
  (testing "50k pages with 50 blocks are split into many batches instead of one giant tx"
    (let [batch-pages (#'sut/default-batch-pages 50)
          ;; no :batch-pages given, so the default size must be used
          batches (#'sut/page-and-block-batches {:pages 50000
                                                 :blocks 50}
                                                (constantly "id"))
          first-batch-size (count (first batches))]
      (is (< batch-pages 50000))
      (is (pos? batch-pages))
      (is (= batch-pages first-batch-size)))))
(deftest page-and-block-batches-handle-empty-input
  ;; zero pages must produce no batches at all
  (let [batches (#'sut/page-and-block-batches {:pages 0
                                               :blocks 50}
                                              (constantly "id"))]
    (is (= [] (vec batches)))))
(deftest parse-args-keeps-the-graph-name-separate-from-cli-options
  ;; the first arg is the graph name; the rest are short-alias options
  (let [cli-args ["large-graph" "-p" "3" "-b" "2" "-t" "1"]
        parsed (sut/parse-args cli-args)
        options (:options parsed)]
    (is (= "large-graph" (:graph-dir parsed)))
    (is (= 3 (:pages options)))
    (is (= 2 (:blocks options)))
    (is (= 1 (:batch-pages options)))))

View File

@@ -0,0 +1,8 @@
(ns logseq.tasks.test-runner
(:require [cljs.test :as test]
[logseq.tasks.db-graph.create-graph-with-large-sizes-test]))
(defn- exit-when-failed!
  "Exits the process with status 1 when `summary` reports at least one failure
  or error. Missing counts (including a nil summary) are treated as 0."
  [{:keys [fail error]}]
  (when (pos? (+ (or fail 0) (or error 0)))
    (js/process.exit 1)))

;; cljs.test documents run-tests as not returning a meaningful value and
;; delivers the final counters through the :end-run-tests report event instead.
;; Hook that event so failures still produce a non-zero exit code.
;; NOTE(review): confirm that nbb's cljs.test emits this event with
;; :fail/:error counters.
(defmethod test/report [:cljs.test/default :end-run-tests] [summary]
  (exit-when-failed! summary))

(defn -main
  "Runs the task test namespaces and exits with status 1 on any failure or
  error. All arguments are ignored."
  [& _]
  ;; Still honour a summary map if run-tests happens to return one.
  (exit-when-failed!
   (test/run-tests 'logseq.tasks.db-graph.create-graph-with-large-sizes-test)))