From 42c1785bb87e368ae3c7c20cbc612ea929aade41 Mon Sep 17 00:00:00 2001
From: Gabriel Horner
Date: Tue, 6 Feb 2024 12:51:02 -0500
Subject: [PATCH] enhance: add script to build large graphs with configurable sizes

---
 scripts/README.md                             |  19 +++++
 .../create_graph_with_large_sizes.cljs        | 100 +++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 scripts/src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs

diff --git a/scripts/README.md b/scripts/README.md
index 47788e4d17..b7221a58d5 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -31,6 +31,25 @@ properties. Read the docs in
 [logseq.tasks.db-graph.create-graph](src/logseq/tasks/db_graph/create_graph.cljs)
 for specifics on the EDN map.
 
+To create large graphs with varying size:
+
+```
+$ yarn -s nbb-logseq src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs large
+Building tx ...
+Built 21000 tx, 1000 pages and 20000 blocks ...
+Transacting chunk 1 of 21 starting with block: #:block{:name "page-0"}
+...
+Created graph large with 187810 datoms!
+
+# To see options available
+$ yarn -s nbb-logseq src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs -h
+Usage: $0 GRAPH-NAME [OPTIONS]
+Options:
+  -h, --help        Print help
+  -p, --pages  1000 Number of pages to create
+  -b, --blocks 20   Number of blocks to create per page
+```
+
 Another example is the `create_graph_with_schema_org.cljs` script which creates a graph
 with the https://schema.org/ ontology with as many of the classes and properties as possible:
 
diff --git a/scripts/src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs b/scripts/src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs
new file mode 100644
index 0000000000..fc425c2fc3
--- /dev/null
+++ b/scripts/src/logseq/tasks/db_graph/create_graph_with_large_sizes.cljs
@@ -0,0 +1,100 @@
+(ns logseq.tasks.db-graph.create-graph-with-large-sizes
+  "Script that generates graphs at large sizes"
+  (:require [logseq.tasks.db-graph.create-graph :as create-graph]
+            [clojure.string :as string]
+            [datascript.core :as d]
+            [babashka.cli :as cli]
+            ["path" :as node-path]
+            ["os" :as os]
+            [nbb.core :as nbb]))
+
+(def *ids
+  "Set of every uuid that get-next-id has handed out so far"
+  (atom #{}))
+
+(defn get-next-id
+  "Returns a random uuid that this fn has not returned before and records it
+  in *ids"
+  []
+  (loop [id (random-uuid)]
+    (if (contains? @*ids id)
+      (recur (random-uuid))
+      (do
+        (swap! *ids conj id)
+        id))))
+
+(defn build-pages
+  "Returns a lazy seq of n page maps. Each has a fresh :block/uuid and a
+  :block/name of page-N where N counts up from start-idx"
+  [start-idx n]
+  (let [ids (repeatedly n get-next-id)]
+    (map-indexed
+     (fn [idx id]
+       {:block/uuid id
+        :block/name (str "page-" (+ start-idx idx))})
+     ids)))
+
+(defn build-blocks
+  "Returns a vector of size block maps. Each has a fresh :block/uuid and uses
+  the string form of that uuid as its :block/content"
+  [size]
+  (vec (repeatedly size
+                   (fn []
+                     (let [id (get-next-id)]
+                       {:block/uuid id
+                        :block/content (str id)})))))
+
+(defn- create-init-data
+  "Returns the map given to create-graph/create-blocks-tx: (:pages options)
+  pages, each paired with (:blocks options) newly built blocks"
+  [options]
+  (let [pages (build-pages 0 (:pages options))]
+    {:pages-and-blocks
+     (mapv #(hash-map :page % :blocks (build-blocks (:blocks options)))
+           pages)}))
+
+(def spec
+  "Options spec"
+  {:help {:alias :h
+          :desc "Print help"}
+   :pages {:alias :p
+           :default 1000
+           :desc "Number of pages to create"}
+   :blocks {:alias :b
+            :default 20
+            :desc "Number of blocks to create per page"}})
+
+(defn -main
+  "Creates the graph named by the first arg and fills it with the pages and
+  blocks described by the parsed options, transacting them in chunks. Prints
+  usage and exits when the graph name is missing or help is requested"
+  [args]
+  (let [graph-dir (first args)
+        options (cli/parse-opts args {:spec spec})
+        ;; A leading option e.g. `-p 10` is not a graph name
+        missing-graph? (or (nil? graph-dir) (string/starts-with? graph-dir "-"))
+        _ (when (or missing-graph? (:help options))
+            (println (str "Usage: $0 GRAPH-NAME [OPTIONS]\nOptions:\n"
+                          (cli/format-opts {:spec spec})))
+            ;; Asking for help is not a failure, a missing graph name is
+            (js/process.exit (if (:help options) 0 1)))
+        [dir db-name] (if (string/includes? graph-dir "/")
+                        ((juxt node-path/dirname node-path/basename) graph-dir)
+                        [(node-path/join (os/homedir) "logseq" "graphs") graph-dir])
+        conn (create-graph/init-conn dir db-name)
+        _ (println "Building tx ...")
+        blocks-tx (create-graph/create-blocks-tx (create-init-data options))
+        ;; Vary the chunking with page size for now. Use a chunk size of at
+        ;; least 1 as partition-all never advances with a size of 0
+        tx-chunks (partition-all (max 1 (:pages options)) blocks-tx)
+        chunk-count (count tx-chunks)]
+    (println "Built" (count blocks-tx) "tx," (count (filter :block/name blocks-tx)) "pages and"
+             (count (filter :block/content blocks-tx)) "blocks ...")
+    (doseq [[idx chunk] (map-indexed vector tx-chunks)]
+      (println "Transacting chunk" (inc idx) "of" chunk-count
+               "starting with block:" (pr-str (select-keys (first chunk) [:block/content :block/name])))
+      (d/transact! conn chunk))
+    (println "Created graph" (str db-name " with " (count (d/datoms @conn :eavt)) " datoms!"))))
+
+(when (= nbb/*file* (:file (meta #'-main)))
+  (-main *command-line-args*))