From 69ba15252ee47378446f317be95f35e48d1cb322 Mon Sep 17 00:00:00 2001 From: Gabriel Horner Date: Fri, 17 Jun 2022 12:09:21 -0400 Subject: [PATCH] cli/parse-graph returns ast data as well This data was not returned and is valuable for CI jobs and possibly other functionality --- deps/graph-parser/.clj-kondo/config.edn | 3 ++- .../graph-parser/src/logseq/graph_parser.cljs | 16 +++++++----- .../src/logseq/graph_parser/cli.cljs | 17 ++++++++----- .../src/logseq/graph_parser/extract.cljc | 18 +++++++------ .../test/logseq/graph_parser/cli_test.cljs | 22 ++++++++++++---- .../logseq/graph_parser/extract_test.cljs | 11 ++++---- src/main/frontend/handler/file.cljs | 25 ++++++++++--------- 7 files changed, 68 insertions(+), 44 deletions(-) diff --git a/deps/graph-parser/.clj-kondo/config.edn b/deps/graph-parser/.clj-kondo/config.edn index b90675ee75..5621d41915 100644 --- a/deps/graph-parser/.clj-kondo/config.edn +++ b/deps/graph-parser/.clj-kondo/config.edn @@ -1,6 +1,7 @@ {:linters {:consistent-alias - {:aliases {datascript.core d + {:aliases {clojure.string string + datascript.core d logseq.graph-parser graph-parser logseq.graph-parser.text text logseq.graph-parser.block gp-block diff --git a/deps/graph-parser/src/logseq/graph_parser.cljs b/deps/graph-parser/src/logseq/graph_parser.cljs index 994b1b5e91..fc71717d6b 100644 --- a/deps/graph-parser/src/logseq/graph_parser.cljs +++ b/deps/graph-parser/src/logseq/graph_parser.cljs @@ -24,14 +24,15 @@ (db-set-file-content! conn file content) (let [format (gp-util/get-format file) file-content [{:file/path file}] - tx (if (contains? gp-config/mldoc-support-formats format) + {:keys [tx ast]} + (if (contains? 
gp-config/mldoc-support-formats format) (let [extract-options' (merge {:block-pattern (gp-config/get-block-pattern format) :date-formatter "MMM do, yyyy" :supported-formats (gp-config/supported-formats)} extract-options {:db @conn}) - [pages blocks] - (extract/extract-blocks-pages file content extract-options') + {:keys [pages blocks ast]} + (extract/extract file content extract-options') delete-blocks (delete-blocks-fn (first pages) file) block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks) block-refs-ids (->> (mapcat :block/refs blocks) @@ -44,13 +45,16 @@ pages (extract/with-ref-pages pages blocks) pages-index (map #(select-keys % [:block/name]) pages)] ;; does order matter? - (concat file-content pages-index delete-blocks pages block-ids blocks)) - file-content) + {:tx (concat file-content pages-index delete-blocks pages block-ids blocks) + :ast ast}) + {:tx file-content}) tx (concat tx [(cond-> {:file/path file} new? ;; TODO: use file system timestamp? (assoc :file/created-at (date-time-util/time-ms)))])] - (d/transact! conn (gp-util/remove-nils tx) (select-keys options [:new-graph? :from-disk?])))) + {:tx + (d/transact! conn (gp-util/remove-nils tx) (select-keys options [:new-graph? :from-disk?])) + :ast ast})) (defn filter-files "Filters files in preparation for parsing. 
Only includes files that are diff --git a/deps/graph-parser/src/logseq/graph_parser/cli.cljs b/deps/graph-parser/src/logseq/graph_parser/cli.cljs index 281f8a0ad9..f2961994a9 100644 --- a/deps/graph-parser/src/logseq/graph_parser/cli.cljs +++ b/deps/graph-parser/src/logseq/graph_parser/cli.cljs @@ -46,8 +46,12 @@ TODO: Fail fast when process exits 1" [conn files {:keys [config] :as options}] (let [extract-options (merge {:date-formatter (gp-config/get-date-formatter config)} (select-keys options [:verbose]))] - (doseq [{:file/keys [path content]} files] - (graph-parser/parse-file conn path content {:extract-options extract-options})))) + (mapv + (fn [{:file/keys [path content]}] + (let [{:keys [ast]} + (graph-parser/parse-file conn path content {:extract-options extract-options})] + {:file path :ast ast})) + files))) (defn parse-graph "Parses a given graph directory and returns a datascript connection and all @@ -61,8 +65,9 @@ TODO: Fail fast when process exits 1" ([dir options] (let [files (or (:files options) (build-graph-files dir)) conn (ldb/start-conn) - config (read-config dir)] - (when-not (:files options) (println "Parsing" (count files) "files...")) - (parse-files conn files (merge options {:config config})) + config (read-config dir) + _ (when-not (:files options) (println "Parsing" (count files) "files...")) + asts (parse-files conn files (merge options {:config config}))] {:conn conn - :files (map :file/path files)}))) + :files (map :file/path files) + :asts asts}))) diff --git a/deps/graph-parser/src/logseq/graph_parser/extract.cljc b/deps/graph-parser/src/logseq/graph_parser/extract.cljc index 4b46698d09..518fda5c2f 100644 --- a/deps/graph-parser/src/logseq/graph_parser/extract.cljc +++ b/deps/graph-parser/src/logseq/graph_parser/extract.cljc @@ -138,16 +138,17 @@ (catch :default e (log/error :exception e)))) -(defn extract-blocks-pages +(defn extract + "Extracts pages, blocks and ast from given file" [file content {:keys [user-config verbose] :or 
{verbose true} :as options}] (if (string/blank? content) [] (let [format (gp-util/get-format file) _ (when verbose (println "Parsing start: " file)) ast (gp-mldoc/->edn content (gp-mldoc/default-config format - ;; {:parse_outline_only? true} - ) - user-config)] + ;; {:parse_outline_only? true} + ) + user-config)] (when verbose (println "Parsing finished: " file)) (let [first-block (ffirst ast) properties (let [properties (and (gp-property/properties-ast? first-block) @@ -165,10 +166,11 @@ (update properties :filters (fn [v] (string/replace (or v "") "\\" ""))) - properties)))] - (extract-pages-and-blocks - format ast properties - file content options))))) + properties))) + [pages blocks] (extract-pages-and-blocks format ast properties file content options)] + {:pages pages + :blocks blocks + :ast ast})))) (defn- with-block-uuid [pages] diff --git a/deps/graph-parser/test/logseq/graph_parser/cli_test.cljs b/deps/graph-parser/test/logseq/graph_parser/cli_test.cljs index 16ee5de53b..cab27e4457 100644 --- a/deps/graph-parser/test/logseq/graph_parser/cli_test.cljs +++ b/deps/graph-parser/test/logseq/graph_parser/cli_test.cljs @@ -1,13 +1,25 @@ (ns logseq.graph-parser.cli-test - (:require [cljs.test :refer [deftest]] + (:require [cljs.test :refer [deftest is testing]] [logseq.graph-parser.cli :as gp-cli] - [logseq.graph-parser.test.docs-graph-helper :as docs-graph-helper])) + [logseq.graph-parser.test.docs-graph-helper :as docs-graph-helper] + [clojure.string :as string])) ;; Integration test that test parsing a large graph like docs (deftest ^:integration parse-graph (let [graph-dir "test/docs" _ (docs-graph-helper/clone-docs-repo-if-not-exists graph-dir) - {:keys [conn files]} (gp-cli/parse-graph graph-dir) - db @conn] + {:keys [conn files asts]} (gp-cli/parse-graph graph-dir)] - (docs-graph-helper/docs-graph-assertions db files))) + (docs-graph-helper/docs-graph-assertions @conn files) + + (testing "Asts" + (is (seq asts) "Asts returned are non-zero") + (is (= files 
(map :file asts)) + "There's an ast returned for every file processed") + (is (empty? (remove #(or + (seq (:ast %)) + ;; logseq files don't have ast + ;; could also use gp-config but API isn't public yet + (string/includes? (:file %) (str graph-dir "/logseq/"))) + asts)) + "Parsed files shouldn't have empty asts")))) diff --git a/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs b/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs index 5265df41a8..605d9ad9d3 100644 --- a/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs +++ b/deps/graph-parser/test/logseq/graph_parser/extract_test.cljs @@ -5,16 +5,15 @@ (defn- extract [text] - (let [result (extract/extract-blocks-pages "a.md" text {:block-pattern "-"}) - result (last result) - lefts (map (juxt :block/parent :block/left) result)] + (let [{:keys [blocks]} (extract/extract "a.md" text {:block-pattern "-"}) + lefts (map (juxt :block/parent :block/left) blocks)] (if (not= (count lefts) (count (distinct lefts))) (do - (pprint/pprint (map (fn [x] (select-keys x [:block/uuid :block/level :block/content :block/left])) result)) + (pprint/pprint (map (fn [x] (select-keys x [:block/uuid :block/level :block/content :block/left])) blocks)) (throw (js/Error. ":block/parent && :block/left conflicts"))) - (mapv :block/content result)))) + (mapv :block/content blocks)))) -(deftest test-extract-blocks-pages +(deftest test-extract [] (is (= ["a" "b" "c"] (extract diff --git a/src/main/frontend/handler/file.cljs b/src/main/frontend/handler/file.cljs index bde3d52493..17c50b4b8a 100644 --- a/src/main/frontend/handler/file.cljs +++ b/src/main/frontend/handler/file.cljs @@ -130,18 +130,19 @@ file) file (gp-util/path-normalize file) new? (nil? (db/entity [:file/path file]))] - (graph-parser/parse-file - (db/get-db repo-url false) - file - content - (merge options - {:new? new? 
- :delete-blocks-fn (partial get-delete-blocks repo-url) - :extract-options {:user-config (state/get-config) - :date-formatter (state/get-date-formatter) - :page-name-order (state/page-name-order) - :block-pattern (config/get-block-pattern (gp-util/get-format file)) - :supported-formats (gp-config/supported-formats)}}))))) + (:tx + (graph-parser/parse-file + (db/get-db repo-url false) + file + content + (merge options + {:new? new? + :delete-blocks-fn (partial get-delete-blocks repo-url) + :extract-options {:user-config (state/get-config) + :date-formatter (state/get-date-formatter) + :page-name-order (state/page-name-order) + :block-pattern (config/get-block-pattern (gp-util/get-format file)) + :supported-formats (gp-config/supported-formats)}})))))) ;; TODO: Remove this function in favor of `alter-files` (defn alter-file