diff --git a/bb.edn b/bb.edn index e2b4813088..54397764ff 100644 --- a/bb.edn +++ b/bb.edn @@ -207,7 +207,10 @@ logseq.tasks.lang/validate-translations file-sync:integration-tests - logseq.tasks.file-sync/integration-tests} + logseq.tasks.file-sync/integration-tests + + ai:check-common-errors + logseq.tasks.common-errors/check-common-errors} :tasks/config {:large-vars diff --git a/deps/db/src/logseq/db/frontend/property.cljs b/deps/db/src/logseq/db/frontend/property.cljs index dab0d68974..d2717c23fb 100644 --- a/deps/db/src/logseq/db/frontend/property.cljs +++ b/deps/db/src/logseq/db/frontend/property.cljs @@ -543,9 +543,6 @@ :logseq.property.history/scalar-value {:title "History scalar value" :schema {:type :any :hide? true}} - :logseq.property/created-by {:title "Node created by(deprecated)" - :schema {:type :string - :hide? true}} :logseq.property/created-by-ref {:title "Node created by" :schema {:type :entity :hide? true} diff --git a/deps/db/src/logseq/db/sqlite/export.cljs b/deps/db/src/logseq/db/sqlite/export.cljs index 8811ec5f50..0c3472db60 100644 --- a/deps/db/src/logseq/db/sqlite/export.cljs +++ b/deps/db/src/logseq/db/sqlite/export.cljs @@ -81,6 +81,8 @@ (or (block-title pvalue) (:logseq.property/value pvalue)))) +(defonce ignored-properties [:logseq.property/created-by-ref :logseq.property.embedding/hnsw-label-updated-at]) + (defn- buildable-properties "Originally copied from db-test/readable-properties. Modified so that property values are valid sqlite.build EDN" @@ -104,31 +106,30 @@ ent-properties (when (and (not (:block/closed-value-property pvalue)) (seq ent-properties*)) (buildable-properties db' ent-properties* properties-config' options'))] (build-pvalue-entity-default db ent-properties pvalue options'))))))] - (let [ignored-properties [:logseq.property/created-by-ref]] - (->> (apply dissoc ent-properties ignored-properties) - (map (fn [[k v]] - [k - ;; handle user closed value properties. 
built-ins have idents and shouldn't be handled here - (if (and (not (db-property/logseq-property? k)) - (or (:block/closed-value-property v) - (and (set? v) (:block/closed-value-property (first v))))) - (let [find-closed-uuid (fn [val] - (or (some #(when (= (:value %) (db-property/property-value-content val)) - (:uuid %)) - (get-in properties-config [k :build/closed-values])) - (throw (ex-info (str "No closed value found for content: " (pr-str (db-property/property-value-content val))) {:properties properties-config}))))] - (if (set? v) - (set (map #(vector :block/uuid (find-closed-uuid %)) v)) - [:block/uuid (find-closed-uuid v)])) - (cond - (de/entity? v) - (build-pvalue-entity db (d/entity db k) v properties-config options) - (and (set? v) (every? de/entity? v)) - (let [property-ent (d/entity db k)] - (set (map #(build-pvalue-entity db property-ent % properties-config options) v))) - :else - v))])) - (into {}))))) + (->> (apply dissoc ent-properties ignored-properties) + (map (fn [[k v]] + [k + ;; handle user closed value properties. built-ins have idents and shouldn't be handled here + (if (and (not (db-property/logseq-property? k)) + (or (:block/closed-value-property v) + (and (set? v) (:block/closed-value-property (first v))))) + (let [find-closed-uuid (fn [val] + (or (some #(when (= (:value %) (db-property/property-value-content val)) + (:uuid %)) + (get-in properties-config [k :build/closed-values])) + (throw (ex-info (str "No closed value found for content: " (pr-str (db-property/property-value-content val))) {:properties properties-config}))))] + (if (set? v) + (set (map #(vector :block/uuid (find-closed-uuid %)) v)) + [:block/uuid (find-closed-uuid v)])) + (cond + (de/entity? v) + (build-pvalue-entity db (d/entity db k) v properties-config options) + (and (set? v) (every? de/entity? 
v)) + (let [property-ent (d/entity db k)] + (set (map #(build-pvalue-entity db property-ent % properties-config options) v))) + :else + v))])) + (into {})))) (defn- build-export-properties "The caller of this fn is responsible for building :build/:property-classes unless shallow-copy?" diff --git a/deps/db/src/logseq/db/sqlite/util.cljs b/deps/db/src/logseq/db/sqlite/util.cljs index 6202f9f457..575280b161 100644 --- a/deps/db/src/logseq/db/sqlite/util.cljs +++ b/deps/db/src/logseq/db/sqlite/util.cljs @@ -133,4 +133,5 @@ (fn [db-ident] [:db/retractEntity db-ident]) [:logseq.kv/graph-uuid :logseq.kv/graph-local-tx - :logseq.kv/remote-schema-version]))) + :logseq.kv/remote-schema-version + :logseq.kv/graph-text-embedding-model-name]))) diff --git a/prompts/clojure_errors.md b/prompts/clojure_errors.md new file mode 100644 index 0000000000..9f82aa93c0 --- /dev/null +++ b/prompts/clojure_errors.md @@ -0,0 +1,3 @@ +You're Clojure expert, you're responsible to check those common errors: + +1. `empty?` should be used instead of `empty` when a boolean value is expected in an expression. diff --git a/scripts/src/logseq/tasks/common_errors.clj b/scripts/src/logseq/tasks/common_errors.clj new file mode 100644 index 0000000000..a60a04c7a8 --- /dev/null +++ b/scripts/src/logseq/tasks/common_errors.clj @@ -0,0 +1,15 @@ +(ns logseq.tasks.common-errors + "Task to use AI to detect common errors" + (:require [babashka.fs :as fs] + [babashka.process :refer [shell]] + [clojure.string :as string])) + +(defn check-common-errors + "Runs `git diff` over *.cljs files and, unless the diff is blank, sends prompts/clojure_errors.md followed by that diff to `gh models run openai/gpt-5`." + [] + (let [prompt (String. (fs/read-all-bytes "prompts/clojure_errors.md")) + diff (:out (shell {:out :string} "git diff --no-prefix -U100 -- '*.cljs'"))] + (when-not (string/blank?
diff) + ;; Hand the prompt to gh as its own argument: `shell` tokenizes only its first string, + ;; so quotes or whitespace inside the diff can no longer split or truncate the command. + (shell "gh models run openai/gpt-5" (str prompt "\n\n " diff))))) diff --git a/src/dev-cljs/shadow/user.clj b/src/dev-cljs/shadow/user.clj index d2c93776c3..8faba9c9b4 100644 --- a/src/dev-cljs/shadow/user.clj +++ b/src/dev-cljs/shadow/user.clj @@ -19,11 +19,7 @@ ;; Get the runtime id from http://localhost:9630/runtimes, pick the one which shows `browser-worker` (defn worker-repl ([] - (when-let [runtime-id (->> (api/repl-runtimes :workers) - (filter (fn [runtime] (= :browser-worker (:host runtime)))) - (map :client-id) - (apply max))] - (worker-repl runtime-id))) + (worker-repl :old)) ([runtime-id-or-which] (assert runtime-id-or-which "runtime-id shouldn't be empty") (if diff --git a/src/main/frontend/components/settings.cljs b/src/main/frontend/components/settings.cljs index 89e8de28c9..cdc0056726 100644 --- a/src/main/frontend/components/settings.cljs +++ b/src/main/frontend/components/settings.cljs @@ -1252,7 +1252,7 @@ (c.m/index-info repo :indexing?]) (shui/button {:size :sm diff --git a/src/main/frontend/handler/db_based/import.cljs b/src/main/frontend/handler/db_based/import.cljs index 2a6ab0ede2..0b941d2b3f 100644 --- a/src/main/frontend/handler/db_based/import.cljs +++ b/src/main/frontend/handler/db_based/import.cljs @@ -26,7 +26,7 @@ (repo-handler/restore-and-setup-repo! graph {:import-type :sqlite-db}) (state/set-current-repo! graph) (persist-db/> (d/datoms @conn :avet :logseq.property.embedding/hnsw-label-updated-at) + (map (fn [d] + [:db/retract (:e d) :logseq.property.embedding/hnsw-label-updated-at])))) + ;; Mark vector embedding + mark-embedding-tx-data (->> (keep (fn [datom] (when (and (= :block/title (:a datom)) (:added datom) (not (string/blank? 
(:v datom)))) + (:e datom))) tx-data) + ;; Mark block embedding to be computed + (map (fn [id] [:db/add id :logseq.property.embedding/hnsw-label-updated-at 0]))) + tx-data (concat remove-old-hnsw-tx-data mark-embedding-tx-data)] + (when (seq tx-data) + (d/transact! conn tx-data + {:skip-refresh? true + :pipeline-replace? true}))))) + (defn listen-db-changes! [repo conn & {:keys [handler-keys]}] (let [handlers (if (seq handler-keys) @@ -64,6 +86,8 @@ (d/listen! conn ::listen-db-changes! (fn listen-db-changes!-inner [{:keys [tx-data _db-before _db-after tx-meta] :as tx-report}] + (remove-old-embeddings-and-reset-new-updates! conn tx-data tx-meta) + (let [tx-meta (merge (batch-tx/get-batch-opts) tx-meta) pipeline-replace? (:pipeline-replace? tx-meta) in-batch-tx-mode? (:batch-tx/batch-tx-mode? tx-meta)] diff --git a/src/main/frontend/worker/db_worker.cljs b/src/main/frontend/worker/db_worker.cljs index b2b57bb857..a0e00c3d23 100644 --- a/src/main/frontend/worker/db_worker.cljs +++ b/src/main/frontend/worker/db_worker.cljs @@ -745,17 +745,9 @@ [repo model-name] (js/Promise. (embedding/task--load-model repo model-name))) -(def-thread-api :thread-api/vec-search-embedding-stale-blocks - [repo] - (embedding/embedding-stale-blocks! repo)) - -(def-thread-api :thread-api/vec-search-re-embedding-graph-data - [repo] - (embedding/re-embedding-graph-data! repo)) - (def-thread-api :thread-api/vec-search-embedding-graph - [repo] - (embedding/embedding-graph! repo)) + [repo opts] + (embedding/embedding-graph! repo opts)) (def-thread-api :thread-api/vec-search-search [repo query-string nums-neighbors] diff --git a/src/main/frontend/worker/embedding.cljs b/src/main/frontend/worker/embedding.cljs index 177f204d23..6078e56a84 100644 --- a/src/main/frontend/worker/embedding.cljs +++ b/src/main/frontend/worker/embedding.cljs @@ -80,13 +80,16 @@ (defn- stale-block-lazy-seq [db reset?] 
- (->> (rseq (d/index-range db :block/updated-at nil nil)) - (sequence - (comp (map #(d/entity db (:e %))) - (filter (stale-block-filter-preds reset?)) - (map (fn [b] - (assoc b :block.temp/text-to-embedding - (db-content/recur-replace-uuid-in-block-title b) + (let [datoms (if reset? + (rseq (d/index-range db :block/updated-at nil nil)) + (d/datoms db :avet :logseq.property.embedding/hnsw-label-updated-at 0))] + (->> datoms + (sequence + (comp (map #(d/entity db (:e %))) + (filter (stale-block-filter-preds reset?)) + (map (fn [b] + (assoc b :block.temp/text-to-embedding + (db-content/recur-replace-uuid-in-block-title b) ;; FIXME: tags and properties can affect sorting ;; (str (db-content/recur-replace-uuid-in-block-title b) ;; (let [tags (->> (:block/tags b) @@ -95,7 +98,7 @@ ;; (str " " (string/join ", " (map (fn [t] (str "#" t)) tags))))) ;; (when-let [desc (:block/title (:logseq.property/description b))] ;; (str "\nDescription: " desc))) - ))))))) + )))))))) (defn- partition-by-text-size [text-size] (let [*current-size (volatile! 0) @@ -155,13 +158,15 @@ (defn- task--embedding-stale-blocks! "embedding outdated block-data outdate rule: block/updated-at > :logseq.property.embedding/hnsw-label-updated-at" - [repo] + [repo reset-embedding?] (m/sp (when-let [^js infer-worker @worker-state/*infer-worker] (when-let [conn (worker-state/get-datascript-conn repo)] (let [stale-blocks (stale-block-lazy-seq @conn false)] (when (seq stale-blocks) (m/? (task--update-index-info!* repo infer-worker true)) + (when reset-embedding? + (c.m/ id 2147483647)) (map :db/id stale-block-chunk)) @@ -175,61 +180,36 @@ false)) tx-data (labels-update-tx-data @conn e+updated-at-coll)] (d/transact! conn tx-data {:skip-refresh? true}) - (m/? 
(task--update-index-info!* repo infer-worker true)))) - (c.m/ id 2147483647)) (map :db/id block-chunk)) - (throw (ex-info "Wrong db/id" {:data (filter (fn [item] (> (:db/id item) 2147483647)) block-chunk)}))) - _ (c.m/> + (d/datoms @conn :avet :block/title) + (map (fn [d] + [:db/add (:e d) :logseq.property.embedding/hnsw-label-updated-at 0])))] + (d/transact! conn mark-embedding-tx-data {:skip-refresh? true}))) + + (embedding-stale-blocks! repo reset-embedding?))))) (defn task--embedding-model-info [repo]