diff --git a/bb.edn b/bb.edn
index e2b4813088..54397764ff 100644
--- a/bb.edn
+++ b/bb.edn
@@ -207,7 +207,10 @@
logseq.tasks.lang/validate-translations
file-sync:integration-tests
- logseq.tasks.file-sync/integration-tests}
+ logseq.tasks.file-sync/integration-tests
+
+ ai:check-common-errors
+ logseq.tasks.common-errors/check-common-errors}
:tasks/config
{:large-vars
diff --git a/deps/db/src/logseq/db/frontend/property.cljs b/deps/db/src/logseq/db/frontend/property.cljs
index dab0d68974..d2717c23fb 100644
--- a/deps/db/src/logseq/db/frontend/property.cljs
+++ b/deps/db/src/logseq/db/frontend/property.cljs
@@ -543,9 +543,6 @@
:logseq.property.history/scalar-value {:title "History scalar value"
:schema {:type :any
:hide? true}}
- :logseq.property/created-by {:title "Node created by(deprecated)"
- :schema {:type :string
- :hide? true}}
:logseq.property/created-by-ref {:title "Node created by"
:schema {:type :entity
:hide? true}
diff --git a/deps/db/src/logseq/db/sqlite/export.cljs b/deps/db/src/logseq/db/sqlite/export.cljs
index 8811ec5f50..0c3472db60 100644
--- a/deps/db/src/logseq/db/sqlite/export.cljs
+++ b/deps/db/src/logseq/db/sqlite/export.cljs
@@ -81,6 +81,8 @@
(or (block-title pvalue)
(:logseq.property/value pvalue))))
+(defonce ignored-properties [:logseq.property/created-by-ref :logseq.property.embedding/hnsw-label-updated-at])
+
(defn- buildable-properties
"Originally copied from db-test/readable-properties. Modified so that property values are
valid sqlite.build EDN"
@@ -104,31 +106,30 @@
ent-properties (when (and (not (:block/closed-value-property pvalue)) (seq ent-properties*))
(buildable-properties db' ent-properties* properties-config' options'))]
(build-pvalue-entity-default db ent-properties pvalue options'))))))]
- (let [ignored-properties [:logseq.property/created-by-ref]]
- (->> (apply dissoc ent-properties ignored-properties)
- (map (fn [[k v]]
- [k
- ;; handle user closed value properties. built-ins have idents and shouldn't be handled here
- (if (and (not (db-property/logseq-property? k))
- (or (:block/closed-value-property v)
- (and (set? v) (:block/closed-value-property (first v)))))
- (let [find-closed-uuid (fn [val]
- (or (some #(when (= (:value %) (db-property/property-value-content val))
- (:uuid %))
- (get-in properties-config [k :build/closed-values]))
- (throw (ex-info (str "No closed value found for content: " (pr-str (db-property/property-value-content val))) {:properties properties-config}))))]
- (if (set? v)
- (set (map #(vector :block/uuid (find-closed-uuid %)) v))
- [:block/uuid (find-closed-uuid v)]))
- (cond
- (de/entity? v)
- (build-pvalue-entity db (d/entity db k) v properties-config options)
- (and (set? v) (every? de/entity? v))
- (let [property-ent (d/entity db k)]
- (set (map #(build-pvalue-entity db property-ent % properties-config options) v)))
- :else
- v))]))
- (into {})))))
+ (->> (apply dissoc ent-properties ignored-properties)
+ (map (fn [[k v]]
+ [k
+ ;; handle user closed value properties. built-ins have idents and shouldn't be handled here
+ (if (and (not (db-property/logseq-property? k))
+ (or (:block/closed-value-property v)
+ (and (set? v) (:block/closed-value-property (first v)))))
+ (let [find-closed-uuid (fn [val]
+ (or (some #(when (= (:value %) (db-property/property-value-content val))
+ (:uuid %))
+ (get-in properties-config [k :build/closed-values]))
+ (throw (ex-info (str "No closed value found for content: " (pr-str (db-property/property-value-content val))) {:properties properties-config}))))]
+ (if (set? v)
+ (set (map #(vector :block/uuid (find-closed-uuid %)) v))
+ [:block/uuid (find-closed-uuid v)]))
+ (cond
+ (de/entity? v)
+ (build-pvalue-entity db (d/entity db k) v properties-config options)
+ (and (set? v) (every? de/entity? v))
+ (let [property-ent (d/entity db k)]
+ (set (map #(build-pvalue-entity db property-ent % properties-config options) v)))
+ :else
+ v))]))
+ (into {}))))
(defn- build-export-properties
"The caller of this fn is responsible for building :build/:property-classes unless shallow-copy?"
diff --git a/deps/db/src/logseq/db/sqlite/util.cljs b/deps/db/src/logseq/db/sqlite/util.cljs
index 6202f9f457..575280b161 100644
--- a/deps/db/src/logseq/db/sqlite/util.cljs
+++ b/deps/db/src/logseq/db/sqlite/util.cljs
@@ -133,4 +133,5 @@
(fn [db-ident] [:db/retractEntity db-ident])
[:logseq.kv/graph-uuid
:logseq.kv/graph-local-tx
- :logseq.kv/remote-schema-version])))
+ :logseq.kv/remote-schema-version
+ :logseq.kv/graph-text-embedding-model-name])))
diff --git a/prompts/clojure_errors.md b/prompts/clojure_errors.md
new file mode 100644
index 0000000000..9f82aa93c0
--- /dev/null
+++ b/prompts/clojure_errors.md
@@ -0,0 +1,3 @@
+You're a Clojure expert. Your job is to check the code below for these common errors:
+
+1. `empty?` should be used instead of `empty` when a boolean value is expected in an expression.
diff --git a/scripts/src/logseq/tasks/common_errors.clj b/scripts/src/logseq/tasks/common_errors.clj
new file mode 100644
index 0000000000..a60a04c7a8
--- /dev/null
+++ b/scripts/src/logseq/tasks/common_errors.clj
@@ -0,0 +1,15 @@
+(ns logseq.tasks.common-errors
+ "Task to use AI to detect common errors"
+ (:require [babashka.fs :as fs]
+ [babashka.process :refer [shell]]
+ [clojure.string :as string]))
+
+(defn check-common-errors
+  "Sends the working-tree diff of *.cljs files, prefixed with the prompt read from
+  prompts/clojure_errors.md, to `gh models run openai/gpt-5`. Does nothing when the diff is blank."
+  []
+  (let [prompt (String. (fs/read-all-bytes "prompts/clojure_errors.md"))
+        diff (:out (shell {:out :string} "git diff --no-prefix -U100 -- '*.cljs'"))]
+    (when-not (string/blank? diff)
+      ;; Separate arg: only the first string is tokenized, so quotes in the diff can't split the command.
+      (shell "gh models run openai/gpt-5" (str prompt "\n\n " diff)))))
diff --git a/src/dev-cljs/shadow/user.clj b/src/dev-cljs/shadow/user.clj
index d2c93776c3..8faba9c9b4 100644
--- a/src/dev-cljs/shadow/user.clj
+++ b/src/dev-cljs/shadow/user.clj
@@ -19,11 +19,7 @@
;; Get the runtime id from http://localhost:9630/runtimes, pick the one which shows `browser-worker`
(defn worker-repl
([]
- (when-let [runtime-id (->> (api/repl-runtimes :workers)
- (filter (fn [runtime] (= :browser-worker (:host runtime))))
- (map :client-id)
- (apply max))]
- (worker-repl runtime-id)))
+ (worker-repl :old))
([runtime-id-or-which]
(assert runtime-id-or-which "runtime-id shouldn't be empty")
(if
diff --git a/src/main/frontend/components/settings.cljs b/src/main/frontend/components/settings.cljs
index 89e8de28c9..cdc0056726 100644
--- a/src/main/frontend/components/settings.cljs
+++ b/src/main/frontend/components/settings.cljs
@@ -1252,7 +1252,7 @@
(c.m/
(state/index-info repo :indexing?])
(shui/button
{:size :sm
diff --git a/src/main/frontend/handler/db_based/import.cljs b/src/main/frontend/handler/db_based/import.cljs
index 2a6ab0ede2..0b941d2b3f 100644
--- a/src/main/frontend/handler/db_based/import.cljs
+++ b/src/main/frontend/handler/db_based/import.cljs
@@ -26,7 +26,7 @@
(repo-handler/restore-and-setup-repo! graph {:import-type :sqlite-db})
(state/set-current-repo! graph)
(persist-db/> (d/datoms @conn :avet :logseq.property.embedding/hnsw-label-updated-at)
+ (map (fn [d]
+ [:db/retract (:e d) :logseq.property.embedding/hnsw-label-updated-at]))))
+ ;; Mark vector embedding
+ mark-embedding-tx-data (->> (keep (fn [datom] (when (and (= :block/title (:a datom)) (:added datom) (not (string/blank? (:v datom))))
+ (:e datom))) tx-data)
+ ;; Mark block embedding to be computed
+ (map (fn [id] [:db/add id :logseq.property.embedding/hnsw-label-updated-at 0])))
+ tx-data (concat remove-old-hnsw-tx-data mark-embedding-tx-data)]
+ (when (seq tx-data)
+ (d/transact! conn tx-data
+ {:skip-refresh? true
+ :pipeline-replace? true})))))
+
(defn listen-db-changes!
[repo conn & {:keys [handler-keys]}]
(let [handlers (if (seq handler-keys)
@@ -64,6 +86,8 @@
(d/listen! conn ::listen-db-changes!
(fn listen-db-changes!-inner
[{:keys [tx-data _db-before _db-after tx-meta] :as tx-report}]
+ (remove-old-embeddings-and-reset-new-updates! conn tx-data tx-meta)
+
(let [tx-meta (merge (batch-tx/get-batch-opts) tx-meta)
pipeline-replace? (:pipeline-replace? tx-meta)
in-batch-tx-mode? (:batch-tx/batch-tx-mode? tx-meta)]
diff --git a/src/main/frontend/worker/db_worker.cljs b/src/main/frontend/worker/db_worker.cljs
index b2b57bb857..a0e00c3d23 100644
--- a/src/main/frontend/worker/db_worker.cljs
+++ b/src/main/frontend/worker/db_worker.cljs
@@ -745,17 +745,9 @@
[repo model-name]
(js/Promise. (embedding/task--load-model repo model-name)))
-(def-thread-api :thread-api/vec-search-embedding-stale-blocks
- [repo]
- (embedding/embedding-stale-blocks! repo))
-
-(def-thread-api :thread-api/vec-search-re-embedding-graph-data
- [repo]
- (embedding/re-embedding-graph-data! repo))
-
(def-thread-api :thread-api/vec-search-embedding-graph
- [repo]
- (embedding/embedding-graph! repo))
+ [repo opts]
+ (embedding/embedding-graph! repo opts))
(def-thread-api :thread-api/vec-search-search
[repo query-string nums-neighbors]
diff --git a/src/main/frontend/worker/embedding.cljs b/src/main/frontend/worker/embedding.cljs
index 177f204d23..6078e56a84 100644
--- a/src/main/frontend/worker/embedding.cljs
+++ b/src/main/frontend/worker/embedding.cljs
@@ -80,13 +80,16 @@
(defn- stale-block-lazy-seq
[db reset?]
- (->> (rseq (d/index-range db :block/updated-at nil nil))
- (sequence
- (comp (map #(d/entity db (:e %)))
- (filter (stale-block-filter-preds reset?))
- (map (fn [b]
- (assoc b :block.temp/text-to-embedding
- (db-content/recur-replace-uuid-in-block-title b)
+ (let [datoms (if reset?
+ (rseq (d/index-range db :block/updated-at nil nil))
+ (d/datoms db :avet :logseq.property.embedding/hnsw-label-updated-at 0))]
+ (->> datoms
+ (sequence
+ (comp (map #(d/entity db (:e %)))
+ (filter (stale-block-filter-preds reset?))
+ (map (fn [b]
+ (assoc b :block.temp/text-to-embedding
+ (db-content/recur-replace-uuid-in-block-title b)
;; FIXME: tags and properties can affect sorting
;; (str (db-content/recur-replace-uuid-in-block-title b)
;; (let [tags (->> (:block/tags b)
@@ -95,7 +98,7 @@
;; (str " " (string/join ", " (map (fn [t] (str "#" t)) tags)))))
;; (when-let [desc (:block/title (:logseq.property/description b))]
;; (str "\nDescription: " desc)))
- )))))))
+ ))))))))
(defn- partition-by-text-size
[text-size]
(let [*current-size (volatile! 0)
@@ -155,13 +158,15 @@
(defn- task--embedding-stale-blocks!
"embedding outdated block-data
outdate rule: block/updated-at > :logseq.property.embedding/hnsw-label-updated-at"
- [repo]
+ [repo reset-embedding?]
(m/sp
(when-let [^js infer-worker @worker-state/*infer-worker]
(when-let [conn (worker-state/get-datascript-conn repo)]
(let [stale-blocks (stale-block-lazy-seq @conn false)]
(when (seq stale-blocks)
(m/? (task--update-index-info!* repo infer-worker true))
+ (when reset-embedding?
+ (c.m/ (.force-reset-index! infer-worker repo)))
(doseq [stale-block-chunk (sequence (partition-by-text-size (get-partition-size repo)) stale-blocks)]
(let [e+updated-at-coll (map (juxt :db/id :block/updated-at) stale-block-chunk)
_ (when (some (fn [id] (> id 2147483647)) (map :db/id stale-block-chunk))
@@ -175,61 +180,36 @@
false))
tx-data (labels-update-tx-data @conn e+updated-at-coll)]
(d/transact! conn tx-data {:skip-refresh? true})
- (m/? (task--update-index-info!* repo infer-worker true))))
- (c.m/ (.write-index! infer-worker repo))
+ (m/? (task--update-index-info!* repo infer-worker true))
+ (c.m/ (.write-index! infer-worker repo))))
(m/? (task--update-index-info!* repo infer-worker false))))))))
-(defn- task--re-embedding-graph-data!
- "force re-embedding all block-data in graph"
- [repo]
- (m/sp
- (when-let [^js infer-worker @worker-state/*infer-worker]
- (when-let [conn (worker-state/get-datascript-conn repo)]
- (m/? (task--update-index-info!* repo infer-worker true))
- (c.m/ (.force-reset-index! infer-worker repo))
- (let [all-blocks (stale-block-lazy-seq @conn true)]
- (doseq [block-chunk (sequence (partition-by-text-size (get-partition-size repo)) all-blocks)]
- (let [e+updated-at-coll (map (juxt :db/id :block/updated-at) block-chunk)
- _ (when (some (fn [id] (> id 2147483647)) (map :db/id block-chunk))
- (throw (ex-info "Wrong db/id" {:data (filter (fn [item] (> (:db/id item) 2147483647)) block-chunk)})))
- _ (c.m/
- (.text-embedding+store!
- infer-worker repo
- (into-array (map :block.temp/text-to-embedding block-chunk))
- (into-array (map :db/id block-chunk))
- false))
- tx-data (labels-update-tx-data @conn e+updated-at-coll)]
- (d/transact! conn tx-data {:skip-refresh? true})
- (m/? (task--update-index-info!* repo infer-worker true)))))
- (c.m/ (.write-index! infer-worker repo))
- (m/? (task--update-index-info!* repo infer-worker false))))))
-
-(defn embedding-stale-blocks!
- [repo]
+(defn- embedding-stale-blocks!
+ [repo reset-embedding?]
(when-not (indexing? repo)
(let [canceler (c.m/run-task
:embedding-stale-blocks!
- (task--embedding-stale-blocks! repo)
- :succ (constantly nil))]
- (reset-*vector-search-state! repo :canceler canceler))))
-
-(defn re-embedding-graph-data!
- [repo]
- (when-not (indexing? repo)
- (let [canceler (c.m/run-task
- :re-embedding-graph-data!
- (task--re-embedding-graph-data! repo)
+ (task--embedding-stale-blocks! repo reset-embedding?)
:succ (constantly nil))]
(reset-*vector-search-state! repo :canceler canceler))))
(defn embedding-graph!
- [repo]
+ [repo {:keys [reset-embedding?]
+ :or {reset-embedding? false}}]
(when-not (indexing? repo)
(when-let [conn (worker-state/get-datascript-conn repo)]
(when (ldb/get-key-value @conn :logseq.kv/graph-text-embedding-model-name)
- (if (first (d/datoms @conn :avet :logseq.property.embedding/hnsw-label-updated-at)) ; embedding exists
- (embedding-stale-blocks! repo)
- (re-embedding-graph-data! repo))))))
+ (when (or reset-embedding?
+ ;; no entity has an hnsw-label-updated-at value yet, i.e. nothing has been embedded or marked
+ (empty? (d/datoms @conn :avet :logseq.property.embedding/hnsw-label-updated-at)))
+ ;; set hnsw-label-updated-at to 0 on every entity that has a :block/title, marking it as pending embedding
+ (let [mark-embedding-tx-data (->>
+ (d/datoms @conn :avet :block/title)
+ (map (fn [d]
+ [:db/add (:e d) :logseq.property.embedding/hnsw-label-updated-at 0])))]
+ (d/transact! conn mark-embedding-tx-data {:skip-refresh? true})))
+
+ (embedding-stale-blocks! repo reset-embedding?)))))
(defn task--embedding-model-info
[repo]