Merge remote-tracking branch 'origin/master' into enhance/rtc-migrate

This commit is contained in:
rcmerci
2025-08-13 17:00:34 +08:00
15 changed files with 139 additions and 112 deletions

View File

@@ -19,11 +19,7 @@
;; Get the runtime id from http://localhost:9630/runtimes, pick the one which shows `browser-worker`
(defn worker-repl
([]
(when-let [runtime-id (->> (api/repl-runtimes :workers)
(filter (fn [runtime] (= :browser-worker (:host runtime))))
(map :client-id)
(apply max))]
(worker-repl runtime-id)))
(worker-repl :old))
([runtime-id-or-which]
(assert runtime-id-or-which "runtime-id shouldn't be empty")
(if

View File

@@ -1252,7 +1252,7 @@
(c.m/<?
(state/<invoke-db-worker :thread-api/vec-search-cancel-indexing repo))
(c.m/<?
(state/<invoke-db-worker :thread-api/vec-search-re-embedding-graph-data repo)))
(state/<invoke-db-worker :thread-api/vec-search-embedding-graph repo {:reset-embedding? true})))
:succ (constantly nil)))}
current-model
(assoc :value current-model))

View File

@@ -76,14 +76,14 @@
{:size :sm
:class "mx-2"
:on-click (fn [_]
(state/<invoke-db-worker :thread-api/vec-search-embedding-stale-blocks repo))}
"embedding-stale-blocks")
(state/<invoke-db-worker :thread-api/vec-search-embedding-graph repo {}))}
"embedding-blocks")
(shui/button
{:size :sm
:class "mx-2"
:on-click (fn [_]
(state/<invoke-db-worker :thread-api/vec-search-re-embedding-graph-data repo))}
"force-embedding-all-graph-blocks")
(state/<invoke-db-worker :thread-api/vec-search-embedding-graph repo {:reset-embedding? true}))}
"full-embedding-blocks")
(when (get-in vec-search-state [:repo->index-info repo :indexing?])
(shui/button
{:size :sm

View File

@@ -26,7 +26,7 @@
(repo-handler/restore-and-setup-repo! graph {:import-type :sqlite-db})
(state/set-current-repo! graph)
(persist-db/<export-db graph {})
(db/transact! graph (sqlite-util/import-tx :sqlite-db))
(db/transact! graph (sqlite-util/import-tx :sqlite-db) {:import-db? true})
(finished-ok-handler))
(p/catch
(fn [e]
@@ -44,7 +44,7 @@
:datoms datoms})
(state/add-repo! {:url graph})
(repo-handler/restore-and-setup-repo! graph {:import-type :debug-transit})
(db/transact! graph (sqlite-util/import-tx :debug-transit))
(db/transact! graph (sqlite-util/import-tx :debug-transit) {:import-db? true})
(state/set-current-repo! graph)
(finished-ok-handler))))
@@ -61,7 +61,8 @@
;; (cljs.pprint/pprint _txs)
(if error
(notification/show! error :error)
(let [tx-meta {::sqlite-export/imported-data? true}
(let [tx-meta {::sqlite-export/imported-data? true
:import-db? true}
repo (state/get-current-repo)]
(p/do
(db/transact! repo init-tx tx-meta)

View File

@@ -25,6 +25,6 @@
(m/? (m/sleep 1000))
(c.m/<? (state/<invoke-db-worker :thread-api/vec-search-init-embedding-model repo))
(m/?< (c.m/clock (* 30 1000)))
(c.m/<? (state/<invoke-db-worker :thread-api/vec-search-embedding-graph repo))
(c.m/<? (state/<invoke-db-worker :thread-api/vec-search-embedding-graph repo {}))
(catch Cancelled _
(m/amb)))))))

View File

@@ -3,6 +3,7 @@
(:require [clojure.string :as string]
[dommy.core :as dom]
[electron.ipc :as ipc]
[frontend.common.missionary :as c.m]
[frontend.common.search-fuzzy :as fuzzy]
[frontend.config :as config]
[frontend.db :as db]
@@ -11,7 +12,9 @@
[frontend.state :as state]
[frontend.storage :as storage]
[frontend.util :as util]
[logseq.db :as ldb]
[logseq.graph-parser.text :as text]
[missionary.core :as m]
[promesa.core :as p]))
(defn sanity-search-content
@@ -115,11 +118,22 @@
(rebuild-indices! false))
([notice?]
(println "Starting to rebuild search indices!")
(p/let [_ (search/rebuild-indices!)]
(when notice?
(notification/show!
"Search indices rebuilt successfully!"
:success)))))
(when-let [repo (state/get-current-repo)]
(p/do!
(search/rebuild-indices!)
(when (ldb/get-key-value (db/get-db) :logseq.kv/graph-text-embedding-model-name)
(c.m/run-task
::rebuild-embeddings
(m/sp
(c.m/<?
(state/<invoke-db-worker :thread-api/vec-search-cancel-indexing repo))
(c.m/<?
(state/<invoke-db-worker :thread-api/vec-search-embedding-graph repo {:reset-embedding? true})))
:succ (constantly nil)))
(when notice?
(notification/show!
"Search indices rebuilt successfully!"
:success))))))
(defn highlight-exact-query
[content q]

View File

@@ -1,6 +1,7 @@
(ns frontend.worker.db-listener
"Db listeners for worker-db."
(:require [datascript.core :as d]
(:require [clojure.string :as string]
[datascript.core :as d]
[frontend.common.thread-api :as thread-api]
[frontend.worker.pipeline :as worker-pipeline]
[frontend.worker.rtc.gen-client-op :as gen-client-op]
@@ -8,6 +9,7 @@
[frontend.worker.shared-service :as shared-service]
[frontend.worker.state :as worker-state]
[logseq.common.util :as common-util]
[logseq.db :as ldb]
[logseq.outliner.batch-tx :as batch-tx]
[promesa.core :as p]))
@@ -32,6 +34,7 @@
(when-not from-disk?
(p/do!
;; Sync SQLite search
(let [{:keys [blocks-to-remove-set blocks-to-add]} (search/sync-search-indice repo tx-report')]
(when (seq blocks-to-remove-set)
((@thread-api/*thread-apis :thread-api/search-delete-blocks) repo blocks-to-remove-set))
@@ -46,6 +49,25 @@
(prn :tx-data tx-data)
(prn :tx-meta tx-meta)))
(defn- remove-old-embeddings-and-reset-new-updates!
  "Keeps the `:logseq.property.embedding/hnsw-label-updated-at` markers in step
  with a transaction that was just applied to `conn`.

  Nothing is done unless `ldb/db-based-graph?` holds for the current db.
  Two groups of tx-data are assembled:
  - when `tx-meta` carries `:import-db?` (a graph import), a retraction of the
    marker for every entity that currently has one;
  - for every datom in `tx-data` that adds a non-blank `:block/title`, the
    marker set to 0 on that entity, flagging its embedding to be computed.
  When at least one item was assembled, the retractions followed by the
  additions are transacted with `:skip-refresh?` and `:pipeline-replace?` set."
  [conn tx-data tx-meta]
  (let [db @conn]
    (when (ldb/db-based-graph? db)
      (let [marker-attr :logseq.property.embedding/hnsw-label-updated-at
            ;; Entity id of a datom that adds a non-blank title, else nil.
            new-title-eid (fn [datom]
                            (when (and (= :block/title (:a datom))
                                       (:added datom)
                                       (not (string/blank? (:v datom))))
                              (:e datom)))
            ;; Old markers are dropped only when a graph is being imported.
            retractions (when (:import-db? tx-meta)
                          (for [datom (d/datoms db :avet marker-attr)]
                            [:db/retract (:e datom) marker-attr]))
            ;; A marker value of 0 flags the block's embedding to be computed.
            additions (for [datom tx-data
                            :let [eid (new-title-eid datom)]
                            :when (some? eid)]
                        [:db/add eid marker-attr 0])
            marker-tx-data (concat retractions additions)]
        (when (seq marker-tx-data)
          (d/transact! conn marker-tx-data
                       {:pipeline-replace? true
                        :skip-refresh? true}))))))
(defn listen-db-changes!
[repo conn & {:keys [handler-keys]}]
(let [handlers (if (seq handler-keys)
@@ -64,6 +86,8 @@
(d/listen! conn ::listen-db-changes!
(fn listen-db-changes!-inner
[{:keys [tx-data _db-before _db-after tx-meta] :as tx-report}]
(remove-old-embeddings-and-reset-new-updates! conn tx-data tx-meta)
(let [tx-meta (merge (batch-tx/get-batch-opts) tx-meta)
pipeline-replace? (:pipeline-replace? tx-meta)
in-batch-tx-mode? (:batch-tx/batch-tx-mode? tx-meta)]

View File

@@ -745,17 +745,9 @@
[repo model-name]
(js/Promise. (embedding/task--load-model repo model-name)))
;; Thread API registered under :thread-api/vec-search-embedding-stale-blocks.
;; Takes the graph `repo` and simply delegates to
;; `embedding/embedding-stale-blocks!`, returning whatever that call returns.
(def-thread-api :thread-api/vec-search-embedding-stale-blocks
[repo]
(embedding/embedding-stale-blocks! repo))
;; Thread API registered under :thread-api/vec-search-re-embedding-graph-data.
;; Takes the graph `repo` and simply delegates to
;; `embedding/re-embedding-graph-data!`, returning whatever that call returns.
(def-thread-api :thread-api/vec-search-re-embedding-graph-data
[repo]
(embedding/re-embedding-graph-data! repo))
(def-thread-api :thread-api/vec-search-embedding-graph
[repo]
(embedding/embedding-graph! repo))
[repo opts]
(embedding/embedding-graph! repo opts))
(def-thread-api :thread-api/vec-search-search
[repo query-string nums-neighbors]

View File

@@ -80,13 +80,16 @@
(defn- stale-block-lazy-seq
[db reset?]
(->> (rseq (d/index-range db :block/updated-at nil nil))
(sequence
(comp (map #(d/entity db (:e %)))
(filter (stale-block-filter-preds reset?))
(map (fn [b]
(assoc b :block.temp/text-to-embedding
(db-content/recur-replace-uuid-in-block-title b)
(let [datoms (if reset?
(rseq (d/index-range db :block/updated-at nil nil))
(d/datoms db :avet :logseq.property.embedding/hnsw-label-updated-at 0))]
(->> datoms
(sequence
(comp (map #(d/entity db (:e %)))
(filter (stale-block-filter-preds reset?))
(map (fn [b]
(assoc b :block.temp/text-to-embedding
(db-content/recur-replace-uuid-in-block-title b)
;; FIXME: tags and properties can affect sorting
;; (str (db-content/recur-replace-uuid-in-block-title b)
;; (let [tags (->> (:block/tags b)
@@ -95,7 +98,7 @@
;; (str " " (string/join ", " (map (fn [t] (str "#" t)) tags)))))
;; (when-let [desc (:block/title (:logseq.property/description b))]
;; (str "\nDescription: " desc)))
)))))))
))))))))
(defn- partition-by-text-size
[text-size]
(let [*current-size (volatile! 0)
@@ -155,13 +158,15 @@
(defn- task--embedding-stale-blocks!
"embedding outdated block-data
outdate rule: block/updated-at > :logseq.property.embedding/hnsw-label-updated-at"
[repo]
[repo reset-embedding?]
(m/sp
(when-let [^js infer-worker @worker-state/*infer-worker]
(when-let [conn (worker-state/get-datascript-conn repo)]
(let [stale-blocks (stale-block-lazy-seq @conn false)]
(when (seq stale-blocks)
(m/? (task--update-index-info!* repo infer-worker true))
(when reset-embedding?
(c.m/<? (.force-reset-index! infer-worker repo)))
(doseq [stale-block-chunk (sequence (partition-by-text-size (get-partition-size repo)) stale-blocks)]
(let [e+updated-at-coll (map (juxt :db/id :block/updated-at) stale-block-chunk)
_ (when (some (fn [id] (> id 2147483647)) (map :db/id stale-block-chunk))
@@ -175,61 +180,36 @@
false))
tx-data (labels-update-tx-data @conn e+updated-at-coll)]
(d/transact! conn tx-data {:skip-refresh? true})
(m/? (task--update-index-info!* repo infer-worker true))))
(c.m/<? (.write-index! infer-worker repo))
(m/? (task--update-index-info!* repo infer-worker true))
(c.m/<? (.write-index! infer-worker repo))))
(m/? (task--update-index-info!* repo infer-worker false))))))))
;; Builds an `m/sp` task that re-embeds the blocks of graph `repo` from scratch.
;; Sequence: flag index info -> force-reset the index -> embed and store the
;; blocks chunk by chunk, recording label timestamps after each chunk ->
;; write the index -> clear the index-info flag.
;; The body is a no-op when there is no infer worker or no datascript conn
;; for `repo`.
(defn- task--re-embedding-graph-data!
"force re-embedding all block-data in graph"
[repo]
(m/sp
;; Both the infer worker and the graph's datascript conn must be available.
(when-let [^js infer-worker @worker-state/*infer-worker]
(when-let [conn (worker-state/get-datascript-conn repo)]
;; NOTE(review): the trailing boolean looks like an "indexing in progress"
;; flag (true here, false at the very end) -- confirm against
;; task--update-index-info!*.
(m/? (task--update-index-info!* repo infer-worker true))
;; Ask the infer worker to force-reset the index for this repo before
;; anything is re-embedded.
(c.m/<? (.force-reset-index! infer-worker repo))
;; `true` is passed as the `reset?` argument of stale-block-lazy-seq.
(let [all-blocks (stale-block-lazy-seq @conn true)]
;; Blocks are processed in chunks produced by the partition-by-text-size
;; transducer, sized by (get-partition-size repo).
(doseq [block-chunk (sequence (partition-by-text-size (get-partition-size repo)) all-blocks)]
(let [e+updated-at-coll (map (juxt :db/id :block/updated-at) block-chunk)
;; 2147483647 is the largest signed 32-bit integer; any larger db/id
;; aborts the task. Presumably ids are used as index labels that must
;; fit in 32 bits -- TODO confirm.
_ (when (some (fn [id] (> id 2147483647)) (map :db/id block-chunk))
(throw (ex-info "Wrong db/id" {:data (filter (fn [item] (> (:db/id item) 2147483647)) block-chunk)})))
;; Send the chunk's texts and the matching db/ids (parallel JS arrays)
;; to the infer worker and wait for it to finish. The meaning of the
;; final `false` argument is not visible here.
_ (c.m/<?
(.text-embedding+store!
infer-worker repo
(into-array (map :block.temp/text-to-embedding block-chunk))
(into-array (map :db/id block-chunk))
false))
;; Built from the [db/id updated-at] pairs of the chunk just embedded.
tx-data (labels-update-tx-data @conn e+updated-at-coll)]
(d/transact! conn tx-data {:skip-refresh? true})
;; Refresh index info after every chunk.
(m/? (task--update-index-info!* repo infer-worker true)))))
;; All chunks done: have the worker write the index, then update index info
;; with the flag set to false.
(c.m/<? (.write-index! infer-worker repo))
(m/? (task--update-index-info!* repo infer-worker false))))))
(defn embedding-stale-blocks!
[repo]
(defn- embedding-stale-blocks!
[repo reset-embedding?]
(when-not (indexing? repo)
(let [canceler (c.m/run-task
:embedding-stale-blocks!
(task--embedding-stale-blocks! repo)
:succ (constantly nil))]
(reset-*vector-search-state! repo :canceler canceler))))
(defn re-embedding-graph-data!
[repo]
(when-not (indexing? repo)
(let [canceler (c.m/run-task
:re-embedding-graph-data!
(task--re-embedding-graph-data! repo)
(task--embedding-stale-blocks! repo reset-embedding?)
:succ (constantly nil))]
(reset-*vector-search-state! repo :canceler canceler))))
(defn embedding-graph!
[repo]
[repo {:keys [reset-embedding?]
:or {reset-embedding? false}}]
(when-not (indexing? repo)
(when-let [conn (worker-state/get-datascript-conn repo)]
(when (ldb/get-key-value @conn :logseq.kv/graph-text-embedding-model-name)
(if (first (d/datoms @conn :avet :logseq.property.embedding/hnsw-label-updated-at)) ; embedding exists
(embedding-stale-blocks! repo)
(re-embedding-graph-data! repo))))))
(when (or reset-embedding?
;; embedding not exists yet
(empty? (d/datoms @conn :avet :logseq.property.embedding/hnsw-label-updated-at)))
;; reset embedding
(let [mark-embedding-tx-data (->>
(d/datoms @conn :avet :block/title)
(map (fn [d]
[:db/add (:e d) :logseq.property.embedding/hnsw-label-updated-at 0])))]
(d/transact! conn mark-embedding-tx-data {:skip-refresh? true})))
(embedding-stale-blocks! repo reset-embedding?)))))
(defn task--embedding-model-info
[repo]