feat(vec-search): debug ui show load-model-progress

This commit is contained in:
rcmerci
2025-03-21 01:38:39 +08:00
parent a7273847bc
commit bb226a4147
9 changed files with 76 additions and 28 deletions

View File

@@ -17,6 +17,7 @@
^js worker @db-browser/*worker
[model-info set-model-info] (hooks/use-state nil)
[vec-search-state set-vec-search-state] (hooks/use-state nil)
[load-model-progress set-load-model-progress] (hooks/use-state nil)
[query-string set-query-string] (hooks/use-state nil)
[result set-result] (hooks/use-state nil)]
(hooks/use-effect!
@@ -24,9 +25,20 @@
(c.m/run-task
(m/reduce
(fn [_ v] (set-vec-search-state v))
vector-search-flows/vector-search-state-flow)
(m/ap
(m/?> vector-search-flows/infer-worker-ready-flow)
(c.m/<? (.vec-search-update-index-info worker repo))
(m/?> vector-search-flows/vector-search-state-flow)))
::update-vec-search-state :succ (constantly nil)))
[])
(hooks/use-effect!
(fn []
(c.m/run-task
(m/reduce
(fn [_ v] (set-load-model-progress v))
vector-search-flows/load-model-progress-flow)
::update-load-model-progress :succ (constantly nil)))
[])
(hooks/use-effect!
(fn []
(c.m/run-task
@@ -50,7 +62,8 @@
[(hooks/use-debounced-value query-string 200)])
[:div
[:b "State"]
(let [state-map (get-in vec-search-state [:repo->index-info repo])]
(let [state-map (assoc (get-in vec-search-state [:repo->index-info repo])
:load-model-progress load-model-progress)]
[:pre.select-text
(with-out-str
(fipp/pprint state-map {:width 10}))])

View File

@@ -15,5 +15,8 @@
;; Flow built by watching the atom stored under :vector-search/state
;; in the app state map.
(def vector-search-state-flow
  (-> @state/state
      :vector-search/state
      m/watch))
;; Flow built by watching the atom stored under
;; :vector-search/load-model-progress in the app state map.
(def load-model-progress-flow
  (-> @state/state
      :vector-search/load-model-progress
      m/watch))
(comment
((m/reduce (fn [_ x] (prn :xx x)) vector-search-state-flow) prn js/console.log))

View File

@@ -1095,6 +1095,9 @@
;; Event handler: calls shui/popup-hide! with :selection-action-bar.
(defmethod handle :editor/hide-action-bar []
(shui/popup-hide! :selection-action-bar))
;; Event handler: writes the event's payload (second element of the event
;; vector) into app state under :vector-search/load-model-progress.
(defmethod handle :vector-search/load-model-progress
  [[_event-name progress]]
  (state/set-state! :vector-search/load-model-progress progress))
(defn run!
[]
(let [chan (state/get-events-chan)]

View File

@@ -1,5 +1,5 @@
(ns frontend.handler.worker
"Handle messages received from the db worker"
"Handle messages received from the webworkers"
(:require [cljs-bean.core :as bean]
[frontend.handler.file-based.file :as file-handler]
[frontend.handler.notification :as notification]
@@ -52,6 +52,9 @@
;; Worker message handler: republishes the received data as a
;; [:capture-error data] event via state/pub-event!.
(defmethod handle :capture-error [_ _worker data]
(state/pub-event! [:capture-error data]))
;; Worker message handler: republishes the received progress payload as a
;; [:vector-search/load-model-progress progress] event via state/pub-event!.
(defmethod handle :vector-search/load-model-progress
  [_ _worker progress]
  (state/pub-event! [:vector-search/load-model-progress progress]))
;; Fallback for worker messages with no dedicated handler: only prints the
;; unhandled data with a :debug marker.
(defmethod handle :default [_ _worker data]
(prn :debug "Worker data not handled: " data))

View File

@@ -7,6 +7,7 @@
[frontend.inference-worker.state :as infer-worker.state]
[frontend.worker-common.util :as worker-util]
[lambdaisland.glogi :as log]
[logseq.common.config :as common-config]
[missionary.core :as m]
[promesa.core :as p]))
@@ -173,13 +174,15 @@
"Xenova/jina-embeddings-v2-base-zh" {:tf-config {:dtype "fp32"}
:hnsw-config {:dims 768}}})
;; Holds the most recent model-loading progress value; starts as nil.
;; It is watched below so that updates can be posted out of this worker.
(def ^:private *load-model-progress (atom nil))
;; Loads the "feature-extraction" pipeline for `model-name`, requesting the
;; "webgpu" device and merging in the model's :tf-config.
;; Returns nil when `model-name` is not a key of `available-embedding-models`;
;; otherwise returns the p/let result, which yields true once the extractor
;; and the [model-name config] pair have been stored in infer-worker.state.
;; NOTE(review): two "progress_callback" lines are visible here (one logging via
;; log/info, one resetting *load-model-progress) — presumably the before/after
;; variants of the same line in this diff; confirm which one is live.
(defn <load-model
[model-name]
(when-let [config (get available-embedding-models model-name)]
(p/let [extractor (pipeline "feature-extraction" model-name
(clj->js (-> (:tf-config config)
(assoc "device" "webgpu")
(assoc "progress_callback" #(log/info :progress %)))))]
(assoc "progress_callback" #(reset! *load-model-progress %)))))]
(reset! infer-worker.state/*extractor extractor)
(reset! infer-worker.state/*model-name+config [model-name config])
true)))
@@ -192,6 +195,13 @@
(.setDebugLogs (.-EmscriptenFileSystemManager ^js @infer-worker.state/*hnswlib) true)
(log/info :loaded :hnswlib))))
;; Unless this is a PUBLISHING build, start a background task that watches
;; *load-model-progress and posts each throttled value as a
;; :vector-search/load-model-progress message via worker-util/post-message.
;; The throttle argument is 500 (presumably milliseconds — confirm against
;; c.m/throttle).
(when-not common-config/PUBLISHING
  (let [throttled-progress (c.m/throttle 500 (m/watch *load-model-progress))
        post-progress! (fn [_ progress]
                         (worker-util/post-message :vector-search/load-model-progress progress))]
    (c.m/run-background-task
     ::push-load-model-progress
     (m/reduce post-progress! throttled-progress))))
(comment
(def repo "repo-1")
(def hnsw (ensure-hnsw-index! repo))

View File

@@ -164,6 +164,7 @@
worker (js/Worker. (str worker-url "?electron=" (util/electron?) "&publishing=" config/publishing?))
wrapped-worker (Comlink/wrap worker)
t1 (util/time-ms)]
(worker-handler/handle-message! worker wrapped-worker)
(reset! state/*infer-worker wrapped-worker)
(p/do!
(.init wrapped-worker)

View File

@@ -332,7 +332,8 @@
:db/async-queries (atom {})
:db/latest-transacted-entity-uuids (atom {})
:vector-search/state (atom {})})))
:vector-search/state (atom {})
:vector-search/load-model-progress (atom nil)})))
;; Block ast state
;; ===============

View File

@@ -955,7 +955,11 @@
(vec-search-cancel-indexing
[this repo]
(embedding/cancel-indexing repo)))
(embedding/cancel-indexing repo))
;; Returns a JS Promise constructed with the value returned by
;; embedding/task--update-index-info! for `repo` as its executor.
(vec-search-update-index-info
[this repo]
(js/Promise. (embedding/task--update-index-info! repo))))
(defn- rename-page!
[repo conn page-uuid new-name]

View File

@@ -111,13 +111,25 @@
[:db/add e :logseq.property.embedding/hnsw-label-updated-at updated-at]]))
e+updated-at-coll added-labels)))
(defn- task--update-index-info!
[repo ^js infer-worker indexing?*]
;; Fetches `(.index-info infer-worker repo)`, converts it to a Clojure map with
;; keywordized keys, and passes the result to reset-*vector-search-state! under
;; :index-info for `repo`. Each arity returns an m/sp task.
;;  - [repo infer-worker]: merges the fetched info on top of
;;    (:index-info @*vector-search-state).
;;  - [repo infer-worker indexing?*]: stores the fetched info with :indexing?
;;    set to indexing?*.
;; NOTE(review): the 2-arity reads the top-level :index-info key of
;; *vector-search-state, while the debug UI reads this state via
;; [:repo->index-info repo]. If per-repo info lives under that path, the merge
;; base here is nil and a previous :indexing? value is not carried over —
;; confirm against reset-*vector-search-state!.
(defn- task--update-index-info!*
([repo ^js infer-worker]
(m/sp
(reset-*vector-search-state! repo :index-info
(merge (:index-info @*vector-search-state)
(js->clj (c.m/<? (.index-info infer-worker repo))
:keywordize-keys true)))))
([repo ^js infer-worker indexing?*]
(m/sp
(reset-*vector-search-state! repo :index-info
(assoc (js->clj (c.m/<? (.index-info infer-worker repo))
:keywordize-keys true)
:indexing? indexing?*)))))
(defn task--update-index-info!
[repo]
(m/sp
(reset-*vector-search-state! repo :index-info
(assoc (js->clj (c.m/<? (.index-info infer-worker repo))
:keywordize-keys true)
:indexing? indexing?*))))
(when-let [^js infer-worker @worker-state/*infer-worker]
(m/? (task--update-index-info!* repo infer-worker)))))
(defn- task--embedding-stale-blocks!
"embedding outdated block-data
@@ -126,21 +138,20 @@
(m/sp
(when-let [^js infer-worker @worker-state/*infer-worker]
(when-let [conn (worker-state/get-datascript-conn repo)]
(m/? (task--update-index-info! repo infer-worker true))
(m/? (task--update-index-info!* repo infer-worker true))
(let [stale-blocks (stale-block-lazy-seq @conn false)]
(doseq [stale-block-chunk (sequence (partition-by-text-size 2000) stale-blocks)]
(let [e+updated-at-coll (map (juxt :db/id :block/updated-at) stale-block-chunk)
delete-labels (into-array (keep :logseq.property.embedding/hnsw-label stale-block-chunk))
added-labels (worker-util/profile :text-embedding
(c.m/<?
(.text-embedding+store!
infer-worker repo (into-array (map :block.temp/text-to-embedding stale-block-chunk))
delete-labels false)))
added-labels (c.m/<?
(.text-embedding+store!
infer-worker repo (into-array (map :block.temp/text-to-embedding stale-block-chunk))
delete-labels false))
tx-data (labels-update-tx-data @conn e+updated-at-coll added-labels)]
(d/transact! conn tx-data)
(m/? (task--update-index-info! repo infer-worker true))))
(m/? (task--update-index-info!* repo infer-worker true))))
(c.m/<? (.write-index! infer-worker repo))
(m/? (task--update-index-info! repo infer-worker false)))))))
(m/? (task--update-index-info!* repo infer-worker false)))))))
(defn- task--re-embedding-graph-data!
"force re-embedding all block-data in graph"
@@ -148,21 +159,20 @@
(m/sp
(when-let [^js infer-worker @worker-state/*infer-worker]
(when-let [conn (worker-state/get-datascript-conn repo)]
(m/? (task--update-index-info! repo infer-worker true))
(m/? (task--update-index-info!* repo infer-worker true))
(c.m/<? (.force-reset-index! infer-worker repo))
(let [all-blocks (stale-block-lazy-seq @conn true)]
(doseq [block-chunk (sequence (partition-by-text-size 2000) all-blocks)]
(let [e+updated-at-coll (map (juxt :db/id :block/updated-at) block-chunk)
added-labels (worker-util/profile :text-embedding
(c.m/<?
(.text-embedding+store!
infer-worker repo (into-array (map :block.temp/text-to-embedding block-chunk))
nil false)))
added-labels (c.m/<?
(.text-embedding+store!
infer-worker repo (into-array (map :block.temp/text-to-embedding block-chunk))
nil false))
tx-data (labels-update-tx-data @conn e+updated-at-coll added-labels)]
(d/transact! conn tx-data)
(m/? (task--update-index-info! repo infer-worker true)))))
(m/? (task--update-index-info!* repo infer-worker true)))))
(c.m/<? (.write-index! infer-worker repo))
(m/? (task--update-index-info! repo infer-worker false))))))
(m/? (task--update-index-info!* repo infer-worker false))))))
(defn embedding-stale-blocks!
[repo]