enhance(search): speed up indexing and show progress

This commit is contained in:
Tienson Qin
2026-04-16 03:43:43 +08:00
parent 843f871dc6
commit 7edeae6971
8 changed files with 216 additions and 52 deletions

View File

@@ -347,11 +347,25 @@
(ldb/page? page) (:block/parent page))
[:div.ls-block-breadcrumb
[:div.text-sm
(component-block/breadcrumb {}
(component-block/breadcrumb {}
(state/get-current-repo)
(:block/uuid page)
{:header? true})]])))
;; Header indicator for the background search-index build.
;; Subscribes to `:search/index-build` and renders a spinner, an
;; "Indexing N%" label and a small progress bar, but only while a build is
;; running for the repo that is currently open. Renders nothing otherwise.
(rum/defc search-index-progress < rum/reactive
  []
  (let [current-repo (state/get-current-repo)
        {:keys [running? repo progress]} (or (state/sub :search/index-build) {})
        ;; Clamp to 0..100 so a missing or out-of-range value can never
        ;; produce an invalid CSS width.
        progress' (-> (or progress 0)
                      (max 0)
                      (min 100))]
    (when (and running? (= repo current-repo))
      [:div.search-index-progress
       ;; Rum components are plain functions and must be called; a
       ;; Reagent-style `[component args]` vector is not treated as an
       ;; element by Rum's hiccup, so the spinner would not be rendered.
       (ui/loading "")
       [:span.search-index-progress__text (str "Indexing " progress' "%")]
       [:div.search-index-progress__bar
        [:div.search-index-progress__bar-fill {:style {:width (str progress' "%")}}]]])))
(rum/defc ^:large-vars/cleanup-todo header-aux < rum/reactive
[{:keys [current-repo default-home new-block-mode]}]
(let [electron-mac? (and util/mac? (util/electron?))
@@ -414,6 +428,7 @@
(rtc-indicator/downloading-detail))
(when (user-handler/logged-in?)
(rtc-indicator/uploading-detail))
(search-index-progress)
(when (and (not= (state/get-current-route) :home)
(not custom-home-page?))

View File

@@ -341,4 +341,24 @@ html.is-zoomed-native-ios {
max-width: 34ch;
}
}
/* Container for the search indexing indicator: a compact horizontal row
   holding the spinner, the text label and the progress bar. */
.search-index-progress {
@apply flex items-center gap-2 rounded px-2 py-1 text-xs opacity-90;
/* NOTE(review): presumably excludes the indicator from a draggable window
   title-bar region (Electron) - confirm against the header styles. */
-webkit-app-region: no-drag;
background-color: var(--ls-tertiary-background-color);
}
/* Text label; kept on a single line. */
.search-index-progress__text {
@apply whitespace-nowrap;
}
/* Progress bar track: fixed size, clips the fill to its rounded corners. */
.search-index-progress__bar {
@apply h-1 w-16 overflow-hidden rounded;
background-color: var(--ls-quaternary-background-color);
}
/* Progress bar fill: its width is set inline by the component and
   changes are animated over 200ms. */
.search-index-progress__bar-fill {
@apply h-full transition-all duration-200;
background-color: var(--ls-link-text-color);
}
}

View File

@@ -62,6 +62,12 @@
[error]
(string/includes? (or (ex-message error) (str error)) "decrypt-aes-key"))
(defn- <build-search-index!
  "Asks the db worker to build the block search index for `repo`.
  Returns the worker call's promise; a rejection is logged to the console
  instead of being propagated to the caller."
  [repo]
  (let [log-failure (fn [error]
                      (js/console.error "Search index build error:" error))
        pending (state/<invoke-db-worker :thread-api/search-build-blocks-indice-in-worker repo)]
    (p/catch pending log-failure)))
(defn- schedule-search-index-build!
[repo]
(when-let [timeout-id @*search-index-build-timeout]
@@ -76,9 +82,7 @@
(state/input-idle? repo :diff 5000)
(do
(reset! *search-index-build-timeout nil)
(-> (state/<invoke-db-worker :thread-api/search-build-blocks-indice-in-worker repo)
(p/catch (fn [error]
(js/console.error "Search index build error:" error)))))
(<build-search-index! repo))
:else
(schedule-search-index-build! repo)))
@@ -110,7 +114,7 @@
(p/do!
(p/delay 5000)
(p/let [repo (state/get-current-repo)
_ (state/<invoke-db-worker :thread-api/search-build-blocks-indice-in-worker repo)]
_ (<build-search-index! repo)]
(when state/lsp-enabled?
(doseq [service (state/get-all-plugin-services-with-type :search)]
(search-plugin/call-service! service "search:rebuildPagesIndice" {})

View File

@@ -21,6 +21,38 @@
[repo diff]
(state/input-idle? repo :diff diff))
(def-thread-api :thread-api/search-index-build-progress
  [repo {:keys [status progress processed total]}]
  ;; Mirrors a search-index build progress report into the
  ;; `:search/index-build` app state.
  ;; A report is applied only when it concerns the currently open repo or
  ;; the repo already recorded in the stored build state; anything else is
  ;; ignored. Unknown `status` values are ignored too. Always returns nil.
  (let [previous (get @state/state :search/index-build)
        open-repo (state/get-current-repo)
        relevant? (or (= repo open-repo)
                      (= repo (:repo previous)))
        ;; Counters shared by the :running and :completed snapshots;
        ;; missing values default to 0.
        counters {:repo repo
                  :progress (or progress 0)
                  :processed (or processed 0)
                  :total (or total 0)}
        next-state (when relevant?
                     (case status
                       ;; :idle keeps the last known counters and only
                       ;; marks the build as no longer running.
                       :idle (assoc (or previous {})
                                    :running? false
                                    :repo repo)
                       :running (assoc counters :running? true)
                       :completed (assoc counters :running? false)
                       nil))]
    (when next-state
      (state/set-state! :search/index-build next-state))
    nil))
(defn- ask-persist-permission!
[]
(p/let [persistent? (.persist js/navigator.storage)]

View File

@@ -89,6 +89,11 @@
:search/result nil
:search/graph-filters []
:search/engines {}
:search/index-build {:running? false
:repo nil
:progress 0
:processed 0
:total 0}
;; modals
:modal/dropdowns {}

View File

@@ -73,7 +73,7 @@
Bump to force a rebuild when the index format changes."
1)
(def ^:private search-index-build-batch-size 1000)
(def ^:private search-index-build-batch-size 5000)
;; Passed together with `search-index-build-batch-size` to
;; `take-block-datoms-batch` when slicing the datoms for one indexing step.
;; NOTE(review): presumably the wall-clock budget (in ms) for collecting a
;; single batch - confirm against `take-block-datoms-batch`.
(def ^:private search-index-build-time-budget-ms 8)
;; NOTE(review): the use sites of the two values below are not visible here;
;; presumably the required input-idle window and the pause between idle
;; checks (both in ms) while a build waits for the user to stop typing -
;; confirm against `<wait-for-search-index-idle!`.
(def ^:private search-index-build-idle-diff-ms 1000)
(def ^:private search-index-build-pause-ms 300)
@@ -500,6 +500,11 @@
:repo repo
:build-id build-id}))))
(defn- report-search-index-progress!
  "Sends `payload` for `repo` to the main thread through
  `:thread-api/search-index-build-progress`.
  Reporting is best-effort: a rejected call resolves to nil instead of
  propagating, so it never rejects the returned promise."
  [repo payload]
  (let [pending (worker-state/<invoke-main-thread
                 :thread-api/search-index-build-progress repo payload)]
    (p/catch pending (constantly nil))))
(comment
(def-thread-api :thread-api/get-version
[]
@@ -892,26 +897,51 @@
[repo search-db conn build-id]
(ensure-active-search-index-build! repo build-id)
(search/truncate-table! search-db)
(let [db @conn]
(p/loop [remaining (seq (d/datoms db :avet :block/uuid))]
(ensure-active-search-index-build! repo build-id)
(p/let [_ (<wait-for-search-index-idle! repo build-id)]
(if (seq remaining)
(let [[batch remaining'] (take-block-datoms-batch remaining
search-index-build-batch-size
search-index-build-time-budget-ms)
indexed (->> batch
(keep #(d/entity db (:e %)))
(remove search/hidden-entity?)
(keep search/block->index))]
(prn :debug :build-search-indice :remaining (count remaining))
(when (seq indexed)
(search/upsert-blocks! search-db (bean/->js indexed)))
(p/let [_ (js/Promise. (fn [resolve] (js/setTimeout resolve 0)))]
(p/recur remaining')))
(do
(ensure-active-search-index-build! repo build-id)
(.exec search-db (str "PRAGMA user_version = " search-db-version))))))))
(let [db @conn
datoms (d/datoms db :avet :block/uuid)
total (count datoms)]
(p/do!
(report-search-index-progress! repo {:build-id build-id
:status :running
:progress 0
:processed 0
:total total})
(<wait-for-search-index-idle! repo build-id)
(p/loop [remaining (seq datoms)
processed 0
last-progress 0]
(ensure-active-search-index-build! repo build-id)
(if (seq remaining)
(let [[batch remaining'] (take-block-datoms-batch remaining
search-index-build-batch-size
search-index-build-time-budget-ms)
processed' (+ processed (count batch))
indexed (->> batch
(keep #(d/entity db (:e %)))
(remove search/hidden-entity?)
(keep search/block->index))
progress (if (zero? total)
100
(min 100 (int (* 100 (/ processed' total)))))
should-report? (> progress last-progress)]
(when (seq indexed)
(search/upsert-blocks! search-db (bean/->js indexed)))
(when should-report?
(report-search-index-progress! repo {:build-id build-id
:status :running
:progress progress
:processed processed'
:total total}))
(p/let [_ (js/Promise. (fn [resolve] (js/setTimeout resolve 0)))]
(p/recur remaining' processed' (if should-report? progress last-progress))))
(do
(ensure-active-search-index-build! repo build-id)
(.exec search-db (str "PRAGMA user_version = " search-db-version))
(report-search-index-progress! repo {:build-id build-id
:status :completed
:progress 100
:processed total
:total total})))))))
(def-thread-api :thread-api/search-build-blocks-indice-in-worker
[repo & [force?]]
@@ -929,6 +959,9 @@
(when-not (= :search/stale-index-build (:type (ex-data error)))
(throw error))))
(p/finally (fn []
(when (= build-id (get @*search-index-build-ids repo))
(report-search-index-progress! repo {:build-id build-id
:status :idle}))
(clear-search-index-build! repo build-id)))))))))))
(def-thread-api :thread-api/search-build-pages-indice

View File

@@ -102,21 +102,43 @@ DROP TRIGGER IF EXISTS blocks_au;
(str "(" (->> (map (fn [id] (str "'" id "'")) ids)
(string/join ", ")) ")"))
;; Maximum number of rows written by one multi-row INSERT in `upsert-blocks!`.
;; Each row binds three parameters (see `upsert-blocks-sql`), so a full batch
;; uses 6000 bind variables.
;; NOTE(review): this has to stay within the SQLite build's
;; SQLITE_MAX_VARIABLE_NUMBER limit - confirm for the bundled build.
(def ^:private upsert-blocks-batch-size 2000)
(def ^:private upsert-blocks-sql
  "Memoized builder for the multi-row upsert statement.
  Given `row-count`, returns an INSERT with that many `(?, ?, ?)` value
  groups which, on an `id` conflict, overwrites title and page with the
  incoming row's values."
  (memoize
   (fn [row-count]
     (let [placeholders (->> (repeat row-count "(?, ?, ?)")
                             (string/join ", "))]
       (str "INSERT INTO blocks (id, title, page) VALUES "
            placeholders
            " ON CONFLICT (id) DO UPDATE SET (title, page) = (excluded.title, excluded.page)")))))
(defn- valid-upsert-block?
  "Truthy when both the `id` and the `page` property of the JS object `item`
  pass `common-util/uuid-string?`."
  [item]
  (let [block-id (.-id item)
        page-id (.-page item)]
    (and (common-util/uuid-string? block-id)
         (common-util/uuid-string? page-id))))
(defn- throw-upsert-blocks-error!
  "Logs the offending JS object `item` to the console and then throws an
  ex-info whose data is `item` converted to Clojure data. Never returns."
  [item]
  (js/console.error "Upsert blocks wrong data: ")
  (js/console.dir item)
  (let [data (bean/->clj item)]
    (throw (ex-info "Search upsert-blocks wrong data: " data))))
(defn- upsert-bind-params
  "Flattens `batch` (a collection of JS block objects) into one JS array of
  positional bind values, ordered id, title, page for each item in turn."
  [batch]
  (into-array
   (for [item batch
         value [(.-id item) (.-title item) (.-page item)]]
     value)))
(defn upsert-blocks!
  "Upserts `blocks` (a JS array of objects with `id`, `title` and `page`)
  into the `blocks` table inside a single transaction on `db`.

  Every item is validated first; the first item whose `id` or `page` is not
  a uuid string is logged and an ex-info is thrown before any statement is
  executed. Valid input is then written in chunks of at most
  `upsert-blocks-batch-size` rows, one multi-row INSERT per chunk. An empty
  `blocks` executes no statement."
  [^Object db blocks]
  (.transaction db
                (fn [tx]
                  ;; Validate the whole input exactly once, up front.
                  ;; Doing this inside the batch loop over `blocks` would
                  ;; re-check every item once per batch.
                  (doseq [item blocks]
                    (when-not (valid-upsert-block? item)
                      (throw-upsert-blocks-error! item)))
                  (doseq [batch (partition-all upsert-blocks-batch-size blocks)]
                    (.exec tx #js {:sql (upsert-blocks-sql (count batch))
                                   :bind (upsert-bind-params batch)})))))
(defn delete-blocks!
[db ids]
@@ -448,20 +470,18 @@ DROP TRIGGER IF EXISTS blocks_au;
"Build a block title indice from scratch.
Incremental page title indice is implemented in frontend.search.sync-search-indice!"
[repo db]
(prn :debug :build-fuzzy-search-indice :graph repo)
(time
(let [blocks (->> (get-all-fuzzy-supported-blocks db)
(map block->index)
(bean/->js))
indice (fuse. blocks
(clj->js {:keys ["title"]
:shouldSort true
:tokenize true
:distance 1024
:threshold 0.5 ;; search for 50% match from the start
:minMatchCharLength 1}))]
(swap! fuzzy-search-indices assoc repo indice)
indice)))
(let [blocks (->> (get-all-fuzzy-supported-blocks db)
(map block->index)
(bean/->js))
indice (fuse. blocks
(clj->js {:keys ["title"]
:shouldSort true
:tokenize true
:distance 1024
:threshold 0.5 ;; search for 50% match from the start
:minMatchCharLength 1}))]
(swap! fuzzy-search-indices assoc repo indice)
indice))
(defn fuzzy-search
"Return a list of blocks (pages && tagged blocks) that match the query. Takes the following

View File

@@ -224,3 +224,38 @@
(is (some? ctor))
(is (= 1 (count result)))
(is (= "alpha beta" (get-in result [0 :item :title])))))))
(deftest upsert-blocks-batches-rows-into-single-sql-statement
  ;; Three valid blocks must be written with exactly one multi-row INSERT
  ;; whose bind array lists id, title, page for each block in input order.
  (let [recorded (atom [])
        record-exec! (fn [opts]
                       (swap! recorded conj {:sql (aget opts "sql")
                                             :bind (js->clj (aget opts "bind"))}))
        tx #js {:exec record-exec!}
        ;; Fake db: runs the transaction body immediately against `tx`.
        db #js {:transaction (fn [f] (f tx))}
        uuids ["67e55044-10b1-426f-9247-bb680e5fe0c8"
               "8f14e45f-ea6e-4be8-b53f-bf0f2ca8a5db"
               "9d5ed678-fe57-4bcf-bf4d-6f2fd5f8995d"]
        titles ["alpha" "beta" "gamma"]
        ;; Each block is its own page, so `page` repeats the block id.
        blocks (clj->js (mapv (fn [id title] {:id id :title title :page id})
                              uuids
                              titles))
        _ (search/upsert-blocks! db blocks)
        call (first @recorded)]
    (is (= 1 (count @recorded)))
    (is (= "INSERT INTO blocks (id, title, page) VALUES (?, ?, ?), (?, ?, ?), (?, ?, ?) ON CONFLICT (id) DO UPDATE SET (title, page) = (excluded.title, excluded.page)"
           (:sql call)))
    (is (= (vec (mapcat (fn [id title] [id title id]) uuids titles))
           (:bind call)))))
(deftest upsert-blocks-throws-on-invalid-input
  ;; A block whose id/page are not uuid strings must make upsert-blocks!
  ;; throw an error carrying the "wrong data" message.
  (let [noop-tx #js {:exec (fn [_opts] nil)}
        db #js {:transaction (fn [f] (f noop-tx))}
        invalid-blocks (clj->js [{:id "not-uuid" :title "alpha" :page "not-uuid"}])
        caught (try
                 (search/upsert-blocks! db invalid-blocks)
                 nil
                 (catch :default e e))
        message (when caught
                  (or (ex-message caught) (str caught)))]
    (is (some? caught))
    (is (re-find #"Search upsert-blocks wrong data"
                 (or message (str caught))))))