From a973c4fb8522076d63da9a44d35e01cfc01616c0 Mon Sep 17 00:00:00 2001 From: Tienson Qin Date: Wed, 6 May 2026 21:33:52 +0800 Subject: [PATCH] fix: speed up large graph search --- src/main/frontend/worker/search.cljs | 114 +++++++++++++++++----- src/test/frontend/worker/search_test.cljs | 104 ++++++++++++++++++-- 2 files changed, 182 insertions(+), 36 deletions(-) diff --git a/src/main/frontend/worker/search.cljs b/src/main/frontend/worker/search.cljs index 37d8eeeecf..2c0f19fac7 100644 --- a/src/main/frontend/worker/search.cljs +++ b/src/main/frontend/worker/search.cljs @@ -50,12 +50,17 @@ ;; Check https://www.sqlite.org/fts5.html#the_experimental_trigram_tokenizer. (.exec db "CREATE VIRTUAL TABLE IF NOT EXISTS blocks_fts USING fts5(id, title, page, tokenize=\"trigram\")")) +(defn- create-blocks-title-index! + [db] + (.exec db "CREATE INDEX IF NOT EXISTS blocks_title_nocase_idx ON blocks(title COLLATE NOCASE)")) + (defn create-tables-and-triggers! "Open a SQLite db for search index" [db] (try (create-blocks-table! db) (create-blocks-fts-table! db) + (create-blocks-title-index! db) (add-blocks-fts-triggers! db) (catch :default e (prn "Failed to create tables and triggers") @@ -404,6 +409,50 @@ DROP TRIGGER IF EXISTS blocks_au; [q] (str "%" (string/join "%" (map like-escape-char q)) "%")) +(defn- exec-search-blocks-fuzzy + [db sql bind] + (-> (.exec db (bean/->js + {:sql sql + :bind bind + :rowMode "array"})) + bean/->clj)) + +(defn- fuzzy-block-rows->results + [q blocks] + (->> blocks + (keep (fn [[id page title]] + (when title + (let [keyword-score (fuzzy/score q title)] + (when (pos? keyword-score) + {:id id + :keyword-score keyword-score + :page page + :title title}))))) + (sort-by (juxt (fn [{:keys [id page]}] + (not= id page)) + (comp - :keyword-score))))) + +(defn- multi-term-query? + [q] + (boolean (re-find #"\S\s+\S" q))) + +(defn- exact-title-query? + [q] + (not (re-find #"\s" q))) + +(defn- search-blocks-exact-title-aux + [db q page limit] + (try + (let [sql (str "select id, page, title from blocks where " + (if page "page = ? and " "") + "title = ? COLLATE NOCASE limit ?") + bind (if page [page q limit] [q limit]) + blocks (exec-search-blocks-fuzzy db sql bind)] + (fuzzy-block-rows->results q blocks)) + (catch :default e + (prn :debug "Exact title search blocks failed: ") + (js/console.error e)))) + (defn- search-blocks-fuzzy-aux [db q page limit] (let [q (some-> q @@ -413,28 +462,30 @@ DROP TRIGGER IF EXISTS blocks_au; (when-not (string/blank? q) (try (let [candidate-limit (fuzzy-candidate-limit limit) - sql (str "select id, page, title from blocks where " - (if page "page = ? and " "") - "lower(title) like ? escape '\\' " - "order by id = page desc limit ?") - bind (if page - [page (fuzzy-like-pattern q) candidate-limit] - [(fuzzy-like-pattern q) candidate-limit]) - result (.exec db (bean/->js - {:sql sql - :bind bind - :rowMode "array"})) - blocks (bean/->clj result)] - (->> blocks - (keep (fn [[id page title]] - (when title - (let [keyword-score (fuzzy/score q title)] - (when (pos? keyword-score) - {:id id - :keyword-score keyword-score - :page page - :title title}))))) - (sort-by :keyword-score #(compare %2 %1)))) + pattern (fuzzy-like-pattern q) + blocks (if page + (exec-search-blocks-fuzzy + db + "select id, page, title from blocks where page = ? and lower(title) like ? escape '\\' limit ?" + [page pattern candidate-limit]) + (let [page-blocks (exec-search-blocks-fuzzy + db + "select id, page, title from blocks where id = page and lower(title) like ? escape '\\' limit ?" + [pattern candidate-limit]) + page-ids (set (map first page-blocks)) + remaining (- candidate-limit (count page-blocks))] + (if (pos? remaining) + (let [block-candidates (exec-search-blocks-fuzzy + db + "select id, page, title from blocks where lower(title) like ? escape '\\' limit ?" + [pattern (+ remaining (count page-blocks))]) + block-candidates (->> block-candidates + (remove (fn [[id]] + (contains? page-ids id))) + (take remaining))] + (concat page-blocks block-candidates)) + page-blocks)))] + (fuzzy-block-rows->results q blocks)) (catch :default e (prn :debug "Fuzzy search blocks failed: ") (js/console.error e)))))) @@ -640,24 +691,33 @@ DROP TRIGGER IF EXISTS blocks_au; (str "%" (string/replace q #"\s+" "%") "%")) limit (or limit 100) limit-p (or search-limit limit) + exact-title-result (when (and (not page-only?) + (exact-title-query? q)) + (search-blocks-exact-title-aux search-db q page limit-p)) + enough-exact-title-results? (>= (count exact-title-result) limit-p) ;; don't use sqlite snippet function anymore, all snippets will be handled by ensure-highlighted-snippet select "select id, page, title, rank from blocks_fts where " pg-sql (if page "page = ? and" "") match-sql (if (ns-util/namespace-page? q) - (str select pg-sql " title match ? or title match ? order by rank limit ?") - (str select pg-sql " title match ? order by rank limit ?")) + (str select pg-sql " title match ? or title match ? limit ?") + (str select pg-sql " title match ? limit ?")) non-match-sql (str select pg-sql " title like ? limit ?") - matched-result (when-not page-only? + matched-result (when (and (not page-only?) + (not enough-exact-title-results?)) (search-blocks-aux search-db match-sql q match-input page limit-p (ns-util/namespace-page? q))) non-match-result (when (and (not page-only?) non-match-input) (->> (search-blocks-aux search-db non-match-sql q non-match-input page limit-p) (map (fn [result] (assoc result :keyword-score (fuzzy/score q (:title result))))))) - fuzzy-result (search-blocks-fuzzy-aux search-db q page limit) + skip-fuzzy? (or enough-exact-title-results? + (and (multi-term-query? q) + (seq matched-result))) + fuzzy-result (when-not skip-fuzzy? + (search-blocks-fuzzy-aux search-db q page limit)) ;; _ (prn :debug "Search results before combine:" enable-snippet? (map :snippet matched-result)) ;; _ (doseq [item (concat fuzzy-result matched-result)] ;; (prn :debug :keyword-search-result item)) - combined-result (combine-results @conn (concat fuzzy-result matched-result non-match-result)) + combined-result (combine-results @conn (concat exact-title-result fuzzy-result matched-result non-match-result)) code-class (when code-only? (d/entity @conn :logseq.class/Code-block)) matched-count (when include-matched-count? diff --git a/src/test/frontend/worker/search_test.cljs b/src/test/frontend/worker/search_test.cljs index 1cd05eedc5..bc18dd586d 100644 --- a/src/test/frontend/worker/search_test.cljs +++ b/src/test/frontend/worker/search_test.cljs @@ -192,7 +192,7 @@ (deftest search-blocks-aux-bind-count (testing "namespace match SQL keeps bind count aligned" - (let [sql "select id, page, title, rank from blocks_fts where title match ? or title match ? order by rank limit ?" + (let [sql "select id, page, title, rank from blocks_fts where title match ? or title match ? limit ?" result (#'search/search-blocks-aux (checking-db) sql "a/b" "a/b" nil 10 true)] (is (some? result)) (is (empty? result)))) @@ -211,8 +211,18 @@ (deftest search-blocks-large-graph-benchmark-regression (testing "cmd-k and autocomplete queries must not scan the full Datascript graph while typing" - (let [db (checking-db)] - (is (empty? (search/search-blocks (atom :large-db) db "alpha" {:limit 10})))))) + (let [calls (atom []) + db #js {:exec (fn [opts] + (let [sql (aget opts "sql") + bind (aget opts "bind") + expected (sql-placeholder-count sql) + actual (count bind)] + (swap! calls conj sql) + (when-not (= expected actual) + (throw (js/Error. (str "Bind index " (inc expected) " is out of range.")))) + #js []))}] + (is (empty? (search/search-blocks (atom :large-db) db "alpha" {:limit 10}))) + (is (not-any? #(string/includes? % "order by rank") @calls))))) (deftest search-blocks-fuzzy-matches-from-search-db (testing "subsequence fuzzy matching comes from the search db without an in-memory index" @@ -240,17 +250,22 @@ (is (some #(= ["%n%w%p%" 40] (:bind %)) @calls)))))) (deftest search-blocks-fuzzy-prioritizes-page-candidates - (testing "large graphs keep page rows in the bounded fuzzy candidate window" + (testing "large graphs keep page rows first without sorting the whole blocks table" (let [page-id "67e55044-10b1-426f-9247-bb680e5fe0c8" block-id "8f14e45f-ea6e-4be8-b53f-bf0f2ca8a5db" calls (atom []) db #js {:exec (fn [opts] (let [sql (aget opts "sql")] (swap! calls conj sql) - (if (and (string/includes? sql "order by id = page desc") - (string/includes? sql "lower(title) like ?")) - (clj->js [[page-id page-id "New Project"] - [block-id page-id "New Project task"]]) + (cond + (string/includes? sql "title = ? COLLATE NOCASE") + #js [] + + (and (string/includes? sql "id = page") + (string/includes? sql "lower(title) like ?")) + (clj->js [[page-id page-id "New Project"]]) + + :else (clj->js [[block-id page-id "New Project task"]]))))}] (with-redefs [search/combine-results (fn [_db results] results) search/search-result->block-result @@ -258,7 +273,40 @@ result)] (let [result (vec (search/search-blocks (atom :large-db) db "nwp" {:limit 10}))] (is (some #(= page-id (:id %)) result)) - (is (some #(string/includes? % "order by id = page desc") @calls))))))) + (is (= page-id (:id (first result)))) + (is (some #(string/includes? % "id = page") @calls)) + (is (not-any? #(string/includes? % "order by id = page desc") @calls))))))) + +(deftest search-blocks-skips-fuzzy-for-multi-term-keyword-hits + (testing "exact multi-term FTS hits do not pay a broad fuzzy LIKE scan" + (let [page-id "29089538-74f7-44b6-954b-494ca9e82182" + calls (atom []) + db #js {:exec (fn [opts] + (let [sql (aget opts "sql")] + (swap! calls conj sql) + (cond + (string/includes? sql "lower(title) like ?") + (throw (js/Error. "fuzzy LIKE should not run")) + + (string/includes? sql "title match ?") + (clj->js [[page-id page-id "Page-10000" -16 nil]]) + + :else + #js [])))}] + (with-redefs [search/combine-results (fn [_db results] results) + search/search-result->block-result + (fn [_conn _q _code-class _option result] + result)] + (let [result (vec (search/search-blocks (atom :large-db) + db + "page 10000" + {:limit 10}))] + (is (= [{:id page-id + :page page-id + :title "Page-10000"}] + (mapv #(select-keys % [:id :page :title]) result))) + (is (some #(string/includes? % "title match ?") @calls)) + (is (not-any? #(string/includes? % "lower(title) like ?") @calls))))))) (defn- test-uuid-string [n] @@ -266,6 +314,44 @@ (str "00000000-0000-0000-0000-" (subs (str "000000000000" hex) (count hex))))) +(deftest search-blocks-skips-fts-for-enough-exact-title-hits + (testing "very common exact titles avoid expensive FTS scans" + (let [calls (atom []) + exact-rows (mapv (fn [n] + [(test-uuid-string n) + (test-uuid-string 999) + "Block"]) + (range 100 200)) + db #js {:exec (fn [opts] + (let [sql (aget opts "sql")] + (swap! calls conj sql) + (cond + (string/includes? sql "title = ? COLLATE NOCASE") + (clj->js exact-rows) + + (string/includes? sql "title match ?") + (throw (js/Error. "FTS should not run")) + + (string/includes? sql "lower(title) like ?") + (throw (js/Error. "fuzzy LIKE should not run")) + + :else + #js [])))}] + (with-redefs [search/combine-results (fn [_db results] results) + search/search-result->block-result + (fn [_conn _q _code-class _option result] + result)] + (let [result (vec (search/search-blocks (atom :large-db) + db + "block" + {:limit 10 + :search-limit 100}))] + (is (= 10 (count result))) + (is (every? #(= "Block" (:title %)) result)) + (is (some #(string/includes? % "title = ? COLLATE NOCASE") @calls)) + (is (not-any? #(string/includes? % "title match ?") @calls)) + (is (not-any? #(string/includes? % "lower(title) like ?") @calls))))))) + (deftest combine-results-large-result-benchmark (testing "large search result sets combine without quadratic scans and keep page boost ranking" (let [ids (mapv test-uuid-string (range 1 2501))