mirror of
https://github.com/logseq/logseq.git
synced 2026-05-20 10:52:38 +00:00
adjust weights for keyword search and semantic search
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
(ns frontend.common.search-fuzzy
|
||||
"fuzzy search. Used by frontend and worker namespaces"
|
||||
(:require [clojure.string :as string]
|
||||
(:require ["remove-accents" :as removeAccents]
|
||||
[cljs-bean.core :as bean]
|
||||
["remove-accents" :as removeAccents]))
|
||||
[clojure.string :as string]))
|
||||
|
||||
(def MAX-STRING-LENGTH 1000.0)
|
||||
|
||||
@@ -28,10 +28,19 @@
|
||||
(/ (- maxed mined)
|
||||
maxed)))))
|
||||
|
||||
(defn search-normalize
  "Loose normalization of `s` for search matching.

  Lower-cases `s` and applies Unicode NFKC normalization. When
  `remove-accents?` is truthy the result is additionally passed through
  `removeAccents`. Returns nil when `s` is nil or false."
  [s remove-accents?]
  (when s
    (cond-> (.normalize (string/lower-case s) "NFKC")
      remove-accents? (removeAccents))))
|
||||
|
||||
(defn score
|
||||
[oquery ostr]
|
||||
(let [query (clean-str oquery)
|
||||
original-s (clean-str ostr)]
|
||||
(let [query (-> (clean-str oquery) (search-normalize true))
|
||||
original-s (-> (clean-str ostr) (search-normalize true))]
|
||||
(loop [q (seq (char-array query))
|
||||
s (seq (char-array original-s))
|
||||
mult 1
|
||||
@@ -68,24 +77,14 @@
|
||||
(dec idx)
|
||||
(- score' 0.1)))))))
|
||||
|
||||
(defn search-normalize
  "Loose normalization of `s` for search matching.

  Lower-cases `s` and applies Unicode NFKC normalization. When
  `remove-accents?` is truthy the result is additionally passed through
  `removeAccents`. Returns nil when `s` is nil or false."
  [s remove-accents?]
  (when s
    (cond-> (.normalize (string/lower-case s) "NFKC")
      remove-accents? (removeAccents))))
|
||||
|
||||
(defn fuzzy-search
|
||||
[data query & {:keys [limit extract-fn]
|
||||
:or {limit 20}}]
|
||||
(let [query (search-normalize query true)]
|
||||
(->> (take limit
|
||||
(sort-by :score (comp - compare)
|
||||
(filter #(< 0 (:score %))
|
||||
(for [item data]
|
||||
(let [s (str (if extract-fn (extract-fn item) item))]
|
||||
{:data item
|
||||
:score (score query (search-normalize s true))})))))
|
||||
(map :data))))
|
||||
(->> (take limit
|
||||
(sort-by :score (comp - compare)
|
||||
(filter #(< 0 (:score %))
|
||||
(for [item data]
|
||||
(let [s (str (if extract-fn (extract-fn item) item))]
|
||||
{:data item
|
||||
:score (score query s)})))))
|
||||
(map :data)))
|
||||
|
||||
@@ -257,10 +257,11 @@
|
||||
(js->clj (c.m/<? (.search infer-worker repo query-string nums-neighbors)) :keywordize-keys true))]
|
||||
(->> (map vector distances neighbors)
|
||||
(keep (fn [[distance label]]
|
||||
(when-not (or (js/isNaN distance) (> distance 0.5))
|
||||
(when-not (or (js/isNaN distance) (> distance 0.35))
|
||||
(when-let [block (d/entity @conn label)]
|
||||
{:block block
|
||||
:distance distance})))))))))))
|
||||
(when (:block/title block)
|
||||
{:block block
|
||||
:distance distance}))))))))))))
|
||||
|
||||
(def ^:private vector-search-state-flow
|
||||
(m/eduction
|
||||
|
||||
@@ -25,14 +25,21 @@
|
||||
|
||||
;; Configuration for re-ranking
|
||||
(def config
|
||||
{:keyword-weight 0.4
|
||||
:semantic-weight 0.6})
|
||||
{:keyword-weight 0.9
|
||||
:semantic-weight 0.1})
|
||||
|
||||
(defn- log-score
  "Dampens large scores logarithmically while keeping the mapping monotonic.

  Scores up to 2 are returned unchanged. Above 2 the result is
  2 + ln(score / 2), which is continuous at the threshold and strictly
  increasing. Returning plain ln(score) above 2 would make any score in
  (2, e^2) map to a value below 2 (e.g. 2.1 -> ~0.74), so a slightly
  better score would rank lower than a score of exactly 2, and a ratio of
  two dampened scores could exceed 1."
  [score]
  (if (> score 2)
    ;; Shift the log curve so it meets the identity branch at score = 2.
    (+ 2 (js/Math.log (/ score 2)))
    score))
|
||||
|
||||
;; Normalize scores to [0, 1] range using min-max normalization
|
||||
(defn normalize-score [score min-score max-score]
|
||||
(if (= min-score max-score)
|
||||
0.0
|
||||
(let [normalized (/ (- score min-score) (- max-score min-score))]
|
||||
(let [normalized (/ (log-score (- score min-score))
|
||||
(log-score (- max-score min-score)))]
|
||||
(max 0.0 (min 1.0 normalized)))))
|
||||
|
||||
(defn- add-blocks-fts-triggers!
|
||||
@@ -184,15 +191,16 @@ DROP TRIGGER IF EXISTS blocks_au;
|
||||
:bind bind
|
||||
:rowMode "array"}))
|
||||
blocks (bean/->clj result)]
|
||||
(map (fn [block]
|
||||
(let [[id page title rank snippet] (if enable-snippet?
|
||||
(update block 4 get-snippet-result)
|
||||
block)]
|
||||
{:id id
|
||||
:keyword-score (when rank (Math/abs rank))
|
||||
:page page
|
||||
:title title
|
||||
:snippet snippet})) blocks))
|
||||
(keep (fn [block]
|
||||
(let [[id page title rank snippet] (if enable-snippet?
|
||||
(update block 4 get-snippet-result)
|
||||
block)]
|
||||
(when title
|
||||
{:id id
|
||||
:keyword-score (+ (fuzzy/score q title) (js/Math.abs rank))
|
||||
:page page
|
||||
:title title
|
||||
:snippet snippet}))) blocks))
|
||||
(catch :default e
|
||||
(prn :debug "Search blocks failed: ")
|
||||
(js/console.error e))))
|
||||
@@ -320,9 +328,9 @@ DROP TRIGGER IF EXISTS blocks_au;
|
||||
(* (:semantic-weight config) s-score)
|
||||
(cond
|
||||
(ldb/page? block)
|
||||
1
|
||||
0.001
|
||||
(:block/tags block)
|
||||
0.02
|
||||
0.0005
|
||||
:else
|
||||
0))]
|
||||
(merge result
|
||||
@@ -368,7 +376,10 @@ DROP TRIGGER IF EXISTS blocks_au;
|
||||
non-match-result (when (and (not page-only?) non-match-input)
|
||||
(search-blocks-aux search-db non-match-sql q non-match-input page limit enable-snippet?))
|
||||
;; fuzzy is too slow for large graphs
|
||||
fuzzy-result (when-not (or page large-graph?) (fuzzy-search repo @conn q option))
|
||||
fuzzy-result (when-not (or page large-graph?)
|
||||
(->> (fuzzy-search repo @conn q option)
|
||||
(map (fn [result]
|
||||
(assoc result :keyword-score (fuzzy/score q (:title result)))))))
|
||||
semantic-search-result* (m/? (embedding/task--search repo q 10))
|
||||
semantic-search-result (->> semantic-search-result*
|
||||
(map (fn [{:keys [block distance]}]
|
||||
|
||||
Reference in New Issue
Block a user