enhance(embedding): add user-defined class/prop

This commit is contained in:
rcmerci
2025-03-15 23:48:26 +08:00
parent 0c82a420c7
commit b19f0d91ca

View File

@@ -9,13 +9,19 @@
[logseq.db :as ldb] [logseq.db :as ldb]
[missionary.core :as m])) [missionary.core :as m]))
;;; TODOs:
;;; - [x] add :logseq.property/description into text-to-embedding
;;; - [ ] add tags to text-to-embedding
(defn- stale-block-filter-preds (defn- stale-block-filter-preds
"When `reset?`, ignore :logseq.property.embedding/hnsw-label-updated-at in block" "When `reset?`, ignore :logseq.property.embedding/hnsw-label-updated-at in block"
[reset?] [reset?]
(let [preds (cond-> [(comp nil? :db/ident) (let [preds (cond-> [(fn [b]
(fn [b] (let [db-ident (:db/ident b)
(let [title (:block/title b)] title (:block/title b)]
(and (not (string/blank? title)) (and (or (nil? db-ident)
(not (string/starts-with? (namespace db-ident) "logseq.")))
(not (string/blank? title))
(not (ldb/hidden? title)) (not (ldb/hidden? title))
(nil? (:logseq.property/view-for b)) (nil? (:logseq.property/view-for b))
(not (keyword-identical? (not (keyword-identical?
@@ -34,7 +40,8 @@
[db reset?] [db reset?]
(->> (rseq (d/index-range db :block/updated-at nil nil)) (->> (rseq (d/index-range db :block/updated-at nil nil))
(sequence (sequence
(comp (map #(d/entity db (:e %))) ;; NOTE: assoc :block.temp/search?, so uuid in :block/title will be replaced by content
(comp (map #(assoc (d/entity db (:e %)) :block.temp/search? true))
(filter (stale-block-filter-preds reset?)) (filter (stale-block-filter-preds reset?))
(map (fn [b] (map (fn [b]
(assoc b :block.temp/text-to-embedding (assoc b :block.temp/text-to-embedding
@@ -124,7 +131,7 @@
(let [^js infer-worker @worker-state/*infer-worker] (let [^js infer-worker @worker-state/*infer-worker]
(assert (some? infer-worker)) (assert (some? infer-worker))
(let [{:keys [distances neighbors] :as r} (let [{:keys [distances neighbors] :as r}
(worker-util/profile :search (worker-util/profile (str "search: '" query-string "'")
(js->clj (c.m/<? (.search infer-worker repo query-string nums-neighbors)) :keywordize-keys true)) (js->clj (c.m/<? (.search infer-worker repo query-string nums-neighbors)) :keywordize-keys true))
labels (->> (map vector distances neighbors) labels (->> (map vector distances neighbors)
(keep (fn [[distance label]] (when-not (js/isNaN distance) label)))) (keep (fn [[distance label]] (when-not (js/isNaN distance) label))))
@@ -134,10 +141,17 @@
(sort-by :tx >))) labels) (sort-by :tx >))) labels)
result-es (keep (comp :e first) datoms) result-es (keep (comp :e first) datoms)
es-with-outdated-hnsw-label (map :e (mapcat next datoms)) es-with-outdated-hnsw-label (map :e (mapcat next datoms))
blocks (map #(select-keys (d/entity @conn %) [:db/id :block/title]) result-es)] blocks (map #(select-keys (assoc (d/entity @conn %) :block.temp/search? true)
[:db/id :block/title :logseq.property.embedding/hnsw-label]) result-es)]
(remove-outdated-hnsw-label! conn es-with-outdated-hnsw-label) (remove-outdated-hnsw-label! conn es-with-outdated-hnsw-label)
(prn :query-result r) (prn :query-result r)
(pp/pprint blocks))))) (pp/print-table ["id" "hnsw-label" "title"] (map #(-> %
(update-keys name)
(update-vals (fn [v]
(if (and (string? v) (> (count v) 60))
(str (subs v 0 60) "[TRUNCATED]")
v))))
blocks))))))
(comment (comment
(def repo (frontend.worker.state/get-current-repo)) (def repo (frontend.worker.state/get-current-repo))
@@ -146,4 +160,4 @@
((<embedding-stale-blocks! repo conn) prn js/console.log) ((<embedding-stale-blocks! repo conn) prn js/console.log)
((<re-embedding-graph-data! repo conn) prn js/console.log) ((<re-embedding-graph-data! repo conn) prn js/console.log)
((<search repo conn "note zhiyuan" 10) prn js/console.log)) ((<search repo conn "perf performance datomic stat" 10) prn js/console.log))