mirror of
https://github.com/logseq/logseq.git
synced 2026-04-24 22:25:01 +00:00
feat: text-encode multiple embeddings
This commit is contained in:
@@ -331,7 +331,7 @@ export interface IPluginTextEncoderServiceHooks {
|
||||
name: string
|
||||
options?: Record<string, any>
|
||||
|
||||
textEncode: (text: string) => Promise<Float32Array>
|
||||
textEncode: (text: string) => Promise<Float32Array[]>
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -95,7 +95,7 @@
|
||||
"@isomorphic-git/lightning-fs": "^4.6.0",
|
||||
"@logseq/capacitor-file-sync": "0.0.35",
|
||||
"@logseq/diff-merge": "0.2.2",
|
||||
"@logseq/logmind": "^0.1.2",
|
||||
"@logseq/logmind": "^0.1.5",
|
||||
"@logseq/react-tweet-embed": "1.3.1-1",
|
||||
"@radix-ui/colors": "^0.1.8",
|
||||
"@sentry/react": "^6.18.2",
|
||||
|
||||
@@ -42,12 +42,27 @@
|
||||
([store embed key data]
|
||||
(.add store embed key data)))
|
||||
|
||||
;; (defn addmany
|
||||
;; "Add multiple records to the vector store
|
||||
;; - store: store handler (conn)
|
||||
;; - embeds: the vectors to be added
|
||||
;; - key: identifier for the record
|
||||
;; - data: attached metadata for the record (notice: IPC required, so don't send big objects)
|
||||
|
||||
;; Returns a promise of the vector store addition
|
||||
;; or throw an error if the store doesn't exist"
|
||||
;; ([store embeds key]
|
||||
;; (.addmany store embeds key))
|
||||
;; ([store embeds key data]
|
||||
;; (.addmany store embeds key data)))
|
||||
|
||||
(defn rm
|
||||
"Remove a record from the vector store
|
||||
- store: store handler (conn)
|
||||
- key: identifier for the record
|
||||
|
||||
Returns a promise of the vector store removal
|
||||
true for success, false for failure
|
||||
or throw an error if the store doesn't exist"
|
||||
[store key]
|
||||
(.remove store key))
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
(ns frontend.search.semantic
|
||||
"Browser implementation of search protocol"
|
||||
(:require ["@logseq/logmind" :refer [taskQueue]]
|
||||
[cljs-bean.core :as bean]
|
||||
[promesa.core :as p]
|
||||
[frontend.search.protocol :as protocol]
|
||||
[frontend.ai.vector-store :as vector-store]
|
||||
[frontend.ai.text-encoder :as text-encoder]
|
||||
[promesa.core :as p]
|
||||
[frontend.state :as state]
|
||||
[logseq.graph-parser.util :as gp-util]))
|
||||
|
||||
@@ -27,12 +28,10 @@
|
||||
nil)
|
||||
|
||||
(transact-blocks! [_this {:keys [blocks-to-remove-set
|
||||
blocks-to-add]
|
||||
:as data}]
|
||||
;; Step 1: encoding all sentences
|
||||
;; Step 2: infer vec length
|
||||
;; Step 3: create vector store (optional)
|
||||
;; Step 4: add to vec store
|
||||
blocks-to-add]}]
|
||||
;; Step 1: create vector store handler
|
||||
;; Step 2: deal with blocks-to-remove-set
|
||||
;; Step 3: deal with blocks-to-add
|
||||
;; {:blocks-to-remove-set #{16634}, :blocks-to-add ({:id 16634, :uuid "647dcfc7-2aba-4015-8b71-cdf73c552761", :page 12, :content "adding me 2"})}
|
||||
;; Handling blocks to add
|
||||
(let [encoder (state/get-semsearch-encoder)
|
||||
@@ -41,28 +40,44 @@
|
||||
store-conn (if encoder-dim
|
||||
(vector-store/create (idstr-template-string repo) encoder-dim)
|
||||
(throw (js/Error. (str "record modelDim is not found in options of registrated encoder " encoder-name))))
|
||||
addtask-fn (fn [block] (.addTask taskQueue (:uuid block)
|
||||
(fn [] ;; Promise factory
|
||||
;; TODO Junyi: Block Chunker
|
||||
(p/let [data {:snippet (gp-util/safe-subs (:content block) 0 20)
|
||||
:page (:page block)
|
||||
:id (:id block)}
|
||||
embed (text-encoder/text-encode (:content block) encoder-name)]
|
||||
(vector-store/add store-conn embed (:uuid block) data)))))]
|
||||
(mapv addtask-fn blocks-to-add)))
|
||||
eid-del->vs (fn [eid]
|
||||
;; Would replace existing promise in queue (if any)
|
||||
;; If the promise is already in pending state,
|
||||
;; there's a race condition in which the promise is executed
|
||||
;; before the pending promise is resolved
|
||||
(let [del->vs (fn [] ;; Promise factory
|
||||
(vector-store/rm store-conn (str eid)))]
|
||||
(.addTask taskQueue (str eid) del->vs)))
|
||||
block-add->vs (fn [block]
|
||||
;; Would replace the task if there is already a task with the same id in the queue
|
||||
;; Here we use stringified id as key to keep consistency with the logMind type annotation
|
||||
(let [add->vs (fn []
|
||||
(p/let [metadata {:snippet (gp-util/safe-subs (:content block) 0 20)
|
||||
:page (:page block)
|
||||
:id (:id block)
|
||||
:uuid (:uuid block)}
|
||||
embeds (text-encoder/text-encode (:content block) encoder-name)
|
||||
_ (vector-store/rm store-conn (str (:id block)))
|
||||
emb-add->vs (fn [embed]
|
||||
(vector-store/add store-conn embed (str (:id block)) (bean/->js metadata)))]
|
||||
(p/all (mapv emb-add->vs embeds))))]
|
||||
(.addTask taskQueue (str (:id block)) add->vs)))]
|
||||
;; Delete first, then add
|
||||
(mapv eid-del->vs blocks-to-remove-set)
|
||||
(mapv block-add->vs blocks-to-add)))
|
||||
|
||||
(transact-pages! [_this data]
|
||||
|
||||
(vector-store/create "test" 128)
|
||||
(transact-pages! [_this data]
|
||||
(prn "semantic: transact-pages!") ;; TODO Junyi
|
||||
(prn data))
|
||||
|
||||
(truncate-blocks! [_this]
|
||||
(-> repo
|
||||
(idstr-template-string)
|
||||
(vector-store/reset)))
|
||||
(-> repo
|
||||
(idstr-template-string)
|
||||
(vector-store/reset))
|
||||
(.clean taskQueue))
|
||||
|
||||
(remove-db! [_this]
|
||||
(-> repo
|
||||
(idstr-template-string)
|
||||
(vector-store/reset))))
|
||||
(-> repo
|
||||
(idstr-template-string)
|
||||
(vector-store/reset))
|
||||
(.clean taskQueue)))
|
||||
|
||||
@@ -519,10 +519,10 @@
|
||||
resolved "https://registry.yarnpkg.com/@logseq/diff-merge/-/diff-merge-0.2.2.tgz#583bd8c8c66d5ff05ea70906475efaa078e839a3"
|
||||
integrity sha512-0WeKNhq8PsjvunOqNEd9aSM4tgiClwhonXgXzrQ4KYj8VoyLaEAyEWWGOAoE7mwR+aqwM+bMB4MxuNFywnUb8A==
|
||||
|
||||
"@logseq/logmind@^0.1.2":
|
||||
version "0.1.2"
|
||||
resolved "https://registry.yarnpkg.com/@logseq/logmind/-/logmind-0.1.2.tgz#026eed5cc225f5df1b7d2cc63f665d46f7209c3a"
|
||||
integrity sha512-JIoWslOW2T94YRVCk8HwwBGRZUD1kQks1v+00MHLwRBni/9nw/BjeSuEmOPhYb2WLBELRmwprqtddyQM2Kvqkw==
|
||||
"@logseq/logmind@^0.1.5":
|
||||
version "0.1.5"
|
||||
resolved "https://registry.yarnpkg.com/@logseq/logmind/-/logmind-0.1.5.tgz#e2c4b84df938942972553be8f35242da4d15c40d"
|
||||
integrity sha512-ZcQmnVwpIisvtdyqO6GaEAfwbqOJbSX/FroyTBsPZcvY/T7It5VyCpNKXVSvdaC8NdhCi+xEkvX5woniUin1KA==
|
||||
dependencies:
|
||||
"@xenova/transformers" "^2.3.0"
|
||||
compromise "^14.8.0"
|
||||
|
||||
Reference in New Issue
Block a user