feat: text-encode multiple embeddings

This commit is contained in:
Junyi Du
2023-08-03 16:59:19 +08:00
parent 4691f94e36
commit 6253b54ec7
5 changed files with 61 additions and 31 deletions

View File

@@ -331,7 +331,7 @@ export interface IPluginTextEncoderServiceHooks {
name: string
options?: Record<string, any>
textEncode: (text: string) => Promise<Float32Array>
textEncode: (text: string) => Promise<Float32Array[]>
}
/**

View File

@@ -95,7 +95,7 @@
"@isomorphic-git/lightning-fs": "^4.6.0",
"@logseq/capacitor-file-sync": "0.0.35",
"@logseq/diff-merge": "0.2.2",
"@logseq/logmind": "^0.1.2",
"@logseq/logmind": "^0.1.5",
"@logseq/react-tweet-embed": "1.3.1-1",
"@radix-ui/colors": "^0.1.8",
"@sentry/react": "^6.18.2",

View File

@@ -42,12 +42,27 @@
([store embed key data]
(.add store embed key data)))
;; (defn addmany
;; "Add multiple records to the vector store
;; - store: store handler (conn)
;; - embed: the vector to be added
;; - key: identifier for the record
;; - data: attached metadata for the record (notice: IPC required, so don't send big objects)
;; Returns a promise of the vector store addition
;; or throw an error if the store doesn't exist"
;; ([store embeds key]
;; (.addmany store embeds key))
;; ([store embeds key data]
;; (.addmany store embeds key data)))
(defn rm
  "Delete the record stored under `key` from the vector store
   - store: store handler (conn)
   - key: identifier of the record to delete
   Returns a promise of the vector store removal
   (true for success, false for failure)
   or throws an error if the store doesn't exist"
  [store key]
  ;; Explicit dot special form; `(.remove store key)` expands to exactly this.
  (. store remove key))

View File

@@ -1,10 +1,11 @@
(ns frontend.search.semantic
"Browser implementation of search protocol"
(:require ["@logseq/logmind" :refer [taskQueue]]
[cljs-bean.core :as bean]
[promesa.core :as p]
[frontend.search.protocol :as protocol]
[frontend.ai.vector-store :as vector-store]
[frontend.ai.text-encoder :as text-encoder]
[promesa.core :as p]
[frontend.state :as state]
[logseq.graph-parser.util :as gp-util]))
@@ -27,12 +28,10 @@
nil)
(transact-blocks! [_this {:keys [blocks-to-remove-set
blocks-to-add]
:as data}]
;; Step 1: encoding all sentences
;; Step 2: infer the vector length
;; Step 3: create vector store (optional)
;; Step 4: add to vec store
blocks-to-add]}]
;; Step 1: create vector store handler
;; Step 2: deal with blocks-to-remove-set
;; Step 3: deal with blocks-to-add
;; {:blocks-to-remove-set #{16634}, :blocks-to-add ({:id 16634, :uuid "647dcfc7-2aba-4015-8b71-cdf73c552761", :page 12, :content "adding me 2"})}
;; Handling blocks to add
(let [encoder (state/get-semsearch-encoder)
@@ -41,28 +40,44 @@
store-conn (if encoder-dim
(vector-store/create (idstr-template-string repo) encoder-dim)
(throw (js/Error. (str "record modelDim is not found in options of registrated encoder " encoder-name))))
addtask-fn (fn [block] (.addTask taskQueue (:uuid block)
(fn [] ;; Promise factory
;; TODO Junyi: Block Chunker
(p/let [data {:snippet (gp-util/safe-subs (:content block) 0 20)
:page (:page block)
:id (:id block)}
embed (text-encoder/text-encode (:content block) encoder-name)]
(vector-store/add store-conn embed (:uuid block) data)))))]
(mapv addtask-fn blocks-to-add)))
eid-del->vs (fn [eid]
;; Replaces the existing promise in the queue (if any).
;; If that promise is already in the pending state, there is a race condition:
;; this promise may be executed before the pending promise is resolved.
(let [del->vs (fn [] ;; Promise factory
(vector-store/rm store-conn (str eid)))]
(.addTask taskQueue (str eid) del->vs)))
block-add->vs (fn [block]
;; Would replace the task if there is already a task with the same id in the queue
;; Here we use stringified id as key to keep consistency with the logMind type annotation
(let [add->vs (fn []
(p/let [metadata {:snippet (gp-util/safe-subs (:content block) 0 20)
:page (:page block)
:id (:id block)
:uuid (:uuid block)}
embeds (text-encoder/text-encode (:content block) encoder-name)
_ (vector-store/rm store-conn (str (:id block)))
emb-add->vs (fn [embed]
(vector-store/add store-conn embed (str (:id block)) (bean/->js metadata)))]
(p/all (mapv emb-add->vs embeds))))]
(.addTask taskQueue (str (:id block)) add->vs)))]
;; Delete first, then add
(mapv eid-del->vs blocks-to-remove-set)
(mapv block-add->vs blocks-to-add)))
(transact-pages! [_this data]
(vector-store/create "test" 128)
(transact-pages! [_this data]
(prn "semantic: transact-pages!") ;; TODO Junyi
(prn data))
(truncate-blocks! [_this]
(-> repo
(idstr-template-string)
(vector-store/reset)))
(-> repo
(idstr-template-string)
(vector-store/reset))
(.clean taskQueue))
(remove-db! [_this]
(-> repo
(idstr-template-string)
(vector-store/reset))))
(-> repo
(idstr-template-string)
(vector-store/reset))
(.clean taskQueue)))

View File

@@ -519,10 +519,10 @@
resolved "https://registry.yarnpkg.com/@logseq/diff-merge/-/diff-merge-0.2.2.tgz#583bd8c8c66d5ff05ea70906475efaa078e839a3"
integrity sha512-0WeKNhq8PsjvunOqNEd9aSM4tgiClwhonXgXzrQ4KYj8VoyLaEAyEWWGOAoE7mwR+aqwM+bMB4MxuNFywnUb8A==
"@logseq/logmind@^0.1.2":
version "0.1.2"
resolved "https://registry.yarnpkg.com/@logseq/logmind/-/logmind-0.1.2.tgz#026eed5cc225f5df1b7d2cc63f665d46f7209c3a"
integrity sha512-JIoWslOW2T94YRVCk8HwwBGRZUD1kQks1v+00MHLwRBni/9nw/BjeSuEmOPhYb2WLBELRmwprqtddyQM2Kvqkw==
"@logseq/logmind@^0.1.5":
version "0.1.5"
resolved "https://registry.yarnpkg.com/@logseq/logmind/-/logmind-0.1.5.tgz#e2c4b84df938942972553be8f35242da4d15c40d"
integrity sha512-ZcQmnVwpIisvtdyqO6GaEAfwbqOJbSX/FroyTBsPZcvY/T7It5VyCpNKXVSvdaC8NdhCi+xEkvX5woniUin1KA==
dependencies:
"@xenova/transformers" "^2.3.0"
compromise "^14.8.0"