mirror of
https://github.com/logseq/logseq.git
synced 2026-05-22 11:44:10 +00:00
switch to EAV-style incremental hashing
This commit is contained in:
155
deps/db-sync/src/logseq/db_sync/checksum.cljs
vendored
155
deps/db-sync/src/logseq/db_sync/checksum.cljs
vendored
@@ -75,18 +75,12 @@
|
||||
[db eid]
|
||||
(:block/uuid (d/entity db eid)))
|
||||
|
||||
;; Reference attributes that `dependent-eids` scans (through the :avet index)
;; to find entities pointing at a given entity id.
(def ^:private checksum-ref-attrs [:block/parent :block/page])
|
||||
|
||||
(defn- dependent-eids
  "Returns the distinct numeric entity ids that reference any of `eids`
  through one of the attributes in `checksum-ref-attrs`.

  For every eid, each ref attribute is looked up in the :avet index with the
  eid as the value; the `:e` of each matching datom is collected."
  [db eids]
  (distinct
   (for [target-eid eids
         ref-attr checksum-ref-attrs
         datom (d/datoms db :avet ref-attr target-eid)
         :let [referrer (:e datom)]
         :when (number? referrer)]
     referrer)))
|
||||
(defn- normalize-checksum-value
  "Returns the form of `value` that goes into a checksum tuple.

  For :block/parent and :block/page the value is handed to `get-block-uuid`,
  so the result is whatever that lookup yields for the referenced entity
  (possibly nil). Values of every other attribute are returned unchanged."
  [db attr value]
  (if (contains? #{:block/parent :block/page} attr)
    (get-block-uuid db value)
    value))
|
||||
|
||||
(defn- entity-values
|
||||
[db eid e2ee?]
|
||||
@@ -112,33 +106,66 @@
|
||||
(when-let [ent (d/entity db eid)]
|
||||
(uuid? (:block/uuid ent))))
|
||||
|
||||
(defn- entity-digest
|
||||
(defn- entity-checksum-tuples
|
||||
[db eid e2ee?]
|
||||
(when (checksum-eligible-entity? db eid)
|
||||
(let [{:keys [block/uuid block/title block/name block/parent block/page block/order]} (entity-values db eid e2ee?)]
|
||||
(cond-> [fnv-offset djb-offset]
|
||||
true (digest-string (str uuid))
|
||||
true (hash-code field-separator)
|
||||
(not e2ee?) (digest-string title)
|
||||
(not e2ee?) (hash-code field-separator)
|
||||
(not e2ee?) (digest-string name)
|
||||
(not e2ee?) (hash-code field-separator)
|
||||
true (digest-string (some-> parent :block/uuid str))
|
||||
true (hash-code field-separator)
|
||||
true (digest-string (some-> page :block/uuid str))
|
||||
true (digest-string (some-> order str))))))
|
||||
(when-let [entity-uuid (get-block-uuid db eid)]
|
||||
(let [attrs (relevant-attrs e2ee?)]
|
||||
(->> (d/datoms db :eavt eid)
|
||||
(keep (fn [{:keys [a v]}]
|
||||
(when (contains? attrs a)
|
||||
[entity-uuid
|
||||
a
|
||||
(normalize-checksum-value db a v)])))
|
||||
set))))
|
||||
|
||||
(defn- tuple-digest
  "Computes the digest state for a single `[entity-uuid attr value]` tuple.

  Starting from `[fnv-offset djb-offset]`, the stringified uuid, the
  stringified attribute and the stringified value are digested in that order,
  with `field-separator` hashed in after the uuid and after the attribute.
  A nil value is passed to `digest-string` as nil rather than as a string."
  [[entity-uuid attr value]]
  (let [seed       [fnv-offset djb-offset]
        after-uuid (hash-code (digest-string seed (str entity-uuid))
                              field-separator)
        after-attr (hash-code (digest-string after-uuid (str attr))
                              field-separator)
        value-text (when (some? value) (str value))]
    (digest-string after-attr value-text)))
|
||||
|
||||
(defn- subtract-digest
  "Takes a running `[fnv djb]` sum pair and a `[fnv djb]` digest and returns
  the pair obtained by applying `sub-step` to each component."
  [running-sum digest]
  (let [[sum-fnv sum-djb] running-sum
        [fnv djb] digest]
    [(sub-step sum-fnv fnv) (sub-step sum-djb djb)]))
|
||||
|
||||
(defn- add-digest
  "Takes a running `[fnv djb]` sum pair and a `[fnv djb]` digest and returns
  the pair obtained by applying `add-step` to each component."
  [running-sum digest]
  (let [[sum-fnv sum-djb] running-sum
        [fnv djb] digest]
    [(add-step sum-fnv fnv) (add-step sum-djb djb)]))
|
||||
|
||||
(defn- db-checksum-tuples
  "Returns a lazy sequence of the checksum tuples of `db`.

  Every datom found under :block/uuid in the :avet index contributes the
  tuples that `entity-checksum-tuples` returns for its entity id; entities
  for which that call yields nothing contribute nothing."
  [db e2ee?]
  (for [uuid-datom (d/datoms db :avet :block/uuid)
        tuple (entity-checksum-tuples db (:e uuid-datom) e2ee?)]
    tuple))
|
||||
|
||||
(defn- datom->checksum-tuple
  "Turns `datom` into an `[entity-uuid attr normalized-value]` tuple, or nil.

  nil is returned when the datom's attribute is not in `attrs`, when its
  entity id is not a number, or when `get-block-uuid` finds no uuid for that
  entity in `db`. The value is normalized against the same `db`."
  [db attrs datom]
  (let [eid      (:e datom)
        attr     (:a datom)
        tracked? (and (contains? attrs attr)
                      (number? eid))]
    (when-let [entity-uuid (when tracked? (get-block-uuid db eid))]
      [entity-uuid
       attr
       (normalize-checksum-value db attr (:v datom))])))
|
||||
|
||||
(defn- existing-entity-in-db?
  "Returns true when `eid` is a number and `db` contains at least one datom
  for that entity id, false otherwise.

  The :eavt index is probed instead of nil-checking `(d/entity db eid)`:
  DataScript's `entity` returns a non-nil lazy entity for a numeric id even
  when no datom mentions it (NOTE(review): confirm against the pinned
  DataScript version), so the nil check would report every numeric id as
  existing and make callers treat brand-new entities as pre-existing ones."
  [db eid]
  (and (number? eid)
       ;; `first` on the index slice is nil exactly when the entity has no datoms.
       (some? (first (d/datoms db :eavt eid)))))
|
||||
|
||||
(defn recompute-checksum
|
||||
[db]
|
||||
(let [e2ee? (ldb/get-graph-rtc-e2ee? db)
|
||||
eids (->> (d/datoms db :avet :block/uuid)
|
||||
(map :e))]
|
||||
(->> eids
|
||||
(reduce (fn [[sum-fnv sum-djb] eid]
|
||||
(if-let [[fnv djb] (entity-digest db eid e2ee?)]
|
||||
[(add-step sum-fnv fnv)
|
||||
(add-step sum-djb djb)]
|
||||
[sum-fnv sum-djb]))
|
||||
tuples (db-checksum-tuples db e2ee?)]
|
||||
(->> tuples
|
||||
(reduce (fn [checksum-state tuple]
|
||||
(add-digest checksum-state (tuple-digest tuple)))
|
||||
[0 0])
|
||||
state->checksum)))
|
||||
|
||||
@@ -175,37 +202,33 @@
|
||||
(if (not= before-e2ee? after-e2ee?)
|
||||
;; E2EE mode changes the global digest semantics, so incremental deltas are invalid.
|
||||
(recompute-checksum db-after)
|
||||
(let [direct-eids (->> tx-data
|
||||
(remove (fn [d]
|
||||
(contains? #{:block/tx-id} (:a d))))
|
||||
(keep (fn [d]
|
||||
(let [e (:e d)]
|
||||
(when (number? e) e))))
|
||||
distinct)
|
||||
affected-eids (->> (concat direct-eids
|
||||
(dependent-eids db-before direct-eids)
|
||||
(dependent-eids db-after direct-eids))
|
||||
distinct)
|
||||
changed-uuids (->> affected-eids
|
||||
(mapcat (fn [eid]
|
||||
[(:block/uuid (d/entity db-before eid))
|
||||
(:block/uuid (d/entity db-after eid))]))
|
||||
(remove nil?)
|
||||
distinct)
|
||||
(let [tx-data (or tx-data [])
|
||||
initial-state (if (valid-checksum? checksum)
|
||||
(checksum->state checksum)
|
||||
(checksum->state (recompute-checksum db-before)))]
|
||||
(->> changed-uuids
|
||||
(reduce (fn [[sum-fnv sum-djb] uuid]
|
||||
(let [old-digest (when-let [eid (:db/id (d/entity db-before [:block/uuid uuid]))]
|
||||
(entity-digest db-before eid after-e2ee?))
|
||||
new-digest (when-let [eid (:db/id (d/entity db-after [:block/uuid uuid]))]
|
||||
(entity-digest db-after eid after-e2ee?))]
|
||||
[(cond-> sum-fnv
|
||||
old-digest (sub-step (first old-digest))
|
||||
new-digest (add-step (first new-digest)))
|
||||
(cond-> sum-djb
|
||||
old-digest (sub-step (second old-digest))
|
||||
new-digest (add-step (second new-digest)))]))
|
||||
initial-state)
|
||||
state->checksum)))))
|
||||
(checksum->state (recompute-checksum db-before)))
|
||||
;; UUID mutation on an existing entity can implicitly affect
|
||||
;; normalized parent/page tuples of referencing entities.
|
||||
;; Keep incremental logic simple and robust by full recompute.
|
||||
existing-uuid-mutation?
|
||||
(some (fn [{:keys [a e]}]
|
||||
(and (= :block/uuid a)
|
||||
(existing-entity-in-db? db-before e)))
|
||||
tx-data)
|
||||
attrs (relevant-attrs after-e2ee?)
|
||||
removed-tuples (keep #(when (false? (:added %))
|
||||
(datom->checksum-tuple db-before attrs %))
|
||||
tx-data)
|
||||
added-tuples (keep #(when (:added %)
|
||||
(datom->checksum-tuple db-after attrs %))
|
||||
tx-data)
|
||||
state-after-removals (reduce (fn [checksum-state tuple]
|
||||
(subtract-digest checksum-state (tuple-digest tuple)))
|
||||
initial-state
|
||||
removed-tuples)
|
||||
state-after-additions (reduce (fn [checksum-state tuple]
|
||||
(add-digest checksum-state (tuple-digest tuple)))
|
||||
state-after-removals
|
||||
added-tuples)]
|
||||
(if existing-uuid-mutation?
|
||||
(recompute-checksum db-after)
|
||||
(state->checksum state-after-additions))))))
|
||||
|
||||
1
deps/db-sync/src/logseq/db_sync/storage.cljs
vendored
1
deps/db-sync/src/logseq/db_sync/storage.cljs
vendored
@@ -153,6 +153,7 @@
|
||||
(let [full-checksum (sync-checksum/recompute-checksum db-after)]
|
||||
(when (and prev-checksum (not= checksum full-checksum))
|
||||
(prn :debug :before-checksum-error {:prev-checksum prev-checksum
|
||||
:new-checksum checksum
|
||||
:recomputed-after-checksum full-checksum
|
||||
:tx-meta tx-meta
|
||||
:tx-data tx-data
|
||||
|
||||
@@ -193,7 +193,7 @@
|
||||
child (some #(when (= child-uuid (:block/uuid %)) %) blocks)]
|
||||
(is (false? e2ee?))
|
||||
(is (= (checksum/recompute-checksum db) checksum))
|
||||
(is (= #{:block/uuid :block/title :block/name :block/parent :block/page}
|
||||
(is (= #{:block/uuid :block/title :block/name :block/parent :block/page :block/order}
|
||||
(set attrs)))
|
||||
(is (= 4 (count blocks)))
|
||||
(is (= child-parent-uuid (:block/parent child)))
|
||||
@@ -208,7 +208,30 @@
|
||||
{:keys [checksum attrs blocks e2ee?]} (checksum/recompute-checksum-diagnostics db)]
|
||||
(is e2ee?)
|
||||
(is (= (checksum/recompute-checksum db) checksum))
|
||||
(is (= #{:block/uuid :block/parent :block/page}
|
||||
(is (= #{:block/uuid :block/parent :block/page :block/order}
|
||||
(set attrs)))
|
||||
(is (every? #(not (contains? % :block/title)) blocks))
|
||||
(is (every? #(not (contains? % :block/name)) blocks)))))
|
||||
|
||||
(deftest incremental-checksum-is-invariant-across-tx-partitioning-test
  (testing "incremental checksum converges to the same value regardless of tx partitioning"
    (let [;; Two batches of changes applied to the sample graph.
          first-tx [[:db/add 4 :block/order "aBL"]
                    [:db/add 4 :block/title "Child v2"]]
          second-tx [[:db/add 3 :block/order "aBK"]
                     [:db/add 4 :block/parent 2]
                     [:db/add 4 :block/page 2]]
          start-db (sample-db)
          start-checksum (checksum/recompute-checksum start-db)
          ;; Path 1: both batches in a single transaction.
          combined-report (d/with start-db (into first-tx second-tx))
          combined-checksum (checksum/update-checksum start-checksum combined-report)
          ;; Path 2: the batches as two consecutive transactions.
          first-report (d/with start-db first-tx)
          checksum-after-first (checksum/update-checksum start-checksum first-report)
          second-report (d/with (:db-after first-report) second-tx)
          checksum-after-second (checksum/update-checksum checksum-after-first second-report)
          ;; Ground truth: full recompute over the final database.
          expected (checksum/recompute-checksum (:db-after second-report))]
      (is (= expected combined-checksum))
      (is (= expected checksum-after-second))
      (is (= combined-checksum checksum-after-second)))))
|
||||
|
||||
Reference in New Issue
Block a user