switch to EAV-style incremental hashing

This commit is contained in:
Tienson Qin
2026-04-07 03:32:02 +08:00
parent 49c1166cfc
commit 56e85624dc
3 changed files with 115 additions and 68 deletions

View File

@@ -75,18 +75,12 @@
[db eid]
(:block/uuid (d/entity db eid)))
(def ^:private checksum-ref-attrs
[:block/parent :block/page])
(defn- dependent-eids
[db eids]
(->> eids
(mapcat (fn [eid]
(mapcat (fn [attr]
(map :e (d/datoms db :avet attr eid)))
checksum-ref-attrs)))
(filter number?)
distinct))
(defn- normalize-checksum-value
[db attr value]
(case attr
:block/parent (get-block-uuid db value)
:block/page (get-block-uuid db value)
value))
(defn- entity-values
[db eid e2ee?]
@@ -112,33 +106,66 @@
(when-let [ent (d/entity db eid)]
(uuid? (:block/uuid ent))))
(defn- entity-digest
(defn- entity-checksum-tuples
[db eid e2ee?]
(when (checksum-eligible-entity? db eid)
(let [{:keys [block/uuid block/title block/name block/parent block/page block/order]} (entity-values db eid e2ee?)]
(cond-> [fnv-offset djb-offset]
true (digest-string (str uuid))
true (hash-code field-separator)
(not e2ee?) (digest-string title)
(not e2ee?) (hash-code field-separator)
(not e2ee?) (digest-string name)
(not e2ee?) (hash-code field-separator)
true (digest-string (some-> parent :block/uuid str))
true (hash-code field-separator)
true (digest-string (some-> page :block/uuid str))
true (digest-string (some-> order str))))))
(when-let [entity-uuid (get-block-uuid db eid)]
(let [attrs (relevant-attrs e2ee?)]
(->> (d/datoms db :eavt eid)
(keep (fn [{:keys [a v]}]
(when (contains? attrs a)
[entity-uuid
a
(normalize-checksum-value db a v)])))
set))))
(defn- tuple-digest
[[entity-uuid attr value]]
(-> [fnv-offset djb-offset]
(digest-string (str entity-uuid))
(hash-code field-separator)
(digest-string (str attr))
(hash-code field-separator)
(digest-string (some-> value str))))
(defn- subtract-digest
[[sum-fnv sum-djb] [fnv djb]]
[(sub-step sum-fnv fnv)
(sub-step sum-djb djb)])
(defn- add-digest
[[sum-fnv sum-djb] [fnv djb]]
[(add-step sum-fnv fnv)
(add-step sum-djb djb)])
(defn- db-checksum-tuples
[db e2ee?]
(->> (d/datoms db :avet :block/uuid)
(mapcat (fn [{:keys [e]}]
(entity-checksum-tuples db e e2ee?)))))
(defn- datom->checksum-tuple
[db attrs datom]
(let [attr (:a datom)
eid (:e datom)]
(when (and (contains? attrs attr)
(number? eid))
(when-let [entity-uuid (get-block-uuid db eid)]
[entity-uuid
attr
(normalize-checksum-value db attr (:v datom))]))))
(defn- existing-entity-in-db?
[db eid]
(and (number? eid)
(some? (d/entity db eid))))
(defn recompute-checksum
[db]
(let [e2ee? (ldb/get-graph-rtc-e2ee? db)
eids (->> (d/datoms db :avet :block/uuid)
(map :e))]
(->> eids
(reduce (fn [[sum-fnv sum-djb] eid]
(if-let [[fnv djb] (entity-digest db eid e2ee?)]
[(add-step sum-fnv fnv)
(add-step sum-djb djb)]
[sum-fnv sum-djb]))
tuples (db-checksum-tuples db e2ee?)]
(->> tuples
(reduce (fn [checksum-state tuple]
(add-digest checksum-state (tuple-digest tuple)))
[0 0])
state->checksum)))
@@ -175,37 +202,33 @@
(if (not= before-e2ee? after-e2ee?)
;; E2EE mode changes the global digest semantics, so incremental deltas are invalid.
(recompute-checksum db-after)
(let [direct-eids (->> tx-data
(remove (fn [d]
(contains? #{:block/tx-id} (:a d))))
(keep (fn [d]
(let [e (:e d)]
(when (number? e) e))))
distinct)
affected-eids (->> (concat direct-eids
(dependent-eids db-before direct-eids)
(dependent-eids db-after direct-eids))
distinct)
changed-uuids (->> affected-eids
(mapcat (fn [eid]
[(:block/uuid (d/entity db-before eid))
(:block/uuid (d/entity db-after eid))]))
(remove nil?)
distinct)
(let [tx-data (or tx-data [])
initial-state (if (valid-checksum? checksum)
(checksum->state checksum)
(checksum->state (recompute-checksum db-before)))]
(->> changed-uuids
(reduce (fn [[sum-fnv sum-djb] uuid]
(let [old-digest (when-let [eid (:db/id (d/entity db-before [:block/uuid uuid]))]
(entity-digest db-before eid after-e2ee?))
new-digest (when-let [eid (:db/id (d/entity db-after [:block/uuid uuid]))]
(entity-digest db-after eid after-e2ee?))]
[(cond-> sum-fnv
old-digest (sub-step (first old-digest))
new-digest (add-step (first new-digest)))
(cond-> sum-djb
old-digest (sub-step (second old-digest))
new-digest (add-step (second new-digest)))]))
initial-state)
state->checksum)))))
(checksum->state (recompute-checksum db-before)))
;; UUID mutation on an existing entity can implicitly affect
;; normalized parent/page tuples of referencing entities.
;; Keep incremental logic simple and robust by full recompute.
existing-uuid-mutation?
(some (fn [{:keys [a e]}]
(and (= :block/uuid a)
(existing-entity-in-db? db-before e)))
tx-data)
attrs (relevant-attrs after-e2ee?)
removed-tuples (keep #(when (false? (:added %))
(datom->checksum-tuple db-before attrs %))
tx-data)
added-tuples (keep #(when (:added %)
(datom->checksum-tuple db-after attrs %))
tx-data)
state-after-removals (reduce (fn [checksum-state tuple]
(subtract-digest checksum-state (tuple-digest tuple)))
initial-state
removed-tuples)
state-after-additions (reduce (fn [checksum-state tuple]
(add-digest checksum-state (tuple-digest tuple)))
state-after-removals
added-tuples)]
(if existing-uuid-mutation?
(recompute-checksum db-after)
(state->checksum state-after-additions))))))

View File

@@ -153,6 +153,7 @@
(let [full-checksum (sync-checksum/recompute-checksum db-after)]
(when (and prev-checksum (not= checksum full-checksum))
(prn :debug :before-checksum-error {:prev-checksum prev-checksum
:new-checksum checksum
:recomputed-after-checksum full-checksum
:tx-meta tx-meta
:tx-data tx-data

View File

@@ -193,7 +193,7 @@
child (some #(when (= child-uuid (:block/uuid %)) %) blocks)]
(is (false? e2ee?))
(is (= (checksum/recompute-checksum db) checksum))
(is (= #{:block/uuid :block/title :block/name :block/parent :block/page}
(is (= #{:block/uuid :block/title :block/name :block/parent :block/page :block/order}
(set attrs)))
(is (= 4 (count blocks)))
(is (= child-parent-uuid (:block/parent child)))
@@ -208,7 +208,30 @@
{:keys [checksum attrs blocks e2ee?]} (checksum/recompute-checksum-diagnostics db)]
(is e2ee?)
(is (= (checksum/recompute-checksum db) checksum))
(is (= #{:block/uuid :block/parent :block/page}
(is (= #{:block/uuid :block/parent :block/page :block/order}
(set attrs)))
(is (every? #(not (contains? % :block/title)) blocks))
(is (every? #(not (contains? % :block/name)) blocks)))))
(deftest incremental-checksum-is-invariant-across-tx-partitioning-test
(testing "incremental checksum converges to the same value regardless of tx partitioning"
(let [db0 (sample-db)
tx-a [[:db/add 4 :block/order "aBL"]
[:db/add 4 :block/title "Child v2"]]
tx-b [[:db/add 3 :block/order "aBK"]
[:db/add 4 :block/parent 2]
[:db/add 4 :block/page 2]]
one-shot-report (d/with db0 (into tx-a tx-b))
one-shot-checksum (checksum/update-checksum (checksum/recompute-checksum db0)
one-shot-report)
checksum0 (checksum/recompute-checksum db0)
report-a (d/with db0 tx-a)
checksum-a (checksum/update-checksum checksum0 report-a)
db-a (:db-after report-a)
report-b (d/with db-a tx-b)
checksum-b (checksum/update-checksum checksum-a report-b)
db-final (:db-after report-b)
full-final (checksum/recompute-checksum db-final)]
(is (= full-final one-shot-checksum))
(is (= full-final checksum-b))
(is (= one-shot-checksum checksum-b)))))