fix: checksum

This commit is contained in:
Tienson Qin
2026-04-07 04:51:06 +08:00
parent ad935fe1c4
commit c37cc3395c
4 changed files with 181 additions and 34 deletions

View File

@@ -1,5 +1,6 @@
(ns logseq.db-sync.checksum
(:require [datascript.core :as d]
(:require [clojure.set :as set]
[datascript.core :as d]
[logseq.db :as ldb]))
;; 2166136261 = 0x811C9DC5, the offset basis of the 32-bit FNV hash family;
;; presumably the seed for the digest functions defined further down — confirm there.
(def ^:private fnv-offset 2166136261)
@@ -104,7 +105,12 @@
;; Predicate: does entity `eid` in `db` take part in the sync checksum?
;; Yields nil when `d/entity` finds nothing for `eid`.
;; NOTE(review): this hunk is a merged diff view — the bare `uuid?` check and
;; the `and` form below appear to be the old and new condition respectively
;; (hunk header: 7 lines before, 12 after); confirm against the real file.
(defn- checksum-eligible-entity?
[db eid]
(when-let [ent (d/entity db eid)]
(uuid? (:block/uuid ent))))
;; Eligible only when ALL of the following hold:
;;   - :block/uuid is a UUID
;;   - the entity is not built-in (ldb/built-in?)
;;   - it has no :logseq.property/deleted-at value
;;   - it is a page (ldb/page?), or has a :block/page, or has a :block/name
(and (uuid? (:block/uuid ent))
(not (ldb/built-in? ent))
(nil? (:logseq.property/deleted-at ent))
(or (ldb/page? ent)
(some? (:block/page ent))
(some? (:block/name ent))))))
(defn- entity-checksum-tuples
[db eid e2ee?]
@@ -141,18 +147,72 @@
[db e2ee?]
(->> (d/datoms db :avet :block/uuid)
(mapcat (fn [{:keys [e]}]
(entity-checksum-tuples db e e2ee?)))))
(when (checksum-eligible-entity? db e)
(entity-checksum-tuples db e e2ee?))))))
(defn- datom->checksum-tuple
  "Turns a single datom into a checksum tuple
  `[entity-uuid attr normalized-value]`.

  Returns nil when the datom's attribute is not in `attrs`, when its entity
  id is not a number, or when `get-block-uuid` finds no uuid for the entity
  in `db`."
  [db attrs datom]
  (let [{attr :a eid :e} datom]
    (when (contains? attrs attr)
      (when (number? eid)
        (when-let [owner-uuid (get-block-uuid db eid)]
          ;; The value is normalized against the same db the uuid came from.
          (let [normalized (normalize-checksum-value db attr (:v datom))]
            [owner-uuid attr normalized]))))))
(defn- referrer-eids-for-target
  "Entity ids that point at `target-eid` through `:block/parent` or
  `:block/page` in `db` (parent referrers first, then page referrers).
  Returns nil when `target-eid` is not a number."
  [db target-eid]
  (when (number? target-eid)
    (mapcat (fn [ref-attr]
              ;; AVET lookup: every datom whose value for ref-attr is target-eid.
              (map :e (d/datoms db :avet ref-attr target-eid)))
            [:block/parent :block/page])))
(defn- tx-ref-target-eids
  "Returns the set of entity ids that other entities may reference and whose
  referrers therefore need their checksum tuples re-evaluated.

  - For `:block/parent` / `:block/page` datoms the target is the datom value
    `v` (the referenced entity id).
  - For `:block/uuid` datoms the target is the datom entity `e`: the value is
    a uuid, never a number, so filtering on `(number? v)` could never match.
    The entity whose uuid was added, retracted or replaced is the one other
    entities point at.

  Datoms of any other attribute, and non-numeric ids, are ignored.
  `tx-data` may be nil or empty, in which case the result is `#{}`."
  [tx-data]
  (into #{}
        (keep (fn [{:keys [e a v]}]
                (case a
                  (:block/parent :block/page) (when (number? v) v)
                  :block/uuid (when (number? e) e)
                  nil)))
        tx-data))
(defn- touched-checksum-eids
  "Returns the set of entity ids whose checksum tuples may differ between
  `db-before` and `db-after` for the given `tx-data`.

  Candidates are the numeric entity ids that appear directly in `tx-data`,
  plus every entity that refers (in either db) to a target reported by
  `tx-ref-target-eids`. A candidate is kept only if it is checksum-eligible
  in at least one of the two dbs."
  [db-before db-after tx-data]
  (let [;; A changed target can alter the normalized tuple values of the
        ;; entities pointing at it, so referrers are gathered from both sides.
        referrers-in-both (fn [target-eid]
                            (concat
                             (referrer-eids-for-target db-before target-eid)
                             (referrer-eids-for-target db-after target-eid)))
        eligible-somewhere? (fn [eid]
                              (or (checksum-eligible-entity? db-before eid)
                                  (checksum-eligible-entity? db-after eid)))
        changed-eids (into #{}
                           (comp (keep :e) (filter number?))
                           tx-data)
        referring-eids (into #{}
                             (comp (mapcat referrers-in-both) (filter number?))
                             (tx-ref-target-eids tx-data))]
    (into #{}
          (filter eligible-somewhere?)
          (set/union changed-eids referring-eids))))
(defn- net-tuple-delta
  "Computes the net change in checksum tuples caused by `tx-data`.

  For every entity reported by `touched-checksum-eids`, the tuples it
  contributes in `db-before` are compared with those it contributes in
  `db-after`; an entity that is not checksum-eligible in a db contributes
  nothing there. Returns `{:removed #{...} :added #{...}}`, so tuples present
  on both sides cancel out no matter how often they were toggled in the batch."
  [db-before db-after e2ee? tx-data]
  (letfn [(tuples-in [db eid]
            (or (when (checksum-eligible-entity? db eid)
                  (entity-checksum-tuples db eid e2ee?))
                #{}))]
    (reduce
     (fn [delta eid]
       (let [old-tuples (tuples-in db-before eid)
             new-tuples (tuples-in db-after eid)]
         (-> delta
             (update :removed into (set/difference old-tuples new-tuples))
             (update :added into (set/difference new-tuples old-tuples)))))
     {:removed #{}
      :added #{}}
     (touched-checksum-eids db-before db-after tx-data))))
(defn recompute-checksum
[db]
@@ -176,7 +236,8 @@
blocks (->> eids
(keep (fn [eid]
(when (checksum-eligible-entity? db eid)
(let [{:keys [block/uuid block/title block/name block/parent block/page :block/order]} (entity-values db eid e2ee?)]
(let [{:block/keys [uuid title name parent page order]}
(entity-values db eid e2ee?)]
(cond-> {:block/uuid uuid
:block/parent parent
:block/page page
@@ -193,29 +254,27 @@
;; Incrementally updates a sync checksum from a tx report
;; ({:db-before :db-after :tx-data}).
;;   - E2EE flag differs between db-before and db-after -> full recompute on db-after.
;;   - tx-data is empty -> `checksum` is returned unchanged.
;;   - otherwise -> start from `checksum` (or from a recompute over db-before when
;;     `checksum` is not valid), subtract the digest of every removed tuple and
;;     add the digest of every added tuple reported by `net-tuple-delta`.
;; NOTE(review): this hunk is a merged diff view, so superseded lines (the `if`
;; form and the per-datom removed-tuples/added-tuples bindings) appear next to
;; their replacements — confirm against the real file.
(defn update-checksum
[checksum {:keys [db-before db-after tx-data]}]
(let [before-e2ee? (ldb/get-graph-rtc-e2ee? db-before)
after-e2ee? (ldb/get-graph-rtc-e2ee? db-after)]
(if (not= before-e2ee? after-e2ee?)
after-e2ee? (ldb/get-graph-rtc-e2ee? db-after)
tx-data (or tx-data [])]
(cond
(not= before-e2ee? after-e2ee?)
;; E2EE mode changes the global digest semantics, so incremental deltas are invalid.
(recompute-checksum db-after)
(let [tx-data (or tx-data [])
initial-state (if (valid-checksum? checksum)
;; NOTE(review): this branch returns `checksum` as-is without consulting
;; valid-checksum?; a nil/invalid checksum with empty tx-data is passed
;; through rather than recomputed — confirm that is intended.
(empty? tx-data)
checksum
:else
(let [initial-state (if (valid-checksum? checksum)
(checksum->state checksum)
(checksum->state (recompute-checksum db-before)))
attrs (relevant-attrs after-e2ee?)
removed-tuples (->> tx-data
(keep #(when (false? (:added %))
(datom->checksum-tuple db-before attrs %)))
set)
added-tuples (->> tx-data
(keep #(when (:added %)
(datom->checksum-tuple db-after attrs %)))
set)
;; Net delta: tuples present both before and after cancel out.
{:keys [removed added]} (net-tuple-delta db-before db-after after-e2ee? tx-data)
state-after-removals (reduce (fn [checksum-state tuple]
(subtract-digest checksum-state (tuple-digest tuple)))
initial-state
removed-tuples)
removed)
state-after-additions (reduce (fn [checksum-state tuple]
(add-digest checksum-state (tuple-digest tuple)))
state-after-removals
added-tuples)]
added)]
(state->checksum state-after-additions)))))

View File

@@ -147,11 +147,11 @@
(defn- append-tx-for-tx-report
[sql {:keys [db-after db-before tx-data tx-meta] :as tx-report}]
(let [prev-checksum (get-checksum sql)
checksum (sync-checksum/update-checksum prev-checksum tx-report)
;; checksum (sync-checksum/recompute-checksum db-after)
]
checksum (sync-checksum/update-checksum prev-checksum tx-report)]
(let [full-checksum (sync-checksum/recompute-checksum db-after)]
(when (and prev-checksum (not= checksum full-checksum))
(when (and prev-checksum
(seq tx-data)
(not= checksum full-checksum))
(prn :debug :before-checksum-error {:prev-checksum prev-checksum
:new-checksum checksum
:recomputed-after-checksum full-checksum

View File

@@ -217,3 +217,91 @@
(is (= full-final one-shot-checksum))
(is (= full-final checksum-b))
(is (= one-shot-checksum checksum-b)))))
(deftest incremental-checksum-handles-rebase-like-toggle-churn-test
  (testing "incremental checksum uses net tuple delta when batch contains add/retract/add churn"
    (let [base-db (sample-db)
          base-checksum (checksum/recompute-checksum base-db)
          ;; Rebase-style churn on a tuple absent from the starting db:
          ;; add -> retract -> add, each step applied on top of the previous one.
          churn-txs [[[:db/add 4 :block/order "aBL"]]
                     [[:db/retract 4 :block/order "aBL"]]
                     [[:db/add 4 :block/order "aBL"]]]
          reports (->> churn-txs
                       (reductions (fn [prev-report tx]
                                     (d/with (:db-after prev-report) tx))
                                   {:db-after base-db})
                       rest)
          final-db (:db-after (last reports))
          ;; One combined report spanning the whole churn sequence.
          batch-report {:db-before base-db
                        :db-after final-db
                        :tx-data (into [] (mapcat :tx-data) reports)}
          expected (checksum/recompute-checksum final-db)
          actual (checksum/update-checksum base-checksum batch-report)]
      (is (not= base-checksum expected))
      (is (= expected actual)))))
(deftest incremental-checksum-handles-transient-entity-churn-in-one-batch-test
  (testing "incremental checksum remains stable when a newly created block is retracted in the same batch"
    (let [base-db (sample-db)
          base-checksum (checksum/recompute-checksum base-db)
          block-uuid (random-uuid)
          ;; Step 1: create a block under parent entity 3 on page entity 1.
          create-report (d/with base-db [{:db/id -1
                                          :block/uuid block-uuid
                                          :block/title "Transient"
                                          :block/parent 3
                                          :block/page 1}])
          ;; Step 2: retract that same block again.
          retract-report (d/with (:db-after create-report)
                                 [[:db/retractEntity [:block/uuid block-uuid]]])
          final-db (:db-after retract-report)
          batch-report {:db-before base-db
                        :db-after final-db
                        :tx-data (into []
                                       (mapcat :tx-data)
                                       [create-report retract-report])}
          expected (checksum/recompute-checksum final-db)
          actual (checksum/update-checksum base-checksum batch-report)]
      (is (= base-checksum expected))
      (is (= expected actual)))))
(deftest incremental-checksum-handles-new-entity-attr-replacement-in-one-batch-test
  (testing "incremental checksum cancels replaced attrs on entities created inside the same batch"
    (let [base-db (sample-db)
          base-checksum (checksum/recompute-checksum base-db)
          block-uuid (random-uuid)
          ;; Step 1: create a block (parent entity 3, page entity 1) with order "a1".
          create-report (d/with base-db [{:db/id -1
                                          :block/uuid block-uuid
                                          :block/title "Transient"
                                          :block/order "a1"
                                          :block/parent 3
                                          :block/page 1}])
          ;; Step 2: replace its order with "a2".
          reorder-report (d/with (:db-after create-report)
                                 [[:db/add [:block/uuid block-uuid] :block/order "a2"]])
          final-db (:db-after reorder-report)
          batch-report {:db-before base-db
                        :db-after final-db
                        :tx-data (into []
                                       (mapcat :tx-data)
                                       [create-report reorder-report])}
          expected (checksum/recompute-checksum final-db)
          actual (checksum/update-checksum base-checksum batch-report)]
      (is (= expected actual)))))
(deftest checksum-ignores-non-page-non-block-entities-test
  (testing "entities with uuid but without page semantics do not affect checksum"
    (let [base-db (sample-db)
          base-checksum (checksum/recompute-checksum base-db)
          orphan-uuid (random-uuid)
          ;; An entity with a uuid and an order, but no page, parent or name.
          create-report (d/with base-db [{:db/id -1
                                          :block/uuid orphan-uuid
                                          :block/order "zz"}])
          db-after-create (:db-after create-report)
          full-after-create (checksum/recompute-checksum db-after-create)
          incremental-after-create (checksum/update-checksum base-checksum create-report)
          ;; Changing an attribute of that entity must be just as invisible.
          reorder-report (d/with db-after-create
                                 [[:db/add [:block/uuid orphan-uuid] :block/order "aa"]])
          full-after-reorder (checksum/recompute-checksum (:db-after reorder-report))
          incremental-after-reorder (checksum/update-checksum full-after-create reorder-report)]
      (is (= base-checksum full-after-create))
      (is (= base-checksum incremental-after-create))
      (is (= base-checksum full-after-reorder))
      (is (= full-after-reorder incremental-after-reorder)))))

View File

@@ -73,10 +73,10 @@
[{:keys [tx-data] :as tx-report}]
(when-not (:batch-tx? @conn)
(when (seq tx-data)
(db-sync/update-local-sync-checksum! repo tx-report)
(let [tx-report' (if sync-db-to-main-thread?
(sync-db-to-main-thread repo conn tx-report)
tx-report)
opt {:repo repo}]
(db-sync/update-local-sync-checksum! repo tx-report')
(doseq [[k handler-fn] handlers]
(handler-fn k opt tx-report')))))))))