fix: gc unused addresses

GC rule:
Remove all non referenced addrs except some internal addrs from the
kvs table.

5 internal addrs:
1. db root (addr: 1)
2. tail (addr: 0)
3. eavt set addr
4. avet set addr
5. aevt set addr

Those set addrs are stored in the db's metadata.
This commit is contained in:
Tienson Qin
2025-05-29 07:13:17 +08:00
parent 84183068fb
commit cfce99d401

View File

@@ -141,28 +141,34 @@
(rebuild-db-from-datoms! conn sqlite-db)
(worker-util/post-message :notification ["The graph has been successfully rebuilt." :success false]))))
(comment
(defn- gc-kvs-table!
[^Object db]
(let [schema (some->> (.exec db #js {:sql "select content from kvs where addr = 0"
:rowMode "array"})
bean/->clj
ffirst
sqlite-util/transit-read)
result (->> (.exec db #js {:sql "select addr, addresses from kvs"
:rowMode "array"})
bean/->clj
(map (fn [[addr addresses]]
[addr (bean/->clj (js/JSON.parse addresses))])))
used-addresses (set (concat (mapcat second result)
[0 1 (:eavt schema) (:avet schema) (:aevt schema)]))
unused-addresses (clojure.set/difference (set (map first result)) used-addresses)]
(when unused-addresses
(prn :debug :db-gc :unused-addresses unused-addresses)
(.transaction db (fn [tx]
(doseq [addr unused-addresses]
(.exec tx #js {:sql "Delete from kvs where addr = ?"
:bind #js [addr]}))))))))
(defonce get-non-refed-addrs-sql
"WITH all_referenced AS (
SELECT CAST(value AS INTEGER) AS addr
FROM kvs, json_each(kvs.addresses)
)
SELECT kvs.addr
FROM kvs
WHERE kvs.addr NOT IN (SELECT addr FROM all_referenced)")
(defn- gc-kvs-table!
[^Object db]
(let [schema (some->> (.exec db #js {:sql "select content from kvs where addr = 0"
:rowMode "array"})
bean/->clj
ffirst
sqlite-util/transit-read)
internal-addrs (set [0 1 (:eavt schema) (:avet schema) (:aevt schema)])
non-refed-addrs (->> (.exec db #js {:sql get-non-refed-addrs-sql
:rowMode "array"})
(map first)
set)
unused-addresses (clojure.set/difference non-refed-addrs internal-addrs)]
(when unused-addresses
(prn :debug :db-gc :unused-addresses unused-addresses)
(.transaction db (fn [tx]
(doseq [addr unused-addresses]
(.exec tx #js {:sql "Delete from kvs where addr = ?"
:bind #js [addr]})))))))
(defn- find-missing-addresses
[conn ^Object db & {:keys [delete-addrs]}]
@@ -199,22 +205,7 @@
(.transaction db (fn [tx]
(doseq [item data]
(.exec tx #js {:sql "INSERT INTO kvs (addr, content, addresses) values ($addr, $content, $addresses) on conflict(addr) do update set content = $content, addresses = $addresses"
:bind item}))))
;; (when (seq delete-addrs)
;; (let [result (.exec db #js {:sql get-to-delete-unused-addresses-sql
;; :bind (js/JSON.stringify (clj->js delete-addrs))
;; :rowMode "array"})
;; non-refed-addrs (map #(aget % 0) result)]
;; (when (seq non-refed-addrs)
;; (.transaction db (fn [tx]
;; (doseq [addr non-refed-addrs]
;; (.exec tx #js {:sql "Delete from kvs where addr = ?"
;; :bind #js [addr]})))))
;; (let [missing-addrs (when worker-util/dev?
;; (seq (find-missing-addresses nil db {:delete-addrs non-refed-addrs})))]
;; (when (seq missing-addrs)
;; (worker-util/post-message :notification [(str "Bug!! Missing addresses: " missing-addrs) :error false])))))
))
:bind item}))))))
(defn restore-data-from-addr
"Update sqlite-cli/restore-data-from-addr when making changes"
@@ -354,6 +345,9 @@
(select-keys opts [:import-type :graph-git-sha]))]
(d/transact! conn initial-data {:initial-db? true})))
(worker-util/profile "gc kvs db" (gc-kvs-table! db))
(worker-util/profile "gc client ops db" (gc-kvs-table! client-ops-db))
;; TODO: remove this once we can ensure there's no bug for missing addresses
;; because it's slow for large graphs
(when-not import-type