diff --git a/deps/workers/docs/milestones/agents/00-index.md b/deps/workers/docs/milestones/agents/00-index.md index 5a0c3c63c0..b4d04f159c 100644 --- a/deps/workers/docs/milestones/agents/00-index.md +++ b/deps/workers/docs/milestones/agents/00-index.md @@ -24,3 +24,4 @@ Milestones are tracked as separate files in this folder: - `18-m18-github-app-installation-tokens.md` - `19-m19-vercel-sandbox-runtime-and-snapshot.md` - `20-m20-resume-session-with-persisted-workspace.md` +- `21-m21-store-snapshots-metadata-in-d1.md` diff --git a/deps/workers/docs/milestones/agents/21-m21-store-snapshots-metadata-in-d1.md b/deps/workers/docs/milestones/agents/21-m21-store-snapshots-metadata-in-d1.md new file mode 100644 index 0000000000..24a4e764d2 --- /dev/null +++ b/deps/workers/docs/milestones/agents/21-m21-store-snapshots-metadata-in-d1.md @@ -0,0 +1,57 @@ +# M21: Store Snapshots Metadata in D1 + +Status: Implemented +Target: Use D1 as the durable source of truth for sandbox checkpoint metadata across sessions. + +## Goal +Persist checkpoint metadata by `repo+branch` in D1 so new sessions can restore from existing snapshots without relying on per-isolate in-memory cache or per-session DO storage fallback. + +## Why M21 +- In-memory snapshot caches are not reliable across isolate restarts and deployments. +- DO storage fallback is session-local and cannot reliably resume across independent tasks/sessions. +- D1 gives a shared, durable lookup path for checkpoint metadata. + +## Scope +1) Add `AGENTS_DB` D1 binding for agents worker environments. +2) Add checkpoint metadata store in agents worker: +- key: `repo_key + branch` +- value: `provider`, `snapshot_id`, `backup_key`, `backup_dir`, `checkpoint_at` +- retention: 30-day TTL with opportunistic cleanup. +3) Make runtime provisioning use D1 checkpoint lookup for restore metadata. +4) Upsert D1 metadata when checkpoint/snapshot success persists session checkpoint. +5) Remove provider-side in-memory restore cache fallback. + +## Out of Scope +- Persisting snapshot payloads in D1. +- UI changes for checkpoint browsing. +- Backfilling historical checkpoint records. + +## Data Model +Table: `sandbox_checkpoints` +- `repo_key TEXT NOT NULL` +- `branch TEXT NOT NULL` +- `provider TEXT NOT NULL` +- `snapshot_id TEXT NOT NULL` +- `backup_key TEXT` +- `backup_dir TEXT` +- `checkpoint_at INTEGER NOT NULL` +- `updated_at INTEGER NOT NULL` +- `expires_at INTEGER NOT NULL` +- `PRIMARY KEY (repo_key, branch)` +- index: `idx_sandbox_checkpoints_expires_at(expires_at)` + +## Implementation Notes +- Added `logseq.agents.checkpoint-store` for D1 load/upsert and schema ensure. +- `do.cljs` now: + - loads checkpoint from D1 during ` provider non-empty-str string/lower-case)) + +(defn- task-repo-url + [task] + (some-> (get-in task [:project :repo-url]) non-empty-str)) + +(defn- task-branch + [task] + (or (some-> (get-in task [:project :base-branch]) source-control/sanitize-branch-name) + (some-> (get-in task [:project :branch]) source-control/sanitize-branch-name) + "main")) + +(defn- task-repo-key + [task] + (when-let [repo-url (task-repo-url task)] + (let [{:keys [provider owner name]} (source-control/repo-ref repo-url)] + (if (and (string? provider) (string? owner) (string? name)) + (str provider "/" (string/lower-case owner) "/" (string/lower-case name)) + (string/lower-case repo-url))))) + +(defn- task-key + [task] + (let [repo-key (task-repo-key task) + branch (some-> (task-branch task) string/lower-case)] + (when (and (string? repo-key) (string? branch)) + {:repo-key repo-key + :branch branch}))) + +(defn- normalize-checkpoint + [checkpoint] + (let [snapshot-id (some-> (:snapshot-id checkpoint) non-empty-str) + provider (some-> (:provider checkpoint) normalize-provider) + backup-key (some-> (:backup-key checkpoint) non-empty-str) + backup-dir (some-> (:backup-dir checkpoint) non-empty-str) + checkpoint-at (:checkpoint-at checkpoint)] + (when (string? snapshot-id) + (cond-> {:snapshot-id snapshot-id} + (string? provider) (assoc :provider provider) + (string? backup-key) (assoc :backup-key backup-key) + (string? backup-dir) (assoc :backup-dir backup-dir) + (number? checkpoint-at) (assoc :checkpoint-at checkpoint-at))))) + +(defn- checkpoint-from-row + [^js row] + (when row + (normalize-checkpoint + {:provider (aget row "provider") + :snapshot-id (aget row "snapshot_id") + :backup-key (aget row "backup_key") + :backup-dir (aget row "backup_dir") + :checkpoint-at (aget row "checkpoint_at")}))) + +(defn- maybe-cleanup? + [] + (< (js/Math.random) cleanup-sample-rate)) + +(defn- db-binding + [^js env] + (aget env "AGENTS_DB")) + +(defn- ? " + "limit 1") + repo-key + branch + now-ms) + rows (common/get-sql-rows result)] + (checkpoint-from-row (first rows))) + (p/resolved nil)) + (p/resolved nil))) + +(defn (checkpoint-store/ (p/let [stored-checkpoint ( (p/let [d1-checkpoint (-> (checkpoint-store/ {} (string? by) (assoc :by by) @@ -746,24 +747,19 @@ (defn- (checkpoint-store/ expires-at-ms now)) - (assoc acc k v) - acc))) - {} - entries))))) - -(defn- cloudflare-backup-entry - [backup-key] - (prune-cloudflare-backup-cache!) - (when (string? backup-key) - (get @cloudflare-backup-cache backup-key))) - -(defn- remember-cloudflare-backup! - [backup-key backup-id] - (when (and (string? backup-key) (string? backup-id)) - (let [now (js/Date.now) - ttl-ms (* cloudflare-snapshot-ttl-seconds 1000)] - (swap! cloudflare-backup-cache assoc backup-key {:id backup-id - :ttl-seconds cloudflare-snapshot-ttl-seconds - :expires-at-ms (+ now ttl-ms) - :updated-at-ms now})))) - -(defn- forget-cloudflare-backup! - [backup-key] - (when (string? backup-key) - (swap! cloudflare-backup-cache dissoc backup-key))) - -(defn- prune-vercel-snapshot-cache! - [] - (let [now (js/Date.now)] - (swap! vercel-snapshot-cache - (fn [entries] - (reduce-kv (fn [acc k v] - (let [expires-at-ms (:expires-at-ms v) - snapshot-id (:id v)] - (if (and (string? k) - (string? snapshot-id) - (number? expires-at-ms) - (> expires-at-ms now)) - (assoc acc k v) - acc))) - {} - entries))))) - -(defn- vercel-snapshot-entry - [backup-key] - (prune-vercel-snapshot-cache!) - (when (string? backup-key) - (get @vercel-snapshot-cache backup-key))) - -(defn- remember-vercel-snapshot! - [backup-key snapshot-id source-dir] - (when (and (string? backup-key) (string? snapshot-id)) - (let [now (js/Date.now) - ttl-ms (* cloudflare-snapshot-ttl-seconds 1000)] - (swap! vercel-snapshot-cache assoc backup-key {:id snapshot-id - :dir source-dir - :ttl-seconds cloudflare-snapshot-ttl-seconds - :expires-at-ms (+ now ttl-ms) - :updated-at-ms now})))) - -(defn- forget-vercel-snapshot! - [backup-key] - (when (string? backup-key) - (swap! vercel-snapshot-cache dissoc backup-key))) - (defn- sanitize-backup-name [value] (let [raw (or (some-> value str string/lower-case) "snapshot") @@ -1157,13 +1078,8 @@ :json-body (message-payload message)})) (defn- backup-id str string/trim not-empty) - backup-id (or explicit-backup-id - (:id entry)) - from-cache? (and (nil? explicit-backup-id) - (map? entry)) + [^js sandbox _backup-key target-dir {:keys [backup-id]}] + (let [backup-id (some-> backup-id str string/trim not-empty) restore-backup (js-method sandbox "restoreBackup")] (cond (or (not (string? target-dir)) @@ -1178,19 +1094,13 @@ :dir target-dir}] (-> (->promise (.restoreBackup sandbox backup)) (p/then (fn [_] - (when (and (string? backup-key) (string? backup-id)) - (remember-cloudflare-backup! backup-key backup-id)) (log/debug :agent/cloudflare-backup-restored - {:backup-key backup-key - :backup-id backup-id + {:backup-id backup-id :dir target-dir}) true)) (p/catch (fn [error] - (when (and from-cache? (string? backup-key)) - (forget-cloudflare-backup! backup-key)) (log/error :agent/cloudflare-backup-restore-failed - {:backup-key backup-key - :backup-id backup-id + {:backup-id backup-id :dir target-dir :error (str error)}) false))))))) @@ -1532,38 +1442,6 @@ _ ( ( result (string? backup-key) (assoc :backup-key backup-key) (string? backup-dir) (assoc :backup-dir backup-dir) @@ -2105,9 +1977,7 @@ :runtime runtime}))) (let [sandbox (cloudflare-sandbox env sandbox-id)] (p/let [result ( result (string? backup-key) (assoc :backup-key backup-key) (string? backup-dir) (assoc :backup-dir backup-dir) diff --git a/deps/workers/test/logseq/agents/do_test.cljs b/deps/workers/test/logseq/agents/do_test.cljs index 2d5452b65c..e46e396b32 100644 --- a/deps/workers/test/logseq/agents/do_test.cljs +++ b/deps/workers/test/logseq/agents/do_test.cljs @@ -3,7 +3,8 @@ [clojure.string :as string] [logseq.agents.do :as agent-do] [logseq.agents.runtime-provider :as runtime-provider] - [logseq.agents.source-control :as source-control])) + [logseq.agents.source-control :as source-control] + [logseq.sync.common :as common])) (defn- make-agent-storage [] (let [data (js/Map.)] @@ -285,6 +286,129 @@ (is false (str "unexpected provision checkpoint error: " error)) (done)))))))) +(deftest provision-runtime-loads-checkpoint-from-d1-test + (testing "provision runtime should load sandbox checkpoint from D1 metadata by repo+branch" + (async done + (let [env #js {"AGENT_RUNTIME_PROVIDER" "vercel" + "AGENTS_DB" #js {}} + self (make-self env) + task {:id "sess-d1-checkpoint" + :agent "codex" + :project {:repo-url "https://github.com/logseq/logseq" + :base-branch "main"}} + passed-task (atom nil) + runtime {:provider "vercel" + :session-id "runtime-d1" + :sandbox-id "sbx-d1"} + provider (reify runtime-provider/RuntimeProvider + ( (.put (.-storage self) + "session" + (clj->js {:id "sess-d1-checkpoint" + :status "running" + :task task + :audit {} + :created-at 0 + :updated-at 0})) + (.then (fn [_] + (with-redefs [runtime-provider/resolve-provider (fn [_env _runtime] provider) + runtime-provider/provider-id (fn [_provider] "vercel") + agent-do/start-runtime-events-stream-background! (fn [& _] nil) + common/ (.put (.-storage self) + "session" + (clj->js {:id "sess-no-storage-fallback" + :status "running" + :task task + :audit {} + :created-at 0 + :updated-at 0})) + (.then (fn [_] + (.put (.-storage self) + "sandbox.checkpoint" + (clj->js {:provider "vercel" + :snapshot-id "from-storage-should-not-be-used"})))) + (.then (fn [_] + (with-redefs [runtime-provider/resolve-provider (fn [_env _runtime] provider) + runtime-provider/provider-id (fn [_provider] "vercel") + agent-do/start-runtime-events-stream-background! (fn [& _] nil) + common/ (.put (.-storage self) "session" (clj->js session)) + (.then (fn [_] + (with-redefs [common/ (runtime-provider/ (runtime-provider/