From 295823315b43ead324bb5aa73df6db1fbc6c05f0 Mon Sep 17 00:00:00 2001 From: Tienson Qin Date: Mon, 2 Feb 2026 06:12:28 +0800 Subject: [PATCH] m4 --- deps/db-sync/README.md | 23 +++ ...ent-service-document-driven-development.md | 21 +-- .../docs/milestones/agents/00-index.md | 10 ++ .../agents/01-m1-architecture-api-shape.md | 5 + .../agents/02-m2-control-plane-prototype.md | 5 + .../agents/03-m3-sandbox-agent-integration.md | 8 + .../agents/04-m4-operational-readiness.md | 148 +++++++++++++++ .../scripts/start-local-sandbox-agent.sh | 26 +++ deps/db-sync/scripts/start-weather-session.sh | 65 +++++++ .../src/logseq/db_sync/malli_schema.cljs | 14 +- .../src/logseq/db_sync/worker/agent/do.cljs | 168 ++++++++++++++---- .../logseq/db_sync/worker/agent/sandbox.cljs | 40 +++-- .../logseq/db_sync/worker/agent/session.cljs | 35 ++++ .../logseq/db_sync/worker/handler/agent.cljs | 24 +++ .../logseq/db_sync/worker/routes/index.cljs | 4 + .../logseq/db_sync/agent_sandbox_test.cljs | 12 +- .../logseq/db_sync/agent_session_test.cljs | 32 ++++ .../logseq/db_sync/worker_routes_test.cljs | 18 ++ ... 
=> 0002-nodejs-db-sync-server-adapter.md} | 0 docs/agent-guide/003-agent-service.md | 36 ++++ 20 files changed, 620 insertions(+), 74 deletions(-) create mode 100644 deps/db-sync/docs/milestones/agents/00-index.md create mode 100644 deps/db-sync/docs/milestones/agents/01-m1-architecture-api-shape.md create mode 100644 deps/db-sync/docs/milestones/agents/02-m2-control-plane-prototype.md create mode 100644 deps/db-sync/docs/milestones/agents/03-m3-sandbox-agent-integration.md create mode 100644 deps/db-sync/docs/milestones/agents/04-m4-operational-readiness.md create mode 100755 deps/db-sync/scripts/start-local-sandbox-agent.sh create mode 100755 deps/db-sync/scripts/start-weather-session.sh rename docs/adr/{0001-nodejs-db-sync-server-adapter.md => 0002-nodejs-db-sync-server-adapter.md} (100%) diff --git a/deps/db-sync/README.md b/deps/db-sync/README.md index 5304fe1963..04c7c29d1f 100644 --- a/deps/db-sync/README.md +++ b/deps/db-sync/README.md @@ -65,6 +65,27 @@ cd deps/db-sync npm run test:node-adapter ``` +### Local Sandbox Agent (for agent sessions) + +Start a local sandbox-agent checkout (defaults to `~/Codes/projects/sandbox-agent`; set `SANDBOX_AGENT_REPO` to use another path): + +```bash +cd deps/db-sync +./scripts/start-local-sandbox-agent.sh +``` + +Then run db-sync worker with: + +```bash +SANDBOX_AGENT_URL=http://127.0.0.1:2468 +``` + +If sandbox-agent runs with token auth, also set: + +```bash +SANDBOX_AGENT_TOKEN=... +``` + ## Environment Variables | Variable | Purpose | @@ -82,6 +103,8 @@ npm run test:node-adapter | COGNITO_ISSUER | Cognito issuer URL | | COGNITO_CLIENT_ID | Cognito client id | | COGNITO_JWKS_URL | Cognito JWKS URL | +| SANDBOX_AGENT_URL | sandbox-agent base URL for agent sessions | +| SANDBOX_AGENT_TOKEN | Optional bearer token for sandbox-agent | ## Notes - Protocol definitions live in `docs/agent-guide/db-sync/protocol.md`. 
diff --git a/deps/db-sync/docs/adr/0002-agent-service-document-driven-development.md b/deps/db-sync/docs/adr/0002-agent-service-document-driven-development.md index 287e3e94aa..3883d55ab5 100644 --- a/deps/db-sync/docs/adr/0002-agent-service-document-driven-development.md +++ b/deps/db-sync/docs/adr/0002-agent-service-document-driven-development.md @@ -69,25 +69,8 @@ centralize session persistence, and keep execution isolated from production. - Add observability (logs, metrics, session replay) and a permission model. ## Milestones -1) Architecture + API shape (M1) - - Confirm document-driven workflow entrypoints and task schema. - - Define session lifecycle, event model, and required auth surfaces. - - Draft API contracts for control plane and agent runtime. - -2) Control plane prototype (M2) - - Implement Durable Object session coordination. - - Add streaming and multi-client observation. - - Wire basic auth and session persistence. - -3) Sandbox + agent integration (M3) - - Provision sandbox per session and run Sandbox Agent inside it. - - Implement adapter to select Codex/Claude Code backends. - - Run end-to-end task execution from Logseq doc to agent output. - -4) Operational readiness (M4) - - Add logs, metrics, and session replay. - - Add permission model and audit trails. - - Document setup and rollout for internal dogfooding. 
+Milestones moved to: +- `docs/milestones/agents/00-index.md` ## References - https://github.com/rivet-dev/sandbox-agent diff --git a/deps/db-sync/docs/milestones/agents/00-index.md b/deps/db-sync/docs/milestones/agents/00-index.md new file mode 100644 index 0000000000..590511009b --- /dev/null +++ b/deps/db-sync/docs/milestones/agents/00-index.md @@ -0,0 +1,10 @@ +# Agent Service Milestones + +Date: 2026-02-01 +Status: Active + +Milestones are tracked as separate files in this folder: +- `01-m1-architecture-api-shape.md` +- `02-m2-control-plane-prototype.md` +- `03-m3-sandbox-agent-integration.md` +- `04-m4-operational-readiness.md` diff --git a/deps/db-sync/docs/milestones/agents/01-m1-architecture-api-shape.md b/deps/db-sync/docs/milestones/agents/01-m1-architecture-api-shape.md new file mode 100644 index 0000000000..394eea1d0b --- /dev/null +++ b/deps/db-sync/docs/milestones/agents/01-m1-architecture-api-shape.md @@ -0,0 +1,5 @@ +# M1: Architecture + API Shape + +- Confirm document-driven workflow entrypoints and task schema. +- Define session lifecycle, event model, and required auth surfaces. +- Draft API contracts for control plane and agent runtime. diff --git a/deps/db-sync/docs/milestones/agents/02-m2-control-plane-prototype.md b/deps/db-sync/docs/milestones/agents/02-m2-control-plane-prototype.md new file mode 100644 index 0000000000..eae917c0f6 --- /dev/null +++ b/deps/db-sync/docs/milestones/agents/02-m2-control-plane-prototype.md @@ -0,0 +1,5 @@ +# M2: Control Plane Prototype + +- Implement Durable Object session coordination. +- Add streaming and multi-client observation. +- Wire basic auth and session persistence. 
diff --git a/deps/db-sync/docs/milestones/agents/03-m3-sandbox-agent-integration.md b/deps/db-sync/docs/milestones/agents/03-m3-sandbox-agent-integration.md new file mode 100644 index 0000000000..350246ef22 --- /dev/null +++ b/deps/db-sync/docs/milestones/agents/03-m3-sandbox-agent-integration.md @@ -0,0 +1,8 @@ +# M3: Sandbox + Agent Integration + +- Use local sandbox-agent at `~/Codes/projects/sandbox-agent` as the runtime. +- Provision sessions through sandbox-agent HTTP API (`/v1/sessions/{session_id}`). +- Send task messages through `/v1/sessions/{session_id}/messages`. +- Track sandbox session metadata in DO runtime state. +- Implement adapter to select Codex/Claude Code backends. +- Run end-to-end task execution from Logseq doc to agent output. diff --git a/deps/db-sync/docs/milestones/agents/04-m4-operational-readiness.md b/deps/db-sync/docs/milestones/agents/04-m4-operational-readiness.md new file mode 100644 index 0000000000..51b3289efd --- /dev/null +++ b/deps/db-sync/docs/milestones/agents/04-m4-operational-readiness.md @@ -0,0 +1,148 @@ +# M4: Operational Readiness (Full Sandbox-Agent Integration) + +Status: Planned +Target: Production-ready, fully wired Sandbox Agent integration for Logseq task-driven development. + +## Goal +Make agent sessions reliable, observable, secure, and controllable end-to-end: +- `#Task` -> session creation -> sandbox-agent run -> live events -> pause/resume/interrupt -> replay/audit -> completion. + +## Scope +- Control plane (Worker + Durable Object) and sandbox-agent runtime integration. +- Event bridge, approval flow, reliability controls, observability, security, and rollout readiness. + +## Out of Scope +- New product UI redesign in Logseq. +- Non-coding agent providers that do not expose compatible runtime streams. + +## Exit Criteria (M4 Done) +1) Live event bridge is stable and replayable. +2) Pause/resume/interrupt works across Codex and Claude Code providers. +3) Approval policy gates privileged tool calls. 
+4) Session replay/audit trail is complete and queryable. +5) Alerting, dashboards, and runbook are in place. +6) E2E integration tests pass in CI. + +## Workstreams + +### WS1: Event Bridge Completion +- Consume sandbox runtime stream (`/v1/sessions/:id/events/sse`) in the DO. +- Map runtime events into control-plane canonical events: + - `agent.message` + - `agent.tool_call` + - `agent.tool_result` + - `agent.artifact` + - `agent.summary` + - `session.running|paused|completed|failed` +- Persist mapped events with monotonic cursor and event-id. +- Broadcast to `/sessions/:id/stream` subscribers. +- Add replay endpoint semantics with filters: + - `since` (timestamp) + - `cursor` + - `limit` + +Acceptance: +- Reconnect from dropped client resumes from cursor with no event loss or duplication. + +### WS2: Agent Control Semantics (Pause/Resume/Interrupt) +- Add explicit control endpoints: + - `POST /sessions/:id/pause` + - `POST /sessions/:id/resume` + - `POST /sessions/:id/interrupt` +- Enforce state machine transitions: + - `running -> paused -> running` + - `running|paused -> canceled|failed|completed` +- Ensure "new orders" while paused are queued and applied deterministically on resume. +- Ensure interrupt stops active tool/task execution where provider supports it. + +Acceptance: +- User can pause any running session and inject new instructions before resuming. + +### WS3: Approval and Permission Model +- Define policy model for tool permissions by workspace/user/session: + - allow/deny lists + - privileged tools requiring approval +- Add approval lifecycle events: + - `agent.approval_requested` + - `agent.approval_granted` + - `agent.approval_denied` +- Block runtime continuation until approval decision for gated actions. +- Record approver identity, timestamp, reason, and affected call-id. + +Acceptance: +- Privileged calls cannot execute without explicit approval. 
+ +### WS4: Reliability and Recovery +- Add retry/backoff policies for sandbox create/message/stream operations. +- Implement heartbeat + idle timeout management. +- Add reconciliation job for "stuck" sessions (no heartbeat / stalled stream). +- Add idempotency for session creation and message submission. +- Add crash/redeploy recovery path from stored session/runtime metadata. + +Acceptance: +- Sessions recover safely after transient failures and deploy restarts. + +### WS5: Observability, Replay, and Audit +- Structured logs for each stage with `session-id`, `task-id`, `workspace-id`, `user-id`. +- Metrics: + - session create latency + - stream lag + - event throughput + - approval wait time + - failure rates by phase/provider +- Dashboards and alerts: + - high failure rate + - stalled sessions + - replay backlog +- Audit completeness checks: + - every action has actor + timestamp + target session. + +Acceptance: +- On-call can diagnose a failed session from logs + replay alone. + +### WS6: Security Hardening +- Per-session scoped runtime token and expiry/rotation. +- Enforce repo/workdir boundaries and forbidden command policy. +- Rate limit create/message/control endpoints per user/workspace. +- Redact secrets from logs/events/artifacts. +- Validate all external input payloads + enforce payload size limits. + +Acceptance: +- Security review passes for least privilege and data exposure controls. + +### WS7: Test & Rollout Readiness +- Add E2E tests: + - task create -> run -> summary + - pause -> new order -> resume + - approval required -> grant/deny paths + - stream reconnect + replay cursor + - provider swap (Codex/Claude) +- Add chaos tests for disconnects/timeouts/retries. +- Internal dogfood rollout phases: + 1) canary users + 2) broader internal usage + 3) default-on for selected workspaces +- Add rollback playbook with feature flags. + +Acceptance: +- CI gates M4 features with stable E2E pass and rollback verified. 
+ +## Deliverables +- Updated API surface (session control actions + event replay). +- Canonical event schema with cursor semantics. +- Approval policy config + enforcement. +- Observability dashboards + alerts + runbook. +- Security checklist and rollout checklist. + +## Dependencies +- Stable sandbox-agent deployment and auth configuration. +- Local dev baseline: `~/Codes/projects/sandbox-agent` server reachable at + `SANDBOX_AGENT_URL` (default `http://127.0.0.1:2468`). +- Durable Object storage schema finalized for event indexing/cursor. +- Provider-level support for pause/interrupt semantics. + +## Risks and Mitigations +- Provider behavior differences (Codex vs Claude): use capability matrix + adapter fallback paths. +- Stream gaps/duplication: enforce cursor-based idempotent replay and monotonic ordering checks. +- Long-running session drift: add heartbeat watchdog + reconciliation. +- Operational complexity: phase rollout and feature-flag risky controls. diff --git a/deps/db-sync/scripts/start-local-sandbox-agent.sh b/deps/db-sync/scripts/start-local-sandbox-agent.sh new file mode 100755 index 0000000000..5096a01afe --- /dev/null +++ b/deps/db-sync/scripts/start-local-sandbox-agent.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -euo pipefail + +SANDBOX_AGENT_REPO="${SANDBOX_AGENT_REPO:-$HOME/Codes/projects/sandbox-agent}" +SANDBOX_AGENT_HOST="${SANDBOX_AGENT_HOST:-127.0.0.1}" +SANDBOX_AGENT_PORT="${SANDBOX_AGENT_PORT:-2468}" +SANDBOX_AGENT_TOKEN="${SANDBOX_AGENT_TOKEN:-}" + +if [ ! 
-d "$SANDBOX_AGENT_REPO" ]; then + echo "sandbox-agent repo not found: $SANDBOX_AGENT_REPO" >&2 + exit 1 +fi + +cd "$SANDBOX_AGENT_REPO" + +if [ -n "$SANDBOX_AGENT_TOKEN" ]; then + exec cargo run -p sandbox-agent -- server \ + --host "$SANDBOX_AGENT_HOST" \ + --port "$SANDBOX_AGENT_PORT" \ + --token "$SANDBOX_AGENT_TOKEN" +else + exec cargo run -p sandbox-agent -- server \ + --host "$SANDBOX_AGENT_HOST" \ + --port "$SANDBOX_AGENT_PORT" \ + --no-token +fi diff --git a/deps/db-sync/scripts/start-weather-session.sh b/deps/db-sync/scripts/start-weather-session.sh new file mode 100755 index 0000000000..b8ee548921 --- /dev/null +++ b/deps/db-sync/scripts/start-weather-session.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${BASE_URL:-http://127.0.0.1:8787}" +TOKEN="${TOKEN:-dev-token}" +SESSION_ID="${SESSION_ID:-task-weather-hangzhou-001}" + +create_payload() { + cat < {:method method :headers headers} @@ -21,27 +21,33 @@ (assoc :body (js/JSON.stringify (clj->js body))))] (platform/request url (clj->js init)))) +(defn- parse-json-or-default [^js resp fallback] + (let [content-type (.get (.-headers resp) "content-type")] + (if (and (string? content-type) (string/includes? 
content-type "application/json")) + (.then (.json resp) #(js->clj % :keywordize-keys true)) + (js/Promise.resolve fallback)))) + (defn clj json :keywordize-keys true)))) + json (parse-json-or-default resp {})] + (assoc json :session-id session-id)))) (defn clj json :keywordize-keys true)))) + status (.-status resp) + json (parse-json-or-default resp {:ok (<= 200 status 299) :status status})] + json))) diff --git a/deps/db-sync/src/logseq/db_sync/worker/agent/session.cljs b/deps/db-sync/src/logseq/db_sync/worker/agent/session.cljs index fd29c2d17e..9f67bdd818 100644 --- a/deps/db-sync/src/logseq/db_sync/worker/agent/session.cljs +++ b/deps/db-sync/src/logseq/db_sync/worker/agent/session.cljs @@ -18,6 +18,18 @@ "session.canceled" "canceled" nil)) +(def ^:private transitions + {"created" #{"running" "paused" "failed" "canceled" "completed"} + "running" #{"running" "paused" "failed" "canceled" "completed"} + "paused" #{"running" "paused" "failed" "canceled" "completed"} + "completed" #{} + "failed" #{} + "canceled" #{}}) + +(defn transition-allowed? + [from to] + (contains? (get transitions from #{}) to)) + (defn append-event [session events {:keys [type data event-id ts]}] (let [event-id (or event-id (str (random-uuid))) ts (or ts (common/now-ms)) @@ -30,3 +42,26 @@ updated (assoc session :status next-status :updated-at ts) events (conj (vec events) event)] [updated events event])) + +(defn enqueue-order [session order] + (update session :pending-orders (fnil conj []) order)) + +(defn drain-orders [session] + (let [orders (vec (:pending-orders session))] + [orders (assoc session :pending-orders [])])) + +(defn filter-events + [events {:keys [since-ts limit]}] + (let [events (if (number? since-ts) + (filter #(> (:ts %) since-ts) events) + events) + events (vec events)] + (cond + (and (number? limit) (pos? limit)) + (subvec events 0 (min (count events) limit)) + + (and (number? 
limit) (<= limit 0)) + [] + + :else + events))) diff --git a/deps/db-sync/src/logseq/db_sync/worker/handler/agent.cljs b/deps/db-sync/src/logseq/db_sync/worker/handler/agent.cljs index 7a419446d9..ebb944923b 100644 --- a/deps/db-sync/src/logseq/db_sync/worker/handler/agent.cljs +++ b/deps/db-sync/src/logseq/db_sync/worker/handler/agent.cljs @@ -109,6 +109,16 @@ (forward-request stub do-url "POST" headers nil)) (http/error-response "server error" 500))))) +(defn- handle-control [{:keys [env request url claims route]} control-path] + (let [session-id (get-in route [:path-params :session-id])] + (if-not (string? session-id) + (http/bad-request "invalid session id") + (if-let [^js stub (session-stub env session-id)] + (let [headers (base-headers request claims) + do-url (str (.-origin url) control-path)] + (forward-request stub do-url "POST" headers nil)) + (http/error-response "server error" 500))))) + (defn- handle-stream [{:keys [env request url claims route]}] (let [session-id (get-in route [:path-params :session-id])] (if-not (string? session-id) @@ -119,12 +129,26 @@ (forward-request stub do-url "GET" headers nil)) (http/error-response "server error" 500))))) +(defn- handle-events [{:keys [env request url claims route]}] + (let [session-id (get-in route [:path-params :session-id])] + (if-not (string? 
session-id) + (http/bad-request "invalid session id") + (if-let [^js stub (session-stub env session-id)] + (let [headers (base-headers request claims) + do-url (str (.-origin url) "/__session__/events" (.-search url))] + (forward-request stub do-url "GET" headers nil)) + (http/error-response "server error" 500))))) + (defn handle [{:keys [route] :as ctx}] (case (:handler route) :sessions/create (handle-create ctx) :sessions/get (handle-get ctx) :sessions/messages (handle-messages ctx) + :sessions/pause (handle-control ctx "/__session__/pause") + :sessions/resume (handle-control ctx "/__session__/resume") + :sessions/interrupt (handle-control ctx "/__session__/interrupt") :sessions/cancel (handle-cancel ctx) + :sessions/events (handle-events ctx) :sessions/stream (handle-stream ctx) (http/not-found))) diff --git a/deps/db-sync/src/logseq/db_sync/worker/routes/index.cljs b/deps/db-sync/src/logseq/db_sync/worker/routes/index.cljs index 25c67df4ec..9262cf8bc2 100644 --- a/deps/db-sync/src/logseq/db_sync/worker/routes/index.cljs +++ b/deps/db-sync/src/logseq/db_sync/worker/routes/index.cljs @@ -27,7 +27,11 @@ ["/:session-id" ["" {:methods {"GET" :sessions/get}}] ["/messages" {:methods {"POST" :sessions/messages}}] + ["/pause" {:methods {"POST" :sessions/pause}}] + ["/resume" {:methods {"POST" :sessions/resume}}] + ["/interrupt" {:methods {"POST" :sessions/interrupt}}] ["/cancel" {:methods {"POST" :sessions/cancel}}] + ["/events" {:methods {"GET" :sessions/events}}] ["/stream" {:methods {"GET" :sessions/stream}}]]]]) (def ^:private router diff --git a/deps/db-sync/test/logseq/db_sync/agent_sandbox_test.cljs b/deps/db-sync/test/logseq/db_sync/agent_sandbox_test.cljs index cbd85a9b2b..276c86d106 100644 --- a/deps/db-sync/test/logseq/db_sync/agent_sandbox_test.cljs +++ b/deps/db-sync/test/logseq/db_sync/agent_sandbox_test.cljs @@ -12,9 +12,9 @@ (testing "builds sandbox session endpoints" (let [base "https://sandbox.example" session-id "sess-1"] - (is (= 
"https://sandbox.example/sandbox/sessions" - (sandbox/sessions-url base))) - (is (= "https://sandbox.example/sandbox/sessions/sess-1/messages" - (sandbox/messages-url base session-id))) - (is (= "https://sandbox.example/sandbox/sessions/sess-1/stream" - (sandbox/stream-url base session-id)))))) + (is (= "https://sandbox.example/v1/sessions" + (sandbox/sessions-base-url base))) + (is (= "https://sandbox.example/v1/sessions/sess-1" + (sandbox/session-url base session-id))) + (is (= "https://sandbox.example/v1/sessions/sess-1/messages" + (sandbox/messages-url base session-id)))))) diff --git a/deps/db-sync/test/logseq/db_sync/agent_session_test.cljs b/deps/db-sync/test/logseq/db_sync/agent_session_test.cljs index 2549489cfa..0e19ec2dd5 100644 --- a/deps/db-sync/test/logseq/db_sync/agent_session_test.cljs +++ b/deps/db-sync/test/logseq/db_sync/agent_session_test.cljs @@ -41,3 +41,35 @@ (is (= 1 (count events1))) (is (string? (:event-id e1))) (is (number? (:ts e1)))))) + +(deftest session-event-filter-test + (testing "filters events by since-ts and limit" + (let [events [{:event-id "e1" :ts 10} + {:event-id "e2" :ts 20} + {:event-id "e3" :ts 30} + {:event-id "e4" :ts 40}]] + (is (= ["e3" "e4"] + (map :event-id (session/filter-events events {:since-ts 25})))) + (is (= ["e2" "e3"] + (map :event-id (session/filter-events events {:since-ts 15 :limit 2})))) + (is (= ["e1"] + (map :event-id (session/filter-events events {:limit 1}))))))) + +(deftest session-transition-allowed-test + (testing "pause/resume transitions are state-aware" + (is (true? (session/transition-allowed? "created" "running"))) + (is (true? (session/transition-allowed? "running" "paused"))) + (is (true? (session/transition-allowed? "paused" "running"))) + (is (false? (session/transition-allowed? "completed" "running"))) + (is (false? (session/transition-allowed? 
"failed" "paused"))))) + +(deftest session-pending-orders-test + (testing "pending orders queue is append-only and clearable" + (let [base {:id "task-1" + :status "paused" + :pending-orders []} + with-one (session/enqueue-order base {:message "one"}) + with-two (session/enqueue-order with-one {:message "two"}) + [orders cleared] (session/drain-orders with-two)] + (is (= ["one" "two"] (map :message orders))) + (is (= [] (:pending-orders cleared)))))) diff --git a/deps/db-sync/test/logseq/db_sync/worker_routes_test.cljs b/deps/db-sync/test/logseq/db_sync/worker_routes_test.cljs index 0722de4c43..0e4d5c79e6 100644 --- a/deps/db-sync/test/logseq/db_sync/worker_routes_test.cljs +++ b/deps/db-sync/test/logseq/db_sync/worker_routes_test.cljs @@ -63,3 +63,21 @@ (let [match (routes/match-route "GET" "/sessions/session-4/stream")] (is (= :sessions/stream (:handler match))) (is (= "session-4" (get-in match [:path-params :session-id])))))) + +(deftest match-route-sessions-events-test + (testing "sessions events routes" + (let [match (routes/match-route "GET" "/sessions/session-9/events")] + (is (= :sessions/events (:handler match))) + (is (= "session-9" (get-in match [:path-params :session-id])))))) + +(deftest match-route-sessions-control-test + (testing "sessions control routes" + (let [match (routes/match-route "POST" "/sessions/session-10/pause")] + (is (= :sessions/pause (:handler match))) + (is (= "session-10" (get-in match [:path-params :session-id])))) + (let [match (routes/match-route "POST" "/sessions/session-11/resume")] + (is (= :sessions/resume (:handler match))) + (is (= "session-11" (get-in match [:path-params :session-id])))) + (let [match (routes/match-route "POST" "/sessions/session-12/interrupt")] + (is (= :sessions/interrupt (:handler match))) + (is (= "session-12" (get-in match [:path-params :session-id])))))) diff --git a/docs/adr/0001-nodejs-db-sync-server-adapter.md b/docs/adr/0002-nodejs-db-sync-server-adapter.md similarity index 100% rename from 
docs/adr/0001-nodejs-db-sync-server-adapter.md rename to docs/adr/0002-nodejs-db-sync-server-adapter.md diff --git a/docs/agent-guide/003-agent-service.md b/docs/agent-guide/003-agent-service.md index 4a8b7c24da..3ae23f0a7d 100644 --- a/docs/agent-guide/003-agent-service.md +++ b/docs/agent-guide/003-agent-service.md @@ -189,3 +189,39 @@ Errors and idempotency: } } ``` + +## Sandbox Agent Integration Milestones +1) M3.1 Runtime handshake + - Finalize Sandbox Agent auth/config (`SANDBOX_AGENT_URL`, token handling). + - Ensure session provisioning always records sandbox session metadata. + +2) M3.2 Event bridge + - Consume `/v1/sessions/:id/events/sse` and map runtime events into the + control-plane event model (`agent.message`, `agent.tool_call`, + `agent.tool_result`, `agent.artifact`, `agent.summary`). + - Persist bridged events and broadcast to `/sessions/:id/stream`. + +3) M3.3 Tool approvals + - Add approval flow for privileged tool calls. + - Emit `agent.approval_requested` / `agent.approval_granted` / + `agent.approval_denied` events and enforce decision before continuing. + +4) M3.4 Reliability + - Add retry/backoff for sandbox create/message calls. + - Add reconciliation for disconnected streams and delayed event delivery. + - Add idempotent replay cursor for stream resume. + +5) M4.1 Observability and audit + - Emit structured logs and metrics for session lifecycle and sandbox calls. + - Add event replay endpoint with filters and pagination. + - Ensure actor identity is attached to all audit events. + +6) M4.2 Security hardening + - Enforce per-session scoped credentials and expiry. + - Add policy checks for allowed tools and repo/workdir boundaries. + - Add rate limiting and abuse safeguards per user/workspace. + +7) M4.3 Production readiness + - Add integration tests for provisioning, message roundtrip, stream bridge, + approvals, and replay. + - Add runbook, dashboards, alert thresholds, and rollback procedure.