From b61aeca1ac26e1344c0dc3a63c7bbc8d98d9f0a9 Mon Sep 17 00:00:00 2001 From: scheinriese Date: Fri, 20 Feb 2026 01:03:35 +0100 Subject: [PATCH] Add AI-powered demo graph generation pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a file-based pipeline for generating realistic demo graphs using Claude Code agents. Archetype definitions (properties, classes, cast specs, timeline) drive JSON content generation, which a deterministic assembly script converts to sqlite.build EDN for import. - Archetype definitions in deps/db/ (shared between CLI and future UI) - Assembly script: JSON cast + journals → assembled.edn - bb task: `dev:assemble-demo` for one-command assembly - Project command for Claude Code: `/generate-demo-graph` - .gitignore: exclude .context/ (agent working directory) Co-Authored-By: Claude Opus 4.6 --- .claude/commands/generate-demo-graph.md | 121 ++++++ .gitignore | 1 + bb.edn | 6 + deps/db/src/logseq/db/demo_archetypes.cljs | 159 ++++++++ .../logseq/tasks/db_graph/demo_assembly.cljs | 366 ++++++++++++++++++ 5 files changed, 653 insertions(+) create mode 100644 .claude/commands/generate-demo-graph.md create mode 100644 deps/db/src/logseq/db/demo_archetypes.cljs create mode 100644 scripts/src/logseq/tasks/db_graph/demo_assembly.cljs diff --git a/.claude/commands/generate-demo-graph.md b/.claude/commands/generate-demo-graph.md new file mode 100644 index 0000000000..b97c82d734 --- /dev/null +++ b/.claude/commands/generate-demo-graph.md @@ -0,0 +1,121 @@ +# Generate Demo Graph + +Generate a realistic demo graph for Logseq DB using the file-based pipeline. + +## Overview + +This command generates a ~300-page realistic demo graph with 6 months of journal entries, people, projects, books, meetings, tasks, and cross-references. The graph uses the **Operator/PM archetype** — a product manager who uses Logseq for daily work. + +## Prerequisites + +Read these files before generating: + +1. 
**Archetype definitions**: `deps/db/src/logseq/db/demo_archetypes.cljs` — Properties, classes, cast spec, timeline, journal patterns +2. **Assembly script**: `scripts/src/logseq/tasks/db_graph/demo_assembly.cljs` — JSON→EDN conversion logic +3. **Cast manifest** (if regenerating journals only): `.context/demo-graphs/operator/cast-manifest.txt` + +## Pipeline + +All intermediate files go in `.context/demo-graphs/operator/` (gitignored). + +### Step 1: Write Ontology (deterministic) + +Copy `:properties` and `:classes` from `operator-properties` and `operator-classes` in `demo_archetypes.cljs` into EDN format: + +``` +.context/demo-graphs/operator/00-ontology.edn +``` + +Format: `{:properties {...} :classes {...}}` + +### Step 2: Generate Cast (1 agent) + +Generate `01-cast.json` — a JSON array of entities. Each entity: + +```json +{"name": "Sarah Chen", "tags": ["Person"], "properties": {"role": "Senior PM", "email": "sarah@example.com", "company": "Meridian Labs"}} +``` + +Target counts from `operator-cast-spec`: +- 25 people (10 coworkers, 4 reports, 3 executives, 4 friends, 2 family, 2 acquaintances) +- 12 authors +- 4 companies (1 employer, 2 clients, 1 partner) +- 7 projects (mix of active, backlog, paused, done) +- 14 books (mix of completed, reading, want to read, abandoned) +- 6 tools, 4 subscriptions, 10 ideas, plus 4 OKRs and 3 decisions (the OKR and decision counts are not listed in `operator-cast-spec`) + +### Step 3: Extract Cast Manifest + +Create `cast-manifest.txt` from the cast — a flat list of all names and roles, organized by category. This is the only file journal agents need to read (keeps their context small). 
+ +### Step 4: Generate Journals (parallel agents) + +Launch 3 agents in parallel, each generating 2 months of journals: + +| File | Months | Phase | Activity | +|------|--------|-------|----------| +| `02-journals-aug-sep.json` | Aug–Sep 2025 | Onboarding & Q3 kickoff | High | +| `03-journals-oct-nov.json` | Oct–Nov 2025 | Deep execution | Medium | +| `04-journals-dec-jan.json` | Dec 2025–Jan 2026 | Q4 wrap-up & holidays | Low | + +Each agent reads `cast-manifest.txt` and the archetype definitions. + +### Step 5: Assemble & Import + +```bash +bb dev:assemble-demo .context/demo-graphs/operator/ +bb dev:create my-demo .context/demo-graphs/operator/assembled.edn +``` + +The graph appears at `~/logseq/graphs/my-demo/db.sqlite`, discoverable by localhost:3001 or :3003. + +## JSON Format Rules + +### Journal entries + +```json +[ + {"date": 20250815, "blocks": [ + {"text": "Quick sync with [[Sarah Chen]] about [[Collaborative Workspaces]]"}, + {"text": "Sprint Planning", "tags": ["Meeting"], + "properties": {"attendees": ["Sarah Chen", "James Liu"], "agenda": "Review priorities"}, + "children": [ + {"text": "Agreed to focus on API refactor first"}, + {"text": "[[James Liu]] will handle the migration piece"} + ]}, + {"text": "Review PR #847", "task": {"status": "todo", "priority": "medium"}}, + {"text": "Finish quarterly report", "task": {"status": "doing", "priority": "high", "deadline": 20250820}} + ]}, + {"date": 20250816}, + {"date": 20250817, "blocks": [{"text": "Started reading [[The Adjacent Room]]"}]} +] +``` + +### Critical rules + +- **Every day in the range must appear** (empty days: `{"date": 20250816}`) +- **Use `[[wiki links]]`** for cross-references in `text` — use EXACT names from cast-manifest.txt +- **`tags`**: Array of class names (e.g., `["Meeting"]`, `["Reflection"]`) +- **`properties`**: Object with property keys matching the ontology. 
Multi-value node properties use arrays of names: `"attendees": ["Sarah Chen", "James Liu"]`; single-value node properties (e.g., `company`) use a single name string +- **`task`**: Shorthand for built-in Task class. Status: `backlog`, `todo`, `doing`, `in-review`, `done`, `canceled`. Priority: `low`, `medium`, `high`, `urgent` +- **`children`**: Nested blocks (recursive, same format) +- **Closed value strings must match exactly**: `"Active"`, `"Backlog"`, `"Paused"`, `"Done"` for project-status; `"Reading"`, `"Completed"`, `"Want to Read"`, `"Abandoned"` for reading-status; etc. +- **Dates are integers**: `YYYYMMDD` format (e.g., `20250815`) + +### Content quality guidelines + +- **Vary day types**: ~20% empty, 15% minimal (1-2 blocks), 15% meeting-heavy, 15% task-focused, 8% reflection, 7% reading notes, 20% mixed +- **Use realistic PM language**: sprint planning, stakeholder alignment, PRD reviews, design critiques, 1:1s, retros +- **Cross-reference liberally**: mention people, projects, books, tools, ideas naturally +- **Include personal life**: friend meetups, family calls, book reading, subscription content, hobby mentions +- **Progress arcs**: projects should evolve over months (kickoff → progress → blockers → resolution) +- **Activity levels**: high (Aug-Sep onboarding), medium (Oct-Nov execution), low (Dec-Jan holidays) + +## Regenerating Parts + +You can regenerate individual pieces without redoing everything: + +- **New cast**: Regenerate `01-cast.json`, update `cast-manifest.txt`, regenerate all journals +- **One batch of journals**: Regenerate just that two-month file (e.g., `03-journals-oct-nov.json`), re-run assembly +- **Add content**: Edit any JSON file, re-run `bb dev:assemble-demo` then `bb dev:create` +- **Fresh import**: Delete only the old demo graph first: `rm -rf ~/logseq/graphs/my-demo`, then `bb dev:create` diff --git a/.gitignore b/.gitignore index ebc7ee6326..66eb36e9a9 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ cljs-test-runner-out .lein-* .nrepl-* .DS_Store +.context/ report.html strings.csv diff --git a/bb.edn b/bb.edn index 
43ab0b6855..1a0546b9a4 100644 --- a/bb.edn +++ b/bb.edn @@ -89,6 +89,12 @@ :task (apply shell {:dir "deps/db" :extra-env {"ORIGINAL_PWD" (fs/cwd)}} "yarn -s nbb-logseq -cp src:../outliner/src:script script/create_graph.cljs" *command-line-args*)} + dev:assemble-demo + {:doc "Assemble demo graph intermediate files (JSON+EDN) into assembled.edn" + :requires ([babashka.fs :as fs]) + :task (apply shell {:dir "deps/db" :extra-env {"ORIGINAL_PWD" (fs/cwd)}} + "yarn -s nbb-logseq -cp src:../../scripts/src ../../scripts/src/logseq/tasks/db_graph/demo_assembly.cljs" *command-line-args*)} + dev:diff-graphs {:doc "Diffs two DB graphs" :requires ([babashka.fs :as fs]) diff --git a/deps/db/src/logseq/db/demo_archetypes.cljs b/deps/db/src/logseq/db/demo_archetypes.cljs new file mode 100644 index 0000000000..8ac9711718 --- /dev/null +++ b/deps/db/src/logseq/db/demo_archetypes.cljs @@ -0,0 +1,159 @@ +(ns logseq.db.demo-archetypes + "Archetype definitions for AI-generated demo graphs. + Each archetype defines a tag taxonomy, property schemas, cast spec, + timeline spec, and journal pattern weights. Used by both CLI scripts + (Phase 1) and the browser app (Phase 2) for graph generation.") + +;; ============================================================================= +;; Operator/PM Archetype +;; ============================================================================= +;; A product manager / operator who uses Logseq for daily work: +;; meeting notes, project tracking, reading lists, decision logs, OKRs. +;; Closest to a real power-user graph with rich cross-references. + +(def operator-properties + "Properties for the Operator/PM archetype. + Note: Built-in :logseq.property/status, :logseq.property/priority, + :logseq.property/deadline are NOT redefined — they're used via + :logseq.class/Task for TODO items in journals." 
+ {:role {:logseq.property/type :default} + :email {:logseq.property/type :default} + :company {:logseq.property/type :node + :build/property-classes [:Company]} + :url {:logseq.property/type :url} + :attendees {:logseq.property/type :node + :db/cardinality :many + :build/property-classes [:Person]} + :agenda {:logseq.property/type :default} + :notes {:logseq.property/type :default} + :project-status {:logseq.property/type :default + :build/closed-values + [{:value "Backlog" :icon {:id "circle-dotted" :type :tabler-icon}} + {:value "Active" :icon {:id "circle-half-2" :type :tabler-icon}} + {:value "Paused" :icon {:id "player-pause" :type :tabler-icon}} + {:value "Done" :icon {:id "circle-check" :type :tabler-icon}}]} + :team {:logseq.property/type :node + :db/cardinality :many + :build/property-classes [:Person]} + :project {:logseq.property/type :node + :build/property-classes [:Project]} + :author {:logseq.property/type :node + :db/cardinality :many + :build/property-classes [:Author]} + :reading-status {:logseq.property/type :default + :build/closed-values + [{:value "Want to Read" :icon {:id "eye" :type :tabler-icon}} + {:value "Reading" :icon {:id "book" :type :tabler-icon}} + {:value "Completed" :icon {:id "circle-check" :type :tabler-icon}} + {:value "Abandoned" :icon {:id "circle-x" :type :tabler-icon}}]} + :genre {:logseq.property/type :default} + :source {:logseq.property/type :url} + :price {:logseq.property/type :default} + :frequency {:logseq.property/type :default} + :category {:logseq.property/type :default} + :quarter {:logseq.property/type :default} + :okr-status {:logseq.property/type :default + :build/closed-values + [{:value "On Track" :icon {:id "circle-check" :type :tabler-icon}} + {:value "At Risk" :icon {:id "alert-triangle" :type :tabler-icon}} + {:value "Off Track" :icon {:id "circle-x" :type :tabler-icon}}]} + :decision-status {:logseq.property/type :default + :build/closed-values + [{:value "Proposed" :icon {:id "message-circle" :type 
:tabler-icon}} + {:value "Accepted" :icon {:id "circle-check" :type :tabler-icon}} + {:value "Revisited" :icon {:id "refresh" :type :tabler-icon}}]} + :stakeholders {:logseq.property/type :node + :db/cardinality :many + :build/property-classes [:Person]}}) + +(def operator-classes + "Classes (tags) for the Operator/PM archetype. + Note: We do NOT create a custom Task class — built-in :logseq.class/Task + is used for TODO items in journals with built-in status/priority/deadline. + + Inheritance: + - #Author extends #Person → Author tag page shows genre + role, email, company + - #Book extends #Read → Book tag page shows author, genre + reading-status" + {:Person {:build/class-properties [:role :email :company] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "user"}}} + :Author {:build/class-extends [:Person] + :build/class-properties [:genre] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "feather"}}} + :Company {:build/class-properties [:url] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "building"}}} + :Meeting {:build/class-properties [:attendees :agenda :notes] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "calendar-event"}}} + :Project {:build/class-properties [:project-status :team] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "briefcase"}}} + :Read {:build/class-properties [:reading-status] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "book-2"}}} + :Book {:build/class-extends [:Read] + :build/class-properties [:author :genre] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "book"}}} + :Idea {:build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "bulb"}}} + :Reflection {:build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "pencil"}}} + :Tool {:build/class-properties [:url 
:category] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "tool"}}} + :Subscription {:build/class-properties [:source :price :frequency] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "credit-card"}}} + :OKR {:build/class-properties [:quarter :okr-status] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "target"}}} + :Decision {:build/class-properties [:decision-status :stakeholders] + :build/properties {:logseq.property.class/default-icon {:type :tabler-icon :id "scale"}}}}) + +(def operator-cast-spec + "How many entities of each type to generate for the Operator/PM archetype." + {:people {:count 25 + :mix {:coworker 10 :report 4 :executive 3 + :friend 4 :family 2 :acquaintance 2}} + :authors {:count 12} + :companies {:count 4 + :mix {:employer 1 :client 2 :partner 1}} + :projects {:count 7 + :mix {:product-feature 3 :internal-tool 2 :migration 1 :process 1}} + :books {:count 14 + :mix {:business 4 :design 3 :fiction 3 :self-help 2 :technical 2}} + :tools {:count 6} + :subscriptions {:count 4} + :ideas {:count 10}}) + +(def operator-timeline-spec + "Timeline configuration for journal generation." + {:duration-months 6 + :start-date 20250801 ;; Aug 1, 2025 + :end-date 20260131 ;; Jan 31, 2026 + :phases + [{:months [20250801 20250901] :label "Onboarding & Q3 kickoff" :activity :high} + {:months [20251001 20251101] :label "Deep execution phase" :activity :medium} + {:months [20251201 20260101] :label "Q4 wrap-up & holidays" :activity :low}]}) + +(def operator-journal-patterns + "Weighted patterns for journal day content. + Weights should sum to ~100. Empty days are explicit." 
+ {:empty-day 20 ;; No content at all + :minimal-day 15 ;; 1-2 quick notes or links + :meeting-heavy-day 15 ;; 2-3 meetings with structured notes + :task-focused-day 15 ;; Several TODOs, progress updates + :reflection-day 8 ;; Longer freeform writing, ideas + :reading-day 7 ;; Book notes, article links + :mixed-day 20 ;; Combination of the above + }) + +(def operator-archetype + "Complete Operator/PM archetype definition." + {:name "Operator/PM" + :description "A product manager or operator who uses Logseq for daily work: + meeting notes, project tracking, reading lists, decision logs, and OKRs." + :properties operator-properties + :classes operator-classes + :cast-spec operator-cast-spec + :timeline operator-timeline-spec + :patterns operator-journal-patterns}) + +;; ============================================================================= +;; Archetype Registry +;; ============================================================================= + +(def archetypes + "All available archetype definitions, keyed by identifier." + {:operator operator-archetype}) diff --git a/scripts/src/logseq/tasks/db_graph/demo_assembly.cljs b/scripts/src/logseq/tasks/db_graph/demo_assembly.cljs new file mode 100644 index 0000000000..98ec7c5bcf --- /dev/null +++ b/scripts/src/logseq/tasks/db_graph/demo_assembly.cljs @@ -0,0 +1,366 @@ +(ns logseq.tasks.db-graph.demo-assembly + "Assembles AI-generated demo graph intermediate files (JSON + EDN) into + a single sqlite.build EDN file ready for import via `bb dev:create`. + + Reads numbered files from a directory: + 00-ontology.edn - Properties + classes (deterministic) + 01-cast.json - People, projects, books, tools + 02-journals-*.json - Journal entries (1 file per 2-month batch) + + Outputs assembled.edn in the same directory." 
+  (:require ["fs" :as fs]
+            ["path" :as node-path]
+            [babashka.cli :as cli]
+            [clojure.edn :as edn]
+            [clojure.string :as string]
+            [cljs.pprint :as pprint]
+            [nbb.core :as nbb]))
+
+;; =============================================================================
+;; Reserved page names (must never be created)
+;; =============================================================================
+
+(def reserved-page-names
+  #{"Library" "Quick add" "Contents" "$$$favorites" "$$$views"})
+
+;; =============================================================================
+;; JSON parsing helpers
+;; =============================================================================
+
+(defn- read-json
+  "Read the file at `path` and parse it as JSON into Clojure data with keywordized map keys."
+  [path]
+  (-> path fs/readFileSync str js/JSON.parse (js->clj :keywordize-keys true)))
+
+(defn- read-edn
+  "Read the file at `path` and parse its contents as a single EDN value."
+  [path]
+  (-> path fs/readFileSync str edn/read-string))
+
+;; =============================================================================
+;; Timestamp generation
+;; =============================================================================
+
+(defn- date-int->timestamp
+  "Convert a YYYYMMDD integer to Unix epoch milliseconds at 09:00 UTC on that day (timezone-independent)."
+  [date-int]
+  (let [year (quot date-int 10000)
+        month (rem (quot date-int 100) 100)
+        day (rem date-int 100)]
+    (js/Date.UTC year (dec month) day 9 0 0 0)))
+
+(defn- block-timestamp
+  "Generate a timestamp for the block at `block-index` within a journal day.
+  Offsets each block by 30 minutes from 9am UTC (index 0 is exactly 9am)."
+  [date-int block-index]
+  (+ (date-int->timestamp date-int) (* block-index 30 60 1000)))
+
+;; =============================================================================
+;; Closed value UUID resolution
+;; =============================================================================
+
+(defn- ensure-closed-value-uuids
+  "Ensure all closed values in the ontology have UUIDs.
+  Returns an updated ontology map (no mutation): closed values lacking a :uuid get a random one."
+ [ontology] + (update ontology :properties + (fn [props] + (into {} + (map (fn [[k v]] + (if (:build/closed-values v) + [k (update v :build/closed-values + (fn [cvs] + (mapv (fn [cv] + (if (:uuid cv) + cv + (assoc cv :uuid (random-uuid)))) + cvs)))] + [k v])) + props))))) + +(defn- build-closed-value-index + "Build a lookup index: {prop-key {\"value-string\" uuid}} for fast resolution." + [ontology] + (into {} + (keep (fn [[prop-key prop-def]] + (when-let [cvs (:build/closed-values prop-def)] + [prop-key (into {} + (map (fn [cv] [(:value cv) (:uuid cv)]) + cvs))])) + (:properties ontology)))) + +;; ============================================================================= +;; JSON → EDN conversion: Cast +;; ============================================================================= + +(defn- convert-property-value + "Convert a JSON property value to EDN format based on property type. + For node properties (arrays of names), wraps in [:build/page ...] refs. + For closed-value properties, resolves string to [:block/uuid ...] ref. + For simple strings/numbers, passes through." + [prop-key value ontology closed-value-index] + (let [prop-def (get-in ontology [:properties prop-key]) + prop-type (:logseq.property/type prop-def) + cardinality (:db/cardinality prop-def) + closed-values (get closed-value-index prop-key)] + (cond + ;; Closed value property — resolve string to UUID ref + (and closed-values (string? value)) + (if-let [uuid (get closed-values value)] + [:block/uuid uuid] + (do (println "WARNING: No closed value match for" prop-key "=" (pr-str value)) + value)) + + ;; Node property with cardinality many — set of page refs + (and (= prop-type :node) (= cardinality :many) (sequential? value)) + (set (mapv (fn [name] [:build/page {:block/title name}]) value)) + + ;; Node property single — page ref + (and (= prop-type :node) (string? value)) + [:build/page {:block/title value}] + + ;; Date property — journal page ref + (and (= prop-type :date) (number? 
value)) + [:build/page {:build/journal value}] + + ;; Date property many + (and (= prop-type :date) (= cardinality :many) (sequential? value)) + (set (mapv (fn [d] [:build/page {:build/journal d}]) value)) + + ;; URL, default, number, checkbox — pass through + :else value))) + +(defn- convert-cast-entity + "Convert a single cast entity from JSON to sqlite.build EDN page format." + [entity ontology closed-value-index] + (let [{:keys [name tags properties]} entity + build-tags (mapv keyword tags) + build-props (when properties + (into {} + (map (fn [[k v]] + [(keyword k) (convert-property-value (keyword k) v ontology closed-value-index)]) + properties)))] + {:page (cond-> {:block/title name + :build/tags build-tags} + build-props (assoc :build/properties build-props))})) + +(defn- convert-cast + "Convert the full cast JSON to sqlite.build :pages-and-blocks entries." + [cast-data ontology closed-value-index] + (mapv #(convert-cast-entity % ontology closed-value-index) cast-data)) + +;; ============================================================================= +;; JSON → EDN conversion: Journals +;; ============================================================================= + +(defn- convert-task-block + "Convert a task shorthand to built-in :logseq.class/Task with built-in properties." + [task-map] + (let [{:keys [status priority deadline]} task-map + status-kw (when status + (keyword "logseq.property" (str "status." status))) + priority-kw (when priority + (keyword "logseq.property" (str "priority." priority)))] + (cond-> {:build/tags [:logseq.class/Task]} + status-kw (assoc-in [:build/properties :logseq.property/status] status-kw) + priority-kw (assoc-in [:build/properties :logseq.property/priority] priority-kw) + deadline (assoc-in [:build/properties :logseq.property/deadline] + (date-int->timestamp deadline))))) + +(defn- convert-journal-block + "Convert a single journal block from JSON to EDN format." 
+ [block date-int block-index ontology closed-value-index] + (let [{:keys [text tags properties children task]} block + base {:block/title text + :block/created-at (block-timestamp date-int block-index)} + ;; Add tags + base (if (seq tags) + (assoc base :build/tags (mapv keyword tags)) + base) + ;; Add task tags/properties (merges with existing tags) + base (if task + (let [task-data (convert-task-block task) + existing-tags (or (:build/tags base) []) + task-tags (:build/tags task-data) + merged-tags (vec (distinct (concat existing-tags task-tags)))] + (-> base + (assoc :build/tags merged-tags) + (update :build/properties merge (:build/properties task-data)))) + base) + ;; Add regular properties + base (if (seq properties) + (let [converted (into {} + (map (fn [[k v]] + [(keyword k) + (convert-property-value (keyword k) v ontology closed-value-index)]) + properties))] + (update base :build/properties merge converted)) + base) + ;; Add children (recursive) + base (if (seq children) + (assoc base :build/children + (vec (map-indexed + (fn [i child] + (convert-journal-block child date-int (+ block-index 1 i) ontology closed-value-index)) + children))) + base)] + base)) + +(defn- convert-journal-day + "Convert a single journal day from JSON to EDN format." + [day-data ontology closed-value-index] + (let [{:keys [date blocks]} day-data + timestamp (date-int->timestamp date)] + (if (seq blocks) + {:page (cond-> {:build/journal date + :block/created-at timestamp + :block/updated-at (+ timestamp (* (count blocks) 30 60 1000))} + true identity) + :blocks (vec (map-indexed + (fn [i block] + (convert-journal-block block date i ontology closed-value-index)) + blocks))} + ;; Empty day + {:page {:build/journal date}}))) + +(defn- convert-journals + "Convert journal JSON files to sqlite.build :pages-and-blocks entries." 
+ [journal-data ontology closed-value-index] + (mapv #(convert-journal-day % ontology closed-value-index) journal-data)) + +;; ============================================================================= +;; Validation +;; ============================================================================= + +(defn- collect-page-names + "Collect all page names from :pages-and-blocks." + [pages-and-blocks] + (set (keep (fn [{:keys [page]}] + (:block/title page)) + pages-and-blocks))) + +(defn- collect-page-refs + "Collect all [:build/page {:block/title ...}] references from nested data." + [data] + (cond + (and (vector? data) (= :build/page (first data))) + (when-let [title (get-in data [1 :block/title])] + [title]) + + (map? data) + (mapcat collect-page-refs (vals data)) + + (set? data) + (mapcat collect-page-refs data) + + (sequential? data) + (mapcat collect-page-refs data) + + :else nil)) + +(defn- validate-assembled + "Validate the assembled EDN before writing. Returns a vector of error strings." + [assembled] + (let [pages-and-blocks (:pages-and-blocks assembled) + page-names (collect-page-names pages-and-blocks) + all-refs (set (collect-page-refs pages-and-blocks)) + errors (atom [])] + ;; Check reserved page names + (doseq [name page-names] + (when (reserved-page-names name) + (swap! errors conj (str "Reserved page name used: " name)))) + ;; Check referential integrity (warn, not error, since :auto-create-ontology? handles some) + (doseq [ref all-refs] + (when-not (page-names ref) + (swap! errors conj (str "WARNING: Referenced page not found in cast: " ref + " (will be auto-created as plain page)")))) + @errors)) + +;; ============================================================================= +;; Assembly +;; ============================================================================= + +(defn- list-numbered-files + "List files matching NN-*.json or NN-*.edn pattern, sorted by prefix." 
+ [dir] + (->> (js->clj (.readdirSync fs dir)) + (filter #(re-matches #"\d{2}-.*\.(json|edn)" %)) + sort)) + +(defn assemble + "Assemble a demo graph directory into a single sqlite.build EDN map." + [dir] + (let [files (list-numbered-files dir) + _ (when (empty? files) + (println "Error: No numbered files found in" dir) + (js/process.exit 1)) + ;; Read ontology (00-ontology.edn) + ontology-file (first (filter #(string/starts-with? % "00-") files)) + _ (when-not ontology-file + (println "Error: No 00-ontology.edn found in" dir) + (js/process.exit 1)) + raw-ontology (read-edn (node-path/join dir ontology-file)) + ;; Ensure closed values have UUIDs and build lookup index + ontology (ensure-closed-value-uuids raw-ontology) + closed-value-index (build-closed-value-index ontology) + ;; Read cast (01-cast.json) + cast-file (first (filter #(string/starts-with? % "01-") files)) + _ (when-not cast-file + (println "Error: No 01-cast.json found in" dir) + (js/process.exit 1)) + cast-data (read-json (node-path/join dir cast-file)) + cast-pages (convert-cast cast-data ontology closed-value-index) + ;; Read all journal files (02-*.json, 03-*.json, etc.) + journal-files (filter #(and (string/ends-with? % ".json") + (not (string/starts-with? % "01-"))) + files) + journal-pages (mapcat (fn [f] + (let [data (read-json (node-path/join dir f))] + (convert-journals data ontology closed-value-index))) + journal-files) + ;; Assemble final EDN + assembled {:auto-create-ontology? true + :properties (:properties ontology) + :classes (:classes ontology) + :pages-and-blocks (vec (concat cast-pages journal-pages))}] + assembled)) + +;; ============================================================================= +;; CLI +;; ============================================================================= + +(def spec + {:help {:alias :h + :desc "Print help"}}) + +(defn -main [args] + (let [options (cli/parse-opts args {:spec spec}) + dir (first args) + _ (when (or (nil? 
dir) (:help options)) + (println "Usage: $0 DIRECTORY\n\nAssembles numbered JSON/EDN files into assembled.edn") + (js/process.exit 1)) + dir (if (node-path/isAbsolute dir) + dir + (node-path/join (or js/process.env.ORIGINAL_PWD ".") dir)) + _ (when-not (fs/existsSync dir) + (println "Error: Directory does not exist:" dir) + (js/process.exit 1)) + assembled (assemble dir) + errors (validate-assembled assembled) + output-path (node-path/join dir "assembled.edn")] + ;; Print validation results + (when (seq errors) + (println "\nValidation issues:") + (doseq [e errors] + (println " " e)) + (println)) + ;; Write assembled EDN + (fs/writeFileSync output-path (with-out-str (pprint/pprint assembled))) + (let [page-count (count (:pages-and-blocks assembled)) + block-count (reduce + (map #(count (:blocks % [])) (:pages-and-blocks assembled))) + class-count (count (:classes assembled))] + (println (str "Assembled " page-count " pages, " block-count " blocks, " + class-count " classes → " output-path))))) + +(when (= nbb/*file* (nbb/invoked-file)) + (-main *command-line-args*))