fix: ensure the markdown processor can handle colons in values that aren't technically valid YAML

This commit is contained in:
Aiden Cline
2026-01-15 12:46:30 -06:00
parent 05fbf7eb78
commit 8cb0f199ee
2 changed files with 117 additions and 7 deletions

View File

@@ -14,8 +14,60 @@ export namespace ConfigMarkdown {
return Array.from(template.matchAll(SHELL_REGEX))
}
/**
 * Rewrites top-level `key: value` frontmatter lines whose value contains a
 * colon into YAML literal block scalars, so values such as
 * `url: https://example.com:8080` or `note: First: Second` are read as plain
 * strings.
 *
 * Only the text between the opening `---` line and the next line starting with
 * `---` is inspected; everything before and after it is returned unchanged.
 * The following lines are passed through untouched:
 *  - blank lines and `#` comment lines,
 *  - indented lines (treated as continuations of a previous key),
 *  - lines that are not of the form `identifier: value`,
 *  - values that are empty, start with a quote, are exactly `>` or `|`,
 *  - values that look like a complete flow collection (`{ ... }` / `[ ... ]`),
 *    since rewriting those would turn a mapping or sequence into a string,
 *  - values without a colon.
 *
 * Both LF and CRLF line endings are supported; each line keeps the terminator
 * it had in the input.
 *
 * @param content Full markdown document, optionally starting with frontmatter.
 * @returns The document with offending frontmatter values converted to block
 *   scalars, or `content` itself when no frontmatter block is found.
 */
export function preprocessFrontmatter(content: string): string {
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/)
  if (!match) return content

  const frontmatter = match[1]
  // The capture group begins right after the first line break of the match.
  const start = match[0].indexOf("\n") + 1
  const crlfDocument = match[0].startsWith("---\r\n")
  // Hoisted out of the loop: group 1 is the key, group 2 the rest of the line.
  const keyValuePattern = /^([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(.*)$/
  const result: string[] = []

  for (const rawLine of frontmatter.split("\n")) {
    // Splitting on "\n" leaves the "\r" of a CRLF pair on the line. Set it
    // aside so the key/value pattern (whose `.` never matches "\r") can still
    // match, and re-append it when emitting.
    const cr = rawLine.endsWith("\r") ? "\r" : ""
    const line = cr === "" ? rawLine : rawLine.slice(0, -1)
    const trimmed = line.trim()

    // comments, empty lines and indented continuation lines are left alone
    if (trimmed === "" || trimmed.startsWith("#") || /^\s/.test(line)) {
      result.push(rawLine)
      continue
    }

    const kvMatch = line.match(keyValuePattern)
    if (!kvMatch) {
      result.push(rawLine)
      continue
    }

    const key = kvMatch[1]
    const value = kvMatch[2].trim()

    const isEmptyQuotedOrBlock =
      value === "" || value === ">" || value === "|" || value.startsWith('"') || value.startsWith("'")
    // `{a: 1}` / `[a: 1]` are valid YAML flow collections that legitimately
    // contain colons; only a value that both opens and closes one is skipped.
    const isFlowCollection =
      (value.startsWith("{") && value.endsWith("}")) || (value.startsWith("[") && value.endsWith("]"))

    if (isEmptyQuotedOrBlock || isFlowCollection || !value.includes(":")) {
      result.push(rawLine)
      continue
    }

    // The last frontmatter line never carries its own "\r" (the delimiter
    // pattern consumes it), so fall back to the document's style for the
    // line break inserted between the two emitted lines.
    const headerCr = cr !== "" || crlfDocument ? "\r" : ""
    result.push(`${key}: |${headerCr}`)
    result.push(` ${value}${cr}`)
  }

  // Splice by offset rather than by searching for the frontmatter text, so the
  // replacement can never land anywhere else in the document.
  return content.slice(0, start) + result.join("\n") + content.slice(start + frontmatter.length)
}
export async function parse(filePath: string) {
const template = await Bun.file(filePath).text()
const raw = await Bun.file(filePath).text()
const template = preprocessFrontmatter(raw)
try {
const md = matter(template)

View File

@@ -94,20 +94,36 @@ describe("ConfigMarkdown: frontmatter parsing", async () => {
const template = `---
description: "This is a description wrapped in quotes"
# field: this is a commented out field that should be ignored
# occupation: This man has the following occupation: Software Engineer
occupation: This man has the following occupation: Software Engineer
title: 'Hello World'
name: John "Doe"
family: He has no 'family'
summary: >
This is a summary
url: https://example.com:8080/path?query=value
time: The time is 12:30:00 PM
nested: First: Second: Third: Fourth
quoted_colon: "Already quoted: no change needed"
single_quoted_colon: 'Single quoted: also fine'
mixed: He said "hello: world" and then left
empty:
dollar: Use $' and $& for special patterns
---
Content
Content that should not be parsed:
fake_field: this is not yaml
another: neither is this
time: 10:30:00 AM
url: https://should-not-be-parsed.com:3000
The above lines look like YAML but are just content.
`
const matter = await import("gray-matter")
const parsed = matter.default(template)
const preprocessed = ConfigMarkdown.preprocessFrontmatter(template)
const parsed = matter.default(preprocessed)
test("should parse without throwing", () => {
expect(parsed).toBeDefined()
@@ -119,6 +135,10 @@ Content
expect(parsed.data.description).toBe("This is a description wrapped in quotes")
})
test("should extract occupation field with colon in value", () => {
expect(parsed.data.occupation).toBe("This man has the following occupation: Software Engineer\n")
})
test("should extract title field with single quotes", () => {
expect(parsed.data.title).toBe("Hello World")
})
@@ -137,10 +157,48 @@ Content
test("should not include commented fields in data", () => {
expect(parsed.data.field).toBeUndefined()
expect(parsed.data.occupation).toBeUndefined()
})
test("should extract content after frontmatter", () => {
expect(parsed.content.trim()).toBe("Content")
test("should extract URL with port", () => {
expect(parsed.data.url).toBe("https://example.com:8080/path?query=value\n")
})
test("should extract time with colons", () => {
expect(parsed.data.time).toBe("The time is 12:30:00 PM\n")
})
test("should extract value with multiple colons", () => {
expect(parsed.data.nested).toBe("First: Second: Third: Fourth\n")
})
test("should preserve already double-quoted values with colons", () => {
expect(parsed.data.quoted_colon).toBe("Already quoted: no change needed")
})
test("should preserve already single-quoted values with colons", () => {
expect(parsed.data.single_quoted_colon).toBe("Single quoted: also fine")
})
test("should extract value with quotes and colons mixed", () => {
expect(parsed.data.mixed).toBe('He said "hello: world" and then left\n')
})
test("should handle empty values", () => {
expect(parsed.data.empty).toBeNull()
})
test("should handle dollar sign replacement patterns literally", () => {
expect(parsed.data.dollar).toBe("Use $' and $& for special patterns")
})
test("should not parse fake yaml from content", () => {
expect(parsed.data.fake_field).toBeUndefined()
expect(parsed.data.another).toBeUndefined()
})
test("should extract content after frontmatter without modification", () => {
expect(parsed.content).toContain("Content that should not be parsed:")
expect(parsed.content).toContain("fake_field: this is not yaml")
expect(parsed.content).toContain("url: https://should-not-be-parsed.com:3000")
})
})