Spaces:
Configuration error
Configuration error
/**
 * Parse the streaming markdown produced by the Granite reconciler into the
 * four-section briefing IA the design system expects.
 *
 * The reconciler's prompt (app/reconcile.py:EXTRA_SYSTEM_PROMPT) enforces
 * **bold-stop** section heads, one per line:
 *     **Status.**
 *     **Empirical evidence.**
 *     **Modeled scenarios.**
 *     **Policy context.**
 * The model occasionally drops them inline; the backend's
 * _split_inline_headers normaliser fixes that before yielding final text,
 * but mid-stream we still tolerate inline forms ourselves.
 *
 * Within a section, every sentence with a `[doc_id]` citation is a Claim.
 * The claim's tier is inferred from the cited doc_id family via
 * tierForDocId(). Multiple cites on one sentence: the tier of the first
 * cited doc wins (visual margin glyph), all cites still rendered.
 */
| import type { BriefingBlock, Citation, ClaimPart } from '$lib/types/claim'; | |
| import { tierForDocId, type Tier } from '$lib/types/tier'; | |
/** One of the four fixed briefing sections and the heading spellings that map to it. */
interface CanonicalSection {
  /** Stable identifier for the section. */
  key: string;
  /** Display label. */
  label: string;
  /** Two-digit ordinal, also used to match the `## 0n Heading` form. */
  n: string;
  /** Tier attached to the section head, when the section has one. */
  tier?: Tier;
  /** Lower-case heading spellings accepted for this section. */
  aliases: string[];
}

const CANONICAL_SECTIONS: CanonicalSection[] = [
  { key: 'status', label: 'Status', n: '01', aliases: ['status'] },
  {
    key: 'empirical',
    label: 'Empirical evidence',
    n: '02',
    tier: 'empirical',
    aliases: ['empirical evidence', 'empirical']
  },
  {
    key: 'modeled',
    label: 'Modeled scenarios',
    n: '03',
    tier: 'modeled',
    aliases: ['modeled scenarios', 'modeled']
  },
  { key: 'policy', label: 'Policy context', n: '04', aliases: ['policy context', 'policy'] }
];

/**
 * Resolve a raw heading title to its canonical section.
 *
 * Matching is case-insensitive, ignores trailing `.` / `:` punctuation, and
 * treats any run of whitespace (including a line break inside the heading)
 * as a single space, so `"Empirical\n  evidence."` still resolves.
 *
 * @param rawTitle Heading text as captured from the markdown.
 * @returns The matching section, or `undefined` when the title is not one of
 *   the known aliases.
 */
function findSection(rawTitle: string): CanonicalSection | undefined {
  const normalised = rawTitle
    .toLowerCase()
    .replace(/[.:]+\s*$/, '')
    // The heading capture allows arbitrary whitespace; aliases use single spaces.
    .replace(/\s+/g, ' ')
    .trim();
  return CANONICAL_SECTIONS.find((s) => s.aliases.includes(normalised));
}
// Match either `**Heading.**` (the canonical reconciler output) or the
// markdown-headed `## 01 Heading` form we use in the static demo data.
// Capture groups: 1 = line-start anchor (empty or "\n"), 2 = bold heading
// text, 3 = two-digit section number, 4 = heading title after the number.
// The optional separator after the number accepts `:`, `-`, `.`, and the
// en/em dashes; the dashes are written as \u escapes so the pattern survives
// being re-encoded.
const SECTION_HEAD_RE =
  /(^|\n)\s*(?:\*\*([A-Z][A-Za-z\s/]+?)\.\s*\*\*|#{1,3}\s*(0[1-4])\s*[:\-\u2013\u2014.]?\s*([^\n]+))/g;
/** Everything parseBriefing() hands back for one markdown string. */
export interface ParseResult {
  /** Section heads and prose blocks, in document order. */
  blocks: Array<BriefingBlock>;
  /** Citation records keyed by doc ID. */
  citations: Record<string, Citation>;
  /** Doc IDs cited in the body but not in the provided citation registry. */
  unresolvedDocIds: Array<string>;
}
/**
 * Assemble the Citation record for a single cited document.
 *
 * Any field the backend supplied in `meta` is used as-is. Missing fields
 * fall back to conservative defaults so an unknown doc ID still renders:
 * the source is the upper-cased leading segment of the doc ID (split on `_`
 * or `-`), the title is the doc ID itself, and url / vintage / retrieved are
 * empty strings.
 *
 * @param n Display number assigned to the citation.
 * @param docId Identifier of the cited document; also used as the record id.
 * @param meta Optional backend-supplied metadata overriding the defaults.
 * @returns The populated Citation, with its tier taken from tierForDocId().
 */
export function citationFromMeta(
  n: number,
  docId: string,
  meta?: Partial<Pick<Citation, 'source' | 'title' | 'url' | 'vintage' | 'retrieved'>>
): Citation {
  const supplied = meta ?? {};
  const derivedSource = docId.split(/[_-]/)[0].toUpperCase();

  const citation: Citation = {
    id: docId,
    n,
    tier: tierForDocId(docId),
    source: supplied.source ?? derivedSource,
    title: supplied.title ?? docId,
    docId,
    url: supplied.url ?? '',
    vintage: supplied.vintage ?? '',
    retrieved: supplied.retrieved ?? ''
  };
  return citation;
}
// A bracketed citation: one doc ID, or several separated by commas, e.g.
// `[noaa_slr]` or `[noaa_slr, nyc_plan]`. Group 1 is the text between the
// brackets. Matching is case-insensitive.
const CITE_RE = /\[([a-z][a-z0-9_]*(?:\s*,\s*[a-z][a-z0-9_]*)*)\]/gi;

/**
 * Break a flattened paragraph into sentences.
 *
 * A boundary is whitespace that follows `.`, `!` or `?` and precedes an
 * upper-case ASCII letter or an opening parenthesis. Pieces that are empty
 * or whitespace-only are discarded; kept pieces are returned untrimmed.
 */
function splitSentences(text: string): string[] {
  const kept: string[] = [];
  for (const piece of text.split(/(?<=[.!?])\s+(?=[A-Z(])/g)) {
    if (piece.trim().length > 0) {
      kept.push(piece);
    }
  }
  return kept;
}
/**
 * Split one sentence into claim parts around its `[doc_id]` citations.
 *
 * Each citation bracket closes a part: the part's text is whatever precedes
 * the bracket since the previous one, its tier comes from the first doc ID
 * in the bracket, and `cite` is that first doc ID. Every doc ID in the
 * bracket is registered in `cites` if it is not already there. Text after
 * the last bracket becomes a final plain part unless it is only whitespace.
 * A sentence without citations is returned as a single plain part.
 *
 * NOTE(review): only the first doc ID of a multi-ID bracket is attached to
 * the part; the others are registered but not referenced from it. Confirm
 * against how ClaimPart is rendered.
 *
 * @param sentence Sentence text, citations included.
 * @param cites Citation registry; mutated in place when new doc IDs appear.
 * @param registerCite Factory called once for each doc ID missing from `cites`.
 * @returns The parts in sentence order.
 */
function parseSentenceParts(
  sentence: string,
  cites: Record<string, Citation>,
  registerCite: (docId: string) => Citation
): ClaimPart[] {
  const matches = [...sentence.matchAll(CITE_RE)];
  if (matches.length === 0) {
    return [{ text: sentence }];
  }

  const parts: ClaimPart[] = [];
  let cursor = 0;
  for (const m of matches) {
    const at = m.index ?? 0;
    const before = sentence.slice(cursor, at);
    const docIds = m[1].split(/\s*,\s*/).filter(Boolean);
    cursor = at + m[0].length;

    const tier = tierForDocId(docIds[0]);
    parts.push({ text: before, tier, cite: docIds[0] });

    for (const id of docIds) {
      // Own-property lookup only: a doc ID such as `constructor` or
      // `toString` would otherwise resolve to an inherited Object.prototype
      // member and never get registered.
      const existing = Object.prototype.hasOwnProperty.call(cites, id) ? cites[id] : undefined;
      if (!existing) cites[id] = registerCite(id);
    }
  }

  if (cursor < sentence.length) {
    const tail = sentence.slice(cursor);
    if (tail.trim()) parts.push({ text: tail });
  }
  return parts;
}
/**
 * Turn one whitespace-flattened paragraph into claim parts.
 *
 * Sentences are parsed individually and joined with a single-space part;
 * trailing parts that are blank and carry no tier are dropped.
 */
function paragraphToParts(
  flat: string,
  cites: Record<string, Citation>,
  registerCite: (docId: string) => Citation
): ClaimPart[] {
  const parts: ClaimPart[] = [];
  for (const s of splitSentences(flat)) {
    parts.push(...parseSentenceParts(s, cites, registerCite));
    parts.push({ text: ' ' });
  }
  while (parts.length && parts[parts.length - 1].text.trim() === '' && !parts[parts.length - 1].tier) {
    parts.pop();
  }
  return parts;
}

/**
 * Parse a fully-or-partially-streamed briefing markdown string into blocks.
 * Safe to call repeatedly during streaming — re-parses from scratch.
 *
 * Recognised section heads split the text into sections. Each section with a
 * non-empty body yields a `head` block followed by one `prose` block per
 * blank-line-separated paragraph. Text before the first recognised head is
 * not rendered. When no head has been recognised at all, the whole markdown
 * is rendered as a single implicit "Status" section.
 *
 * @param markdown Briefing text received so far.
 * @param knownCitations Citation registry supplied by the backend, keyed by
 *   doc ID. Not mutated; the result carries a copy extended with any newly
 *   seen doc IDs.
 * @returns The blocks, the merged citation registry, and the doc IDs cited
 *   in the text that were missing from `knownCitations`.
 */
export function parseBriefing(
  markdown: string,
  knownCitations: Record<string, Citation> = {}
): ParseResult {
  const cites: Record<string, Citation> = { ...knownCitations };
  // New citations continue numbering after the highest known `n`.
  let nextN = Object.values(cites).reduce((m, c) => Math.max(m, c.n), 0) + 1;
  const unresolvedDocIds = new Set<string>();
  const registerCite = (docId: string): Citation => {
    // Own-property check so inherited names (`constructor`, `toString`, ...)
    // are not mistaken for known citations.
    const known = Object.prototype.hasOwnProperty.call(knownCitations, docId)
      ? knownCitations[docId]
      : undefined;
    if (!known) unresolvedDocIds.add(docId);
    return citationFromMeta(nextN++, docId);
  };

  const blocks: BriefingBlock[] = [];
  type Idx = { num: string; label: string; tier?: Tier; titleExtra?: string; start: number; bodyStart: number };
  const indices: Idx[] = [];
  let m: RegExpExecArray | null;
  // The regex is module-level and global, so reset its cursor before scanning.
  SECTION_HEAD_RE.lastIndex = 0;
  while ((m = SECTION_HEAD_RE.exec(markdown))) {
    if (m[2] !== undefined) {
      // **Heading.** form
      const sec = findSection(m[2]);
      if (!sec) continue;
      indices.push({
        num: sec.n,
        label: sec.label,
        tier: sec.tier,
        start: m.index + m[1].length,
        bodyStart: m.index + m[0].length
      });
    } else if (m[3] !== undefined) {
      // ## 0n Heading form (used by the static demo)
      const num = m[3];
      const title = (m[4] ?? '').trim();
      const sec = CANONICAL_SECTIONS.find((s) => s.n === num) ?? findSection(title);
      indices.push({
        num,
        label: sec?.label ?? title,
        tier: sec?.tier,
        // Keep the written title only when it differs from the canonical label.
        titleExtra: sec && title.toLowerCase() !== sec.label.toLowerCase() ? title : undefined,
        start: m.index + m[1].length,
        bodyStart: m.index + m[0].length
      });
    }
  }

  // Pre-section preamble. Don't render — the reconciler doesn't emit one and
  // we don't want a stray HTML escape of the bold-marker prefix to flash.
  for (let i = 0; i < indices.length; i++) {
    const sec = indices[i];
    const next = indices[i + 1];
    const body = markdown.slice(sec.bodyStart, next ? next.start : markdown.length).trim();
    // A head whose body has not streamed in yet produces no blocks.
    if (!body) continue;
    blocks.push({
      kind: 'head',
      n: sec.num,
      label: sec.label,
      tier: sec.tier,
      title: sec.titleExtra
    });
    for (const para of body.split(/\n\s*\n/)) {
      const flat = para.replace(/\s+/g, ' ').trim();
      if (!flat) continue;
      const parts = paragraphToParts(flat, cites, registerCite);
      if (parts.length) blocks.push({ kind: 'prose', parts });
    }
  }

  // Fallback: if the model hasn't emitted any recognised section head yet
  // (or won't — e.g. live_now intent), render the whole markdown as one
  // implicit "Status" block so the reader sees something during streaming.
  // Gated on "no head recognised" rather than "no blocks produced": a head
  // that is still waiting for its body must not be echoed back as raw
  // `**Heading.**` prose.
  if (indices.length === 0 && markdown.trim()) {
    blocks.push({ kind: 'head', n: '01', label: 'Status' });
    const flat = markdown.replace(/\s+/g, ' ').trim();
    const parts = paragraphToParts(flat, cites, registerCite);
    if (parts.length) blocks.push({ kind: 'prose', parts });
  }

  return { blocks, citations: cites, unresolvedDocIds: [...unresolvedDocIds] };
}
/**
 * HTML escape — kept around because the v0.4.1 parser used it for the
 * status-preamble fallback path. The v0.4.2 parser drops the preamble
 * entirely (the reconciler doesn't emit one), so this is currently
 * unused. If the preamble path comes back, wire it here.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their HTML entities. `&` is
 * handled first so the ampersands introduced by the later replacements are
 * not escaped a second time.
 *
 * @param s Raw text.
 * @returns The text with those five characters replaced by entities.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
function escapeHtml(s: string): string {
  return s
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}