| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
/**
 * A node occupying a span of offsets.
 *
 * `resolveAttrOffsetToNodes` treats the span as half-open, `[start, end)`:
 * a node whose `end` equals a span's start does not intersect that span.
 */
export type NodeInterval = {
  /** Identifier used as the aggregation key in `alignAndAggregateByNode`. */
  id: string;
  /** Inclusive start offset. */
  start: number;
  /** Exclusive end offset. */
  end: number;
  /** Display text; copied into `NodeAggregatedEntry.raw` during aggregation. */
  label: string;
};
|
|
| |
/** The share of one attribution span that is assigned to a single node. */
export type NodeAssignment = {
  /** `id` of the node receiving the share. */
  nodeId: string;
  /** Overlap length divided by the attribution span length (see `resolveAttrOffsetToNodes`). */
  weight: number;
};
|
|
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * How an attribution span lines up with the node intervals, as classified by
 * `resolveAttrOffsetToNodes`:
 * - `'exact'`: one node intersects the span and has identical bounds.
 * - `'contained'`: one node intersects the span, with different bounds.
 * - `'union'`: several back-to-back nodes together cover exactly the span.
 * - `'overlap'`: several nodes intersect the span but do not form a `'union'`.
 * - `'empty'`: the span is empty/inverted or intersects no node.
 */
export type AlignmentCase = 'exact' | 'contained' | 'union' | 'overlap' | 'empty';
|
|
/** Outcome of resolving one attribution span against the node intervals. */
export type ResolveResult = {
  /** One entry per intersecting node, in the order the nodes were scanned. */
  assignments: NodeAssignment[];
  /** Classification of the alignment (spelled `kase` because `case` is a reserved word). */
  kase: AlignmentCase;
};
|
|
| |
| |
| |
| |
/**
 * Maps the attribution span `[attrStart, attrEnd)` onto the node intervals it intersects.
 *
 * Each node sharing a non-empty range with the span gets a weight of
 * `overlapLength / spanLength`. The result is classified as:
 * - `'empty'`: `attrEnd <= attrStart`, or no node intersects the span.
 * - `'exact'`: exactly one node intersects and its bounds equal the span's.
 * - `'contained'`: exactly one node intersects, with any other bounds.
 * - `'union'`: several nodes intersect, the first starts at `attrStart`, the last
 *   ends at `attrEnd`, and every node ends exactly where the next one starts.
 * - `'overlap'`: any other multi-node intersection.
 *
 * @param sortedNodes Node intervals ordered by ascending `start`. The scan stops at
 *   the first node starting at or after `attrEnd`, so unsorted input can miss nodes.
 * @param attrStart Inclusive start of the attribution span.
 * @param attrEnd Exclusive end of the attribution span.
 * @returns The per-node weights (in scan order) and the alignment classification.
 */
export function resolveAttrOffsetToNodes(
  sortedNodes: ReadonlyArray<NodeInterval>,
  attrStart: number,
  attrEnd: number,
): ResolveResult {
  if (attrEnd <= attrStart) return { assignments: [], kase: 'empty' };

  const hits: Array<{ node: NodeInterval; overlap: number }> = [];
  for (const node of sortedNodes) {
    if (node.end <= attrStart) continue; // entirely before the span
    if (node.start >= attrEnd) break; // sorted by start: nothing later can intersect
    const s = Math.max(node.start, attrStart);
    const e = Math.min(node.end, attrEnd);
    if (e > s) hits.push({ node, overlap: e - s });
  }

  // A single guarded narrowing replaces per-access non-null assertions.
  const firstHit = hits[0];
  const lastHit = hits[hits.length - 1];
  if (firstHit === undefined || lastHit === undefined) {
    return { assignments: [], kase: 'empty' };
  }

  const attrLen = attrEnd - attrStart;
  const assignments: NodeAssignment[] = hits.map(({ node, overlap }) => ({
    nodeId: node.id,
    weight: overlap / attrLen,
  }));

  if (hits.length === 1) {
    const only = firstHit.node;
    const sameBounds = only.start === attrStart && only.end === attrEnd;
    return { assignments, kase: sameBounds ? 'exact' : 'contained' };
  }

  // 'union' requires the outer bounds to match and no gap or overlap between neighbours.
  let contiguous = firstHit.node.start === attrStart && lastHit.node.end === attrEnd;
  if (contiguous) {
    let prevEnd = firstHit.node.end;
    for (const { node } of hits.slice(1)) {
      if (node.start !== prevEnd) {
        contiguous = false;
        break;
      }
      prevEnd = node.end;
    }
  }
  return { assignments, kase: contiguous ? 'union' : 'overlap' };
}
|
|
| |
/** One scored attribution piece, located by an offset span. */
export type PieceEntry = {
  /** `[start, end)` span; passed to `resolveAttrOffsetToNodes` as `attrStart`/`attrEnd`. */
  offset: [number, number];
  /** Text of the piece; echoed in alignment log lines. */
  raw: string;
  /** Score that `alignAndAggregateByNode` splits across nodes by overlap weight. */
  score: number;
};
|
|
| |
/** Per-node total produced by `alignAndAggregateByNode`. */
export type NodeAggregatedEntry = {
  /** `id` of the node this total belongs to. */
  nodeId: string;
  /** The node's own `[start, end]` bounds (not the bounds of the contributing pieces). */
  offset: [number, number];
  /** The node's `label`. */
  raw: string;
  /** Sum of `piece.score * weight` over every piece assigned to this node. */
  score: number;
  /**
   * Formatted alignment lines, one per contributing piece whose alignment was
   * not `'exact'`. Left unset/undefined when every contribution was exact.
   */
  alignmentTooltipLines?: string[];
};
|
|
| |
/**
 * Optional context appended to alignment log lines by `formatAlignmentPieceLine`.
 * NOTE(review): the exact meaning of `step` and `targetToken` is defined by the
 * callers, which are not visible here.
 */
export type AlignWarnContext = {
  /** Rendered as ` step=<value>` when defined. */
  step?: number;
  /** Rendered as ` target="<value>"` when defined. */
  targetToken?: string;
};
|
|
| |
/** Prefix placed in front of every alignment warning line so the lines can be filtered in logs. */
export const GEN_ATTR_DAG_ALIGN_LOG_PREFIX = '[genAttributeDagView.align]';
|
|
| |
| |
| |
/**
 * Formats a one-line, human-readable description of how a piece was aligned.
 *
 * Shape: `<kase> attr=[<as>,<ae>) "<raw>" score=<score>[ step=<n>][ target="<t>"] → <assignments>`
 * where `<assignments>` is a comma-separated list of `<nodeId>×<weight to 3 decimals>`,
 * or `(none)` when there are no assignments.
 *
 * @param kase Alignment classification of the piece.
 * @param as Start offset of the piece's span.
 * @param ae End offset of the piece's span.
 * @param attr The piece; its `raw` text and `score` are echoed verbatim (no escaping).
 * @param assignments Node weights the piece resolved to.
 * @param warnCtx Optional context; each field is included only when it is not `undefined`.
 * @returns The formatted line, without any log prefix.
 */
export function formatAlignmentPieceLine(
  kase: AlignmentCase,
  as: number,
  ae: number,
  attr: PieceEntry,
  assignments: ReadonlyArray<NodeAssignment>,
  warnCtx?: AlignWarnContext,
): string {
  const stepPart = warnCtx?.step !== undefined ? ` step=${warnCtx.step}` : '';
  const targetPart =
    warnCtx?.targetToken !== undefined ? ` target="${warnCtx.targetToken}"` : '';
  const ctx = stepPart + targetPart;

  const detail =
    assignments.length > 0
      ? assignments.map((a) => `${a.nodeId}×${a.weight.toFixed(3)}`).join(', ')
      : '(none)';

  return `${kase} attr=[${as},${ae}) "${attr.raw}" score=${attr.score}${ctx} → ${detail}`;
}
|
|
| |
/**
 * Builds the full warning line for a piece: `GEN_ATTR_DAG_ALIGN_LOG_PREFIX`, a
 * single space, then the text produced by `formatAlignmentPieceLine` for the
 * same arguments.
 *
 * @param kase Alignment classification of the piece.
 * @param as Start offset of the piece's span.
 * @param ae End offset of the piece's span.
 * @param attr The piece being described.
 * @param assignments Node weights the piece resolved to.
 * @param warnCtx Optional context forwarded to `formatAlignmentPieceLine`.
 * @returns The prefixed warning line.
 */
export function formatAlignmentWarnLine(
  kase: AlignmentCase,
  as: number,
  ae: number,
  attr: PieceEntry,
  assignments: ReadonlyArray<NodeAssignment>,
  warnCtx?: AlignWarnContext,
): string {
  const pieceLine = formatAlignmentPieceLine(kase, as, ae, attr, assignments, warnCtx);
  return `${GEN_ATTR_DAG_ALIGN_LOG_PREFIX} ${pieceLine}`;
}
|
|
| |
/**
 * Dedupe keys of alignment warnings already written to the console.
 * Module-level state: it only grows until
 * `clearGenAttributeDagAlignmentWarnDedupe` is called.
 */
const alignmentWarnOnceKeys = new Set<string>();
|
|
/**
 * Builds the key under which a warning is deduplicated: classification and span
 * bounds joined by NUL characters. The piece text, score and warn context are
 * deliberately not part of the key, so the same span/classification warns once.
 */
function alignmentWarnDedupeKey(kase: AlignmentCase, as: number, ae: number): string {
  return `${kase}\0${as}\0${ae}`;
}
|
|
| |
/**
 * Forgets which alignment warnings were already logged, so subsequent
 * `alignAndAggregateByNode` calls will log them again.
 */
export function clearGenAttributeDagAlignmentWarnDedupe(): void {
  alignmentWarnOnceKeys.clear();
}
|
|
| |
| |
| |
| |
| |
/**
 * Distributes each piece's score over the nodes its span intersects and sums
 * the contributions per node.
 *
 * For every piece, `resolveAttrOffsetToNodes` yields node weights; each node
 * receives `piece.score * weight`. Pieces whose alignment is not `'exact'`
 * additionally:
 * - are logged with `console.warn`, at most once per (classification, span)
 *   for as long as the module-level dedupe set is not cleared;
 * - have their warning line attached to `alignmentTooltipLines` of every node
 *   they contribute to (this is not deduplicated).
 *
 * @param entries Scored pieces to distribute.
 * @param nodes Node intervals, in any order; a sorted copy is made, the input is not mutated.
 * @param warnCtx Optional context included in the warning lines.
 * @returns One entry per node that received a contribution, ordered by first
 *   contribution. Nodes that no piece intersects are omitted.
 */
export function alignAndAggregateByNode(
  entries: ReadonlyArray<PieceEntry>,
  nodes: ReadonlyArray<NodeInterval>,
  warnCtx?: AlignWarnContext,
): NodeAggregatedEntry[] {
  const sorted = [...nodes].sort((a, b) => a.start - b.start);
  // With duplicate ids the node latest in sorted order wins, as later `set` calls overwrite.
  const byNodeId = new Map<string, NodeInterval>();
  for (const node of sorted) byNodeId.set(node.id, node);

  // Map iteration follows insertion order and entries are never deleted or
  // re-inserted, so the map itself records first-contribution order.
  const acc = new Map<string, NodeAggregatedEntry>();

  for (const attr of entries) {
    const [as, ae] = attr.offset;
    const { assignments, kase } = resolveAttrOffsetToNodes(sorted, as, ae);

    // Non-null exactly when the piece did not align 'exact'.
    let warnLine: string | null = null;
    if (kase !== 'exact') {
      warnLine = formatAlignmentWarnLine(kase, as, ae, attr, assignments, warnCtx);
      const dedupeKey = alignmentWarnDedupeKey(kase, as, ae);
      if (!alignmentWarnOnceKeys.has(dedupeKey)) {
        alignmentWarnOnceKeys.add(dedupeKey);
        console.warn(warnLine);
      }
    }

    for (const { nodeId, weight } of assignments) {
      const node = byNodeId.get(nodeId);
      if (node === undefined) continue;
      const delta = attr.score * weight;

      const existing = acc.get(nodeId);
      if (existing === undefined) {
        acc.set(nodeId, {
          nodeId,
          offset: [node.start, node.end],
          raw: node.label,
          score: delta,
          alignmentTooltipLines: warnLine !== null ? [warnLine] : undefined,
        });
        continue;
      }

      existing.score += delta;
      if (warnLine !== null) {
        if (!existing.alignmentTooltipLines) {
          existing.alignmentTooltipLines = [];
        }
        existing.alignmentTooltipLines.push(warnLine);
      }
    }
  }

  return Array.from(acc.values());
}
|
|