// Page-scrape residue preserved as a comment (not part of the module):
//   muthuk1's picture
//   Add missing files: LICENSE, Dockerfile, .github, tests, e2e, eval, scripts, configs
//   a0ebf39 verified
import type { PPTElement } from '@/lib/types/slides';
import type { Stage, Scene } from '@/lib/types/stage';
// ==================== Scenario ====================
/**
 * One entry in an {@link EvalScenario}'s `turns` list.
 */
export interface EvalTurn {
  /** Message text for this turn. */
  userMessage: string;

  /**
   * Optional checkpoint flag.
   * NOTE(review): presumably marks the turns for which a `CheckpointResult`
   * is recorded — confirm against the eval runner.
   */
  checkpoint?: boolean;
}
/**
 * Declarative description of a single evaluation scenario: identifying
 * metadata, the store state to start from, session configuration, and the
 * ordered list of turns.
 */
export interface EvalScenario {
  /** Scenario identifier (cf. `ScenarioRunResult.scenarioId`). */
  id: string;

  /** Display name of the scenario. */
  name: string;

  /** Free-form description of the scenario. */
  description: string;

  /** Labels attached to the scenario. */
  tags: string[];

  /** Store contents the scenario starts from. */
  initialStoreState: {
    /** Initial stage, or `null` when there is none. */
    stage: Stage | null;

    /** Initial list of scenes. */
    scenes: Scene[];

    /** Id of the initially selected scene, or `null` when there is none. */
    currentSceneId: string | null;

    /** Optional elements already present on the whiteboard. */
    whiteboardElements?: PPTElement[];
  };

  /** Session-level settings for the scenario. */
  config: {
    /**
     * Agent identifiers.
     * NOTE(review): presumably the agents taking part in the session — confirm.
     */
    agentIds: string[];

    /** Kind of session: either `'qa'` or `'discussion'`. */
    sessionType: 'qa' | 'discussion';
  };

  /** Turns of the scenario, in order. */
  turns: EvalTurn[];

  /**
   * Optional model name.
   * NOTE(review): presumably overrides a runner-level default — confirm.
   */
  model?: string;

  /**
   * Optional repeat count.
   * NOTE(review): presumably how many runs to perform for this scenario
   * (cf. `ScenarioRunResult.runIndex`) — confirm.
   */
  repeat?: number;
}
// ==================== Scoring ====================
/**
 * Score for a single evaluation dimension, paired with its justification.
 */
export interface DimensionScore {
  /** Numeric score for the dimension (scale not specified in this file). */
  score: number;

  /** Textual explanation accompanying the score. */
  reason: string;
}
/**
 * Full scoring result for one checkpoint: five per-dimension scores, an
 * overall number, and a list of issues.
 *
 * The snake_case keys are part of the data shape and must stay as written.
 */
export interface VlmScore {
  /** Score for the "readability" dimension. */
  readability: DimensionScore;

  /** Score for the "overlap" dimension. */
  overlap: DimensionScore;

  /** Score for the "rendering correctness" dimension. */
  rendering_correctness: DimensionScore;

  /** Score for the "content completeness" dimension. */
  content_completeness: DimensionScore;

  /** Score for the "layout logic" dimension. */
  layout_logic: DimensionScore;

  /**
   * Overall score.
   * NOTE(review): how this relates to the per-dimension scores is not
   * visible here — confirm with the scorer.
   */
  overall: number;

  /** Issue descriptions reported alongside the scores. */
  issues: string[];
}
// ==================== Results ====================
/**
 * Data captured at one checkpoint of a scenario run.
 */
export interface CheckpointResult {
  /**
   * Index of the turn this checkpoint belongs to.
   * NOTE(review): presumably an index into `EvalScenario.turns` — confirm base.
   */
  turnIndex: number;

  /** Path of the screenshot taken for this checkpoint. */
  screenshotPath: string;

  /** null when VLM scoring failed — screenshot is still preserved. */
  score: VlmScore | null;

  /** Elements recorded with this checkpoint. */
  elements: PPTElement[];
}
/**
 * Outcome of one run of one scenario.
 */
export interface ScenarioRunResult {
  /** Identifier of the scenario that was run (cf. `EvalScenario.id`). */
  scenarioId: string;

  /** Index of this run among the runs of the same scenario. */
  runIndex: number;

  /** Name of the model used for this run. */
  model: string;

  /** Checkpoint results collected during the run. */
  checkpoints: CheckpointResult[];

  /** Per-turn wall-clock latency (ms) from runAgentLoop start to end. */
  turnDurationsMs?: number[];

  /**
   * Optional error text.
   * NOTE(review): presumably set only when the run failed — confirm.
   */
  error?: string;
}
/**
 * Top-level report bundling the results of scenario runs.
 */
export interface EvalReport {
  /** Timestamp of the report, as a string (format not specified in this file). */
  timestamp: string;

  /** Name of the model the report is for. */
  model: string;

  /** Results of the scenario runs included in the report. */
  scenarios: ScenarioRunResult[];
}