File size: 1,626 Bytes

a0ebf39

import type { PPTElement } from '@/lib/types/slides';
import type { Stage, Scene } from '@/lib/types/stage';

// ==================== Scenario ====================

export interface EvalTurn {
  userMessage: string;
  checkpoint?: boolean;
}

export interface EvalScenario {
  id: string;
  name: string;
  description: string;
  tags: string[];
  initialStoreState: {
    stage: Stage | null;
    scenes: Scene[];
    currentSceneId: string | null;
    whiteboardElements?: PPTElement[];
  };
  config: {
    agentIds: string[];
    sessionType: 'qa' | 'discussion';
  };
  turns: EvalTurn[];
  model?: string;
  repeat?: number;
}

// ==================== Scoring ====================

export interface DimensionScore {
  score: number;
  reason: string;
}

export interface VlmScore {
  readability: DimensionScore;
  overlap: DimensionScore;
  rendering_correctness: DimensionScore;
  content_completeness: DimensionScore;
  layout_logic: DimensionScore;
  overall: number;
  issues: string[];
}

// ==================== Results ====================

export interface CheckpointResult {
  turnIndex: number;
  screenshotPath: string;
  /** null when VLM scoring failed — screenshot is still preserved. */
  score: VlmScore | null;
  elements: PPTElement[];
}

export interface ScenarioRunResult {
  scenarioId: string;
  runIndex: number;
  model: string;
  checkpoints: CheckpointResult[];
  /** Per-turn wall-clock latency (ms) from runAgentLoop start to end. */
  turnDurationsMs?: number[];
  error?: string;
}

export interface EvalReport {
  timestamp: string;
  model: string;
  scenarios: ScenarioRunResult[];
}