// File: OpenMAIC-React/tests/eval/outline-language/reporter.test.ts
// Last commit by muthuk1 (a0ebf39, verified):
//   "Add missing files: LICENSE, Dockerfile, .github, tests, e2e, eval, scripts, configs"
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { mkdtempSync, rmSync, readFileSync, existsSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
import { writeReport } from '@/eval/outline-language/reporter';
import type { EvalResult } from '@/eval/outline-language/types';
/**
 * Test suite for `writeReport`.
 *
 * Each test writes a report into its own temporary directory and then
 * inspects the returned path and the file contents on disk.
 */
describe('writeReport', () => {
  // Scratch directory for the current test; created before and removed after each case.
  let runDir: string;

  beforeEach(() => {
    const prefix = join(tmpdir(), 'reporter-test-');
    runDir = mkdtempSync(prefix);
  });

  afterEach(() => {
    // `force: true` keeps cleanup from throwing if the directory is already gone.
    rmSync(runDir, { recursive: true, force: true });
  });

  // Fixture: a case the judge accepted.
  const passingCase: EvalResult = {
    case_id: 'en-001',
    category: 'english-teaching',
    requirement: 'teach English to beginners',
    groundTruth: 'English as primary',
    directive: 'Use English as the teaching language',
    outlinesCount: 3,
    judgePassed: true,
    judgeReason: 'correct language',
  };

  // Fixture: a case the judge rejected.
  const failingCase: EvalResult = {
    case_id: 'zh-002',
    category: 'chinese-teaching',
    requirement: '用中文讲授数学',
    groundTruth: '以中文为主',
    directive: 'Use Chinese throughout',
    outlinesCount: 2,
    judgePassed: false,
    judgeReason: 'wrong phrasing',
  };

  const sample: EvalResult[] = [passingCase, failingCase];

  it('writes a report.md file with header, per-case detail, and summary table', () => {
    const models = {
      inferenceModel: 'openai:gpt-4.1',
      judgeModel: 'anthropic:claude-haiku-4-5',
    };

    const path = writeReport(runDir, sample, models);

    // The returned path must exist and be `report.md` inside the run directory.
    expect(existsSync(path)).toBe(true);
    expect(path).toBe(join(runDir, 'report.md'));

    const content = readFileSync(path, 'utf-8');

    // Fragments are checked in order: header, model lines, pass count,
    // per-case headings, then the summary table header and its rows.
    const expectedFragments = [
      '# Outline Language Inference Eval Results',
      '**Model**: openai:gpt-4.1',
      '**Judge model**: anthropic:claude-haiku-4-5',
      '**Passed**: 1/2',
      '### PASS en-001',
      '### **FAIL** zh-002',
      '## Summary',
      '| # | Case | Category | Outlines | Result | Judge reason |',
      '| 1 | en-001 | english-teaching | 3 | PASS | correct language |',
      '| 2 | zh-002 | chinese-teaching | 2 | FAIL | wrong phrasing |',
    ];
    for (const fragment of expectedFragments) {
      expect(content).toContain(fragment);
    }
  });

  it('returns a successfully written path even when all cases pass', () => {
    // Shallow copy of the passing fixture so the shared sample is not reused by reference.
    const allPass: EvalResult[] = [{ ...passingCase }];
    const models = {
      inferenceModel: 'openai:gpt-4.1',
      judgeModel: 'openai:gpt-4.1',
    };

    const path = writeReport(runDir, allPass, models);
    const content = readFileSync(path, 'utf-8');

    expect(content).toContain('**Passed**: 1/1 (100%)');
  });
});