Add missing files: LICENSE, Dockerfile, .github, tests, e2e, eval, scripts, configs

a0ebf39 verified 11 days ago

2.64 kB

	import { describe, it, expect, beforeEach, afterEach } from 'vitest';
	import { mkdtempSync, rmSync, readFileSync, existsSync } from 'fs';
	import { tmpdir } from 'os';
	import { join } from 'path';
	import { writeReport } from '@/eval/outline-language/reporter';
	import type { EvalResult } from '@/eval/outline-language/types';

	/**
	 * Exercises `writeReport` against a throwaway directory: verifies the path it
	 * returns and the text of the `report.md` file found at that path.
	 */
	describe('writeReport', () => {
	  // Fresh scratch directory per test so runs never observe each other's output.
	  let runDir: string;

	  beforeEach(() => {
	    runDir = mkdtempSync(join(tmpdir(), 'reporter-test-'));
	  });

	  afterEach(() => {
	    // `force` keeps cleanup from throwing if the directory is already gone.
	    rmSync(runDir, { recursive: true, force: true });
	  });

	  // Named fixtures (one passing, one failing) so individual tests can reuse a
	  // single case without indexing into the array.
	  const passingCase: EvalResult = {
	    case_id: 'en-001',
	    category: 'english-teaching',
	    requirement: 'teach English to beginners',
	    groundTruth: 'English as primary',
	    directive: 'Use English as the teaching language',
	    outlinesCount: 3,
	    judgePassed: true,
	    judgeReason: 'correct language',
	  };

	  const failingCase: EvalResult = {
	    case_id: 'zh-002',
	    category: 'chinese-teaching',
	    requirement: '用中文讲授数学',
	    groundTruth: '以中文为主',
	    directive: 'Use Chinese throughout',
	    outlinesCount: 2,
	    judgePassed: false,
	    judgeReason: 'wrong phrasing',
	  };

	  const sample: EvalResult[] = [passingCase, failingCase];

	  it('writes a report.md file with header, per-case detail, and summary table', () => {
	    const path = writeReport(runDir, sample, {
	      inferenceModel: 'openai:gpt-4.1',
	      judgeModel: 'anthropic:claude-haiku-4-5',
	    });
	    expect(existsSync(path)).toBe(true);
	    expect(path).toBe(join(runDir, 'report.md'));

	    const content = readFileSync(path, 'utf-8');

	    // Header section.
	    expect(content).toContain('# Outline Language Inference Eval Results');
	    expect(content).toContain('Model: openai:gpt-4.1');
	    expect(content).toContain('Judge model: anthropic:claude-haiku-4-5');
	    expect(content).toContain('Passed: 1/2');

	    // Per-case detail headings.
	    expect(content).toContain('### PASS en-001');
	    expect(content).toContain('### FAIL zh-002');

	    // Summary table: header row plus one row per case, numbered from 1.
	    expect(content).toContain('## Summary');
	    expect(content).toContain('| # | Case | Category | Outlines | Result | Judge reason |');
	    expect(content).toContain('| 1 | en-001 | english-teaching | 3 | PASS | correct language |');
	    expect(content).toContain('| 2 | zh-002 | chinese-teaching | 2 | FAIL | wrong phrasing |');
	  });

	  it('returns a successfully written path even when all cases pass', () => {
	    const allPass: EvalResult[] = [{ ...passingCase }];
	    const path = writeReport(runDir, allPass, {
	      inferenceModel: 'openai:gpt-4.1',
	      judgeModel: 'openai:gpt-4.1',
	    });

	    // Check the returned path itself, as the test title promises, rather than
	    // relying on readFileSync throwing when the file is missing.
	    expect(path).toBe(join(runDir, 'report.md'));
	    expect(existsSync(path)).toBe(true);

	    const content = readFileSync(path, 'utf-8');
	    expect(content).toContain('Passed: 1/1 (100%)');
	  });
	});