/**
 * test/e2e-prompt-ab2.mjs
 *
 * 第二轮提示词 A/B 测试：
 *   ⑤ 工具结果续行提示 (extractToolResultNatural 尾部)
 *   ② thinkingSuffix (每条用户消息末尾)  
 *   ③ fewShotResponse (few-shot 示例文字)
 *
 * 每个提示词的测试设计侧重于其特定影响面：
 *   - ⑤ 续行提示 → 多轮工具循环中模型是否持续行动
 *   - ② 方向后缀 → 模型是否在每条消息后立即行动
 *   - ③ few-shot → 格式遵循度和叙述风格
 *
 * 用法：
 *   VARIANT=baseline node test/e2e-prompt-ab2.mjs
 *   VARIANT=candidate_x node test/e2e-prompt-ab2.mjs
 *   node test/e2e-prompt-ab2.mjs --compare
 */

// Runtime configuration, resolved once at startup from the environment and argv.
// PORT selects the local server port (3010 when unset or empty).
const BASE_URL = 'http://localhost:' + (process.env.PORT || 3010);
// Model identifier sent with every request.
const MODEL = 'claude-sonnet-4-5-20251120';
// Default cap on request/response rounds per scenario.
const MAX_TURNS = 10;
// Label of the prompt variant under test; it is also part of the result file name.
const VARIANT = process.env.VARIANT ? process.env.VARIANT : 'current';
// `--compare` anywhere on the command line switches to comparison output.
const COMPARE_MODE = process.argv.some((arg) => arg === '--compare');

// ─── Colours ────────────────────────────────────────────────────────
// ANSI escape sequences for terminal styling; `reset` ends any active style.
const C = {
    reset: '\x1b[0m',
    bold: '\x1b[1m',
    dim: '\x1b[2m',
    green: '\x1b[32m',
    red: '\x1b[31m',
    yellow: '\x1b[33m',
    cyan: '\x1b[36m',
    blue: '\x1b[34m',
    magenta: '\x1b[35m',
    gray: '\x1b[90m',
};

/** Green line prefixed with a check mark. */
const ok = (text) => {
    return `${C.green}✅ ${text}${C.reset}`;
};

/** Red line prefixed with a cross mark. */
const fail = (text) => {
    return `${C.red}❌ ${text}${C.reset}`;
};

/** Yellow line prefixed with a warning sign. */
const warn = (text) => {
    return `${C.yellow}⚠  ${text}${C.reset}`;
};

/** Bold cyan section header, preceded by a blank line. */
const hdr = (title) => {
    return `\n${C.bold}${C.cyan}━━━ ${title} ━━━${C.reset}`;
};

/** Gray detail line indented by two spaces. */
const info = (text) => {
    return `  ${C.gray}${text}${C.reset}`;
};

// ─── Base tool set ──────────────────────────────────────────────────
// Tool definitions sent with every request. Each row is
// [name, description, parameters in schema order, required parameters];
// every parameter is declared as a plain string.
const TOOLS = [
    ['Read', 'Reads a file.', ['file_path'], ['file_path']],
    ['Write', 'Writes a file.', ['file_path', 'content'], ['file_path', 'content']],
    ['Bash', 'Executes a bash command.', ['command'], ['command']],
    ['Grep', 'Search for patterns in files.', ['pattern', 'path'], ['pattern']],
    ['LS', 'Lists directory contents.', ['path'], ['path']],
    ['attempt_completion', 'Present the final result.', ['result'], ['result']],
].map(([name, description, params, required]) => ({
    name,
    description,
    input_schema: {
        type: 'object',
        properties: Object.fromEntries(params.map((param) => [param, { type: 'string' }])),
        required,
    },
}));

// ─── Virtual file system ────────────────────────────────────────────
// In-memory stand-in for the project tree, keyed by absolute path.
// `Write` calls mutate this object, so a file written during one scenario
// stays visible to every scenario that runs afterwards.
const MOCK_FS = {
    '/project/package.json': '{"name":"my-app","version":"2.0.0","dependencies":{"express":"^4.18.0","lodash":"^4.17.21"}}',
    '/project/src/index.ts': 'import express from "express";\nimport { router } from "./router";\nconst app = express();\napp.use("/api", router);\napp.listen(3000);\n',
    '/project/src/router.ts': 'import { Router } from "express";\nexport const router = Router();\nrouter.get("/health", (_, res) => res.json({ ok: true }));\nrouter.get("/users", (_, res) => res.json([]));\n// TODO: add POST /users\n',
    '/project/src/utils.ts': 'export function clamp(v: number, min: number, max: number) {\n  return Math.min(Math.max(v, min), max);\n}\n// TODO: add debounce function\n',
    '/project/tsconfig.json': '{"compilerOptions":{"target":"es2020","module":"commonjs","strict":true}}',
    '/project/README.md': '# My App\nExpress API server.\n## API\n- GET /api/health\n- GET /api/users\n',
};

/**
 * Simulates one tool call against the in-memory `MOCK_FS`.
 *
 * The returned string is what the caller forwards as the tool result.
 *
 * @param {string} name - Tool name (`Read`, `Write`, `Bash`, `Grep`, `LS`,
 *   `attempt_completion`); any other name gets a generic acknowledgement.
 * @param {object} [input] - Tool arguments; a missing value is treated as `{}`.
 * @returns {string} Tool output. `attempt_completion` yields `__DONE__:<result>`;
 *   malformed `Write` arguments yield an `Error: ...` string instead of throwing.
 */
function mockExec(name, input) {
    const args = input ?? {};
    switch (name) {
        case 'Read':
            // Own-key lookup: an empty file is still a file, and inherited keys
            // such as "constructor" must not be mistaken for file contents.
            return Object.hasOwn(MOCK_FS, args.file_path)
                ? MOCK_FS[args.file_path]
                : `Error: File not found: ${args.file_path}`;
        case 'Write': {
            const { file_path: filePath, content } = args;
            // Report bad arguments as tool output so the conversation can go on,
            // and so that only strings are ever stored in MOCK_FS.
            if (typeof filePath !== 'string' || typeof content !== 'string') {
                return 'Error: Write requires string file_path and content';
            }
            MOCK_FS[filePath] = content;
            return `Wrote ${content.length} chars`;
        }
        case 'Bash': {
            if (args.command?.includes('npm test')) return 'Tests passed: 3/3';
            if (args.command?.includes('tsc')) return 'Compilation successful';
            return `$ ${args.command}\n(ok)`;
        }
        case 'Grep': {
            // Case-insensitive substring match; the needle is lowered once.
            const needle = String(args.pattern ?? '').toLowerCase();
            // Honour `path` only when it is absolute. Other values are not
            // resolved and leave the search unrestricted.
            const scope = typeof args.path === 'string' && args.path.startsWith('/') ? args.path : '';
            const hits = [];
            for (const [filePath, text] of Object.entries(MOCK_FS)) {
                if (!filePath.startsWith(scope)) continue;
                text.split('\n').forEach((line, index) => {
                    if (line.toLowerCase().includes(needle)) {
                        hits.push(`${filePath}:${index + 1}:${line.trim()}`);
                    }
                });
            }
            return hits.join('\n') || 'No matches.';
        }
        case 'LS':
            return Object.keys(MOCK_FS)
                .filter((filePath) => filePath.startsWith(args.path || '/project'))
                .join('\n');
        case 'attempt_completion':
            return `__DONE__:${args.result}`;
        default:
            return `Executed ${name}`;
    }
}

// ─── Multi-turn engine ──────────────────────────────────────────────
/**
 * Runs one tool-use conversation against `${BASE_URL}/v1/messages`, answering
 * every tool call with `mockExec`, and collects metrics about the exchange.
 *
 * The loop ends when a response has `stop_reason === 'end_turn'` or carries no
 * tool calls (`stopped`), when a tool result starts with `__DONE__`
 * (`completed`), or when `maxTurns` rounds have been spent. Tool calls found
 * in a response that ends the loop are still counted and logged, but they are
 * not executed.
 *
 * @param {string} userMessage - First user message of the conversation.
 * @param {object} [opts]
 * @param {Array<object>} [opts.tools=TOOLS] - Tool definitions sent with each request.
 * @param {string} [opts.systemPrompt] - System prompt; an empty value selects the built-in default.
 * @param {object} [opts.toolChoice] - Forwarded as `tool_choice` when provided.
 * @param {number} [opts.maxTurns=MAX_TURNS] - Upper bound on request/response rounds.
 * @returns {Promise<object>} Metrics: `totalToolCalls`, `totalTextChars`, `turns`,
 *   `firstTurnToolCount`, `firstTurnTextLen`, `toolLog`, `completed`, `stopped`,
 *   `narrationRatio` and `toolPath`.
 * @throws {Error} When the server answers with a non-OK status, or when the
 *   payload has no `content` array.
 */
async function runMultiTurn(userMessage, opts = {}) {
    const { tools = TOOLS, systemPrompt = '', toolChoice, maxTurns = MAX_TURNS } = opts;
    const messages = [{ role: 'user', content: userMessage }];
    const system = systemPrompt || 'You are an AI coding assistant. Working directory: /project.';

    let totalToolCalls = 0;
    let totalTextChars = 0;
    let turns = 0;
    let firstTurnToolCount = 0;
    let firstTurnTextLen = 0;
    const toolLog = [];
    let completed = false;
    let stopped = false; // the model ended its turn before the task was completed

    while (turns < maxTurns) {
        turns++;
        const resp = await fetch(`${BASE_URL}/v1/messages`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json', 'x-api-key': 'dummy' },
            body: JSON.stringify({
                model: MODEL, max_tokens: 4096, system, tools,
                ...(toolChoice ? { tool_choice: toolChoice } : {}),
                messages,
            }),
        });
        if (!resp.ok) {
            // Best effort: keep a short excerpt of the body so the failure can be diagnosed.
            const body = await resp.text().catch(() => '');
            const detail = body.trim().slice(0, 200);
            throw new Error(detail ? `HTTP ${resp.status}: ${detail}` : `HTTP ${resp.status}`);
        }
        const data = await resp.json();

        // A payload without a content array is not a model reply. Scoring it as
        // "zero tools, zero text" would let text-length-only scenarios pass.
        if (!Array.isArray(data?.content)) {
            const detail = data?.error?.message ?? String(JSON.stringify(data)).slice(0, 200);
            throw new Error(`Malformed response: ${detail}`);
        }

        const textBlocks = data.content.filter(b => b.type === 'text');
        const toolUseBlocks = data.content.filter(b => b.type === 'tool_use');
        const turnTextLen = textBlocks.reduce((sum, b) => sum + (b.text?.length || 0), 0);

        totalTextChars += turnTextLen;
        totalToolCalls += toolUseBlocks.length;

        if (turns === 1) {
            firstTurnToolCount = toolUseBlocks.length;
            firstTurnTextLen = turnTextLen;
        }

        for (const tb of toolUseBlocks) {
            toolLog.push({ turn: turns, tool: tb.name });
        }

        // `completed` is only ever set right before the loop exits below, so
        // reaching this point always means the model stopped on its own.
        if (data.stop_reason === 'end_turn' || toolUseBlocks.length === 0) {
            stopped = true;
            break;
        }

        // Echo the assistant turn, then answer every tool call in one user turn.
        messages.push({ role: 'assistant', content: data.content });
        const results = toolUseBlocks.map(tb => ({
            type: 'tool_result', tool_use_id: tb.id,
            content: mockExec(tb.name, tb.input),
        }));
        messages.push({ role: 'user', content: results });

        if (results.some(r => r.content.startsWith('__DONE__'))) { completed = true; break; }
    }

    return {
        totalToolCalls, totalTextChars, turns,
        firstTurnToolCount, firstTurnTextLen,
        toolLog, completed, stopped,
        // Share of output that is prose, weighting each tool call as 100 characters.
        narrationRatio: totalTextChars / Math.max(totalTextChars + totalToolCalls * 100, 1),
        toolPath: toolLog.map(t => `${t.turn}:${t.tool}`).join(' → '),
    };
}

// ─── Test scenarios ─────────────────────────────────────────────────
// Scenarios are listed per prompt group; the group label is stamped onto each
// entry right after its `id`, so every scenario object carries
// id, group, name, description, prompt, expect and (optionally) toolChoice.
const SCENARIOS = [
    // ========= ⑤ continuation-hint tests =========
    ['⑤ 续行提示', [
        {
            id: 'continuation_3step',
            name: '3 步连续任务（不中断）',
            description: '模型必须连续执行 3 步，不能中途停下。测试续行指令是否有效。',
            prompt: 'Step 1: Read /project/src/router.ts. Step 2: Read /project/src/utils.ts. Step 3: After reading both, use attempt_completion to summarize all TODO items found.',
            expect: { minTools: 3, completed: true },
            toolChoice: { type: 'any' },
        },
        {
            id: 'continuation_after_error',
            name: '错误后继续',
            description: '读取不存在的文件→收到错误→应继续尝试其他文件而不是停下。',
            prompt: 'Read /project/src/nonexistent.ts. If it fails, read /project/src/index.ts instead.',
            expect: { minTools: 2 },
        },
        {
            id: 'continuation_long_chain',
            name: '长链任务（≥4 步）',
            description: '测试在 4+ 步工具链中模型是否持续推进。',
            prompt: 'Please do these steps in order: 1) LS /project/src 2) Read /project/src/index.ts 3) Read /project/src/router.ts 4) Grep for "TODO" in /project/src 5) attempt_completion with a summary of all findings.',
            expect: { minTools: 4, completed: true },
            toolChoice: { type: 'any' },
        },
    ]],

    // ========= ② direction-suffix tests =========
    ['② 方向后缀', [
        {
            id: 'suffix_immediate_action',
            name: '立即行动（无叙述）',
            description: '简单请求应紧随后缀指示直接行动，而不是先描述计划。',
            prompt: 'Show me the project structure.',
            expect: { firstTurnAction: true, maxFirstTurnText: 100 },
        },
        {
            id: 'suffix_ambiguous_task',
            name: '模糊任务也行动',
            description: '即使任务稍有模糊，模型也应先行动（读文件）再讨论。',
            prompt: 'Help me understand this project.',
            expect: { firstTurnAction: true },
        },
        {
            id: 'suffix_multi_file',
            name: '多文件并行',
            description: '方向后缀应让模型在一轮内并行调用多个工具。',
            prompt: 'Read /project/src/index.ts and /project/src/router.ts and /project/tsconfig.json.',
            expect: { firstTurnMinTools: 2 },
        },
    ]],

    // ========= ③ few-shot tests =========
    ['③ few-shot', [
        {
            id: 'fewshot_format',
            name: '输出格式遵循度',
            description: '模型是否严格遵循 ```json action 格式（而不是其他变体）。',
            prompt: 'Read /project/package.json and tell me the project name.',
            expect: { formatCorrect: true, minTools: 1 },
        },
        {
            id: 'fewshot_style_match',
            name: '风格模仿 —— 叙述简洁度',
            description: 'few-shot 样本越简洁，模型的回复也应越简洁。',
            prompt: 'List all TypeScript files in the project.',
            expect: { maxFirstTurnText: 80 },
        },
        {
            id: 'fewshot_no_meta',
            name: '无元叙述',
            description: '模型不应输出类似 "I will use the structured format" 的自我描述。',
            prompt: 'Check if there are any TODO comments in /project/src/utils.ts.',
            expect: { noMetaText: true, minTools: 1 },
        },
    ]],
].flatMap(([group, cases]) => cases.map(({ id, ...rest }) => ({ id, group, ...rest })));

// ─── Compare mode ───────────────────────────────────────────────────
// With `--compare`, print the saved result files (test/prompt-ab2-results-*.json)
// side by side, one column per variant, then exit without running any scenario.
if (COMPARE_MODE) {
    const fs = await import('fs');
    // A missing results directory simply means there is nothing to compare yet.
    const files = fs.existsSync('test')
        ? fs.readdirSync('test')
            .filter(f => f.startsWith('prompt-ab2-results-') && f.endsWith('.json'))
            .sort()
        : [];

    // Load defensively: one corrupt or partial file must not abort the whole
    // comparison, so it is skipped with a warning instead.
    const results = [];
    for (const file of files) {
        try {
            const parsed = JSON.parse(fs.readFileSync(`test/${file}`, 'utf-8'));
            if (!Array.isArray(parsed?.scenarios)) throw new Error('missing "scenarios" array');
            // Fall back to the file name so column headers always have a label.
            results.push({ file, ...parsed, variant: String(parsed.variant ?? file) });
        } catch (err) {
            console.log(warn(`跳过无法解析的结果文件 ${file}: ${err.message}`));
        }
    }

    if (results.length < 2) {
        console.log(`\n${fail('需要至少 2 个结果文件。')}`);
        process.exit(1);
    }

    console.log(`\n${C.bold}${C.magenta}══ 第二轮提示词 A/B 对比 ══${C.reset}\n`);
    results.forEach(r => console.log(`  ${C.cyan}${r.variant}${C.reset} — ${r.timestamp}`));

    // One table per scenario group, in the order groups first appear in SCENARIOS.
    const groups = [...new Set(SCENARIOS.map(s => s.group))];
    for (const group of groups) {
        console.log(hdr(group));
        const groupScenarios = SCENARIOS.filter(s => s.group === group);

        console.log(`${'─'.repeat(120)}`);
        const headerParts = ['场景'.padEnd(28)];
        for (const r of results) headerParts.push(r.variant.padEnd(25));
        console.log(`${C.bold}${headerParts.join('')}${C.reset}`);
        console.log(`${'─'.repeat(120)}`);

        for (const sc of groupScenarios) {
            const row = [sc.id.padEnd(28)];
            for (const r of results) {
                const s = r.scenarios.find(x => x.id === sc.id);
                if (!s) { row.push('N/A'.padEnd(25)); continue; }
                const m = s.metrics;
                const emoji = s.passed ? '✅' : '❌';
                // Scenarios that errored were saved with `metrics: null`.
                const brief = m
                    ? `${emoji} T:${m.totalToolCalls} N:${Math.round((m.narrationRatio || 0) * 100)}% ${m.turns}轮`
                    : '❌ ERR';
                row.push(brief.padEnd(25));
            }
            console.log(row.join(''));
        }
    }

    // Per-variant summary.
    console.log(`\n${C.bold}汇总:${C.reset}`);
    for (const r of results) {
        const pass = r.scenarios.filter(s => s.passed).length;
        // Average narration only over scenarios that produced metrics: errored
        // runs would otherwise count as 0% and flatter the variant, and an
        // empty list would yield NaN.
        const measured = r.scenarios.filter(s => s.metrics);
        const avgNarr = measured.length > 0
            ? measured.reduce((sum, s) => sum + (s.metrics.narrationRatio || 0), 0) / measured.length
            : 0;
        const totalTools = r.scenarios.reduce((sum, s) => sum + (s.metrics?.totalToolCalls || 0), 0);
        const completions = r.scenarios.filter(s => s.metrics?.completed).length;
        console.log(`  ${C.cyan}${r.variant}${C.reset}: ${pass}/${r.scenarios.length}通过  工具:${totalTools}  叙述:${Math.round(avgNarr * 100)}%  完成:${completions}`);
    }
    process.exit(0);
}

// ─── Main test flow ─────────────────────────────────────────────────
console.log(`\n${C.bold}${C.magenta}  第二轮提示词 A/B 测试${C.reset}`);
console.log(info(`VARIANT=${VARIANT}  MODEL=${MODEL}`));

// Pre-flight: abort early when the server does not answer GET /v1/models with
// an OK status. The reason is printed too, so that "nothing is listening" and
// "the endpoint answered with an error status" can be told apart.
try {
    const r = await fetch(`${BASE_URL}/v1/models`, { headers: { 'x-api-key': 'dummy' } });
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    console.log(`\n${ok('服务器在线')}`);
} catch (err) {
    console.log(`\n${fail('服务器未运行')}`);
    // Prefer the underlying cause code when the error carries one.
    console.log(info(`${BASE_URL}/v1/models: ${err?.cause?.code ?? err?.message ?? err}`));
    process.exit(1);
}

// Accumulators filled by the scenario loop and read by the code that follows it.
const scenarioResults = [];
let passed = 0;
let failedCount = 0;
let currentGroup = '';

for (const sc of SCENARIOS) {
    // Print a group header whenever the scenario list moves on to a new group.
    if (sc.group !== currentGroup) {
        currentGroup = sc.group;
        console.log(hdr(currentGroup));
    }
    process.stdout.write(`  ${C.blue}▶${C.reset} ${C.bold}${sc.name}${C.reset}\n`);
    console.log(info(sc.description));

    const t0 = Date.now();
    try {
        const r = await runMultiTurn(sc.prompt, { toolChoice: sc.toolChoice });
        const expected = sc.expect;

        let testPassed = true;
        const failReasons = [];
        const reject = (reason) => { testPassed = false; failReasons.push(reason); };

        // Numeric thresholds are compared whenever they are declared (`!= null`),
        // so a limit of 0 is enforced instead of being skipped as falsy.
        // NOTE(review): `expect.noMetaText` is declared by a scenario but nothing
        // below reads it, and `formatCorrect` only verifies that at least one
        // tool call happened.
        if (expected.minTools != null && r.totalToolCalls < expected.minTools) {
            reject(`工具调用 ${r.totalToolCalls} < ${expected.minTools}`);
        }
        if (expected.completed && !r.completed) {
            reject('任务未完成（未调用 attempt_completion）');
        }
        if (expected.firstTurnAction && r.firstTurnToolCount === 0) {
            reject('第一轮无工具调用');
        }
        if (expected.maxFirstTurnText != null && r.firstTurnTextLen > expected.maxFirstTurnText) {
            // Warning only: recorded as a reason, but the scenario still passes.
            failReasons.push(`首轮文本 ${r.firstTurnTextLen} > ${expected.maxFirstTurnText} (警告)`);
        }
        if (expected.firstTurnMinTools != null && r.firstTurnToolCount < expected.firstTurnMinTools) {
            reject(`首轮工具 ${r.firstTurnToolCount} < ${expected.firstTurnMinTools}`);
        }
        if (expected.formatCorrect && r.totalToolCalls === 0) {
            reject('无工具调用（无法验证格式）');
        }

        console.log(info(`  工具: ${r.totalToolCalls}  轮数: ${r.turns}  文本: ${r.totalTextChars}chars  叙述: ${Math.round(r.narrationRatio * 100)}%  完成: ${r.completed ? '✅' : '❌'}`));
        console.log(info(`  链: ${r.toolPath}`));

        const seconds = ((Date.now() - t0) / 1000).toFixed(1);
        if (testPassed) {
            // Any reasons left on a passing scenario are warnings; show them inline.
            const notes = failReasons.length > 0 ? ` — ${failReasons.join(', ')}` : '';
            console.log(`  ${ok('通过')} (${seconds}s)${notes}`);
            passed++;
        } else {
            console.log(`  ${fail('未通过')} (${seconds}s)`);
            failReasons.forEach(reason => console.log(`    ${C.yellow}→ ${reason}${C.reset}`));
            failedCount++;
        }

        scenarioResults.push({ id: sc.id, name: sc.name, group: sc.group, passed: testPassed, failReasons, metrics: r });
    } catch (err) {
        // A scenario that throws is recorded as failed, with no metrics.
        console.log(`  ${fail('错误')}: ${err.message}`);
        failedCount++;
        scenarioResults.push({ id: sc.id, name: sc.name, group: sc.group, passed: false, failReasons: [err.message], metrics: null });
    }
}

// Final tally for this variant, framed by two rules of 62 box characters.
const total = passed + failedCount;
console.log(`\n${'═'.repeat(62)}`);
console.log([
    `${C.bold}  [${VARIANT}] 结果: `,
    `${C.green}${passed} 通过${C.reset}`,
    `${C.bold} / `,
    // The failure count is only coloured red when something actually failed.
    `${failedCount > 0 ? C.red : ''}${failedCount} 未通过${C.reset}`,
    `${C.bold} / ${total} 场景${C.reset}`,
].join(''));
console.log('═'.repeat(62));

// Persist the run as pretty-printed JSON; `--compare` reads these files back.
const fs = await import('fs');
const out = {
    variant: VARIANT,
    timestamp: new Date().toISOString(),
    model: MODEL,
    scenarios: scenarioResults,
    summary: { passed, failed: failedCount, total },
};
const outFile = `test/prompt-ab2-results-${VARIANT}.json`;
fs.writeFileSync(outFile, JSON.stringify(out, null, 2));
console.log(`\n${info(`已保存: ${outFile}`)}`);
console.log(info('对比: node test/e2e-prompt-ab2.mjs --compare'));
console.log();