csa / test /e2e-agentic.mjs
ricebug's picture
Upload 76 files
ca8ab2d verified
/**
 * test/e2e-agentic.mjs
 *
 * Advanced end-to-end test: simulates a real Claude Code agentic loop.
 *
 * Features:
 * - Uses tool definitions identical to Claude Code's (Read/Write/Bash/Glob/Grep/LS, etc.)
 * - Automatically drives the multi-turn tool_use -> tool_result loop until end_turn
 * - Verifies complex multi-step tasks (analyze code -> modify -> verify)
 *
 * How to run:
 *   node test/e2e-agentic.mjs
 *   PORT=3010 node test/e2e-agentic.mjs
 */
// Base URL that every request in this script is sent to; PORT overrides the default port 3010.
const BASE_URL = 'http://localhost:' + String(process.env.PORT || 3010);
// The model Claude Code uses by default.
const MODEL = 'claude-sonnet-4-5-20251120';
// Allow at most 12 tool-call turns so a run can never loop forever.
const MAX_TURNS = 12;
// --- Colors ---
// ANSI escape sequences used to colorize terminal output.
const C = {
  reset: '\x1b[0m', bold: '\x1b[1m', dim: '\x1b[2m',
  green: '\x1b[32m', red: '\x1b[31m', yellow: '\x1b[33m',
  cyan: '\x1b[36m', blue: '\x1b[34m', magenta: '\x1b[35m', gray: '\x1b[90m',
};

// Formatting helpers: each wraps `s` in a color and resets the terminal afterwards.
// The status glyphs are real Unicode characters, so this file must be saved as UTF-8.
const ok = s => `${C.green}✅ ${s}${C.reset}`;
const fail = s => `${C.red}❌ ${s}${C.reset}`;
const warn = s => `${C.yellow}⚠ ${s}${C.reset}`;
// Section header: starts on a new line and frames the title with heavy horizontal bars.
const hdr = s => `\n${C.bold}${C.cyan}━━━ ${s} ━━━${C.reset}`;
// One-line record of a tool invocation.
const tool = s => ` ${C.magenta}🔧 ${s}${C.reset}`;
// Low-emphasis detail line.
const info = s => ` ${C.gray}${s}${C.reset}`;
// --- Complete Claude Code tool set ---
// Tool definitions sent in the `tools` field of each /v1/messages request.
// Built through small schema helpers; key order inside every object is kept
// stable because the array is serialized with JSON.stringify.
const CLAUDE_CODE_TOOLS = (() => {
  // Property schemas for the primitive parameter kinds used below.
  const text = (description) => ({ type: 'string', description });
  const integer = (description) => ({ type: 'integer', description });
  const flag = (description) => ({ type: 'boolean', description });
  const textList = (description) => ({ type: 'array', items: { type: 'string' }, description });

  // Assembles one tool entry; `required` is left out entirely when not supplied.
  const defineTool = (name, description, properties, required) => ({
    name,
    description,
    input_schema: { type: 'object', properties, ...(required ? { required } : {}) },
  });

  return [
    defineTool(
      'Read',
      'Reads a file from the local filesystem. You can read a specific line range or the entire file. Always prefer reading specific sections rather than entire large files.',
      {
        file_path: text('The absolute path to the file to read'),
        start_line: integer('The line number to start reading from (1-indexed, optional)'),
        end_line: integer('The line number to stop reading at (1-indexed, inclusive, optional)'),
      },
      ['file_path'],
    ),
    defineTool(
      'Write',
      'Write a file to the local filesystem. Overwrites the existing file if there is one.',
      {
        file_path: text('The absolute path to the file to write'),
        content: text('The content to write to the file'),
      },
      ['file_path', 'content'],
    ),
    defineTool(
      'Edit',
      'This is a tool for editing files. For moving or renaming files, you should generally use the Bash tool with the `mv` command instead.',
      {
        file_path: text('The absolute path to the file to modify'),
        old_string: text('The text to replace.'),
        new_string: text('The edited text to replace the old_string.'),
        replace_all: flag('Replace all occurrences (default: false)'),
      },
      ['file_path', 'old_string', 'new_string'],
    ),
    defineTool(
      'Bash',
      'Executes a given bash command in a persistent shell session.',
      {
        command: text('The command to execute'),
        timeout: integer('Optional timeout in milliseconds (max 600000)'),
      },
      ['command'],
    ),
    defineTool(
      'Glob',
      'Fast file pattern matching tool that works with any codebase size.',
      {
        pattern: text('The glob pattern to match files against (e.g. "**/*.ts")'),
        path: text('The directory to search in (optional, defaults to working directory)'),
      },
      ['pattern'],
    ),
    defineTool(
      'Grep',
      'Fast content search tool that works with any codebase size.',
      {
        pattern: text('The regex pattern to search for'),
        path: text('The path to search in (file or directory)'),
        include: text('Glob pattern for files to include (e.g. "*.ts")'),
        case_sensitive: flag('Whether the search is case-sensitive (default: false)'),
      },
      ['pattern'],
    ),
    defineTool(
      'LS',
      'Lists files and directories in a given path.',
      {
        path: text('The directory path to list'),
        ignore: textList('List of glob patterns to ignore'),
      },
      ['path'],
    ),
    // TodoRead takes no arguments and declares no `required` list.
    defineTool('TodoRead', 'Read the current todo list for the session.', {}),
    defineTool(
      'TodoWrite',
      'Create and manage a todo list for tracking tasks.',
      {
        todos: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              id: { type: 'string' },
              content: { type: 'string' },
              status: { type: 'string', enum: ['pending', 'in_progress', 'completed'] },
              priority: { type: 'string', enum: ['high', 'medium', 'low'] },
            },
            required: ['id', 'content', 'status', 'priority'],
          },
        },
      },
      ['todos'],
    ),
    defineTool(
      'WebFetch',
      'Fetch content from a URL and return the text content.',
      {
        url: text('The URL to fetch'),
        prompt: text('What specific information to extract from the page'),
      },
      ['url', 'prompt'],
    ),
    defineTool(
      'attempt_completion',
      'Once you have completed the task, use this tool to present the result to the user. Provide a final summary of what you did.',
      {
        result: text('The result of the task'),
        command: text('Optional command to demonstrate the result'),
      },
      ['result'],
    ),
    defineTool(
      'ask_followup_question',
      'Ask the user a follow-up question to clarify requirements.',
      {
        question: text('The question to ask'),
        options: textList('Optional list of choices'),
      },
      ['question'],
    ),
  ];
})();
// --- Virtual file system (simulated project layout) ---
// Maps absolute file paths to file contents. Each source file is written as an
// array of lines joined with '\n'; the trailing '' yields the final newline.
const VIRTUAL_FS = {
  '/project/package.json': JSON.stringify(
    {
      name: 'my-app',
      version: '1.0.0',
      scripts: { test: 'jest', build: 'tsc', dev: 'ts-node src/index.ts' },
      dependencies: { express: '^4.18.0', uuid: '^9.0.0' },
      devDependencies: { typescript: '^5.0.0', jest: '^29.0.0' },
    },
    null,
    2,
  ),
  '/project/src/index.ts': [
    "import express from 'express';",
    "import { router } from './routes/api';",
    'const app = express();',
    'app.use(express.json());',
    "app.use('/api', router);",
    'const PORT = process.env.PORT || 3000;',
    'app.listen(PORT, () => console.log(`Server running on port ${PORT}`));',
    'export default app;',
    '',
  ].join('\n'),
  '/project/src/routes/api.ts': [
    "import { Router } from 'express';",
    "import { UserController } from '../controllers/user';",
    'export const router = Router();',
    'const ctrl = new UserController();',
    "router.get('/users', ctrl.list);",
    "router.get('/users/:id', ctrl.get);",
    "router.post('/users', ctrl.create);",
    '// BUG: missing delete route',
    '',
  ].join('\n'),
  '/project/src/controllers/user.ts': [
    "import { Request, Response } from 'express';",
    'export class UserController {',
    'private users: Array<{id: string, name: string, email: string}> = [];',
    'list = (req: Request, res: Response) => {',
    'res.json(this.users);',
    '}',
    'get = (req: Request, res: Response) => {',
    'const user = this.users.find(u => u.id === req.params.id);',
    "if (!user) return res.status(404).json({ error: 'User not found' });",
    'res.json(user);',
    '}',
    'create = (req: Request, res: Response) => {',
    '// BUG: no validation on input fields',
    'const user = { id: Date.now().toString(), ...req.body };',
    'this.users.push(user);',
    'res.status(201).json(user);',
    '}',
    '// Missing: delete method',
    '}',
    '',
  ].join('\n'),
  '/project/src/models/user.ts': [
    'export interface User {',
    'id: string;',
    'name: string;',
    'email: string;',
    'createdAt: Date;',
    '}',
    'export interface CreateUserDto {',
    'name: string;',
    'email: string;',
    '}',
    '',
  ].join('\n'),
  '/project/tests/user.test.ts': [
    "import { UserController } from '../src/controllers/user';",
    "describe('UserController', () => {",
    "it('should create a user', () => {",
    '// TODO: implement',
    '});',
    "it('should list users', () => {",
    '// TODO: implement',
    '});',
    '});',
    '',
  ].join('\n'),
};

// --- Virtual todo storage ---
// Holds the todo list for the simulated session; starts out empty.
let virtualTodos = [];
// --- Tool executor (simulates the results of real Claude Code tool runs) ---

/**
 * Resolves `path` to a normalized absolute virtual path: relative paths are
 * taken relative to /project, `.`/`..`/empty segments are collapsed, and no
 * trailing slash is kept ('/' is returned for the root). A missing or empty
 * path resolves to /project.
 */
function resolveVirtualPath(path) {
  const raw = String(path || '/project');
  const absolute = raw.startsWith('/') ? raw : `/project/${raw}`;
  const segments = [];
  for (const segment of absolute.split('/')) {
    if (segment === '' || segment === '.') continue;
    if (segment === '..') segments.pop();
    else segments.push(segment);
  }
  return `/${segments.join('/')}`;
}

/** True when `filePath` is `dir` itself or lies anywhere beneath it. */
function isWithin(filePath, dir) {
  return dir === '/' || filePath === dir || filePath.startsWith(`${dir}/`);
}

/**
 * Compiles a glob into an anchored RegExp. `**` spans any number of path
 * segments, `*` and `?` stay within one segment, and every other character is
 * matched literally (brace expansion is not supported).
 */
function globToRegExp(glob) {
  let source = '';
  for (let i = 0; i < glob.length; i++) {
    const ch = glob[i];
    if (ch === '*' && glob[i + 1] === '*') {
      i++;
      if (glob[i + 1] === '/') {
        // "**/" may also match zero directories, e.g. "**/*.ts" matches "a.ts".
        i++;
        source += '(?:.*/)?';
      } else {
        source += '.*';
      }
    } else if (ch === '*') {
      source += '[^/]*';
    } else if (ch === '?') {
      source += '[^/]';
    } else {
      source += ch.replace(/[\\^$.|+()[\]{}]/g, '\\$&');
    }
  }
  return new RegExp(`^${source}$`);
}

/**
 * Returns a predicate over absolute file paths for `glob`:
 * - globs starting with '/' are tested against the absolute path,
 * - globs without any '/' are tested against the file's basename (any depth),
 * - all others are tested against the path relative to `baseDir`.
 */
function createGlobMatcher(glob, baseDir) {
  const re = globToRegExp(glob);
  if (glob.startsWith('/')) return (filePath) => re.test(filePath);
  if (!glob.includes('/')) {
    return (filePath) => re.test(filePath.slice(filePath.lastIndexOf('/') + 1));
  }
  const prefixLength = baseDir === '/' ? 1 : baseDir.length + 1;
  return (filePath) => re.test(filePath.slice(prefixLength));
}

/**
 * Returns a predicate telling whether a line matches `pattern`. The pattern is
 * used as a regular expression; if it does not compile it is matched as a
 * plain substring instead.
 */
function createLineMatcher(pattern, caseSensitive) {
  try {
    const re = new RegExp(pattern, caseSensitive ? '' : 'i');
    return (line) => re.test(line);
  } catch {
    const needle = caseSensitive ? pattern : pattern.toLowerCase();
    return (line) => (caseSensitive ? line : line.toLowerCase()).includes(needle);
  }
}

/**
 * Simulates one tool call against the in-memory VIRTUAL_FS / virtualTodos state.
 *
 * @param {string} name - Tool name as listed in CLAUDE_CODE_TOOLS.
 * @param {object} [input] - Tool arguments; a missing value is treated as {}.
 * @returns {string} The textual tool result. attempt_completion and
 *   ask_followup_question return the sentinel prefixes `__TASK_COMPLETE__:` and
 *   `__ASK__:`; unknown tools return a generic echo of their input.
 */
function executeTool(name, input = {}) {
  switch (name) {
    case 'LS': {
      const requested = input.path || '/project';
      const dir = resolveVirtualPath(requested);
      const prefixLength = dir === '/' ? 1 : dir.length + 1;
      // Lists every file below `dir` (recursively) as a path relative to it.
      // isWithin() respects directory boundaries, so "/project/src" does not
      // pick up a sibling such as "/project/src-old".
      const files = Object.keys(VIRTUAL_FS)
        .filter(p => isWithin(p, dir))
        .map(p => p.slice(prefixLength))
        .filter(p => p.length > 0);
      return files.length > 0
        ? files.join('\n')
        : `Directory listing of ${requested}:\n(empty)`;
    }
    case 'Glob': {
      const glob = String(input.pattern ?? '');
      const baseDir = resolveVirtualPath(input.path);
      const matchesGlob = createGlobMatcher(glob, baseDir);
      const matches = Object.keys(VIRTUAL_FS).filter(p => isWithin(p, baseDir) && matchesGlob(p));
      return matches.length > 0
        ? matches.join('\n')
        : `No files matching ${input.pattern}`;
    }
    case 'Grep': {
      const pattern = String(input.pattern ?? '');
      const searchRoot = resolveVirtualPath(input.path);
      const isIncluded = input.include
        ? createGlobMatcher(String(input.include), searchRoot)
        : () => true;
      const lineMatches = createLineMatcher(pattern, Boolean(input.case_sensitive));
      const results = [];
      for (const [fp, content] of Object.entries(VIRTUAL_FS)) {
        if (!isWithin(fp, searchRoot) || !isIncluded(fp)) continue;
        String(content).split('\n').forEach((line, i) => {
          if (lineMatches(line)) results.push(`${fp}:${i + 1}:${line.trim()}`);
        });
      }
      return results.length > 0
        ? results.join('\n')
        : `No matches for "${input.pattern}"`;
    }
    case 'Read': {
      // Check key presence rather than truthiness so an empty file is readable.
      if (!Object.hasOwn(VIRTUAL_FS, input.file_path)) return `Error: File not found: ${input.file_path}`;
      const content = VIRTUAL_FS[input.file_path];
      if (input.start_line || input.end_line) {
        const lines = content.split('\n');
        const start = (input.start_line || 1) - 1;
        const end = input.end_line || lines.length;
        return lines.slice(start, end).join('\n');
      }
      return content;
    }
    case 'Write': {
      const content = String(input.content ?? '');
      VIRTUAL_FS[input.file_path] = content;
      return `Successfully wrote ${content.length} characters to ${input.file_path}`;
    }
    case 'Edit': {
      if (!Object.hasOwn(VIRTUAL_FS, input.file_path)) return `Error: File not found: ${input.file_path}`;
      const content = VIRTUAL_FS[input.file_path];
      const oldString = String(input.old_string ?? '');
      const newString = String(input.new_string ?? '');
      if (!content.includes(oldString)) {
        return `Error: old_string not found in ${input.file_path}`;
      }
      // A replacer function inserts new_string verbatim; passing the string
      // itself would expand "$&", "$$", "$1"... as replacement patterns.
      const newContent = input.replace_all
        ? content.replaceAll(oldString, () => newString)
        : content.replace(oldString, () => newString);
      VIRTUAL_FS[input.file_path] = newContent;
      return `Successfully edited ${input.file_path}`;
    }
    case 'Bash': {
      const cmd = String(input.command ?? '');
      // Simulated output for common commands. `ls`/`find` are matched as whole
      // words: a substring test would also fire on paths such as ".../models/".
      if (/\b(?:ls|find)\b/.test(cmd)) {
        return Object.keys(VIRTUAL_FS).join('\n');
      }
      if (cmd.includes('cat ')) {
        const path = cmd.split('cat ')[1]?.trim();
        return Object.hasOwn(VIRTUAL_FS, path)
          ? VIRTUAL_FS[path]
          : `cat: ${path}: No such file or directory`;
      }
      if (cmd.includes('grep')) {
        // Uses the first double- or single-quoted argument as the pattern.
        return executeTool('Grep', { pattern: cmd.split('"')[1] || cmd.split("'")[1] || 'todo', path: '/project' });
      }
      if (cmd.includes('npm test') || cmd.includes('jest')) {
        return `PASS tests/user.test.ts\n UserController\n ✓ should create a user (pending)\n ✓ should list users (pending)\n\nTest Suites: 1 passed, 1 total`;
      }
      if (cmd.includes('tsc') || cmd.includes('build')) {
        return `src/routes/api.ts compiled successfully\nNo errors found`;
      }
      return `$ ${cmd}\n(command executed successfully)`;
    }
    case 'TodoRead': {
      if (virtualTodos.length === 0) return 'No todos yet.';
      return JSON.stringify(virtualTodos, null, 2);
    }
    case 'TodoWrite': {
      // A missing or malformed list is stored as an empty list instead of throwing.
      virtualTodos = Array.isArray(input.todos) ? input.todos : [];
      return `Todo list updated with ${virtualTodos.length} items`;
    }
    case 'WebFetch':
      return `[Fetched ${input.url}]\n\nThis is simulated web content. The page contains documentation about the requested topic: ${input.prompt}`;
    case 'attempt_completion':
      return `__TASK_COMPLETE__:${input.result}`;
    case 'ask_followup_question':
      return `__ASK__:${input.question}`;
    default:
      return `Tool ${name} executed with input: ${JSON.stringify(input)}`;
  }
}
// --- Agentic loop driver ---
/**
 * Drives a multi-turn conversation against `${BASE_URL}/v1/messages`: sends the
 * user message, executes every tool_use block of each reply through
 * executeTool(), feeds the results back as tool_result blocks, and repeats until
 * the model stops calling tools, attempt_completion is called, or MAX_TURNS
 * requests have been made.
 *
 * @param {string} userMessage - Initial user prompt.
 * @param {object} [options]
 * @param {string} [options.label] - Accepted for call-site readability; not used by the loop.
 * @param {boolean} [options.verbose=false] - Log model text and full tool calls instead of compact arrows.
 * @param {string[]} [options.extraTools] - When given, only tools whose name is listed are offered.
 * @param {object} [options.toolChoice] - Forwarded as `tool_choice` when set.
 * @returns {Promise<{toolCallLog: object[], finalResult: (string|null), turns: number}>}
 *   `finalResult` stays null when the turn limit is hit without a final answer.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function runAgentLoop(userMessage, { label = '', verbose = false, extraTools, toolChoice } = {}) {
  const COMPLETION_PREFIX = '__TASK_COMPLETE__:';
  const messages = [{ role: 'user', content: userMessage }];
  // Stronger system prompt: explicitly demands tool-first behavior and forbids
  // answering without calling tools.
  const systemPrompt = [
    'You are an AI coding assistant with full file system access.',
    'CRITICAL RULES:',
    '1. You MUST use tools to read files before discussing their content. Never guess file contents.',
    '2. You MUST use Write or Edit tools to actually modify files. Never just show code in text.',
    '3. You MUST use Bash to run commands. Never pretend to run them.',
    '4. Always use LS or Glob first to discover files if you are not sure about paths.',
    '5. Use attempt_completion when the task is fully done.',
    '6. Working directory is /project. All files are accessible via the Read tool.',
  ].join('\n');
  // The offered tool set does not change between turns, so compute it once.
  const tools = extraTools
    ? CLAUDE_CODE_TOOLS.filter(t => extraTools.includes(t.name))
    : CLAUDE_CODE_TOOLS;

  const toolCallLog = [];
  let turnCount = 0;
  let finalResult = null;
  let completed = false;

  while (turnCount < MAX_TURNS && !completed) {
    turnCount++;

    // Send the request.
    const resp = await fetch(`${BASE_URL}/v1/messages`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'x-api-key': 'dummy' },
      body: JSON.stringify({
        model: MODEL,
        max_tokens: 8096,
        system: systemPrompt,
        tools,
        ...(toolChoice ? { tool_choice: toolChoice } : {}),
        messages,
      }),
    });
    if (!resp.ok) {
      const text = await resp.text();
      throw new Error(`HTTP ${resp.status}: ${text.substring(0, 200)}`);
    }
    const data = await resp.json();
    const content = Array.isArray(data.content) ? data.content : [];
    const firstText = content.find(b => b.type === 'text')?.text;

    if (verbose && firstText) {
      console.log(info(` [Turn ${turnCount}] 模型文本: "${firstText.substring(0, 100)}..."`));
    }

    // Collect this turn's tool calls.
    const toolUseBlocks = content.filter(b => b.type === 'tool_use');
    if (data.stop_reason === 'end_turn' || toolUseBlocks.length === 0) {
      // The task ended naturally.
      finalResult = firstText || '(no text response)';
      break;
    }

    // Record the tool calls.
    for (const tb of toolUseBlocks) {
      toolCallLog.push({ turn: turnCount, tool: tb.name, input: tb.input });
      if (verbose) {
        // `?? {}` keeps JSON.stringify from returning undefined for a missing input.
        console.log(tool(`[Turn ${turnCount}] ${tb.name}(${JSON.stringify(tb.input ?? {}).substring(0, 80)})`));
      } else {
        process.stdout.write(`${C.magenta}→${tb.name}${C.reset} `);
      }
    }

    // Add the assistant reply to the history.
    messages.push({ role: 'assistant', content: data.content });

    // Execute the tools and collect their results.
    const toolResults = [];
    for (const tb of toolUseBlocks) {
      let result;
      let isError = false;
      try {
        result = executeTool(tb.name, tb.input ?? {});
      } catch (err) {
        // Report a crashing tool back to the model instead of aborting the run.
        isError = true;
        result = `Error: tool ${tb.name} failed: ${err?.message ?? err}`;
      }
      // Check for the task-completion sentinel.
      if (!isError && typeof result === 'string' && result.startsWith(COMPLETION_PREFIX)) {
        finalResult = result.slice(COMPLETION_PREFIX.length);
        toolCallLog.push({ turn: turnCount, tool: '__DONE__', result: finalResult });
        completed = true;
      }
      toolResults.push({
        type: 'tool_result',
        tool_use_id: tb.id,
        content: typeof result === 'string' ? result : JSON.stringify(result),
        ...(isError ? { is_error: true } : {}),
      });
    }

    // Add the tool results to the history; the loop condition stops the run
    // once a completion sentinel has been seen.
    messages.push({ role: 'user', content: toolResults });
  }

  if (!verbose) process.stdout.write('\n');
  return { toolCallLog, finalResult, turns: turnCount };
}
// --- Test harness ---
// Running totals and per-scenario outcomes, reported in the final summary.
let passed = 0;
let failed = 0;
const allResults = [];

/**
 * Runs one scenario and records its outcome.
 *
 * Prints the scenario name, awaits `fn`, then prints either a pass line (with
 * duration, turn count, the tool path and a preview of the final result) or a
 * fail line with the error message. A failure is recorded, never rethrown, so
 * later scenarios still run.
 *
 * @param {string} name - Scenario title.
 * @param {() => Promise<{turns?: number, toolCallLog?: object[], finalResult?: string}>} fn
 *   Scenario body; throwing marks the scenario as failed.
 * @returns {Promise<void>}
 */
async function test(name, fn) {
  const t0 = Date.now();
  const elapsed = () => ((Date.now() - t0) / 1000).toFixed(1);
  process.stdout.write(`\n ${C.blue}▶${C.reset} ${C.bold}${name}${C.reset}\n`);
  try {
    const result = await fn();
    console.log(` ${ok('通过')} (${elapsed()}s, ${result?.turns || '?'} 轮工具调用)`);
    if (result?.toolCallLog) {
      // '__DONE__' is the loop's internal completion marker, not a real tool call.
      const summary = result.toolCallLog
        .filter(t => t.tool !== '__DONE__')
        .map(t => `${t.turn}:${t.tool}`)
        .join(' → ');
      console.log(info(` 路径: ${summary}`));
    }
    if (result?.finalResult) {
      console.log(info(` 结果: "${String(result.finalResult).substring(0, 120)}..."`));
    }
    passed++;
    allResults.push({ name, ok: true });
  } catch (e) {
    // Anything can be thrown; fall back to String(e) when there is no message.
    const message = e?.message ?? String(e);
    console.log(` ${fail('失败')} (${elapsed()}s)`);
    console.log(` ${C.red}${message}${C.reset}`);
    failed++;
    allResults.push({ name, ok: false, error: message });
  }
}
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๆฃ€ๆต‹ๆœๅŠกๅ™จ
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(`\n${C.bold}${C.magenta} Cursor2API โ€” Claude Code Agentic ๅŽ‹ๆต‹${C.reset}`);
console.log(info(` BASE_URL=${BASE_URL} MODEL=${MODEL} MAX_TURNS=${MAX_TURNS}`));
try {
const r = await fetch(`${BASE_URL}/v1/models`, { headers: { 'x-api-key': 'dummy' } });
if (!r.ok) throw new Error();
console.log(`\n${ok('ๆœๅŠกๅ™จๅœจ็บฟ')}`);
} catch {
console.log(`\n${fail('ๆœๅŠกๅ™จๆœช่ฟ่กŒ๏ผŒ่ฏทๅ…ˆ npm run dev')}\n`);
process.exit(1);
}
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๅœบๆ™ฏ 1๏ผš้กน็›ฎ็ป“ๆž„ๆŽข็ดข๏ผˆLS โ†’ Glob โ†’ Read๏ผ‰
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(hdr('ๅœบๆ™ฏ 1๏ผš้กน็›ฎ็ป“ๆž„ๆŽข็ดข'));
await test('ๆŽข็ดข้กน็›ฎ็ป“ๆž„ๅนถๆ€ป็ป“', async () => {
const result = await runAgentLoop(
`Use the LS tool on /project to list all files. Then use Glob with pattern "**/*.ts" to find TypeScript files. Read at least one of the source files. Finally summarize what the project does.`,
{ label: 'ๆŽข็ดข' }
);
const { toolCallLog } = result;
const usedExplore = toolCallLog.some(t => ['LS', 'Glob', 'Read'].includes(t.tool));
if (!usedExplore) throw new Error(`ๆœชไฝฟ็”จไปปไฝ•ๆŽข็ดขๅทฅๅ…ทใ€‚ๅฎž้™…่ฐƒ็”จ: ${toolCallLog.map(t => t.tool).join(', ')}`);
return result;
});
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๅœบๆ™ฏ 2๏ผšไปฃ็ ๅฎกๆŸฅ๏ผˆRead โ†’ Grep โ†’ ่พ“ๅ‡บ้—ฎ้ข˜ๅˆ—่กจ๏ผ‰
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(hdr('ๅœบๆ™ฏ 2๏ผšไปฃ็ ๅฎกๆŸฅไธŽ Bug ๅ‘็Žฐ'));
await test('ๅฎกๆŸฅ UserController ๅนถๆ‰พๅˆฐ Bug', async () => {
const result = await runAgentLoop(
`Use the Read tool to read these two files:
1. /project/src/controllers/user.ts
2. /project/src/routes/api.ts
After reading both files, list all bugs, missing features, and security issues you find.`,
{ label: 'ๅฎกๆŸฅ' }
);
const { toolCallLog, finalResult } = result;
const readPaths = toolCallLog.filter(t => t.tool === 'Read').map(t => t.input.file_path || '');
if (readPaths.length === 0) throw new Error('ๆœช่ฏปๅ–ไปปไฝ•ๆ–‡ไปถ');
const mentionsBug = finalResult && (
finalResult.toLowerCase().includes('bug') ||
finalResult.toLowerCase().includes('missing') ||
finalResult.toLowerCase().includes('delete') ||
finalResult.toLowerCase().includes('valid')
);
if (!mentionsBug) throw new Error(`็ป“ๆžœๆœชๆๅŠๅทฒ็Ÿฅ Bug: "${finalResult?.substring(0, 200)}"`);
return result;
});
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๅœบๆ™ฏ 3๏ผšTodoWrite ไปปๅŠก่ง„ๅˆ’ โ†’ ๆ‰ง่กŒๅคšๆญฅไปปๅŠก
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(hdr('ๅœบๆ™ฏ 3๏ผšไปปๅŠก่ง„ๅˆ’ + ๅคšๆญฅๆ‰ง่กŒ'));
await test('็”จ Todo ่ง„ๅˆ’ๅนถไฟฎๅค็ผบๅคฑ็š„ delete ่ทฏ็”ฑ', async () => {
virtualTodos = [];
const result = await runAgentLoop(
`Task: add DELETE /users/:id route to the Express app.
Steps you MUST follow using tools:
1. Call TodoWrite with 3 todos: "Read controller", "Add delete method", "Add delete route"
2. Call Read on /project/src/controllers/user.ts
3. Call Read on /project/src/routes/api.ts
4. Call Write on /project/src/controllers/user.ts with the full updated content (add delete method)
5. Call Write on /project/src/routes/api.ts with the full updated content (add DELETE route)
6. Call TodoWrite again marking all todos completed`,
{ label: 'ไฟฎๅค', toolChoice: { type: 'any' } } // โ† tool_choice=any ๅผบๅˆถๅทฅๅ…ท่ฐƒ็”จ
);
const { toolCallLog } = result;
const usedTodo = toolCallLog.some(t => t.tool === 'TodoWrite');
if (!usedTodo) console.log(warn(' ๆœชไฝฟ็”จ TodoWrite'));
const usedRead = toolCallLog.some(t => t.tool === 'Read');
if (!usedRead) throw new Error('ๆœช่ฏปๅ–ไปปไฝ•ๆ–‡ไปถ');
const usedWrite = toolCallLog.some(t => ['Write', 'Edit'].includes(t.tool));
if (!usedWrite) throw new Error('ๆœชๅ†™ๅ…ฅไปปไฝ•ๆ–‡ไปถ๏ผˆไฟฎๅคๆœชๅฎŒๆˆ๏ผ‰');
const controllerContent = VIRTUAL_FS['/project/src/controllers/user.ts'] || '';
const routeContent = VIRTUAL_FS['/project/src/routes/api.ts'] || '';
const controllerFixed = controllerContent.includes('delete') || controllerContent.includes('Delete');
const routeFixed = routeContent.includes('delete') || routeContent.includes('DELETE');
console.log(info(` Controller ๅทฒไฟฎๅค: ${controllerFixed ? 'โœ…' : 'โŒ'}`));
console.log(info(` Routes ๅทฒไฟฎๅค: ${routeFixed ? 'โœ…' : 'โŒ'}`));
if (!controllerFixed && !routeFixed) throw new Error('่™šๆ‹Ÿๆ–‡ไปถ็ณป็ปŸๆœช่ขซไฟฎๆ”น');
return result;
});
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๅœบๆ™ฏ 4๏ผšGrep ๆœ็ดข + ๆ‰น้‡ไฟฎๆ”น๏ผˆๅคšๅทฅๅ…ทๅ่ฐƒ๏ผ‰
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(hdr('ๅœบๆ™ฏ 4๏ผšGrep ๆœ็ดข + ๆ‰น้‡ไฟฎๆ”น'));
await test('ๆœ็ดขๆ‰€ๆœ‰ TODO ๆณจ้‡Šๅนถๅกซๅ†™ๆต‹่ฏ•ๅฎž็Žฐ', async () => {
const result = await runAgentLoop(
`You MUST use tools in this exact order:
1. Call Grep with pattern "TODO" and path "/project/tests" โ€” this shows you line numbers only, NOT the full file
2. Call Read on /project/tests/user.test.ts โ€” you NEED this to see the full file content before editing
3. Call Write on /project/tests/user.test.ts โ€” write the complete updated file with the two TODO test cases implemented using real assertions`,
{ label: 'grep+edit', toolChoice: { type: 'any' } }
);
const { toolCallLog } = result;
const usedGrep = toolCallLog.some(t => t.tool === 'Grep');
const usedRead = toolCallLog.some(t => t.tool === 'Read');
const usedWrite = toolCallLog.some(t => ['Write', 'Edit'].includes(t.tool));
console.log(info(` Grep: ${usedGrep ? 'โœ…' : 'โŒ'} Read: ${usedRead ? 'โœ…' : 'โš (ๅฏ้€‰)'} Write: ${usedWrite ? 'โœ…' : 'โŒ'}`));
if (!usedWrite) throw new Error('ๆœชไฟฎๆ”นๆต‹่ฏ•ๆ–‡ไปถ');
if (!usedGrep && !usedRead) throw new Error('ๆœชๆœ็ดขๆˆ–่ฏปๅ–ไปปไฝ•ๆ–‡ไปถ');
const testContent = VIRTUAL_FS['/project/tests/user.test.ts'] || '';
const hasImpl = testContent.includes('expect') || testContent.includes('assert') ||
testContent.includes('toEqual') || testContent.includes('toBe');
console.log(info(` ๆต‹่ฏ•ๅฎž็Žฐๅทฒๅ†™ๅ…ฅ: ${hasImpl ? 'โœ…' : 'โŒ'}`));
if (!hasImpl) throw new Error('ๆต‹่ฏ•ๆ–‡ไปถๆœชๅŒ…ๅซ็œŸๆญฃ็š„ๆ–ญ่จ€ๅฎž็Žฐ');
return result;
});
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๅœบๆ™ฏ 5๏ผšBash ๅทฅๅ…ท่ฐƒ็”จ๏ผˆ่ท‘ๆต‹่ฏ•/ๆž„ๅปบ๏ผ‰
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(hdr('ๅœบๆ™ฏ 5๏ผšBash ๆ‰ง่กŒ + ๅ“ๅบ”็ป“ๆžœ'));
await test('่ท‘ๆž„ๅปบๅนถๆฃ€ๆŸฅ่พ“ๅ‡บ', async () => {
const result = await runAgentLoop(
`Use the Bash tool to run these commands one at a time:
1. Bash: {"command": "cd /project && npm run build"}
2. Bash: {"command": "cd /project && npm test"}
Report what each command outputs.`,
{ label: 'bash' }
);
const { toolCallLog } = result;
const usedBash = toolCallLog.some(t => t.tool === 'Bash');
if (!usedBash) throw new Error('ๆœชไฝฟ็”จ Bash ๅทฅๅ…ท');
return result;
});
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๅœบๆ™ฏ 6๏ผšattempt_completion ๆญฃ็กฎ้€€ๅ‡บ
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(hdr('ๅœบๆ™ฏ 6๏ผšattempt_completion ๅฎŒๆˆไฟกๅท'));
await test('ไปปๅŠกๅฎŒๆˆๆ—ถไฝฟ็”จ attempt_completion', async () => {
const result = await runAgentLoop(
`Use the Read tool to read /project/package.json. Then call attempt_completion with a summary of: project name, version, and all dependencies listed.`,
{ label: 'completion', toolChoice: { type: 'any' } } // โ† tool_choice=any ๅผบๅˆถๅทฅๅ…ท่ฐƒ็”จ
);
const { toolCallLog } = result;
const usedRead = toolCallLog.some(t => t.tool === 'Read');
if (!usedRead) throw new Error('ๆœช่ฏปๅ– package.json');
const usedCompletion = toolCallLog.some(t => t.tool === 'attempt_completion');
if (!usedCompletion) {
if (!result.finalResult) throw new Error('ๆœชไฝฟ็”จ attempt_completion๏ผŒไนŸๆฒกๆœ‰ๆœ€็ปˆๆ–‡ๆœฌ');
console.log(warn(' ๆจกๅž‹ๆœชไฝฟ็”จ attempt_completion๏ผŒไฝ†ๆœ‰ๆœ€็ปˆๆ–‡ๆœฌ๏ผˆๅฏๆŽฅๅ—๏ผ‰'));
}
return result;
});
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๅœบๆ™ฏ 7๏ผš้•ฟ้“พๅคš่ฝฎ Agentic๏ผˆRead โ†’ Grep โ†’ Edit โ†’ Bash โ†’ ๅฎŒๆˆ๏ผ‰
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
console.log(hdr('ๅœบๆ™ฏ 7๏ผšๅฎŒๆ•ด Agentic ้“พ๏ผˆโ‰ฅ4่ฝฎ๏ผ‰'));
await test('ๅฎŒๆ•ด้‡ๆž„ไปปๅŠก๏ผšๅขžๅŠ ่พ“ๅ…ฅ้ชŒ่ฏ', async () => {
// ้‡็ฝฎ่™šๆ‹Ÿ FS ไธญ controller ๅˆฐๅŽŸๅง‹็Šถๆ€
VIRTUAL_FS['/project/src/controllers/user.ts'] = `import { Request, Response } from 'express';
export class UserController {
private users: Array<{id: string, name: string, email: string}> = [];
list = (req: Request, res: Response) => {
res.json(this.users);
}
get = (req: Request, res: Response) => {
const user = this.users.find(u => u.id === req.params.id);
if (!user) return res.status(404).json({ error: 'User not found' });
res.json(user);
}
create = (req: Request, res: Response) => {
// BUG: no validation on input fields
const user = { id: Date.now().toString(), ...req.body };
this.users.push(user);
res.status(201).json(user);
}
}
`;
const result = await runAgentLoop(
`The create method in /project/src/controllers/user.ts has a security bug: it has no input validation.
Please:
1. Read the user model at /project/src/models/user.ts to understand the schema
2. Read the controller file
3. Add proper validation (check name and email are present and valid)
4. Use Grep to verify no other files need the same fix
5. Run a quick test with Bash to confirm nothing is broken
6. Call attempt_completion when done`,
{ label: '้‡ๆž„', verbose: false }
);
const { toolCallLog, turns } = result;
if (turns < 3) throw new Error(`ๆœŸๆœ›่‡ณๅฐ‘ 3 ่ฝฎ่ฐƒ็”จ๏ผŒๅฎž้™… ${turns} ่ฝฎ`);
const usedTools = [...new Set(toolCallLog.map(t => t.tool))];
console.log(info(` ไฝฟ็”จ็š„ๅทฅๅ…ท้›†: ${usedTools.join(', ')}`));
// ้ชŒ่ฏ Read ไบ†ๆจกๅž‹ๅ’Œ Controller
const readFiles = toolCallLog.filter(t => t.tool === 'Read').map(t => t.input.file_path);
console.log(info(` ่ฏปๅ–็š„ๆ–‡ไปถ: ${readFiles.join(', ')}`));
// ้ชŒ่ฏไฟฎๆ”นไบ†ๆ–‡ไปถ
const modified = toolCallLog.some(t => ['Write', 'Edit'].includes(t.tool));
if (!modified) throw new Error('ๆœชไฟฎๆ”นไปปไฝ•ๆ–‡ไปถ');
// ๆฃ€ๆŸฅ controller ๆ˜ฏๅฆ็œŸ็š„่ขซไฟฎๆ”นไบ†
const ctrl = VIRTUAL_FS['/project/src/controllers/user.ts'];
const hasValidation = ctrl.includes('valid') || ctrl.includes('400') || ctrl.includes('required') || ctrl.includes('!req.body');
console.log(info(` ้ชŒ่ฏ้€ป่พ‘ๅทฒๆทปๅŠ : ${hasValidation ? 'โœ…' : 'โŒ๏ผˆๆจกๅž‹ๅฏ่ƒฝๆœ‰ไธๅŒๅฎž็Žฐๆ–นๅผ๏ผ‰'}`));
return result;
});
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
// ๆฑ‡ๆ€ป
// โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
const total = passed + failed;
console.log(`\n${'โ•'.repeat(62)}`);
console.log(`${C.bold} Agentic ๅŽ‹ๆต‹็ป“ๆžœ: ${C.green}${passed} ้€š่ฟ‡${C.reset}${C.bold} / ${failed > 0 ? C.red : ''}${failed} ๅคฑ่ดฅ${C.reset}${C.bold} / ${total} ๅœบๆ™ฏ${C.reset}`);
console.log('โ•'.repeat(62) + '\n');
if (failed > 0) {
console.log(`${C.red}ๅคฑ่ดฅ็š„ๅœบๆ™ฏ:${C.reset}`);
allResults.filter(r => !r.ok).forEach(r => {
console.log(` - ${r.name}`);
console.log(` ${r.error}`);
});
console.log();
process.exit(1);
}