Spaces:

ricebug
/

csa

Sleeping

App Files Files Community

csa / test /test-hybrid-stream.mjs

ricebug's picture

Upload 76 files

ca8ab2d verified about 1 month ago

history blame contribute delete

9.6 kB

	/**
	* 混合流式完整性测试
	* 验证：
	* 1. 文字增量流式 ✓
	* 2. 工具调用参数完整 ✓
	* 3. 多工具调用 ✓
	* 4. 纯文字（无工具调用）✓
	* 5. stop_reason 正确 ✓
	*/

	import http from 'http';

	// Base URL of the server under test; override it with the BASE_URL environment variable.
	// `||` (rather than `??`) is deliberate: an empty BASE_URL also falls back to the default.
	const BASE = process.env.BASE_URL || 'http://localhost:3010';
	// Parsed form of BASE; its hostname and port are used for the streaming requests.
	const url = new URL(BASE);

	/**
	 * Sends one request to POST /v1/messages on the configured host and
	 * summarizes the Server-Sent-Events stream that comes back.
	 *
	 * @param {string} name - Label copied verbatim into the result.
	 * @param {object} body - Request payload; serialized with JSON.stringify.
	 * @param {number} [timeout=60000] - Milliseconds before the request is
	 *   destroyed and the promise rejects with a timeout error.
	 * @returns {Promise<object>} Resolves when the response stream ends with
	 *   `{ name, textChunks, textLength, textPreview, tools, stopReason,
	 *   firstTextMs, firstToolMs, doneMs }`. The `*Ms` fields are milliseconds
	 *   since the response callback fired, or -1 when the event never arrived.
	 *   Rejects on request/response errors and on timeout.
	 */
	function runAnthropicTest(name, body, timeout = 60000) {
		return new Promise((resolve, reject) => {
			const data = JSON.stringify(body);
			const events = [];

			const req = http.request({
				hostname: url.hostname, port: url.port, path: '/v1/messages', method: 'POST',
				headers: {
					'Content-Type': 'application/json', 'x-api-key': 'test',
					'anthropic-version': '2023-06-01', 'Content-Length': Buffer.byteLength(data),
				},
			}, (res) => {
				const start = Date.now();
				let buf = '';

				// Let the stream decode UTF-8 so that a multi-byte character split
				// across two network chunks is not turned into replacement characters.
				res.setEncoding('utf8');

				// Records one SSE line if it is a JSON `data:` field; everything else is ignored.
				const consumeLine = (line) => {
					// The space after the colon is optional in SSE.
					if (!line.startsWith('data:')) return;
					const payload = line.slice(5).trim();
					if (payload === '[DONE]') return;
					try {
						const ev = JSON.parse(payload);
						events.push({ ...ev, _ts: Date.now() - start });
					} catch {
						// Best effort: non-JSON payloads are skipped on purpose.
					}
				};

				res.on('data', (chunk) => {
					buf += chunk;
					const lines = buf.split('\n');
					buf = lines.pop(); // keep the incomplete last line for the next chunk
					for (const line of lines) consumeLine(line);
				});

				res.on('end', () => {
					clearTimeout(timer);
					// A final line without a trailing newline is still sitting in the buffer.
					if (buf) consumeLine(buf);
					buf = '';

					const isDelta = (e, kind) => e.type === 'content_block_delta' && e.delta?.type === kind;
					const textDeltas = events.filter(e => isDelta(e, 'text_delta'));
					const toolStarts = events.filter(e => e.type === 'content_block_start' && e.content_block?.type === 'tool_use');
					const toolInputDeltas = events.filter(e => isDelta(e, 'input_json_delta'));
					const msgDelta = events.find(e => e.type === 'message_delta');
					const msgStop = events.find(e => e.type === 'message_stop');

					const fullText = textDeltas.map(e => e.delta.text).join('');
					const tools = toolStarts.map((ts) => {
						// Reassemble this tool's input JSON from the deltas that share its block index.
						const inputRaw = toolInputDeltas
							.filter(d => d.index === ts.index)
							.map(d => d.delta.partial_json)
							.join('');
						let parsedInput = null;
						try {
							parsedInput = JSON.parse(inputRaw);
						} catch {
							// Leave input as null; callers use that to detect broken arguments.
						}
						return {
							name: ts.content_block.name,
							id: ts.content_block.id,
							input: parsedInput,
							inputRaw,
						};
					});

					resolve({
						name,
						textChunks: textDeltas.length,
						textLength: fullText.length,
						textPreview: fullText.substring(0, 120).replace(/\n/g, '\\n'),
						tools,
						stopReason: msgDelta?.delta?.stop_reason || '?',
						firstTextMs: textDeltas[0]?._ts ?? -1,
						firstToolMs: toolStarts[0]?._ts ?? -1,
						doneMs: msgStop?._ts ?? -1,
					});
				});
				res.on('error', (err) => { clearTimeout(timer); reject(err); });
			});

			// Armed only after http.request succeeded, so a synchronous throw leaves no timer behind.
			const timer = setTimeout(() => {
				reject(new Error(`超时 ${timeout}ms`));
				// Release the socket; any error this triggers hits an already-settled promise.
				req.destroy();
			}, timeout);

			req.on('error', (err) => { clearTimeout(timer); reject(err); });
			req.write(data);
			req.end();
		});
	}

	/**
	 * Writes a human-readable report for one result object to the console:
	 * a title line, timings, text statistics, an optional text preview, the
	 * stop reason and, when present, one line per tool call (plus a warning
	 * line for every tool whose parsed input is falsy).
	 *
	 * @param {object} r - Result with name, firstTextMs, firstToolMs, doneMs,
	 *   textChunks, textLength, textPreview, stopReason and tools fields.
	 */
	function printResult(r) {
		// Every line goes out immediately, one console.log call per line.
		const emit = (line) => console.log(line);

		emit(`\n 📊 ${r.name}`);
		emit(` 时间: 首字=${r.firstTextMs}ms 首工具=${r.firstToolMs}ms 完成=${r.doneMs}ms`);
		emit(` 文字: ${r.textChunks} chunks, ${r.textLength} chars`);
		if (r.textPreview) {
			emit(` 预览: "${r.textPreview}"`);
		}
		emit(` stop_reason: ${r.stopReason}`);

		const toolCalls = r.tools;
		if (!(toolCalls.length > 0)) {
			return;
		}

		emit(` 工具调用 (${toolCalls.length}个):`);
		toolCalls.forEach((call) => {
			emit(` - ${call.name}(${JSON.stringify(call.input)})`);
			if (call.input) {
				return;
			}
			// Arguments did not parse: show the start of the raw JSON instead.
			emit(` ⚠️ 参数解析失败! raw: ${call.inputRaw?.substring(0, 100)}`);
		});
	}

	// Tool definitions sent with every request. Each entry is built from a
	// compact [name, description, fields] spec: every field is a required
	// string property of the tool's input schema.
	const TOOLS = [
		['Read', 'Read file contents', ['file_path']],
		['Write', 'Write file contents', ['file_path', 'content']],
		['Bash', 'Run bash command', ['command']],
	].map(([name, description, fields]) => ({
		name,
		description,
		input_schema: {
			type: 'object',
			properties: Object.fromEntries(fields.map((field) => [field, { type: 'string' }])),
			required: [...fields],
		},
	}));

	/**
	 * Runs one streaming scenario: sends the request, prints the result, then
	 * evaluates and prints every check. A thrown error (request failure,
	 * timeout, ...) is reported and counted as exactly one failure.
	 *
	 * @param {string} label - Name passed to runAnthropicTest.
	 * @param {object} body - Request payload for the scenario.
	 * @param {(r: object) => Array<{name: string, ok: boolean}>} buildChecks -
	 *   Produces the checks from the result object.
	 * @returns {Promise<{passed: number, failed: number}>} Tally for this scenario.
	 */
	async function runScenario(label, body, buildChecks) {
		const tally = { passed: 0, failed: 0 };
		try {
			const r = await runAnthropicTest(label, body);
			printResult(r);

			for (const check of buildChecks(r)) {
				console.log(` ${check.ok ? '✅' : '❌'} ${check.name}`);
				if (check.ok) {
					tally.passed += 1;
				} else {
					tally.failed += 1;
				}
			}
		} catch (err) {
			console.log(` ❌ 失败: ${err.message}`);
			tally.failed += 1;
		}
		return tally;
	}

	/**
	 * Entry point: verifies the server answers on /health, runs the three
	 * streaming scenarios one after another, prints a summary and exits with
	 * status 1 if anything failed (0 otherwise).
	 */
	async function main() {
		console.log('\n ⚡ 混合流式完整性测试\n');

		// Health check: any completed response counts as "online"; a connection
		// error or 3 s of socket inactivity aborts the whole run.
		try {
			await new Promise((resolve, reject) => {
				const req = http.get(`${BASE}/health`, r => { r.resume(); r.on('end', resolve); });
				req.on('error', reject);
				req.setTimeout(3000, () => { req.destroy(); reject(new Error('timeout')); });
			});
		} catch {
			console.log(' ❌ 服务器未运行\n');
			process.exit(1);
		}
		console.log(' ✅ 服务器在线');

		const scenarios = [
			// Scenario 1: a single tool call.
			{
				heading: '\n ━━━ ① 单工具调用 ━━━',
				label: '单工具调用',
				body: {
					model: 'claude-3-5-sonnet-20241022', max_tokens: 1024, stream: true,
					system: 'You are a coding assistant with file system access. When a user asks to read a file, use the Read tool immediately. Do not refuse or explain limitations.',
					tools: TOOLS,
					messages: [{ role: 'user', content: 'Read the file /src/index.ts for me please' }],
				},
				checks: (r) => [
					{ name: 'stop_reason=tool_use', ok: r.stopReason === 'tool_use' },
					{ name: '至少1个工具调用', ok: r.tools.length >= 1 },
					{ name: '工具名=Read', ok: r.tools.some(t => t.name === 'Read') },
					{ name: '工具参数有 file_path', ok: r.tools.some(t => t.input?.file_path) },
					{ name: '首字延迟<10s', ok: r.firstTextMs >= 0 && r.firstTextMs < 10000 },
				],
			},
			// Scenario 2: several tool calls in one response.
			{
				heading: '\n ━━━ ② 多工具调用 ━━━',
				label: '多工具调用',
				body: {
					model: 'claude-3-5-sonnet-20241022', max_tokens: 2048, stream: true,
					system: 'You are a coding assistant with file system access. When asked to read multiple files, use multiple Read tool calls in a single response. Do not refuse.',
					tools: TOOLS,
					messages: [{ role: 'user', content: 'Read both /src/index.ts and /src/config.ts for me' }],
				},
				checks: (r) => [
					{ name: 'stop_reason=tool_use', ok: r.stopReason === 'tool_use' },
					{ name: '≥2个工具调用', ok: r.tools.length >= 2 },
					{ name: '工具参数都有 file_path', ok: r.tools.every(t => t.input?.file_path) },
				],
			},
			// Scenario 3: plain text; tools are offered but must not be called.
			{
				heading: '\n ━━━ ③ 纯文字（有工具但不调用） ━━━',
				label: '纯文字',
				body: {
					model: 'claude-3-5-sonnet-20241022', max_tokens: 512, stream: true,
					system: 'You are helpful. Answer questions directly without using any tools.',
					tools: TOOLS,
					messages: [{ role: 'user', content: 'What is 2+2? Just answer with the number.' }],
				},
				checks: (r) => [
					{ name: 'stop_reason=end_turn', ok: r.stopReason === 'end_turn' },
					{ name: '0个工具调用', ok: r.tools.length === 0 },
					{ name: '有文字输出', ok: r.textLength > 0 },
					{ name: '文字含数字4', ok: r.textPreview.includes('4') },
				],
			},
		];

		let passed = 0;
		let failed = 0;

		// Scenarios run strictly one after another so their console output does not interleave.
		for (const scenario of scenarios) {
			console.log(scenario.heading);
			const tally = await runScenario(scenario.label, scenario.body, scenario.checks);
			passed += tally.passed;
			failed += tally.failed;
		}

		// Summary
		console.log(`\n ━━━ 汇总 ━━━`);
		console.log(` ✅ 通过: ${passed} ❌ 失败: ${failed}\n`);
		process.exit(failed > 0 ? 1 : 0);
	}

	// Start the run; a rejection that escapes main() is reported and ends the
	// process with exit status 1.
	main().catch((fatal) => {
		console.error('致命错误:', fatal);
		process.exit(1);
	});