FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / SpecForge-ext /download_mtbench_data.py

Lekr0's picture

Add files using upload-large-folder tool

7c50656 verified 5 days ago

history blame contribute delete

1.33 kB

	#!/usr/bin/env python3
	"""
	Download the MT-Bench question file into a local directory and sanity-check it.
	"""
	import json
	import os
	import requests

	# Target directory for the downloaded dataset; created up front so the
	# write below cannot fail on a missing parent directory.
	DATA_DIR = "/workspace/hanrui/datasets/mtbench"
	os.makedirs(DATA_DIR, exist_ok=True)

	# MT-Bench question data, fetched from the FastChat repository.
	url = "https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/mt_bench/question.jsonl"
	output_file = os.path.join(DATA_DIR, "question.jsonl")


	def _report_failure(stage, error):
	    """Print which stage failed, followed by manual-download instructions.

	    Args:
	        stage: Short label of the step that failed (e.g. "Download").
	        error: The exception (or message) describing the failure.
	    """
	    print(f"✗ {stage} failed: {error}")
	    print("\nPlease manually download from:")
	    print(f" {url}")
	    print("And save to:")
	    print(f" {output_file}")


	print(f"Downloading MT-Bench questions from {url}")
	print(f"Saving to {output_file}")

	try:
	    # Download through a proxy.
	    # NOTE(review): the proxy address is hard-coded and looks
	    # site-specific — confirm it before running this elsewhere.
	    proxies = {
	        "http": "http://10.1.2.1:7890",
	        "https": "http://10.1.2.1:7890",
	    }

	    response = requests.get(url, proxies=proxies, timeout=30)
	    response.raise_for_status()

	    # Store the payload exactly as received (raw bytes, no re-encoding).
	    with open(output_file, "wb") as f:
	        f.write(response.content)
	except (requests.RequestException, OSError) as e:
	    # Network/HTTP errors and local write errors are genuine download
	    # failures; anything after this point is reported separately.
	    _report_failure("Download", e)
	else:
	    print("✓ Downloaded successfully")

	    try:
	        # Validate the data. The file was written as raw bytes, so decode
	        # it explicitly as UTF-8 instead of relying on the locale default.
	        with open(output_file, "r", encoding="utf-8") as f:
	            # Skip blank lines so a trailing newline is not counted as a
	            # question and cannot break the JSON parse below.
	            lines = [line for line in f if line.strip()]

	        if not lines:
	            raise ValueError("downloaded file contains no questions")

	        print(f"✓ Total questions: {len(lines)}")

	        # Show the first question as a quick sanity check.
	        first_question = json.loads(lines[0])
	        print("\nFirst question:")
	        print(json.dumps(first_question, indent=2))
	    except (OSError, ValueError) as e:
	        # ValueError also covers json.JSONDecodeError and
	        # UnicodeDecodeError, which are both subclasses of it.
	        _report_failure("Validation", e)