Hanrui / SpecForge-ext /download_mtbench_data.py
Lekr0's picture
Add files using upload-large-folder tool
7c50656 verified
#!/usr/bin/env python3
"""
Download the MT-Bench question set to a local directory and sanity-check it.
"""
import json
import os
import requests
# Directory that receives the downloaded dataset.
DATA_DIR = "/workspace/hanrui/datasets/mtbench"

# Upstream location of the MT-Bench question file and where it is stored locally.
url = "https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/mt_bench/question.jsonl"
output_file = os.path.join(DATA_DIR, "question.jsonl")

# HTTP(S) proxy the download is routed through by default.
PROXIES = {
    "http": "http://10.1.2.1:7890",
    "https": "http://10.1.2.1:7890",
}

# Seconds to wait on the server before the request is abandoned.
REQUEST_TIMEOUT = 30


def parse_questions(payload):
    """Decode a JSON Lines payload into a list of records.

    Args:
        payload: File content, either UTF-8 encoded ``bytes`` or a ``str``.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        ValueError: If the payload is not valid UTF-8, if any non-blank line
            is not valid JSON, or if there are no records at all.
    """
    if isinstance(payload, (bytes, bytearray)):
        # Decode explicitly: relying on the locale's default encoding breaks
        # on systems whose default is not UTF-8.
        text = bytes(payload).decode("utf-8")
    else:
        text = payload
    # Split on "\n" only. str.splitlines() also breaks on characters such as
    # U+2028, which may legally appear unescaped inside a JSON string.
    records = [json.loads(line) for line in text.split("\n") if line.strip()]
    if not records:
        raise ValueError("downloaded file contains no questions")
    return records


def download_questions(source_url=url, destination=output_file,
                       proxies=None, timeout=REQUEST_TIMEOUT):
    """Fetch the question file, validate it, and store it on disk.

    The payload is parsed *before* it is written, so a truncated or otherwise
    malformed response never replaces the local file.

    Args:
        source_url: URL of the JSON Lines file to download.
        destination: Path the raw payload is written to. Its parent directory
            is created when missing.
        proxies: Proxy mapping handed to ``requests.get``. ``None`` selects
            the module default ``PROXIES``; pass ``{}`` to set no proxy
            explicitly.
        timeout: Request timeout in seconds.

    Returns:
        The parsed records, as produced by ``parse_questions``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
        ValueError: If the payload fails validation.
        OSError: If the directory or file cannot be written.
    """
    response = requests.get(
        source_url,
        proxies=PROXIES if proxies is None else proxies,
        timeout=timeout,
    )
    response.raise_for_status()
    questions = parse_questions(response.content)

    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)
    # Store the bytes exactly as served; no re-encoding takes place.
    with open(destination, "wb") as f:
        f.write(response.content)
    return questions


def main():
    """Download the MT-Bench questions and print a short summary.

    Returns:
        ``0`` on success. ``1`` when the download, validation, or write
        failed, in which case manual download instructions are printed.
    """
    print(f"Downloading MT-Bench questions from {url}")
    print(f"Saving to {output_file}")
    try:
        questions = download_questions()
    except (requests.RequestException, OSError, ValueError) as e:
        # Only expected network / filesystem / data errors are handled here;
        # programming errors still surface with a traceback.
        print(f"✗ Download failed: {e}")
        print("\nPlease manually download from:")
        print(f" {url}")
        print("And save to:")
        print(f" {output_file}")
        return 1

    print("✓ Downloaded successfully")
    print(f"✓ Total questions: {len(questions)}")
    # Show the first question as a quick visual check.
    print("\nFirst question:")
    print(json.dumps(questions[0], indent=2))
    return 0


if __name__ == "__main__":
    # Propagate failure through the exit status so callers can detect it.
    raise SystemExit(main())