#!/usr/bin/env python3
"""Download the MT-Bench question set into a local dataset directory.

The script fetches ``question.jsonl`` from the FastChat repository through
an HTTP proxy, writes it to ``DATA_DIR``, then re-reads the saved file to
check that every non-blank line is valid JSON and prints the first question.

On any failure a message is printed together with manual download
instructions; the script does not raise for network or parse errors.
"""
import json
import os

import requests

# Target directory for the downloaded dataset.
DATA_DIR = "/workspace/hanrui/datasets/mtbench"

# Source of the MT-Bench questions and where they are stored locally.
url = "https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/mt_bench/question.jsonl"
output_file = os.path.join(DATA_DIR, "question.jsonl")

# The download is routed through this proxy for both schemes.
PROXIES = {
    'http': 'http://10.1.2.1:7890',
    'https': 'http://10.1.2.1:7890',
}

# Seconds to wait for the server before giving up.
REQUEST_TIMEOUT = 30


def _download(source_url, destination):
    """Fetch *source_url* via the proxy and write the raw bytes to *destination*.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status (via ``raise_for_status``).
        OSError: if the destination file cannot be written.
    """
    response = requests.get(source_url, proxies=PROXIES, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(destination, 'wb') as f:
        f.write(response.content)


def _load_questions(path):
    """Return the parsed JSON object of every non-blank line in *path*.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's locale encoding.

    Raises:
        OSError: if the file cannot be read.
        ValueError: if the file is not valid UTF-8 or a line is not valid
            JSON (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are
            both subclasses of ``ValueError``).
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Skip blank lines so a trailing newline is not counted as a question.
        return [json.loads(line) for line in f if line.strip()]


def _print_manual_instructions():
    """Tell the user how to fetch the file by hand."""
    print("\nPlease manually download from:")
    print(f" {url}")
    print("And save to:")
    print(f" {output_file}")


def main():
    """Download the questions, validate the saved file and print a summary."""
    os.makedirs(DATA_DIR, exist_ok=True)

    print(f"Downloading MT-Bench questions from {url}")
    print(f"Saving to {output_file}")

    # Keep download errors separate from validation errors so the message
    # shown to the user names the step that actually failed.
    try:
        _download(url, output_file)
    except (requests.RequestException, OSError) as e:
        print(f"✗ Download failed: {e}")
        _print_manual_instructions()
        return
    print("✓ Downloaded successfully")

    try:
        questions = _load_questions(output_file)
    except (OSError, ValueError) as e:
        print(f"✗ Validation failed: {e}")
        _print_manual_instructions()
        return

    print(f"✓ Total questions: {len(questions)}")
    if not questions:
        # An empty file would otherwise fail on indexing the first entry.
        print("✗ Downloaded file contains no questions")
        _print_manual_instructions()
        return

    # Show the first question as a quick sanity check.
    print("\nFirst question:")
    print(json.dumps(questions[0], indent=2))


if __name__ == "__main__":
    main()