| |
| """ |
| Download and convert MT-Bench data into a local directory. |
| """ |
| import json |
| import os |
| import requests |
|
|
| |
# Local directory that receives the MT-Bench files; created if it is missing.
DATA_DIR = "/workspace/hanrui/datasets/mtbench"
os.makedirs(DATA_DIR, exist_ok=True)

# Source of the question set and the local path it is written to.
url = "https://raw.githubusercontent.com/lm-sys/FastChat/main/fastchat/llm_judge/data/mt_bench/question.jsonl"
output_file = os.path.join(DATA_DIR, "question.jsonl")

print(f"Downloading MT-Bench questions from {url}")
print(f"Saving to {output_file}")

# Step 1: download. Only the network request and the write to disk live in
# this ``try`` so that a later verification problem is never misreported as a
# download failure.
try:
    # Both HTTP and HTTPS requests are sent through this proxy.
    proxies = {
        'http': 'http://10.1.2.1:7890',
        'https': 'http://10.1.2.1:7890',
    }

    response = requests.get(url, proxies=proxies, timeout=30)
    # Turn 4xx/5xx responses into an exception instead of saving an error page.
    response.raise_for_status()

    # Write the raw bytes exactly as received.
    with open(output_file, 'wb') as f:
        f.write(response.content)
except Exception as e:
    # Best-effort script: report the problem and tell the user how to fetch
    # the file by hand instead of aborting with a traceback.
    print(f"✗ Download failed: {e}")
    print("\nPlease manually download from:")
    print(f"  {url}")
    print("And save to:")
    print(f"  {output_file}")
else:
    print("✓ Downloaded successfully")

    # Step 2: sanity-check the saved file.
    try:
        # Decode explicitly as UTF-8 rather than relying on the platform's
        # locale encoding, and skip blank lines so they are neither counted
        # as questions nor handed to the JSON parser.
        with open(output_file, 'r', encoding='utf-8') as f:
            lines = [line for line in f if line.strip()]

        print(f"✓ Total questions: {len(lines)}")

        if lines:
            first_question = json.loads(lines[0])
            print("\nFirst question:")
            print(json.dumps(first_question, indent=2))
        else:
            # An empty file used to surface as an IndexError reported as a
            # download failure; say what is actually wrong.
            print("✗ Downloaded file contains no questions")
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"✗ Verification failed: {e}")
|
|