| """Evaluate baseline LiquidAI/LFM2.5-1.2B-Instruct — list tasks first, then run.""" |
|
|
| import subprocess |
| import sys |
| import json |
|
|
| |
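# Step 1: ask this lighteval install for its full task list so we can see
# which task names it actually exposes.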
| print("=== Listing available leaderboard tasks ===") |
| list_cmd = [sys.executable, "-m", "lighteval", "tasks", "list"] |
| result = subprocess.run(list_cmd, capture_output=True, text=True) |
|
|
| |
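# Step 2: pull out anything that looks like an Open LLM Leaderboard task
# (MMLU, ARC, TruthfulQA, or an explicit "leaderboard" suite).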
leaderboard_tasks = []
for line in result.stdout.split("\n"):
    if any(key in line.lower() for key in ("leaderboard", "mmlu", "arc", "truthful")):
        leaderboard_tasks.append(line.strip())
        print(line.strip())

| print(f"\n=== Found {len(leaderboard_tasks)} matching tasks ===") |
|
|
| |
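# Also show the head of the raw list in case the keyword filter missed the
# naming scheme this lighteval version uses.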
| print("\n=== First 50 tasks from full list ===") |
| for line in result.stdout.split("\n")[:50]: |
| print(line) |
|
|
| |
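# Model arguments for lighteval's accelerate backend; trust_remote_code lets
# transformers load any custom modeling code the checkpoint ships on the Hub.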
model_args = "model_name=LiquidAI/LFM2.5-1.2B-Instruct,trust_remote_code=True"

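# The suite prefix in the "suite|task|fewshot|truncation" spec differs across
# lighteval versions, so try the common ones until one is accepted.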
for tasks in [
    "leaderboard|mmlu|5|0",
    "community|mmlu|5|0",
    "lighteval|mmlu|5|0",
    "original|mmlu|5|0",
]:
    print(f"\n=== Trying task format: {tasks} ===")
    cmd = [
        sys.executable, "-m", "lighteval", "accelerate",
        model_args, tasks,
        "--output-dir", "./eval_results_baseline",
    ]
    r = subprocess.run(cmd, capture_output=True, text=True)
    if r.returncode == 0:
        print("SUCCESS!")
        print(r.stdout[-2000:])
        sys.exit(0)
    else:
        print(f"Failed: {r.stderr[-500:]}")

| print("\nAll task formats failed. Dumping full task list to stdout.") |
| print(result.stdout) |
| sys.exit(1) |
|
|