import json
from pathlib import Path
from typing import Any
| REQUIRED_FIELDS = {"x", "y", "reasoning_type"} | |
| def load_dataset(path: str | Path) -> list[dict[str, Any]]: | |
| """Load and validate a CARB-style JSON dataset.""" | |
| dataset_path = Path(path) | |
| with dataset_path.open("r", encoding="utf-8") as f: | |
| rows = json.load(f) | |
| if not isinstance(rows, list): | |
| raise ValueError("Dataset must be a JSON array.") | |
| for index, row in enumerate(rows): | |
| missing = REQUIRED_FIELDS.difference(row) | |
| if missing: | |
| raise ValueError(f"Row {index} missing fields: {sorted(missing)}") | |
| if row["y"] not in (0, 1): | |
| raise ValueError(f"Row {index} has non-binary label: {row['y']!r}") | |
| return rows | |