File size: 755 Bytes
09f4a33 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | import json
from pathlib import Path
from typing import Any
REQUIRED_FIELDS = {"x", "y", "reasoning_type"}
def load_dataset(path: str | Path) -> list[dict[str, Any]]:
"""Load and validate a CARB-style JSON dataset."""
dataset_path = Path(path)
with dataset_path.open("r", encoding="utf-8") as f:
rows = json.load(f)
if not isinstance(rows, list):
raise ValueError("Dataset must be a JSON array.")
for index, row in enumerate(rows):
missing = REQUIRED_FIELDS.difference(row)
if missing:
raise ValueError(f"Row {index} missing fields: {sorted(missing)}")
if row["y"] not in (0, 1):
raise ValueError(f"Row {index} has non-binary label: {row['y']!r}")
return rows
|