File size: 755 Bytes
09f4a33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import json
from pathlib import Path
from typing import Any


REQUIRED_FIELDS = {"x", "y", "reasoning_type"}


def load_dataset(path: str | Path) -> list[dict[str, Any]]:
    """Load and validate a CARB-style JSON dataset."""
    dataset_path = Path(path)
    with dataset_path.open("r", encoding="utf-8") as f:
        rows = json.load(f)

    if not isinstance(rows, list):
        raise ValueError("Dataset must be a JSON array.")

    for index, row in enumerate(rows):
        missing = REQUIRED_FIELDS.difference(row)
        if missing:
            raise ValueError(f"Row {index} missing fields: {sorted(missing)}")
        if row["y"] not in (0, 1):
            raise ValueError(f"Row {index} has non-binary label: {row['y']!r}")

    return rows