Spaces:

obversarystudios
/

carb-observability-space

Sleeping

Brian Moran

Add CARB observability pipeline

1b435f0 13 days ago

1.08 kB

	import json
	from pathlib import Path
	from typing import Any


	REQUIRED_FIELDS = {"x", "y", "reasoning_type"}


	def load_dataset(path: str \| Path) -> list[dict[str, Any]]:
	"""Load and validate the small CARB-style seed dataset."""
	dataset_path = Path(path)
	with dataset_path.open("r", encoding="utf-8") as f:
	rows = json.load(f)

	if not isinstance(rows, list):
	raise ValueError("Dataset must be a JSON list.")

	for index, row in enumerate(rows):
	missing = REQUIRED_FIELDS.difference(row)
	if missing:
	raise ValueError(f"Row {index} is missing required fields: {sorted(missing)}")
	if row["y"] not in (0, 1):
	raise ValueError(f"Row {index} has non-binary label: {row['y']!r}")
	if not isinstance(row["x"], str) or not row["x"].strip():
	raise ValueError(f"Row {index} has an empty input string.")
	if not isinstance(row["reasoning_type"], str) or not row["reasoning_type"].strip():
	raise ValueError(f"Row {index} has an empty reasoning_type.")

	return rows