Spaces:

qimma
/

leaderboard

Running on CPU Upgrade

Alyafeai committed on Mar 2

Commit

034f762

1 Parent(s): 5e5f8d3

fix issue where some parquet files have different formats

Files changed (1) hide show

backend/data_loader.py CHANGED Viewed

@@ -414,11 +414,28 @@ def _as_dict(value: Any) -> Dict[str, Any]:
 def _py_scalar(value: Any) -> Any:
     if isinstance(value, np.generic):
         return value.item()
     return value
 def _extract_predicted_answer(model_response: Dict[str, Any], choices: List[Any]) -> Any:
     logprobs = model_response.get("logprobs")
     if logprobs is not None and choices:
@@ -500,7 +517,7 @@ def _read_detail_parquet(path: str, subtask: str) -> List[Dict[str, Any]]:
             or ""
         )
-        rows.append({
             "subtask": subtask,
             "question_id": _py_scalar(doc.get("id")),
             "task_name": _py_scalar(doc.get("task_name")),
@@ -513,7 +530,7 @@ def _read_detail_parquet(path: str, subtask: str) -> List[Dict[str, Any]]:
             "is_correct": is_correct,
             "metric_name": metric_name,
             "metric": metric_value,
-        })
     return rows

 def _py_scalar(value: Any) -> Any:
+    if isinstance(value, np.ndarray):
+        if value.ndim == 0:
+            return _py_scalar(value.item())
+        if value.size == 1:
+            return _py_scalar(value.reshape(-1)[0])
+        return [_py_scalar(v) for v in value.tolist()]
     if isinstance(value, np.generic):
         return value.item()
     return value
+def _json_safe(value: Any) -> Any:
+    value = _py_scalar(value)
+    if isinstance(value, dict):
+        return {str(k): _json_safe(v) for k, v in value.items()}
+    if isinstance(value, list):
+        return [_json_safe(v) for v in value]
+    if isinstance(value, tuple):
+        return [_json_safe(v) for v in value]
+    return value
 def _extract_predicted_answer(model_response: Dict[str, Any], choices: List[Any]) -> Any:
     logprobs = model_response.get("logprobs")
     if logprobs is not None and choices:
             or ""
         )
+        rows.append(_json_safe({
             "subtask": subtask,
             "question_id": _py_scalar(doc.get("id")),
             "task_name": _py_scalar(doc.get("task_name")),
             "is_correct": is_correct,
             "metric_name": metric_name,
             "metric": metric_value,
+        }))
     return rows