Spaces:
Sleeping
Sleeping
feat: strict logging
Browse files- inference.py +14 -9
inference.py
CHANGED
|
@@ -62,6 +62,9 @@ def _env_flag(name: str, default: bool) -> bool:
|
|
| 62 |
return raw_value.strip().lower() not in {"0", "false", "no", "off", ""}
|
| 63 |
|
| 64 |
|
|
|
|
|
|
|
|
|
|
| 65 |
def strict_task_score(raw_score: float, *, used_fallback: bool) -> float:
|
| 66 |
if used_fallback and _env_flag("STRICT_BASELINE_SCORING", True):
|
| 67 |
return 0.0
|
|
@@ -1035,11 +1038,12 @@ def main() -> None:
|
|
| 1035 |
|
| 1036 |
assert task_result is not None
|
| 1037 |
results.append(task_result)
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
|
|
|
| 1043 |
|
| 1044 |
mean_score = sum(result["score"] for result in results) / len(results)
|
| 1045 |
raw_mean_score = sum(
|
|
@@ -1057,10 +1061,11 @@ def main() -> None:
|
|
| 1057 |
"results": results,
|
| 1058 |
}
|
| 1059 |
output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
|
|
|
| 1064 |
|
| 1065 |
|
| 1066 |
if __name__ == "__main__":
|
|
|
|
| 62 |
return raw_value.strip().lower() not in {"0", "false", "no", "off", ""}
|
| 63 |
|
| 64 |
|
| 65 |
+
VERBOSE_STDERR = _env_flag("INFERENCE_VERBOSE_STDERR", False)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
def strict_task_score(raw_score: float, *, used_fallback: bool) -> float:
|
| 69 |
if used_fallback and _env_flag("STRICT_BASELINE_SCORING", True):
|
| 70 |
return 0.0
|
|
|
|
| 1038 |
|
| 1039 |
assert task_result is not None
|
| 1040 |
results.append(task_result)
|
| 1041 |
+
if VERBOSE_STDERR:
|
| 1042 |
+
sys.stderr.write(
|
| 1043 |
+
f"{task_id.value}: score={task_result['score']:.4f} "
|
| 1044 |
+
f"raw_score={task_result.get('raw_score', task_result['score']):.4f} "
|
| 1045 |
+
f"fallback={str(task_result['used_fallback']).lower()}\n"
|
| 1046 |
+
)
|
| 1047 |
|
| 1048 |
mean_score = sum(result["score"] for result in results) / len(results)
|
| 1049 |
raw_mean_score = sum(
|
|
|
|
| 1061 |
"results": results,
|
| 1062 |
}
|
| 1063 |
output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
|
| 1064 |
+
if VERBOSE_STDERR:
|
| 1065 |
+
sys.stderr.write(
|
| 1066 |
+
f"mean_score={mean_score:.4f} raw_mean_score={raw_mean_score:.4f}\n"
|
| 1067 |
+
)
|
| 1068 |
+
sys.stderr.write(f"wrote={output_path}\n")
|
| 1069 |
|
| 1070 |
|
| 1071 |
if __name__ == "__main__":
|