ehsaaniqbal committed on
Commit
289a9cc
·
unverified ·
1 Parent(s): 0c4aa05

feat: strict logging

Browse files
Files changed (1) hide show
  1. inference.py +14 -9
inference.py CHANGED
@@ -62,6 +62,9 @@ def _env_flag(name: str, default: bool) -> bool:
62
  return raw_value.strip().lower() not in {"0", "false", "no", "off", ""}
63
 
64
 
 
 
 
65
  def strict_task_score(raw_score: float, *, used_fallback: bool) -> float:
66
  if used_fallback and _env_flag("STRICT_BASELINE_SCORING", True):
67
  return 0.0
@@ -1035,11 +1038,12 @@ def main() -> None:
1035
 
1036
  assert task_result is not None
1037
  results.append(task_result)
1038
- sys.stderr.write(
1039
- f"{task_id.value}: score={task_result['score']:.4f} "
1040
- f"raw_score={task_result.get('raw_score', task_result['score']):.4f} "
1041
- f"fallback={str(task_result['used_fallback']).lower()}\n"
1042
- )
 
1043
 
1044
  mean_score = sum(result["score"] for result in results) / len(results)
1045
  raw_mean_score = sum(
@@ -1057,10 +1061,11 @@ def main() -> None:
1057
  "results": results,
1058
  }
1059
  output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
1060
- sys.stderr.write(
1061
- f"mean_score={mean_score:.4f} raw_mean_score={raw_mean_score:.4f}\n"
1062
- )
1063
- sys.stderr.write(f"wrote={output_path}\n")
 
1064
 
1065
 
1066
  if __name__ == "__main__":
 
62
  return raw_value.strip().lower() not in {"0", "false", "no", "off", ""}
63
 
64
 
65
+ VERBOSE_STDERR = _env_flag("INFERENCE_VERBOSE_STDERR", False)
66
+
67
+
68
  def strict_task_score(raw_score: float, *, used_fallback: bool) -> float:
69
  if used_fallback and _env_flag("STRICT_BASELINE_SCORING", True):
70
  return 0.0
 
1038
 
1039
  assert task_result is not None
1040
  results.append(task_result)
1041
+ if VERBOSE_STDERR:
1042
+ sys.stderr.write(
1043
+ f"{task_id.value}: score={task_result['score']:.4f} "
1044
+ f"raw_score={task_result.get('raw_score', task_result['score']):.4f} "
1045
+ f"fallback={str(task_result['used_fallback']).lower()}\n"
1046
+ )
1047
 
1048
  mean_score = sum(result["score"] for result in results) / len(results)
1049
  raw_mean_score = sum(
 
1061
  "results": results,
1062
  }
1063
  output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
1064
+ if VERBOSE_STDERR:
1065
+ sys.stderr.write(
1066
+ f"mean_score={mean_score:.4f} raw_mean_score={raw_mean_score:.4f}\n"
1067
+ )
1068
+ sys.stderr.write(f"wrote={output_path}\n")
1069
 
1070
 
1071
  if __name__ == "__main__":