Jayant-Kernel committed on
Commit
66bdd16
·
1 Parent(s): 3d9195a

fix: parse_action confidence bug, numeric answers bug, missing reasoning field bug

Browse files
Files changed (1) hide show
  1. evaluate.py +17 -16
evaluate.py CHANGED
@@ -61,28 +61,29 @@ SYSTEM_PROMPT = """You are answering factual questions. Respond ONLY with a JSON
61
  import re
62
 
63
  def parse_action(text):
64
- cleaned = re.sub(r"```(?:json)?\s*", "", text).strip()
 
65
  try:
66
- obj = json.loads(cleaned)
67
- if isinstance(obj, dict) and "reasoning" in obj:
 
 
 
 
 
 
 
 
 
68
  return {
69
  "reasoning": str(obj.get("reasoning", "")),
70
- "answer": str(obj.get("answer", "")),
71
- "confidence": float(max(0, min(1, obj.get("confidence", 0.5)))),
72
  "abstain": bool(obj.get("abstain", False)),
73
  "is_final": bool(obj.get("is_final", True)),
74
  }
75
- except:
76
- pass
77
- # Try to extract answer from plain text patterns
78
- answer = ""
79
- m = re.search(r'"answer"\s*:\s*"([^"]+)"', cleaned)
80
- if m:
81
- answer = m.group(1)
82
- elif re.search(r'\b(yes|no|true|false)\b', cleaned, re.I):
83
- answer = re.search(r'\b(yes|no|true|false)\b', cleaned, re.I).group(0)
84
- if answer:
85
- return {"reasoning": cleaned[:100], "answer": answer, "confidence": 0.4, "abstain": False, "is_final": True}
86
  return {"reasoning":"","answer":"","confidence":0.0,"abstain":True,"is_final":True}
87
 
88
  def evaluate_model(model_name, label, n_episodes=30, is_trained=False):
 
61
  import re
62
 
63
  def parse_action(text):
64
+ text = re.sub(r"```(?:json)?\s*", "", text).strip()
65
+ text = re.sub(r"```\s*$", "", text).strip()
66
  try:
67
+ obj = json.loads(text)
68
+ if isinstance(obj, dict) and ("answer" in obj or "reasoning" in obj):
69
+ answer = obj.get("answer", "")
70
+ if isinstance(answer, (int, float)):
71
+ answer = str(answer)
72
+ else:
73
+ answer = str(answer)
74
+
75
+ confidence = float(obj.get("confidence", 0.5))
76
+ confidence = max(0.0, min(1.0, confidence))
77
+
78
  return {
79
  "reasoning": str(obj.get("reasoning", "")),
80
+ "answer": answer,
81
+ "confidence": confidence,
82
  "abstain": bool(obj.get("abstain", False)),
83
  "is_final": bool(obj.get("is_final", True)),
84
  }
85
+ except Exception as e:
86
+ print(f"Parse error: {e}, text: {text[:100]}")
 
 
 
 
 
 
 
 
 
87
  return {"reasoning":"","answer":"","confidence":0.0,"abstain":True,"is_final":True}
88
 
89
  def evaluate_model(model_name, label, n_episodes=30, is_trained=False):