Spaces:

Ajsaxena
/

deceit1

Paused

Jayant-Kernel committed 13 days ago

Commit

66bdd16

1 Parent(s): 3d9195a

fix: parse_action confidence bug, numeric answers bug, missing reasoning field bug

Files changed (1) hide show

evaluate.py CHANGED Viewed

@@ -61,28 +61,29 @@ SYSTEM_PROMPT = """You are answering factual questions. Respond ONLY with a JSON
 import re
 def parse_action(text):
-    cleaned = re.sub(r"```(?:json)?\s*", "", text).strip()
     try:
-        obj = json.loads(cleaned)
-        if isinstance(obj, dict) and "reasoning" in obj:
             return {
                 "reasoning": str(obj.get("reasoning", "")),
-                "answer": str(obj.get("answer", "")),
-                "confidence": float(max(0, min(1, obj.get("confidence", 0.5)))),
                 "abstain": bool(obj.get("abstain", False)),
                 "is_final": bool(obj.get("is_final", True)),
             }
-    except:
-        pass
-    # Try to extract answer from plain text patterns
-    answer = ""
-    m = re.search(r'"answer"\s*:\s*"([^"]+)"', cleaned)
-    if m:
-        answer = m.group(1)
-    elif re.search(r'\b(yes|no|true|false)\b', cleaned, re.I):
-        answer = re.search(r'\b(yes|no|true|false)\b', cleaned, re.I).group(0)
-    if answer:
-        return {"reasoning": cleaned[:100], "answer": answer, "confidence": 0.4, "abstain": False, "is_final": True}
     return {"reasoning":"","answer":"","confidence":0.0,"abstain":True,"is_final":True}
 def evaluate_model(model_name, label, n_episodes=30, is_trained=False):

 import re
 def parse_action(text):
+    text = re.sub(r"```(?:json)?\s*", "", text).strip()
+    text = re.sub(r"```\s*$", "", text).strip()
     try:
+        obj = json.loads(text)
+        if isinstance(obj, dict) and ("answer" in obj or "reasoning" in obj):
+            answer = obj.get("answer", "")
+            if isinstance(answer, (int, float)):
+                answer = str(answer)
+            else:
+                answer = str(answer)
+            confidence = float(obj.get("confidence", 0.5))
+            confidence = max(0.0, min(1.0, confidence))
             return {
                 "reasoning": str(obj.get("reasoning", "")),
+                "answer": answer,
+                "confidence": confidence,
                 "abstain": bool(obj.get("abstain", False)),
                 "is_final": bool(obj.get("is_final", True)),
             }
+    except Exception as e:
+        print(f"Parse error: {e}, text: {text[:100]}")
     return {"reasoning":"","answer":"","confidence":0.0,"abstain":True,"is_final":True}
 def evaluate_model(model_name, label, n_episodes=30, is_trained=False):