Rohan03
/

purpose-agent

@@ -394,33 +394,52 @@ class PurposeFunction:
     def _fallback_evaluate(self, messages: list[ChatMessage]) -> dict[str, Any]:
         """Text-based fallback when structured output is unavailable."""
-        raw = self.llm.generate(messages, temperature=0.2)
         import re
         phi_before = 0.0
         phi_after = 0.0
         # Try to extract scores from text
         before_match = re.search(r'[Φφ]\s*\(?state_?before\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
         after_match = re.search(r'[Φφ]\s*\(?state_?after\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
         if before_match:
-            phi_before = float(before_match.group(1))
         if after_match:
-            phi_after = float(after_match.group(1))
-        # Also try "Score: X/10" patterns
-        if not before_match:
-            score_matches = re.findall(r'(\d+\.?\d*)\s*/?\s*10', raw)
             if len(score_matches) >= 2:
-                phi_before = float(score_matches[0])
-                phi_after = float(score_matches[1])
             elif len(score_matches) == 1:
-                phi_after = float(score_matches[0])
         confidence_match = re.search(r'confidence\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
-        confidence = float(confidence_match.group(1)) if confidence_match else 0.4
         return {
             "phi_before": phi_before,

     def _fallback_evaluate(self, messages: list[ChatMessage]) -> dict[str, Any]:
         """Text-based fallback when structured output is unavailable."""
+        raw = self.llm.generate(messages, temperature=0.2, max_tokens=2000)
         import re
+        def safe_float(s, default=0.0):
+            """Parse float from string, handling trailing dots and garbage."""
+            try:
+                return float(s.rstrip('.'))
+            except (ValueError, TypeError):
+                return default
         phi_before = 0.0
         phi_after = 0.0
+        # Try to extract JSON block first (most reliable)
+        json_match = re.search(r'\{[^{}]*"phi_before"[^{}]*\}', raw, re.DOTALL)
+        if json_match:
+            try:
+                parsed = json.loads(json_match.group())
+                return parsed
+            except (json.JSONDecodeError, ValueError):
+                pass
         # Try to extract scores from text
         before_match = re.search(r'[Φφ]\s*\(?state_?before\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
         after_match = re.search(r'[Φφ]\s*\(?state_?after\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
         if before_match:
+            phi_before = safe_float(before_match.group(1))
         if after_match:
+            phi_after = safe_float(after_match.group(1))
+        # Also try "Score: X/10" patterns (only if no Φ markers were found)
+        if not before_match and not after_match:
+            score_matches = re.findall(r'(\d+\.?\d*)\s*/\s*10', raw)  # require explicit /10
             if len(score_matches) >= 2:
+                phi_before = safe_float(score_matches[0])
+                phi_after = safe_float(score_matches[1])
             elif len(score_matches) == 1:
+                phi_after = safe_float(score_matches[0])
+        # If no scores found, return conservative defaults (don't guess from random numbers)
+        # This is honest: if the LLM didn't produce parseable scores, admit uncertainty
         confidence_match = re.search(r'confidence\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
+        confidence = safe_float(confidence_match.group(1), 0.4) if confidence_match else 0.4
         return {
             "phi_before": phi_before,