Rohan03 committed on
Commit
22fb57a
·
verified ·
1 Parent(s): 0b10145

fix: real-model robustness — purpose_agent/purpose_function.py

Browse files
Files changed (1) hide show
  1. purpose_agent/purpose_function.py +29 -10
purpose_agent/purpose_function.py CHANGED
@@ -394,33 +394,52 @@ class PurposeFunction:
394
 
395
  def _fallback_evaluate(self, messages: list[ChatMessage]) -> dict[str, Any]:
396
  """Text-based fallback when structured output is unavailable."""
397
- raw = self.llm.generate(messages, temperature=0.2)
398
 
399
  import re
400
 
 
 
 
 
 
 
 
401
  phi_before = 0.0
402
  phi_after = 0.0
403
 
 
 
 
 
 
 
 
 
 
404
  # Try to extract scores from text
405
  before_match = re.search(r'[Φφ]\s*\(?state_?before\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
406
  after_match = re.search(r'[Φφ]\s*\(?state_?after\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
407
 
408
  if before_match:
409
- phi_before = float(before_match.group(1))
410
  if after_match:
411
- phi_after = float(after_match.group(1))
412
 
413
- # Also try "Score: X/10" patterns
414
- if not before_match:
415
- score_matches = re.findall(r'(\d+\.?\d*)\s*/?\s*10', raw)
416
  if len(score_matches) >= 2:
417
- phi_before = float(score_matches[0])
418
- phi_after = float(score_matches[1])
419
  elif len(score_matches) == 1:
420
- phi_after = float(score_matches[0])
 
 
 
421
 
422
  confidence_match = re.search(r'confidence\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
423
- confidence = float(confidence_match.group(1)) if confidence_match else 0.4
424
 
425
  return {
426
  "phi_before": phi_before,
 
394
 
395
  def _fallback_evaluate(self, messages: list[ChatMessage]) -> dict[str, Any]:
396
  """Text-based fallback when structured output is unavailable."""
397
+ raw = self.llm.generate(messages, temperature=0.2, max_tokens=2000)
398
 
399
  import re
400
 
401
+ def safe_float(s, default=0.0):
402
+ """Parse float from string, handling trailing dots and garbage."""
403
+ try:
404
+ return float(s.rstrip('.'))
405
+ except (ValueError, TypeError):
406
+ return default
407
+
408
  phi_before = 0.0
409
  phi_after = 0.0
410
 
411
+ # Try to extract JSON block first (most reliable)
412
+ json_match = re.search(r'\{[^{}]*"phi_before"[^{}]*\}', raw, re.DOTALL)
413
+ if json_match:
414
+ try:
415
+ parsed = json.loads(json_match.group())
416
+ return parsed
417
+ except (json.JSONDecodeError, ValueError):
418
+ pass
419
+
420
  # Try to extract scores from text
421
  before_match = re.search(r'[Φφ]\s*\(?state_?before\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
422
  after_match = re.search(r'[Φφ]\s*\(?state_?after\)?\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
423
 
424
  if before_match:
425
+ phi_before = safe_float(before_match.group(1))
426
  if after_match:
427
+ phi_after = safe_float(after_match.group(1))
428
 
429
+ # Also try "Score: X/10" patterns (only if no Φ markers were found)
430
+ if not before_match and not after_match:
431
+ score_matches = re.findall(r'(\d+\.?\d*)\s*/\s*10', raw) # require explicit /10
432
  if len(score_matches) >= 2:
433
+ phi_before = safe_float(score_matches[0])
434
+ phi_after = safe_float(score_matches[1])
435
  elif len(score_matches) == 1:
436
+ phi_after = safe_float(score_matches[0])
437
+
438
+ # If no scores found, return conservative defaults (don't guess from random numbers)
439
+ # This is honest: if the LLM didn't produce parseable scores, admit uncertainty
440
 
441
  confidence_match = re.search(r'confidence\s*[=:]\s*([\d.]+)', raw, re.IGNORECASE)
442
+ confidence = safe_float(confidence_match.group(1), 0.4) if confidence_match else 0.4
443
 
444
  return {
445
  "phi_before": phi_before,