Spaces:

Vikaspandey582003
/

echo-ultimate

Sleeping

App Files Files Community

echo-ultimate / env /parser.py

Vikaspandey582003

Upload folder using huggingface_hub

acb327b verified 13 days ago

raw

history blame contribute delete

10.7 kB

	"""
	ECHO ULTIMATE — Robust <confidence><answer> parser.
	Handles 15+ edge cases. NEVER crashes. Always returns a ParseResult.
	"""

	import re
	import logging
	from dataclasses import dataclass, field
	from typing import Optional

	logger = logging.getLogger(__name__)

	# ── Regex patterns ────────────────────────────────────────────────────────────
	_CONF_TAG_RE = re.compile(r"<confidence>\s([^<]?)\s*</confidence>", re.IGNORECASE \| re.DOTALL)
	_ANS_TAG_RE = re.compile(r"<answer>\s(.?)\s*</answer>", re.IGNORECASE \| re.DOTALL)
	_NUM_RE = re.compile(r"-?\d+(?:\.\d+)?")
	_QUOTES_RE = re.compile(r'^["\'](.+)["\']$', re.DOTALL)

	# Verbal confidence map
	_VERBAL_MAP = {
	"very sure": 90, "very certain": 90, "extremely sure": 95, "absolutely sure": 98,
	"certain": 88, "confident": 78, "sure": 75, "fairly sure": 70,
	"somewhat sure": 60, "unsure": 35, "uncertain": 30, "not sure": 25,
	"very unsure": 15, "very uncertain": 15, "no idea": 5, "no clue": 5,
	"high": 85, "medium": 50, "low": 25, "moderate": 55,
	"probably": 65, "likely": 65, "unlikely": 30, "doubtful": 20,
	}

	DEFAULT_CONFIDENCE = 50


	@dataclass
	class ParseResult:
	"""Result of parsing one LLM response."""
	confidence: int = DEFAULT_CONFIDENCE
	answer: str = ""
	parse_success: bool = False
	confidence_source: str = "default" # "tag"\|"default"\|"clipped"\|"inferred"\|"verbal"
	answer_source: str = "empty" # "tag"\|"last_sentence"\|"full_text"\|"empty"
	is_abstention: bool = False # True if answer is "I don't know"
	raw: str = ""


	# ── Confidence extraction ─────────────────────────────────────────────────────

	def _extract_confidence(text: str) -> tuple[int, str]:
	"""Return (confidence_int, source_label). Never raises."""
	matches = _CONF_TAG_RE.findall(text)
	if not matches:
	return DEFAULT_CONFIDENCE, "default"

	raw = matches[0].strip() # use first match only (edge case 8)

	if not raw:
	return DEFAULT_CONFIDENCE, "default"

	# Edge case 6: verbal confidence
	raw_lower = raw.lower()
	for phrase, val in _VERBAL_MAP.items():
	if phrase in raw_lower:
	return val, "verbal"

	# Edge case 7 + 10 + 11: float / out-of-range number
	nums = _NUM_RE.findall(raw.replace(",", ""))
	if nums:
	try:
	val = round(float(nums[0]))
	clipped = max(0, min(100, val))
	source = "clipped" if clipped != val else "tag"
	return clipped, source
	except ValueError:
	pass

	return DEFAULT_CONFIDENCE, "default"


	# ── Answer extraction ─────────────────────────────────────────────────────────

	def _extract_answer(text: str) -> tuple[str, str]:
	"""Return (answer_str, source_label). Never raises."""
	matches = _ANS_TAG_RE.findall(text)
	if matches:
	raw_ans = matches[0].strip()

	# Edge case 13: strip surrounding quotes
	m = _QUOTES_RE.match(raw_ans)
	if m:
	raw_ans = m.group(1).strip()

	return raw_ans, "tag"

	# No answer tag — fall back to text after </confidence>
	after_conf = re.split(r"</confidence>", text, flags=re.IGNORECASE, maxsplit=1)
	if len(after_conf) > 1:
	tail = after_conf[1].strip()
	# Remove any remaining tags
	tail = re.sub(r"<[^>]+>", " ", tail).strip()
	if tail:
	return tail, "full_text"

	# Last sentence fallback
	clean = re.sub(r"<[^>]+>.*?</[^>]+>", " ", text, flags=re.DOTALL)
	clean = re.sub(r"<[^>]+>", " ", clean).strip()
	sentences = [s.strip() for s in re.split(r"[.!?]", clean) if s.strip()]
	if sentences:
	return sentences[-1], "last_sentence"

	return "", "empty"


	# ── Main parse function ───────────────────────────────────────────────────────

	def parse_response(text) -> ParseResult:
	"""
	Parse an LLM response into confidence and answer.

	Handles edge cases:
	1. Perfect format
	2. Reversed tags
	3. No confidence tag → default 50
	4. No answer tag → extract from remaining text
	5. Confidence out of range → clip to [0,100]
	6. Verbal confidence ("high", "low", "very sure") → mapped to int
	7. Float confidence → rounded
	8. Multiple tags → first occurrence
	9. Nested tags → regex extracts correctly
	10. Confidence > 100 → clipped to 100
	11. Negative confidence → clipped to 0
	12. Empty answer → empty string
	13. Answer with quotes → stripped
	14. "I don't know" → is_abstention=True, confidence=5
	15. None / non-string input → safe defaults
	"""
	if text is None:
	return ParseResult(raw="")

	if not isinstance(text, str):
	try:
	text = str(text)
	except Exception:
	return ParseResult(raw="")

	conf, conf_src = _extract_confidence(text)
	ans, ans_src = _extract_answer(text)

	# Edge case 14: abstention detection
	is_abstention = False
	if ans and any(phrase in ans.lower() for phrase in
	["i don't know", "i do not know", "i'm not sure", "no idea", "don't know"]):
	is_abstention = True
	conf = min(conf, 10)
	conf_src = "inferred"

	parse_success = (conf_src == "tag" or conf_src == "verbal") and ans_src == "tag"

	return ParseResult(
	confidence=conf,
	answer=ans,
	parse_success=parse_success,
	confidence_source=conf_src,
	answer_source=ans_src,
	is_abstention=is_abstention,
	raw=text,
	)


	# ── Prompt formatting ─────────────────────────────────────────────────────────

	def format_prompt(
	question: str,
	domain: str,
	difficulty: str = "medium",
	show_difficulty: bool = True,
	) -> str:
	"""
	Build a formatted prompt combining the system instruction + question.

	Args:
	show_difficulty: Phase 1 shows difficulty; Phase 2+ hides it.
	"""
	from config import cfg

	domain_hints = {
	"math": "This is a math problem. Give a numeric answer.",
	"logic": "This is a logic/reasoning question. Give the letter (A/B/C/D).",
	"factual": "This is a factual question. Give a concise text answer.",
	"science": "This is a science question. Give the letter or a concise answer.",
	"medical": "This is a medical question. Give the letter (A/B/C/D).",
	"coding": "This is a coding question. Give a concise answer.",
	"creative": "This is a creative question. Give a short text answer.",
	}
	hint = domain_hints.get(domain, "Give a concise answer.")

	diff_str = f" [{difficulty.upper()}]" if show_difficulty else ""
	header = f"Domain: {domain.capitalize()}{diff_str}\n{hint}\n\n"

	return f"{cfg.SYSTEM_PROMPT}\n\n{header}Question: {question}"


	# ── Self-tests ────────────────────────────────────────────────────────────────

	if __name__ == "__main__":
	failures = []

	def check(text, exp_conf, exp_ans, label, exp_abst=False):
	r = parse_response(text)
	ok = True
	if exp_conf is not None and r.confidence != exp_conf:
	failures.append(f"[{label}] confidence: expected {exp_conf}, got {r.confidence}")
	ok = False
	if exp_ans is not None and r.answer != exp_ans:
	failures.append(f"[{label}] answer: expected '{exp_ans}', got '{r.answer}'")
	ok = False
	if r.is_abstention != exp_abst:
	failures.append(f"[{label}] is_abstention: expected {exp_abst}, got {r.is_abstention}")
	ok = False
	if ok:
	print(f" ✅ {label}")

	print("Running ECHO Ultimate parser tests…")

	check("<confidence>75</confidence><answer>Paris</answer>", 75, "Paris", "1. perfect format")
	check("<answer>Paris</answer><confidence>75</confidence>", 75, "Paris", "2. reversed tags")
	check("<answer>London</answer>", DEFAULT_CONFIDENCE, "London", "3. no confidence tag")
	check("<confidence>55</confidence>", 55, None, "4. no answer tag")
	check("<confidence>150</confidence><answer>x</answer>", 100, "x", "5. confidence clipped high")
	check("<confidence>high</confidence><answer>Paris</answer>", 85, "Paris", "6. verbal 'high'")
	check("<confidence>very sure</confidence><answer>yes</answer>", 90, "yes", "6b. verbal 'very sure'")
	check("<confidence>73.6</confidence><answer>42</answer>", 74, "42", "7. float confidence")
	check("<confidence>80</confidence><answer>A</answer><confidence>30</confidence>", 80, "A", "8. multiple tags")
	check("<confidence>95</confidence><answer>Rome</answer>", 95, "Rome", "9. normal nested")
	check("<confidence>200</confidence><answer>x</answer>", 100, "x", "10. > 100 clipped")
	check("<confidence>-5</confidence><answer>x</answer>", 0, "x", "11. negative clipped")
	check("<confidence>50</confidence><answer></answer>", 50, "", "12. empty answer")
	check('<confidence>70</confidence><answer>"Paris"</answer>', 70, "Paris", "13. quoted answer")
	r14 = parse_response("<confidence>80</confidence><answer>I don't know</answer>")
	assert r14.is_abstention, "14. abstention flag"
	assert r14.confidence <= 10, "14. abstention confidence"
	print(" ✅ 14. I don't know → abstention=True, conf≤10")
	check(None, DEFAULT_CONFIDENCE, "", "15. None input")
	check(42, DEFAULT_CONFIDENCE, None, "15b. int input")
	check("", DEFAULT_CONFIDENCE, "", "15c. empty string")
	check(" <confidence> 60 </confidence> <answer> Berlin </answer> ", 60, "Berlin", "whitespace trimmed")
	check("<CONFIDENCE>80</CONFIDENCE><ANSWER>Rome</ANSWER>", 80, "Rome", "uppercase tags")
	check("<confidence>50</confidence><answer>The Eiffel Tower</answer>", 50, "The Eiffel Tower", "multi-word answer")

	if failures:
	print("\n❌ FAILURES:")
	for f in failures:
	print(f" {f}")
	else:
	print("\n✅ All parser tests passed.")