| """ |
| Critic Agent |
| |
| Validates generated answers for hallucination and factual accuracy. |
| Follows FAANG best practices for production RAG systems. |
| |
| Key Features: |
| - Hallucination detection |
| - Citation verification |
| - Factual consistency checking |
| - Confidence scoring |
| - Actionable feedback for self-correction |
| """ |
|
|
| from typing import List, Optional, Dict, Any, Tuple |
| from pydantic import BaseModel, Field |
| from loguru import logger |
| from enum import Enum |
| import json |
| import re |
|
|
| try: |
| import httpx |
| HTTPX_AVAILABLE = True |
| except ImportError: |
| HTTPX_AVAILABLE = False |
|
|
| from .synthesizer import SynthesisResult, Citation |
| from .reranker import RankedResult |
|
|
|
|
class IssueType(str, Enum):
    """Types of validation issues.

    Inherits from ``str`` so values serialize naturally in JSON/pydantic.
    """
    HALLUCINATION = "hallucination"            # claim with no support in the sources
    UNSUPPORTED_CLAIM = "unsupported_claim"    # partially supported claim, or an answer with no citations
    INCORRECT_CITATION = "incorrect_citation"  # [N] marker does not match source N (or N doesn't exist)
    CONTRADICTION = "contradiction"            # answer contradicts itself
    INCOMPLETE = "incomplete"                  # answer looks too short given the available sources
    FACTUAL_ERROR = "factual_error"            # NOTE(review): not emitted anywhere in this module yet
|
|
|
|
class ValidationIssue(BaseModel):
    """A single validation issue found in a generated answer."""
    # Category of the problem (see IssueType).
    issue_type: IssueType
    # How serious the issue is: 0.0 (cosmetic) .. 1.0 (critical).
    severity: float = Field(ge=0.0, le=1.0)
    # Human-readable explanation of the issue.
    description: str
    # The exact answer text the issue refers to, when identifiable.
    problematic_text: Optional[str] = None
    # Actionable fix hint; surfaced via revision suggestions.
    suggestion: Optional[str] = None
    # 1-based index of the offending citation, for citation issues only.
    citation_index: Optional[int] = None
|
|
|
|
class CriticResult(BaseModel):
    """Result of answer validation."""
    # True when all configured threshold checks passed.
    is_valid: bool
    # Weighted overall confidence (0.4/0.4/0.2 mix of the scores below).
    confidence: float
    # Every issue discovered across all validation passes.
    issues: List[ValidationIssue]

    # Per-dimension scores. hallucination_score is "badness" (higher = worse);
    # citation_accuracy and factual_consistency are "goodness" (higher = better).
    hallucination_score: float = Field(ge=0.0, le=1.0)
    citation_accuracy: float = Field(ge=0.0, le=1.0)
    factual_consistency: float = Field(ge=0.0, le=1.0)

    # Set when the answer failed validation and there are concrete issues.
    needs_revision: bool = False
    # Up to a handful of actionable hints for a self-correction pass.
    revision_suggestions: List[str] = Field(default_factory=list)
|
|
|
|
class CriticConfig(BaseModel):
    """Configuration for critic agent."""

    # Ollama model and endpoint used for the LLM-based checks.
    model: str = Field(default="llama3.2:3b")
    base_url: str = Field(default="http://localhost:11434")
    # Low temperature keeps the judge's verdicts near-deterministic.
    temperature: float = Field(default=0.1)

    # Acceptance thresholds: hallucination_score must stay strictly BELOW its
    # threshold; citation accuracy and overall confidence must meet or exceed theirs.
    hallucination_threshold: float = Field(default=0.3)
    citation_accuracy_threshold: float = Field(default=0.7)
    overall_confidence_threshold: float = Field(default=0.6)

    # Feature toggles for the individual validation passes.
    check_hallucination: bool = Field(default=True)
    check_citations: bool = Field(default=True)
    check_consistency: bool = Field(default=True)
|
|
|
|
class CriticAgent:
    """
    Validates generated answers for quality and accuracy.

    Capabilities:
    1. Hallucination detection (LLM judge, with a lexical heuristic fallback)
    2. Citation verification
    3. Factual consistency checking
    4. Actionable revision suggestions
    """

    # Prompt asking the LLM to label each claim in the answer as
    # SUPPORTED / PARTIALLY_SUPPORTED / UNSUPPORTED against the sources.
    HALLUCINATION_PROMPT = """Analyze this answer for hallucination - information NOT supported by the provided sources.

SOURCES:
{sources}

ANSWER:
{answer}

For each claim in the answer, determine if it is:
1. SUPPORTED - Directly supported by the sources
2. PARTIALLY_SUPPORTED - Somewhat supported but with additions
3. UNSUPPORTED - Not found in sources (hallucination)

Respond with JSON:
{{
    "claims": [
        {{"text": "claim text", "status": "SUPPORTED|PARTIALLY_SUPPORTED|UNSUPPORTED", "source_index": 1 or null}}
    ],
    "hallucination_score": 0.0-1.0,
    "issues": ["list of specific issues found"]
}}"""

    # Prompt asking the LLM to verify each inline [N] citation against source N.
    CITATION_PROMPT = """Verify that each citation in this answer correctly references the source material.

SOURCES:
{sources}

ANSWER WITH CITATIONS:
{answer}

For each citation [N], check if the claim it supports is actually in source N.

Respond with JSON:
{{
    "citation_checks": [
        {{"citation_index": 1, "is_accurate": true/false, "reason": "explanation"}}
    ],
    "overall_accuracy": 0.0-1.0
}}"""

    def __init__(self, config: Optional[CriticConfig] = None):
        """
        Initialize Critic Agent.

        Args:
            config: Critic configuration; defaults to CriticConfig().
        """
        self.config = config or CriticConfig()
        logger.info(f"CriticAgent initialized (model={self.config.model})")

    def validate(
        self,
        synthesis_result: SynthesisResult,
        sources: List[RankedResult],
    ) -> CriticResult:
        """
        Validate a synthesized answer.

        Args:
            synthesis_result: The generated answer with citations
            sources: Source chunks used for generation

        Returns:
            CriticResult with validation details
        """
        issues: List[ValidationIssue] = []
        hallucination_score = 0.0
        citation_accuracy = 1.0
        factual_consistency = 1.0

        # An abstained answer makes no claims, so there is nothing to check.
        if synthesis_result.abstained:
            return CriticResult(
                is_valid=True,
                confidence=1.0,
                issues=[],
                hallucination_score=0.0,
                citation_accuracy=1.0,
                factual_consistency=1.0,
            )

        # Hallucination check: prefer the LLM judge, but fall back to the
        # lexical heuristic when httpx (and hence the LLM) is unavailable.
        # BUGFIX: previously the whole check was silently skipped without httpx.
        if self.config.check_hallucination:
            if HTTPX_AVAILABLE:
                h_score, h_issues = self._check_hallucination(
                    synthesis_result.answer,
                    sources,
                )
            else:
                h_score, h_issues = self._heuristic_hallucination_check(
                    synthesis_result.answer,
                    sources,
                )
            hallucination_score = h_score
            issues.extend(h_issues)

        # Citation check. BUGFIX: this now runs even when the answer carries no
        # citations, so _check_citations can flag the missing-citation case —
        # the old `and synthesis_result.citations` guard made that branch
        # unreachable and gave citation-free answers a perfect accuracy of 1.0.
        if self.config.check_citations:
            c_accuracy, c_issues = self._check_citations(
                synthesis_result.answer,
                synthesis_result.citations,
                sources,
            )
            citation_accuracy = c_accuracy
            issues.extend(c_issues)

        if self.config.check_consistency:
            f_score, f_issues = self._check_consistency(
                synthesis_result.answer,
                sources,
            )
            factual_consistency = f_score
            issues.extend(f_issues)

        # Weighted aggregate: hallucination and citations dominate (0.4 each),
        # consistency contributes the remaining 0.2.
        confidence = (
            0.4 * (1 - hallucination_score) +
            0.4 * citation_accuracy +
            0.2 * factual_consistency
        )

        # Every threshold must pass for the answer to be accepted as-is.
        is_valid = (
            hallucination_score < self.config.hallucination_threshold and
            citation_accuracy >= self.config.citation_accuracy_threshold and
            confidence >= self.config.overall_confidence_threshold
        )

        needs_revision = not is_valid and len(issues) > 0
        revision_suggestions = self._generate_revision_suggestions(issues) if needs_revision else []

        return CriticResult(
            is_valid=is_valid,
            confidence=confidence,
            issues=issues,
            hallucination_score=hallucination_score,
            citation_accuracy=citation_accuracy,
            factual_consistency=factual_consistency,
            needs_revision=needs_revision,
            revision_suggestions=revision_suggestions,
        )

    @staticmethod
    def _coerce_score(value: Any, default: float) -> float:
        """Coerce an LLM-reported score to a float clamped to [0.0, 1.0].

        LLMs occasionally emit scores as strings ("0.3") or out-of-range
        numbers; ``default`` is returned when the value is not numeric at all.
        """
        try:
            score = float(value)
        except (TypeError, ValueError):
            return default
        return min(max(score, 0.0), 1.0)

    def _check_hallucination(
        self,
        answer: str,
        sources: List[RankedResult],
    ) -> Tuple[float, List[ValidationIssue]]:
        """Check for hallucination using the LLM judge.

        Falls back to the lexical heuristic on any transport or parse error.

        Returns:
            (hallucination_score in [0, 1], list of issues)
        """
        source_text = self._format_sources(sources)
        prompt = self.HALLUCINATION_PROMPT.format(
            sources=source_text,
            answer=answer,
        )

        try:
            with httpx.Client(timeout=30.0) as client:
                response = client.post(
                    f"{self.config.base_url}/api/generate",
                    json={
                        "model": self.config.model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {
                            "temperature": self.config.temperature,
                            "num_predict": 1024,
                        },
                    },
                )
                response.raise_for_status()
                result = response.json()

            response_text = result.get("response", "")
            data = self._parse_json_response(response_text)

            # Coerce/clamp: the model may return "0.3" or values outside [0, 1],
            # which would break the `<` threshold comparison downstream.
            hallucination_score = self._coerce_score(
                data.get("hallucination_score"), 0.0
            )

            issues = []
            for claim in data.get("claims", []):
                status = claim.get("status")
                if status == "UNSUPPORTED":
                    issues.append(ValidationIssue(
                        issue_type=IssueType.HALLUCINATION,
                        severity=0.8,
                        description=f"Unsupported claim: {claim.get('text', '')}",
                        problematic_text=claim.get("text"),
                        suggestion="Remove or find supporting source",
                    ))
                elif status == "PARTIALLY_SUPPORTED":
                    issues.append(ValidationIssue(
                        issue_type=IssueType.UNSUPPORTED_CLAIM,
                        severity=0.4,
                        description=f"Partially supported: {claim.get('text', '')}",
                        problematic_text=claim.get("text"),
                        suggestion="Verify claim against source",
                    ))

            return hallucination_score, issues

        except Exception as e:
            # Best-effort by design: degrade to the heuristic, never crash validation.
            logger.warning(f"Hallucination check failed: {e}")
            return self._heuristic_hallucination_check(answer, sources)

    def _heuristic_hallucination_check(
        self,
        answer: str,
        sources: List[RankedResult],
    ) -> Tuple[float, List[ValidationIssue]]:
        """Lexical fallback: flag capitalized words in the answer that never
        appear in any source as likely hallucinated entities.

        Crude (no NER; misses lowercase facts), but dependency-free.

        Returns:
            (hallucination_score in [0, 1], list of issues, at most 3)
        """
        # Capitalized single words approximate named entities.
        answer_words = set(re.findall(r'\b[A-Z][a-z]+\b', answer))
        source_words = set(re.findall(r'\b[A-Z][a-z]+\b', " ".join(s.text for s in sources)))

        # Discourse / sentence-starter words that are capitalized but not entities.
        common_words = {"The", "This", "That", "However", "Therefore", "Additionally", "Based", "According"}
        unsupported_entities = (answer_words - source_words) - common_words

        # Report at most 3; sorted so the output is deterministic
        # (iterating a raw set made the reported entities order-dependent).
        issues = [
            ValidationIssue(
                issue_type=IssueType.HALLUCINATION,
                severity=0.5,
                description=f"Entity '{entity}' not found in sources",
                problematic_text=entity,
            )
            for entity in sorted(unsupported_entities)[:3]
        ]

        # Fraction of answer "entities" with no source support.
        score = len(unsupported_entities) / len(answer_words) if answer_words else 0.0
        return min(score, 1.0), issues

    def _check_citations(
        self,
        answer: str,
        citations: List[Citation],
        sources: List[RankedResult],
    ) -> Tuple[float, List[ValidationIssue]]:
        """Verify citation accuracy (LLM judge, with a structural regex fallback).

        Returns:
            (accuracy in [0, 1], list of issues)
        """
        if not citations:
            # A fully uncited answer cannot be traced back to its sources.
            return 0.0, [ValidationIssue(
                issue_type=IssueType.UNSUPPORTED_CLAIM,
                severity=0.6,
                description="Answer contains no citations",
                suggestion="Add citations to support claims",
            )]

        source_text = self._format_sources(sources)

        if HTTPX_AVAILABLE:
            try:
                prompt = self.CITATION_PROMPT.format(
                    sources=source_text,
                    answer=answer,
                )

                with httpx.Client(timeout=30.0) as client:
                    response = client.post(
                        f"{self.config.base_url}/api/generate",
                        json={
                            "model": self.config.model,
                            "prompt": prompt,
                            "stream": False,
                            "options": {
                                "temperature": self.config.temperature,
                                "num_predict": 512,
                            },
                        },
                    )
                    response.raise_for_status()
                    result = response.json()

                response_text = result.get("response", "")
                data = self._parse_json_response(response_text)

                # Coerce/clamp in case the model returns a string or junk value.
                accuracy = self._coerce_score(data.get("overall_accuracy"), 1.0)

                issues = []
                for check in data.get("citation_checks", []):
                    if not check.get("is_accurate", True):
                        issues.append(ValidationIssue(
                            issue_type=IssueType.INCORRECT_CITATION,
                            severity=0.6,
                            description=f"Citation [{check.get('citation_index')}]: {check.get('reason', 'Inaccurate')}",
                            citation_index=check.get("citation_index"),
                            suggestion="Verify citation matches source",
                        ))

                return accuracy, issues

            except Exception as e:
                # Fall through to the structural check below.
                logger.warning(f"Citation check failed: {e}")

        # Fallback: structurally verify every inline [N] points at a real source.
        used_citations = {int(m) for m in re.findall(r'\[(\d+)\]', answer)}

        if not used_citations:
            # Citation objects exist but none appear inline; weak, neutral signal.
            return 0.5, []

        valid_indices = set(range(1, len(sources) + 1))
        invalid = used_citations - valid_indices

        issues = [
            ValidationIssue(
                issue_type=IssueType.INCORRECT_CITATION,
                severity=0.7,
                description=f"Citation [{idx}] references non-existent source",
                citation_index=idx,
            )
            for idx in sorted(invalid)
        ]

        # used_citations is guaranteed non-empty here, so the division is safe.
        accuracy = 1.0 - (len(invalid) / len(used_citations))
        return accuracy, issues

    def _check_consistency(
        self,
        answer: str,
        sources: List[RankedResult],
    ) -> Tuple[float, List[ValidationIssue]]:
        """Check for internal contradictions and obvious incompleteness.

        Returns:
            (consistency score in [0, 1], list of issues)
        """
        issues = []

        for contradiction in self._detect_contradictions(answer):
            issues.append(ValidationIssue(
                issue_type=IssueType.CONTRADICTION,
                severity=0.7,
                description=contradiction,
            ))

        # A very short answer despite available sources suggests incompleteness.
        if len(answer) < 50 and len(sources) > 0:
            issues.append(ValidationIssue(
                issue_type=IssueType.INCOMPLETE,
                severity=0.4,
                description="Answer may be incomplete given available sources",
                suggestion="Expand answer to include more relevant information",
            ))

        # Each issue costs 0.2, floored at 0.0.
        score = 1.0 - (0.2 * len(issues))
        return max(score, 0.0), issues

    def _detect_contradictions(self, text: str) -> List[str]:
        """Detect internal contradictions in *text*.

        NOTE(review): placeholder. The original sentence scan looked for
        discourse markers ("however", "but", "although") but never recorded
        anything, so it was behaviorally an empty check; the dead loop has
        been removed. Replace with a real implementation (e.g. NLI-based)
        when one is available.
        """
        return []

    def _format_sources(self, sources: List[RankedResult]) -> str:
        """Format sources as numbered snippets, each truncated to 500 chars."""
        parts = []
        for i, source in enumerate(sources, 1):
            parts.append(f"[{i}] {source.text[:500]}")
        return "\n\n".join(parts)

    def _parse_json_response(self, text: str) -> Dict[str, Any]:
        """Extract a JSON object from an LLM response; returns {} on failure.

        Tries the raw text first, then the outermost {...} span — models often
        wrap their JSON in prose or code fences, and the greedy span alone can
        fail when trailing braces from prose are captured.
        """
        candidates = [text]
        match = re.search(r'\{[\s\S]*\}', text)
        if match:
            candidates.append(match.group())
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Guard against top-level arrays/scalars: callers expect a dict.
            if isinstance(parsed, dict):
                return parsed
        return {}

    def _generate_revision_suggestions(
        self,
        issues: List[ValidationIssue],
    ) -> List[str]:
        """Generate up to 5 unique, actionable revision suggestions.

        Deduplicates while preserving issue order — the previous
        `list(set(...))` produced a nondeterministic suggestion order.
        """
        suggestions = []
        for issue in issues:
            if issue.suggestion:
                suggestions.append(issue.suggestion)
            elif issue.issue_type == IssueType.HALLUCINATION:
                suggestions.append(
                    f"Remove or verify: {issue.problematic_text or 'unsupported claim'}"
                )
            elif issue.issue_type == IssueType.INCORRECT_CITATION:
                suggestions.append(
                    f"Fix citation [{issue.citation_index}] to match source"
                )

        return list(dict.fromkeys(suggestions))[:5]
|
|