|
|
| import spacy
|
| from spacy.tokens import Doc, Span
|
| from typing import List, Tuple
|
| import data_models
|
|
|
| from rich.console import Console
|
| import re
|
|
|
| # Module-wide Rich console; the analyzer below prints its status and warning lines through it.
| console = Console()
|
|
|
|
|
# Lower-case phrases whose presence in a sentence hints that a source is cited.
CITATION_PHRASES = {
    "according to", "study shows", "research indicates", "data suggests",
    "experts say", "report finds", "source:", "evidence shows", "demonstrates that",
    "reported by", "stated by", "cited in",
}

# Loose URL pattern: an "http(s)://" or "www." prefix followed by non-space text.
URL_REGEX = r"(?:https?://|www\.)[^\s/$.?#].[^\s]*"

_URL_RE = re.compile(URL_REGEX)

# Sentence punctuation that the greedy URL pattern swallows at the end of a match.
_URL_TRAILING_PUNCTUATION = ".,;:!?"

# An integer or decimal, optionally followed by "%". The alternation (rather
# than "%?\b") is deliberate: there is no word boundary between "%" and a
# following space/end of text, so "%?\b" could never keep the percent sign.
_NUMBER_RE = re.compile(r"\b\d+(?:\.\d+)?(?:%|\b)")

# One alternation over all citation phrases. The lookarounds demand that the
# phrase is not glued to a word character, so adjacent punctuation
# ("study shows," / "(according to ...") no longer hides a match. Phrases are
# sorted (longest first, then alphabetically) so the pattern does not depend
# on set iteration order.
_CITATION_RE = re.compile(
    r"(?<!\w)(?:"
    + "|".join(
        re.escape(phrase)
        for phrase in sorted(CITATION_PHRASES, key=lambda p: (-len(p), p))
    )
    + r")(?!\w)"
)


def has_potential_evidence_indicator(sent: "Span") -> Tuple[bool, str, str]:
    """Look for a surface-level evidence indicator in a single sentence.

    Checks are tried in a fixed priority order and the first hit wins:

    1. a URL,
    2. a number (integer/decimal, optionally with "%") or a bare "%" sign,
    3. one of the ``CITATION_PHRASES`` (case-insensitive).

    Args:
        sent: Sentence to inspect; only its ``text`` attribute is read.

    Returns:
        A ``(found, indicator_type, trigger)`` tuple. ``indicator_type`` is
        ``"URL"``, ``"Numerical Data"`` or ``"Citation Phrase"``; ``trigger``
        is the matched text (the URL without trailing sentence punctuation,
        the number including its "%" when present, or the lower-case citation
        phrase). When a "%" appears without any number the trigger is the
        placeholder ``"Number/Percentage"``. If nothing is found, or the
        sentence is blank, the result is ``(False, "", "")``.
    """
    text = sent.text
    if not text.strip():
        return False, "", ""

    url_match = _URL_RE.search(text)
    if url_match:
        # Drop the full stop / comma that ends the sentence, not the URL.
        url = url_match.group(0).rstrip(_URL_TRAILING_PUNCTUATION)
        return True, "URL", url

    number_match = _NUMBER_RE.search(text)
    if number_match or "%" in text:
        trigger = number_match.group(0) if number_match else "Number/Percentage"
        return True, "Numerical Data", trigger

    # search() returns the leftmost phrase in the sentence, so the reported
    # phrase is the same on every run even when several phrases are present.
    citation_match = _CITATION_RE.search(text.lower())
    if citation_match:
        return True, "Citation Phrase", citation_match.group(0)

    return False, "", ""
|
|
|
|
|
def _claim_snippet(text: str, limit: int = 100) -> str:
    """Return ``text`` cut to ``limit`` characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def simplified_evidence_analyzer(
    doc: Doc,
    argument_components: List[data_models.ArgumentComponent]
) -> List[data_models.Finding]:
    """Check each detected claim for a simple evidence indicator in its own sentence.

    Only components whose ``component_type`` is ``"Claim"`` are examined, and
    only the sentence the claim itself points to is inspected (V1-style,
    deliberately simple): neighbouring sentences are not considered.

    Args:
        doc: Parsed document; its ``sents`` are indexed by each claim's
            ``sentence_index``.
        argument_components: Components to scan. The position of a claim in
            this list is reported as ``linked_claim_index``.

    Returns:
        One finding per claim with a valid sentence index:

        * ``"EvidenceIndicator"`` (severity ``"Info"``) when
          ``has_potential_evidence_indicator`` reports a hit, or
        * ``"EvidenceStatus"`` (severity ``"Medium"``) when it does not.

        Both span the whole claim sentence. Claims whose ``sentence_index``
        falls outside the document's sentences are skipped with a console
        warning. The list is empty when there are no claims.
    """
    findings = []
    claims_data = [
        (idx, comp)
        for idx, comp in enumerate(argument_components)
        if comp.component_type == "Claim"
    ]

    if not claims_data:
        console.print(" -> No claims found to analyze for evidence.", style="dim")
        return findings

    # Materialize sentences only once we know there is something to analyze.
    sentences = list(doc.sents)
    num_sentences = len(sentences)

    console.print(
        f" -> Analyzing {len(claims_data)} claims for evidence indicators (Simplified: Same sentence only)...",
        style="dim",
    )

    for claim_comp_idx, claim in claims_data:
        claim_sentence_idx = claim.sentence_index

        if not (0 <= claim_sentence_idx < num_sentences):
            console.print(
                f"[yellow]Warn: Invalid sentence index {claim_sentence_idx} for claim comp_idx {claim_comp_idx}, skipping.[/yellow]"
            )
            continue

        # Ellipsis is appended only when the claim text was really truncated.
        claim_text_snippet = _claim_snippet(claim.text)
        claim_sentence_span = sentences[claim_sentence_idx]

        has_indicator, indicator_type, indicator_text = has_potential_evidence_indicator(
            claim_sentence_span
        )

        if has_indicator:
            findings.append(data_models.Finding(
                finding_type="EvidenceIndicator",
                description=f"Potential evidence indicator ('{indicator_type}') found in the same sentence as the claim.",
                severity="Info",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={
                    "indicator_type": indicator_type,
                    "indicator_trigger": indicator_text,
                    "location": "same_sentence",
                    "linked_claim_index": claim_comp_idx,
                    "claim_text": claim_text_snippet,
                },
            ))
        else:
            findings.append(data_models.Finding(
                finding_type="EvidenceStatus",
                description="Claim lacks explicit evidence indicator in the same sentence.",
                severity="Medium",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={"claim_text": claim_text_snippet},
            ))

    console.print(f" -> Simplified Evidence Analyzer generated {len(findings)} findings.", style="dim")
    return findings