"""quote_validator_node — anti-hallucination layer #7.
Validates the LLM-provided ``_quotes`` field against the source ``full_text``.
If a quote does not appear in the source (after normalization: whitespace +
diacritics + case folding), the LLM hallucinated → confidence is downgraded
to "low" and a risk is logged.
Original prototype-agentic system did not have this check; we add it here
as an explicit node.
"""
from __future__ import annotations
from graph.states.pipeline_state import ProcessedDocument, Risk
from validation.quote_validator import downgrade_confidence, validate_quotes
async def quote_validator_node(state: dict) -> dict:
    """Walk the documents list and validate each doc's _quotes field.

    Returns ``{"documents": [pd_updated], "risks": [risk_for_invalid]}``.
    The merge_doc_results and merge_risks reducers upsert into the parent state.

    NB: this node runs in the parent pipeline_graph, NOT inside extract_subgraph
    (after the Send fan-in, so we see all docs' extracted data together).
    """
    documents: list[ProcessedDocument] = state.get("documents") or []
    if not documents:
        # Nothing to validate; empty delta means the reducers are no-ops.
        return {}

    updated_docs: list[ProcessedDocument] = []
    new_risks: list[Risk] = []
    for pd in documents:
        # Docs that failed ingestion or extraction carry no quotes to check;
        # pass them through unchanged.
        if pd.extracted is None or pd.ingested is None:
            updated_docs.append(pd)
            continue

        full_text = pd.ingested.full_text or ""
        valid, invalid = validate_quotes(pd.extracted.raw, full_text)
        if not invalid:
            updated_docs.append(pd)
            continue

        # Downgrade confidence on fields whose quotes were not found.
        # dict(...) copies raw so the original extracted payload is not mutated.
        new_raw = downgrade_confidence(dict(pd.extracted.raw), invalid)
        new_extracted = pd.extracted.model_copy(update={
            "raw": new_raw,
            "confidence": new_raw.get("_confidence", {}),
        })
        updated_docs.append(pd.model_copy(update={"extracted": new_extracted}))

        # Only emit a "low" severity flag if the proportion of invalid quotes
        # is significant (>= 50%). Stochastic LLM paraphrasing alone does
        # not warrant a flag.
        # BUGFIX: reuse `valid` from the single validate_quotes call above —
        # the original re-ran the whole validation (re-normalizing full_text)
        # just to recount the valid quotes.
        total = len(invalid) + len(valid)
        invalid_ratio = len(invalid) / max(1, total)
        if invalid_ratio >= 0.5:
            new_risks.append(Risk(
                description=(
                    f"{pd.ingested.file_name}: {len(invalid)}/{total} quote(s) not found "
                    "in the source document (suspected LLM hallucination)."
                ),
                severity="low",
                rationale=(
                    "The schema-level ``_quotes`` field contains text that does not appear "
                    "in the normalized full_text. Affected fields' confidence has been "
                    "downgraded to 'low'."
                ),
                kind="validation",
                affected_document=pd.ingested.file_name,
                source_check_id="quote_validator",
            ))

    # Emit only non-empty keys so the parent-state reducers are not invoked
    # with empty lists.
    out: dict = {}
    if updated_docs:
        out["documents"] = updated_docs
    if new_risks:
        out["risks"] = new_risks
    return out