Spaces:
Running
Running
Sahil al farib committed on
Commit ·
7dc5288
1
Parent(s): d2524a5
feat: complete Phase 1-3 adoption features
Browse files- demo/app.py +17 -13
- facteval/__init__.py +7 -4
- facteval/__pycache__/__init__.cpython-312.pyc +0 -0
- facteval/__pycache__/calibrator.cpython-312.pyc +0 -0
- facteval/__pycache__/claim_extractor.cpython-312.pyc +0 -0
- facteval/__pycache__/cli.cpython-312.pyc +0 -0
- facteval/__pycache__/config.cpython-312.pyc +0 -0
- facteval/__pycache__/core.cpython-312.pyc +0 -0
- facteval/__pycache__/models.cpython-312.pyc +0 -0
- facteval/__pycache__/retriever.cpython-312.pyc +0 -0
- facteval/__pycache__/verifier.cpython-312.pyc +0 -0
- facteval/core.py +17 -4
demo/app.py
CHANGED
|
@@ -7,25 +7,21 @@ Run on Colab: Upload facteval/ folder, then run this file.
|
|
| 7 |
|
| 8 |
import json
|
| 9 |
import gradio as gr
|
| 10 |
-
from facteval import
|
| 11 |
|
| 12 |
EXAMPLES = [
|
| 13 |
[
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
],
|
| 17 |
[
|
| 18 |
-
"
|
| 19 |
-
"
|
| 20 |
],
|
| 21 |
[
|
| 22 |
-
"
|
| 23 |
-
"
|
| 24 |
-
]
|
| 25 |
-
[
|
| 26 |
-
"Albert Einstein developed the theory of relativity and won the Nobel Prize in Physics in 1921 for his work on the photoelectric effect.",
|
| 27 |
-
"Albert Einstein developed the theory of relativity. He won the Nobel Prize in Physics in 1921 for his explanation of the photoelectric effect.",
|
| 28 |
-
],
|
| 29 |
]
|
| 30 |
|
| 31 |
|
|
@@ -39,7 +35,7 @@ def run_check(answer: str, contexts: str, calibrator_path: str = ""):
|
|
| 39 |
return "⚠️ Please enter at least one context passage.", "", "", ""
|
| 40 |
|
| 41 |
cal_path = calibrator_path.strip() if calibrator_path.strip() else None
|
| 42 |
-
result =
|
| 43 |
|
| 44 |
# 1. Highlighted answer (the viral feature)
|
| 45 |
highlighted_html = f"""
|
|
@@ -159,11 +155,13 @@ with gr.Blocks(
|
|
| 159 |
answer_input = gr.Textbox(
|
| 160 |
label="LLM Answer",
|
| 161 |
placeholder="Enter the text to fact-check...",
|
|
|
|
| 162 |
lines=4,
|
| 163 |
)
|
| 164 |
context_input = gr.Textbox(
|
| 165 |
label="Reference Contexts (one per line)",
|
| 166 |
placeholder="Enter ground truth passages, one per line...",
|
|
|
|
| 167 |
lines=5,
|
| 168 |
)
|
| 169 |
calibrator_input = gr.Textbox(
|
|
@@ -198,5 +196,11 @@ with gr.Blocks(
|
|
| 198 |
label="Try these examples",
|
| 199 |
)
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
if __name__ == "__main__":
|
| 202 |
demo.launch(share=True)
|
|
|
|
| 7 |
|
| 8 |
import json
|
| 9 |
import gradio as gr
|
| 10 |
+
from facteval import analyze, fast_check
|
| 11 |
|
| 12 |
EXAMPLES = [
|
| 13 |
[
|
| 14 |
+
"Patient presents with acute appendicitis. Given 500mg Amoxicillin. Discharge scheduled for tomorrow.",
|
| 15 |
+
"Patient was diagnosed with acute appendicitis and underwent successful appendectomy. Post-operative care includes IV fluids and rest. No antibiotics were administered. Patient will remain under observation for 48 hours."
|
| 16 |
],
|
| 17 |
[
|
| 18 |
+
"Tesla's Q3 revenue reached $25 billion, a 40% year-over-year increase. The company delivered 500,000 vehicles in the quarter.",
|
| 19 |
+
"Tesla reported Q3 revenue of $23.35 billion, representing a 9% year-over-year increase. Vehicle deliveries for the quarter totaled 435,059."
|
| 20 |
],
|
| 21 |
[
|
| 22 |
+
"To start a React project, run `npm init react-app my-app` in your terminal. This will install React v17 by default.",
|
| 23 |
+
"To create a new React single-page application, the recommended command is `npx create-react-app my-app`. This installs the latest stable version of React, currently v18."
|
| 24 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
]
|
| 26 |
|
| 27 |
|
|
|
|
| 35 |
return "⚠️ Please enter at least one context passage.", "", "", ""
|
| 36 |
|
| 37 |
cal_path = calibrator_path.strip() if calibrator_path.strip() else None
|
| 38 |
+
result = analyze(answer, context_list, calibrator_path=cal_path)
|
| 39 |
|
| 40 |
# 1. Highlighted answer (the viral feature)
|
| 41 |
highlighted_html = f"""
|
|
|
|
| 155 |
answer_input = gr.Textbox(
|
| 156 |
label="LLM Answer",
|
| 157 |
placeholder="Enter the text to fact-check...",
|
| 158 |
+
value=EXAMPLES[0][0],
|
| 159 |
lines=4,
|
| 160 |
)
|
| 161 |
context_input = gr.Textbox(
|
| 162 |
label="Reference Contexts (one per line)",
|
| 163 |
placeholder="Enter ground truth passages, one per line...",
|
| 164 |
+
value=EXAMPLES[0][1],
|
| 165 |
lines=5,
|
| 166 |
)
|
| 167 |
calibrator_input = gr.Textbox(
|
|
|
|
| 196 |
label="Try these examples",
|
| 197 |
)
|
| 198 |
|
| 199 |
+
demo.load(
|
| 200 |
+
fn=run_check,
|
| 201 |
+
inputs=[answer_input, context_input, calibrator_input],
|
| 202 |
+
outputs=[highlighted_output, details_output, summary_output, json_output],
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
if __name__ == "__main__":
|
| 206 |
demo.launch(share=True)
|
facteval/__init__.py
CHANGED
|
@@ -61,14 +61,17 @@ suppress_stdout = suppress_loading_noise
|
|
| 61 |
|
| 62 |
|
| 63 |
# ── Public API ──────────────────────────────────────────────────────────────
|
| 64 |
-
from facteval.core import check, verify
|
| 65 |
from facteval.models import Claim, Evidence, ClaimWithEvidence
|
| 66 |
from facteval.verifier import FactLabel, VerificationResult
|
| 67 |
|
| 68 |
-
__version__ = "0.1.
|
| 69 |
__all__ = [
|
| 70 |
-
"
|
| 71 |
-
"
|
|
|
|
|
|
|
|
|
|
| 72 |
"Claim",
|
| 73 |
"Evidence",
|
| 74 |
"ClaimWithEvidence",
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
# ── Public API ──────────────────────────────────────────────────────────────
|
| 64 |
+
from facteval.core import analyze, fast_check, check, verify, evaluate
|
| 65 |
from facteval.models import Claim, Evidence, ClaimWithEvidence
|
| 66 |
from facteval.verifier import FactLabel, VerificationResult
|
| 67 |
|
| 68 |
+
__version__ = "0.1.1"
|
| 69 |
__all__ = [
|
| 70 |
+
"analyze",
|
| 71 |
+
"fast_check",
|
| 72 |
+
"evaluate", # Drop-in evaluator alias
|
| 73 |
+
"check", # Backcompat
|
| 74 |
+
"verify", # Backcompat
|
| 75 |
"Claim",
|
| 76 |
"Evidence",
|
| 77 |
"ClaimWithEvidence",
|
facteval/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (3.92 kB). View file
|
|
|
facteval/__pycache__/calibrator.cpython-312.pyc
ADDED
|
Binary file (4.36 kB). View file
|
|
|
facteval/__pycache__/claim_extractor.cpython-312.pyc
ADDED
|
Binary file (6.23 kB). View file
|
|
|
facteval/__pycache__/cli.cpython-312.pyc
ADDED
|
Binary file (5.34 kB). View file
|
|
|
facteval/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (861 Bytes). View file
|
|
|
facteval/__pycache__/core.cpython-312.pyc
ADDED
|
Binary file (12.2 kB). View file
|
|
|
facteval/__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (2.53 kB). View file
|
|
|
facteval/__pycache__/retriever.cpython-312.pyc
ADDED
|
Binary file (6.87 kB). View file
|
|
|
facteval/__pycache__/verifier.cpython-312.pyc
ADDED
|
Binary file (10.7 kB). View file
|
|
|
facteval/core.py
CHANGED
|
@@ -71,7 +71,7 @@ def _get_calibrator(path: str | None = None) -> Calibrator:
|
|
| 71 |
|
| 72 |
# ── Full pipeline ───────────────────────────────────────────────────────────
|
| 73 |
|
| 74 |
-
def
|
| 75 |
answer: str,
|
| 76 |
contexts: list[str],
|
| 77 |
top_k: int = 3,
|
|
@@ -115,7 +115,7 @@ def check(
|
|
| 115 |
|
| 116 |
# ── Lightweight mode ────────────────────────────────────────────────────────
|
| 117 |
|
| 118 |
-
def
|
| 119 |
claims: list[str],
|
| 120 |
contexts: list[str],
|
| 121 |
top_k: int = 3,
|
|
@@ -134,7 +134,7 @@ def verify(
|
|
| 134 |
calibrator_path: Path to a pre-fitted calibrator pickle file.
|
| 135 |
|
| 136 |
Returns:
|
| 137 |
-
Same output format as
|
| 138 |
"""
|
| 139 |
t0 = time.perf_counter()
|
| 140 |
|
|
@@ -190,9 +190,17 @@ def _run_pipeline(
|
|
| 190 |
for cd in claim_dicts:
|
| 191 |
cd["diagnostics"] = _diagnose(cd)
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
return {
|
| 194 |
"claims": claim_dicts,
|
| 195 |
-
"summary":
|
| 196 |
"highlighted_answer": _highlight_answer_semantic(
|
| 197 |
answer, claim_dicts, retriever.embedder
|
| 198 |
),
|
|
@@ -200,6 +208,11 @@ def _run_pipeline(
|
|
| 200 |
"pipeline_time_seconds": round(elapsed, 3),
|
| 201 |
}
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
# ── Diagnostics ─────────────────────────────────────────────────────────────
|
| 205 |
|
|
|
|
| 71 |
|
| 72 |
# ── Full pipeline ───────────────────────────────────────────────────────────
|
| 73 |
|
| 74 |
+
def analyze(
|
| 75 |
answer: str,
|
| 76 |
contexts: list[str],
|
| 77 |
top_k: int = 3,
|
|
|
|
| 115 |
|
| 116 |
# ── Lightweight mode ────────────────────────────────────────────────────────
|
| 117 |
|
| 118 |
+
def fast_check(
|
| 119 |
claims: list[str],
|
| 120 |
contexts: list[str],
|
| 121 |
top_k: int = 3,
|
|
|
|
| 134 |
calibrator_path: Path to a pre-fitted calibrator pickle file.
|
| 135 |
|
| 136 |
Returns:
|
| 137 |
+
Same output format as analyze().
|
| 138 |
"""
|
| 139 |
t0 = time.perf_counter()
|
| 140 |
|
|
|
|
| 190 |
for cd in claim_dicts:
|
| 191 |
cd["diagnostics"] = _diagnose(cd)
|
| 192 |
|
| 193 |
+
summary = _build_summary(results)
|
| 194 |
+
|
| 195 |
+
# User feedback logging (feels alive)
|
| 196 |
+
hallucinations = summary.get("contradicted", 0)
|
| 197 |
+
supported = summary.get("supported", 0)
|
| 198 |
+
print(f"❌ Found {hallucinations} hallucination(s)")
|
| 199 |
+
print(f"✅ {supported} supported claim(s)")
|
| 200 |
+
|
| 201 |
return {
|
| 202 |
"claims": claim_dicts,
|
| 203 |
+
"summary": summary,
|
| 204 |
"highlighted_answer": _highlight_answer_semantic(
|
| 205 |
answer, claim_dicts, retriever.embedder
|
| 206 |
),
|
|
|
|
| 208 |
"pipeline_time_seconds": round(elapsed, 3),
|
| 209 |
}
|
| 210 |
|
| 211 |
+
# Backward compatibility aliases
|
| 212 |
+
check = analyze
|
| 213 |
+
verify = fast_check
|
| 214 |
+
evaluate = analyze
|
| 215 |
+
|
| 216 |
|
| 217 |
# ── Diagnostics ─────────────────────────────────────────────────────────────
|
| 218 |
|