Spaces:
Running
Running
Sahil al farib committed on
Commit ·
7dc5288
1
Parent(s): d2524a5
feat: complete Phase 1-3 adoption features
Browse files- demo/app.py +17 -13
- facteval/__init__.py +7 -4
- facteval/__pycache__/__init__.cpython-312.pyc +0 -0
- facteval/__pycache__/calibrator.cpython-312.pyc +0 -0
- facteval/__pycache__/claim_extractor.cpython-312.pyc +0 -0
- facteval/__pycache__/cli.cpython-312.pyc +0 -0
- facteval/__pycache__/config.cpython-312.pyc +0 -0
- facteval/__pycache__/core.cpython-312.pyc +0 -0
- facteval/__pycache__/models.cpython-312.pyc +0 -0
- facteval/__pycache__/retriever.cpython-312.pyc +0 -0
- facteval/__pycache__/verifier.cpython-312.pyc +0 -0
- facteval/core.py +17 -4
demo/app.py
CHANGED
|
@@ -7,25 +7,21 @@ Run on Colab: Upload facteval/ folder, then run this file.
|
|
| 7 |
|
| 8 |
import json
|
| 9 |
import gradio as gr
|
| 10 |
-
from facteval import
|
| 11 |
|
| 12 |
EXAMPLES = [
|
| 13 |
[
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
],
|
| 17 |
[
|
| 18 |
-
"
|
| 19 |
-
"
|
| 20 |
],
|
| 21 |
[
|
| 22 |
-
"
|
| 23 |
-
"
|
| 24 |
-
]
|
| 25 |
-
[
|
| 26 |
-
"Albert Einstein developed the theory of relativity and won the Nobel Prize in Physics in 1921 for his work on the photoelectric effect.",
|
| 27 |
-
"Albert Einstein developed the theory of relativity. He won the Nobel Prize in Physics in 1921 for his explanation of the photoelectric effect.",
|
| 28 |
-
],
|
| 29 |
]
|
| 30 |
|
| 31 |
|
|
@@ -39,7 +35,7 @@ def run_check(answer: str, contexts: str, calibrator_path: str = ""):
|
|
| 39 |
return "⚠️ Please enter at least one context passage.", "", "", ""
|
| 40 |
|
| 41 |
cal_path = calibrator_path.strip() if calibrator_path.strip() else None
|
| 42 |
-
result =
|
| 43 |
|
| 44 |
# 1. Highlighted answer (the viral feature)
|
| 45 |
highlighted_html = f"""
|
|
@@ -159,11 +155,13 @@ with gr.Blocks(
|
|
| 159 |
answer_input = gr.Textbox(
|
| 160 |
label="LLM Answer",
|
| 161 |
placeholder="Enter the text to fact-check...",
|
|
|
|
| 162 |
lines=4,
|
| 163 |
)
|
| 164 |
context_input = gr.Textbox(
|
| 165 |
label="Reference Contexts (one per line)",
|
| 166 |
placeholder="Enter ground truth passages, one per line...",
|
|
|
|
| 167 |
lines=5,
|
| 168 |
)
|
| 169 |
calibrator_input = gr.Textbox(
|
|
@@ -198,5 +196,11 @@ with gr.Blocks(
|
|
| 198 |
label="Try these examples",
|
| 199 |
)
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
if __name__ == "__main__":
|
| 202 |
demo.launch(share=True)
|
|
|
|
| 7 |
|
| 8 |
import json
|
| 9 |
import gradio as gr
|
| 10 |
+
from facteval import analyze, fast_check
|
| 11 |
|
| 12 |
EXAMPLES = [
|
| 13 |
[
|
| 14 |
+
"Patient presents with acute appendicitis. Given 500mg Amoxicillin. Discharge scheduled for tomorrow.",
|
| 15 |
+
"Patient was diagnosed with acute appendicitis and underwent successful appendectomy. Post-operative care includes IV fluids and rest. No antibiotics were administered. Patient will remain under observation for 48 hours."
|
| 16 |
],
|
| 17 |
[
|
| 18 |
+
"Tesla's Q3 revenue reached $25 billion, a 40% year-over-year increase. The company delivered 500,000 vehicles in the quarter.",
|
| 19 |
+
"Tesla reported Q3 revenue of $23.35 billion, representing a 9% year-over-year increase. Vehicle deliveries for the quarter totaled 435,059."
|
| 20 |
],
|
| 21 |
[
|
| 22 |
+
"To start a React project, run `npm init react-app my-app` in your terminal. This will install React v17 by default.",
|
| 23 |
+
"To create a new React single-page application, the recommended command is `npx create-react-app my-app`. This installs the latest stable version of React, currently v18."
|
| 24 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
]
|
| 26 |
|
| 27 |
|
|
|
|
| 35 |
return "⚠️ Please enter at least one context passage.", "", "", ""
|
| 36 |
|
| 37 |
cal_path = calibrator_path.strip() if calibrator_path.strip() else None
|
| 38 |
+
result = analyze(answer, context_list, calibrator_path=cal_path)
|
| 39 |
|
| 40 |
# 1. Highlighted answer (the viral feature)
|
| 41 |
highlighted_html = f"""
|
|
|
|
| 155 |
answer_input = gr.Textbox(
|
| 156 |
label="LLM Answer",
|
| 157 |
placeholder="Enter the text to fact-check...",
|
| 158 |
+
value=EXAMPLES[0][0],
|
| 159 |
lines=4,
|
| 160 |
)
|
| 161 |
context_input = gr.Textbox(
|
| 162 |
label="Reference Contexts (one per line)",
|
| 163 |
placeholder="Enter ground truth passages, one per line...",
|
| 164 |
+
value=EXAMPLES[0][1],
|
| 165 |
lines=5,
|
| 166 |
)
|
| 167 |
calibrator_input = gr.Textbox(
|
|
|
|
| 196 |
label="Try these examples",
|
| 197 |
)
|
| 198 |
|
| 199 |
+
demo.load(
|
| 200 |
+
fn=run_check,
|
| 201 |
+
inputs=[answer_input, context_input, calibrator_input],
|
| 202 |
+
outputs=[highlighted_output, details_output, summary_output, json_output],
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
if __name__ == "__main__":
|
| 206 |
demo.launch(share=True)
|
facteval/__init__.py
CHANGED
|
@@ -61,14 +61,17 @@ suppress_stdout = suppress_loading_noise
|
|
| 61 |
|
| 62 |
|
| 63 |
# ── Public API ──────────────────────────────────────────────────────────────
|
| 64 |
-
from facteval.core import check, verify
|
| 65 |
from facteval.models import Claim, Evidence, ClaimWithEvidence
|
| 66 |
from facteval.verifier import FactLabel, VerificationResult
|
| 67 |
|
| 68 |
-
__version__ = "0.1.
|
| 69 |
__all__ = [
|
| 70 |
-
"
|
| 71 |
-
"
|
|
|
|
|
|
|
|
|
|
| 72 |
"Claim",
|
| 73 |
"Evidence",
|
| 74 |
"ClaimWithEvidence",
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
# ── Public API ──────────────────────────────────────────────────────────────
|
| 64 |
+
from facteval.core import analyze, fast_check, check, verify, evaluate
|
| 65 |
from facteval.models import Claim, Evidence, ClaimWithEvidence
|
| 66 |
from facteval.verifier import FactLabel, VerificationResult
|
| 67 |
|
| 68 |
+
__version__ = "0.1.1"
|
| 69 |
__all__ = [
|
| 70 |
+
"analyze",
|
| 71 |
+
"fast_check",
|
| 72 |
+
"evaluate", # Drop-in evaluator alias
|
| 73 |
+
"check", # Backcompat
|
| 74 |
+
"verify", # Backcompat
|
| 75 |
"Claim",
|
| 76 |
"Evidence",
|
| 77 |
"ClaimWithEvidence",
|
facteval/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (3.92 kB). View file
|
|
|
facteval/__pycache__/calibrator.cpython-312.pyc
ADDED
|
Binary file (4.36 kB). View file
|
|
|
facteval/__pycache__/claim_extractor.cpython-312.pyc
ADDED
|
Binary file (6.23 kB). View file
|
|
|
facteval/__pycache__/cli.cpython-312.pyc
ADDED
|
Binary file (5.34 kB). View file
|
|
|
facteval/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (861 Bytes). View file
|
|
|
facteval/__pycache__/core.cpython-312.pyc
ADDED
|
Binary file (12.2 kB). View file
|
|
|
facteval/__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (2.53 kB). View file
|
|
|
facteval/__pycache__/retriever.cpython-312.pyc
ADDED
|
Binary file (6.87 kB). View file
|
|
|
facteval/__pycache__/verifier.cpython-312.pyc
ADDED
|
Binary file (10.7 kB). View file
|
|
|
facteval/core.py
CHANGED
|
@@ -71,7 +71,7 @@ def _get_calibrator(path: str | None = None) -> Calibrator:
|
|
| 71 |
|
| 72 |
# ── Full pipeline ───────────────────────────────────────────────────────────
|
| 73 |
|
| 74 |
-
def
|
| 75 |
answer: str,
|
| 76 |
contexts: list[str],
|
| 77 |
top_k: int = 3,
|
|
@@ -115,7 +115,7 @@ def check(
|
|
| 115 |
|
| 116 |
# ── Lightweight mode ────────────────────────────────────────────────────────
|
| 117 |
|
| 118 |
-
def
|
| 119 |
claims: list[str],
|
| 120 |
contexts: list[str],
|
| 121 |
top_k: int = 3,
|
|
@@ -134,7 +134,7 @@ def verify(
|
|
| 134 |
calibrator_path: Path to a pre-fitted calibrator pickle file.
|
| 135 |
|
| 136 |
Returns:
|
| 137 |
-
Same output format as
|
| 138 |
"""
|
| 139 |
t0 = time.perf_counter()
|
| 140 |
|
|
@@ -190,9 +190,17 @@ def _run_pipeline(
|
|
| 190 |
for cd in claim_dicts:
|
| 191 |
cd["diagnostics"] = _diagnose(cd)
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
return {
|
| 194 |
"claims": claim_dicts,
|
| 195 |
-
"summary":
|
| 196 |
"highlighted_answer": _highlight_answer_semantic(
|
| 197 |
answer, claim_dicts, retriever.embedder
|
| 198 |
),
|
|
@@ -200,6 +208,11 @@ def _run_pipeline(
|
|
| 200 |
"pipeline_time_seconds": round(elapsed, 3),
|
| 201 |
}
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
# ── Diagnostics ─────────────────────────────────────────────────────────────
|
| 205 |
|
|
|
|
| 71 |
|
| 72 |
# ── Full pipeline ───────────────────────────────────────────────────────────
|
| 73 |
|
| 74 |
+
def analyze(
|
| 75 |
answer: str,
|
| 76 |
contexts: list[str],
|
| 77 |
top_k: int = 3,
|
|
|
|
| 115 |
|
| 116 |
# ── Lightweight mode ────────────────────────────────────────────────────────
|
| 117 |
|
| 118 |
+
def fast_check(
|
| 119 |
claims: list[str],
|
| 120 |
contexts: list[str],
|
| 121 |
top_k: int = 3,
|
|
|
|
| 134 |
calibrator_path: Path to a pre-fitted calibrator pickle file.
|
| 135 |
|
| 136 |
Returns:
|
| 137 |
+
Same output format as analyze().
|
| 138 |
"""
|
| 139 |
t0 = time.perf_counter()
|
| 140 |
|
|
|
|
| 190 |
for cd in claim_dicts:
|
| 191 |
cd["diagnostics"] = _diagnose(cd)
|
| 192 |
|
| 193 |
+
summary = _build_summary(results)
|
| 194 |
+
|
| 195 |
+
# User feedback logging (feels alive)
|
| 196 |
+
hallucinations = summary.get("contradicted", 0)
|
| 197 |
+
supported = summary.get("supported", 0)
|
| 198 |
+
print(f"❌ Found {hallucinations} hallucination(s)")
|
| 199 |
+
print(f"✅ {supported} supported claim(s)")
|
| 200 |
+
|
| 201 |
return {
|
| 202 |
"claims": claim_dicts,
|
| 203 |
+
"summary": summary,
|
| 204 |
"highlighted_answer": _highlight_answer_semantic(
|
| 205 |
answer, claim_dicts, retriever.embedder
|
| 206 |
),
|
|
|
|
| 208 |
"pipeline_time_seconds": round(elapsed, 3),
|
| 209 |
}
|
| 210 |
|
| 211 |
+
# Backward compatibility aliases
|
| 212 |
+
check = analyze
|
| 213 |
+
verify = fast_check
|
| 214 |
+
evaluate = analyze
|
| 215 |
+
|
| 216 |
|
| 217 |
# ── Diagnostics ─────────────────────────────────────────────────────────────
|
| 218 |
|