TruthLens / app.py
DevPatel0611's picture
Clean build with correct gitignore
86b932c
import os
import sys
import json
import time
import pandas as pd
import numpy as np
import streamlit as st
# Directory containing this script. It is pushed to the front of sys.path
# (only once) so that project-local imports such as `src.stage4_inference`
# resolve against this folder.
_ROOT = os.path.dirname(os.path.abspath(__file__))
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)
# ── Page config ──────────────────────────────────────────────────────────────
# Browser-tab title and icon, wide layout, sidebar collapsed on load.
# The title separator and the icon were mis-encoded (mojibake); they are
# restored to the intended "·" and magnifying-glass emoji.
st.set_page_config(
    page_title="TruthLens · Fake News Detector",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="collapsed",
)
# ── Global CSS ───────────────────────────────────────────────────────────────
# Injects one <style> block through st.markdown(unsafe_allow_html=True).
# The stylesheet:
#   * loads the Inter web font and sets the page background / text colours,
#   * hides Streamlit's header, footer, main menu and sidebar,
#   * styles built-in widgets via their data-testid selectors
#     (primary button, tabs, metrics, expander, text inputs, slider, progress),
#   * defines the custom classes used by the HTML snippets rendered elsewhere
#     in this script (.verdict-*, .info-card, .fresh-*, .source-*, .hero-*,
#     .how-*, .legend-*).
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap');
/* ── Reset ── */
html, body, [data-testid="stAppViewContainer"] {
font-family: 'Inter', sans-serif;
background: #f4f6fb;
color: #1e293b;
}
[data-testid="stMain"] { background: #f4f6fb; }
.block-container {
padding-top: 2.5rem !important;
padding-bottom: 2rem !important;
max-width: 920px;
}
/* ── Remove Streamlit chrome ── */
header[data-testid="stHeader"] { display: none; }
footer { display: none; }
#MainMenu { display: none; }
[data-testid="stSidebar"] { display: none; }
/* ── Predict button ── */
.stButton > button[kind="primary"] {
background: linear-gradient(135deg, #3b82f6 0%, #6366f1 100%) !important;
color: #fff !important;
border: none !important;
border-radius: 12px !important;
font-weight: 700 !important;
font-size: 1.05rem !important;
letter-spacing: 0.02em;
padding: 0.75rem 2rem !important;
transition: transform 0.15s, box-shadow 0.2s;
box-shadow: 0 4px 16px rgba(59,130,246,0.2);
}
.stButton > button[kind="primary"]:hover {
transform: translateY(-1px);
box-shadow: 0 6px 24px rgba(59,130,246,0.3) !important;
}
/* ── Tab styling ── */
[data-testid="stTabs"] button {
color: #94a3b8 !important;
font-size: 0.92rem !important;
font-weight: 500 !important;
padding: 10px 20px !important;
}
[data-testid="stTabs"] button[aria-selected="true"] {
color: #1e293b !important;
border-bottom: 2px solid #3b82f6 !important;
font-weight: 600 !important;
}
/* ── Verdict banner ── */
.verdict-box {
border-radius: 16px;
padding: 32px 36px;
margin-bottom: 28px;
display: flex;
align-items: center;
gap: 24px;
animation: fadeSlide 0.5s ease;
}
@keyframes fadeSlide {
from { opacity: 0; transform: translateY(-16px); }
to { opacity: 1; transform: translateY(0); }
}
.verdict-emoji { font-size: 3.5rem; line-height: 1; }
.verdict-label { font-size: 1.8rem; font-weight: 800; letter-spacing: -0.03em; }
.verdict-conf { font-size: 1rem; opacity: 0.85; margin-top: 6px; font-weight: 400; }
.verdict-explain { font-size: 0.88rem; color: #64748b; margin-top: 6px; line-height: 1.5; }
/* ── Info cards ── */
.info-card {
background: #ffffff;
border: 1px solid #e2e8f0;
border-radius: 12px;
padding: 20px 24px;
margin: 12px 0;
line-height: 1.6;
color: #475569;
}
.info-card b { color: #1e293b; }
/* ── Freshness bar ── */
.fresh-track { background: #e2e8f0; border-radius: 8px; height: 12px; margin: 10px 0 6px; overflow: hidden; }
.fresh-fill { height: 100%; border-radius: 8px; transition: width 0.8s ease; }
/* ── Source card ── */
.source-card {
background: #ffffff;
border: 1px solid #e2e8f0;
border-radius: 12px;
padding: 18px 22px;
margin: 10px 0;
display: flex;
justify-content: space-between;
align-items: flex-start;
gap: 16px;
}
.source-text { flex: 1; font-size: 0.88rem; line-height: 1.5; color: #475569; }
.source-score { text-align: center; min-width: 60px; }
.source-score-val { font-size: 1.4rem; font-weight: 700; font-family: 'Inter', sans-serif; }
.source-score-tag { font-size: 0.65rem; text-transform: uppercase; letter-spacing: 0.1em; margin-top: 4px; }
/* ── Hero ── */
.hero-wrap { text-align: center; padding: 60px 20px 40px; }
.hero-icon { font-size: 4rem; margin-bottom: 16px; }
.hero-title { font-size: 2.4rem; font-weight: 800; letter-spacing: -0.04em; color: #0f172a; }
.hero-sub { font-size: 1.05rem; color: #64748b; margin-top: 12px; line-height: 1.6; max-width: 520px; margin-left: auto; margin-right: auto; }
/* ── How-it-works ── */
.how-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 16px; margin: 36px 0; }
.how-card {
background: #ffffff;
border: 1px solid #e2e8f0;
border-radius: 12px;
padding: 24px;
text-align: center;
box-shadow: 0 1px 3px rgba(0,0,0,0.04);
}
.how-num { font-size: 2rem; margin-bottom: 8px; }
.how-title { font-size: 0.95rem; font-weight: 600; margin-bottom: 6px; color: #0f172a; }
.how-desc { font-size: 0.82rem; color: #64748b; line-height: 1.5; }
/* ── Verdict legend ── */
.legend-row {
display: flex;
gap: 24px;
justify-content: center;
flex-wrap: wrap;
margin: 20px 0;
}
.legend-item { font-size: 0.85rem; color: #64748b; }
/* ── Metric overrides ── */
[data-testid="stMetric"] {
background: #ffffff;
border: 1px solid #e2e8f0;
border-radius: 10px;
padding: 14px 18px !important;
box-shadow: 0 1px 3px rgba(0,0,0,0.04);
}
[data-testid="stMetricLabel"] { color: #64748b !important; font-size: 0.78rem !important; }
[data-testid="stMetricValue"] { color: #0f172a !important; font-size: 1.3rem !important; }
/* ── Expander ── */
[data-testid="stExpander"] {
background: #ffffff !important;
border: 1px solid #e2e8f0 !important;
border-radius: 10px !important;
}
/* ── Text inputs ── */
[data-testid="stTextInput"] input, [data-testid="stTextArea"] textarea {
background: #ffffff !important;
border: 1px solid #cbd5e1 !important;
border-radius: 8px !important;
color: #1e293b !important;
}
[data-testid="stTextInput"] input:focus, [data-testid="stTextArea"] textarea:focus {
border-color: #3b82f6 !important;
box-shadow: 0 0 0 2px rgba(59,130,246,0.15) !important;
}
/* ── Select slider / radio ── */
[data-testid="stSlider"] label, .stRadio label { color: #475569 !important; }
/* ── Progress bar ── */
[data-testid="stProgress"] > div > div > div > div { background: linear-gradient(90deg, #3b82f6, #6366f1) !important; }
</style>
""", unsafe_allow_html=True)
# ── Cached inference loader ──────────────────────────────────────────────────
@st.cache_resource(show_spinner=False)
def load_pipeline():
    """Import the stage-4 inference module on first use and expose its API.

    The import happens inside the function rather than at module import
    time, and the function is wrapped in ``st.cache_resource``.

    Returns:
        tuple: ``(predict_article, ModelNotTrainedError)`` as imported from
        ``src.stage4_inference``.
    """
    from src.stage4_inference import ModelNotTrainedError, predict_article

    pipeline_api = (predict_article, ModelNotTrainedError)
    return pipeline_api
# ── Session state ────────────────────────────────────────────────────────────
# Seed each key with its default only when it is absent, so values already
# stored in the session are never overwritten by this loop.
for state_key, default_value in {
    "analyzed": False,
    "last_result": None,
    "last_input": "",
}.items():
    if state_key in st.session_state:
        continue
    st.session_state[state_key] = default_value
# =============================================================================
# LANDING PAGE (shown before any analysis)
# =============================================================================
if not st.session_state["analyzed"]:
# ── Hero section ──
# Static landing-page banner. The icon and the dash in the subtitle were
# mis-encoded (mojibake) and are restored to the intended characters.
st.markdown("""
<div class="hero-wrap">
<div class="hero-icon">🔍</div>
<div class="hero-title">TruthLens</div>
<div class="hero-sub">
Paste any news article or drop a URL — our AI will tell you
if it's real, fake, or outdated in seconds.
</div>
</div>
""", unsafe_allow_html=True)
# ── How it works ──
# Three static explainer cards. The card icons were mis-encoded (mojibake)
# and are restored to the intended clipboard / lightning / check-mark emoji.
st.markdown("""
<div class="how-grid">
<div class="how-card">
<div class="how-num">📋</div>
<div class="how-title">Paste or Link</div>
<div class="how-desc">Drop in the article text or a URL. We'll extract everything automatically.</div>
</div>
<div class="how-card">
<div class="how-num">⚡</div>
<div class="how-title">Instant Analysis</div>
<div class="how-desc">Our AI analyzes language patterns, checks freshness, and searches live sources.</div>
</div>
<div class="how-card">
<div class="how-num">✅</div>
<div class="how-title">Get Your Verdict</div>
<div class="how-desc">See a clear REAL / FAKE / OUTDATED verdict with a confidence score and explanation.</div>
</div>
</div>
""", unsafe_allow_html=True)
# ── Input area ──
# The radio option strings double as mode identifiers. The text-mode label is
# kept exactly as before because the submit handler compares against that
# literal; only the mis-encoded link emoji of the URL option is repaired.
text_mode_label = "✍️ Write or paste text"
url_mode_label = "🔗 Paste a URL"
input_tab = st.radio(
    "How would you like to provide the article?",
    [text_mode_label, url_mode_label],
    horizontal=True,
    label_visibility="visible",
)
input_text, input_title, input_url, input_date, input_domain = "", "", "", "", ""
if input_tab == text_mode_label:
    input_title = st.text_input(
        "Headline (optional)",
        placeholder="e.g. Breaking: Scientists discover high-speed interstellar travel",
    )
    input_text = st.text_area(
        "Article content",
        height=180,
        placeholder="Paste the full article body here…",
    )
    # ── Auto-extract title from pasted text if headline field is empty ──
    if not input_title.strip() and input_text.strip():
        pasted = input_text.lstrip()
        if pasted.lower().startswith("title:"):
            # Strip only the leading "title:" / "body:" markers (any letter
            # case). The previous chained .replace() calls removed every
            # occurrence inside the text and missed spellings like "TITLE:".
            first_line, _, remainder = pasted.partition("\n")
            input_title = first_line[len("title:"):].strip()
            body = remainder.strip()
            if body.lower().startswith("body:"):
                body = body[len("body:"):].strip()
            input_text = body
        else:
            # Fallback: first sentence is title
            input_title = input_text.split(".")[0].strip()
else:
    input_url = st.text_input(
        "Article URL",
        placeholder="https://www.example.com/news/breaking-story",
    )
    st.caption("We'll automatically extract the title, body, and publish date.")
# ── Analysis mode (kept minimal — user doesn't need to understand internals)
# The slider shows friendly names; speed_map translates the chosen name into
# the `mode` string passed to predict_article. The help text had mis-encoded
# "≈" and "·" characters, restored here.
speed = st.select_slider(
    "Analysis depth",
    options=["Quick", "Standard", "Deep"],
    value="Deep",
    help="Quick ≈ 2 sec · Standard ≈ 10 sec · Deep ≈ 30 sec (most accurate)",
)
speed_map = {"Quick": "fast", "Standard": "balanced", "Deep": "full"}
selected_mode = speed_map[speed]
# ── Predict button ──
# True only on the script run triggered by the click. The button emoji and
# the legend's coloured circles were mis-encoded (mojibake) and are restored.
predict_clicked = st.button("🔍 Check this article", use_container_width=True, type="primary")
# ── Verdict legend ──
st.markdown("""
<div class="legend-row">
<div class="legend-item">🟢 Verified True</div>
<div class="legend-item">🔴 Likely Fake</div>
<div class="legend-item">🟡 Outdated</div>
<div class="legend-item">🟠 Needs Review</div>
</div>
""", unsafe_allow_html=True)
# ── Execute prediction ──
# Runs only on the click. Flow: validate the input for the chosen mode
# (fetching and parsing the page in URL mode), call predict_article, store
# the result in session state and rerun so the results page is rendered.
if predict_clicked:
    # Validate
    if input_tab == "✍️ Write or paste text":
        if not input_text or len(input_text.split()) < 10:
            st.warning("⚠️ Please paste at least a few sentences so we can analyze it properly.")
            st.stop()
    else:
        from urllib.parse import urlparse

        input_url = input_url.strip()
        if not input_url:
            st.warning("⚠️ Please enter a URL first.")
            st.stop()
        # Only fetch real web addresses: the URL is user-supplied, so reject
        # anything that is not http(s) with a host before handing it on.
        try:
            parsed_url = urlparse(input_url)
        except ValueError:
            parsed_url = None
        if parsed_url is None or parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
            st.warning("⚠️ Please enter a full web address starting with http:// or https://.")
            st.stop()
        try:
            import newspaper

            art = newspaper.Article(input_url)
            art.download()
            art.parse()
            input_title = art.title or ""
            input_text = art.text or ""
            input_date = art.publish_date.isoformat() if art.publish_date else ""
            input_domain = parsed_url.netloc
        except Exception:
            # UI boundary: any import/download/parse failure gets the same
            # friendly message instead of a traceback.
            st.error("❌ Couldn't fetch that URL. Please check the link or paste the text directly.")
            st.stop()
        # Checked outside the try so the broad handler above only covers the
        # fetch/parse step itself.
        if len(input_text.split()) < 10:
            st.warning("⚠️ Couldn't extract enough text from that URL. Try pasting the article directly.")
            st.stop()
    predict_article, ModelNotTrainedError = load_pipeline()
    with st.status("🔍 Analyzing article…", expanded=True) as status:
        st.write("📖 Reading article…")
        time.sleep(0.3)
        st.write("🧠 Running AI analysis…")
        try:
            result = predict_article(
                title=input_title,
                text=input_text,
                source_domain=input_domain,
                published_date=input_date,
                mode=selected_mode,
            )
        except ModelNotTrainedError:
            status.update(label="❌ Setup required", state="error")
            st.error("The AI models haven't been trained yet.")
            st.info("Ask your administrator to run: `python run_pipeline.py --stage 1 2 3`")
            st.stop()
        except Exception as e:
            status.update(label="❌ Error", state="error")
            st.error(f"Something went wrong: {e}")
            st.stop()
        else:
            st.write("🕐 Checking article freshness…")
            st.write("🌐 Searching live sources…")
            status.update(label="✅ Done!", state="complete")
            # Persist the outcome and flip the page flag, then rerun so the
            # script takes the results branch.
            st.session_state["last_result"] = result
            st.session_state["last_input"] = input_text
            st.session_state["analyzed"] = True
            st.rerun()
# =============================================================================
# RESULTS PAGE (shown after analysis)
# =============================================================================
else:
# Unpack the stored prediction into locals used by the sections below. Every
# field is read with a default so a sparse result still renders; `or {}`
# covers the case where the flag is set but no result is stored.
res = st.session_state["last_result"] or {}
verdict = res.get("verdict", "UNKNOWN")
final_score = res.get("final_score", 0.0)
scores = res.get("scores", {})
confidence = res.get("confidence", "MEDIUM")
action = res.get("recommended_action", "Flag for review")
top_reasons = res.get("top_reasons", [])
missing_signals = res.get("missing_signals", [])
adv_flags = res.get("adversarial_flags", [])
wc = res.get("word_count", 0)
probas = res.get("base_model_probas", {})
votes = res.get("base_model_votes", {})
fresh_case = res.get("freshness_case", "B")
fresh_signals = res.get("freshness_signals_found", [])
deductions = res.get("deductions_applied", [])
entities = res.get("entities_found", [])
# ── Map verdict to display ──
# Per-verdict presentation: background, border, icon, headline, text colour
# and a one-line explanation. The icons and the dash in the UNCERTAIN label
# were mis-encoded (mojibake) and are restored.
V = {
    "TRUE": {
        "bg": "#f0fdf4", "bdr": "#86efac", "icon": "🟢",
        "label": "This appears to be true", "color": "#15803d",
        "explain": "Source, claims, language, and AI models all align with credible journalism.",
    },
    "UNCERTAIN": {
        "bg": "#fff7ed", "bdr": "#fdba74", "icon": "🟠",
        "label": "Uncertain — needs review", "color": "#c2410c",
        "explain": "Mixed signals detected. We recommend verifying the sources yourself before sharing.",
    },
    "LIKELY FALSE": {
        "bg": "#fef2f2", "bdr": "#fca5a5", "icon": "🔴",
        "label": "Likely false", "color": "#b91c1c",
        "explain": "Multiple signals indicate this content may be fabricated or misleading.",
    },
    "FALSE": {
        "bg": "#fef2f2", "bdr": "#fca5a5", "icon": "⛔",
        "label": "This looks fake", "color": "#991b1b",
        "explain": "Strong evidence of misinformation. Do not share without independent verification.",
    },
}
# Unrecognised verdict strings fall back to a neutral grey card that shows
# the raw verdict text as its label.
vc = V.get(verdict, {
    "bg": "#f8fafc", "bdr": "#cbd5e1", "icon": "⚪", "label": verdict, "color": "#475569",
    "explain": "Analysis complete.",
})
# ── Verdict banner ──
# The markup is rendered with unsafe_allow_html, so text that originates from
# the prediction result (the fallback label is the raw verdict string, and
# the confidence value) is HTML-escaped before interpolation. The " · "
# separator was mis-encoded and is restored.
from html import escape

score_pct = final_score * 100
banner_label = escape(str(vc["label"]))
banner_confidence = escape(str(confidence))
st.markdown(f"""
<div class="verdict-box" style="background:{vc['bg']}; border:1px solid {vc['bdr']};">
<div class="verdict-emoji">{vc['icon']}</div>
<div>
<div class="verdict-label" style="color:{vc['color']};">{banner_label}</div>
<div class="verdict-conf" style="color:{vc['color']};">Score: {score_pct:.0f}% · Confidence: {banner_confidence}</div>
<div class="verdict-explain">{vc['explain']}</div>
</div>
</div>
""", unsafe_allow_html=True)
# ── Recommended action badge ──
# (background, text colour) per known action; anything else gets neutral grey.
from html import escape

action_colors = {
    "Publish": ("#f0fdf4", "#15803d"),
    "Flag for review": ("#fff7ed", "#c2410c"),
    "Suppress": ("#fef2f2", "#b91c1c"),
    "Escalate": ("#fef2f2", "#991b1b"),
}
abg, acol = action_colors.get(action, ("#f8fafc", "#475569"))
# `action` comes from the prediction result and is rendered with
# unsafe_allow_html, so escape it before placing it in the markup.
badge_text = escape(str(action))
st.markdown(f"""
<div style="background:{abg}; border-radius:8px; padding:10px 16px; display:inline-block; margin-bottom:24px;">
<span style="font-weight:600; color:{acol};">Recommended: {badge_text}</span>
</div>
""", unsafe_allow_html=True)
# ── Tabs ──
# One container per results section, in display order. The tab emoji were
# mis-encoded (mojibake) and are restored.
tab_why, tab_fresh, tab_sources, tab_details = st.tabs(
    ["🧠 Why this verdict?", "🕐 Freshness", "🌐 Live sources", "📋 Details"]
)
# ── TAB 1: Why this verdict ──────────────────────────────────────────
with tab_why:
    # ── 5-Signal Score Breakdown ──
    st.markdown("#### Signal Breakdown")
    # (display label, key into `scores`, one-line explanation)
    SIGNAL_INFO = [
        ("Source", "source", "Is the outlet known and accountable?"),
        ("Claims", "claim", "Are facts verifiable with named entities?"),
        ("Language", "linguistic", "Is the writing neutral and attributed?"),
        ("Freshness", "freshness", "How recent is the content?"),
        ("AI Models", "model_vote", "What do the AI models think?"),
    ]
    # Display-only weight labels shown under each score card.
    WEIGHTS = {"source": "30%", "claim": "30%", "linguistic": "20%", "freshness": "10%", "model_vote": "10%"}
    cols = st.columns(5)
    for col, (label, key, _desc) in zip(cols, SIGNAL_INFO):
        # A missing or None score is shown as 0.
        val = float(scores.get(key) or 0.0)
        pct = val * 100
        # Green from 70%, amber from 50%, red below.
        if pct >= 70:
            col_hex = "#15803d"
        elif pct >= 50:
            col_hex = "#ca8a04"
        else:
            col_hex = "#b91c1c"
        with col:
            st.markdown(f"""
<div style="text-align:center; background:#ffffff; border:1px solid #e2e8f0;
border-radius:10px; padding:16px 8px; box-shadow:0 1px 3px rgba(0,0,0,0.04);">
<div style="font-size:1.6rem; font-weight:800; color:{col_hex};">{pct:.0f}%</div>
<div style="font-size:0.85rem; font-weight:600; color:#0f172a; margin-top:4px;">{label}</div>
<div style="font-size:0.7rem; color:#94a3b8; margin-top:2px;">Weight: {WEIGHTS[key]}</div>
</div>
""", unsafe_allow_html=True)
    st.markdown("")
    # ── Progress bars for each signal ──
    for label, key, desc in SIGNAL_INFO:
        val = float(scores.get(key) or 0.0)
        st.caption(f"**{label}** — {desc}")
        # Clamp into [0.0, 1.0] and pass a plain float so an out-of-range or
        # numpy-typed score cannot make st.progress raise.
        st.progress(float(min(max(val, 0.0), 1.0)))
    st.markdown("---")
    # ── Top Reasons ──
    if top_reasons:
        st.markdown("#### Key Factors")
        # Substring markers that flag a reason as negative (red); built once
        # instead of on every iteration.
        # NOTE(review): this is plain substring matching, so e.g. "not" also
        # matches inside words like "another" — confirm that is acceptable.
        negative_markers = (
            "fake", "false", "unknown", "not", "manipulation", "adversarial",
            "sensationalism", "reduces", "could not", "inconsistent", "missing",
        )
        for r in top_reasons:
            reason_text = str(r)  # tolerate non-string entries
            lowered = reason_text.lower()
            if any(marker in lowered for marker in negative_markers):
                st.markdown(f"🔴 {reason_text}")
            else:
                st.markdown(f"🟢 {reason_text}")
    st.markdown("---")
    # ── What did each AI model think? ──
    st.markdown("#### AI Model Votes")
    # (display name, key into `votes`, key into `probas`)
    MODEL_NAMES = [
        ("Statistical", "logistic", "lr_proba"),
        ("Language", "lstm", "lstm_proba"),
        ("Deep A", "distilbert", "distilbert_proba"),
        ("Deep B", "roberta", "roberta_proba"),
    ]
    mcols = st.columns(len(MODEL_NAMES))
    for mcol, (nice_name, vote_key, pk) in zip(mcols, MODEL_NAMES):
        vote_val = votes.get(vote_key)
        prob_val = probas.get(pk)
        with mcol:
            # A model with no vote, no probability, or a NaN probability is
            # reported as skipped.
            if vote_val is None or prob_val is None or np.isnan(prob_val):
                st.metric(nice_name, "Skipped")
            else:
                lbl = "Real" if int(vote_val) == 1 else "Fake"
                st.metric(nice_name, lbl, f"{prob_val*100:.0f}%")
    if res.get("short_text_warning"):
        st.warning("⚠️ Short article (under 50 words) — confidence is dampened.")
    st.caption(f"Article length: {wc} words")
# ── TAB 2: Freshness ─────────────────────────────────────────────────
with tab_fresh:
    # A missing or None freshness score falls back to the neutral 0.5.
    fresh_val = scores.get("freshness")
    fresh_val = 0.5 if fresh_val is None else float(fresh_val)
    # Bar width in percent, clamped so an out-of-range score cannot produce a
    # negative or >100% CSS width.
    bar_pct = max(0, min(100, int(fresh_val * 100)))
    # Bucket the score: fresh from 0.70, moderate from 0.40, outdated below.
    if fresh_val >= 0.70:
        fbg, flbl, fdesc = "#f0fdf4", "🟢 Fresh", "This article appears to be recent."
        fbar = "#16a34a"
    elif fresh_val >= 0.40:
        fbg, flbl, fdesc = "#fefce8", "🟡 Moderate", "Article may not be very recent."
        fbar = "#ca8a04"
    else:
        fbg, flbl, fdesc = "#fef2f2", "🔴 Outdated", "This article appears to be old."
        fbar = "#dc2626"
    st.markdown(f"""
<div style="background:{fbg}; border-radius:12px; padding:20px 24px; margin-bottom:20px;">
<div style="font-size:1.2rem; font-weight:600;">{flbl}</div>
<div style="font-size:0.88rem; color:#64748b; margin-top:8px;">{fdesc}</div>
<div class="fresh-track">
<div class="fresh-fill" style="width:{bar_pct}%; background:{fbar};"></div>
</div>
<div style="font-size:0.8rem; color:#6b7280; margin-top:4px;">Freshness: {fresh_val:.0%}</div>
</div>
""", unsafe_allow_html=True)
    # Case indicator: "A" is labelled date-based; anything else is labelled
    # contextual scanning.
    case_label = "📅 Date-based scoring" if fresh_case == "A" else "🔎 Contextual signal scanning (no date found)"
    st.markdown(f"""
<div class="info-card">
<b>Method:</b> {case_label}
</div>
""", unsafe_allow_html=True)
    # Signals found (Case B)
    if fresh_case == "B" and fresh_signals:
        st.markdown("**Signals detected:**")
        for sig in fresh_signals:
            st.markdown(f"✅ {sig}")
    elif fresh_case == "B":
        st.caption("No contextual freshness signals were found in the article text.")
# ── TAB 3: Live sources ──────────────────────────────────────────────
with tab_sources:
    from html import escape

    # `rag_results` may be a dict wrapping the list under "data", a bare
    # list, or absent; normalise all three to a list.
    rag_data = res.get("rag_results")
    if isinstance(rag_data, dict):
        source_list = rag_data.get("data") or []
    elif isinstance(rag_data, list):
        source_list = rag_data
    else:
        source_list = []
    if not source_list:
        st.markdown("""
<div class="info-card">
<b>Live source check was not triggered</b><br><br>
Live source verification runs when freshness is ambiguous.
This analysis relied on the 5-signal scoring framework instead.
</div>
""", unsafe_allow_html=True)
    else:
        st.caption(f"Compared against {len(source_list)} live web results.")
        for item in source_list:
            if not isinstance(item, dict):
                continue  # skip malformed entries instead of crashing on .get
            # Snippets are text from external web results and are rendered
            # with unsafe_allow_html: collapse whitespace (a blank line would
            # end the HTML block in Markdown) and HTML-escape them.
            raw_snippet = str(item.get("snippet", "") or "")
            snippet = escape(" ".join(raw_snippet.split()))
            sim = float(item.get("similarity") or 0.0)
            # Above 0.65 counts as supporting, below 0.30 as conflicting.
            if sim > 0.65:
                sc_col, sc_tag = "#16a34a", "Supports"
            elif sim < 0.30:
                sc_col, sc_tag = "#dc2626", "Conflicts"
            else:
                sc_col, sc_tag = "#ca8a04", "Neutral"
            st.markdown(f"""
<div class="source-card">
<div class="source-text">{snippet}</div>
<div class="source-score">
<div class="source-score-val" style="color:{sc_col};">{sim:.0%}</div>
<div class="source-score-tag" style="color:{sc_col};">{sc_tag}</div>
</div>
</div>
""", unsafe_allow_html=True)
# ── TAB 4: Details ───────────────────────────────────────────────────
# Each optional section renders only when its list is non-empty. Several
# heading emoji were mis-encoded (mojibake) and are restored.
with tab_details:
    # ── Missing Signals ──
    if missing_signals:
        st.markdown("#### ⚠️ Missing Signals")
        for ms in missing_signals:
            st.markdown(f"- {ms}")
        st.markdown("")
    # ── Adversarial Flags ──
    if adv_flags:
        st.markdown("#### 🚩 Adversarial Flags Triggered")
        for af in adv_flags:
            st.error(f"🚩 {af}")
        st.caption("Adversarial flags cap the final score at 25% maximum.")
        st.markdown("")
    # ── Linguistic Deductions ──
    if deductions:
        st.markdown("#### 📝 Linguistic Deductions")
        for d in deductions:
            st.markdown(f"- {d}")
        st.markdown("")
    # ── Named Entities Found ──
    if entities:
        st.markdown("#### 🏷️ Entities Detected")
        st.markdown(", ".join(f"`{e}`" for e in entities))
        q_attr = res.get("quotes_attributed", 0)
        q_total = res.get("quotes_total", 0)
        if q_total > 0:
            st.caption(f"Quotes: {q_attr}/{q_total} attributed")
        st.markdown("")
    # ── Summary Table ──
    st.markdown("#### Analysis Summary")
    rows = [
        ("Verdict", vc["label"]),
        ("Final Score", f"{score_pct:.1f}%"),
        ("Confidence", confidence),
        ("Action", action),
        ("Word Count", str(wc)),
        ("Freshness", f"{scores.get('freshness', 0):.0%} (Case {fresh_case})"),
    ]
    df_rep = pd.DataFrame(rows, columns=["Field", "Value"])
    st.dataframe(df_rep, use_container_width=True, hide_index=True, height=240)
    with st.expander("🔧 Raw JSON (for developers)"):
        # default=str stringifies any value json cannot serialise natively.
        st.code(json.dumps(res, indent=2, default=str), language="json")
# ── Analyze another ──
# Separator, then a full-width button that clears the page flag and the
# stored result and reruns the script.
st.markdown("---")
restart_requested = st.button("← Analyze another article", use_container_width=True)
if restart_requested:
    for state_key, cleared_value in (("analyzed", False), ("last_result", None)):
        st.session_state[state_key] = cleared_value
    st.rerun()