"""
FinSight Dashboard — LLM-Powered Earnings Intelligence
Stage 5: Production Streamlit Dashboard
Pages:
1. Overview — project summary, pipeline, key stats
2. Model Results — walk-forward IC/AUC comparison, year-by-year
3. SHAP Analysis — interactive feature importance
4. Backtest — equity curve, drawdown, quarterly P&L
5. Explorer — browse transcripts with live sentiment
Run:
streamlit run src/dashboard/app.py
"""
import sys
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import streamlit as st
sys.path.insert(0, str(Path(__file__).resolve().parent))
from config import PROCESSED_DIR, EXPERIMENTS_DIR
# ── Page config ────────────────────────────────────────────────────────────────
st.set_page_config(
page_title="FinSight | Earnings Intelligence",
page_icon="📈",
layout="wide",
initial_sidebar_state="expanded",
)
# ── Global CSS ─────────────────────────────────────────────────────────────────
st.markdown("""
""", unsafe_allow_html=True)
# ── Layout helper — avoids duplicate xaxis/yaxis conflicts ────────────────────
BASE_LAYOUT = dict(
paper_bgcolor="#0d1117",
plot_bgcolor="#0a0e1a",
font=dict(color="#b0bec5", family="Inter, sans-serif"),
margin=dict(l=50, r=30, t=50, b=50),
colorway=["#64b5f6","#66bb6a","#ffa726","#ef5350","#ab47bc","#26c6da"],
)
BASE_XAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
BASE_YAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
def L(**kwargs):
"""
Merge base dark-theme layout with chart-specific overrides.
Merges xaxis/yaxis dicts instead of replacing them, which avoids
the 'multiple values for keyword argument xaxis' TypeError.
"""
out = dict(**BASE_LAYOUT)
if "xaxis" in kwargs:
out["xaxis"] = {**BASE_XAXIS, **kwargs.pop("xaxis")}
else:
out["xaxis"] = BASE_XAXIS
if "yaxis" in kwargs:
out["yaxis"] = {**BASE_YAXIS, **kwargs.pop("yaxis")}
else:
out["yaxis"] = BASE_YAXIS
out.update(kwargs)
return out
# ── Global helpers ─────────────────────────────────────────────────────────────
def metric_card(col, value, label, delta="", delta_type="neu"):
"""Render a dark-theme KPI card inside a Streamlit column."""
col.markdown(f"""
""", unsafe_allow_html=True)
# ── Data loaders ───────────────────────────────────────────────────────────────
@st.cache_data(show_spinner=False)
def load_feature_matrix():
p = PROCESSED_DIR / "feature_matrix.parquet"
return pd.read_parquet(p) if p.exists() else pd.DataFrame()
@st.cache_data(show_spinner=False)
def load_model_results():
p = EXPERIMENTS_DIR / "model_results.csv"
return pd.read_csv(p) if p.exists() else pd.DataFrame()
@st.cache_data(show_spinner=False)
def load_backtest():
p = EXPERIMENTS_DIR / "backtest_results.csv"
return pd.read_csv(p) if p.exists() else pd.DataFrame()
@st.cache_data(show_spinner=False)
def load_shap():
p = EXPERIMENTS_DIR / "shap_values.parquet"
return pd.read_parquet(p) if p.exists() else pd.DataFrame()
# ── Sidebar ────────────────────────────────────────────────────────────────────
with st.sidebar:
st.markdown("""
📈 FinSight
LLM-Powered Earnings Intelligence
""", unsafe_allow_html=True)
page = st.radio(
"Navigation",
["Overview",
"Model Performance",
"Feature Importance",
"Backtest Results",
"Transcript Explorer"],
label_visibility="collapsed",
)
st.markdown("
", unsafe_allow_html=True)
st.markdown("""
Stack
FinBERT · ChromaDB · XGBoost
LightGBM · SHAP · Streamlit
Data
14,584 earnings transcripts
601 S&P 500 companies
2018 – 2024
Author
Rajveer Singh Pall
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 1 — OVERVIEW
# ═══════════════════════════════════════════════════════════════════════════════
if page == "Overview":
fm = load_feature_matrix()
mr = load_model_results()
st.markdown("""
FinSight
Earnings Intelligence System
An end-to-end machine learning pipeline that extracts alpha signals
from S&P 500 earnings call transcripts using FinBERT sentiment analysis,
RAG-based structured feature extraction, and walk-forward validated
gradient boosting models.
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
best_ic = float(mr["ic"].max()) if not mr.empty else 0.0198
best_auc = float(mr["auc"].max()) if not mr.empty else 0.5201
best_hr = float(mr["hit_rate"].max()) if not mr.empty else 0.5427
n_rows = len(fm) if not fm.empty else 13442
c1,c2,c3,c4,c5 = st.columns(5)
metric_card(c1, "14,584", "Transcripts", "601 companies", "neu")
metric_card(c2, f"{n_rows:,}", "Training Samples", "2018–2024", "neu")
metric_card(c3, f"{best_ic:.4f}", "Best IC", "LightGBM", "pos")
metric_card(c4, f"{best_auc:.4f}","Best AUC", "XGBoost 2024", "pos")
metric_card(c5, f"{best_hr:.4f}", "Best Hit Rate", "Walk-forward", "pos")
st.markdown("
", unsafe_allow_html=True)
# Pipeline
st.markdown("",
unsafe_allow_html=True)
steps = [
("🗄️","Stage 1","Data Ingestion", "SEC EDGAR · yfinance\n14,584 transcripts"),
("🧠","Stage 2","NLP Pipeline", "FinBERT · ChromaDB RAG\n34 features"),
("🤖","Stage 3","ML Models", "XGBoost · LightGBM\nWalk-forward CV"),
("📉","Stage 4","Backtesting", "Long-short strategy\n10bps TC"),
("🖥️","Stage 5","Dashboard", "Streamlit · Plotly\nHugging Face Spaces"),
]
cols = st.columns(len(steps))
for col, (icon, stage, title, desc) in zip(cols, steps):
col.markdown(f"""
{icon}
{stage}
{title}
{desc}
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
left, right = st.columns([1.1, 1])
with left:
st.markdown("",
unsafe_allow_html=True)
for f in [
"Analyst negativity > management positivity. "
"qa_neg_ratio (SHAP=0.054) is the single strongest feature. "
"Analyst pushback in Q&A contains more information than prepared remarks.",
"NLP reduces prediction variance by 87%. "
"Baseline IC std=0.114 vs LightGBM std=0.009 — "
"far more consistent across years.",
"Consistent with weak-form EMH. "
"Positive IC (0.0198) exists but cannot overcome 10bps transaction "
"costs at a 5-day holding period.",
"RAG guidance relevance is top-5. "
"Semantic relevance of the guidance section — not just its content — "
"carries significant predictive signal.",
]:
st.markdown(f"{f}
",
unsafe_allow_html=True)
with right:
st.markdown("",
unsafe_allow_html=True)
if not fm.empty:
yr = fm.groupby("year").size().reset_index(name="count")
fig = go.Figure(go.Bar(
x=yr["year"].astype(str),
y=yr["count"],
marker=dict(color=yr["count"],
colorscale=[[0,"#1a237e"],[1,"#64b5f6"]],
showscale=False),
text=yr["count"], textposition="outside",
textfont=dict(size=11),
))
fig.update_layout(**L(title="Transcript Count by Year", height=300,
showlegend=False,
xaxis=dict(title="Year"),
yaxis=dict(title="Transcripts")))
st.plotly_chart(fig, use_container_width=True)
# Sentiment heatmap
if not fm.empty and "mgmt_net_sentiment" in fm.columns:
st.markdown("",
unsafe_allow_html=True)
heat = (fm.groupby(["ticker","year"])["mgmt_net_sentiment"]
.mean().reset_index())
top_t = fm["ticker"].value_counts().head(30).index
heat = heat[heat["ticker"].isin(top_t)]
pivot = heat.pivot(index="ticker", columns="year",
values="mgmt_net_sentiment")
fig2 = go.Figure(go.Heatmap(
z=pivot.values,
x=[str(c) for c in pivot.columns],
y=pivot.index,
colorscale=[[0,"#b71c1c"],[0.35,"#e53935"],
[0.5,"#263238"],[0.65,"#1565c0"],[1,"#64b5f6"]],
zmid=0,
colorbar=dict(title="Net Sentiment", tickfont=dict(size=10)),
hovertemplate="Ticker: %{y}
Year: %{x}
Sentiment: %{z:.3f}",
))
fig2.update_layout(**L(
title="Management Net Sentiment — Top 30 Tickers × Year",
height=500,
xaxis=dict(title="Year"),
yaxis=dict(title=""),
))
st.plotly_chart(fig2, use_container_width=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 2 — MODEL PERFORMANCE
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "Model Performance":
mr = load_model_results()
st.markdown("Model Performance
",
unsafe_allow_html=True)
st.markdown("Walk-forward validation (2021–2024). "
"Train on 3 prior years, test on held-out year. Zero data leakage.
",
unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
if mr.empty:
st.error("model_results.csv not found. Run Stage 3 first.")
st.stop()
summary = (
mr.groupby("model")[["ic","hit_rate","auc"]]
.agg({"ic":["mean","std"],"hit_rate":["mean","std"],"auc":["mean","std"]})
.round(4)
)
summary.columns = ["IC Mean","IC Std","Hit Rate Mean","Hit Rate Std",
"AUC Mean","AUC Std"]
summary = summary.sort_values("IC Mean", ascending=False)
st.markdown("",
unsafe_allow_html=True)
def color_ic(val):
if isinstance(val, float):
if val > 0.015: return "color: #66bb6a; font-weight:600"
if val < 0: return "color: #ef5350"
return ""
st.dataframe(
summary.style.applymap(color_ic, subset=["IC Mean"]).format("{:.4f}"),
use_container_width=True, height=220,
)
st.markdown("
", unsafe_allow_html=True)
st.markdown("",
unsafe_allow_html=True)
MODEL_COLORS = {
"Baseline": "#ffa726",
"FinBERT_only": "#26c6da",
"RAG_only": "#ab47bc",
"XGBoost_all": "#ef5350",
"LightGBM_all": "#66bb6a",
}
fig = go.Figure()
for m in mr["model"].unique():
sub = mr[mr["model"]==m].sort_values("test_year")
fig.add_trace(go.Scatter(
x=sub["test_year"].astype(int),
y=sub["ic"],
mode="lines+markers", name=m,
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2.5),
marker=dict(size=9),
hovertemplate=f"{m}
Year: %{{x}}
IC: %{{y:.4f}}",
))
fig.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=1.2)
fig.update_layout(**L(
title="Walk-Forward IC — Positive = Predictive",
height=380,
xaxis=dict(tickvals=[2021,2022,2023,2024], title="Year"),
yaxis=dict(title="Information Coefficient"),
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433", borderwidth=1),
))
st.plotly_chart(fig, use_container_width=True)
col1, col2 = st.columns(2)
with col1:
st.markdown("Hit Rate by Year
",
unsafe_allow_html=True)
fig2 = go.Figure()
for m in mr["model"].unique():
sub = mr[mr["model"]==m].sort_values("test_year")
fig2.add_trace(go.Scatter(
x=sub["test_year"].astype(int), y=sub["hit_rate"],
mode="lines+markers", name=m,
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
marker=dict(size=7), showlegend=False,
))
fig2.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
fig2.update_layout(**L(
height=300, title="Hit Rate (>0.5 = better than coin flip)",
xaxis=dict(tickvals=[2021,2022,2023,2024]),
yaxis=dict(title="Hit Rate"),
))
st.plotly_chart(fig2, use_container_width=True)
with col2:
st.markdown("AUC by Year
",
unsafe_allow_html=True)
fig3 = go.Figure()
for m in mr["model"].unique():
sub = mr[mr["model"]==m].sort_values("test_year")
fig3.add_trace(go.Scatter(
x=sub["test_year"].astype(int), y=sub["auc"],
mode="lines+markers", name=m,
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
marker=dict(size=7), showlegend=False,
))
fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
fig3.update_layout(**L(
height=300, title="AUC-ROC (>0.5 = better than random)",
xaxis=dict(tickvals=[2021,2022,2023,2024]),
yaxis=dict(title="AUC"),
))
st.plotly_chart(fig3, use_container_width=True)
st.markdown("",
unsafe_allow_html=True)
ic_std = mr.groupby("model")["ic"].std().sort_values()
ic_mean = mr.groupby("model")["ic"].mean()
bar_colors = ["#66bb6a" if ic_mean[m] > 0 else "#ef5350" for m in ic_std.index]
fig4 = go.Figure(go.Bar(
y=ic_std.index, x=ic_std.values, orientation="h",
marker_color=bar_colors,
text=[f"σ={v:.4f}" for v in ic_std.values],
textposition="outside", textfont=dict(size=11),
))
fig4.update_layout(**L(
title="IC Standard Deviation — Lower = More Consistent",
height=280,
xaxis=dict(title="IC Std Dev"),
yaxis=dict(title=""),
))
st.plotly_chart(fig4, use_container_width=True)
st.markdown("""
Interpretation: The Baseline's high IC mean (0.043) is
misleading — its std of 0.114 shows extreme instability driven by lucky
quarters. LightGBM achieves IC=0.020 with std=0.009, making it
10× more stable. In live trading, consistency matters
far more than occasional lucky peaks.
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 3 — FEATURE IMPORTANCE
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "Feature Importance":
shap_df = load_shap()
fm = load_feature_matrix()
st.markdown("Feature Importance
",
unsafe_allow_html=True)
st.markdown("SHAP values computed on LightGBM (best model). "
"Shows which features actually drive predictions.
",
unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
if shap_df.empty:
st.error("shap_values.parquet not found. Run run_shap.py first.")
st.stop()
mean_shap = shap_df.abs().mean().sort_values(ascending=False)
def feat_color(name):
if name.startswith("rag_"): return "#64b5f6"
if name.startswith("mgmt_"): return "#66bb6a"
if name.startswith("qa_"): return "#ffa726"
return "#ab47bc"
def feat_group(name):
if name.startswith("rag_"): return "RAG Features"
if name.startswith("mgmt_"): return "Management FinBERT"
if name.startswith("qa_"): return "QA FinBERT"
return "Other"
st.markdown("",
unsafe_allow_html=True)
top20 = mean_shap.head(20)[::-1]
fig = go.Figure(go.Bar(
y=top20.index, x=top20.values, orientation="h",
marker_color=[feat_color(n) for n in top20.index],
text=[f"{v:.4f}" for v in top20.values],
textposition="outside", textfont=dict(size=10),
hovertemplate="%{y}
Mean |SHAP|: %{x:.4f}",
))
fig.update_layout(**L(
height=520,
title="Feature Importance — 🔵 RAG | 🟢 Mgmt FinBERT | 🟠 QA FinBERT",
xaxis=dict(title="Mean |SHAP Value|"),
yaxis=dict(title=""),
))
st.plotly_chart(fig, use_container_width=True)
col1, col2 = st.columns(2)
with col1:
st.markdown("",
unsafe_allow_html=True)
gs = (mean_shap.reset_index()
.rename(columns={"index":"feature", 0:"shap"}))
gs.columns = ["feature","shap"]
gs["group"] = gs["feature"].apply(feat_group)
gt = gs.groupby("group")["shap"].sum()
fig2 = go.Figure(go.Pie(
labels=gt.index, values=gt.values, hole=0.55,
marker=dict(colors=["#64b5f6","#66bb6a","#ffa726","#ab47bc"]),
textinfo="label+percent", textfont=dict(size=12),
hovertemplate="%{label}
Total SHAP: %{value:.4f}
%{percent}",
))
fig2.update_layout(**L(
height=320, showlegend=False,
annotations=[dict(text="SHAP
Groups", x=0.5, y=0.5,
font_size=13, showarrow=False,
font_color="#b0bec5")],
))
st.plotly_chart(fig2, use_container_width=True)
with col2:
st.markdown("",
unsafe_allow_html=True)
if not fm.empty and "target_5d_up" in fm.columns:
feat_cols = [c for c in shap_df.columns if c in fm.columns]
corrs = fm[feat_cols+["target_5d_up"]].corr()["target_5d_up"].drop("target_5d_up")
cdf = pd.DataFrame({
"feature": corrs.index,
"shap": mean_shap.reindex(corrs.index).fillna(0).values,
"corr": corrs.values,
"group": [feat_group(f) for f in corrs.index],
})
cmap = {
"RAG Features": "#64b5f6",
"Management FinBERT": "#66bb6a",
"QA FinBERT": "#ffa726",
"Other": "#ab47bc",
}
fig3 = px.scatter(
cdf, x="corr", y="shap", color="group",
color_discrete_map=cmap, hover_data=["feature"],
labels={"corr":"Pearson Corr with Target",
"shap":"Mean |SHAP Value|"},
height=320,
)
fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
fig3.update_layout(**L(
title="SHAP Importance vs Linear Correlation",
showlegend=False,
))
st.plotly_chart(fig3, use_container_width=True)
st.markdown("",
unsafe_allow_html=True)
insights = [
("🏆 #1 — qa_neg_ratio",
"Proportion of negative sentences in analyst Q&A. When analysts "
"push back hard, it signals market-moving information that management "
"tried to downplay."),
("📊 #2 — mgmt_sent_vol",
"Volatility of management's sentence-level sentiment. Inconsistent "
"messaging — mixing optimism with caution — often precedes larger "
"price moves."),
("📝 #3 — qa_n_sentences",
"Length of the Q&A section. Longer Q&A sessions indicate "
"more analyst scrutiny, which correlates with uncertainty about "
"the quarter's results."),
("😶 #4 — mgmt_mean_neu",
"Neutral sentiment ratio in management remarks. Deliberately neutral "
"language can mask very good or very bad news — a hedging signal."),
("🎯 #5 — rag_guidance_relevance",
"Semantic similarity of the guidance section to specific numerical "
"guidance queries. More relevant guidance sections contain concrete "
"targets that markets react to more strongly."),
]
cols = st.columns(len(insights))
for col, (title, body) in zip(cols, insights):
col.markdown(f"""
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 4 — BACKTEST
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "Backtest Results":
bt = load_backtest()
st.markdown("Backtest Results
",
unsafe_allow_html=True)
st.markdown("Long-short quartile portfolio. "
"Long top-25% predicted stocks, short bottom-25%. "
"5-day holding period. 10bps round-trip transaction cost.
",
unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
if bt.empty:
st.error("backtest_results.csv not found. Run Stage 4 first.")
st.stop()
bt = bt.sort_values(["year","quarter"]).reset_index(drop=True)
bt["period"] = bt["year"].astype(str) + "-Q" + bt["quarter"].astype(str)
rets = bt["net_ret"]
cum = (1 + rets).cumprod()
peak = cum.cummax()
dd = (cum - peak) / peak
n_yrs = len(bt) / 4
ann_ret = float((1 + rets).prod() ** (1/n_yrs) - 1)
ann_vol = float(rets.std() * np.sqrt(4))
sharpe = ann_ret / ann_vol if ann_vol != 0 else 0.0
max_dd = float(dd.min())
hit = float((rets > 0).mean())
c1,c2,c3,c4,c5 = st.columns(5)
metric_card(c1, f"{ann_ret*100:.2f}%", "Ann. Return",
"After TC", "pos" if ann_ret > 0 else "neg")
metric_card(c2, f"{sharpe:.3f}", "Sharpe Ratio",
">1.0 = excellent", "pos" if sharpe > 0 else "neg")
metric_card(c3, f"{max_dd*100:.2f}%", "Max Drawdown",
"Peak-to-trough", "neg")
metric_card(c4, f"{hit*100:.0f}%", "Win Rate",
"Profitable quarters", "pos" if hit > 0.5 else "neg")
metric_card(c5, str(len(bt)), "Quarters Tested",
"2021–2024", "neu")
st.markdown("
", unsafe_allow_html=True)
st.markdown("",
unsafe_allow_html=True)
fig = make_subplots(rows=2, cols=1, row_heights=[0.72,0.28],
shared_xaxes=True, vertical_spacing=0.04)
fig.add_trace(go.Scatter(
x=bt["period"], y=cum.values,
mode="lines+markers",
line=dict(color="#64b5f6", width=2.5),
marker=dict(size=7),
fill="tozeroy", fillcolor="rgba(100,181,246,0.06)",
name="Cumulative Return",
hovertemplate="%{x}
Cumulative: %{y:.4f}",
), row=1, col=1)
fig.add_hline(y=1.0, line_dash="dash", line_color="#546e7a",
line_width=1, row=1, col=1)
fig.add_trace(go.Bar(
x=bt["period"], y=dd.values*100,
marker_color="#ef5350", opacity=0.7, name="Drawdown %",
hovertemplate="%{x}
Drawdown: %{y:.2f}%",
), row=2, col=1)
fig.update_layout(
paper_bgcolor="#0d1117", plot_bgcolor="#0a0e1a",
font=dict(color="#b0bec5"),
margin=dict(l=50,r=30,t=50,b=80),
title="FinSight Long-Short Strategy — 2021 to 2024",
height=500, showlegend=False,
xaxis2=dict(tickangle=45, tickfont_size=10,
gridcolor="#1a2035", linecolor="#1e2433"),
yaxis=dict(title="Cumulative Return",
gridcolor="#1a2035", linecolor="#1e2433"),
yaxis2=dict(title="DD %",
gridcolor="#1a2035", linecolor="#1e2433"),
)
fig.update_xaxes(gridcolor="#1a2035", linecolor="#1e2433")
st.plotly_chart(fig, use_container_width=True)
col1, col2 = st.columns(2)
with col1:
st.markdown("Quarterly Net Returns
",
unsafe_allow_html=True)
q_colors = ["#66bb6a" if r > 0 else "#ef5350" for r in rets]
fig2 = go.Figure(go.Bar(
x=bt["period"], y=rets.values*100,
marker_color=q_colors,
text=[f"{v*100:.2f}%" for v in rets.values],
textposition="outside", textfont=dict(size=9),
hovertemplate="%{x}
Net Return: %{y:.2f}%",
))
fig2.add_hline(y=0, line_color="#546e7a", line_width=1)
fig2.update_layout(**L(
height=320,
title="Net Return per Quarter (after 10bps TC)",
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
yaxis=dict(title="Net Return (%)"),
))
st.plotly_chart(fig2, use_container_width=True)
with col2:
st.markdown("Long vs Short Leg Hit Rate
",
unsafe_allow_html=True)
fig3 = go.Figure()
fig3.add_trace(go.Scatter(
x=bt["period"], y=bt["long_hit"],
mode="lines+markers",
line=dict(color="#66bb6a", width=2),
marker=dict(size=7), name="Long Leg",
))
fig3.add_trace(go.Scatter(
x=bt["period"], y=bt["short_hit"],
mode="lines+markers",
line=dict(color="#ef5350", width=2),
marker=dict(size=7), name="Short Leg",
))
fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a")
fig3.update_layout(**L(
height=320,
title="Direction Accuracy — Long & Short Legs",
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
yaxis=dict(title="Hit Rate"),
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
))
st.plotly_chart(fig3, use_container_width=True)
st.markdown("",
unsafe_allow_html=True)
disp = bt[["period","net_ret","long_ret","short_ret",
"long_hit","short_hit","n_stocks","q_size"]].copy()
disp.columns = ["Quarter","Net Ret","Long Ret","Short Ret",
"Long Hit","Short Hit","N Stocks","Leg Size"]
def color_ret(val):
if isinstance(val, float):
if val > 0: return "color: #66bb6a"
if val < 0: return "color: #ef5350"
return ""
st.dataframe(
disp.style.applymap(color_ret,
subset=["Net Ret","Long Ret","Short Ret"])
.format({c:"{:.4f}" for c in
["Net Ret","Long Ret","Short Ret",
"Long Hit","Short Hit"]}),
use_container_width=True, hide_index=True,
)
st.markdown("""
Context: A Sharpe of -0.81 with a 5-day holding period
is consistent with academic literature on post-earnings announcement
drift (Chan et al. 1996, Lerman et al. 2008). The signal exists
(IC=0.0198) but is too weak to survive round-trip transaction costs at
this frequency. Extending to 20-day holding periods is the natural
next step.
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════════════════════
# PAGE 5 — TRANSCRIPT EXPLORER
# ═══════════════════════════════════════════════════════════════════════════════
elif page == "Transcript Explorer":
fm = load_feature_matrix()
st.markdown("Transcript Explorer
",
unsafe_allow_html=True)
st.markdown("Browse sentiment profiles for any company "
"and quarter in the dataset.
",
unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
if fm.empty:
st.error("Feature matrix not found.")
st.stop()
col1, col2, col3 = st.columns([2,1,1])
with col1:
all_tickers = sorted(fm["ticker"].dropna().unique())
default_idx = all_tickers.index("AAPL") if "AAPL" in all_tickers else 0
ticker = st.selectbox("Select Ticker", all_tickers, index=default_idx)
with col2:
years = sorted(fm["year"].unique(), reverse=True)
year = st.selectbox("Year", years)
with col3:
quarters = sorted(fm[fm["year"]==year]["quarter"].unique())
quarter = st.selectbox("Quarter", quarters)
row = fm[(fm["ticker"]==ticker) &
(fm["year"]==year) &
(fm["quarter"]==quarter)]
if row.empty:
st.warning("No data for this combination.")
st.stop()
row = row.iloc[0]
ret_5d = row.get("ret_5d", 0)
target = int(row.get("target_5d_up", 0))
st.markdown(f"""
{ticker}
{int(year)} Q{int(quarter)}
{"▲ UP" if target==1 else "▼ DOWN"} 5d
5d Return: {float(ret_5d)*100:.2f}%
""", unsafe_allow_html=True)
left, right = st.columns([1.2, 1])
with left:
st.markdown("Sentiment Breakdown
",
unsafe_allow_html=True)
cats = ["Mgmt Positive","Mgmt Neutral","Mgmt Negative",
"QA Positive","QA Neutral","QA Negative"]
vals = [
float(row.get("mgmt_mean_pos", 0) or 0),
float(row.get("mgmt_mean_neu", 0) or 0),
float(row.get("mgmt_mean_neg", 0) or 0),
float(row.get("qa_mean_pos", 0) or 0),
float(row.get("qa_mean_neu", 0) or 0),
float(row.get("qa_mean_neg", 0) or 0),
]
vals_c = vals + [vals[0]]
cats_c = cats + [cats[0]]
fig = go.Figure(go.Scatterpolar(
r=vals_c, theta=cats_c, fill="toself",
fillcolor="rgba(100,181,246,0.15)",
line=dict(color="#64b5f6", width=2), name=ticker,
))
fig.update_layout(
paper_bgcolor="#0d1117",
font=dict(color="#b0bec5"),
polar=dict(
bgcolor="#0d1117",
radialaxis=dict(visible=True, range=[0,1],
gridcolor="#1a2035", linecolor="#1a2035",
tickfont=dict(size=9, color="#546e7a")),
angularaxis=dict(gridcolor="#1a2035", linecolor="#1a2035",
tickfont=dict(size=10, color="#b0bec5")),
),
height=360, showlegend=False,
title=f"{ticker} — Sentiment Radar",
margin=dict(l=40,r=40,t=50,b=40),
)
st.plotly_chart(fig, use_container_width=True)
with right:
st.markdown("Feature Scores
",
unsafe_allow_html=True)
def score_bar(label, val, invert=False):
if val is None or pd.isna(val):
return
v = float(val)
pct = max(0, min(1, v)) * 100
color = "#ef5350" if invert else "#64b5f6"
st.markdown(f"""
""", unsafe_allow_html=True)
score_bar("Mgmt Net Sentiment", row.get("mgmt_net_sentiment"))
score_bar("QA Net Sentiment", row.get("qa_net_sentiment"))
score_bar("Mgmt Negativity", row.get("mgmt_neg_ratio"), invert=True)
score_bar("QA Negativity", row.get("qa_neg_ratio"), invert=True)
score_bar("Guidance Specificity", row.get("rag_guidance_specificity_score"))
score_bar("Mgmt Confidence", row.get("rag_management_confidence_score"))
score_bar("Forward Looking", row.get("rag_forward_looking_score"))
score_bar("New Risks", row.get("rag_new_risks_score"), invert=True)
score_bar("Cost Pressure", row.get("rag_cost_pressure_score"), invert=True)
# Historical trend
st.markdown(f"",
unsafe_allow_html=True)
td = fm[fm["ticker"]==ticker].copy().sort_values(["year","quarter"])
td["period"] = td["year"].astype(str) + "-Q" + td["quarter"].astype(str)
if len(td) > 1:
fig2 = go.Figure()
for col_name, label, color in [
("mgmt_net_sentiment", "Mgmt Sentiment", "#66bb6a"),
("qa_net_sentiment", "QA Sentiment", "#64b5f6"),
("mgmt_neg_ratio", "Mgmt Negativity","#ef5350"),
]:
if col_name in td.columns:
fig2.add_trace(go.Scatter(
x=td["period"], y=td[col_name],
mode="lines+markers", name=label,
line=dict(color=color, width=2),
marker=dict(size=6),
hovertemplate=f"{label}
%{{x}}
%{{y:.3f}}",
))
# Mark selected quarter — use index position to avoid type issues
cur_period = f"{int(year)}-Q{int(quarter)}"
if cur_period in td["period"].values:
cur_idx = td[td["period"]==cur_period].index[0]
cur_pos = td["period"].tolist().index(cur_period)
fig2.add_vrect(
x0=cur_period, x1=cur_period,
line_dash="dot", line_color="#ffa726", line_width=2,
)
fig2.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=0.8)
fig2.update_layout(**L(
height=320,
title=f"{ticker} — Sentiment Over Time",
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
yaxis=dict(title="Score"),
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
))
st.plotly_chart(fig2, use_container_width=True)
# Scatter: sentiment vs return
if "ret_5d" in td.columns and "mgmt_net_sentiment" in td.columns:
st.markdown("Sentiment vs 5-Day Return
",
unsafe_allow_html=True)
tc = td.dropna(subset=["ret_5d","mgmt_net_sentiment"]).copy()
tc["ret_pct"] = tc["ret_5d"].astype(float) * 100
sc_colors = ["#66bb6a" if r > 0 else "#ef5350"
for r in tc["ret_pct"]]
fig3 = go.Figure(go.Scatter(
x=tc["mgmt_net_sentiment"].astype(float),
y=tc["ret_pct"],
mode="markers+text",
text=tc["period"],
textposition="top center",
textfont=dict(size=8, color="#546e7a"),
marker=dict(color=sc_colors, size=9, opacity=0.85),
hovertemplate=(
"%{text}
"
"Mgmt Sentiment: %{x:.3f}
"
"5d Return: %{y:.2f}%"
),
))
fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
fig3.add_hline(y=0, line_dash="dash", line_color="#546e7a")
fig3.update_layout(**L(
height=340,
title=f"{ticker} — Mgmt Sentiment vs 5-Day Return",
xaxis=dict(title="Management Net Sentiment"),
yaxis=dict(title="5-Day Return (%)"),
))
st.plotly_chart(fig3, use_container_width=True)
else:
st.info("Not enough historical data for this ticker.")