Spaces:
Sleeping
Sleeping
Rajveer Pall committed on
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -1,1012 +1,1012 @@
|
|
| 1 |
-
|
| 2 |
-
FinSight Dashboard
|
| 3 |
-
Stage 5: Production Streamlit Dashboard
|
| 4 |
-
|
| 5 |
-
Pages:
|
| 6 |
-
1. Overview
|
| 7 |
-
2. Model Results
|
| 8 |
-
3. SHAP Analysis
|
| 9 |
-
4. Backtest
|
| 10 |
-
5. Explorer
|
| 11 |
-
|
| 12 |
-
Run:
|
| 13 |
-
streamlit run src/dashboard/app.py
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
import sys
|
| 17 |
-
from pathlib import Path
|
| 18 |
-
import warnings
|
| 19 |
-
warnings.filterwarnings("ignore")
|
| 20 |
-
|
| 21 |
-
import numpy as np
|
| 22 |
-
import pandas as pd
|
| 23 |
-
import plotly.express as px
|
| 24 |
-
import plotly.graph_objects as go
|
| 25 |
-
from plotly.subplots import make_subplots
|
| 26 |
-
import streamlit as st
|
| 27 |
-
|
| 28 |
-
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
| 29 |
-
from config import PROCESSED_DIR, EXPERIMENTS_DIR
|
| 30 |
-
|
| 31 |
-
#
|
| 32 |
-
|
| 33 |
-
st.set_page_config(
|
| 34 |
-
page_title="FinSight | Earnings Intelligence",
|
| 35 |
-
page_icon="
|
| 36 |
-
layout="wide",
|
| 37 |
-
initial_sidebar_state="expanded",
|
| 38 |
-
)
|
| 39 |
-
|
| 40 |
-
#
|
| 41 |
-
|
| 42 |
-
st.markdown("""
|
| 43 |
-
<style>
|
| 44 |
-
[data-testid="stAppViewContainer"] { background: #0a0e1a; color: #e8eaf6; }
|
| 45 |
-
[data-testid="stSidebar"] {
|
| 46 |
-
background: #0d1117;
|
| 47 |
-
border-right: 1px solid #1e2433;
|
| 48 |
-
}
|
| 49 |
-
[data-testid="stSidebar"] .stRadio label {
|
| 50 |
-
color: #8892b0 !important;
|
| 51 |
-
font-size: 0.9rem;
|
| 52 |
-
}
|
| 53 |
-
.metric-card {
|
| 54 |
-
background: linear-gradient(135deg, #0d1117 0%, #161b27 100%);
|
| 55 |
-
border: 1px solid #1e2d4a;
|
| 56 |
-
border-radius: 12px;
|
| 57 |
-
padding: 20px 24px;
|
| 58 |
-
text-align: center;
|
| 59 |
-
transition: border-color 0.2s;
|
| 60 |
-
}
|
| 61 |
-
.metric-card:hover { border-color: #3d5a99; }
|
| 62 |
-
.metric-value { font-size: 2rem; font-weight: 700; color: #64b5f6; line-height: 1.1; }
|
| 63 |
-
.metric-label {
|
| 64 |
-
font-size: 0.78rem; color: #8892b0;
|
| 65 |
-
text-transform: uppercase; letter-spacing: 1px; margin-top: 6px;
|
| 66 |
-
}
|
| 67 |
-
.metric-delta { font-size: 0.82rem; margin-top: 4px; }
|
| 68 |
-
.delta-pos { color: #66bb6a; }
|
| 69 |
-
.delta-neg { color: #ef5350; }
|
| 70 |
-
.delta-neu { color: #8892b0; }
|
| 71 |
-
.section-header {
|
| 72 |
-
font-size: 1.4rem; font-weight: 700; color: #e8eaf6;
|
| 73 |
-
border-left: 4px solid #3d5a99; padding-left: 12px;
|
| 74 |
-
margin: 28px 0 16px 0;
|
| 75 |
-
}
|
| 76 |
-
.subsection { font-size: 1rem; font-weight: 600; color: #8892b0; margin: 16px 0 8px 0; }
|
| 77 |
-
.hero-title {
|
| 78 |
-
font-size: 2.8rem; font-weight: 800;
|
| 79 |
-
background: linear-gradient(90deg, #64b5f6, #7c4dff, #64b5f6);
|
| 80 |
-
background-size: 200%;
|
| 81 |
-
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
|
| 82 |
-
line-height: 1.2;
|
| 83 |
-
}
|
| 84 |
-
.hero-sub {
|
| 85 |
-
font-size: 1.1rem; color: #8892b0; margin-top: 8px;
|
| 86 |
-
max-width: 680px; line-height: 1.6;
|
| 87 |
-
}
|
| 88 |
-
.pipeline-step {
|
| 89 |
-
background: #0d1117; border: 1px solid #1e2433;
|
| 90 |
-
border-radius: 10px; padding: 14px 16px; text-align: center;
|
| 91 |
-
}
|
| 92 |
-
.pipeline-icon { font-size: 1.6rem; }
|
| 93 |
-
.pipeline-label { font-size: 0.78rem; color: #8892b0; margin-top: 4px; }
|
| 94 |
-
.pipeline-title { font-size: 0.9rem; font-weight: 600; color: #cfd8dc; }
|
| 95 |
-
.insight-box {
|
| 96 |
-
background: #0d1117; border-left: 3px solid #3d5a99;
|
| 97 |
-
border-radius: 0 8px 8px 0; padding: 12px 16px; margin: 8px 0;
|
| 98 |
-
font-size: 0.88rem; color: #b0bec5; line-height: 1.6;
|
| 99 |
-
}
|
| 100 |
-
.insight-box strong { color: #64b5f6; }
|
| 101 |
-
.badge {
|
| 102 |
-
display: inline-block; padding: 2px 10px; border-radius: 20px;
|
| 103 |
-
font-size: 0.72rem; font-weight: 600; margin: 2px;
|
| 104 |
-
}
|
| 105 |
-
.badge-blue { background: #1a237e22; color: #64b5f6; border: 1px solid #1a237e; }
|
| 106 |
-
.badge-green { background: #1b5e2022; color: #66bb6a; border: 1px solid #1b5e20; }
|
| 107 |
-
.badge-red { background: #b71c1c22; color: #ef9a9a; border: 1px solid #b71c1c; }
|
| 108 |
-
hr { border-color: #1e2433 !important; }
|
| 109 |
-
::-webkit-scrollbar { width: 6px; }
|
| 110 |
-
::-webkit-scrollbar-track { background: #0a0e1a; }
|
| 111 |
-
::-webkit-scrollbar-thumb { background: #1e2d4a; border-radius: 3px; }
|
| 112 |
-
</style>
|
| 113 |
-
""", unsafe_allow_html=True)
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
#
|
| 117 |
-
|
| 118 |
-
# Shared dark-theme Plotly layout pieces. Treat these as read-only templates:
# L() below hands out copies so individual charts can never alter them.
BASE_LAYOUT = dict(
    paper_bgcolor="#0d1117",
    plot_bgcolor="#0a0e1a",
    font=dict(color="#b0bec5", family="Inter, sans-serif"),
    margin=dict(l=50, r=30, t=50, b=50),
    colorway=["#64b5f6","#66bb6a","#ffa726","#ef5350","#ab47bc","#26c6da"],
)
BASE_XAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
BASE_YAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")


def L(**kwargs):
    """
    Merge base dark-theme layout with chart-specific overrides.

    Merges xaxis/yaxis dicts instead of replacing them, which avoids
    the 'multiple values for keyword argument xaxis' TypeError.

    Args:
        **kwargs: Layout settings for one chart. ``xaxis`` / ``yaxis`` (a
            mapping, or ``None`` for "no override") are layered on top of
            ``BASE_XAXIS`` / ``BASE_YAXIS``; every other key is written
            over the corresponding ``BASE_LAYOUT`` entry.

    Returns:
        A new dict suitable for ``fig.update_layout(**L(...))``. It shares
        no mutable state with the module-level templates, so callers may
        modify it freely.
    """
    # Function-scope import keeps this block self-contained.
    from copy import deepcopy

    # Deep copy: BASE_LAYOUT holds nested dicts/lists (font, margin,
    # colorway) that a shallow copy would still share with every chart.
    out = deepcopy(BASE_LAYOUT)
    for axis_key, axis_base in (("xaxis", BASE_XAXIS), ("yaxis", BASE_YAXIS)):
        # Always build a fresh dict, even with no override, so the returned
        # axis settings are never the module-level object itself.
        override = kwargs.pop(axis_key, None) or {}
        out[axis_key] = {**axis_base, **override}
    out.update(kwargs)
    return out
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
#
|
| 148 |
-
|
| 149 |
-
def metric_card(col, value, label, delta="", delta_type="neu"):
    """Draw a single KPI tile in the dark dashboard theme.

    Args:
        col: Target container; only its ``markdown`` method is used.
        value: Headline figure, interpolated as-is into the tile.
        label: Caption shown under the value.
        delta: Optional secondary line (empty by default).
        delta_type: Suffix of the ``delta-<type>`` CSS class applied to the
            secondary line; the stylesheet in this file defines ``pos``,
            ``neg`` and ``neu``.

    All arguments are inserted into the HTML without escaping.
    """
    card_html = f"""
<div class='metric-card'>
<div class='metric-value'>{value}</div>
<div class='metric-label'>{label}</div>
<div class='metric-delta delta-{delta_type}'>{delta}</div>
</div>"""
    # Raw HTML is required for the custom CSS classes to take effect.
    col.markdown(card_html, unsafe_allow_html=True)
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
#
|
| 160 |
-
|
| 161 |
-
@st.cache_data(show_spinner=False)
def load_feature_matrix():
    """Load ``feature_matrix.parquet`` from ``PROCESSED_DIR``.

    Returns:
        The parquet contents as a DataFrame, or an empty DataFrame when
        the file does not exist.
    """
    # NOTE(review): the result is memoised by st.cache_data, so an empty
    # frame returned while the file is missing presumably sticks until the
    # cache is cleared -- confirm this is acceptable.
    parquet_path = PROCESSED_DIR / "feature_matrix.parquet"
    if not parquet_path.exists():
        return pd.DataFrame()
    return pd.read_parquet(parquet_path)
|
| 165 |
-
|
| 166 |
-
@st.cache_data(show_spinner=False)
def load_model_results():
    """Load ``model_results.csv`` from ``EXPERIMENTS_DIR``.

    Returns:
        The CSV contents as a DataFrame, or an empty DataFrame when the
        file does not exist.
    """
    # NOTE(review): the result is memoised by st.cache_data, so an empty
    # frame returned while the file is missing presumably sticks until the
    # cache is cleared -- confirm this is acceptable.
    csv_path = EXPERIMENTS_DIR / "model_results.csv"
    if not csv_path.exists():
        return pd.DataFrame()
    return pd.read_csv(csv_path)
|
| 170 |
-
|
| 171 |
-
@st.cache_data(show_spinner=False)
def load_backtest():
    """Load ``backtest_results.csv`` from ``EXPERIMENTS_DIR``.

    Returns:
        The CSV contents as a DataFrame, or an empty DataFrame when the
        file does not exist.
    """
    # NOTE(review): the result is memoised by st.cache_data, so an empty
    # frame returned while the file is missing presumably sticks until the
    # cache is cleared -- confirm this is acceptable.
    csv_path = EXPERIMENTS_DIR / "backtest_results.csv"
    if not csv_path.exists():
        return pd.DataFrame()
    return pd.read_csv(csv_path)
|
| 175 |
-
|
| 176 |
-
@st.cache_data(show_spinner=False)
def load_shap():
    """Load ``shap_values.parquet`` from ``EXPERIMENTS_DIR``.

    Returns:
        The parquet contents as a DataFrame, or an empty DataFrame when
        the file does not exist.
    """
    # NOTE(review): the result is memoised by st.cache_data, so an empty
    # frame returned while the file is missing presumably sticks until the
    # cache is cleared -- confirm this is acceptable.
    parquet_path = EXPERIMENTS_DIR / "shap_values.parquet"
    if not parquet_path.exists():
        return pd.DataFrame()
    return pd.read_parquet(parquet_path)
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
#
|
| 183 |
-
|
| 184 |
-
with st.sidebar:
|
| 185 |
-
st.markdown("""
|
| 186 |
-
<div style='padding:12px 0 20px 0;'>
|
| 187 |
-
<div style='font-size:1.5rem;font-weight:800;color:#64b5f6;'>
|
| 188 |
-
<div style='font-size:0.75rem;color:#8892b0;margin-top:4px;'>
|
| 189 |
-
LLM-Powered Earnings Intelligence
|
| 190 |
-
</div>
|
| 191 |
-
</div>
|
| 192 |
-
""", unsafe_allow_html=True)
|
| 193 |
-
|
| 194 |
-
page = st.radio(
|
| 195 |
-
"Navigation",
|
| 196 |
-
["
|
| 197 |
-
"
|
| 198 |
-
"
|
| 199 |
-
"
|
| 200 |
-
"
|
| 201 |
-
label_visibility="collapsed",
|
| 202 |
-
)
|
| 203 |
-
|
| 204 |
-
st.markdown("<hr>", unsafe_allow_html=True)
|
| 205 |
-
st.markdown("""
|
| 206 |
-
<div style='font-size:0.72rem;color:#8892b0;line-height:1.8;'>
|
| 207 |
-
<b style='color:#cfd8dc;'>Stack</b><br>
|
| 208 |
-
FinBERT
|
| 209 |
-
LightGBM
|
| 210 |
-
<b style='color:#cfd8dc;'>Data</b><br>
|
| 211 |
-
14,584 earnings transcripts<br>
|
| 212 |
-
601 S&P 500 companies<br>
|
| 213 |
-
2018
|
| 214 |
-
<b style='color:#cfd8dc;'>Author</b><br>
|
| 215 |
-
Rajveer Singh Pall
|
| 216 |
-
</div>
|
| 217 |
-
""", unsafe_allow_html=True)
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
#
|
| 221 |
-
# PAGE 1
|
| 222 |
-
#
|
| 223 |
-
|
| 224 |
-
if page == "
|
| 225 |
-
fm = load_feature_matrix()
|
| 226 |
-
mr = load_model_results()
|
| 227 |
-
|
| 228 |
-
st.markdown("""
|
| 229 |
-
<div style='padding:24px 0 8px 0;'>
|
| 230 |
-
<div class='hero-title'>FinSight</div>
|
| 231 |
-
<div class='hero-title' style='font-size:1.8rem;color:#7c4dff;'>
|
| 232 |
-
Earnings Intelligence System
|
| 233 |
-
</div>
|
| 234 |
-
<div class='hero-sub'>
|
| 235 |
-
An end-to-end machine learning pipeline that extracts alpha signals
|
| 236 |
-
from S&P 500 earnings call transcripts using FinBERT sentiment analysis,
|
| 237 |
-
RAG-based structured feature extraction, and walk-forward validated
|
| 238 |
-
gradient boosting models.
|
| 239 |
-
</div>
|
| 240 |
-
</div>
|
| 241 |
-
""", unsafe_allow_html=True)
|
| 242 |
-
|
| 243 |
-
st.markdown("<hr>", unsafe_allow_html=True)
|
| 244 |
-
|
| 245 |
-
best_ic = float(mr["ic"].max()) if not mr.empty else 0.0198
|
| 246 |
-
best_auc = float(mr["auc"].max()) if not mr.empty else 0.5201
|
| 247 |
-
best_hr = float(mr["hit_rate"].max()) if not mr.empty else 0.5427
|
| 248 |
-
n_rows = len(fm) if not fm.empty else 13442
|
| 249 |
-
|
| 250 |
-
c1,c2,c3,c4,c5 = st.columns(5)
|
| 251 |
-
metric_card(c1, "14,584", "Transcripts", "601 companies", "neu")
|
| 252 |
-
metric_card(c2, f"{n_rows:,}", "Training Samples", "
|
| 253 |
-
metric_card(c3, f"{best_ic:.4f}", "Best IC", "LightGBM", "pos")
|
| 254 |
-
metric_card(c4, f"{best_auc:.4f}","Best AUC", "XGBoost 2024", "pos")
|
| 255 |
-
metric_card(c5, f"{best_hr:.4f}", "Best Hit Rate", "Walk-forward", "pos")
|
| 256 |
-
|
| 257 |
-
st.markdown("<br>", unsafe_allow_html=True)
|
| 258 |
-
|
| 259 |
-
# Pipeline
|
| 260 |
-
st.markdown("<div class='section-header'>System Architecture</div>",
|
| 261 |
-
unsafe_allow_html=True)
|
| 262 |
-
steps = [
|
| 263 |
-
("
|
| 264 |
-
("
|
| 265 |
-
("
|
| 266 |
-
("
|
| 267 |
-
("
|
| 268 |
-
]
|
| 269 |
-
cols = st.columns(len(steps))
|
| 270 |
-
for col, (icon, stage, title, desc) in zip(cols, steps):
|
| 271 |
-
col.markdown(f"""
|
| 272 |
-
<div class='pipeline-step'>
|
| 273 |
-
<div class='pipeline-icon'>{icon}</div>
|
| 274 |
-
<div class='pipeline-label'>{stage}</div>
|
| 275 |
-
<div class='pipeline-title'>{title}</div>
|
| 276 |
-
<div style='font-size:0.72rem;color:#546e7a;margin-top:4px;line-height:1.5;'>
|
| 277 |
-
{desc}</div>
|
| 278 |
-
</div>""", unsafe_allow_html=True)
|
| 279 |
-
|
| 280 |
-
st.markdown("<br>", unsafe_allow_html=True)
|
| 281 |
-
|
| 282 |
-
left, right = st.columns([1.1, 1])
|
| 283 |
-
|
| 284 |
-
with left:
|
| 285 |
-
st.markdown("<div class='section-header'>Key Findings</div>",
|
| 286 |
-
unsafe_allow_html=True)
|
| 287 |
-
for f in [
|
| 288 |
-
"<strong>Analyst negativity > management positivity.</strong> "
|
| 289 |
-
"qa_neg_ratio (SHAP=0.054) is the single strongest feature. "
|
| 290 |
-
"Analyst pushback in Q&A contains more information than prepared remarks.",
|
| 291 |
-
|
| 292 |
-
"<strong>NLP reduces prediction variance by 87%.</strong> "
|
| 293 |
-
"Baseline IC std=0.114 vs LightGBM std=0.009
|
| 294 |
-
"far more consistent across years.",
|
| 295 |
-
|
| 296 |
-
"<strong>Consistent with weak-form EMH.</strong> "
|
| 297 |
-
"Positive IC (0.0198) exists but cannot overcome 10bps transaction "
|
| 298 |
-
"costs at a 5-day holding period.",
|
| 299 |
-
|
| 300 |
-
"<strong>RAG guidance relevance is top-5.</strong> "
|
| 301 |
-
"Semantic relevance of the guidance section
|
| 302 |
-
"carries significant predictive signal.",
|
| 303 |
-
]:
|
| 304 |
-
st.markdown(f"<div class='insight-box'>{f}</div>",
|
| 305 |
-
unsafe_allow_html=True)
|
| 306 |
-
|
| 307 |
-
with right:
|
| 308 |
-
st.markdown("<div class='section-header'>Dataset Coverage</div>",
|
| 309 |
-
unsafe_allow_html=True)
|
| 310 |
-
if not fm.empty:
|
| 311 |
-
yr = fm.groupby("year").size().reset_index(name="count")
|
| 312 |
-
fig = go.Figure(go.Bar(
|
| 313 |
-
x=yr["year"].astype(str),
|
| 314 |
-
y=yr["count"],
|
| 315 |
-
marker=dict(color=yr["count"],
|
| 316 |
-
colorscale=[[0,"#1a237e"],[1,"#64b5f6"]],
|
| 317 |
-
showscale=False),
|
| 318 |
-
text=yr["count"], textposition="outside",
|
| 319 |
-
textfont=dict(size=11),
|
| 320 |
-
))
|
| 321 |
-
fig.update_layout(**L(title="Transcript Count by Year", height=300,
|
| 322 |
-
showlegend=False,
|
| 323 |
-
xaxis=dict(title="Year"),
|
| 324 |
-
yaxis=dict(title="Transcripts")))
|
| 325 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 326 |
-
|
| 327 |
-
# Sentiment heatmap
|
| 328 |
-
if not fm.empty and "mgmt_net_sentiment" in fm.columns:
|
| 329 |
-
st.markdown("<div class='section-header'>Sentiment Landscape</div>",
|
| 330 |
-
unsafe_allow_html=True)
|
| 331 |
-
heat = (fm.groupby(["ticker","year"])["mgmt_net_sentiment"]
|
| 332 |
-
.mean().reset_index())
|
| 333 |
-
top_t = fm["ticker"].value_counts().head(30).index
|
| 334 |
-
heat = heat[heat["ticker"].isin(top_t)]
|
| 335 |
-
pivot = heat.pivot(index="ticker", columns="year",
|
| 336 |
-
values="mgmt_net_sentiment")
|
| 337 |
-
fig2 = go.Figure(go.Heatmap(
|
| 338 |
-
z=pivot.values,
|
| 339 |
-
x=[str(c) for c in pivot.columns],
|
| 340 |
-
y=pivot.index,
|
| 341 |
-
colorscale=[[0,"#b71c1c"],[0.35,"#e53935"],
|
| 342 |
-
[0.5,"#263238"],[0.65,"#1565c0"],[1,"#64b5f6"]],
|
| 343 |
-
zmid=0,
|
| 344 |
-
colorbar=dict(title="Net Sentiment", tickfont=dict(size=10)),
|
| 345 |
-
hovertemplate="Ticker: %{y}<br>Year: %{x}<br>Sentiment: %{z:.3f}<extra></extra>",
|
| 346 |
-
))
|
| 347 |
-
fig2.update_layout(**L(
|
| 348 |
-
title="Management Net Sentiment
|
| 349 |
-
height=500,
|
| 350 |
-
xaxis=dict(title="Year"),
|
| 351 |
-
yaxis=dict(title=""),
|
| 352 |
-
))
|
| 353 |
-
st.plotly_chart(fig2, use_container_width=True)
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
#
|
| 357 |
-
# PAGE 2
|
| 358 |
-
#
|
| 359 |
-
|
| 360 |
-
elif page == "
|
| 361 |
-
mr = load_model_results()
|
| 362 |
-
|
| 363 |
-
st.markdown("<div class='hero-title' style='font-size:2rem;'>Model Performance</div>",
|
| 364 |
-
unsafe_allow_html=True)
|
| 365 |
-
st.markdown("<div class='hero-sub'>Walk-forward validation (
|
| 366 |
-
"Train on 3 prior years, test on held-out year. Zero data leakage.</div>",
|
| 367 |
-
unsafe_allow_html=True)
|
| 368 |
-
st.markdown("<hr>", unsafe_allow_html=True)
|
| 369 |
-
|
| 370 |
-
if mr.empty:
|
| 371 |
-
st.error("model_results.csv not found. Run Stage 3 first.")
|
| 372 |
-
st.stop()
|
| 373 |
-
|
| 374 |
-
summary = (
|
| 375 |
-
mr.groupby("model")[["ic","hit_rate","auc"]]
|
| 376 |
-
.agg({"ic":["mean","std"],"hit_rate":["mean","std"],"auc":["mean","std"]})
|
| 377 |
-
.round(4)
|
| 378 |
-
)
|
| 379 |
-
summary.columns = ["IC Mean","IC Std","Hit Rate Mean","Hit Rate Std",
|
| 380 |
-
"AUC Mean","AUC Std"]
|
| 381 |
-
summary = summary.sort_values("IC Mean", ascending=False)
|
| 382 |
-
|
| 383 |
-
st.markdown("<div class='section-header'>Model Comparison</div>",
|
| 384 |
-
unsafe_allow_html=True)
|
| 385 |
-
|
| 386 |
-
def color_ic(val):
    """Return the CSS declaration used to tint one "IC Mean" cell.

    Floats above 0.015 are shown bold green, negative floats red.
    Anything else (non-floats, values in [0, 0.015], NaN) gets no styling.
    """
    if not isinstance(val, float):
        return ""
    if val > 0.015:
        return "color: #66bb6a; font-weight:600"
    return "color: #ef5350" if val < 0 else ""
|
| 391 |
-
|
| 392 |
-
st.dataframe(
|
| 393 |
-
summary.style.applymap(color_ic, subset=["IC Mean"]).format("{:.4f}"),
|
| 394 |
-
use_container_width=True, height=220,
|
| 395 |
-
)
|
| 396 |
-
|
| 397 |
-
st.markdown("<br>", unsafe_allow_html=True)
|
| 398 |
-
st.markdown("<div class='section-header'>Information Coefficient by Year</div>",
|
| 399 |
-
unsafe_allow_html=True)
|
| 400 |
-
|
| 401 |
-
MODEL_COLORS = {
|
| 402 |
-
"Baseline": "#ffa726",
|
| 403 |
-
"FinBERT_only": "#26c6da",
|
| 404 |
-
"RAG_only": "#ab47bc",
|
| 405 |
-
"XGBoost_all": "#ef5350",
|
| 406 |
-
"LightGBM_all": "#66bb6a",
|
| 407 |
-
}
|
| 408 |
-
|
| 409 |
-
fig = go.Figure()
|
| 410 |
-
for m in mr["model"].unique():
|
| 411 |
-
sub = mr[mr["model"]==m].sort_values("test_year")
|
| 412 |
-
fig.add_trace(go.Scatter(
|
| 413 |
-
x=sub["test_year"].astype(int),
|
| 414 |
-
y=sub["ic"],
|
| 415 |
-
mode="lines+markers", name=m,
|
| 416 |
-
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2.5),
|
| 417 |
-
marker=dict(size=9),
|
| 418 |
-
hovertemplate=f"<b>{m}</b><br>Year: %{{x}}<br>IC: %{{y:.4f}}<extra></extra>",
|
| 419 |
-
))
|
| 420 |
-
fig.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=1.2)
|
| 421 |
-
fig.update_layout(**L(
|
| 422 |
-
title="Walk-Forward IC
|
| 423 |
-
height=380,
|
| 424 |
-
xaxis=dict(tickvals=[2021,2022,2023,2024], title="Year"),
|
| 425 |
-
yaxis=dict(title="Information Coefficient"),
|
| 426 |
-
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433", borderwidth=1),
|
| 427 |
-
))
|
| 428 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 429 |
-
|
| 430 |
-
col1, col2 = st.columns(2)
|
| 431 |
-
|
| 432 |
-
with col1:
|
| 433 |
-
st.markdown("<div class='subsection'>Hit Rate by Year</div>",
|
| 434 |
-
unsafe_allow_html=True)
|
| 435 |
-
fig2 = go.Figure()
|
| 436 |
-
for m in mr["model"].unique():
|
| 437 |
-
sub = mr[mr["model"]==m].sort_values("test_year")
|
| 438 |
-
fig2.add_trace(go.Scatter(
|
| 439 |
-
x=sub["test_year"].astype(int), y=sub["hit_rate"],
|
| 440 |
-
mode="lines+markers", name=m,
|
| 441 |
-
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
|
| 442 |
-
marker=dict(size=7), showlegend=False,
|
| 443 |
-
))
|
| 444 |
-
fig2.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
|
| 445 |
-
fig2.update_layout(**L(
|
| 446 |
-
height=300, title="Hit Rate (>0.5 = better than coin flip)",
|
| 447 |
-
xaxis=dict(tickvals=[2021,2022,2023,2024]),
|
| 448 |
-
yaxis=dict(title="Hit Rate"),
|
| 449 |
-
))
|
| 450 |
-
st.plotly_chart(fig2, use_container_width=True)
|
| 451 |
-
|
| 452 |
-
with col2:
|
| 453 |
-
st.markdown("<div class='subsection'>AUC by Year</div>",
|
| 454 |
-
unsafe_allow_html=True)
|
| 455 |
-
fig3 = go.Figure()
|
| 456 |
-
for m in mr["model"].unique():
|
| 457 |
-
sub = mr[mr["model"]==m].sort_values("test_year")
|
| 458 |
-
fig3.add_trace(go.Scatter(
|
| 459 |
-
x=sub["test_year"].astype(int), y=sub["auc"],
|
| 460 |
-
mode="lines+markers", name=m,
|
| 461 |
-
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
|
| 462 |
-
marker=dict(size=7), showlegend=False,
|
| 463 |
-
))
|
| 464 |
-
fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
|
| 465 |
-
fig3.update_layout(**L(
|
| 466 |
-
height=300, title="AUC-ROC (>0.5 = better than random)",
|
| 467 |
-
xaxis=dict(tickvals=[2021,2022,2023,2024]),
|
| 468 |
-
yaxis=dict(title="AUC"),
|
| 469 |
-
))
|
| 470 |
-
st.plotly_chart(fig3, use_container_width=True)
|
| 471 |
-
|
| 472 |
-
st.markdown("<div class='section-header'>Stability Analysis
|
| 473 |
-
unsafe_allow_html=True)
|
| 474 |
-
ic_std = mr.groupby("model")["ic"].std().sort_values()
|
| 475 |
-
ic_mean = mr.groupby("model")["ic"].mean()
|
| 476 |
-
bar_colors = ["#66bb6a" if ic_mean[m] > 0 else "#ef5350" for m in ic_std.index]
|
| 477 |
-
|
| 478 |
-
fig4 = go.Figure(go.Bar(
|
| 479 |
-
y=ic_std.index, x=ic_std.values, orientation="h",
|
| 480 |
-
marker_color=bar_colors,
|
| 481 |
-
text=[f"
|
| 482 |
-
textposition="outside", textfont=dict(size=11),
|
| 483 |
-
))
|
| 484 |
-
fig4.update_layout(**L(
|
| 485 |
-
title="IC Standard Deviation
|
| 486 |
-
height=280,
|
| 487 |
-
xaxis=dict(title="IC Std Dev"),
|
| 488 |
-
yaxis=dict(title=""),
|
| 489 |
-
))
|
| 490 |
-
st.plotly_chart(fig4, use_container_width=True)
|
| 491 |
-
|
| 492 |
-
st.markdown("""
|
| 493 |
-
<div class='insight-box'>
|
| 494 |
-
<strong>Interpretation:</strong> The Baseline's high IC mean (0.043) is
|
| 495 |
-
misleading
|
| 496 |
-
quarters. LightGBM achieves IC=0.020 with std=0.009, making it
|
| 497 |
-
<strong>
|
| 498 |
-
far more than occasional lucky peaks.
|
| 499 |
-
</div>
|
| 500 |
-
""", unsafe_allow_html=True)
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
#
|
| 504 |
-
# PAGE 3
|
| 505 |
-
#
|
| 506 |
-
|
| 507 |
-
elif page == "
|
| 508 |
-
shap_df = load_shap()
|
| 509 |
-
fm = load_feature_matrix()
|
| 510 |
-
|
| 511 |
-
st.markdown("<div class='hero-title' style='font-size:2rem;'>Feature Importance</div>",
|
| 512 |
-
unsafe_allow_html=True)
|
| 513 |
-
st.markdown("<div class='hero-sub'>SHAP values computed on LightGBM (best model). "
|
| 514 |
-
"Shows which features actually drive predictions.</div>",
|
| 515 |
-
unsafe_allow_html=True)
|
| 516 |
-
st.markdown("<hr>", unsafe_allow_html=True)
|
| 517 |
-
|
| 518 |
-
if shap_df.empty:
|
| 519 |
-
st.error("shap_values.parquet not found. Run run_shap.py first.")
|
| 520 |
-
st.stop()
|
| 521 |
-
|
| 522 |
-
mean_shap = shap_df.abs().mean().sort_values(ascending=False)
|
| 523 |
-
|
| 524 |
-
def feat_color(name):
    """Map a feature name to a bar colour according to its prefix.

    ``rag_``, ``mgmt_`` and ``qa_`` prefixes each have their own colour;
    any other name falls back to purple (``#ab47bc``).
    """
    prefix_colors = (
        ("rag_", "#64b5f6"),
        ("mgmt_", "#66bb6a"),
        ("qa_", "#ffa726"),
    )
    return next(
        (color for prefix, color in prefix_colors if name.startswith(prefix)),
        "#ab47bc",
    )
|
| 529 |
-
|
| 530 |
-
def feat_group(name):
    """Classify a feature name into a display group by its prefix.

    Returns "RAG Features", "Management FinBERT" or "QA FinBERT" for the
    ``rag_``, ``mgmt_`` and ``qa_`` prefixes respectively, else "Other".
    """
    groups_by_prefix = {
        "rag_": "RAG Features",
        "mgmt_": "Management FinBERT",
        "qa_": "QA FinBERT",
    }
    for prefix, group in groups_by_prefix.items():
        if name.startswith(prefix):
            return group
    return "Other"
|
| 535 |
-
|
| 536 |
-
st.markdown("<div class='section-header'>Top 20 Features by Mean |SHAP|</div>",
|
| 537 |
-
unsafe_allow_html=True)
|
| 538 |
-
top20 = mean_shap.head(20)[::-1]
|
| 539 |
-
fig = go.Figure(go.Bar(
|
| 540 |
-
y=top20.index, x=top20.values, orientation="h",
|
| 541 |
-
marker_color=[feat_color(n) for n in top20.index],
|
| 542 |
-
text=[f"{v:.4f}" for v in top20.values],
|
| 543 |
-
textposition="outside", textfont=dict(size=10),
|
| 544 |
-
hovertemplate="<b>%{y}</b><br>Mean |SHAP|: %{x:.4f}<extra></extra>",
|
| 545 |
-
))
|
| 546 |
-
fig.update_layout(**L(
|
| 547 |
-
height=520,
|
| 548 |
-
title="Feature Importance
|
| 549 |
-
xaxis=dict(title="Mean |SHAP Value|"),
|
| 550 |
-
yaxis=dict(title=""),
|
| 551 |
-
))
|
| 552 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 553 |
-
|
| 554 |
-
col1, col2 = st.columns(2)
|
| 555 |
-
|
| 556 |
-
with col1:
|
| 557 |
-
st.markdown("<div class='section-header'>Importance by Feature Group</div>",
|
| 558 |
-
unsafe_allow_html=True)
|
| 559 |
-
gs = (mean_shap.reset_index()
|
| 560 |
-
.rename(columns={"index":"feature", 0:"shap"}))
|
| 561 |
-
gs.columns = ["feature","shap"]
|
| 562 |
-
gs["group"] = gs["feature"].apply(feat_group)
|
| 563 |
-
gt = gs.groupby("group")["shap"].sum()
|
| 564 |
-
|
| 565 |
-
fig2 = go.Figure(go.Pie(
|
| 566 |
-
labels=gt.index, values=gt.values, hole=0.55,
|
| 567 |
-
marker=dict(colors=["#64b5f6","#66bb6a","#ffa726","#ab47bc"]),
|
| 568 |
-
textinfo="label+percent", textfont=dict(size=12),
|
| 569 |
-
hovertemplate="<b>%{label}</b><br>Total SHAP: %{value:.4f}<br>%{percent}<extra></extra>",
|
| 570 |
-
))
|
| 571 |
-
fig2.update_layout(**L(
|
| 572 |
-
height=320, showlegend=False,
|
| 573 |
-
annotations=[dict(text="SHAP<br>Groups", x=0.5, y=0.5,
|
| 574 |
-
font_size=13, showarrow=False,
|
| 575 |
-
font_color="#b0bec5")],
|
| 576 |
-
))
|
| 577 |
-
st.plotly_chart(fig2, use_container_width=True)
|
| 578 |
-
|
| 579 |
-
with col2:
|
| 580 |
-
st.markdown("<div class='section-header'>SHAP vs Correlation with Target</div>",
|
| 581 |
-
unsafe_allow_html=True)
|
| 582 |
-
if not fm.empty and "target_5d_up" in fm.columns:
|
| 583 |
-
feat_cols = [c for c in shap_df.columns if c in fm.columns]
|
| 584 |
-
corrs = fm[feat_cols+["target_5d_up"]].corr()["target_5d_up"].drop("target_5d_up")
|
| 585 |
-
cdf = pd.DataFrame({
|
| 586 |
-
"feature": corrs.index,
|
| 587 |
-
"shap": mean_shap.reindex(corrs.index).fillna(0).values,
|
| 588 |
-
"corr": corrs.values,
|
| 589 |
-
"group": [feat_group(f) for f in corrs.index],
|
| 590 |
-
})
|
| 591 |
-
cmap = {
|
| 592 |
-
"RAG Features": "#64b5f6",
|
| 593 |
-
"Management FinBERT": "#66bb6a",
|
| 594 |
-
"QA FinBERT": "#ffa726",
|
| 595 |
-
"Other": "#ab47bc",
|
| 596 |
-
}
|
| 597 |
-
fig3 = px.scatter(
|
| 598 |
-
cdf, x="corr", y="shap", color="group",
|
| 599 |
-
color_discrete_map=cmap, hover_data=["feature"],
|
| 600 |
-
labels={"corr":"Pearson Corr with Target",
|
| 601 |
-
"shap":"Mean |SHAP Value|"},
|
| 602 |
-
height=320,
|
| 603 |
-
)
|
| 604 |
-
fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
|
| 605 |
-
fig3.update_layout(**L(
|
| 606 |
-
title="SHAP Importance vs Linear Correlation",
|
| 607 |
-
showlegend=False,
|
| 608 |
-
))
|
| 609 |
-
st.plotly_chart(fig3, use_container_width=True)
|
| 610 |
-
|
| 611 |
-
st.markdown("<div class='section-header'>Feature Insights</div>",
|
| 612 |
-
unsafe_allow_html=True)
|
| 613 |
-
insights = [
|
| 614 |
-
("
|
| 615 |
-
"Proportion of negative sentences in analyst Q&A. When analysts "
|
| 616 |
-
"push back hard, it signals market-moving information that management "
|
| 617 |
-
"tried to downplay."),
|
| 618 |
-
("
|
| 619 |
-
"Volatility of management's sentence-level sentiment. Inconsistent "
|
| 620 |
-
"messaging
|
| 621 |
-
"price moves."),
|
| 622 |
-
("
|
| 623 |
-
"Length of the Q&A section. Longer Q&A sessions indicate "
|
| 624 |
-
"more analyst scrutiny, which correlates with uncertainty about "
|
| 625 |
-
"the quarter's results."),
|
| 626 |
-
("
|
| 627 |
-
"Neutral sentiment ratio in management remarks. Deliberately neutral "
|
| 628 |
-
"language can mask very good or very bad news
|
| 629 |
-
("
|
| 630 |
-
"Semantic similarity of the guidance section to specific numerical "
|
| 631 |
-
"guidance queries. More relevant guidance sections contain concrete "
|
| 632 |
-
"targets that markets react to more strongly."),
|
| 633 |
-
]
|
| 634 |
-
cols = st.columns(len(insights))
|
| 635 |
-
for col, (title, body) in zip(cols, insights):
|
| 636 |
-
col.markdown(f"""
|
| 637 |
-
<div class='pipeline-step' style='text-align:left;height:190px;'>
|
| 638 |
-
<div style='font-size:0.82rem;font-weight:700;color:#64b5f6;
|
| 639 |
-
margin-bottom:8px;'>{title}</div>
|
| 640 |
-
<div style='font-size:0.76rem;color:#8892b0;line-height:1.6;'>
|
| 641 |
-
{body}</div>
|
| 642 |
-
</div>""", unsafe_allow_html=True)
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
#
|
| 646 |
-
# PAGE 4
|
| 647 |
-
#
|
| 648 |
-
|
| 649 |
-
elif page == "
|
| 650 |
-
bt = load_backtest()
|
| 651 |
-
|
| 652 |
-
st.markdown("<div class='hero-title' style='font-size:2rem;'>Backtest Results</div>",
|
| 653 |
-
unsafe_allow_html=True)
|
| 654 |
-
st.markdown("<div class='hero-sub'>Long-short quartile portfolio. "
|
| 655 |
-
"Long top-25% predicted stocks, short bottom-25%. "
|
| 656 |
-
"5-day holding period. 10bps round-trip transaction cost.</div>",
|
| 657 |
-
unsafe_allow_html=True)
|
| 658 |
-
st.markdown("<hr>", unsafe_allow_html=True)
|
| 659 |
-
|
| 660 |
-
if bt.empty:
|
| 661 |
-
st.error("backtest_results.csv not found. Run Stage 4 first.")
|
| 662 |
-
st.stop()
|
| 663 |
-
|
| 664 |
-
bt = bt.sort_values(["year","quarter"]).reset_index(drop=True)
|
| 665 |
-
bt["period"] = bt["year"].astype(str) + "-Q" + bt["quarter"].astype(str)
|
| 666 |
-
rets = bt["net_ret"]
|
| 667 |
-
cum = (1 + rets).cumprod()
|
| 668 |
-
peak = cum.cummax()
|
| 669 |
-
dd = (cum - peak) / peak
|
| 670 |
-
|
| 671 |
-
n_yrs = len(bt) / 4
|
| 672 |
-
ann_ret = float((1 + rets).prod() ** (1/n_yrs) - 1)
|
| 673 |
-
ann_vol = float(rets.std() * np.sqrt(4))
|
| 674 |
-
sharpe = ann_ret / ann_vol if ann_vol != 0 else 0.0
|
| 675 |
-
max_dd = float(dd.min())
|
| 676 |
-
hit = float((rets > 0).mean())
|
| 677 |
-
|
| 678 |
-
c1,c2,c3,c4,c5 = st.columns(5)
|
| 679 |
-
metric_card(c1, f"{ann_ret*100:.2f}%", "Ann. Return",
|
| 680 |
-
"After TC", "pos" if ann_ret > 0 else "neg")
|
| 681 |
-
metric_card(c2, f"{sharpe:.3f}", "Sharpe Ratio",
|
| 682 |
-
">1.0 = excellent", "pos" if sharpe > 0 else "neg")
|
| 683 |
-
metric_card(c3, f"{max_dd*100:.2f}%", "Max Drawdown",
|
| 684 |
-
"Peak-to-trough", "neg")
|
| 685 |
-
metric_card(c4, f"{hit*100:.0f}%", "Win Rate",
|
| 686 |
-
"Profitable quarters", "pos" if hit > 0.5 else "neg")
|
| 687 |
-
metric_card(c5, str(len(bt)), "Quarters Tested",
|
| 688 |
-
"
|
| 689 |
-
|
| 690 |
-
st.markdown("<br>", unsafe_allow_html=True)
|
| 691 |
-
st.markdown("<div class='section-header'>Equity Curve</div>",
|
| 692 |
-
unsafe_allow_html=True)
|
| 693 |
-
|
| 694 |
-
fig = make_subplots(rows=2, cols=1, row_heights=[0.72,0.28],
|
| 695 |
-
shared_xaxes=True, vertical_spacing=0.04)
|
| 696 |
-
fig.add_trace(go.Scatter(
|
| 697 |
-
x=bt["period"], y=cum.values,
|
| 698 |
-
mode="lines+markers",
|
| 699 |
-
line=dict(color="#64b5f6", width=2.5),
|
| 700 |
-
marker=dict(size=7),
|
| 701 |
-
fill="tozeroy", fillcolor="rgba(100,181,246,0.06)",
|
| 702 |
-
name="Cumulative Return",
|
| 703 |
-
hovertemplate="<b>%{x}</b><br>Cumulative: %{y:.4f}<extra></extra>",
|
| 704 |
-
), row=1, col=1)
|
| 705 |
-
fig.add_hline(y=1.0, line_dash="dash", line_color="#546e7a",
|
| 706 |
-
line_width=1, row=1, col=1)
|
| 707 |
-
fig.add_trace(go.Bar(
|
| 708 |
-
x=bt["period"], y=dd.values*100,
|
| 709 |
-
marker_color="#ef5350", opacity=0.7, name="Drawdown %",
|
| 710 |
-
hovertemplate="<b>%{x}</b><br>Drawdown: %{y:.2f}%<extra></extra>",
|
| 711 |
-
), row=2, col=1)
|
| 712 |
-
|
| 713 |
-
fig.update_layout(
|
| 714 |
-
paper_bgcolor="#0d1117", plot_bgcolor="#0a0e1a",
|
| 715 |
-
font=dict(color="#b0bec5"),
|
| 716 |
-
margin=dict(l=50,r=30,t=50,b=80),
|
| 717 |
-
title="FinSight Long-Short Strategy
|
| 718 |
-
height=500, showlegend=False,
|
| 719 |
-
xaxis2=dict(tickangle=45, tickfont_size=10,
|
| 720 |
-
gridcolor="#1a2035", linecolor="#1e2433"),
|
| 721 |
-
yaxis=dict(title="Cumulative Return",
|
| 722 |
-
gridcolor="#1a2035", linecolor="#1e2433"),
|
| 723 |
-
yaxis2=dict(title="DD %",
|
| 724 |
-
gridcolor="#1a2035", linecolor="#1e2433"),
|
| 725 |
-
)
|
| 726 |
-
fig.update_xaxes(gridcolor="#1a2035", linecolor="#1e2433")
|
| 727 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 728 |
-
|
| 729 |
-
col1, col2 = st.columns(2)
|
| 730 |
-
|
| 731 |
-
with col1:
|
| 732 |
-
st.markdown("<div class='subsection'>Quarterly Net Returns</div>",
|
| 733 |
-
unsafe_allow_html=True)
|
| 734 |
-
q_colors = ["#66bb6a" if r > 0 else "#ef5350" for r in rets]
|
| 735 |
-
fig2 = go.Figure(go.Bar(
|
| 736 |
-
x=bt["period"], y=rets.values*100,
|
| 737 |
-
marker_color=q_colors,
|
| 738 |
-
text=[f"{v*100:.2f}%" for v in rets.values],
|
| 739 |
-
textposition="outside", textfont=dict(size=9),
|
| 740 |
-
hovertemplate="<b>%{x}</b><br>Net Return: %{y:.2f}%<extra></extra>",
|
| 741 |
-
))
|
| 742 |
-
fig2.add_hline(y=0, line_color="#546e7a", line_width=1)
|
| 743 |
-
fig2.update_layout(**L(
|
| 744 |
-
height=320,
|
| 745 |
-
title="Net Return per Quarter (after 10bps TC)",
|
| 746 |
-
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
|
| 747 |
-
yaxis=dict(title="Net Return (%)"),
|
| 748 |
-
))
|
| 749 |
-
st.plotly_chart(fig2, use_container_width=True)
|
| 750 |
-
|
| 751 |
-
with col2:
|
| 752 |
-
st.markdown("<div class='subsection'>Long vs Short Leg Hit Rate</div>",
|
| 753 |
-
unsafe_allow_html=True)
|
| 754 |
-
fig3 = go.Figure()
|
| 755 |
-
fig3.add_trace(go.Scatter(
|
| 756 |
-
x=bt["period"], y=bt["long_hit"],
|
| 757 |
-
mode="lines+markers",
|
| 758 |
-
line=dict(color="#66bb6a", width=2),
|
| 759 |
-
marker=dict(size=7), name="Long Leg",
|
| 760 |
-
))
|
| 761 |
-
fig3.add_trace(go.Scatter(
|
| 762 |
-
x=bt["period"], y=bt["short_hit"],
|
| 763 |
-
mode="lines+markers",
|
| 764 |
-
line=dict(color="#ef5350", width=2),
|
| 765 |
-
marker=dict(size=7), name="Short Leg",
|
| 766 |
-
))
|
| 767 |
-
fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a")
|
| 768 |
-
fig3.update_layout(**L(
|
| 769 |
-
height=320,
|
| 770 |
-
title="Direction Accuracy
|
| 771 |
-
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
|
| 772 |
-
yaxis=dict(title="Hit Rate"),
|
| 773 |
-
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
|
| 774 |
-
))
|
| 775 |
-
st.plotly_chart(fig3, use_container_width=True)
|
| 776 |
-
|
| 777 |
-
st.markdown("<div class='section-header'>Quarterly Breakdown</div>",
|
| 778 |
-
unsafe_allow_html=True)
|
| 779 |
-
disp = bt[["period","net_ret","long_ret","short_ret",
|
| 780 |
-
"long_hit","short_hit","n_stocks","q_size"]].copy()
|
| 781 |
-
disp.columns = ["Quarter","Net Ret","Long Ret","Short Ret",
|
| 782 |
-
"Long Hit","Short Hit","N Stocks","Leg Size"]
|
| 783 |
-
|
| 784 |
-
def color_ret(val):
    """Return the CSS text colour for a return cell in the quarterly table.

    Positive floats are styled green, negative floats red. Zero, NaN and
    any non-float value get an empty style string (no styling).
    """
    if not isinstance(val, float):
        return ""
    if val > 0:
        return "color: #66bb6a"
    return "color: #ef5350" if val < 0 else ""
|
| 789 |
-
|
| 790 |
-
st.dataframe(
|
| 791 |
-
disp.style.applymap(color_ret,
|
| 792 |
-
subset=["Net Ret","Long Ret","Short Ret"])
|
| 793 |
-
.format({c:"{:.4f}" for c in
|
| 794 |
-
["Net Ret","Long Ret","Short Ret",
|
| 795 |
-
"Long Hit","Short Hit"]}),
|
| 796 |
-
use_container_width=True, hide_index=True,
|
| 797 |
-
)
|
| 798 |
-
|
| 799 |
-
st.markdown("""
|
| 800 |
-
<div class='insight-box'>
|
| 801 |
-
<strong>Context:</strong> A Sharpe of -0.81 with a 5-day holding period
|
| 802 |
-
is consistent with academic literature on post-earnings announcement
|
| 803 |
-
drift (Chan et al. 1996, Lerman et al. 2008). The signal exists
|
| 804 |
-
(IC=0.0198) but is too weak to survive round-trip transaction costs at
|
| 805 |
-
this frequency. Extending to 20-day holding periods is the natural
|
| 806 |
-
next step.
|
| 807 |
-
</div>
|
| 808 |
-
""", unsafe_allow_html=True)
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
#
|
| 812 |
-
# PAGE 5
|
| 813 |
-
#
|
| 814 |
-
|
| 815 |
-
elif page == "
|
| 816 |
-
fm = load_feature_matrix()
|
| 817 |
-
|
| 818 |
-
st.markdown("<div class='hero-title' style='font-size:2rem;'>Transcript Explorer</div>",
|
| 819 |
-
unsafe_allow_html=True)
|
| 820 |
-
st.markdown("<div class='hero-sub'>Browse sentiment profiles for any company "
|
| 821 |
-
"and quarter in the dataset.</div>",
|
| 822 |
-
unsafe_allow_html=True)
|
| 823 |
-
st.markdown("<hr>", unsafe_allow_html=True)
|
| 824 |
-
|
| 825 |
-
if fm.empty:
|
| 826 |
-
st.error("Feature matrix not found.")
|
| 827 |
-
st.stop()
|
| 828 |
-
|
| 829 |
-
col1, col2, col3 = st.columns([2,1,1])
|
| 830 |
-
with col1:
|
| 831 |
-
all_tickers = sorted(fm["ticker"].dropna().unique())
|
| 832 |
-
default_idx = all_tickers.index("AAPL") if "AAPL" in all_tickers else 0
|
| 833 |
-
ticker = st.selectbox("Select Ticker", all_tickers, index=default_idx)
|
| 834 |
-
with col2:
|
| 835 |
-
years = sorted(fm["year"].unique(), reverse=True)
|
| 836 |
-
year = st.selectbox("Year", years)
|
| 837 |
-
with col3:
|
| 838 |
-
quarters = sorted(fm[fm["year"]==year]["quarter"].unique())
|
| 839 |
-
quarter = st.selectbox("Quarter", quarters)
|
| 840 |
-
|
| 841 |
-
row = fm[(fm["ticker"]==ticker) &
|
| 842 |
-
(fm["year"]==year) &
|
| 843 |
-
(fm["quarter"]==quarter)]
|
| 844 |
-
|
| 845 |
-
if row.empty:
|
| 846 |
-
st.warning("No data for this combination.")
|
| 847 |
-
st.stop()
|
| 848 |
-
|
| 849 |
-
row = row.iloc[0]
|
| 850 |
-
|
| 851 |
-
ret_5d = row.get("ret_5d", 0)
|
| 852 |
-
target = int(row.get("target_5d_up", 0))
|
| 853 |
-
st.markdown(f"""
|
| 854 |
-
<div style='display:flex;align-items:center;gap:16px;margin:16px 0;'>
|
| 855 |
-
<div style='font-size:2rem;font-weight:800;color:#64b5f6;'>{ticker}</div>
|
| 856 |
-
<div style='font-size:1rem;color:#8892b0;'>{int(year)} Q{int(quarter)}</div>
|
| 857 |
-
<div class='badge badge-{"green" if target==1 else "red"}'>
|
| 858 |
-
{"
|
| 859 |
-
</div>
|
| 860 |
-
<div class='badge badge-blue'>
|
| 861 |
-
5d Return: {float(ret_5d)*100:.2f}%
|
| 862 |
-
</div>
|
| 863 |
-
</div>
|
| 864 |
-
""", unsafe_allow_html=True)
|
| 865 |
-
|
| 866 |
-
left, right = st.columns([1.2, 1])
|
| 867 |
-
|
| 868 |
-
with left:
|
| 869 |
-
st.markdown("<div class='subsection'>Sentiment Breakdown</div>",
|
| 870 |
-
unsafe_allow_html=True)
|
| 871 |
-
cats = ["Mgmt Positive","Mgmt Neutral","Mgmt Negative",
|
| 872 |
-
"QA Positive","QA Neutral","QA Negative"]
|
| 873 |
-
vals = [
|
| 874 |
-
float(row.get("mgmt_mean_pos", 0) or 0),
|
| 875 |
-
float(row.get("mgmt_mean_neu", 0) or 0),
|
| 876 |
-
float(row.get("mgmt_mean_neg", 0) or 0),
|
| 877 |
-
float(row.get("qa_mean_pos", 0) or 0),
|
| 878 |
-
float(row.get("qa_mean_neu", 0) or 0),
|
| 879 |
-
float(row.get("qa_mean_neg", 0) or 0),
|
| 880 |
-
]
|
| 881 |
-
vals_c = vals + [vals[0]]
|
| 882 |
-
cats_c = cats + [cats[0]]
|
| 883 |
-
fig = go.Figure(go.Scatterpolar(
|
| 884 |
-
r=vals_c, theta=cats_c, fill="toself",
|
| 885 |
-
fillcolor="rgba(100,181,246,0.15)",
|
| 886 |
-
line=dict(color="#64b5f6", width=2), name=ticker,
|
| 887 |
-
))
|
| 888 |
-
fig.update_layout(
|
| 889 |
-
paper_bgcolor="#0d1117",
|
| 890 |
-
font=dict(color="#b0bec5"),
|
| 891 |
-
polar=dict(
|
| 892 |
-
bgcolor="#0d1117",
|
| 893 |
-
radialaxis=dict(visible=True, range=[0,1],
|
| 894 |
-
gridcolor="#1a2035", linecolor="#1a2035",
|
| 895 |
-
tickfont=dict(size=9, color="#546e7a")),
|
| 896 |
-
angularaxis=dict(gridcolor="#1a2035", linecolor="#1a2035",
|
| 897 |
-
tickfont=dict(size=10, color="#b0bec5")),
|
| 898 |
-
),
|
| 899 |
-
height=360, showlegend=False,
|
| 900 |
-
title=f"{ticker}
|
| 901 |
-
margin=dict(l=40,r=40,t=50,b=40),
|
| 902 |
-
)
|
| 903 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 904 |
-
|
| 905 |
-
with right:
|
| 906 |
-
st.markdown("<div class='subsection'>Feature Scores</div>",
|
| 907 |
-
unsafe_allow_html=True)
|
| 908 |
-
|
| 909 |
-
def score_bar(label, val, invert=False):
    """Render one labelled horizontal score bar via ``st.markdown``.

    Args:
        label: Text shown on the left above the bar.
        val: Score to display; ``None`` or NaN renders nothing.
        invert: When true the bar is drawn in red instead of blue.

    The numeric value is printed with three decimals, while the bar width
    is the value clamped to the 0..1 range and expressed as a percentage.
    """
    if val is None or pd.isna(val):
        return

    score = float(val)
    # Clamp to [0, 1] before scaling so the bar never overflows its track.
    width_pct = max(0, min(1, score)) * 100
    bar_color = "#ef5350" if invert else "#64b5f6"

    bar_html = f"""
<div style='margin:8px 0;'>
<div style='display:flex;justify-content:space-between;
font-size:0.8rem;color:#8892b0;margin-bottom:3px;'>
<span>{label}</span><span>{score:.3f}</span>
</div>
<div style='background:#1a2035;border-radius:4px;height:6px;'>
<div style='background:{bar_color};width:{width_pct:.0f}%;
height:6px;border-radius:4px;'></div>
</div>
</div>"""
    st.markdown(bar_html, unsafe_allow_html=True)
|
| 926 |
-
|
| 927 |
-
score_bar("Mgmt Net Sentiment", row.get("mgmt_net_sentiment"))
|
| 928 |
-
score_bar("QA Net Sentiment", row.get("qa_net_sentiment"))
|
| 929 |
-
score_bar("Mgmt Negativity", row.get("mgmt_neg_ratio"), invert=True)
|
| 930 |
-
score_bar("QA Negativity", row.get("qa_neg_ratio"), invert=True)
|
| 931 |
-
score_bar("Guidance Specificity", row.get("rag_guidance_specificity_score"))
|
| 932 |
-
score_bar("Mgmt Confidence", row.get("rag_management_confidence_score"))
|
| 933 |
-
score_bar("Forward Looking", row.get("rag_forward_looking_score"))
|
| 934 |
-
score_bar("New Risks", row.get("rag_new_risks_score"), invert=True)
|
| 935 |
-
score_bar("Cost Pressure", row.get("rag_cost_pressure_score"), invert=True)
|
| 936 |
-
|
| 937 |
-
# Historical trend
|
| 938 |
-
st.markdown(f"<div class='section-header'>{ticker}
|
| 939 |
-
unsafe_allow_html=True)
|
| 940 |
-
|
| 941 |
-
td = fm[fm["ticker"]==ticker].copy().sort_values(["year","quarter"])
|
| 942 |
-
td["period"] = td["year"].astype(str) + "-Q" + td["quarter"].astype(str)
|
| 943 |
-
|
| 944 |
-
if len(td) > 1:
|
| 945 |
-
fig2 = go.Figure()
|
| 946 |
-
for col_name, label, color in [
|
| 947 |
-
("mgmt_net_sentiment", "Mgmt Sentiment", "#66bb6a"),
|
| 948 |
-
("qa_net_sentiment", "QA Sentiment", "#64b5f6"),
|
| 949 |
-
("mgmt_neg_ratio", "Mgmt Negativity","#ef5350"),
|
| 950 |
-
]:
|
| 951 |
-
if col_name in td.columns:
|
| 952 |
-
fig2.add_trace(go.Scatter(
|
| 953 |
-
x=td["period"], y=td[col_name],
|
| 954 |
-
mode="lines+markers", name=label,
|
| 955 |
-
line=dict(color=color, width=2),
|
| 956 |
-
marker=dict(size=6),
|
| 957 |
-
hovertemplate=f"<b>{label}</b><br>%{{x}}<br>%{{y:.3f}}<extra></extra>",
|
| 958 |
-
))
|
| 959 |
-
|
| 960 |
-
# Mark selected quarter
|
| 961 |
-
cur_period = f"{int(year)}-Q{int(quarter)}"
|
| 962 |
-
if cur_period in td["period"].values:
|
| 963 |
-
cur_idx = td[td["period"]==cur_period].index[0]
|
| 964 |
-
cur_pos = td["period"].tolist().index(cur_period)
|
| 965 |
-
fig2.add_vrect(
|
| 966 |
-
x0=cur_period, x1=cur_period,
|
| 967 |
-
line_dash="dot", line_color="#ffa726", line_width=2,
|
| 968 |
-
)
|
| 969 |
-
|
| 970 |
-
fig2.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=0.8)
|
| 971 |
-
fig2.update_layout(**L(
|
| 972 |
-
height=320,
|
| 973 |
-
title=f"{ticker}
|
| 974 |
-
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
|
| 975 |
-
yaxis=dict(title="Score"),
|
| 976 |
-
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
|
| 977 |
-
))
|
| 978 |
-
st.plotly_chart(fig2, use_container_width=True)
|
| 979 |
-
|
| 980 |
-
# Scatter: sentiment vs return
|
| 981 |
-
if "ret_5d" in td.columns and "mgmt_net_sentiment" in td.columns:
|
| 982 |
-
st.markdown("<div class='subsection'>Sentiment vs 5-Day Return</div>",
|
| 983 |
-
unsafe_allow_html=True)
|
| 984 |
-
tc = td.dropna(subset=["ret_5d","mgmt_net_sentiment"]).copy()
|
| 985 |
-
tc["ret_pct"] = tc["ret_5d"].astype(float) * 100
|
| 986 |
-
sc_colors = ["#66bb6a" if r > 0 else "#ef5350"
|
| 987 |
-
for r in tc["ret_pct"]]
|
| 988 |
-
fig3 = go.Figure(go.Scatter(
|
| 989 |
-
x=tc["mgmt_net_sentiment"].astype(float),
|
| 990 |
-
y=tc["ret_pct"],
|
| 991 |
-
mode="markers+text",
|
| 992 |
-
text=tc["period"],
|
| 993 |
-
textposition="top center",
|
| 994 |
-
textfont=dict(size=8, color="#546e7a"),
|
| 995 |
-
marker=dict(color=sc_colors, size=9, opacity=0.85),
|
| 996 |
-
hovertemplate=(
|
| 997 |
-
"<b>%{text}</b><br>"
|
| 998 |
-
"Mgmt Sentiment: %{x:.3f}<br>"
|
| 999 |
-
"5d Return: %{y:.2f}%<extra></extra>"
|
| 1000 |
-
),
|
| 1001 |
-
))
|
| 1002 |
-
fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
|
| 1003 |
-
fig3.add_hline(y=0, line_dash="dash", line_color="#546e7a")
|
| 1004 |
-
fig3.update_layout(**L(
|
| 1005 |
-
height=340,
|
| 1006 |
-
title=f"{ticker}
|
| 1007 |
-
xaxis=dict(title="Management Net Sentiment"),
|
| 1008 |
-
yaxis=dict(title="5-Day Return (%)"),
|
| 1009 |
-
))
|
| 1010 |
-
st.plotly_chart(fig3, use_container_width=True)
|
| 1011 |
-
else:
|
| 1012 |
-
st.info("Not enough historical data for this ticker.")
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FinSight Dashboard — LLM-Powered Earnings Intelligence
|
| 3 |
+
Stage 5: Production Streamlit Dashboard
|
| 4 |
+
|
| 5 |
+
Pages:
|
| 6 |
+
1. Overview — project summary, pipeline, key stats
|
| 7 |
+
2. Model Performance — walk-forward IC/AUC comparison, year-by-year
|
| 8 |
+
3. Feature Importance — interactive SHAP feature importance
|
| 9 |
+
4. Backtest Results — equity curve, drawdown, quarterly P&L
|
| 10 |
+
5. Transcript Explorer — browse transcripts with live sentiment
|
| 11 |
+
|
| 12 |
+
Run:
|
| 13 |
+
streamlit run src/dashboard/app.py
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import sys
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
import warnings
|
| 19 |
+
warnings.filterwarnings("ignore")
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
import pandas as pd
|
| 23 |
+
import plotly.express as px
|
| 24 |
+
import plotly.graph_objects as go
|
| 25 |
+
from plotly.subplots import make_subplots
|
| 26 |
+
import streamlit as st
|
| 27 |
+
|
| 28 |
+
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
| 29 |
+
from config import PROCESSED_DIR, EXPERIMENTS_DIR
|
| 30 |
+
|
| 31 |
+
# ── Page config ────────────────────────────────────────────────────────────────
|
| 32 |
+
|
| 33 |
+
st.set_page_config(
|
| 34 |
+
page_title="FinSight | Earnings Intelligence",
|
| 35 |
+
page_icon="📈",
|
| 36 |
+
layout="wide",
|
| 37 |
+
initial_sidebar_state="expanded",
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
# ── Global CSS ─────────────────────────────────────────────────────────────────
|
| 41 |
+
|
| 42 |
+
st.markdown("""
|
| 43 |
+
<style>
|
| 44 |
+
[data-testid="stAppViewContainer"] { background: #0a0e1a; color: #e8eaf6; }
|
| 45 |
+
[data-testid="stSidebar"] {
|
| 46 |
+
background: #0d1117;
|
| 47 |
+
border-right: 1px solid #1e2433;
|
| 48 |
+
}
|
| 49 |
+
[data-testid="stSidebar"] .stRadio label {
|
| 50 |
+
color: #8892b0 !important;
|
| 51 |
+
font-size: 0.9rem;
|
| 52 |
+
}
|
| 53 |
+
.metric-card {
|
| 54 |
+
background: linear-gradient(135deg, #0d1117 0%, #161b27 100%);
|
| 55 |
+
border: 1px solid #1e2d4a;
|
| 56 |
+
border-radius: 12px;
|
| 57 |
+
padding: 20px 24px;
|
| 58 |
+
text-align: center;
|
| 59 |
+
transition: border-color 0.2s;
|
| 60 |
+
}
|
| 61 |
+
.metric-card:hover { border-color: #3d5a99; }
|
| 62 |
+
.metric-value { font-size: 2rem; font-weight: 700; color: #64b5f6; line-height: 1.1; }
|
| 63 |
+
.metric-label {
|
| 64 |
+
font-size: 0.78rem; color: #8892b0;
|
| 65 |
+
text-transform: uppercase; letter-spacing: 1px; margin-top: 6px;
|
| 66 |
+
}
|
| 67 |
+
.metric-delta { font-size: 0.82rem; margin-top: 4px; }
|
| 68 |
+
.delta-pos { color: #66bb6a; }
|
| 69 |
+
.delta-neg { color: #ef5350; }
|
| 70 |
+
.delta-neu { color: #8892b0; }
|
| 71 |
+
.section-header {
|
| 72 |
+
font-size: 1.4rem; font-weight: 700; color: #e8eaf6;
|
| 73 |
+
border-left: 4px solid #3d5a99; padding-left: 12px;
|
| 74 |
+
margin: 28px 0 16px 0;
|
| 75 |
+
}
|
| 76 |
+
.subsection { font-size: 1rem; font-weight: 600; color: #8892b0; margin: 16px 0 8px 0; }
|
| 77 |
+
.hero-title {
|
| 78 |
+
font-size: 2.8rem; font-weight: 800;
|
| 79 |
+
background: linear-gradient(90deg, #64b5f6, #7c4dff, #64b5f6);
|
| 80 |
+
background-size: 200%;
|
| 81 |
+
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
|
| 82 |
+
line-height: 1.2;
|
| 83 |
+
}
|
| 84 |
+
.hero-sub {
|
| 85 |
+
font-size: 1.1rem; color: #8892b0; margin-top: 8px;
|
| 86 |
+
max-width: 680px; line-height: 1.6;
|
| 87 |
+
}
|
| 88 |
+
.pipeline-step {
|
| 89 |
+
background: #0d1117; border: 1px solid #1e2433;
|
| 90 |
+
border-radius: 10px; padding: 14px 16px; text-align: center;
|
| 91 |
+
}
|
| 92 |
+
.pipeline-icon { font-size: 1.6rem; }
|
| 93 |
+
.pipeline-label { font-size: 0.78rem; color: #8892b0; margin-top: 4px; }
|
| 94 |
+
.pipeline-title { font-size: 0.9rem; font-weight: 600; color: #cfd8dc; }
|
| 95 |
+
.insight-box {
|
| 96 |
+
background: #0d1117; border-left: 3px solid #3d5a99;
|
| 97 |
+
border-radius: 0 8px 8px 0; padding: 12px 16px; margin: 8px 0;
|
| 98 |
+
font-size: 0.88rem; color: #b0bec5; line-height: 1.6;
|
| 99 |
+
}
|
| 100 |
+
.insight-box strong { color: #64b5f6; }
|
| 101 |
+
.badge {
|
| 102 |
+
display: inline-block; padding: 2px 10px; border-radius: 20px;
|
| 103 |
+
font-size: 0.72rem; font-weight: 600; margin: 2px;
|
| 104 |
+
}
|
| 105 |
+
.badge-blue { background: #1a237e22; color: #64b5f6; border: 1px solid #1a237e; }
|
| 106 |
+
.badge-green { background: #1b5e2022; color: #66bb6a; border: 1px solid #1b5e20; }
|
| 107 |
+
.badge-red { background: #b71c1c22; color: #ef9a9a; border: 1px solid #b71c1c; }
|
| 108 |
+
hr { border-color: #1e2433 !important; }
|
| 109 |
+
::-webkit-scrollbar { width: 6px; }
|
| 110 |
+
::-webkit-scrollbar-track { background: #0a0e1a; }
|
| 111 |
+
::-webkit-scrollbar-thumb { background: #1e2d4a; border-radius: 3px; }
|
| 112 |
+
</style>
|
| 113 |
+
""", unsafe_allow_html=True)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# ── Layout helper — avoids duplicate xaxis/yaxis conflicts ────────────────────
|
| 117 |
+
|
| 118 |
+
# Shared dark-theme defaults applied to every Plotly figure in the app.
BASE_LAYOUT = dict(
    paper_bgcolor="#0d1117",
    plot_bgcolor="#0a0e1a",
    font=dict(color="#b0bec5", family="Inter, sans-serif"),
    margin=dict(l=50, r=30, t=50, b=50),
    colorway=["#64b5f6", "#66bb6a", "#ffa726", "#ef5350", "#ab47bc", "#26c6da"],
)
# Axis styling is kept separate so per-chart axis options can be merged in.
BASE_XAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
BASE_YAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")


def L(**kwargs):
    """Merge the base dark-theme layout with chart-specific overrides.

    ``xaxis`` / ``yaxis`` overrides are merged key-by-key into the base axis
    styling instead of replacing it, which also avoids the
    'multiple values for keyword argument xaxis' TypeError that occurs when
    both the base layout and the caller supply an axis dict. Every other
    keyword replaces the base value of the same name.

    Args:
        **kwargs: Plotly layout properties. ``xaxis`` / ``yaxis`` may be a
            dict of axis options, or ``None`` / omitted for the base styling.

    Returns:
        A new dict suitable for ``fig.update_layout(**L(...))``. It shares no
        mutable objects with ``BASE_LAYOUT``, ``BASE_XAXIS`` or ``BASE_YAXIS``,
        so callers may modify it freely.
    """
    from copy import deepcopy  # local import keeps this block self-contained

    # Deep copy: font/margin/colorway are nested mutables that must not leak.
    out = deepcopy(BASE_LAYOUT)
    for axis_key, axis_base in (("xaxis", BASE_XAXIS), ("yaxis", BASE_YAXIS)):
        # A missing or None override simply yields a fresh copy of the base.
        override = kwargs.pop(axis_key, None) or {}
        out[axis_key] = {**axis_base, **override}
    out.update(kwargs)
    return out
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# ── Global helpers ─────────────────────────────────────────────────────────────
|
| 148 |
+
|
| 149 |
+
def metric_card(col, value, label, delta="", delta_type="neu"):
    """Render a dark-theme KPI card inside a Streamlit column.

    Args:
        col: Object exposing ``markdown(body, unsafe_allow_html=...)``,
            e.g. a column returned by ``st.columns``.
        value: Headline figure shown in large type.
        label: Caption displayed under the value.
        delta: Optional secondary line under the label.
        delta_type: Suffix of the ``delta-*`` CSS class applied to the
            delta line (``pos``, ``neg`` or ``neu``).
    """
    card_html = f"""
<div class='metric-card'>
<div class='metric-value'>{value}</div>
<div class='metric-label'>{label}</div>
<div class='metric-delta delta-{delta_type}'>{delta}</div>
</div>"""
    col.markdown(card_html, unsafe_allow_html=True)
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
# ── Data loaders ───────────────────────────────────────────────────────────────
|
| 160 |
+
|
| 161 |
+
@st.cache_data(show_spinner=False)
def load_feature_matrix():
    """Load ``feature_matrix.parquet`` from ``PROCESSED_DIR``.

    Returns:
        The parquet contents as a DataFrame, or an empty DataFrame when the
        file does not exist so pages can test ``.empty`` instead of catching
        an exception.
    """
    parquet_path = PROCESSED_DIR / "feature_matrix.parquet"
    if not parquet_path.exists():
        return pd.DataFrame()
    return pd.read_parquet(parquet_path)
|
| 165 |
+
|
| 166 |
+
@st.cache_data(show_spinner=False)
def load_model_results():
    """Load ``model_results.csv`` from ``EXPERIMENTS_DIR``.

    Returns:
        The CSV contents as a DataFrame, or an empty DataFrame when the
        file does not exist.
    """
    csv_path = EXPERIMENTS_DIR / "model_results.csv"
    if not csv_path.exists():
        return pd.DataFrame()
    return pd.read_csv(csv_path)
|
| 170 |
+
|
| 171 |
+
@st.cache_data(show_spinner=False)
def load_backtest():
    """Load ``backtest_results.csv`` from ``EXPERIMENTS_DIR``.

    Returns:
        The CSV contents as a DataFrame, or an empty DataFrame when the
        file does not exist.
    """
    csv_path = EXPERIMENTS_DIR / "backtest_results.csv"
    if not csv_path.exists():
        return pd.DataFrame()
    return pd.read_csv(csv_path)
|
| 175 |
+
|
| 176 |
+
@st.cache_data(show_spinner=False)
def load_shap():
    """Load ``shap_values.parquet`` from ``EXPERIMENTS_DIR``.

    Returns:
        The parquet contents as a DataFrame, or an empty DataFrame when the
        file does not exist.
    """
    parquet_path = EXPERIMENTS_DIR / "shap_values.parquet"
    if not parquet_path.exists():
        return pd.DataFrame()
    return pd.read_parquet(parquet_path)
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
# ── Sidebar ────────────────────────────────────────────────────────────────────
|
| 183 |
+
|
| 184 |
+
with st.sidebar:
|
| 185 |
+
st.markdown("""
|
| 186 |
+
<div style='padding:12px 0 20px 0;'>
|
| 187 |
+
<div style='font-size:1.5rem;font-weight:800;color:#64b5f6;'>📈 FinSight</div>
|
| 188 |
+
<div style='font-size:0.75rem;color:#8892b0;margin-top:4px;'>
|
| 189 |
+
LLM-Powered Earnings Intelligence
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
""", unsafe_allow_html=True)
|
| 193 |
+
|
| 194 |
+
page = st.radio(
|
| 195 |
+
"Navigation",
|
| 196 |
+
["Overview",
|
| 197 |
+
"Model Performance",
|
| 198 |
+
"Feature Importance",
|
| 199 |
+
"Backtest Results",
|
| 200 |
+
"Transcript Explorer"],
|
| 201 |
+
label_visibility="collapsed",
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 205 |
+
st.markdown("""
|
| 206 |
+
<div style='font-size:0.72rem;color:#8892b0;line-height:1.8;'>
|
| 207 |
+
<b style='color:#cfd8dc;'>Stack</b><br>
|
| 208 |
+
FinBERT · ChromaDB · XGBoost<br>
|
| 209 |
+
LightGBM · SHAP · Streamlit<br><br>
|
| 210 |
+
<b style='color:#cfd8dc;'>Data</b><br>
|
| 211 |
+
14,584 earnings transcripts<br>
|
| 212 |
+
601 S&P 500 companies<br>
|
| 213 |
+
2018 – 2024<br><br>
|
| 214 |
+
<b style='color:#cfd8dc;'>Author</b><br>
|
| 215 |
+
Rajveer Singh Pall
|
| 216 |
+
</div>
|
| 217 |
+
""", unsafe_allow_html=True)
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 221 |
+
# PAGE 1 — OVERVIEW
|
| 222 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 223 |
+
|
| 224 |
+
if page == "Overview":
|
| 225 |
+
fm = load_feature_matrix()
|
| 226 |
+
mr = load_model_results()
|
| 227 |
+
|
| 228 |
+
st.markdown("""
|
| 229 |
+
<div style='padding:24px 0 8px 0;'>
|
| 230 |
+
<div class='hero-title'>FinSight</div>
|
| 231 |
+
<div class='hero-title' style='font-size:1.8rem;color:#7c4dff;'>
|
| 232 |
+
Earnings Intelligence System
|
| 233 |
+
</div>
|
| 234 |
+
<div class='hero-sub'>
|
| 235 |
+
An end-to-end machine learning pipeline that extracts alpha signals
|
| 236 |
+
from S&P 500 earnings call transcripts using FinBERT sentiment analysis,
|
| 237 |
+
RAG-based structured feature extraction, and walk-forward validated
|
| 238 |
+
gradient boosting models.
|
| 239 |
+
</div>
|
| 240 |
+
</div>
|
| 241 |
+
""", unsafe_allow_html=True)
|
| 242 |
+
|
| 243 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 244 |
+
|
| 245 |
+
best_ic = float(mr["ic"].max()) if not mr.empty else 0.0198
|
| 246 |
+
best_auc = float(mr["auc"].max()) if not mr.empty else 0.5201
|
| 247 |
+
best_hr = float(mr["hit_rate"].max()) if not mr.empty else 0.5427
|
| 248 |
+
n_rows = len(fm) if not fm.empty else 13442
|
| 249 |
+
|
| 250 |
+
c1,c2,c3,c4,c5 = st.columns(5)
|
| 251 |
+
metric_card(c1, "14,584", "Transcripts", "601 companies", "neu")
|
| 252 |
+
metric_card(c2, f"{n_rows:,}", "Training Samples", "2018–2024", "neu")
|
| 253 |
+
metric_card(c3, f"{best_ic:.4f}", "Best IC", "LightGBM", "pos")
|
| 254 |
+
metric_card(c4, f"{best_auc:.4f}","Best AUC", "XGBoost 2024", "pos")
|
| 255 |
+
metric_card(c5, f"{best_hr:.4f}", "Best Hit Rate", "Walk-forward", "pos")
|
| 256 |
+
|
| 257 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 258 |
+
|
| 259 |
+
# Pipeline
|
| 260 |
+
st.markdown("<div class='section-header'>System Architecture</div>",
|
| 261 |
+
unsafe_allow_html=True)
|
| 262 |
+
steps = [
|
| 263 |
+
("🗄️","Stage 1","Data Ingestion", "SEC EDGAR · yfinance\n14,584 transcripts"),
|
| 264 |
+
("🧠","Stage 2","NLP Pipeline", "FinBERT · ChromaDB RAG\n34 features"),
|
| 265 |
+
("🤖","Stage 3","ML Models", "XGBoost · LightGBM\nWalk-forward CV"),
|
| 266 |
+
("📉","Stage 4","Backtesting", "Long-short strategy\n10bps TC"),
|
| 267 |
+
("🖥️","Stage 5","Dashboard", "Streamlit · Plotly\nHugging Face Spaces"),
|
| 268 |
+
]
|
| 269 |
+
cols = st.columns(len(steps))
|
| 270 |
+
for col, (icon, stage, title, desc) in zip(cols, steps):
|
| 271 |
+
col.markdown(f"""
|
| 272 |
+
<div class='pipeline-step'>
|
| 273 |
+
<div class='pipeline-icon'>{icon}</div>
|
| 274 |
+
<div class='pipeline-label'>{stage}</div>
|
| 275 |
+
<div class='pipeline-title'>{title}</div>
|
| 276 |
+
<div style='font-size:0.72rem;color:#546e7a;margin-top:4px;line-height:1.5;'>
|
| 277 |
+
{desc}</div>
|
| 278 |
+
</div>""", unsafe_allow_html=True)
|
| 279 |
+
|
| 280 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 281 |
+
|
| 282 |
+
left, right = st.columns([1.1, 1])
|
| 283 |
+
|
| 284 |
+
with left:
|
| 285 |
+
st.markdown("<div class='section-header'>Key Findings</div>",
|
| 286 |
+
unsafe_allow_html=True)
|
| 287 |
+
for f in [
|
| 288 |
+
"<strong>Analyst negativity > management positivity.</strong> "
|
| 289 |
+
"qa_neg_ratio (SHAP=0.054) is the single strongest feature. "
|
| 290 |
+
"Analyst pushback in Q&A contains more information than prepared remarks.",
|
| 291 |
+
|
| 292 |
+
"<strong>NLP reduces prediction variance by 87%.</strong> "
|
| 293 |
+
"Baseline IC std=0.114 vs LightGBM std=0.009 — "
|
| 294 |
+
"far more consistent across years.",
|
| 295 |
+
|
| 296 |
+
"<strong>Consistent with weak-form EMH.</strong> "
|
| 297 |
+
"Positive IC (0.0198) exists but cannot overcome 10bps transaction "
|
| 298 |
+
"costs at a 5-day holding period.",
|
| 299 |
+
|
| 300 |
+
"<strong>RAG guidance relevance is top-5.</strong> "
|
| 301 |
+
"Semantic relevance of the guidance section — not just its content — "
|
| 302 |
+
"carries significant predictive signal.",
|
| 303 |
+
]:
|
| 304 |
+
st.markdown(f"<div class='insight-box'>{f}</div>",
|
| 305 |
+
unsafe_allow_html=True)
|
| 306 |
+
|
| 307 |
+
with right:
|
| 308 |
+
st.markdown("<div class='section-header'>Dataset Coverage</div>",
|
| 309 |
+
unsafe_allow_html=True)
|
| 310 |
+
if not fm.empty:
|
| 311 |
+
yr = fm.groupby("year").size().reset_index(name="count")
|
| 312 |
+
fig = go.Figure(go.Bar(
|
| 313 |
+
x=yr["year"].astype(str),
|
| 314 |
+
y=yr["count"],
|
| 315 |
+
marker=dict(color=yr["count"],
|
| 316 |
+
colorscale=[[0,"#1a237e"],[1,"#64b5f6"]],
|
| 317 |
+
showscale=False),
|
| 318 |
+
text=yr["count"], textposition="outside",
|
| 319 |
+
textfont=dict(size=11),
|
| 320 |
+
))
|
| 321 |
+
fig.update_layout(**L(title="Transcript Count by Year", height=300,
|
| 322 |
+
showlegend=False,
|
| 323 |
+
xaxis=dict(title="Year"),
|
| 324 |
+
yaxis=dict(title="Transcripts")))
|
| 325 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 326 |
+
|
| 327 |
+
# Sentiment heatmap
|
| 328 |
+
if not fm.empty and "mgmt_net_sentiment" in fm.columns:
|
| 329 |
+
st.markdown("<div class='section-header'>Sentiment Landscape</div>",
|
| 330 |
+
unsafe_allow_html=True)
|
| 331 |
+
heat = (fm.groupby(["ticker","year"])["mgmt_net_sentiment"]
|
| 332 |
+
.mean().reset_index())
|
| 333 |
+
top_t = fm["ticker"].value_counts().head(30).index
|
| 334 |
+
heat = heat[heat["ticker"].isin(top_t)]
|
| 335 |
+
pivot = heat.pivot(index="ticker", columns="year",
|
| 336 |
+
values="mgmt_net_sentiment")
|
| 337 |
+
fig2 = go.Figure(go.Heatmap(
|
| 338 |
+
z=pivot.values,
|
| 339 |
+
x=[str(c) for c in pivot.columns],
|
| 340 |
+
y=pivot.index,
|
| 341 |
+
colorscale=[[0,"#b71c1c"],[0.35,"#e53935"],
|
| 342 |
+
[0.5,"#263238"],[0.65,"#1565c0"],[1,"#64b5f6"]],
|
| 343 |
+
zmid=0,
|
| 344 |
+
colorbar=dict(title="Net Sentiment", tickfont=dict(size=10)),
|
| 345 |
+
hovertemplate="Ticker: %{y}<br>Year: %{x}<br>Sentiment: %{z:.3f}<extra></extra>",
|
| 346 |
+
))
|
| 347 |
+
fig2.update_layout(**L(
|
| 348 |
+
title="Management Net Sentiment — Top 30 Tickers × Year",
|
| 349 |
+
height=500,
|
| 350 |
+
xaxis=dict(title="Year"),
|
| 351 |
+
yaxis=dict(title=""),
|
| 352 |
+
))
|
| 353 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 357 |
+
# PAGE 2 — MODEL PERFORMANCE
|
| 358 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 359 |
+
|
| 360 |
+
elif page == "Model Performance":
|
| 361 |
+
mr = load_model_results()
|
| 362 |
+
|
| 363 |
+
st.markdown("<div class='hero-title' style='font-size:2rem;'>Model Performance</div>",
|
| 364 |
+
unsafe_allow_html=True)
|
| 365 |
+
st.markdown("<div class='hero-sub'>Walk-forward validation (2021–2024). "
|
| 366 |
+
"Train on 3 prior years, test on held-out year. Zero data leakage.</div>",
|
| 367 |
+
unsafe_allow_html=True)
|
| 368 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 369 |
+
|
| 370 |
+
if mr.empty:
|
| 371 |
+
st.error("model_results.csv not found. Run Stage 3 first.")
|
| 372 |
+
st.stop()
|
| 373 |
+
|
| 374 |
+
summary = (
|
| 375 |
+
mr.groupby("model")[["ic","hit_rate","auc"]]
|
| 376 |
+
.agg({"ic":["mean","std"],"hit_rate":["mean","std"],"auc":["mean","std"]})
|
| 377 |
+
.round(4)
|
| 378 |
+
)
|
| 379 |
+
summary.columns = ["IC Mean","IC Std","Hit Rate Mean","Hit Rate Std",
|
| 380 |
+
"AUC Mean","AUC Std"]
|
| 381 |
+
summary = summary.sort_values("IC Mean", ascending=False)
|
| 382 |
+
|
| 383 |
+
st.markdown("<div class='section-header'>Model Comparison</div>",
|
| 384 |
+
unsafe_allow_html=True)
|
| 385 |
+
|
| 386 |
+
def color_ic(val):
    """Return the CSS style for an IC cell in the model-comparison table.

    Floats above 0.015 are shown in bold green, negative floats in red.
    Everything else (including NaN and non-float values) is left unstyled.
    """
    if not isinstance(val, float):
        return ""
    if val > 0.015:
        return "color: #66bb6a; font-weight:600"
    return "color: #ef5350" if val < 0 else ""
|
| 391 |
+
|
| 392 |
+
st.dataframe(
|
| 393 |
+
summary.style.applymap(color_ic, subset=["IC Mean"]).format("{:.4f}"),
|
| 394 |
+
use_container_width=True, height=220,
|
| 395 |
+
)
|
| 396 |
+
|
| 397 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 398 |
+
st.markdown("<div class='section-header'>Information Coefficient by Year</div>",
|
| 399 |
+
unsafe_allow_html=True)
|
| 400 |
+
|
| 401 |
+
MODEL_COLORS = {
|
| 402 |
+
"Baseline": "#ffa726",
|
| 403 |
+
"FinBERT_only": "#26c6da",
|
| 404 |
+
"RAG_only": "#ab47bc",
|
| 405 |
+
"XGBoost_all": "#ef5350",
|
| 406 |
+
"LightGBM_all": "#66bb6a",
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
fig = go.Figure()
|
| 410 |
+
for m in mr["model"].unique():
|
| 411 |
+
sub = mr[mr["model"]==m].sort_values("test_year")
|
| 412 |
+
fig.add_trace(go.Scatter(
|
| 413 |
+
x=sub["test_year"].astype(int),
|
| 414 |
+
y=sub["ic"],
|
| 415 |
+
mode="lines+markers", name=m,
|
| 416 |
+
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2.5),
|
| 417 |
+
marker=dict(size=9),
|
| 418 |
+
hovertemplate=f"<b>{m}</b><br>Year: %{{x}}<br>IC: %{{y:.4f}}<extra></extra>",
|
| 419 |
+
))
|
| 420 |
+
fig.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=1.2)
|
| 421 |
+
fig.update_layout(**L(
|
| 422 |
+
title="Walk-Forward IC — Positive = Predictive",
|
| 423 |
+
height=380,
|
| 424 |
+
xaxis=dict(tickvals=[2021,2022,2023,2024], title="Year"),
|
| 425 |
+
yaxis=dict(title="Information Coefficient"),
|
| 426 |
+
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433", borderwidth=1),
|
| 427 |
+
))
|
| 428 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 429 |
+
|
| 430 |
+
col1, col2 = st.columns(2)
|
| 431 |
+
|
| 432 |
+
with col1:
|
| 433 |
+
st.markdown("<div class='subsection'>Hit Rate by Year</div>",
|
| 434 |
+
unsafe_allow_html=True)
|
| 435 |
+
fig2 = go.Figure()
|
| 436 |
+
for m in mr["model"].unique():
|
| 437 |
+
sub = mr[mr["model"]==m].sort_values("test_year")
|
| 438 |
+
fig2.add_trace(go.Scatter(
|
| 439 |
+
x=sub["test_year"].astype(int), y=sub["hit_rate"],
|
| 440 |
+
mode="lines+markers", name=m,
|
| 441 |
+
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
|
| 442 |
+
marker=dict(size=7), showlegend=False,
|
| 443 |
+
))
|
| 444 |
+
fig2.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
|
| 445 |
+
fig2.update_layout(**L(
|
| 446 |
+
height=300, title="Hit Rate (>0.5 = better than coin flip)",
|
| 447 |
+
xaxis=dict(tickvals=[2021,2022,2023,2024]),
|
| 448 |
+
yaxis=dict(title="Hit Rate"),
|
| 449 |
+
))
|
| 450 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 451 |
+
|
| 452 |
+
with col2:
|
| 453 |
+
st.markdown("<div class='subsection'>AUC by Year</div>",
|
| 454 |
+
unsafe_allow_html=True)
|
| 455 |
+
fig3 = go.Figure()
|
| 456 |
+
for m in mr["model"].unique():
|
| 457 |
+
sub = mr[mr["model"]==m].sort_values("test_year")
|
| 458 |
+
fig3.add_trace(go.Scatter(
|
| 459 |
+
x=sub["test_year"].astype(int), y=sub["auc"],
|
| 460 |
+
mode="lines+markers", name=m,
|
| 461 |
+
line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
|
| 462 |
+
marker=dict(size=7), showlegend=False,
|
| 463 |
+
))
|
| 464 |
+
fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
|
| 465 |
+
fig3.update_layout(**L(
|
| 466 |
+
height=300, title="AUC-ROC (>0.5 = better than random)",
|
| 467 |
+
xaxis=dict(tickvals=[2021,2022,2023,2024]),
|
| 468 |
+
yaxis=dict(title="AUC"),
|
| 469 |
+
))
|
| 470 |
+
st.plotly_chart(fig3, use_container_width=True)
|
| 471 |
+
|
| 472 |
+
st.markdown("<div class='section-header'>Stability Analysis — IC Variance</div>",
|
| 473 |
+
unsafe_allow_html=True)
|
| 474 |
+
ic_std = mr.groupby("model")["ic"].std().sort_values()
|
| 475 |
+
ic_mean = mr.groupby("model")["ic"].mean()
|
| 476 |
+
bar_colors = ["#66bb6a" if ic_mean[m] > 0 else "#ef5350" for m in ic_std.index]
|
| 477 |
+
|
| 478 |
+
fig4 = go.Figure(go.Bar(
|
| 479 |
+
y=ic_std.index, x=ic_std.values, orientation="h",
|
| 480 |
+
marker_color=bar_colors,
|
| 481 |
+
text=[f"σ={v:.4f}" for v in ic_std.values],
|
| 482 |
+
textposition="outside", textfont=dict(size=11),
|
| 483 |
+
))
|
| 484 |
+
fig4.update_layout(**L(
|
| 485 |
+
title="IC Standard Deviation — Lower = More Consistent",
|
| 486 |
+
height=280,
|
| 487 |
+
xaxis=dict(title="IC Std Dev"),
|
| 488 |
+
yaxis=dict(title=""),
|
| 489 |
+
))
|
| 490 |
+
st.plotly_chart(fig4, use_container_width=True)
|
| 491 |
+
|
| 492 |
+
st.markdown("""
|
| 493 |
+
<div class='insight-box'>
|
| 494 |
+
<strong>Interpretation:</strong> The Baseline's high IC mean (0.043) is
|
| 495 |
+
misleading — its std of 0.114 shows extreme instability driven by lucky
|
| 496 |
+
quarters. LightGBM achieves IC=0.020 with std=0.009, making it
|
| 497 |
+
<strong>10× more stable</strong>. In live trading, consistency matters
|
| 498 |
+
far more than occasional lucky peaks.
|
| 499 |
+
</div>
|
| 500 |
+
""", unsafe_allow_html=True)
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 504 |
+
# PAGE 3 — FEATURE IMPORTANCE
|
| 505 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 506 |
+
|
| 507 |
+
elif page == "Feature Importance":
|
| 508 |
+
shap_df = load_shap()
|
| 509 |
+
fm = load_feature_matrix()
|
| 510 |
+
|
| 511 |
+
st.markdown("<div class='hero-title' style='font-size:2rem;'>Feature Importance</div>",
|
| 512 |
+
unsafe_allow_html=True)
|
| 513 |
+
st.markdown("<div class='hero-sub'>SHAP values computed on LightGBM (best model). "
|
| 514 |
+
"Shows which features actually drive predictions.</div>",
|
| 515 |
+
unsafe_allow_html=True)
|
| 516 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 517 |
+
|
| 518 |
+
if shap_df.empty:
|
| 519 |
+
st.error("shap_values.parquet not found. Run run_shap.py first.")
|
| 520 |
+
st.stop()
|
| 521 |
+
|
| 522 |
+
mean_shap = shap_df.abs().mean().sort_values(ascending=False)
|
| 523 |
+
|
| 524 |
+
def feat_color(name):
|
| 525 |
+
if name.startswith("rag_"): return "#64b5f6"
|
| 526 |
+
if name.startswith("mgmt_"): return "#66bb6a"
|
| 527 |
+
if name.startswith("qa_"): return "#ffa726"
|
| 528 |
+
return "#ab47bc"
|
| 529 |
+
|
| 530 |
+
def feat_group(name):
|
| 531 |
+
if name.startswith("rag_"): return "RAG Features"
|
| 532 |
+
if name.startswith("mgmt_"): return "Management FinBERT"
|
| 533 |
+
if name.startswith("qa_"): return "QA FinBERT"
|
| 534 |
+
return "Other"
|
| 535 |
+
|
| 536 |
+
st.markdown("<div class='section-header'>Top 20 Features by Mean |SHAP|</div>",
|
| 537 |
+
unsafe_allow_html=True)
|
| 538 |
+
top20 = mean_shap.head(20)[::-1]
|
| 539 |
+
fig = go.Figure(go.Bar(
|
| 540 |
+
y=top20.index, x=top20.values, orientation="h",
|
| 541 |
+
marker_color=[feat_color(n) for n in top20.index],
|
| 542 |
+
text=[f"{v:.4f}" for v in top20.values],
|
| 543 |
+
textposition="outside", textfont=dict(size=10),
|
| 544 |
+
hovertemplate="<b>%{y}</b><br>Mean |SHAP|: %{x:.4f}<extra></extra>",
|
| 545 |
+
))
|
| 546 |
+
fig.update_layout(**L(
|
| 547 |
+
height=520,
|
| 548 |
+
title="Feature Importance — 🔵 RAG | 🟢 Mgmt FinBERT | 🟠 QA FinBERT",
|
| 549 |
+
xaxis=dict(title="Mean |SHAP Value|"),
|
| 550 |
+
yaxis=dict(title=""),
|
| 551 |
+
))
|
| 552 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 553 |
+
|
| 554 |
+
col1, col2 = st.columns(2)
|
| 555 |
+
|
| 556 |
+
with col1:
|
| 557 |
+
st.markdown("<div class='section-header'>Importance by Feature Group</div>",
|
| 558 |
+
unsafe_allow_html=True)
|
| 559 |
+
gs = (mean_shap.reset_index()
|
| 560 |
+
.rename(columns={"index":"feature", 0:"shap"}))
|
| 561 |
+
gs.columns = ["feature","shap"]
|
| 562 |
+
gs["group"] = gs["feature"].apply(feat_group)
|
| 563 |
+
gt = gs.groupby("group")["shap"].sum()
|
| 564 |
+
|
| 565 |
+
fig2 = go.Figure(go.Pie(
|
| 566 |
+
labels=gt.index, values=gt.values, hole=0.55,
|
| 567 |
+
marker=dict(colors=["#64b5f6","#66bb6a","#ffa726","#ab47bc"]),
|
| 568 |
+
textinfo="label+percent", textfont=dict(size=12),
|
| 569 |
+
hovertemplate="<b>%{label}</b><br>Total SHAP: %{value:.4f}<br>%{percent}<extra></extra>",
|
| 570 |
+
))
|
| 571 |
+
fig2.update_layout(**L(
|
| 572 |
+
height=320, showlegend=False,
|
| 573 |
+
annotations=[dict(text="SHAP<br>Groups", x=0.5, y=0.5,
|
| 574 |
+
font_size=13, showarrow=False,
|
| 575 |
+
font_color="#b0bec5")],
|
| 576 |
+
))
|
| 577 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 578 |
+
|
| 579 |
+
with col2:
|
| 580 |
+
st.markdown("<div class='section-header'>SHAP vs Correlation with Target</div>",
|
| 581 |
+
unsafe_allow_html=True)
|
| 582 |
+
if not fm.empty and "target_5d_up" in fm.columns:
|
| 583 |
+
feat_cols = [c for c in shap_df.columns if c in fm.columns]
|
| 584 |
+
corrs = fm[feat_cols+["target_5d_up"]].corr()["target_5d_up"].drop("target_5d_up")
|
| 585 |
+
cdf = pd.DataFrame({
|
| 586 |
+
"feature": corrs.index,
|
| 587 |
+
"shap": mean_shap.reindex(corrs.index).fillna(0).values,
|
| 588 |
+
"corr": corrs.values,
|
| 589 |
+
"group": [feat_group(f) for f in corrs.index],
|
| 590 |
+
})
|
| 591 |
+
cmap = {
|
| 592 |
+
"RAG Features": "#64b5f6",
|
| 593 |
+
"Management FinBERT": "#66bb6a",
|
| 594 |
+
"QA FinBERT": "#ffa726",
|
| 595 |
+
"Other": "#ab47bc",
|
| 596 |
+
}
|
| 597 |
+
fig3 = px.scatter(
|
| 598 |
+
cdf, x="corr", y="shap", color="group",
|
| 599 |
+
color_discrete_map=cmap, hover_data=["feature"],
|
| 600 |
+
labels={"corr":"Pearson Corr with Target",
|
| 601 |
+
"shap":"Mean |SHAP Value|"},
|
| 602 |
+
height=320,
|
| 603 |
+
)
|
| 604 |
+
fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
|
| 605 |
+
fig3.update_layout(**L(
|
| 606 |
+
title="SHAP Importance vs Linear Correlation",
|
| 607 |
+
showlegend=False,
|
| 608 |
+
))
|
| 609 |
+
st.plotly_chart(fig3, use_container_width=True)
|
| 610 |
+
|
| 611 |
+
st.markdown("<div class='section-header'>Feature Insights</div>",
|
| 612 |
+
unsafe_allow_html=True)
|
| 613 |
+
insights = [
|
| 614 |
+
("🏆 #1 — qa_neg_ratio",
|
| 615 |
+
"Proportion of negative sentences in analyst Q&A. When analysts "
|
| 616 |
+
"push back hard, it signals market-moving information that management "
|
| 617 |
+
"tried to downplay."),
|
| 618 |
+
("📊 #2 — mgmt_sent_vol",
|
| 619 |
+
"Volatility of management's sentence-level sentiment. Inconsistent "
|
| 620 |
+
"messaging — mixing optimism with caution — often precedes larger "
|
| 621 |
+
"price moves."),
|
| 622 |
+
("📝 #3 — qa_n_sentences",
|
| 623 |
+
"Length of the Q&A section. Longer Q&A sessions indicate "
|
| 624 |
+
"more analyst scrutiny, which correlates with uncertainty about "
|
| 625 |
+
"the quarter's results."),
|
| 626 |
+
("😶 #4 — mgmt_mean_neu",
|
| 627 |
+
"Neutral sentiment ratio in management remarks. Deliberately neutral "
|
| 628 |
+
"language can mask very good or very bad news — a hedging signal."),
|
| 629 |
+
("🎯 #5 — rag_guidance_relevance",
|
| 630 |
+
"Semantic similarity of the guidance section to specific numerical "
|
| 631 |
+
"guidance queries. More relevant guidance sections contain concrete "
|
| 632 |
+
"targets that markets react to more strongly."),
|
| 633 |
+
]
|
| 634 |
+
cols = st.columns(len(insights))
|
| 635 |
+
for col, (title, body) in zip(cols, insights):
|
| 636 |
+
col.markdown(f"""
|
| 637 |
+
<div class='pipeline-step' style='text-align:left;height:190px;'>
|
| 638 |
+
<div style='font-size:0.82rem;font-weight:700;color:#64b5f6;
|
| 639 |
+
margin-bottom:8px;'>{title}</div>
|
| 640 |
+
<div style='font-size:0.76rem;color:#8892b0;line-height:1.6;'>
|
| 641 |
+
{body}</div>
|
| 642 |
+
</div>""", unsafe_allow_html=True)
|
| 643 |
+
|
| 644 |
+
|
| 645 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 646 |
+
# PAGE 4 — BACKTEST
|
| 647 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 648 |
+
|
| 649 |
+
elif page == "Backtest Results":
|
| 650 |
+
bt = load_backtest()
|
| 651 |
+
|
| 652 |
+
st.markdown("<div class='hero-title' style='font-size:2rem;'>Backtest Results</div>",
|
| 653 |
+
unsafe_allow_html=True)
|
| 654 |
+
st.markdown("<div class='hero-sub'>Long-short quartile portfolio. "
|
| 655 |
+
"Long top-25% predicted stocks, short bottom-25%. "
|
| 656 |
+
"5-day holding period. 10bps round-trip transaction cost.</div>",
|
| 657 |
+
unsafe_allow_html=True)
|
| 658 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 659 |
+
|
| 660 |
+
if bt.empty:
|
| 661 |
+
st.error("backtest_results.csv not found. Run Stage 4 first.")
|
| 662 |
+
st.stop()
|
| 663 |
+
|
| 664 |
+
bt = bt.sort_values(["year","quarter"]).reset_index(drop=True)
|
| 665 |
+
bt["period"] = bt["year"].astype(str) + "-Q" + bt["quarter"].astype(str)
|
| 666 |
+
rets = bt["net_ret"]
|
| 667 |
+
cum = (1 + rets).cumprod()
|
| 668 |
+
peak = cum.cummax()
|
| 669 |
+
dd = (cum - peak) / peak
|
| 670 |
+
|
| 671 |
+
n_yrs = len(bt) / 4
|
| 672 |
+
ann_ret = float((1 + rets).prod() ** (1/n_yrs) - 1)
|
| 673 |
+
ann_vol = float(rets.std() * np.sqrt(4))
|
| 674 |
+
sharpe = ann_ret / ann_vol if ann_vol != 0 else 0.0
|
| 675 |
+
max_dd = float(dd.min())
|
| 676 |
+
hit = float((rets > 0).mean())
|
| 677 |
+
|
| 678 |
+
c1,c2,c3,c4,c5 = st.columns(5)
|
| 679 |
+
metric_card(c1, f"{ann_ret*100:.2f}%", "Ann. Return",
|
| 680 |
+
"After TC", "pos" if ann_ret > 0 else "neg")
|
| 681 |
+
metric_card(c2, f"{sharpe:.3f}", "Sharpe Ratio",
|
| 682 |
+
">1.0 = excellent", "pos" if sharpe > 0 else "neg")
|
| 683 |
+
metric_card(c3, f"{max_dd*100:.2f}%", "Max Drawdown",
|
| 684 |
+
"Peak-to-trough", "neg")
|
| 685 |
+
metric_card(c4, f"{hit*100:.0f}%", "Win Rate",
|
| 686 |
+
"Profitable quarters", "pos" if hit > 0.5 else "neg")
|
| 687 |
+
metric_card(c5, str(len(bt)), "Quarters Tested",
|
| 688 |
+
"2021–2024", "neu")
|
| 689 |
+
|
| 690 |
+
st.markdown("<br>", unsafe_allow_html=True)
|
| 691 |
+
st.markdown("<div class='section-header'>Equity Curve</div>",
|
| 692 |
+
unsafe_allow_html=True)
|
| 693 |
+
|
| 694 |
+
fig = make_subplots(rows=2, cols=1, row_heights=[0.72,0.28],
|
| 695 |
+
shared_xaxes=True, vertical_spacing=0.04)
|
| 696 |
+
fig.add_trace(go.Scatter(
|
| 697 |
+
x=bt["period"], y=cum.values,
|
| 698 |
+
mode="lines+markers",
|
| 699 |
+
line=dict(color="#64b5f6", width=2.5),
|
| 700 |
+
marker=dict(size=7),
|
| 701 |
+
fill="tozeroy", fillcolor="rgba(100,181,246,0.06)",
|
| 702 |
+
name="Cumulative Return",
|
| 703 |
+
hovertemplate="<b>%{x}</b><br>Cumulative: %{y:.4f}<extra></extra>",
|
| 704 |
+
), row=1, col=1)
|
| 705 |
+
fig.add_hline(y=1.0, line_dash="dash", line_color="#546e7a",
|
| 706 |
+
line_width=1, row=1, col=1)
|
| 707 |
+
fig.add_trace(go.Bar(
|
| 708 |
+
x=bt["period"], y=dd.values*100,
|
| 709 |
+
marker_color="#ef5350", opacity=0.7, name="Drawdown %",
|
| 710 |
+
hovertemplate="<b>%{x}</b><br>Drawdown: %{y:.2f}%<extra></extra>",
|
| 711 |
+
), row=2, col=1)
|
| 712 |
+
|
| 713 |
+
fig.update_layout(
|
| 714 |
+
paper_bgcolor="#0d1117", plot_bgcolor="#0a0e1a",
|
| 715 |
+
font=dict(color="#b0bec5"),
|
| 716 |
+
margin=dict(l=50,r=30,t=50,b=80),
|
| 717 |
+
title="FinSight Long-Short Strategy — 2021 to 2024",
|
| 718 |
+
height=500, showlegend=False,
|
| 719 |
+
xaxis2=dict(tickangle=45, tickfont_size=10,
|
| 720 |
+
gridcolor="#1a2035", linecolor="#1e2433"),
|
| 721 |
+
yaxis=dict(title="Cumulative Return",
|
| 722 |
+
gridcolor="#1a2035", linecolor="#1e2433"),
|
| 723 |
+
yaxis2=dict(title="DD %",
|
| 724 |
+
gridcolor="#1a2035", linecolor="#1e2433"),
|
| 725 |
+
)
|
| 726 |
+
fig.update_xaxes(gridcolor="#1a2035", linecolor="#1e2433")
|
| 727 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 728 |
+
|
| 729 |
+
col1, col2 = st.columns(2)
|
| 730 |
+
|
| 731 |
+
with col1:
|
| 732 |
+
st.markdown("<div class='subsection'>Quarterly Net Returns</div>",
|
| 733 |
+
unsafe_allow_html=True)
|
| 734 |
+
q_colors = ["#66bb6a" if r > 0 else "#ef5350" for r in rets]
|
| 735 |
+
fig2 = go.Figure(go.Bar(
|
| 736 |
+
x=bt["period"], y=rets.values*100,
|
| 737 |
+
marker_color=q_colors,
|
| 738 |
+
text=[f"{v*100:.2f}%" for v in rets.values],
|
| 739 |
+
textposition="outside", textfont=dict(size=9),
|
| 740 |
+
hovertemplate="<b>%{x}</b><br>Net Return: %{y:.2f}%<extra></extra>",
|
| 741 |
+
))
|
| 742 |
+
fig2.add_hline(y=0, line_color="#546e7a", line_width=1)
|
| 743 |
+
fig2.update_layout(**L(
|
| 744 |
+
height=320,
|
| 745 |
+
title="Net Return per Quarter (after 10bps TC)",
|
| 746 |
+
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
|
| 747 |
+
yaxis=dict(title="Net Return (%)"),
|
| 748 |
+
))
|
| 749 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 750 |
+
|
| 751 |
+
with col2:
|
| 752 |
+
st.markdown("<div class='subsection'>Long vs Short Leg Hit Rate</div>",
|
| 753 |
+
unsafe_allow_html=True)
|
| 754 |
+
fig3 = go.Figure()
|
| 755 |
+
fig3.add_trace(go.Scatter(
|
| 756 |
+
x=bt["period"], y=bt["long_hit"],
|
| 757 |
+
mode="lines+markers",
|
| 758 |
+
line=dict(color="#66bb6a", width=2),
|
| 759 |
+
marker=dict(size=7), name="Long Leg",
|
| 760 |
+
))
|
| 761 |
+
fig3.add_trace(go.Scatter(
|
| 762 |
+
x=bt["period"], y=bt["short_hit"],
|
| 763 |
+
mode="lines+markers",
|
| 764 |
+
line=dict(color="#ef5350", width=2),
|
| 765 |
+
marker=dict(size=7), name="Short Leg",
|
| 766 |
+
))
|
| 767 |
+
fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a")
|
| 768 |
+
fig3.update_layout(**L(
|
| 769 |
+
height=320,
|
| 770 |
+
title="Direction Accuracy — Long & Short Legs",
|
| 771 |
+
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
|
| 772 |
+
yaxis=dict(title="Hit Rate"),
|
| 773 |
+
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
|
| 774 |
+
))
|
| 775 |
+
st.plotly_chart(fig3, use_container_width=True)
|
| 776 |
+
|
| 777 |
+
st.markdown("<div class='section-header'>Quarterly Breakdown</div>",
|
| 778 |
+
unsafe_allow_html=True)
|
| 779 |
+
disp = bt[["period","net_ret","long_ret","short_ret",
|
| 780 |
+
"long_hit","short_hit","n_stocks","q_size"]].copy()
|
| 781 |
+
disp.columns = ["Quarter","Net Ret","Long Ret","Short Ret",
|
| 782 |
+
"Long Hit","Short Hit","N Stocks","Leg Size"]
|
| 783 |
+
|
| 784 |
+
def color_ret(val):
|
| 785 |
+
if isinstance(val, float):
|
| 786 |
+
if val > 0: return "color: #66bb6a"
|
| 787 |
+
if val < 0: return "color: #ef5350"
|
| 788 |
+
return ""
|
| 789 |
+
|
| 790 |
+
st.dataframe(
|
| 791 |
+
disp.style.applymap(color_ret,
|
| 792 |
+
subset=["Net Ret","Long Ret","Short Ret"])
|
| 793 |
+
.format({c:"{:.4f}" for c in
|
| 794 |
+
["Net Ret","Long Ret","Short Ret",
|
| 795 |
+
"Long Hit","Short Hit"]}),
|
| 796 |
+
use_container_width=True, hide_index=True,
|
| 797 |
+
)
|
| 798 |
+
|
| 799 |
+
st.markdown("""
|
| 800 |
+
<div class='insight-box'>
|
| 801 |
+
<strong>Context:</strong> A Sharpe of -0.81 with a 5-day holding period
|
| 802 |
+
is consistent with academic literature on post-earnings announcement
|
| 803 |
+
drift (Chan et al. 1996, Lerman et al. 2008). The signal exists
|
| 804 |
+
(IC=0.0198) but is too weak to survive round-trip transaction costs at
|
| 805 |
+
this frequency. Extending to 20-day holding periods is the natural
|
| 806 |
+
next step.
|
| 807 |
+
</div>
|
| 808 |
+
""", unsafe_allow_html=True)
|
| 809 |
+
|
| 810 |
+
|
| 811 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 812 |
+
# PAGE 5 — TRANSCRIPT EXPLORER
|
| 813 |
+
# ═══════════════════════════════════════════════════════════════════════════════
|
| 814 |
+
|
| 815 |
+
elif page == "Transcript Explorer":
|
| 816 |
+
fm = load_feature_matrix()
|
| 817 |
+
|
| 818 |
+
st.markdown("<div class='hero-title' style='font-size:2rem;'>Transcript Explorer</div>",
|
| 819 |
+
unsafe_allow_html=True)
|
| 820 |
+
st.markdown("<div class='hero-sub'>Browse sentiment profiles for any company "
|
| 821 |
+
"and quarter in the dataset.</div>",
|
| 822 |
+
unsafe_allow_html=True)
|
| 823 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
| 824 |
+
|
| 825 |
+
if fm.empty:
|
| 826 |
+
st.error("Feature matrix not found.")
|
| 827 |
+
st.stop()
|
| 828 |
+
|
| 829 |
+
col1, col2, col3 = st.columns([2,1,1])
|
| 830 |
+
with col1:
|
| 831 |
+
all_tickers = sorted(fm["ticker"].dropna().unique())
|
| 832 |
+
default_idx = all_tickers.index("AAPL") if "AAPL" in all_tickers else 0
|
| 833 |
+
ticker = st.selectbox("Select Ticker", all_tickers, index=default_idx)
|
| 834 |
+
with col2:
|
| 835 |
+
years = sorted(fm["year"].unique(), reverse=True)
|
| 836 |
+
year = st.selectbox("Year", years)
|
| 837 |
+
with col3:
|
| 838 |
+
quarters = sorted(fm[fm["year"]==year]["quarter"].unique())
|
| 839 |
+
quarter = st.selectbox("Quarter", quarters)
|
| 840 |
+
|
| 841 |
+
row = fm[(fm["ticker"]==ticker) &
|
| 842 |
+
(fm["year"]==year) &
|
| 843 |
+
(fm["quarter"]==quarter)]
|
| 844 |
+
|
| 845 |
+
if row.empty:
|
| 846 |
+
st.warning("No data for this combination.")
|
| 847 |
+
st.stop()
|
| 848 |
+
|
| 849 |
+
row = row.iloc[0]
|
| 850 |
+
|
| 851 |
+
ret_5d = row.get("ret_5d", 0)
|
| 852 |
+
target = int(row.get("target_5d_up", 0))
|
| 853 |
+
st.markdown(f"""
|
| 854 |
+
<div style='display:flex;align-items:center;gap:16px;margin:16px 0;'>
|
| 855 |
+
<div style='font-size:2rem;font-weight:800;color:#64b5f6;'>{ticker}</div>
|
| 856 |
+
<div style='font-size:1rem;color:#8892b0;'>{int(year)} Q{int(quarter)}</div>
|
| 857 |
+
<div class='badge badge-{"green" if target==1 else "red"}'>
|
| 858 |
+
{"▲ UP" if target==1 else "▼ DOWN"} 5d
|
| 859 |
+
</div>
|
| 860 |
+
<div class='badge badge-blue'>
|
| 861 |
+
5d Return: {float(ret_5d)*100:.2f}%
|
| 862 |
+
</div>
|
| 863 |
+
</div>
|
| 864 |
+
""", unsafe_allow_html=True)
|
| 865 |
+
|
| 866 |
+
left, right = st.columns([1.2, 1])
|
| 867 |
+
|
| 868 |
+
with left:
|
| 869 |
+
st.markdown("<div class='subsection'>Sentiment Breakdown</div>",
|
| 870 |
+
unsafe_allow_html=True)
|
| 871 |
+
cats = ["Mgmt Positive","Mgmt Neutral","Mgmt Negative",
|
| 872 |
+
"QA Positive","QA Neutral","QA Negative"]
|
| 873 |
+
vals = [
|
| 874 |
+
float(row.get("mgmt_mean_pos", 0) or 0),
|
| 875 |
+
float(row.get("mgmt_mean_neu", 0) or 0),
|
| 876 |
+
float(row.get("mgmt_mean_neg", 0) or 0),
|
| 877 |
+
float(row.get("qa_mean_pos", 0) or 0),
|
| 878 |
+
float(row.get("qa_mean_neu", 0) or 0),
|
| 879 |
+
float(row.get("qa_mean_neg", 0) or 0),
|
| 880 |
+
]
|
| 881 |
+
vals_c = vals + [vals[0]]
|
| 882 |
+
cats_c = cats + [cats[0]]
|
| 883 |
+
fig = go.Figure(go.Scatterpolar(
|
| 884 |
+
r=vals_c, theta=cats_c, fill="toself",
|
| 885 |
+
fillcolor="rgba(100,181,246,0.15)",
|
| 886 |
+
line=dict(color="#64b5f6", width=2), name=ticker,
|
| 887 |
+
))
|
| 888 |
+
fig.update_layout(
|
| 889 |
+
paper_bgcolor="#0d1117",
|
| 890 |
+
font=dict(color="#b0bec5"),
|
| 891 |
+
polar=dict(
|
| 892 |
+
bgcolor="#0d1117",
|
| 893 |
+
radialaxis=dict(visible=True, range=[0,1],
|
| 894 |
+
gridcolor="#1a2035", linecolor="#1a2035",
|
| 895 |
+
tickfont=dict(size=9, color="#546e7a")),
|
| 896 |
+
angularaxis=dict(gridcolor="#1a2035", linecolor="#1a2035",
|
| 897 |
+
tickfont=dict(size=10, color="#b0bec5")),
|
| 898 |
+
),
|
| 899 |
+
height=360, showlegend=False,
|
| 900 |
+
title=f"{ticker} — Sentiment Radar",
|
| 901 |
+
margin=dict(l=40,r=40,t=50,b=40),
|
| 902 |
+
)
|
| 903 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 904 |
+
|
| 905 |
+
with right:
|
| 906 |
+
st.markdown("<div class='subsection'>Feature Scores</div>",
|
| 907 |
+
unsafe_allow_html=True)
|
| 908 |
+
|
| 909 |
+
def score_bar(label, val, invert=False):
|
| 910 |
+
if val is None or pd.isna(val):
|
| 911 |
+
return
|
| 912 |
+
v = float(val)
|
| 913 |
+
pct = max(0, min(1, v)) * 100
|
| 914 |
+
color = "#ef5350" if invert else "#64b5f6"
|
| 915 |
+
st.markdown(f"""
|
| 916 |
+
<div style='margin:8px 0;'>
|
| 917 |
+
<div style='display:flex;justify-content:space-between;
|
| 918 |
+
font-size:0.8rem;color:#8892b0;margin-bottom:3px;'>
|
| 919 |
+
<span>{label}</span><span>{v:.3f}</span>
|
| 920 |
+
</div>
|
| 921 |
+
<div style='background:#1a2035;border-radius:4px;height:6px;'>
|
| 922 |
+
<div style='background:{color};width:{pct:.0f}%;
|
| 923 |
+
height:6px;border-radius:4px;'></div>
|
| 924 |
+
</div>
|
| 925 |
+
</div>""", unsafe_allow_html=True)
|
| 926 |
+
|
| 927 |
+
score_bar("Mgmt Net Sentiment", row.get("mgmt_net_sentiment"))
|
| 928 |
+
score_bar("QA Net Sentiment", row.get("qa_net_sentiment"))
|
| 929 |
+
score_bar("Mgmt Negativity", row.get("mgmt_neg_ratio"), invert=True)
|
| 930 |
+
score_bar("QA Negativity", row.get("qa_neg_ratio"), invert=True)
|
| 931 |
+
score_bar("Guidance Specificity", row.get("rag_guidance_specificity_score"))
|
| 932 |
+
score_bar("Mgmt Confidence", row.get("rag_management_confidence_score"))
|
| 933 |
+
score_bar("Forward Looking", row.get("rag_forward_looking_score"))
|
| 934 |
+
score_bar("New Risks", row.get("rag_new_risks_score"), invert=True)
|
| 935 |
+
score_bar("Cost Pressure", row.get("rag_cost_pressure_score"), invert=True)
|
| 936 |
+
|
| 937 |
+
# Historical trend
|
| 938 |
+
st.markdown(f"<div class='section-header'>{ticker} — Historical Sentiment</div>",
|
| 939 |
+
unsafe_allow_html=True)
|
| 940 |
+
|
| 941 |
+
td = fm[fm["ticker"]==ticker].copy().sort_values(["year","quarter"])
|
| 942 |
+
td["period"] = td["year"].astype(str) + "-Q" + td["quarter"].astype(str)
|
| 943 |
+
|
| 944 |
+
if len(td) > 1:
|
| 945 |
+
fig2 = go.Figure()
|
| 946 |
+
for col_name, label, color in [
|
| 947 |
+
("mgmt_net_sentiment", "Mgmt Sentiment", "#66bb6a"),
|
| 948 |
+
("qa_net_sentiment", "QA Sentiment", "#64b5f6"),
|
| 949 |
+
("mgmt_neg_ratio", "Mgmt Negativity","#ef5350"),
|
| 950 |
+
]:
|
| 951 |
+
if col_name in td.columns:
|
| 952 |
+
fig2.add_trace(go.Scatter(
|
| 953 |
+
x=td["period"], y=td[col_name],
|
| 954 |
+
mode="lines+markers", name=label,
|
| 955 |
+
line=dict(color=color, width=2),
|
| 956 |
+
marker=dict(size=6),
|
| 957 |
+
hovertemplate=f"<b>{label}</b><br>%{{x}}<br>%{{y:.3f}}<extra></extra>",
|
| 958 |
+
))
|
| 959 |
+
|
| 960 |
+
# Mark selected quarter — use index position to avoid type issues
|
| 961 |
+
cur_period = f"{int(year)}-Q{int(quarter)}"
|
| 962 |
+
if cur_period in td["period"].values:
|
| 963 |
+
cur_idx = td[td["period"]==cur_period].index[0]
|
| 964 |
+
cur_pos = td["period"].tolist().index(cur_period)
|
| 965 |
+
fig2.add_vrect(
|
| 966 |
+
x0=cur_period, x1=cur_period,
|
| 967 |
+
line_dash="dot", line_color="#ffa726", line_width=2,
|
| 968 |
+
)
|
| 969 |
+
|
| 970 |
+
fig2.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=0.8)
|
| 971 |
+
fig2.update_layout(**L(
|
| 972 |
+
height=320,
|
| 973 |
+
title=f"{ticker} — Sentiment Over Time",
|
| 974 |
+
xaxis=dict(tickangle=45, tickfont=dict(size=9)),
|
| 975 |
+
yaxis=dict(title="Score"),
|
| 976 |
+
legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
|
| 977 |
+
))
|
| 978 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 979 |
+
|
| 980 |
+
# Scatter: sentiment vs return
|
| 981 |
+
if "ret_5d" in td.columns and "mgmt_net_sentiment" in td.columns:
|
| 982 |
+
st.markdown("<div class='subsection'>Sentiment vs 5-Day Return</div>",
|
| 983 |
+
unsafe_allow_html=True)
|
| 984 |
+
tc = td.dropna(subset=["ret_5d","mgmt_net_sentiment"]).copy()
|
| 985 |
+
tc["ret_pct"] = tc["ret_5d"].astype(float) * 100
|
| 986 |
+
sc_colors = ["#66bb6a" if r > 0 else "#ef5350"
|
| 987 |
+
for r in tc["ret_pct"]]
|
| 988 |
+
fig3 = go.Figure(go.Scatter(
|
| 989 |
+
x=tc["mgmt_net_sentiment"].astype(float),
|
| 990 |
+
y=tc["ret_pct"],
|
| 991 |
+
mode="markers+text",
|
| 992 |
+
text=tc["period"],
|
| 993 |
+
textposition="top center",
|
| 994 |
+
textfont=dict(size=8, color="#546e7a"),
|
| 995 |
+
marker=dict(color=sc_colors, size=9, opacity=0.85),
|
| 996 |
+
hovertemplate=(
|
| 997 |
+
"<b>%{text}</b><br>"
|
| 998 |
+
"Mgmt Sentiment: %{x:.3f}<br>"
|
| 999 |
+
"5d Return: %{y:.2f}%<extra></extra>"
|
| 1000 |
+
),
|
| 1001 |
+
))
|
| 1002 |
+
fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
|
| 1003 |
+
fig3.add_hline(y=0, line_dash="dash", line_color="#546e7a")
|
| 1004 |
+
fig3.update_layout(**L(
|
| 1005 |
+
height=340,
|
| 1006 |
+
title=f"{ticker} — Mgmt Sentiment vs 5-Day Return",
|
| 1007 |
+
xaxis=dict(title="Management Net Sentiment"),
|
| 1008 |
+
yaxis=dict(title="5-Day Return (%)"),
|
| 1009 |
+
))
|
| 1010 |
+
st.plotly_chart(fig3, use_container_width=True)
|
| 1011 |
+
else:
|
| 1012 |
+
st.info("Not enough historical data for this ticker.")
|