Rajveer Pall committed on
Commit
83a3762
·
verified ·
1 Parent(s): be17862

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +1012 -1012
app.py CHANGED
@@ -1,1012 +1,1012 @@
1
- """
2
- FinSight Dashboard — LLM-Powered Earnings Intelligence
3
- Stage 5: Production Streamlit Dashboard
4
-
5
- Pages:
6
- 1. Overview — project summary, pipeline, key stats
7
- 2. Model Results — walk-forward IC/AUC comparison, year-by-year
8
- 3. SHAP Analysis — interactive feature importance
9
- 4. Backtest — equity curve, drawdown, quarterly P&L
10
- 5. Explorer — browse transcripts with live sentiment
11
-
12
- Run:
13
- streamlit run src/dashboard/app.py
14
- """
15
-
16
- import sys
17
- from pathlib import Path
18
- import warnings
19
- warnings.filterwarnings("ignore")
20
-
21
- import numpy as np
22
- import pandas as pd
23
- import plotly.express as px
24
- import plotly.graph_objects as go
25
- from plotly.subplots import make_subplots
26
- import streamlit as st
27
-
28
- sys.path.insert(0, str(Path(__file__).resolve().parent))
29
- from config import PROCESSED_DIR, EXPERIMENTS_DIR
30
-
31
- # ── Page config ────────────────────────────────────────────────────────────────
32
-
33
- st.set_page_config(
34
- page_title="FinSight | Earnings Intelligence",
35
- page_icon="📈",
36
- layout="wide",
37
- initial_sidebar_state="expanded",
38
- )
39
-
40
- # ── Global CSS ─────────────────────────────────────────────────────────────────
41
-
42
- st.markdown("""
43
- <style>
44
- [data-testid="stAppViewContainer"] { background: #0a0e1a; color: #e8eaf6; }
45
- [data-testid="stSidebar"] {
46
- background: #0d1117;
47
- border-right: 1px solid #1e2433;
48
- }
49
- [data-testid="stSidebar"] .stRadio label {
50
- color: #8892b0 !important;
51
- font-size: 0.9rem;
52
- }
53
- .metric-card {
54
- background: linear-gradient(135deg, #0d1117 0%, #161b27 100%);
55
- border: 1px solid #1e2d4a;
56
- border-radius: 12px;
57
- padding: 20px 24px;
58
- text-align: center;
59
- transition: border-color 0.2s;
60
- }
61
- .metric-card:hover { border-color: #3d5a99; }
62
- .metric-value { font-size: 2rem; font-weight: 700; color: #64b5f6; line-height: 1.1; }
63
- .metric-label {
64
- font-size: 0.78rem; color: #8892b0;
65
- text-transform: uppercase; letter-spacing: 1px; margin-top: 6px;
66
- }
67
- .metric-delta { font-size: 0.82rem; margin-top: 4px; }
68
- .delta-pos { color: #66bb6a; }
69
- .delta-neg { color: #ef5350; }
70
- .delta-neu { color: #8892b0; }
71
- .section-header {
72
- font-size: 1.4rem; font-weight: 700; color: #e8eaf6;
73
- border-left: 4px solid #3d5a99; padding-left: 12px;
74
- margin: 28px 0 16px 0;
75
- }
76
- .subsection { font-size: 1rem; font-weight: 600; color: #8892b0; margin: 16px 0 8px 0; }
77
- .hero-title {
78
- font-size: 2.8rem; font-weight: 800;
79
- background: linear-gradient(90deg, #64b5f6, #7c4dff, #64b5f6);
80
- background-size: 200%;
81
- -webkit-background-clip: text; -webkit-text-fill-color: transparent;
82
- line-height: 1.2;
83
- }
84
- .hero-sub {
85
- font-size: 1.1rem; color: #8892b0; margin-top: 8px;
86
- max-width: 680px; line-height: 1.6;
87
- }
88
- .pipeline-step {
89
- background: #0d1117; border: 1px solid #1e2433;
90
- border-radius: 10px; padding: 14px 16px; text-align: center;
91
- }
92
- .pipeline-icon { font-size: 1.6rem; }
93
- .pipeline-label { font-size: 0.78rem; color: #8892b0; margin-top: 4px; }
94
- .pipeline-title { font-size: 0.9rem; font-weight: 600; color: #cfd8dc; }
95
- .insight-box {
96
- background: #0d1117; border-left: 3px solid #3d5a99;
97
- border-radius: 0 8px 8px 0; padding: 12px 16px; margin: 8px 0;
98
- font-size: 0.88rem; color: #b0bec5; line-height: 1.6;
99
- }
100
- .insight-box strong { color: #64b5f6; }
101
- .badge {
102
- display: inline-block; padding: 2px 10px; border-radius: 20px;
103
- font-size: 0.72rem; font-weight: 600; margin: 2px;
104
- }
105
- .badge-blue { background: #1a237e22; color: #64b5f6; border: 1px solid #1a237e; }
106
- .badge-green { background: #1b5e2022; color: #66bb6a; border: 1px solid #1b5e20; }
107
- .badge-red { background: #b71c1c22; color: #ef9a9a; border: 1px solid #b71c1c; }
108
- hr { border-color: #1e2433 !important; }
109
- ::-webkit-scrollbar { width: 6px; }
110
- ::-webkit-scrollbar-track { background: #0a0e1a; }
111
- ::-webkit-scrollbar-thumb { background: #1e2d4a; border-radius: 3px; }
112
- </style>
113
- """, unsafe_allow_html=True)
114
-
115
-
116
- # ── Layout helper — avoids duplicate xaxis/yaxis conflicts ────────────────────
117
-
118
# Shared dark-theme settings applied to every Plotly figure in the dashboard.
BASE_LAYOUT = dict(
    paper_bgcolor="#0d1117",
    plot_bgcolor="#0a0e1a",
    font=dict(color="#b0bec5", family="Inter, sans-serif"),
    margin=dict(l=50, r=30, t=50, b=50),
    colorway=["#64b5f6", "#66bb6a", "#ffa726", "#ef5350", "#ab47bc", "#26c6da"],
)
BASE_XAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
BASE_YAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")


def L(**kwargs):
    """
    Merge the base dark-theme layout with chart-specific overrides.

    ``xaxis`` / ``yaxis`` overrides are merged key-by-key into BASE_XAXIS /
    BASE_YAXIS instead of replacing them, which avoids the
    'multiple values for keyword argument xaxis' TypeError when the result
    is splatted into ``fig.update_layout(**L(...))``. Every other keyword
    argument replaces the BASE_LAYOUT entry of the same name wholesale.

    Args:
        **kwargs: Plotly layout properties. ``xaxis`` and ``yaxis`` may be
            dicts (merged over the base axis style) or ``None`` / omitted
            (base axis style only).

    Returns:
        A new dict of layout properties. It never aliases the module-level
        BASE_* containers, so callers may mutate it freely without
        changing the theme of later charts.
    """
    # Copy one level deep: the nested font/margin dicts and the colorway list
    # are flat, so this is enough to detach the result from BASE_LAYOUT.
    out = {
        key: (value.copy() if isinstance(value, (dict, list)) else value)
        for key, value in BASE_LAYOUT.items()
    }
    # Always build fresh axis dicts; `or {}` lets callers pass None explicitly.
    out["xaxis"] = {**BASE_XAXIS, **(kwargs.pop("xaxis", None) or {})}
    out["yaxis"] = {**BASE_YAXIS, **(kwargs.pop("yaxis", None) or {})}
    out.update(kwargs)
    return out
145
-
146
-
147
- # ── Global helpers ─────────────────────────────────────────────────────────────
148
-
149
def metric_card(col, value, label, delta="", delta_type="neu"):
    """Render a dark-theme KPI card inside a Streamlit column.

    Args:
        col: Object exposing ``markdown(body, unsafe_allow_html=...)``;
            the card is written into it.
        value: Headline figure shown in the ``metric-value`` row.
        label: Caption shown in the ``metric-label`` row.
        delta: Optional secondary text for the ``metric-delta`` row.
        delta_type: Suffix of the ``delta-<type>`` CSS class applied to the
            delta row (the stylesheet in this file defines pos/neg/neu).

    All arguments are interpolated into the HTML as-is (no escaping).
    """
    card_html = f"""
<div class='metric-card'>
<div class='metric-value'>{value}</div>
<div class='metric-label'>{label}</div>
<div class='metric-delta delta-{delta_type}'>{delta}</div>
</div>"""
    col.markdown(card_html, unsafe_allow_html=True)
157
-
158
-
159
- # ── Data loaders ───────────────────────────────────────────────────────────────
160
-
161
@st.cache_data(show_spinner=False)
def load_feature_matrix():
    """Load ``feature_matrix.parquet`` from PROCESSED_DIR.

    Returns:
        The parquet contents as a DataFrame, or an empty DataFrame when the
        file does not exist. The return value is memoized by
        ``st.cache_data``.
    """
    path = PROCESSED_DIR / "feature_matrix.parquet"
    if not path.exists():
        return pd.DataFrame()
    return pd.read_parquet(path)
165
-
166
@st.cache_data(show_spinner=False)
def load_model_results():
    """Load ``model_results.csv`` from EXPERIMENTS_DIR.

    Returns:
        The CSV contents as a DataFrame, or an empty DataFrame when the
        file does not exist. The return value is memoized by
        ``st.cache_data``.
    """
    path = EXPERIMENTS_DIR / "model_results.csv"
    if not path.exists():
        return pd.DataFrame()
    return pd.read_csv(path)
170
-
171
@st.cache_data(show_spinner=False)
def load_backtest():
    """Load ``backtest_results.csv`` from EXPERIMENTS_DIR.

    Returns:
        The CSV contents as a DataFrame, or an empty DataFrame when the
        file does not exist. The return value is memoized by
        ``st.cache_data``.
    """
    path = EXPERIMENTS_DIR / "backtest_results.csv"
    if not path.exists():
        return pd.DataFrame()
    return pd.read_csv(path)
175
-
176
@st.cache_data(show_spinner=False)
def load_shap():
    """Load ``shap_values.parquet`` from EXPERIMENTS_DIR.

    Returns:
        The parquet contents as a DataFrame, or an empty DataFrame when the
        file does not exist. The return value is memoized by
        ``st.cache_data``.
    """
    path = EXPERIMENTS_DIR / "shap_values.parquet"
    if not path.exists():
        return pd.DataFrame()
    return pd.read_parquet(path)
180
-
181
-
182
- # ── Sidebar ────────────────────────────────────────────────────────────────────
183
-
184
- with st.sidebar:
185
- st.markdown("""
186
- <div style='padding:12px 0 20px 0;'>
187
- <div style='font-size:1.5rem;font-weight:800;color:#64b5f6;'>📈 FinSight</div>
188
- <div style='font-size:0.75rem;color:#8892b0;margin-top:4px;'>
189
- LLM-Powered Earnings Intelligence
190
- </div>
191
- </div>
192
- """, unsafe_allow_html=True)
193
-
194
- page = st.radio(
195
- "Navigation",
196
- ["🏠 Overview",
197
- "📊 Model Performance",
198
- "🔍 Feature Importance",
199
- "💹 Backtest Results",
200
- "🔎 Transcript Explorer"],
201
- label_visibility="collapsed",
202
- )
203
-
204
- st.markdown("<hr>", unsafe_allow_html=True)
205
- st.markdown("""
206
- <div style='font-size:0.72rem;color:#8892b0;line-height:1.8;'>
207
- <b style='color:#cfd8dc;'>Stack</b><br>
208
- FinBERT · ChromaDB · XGBoost<br>
209
- LightGBM · SHAP · Streamlit<br><br>
210
- <b style='color:#cfd8dc;'>Data</b><br>
211
- 14,584 earnings transcripts<br>
212
- 601 S&amp;P 500 companies<br>
213
- 2018 – 2024<br><br>
214
- <b style='color:#cfd8dc;'>Author</b><br>
215
- Rajveer Singh Pall
216
- </div>
217
- """, unsafe_allow_html=True)
218
-
219
-
220
- # ═══════════════════════════════════════════════════════════════════════════════
221
- # PAGE 1 — OVERVIEW
222
- # ═══════════════════════════════════════════════════════════════════════════════
223
-
224
- if page == "🏠 Overview":
225
- fm = load_feature_matrix()
226
- mr = load_model_results()
227
-
228
- st.markdown("""
229
- <div style='padding:24px 0 8px 0;'>
230
- <div class='hero-title'>FinSight</div>
231
- <div class='hero-title' style='font-size:1.8rem;color:#7c4dff;'>
232
- Earnings Intelligence System
233
- </div>
234
- <div class='hero-sub'>
235
- An end-to-end machine learning pipeline that extracts alpha signals
236
- from S&amp;P 500 earnings call transcripts using FinBERT sentiment analysis,
237
- RAG-based structured feature extraction, and walk-forward validated
238
- gradient boosting models.
239
- </div>
240
- </div>
241
- """, unsafe_allow_html=True)
242
-
243
- st.markdown("<hr>", unsafe_allow_html=True)
244
-
245
- best_ic = float(mr["ic"].max()) if not mr.empty else 0.0198
246
- best_auc = float(mr["auc"].max()) if not mr.empty else 0.5201
247
- best_hr = float(mr["hit_rate"].max()) if not mr.empty else 0.5427
248
- n_rows = len(fm) if not fm.empty else 13442
249
-
250
- c1,c2,c3,c4,c5 = st.columns(5)
251
- metric_card(c1, "14,584", "Transcripts", "601 companies", "neu")
252
- metric_card(c2, f"{n_rows:,}", "Training Samples", "2018–2024", "neu")
253
- metric_card(c3, f"{best_ic:.4f}", "Best IC", "LightGBM", "pos")
254
- metric_card(c4, f"{best_auc:.4f}","Best AUC", "XGBoost 2024", "pos")
255
- metric_card(c5, f"{best_hr:.4f}", "Best Hit Rate", "Walk-forward", "pos")
256
-
257
- st.markdown("<br>", unsafe_allow_html=True)
258
-
259
- # Pipeline
260
- st.markdown("<div class='section-header'>System Architecture</div>",
261
- unsafe_allow_html=True)
262
- steps = [
263
- ("🗄️","Stage 1","Data Ingestion", "SEC EDGAR · yfinance\n14,584 transcripts"),
264
- ("🧠","Stage 2","NLP Pipeline", "FinBERT · ChromaDB RAG\n34 features"),
265
- ("🤖","Stage 3","ML Models", "XGBoost · LightGBM\nWalk-forward CV"),
266
- ("📉","Stage 4","Backtesting", "Long-short strategy\n10bps TC"),
267
- ("🖥️","Stage 5","Dashboard", "Streamlit · Plotly\nHugging Face Spaces"),
268
- ]
269
- cols = st.columns(len(steps))
270
- for col, (icon, stage, title, desc) in zip(cols, steps):
271
- col.markdown(f"""
272
- <div class='pipeline-step'>
273
- <div class='pipeline-icon'>{icon}</div>
274
- <div class='pipeline-label'>{stage}</div>
275
- <div class='pipeline-title'>{title}</div>
276
- <div style='font-size:0.72rem;color:#546e7a;margin-top:4px;line-height:1.5;'>
277
- {desc}</div>
278
- </div>""", unsafe_allow_html=True)
279
-
280
- st.markdown("<br>", unsafe_allow_html=True)
281
-
282
- left, right = st.columns([1.1, 1])
283
-
284
- with left:
285
- st.markdown("<div class='section-header'>Key Findings</div>",
286
- unsafe_allow_html=True)
287
- for f in [
288
- "<strong>Analyst negativity &gt; management positivity.</strong> "
289
- "qa_neg_ratio (SHAP=0.054) is the single strongest feature. "
290
- "Analyst pushback in Q&amp;A contains more information than prepared remarks.",
291
-
292
- "<strong>NLP reduces prediction variance by 87%.</strong> "
293
- "Baseline IC std=0.114 vs LightGBM std=0.009 — "
294
- "far more consistent across years.",
295
-
296
- "<strong>Consistent with weak-form EMH.</strong> "
297
- "Positive IC (0.0198) exists but cannot overcome 10bps transaction "
298
- "costs at a 5-day holding period.",
299
-
300
- "<strong>RAG guidance relevance is top-5.</strong> "
301
- "Semantic relevance of the guidance section — not just its content — "
302
- "carries significant predictive signal.",
303
- ]:
304
- st.markdown(f"<div class='insight-box'>{f}</div>",
305
- unsafe_allow_html=True)
306
-
307
- with right:
308
- st.markdown("<div class='section-header'>Dataset Coverage</div>",
309
- unsafe_allow_html=True)
310
- if not fm.empty:
311
- yr = fm.groupby("year").size().reset_index(name="count")
312
- fig = go.Figure(go.Bar(
313
- x=yr["year"].astype(str),
314
- y=yr["count"],
315
- marker=dict(color=yr["count"],
316
- colorscale=[[0,"#1a237e"],[1,"#64b5f6"]],
317
- showscale=False),
318
- text=yr["count"], textposition="outside",
319
- textfont=dict(size=11),
320
- ))
321
- fig.update_layout(**L(title="Transcript Count by Year", height=300,
322
- showlegend=False,
323
- xaxis=dict(title="Year"),
324
- yaxis=dict(title="Transcripts")))
325
- st.plotly_chart(fig, use_container_width=True)
326
-
327
- # Sentiment heatmap
328
- if not fm.empty and "mgmt_net_sentiment" in fm.columns:
329
- st.markdown("<div class='section-header'>Sentiment Landscape</div>",
330
- unsafe_allow_html=True)
331
- heat = (fm.groupby(["ticker","year"])["mgmt_net_sentiment"]
332
- .mean().reset_index())
333
- top_t = fm["ticker"].value_counts().head(30).index
334
- heat = heat[heat["ticker"].isin(top_t)]
335
- pivot = heat.pivot(index="ticker", columns="year",
336
- values="mgmt_net_sentiment")
337
- fig2 = go.Figure(go.Heatmap(
338
- z=pivot.values,
339
- x=[str(c) for c in pivot.columns],
340
- y=pivot.index,
341
- colorscale=[[0,"#b71c1c"],[0.35,"#e53935"],
342
- [0.5,"#263238"],[0.65,"#1565c0"],[1,"#64b5f6"]],
343
- zmid=0,
344
- colorbar=dict(title="Net Sentiment", tickfont=dict(size=10)),
345
- hovertemplate="Ticker: %{y}<br>Year: %{x}<br>Sentiment: %{z:.3f}<extra></extra>",
346
- ))
347
- fig2.update_layout(**L(
348
- title="Management Net Sentiment — Top 30 Tickers × Year",
349
- height=500,
350
- xaxis=dict(title="Year"),
351
- yaxis=dict(title=""),
352
- ))
353
- st.plotly_chart(fig2, use_container_width=True)
354
-
355
-
356
- # ═══════════════════════════════════════════════════════════════════════════════
357
- # PAGE 2 — MODEL PERFORMANCE
358
- # ═══════════════════════════════════════════════════════════════════════════════
359
-
360
- elif page == "📊 Model Performance":
361
- mr = load_model_results()
362
-
363
- st.markdown("<div class='hero-title' style='font-size:2rem;'>Model Performance</div>",
364
- unsafe_allow_html=True)
365
- st.markdown("<div class='hero-sub'>Walk-forward validation (2021–2024). "
366
- "Train on 3 prior years, test on held-out year. Zero data leakage.</div>",
367
- unsafe_allow_html=True)
368
- st.markdown("<hr>", unsafe_allow_html=True)
369
-
370
- if mr.empty:
371
- st.error("model_results.csv not found. Run Stage 3 first.")
372
- st.stop()
373
-
374
- summary = (
375
- mr.groupby("model")[["ic","hit_rate","auc"]]
376
- .agg({"ic":["mean","std"],"hit_rate":["mean","std"],"auc":["mean","std"]})
377
- .round(4)
378
- )
379
- summary.columns = ["IC Mean","IC Std","Hit Rate Mean","Hit Rate Std",
380
- "AUC Mean","AUC Std"]
381
- summary = summary.sort_values("IC Mean", ascending=False)
382
-
383
- st.markdown("<div class='section-header'>Model Comparison</div>",
384
- unsafe_allow_html=True)
385
-
386
def color_ic(val):
    """Return the CSS style string for one cell of the model-summary table.

    Floats above 0.015 are styled bold green, negative floats red;
    everything else (including non-float values) gets no styling.
    """
    if not isinstance(val, float):
        return ""
    if val > 0.015:
        return "color: #66bb6a; font-weight:600"
    return "color: #ef5350" if val < 0 else ""
391
-
392
- st.dataframe(
393
- summary.style.applymap(color_ic, subset=["IC Mean"]).format("{:.4f}"),
394
- use_container_width=True, height=220,
395
- )
396
-
397
- st.markdown("<br>", unsafe_allow_html=True)
398
- st.markdown("<div class='section-header'>Information Coefficient by Year</div>",
399
- unsafe_allow_html=True)
400
-
401
- MODEL_COLORS = {
402
- "Baseline": "#ffa726",
403
- "FinBERT_only": "#26c6da",
404
- "RAG_only": "#ab47bc",
405
- "XGBoost_all": "#ef5350",
406
- "LightGBM_all": "#66bb6a",
407
- }
408
-
409
- fig = go.Figure()
410
- for m in mr["model"].unique():
411
- sub = mr[mr["model"]==m].sort_values("test_year")
412
- fig.add_trace(go.Scatter(
413
- x=sub["test_year"].astype(int),
414
- y=sub["ic"],
415
- mode="lines+markers", name=m,
416
- line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2.5),
417
- marker=dict(size=9),
418
- hovertemplate=f"<b>{m}</b><br>Year: %{{x}}<br>IC: %{{y:.4f}}<extra></extra>",
419
- ))
420
- fig.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=1.2)
421
- fig.update_layout(**L(
422
- title="Walk-Forward IC — Positive = Predictive",
423
- height=380,
424
- xaxis=dict(tickvals=[2021,2022,2023,2024], title="Year"),
425
- yaxis=dict(title="Information Coefficient"),
426
- legend=dict(bgcolor="#0d1117", bordercolor="#1e2433", borderwidth=1),
427
- ))
428
- st.plotly_chart(fig, use_container_width=True)
429
-
430
- col1, col2 = st.columns(2)
431
-
432
- with col1:
433
- st.markdown("<div class='subsection'>Hit Rate by Year</div>",
434
- unsafe_allow_html=True)
435
- fig2 = go.Figure()
436
- for m in mr["model"].unique():
437
- sub = mr[mr["model"]==m].sort_values("test_year")
438
- fig2.add_trace(go.Scatter(
439
- x=sub["test_year"].astype(int), y=sub["hit_rate"],
440
- mode="lines+markers", name=m,
441
- line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
442
- marker=dict(size=7), showlegend=False,
443
- ))
444
- fig2.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
445
- fig2.update_layout(**L(
446
- height=300, title="Hit Rate (>0.5 = better than coin flip)",
447
- xaxis=dict(tickvals=[2021,2022,2023,2024]),
448
- yaxis=dict(title="Hit Rate"),
449
- ))
450
- st.plotly_chart(fig2, use_container_width=True)
451
-
452
- with col2:
453
- st.markdown("<div class='subsection'>AUC by Year</div>",
454
- unsafe_allow_html=True)
455
- fig3 = go.Figure()
456
- for m in mr["model"].unique():
457
- sub = mr[mr["model"]==m].sort_values("test_year")
458
- fig3.add_trace(go.Scatter(
459
- x=sub["test_year"].astype(int), y=sub["auc"],
460
- mode="lines+markers", name=m,
461
- line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
462
- marker=dict(size=7), showlegend=False,
463
- ))
464
- fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
465
- fig3.update_layout(**L(
466
- height=300, title="AUC-ROC (>0.5 = better than random)",
467
- xaxis=dict(tickvals=[2021,2022,2023,2024]),
468
- yaxis=dict(title="AUC"),
469
- ))
470
- st.plotly_chart(fig3, use_container_width=True)
471
-
472
- st.markdown("<div class='section-header'>Stability Analysis — IC Variance</div>",
473
- unsafe_allow_html=True)
474
- ic_std = mr.groupby("model")["ic"].std().sort_values()
475
- ic_mean = mr.groupby("model")["ic"].mean()
476
- bar_colors = ["#66bb6a" if ic_mean[m] > 0 else "#ef5350" for m in ic_std.index]
477
-
478
- fig4 = go.Figure(go.Bar(
479
- y=ic_std.index, x=ic_std.values, orientation="h",
480
- marker_color=bar_colors,
481
- text=[f"σ={v:.4f}" for v in ic_std.values],
482
- textposition="outside", textfont=dict(size=11),
483
- ))
484
- fig4.update_layout(**L(
485
- title="IC Standard Deviation — Lower = More Consistent",
486
- height=280,
487
- xaxis=dict(title="IC Std Dev"),
488
- yaxis=dict(title=""),
489
- ))
490
- st.plotly_chart(fig4, use_container_width=True)
491
-
492
- st.markdown("""
493
- <div class='insight-box'>
494
- <strong>Interpretation:</strong> The Baseline's high IC mean (0.043) is
495
- misleading — its std of 0.114 shows extreme instability driven by lucky
496
- quarters. LightGBM achieves IC=0.020 with std=0.009, making it
497
- <strong>10× more stable</strong>. In live trading, consistency matters
498
- far more than occasional lucky peaks.
499
- </div>
500
- """, unsafe_allow_html=True)
501
-
502
-
503
- # ═══════════════════════════════════════════════════════════════════════════════
504
- # PAGE 3 — FEATURE IMPORTANCE
505
- # ═══════════════════════════════════════════════════════════════════════════════
506
-
507
- elif page == "🔍 Feature Importance":
508
- shap_df = load_shap()
509
- fm = load_feature_matrix()
510
-
511
- st.markdown("<div class='hero-title' style='font-size:2rem;'>Feature Importance</div>",
512
- unsafe_allow_html=True)
513
- st.markdown("<div class='hero-sub'>SHAP values computed on LightGBM (best model). "
514
- "Shows which features actually drive predictions.</div>",
515
- unsafe_allow_html=True)
516
- st.markdown("<hr>", unsafe_allow_html=True)
517
-
518
- if shap_df.empty:
519
- st.error("shap_values.parquet not found. Run run_shap.py first.")
520
- st.stop()
521
-
522
- mean_shap = shap_df.abs().mean().sort_values(ascending=False)
523
-
524
def feat_color(name):
    """Map a feature name to its bar color based on the name's prefix.

    ``rag_`` -> blue, ``mgmt_`` -> green, ``qa_`` -> orange; any other
    name falls back to purple.
    """
    prefix_colors = (
        ("rag_", "#64b5f6"),
        ("mgmt_", "#66bb6a"),
        ("qa_", "#ffa726"),
    )
    for prefix, color in prefix_colors:
        if name.startswith(prefix):
            return color
    return "#ab47bc"
529
-
530
def feat_group(name):
    """Map a feature name to its display group based on the name's prefix.

    ``rag_`` -> "RAG Features", ``mgmt_`` -> "Management FinBERT",
    ``qa_`` -> "QA FinBERT"; any other name is grouped as "Other".
    """
    prefix_groups = (
        ("rag_", "RAG Features"),
        ("mgmt_", "Management FinBERT"),
        ("qa_", "QA FinBERT"),
    )
    for prefix, group in prefix_groups:
        if name.startswith(prefix):
            return group
    return "Other"
535
-
536
- st.markdown("<div class='section-header'>Top 20 Features by Mean |SHAP|</div>",
537
- unsafe_allow_html=True)
538
- top20 = mean_shap.head(20)[::-1]
539
- fig = go.Figure(go.Bar(
540
- y=top20.index, x=top20.values, orientation="h",
541
- marker_color=[feat_color(n) for n in top20.index],
542
- text=[f"{v:.4f}" for v in top20.values],
543
- textposition="outside", textfont=dict(size=10),
544
- hovertemplate="<b>%{y}</b><br>Mean |SHAP|: %{x:.4f}<extra></extra>",
545
- ))
546
- fig.update_layout(**L(
547
- height=520,
548
- title="Feature Importance — 🔵 RAG | 🟢 Mgmt FinBERT | 🟠 QA FinBERT",
549
- xaxis=dict(title="Mean |SHAP Value|"),
550
- yaxis=dict(title=""),
551
- ))
552
- st.plotly_chart(fig, use_container_width=True)
553
-
554
- col1, col2 = st.columns(2)
555
-
556
- with col1:
557
- st.markdown("<div class='section-header'>Importance by Feature Group</div>",
558
- unsafe_allow_html=True)
559
- gs = (mean_shap.reset_index()
560
- .rename(columns={"index":"feature", 0:"shap"}))
561
- gs.columns = ["feature","shap"]
562
- gs["group"] = gs["feature"].apply(feat_group)
563
- gt = gs.groupby("group")["shap"].sum()
564
-
565
- fig2 = go.Figure(go.Pie(
566
- labels=gt.index, values=gt.values, hole=0.55,
567
- marker=dict(colors=["#64b5f6","#66bb6a","#ffa726","#ab47bc"]),
568
- textinfo="label+percent", textfont=dict(size=12),
569
- hovertemplate="<b>%{label}</b><br>Total SHAP: %{value:.4f}<br>%{percent}<extra></extra>",
570
- ))
571
- fig2.update_layout(**L(
572
- height=320, showlegend=False,
573
- annotations=[dict(text="SHAP<br>Groups", x=0.5, y=0.5,
574
- font_size=13, showarrow=False,
575
- font_color="#b0bec5")],
576
- ))
577
- st.plotly_chart(fig2, use_container_width=True)
578
-
579
- with col2:
580
- st.markdown("<div class='section-header'>SHAP vs Correlation with Target</div>",
581
- unsafe_allow_html=True)
582
- if not fm.empty and "target_5d_up" in fm.columns:
583
- feat_cols = [c for c in shap_df.columns if c in fm.columns]
584
- corrs = fm[feat_cols+["target_5d_up"]].corr()["target_5d_up"].drop("target_5d_up")
585
- cdf = pd.DataFrame({
586
- "feature": corrs.index,
587
- "shap": mean_shap.reindex(corrs.index).fillna(0).values,
588
- "corr": corrs.values,
589
- "group": [feat_group(f) for f in corrs.index],
590
- })
591
- cmap = {
592
- "RAG Features": "#64b5f6",
593
- "Management FinBERT": "#66bb6a",
594
- "QA FinBERT": "#ffa726",
595
- "Other": "#ab47bc",
596
- }
597
- fig3 = px.scatter(
598
- cdf, x="corr", y="shap", color="group",
599
- color_discrete_map=cmap, hover_data=["feature"],
600
- labels={"corr":"Pearson Corr with Target",
601
- "shap":"Mean |SHAP Value|"},
602
- height=320,
603
- )
604
- fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
605
- fig3.update_layout(**L(
606
- title="SHAP Importance vs Linear Correlation",
607
- showlegend=False,
608
- ))
609
- st.plotly_chart(fig3, use_container_width=True)
610
-
611
- st.markdown("<div class='section-header'>Feature Insights</div>",
612
- unsafe_allow_html=True)
613
- insights = [
614
- ("🏆 #1 — qa_neg_ratio",
615
- "Proportion of negative sentences in analyst Q&amp;A. When analysts "
616
- "push back hard, it signals market-moving information that management "
617
- "tried to downplay."),
618
- ("📊 #2 — mgmt_sent_vol",
619
- "Volatility of management's sentence-level sentiment. Inconsistent "
620
- "messaging — mixing optimism with caution — often precedes larger "
621
- "price moves."),
622
- ("📝 #3 — qa_n_sentences",
623
- "Length of the Q&amp;A section. Longer Q&amp;A sessions indicate "
624
- "more analyst scrutiny, which correlates with uncertainty about "
625
- "the quarter's results."),
626
- ("😶 #4 — mgmt_mean_neu",
627
- "Neutral sentiment ratio in management remarks. Deliberately neutral "
628
- "language can mask very good or very bad news — a hedging signal."),
629
- ("🎯 #5 — rag_guidance_relevance",
630
- "Semantic similarity of the guidance section to specific numerical "
631
- "guidance queries. More relevant guidance sections contain concrete "
632
- "targets that markets react to more strongly."),
633
- ]
634
- cols = st.columns(len(insights))
635
- for col, (title, body) in zip(cols, insights):
636
- col.markdown(f"""
637
- <div class='pipeline-step' style='text-align:left;height:190px;'>
638
- <div style='font-size:0.82rem;font-weight:700;color:#64b5f6;
639
- margin-bottom:8px;'>{title}</div>
640
- <div style='font-size:0.76rem;color:#8892b0;line-height:1.6;'>
641
- {body}</div>
642
- </div>""", unsafe_allow_html=True)
643
-
644
-
645
- # ═══════════════════════════════════════════════════════════════════════════════
646
- # PAGE 4 — BACKTEST
647
- # ═══════════════════════════════════════════════════════════════════════════════
648
-
649
- elif page == "💹 Backtest Results":
650
- bt = load_backtest()
651
-
652
- st.markdown("<div class='hero-title' style='font-size:2rem;'>Backtest Results</div>",
653
- unsafe_allow_html=True)
654
- st.markdown("<div class='hero-sub'>Long-short quartile portfolio. "
655
- "Long top-25% predicted stocks, short bottom-25%. "
656
- "5-day holding period. 10bps round-trip transaction cost.</div>",
657
- unsafe_allow_html=True)
658
- st.markdown("<hr>", unsafe_allow_html=True)
659
-
660
- if bt.empty:
661
- st.error("backtest_results.csv not found. Run Stage 4 first.")
662
- st.stop()
663
-
664
- bt = bt.sort_values(["year","quarter"]).reset_index(drop=True)
665
- bt["period"] = bt["year"].astype(str) + "-Q" + bt["quarter"].astype(str)
666
- rets = bt["net_ret"]
667
- cum = (1 + rets).cumprod()
668
- peak = cum.cummax()
669
- dd = (cum - peak) / peak
670
-
671
- n_yrs = len(bt) / 4
672
- ann_ret = float((1 + rets).prod() ** (1/n_yrs) - 1)
673
- ann_vol = float(rets.std() * np.sqrt(4))
674
- sharpe = ann_ret / ann_vol if ann_vol != 0 else 0.0
675
- max_dd = float(dd.min())
676
- hit = float((rets > 0).mean())
677
-
678
- c1,c2,c3,c4,c5 = st.columns(5)
679
- metric_card(c1, f"{ann_ret*100:.2f}%", "Ann. Return",
680
- "After TC", "pos" if ann_ret > 0 else "neg")
681
- metric_card(c2, f"{sharpe:.3f}", "Sharpe Ratio",
682
- ">1.0 = excellent", "pos" if sharpe > 0 else "neg")
683
- metric_card(c3, f"{max_dd*100:.2f}%", "Max Drawdown",
684
- "Peak-to-trough", "neg")
685
- metric_card(c4, f"{hit*100:.0f}%", "Win Rate",
686
- "Profitable quarters", "pos" if hit > 0.5 else "neg")
687
- metric_card(c5, str(len(bt)), "Quarters Tested",
688
- "2021–2024", "neu")
689
-
690
- st.markdown("<br>", unsafe_allow_html=True)
691
- st.markdown("<div class='section-header'>Equity Curve</div>",
692
- unsafe_allow_html=True)
693
-
694
- fig = make_subplots(rows=2, cols=1, row_heights=[0.72,0.28],
695
- shared_xaxes=True, vertical_spacing=0.04)
696
- fig.add_trace(go.Scatter(
697
- x=bt["period"], y=cum.values,
698
- mode="lines+markers",
699
- line=dict(color="#64b5f6", width=2.5),
700
- marker=dict(size=7),
701
- fill="tozeroy", fillcolor="rgba(100,181,246,0.06)",
702
- name="Cumulative Return",
703
- hovertemplate="<b>%{x}</b><br>Cumulative: %{y:.4f}<extra></extra>",
704
- ), row=1, col=1)
705
- fig.add_hline(y=1.0, line_dash="dash", line_color="#546e7a",
706
- line_width=1, row=1, col=1)
707
- fig.add_trace(go.Bar(
708
- x=bt["period"], y=dd.values*100,
709
- marker_color="#ef5350", opacity=0.7, name="Drawdown %",
710
- hovertemplate="<b>%{x}</b><br>Drawdown: %{y:.2f}%<extra></extra>",
711
- ), row=2, col=1)
712
-
713
- fig.update_layout(
714
- paper_bgcolor="#0d1117", plot_bgcolor="#0a0e1a",
715
- font=dict(color="#b0bec5"),
716
- margin=dict(l=50,r=30,t=50,b=80),
717
- title="FinSight Long-Short Strategy — 2021 to 2024",
718
- height=500, showlegend=False,
719
- xaxis2=dict(tickangle=45, tickfont_size=10,
720
- gridcolor="#1a2035", linecolor="#1e2433"),
721
- yaxis=dict(title="Cumulative Return",
722
- gridcolor="#1a2035", linecolor="#1e2433"),
723
- yaxis2=dict(title="DD %",
724
- gridcolor="#1a2035", linecolor="#1e2433"),
725
- )
726
- fig.update_xaxes(gridcolor="#1a2035", linecolor="#1e2433")
727
- st.plotly_chart(fig, use_container_width=True)
728
-
729
- col1, col2 = st.columns(2)
730
-
731
- with col1:
732
- st.markdown("<div class='subsection'>Quarterly Net Returns</div>",
733
- unsafe_allow_html=True)
734
- q_colors = ["#66bb6a" if r > 0 else "#ef5350" for r in rets]
735
- fig2 = go.Figure(go.Bar(
736
- x=bt["period"], y=rets.values*100,
737
- marker_color=q_colors,
738
- text=[f"{v*100:.2f}%" for v in rets.values],
739
- textposition="outside", textfont=dict(size=9),
740
- hovertemplate="<b>%{x}</b><br>Net Return: %{y:.2f}%<extra></extra>",
741
- ))
742
- fig2.add_hline(y=0, line_color="#546e7a", line_width=1)
743
- fig2.update_layout(**L(
744
- height=320,
745
- title="Net Return per Quarter (after 10bps TC)",
746
- xaxis=dict(tickangle=45, tickfont=dict(size=9)),
747
- yaxis=dict(title="Net Return (%)"),
748
- ))
749
- st.plotly_chart(fig2, use_container_width=True)
750
-
751
- with col2:
752
- st.markdown("<div class='subsection'>Long vs Short Leg Hit Rate</div>",
753
- unsafe_allow_html=True)
754
- fig3 = go.Figure()
755
- fig3.add_trace(go.Scatter(
756
- x=bt["period"], y=bt["long_hit"],
757
- mode="lines+markers",
758
- line=dict(color="#66bb6a", width=2),
759
- marker=dict(size=7), name="Long Leg",
760
- ))
761
- fig3.add_trace(go.Scatter(
762
- x=bt["period"], y=bt["short_hit"],
763
- mode="lines+markers",
764
- line=dict(color="#ef5350", width=2),
765
- marker=dict(size=7), name="Short Leg",
766
- ))
767
- fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a")
768
- fig3.update_layout(**L(
769
- height=320,
770
- title="Direction Accuracy — Long &amp; Short Legs",
771
- xaxis=dict(tickangle=45, tickfont=dict(size=9)),
772
- yaxis=dict(title="Hit Rate"),
773
- legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
774
- ))
775
- st.plotly_chart(fig3, use_container_width=True)
776
-
777
- st.markdown("<div class='section-header'>Quarterly Breakdown</div>",
778
- unsafe_allow_html=True)
779
- disp = bt[["period","net_ret","long_ret","short_ret",
780
- "long_hit","short_hit","n_stocks","q_size"]].copy()
781
- disp.columns = ["Quarter","Net Ret","Long Ret","Short Ret",
782
- "Long Hit","Short Hit","N Stocks","Leg Size"]
783
-
784
- def color_ret(val):
785
- if isinstance(val, float):
786
- if val > 0: return "color: #66bb6a"
787
- if val < 0: return "color: #ef5350"
788
- return ""
789
-
790
- st.dataframe(
791
- disp.style.applymap(color_ret,
792
- subset=["Net Ret","Long Ret","Short Ret"])
793
- .format({c:"{:.4f}" for c in
794
- ["Net Ret","Long Ret","Short Ret",
795
- "Long Hit","Short Hit"]}),
796
- use_container_width=True, hide_index=True,
797
- )
798
-
799
- st.markdown("""
800
- <div class='insight-box'>
801
- <strong>Context:</strong> A Sharpe of -0.81 with a 5-day holding period
802
- is consistent with academic literature on post-earnings announcement
803
- drift (Chan et al. 1996, Lerman et al. 2008). The signal exists
804
- (IC=0.0198) but is too weak to survive round-trip transaction costs at
805
- this frequency. Extending to 20-day holding periods is the natural
806
- next step.
807
- </div>
808
- """, unsafe_allow_html=True)
809
-
810
-
811
- # ═══════════════════════════════════════════════════════════════════════════════
812
- # PAGE 5 — TRANSCRIPT EXPLORER
813
- # ═══════════════════════════════════════════════════════════════════════════════
814
-
815
- elif page == "🔎 Transcript Explorer":
816
- fm = load_feature_matrix()
817
-
818
- st.markdown("<div class='hero-title' style='font-size:2rem;'>Transcript Explorer</div>",
819
- unsafe_allow_html=True)
820
- st.markdown("<div class='hero-sub'>Browse sentiment profiles for any company "
821
- "and quarter in the dataset.</div>",
822
- unsafe_allow_html=True)
823
- st.markdown("<hr>", unsafe_allow_html=True)
824
-
825
- if fm.empty:
826
- st.error("Feature matrix not found.")
827
- st.stop()
828
-
829
- col1, col2, col3 = st.columns([2,1,1])
830
- with col1:
831
- all_tickers = sorted(fm["ticker"].dropna().unique())
832
- default_idx = all_tickers.index("AAPL") if "AAPL" in all_tickers else 0
833
- ticker = st.selectbox("Select Ticker", all_tickers, index=default_idx)
834
- with col2:
835
- years = sorted(fm["year"].unique(), reverse=True)
836
- year = st.selectbox("Year", years)
837
- with col3:
838
- quarters = sorted(fm[fm["year"]==year]["quarter"].unique())
839
- quarter = st.selectbox("Quarter", quarters)
840
-
841
- row = fm[(fm["ticker"]==ticker) &
842
- (fm["year"]==year) &
843
- (fm["quarter"]==quarter)]
844
-
845
- if row.empty:
846
- st.warning("No data for this combination.")
847
- st.stop()
848
-
849
- row = row.iloc[0]
850
-
851
- ret_5d = row.get("ret_5d", 0)
852
- target = int(row.get("target_5d_up", 0))
853
- st.markdown(f"""
854
- <div style='display:flex;align-items:center;gap:16px;margin:16px 0;'>
855
- <div style='font-size:2rem;font-weight:800;color:#64b5f6;'>{ticker}</div>
856
- <div style='font-size:1rem;color:#8892b0;'>{int(year)} Q{int(quarter)}</div>
857
- <div class='badge badge-{"green" if target==1 else "red"}'>
858
- {"â–² UP" if target==1 else "â–¼ DOWN"} 5d
859
- </div>
860
- <div class='badge badge-blue'>
861
- 5d Return: {float(ret_5d)*100:.2f}%
862
- </div>
863
- </div>
864
- """, unsafe_allow_html=True)
865
-
866
- left, right = st.columns([1.2, 1])
867
-
868
- with left:
869
- st.markdown("<div class='subsection'>Sentiment Breakdown</div>",
870
- unsafe_allow_html=True)
871
- cats = ["Mgmt Positive","Mgmt Neutral","Mgmt Negative",
872
- "QA Positive","QA Neutral","QA Negative"]
873
- vals = [
874
- float(row.get("mgmt_mean_pos", 0) or 0),
875
- float(row.get("mgmt_mean_neu", 0) or 0),
876
- float(row.get("mgmt_mean_neg", 0) or 0),
877
- float(row.get("qa_mean_pos", 0) or 0),
878
- float(row.get("qa_mean_neu", 0) or 0),
879
- float(row.get("qa_mean_neg", 0) or 0),
880
- ]
881
- vals_c = vals + [vals[0]]
882
- cats_c = cats + [cats[0]]
883
- fig = go.Figure(go.Scatterpolar(
884
- r=vals_c, theta=cats_c, fill="toself",
885
- fillcolor="rgba(100,181,246,0.15)",
886
- line=dict(color="#64b5f6", width=2), name=ticker,
887
- ))
888
- fig.update_layout(
889
- paper_bgcolor="#0d1117",
890
- font=dict(color="#b0bec5"),
891
- polar=dict(
892
- bgcolor="#0d1117",
893
- radialaxis=dict(visible=True, range=[0,1],
894
- gridcolor="#1a2035", linecolor="#1a2035",
895
- tickfont=dict(size=9, color="#546e7a")),
896
- angularaxis=dict(gridcolor="#1a2035", linecolor="#1a2035",
897
- tickfont=dict(size=10, color="#b0bec5")),
898
- ),
899
- height=360, showlegend=False,
900
- title=f"{ticker} — Sentiment Radar",
901
- margin=dict(l=40,r=40,t=50,b=40),
902
- )
903
- st.plotly_chart(fig, use_container_width=True)
904
-
905
- with right:
906
- st.markdown("<div class='subsection'>Feature Scores</div>",
907
- unsafe_allow_html=True)
908
-
909
- def score_bar(label, val, invert=False):
910
- if val is None or pd.isna(val):
911
- return
912
- v = float(val)
913
- pct = max(0, min(1, v)) * 100
914
- color = "#ef5350" if invert else "#64b5f6"
915
- st.markdown(f"""
916
- <div style='margin:8px 0;'>
917
- <div style='display:flex;justify-content:space-between;
918
- font-size:0.8rem;color:#8892b0;margin-bottom:3px;'>
919
- <span>{label}</span><span>{v:.3f}</span>
920
- </div>
921
- <div style='background:#1a2035;border-radius:4px;height:6px;'>
922
- <div style='background:{color};width:{pct:.0f}%;
923
- height:6px;border-radius:4px;'></div>
924
- </div>
925
- </div>""", unsafe_allow_html=True)
926
-
927
- score_bar("Mgmt Net Sentiment", row.get("mgmt_net_sentiment"))
928
- score_bar("QA Net Sentiment", row.get("qa_net_sentiment"))
929
- score_bar("Mgmt Negativity", row.get("mgmt_neg_ratio"), invert=True)
930
- score_bar("QA Negativity", row.get("qa_neg_ratio"), invert=True)
931
- score_bar("Guidance Specificity", row.get("rag_guidance_specificity_score"))
932
- score_bar("Mgmt Confidence", row.get("rag_management_confidence_score"))
933
- score_bar("Forward Looking", row.get("rag_forward_looking_score"))
934
- score_bar("New Risks", row.get("rag_new_risks_score"), invert=True)
935
- score_bar("Cost Pressure", row.get("rag_cost_pressure_score"), invert=True)
936
-
937
- # Historical trend
938
- st.markdown(f"<div class='section-header'>{ticker} — Historical Sentiment</div>",
939
- unsafe_allow_html=True)
940
-
941
- td = fm[fm["ticker"]==ticker].copy().sort_values(["year","quarter"])
942
- td["period"] = td["year"].astype(str) + "-Q" + td["quarter"].astype(str)
943
-
944
- if len(td) > 1:
945
- fig2 = go.Figure()
946
- for col_name, label, color in [
947
- ("mgmt_net_sentiment", "Mgmt Sentiment", "#66bb6a"),
948
- ("qa_net_sentiment", "QA Sentiment", "#64b5f6"),
949
- ("mgmt_neg_ratio", "Mgmt Negativity","#ef5350"),
950
- ]:
951
- if col_name in td.columns:
952
- fig2.add_trace(go.Scatter(
953
- x=td["period"], y=td[col_name],
954
- mode="lines+markers", name=label,
955
- line=dict(color=color, width=2),
956
- marker=dict(size=6),
957
- hovertemplate=f"<b>{label}</b><br>%{{x}}<br>%{{y:.3f}}<extra></extra>",
958
- ))
959
-
960
- # Mark selected quarter — use index position to avoid type issues
961
- cur_period = f"{int(year)}-Q{int(quarter)}"
962
- if cur_period in td["period"].values:
963
- cur_idx = td[td["period"]==cur_period].index[0]
964
- cur_pos = td["period"].tolist().index(cur_period)
965
- fig2.add_vrect(
966
- x0=cur_period, x1=cur_period,
967
- line_dash="dot", line_color="#ffa726", line_width=2,
968
- )
969
-
970
- fig2.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=0.8)
971
- fig2.update_layout(**L(
972
- height=320,
973
- title=f"{ticker} — Sentiment Over Time",
974
- xaxis=dict(tickangle=45, tickfont=dict(size=9)),
975
- yaxis=dict(title="Score"),
976
- legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
977
- ))
978
- st.plotly_chart(fig2, use_container_width=True)
979
-
980
- # Scatter: sentiment vs return
981
- if "ret_5d" in td.columns and "mgmt_net_sentiment" in td.columns:
982
- st.markdown("<div class='subsection'>Sentiment vs 5-Day Return</div>",
983
- unsafe_allow_html=True)
984
- tc = td.dropna(subset=["ret_5d","mgmt_net_sentiment"]).copy()
985
- tc["ret_pct"] = tc["ret_5d"].astype(float) * 100
986
- sc_colors = ["#66bb6a" if r > 0 else "#ef5350"
987
- for r in tc["ret_pct"]]
988
- fig3 = go.Figure(go.Scatter(
989
- x=tc["mgmt_net_sentiment"].astype(float),
990
- y=tc["ret_pct"],
991
- mode="markers+text",
992
- text=tc["period"],
993
- textposition="top center",
994
- textfont=dict(size=8, color="#546e7a"),
995
- marker=dict(color=sc_colors, size=9, opacity=0.85),
996
- hovertemplate=(
997
- "<b>%{text}</b><br>"
998
- "Mgmt Sentiment: %{x:.3f}<br>"
999
- "5d Return: %{y:.2f}%<extra></extra>"
1000
- ),
1001
- ))
1002
- fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
1003
- fig3.add_hline(y=0, line_dash="dash", line_color="#546e7a")
1004
- fig3.update_layout(**L(
1005
- height=340,
1006
- title=f"{ticker} — Mgmt Sentiment vs 5-Day Return",
1007
- xaxis=dict(title="Management Net Sentiment"),
1008
- yaxis=dict(title="5-Day Return (%)"),
1009
- ))
1010
- st.plotly_chart(fig3, use_container_width=True)
1011
- else:
1012
- st.info("Not enough historical data for this ticker.")
 
1
+ """
2
+ FinSight Dashboard LLM-Powered Earnings Intelligence
3
+ Stage 5: Production Streamlit Dashboard
4
+
5
+ Pages:
6
+ 1. Overview project summary, pipeline, key stats
7
+ 2. Model Results walk-forward IC/AUC comparison, year-by-year
8
+ 3. SHAP Analysis interactive feature importance
9
+ 4. Backtest equity curve, drawdown, quarterly P&L
10
+ 5. Explorer browse transcripts with live sentiment
11
+
12
+ Run:
13
+ streamlit run src/dashboard/app.py
14
+ """
15
+
16
+ import sys
17
+ from pathlib import Path
18
+ import warnings
19
+ warnings.filterwarnings("ignore")
20
+
21
+ import numpy as np
22
+ import pandas as pd
23
+ import plotly.express as px
24
+ import plotly.graph_objects as go
25
+ from plotly.subplots import make_subplots
26
+ import streamlit as st
27
+
28
+ sys.path.insert(0, str(Path(__file__).resolve().parent))
29
+ from config import PROCESSED_DIR, EXPERIMENTS_DIR
30
+
31
+ # ── Page config ────────────────────────────────────────────────────────────────
32
+
33
+ st.set_page_config(
34
+ page_title="FinSight | Earnings Intelligence",
35
+ page_icon="📈",
36
+ layout="wide",
37
+ initial_sidebar_state="expanded",
38
+ )
39
+
40
+ # ── Global CSS ─────────────────────────────────────────────────────────────────
41
+
42
+ st.markdown("""
43
+ <style>
44
+ [data-testid="stAppViewContainer"] { background: #0a0e1a; color: #e8eaf6; }
45
+ [data-testid="stSidebar"] {
46
+ background: #0d1117;
47
+ border-right: 1px solid #1e2433;
48
+ }
49
+ [data-testid="stSidebar"] .stRadio label {
50
+ color: #8892b0 !important;
51
+ font-size: 0.9rem;
52
+ }
53
+ .metric-card {
54
+ background: linear-gradient(135deg, #0d1117 0%, #161b27 100%);
55
+ border: 1px solid #1e2d4a;
56
+ border-radius: 12px;
57
+ padding: 20px 24px;
58
+ text-align: center;
59
+ transition: border-color 0.2s;
60
+ }
61
+ .metric-card:hover { border-color: #3d5a99; }
62
+ .metric-value { font-size: 2rem; font-weight: 700; color: #64b5f6; line-height: 1.1; }
63
+ .metric-label {
64
+ font-size: 0.78rem; color: #8892b0;
65
+ text-transform: uppercase; letter-spacing: 1px; margin-top: 6px;
66
+ }
67
+ .metric-delta { font-size: 0.82rem; margin-top: 4px; }
68
+ .delta-pos { color: #66bb6a; }
69
+ .delta-neg { color: #ef5350; }
70
+ .delta-neu { color: #8892b0; }
71
+ .section-header {
72
+ font-size: 1.4rem; font-weight: 700; color: #e8eaf6;
73
+ border-left: 4px solid #3d5a99; padding-left: 12px;
74
+ margin: 28px 0 16px 0;
75
+ }
76
+ .subsection { font-size: 1rem; font-weight: 600; color: #8892b0; margin: 16px 0 8px 0; }
77
+ .hero-title {
78
+ font-size: 2.8rem; font-weight: 800;
79
+ background: linear-gradient(90deg, #64b5f6, #7c4dff, #64b5f6);
80
+ background-size: 200%;
81
+ -webkit-background-clip: text; -webkit-text-fill-color: transparent;
82
+ line-height: 1.2;
83
+ }
84
+ .hero-sub {
85
+ font-size: 1.1rem; color: #8892b0; margin-top: 8px;
86
+ max-width: 680px; line-height: 1.6;
87
+ }
88
+ .pipeline-step {
89
+ background: #0d1117; border: 1px solid #1e2433;
90
+ border-radius: 10px; padding: 14px 16px; text-align: center;
91
+ }
92
+ .pipeline-icon { font-size: 1.6rem; }
93
+ .pipeline-label { font-size: 0.78rem; color: #8892b0; margin-top: 4px; }
94
+ .pipeline-title { font-size: 0.9rem; font-weight: 600; color: #cfd8dc; }
95
+ .insight-box {
96
+ background: #0d1117; border-left: 3px solid #3d5a99;
97
+ border-radius: 0 8px 8px 0; padding: 12px 16px; margin: 8px 0;
98
+ font-size: 0.88rem; color: #b0bec5; line-height: 1.6;
99
+ }
100
+ .insight-box strong { color: #64b5f6; }
101
+ .badge {
102
+ display: inline-block; padding: 2px 10px; border-radius: 20px;
103
+ font-size: 0.72rem; font-weight: 600; margin: 2px;
104
+ }
105
+ .badge-blue { background: #1a237e22; color: #64b5f6; border: 1px solid #1a237e; }
106
+ .badge-green { background: #1b5e2022; color: #66bb6a; border: 1px solid #1b5e20; }
107
+ .badge-red { background: #b71c1c22; color: #ef9a9a; border: 1px solid #b71c1c; }
108
+ hr { border-color: #1e2433 !important; }
109
+ ::-webkit-scrollbar { width: 6px; }
110
+ ::-webkit-scrollbar-track { background: #0a0e1a; }
111
+ ::-webkit-scrollbar-thumb { background: #1e2d4a; border-radius: 3px; }
112
+ </style>
113
+ """, unsafe_allow_html=True)
114
+
115
+
116
+ # ── Layout helper avoids duplicate xaxis/yaxis conflicts ────────────────────
117
+
118
+ BASE_LAYOUT = dict(
119
+ paper_bgcolor="#0d1117",
120
+ plot_bgcolor="#0a0e1a",
121
+ font=dict(color="#b0bec5", family="Inter, sans-serif"),
122
+ margin=dict(l=50, r=30, t=50, b=50),
123
+ colorway=["#64b5f6","#66bb6a","#ffa726","#ef5350","#ab47bc","#26c6da"],
124
+ )
125
+ BASE_XAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
126
+ BASE_YAXIS = dict(gridcolor="#1a2035", linecolor="#1e2433", zerolinecolor="#1e2433")
127
+
128
+ def L(**kwargs):
129
+ """
130
+ Merge base dark-theme layout with chart-specific overrides.
131
+ Merges xaxis/yaxis dicts instead of replacing them, which avoids
132
+ the 'multiple values for keyword argument xaxis' TypeError.
133
+ """
134
+ out = dict(**BASE_LAYOUT)
135
+ if "xaxis" in kwargs:
136
+ out["xaxis"] = {**BASE_XAXIS, **kwargs.pop("xaxis")}
137
+ else:
138
+ out["xaxis"] = BASE_XAXIS
139
+ if "yaxis" in kwargs:
140
+ out["yaxis"] = {**BASE_YAXIS, **kwargs.pop("yaxis")}
141
+ else:
142
+ out["yaxis"] = BASE_YAXIS
143
+ out.update(kwargs)
144
+ return out
145
+
146
+
147
+ # ── Global helpers ─────────────────────────────────────────────────────────────
148
+
149
+ def metric_card(col, value, label, delta="", delta_type="neu"):
150
+ """Render a dark-theme KPI card inside a Streamlit column."""
151
+ col.markdown(f"""
152
+ <div class='metric-card'>
153
+ <div class='metric-value'>{value}</div>
154
+ <div class='metric-label'>{label}</div>
155
+ <div class='metric-delta delta-{delta_type}'>{delta}</div>
156
+ </div>""", unsafe_allow_html=True)
157
+
158
+
159
+ # ── Data loaders ───────────────────────────────────────────────────────────────
160
+
161
+ @st.cache_data(show_spinner=False)
162
+ def load_feature_matrix():
163
+ p = PROCESSED_DIR / "feature_matrix.parquet"
164
+ return pd.read_parquet(p) if p.exists() else pd.DataFrame()
165
+
166
+ @st.cache_data(show_spinner=False)
167
+ def load_model_results():
168
+ p = EXPERIMENTS_DIR / "model_results.csv"
169
+ return pd.read_csv(p) if p.exists() else pd.DataFrame()
170
+
171
+ @st.cache_data(show_spinner=False)
172
+ def load_backtest():
173
+ p = EXPERIMENTS_DIR / "backtest_results.csv"
174
+ return pd.read_csv(p) if p.exists() else pd.DataFrame()
175
+
176
+ @st.cache_data(show_spinner=False)
177
+ def load_shap():
178
+ p = EXPERIMENTS_DIR / "shap_values.parquet"
179
+ return pd.read_parquet(p) if p.exists() else pd.DataFrame()
180
+
181
+
182
+ # ── Sidebar ────────────────────────────────────────────────────────────────────
183
+
184
+ with st.sidebar:
185
+ st.markdown("""
186
+ <div style='padding:12px 0 20px 0;'>
187
+ <div style='font-size:1.5rem;font-weight:800;color:#64b5f6;'>📈 FinSight</div>
188
+ <div style='font-size:0.75rem;color:#8892b0;margin-top:4px;'>
189
+ LLM-Powered Earnings Intelligence
190
+ </div>
191
+ </div>
192
+ """, unsafe_allow_html=True)
193
+
194
+ page = st.radio(
195
+ "Navigation",
196
+ ["Overview",
197
+ "Model Performance",
198
+ "Feature Importance",
199
+ "Backtest Results",
200
+ "Transcript Explorer"],
201
+ label_visibility="collapsed",
202
+ )
203
+
204
+ st.markdown("<hr>", unsafe_allow_html=True)
205
+ st.markdown("""
206
+ <div style='font-size:0.72rem;color:#8892b0;line-height:1.8;'>
207
+ <b style='color:#cfd8dc;'>Stack</b><br>
208
+ FinBERT · ChromaDB · XGBoost<br>
209
+ LightGBM · SHAP · Streamlit<br><br>
210
+ <b style='color:#cfd8dc;'>Data</b><br>
211
+ 14,584 earnings transcripts<br>
212
+ 601 S&amp;P 500 companies<br>
213
+ 2018 2024<br><br>
214
+ <b style='color:#cfd8dc;'>Author</b><br>
215
+ Rajveer Singh Pall
216
+ </div>
217
+ """, unsafe_allow_html=True)
218
+
219
+
220
+ # ═══════════════════════════════════════════════════════════════════════════════
221
+ # PAGE 1 OVERVIEW
222
+ # ═══════════════════════════════════════════════════════════════════════════════
223
+
224
+ if page == "Overview":
225
+ fm = load_feature_matrix()
226
+ mr = load_model_results()
227
+
228
+ st.markdown("""
229
+ <div style='padding:24px 0 8px 0;'>
230
+ <div class='hero-title'>FinSight</div>
231
+ <div class='hero-title' style='font-size:1.8rem;color:#7c4dff;'>
232
+ Earnings Intelligence System
233
+ </div>
234
+ <div class='hero-sub'>
235
+ An end-to-end machine learning pipeline that extracts alpha signals
236
+ from S&amp;P 500 earnings call transcripts using FinBERT sentiment analysis,
237
+ RAG-based structured feature extraction, and walk-forward validated
238
+ gradient boosting models.
239
+ </div>
240
+ </div>
241
+ """, unsafe_allow_html=True)
242
+
243
+ st.markdown("<hr>", unsafe_allow_html=True)
244
+
245
+ best_ic = float(mr["ic"].max()) if not mr.empty else 0.0198
246
+ best_auc = float(mr["auc"].max()) if not mr.empty else 0.5201
247
+ best_hr = float(mr["hit_rate"].max()) if not mr.empty else 0.5427
248
+ n_rows = len(fm) if not fm.empty else 13442
249
+
250
+ c1,c2,c3,c4,c5 = st.columns(5)
251
+ metric_card(c1, "14,584", "Transcripts", "601 companies", "neu")
252
+ metric_card(c2, f"{n_rows:,}", "Training Samples", "2018–2024", "neu")
253
+ metric_card(c3, f"{best_ic:.4f}", "Best IC", "LightGBM", "pos")
254
+ metric_card(c4, f"{best_auc:.4f}","Best AUC", "XGBoost 2024", "pos")
255
+ metric_card(c5, f"{best_hr:.4f}", "Best Hit Rate", "Walk-forward", "pos")
256
+
257
+ st.markdown("<br>", unsafe_allow_html=True)
258
+
259
+ # Pipeline
260
+ st.markdown("<div class='section-header'>System Architecture</div>",
261
+ unsafe_allow_html=True)
262
+ steps = [
263
+ ("🗄️","Stage 1","Data Ingestion", "SEC EDGAR · yfinance\n14,584 transcripts"),
264
+ ("🧠","Stage 2","NLP Pipeline", "FinBERT · ChromaDB RAG\n34 features"),
265
+ ("🤖","Stage 3","ML Models", "XGBoost · LightGBM\nWalk-forward CV"),
266
+ ("📉","Stage 4","Backtesting", "Long-short strategy\n10bps TC"),
267
+ ("🖥️","Stage 5","Dashboard", "Streamlit · Plotly\nHugging Face Spaces"),
268
+ ]
269
+ cols = st.columns(len(steps))
270
+ for col, (icon, stage, title, desc) in zip(cols, steps):
271
+ col.markdown(f"""
272
+ <div class='pipeline-step'>
273
+ <div class='pipeline-icon'>{icon}</div>
274
+ <div class='pipeline-label'>{stage}</div>
275
+ <div class='pipeline-title'>{title}</div>
276
+ <div style='font-size:0.72rem;color:#546e7a;margin-top:4px;line-height:1.5;'>
277
+ {desc}</div>
278
+ </div>""", unsafe_allow_html=True)
279
+
280
+ st.markdown("<br>", unsafe_allow_html=True)
281
+
282
+ left, right = st.columns([1.1, 1])
283
+
284
+ with left:
285
+ st.markdown("<div class='section-header'>Key Findings</div>",
286
+ unsafe_allow_html=True)
287
+ for f in [
288
+ "<strong>Analyst negativity &gt; management positivity.</strong> "
289
+ "qa_neg_ratio (SHAP=0.054) is the single strongest feature. "
290
+ "Analyst pushback in Q&amp;A contains more information than prepared remarks.",
291
+
292
+ "<strong>NLP reduces prediction variance by 87%.</strong> "
293
+ "Baseline IC std=0.114 vs LightGBM std=0.009 "
294
+ "far more consistent across years.",
295
+
296
+ "<strong>Consistent with weak-form EMH.</strong> "
297
+ "Positive IC (0.0198) exists but cannot overcome 10bps transaction "
298
+ "costs at a 5-day holding period.",
299
+
300
+ "<strong>RAG guidance relevance is top-5.</strong> "
301
+ "Semantic relevance of the guidance section not just its content "
302
+ "carries significant predictive signal.",
303
+ ]:
304
+ st.markdown(f"<div class='insight-box'>{f}</div>",
305
+ unsafe_allow_html=True)
306
+
307
+ with right:
308
+ st.markdown("<div class='section-header'>Dataset Coverage</div>",
309
+ unsafe_allow_html=True)
310
+ if not fm.empty:
311
+ yr = fm.groupby("year").size().reset_index(name="count")
312
+ fig = go.Figure(go.Bar(
313
+ x=yr["year"].astype(str),
314
+ y=yr["count"],
315
+ marker=dict(color=yr["count"],
316
+ colorscale=[[0,"#1a237e"],[1,"#64b5f6"]],
317
+ showscale=False),
318
+ text=yr["count"], textposition="outside",
319
+ textfont=dict(size=11),
320
+ ))
321
+ fig.update_layout(**L(title="Transcript Count by Year", height=300,
322
+ showlegend=False,
323
+ xaxis=dict(title="Year"),
324
+ yaxis=dict(title="Transcripts")))
325
+ st.plotly_chart(fig, use_container_width=True)
326
+
327
+ # Sentiment heatmap
328
+ if not fm.empty and "mgmt_net_sentiment" in fm.columns:
329
+ st.markdown("<div class='section-header'>Sentiment Landscape</div>",
330
+ unsafe_allow_html=True)
331
+ heat = (fm.groupby(["ticker","year"])["mgmt_net_sentiment"]
332
+ .mean().reset_index())
333
+ top_t = fm["ticker"].value_counts().head(30).index
334
+ heat = heat[heat["ticker"].isin(top_t)]
335
+ pivot = heat.pivot(index="ticker", columns="year",
336
+ values="mgmt_net_sentiment")
337
+ fig2 = go.Figure(go.Heatmap(
338
+ z=pivot.values,
339
+ x=[str(c) for c in pivot.columns],
340
+ y=pivot.index,
341
+ colorscale=[[0,"#b71c1c"],[0.35,"#e53935"],
342
+ [0.5,"#263238"],[0.65,"#1565c0"],[1,"#64b5f6"]],
343
+ zmid=0,
344
+ colorbar=dict(title="Net Sentiment", tickfont=dict(size=10)),
345
+ hovertemplate="Ticker: %{y}<br>Year: %{x}<br>Sentiment: %{z:.3f}<extra></extra>",
346
+ ))
347
+ fig2.update_layout(**L(
348
+ title="Management Net Sentiment Top 30 Tickers × Year",
349
+ height=500,
350
+ xaxis=dict(title="Year"),
351
+ yaxis=dict(title=""),
352
+ ))
353
+ st.plotly_chart(fig2, use_container_width=True)
354
+
355
+
356
+ # ═══════════════════════════════════════════════════════════════════════════════
357
+ # PAGE 2 MODEL PERFORMANCE
358
+ # ═══════════════════════════════════════════════════════════════════════════════
359
+
360
+ elif page == "Model Performance":
361
+ mr = load_model_results()
362
+
363
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Model Performance</div>",
364
+ unsafe_allow_html=True)
365
+ st.markdown("<div class='hero-sub'>Walk-forward validation (2021–2024). "
366
+ "Train on 3 prior years, test on held-out year. Zero data leakage.</div>",
367
+ unsafe_allow_html=True)
368
+ st.markdown("<hr>", unsafe_allow_html=True)
369
+
370
+ if mr.empty:
371
+ st.error("model_results.csv not found. Run Stage 3 first.")
372
+ st.stop()
373
+
374
+ summary = (
375
+ mr.groupby("model")[["ic","hit_rate","auc"]]
376
+ .agg({"ic":["mean","std"],"hit_rate":["mean","std"],"auc":["mean","std"]})
377
+ .round(4)
378
+ )
379
+ summary.columns = ["IC Mean","IC Std","Hit Rate Mean","Hit Rate Std",
380
+ "AUC Mean","AUC Std"]
381
+ summary = summary.sort_values("IC Mean", ascending=False)
382
+
383
+ st.markdown("<div class='section-header'>Model Comparison</div>",
384
+ unsafe_allow_html=True)
385
+
386
+ def color_ic(val):
387
+ if isinstance(val, float):
388
+ if val > 0.015: return "color: #66bb6a; font-weight:600"
389
+ if val < 0: return "color: #ef5350"
390
+ return ""
391
+
392
+ st.dataframe(
393
+ summary.style.applymap(color_ic, subset=["IC Mean"]).format("{:.4f}"),
394
+ use_container_width=True, height=220,
395
+ )
396
+
397
+ st.markdown("<br>", unsafe_allow_html=True)
398
+ st.markdown("<div class='section-header'>Information Coefficient by Year</div>",
399
+ unsafe_allow_html=True)
400
+
401
+ MODEL_COLORS = {
402
+ "Baseline": "#ffa726",
403
+ "FinBERT_only": "#26c6da",
404
+ "RAG_only": "#ab47bc",
405
+ "XGBoost_all": "#ef5350",
406
+ "LightGBM_all": "#66bb6a",
407
+ }
408
+
409
+ fig = go.Figure()
410
+ for m in mr["model"].unique():
411
+ sub = mr[mr["model"]==m].sort_values("test_year")
412
+ fig.add_trace(go.Scatter(
413
+ x=sub["test_year"].astype(int),
414
+ y=sub["ic"],
415
+ mode="lines+markers", name=m,
416
+ line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2.5),
417
+ marker=dict(size=9),
418
+ hovertemplate=f"<b>{m}</b><br>Year: %{{x}}<br>IC: %{{y:.4f}}<extra></extra>",
419
+ ))
420
+ fig.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=1.2)
421
+ fig.update_layout(**L(
422
+ title="Walk-Forward IC Positive = Predictive",
423
+ height=380,
424
+ xaxis=dict(tickvals=[2021,2022,2023,2024], title="Year"),
425
+ yaxis=dict(title="Information Coefficient"),
426
+ legend=dict(bgcolor="#0d1117", bordercolor="#1e2433", borderwidth=1),
427
+ ))
428
+ st.plotly_chart(fig, use_container_width=True)
429
+
430
+ col1, col2 = st.columns(2)
431
+
432
+ with col1:
433
+ st.markdown("<div class='subsection'>Hit Rate by Year</div>",
434
+ unsafe_allow_html=True)
435
+ fig2 = go.Figure()
436
+ for m in mr["model"].unique():
437
+ sub = mr[mr["model"]==m].sort_values("test_year")
438
+ fig2.add_trace(go.Scatter(
439
+ x=sub["test_year"].astype(int), y=sub["hit_rate"],
440
+ mode="lines+markers", name=m,
441
+ line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
442
+ marker=dict(size=7), showlegend=False,
443
+ ))
444
+ fig2.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
445
+ fig2.update_layout(**L(
446
+ height=300, title="Hit Rate (>0.5 = better than coin flip)",
447
+ xaxis=dict(tickvals=[2021,2022,2023,2024]),
448
+ yaxis=dict(title="Hit Rate"),
449
+ ))
450
+ st.plotly_chart(fig2, use_container_width=True)
451
+
452
+ with col2:
453
+ st.markdown("<div class='subsection'>AUC by Year</div>",
454
+ unsafe_allow_html=True)
455
+ fig3 = go.Figure()
456
+ for m in mr["model"].unique():
457
+ sub = mr[mr["model"]==m].sort_values("test_year")
458
+ fig3.add_trace(go.Scatter(
459
+ x=sub["test_year"].astype(int), y=sub["auc"],
460
+ mode="lines+markers", name=m,
461
+ line=dict(color=MODEL_COLORS.get(m,"#64b5f6"), width=2),
462
+ marker=dict(size=7), showlegend=False,
463
+ ))
464
+ fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a", line_width=1)
465
+ fig3.update_layout(**L(
466
+ height=300, title="AUC-ROC (>0.5 = better than random)",
467
+ xaxis=dict(tickvals=[2021,2022,2023,2024]),
468
+ yaxis=dict(title="AUC"),
469
+ ))
470
+ st.plotly_chart(fig3, use_container_width=True)
471
+
472
+ st.markdown("<div class='section-header'>Stability Analysis IC Variance</div>",
473
+ unsafe_allow_html=True)
474
+ ic_std = mr.groupby("model")["ic"].std().sort_values()
475
+ ic_mean = mr.groupby("model")["ic"].mean()
476
+ bar_colors = ["#66bb6a" if ic_mean[m] > 0 else "#ef5350" for m in ic_std.index]
477
+
478
+ fig4 = go.Figure(go.Bar(
479
+ y=ic_std.index, x=ic_std.values, orientation="h",
480
+ marker_color=bar_colors,
481
+ text=[f"σ={v:.4f}" for v in ic_std.values],
482
+ textposition="outside", textfont=dict(size=11),
483
+ ))
484
+ fig4.update_layout(**L(
485
+ title="IC Standard Deviation Lower = More Consistent",
486
+ height=280,
487
+ xaxis=dict(title="IC Std Dev"),
488
+ yaxis=dict(title=""),
489
+ ))
490
+ st.plotly_chart(fig4, use_container_width=True)
491
+
492
+ st.markdown("""
493
+ <div class='insight-box'>
494
+ <strong>Interpretation:</strong> The Baseline's high IC mean (0.043) is
495
+ misleading its std of 0.114 shows extreme instability driven by lucky
496
+ quarters. LightGBM achieves IC=0.020 with std=0.009, making it
497
+ <strong>10× more stable</strong>. In live trading, consistency matters
498
+ far more than occasional lucky peaks.
499
+ </div>
500
+ """, unsafe_allow_html=True)
501
+
502
+
503
+ # ═══════════════════════════════════════════════════════════════════════════════
504
+ # PAGE 3 FEATURE IMPORTANCE
505
+ # ═══════════════════════════════════════════════════════════════════════════════
506
+
507
+ elif page == "Feature Importance":
508
+ shap_df = load_shap()
509
+ fm = load_feature_matrix()
510
+
511
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Feature Importance</div>",
512
+ unsafe_allow_html=True)
513
+ st.markdown("<div class='hero-sub'>SHAP values computed on LightGBM (best model). "
514
+ "Shows which features actually drive predictions.</div>",
515
+ unsafe_allow_html=True)
516
+ st.markdown("<hr>", unsafe_allow_html=True)
517
+
518
+ if shap_df.empty:
519
+ st.error("shap_values.parquet not found. Run run_shap.py first.")
520
+ st.stop()
521
+
522
+ mean_shap = shap_df.abs().mean().sort_values(ascending=False)
523
+
524
+ def feat_color(name):
525
+ if name.startswith("rag_"): return "#64b5f6"
526
+ if name.startswith("mgmt_"): return "#66bb6a"
527
+ if name.startswith("qa_"): return "#ffa726"
528
+ return "#ab47bc"
529
+
530
+ def feat_group(name):
531
+ if name.startswith("rag_"): return "RAG Features"
532
+ if name.startswith("mgmt_"): return "Management FinBERT"
533
+ if name.startswith("qa_"): return "QA FinBERT"
534
+ return "Other"
535
+
536
+ st.markdown("<div class='section-header'>Top 20 Features by Mean |SHAP|</div>",
537
+ unsafe_allow_html=True)
538
+ top20 = mean_shap.head(20)[::-1]
539
+ fig = go.Figure(go.Bar(
540
+ y=top20.index, x=top20.values, orientation="h",
541
+ marker_color=[feat_color(n) for n in top20.index],
542
+ text=[f"{v:.4f}" for v in top20.values],
543
+ textposition="outside", textfont=dict(size=10),
544
+ hovertemplate="<b>%{y}</b><br>Mean |SHAP|: %{x:.4f}<extra></extra>",
545
+ ))
546
+ fig.update_layout(**L(
547
+ height=520,
548
+ title="Feature Importance 🔵 RAG | 🟢 Mgmt FinBERT | 🟠 QA FinBERT",
549
+ xaxis=dict(title="Mean |SHAP Value|"),
550
+ yaxis=dict(title=""),
551
+ ))
552
+ st.plotly_chart(fig, use_container_width=True)
553
+
554
+ col1, col2 = st.columns(2)
555
+
556
+ with col1:
557
+ st.markdown("<div class='section-header'>Importance by Feature Group</div>",
558
+ unsafe_allow_html=True)
559
+ gs = (mean_shap.reset_index()
560
+ .rename(columns={"index":"feature", 0:"shap"}))
561
+ gs.columns = ["feature","shap"]
562
+ gs["group"] = gs["feature"].apply(feat_group)
563
+ gt = gs.groupby("group")["shap"].sum()
564
+
565
+ fig2 = go.Figure(go.Pie(
566
+ labels=gt.index, values=gt.values, hole=0.55,
567
+ marker=dict(colors=["#64b5f6","#66bb6a","#ffa726","#ab47bc"]),
568
+ textinfo="label+percent", textfont=dict(size=12),
569
+ hovertemplate="<b>%{label}</b><br>Total SHAP: %{value:.4f}<br>%{percent}<extra></extra>",
570
+ ))
571
+ fig2.update_layout(**L(
572
+ height=320, showlegend=False,
573
+ annotations=[dict(text="SHAP<br>Groups", x=0.5, y=0.5,
574
+ font_size=13, showarrow=False,
575
+ font_color="#b0bec5")],
576
+ ))
577
+ st.plotly_chart(fig2, use_container_width=True)
578
+
579
+ with col2:
580
+ st.markdown("<div class='section-header'>SHAP vs Correlation with Target</div>",
581
+ unsafe_allow_html=True)
582
+ if not fm.empty and "target_5d_up" in fm.columns:
583
+ feat_cols = [c for c in shap_df.columns if c in fm.columns]
584
+ corrs = fm[feat_cols+["target_5d_up"]].corr()["target_5d_up"].drop("target_5d_up")
585
+ cdf = pd.DataFrame({
586
+ "feature": corrs.index,
587
+ "shap": mean_shap.reindex(corrs.index).fillna(0).values,
588
+ "corr": corrs.values,
589
+ "group": [feat_group(f) for f in corrs.index],
590
+ })
591
+ cmap = {
592
+ "RAG Features": "#64b5f6",
593
+ "Management FinBERT": "#66bb6a",
594
+ "QA FinBERT": "#ffa726",
595
+ "Other": "#ab47bc",
596
+ }
597
+ fig3 = px.scatter(
598
+ cdf, x="corr", y="shap", color="group",
599
+ color_discrete_map=cmap, hover_data=["feature"],
600
+ labels={"corr":"Pearson Corr with Target",
601
+ "shap":"Mean |SHAP Value|"},
602
+ height=320,
603
+ )
604
+ fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
605
+ fig3.update_layout(**L(
606
+ title="SHAP Importance vs Linear Correlation",
607
+ showlegend=False,
608
+ ))
609
+ st.plotly_chart(fig3, use_container_width=True)
610
+
611
+ st.markdown("<div class='section-header'>Feature Insights</div>",
612
+ unsafe_allow_html=True)
613
+ insights = [
614
+ ("🏆 #1 qa_neg_ratio",
615
+ "Proportion of negative sentences in analyst Q&amp;A. When analysts "
616
+ "push back hard, it signals market-moving information that management "
617
+ "tried to downplay."),
618
+ ("📊 #2 mgmt_sent_vol",
619
+ "Volatility of management's sentence-level sentiment. Inconsistent "
620
+ "messaging mixing optimism with caution often precedes larger "
621
+ "price moves."),
622
+ ("📝 #3 qa_n_sentences",
623
+ "Length of the Q&amp;A section. Longer Q&amp;A sessions indicate "
624
+ "more analyst scrutiny, which correlates with uncertainty about "
625
+ "the quarter's results."),
626
+ ("😶 #4 mgmt_mean_neu",
627
+ "Neutral sentiment ratio in management remarks. Deliberately neutral "
628
+ "language can mask very good or very bad news a hedging signal."),
629
+ ("🎯 #5 rag_guidance_relevance",
630
+ "Semantic similarity of the guidance section to specific numerical "
631
+ "guidance queries. More relevant guidance sections contain concrete "
632
+ "targets that markets react to more strongly."),
633
+ ]
634
+ cols = st.columns(len(insights))
635
+ for col, (title, body) in zip(cols, insights):
636
+ col.markdown(f"""
637
+ <div class='pipeline-step' style='text-align:left;height:190px;'>
638
+ <div style='font-size:0.82rem;font-weight:700;color:#64b5f6;
639
+ margin-bottom:8px;'>{title}</div>
640
+ <div style='font-size:0.76rem;color:#8892b0;line-height:1.6;'>
641
+ {body}</div>
642
+ </div>""", unsafe_allow_html=True)
643
+
644
+
645
+ # ═══════════════════════════════════════════════════════════════════════════════
646
+ # PAGE 4 BACKTEST
647
+ # ═══════════════════════════════════════════════════════════════════════════════
648
+
649
+ elif page == "Backtest Results":
650
+ bt = load_backtest()
651
+
652
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Backtest Results</div>",
653
+ unsafe_allow_html=True)
654
+ st.markdown("<div class='hero-sub'>Long-short quartile portfolio. "
655
+ "Long top-25% predicted stocks, short bottom-25%. "
656
+ "5-day holding period. 10bps round-trip transaction cost.</div>",
657
+ unsafe_allow_html=True)
658
+ st.markdown("<hr>", unsafe_allow_html=True)
659
+
660
+ if bt.empty:
661
+ st.error("backtest_results.csv not found. Run Stage 4 first.")
662
+ st.stop()
663
+
664
+ bt = bt.sort_values(["year","quarter"]).reset_index(drop=True)
665
+ bt["period"] = bt["year"].astype(str) + "-Q" + bt["quarter"].astype(str)
666
+ rets = bt["net_ret"]
667
+ cum = (1 + rets).cumprod()
668
+ peak = cum.cummax()
669
+ dd = (cum - peak) / peak
670
+
671
+ n_yrs = len(bt) / 4
672
+ ann_ret = float((1 + rets).prod() ** (1/n_yrs) - 1)
673
+ ann_vol = float(rets.std() * np.sqrt(4))
674
+ sharpe = ann_ret / ann_vol if ann_vol != 0 else 0.0
675
+ max_dd = float(dd.min())
676
+ hit = float((rets > 0).mean())
677
+
678
+ c1,c2,c3,c4,c5 = st.columns(5)
679
+ metric_card(c1, f"{ann_ret*100:.2f}%", "Ann. Return",
680
+ "After TC", "pos" if ann_ret > 0 else "neg")
681
+ metric_card(c2, f"{sharpe:.3f}", "Sharpe Ratio",
682
+ ">1.0 = excellent", "pos" if sharpe > 0 else "neg")
683
+ metric_card(c3, f"{max_dd*100:.2f}%", "Max Drawdown",
684
+ "Peak-to-trough", "neg")
685
+ metric_card(c4, f"{hit*100:.0f}%", "Win Rate",
686
+ "Profitable quarters", "pos" if hit > 0.5 else "neg")
687
+ metric_card(c5, str(len(bt)), "Quarters Tested",
688
+ "2021–2024", "neu")
689
+
690
+ st.markdown("<br>", unsafe_allow_html=True)
691
+ st.markdown("<div class='section-header'>Equity Curve</div>",
692
+ unsafe_allow_html=True)
693
+
694
+ fig = make_subplots(rows=2, cols=1, row_heights=[0.72,0.28],
695
+ shared_xaxes=True, vertical_spacing=0.04)
696
+ fig.add_trace(go.Scatter(
697
+ x=bt["period"], y=cum.values,
698
+ mode="lines+markers",
699
+ line=dict(color="#64b5f6", width=2.5),
700
+ marker=dict(size=7),
701
+ fill="tozeroy", fillcolor="rgba(100,181,246,0.06)",
702
+ name="Cumulative Return",
703
+ hovertemplate="<b>%{x}</b><br>Cumulative: %{y:.4f}<extra></extra>",
704
+ ), row=1, col=1)
705
+ fig.add_hline(y=1.0, line_dash="dash", line_color="#546e7a",
706
+ line_width=1, row=1, col=1)
707
+ fig.add_trace(go.Bar(
708
+ x=bt["period"], y=dd.values*100,
709
+ marker_color="#ef5350", opacity=0.7, name="Drawdown %",
710
+ hovertemplate="<b>%{x}</b><br>Drawdown: %{y:.2f}%<extra></extra>",
711
+ ), row=2, col=1)
712
+
713
+ fig.update_layout(
714
+ paper_bgcolor="#0d1117", plot_bgcolor="#0a0e1a",
715
+ font=dict(color="#b0bec5"),
716
+ margin=dict(l=50,r=30,t=50,b=80),
717
+ title="FinSight Long-Short Strategy 2021 to 2024",
718
+ height=500, showlegend=False,
719
+ xaxis2=dict(tickangle=45, tickfont_size=10,
720
+ gridcolor="#1a2035", linecolor="#1e2433"),
721
+ yaxis=dict(title="Cumulative Return",
722
+ gridcolor="#1a2035", linecolor="#1e2433"),
723
+ yaxis2=dict(title="DD %",
724
+ gridcolor="#1a2035", linecolor="#1e2433"),
725
+ )
726
+ fig.update_xaxes(gridcolor="#1a2035", linecolor="#1e2433")
727
+ st.plotly_chart(fig, use_container_width=True)
728
+
729
+ col1, col2 = st.columns(2)
730
+
731
+ with col1:
732
+ st.markdown("<div class='subsection'>Quarterly Net Returns</div>",
733
+ unsafe_allow_html=True)
734
+ q_colors = ["#66bb6a" if r > 0 else "#ef5350" for r in rets]
735
+ fig2 = go.Figure(go.Bar(
736
+ x=bt["period"], y=rets.values*100,
737
+ marker_color=q_colors,
738
+ text=[f"{v*100:.2f}%" for v in rets.values],
739
+ textposition="outside", textfont=dict(size=9),
740
+ hovertemplate="<b>%{x}</b><br>Net Return: %{y:.2f}%<extra></extra>",
741
+ ))
742
+ fig2.add_hline(y=0, line_color="#546e7a", line_width=1)
743
+ fig2.update_layout(**L(
744
+ height=320,
745
+ title="Net Return per Quarter (after 10bps TC)",
746
+ xaxis=dict(tickangle=45, tickfont=dict(size=9)),
747
+ yaxis=dict(title="Net Return (%)"),
748
+ ))
749
+ st.plotly_chart(fig2, use_container_width=True)
750
+
751
+ with col2:
752
+ st.markdown("<div class='subsection'>Long vs Short Leg Hit Rate</div>",
753
+ unsafe_allow_html=True)
754
+ fig3 = go.Figure()
755
+ fig3.add_trace(go.Scatter(
756
+ x=bt["period"], y=bt["long_hit"],
757
+ mode="lines+markers",
758
+ line=dict(color="#66bb6a", width=2),
759
+ marker=dict(size=7), name="Long Leg",
760
+ ))
761
+ fig3.add_trace(go.Scatter(
762
+ x=bt["period"], y=bt["short_hit"],
763
+ mode="lines+markers",
764
+ line=dict(color="#ef5350", width=2),
765
+ marker=dict(size=7), name="Short Leg",
766
+ ))
767
+ fig3.add_hline(y=0.5, line_dash="dot", line_color="#546e7a")
768
+ fig3.update_layout(**L(
769
+ height=320,
770
+ title="Direction Accuracy Long &amp; Short Legs",
771
+ xaxis=dict(tickangle=45, tickfont=dict(size=9)),
772
+ yaxis=dict(title="Hit Rate"),
773
+ legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
774
+ ))
775
+ st.plotly_chart(fig3, use_container_width=True)
776
+
777
+ st.markdown("<div class='section-header'>Quarterly Breakdown</div>",
778
+ unsafe_allow_html=True)
779
+ disp = bt[["period","net_ret","long_ret","short_ret",
780
+ "long_hit","short_hit","n_stocks","q_size"]].copy()
781
+ disp.columns = ["Quarter","Net Ret","Long Ret","Short Ret",
782
+ "Long Hit","Short Hit","N Stocks","Leg Size"]
783
+
784
+ def color_ret(val):
785
+ if isinstance(val, float):
786
+ if val > 0: return "color: #66bb6a"
787
+ if val < 0: return "color: #ef5350"
788
+ return ""
789
+
790
+ st.dataframe(
791
+ disp.style.applymap(color_ret,
792
+ subset=["Net Ret","Long Ret","Short Ret"])
793
+ .format({c:"{:.4f}" for c in
794
+ ["Net Ret","Long Ret","Short Ret",
795
+ "Long Hit","Short Hit"]}),
796
+ use_container_width=True, hide_index=True,
797
+ )
798
+
799
+ st.markdown("""
800
+ <div class='insight-box'>
801
+ <strong>Context:</strong> A Sharpe of -0.81 with a 5-day holding period
802
+ is consistent with academic literature on post-earnings announcement
803
+ drift (Chan et al. 1996, Lerman et al. 2008). The signal exists
804
+ (IC=0.0198) but is too weak to survive round-trip transaction costs at
805
+ this frequency. Extending to 20-day holding periods is the natural
806
+ next step.
807
+ </div>
808
+ """, unsafe_allow_html=True)
809
+
810
+
811
+ # ═══════════════════════════════════════════════════════════════════════════════
812
+ # PAGE 5 TRANSCRIPT EXPLORER
813
+ # ═══════════════════════════════════════════════════════════════════════════════
814
+
815
+ elif page == "Transcript Explorer":
816
+ fm = load_feature_matrix()
817
+
818
+ st.markdown("<div class='hero-title' style='font-size:2rem;'>Transcript Explorer</div>",
819
+ unsafe_allow_html=True)
820
+ st.markdown("<div class='hero-sub'>Browse sentiment profiles for any company "
821
+ "and quarter in the dataset.</div>",
822
+ unsafe_allow_html=True)
823
+ st.markdown("<hr>", unsafe_allow_html=True)
824
+
825
+ if fm.empty:
826
+ st.error("Feature matrix not found.")
827
+ st.stop()
828
+
829
+ col1, col2, col3 = st.columns([2,1,1])
830
+ with col1:
831
+ all_tickers = sorted(fm["ticker"].dropna().unique())
832
+ default_idx = all_tickers.index("AAPL") if "AAPL" in all_tickers else 0
833
+ ticker = st.selectbox("Select Ticker", all_tickers, index=default_idx)
834
+ with col2:
835
+ years = sorted(fm["year"].unique(), reverse=True)
836
+ year = st.selectbox("Year", years)
837
+ with col3:
838
+ quarters = sorted(fm[fm["year"]==year]["quarter"].unique())
839
+ quarter = st.selectbox("Quarter", quarters)
840
+
841
+ row = fm[(fm["ticker"]==ticker) &
842
+ (fm["year"]==year) &
843
+ (fm["quarter"]==quarter)]
844
+
845
+ if row.empty:
846
+ st.warning("No data for this combination.")
847
+ st.stop()
848
+
849
+ row = row.iloc[0]
850
+
851
+ ret_5d = row.get("ret_5d", 0)
852
+ target = int(row.get("target_5d_up", 0))
853
+ st.markdown(f"""
854
+ <div style='display:flex;align-items:center;gap:16px;margin:16px 0;'>
855
+ <div style='font-size:2rem;font-weight:800;color:#64b5f6;'>{ticker}</div>
856
+ <div style='font-size:1rem;color:#8892b0;'>{int(year)} Q{int(quarter)}</div>
857
+ <div class='badge badge-{"green" if target==1 else "red"}'>
858
+ {" UP" if target==1 else " DOWN"} 5d
859
+ </div>
860
+ <div class='badge badge-blue'>
861
+ 5d Return: {float(ret_5d)*100:.2f}%
862
+ </div>
863
+ </div>
864
+ """, unsafe_allow_html=True)
865
+
866
+ left, right = st.columns([1.2, 1])
867
+
868
+ with left:
869
+ st.markdown("<div class='subsection'>Sentiment Breakdown</div>",
870
+ unsafe_allow_html=True)
871
+ cats = ["Mgmt Positive","Mgmt Neutral","Mgmt Negative",
872
+ "QA Positive","QA Neutral","QA Negative"]
873
+ vals = [
874
+ float(row.get("mgmt_mean_pos", 0) or 0),
875
+ float(row.get("mgmt_mean_neu", 0) or 0),
876
+ float(row.get("mgmt_mean_neg", 0) or 0),
877
+ float(row.get("qa_mean_pos", 0) or 0),
878
+ float(row.get("qa_mean_neu", 0) or 0),
879
+ float(row.get("qa_mean_neg", 0) or 0),
880
+ ]
881
+ vals_c = vals + [vals[0]]
882
+ cats_c = cats + [cats[0]]
883
+ fig = go.Figure(go.Scatterpolar(
884
+ r=vals_c, theta=cats_c, fill="toself",
885
+ fillcolor="rgba(100,181,246,0.15)",
886
+ line=dict(color="#64b5f6", width=2), name=ticker,
887
+ ))
888
+ fig.update_layout(
889
+ paper_bgcolor="#0d1117",
890
+ font=dict(color="#b0bec5"),
891
+ polar=dict(
892
+ bgcolor="#0d1117",
893
+ radialaxis=dict(visible=True, range=[0,1],
894
+ gridcolor="#1a2035", linecolor="#1a2035",
895
+ tickfont=dict(size=9, color="#546e7a")),
896
+ angularaxis=dict(gridcolor="#1a2035", linecolor="#1a2035",
897
+ tickfont=dict(size=10, color="#b0bec5")),
898
+ ),
899
+ height=360, showlegend=False,
900
+ title=f"{ticker} Sentiment Radar",
901
+ margin=dict(l=40,r=40,t=50,b=40),
902
+ )
903
+ st.plotly_chart(fig, use_container_width=True)
904
+
905
+ with right:
906
+ st.markdown("<div class='subsection'>Feature Scores</div>",
907
+ unsafe_allow_html=True)
908
+
909
+ def score_bar(label, val, invert=False):
910
+ if val is None or pd.isna(val):
911
+ return
912
+ v = float(val)
913
+ pct = max(0, min(1, v)) * 100
914
+ color = "#ef5350" if invert else "#64b5f6"
915
+ st.markdown(f"""
916
+ <div style='margin:8px 0;'>
917
+ <div style='display:flex;justify-content:space-between;
918
+ font-size:0.8rem;color:#8892b0;margin-bottom:3px;'>
919
+ <span>{label}</span><span>{v:.3f}</span>
920
+ </div>
921
+ <div style='background:#1a2035;border-radius:4px;height:6px;'>
922
+ <div style='background:{color};width:{pct:.0f}%;
923
+ height:6px;border-radius:4px;'></div>
924
+ </div>
925
+ </div>""", unsafe_allow_html=True)
926
+
927
+ score_bar("Mgmt Net Sentiment", row.get("mgmt_net_sentiment"))
928
+ score_bar("QA Net Sentiment", row.get("qa_net_sentiment"))
929
+ score_bar("Mgmt Negativity", row.get("mgmt_neg_ratio"), invert=True)
930
+ score_bar("QA Negativity", row.get("qa_neg_ratio"), invert=True)
931
+ score_bar("Guidance Specificity", row.get("rag_guidance_specificity_score"))
932
+ score_bar("Mgmt Confidence", row.get("rag_management_confidence_score"))
933
+ score_bar("Forward Looking", row.get("rag_forward_looking_score"))
934
+ score_bar("New Risks", row.get("rag_new_risks_score"), invert=True)
935
+ score_bar("Cost Pressure", row.get("rag_cost_pressure_score"), invert=True)
936
+
937
+ # Historical trend
938
+ st.markdown(f"<div class='section-header'>{ticker} Historical Sentiment</div>",
939
+ unsafe_allow_html=True)
940
+
941
+ td = fm[fm["ticker"]==ticker].copy().sort_values(["year","quarter"])
942
+ td["period"] = td["year"].astype(str) + "-Q" + td["quarter"].astype(str)
943
+
944
+ if len(td) > 1:
945
+ fig2 = go.Figure()
946
+ for col_name, label, color in [
947
+ ("mgmt_net_sentiment", "Mgmt Sentiment", "#66bb6a"),
948
+ ("qa_net_sentiment", "QA Sentiment", "#64b5f6"),
949
+ ("mgmt_neg_ratio", "Mgmt Negativity","#ef5350"),
950
+ ]:
951
+ if col_name in td.columns:
952
+ fig2.add_trace(go.Scatter(
953
+ x=td["period"], y=td[col_name],
954
+ mode="lines+markers", name=label,
955
+ line=dict(color=color, width=2),
956
+ marker=dict(size=6),
957
+ hovertemplate=f"<b>{label}</b><br>%{{x}}<br>%{{y:.3f}}<extra></extra>",
958
+ ))
959
+
960
+ # Mark selected quarter use index position to avoid type issues
961
+ cur_period = f"{int(year)}-Q{int(quarter)}"
962
+ if cur_period in td["period"].values:
963
+ cur_idx = td[td["period"]==cur_period].index[0]
964
+ cur_pos = td["period"].tolist().index(cur_period)
965
+ fig2.add_vrect(
966
+ x0=cur_period, x1=cur_period,
967
+ line_dash="dot", line_color="#ffa726", line_width=2,
968
+ )
969
+
970
+ fig2.add_hline(y=0, line_dash="dash", line_color="#546e7a", line_width=0.8)
971
+ fig2.update_layout(**L(
972
+ height=320,
973
+ title=f"{ticker} Sentiment Over Time",
974
+ xaxis=dict(tickangle=45, tickfont=dict(size=9)),
975
+ yaxis=dict(title="Score"),
976
+ legend=dict(bgcolor="#0d1117", bordercolor="#1e2433"),
977
+ ))
978
+ st.plotly_chart(fig2, use_container_width=True)
979
+
980
+ # Scatter: sentiment vs return
981
+ if "ret_5d" in td.columns and "mgmt_net_sentiment" in td.columns:
982
+ st.markdown("<div class='subsection'>Sentiment vs 5-Day Return</div>",
983
+ unsafe_allow_html=True)
984
+ tc = td.dropna(subset=["ret_5d","mgmt_net_sentiment"]).copy()
985
+ tc["ret_pct"] = tc["ret_5d"].astype(float) * 100
986
+ sc_colors = ["#66bb6a" if r > 0 else "#ef5350"
987
+ for r in tc["ret_pct"]]
988
+ fig3 = go.Figure(go.Scatter(
989
+ x=tc["mgmt_net_sentiment"].astype(float),
990
+ y=tc["ret_pct"],
991
+ mode="markers+text",
992
+ text=tc["period"],
993
+ textposition="top center",
994
+ textfont=dict(size=8, color="#546e7a"),
995
+ marker=dict(color=sc_colors, size=9, opacity=0.85),
996
+ hovertemplate=(
997
+ "<b>%{text}</b><br>"
998
+ "Mgmt Sentiment: %{x:.3f}<br>"
999
+ "5d Return: %{y:.2f}%<extra></extra>"
1000
+ ),
1001
+ ))
1002
+ fig3.add_vline(x=0, line_dash="dash", line_color="#546e7a")
1003
+ fig3.add_hline(y=0, line_dash="dash", line_color="#546e7a")
1004
+ fig3.update_layout(**L(
1005
+ height=340,
1006
+ title=f"{ticker} Mgmt Sentiment vs 5-Day Return",
1007
+ xaxis=dict(title="Management Net Sentiment"),
1008
+ yaxis=dict(title="5-Day Return (%)"),
1009
+ ))
1010
+ st.plotly_chart(fig3, use_container_width=True)
1011
+ else:
1012
+ st.info("Not enough historical data for this ticker.")