Spaces:
Sleeping
Sleeping
Upload 15 files
Browse files- .streamlit/config.toml +7 -0
- app.py +509 -0
- requirements.txt +17 -0
- src/__init__.py +1 -0
- src/charts.py +314 -0
- src/csv_enrichment.py +941 -0
- src/data_engine.py +1210 -0
- src/index_fund_ingest.py +354 -0
- src/models.py +132 -0
- src/nav_metrics_engine.py +1005 -0
- src/pdf_generator.py +560 -0
- src/portfolio_engine.py +299 -0
- src/reference_data.py +142 -0
- src/scheme_resolver.py +323 -0
- src/weightage.py +425 -0
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[theme]
|
| 2 |
+
base = "dark"
|
| 3 |
+
primaryColor = "#4A90E2"
|
| 4 |
+
backgroundColor = "#0f0f0f"
|
| 5 |
+
secondaryBackgroundColor = "#1a1a1a"
|
| 6 |
+
textColor = "#e5e5e5"
|
| 7 |
+
font = "sans serif"
|
app.py
ADDED
|
@@ -0,0 +1,509 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import sys
|
| 5 |
+
import tempfile
|
| 6 |
+
import time
|
| 7 |
+
import traceback
|
| 8 |
+
from contextlib import redirect_stderr, redirect_stdout
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Any
|
| 12 |
+
|
| 13 |
+
import streamlit as st
|
| 14 |
+
|
| 15 |
+
from src.csv_enrichment import (
|
| 16 |
+
TARGET_COLUMNS,
|
| 17 |
+
EnrichmentConfig,
|
| 18 |
+
enrich_csv, # use canonical name (alias also works)
|
| 19 |
+
lookup_fund_metric_value,
|
| 20 |
+
)
|
| 21 |
+
from src.data_engine import run_data_engine
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ββ Session logging βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 25 |
+
|
| 26 |
+
def _init_session_log() -> Path:
    """Create the per-session log file on first call and return its path.

    The path is memoized in ``st.session_state`` so subsequent calls (and
    Streamlit reruns within the same session) reuse the same file.
    """
    existing = st.session_state.get("session_log_path")
    if existing is not None:
        return Path(existing)

    target_dir = Path("logs") / "streamlit_sessions"
    target_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    new_path = target_dir / f"session_{timestamp}.log"
    new_path.write_text(
        f"[{datetime.now().isoformat()}] session_started\n",
        encoding="utf-8",
    )
    st.session_state["session_log_path"] = str(new_path)
    return new_path
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _log_session_event(message: str) -> None:
    """Append one timestamped line to the session log. Never raises."""
    try:
        with _init_session_log().open("a", encoding="utf-8") as handle:
            handle.write(f"[{datetime.now().isoformat()}] {message}\n")
    except Exception:
        # Logging is best-effort; a failure here must not break the UI.
        pass
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _log_session_block(title: str, content: str) -> None:
    """Append *content* to the session log wrapped in start/end markers.

    Blank content is recorded as "(no output)". Never raises.
    """
    try:
        body = (content.rstrip() + "\n") if content.strip() else "(no output)\n"
        with _init_session_log().open("a", encoding="utf-8") as handle:
            handle.write(f"[{datetime.now().isoformat()}] --- {title} (start) ---\n")
            handle.write(body)
            handle.write(f"[{datetime.now().isoformat()}] --- {title} (end) ---\n")
    except Exception:
        # Best-effort logging only.
        pass
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ββ Captured output runner ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
+
|
| 62 |
+
def _run_with_captured_output(func: Any, *args: Any, **kwargs: Any) -> tuple[Any, str]:
|
| 63 |
+
"""Run function, mirror prints to terminal, capture for UI display."""
|
| 64 |
+
|
| 65 |
+
class _TeeCapture(io.TextIOBase):
|
| 66 |
+
def __init__(self, mirror: Any, on_write: Any = None) -> None:
|
| 67 |
+
self._mirror = mirror
|
| 68 |
+
self._buffer = io.StringIO()
|
| 69 |
+
self._on_write = on_write
|
| 70 |
+
|
| 71 |
+
def write(self, s: str) -> int:
|
| 72 |
+
text = str(s)
|
| 73 |
+
self._buffer.write(text)
|
| 74 |
+
try:
|
| 75 |
+
self._mirror.write(text)
|
| 76 |
+
self._mirror.flush()
|
| 77 |
+
except Exception:
|
| 78 |
+
pass
|
| 79 |
+
if self._on_write is not None:
|
| 80 |
+
try:
|
| 81 |
+
self._on_write(text)
|
| 82 |
+
except Exception:
|
| 83 |
+
pass
|
| 84 |
+
return len(text)
|
| 85 |
+
|
| 86 |
+
def flush(self) -> None:
|
| 87 |
+
try:
|
| 88 |
+
self._mirror.flush()
|
| 89 |
+
except Exception:
|
| 90 |
+
pass
|
| 91 |
+
|
| 92 |
+
def getvalue(self) -> str:
|
| 93 |
+
return self._buffer.getvalue()
|
| 94 |
+
|
| 95 |
+
live_callback = kwargs.pop("live_callback", None)
|
| 96 |
+
out_tee = _TeeCapture(sys.__stdout__, live_callback)
|
| 97 |
+
err_tee = _TeeCapture(sys.__stderr__, live_callback)
|
| 98 |
+
with redirect_stdout(out_tee), redirect_stderr(err_tee):
|
| 99 |
+
result = func(*args, **kwargs)
|
| 100 |
+
return result, out_tee.getvalue() + err_tee.getvalue()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# ββ CSS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
+
|
| 105 |
+
def _inject_custom_css() -> None:
    """Inject the app's dark-theme stylesheet (brand CSS variables plus the
    ``mf-*`` layout classes used throughout :func:`main`).

    Streamlit has no first-class hook for custom classes, so the stylesheet
    is injected via ``st.markdown`` with ``unsafe_allow_html=True``. The
    colors mirror the values in ``.streamlit/config.toml``.
    """
    st.markdown(
        """
        <style>
        :root {
            --mf-primary: #4A90E2;
            --mf-accent: #22c55e;
            --mf-bg: #0f0f0f;
            --mf-bg-secondary: #1a1a1a;
            --mf-surface: #1a1a1a;
            --mf-text: #e5e5e5;
            --mf-text-muted: #a0a0a0;
            --mf-border: #333333;
        }
        .mf-shell { max-width: 1100px; margin: 0 auto; padding: 0 0 3rem 0; }
        .mf-hero {
            padding: 1.9rem 2.1rem 1.5rem 2.1rem;
            border-radius: 18px;
            background: var(--mf-bg-secondary);
            border: 1px solid var(--mf-border);
        }
        .mf-kicker {
            letter-spacing: .16em; font-size: 0.75rem;
            text-transform: uppercase; color: var(--mf-primary); margin-bottom: 0.5rem;
        }
        .mf-title {
            font-size: 2.2rem; font-weight: 650;
            line-height: 1.1; color: var(--mf-text); margin-bottom: 0.75rem;
        }
        .mf-subtitle { max-width: 40rem; font-size: 0.95rem; color: var(--mf-text-muted); }
        .mf-panel {
            margin-top: 1.75rem; padding: 1.5rem 1.75rem 1.75rem 1.75rem;
            border-radius: 20px; background: var(--mf-surface);
            border: 1px solid var(--mf-border);
        }
        .mf-helper { font-size: 0.8rem; color: var(--mf-text-muted); margin-bottom: 0.9rem; }
        .mf-steps { font-size: 0.78rem; color: var(--mf-text-muted); margin-top: 0.3rem; }
        .mf-steps li { margin-bottom: 0.1rem; }
        .mf-metrics { display: flex; flex-wrap: wrap; gap: 0.75rem; margin-top: 1.25rem; }
        .mf-metric {
            flex: 0 0 auto; min-width: 140px; padding: 0.6rem 0.8rem;
            border-radius: 0.9rem; border: 1px solid var(--mf-border);
            background: var(--mf-bg-secondary);
        }
        .mf-metric-label {
            font-size: 0.72rem; text-transform: uppercase;
            letter-spacing: 0.09em; color: var(--mf-text-muted); margin-bottom: 0.2rem;
        }
        .mf-metric-value { font-size: 1.05rem; font-weight: 600; color: var(--mf-accent); }
        .mf-timing {
            margin-top: 1rem; padding: 0.75rem 1rem;
            border-radius: 0.75rem; border: 1px solid var(--mf-border);
            background: var(--mf-bg-secondary); font-size: 0.8rem;
            color: var(--mf-text-muted);
        }
        .mf-download-label {
            font-size: 0.8rem; color: var(--mf-text-muted);
            margin-top: 1.4rem; margin-bottom: 0.35rem;
        }
        .stFileUploader div[data-testid="stFileUploaderDropzone"] {
            border-radius: 0.9rem; border-color: var(--mf-border);
            background: var(--mf-bg-secondary);
        }
        .stButton > button[kind="primary"], .stDownloadButton > button {
            border-radius: 0.5rem; border: none;
            background: var(--mf-primary) !important;
            color: white !important; font-weight: 600;
        }
        .stApp, [data-testid="stAppViewContainer"] { background-color: var(--mf-bg); }
        .block-container { padding-top: 1.5rem; }
        @media (max-width: 768px) {
            .mf-hero { padding: 1.4rem 1.3rem 1.2rem 1.3rem; }
            .mf-title { font-size: 1.6rem; }
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# ββ Main ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 186 |
+
|
| 187 |
+
def main() -> None:
    """Render the single-page advisor app.

    Flow: upload CSV -> Phase 1 enrichment (scheme codes, NAV engine, PE/PB
    web lookup, median impute) -> Phase 2 scoring + Excel export -> download
    button. Live engine output is streamed into a rolling log box and every
    step is appended to the per-session log file.
    """
    st.set_page_config(
        page_title="MF Scoring Engine · Advisor Demo",
        page_icon="📊",
        layout="centered",
    )

    _inject_custom_css()
    _init_session_log()
    _log_session_event("app_rendered")

    st.markdown('<div class="mf-shell">', unsafe_allow_html=True)

    st.markdown(
        """
        <section class="mf-hero">
            <div class="mf-kicker">Advisor tool</div>
            <div class="mf-title">Score your mutual fund list in Excel.</div>
            <p class="mf-subtitle">
                Upload your mutual fund CSV. The app runs enrichment (NAV engine → web fallback → median),
                scores every fund, and gives you a ready-to-share Excel workbook.
            </p>
        </section>
        """,
        unsafe_allow_html=True,
    )

    st.markdown('<section class="mf-panel">', unsafe_allow_html=True)

    tab_run, tab_about = st.tabs(["Run analysis", "How scoring works"])

    with tab_run:
        st.markdown("### Upload CSV & generate workbook")
        st.markdown(
            """
            <p class="mf-helper">
                Upload your standard fund universe CSV
                (<code>Fund</code>, <code>Benchmark Type</code>, CAGR columns, etc.).<br>
                <strong>Firecrawl/Tavily is used only for missing P/E and P/B</strong> —
                all risk metrics (Alpha, Sharpe, Sortino, etc.) are computed directly from NAV history.
            </p>
            """,
            unsafe_allow_html=True,
        )

        uploaded_file = st.file_uploader(
            "Step 1 · Upload fund universe CSV",
            type=["csv"],
            help="Same CSV you feed into the offline data engine.",
        )
        if uploaded_file is not None:
            st.caption(
                f"Selected: **{uploaded_file.name}** · "
                f"{(len(uploaded_file.getbuffer()) / 1024):.1f} KB"
            )
            _log_session_event(
                f"uploaded_file name={uploaded_file.name} "
                f"size_kb={(len(uploaded_file.getbuffer())/1024):.1f}"
            )

        st.info(
            "Pipeline: **Scheme code resolution → NAV engine (parallel, 12 workers) "
            "→ PE/PB web lookup → category median fallback → scoring engine**"
        )

        st.markdown(
            """
            <ul class="mf-steps">
                <li>1 — Upload your latest CSV export.</li>
                <li>2 — Click <strong>Run analysis</strong> and watch live logs.</li>
                <li>3 — Download the scored Excel when complete.</li>
            </ul>
            """,
            unsafe_allow_html=True,
        )

        run_clicked = st.button(
            "Step 2 · Run analysis",
            type="primary",
            use_container_width=True,
            disabled=uploaded_file is None,
        )

        # ── State carried across rerun ─────────────────────────────────
        # NOTE(review): these are plain locals, re-initialized on every
        # Streamlit rerun — so the download area below only renders on the
        # rerun that produced the workbook, NOT on later reruns. For true
        # persistence these would need to live in st.session_state; confirm
        # intended behavior before relying on the comment below.
        generated_bytes: io.BytesIO | None = None
        generated_filename: str | None = None
        funds_count: int | None = None
        categories_count: int | None = None
        enrichment_summary: str | None = None
        timing_html: str | None = None

        if run_clicked:
            _log_session_event("run_analysis_clicked")

            if uploaded_file is None:
                # Defensive: the button is disabled without an upload, but
                # guard anyway in case of a stale rerun.
                st.warning("Please upload a CSV file first.")
                _log_session_event("run_aborted_no_upload")
            else:
                base_stem = Path(uploaded_file.name).stem
                stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                input_stem = f"{base_stem}_{stamp}"

                # Persist the upload to disk: the engines expect file paths.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
                    tmp.write(uploaded_file.getbuffer())
                    input_path = Path(tmp.name)

                out_dir = Path("output")
                out_dir.mkdir(exist_ok=True)
                generated_path = out_dir / f"fund_analysis_{input_stem}.xlsx"

                t_total_start = time.perf_counter()

                try:
                    with st.status("Processing…", expanded=True) as status:
                        live_lines: list[str] = []
                        live_box = st.empty()

                        # Noise patterns to suppress from the live log box
                        _SUPPRESS = (
                            "missing ScriptRunContext",
                            "FutureWarning",
                            "Passing literal json",
                            "To read from a literal string",
                            "return pd.read_json",
                        )

                        def _live_sink(chunk: str) -> None:
                            # Filter noise, keep only the last 50 lines, and
                            # re-render the rolling log box on each chunk.
                            clean = chunk.replace("\r", "")
                            new = [
                                ln for ln in clean.split("\n")
                                if ln.strip()
                                and not any(s in ln for s in _SUPPRESS)
                            ]
                            if not new:
                                return
                            live_lines.extend(new)
                            if len(live_lines) > 50:
                                del live_lines[:-50]
                            live_box.code("\n".join(live_lines), language="text")

                        # ── Phase 1: Enrichment ────────────────────────
                        st.write("**1/2 Enrichment** — scheme codes → NAV engine → PE/PB → medians…")
                        t_enrich_start = time.perf_counter()

                        enrichment, enrich_output = _run_with_captured_output(
                            enrich_csv,
                            str(input_path),
                            config=EnrichmentConfig(
                                enabled=True,
                                max_cells=None,
                                min_confidence=0.65,
                                resolve_scheme_codes=True,  # parallel scheme resolution
                                enable_nav_engine=True,  # parallel NAV engine (12 workers)
                                web_search_pe_pb_only=True,  # only PE/PB uses API credits
                                impute_unresolved=True,
                            ),
                            live_callback=_live_sink,
                        )

                        t_enrich_end = time.perf_counter()
                        enrich_secs = t_enrich_end - t_enrich_start

                        _log_session_block("enrichment_output", enrich_output)
                        _log_session_event(
                            f"enrichment_done "
                            f"checked={enrichment.examined_cells} "
                            f"nav={enrichment.nav_cells} "
                            f"web={enrichment.web_cells} "
                            f"imputed={enrichment.imputed_cells} "
                            f"skipped={enrichment.skipped_cells} "
                            f"codes={enrichment.resolved_codes} "
                            f"secs={enrich_secs:.1f}"
                        )

                        st.write(
                            f"✅ Enrichment done in **{enrich_secs:.0f}s** — "
                            f"checked {enrichment.examined_cells} cells, "
                            f"NAV filled {enrichment.nav_cells}, "
                            f"web filled {enrichment.web_cells}, "
                            f"imputed {enrichment.imputed_cells}"
                        )

                        # Phase 2 scores the *enriched* CSV, not the upload.
                        pipeline_input_path = Path(enrichment.enriched_csv_path)

                        # ── Phase 2: Scoring + Excel ───────────────────
                        st.write("**2/2 Scoring engine** — computing scores, ranking, generating Excel…")
                        t_engine_start = time.perf_counter()

                        funds, engine_output = _run_with_captured_output(
                            run_data_engine,
                            csv_path=str(pipeline_input_path),
                            output_path=str(generated_path),
                            use_comprehensive_scoring=True,
                            live_callback=_live_sink,
                        )

                        t_engine_end = time.perf_counter()
                        engine_secs = t_engine_end - t_engine_start
                        total_secs = time.perf_counter() - t_total_start

                        _log_session_block("engine_output", engine_output)
                        _log_session_event(
                            f"engine_done funds={len(funds)} "
                            f"secs={engine_secs:.1f} total={total_secs:.1f}"
                        )

                        st.write(
                            f"✅ Scoring done in **{engine_secs:.0f}s** — "
                            f"{len(funds)} funds scored"
                        )

                        status.update(
                            label=f"✅ Complete — {total_secs:.0f}s total",
                            state="complete",
                            expanded=False,
                        )

                except Exception as exc:
                    # Any failure in either phase aborts the run; the full
                    # traceback goes to the session log and the UI.
                    err_text = "".join(traceback.format_exception(exc))
                    _log_session_block("run_failure", err_text)
                    _log_session_event(f"run_failed error={exc}")
                    st.error("Run failed. See terminal for traceback.")
                    st.code(err_text, language="text")
                    return

                # ── Summary ────────────────────────────────────────────
                if enrichment.errors:
                    st.warning("Enrichment completed with warnings — check scratchpad for details.")
                    if enrichment.scratchpad_path:
                        st.caption(f"Scratchpad: `{enrichment.scratchpad_path}`")

                enrichment_summary = (
                    f"Enrichment: {enrichment.examined_cells} cells checked — "
                    f"NAV filled {enrichment.nav_cells}, "
                    f"web filled {enrichment.web_cells}, "
                    f"imputed {enrichment.imputed_cells}, "
                    f"skipped {enrichment.skipped_cells}."
                )

                timing_html = (
                    f'<div class="mf-timing">'
                    f'⏱ Enrichment: <strong>{enrich_secs:.0f}s</strong> | '
                    f'Scoring: <strong>{engine_secs:.0f}s</strong> | '
                    f'Total: <strong>{total_secs:.0f}s ({total_secs/60:.1f} min)</strong>'
                    f"{' 🎯 Under 3 min!' if total_secs < 180 else ''}"
                    f'</div>'
                )

                # Read the workbook into memory so the download button does
                # not depend on the file remaining on disk.
                with generated_path.open("rb") as f:
                    generated_bytes = io.BytesIO(f.read())
                generated_filename = generated_path.name
                funds_count = len(funds)
                categories_count = len({f.category for f in funds})

                st.success("Step 3 · Excel ready — download below.")
                if enrichment_summary:
                    st.info(enrichment_summary)

        # ── Download area ──────────────────────────────────────────────
        if generated_bytes and generated_filename:

            if timing_html:
                st.markdown(timing_html, unsafe_allow_html=True)

            st.markdown(
                """
                <div class="mf-metrics">
                    <div class="mf-metric">
                        <div class="mf-metric-label">Schemes scored</div>
                        <div class="mf-metric-value">{funds_count}</div>
                    </div>
                    <div class="mf-metric">
                        <div class="mf-metric-label">Categories</div>
                        <div class="mf-metric-value">{categories_count}</div>
                    </div>
                    <div class="mf-metric">
                        <div class="mf-metric-label">Output format</div>
                        <div class="mf-metric-value">Excel (.xlsx)</div>
                    </div>
                </div>
                """.format(
                    funds_count=funds_count or 0,
                    categories_count=categories_count or 0,
                ),
                unsafe_allow_html=True,
            )

            st.markdown(
                '<div class="mf-download-label">Download the scored workbook:</div>',
                unsafe_allow_html=True,
            )
            st.download_button(
                label="⬇️ Download processed Excel",
                data=generated_bytes.getvalue(),
                file_name=generated_filename,
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                use_container_width=True,
            )

    with tab_about:
        st.markdown("### What the pipeline does")
        st.markdown(
            """
            | Phase | What happens |
            |---|---|
            | **0 — Scheme resolution** | Parallel fuzzy-match of missing AMFI scheme codes (8 threads) |
            | **1 — NAV engine** | Trailing 3Y risk metrics computed from mfapi NAV history (12 threads) |
            | **2 — PE/PB web search** | Tavily (primary) or Firecrawl (fallback) — only for missing P/E and P/B |
            | **3 — Median impute** | Category median fills remaining gaps for young/NA funds |
            | **4 — Scoring** | Top/Bottom 10 per category, 10-point weighted model |
            | **5 — Excel export** | Conditional formatting, quartile bands, benchmark rows |

            **Cache**: NAV history is cached in Neon (production) or SQLite (local) with a 7-day TTL.
            Second runs are near-instant for cached funds.
            """
        )

    st.markdown("</section>", unsafe_allow_html=True)
    st.markdown("</div>", unsafe_allow_html=True)
| 506 |
+
|
| 507 |
+
|
| 508 |
+
# Script entry point. Streamlit re-executes this module top-to-bottom on
# every interaction, so main() is effectively the per-rerun render function.
if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas>=2.0.0
|
| 2 |
+
openpyxl>=3.1.0
|
| 3 |
+
reportlab>=4.0.0
|
| 4 |
+
matplotlib>=3.7.0
|
| 5 |
+
numpy>=1.24.0
|
| 6 |
+
click>=8.1.0
|
| 7 |
+
streamlit>=1.31.0
|
| 8 |
+
requests>=2.31.0
|
| 9 |
+
python-dateutil>=2.8.2
|
| 10 |
+
fuzzywuzzy>=0.18.0
|
| 11 |
+
python-Levenshtein>=0.21.0
|
| 12 |
+
mftool>=1.0.0
|
| 13 |
+
yfinance>=1.2.0
|
| 14 |
+
beautifulsoup4>=4.14.3
|
| 15 |
+
scipy>=1.17.1
|
| 16 |
+
lxml>=6.0.2
|
| 17 |
+
openai>=1.0.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Mutual Fund Portfolio Analyzer
|
src/charts.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Charts module: generates matplotlib charts for embedded use in PDF reports.
|
| 3 |
+
All functions return a BytesIO buffer containing a PNG image.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import io
|
| 7 |
+
import numpy as np
|
| 8 |
+
import matplotlib
|
| 9 |
+
matplotlib.use('Agg') # non-interactive backend
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import matplotlib.patches as mpatches
|
| 12 |
+
from matplotlib.figure import Figure
|
| 13 |
+
from typing import Dict, List, Optional
|
| 14 |
+
|
| 15 |
+
# Brand colours
|
| 16 |
+
BRAND_BLUE = "#1F3864"
|
| 17 |
+
BRAND_ACCENT = "#2E75B6"
|
| 18 |
+
GREENS = ["#2ECC71", "#27AE60", "#1ABC9C", "#16A085", "#52BE80"]
|
| 19 |
+
REDS = ["#E74C3C", "#C0392B", "#EC7063"]
|
| 20 |
+
PALETTE = [
|
| 21 |
+
"#2E75B6", "#E67E22", "#2ECC71", "#E74C3C", "#9B59B6",
|
| 22 |
+
"#1ABC9C", "#F39C12", "#3498DB", "#D35400", "#27AE60",
|
| 23 |
+
]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _buf(fig: Figure) -> io.BytesIO:
    """Serialize *fig* to an in-memory PNG (150 dpi, tight bbox), close the
    figure to release its resources, and return the rewound buffer."""
    stream = io.BytesIO()
    fig.savefig(stream, format='png', bbox_inches='tight', dpi=150)
    plt.close(fig)
    stream.seek(0)
    return stream
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def holdings_pie_chart(holdings_data: Dict[str, float], title: str = "Portfolio Allocation") -> io.BytesIO:
    """Render a pie chart of holdings (scheme name → current value).

    Returns a BytesIO buffer containing the PNG image.
    """
    names = list(holdings_data.keys())
    amounts = list(holdings_data.values())

    # Legend labels: text before the first '-', capped at 22 characters.
    legend_labels = [name.split('-')[0].strip()[:22] for name in names]

    fig, ax = plt.subplots(figsize=(5, 4))
    wedges, _texts, pct_texts = ax.pie(
        amounts,
        labels=None,
        autopct='%1.1f%%',
        startangle=140,
        colors=PALETTE[:len(amounts)],
        pctdistance=0.78,
    )
    for pct_label in pct_texts:
        pct_label.set_fontsize(7)
        pct_label.set_color("white")

    ax.legend(wedges, legend_labels, loc="center left", bbox_to_anchor=(1, 0.5),
              fontsize=7, frameon=False)
    ax.set_title(title, fontsize=10, fontweight='bold', color=BRAND_BLUE, pad=10)
    fig.tight_layout()
    return _buf(fig)
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def sector_bar_chart(sector_data: Dict[str, float], title: str = "Sector Allocation (%)") -> io.BytesIO:
    """Render a horizontal bar chart of sector weights, largest on top.

    Empty input falls back to a single "Data Not Available" bar.
    """
    if not sector_data:
        sector_data = {"Data Not Available": 100}

    # Sort (value, name) pairs descending so the biggest sector draws first.
    ordered = sorted(zip(sector_data.values(), sector_data.keys()), reverse=True)
    values, sectors = zip(*ordered)

    fig, ax = plt.subplots(figsize=(5, max(3, len(sectors) * 0.35)))
    bars = ax.barh(sectors, values, color=BRAND_ACCENT, edgecolor='white', height=0.6)

    # Annotate each bar with its percentage just past the bar end.
    for rect, pct in zip(bars, values):
        ax.text(rect.get_width() + 0.3, rect.get_y() + rect.get_height() / 2,
                f'{pct:.1f}%', va='center', fontsize=7, color='black')

    ax.set_xlabel("Allocation (%)", fontsize=8, color='gray')
    ax.set_title(title, fontsize=10, fontweight='bold', color=BRAND_BLUE)
    ax.set_xlim(0, max(values) * 1.2)
    ax.invert_yaxis()
    ax.spines[['top', 'right']].set_visible(False)
    ax.tick_params(axis='y', labelsize=8)
    fig.tight_layout()
    return _buf(fig)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def market_cap_pie(market_cap_data: Dict[str, float]) -> io.BytesIO:
    """Pie chart for the Large/Mid/Small/Other market cap split.

    Zero-weight buckets are dropped. If no bucket has a positive weight
    (data unavailable), a single placeholder slice is drawn instead of
    handing matplotlib an empty series — consistent with the
    ``sector_bar_chart`` fallback.

    Returns a BytesIO buffer containing the PNG image.
    """
    default = {"Large Cap": 0, "Mid Cap": 0, "Small Cap": 0, "Others": 0}
    data = {**default, **market_cap_data}
    data = {k: v for k, v in data.items() if v > 0}
    if not data:
        # Robustness fix: all-zero/missing input previously produced an
        # empty pie; render an explicit placeholder instead.
        data = {"Data Not Available": 100}

    colors = {"Large Cap": "#2E75B6", "Mid Cap": "#E67E22",
              "Small Cap": "#2ECC71", "Others": "#BDC3C7"}

    labels = list(data.keys())
    values = list(data.values())
    # Unknown bucket names fall back to a neutral grey.
    clrs = [colors.get(l, "#95A5A6") for l in labels]

    fig, ax = plt.subplots(figsize=(4, 3.5))
    wedges, _, autotexts = ax.pie(
        values, labels=None, autopct='%1.1f%%',
        colors=clrs, startangle=90, pctdistance=0.75
    )
    for at in autotexts:
        at.set_fontsize(8)
        at.set_color("white")

    ax.legend(wedges, labels, loc="lower center", bbox_to_anchor=(0.5, -0.12),
              ncol=2, fontsize=8, frameon=False)
    ax.set_title("Market Cap Allocation", fontsize=10, fontweight='bold', color=BRAND_BLUE)
    fig.tight_layout()
    return _buf(fig)
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def holding_vs_benchmark_chart(
    fund_name: str,
    cagr_data: Dict[str, Dict[str, Optional[float]]],
) -> io.BytesIO:
    """
    Bar chart comparing fund CAGR vs benchmark and category across periods.

    cagr_data = {
        '1Y': {'fund': 12.5, 'benchmark': 14.6, 'category': 13.4},
        '3Y': {...}, '5Y': {...}, '10Y': {...}
    }

    Returns an in-memory PNG (via the module's ``_buf`` helper).
    """
    periods = list(cagr_data.keys())
    # NOTE: `or 0` maps both a missing value (None) and a literal 0.0 CAGR to a
    # zero-height bar — zero bars are then skipped by label_bars below.
    fund_vals = [cagr_data[p].get('fund') or 0 for p in periods]
    bm_vals = [cagr_data[p].get('benchmark') or 0 for p in periods]
    cat_vals = [cagr_data[p].get('category') or 0 for p in periods]

    x = np.arange(len(periods))
    width = 0.25  # three grouped bars per period

    fig, ax = plt.subplots(figsize=(5, 3.5))
    b1 = ax.bar(x - width, fund_vals, width, label='Fund', color=BRAND_ACCENT, zorder=2)
    b2 = ax.bar(x, bm_vals, width, label='Benchmark', color='#E67E22', zorder=2)
    b3 = ax.bar(x + width, cat_vals, width, label='Category', color='#BDC3C7', zorder=2)

    def label_bars(bars):
        # Annotate each non-zero bar with its value just above the top.
        for bar in bars:
            h = bar.get_height()
            if h:
                ax.text(bar.get_x() + bar.get_width() / 2, h + 0.2,
                        f'{h:.1f}', ha='center', va='bottom', fontsize=6.5)

    label_bars(b1); label_bars(b2); label_bars(b3)

    ax.set_xticks(x)
    ax.set_xticklabels(periods, fontsize=9)
    ax.set_ylabel("CAGR (%)", fontsize=8, color='gray')
    # Fund name is truncated to 30 chars so the two-line title stays readable.
    ax.set_title(f"{fund_name[:30]}\nReturns vs Benchmark", fontsize=9, fontweight='bold', color=BRAND_BLUE)
    ax.legend(fontsize=7, frameon=False)
    ax.spines[['top', 'right']].set_visible(False)
    ax.yaxis.grid(True, linestyle='--', alpha=0.5, zorder=0)
    ax.set_axisbelow(True)
    fig.tight_layout()
    return _buf(fig)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def quartile_analysis_grid(holdings_data: list) -> io.BytesIO:
    """
    Quartile Analysis Grid — based on the senior's handwritten sketch.

    Layout (matching sketch exactly):
        Columns : 1Y | 3Y | 5Y | 10Y
        For each holding, show 3 rows:
          BM    : Benchmark CAGR value for each period
          Cat   : Category Average CAGR for each period
          Scheme: Fund CAGR + Quartile (Q1/Q2/Q3/Q4) — color-coded

    holdings_data: list of dicts, each with keys:
        scheme_name, rank_in_category, total_in_category,
        cagr_1y/_bm/_cat, cagr_3y/_bm/_cat, cagr_5y/_bm/_cat, cagr_10y/_bm/_cat

    Returns an in-memory PNG (via the module's ``_buf`` helper).
    """
    PERIODS = ["1Y", "3Y", "5Y", "10Y"]
    PERIOD_KEYS = ["1y", "3y", "5y", "10y"]
    ROW_LABELS = ["BM", "Cat", "Scheme"]

    # Cell fill colours: quartile shades plus header / benchmark / category rows.
    Q_COLORS = {1: "#90EE90", 2: "#BDD7EE", 3: "#FFD580", 4: "#FFB3B3"}
    HEADER_CLR = "#1F3864"
    BM_CLR = "#D6E4F0"
    CAT_CLR = "#EBF5FB"

    def get_quartile(rank, total):
        # Rank percentile -> quartile bucket; unknown rank defaults to Q4
        # (worst) so missing data is never displayed as top-quartile.
        if not rank or not total or total == 0:
            return 4
        pct = rank / total
        if pct <= 0.25: return 1
        if pct <= 0.50: return 2
        if pct <= 0.75: return 3
        return 4

    def fmt(v):
        # Format a CAGR as "12.3%", or the placeholder glyph when absent.
        if v is None: return "β"
        try: return f"{float(v):.1f}%"
        except: return "β"

    n_holdings = len(holdings_data)
    rows_per = 3  # BM, Cat, Scheme
    n_rows = n_holdings * rows_per + 1  # +1 for header row
    n_cols = 5  # Label + 4 periods

    # Figure height grows with row count but never shrinks below 4.5".
    fig_h = max(4.5, 0.5 * n_rows + 1.5)
    fig, ax = plt.subplots(figsize=(10, fig_h))
    ax.set_xlim(0, n_cols)
    ax.set_ylim(0, n_rows)
    ax.axis('off')

    def cell(row, col, text, bg, tc="#1F3864", bold=False, fs=8):
        # Draw one grid cell: a unit rectangle plus centred text. Row 0 is the
        # TOP of the grid, hence the (n_rows - row - 1) y-flip.
        ax.add_patch(plt.Rectangle(
            (col, n_rows - row - 1), 1, 1,
            facecolor=bg, edgecolor="#AAAAAA", linewidth=0.5, zorder=1))
        ax.text(col + 0.5, n_rows - row - 0.5, text,
                ha='center', va='center', fontsize=fs,
                fontweight='bold' if bold else 'normal',
                color=tc, zorder=2, wrap=True)

    # Column header row
    # NOTE(review): col_widths is computed but never applied — the grid is
    # drawn on equal 1-unit columns; verify whether proportional widths were
    # intended.
    col_widths = [1.5, 1, 1, 1, 0.8]  # proportional, but we draw on a 5-unit grid
    cell(0, 0, "Scheme / Row", HEADER_CLR, "white", bold=True, fs=7.5)
    for ci, p in enumerate(PERIODS, 1):
        cell(0, ci, p, HEADER_CLR, "white", bold=True, fs=10)

    # Data rows
    cur = 1
    for h in holdings_data:
        rank = h.get("rank_in_category")
        total = h.get("total_in_category")
        q = get_quartile(rank, total)
        qc = Q_COLORS[q]
        q_lbl = f"Q{q}"
        name = str(h.get("scheme_name", ""))[:22]

        for ri, rl in enumerate(ROW_LABELS):
            # First column: row label (scheme name only on the BM row).
            if ri == 0:
                lbl = f"{name}\n[BM]"
                bg = BM_CLR
            elif ri == 1:
                lbl = "[Category]"
                bg = CAT_CLR
            else:
                lbl = f"[Scheme β {q_lbl}]"
                bg = qc

            cell(cur + ri, 0, lbl, bg, bold=(ri == 2), fs=6.5)

            for ci, pk in enumerate(PERIOD_KEYS, 1):
                if ri == 0:
                    v = fmt(h.get(f"cagr_{pk}_bm"))
                    bg_c = BM_CLR
                elif ri == 1:
                    v = fmt(h.get(f"cagr_{pk}_cat"))
                    bg_c = CAT_CLR
                else:
                    fv = h.get(f"cagr_{pk}")
                    bmv = h.get(f"cagr_{pk}_bm")
                    v = fmt(fv)
                    bg_c = qc
                    # Green tick if fund beats benchmark this period
                    if fv is not None and bmv is not None and float(fv) >= float(bmv):
                        ax.text(ci + 0.88, n_rows - (cur + ri) - 0.18,
                                "β", fontsize=8, color="#006400", va='center', zorder=3)

                cell(cur + ri, ci, v, bg_c, bold=(ri == 2), fs=8)

        # Divider between schemes
        y = n_rows - (cur + rows_per) - 0.02
        ax.axhline(y=y, xmin=0, xmax=1, color="#555555", linewidth=1.0, zorder=4)
        cur += rows_per

    # Legend
    patches = [mpatches.Patch(facecolor=Q_COLORS[i], edgecolor='#AAAAAA',
               label=f"Q{i} β {['Top Quartile','Above Avg','Below Avg','Bottom Quartile'][i-1]}")
               for i in range(1, 5)]
    ax.legend(handles=patches, loc='lower center',
              bbox_to_anchor=(0.5, -0.09), ncol=4, fontsize=7.5, frameon=False)

    ax.set_title("Quartile Analysis β Scheme vs Benchmark & Category Average",
                 fontsize=10, fontweight='bold', color=HEADER_CLR, pad=10)
    fig.tight_layout()
    return _buf(fig)
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def wealth_projection_chart(projection: Dict[int, float], current_value: float) -> io.BytesIO:
    """Line chart showing projected wealth growth at 12% over years."""
    xs = [0, *projection.keys()]
    ys = [current_value, *projection.values()]

    fig, ax = plt.subplots(figsize=(5, 3))
    ax.plot(xs, ys, marker='o', color=BRAND_ACCENT, linewidth=2, markersize=6)

    # Annotate every point in lakhs (value / 1e5), slightly above the marker.
    for year, amount in zip(xs, ys):
        ax.annotate(f'βΉ{amount/1e5:.1f}L', (year, amount),
                    textcoords="offset points", xytext=(0, 8),
                    ha='center', fontsize=7.5, color=BRAND_BLUE)

    ax.fill_between(xs, ys, alpha=0.15, color=BRAND_ACCENT)
    ax.set_xticks(xs)
    tick_labels = ['Now' if year == 0 else f'{year}Y' for year in xs]
    ax.set_xticklabels(tick_labels, fontsize=8)
    ax.set_ylabel("Portfolio Value (βΉ)", fontsize=8, color='gray')
    ax.set_title("Wealth Projection @ 12% p.a.", fontsize=10, fontweight='bold', color=BRAND_BLUE)
    ax.spines[['top', 'right']].set_visible(False)
    ax.yaxis.grid(True, linestyle='--', alpha=0.4)
    ax.set_axisbelow(True)
    fig.tight_layout()
    return _buf(fig)
|
src/csv_enrichment.py
ADDED
|
@@ -0,0 +1,941 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""CSV Enrichment β missing-cell filler for mutual fund statistics.
|
| 2 |
+
|
| 3 |
+
Fill pipeline (in order):
|
| 4 |
+
0. SCHEME CODE RESOLUTION β fuzzy-match missing scheme codes via mfapi
|
| 5 |
+
1. TRIAGE β classify every missing cell
|
| 6 |
+
2. NAV ENGINE β compute trailing-3Y metrics from NAV history
|
| 7 |
+
3. WEB SEARCH (Firecrawl) β scrape trusted sites for remaining gaps
|
| 8 |
+
4. CATEGORY MEDIAN β last-resort statistical imputation
|
| 9 |
+
|
| 10 |
+
Fixes vs original:
|
| 11 |
+
β’ Phase-label typo in log (Phase 4 β Phase 5 for imputation step)
|
| 12 |
+
β’ Unknown launch date β is_young = False (attempt search, don't silently impute)
|
| 13 |
+
β’ _normalize_fund_name uses re.sub to handle multi-hyphen sequences
|
| 14 |
+
β’ scheme code resolution runs BEFORE triage so NAV engine fires for more funds
|
| 15 |
+
β’ Standard Deviation now included in NAV-computable metrics
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import csv
|
| 21 |
+
import os
|
| 22 |
+
import re
|
| 23 |
+
from dataclasses import dataclass, field
|
| 24 |
+
from datetime import datetime
|
| 25 |
+
from pathlib import Path
|
| 26 |
+
from statistics import median
|
| 27 |
+
from typing import Any
|
| 28 |
+
|
| 29 |
+
import requests
|
| 30 |
+
|
| 31 |
+
from src.nav_metrics_engine import NavEngineCache, compute_nav_metrics_for_scheme
|
| 32 |
+
from src.scheme_resolver import (
|
| 33 |
+
resolve_missing_scheme_codes,
|
| 34 |
+
resolve_scheme_code_for_fund_name,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# Fund names attempted by NAV engine this session (module-level cache so the
# same fund is not re-fetched within one process run)
_NAV_ATTEMPTED_FUNDS: set[str] = set()


# ── Constants ────────────────────────────────────────────────────────────────

# Cell values that count as "missing" once lowercased/stripped.
MISSING_TOKENS = {"", "-", "na", "n/a", "n/a*", "nan", "none", "null", "β"}

# CSV columns the enrichment pipeline tries to fill. Note the embedded "\n"
# in the capture-ratio headers — they must match the CSV header bytes exactly.
TARGET_COLUMNS = (
    "Alpha",
    "Beta",
    "Standard Deviation",
    "Sharpe Ratio",
    "Volatility",
    "Mean",
    "Sortino Ratio",
    "Up Market Capture\nRatio",
    "Down Market Capture\nRatio",
    "Maximum Drawdown",
    "R-Squared",
    "Information Ratio",
    "P/E Ratio",
    "P/B Ratio",
)

# For all of these risk/ratio metrics, a literal numeric 0 is usually a
# data-quality gap rather than a meaningful "zero risk" value. We therefore
# treat 0 as missing so that enrichment (NAV engine + web search) can attempt
# to backfill real numbers.
ZERO_AS_MISSING_COLUMNS = set(TARGET_COLUMNS)

# ALL metrics that are equity-specific and should NOT be attempted
# via NAV engine or web search for debt/liquid/overnight funds.
# Sharpe, Sortino, Volatility etc. ARE computed by NAV engine for equity
# but for debt funds they either don't exist or are meaningless.
EQUITY_ONLY_METRICS = {
    "Alpha",
    "Beta",
    "Standard Deviation",
    "Sharpe Ratio",
    "Volatility",
    "Mean",
    "Sortino Ratio",
    "Up Market Capture\nRatio",
    "Down Market Capture\nRatio",
    "Maximum Drawdown",
    "R-Squared",
    "Information Ratio",
    "P/E Ratio",
    "P/B Ratio",
}

# Category prefixes that identify debt-style funds (matched case-insensitively
# by _is_debt_category).
DEBT_CATEGORIES_PREFIXES = (
    "Debt:", "Liquid", "Overnight", "Money Market", "Gilt",
    "Fixed Maturity", "Interval Fund", "FMP",
)

# Funds younger than this (in years) are triaged as YOUNG_FUND — their
# trailing risk metrics cannot exist yet.
MIN_YEARS_FOR_RISK_METRICS = 3

# Domains whose scraped results are preferred over arbitrary search hits.
TRUSTED_DOMAINS = (
    "valueresearchonline.com",
    "morningstar.in",
    "moneycontrol.com",
    "advisorkhoj.com",
    "amfiindia.com",
    "tickertape.in",
)

# Column name -> lowercase labels that may appear in a scraped markdown table
# row for that metric (checked in order; first match wins).
METRIC_ALIASES: dict[str, list[str]] = {
    "Alpha": ["alpha"],
    "Beta": ["beta"],
    "Standard Deviation": ["standard deviation", "std dev", "std. dev"],
    "Sharpe Ratio": ["sharpe ratio", "sharpe"],
    "Volatility": ["volatility"],
    "Mean": ["mean", "mean return"],
    "Sortino Ratio": ["sortino ratio", "sortino"],
    "Up Market Capture\nRatio": ["upside", "up market capture", "upside capture", "up capture"],
    "Down Market Capture\nRatio": ["downside", "down market capture", "downside capture", "down capture"],
    "Maximum Drawdown": ["maximum drawdown", "max drawdown", "maximum"],
    "R-Squared": ["r-squared", "r squared", "r2", "rΒ²"],
    "Information Ratio": ["information ratio", "info ratio"],
    "P/E Ratio": ["p/e ratio", "p/e", "pe ratio", "pe"],
    "P/B Ratio": ["p/b ratio", "p/b", "pb ratio", "pb"],
}
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
# ββ Config & Result ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
+
|
| 125 |
+
@dataclass
class EnrichmentConfig:
    """Tunables controlling the CSV enrichment pipeline."""
    enabled: bool = True                  # master on/off switch for enrichment
    max_cells: int | None = None          # cap on cells processed (None = unlimited)
    min_confidence: float = 0.65          # minimum confidence threshold — consumed downstream; TODO confirm exact use
    search_limit: int = 5                 # max results requested per web search
    impute_unresolved: bool = True        # fall back to category-median imputation
    filter_category: str | None = None    # restrict the whole pass to one category
    target_columns: tuple[str, ...] = TARGET_COLUMNS      # columns to fill
    trusted_domains: tuple[str, ...] = TRUSTED_DOMAINS    # preferred scrape sources
    enable_nav_engine: bool = True        # compute metrics from NAV history first
    resolve_scheme_codes: bool = True  # run pre-triage code resolution
    web_search_pe_pb_only: bool = False  # limit web search to P/E and P/B only
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
@dataclass
class EnrichmentResult:
    """Summary of one enrichment run: file paths plus per-phase cell counters."""
    input_csv_path: str                # CSV the run started from
    enriched_csv_path: str             # CSV written with filled values
    scratchpad_path: str | None = None # optional log/notes file, if produced
    examined_cells: int = 0            # missing cells considered
    updated_cells: int = 0             # cells filled with a real value
    imputed_cells: int = 0             # cells filled by category-median fallback
    skipped_cells: int = 0             # cells left untouched
    resolved_codes: int = 0  # NEW: how many scheme codes were resolved
    # Optional breakdowns used by older callers / UIs
    nav_cells: int = 0  # cells filled via NAV engine
    web_cells: int = 0  # cells filled via web search
    errors: list[str] = field(default_factory=list)  # non-fatal error messages
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ββ Triage labels ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
+
|
| 158 |
+
# Triage verdicts attached to each missing cell by _triage_missing_cells.
TRIAGE_YOUNG = "YOUNG_FUND"               # fund too young for the metric to exist yet
TRIAGE_NOT_APPLICABLE = "NOT_APPLICABLE"  # metric meaningless for this category (not emitted in this module's visible code — TODO confirm usage)
TRIAGE_SEARCHABLE = "SEARCHABLE"          # metric should exist; try NAV engine / web search
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
# ββ Helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 164 |
+
|
| 165 |
+
def _load_env() -> None:
    """Best-effort loader for a ``.env`` file two levels above this module.

    Each ``KEY=VALUE`` line is placed into ``os.environ`` unless the key is
    already set (setdefault — real environment wins). Blank lines, comments
    and lines without '=' are ignored; a missing file is a no-op.
    """
    dotenv = Path(__file__).resolve().parent.parent / ".env"
    if not dotenv.exists():
        return
    for raw_line in dotenv.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        os.environ.setdefault(key.strip(), value.strip())
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _is_missing(val: str | None) -> bool:
    """True when *val* is None, blank, or one of the known missing-data tokens."""
    normalized = "" if val is None else val.strip().lower()
    return normalized in MISSING_TOKENS
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def _parse_launch_date(val: str | None) -> datetime | None:
|
| 182 |
+
if not val:
|
| 183 |
+
return None
|
| 184 |
+
for fmt in ("%d-%m-%Y", "%Y-%m-%d", "%d/%m/%Y"):
|
| 185 |
+
try:
|
| 186 |
+
return datetime.strptime(val.strip(), fmt)
|
| 187 |
+
except ValueError:
|
| 188 |
+
continue
|
| 189 |
+
return None
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def _is_debt_category(category: str) -> bool:
    """True when *category* starts with any debt/liquid-style prefix (case-insensitive)."""
    normalized = (category or "").strip().lower()
    return any(normalized.startswith(prefix.lower())
               for prefix in DEBT_CATEGORIES_PREFIXES)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def _normalize_fund_name(name: str) -> str:
|
| 201 |
+
# FIX: use re.sub so multi-hyphen runs collapse to a single space
|
| 202 |
+
return re.sub(r"-+", " ", name).strip()
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _build_category_medians(
    rows: list[dict[str, str]], columns: tuple[str, ...]
) -> dict[str, dict[str, float]]:
    """Returns {category: {column: median_value}}.

    Only cells that parse as numbers (after stripping '%' and ',') contribute;
    rows without a Category are ignored, and columns with no parseable samples
    are omitted from that category's map. Medians are rounded to 4 decimals.
    """
    samples: dict[str, dict[str, list[float]]] = {}
    for record in rows:
        category = record.get("Category", "")
        if not category:
            continue
        per_col = samples.setdefault(category, {c: [] for c in columns})
        for column in columns:
            cleaned = (record.get(column) or "").strip().replace("%", "").replace(",", "")
            if cleaned.lower() in MISSING_TOKENS:
                continue
            try:
                per_col[column].append(float(cleaned))
            except ValueError:
                continue

    medians: dict[str, dict[str, float]] = {}
    for category, per_col in samples.items():
        medians[category] = {
            column: round(median(values), 4)
            for column, values in per_col.items()
            if values
        }
    return medians
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# ββ Triage βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 235 |
+
|
| 236 |
+
@dataclass
class TriagedCell:
    """One missing CSV cell plus the triage verdict that explains how to fill it."""
    row_idx: int        # index into the CSV rows list
    fund_name: str      # the row's "Fund" column
    category: str       # the row's "Category" column
    column: str         # metric column that is missing
    current_value: str  # raw cell content as read from the CSV
    label: str          # one of the TRIAGE_* verdict constants
    reason: str         # human-readable justification for the verdict
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def _triage_missing_cells(
    rows: list[dict[str, str]],
    config: EnrichmentConfig,
) -> list[TriagedCell]:
    """Classify every missing cell with reasoning.

    For each row (optionally restricted by config.filter_category) and each
    target column, a cell counts as missing when it is blank/"-"/"N/A"/etc.,
    or when it is an exact numeric 0 for the risk-ratio columns. Missing
    cells are labelled YOUNG_FUND (fund has < MIN_YEARS_FOR_RISK_METRICS
    years of history, so the metric cannot exist yet) or SEARCHABLE
    (metric should exist — NAV engine / web search will be attempted).
    """
    today = datetime.now()
    cells: list[TriagedCell] = []

    for idx, row in enumerate(rows):
        fund = row.get("Fund", "")
        cat = row.get("Category", "")
        launch_str = row.get("Launch Date", "")

        # Category filter narrows the whole pass to one category when set.
        if config.filter_category and cat != config.filter_category:
            continue

        launch_dt = _parse_launch_date(launch_str)

        if launch_dt is not None:
            age_years = (today - launch_dt).days / 365.25
            is_young = age_years < MIN_YEARS_FOR_RISK_METRICS
        else:
            # FIX: unknown date -> do NOT silently mark as young; attempt search
            is_young = False

        # NOTE(review): is_debt is computed but not referenced below —
        # presumably a later phase consumes debt-ness; verify before removing.
        is_debt = _is_debt_category(cat)

        for col in config.target_columns:
            raw = (row.get(col) or "").strip()

            # Base missing check (blank, "-", "N/A", etc.)
            is_missing_val = _is_missing(raw)

            # Additionally, for all ZERO_AS_MISSING_COLUMNS, treat an exact
            # numeric 0 as "missing" so enrichment will try to fill it.
            if not is_missing_val and col in ZERO_AS_MISSING_COLUMNS:
                norm = raw.replace("%", "").replace(",", "").strip()
                try:
                    if float(norm) == 0.0:
                        is_missing_val = True
                except ValueError:
                    pass

            if not is_missing_val:
                continue

            if is_young:
                cells.append(TriagedCell(
                    row_idx=idx, fund_name=fund, category=cat, column=col,
                    current_value=raw, label=TRIAGE_YOUNG,
                    reason=(f"Fund launched {launch_str or '(unknown)'}, "
                            f"<{MIN_YEARS_FOR_RISK_METRICS}yr history β metric not computed yet"),
                ))
            else:
                cells.append(TriagedCell(
                    row_idx=idx, fund_name=fund, category=cat, column=col,
                    current_value=raw, label=TRIAGE_SEARCHABLE,
                    reason=(f"Fund launched {launch_str or '(unknown date)'}, "
                            f"category '{cat}' β metric should exist, attempting NAV/web"),
                ))

    return cells
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
# ββ Markdown table parser ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 312 |
+
|
| 313 |
+
def _extract_number(text: str) -> float | None:
    """Pull the first signed decimal number out of *text*, or None.

    Commas are stripped first; known missing-data tokens short-circuit
    to None before any regex work.
    """
    cleaned = text.strip().replace(",", "")
    if cleaned.lower() in MISSING_TOKENS or cleaned == "β":
        return None
    hit = re.search(r"-?\d+\.?\d*", cleaned)
    if hit is None:
        return None
    try:
        return float(hit.group())
    except ValueError:
        return None
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def _parse_table_row(markdown: str, alias: str) -> float | None:
    """Extract the first numeric cell after *alias* in a markdown table row.

    For a row like ``| Alpha | 1.59 | -0.56 | 8.25 |`` this returns 1.59 —
    the fund's own column (leftmost numeric value after the label). That is
    intentional: sites like Morningstar lay rows out as Fund | Category |
    Index, and we want the fund value, not the category or index value.
    """
    row_pattern = re.compile(
        r"\|\s*" + re.escape(alias) + r"\s*\|(.+?)(?:\n|$)",
        re.IGNORECASE,
    )
    for hit in row_pattern.finditer(markdown):
        for chunk in hit.group(1).split("|"):
            number = _extract_number(chunk.strip())
            if number is not None:
                return number
    return None
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
def _parse_metrics_from_markdown(
    markdown: str, wanted_metrics: list[str]
) -> dict[str, float | None]:
    """Look up each wanted metric in scraped markdown via its known aliases.

    Aliases are tried in order; the first alias that appears in the text AND
    yields a numeric table value wins. Metrics never found map to None.
    """
    haystack = markdown.lower()
    found: dict[str, float | None] = {}
    for metric in wanted_metrics:
        found[metric] = None
        for alias in METRIC_ALIASES.get(metric, [metric.lower()]):
            if alias.lower() not in haystack:
                continue
            value = _parse_table_row(markdown, alias)
            if value is not None:
                found[metric] = value
                break
    return found
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
# ββ Web search (Firecrawl) βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 367 |
+
|
| 368 |
+
def _call_tavily_search(query: str, api_key: str, limit: int = 5) -> list[dict]:
    """Search using Tavily API. Returns list of dicts with 'url' and 'markdown' keys.

    Any failure (network, HTTP status, bad JSON) is logged and yields [].
    """
    payload = {
        "api_key": api_key,
        "query": query,
        "max_results": limit,
        "include_raw_content": True,
        "search_depth": "advanced",
    }
    try:
        resp = requests.post(
            "https://api.tavily.com/search",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=30,
        )
        resp.raise_for_status()
        body = resp.json()
        # Prefer the full page text; fall back to the short content snippet.
        return [
            {
                "url": item.get("url", ""),
                "markdown": item.get("raw_content") or item.get("content", ""),
            }
            for item in body.get("results", [])
        ]
    except Exception as exc:
        print(f" [tavily] search error: {exc}")
        return []


# Keep firecrawl as alias name so _search_fund_metrics calls work unchanged
_call_firecrawl_search = _call_tavily_search
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def _scrape_url(url: str, api_key: str) -> str:
    """Fetch page content using Tavily extract API.

    Returns the raw page text, or "" on any failure or empty response.
    """
    try:
        resp = requests.post(
            "https://api.tavily.com/extract",
            headers={"Content-Type": "application/json"},
            json={"api_key": api_key, "urls": [url]},
            timeout=30,
        )
        resp.raise_for_status()
        extracted = resp.json().get("results", [])
        if extracted:
            return extracted[0].get("raw_content", "")
    except Exception as exc:
        print(f" [tavily extract] error for {url}: {exc}")
    return ""
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
def _derive_morningstar_risk_url(any_ms_url: str) -> str | None:
|
| 421 |
+
if "morningstar.in/mutualfunds/" not in any_ms_url:
|
| 422 |
+
return None
|
| 423 |
+
for suffix in ("fund-factsheet.aspx", "overview.aspx", "portfolio.aspx",
|
| 424 |
+
"performance.aspx", "detailed-portfolio.aspx"):
|
| 425 |
+
if suffix in any_ms_url:
|
| 426 |
+
return any_ms_url.replace(suffix, "risk-ratings.aspx")
|
| 427 |
+
if "risk-ratings.aspx" in any_ms_url:
|
| 428 |
+
return any_ms_url
|
| 429 |
+
return None
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
def _derive_morningstar_portfolio_url(any_ms_url: str) -> str | None:
|
| 433 |
+
"""Derive the Morningstar portfolio page (for P/E and P/B)."""
|
| 434 |
+
if "morningstar.in/mutualfunds/" not in any_ms_url:
|
| 435 |
+
return None
|
| 436 |
+
return re.sub(
|
| 437 |
+
r"(fund-factsheet|overview|risk-ratings|performance|detailed-portfolio)\.aspx",
|
| 438 |
+
"portfolio.aspx",
|
| 439 |
+
any_ms_url,
|
| 440 |
+
)
|
| 441 |
+
|
| 442 |
+
|
| 443 |
+
def _search_fund_metrics(
    fund_name: str,
    missing_metrics: list[str],
    config: EnrichmentConfig,
    firecrawl_key: str,
) -> tuple[dict[str, float | None], list[str]]:
    """Web-search fallback: look up missing metric values for one fund.

    Searches for the fund, keeps up to three result pages (trusted domains
    first), optionally scrapes the Morningstar risk-ratings and portfolio
    pages, then parses the combined markdown for the requested metrics.

    Returns (metric -> parsed value or None, list of source URLs consulted).

    NOTE(review): despite the parameter name, callers pass the TAVILY_API_KEY
    value as ``firecrawl_key`` — confirm which search backend is actually used.
    """
    from urllib.parse import urlparse

    readable = _normalize_fund_name(fund_name)
    query = f"{readable} risk rating alpha beta sharpe morningstar"
    print(f" [search] query: {query[:80]}")

    results = _call_firecrawl_search(query, firecrawl_key, config.search_limit)
    if not results:
        print(" [search] no results")
        # No results at all: every requested metric is unresolved, no sources.
        return {m: None for m in missing_metrics}, []

    # Partition hits so trusted domains are preferred, then cap at 3 sources.
    trusted, other = [], []
    for r in results:
        url = r.get("url", "")
        domain = urlparse(url).netloc.lower().replace("www.", "")
        (trusted if any(td in domain for td in config.trusted_domains) else other).append(r)
    use = (trusted + other)[:3]

    source_urls = [r.get("url", "") for r in use]
    print(f" [search] using {len(use)} sources: {[urlparse(u).netloc for u in source_urls]}")

    # Concatenate the markdown of every usable result, tagged with its URL
    # so the parser (and the scratchpad) can attribute values to pages.
    combined = ""
    for r in use:
        md = r.get("markdown", "")
        if md:
            combined += f"\n\n--- {r.get('url', '')} ---\n{md}"

    # Morningstar: scrape risk-ratings page if not already in results.
    # Takes the first result URL that can be mapped to a risk page.
    ms_risk_url = None
    for r in use:
        ms_risk_url = _derive_morningstar_risk_url(r.get("url", ""))
        if ms_risk_url:
            break
    if ms_risk_url and "risk-ratings" not in " ".join(source_urls):
        print(f" [scrape] Morningstar risk page: {ms_risk_url}")
        risk_md = _scrape_url(ms_risk_url, firecrawl_key)
        if risk_md:
            combined += f"\n\n--- {ms_risk_url} ---\n{risk_md}"
            source_urls.append(ms_risk_url)

    # Morningstar: scrape portfolio page for P/E and P/B (only pages that
    # carry those ratios), reusing the risk URL as the derivation base.
    pe_pb_needed = {"P/E Ratio", "P/B Ratio"} & set(missing_metrics)
    if pe_pb_needed and ms_risk_url:
        ms_port_url = _derive_morningstar_portfolio_url(ms_risk_url)
        if ms_port_url and ms_port_url not in source_urls:
            print(f" [scrape] Morningstar portfolio page: {ms_port_url}")
            port_md = _scrape_url(ms_port_url, firecrawl_key)
            if port_md:
                combined += f"\n\n--- {ms_port_url} ---\n{port_md}"
                source_urls.append(ms_port_url)

    # If we still have no markdown content, or if later we still miss
    # metrics, we'll do a second pass focused on ValueResearch.
    if not combined.strip():
        print(" [search] no markdown from initial sources; retrying via valueresearchonlineβ¦")
        vr_query = f"{readable} {' '.join(missing_metrics)} valueresearchonline"
        vr_results = _call_firecrawl_search(vr_query, firecrawl_key, config.search_limit)
        if vr_results:
            vr_combined = ""
            for r in vr_results:
                url = r.get("url", "")
                domain = urlparse(url).netloc.lower().replace("www.", "")
                # Retry pass only trusts valueresearchonline.com pages.
                if "valueresearchonline.com" not in domain:
                    continue
                md = r.get("markdown", "")
                if md:
                    vr_combined += f"\n\n--- {url} ---\n{md}"
                    source_urls.append(url)
            combined = vr_combined

    if not combined.strip():
        print(" [search] no markdown content after ValueResearch retry")
        # Sources were consulted but yielded no parseable text.
        return {m: None for m in missing_metrics}, source_urls

    found = _parse_metrics_from_markdown(combined, missing_metrics)
    for m, v in found.items():
        print(f" [parsed] {m} = {v if v is not None else 'NOT FOUND'}")

    return found, source_urls
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
# ββ Scratchpad βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 531 |
+
|
| 532 |
+
def _write_scratchpad(
    path: Path,
    triaged: list[TriagedCell],
    resolved_codes: dict[str, str],
    nav_results: dict[str, dict[str, float | None]],
    web_results: dict[str, dict[str, float | None]],
    web_sources: dict[str, list[str]],
    medians_used: list[tuple[str, str, float]],
    nav_filled: list[tuple[str, str, float]],
    web_filled: list[tuple[str, str, float]],
) -> None:
    """Write a human-readable audit trail of the enrichment run to *path*."""
    banner = "=" * 70
    rule = "-" * 70

    out: list[str] = [
        banner,
        "ENRICHMENT SCRATCHPAD",
        f"Generated: {datetime.now().isoformat()}",
        banner, "",
    ]

    # Scheme-code resolutions (Phase 0), when any happened.
    if resolved_codes:
        out.extend([rule, f"SCHEME CODES RESOLVED ({len(resolved_codes)})", rule])
        for fund, code in resolved_codes.items():
            out.append(f" {fund[:60]:60s} β {code}")
        out.append("")

    young = [c for c in triaged if c.label == TRIAGE_YOUNG]
    na = [c for c in triaged if c.label == TRIAGE_NOT_APPLICABLE]
    searchable = [c for c in triaged if c.label == TRIAGE_SEARCHABLE]

    # Summary counts followed by one entry per triaged cell.
    out.extend([
        f"TOTAL MISSING CELLS: {len(triaged)}",
        f" YOUNG_FUND (auto-impute): {len(young)}",
        f" NOT_APPLICABLE (auto-impute): {len(na)}",
        f" SEARCHABLE (nav/web): {len(searchable)}",
        "",
        rule, "TRIAGE DECISIONS", rule,
    ])
    for cell in triaged:
        out.append(f" [{cell.label:16s}] {cell.fund_name} :: {cell.column}")
        out.append(f" Reason: {cell.reason}")
    out.append("")

    if nav_results:
        out.extend([rule, "NAV ENGINE RESULTS (TRAILING 3Y)", rule])
        for fund_key, metrics in nav_results.items():
            out.append(f" Fund: {fund_key}")
            for metric, val in metrics.items():
                out.append(f" {metric}: {'FOUND = ' + str(val) if val is not None else 'NOT_FOUND'}")
        out.append("")

    if web_results:
        out.extend([rule, "WEB SEARCH RESULTS", rule])
        for fund_key, metrics in web_results.items():
            out.append(f" Fund: {fund_key}")
            for src_url in web_sources.get(fund_key, []):
                out.append(f" Source: {src_url}")
            for metric, val in metrics.items():
                out.append(f" {metric}: {'FOUND = ' + str(val) if val is not None else 'NOT_FOUND'}")
        out.append("")

    # One section per fill mechanism, in fixed order.
    fill_sections = (
        (f"NAV-FILLED VALUES ({len(nav_filled)})", nav_filled),
        (f"WEB-FILLED VALUES ({len(web_filled)})", web_filled),
        (f"CATEGORY-MEDIAN IMPUTED ({len(medians_used)})", medians_used),
    )
    for heading, entries in fill_sections:
        if entries:
            out.extend([rule, heading, rule])
            for fund, col, val in entries:
                out.append(f" {fund} :: {col} = {val}")
            out.append("")

    out.extend([banner, "END OF SCRATCHPAD", banner])
    path.write_text("\n".join(out), encoding="utf-8")
|
| 604 |
+
|
| 605 |
+
|
| 606 |
+
# ββ Main entry point βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 607 |
+
|
| 608 |
+
def enrich_csv(
    csv_path: str,
    config: EnrichmentConfig | None = None,
) -> EnrichmentResult:
    """Run the full enrichment pipeline over one fund-stats CSV.

    Phases: parse CSV -> resolve missing scheme codes -> triage missing cells
    -> NAV engine (computed metrics) -> web-search fallback -> category-median
    imputation -> write enriched CSV + scratchpad audit file.

    (Previously named enrich_csv_with_firecrawl_and_kimi; renamed for clarity.)

    Args:
        csv_path: path to the input CSV (read with utf-8-sig, so a BOM is tolerated).
        config: enrichment settings; a default EnrichmentConfig is built when None.

    Returns:
        EnrichmentResult with cell counters, output paths and accumulated errors.
    """
    if config is None:
        config = EnrichmentConfig()

    _load_env()

    src = Path(csv_path)
    # enriched_csv_path starts as the input path; _write_output overwrites it.
    result = EnrichmentResult(input_csv_path=csv_path, enriched_csv_path=csv_path)

    if not config.enabled or not src.exists():
        return result

    with open(src, encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)

    if not rows:
        return result

    # Phase 0: Scheme Code Resolution ------------------------------------
    resolved_codes: dict[str, str] = {}
    if config.resolve_scheme_codes:
        print("[enrichment] Phase 0: Resolving missing scheme codesβ¦")
        rows, resolved_codes = resolve_missing_scheme_codes(rows, verbose=True)
        result.resolved_codes = len(resolved_codes)

    # Phase 1: Triage -----------------------------------------------------
    print("[enrichment] Phase 1: Triage β classifying missing cellsβ¦")
    triaged = _triage_missing_cells(rows, config)
    result.examined_cells = len(triaged)

    if not triaged:
        # Nothing missing: still write the (possibly code-resolved) output.
        print("[enrichment] No missing cells found.")
        _write_output(src, rows, fieldnames, result)
        return result

    searchable = [c for c in triaged if c.label == TRIAGE_SEARCHABLE]
    imputable = [c for c in triaged if c.label != TRIAGE_SEARCHABLE]
    print(f"[enrichment] {len(triaged)} missing cells: "
          f"{len(searchable)} SEARCHABLE, {len(imputable)} auto-impute")

    # Optional cap on how many searchable cells get the expensive treatment.
    if config.max_cells is not None:
        searchable = searchable[:config.max_cells]

    # Phase 2: Category medians -------------------------------------------
    print("[enrichment] Phase 2: Computing category mediansβ¦")
    cat_medians = _build_category_medians(rows, config.target_columns)

    # Phase 3: NAV engine ---------------------------------------------------
    nav_results: dict[str, dict[str, float | None]] = {}
    nav_filled: list[tuple[str, str, float]] = []

    if searchable and config.enable_nav_engine:
        print("[enrichment] Phase 3: NAV engine β computing trailing 3Y metricsβ¦")
        nav_cache = NavEngineCache()

        # All funds with missing cells go through NAV engine β including debt/liquid.
        # Debt funds can have valid Sharpe, Mean, Volatility etc. from their NAV history.
        searchable_for_nav = searchable

        # Group cells by CSV row so each fund is processed exactly once.
        row_groups: dict[int, list[TriagedCell]] = {}
        for cell in searchable_for_nav:
            row_groups.setdefault(cell.row_idx, []).append(cell)

        total_rows = len(row_groups)
        processed_count = 0
        nav_lock = __import__("threading").Lock()

        NAV_WORKERS = 20  # mfapi is stateless REST β scales well beyond 12

        # Pre-warm: bulk load NAV + benchmarks before workers touch network.
        # Step 1: Pull all valid scheme codes and unique benchmarks from rows
        from src.nav_metrics_engine import _bulk_preload_cache, _prewarm_benchmarks
        _scheme_codes = [
            (rows[ri].get("Scheme Code") or "").strip()
            for ri in row_groups
            if (rows[ri].get("Scheme Code") or "").strip().isdigit()
        ]
        _bench_tickers_raw = [
            rows[ri].get("Benchmark Type", "") for ri in row_groups
        ]
        # Step 2: Resolve benchmark type β ticker (same logic as nav engine)
        from src.nav_metrics_engine import resolve_benchmark_ticker
        _bench_tickers = list(dict.fromkeys(
            resolve_benchmark_ticker(b) for b in _bench_tickers_raw if b
        ))
        # Step 3: Bulk load from Neon in 1 SQL query (nav + bench keys)
        _bulk_preload_cache(_scheme_codes, _bench_tickers)
        # Step 4: Download any cold benchmark tickers in parallel NOW,
        # before workers start β eliminates yfinance contention
        _prewarm_benchmarks(_bench_tickers)

        def _process_one_fund(args):
            # Worker: compute NAV metrics for one fund's row; retries once
            # with a freshly resolved scheme code when NAV history is bad.
            row_idx, cells = args
            row = rows[row_idx]
            fund_name = row.get("Fund", "")
            scheme_code = (row.get("Scheme Code") or "").strip()
            benchmark_type = row.get("Benchmark Type", "")
            needed_metrics = [c.column for c in cells]

            if not scheme_code:
                return fund_name, {}, cells

            metrics, skip = compute_nav_metrics_for_scheme(
                scheme_code=scheme_code,
                benchmark_type=benchmark_type,
                needed_metrics=needed_metrics,
                cache=nav_cache,
            )

            # Stale/missing NAV history suggests a wrong scheme code; try to
            # re-resolve from the fund name and recompute once.
            joined_reasons = " | ".join(skip.values()).lower()
            should_refresh_code = (
                "returned no nav history" in joined_reasons
                or "nav history is stale" in joined_reasons
            )
            if should_refresh_code:
                refreshed_code, _ = resolve_scheme_code_for_fund_name(fund_name)
                if refreshed_code and refreshed_code != scheme_code:
                    row["Scheme Code"] = refreshed_code
                    metrics, skip = compute_nav_metrics_for_scheme(
                        scheme_code=refreshed_code,
                        benchmark_type=benchmark_type,
                        needed_metrics=needed_metrics,
                        cache=nav_cache,
                    )

            return fund_name, metrics, cells

        from concurrent.futures import ThreadPoolExecutor, as_completed

        work_items = list(row_groups.items())
        with ThreadPoolExecutor(max_workers=NAV_WORKERS) as executor:
            futures = {executor.submit(_process_one_fund, item): item for item in work_items}
            for fut in as_completed(futures):
                fund_name, metrics, cells = fut.result()
                # nav_lock serialises all mutation of rows/result/nav_results.
                with nav_lock:
                    processed_count += 1
                    nav_results[fund_name] = metrics
                    for cell in cells:
                        val = metrics.get(cell.column)
                        if val is not None:
                            rows[cell.row_idx][cell.column] = str(round(float(val), 4))
                            result.updated_cells += 1
                            nav_filled.append((fund_name, cell.column, float(val)))
                    # Only mark as attempted if MDD was actually filled β
                    # drawdown_zero_fix should still retry funds where MDD came back None
                    if metrics.get("Maximum Drawdown") is not None:
                        _NAV_ATTEMPTED_FUNDS.add(fund_name)
                    if processed_count % 20 == 0 or processed_count == total_rows:
                        print(f" [nav] {processed_count}/{total_rows} funds processedβ¦")

        # Keep only still-missing searchable cells for web phase
        searchable = [c for c in searchable if _is_missing(rows[c.row_idx].get(c.column, ""))]
        print(f"[enrichment] NAV phase resolved {len(nav_filled)} cells; "
              f"{len(searchable)} remain for web search")
        result.nav_cells = len(nav_filled)

    # Phase 4: Web search ---------------------------------------------------
    web_results: dict[str, dict[str, float | None]] = {}
    web_sources: dict[str, list[str]] = {}
    web_filled: list[tuple[str, str, float]] = []

    # NOTE(review): variable is named firecrawl_key but reads TAVILY_API_KEY β
    # confirm which search provider is actually in use.
    firecrawl_key = os.environ.get("TAVILY_API_KEY", "")

    if searchable and firecrawl_key:
        # Group remaining cells per fund so one search covers all its metrics.
        fund_groups: dict[str, list[TriagedCell]] = {}
        for cell in searchable:
            fund_groups.setdefault(cell.fund_name, []).append(cell)

        print(f"[enrichment] Phase 4: Web search β {len(searchable)} cells "
              f"across {len(fund_groups)} funds")

        # Pre-impute non-PE/PB cells if pe_pb_only mode.
        # Do this before the parallel search so workers only handle PE/PB.
        # NOTE(review): this pre-impute bumps both updated_cells and
        # imputed_cells, while the in-loop median fallback below bumps only
        # imputed_cells β confirm which counting is intended.
        web_search_groups: dict[str, list[TriagedCell]] = {}
        for fund_name, cells in fund_groups.items():
            if config.web_search_pe_pb_only:
                cells_to_impute = [c for c in cells if c.column not in ("P/E Ratio", "P/B Ratio")]
                for cell in cells_to_impute:
                    med = cat_medians.get(cell.category, {}).get(cell.column)
                    if med is not None and config.impute_unresolved:
                        rows[cell.row_idx][cell.column] = str(med)
                        result.updated_cells += 1
                        result.imputed_cells += 1
                cells_for_web = [c for c in cells if c.column in ("P/E Ratio", "P/B Ratio")]
            else:
                cells_for_web = cells
            if cells_for_web:
                web_search_groups[fund_name] = cells_for_web

        WEB_WORKERS = 10  # Tavily allows concurrent requests; stay conservative
        web_lock = __import__("threading").Lock()
        web_done = [0]  # mutable counter shared with the result-collection loop
        total_web = len(web_search_groups)

        def _search_one_fund(args):
            # Worker: one web search per fund covering all its missing metrics.
            fund_name, cells = args
            missing_metrics = [c.column for c in cells]
            found, sources = _search_fund_metrics(fund_name, missing_metrics, config, firecrawl_key)
            return fund_name, cells, found, sources

        from concurrent.futures import ThreadPoolExecutor as _WebTPE, as_completed as _web_as_completed
        with _WebTPE(max_workers=WEB_WORKERS) as web_executor:
            futures = {
                web_executor.submit(_search_one_fund, item): item
                for item in web_search_groups.items()
            }
            for fut in _web_as_completed(futures):
                fund_name, cells, found, sources = fut.result()
                # web_lock serialises row/result mutation and log ordering.
                with web_lock:
                    web_done[0] += 1
                    web_results[fund_name] = found
                    web_sources[fund_name] = sources
                    print(f"\n[{web_done[0]}/{total_web}] {fund_name}")
                    for cell in cells:
                        val = found.get(cell.column)
                        if val is not None:
                            rows[cell.row_idx][cell.column] = str(val)
                            result.updated_cells += 1
                            web_filled.append((fund_name, cell.column, val))
                            print(f" -> {cell.column} = {val} (web)")
                        else:
                            # Web miss: fall back to the category median.
                            med = cat_medians.get(cell.category, {}).get(cell.column)
                            if med is not None and config.impute_unresolved:
                                rows[cell.row_idx][cell.column] = str(med)
                                result.imputed_cells += 1
                                print(f" ~> {cell.column} = {med} (median)")
                            else:
                                result.skipped_cells += 1
                                print(f" x> {cell.column} β not found, no median")

    elif searchable and not firecrawl_key:
        # No API key: skip web search entirely and median-impute instead.
        print("[enrichment] WARNING: TAVILY_API_KEY not set β skipping web search, using medians")
        result.errors.append("TAVILY_API_KEY not set")
        for cell in searchable:
            med = cat_medians.get(cell.category, {}).get(cell.column)
            if med is not None and config.impute_unresolved:
                rows[cell.row_idx][cell.column] = str(med)
                result.imputed_cells += 1
            else:
                result.skipped_cells += 1

    # Phase 5: Impute non-searchable (YOUNG / NOT_APPLICABLE) cells --------
    # FIX: was incorrectly labelled "Phase 4" in log
    medians_used: list[tuple[str, str, float]] = []
    if imputable and config.impute_unresolved:
        print(f"\n[enrichment] Phase 5: Imputing {len(imputable)} non-searchable cellsβ¦")
        for cell in imputable:
            med = cat_medians.get(cell.category, {}).get(cell.column)
            if med is not None:
                rows[cell.row_idx][cell.column] = str(med)
                result.imputed_cells += 1
                medians_used.append((cell.fund_name, cell.column, med))
            else:
                result.skipped_cells += 1
    elif imputable:
        # Imputation disabled: count them all as skipped.
        result.skipped_cells += len(imputable)

    # Record how many cells came from web search
    result.web_cells = len(web_filled)

    # Phase 6: Write enriched CSV ------------------------------------------
    _write_output(src, rows, fieldnames, result)

    # Phase 7: Scratchpad ----------------------------------------------------
    scratch_dir = Path("scratchpad")
    scratch_dir.mkdir(exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    scratch_path = scratch_dir / f"enrichment_{stamp}.txt"
    _write_scratchpad(
        scratch_path, triaged, resolved_codes,
        nav_results, web_results, web_sources,
        medians_used, nav_filled, web_filled,
    )
    result.scratchpad_path = str(scratch_path)

    print(f"\n[enrichment] DONE β nav_filled={len(nav_filled)} web_filled={len(web_filled)} "
          f"imputed={result.imputed_cells} skipped={result.skipped_cells}")
    print(f"[enrichment] Enriched CSV : {result.enriched_csv_path}")
    print(f"[enrichment] Scratchpad : {scratch_path}")
    return result
|
| 897 |
+
|
| 898 |
+
|
| 899 |
+
def _write_output(
|
| 900 |
+
src: Path,
|
| 901 |
+
rows: list[dict[str, str]],
|
| 902 |
+
fieldnames: list[str],
|
| 903 |
+
result: EnrichmentResult,
|
| 904 |
+
) -> None:
|
| 905 |
+
out_dir = src.parent / "enriched"
|
| 906 |
+
out_dir.mkdir(exist_ok=True)
|
| 907 |
+
out_path = out_dir / f"enriched_{src.name}"
|
| 908 |
+
with open(out_path, "w", encoding="utf-8-sig", newline="") as f:
|
| 909 |
+
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
| 910 |
+
writer.writeheader()
|
| 911 |
+
writer.writerows(rows)
|
| 912 |
+
result.enriched_csv_path = str(out_path)
|
| 913 |
+
|
| 914 |
+
|
| 915 |
+
# Backward-compat alias: the old public name is still imported by
# streamlit_app and run_enrichment_pipeline, so keep it pointing at enrich_csv.
enrich_csv_with_firecrawl_and_kimi = enrich_csv
|
| 917 |
+
|
| 918 |
+
|
| 919 |
+
# ββ Single metric lookup (for Streamlit UI) ββββββββββββββββββββββββββββββββββ
|
| 920 |
+
|
| 921 |
+
def lookup_fund_metric_value(
    fund_name: str,
    column_name: str,
    scheme_code: str = "",
    config: EnrichmentConfig | None = None,
) -> dict[str, Any]:
    """Look up one metric for one fund via web search (used by the Streamlit UI).

    Returns a status dict: ``{"status": "error", ...}`` when the API key is
    absent, otherwise ``"found"``/``"not_found"`` with the parsed value and
    the source URLs that were consulted.
    """
    _load_env()
    cfg = config if config is not None else EnrichmentConfig()

    api_key = os.environ.get("TAVILY_API_KEY", "")
    if not api_key:
        return {"status": "error", "message": "TAVILY_API_KEY not set"}

    found, sources = _search_fund_metrics(fund_name, [column_name], cfg, api_key)
    value = found.get(column_name)
    status = "found" if value is not None else "not_found"
    return {
        "status": status,
        "fund": fund_name,
        "metric": column_name,
        "value": value,
        "sources": sources,
    }
|
src/data_engine.py
ADDED
|
@@ -0,0 +1,1210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data Engine: -d mode
|
| 3 |
+
|
| 4 |
+
Reads the fund-stats CSV and exports processed Excel matching Processed data.xlsx format.
|
| 5 |
+
|
| 6 |
+
Layout (matching target XLSX):
|
| 7 |
+
- One combined sheet with all fund categories
|
| 8 |
+
- Header row (light green #C9FFCC)
|
| 9 |
+
- For each category:
|
| 10 |
+
- Category header row (no fill, bold text)
|
| 11 |
+
- BM Index row (Col A: #BAEAEE, CAGR cols F,G,H,I: #C4EFFF)
|
| 12 |
+
- Category Average row (Col A: #BAEAEE, CAGR cols F,G,H,I + P/E,P/B cols L,M: #C4EFFF)
|
| 13 |
+
- Fund rows sorted by score (weightage) descending, strictly largest to lowest
|
| 14 |
+
- Weightage scoring: Compare fund CAGR vs Category Average (NOT BM Index)
|
| 15 |
+
- 1Y CAGR beats Cat Avg: 2 pts
|
| 16 |
+
- 3Y CAGR beats Cat Avg: 3 pts
|
| 17 |
+
- 5Y CAGR beats Cat Avg: 4 pts
|
| 18 |
+
- 10Y CAGR beats Cat Avg: 5 pts
|
| 19 |
+
- Max possible: 14 pts
|
| 20 |
+
- Yellow background (#F1FFB6) on Col A only if Weightage >= 8
|
| 21 |
+
- NO green/red font coloring on CAGR cells (plain black only)
|
| 22 |
+
- Category Average row Col B is EMPTY (no benchmark type)
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
import csv
|
| 26 |
+
import math
|
| 27 |
+
import re
|
| 28 |
+
from datetime import datetime
|
| 29 |
+
from pathlib import Path
|
| 30 |
+
from typing import List, Optional, Tuple, Dict, Any
|
| 31 |
+
|
| 32 |
+
from openpyxl import Workbook
|
| 33 |
+
from openpyxl.styles import PatternFill, Font, Alignment, Border, Side
|
| 34 |
+
from openpyxl.utils import get_column_letter
|
| 35 |
+
from openpyxl.formatting.rule import Rule, CellIsRule, FormulaRule
|
| 36 |
+
from openpyxl.styles.differential import DifferentialStyle
|
| 37 |
+
|
| 38 |
+
from src.models import Fund
|
| 39 |
+
from src.weightage import compute_scores, drawdown_zero_fix
|
| 40 |
+
from src.reference_data import extract_reference_data, get_fund_weightage_from_reference, DEFAULT_REFERENCE_PATH
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# βββ Color palette βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# Solid openpyxl fills used when writing the processed workbook.
FILL_HEADER = PatternFill(start_color="C9FFCC", end_color="C9FFCC", fill_type="solid")  # light-green column-header row
FILL_BM_ROW = PatternFill(start_color="BAEAEE", end_color="BAEAEE", fill_type="solid")  # BM Index row, column A
FILL_BM_CAGR = PatternFill(start_color="C4EFFF", end_color="C4EFFF", fill_type="solid")  # BM Index row, CAGR cells
FILL_CAT_AVG = PatternFill(start_color="BAEAEE", end_color="BAEAEE", fill_type="solid")  # Category Average row, column A
FILL_CAT_CAGR = PatternFill(start_color="C4EFFF", end_color="C4EFFF", fill_type="solid")  # Category Average row, CAGR/PE/PB cells
FILL_WEIGHTED_YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
FILL_WEIGHTED_GREEN = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
FILL_WHITE = PatternFill(fill_type=None)  # "no fill" sentinel (clears any inherited fill)
FILL_WEIGHT_REF = PatternFill(start_color="EDEDED", end_color="EDEDED", fill_type="solid")  # light grey weight row

# Quartile fills
FILL_QUARTILE_GREEN = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
FILL_QUARTILE_YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
FILL_QUARTILE_ORANGE = PatternFill(start_color="FFC000", end_color="FFC000", fill_type="solid")
FILL_QUARTILE_RED = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")

# ββ Fonts β Arial for identical rendering on macOS + Windows βββββββββββββββββ
# openpyxl falls back gracefully when Arial is absent, but both platforms ship it.
FONT_DEFAULT = Font(name="Arial", size=8, color="000000")
FONT_DEFAULT_BOLD = Font(name="Arial", size=8, bold=True, color="000000")
FONT_HEADER = Font(name="Arial", size=8, bold=True, color="000000")
FONT_CAT_HEADER = Font(name="Arial", size=10, bold=True, color="000000")
FONT_WEIGHT_REF = Font(name="Arial", size=7, italic=True, color="666666")  # subtle grey label

# Thin light-grey border applied cell-by-cell across the data area.
THIN = Side(border_style="thin", color="CCCCCC")
BORDER_THIN = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# βββ Weight reference row data (advisor-revised March 2026) ββββββββββββββββββ
# Shown beneath every category's column-header row as a read-only reference.
# Must match src/weightage.py WEIGHTS exactly.
# β = Top-10 (higher better), β = Bottom-10 (lower better)
# Keys are Fund attribute names; values are display strings written verbatim
# into the workbook (weight + direction glyph).
WEIGHT_REF_ROW: Dict[str, str] = {
    "ter": "0.15 β",
    "turnover": "0.10 β",
    "cagr_3y": "0.40 β",
    "cagr_5y": "0.60 β",
    "cagr_10y": "0.75 β",
    "pe_ratio": "0.15 β",
    "alpha": "1.00 β*",  # * = Light Red if Ξ± < 1
    "std_dev": "1.00 β",
    "sharpe": "1.20 β",
    "sortino": "1.30 β",
    "down_capture": "1.00 β",
    "max_drawdown": "1.35 β",
    "info_ratio": "1.00 β*",  # * = Light Red if IR < 0
    "weightage": "10.00",
}
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# βββ Column definitions βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# Tuple: (header_label, fund_attr, col_width, is_pct, decimal_places)
# Widths are calibrated so wrap_text = True keeps cells readable without
# the advisor needing to manually drag columns on either platform.
# Order defines the worksheet column layout (A..Z); the trailing comments
# record the Excel column letter each entry lands in.
XLSX_COLUMNS = [
    ("Fund", "name", 40, False, 0),  # A β wide: long fund names
    ("Benchmark Type", "benchmark", 22, False, 0),  # B
    ("TER", "ter", 9, True, 4),  # C
    ("Turn over (%)", "turnover", 11, True, 2),  # D
    ("Mean", "mean", 9, False, 2),  # E
    ("1 Year CAGR", "cagr_1y", 10, False, 2),  # F
    ("3 Years CAGR", "cagr_3y", 10, False, 2),  # G
    ("5 Years CAGR", "cagr_5y", 10, False, 2),  # H
    ("10 Years CAGR", "cagr_10y", 11, False, 2),  # I
    ("CAGR Since Inception", "cagr_inception", 14, False, 2),  # J
    ("NAV", "nav", 10, False, 2),  # K
    ("P/E Ratio", "pe_ratio", 10, False, 2),  # L
    ("P/B Ratio", "pb_ratio", 10, False, 2),  # M
    ("Alpha", "alpha", 10, False, 2),  # N
    ("Volatility", "volatility", 10, False, 2),  # O
    ("Beta", "beta", 9, False, 2),  # P
    ("Standard Deviation", "std_dev", 14, False, 2),  # Q
    ("Sharpe Ratio", "sharpe", 11, False, 2),  # R
    ("Sortino Ratio", "sortino", 11, False, 2),  # S
    ("Up Market Capture", "up_capture", 14, False, 2),  # T
    ("Down Market Capture", "down_capture", 16, False, 2),  # U
    ("Maximum Drawdown", "max_drawdown", 15, False, 2),  # V
    ("R-Squared", "r_squared", 11, False, 2),  # W
    ("Information Ratio", "info_ratio", 14, False, 2),  # X
    ("Total Assets (in Cr)", "aum", 16, False, 1),  # Y
    ("Weightage", "weightage", 11, False, 3),  # Z β 3dp for precision
]

# Total worksheet width in columns (currently 26 = A..Z).
NUM_COLS = len(XLSX_COLUMNS)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def _to_float(val) -> Optional[float]:
|
| 131 |
+
"""Safely convert raw CSV value to float."""
|
| 132 |
+
if val is None:
|
| 133 |
+
return None
|
| 134 |
+
s = str(val).strip().replace('%', '').replace(',', '')
|
| 135 |
+
if s in ('', '-', 'N/A*', 'N/A', 'nan', 'None'):
|
| 136 |
+
return None
|
| 137 |
+
try:
|
| 138 |
+
return float(s)
|
| 139 |
+
except ValueError:
|
| 140 |
+
return None
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def _parse_ter(val) -> Optional[float]:
|
| 144 |
+
"""Parse TER value - CSV has percentage format like '1.40%', convert to decimal."""
|
| 145 |
+
if val is None:
|
| 146 |
+
return None
|
| 147 |
+
# Check if percentage BEFORE stripping
|
| 148 |
+
is_pct = '%' in str(val)
|
| 149 |
+
s = str(val).strip().replace('%', '').replace(',', '')
|
| 150 |
+
if s in ('', '-', 'N/A*', 'N/A', 'nan', 'None'):
|
| 151 |
+
return None
|
| 152 |
+
try:
|
| 153 |
+
v = float(s)
|
| 154 |
+
# Convert percentage to decimal (e.g., 1.40 -> 0.014)
|
| 155 |
+
if is_pct:
|
| 156 |
+
v = v / 100
|
| 157 |
+
return v
|
| 158 |
+
except ValueError:
|
| 159 |
+
return None
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def _parse_turnover(val) -> Optional[float]:
|
| 163 |
+
"""Parse turnover value - CSV has percentage format like '20%', convert to decimal."""
|
| 164 |
+
if val is None:
|
| 165 |
+
return None
|
| 166 |
+
# Check if percentage BEFORE stripping
|
| 167 |
+
is_pct = '%' in str(val)
|
| 168 |
+
s = str(val).strip().replace('%', '').replace(',', '')
|
| 169 |
+
if s in ('', '-', 'N/A*', 'N/A', 'nan', 'None'):
|
| 170 |
+
return None
|
| 171 |
+
try:
|
| 172 |
+
v = float(s)
|
| 173 |
+
# Convert percentage to decimal (e.g., 20 -> 0.20)
|
| 174 |
+
if is_pct:
|
| 175 |
+
v = v / 100
|
| 176 |
+
return v
|
| 177 |
+
except ValueError:
|
| 178 |
+
return None
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def _parse_launch_date(val) -> Optional[datetime]:
|
| 182 |
+
"""Parse launch date from CSV into datetime."""
|
| 183 |
+
if val is None:
|
| 184 |
+
return None
|
| 185 |
+
s = str(val).strip()
|
| 186 |
+
if not s or s in ("-", "N/A", "N/A*"):
|
| 187 |
+
return None
|
| 188 |
+
for fmt in ("%d-%m-%Y", "%Y-%m-%d", "%d/%m/%Y"):
|
| 189 |
+
try:
|
| 190 |
+
return datetime.strptime(s, fmt)
|
| 191 |
+
except ValueError:
|
| 192 |
+
continue
|
| 193 |
+
return None
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# βββ Auto-calculation for incomplete sections ββββββββββββββββββββββββββββββββββββ
|
| 197 |
+
|
| 198 |
+
def _calculate_category_averages(funds: List[Fund]) -> Dict[str, Dict[str, Any]]:
    """
    Derive per-category "Category Average" CAGR figures from fund rows.

    The source CSV repeats the category-average numbers on every fund row,
    so the FIRST fund encountered in each category is used as the
    representative source. A value of 0 (or None) is treated as missing
    data and mapped to None. P/E and P/B averages are not available at
    fund level and are always None here.
    """
    def _nz(value):
        # 0 / None / other falsy => missing data.
        return value if value else None

    by_category: Dict[str, List[Fund]] = {}
    for fund in funds:
        by_category.setdefault(fund.category, []).append(fund)

    averages: Dict[str, Dict[str, Any]] = {}
    for cat_name, members in by_category.items():
        if not members:
            continue
        rep = members[0]  # representative: first fund in CSV order
        averages[cat_name] = {
            'cagr_1y': _nz(rep.cagr_1y_cat),
            'cagr_3y': _nz(rep.cagr_3y_cat),
            'cagr_5y': _nz(rep.cagr_5y_cat),
            'cagr_10y': _nz(rep.cagr_10y_cat),
            'pe_ratio': None,
            'pb_ratio': None,
            'is_calculated': True,  # derived from fund rows, not reference data
        }
    return averages
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def _calculate_benchmark_index(funds: List[Fund]) -> Dict[str, Dict[str, Any]]:
    """
    Derive per-category "BM Index" CAGR figures from fund rows.

    Benchmark values are repeated on every fund row in the CSV, so the
    FIRST fund of each category supplies the numbers. Unlike the category
    averages, zeros are kept verbatim here; only missing values stay None.
    """
    by_category: Dict[str, List[Fund]] = {}
    for fund in funds:
        by_category.setdefault(fund.category, []).append(fund)

    benchmarks: Dict[str, Dict[str, Any]] = {}
    for cat_name, members in by_category.items():
        if not members:
            continue
        rep = members[0]  # representative: first fund in CSV order
        benchmarks[cat_name] = {
            'cagr_1y': rep.cagr_1y_bm,
            'cagr_3y': rep.cagr_3y_bm,
            'cagr_5y': rep.cagr_5y_bm,
            'cagr_10y': rep.cagr_10y_bm,
            'is_calculated': True,  # derived from fund rows, not reference data
        }
    return benchmarks
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
# βββ CSV Loader βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 271 |
+
|
| 272 |
+
def load_fund_csv(csv_path: str) -> Tuple[List[Fund], Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]], Dict[str, int]]:
    """
    Parse the fund-stats CSV and merge with reference data from Processed_data.xlsx.
    For sections with missing reference data, auto-calculates category averages from fund data.

    The CSV is a flat dump with interleaved row types: category header rows,
    'BM Index' rows, 'Category Average' rows, repeated column-header rows,
    and actual fund rows. Column positions are resolved dynamically from the
    first (header) row, so both the OLD (35-col) and NEW (36-col, with
    'Category'/'Scheme Code') layouts are handled.

    Returns: (funds, bm_data, cat_avg_data, fund_weightages)
    """
    csv_path = Path(csv_path)
    if not csv_path.exists():
        raise FileNotFoundError(f"CSV not found: {csv_path}")

    # Load reference data from Processed_data.xlsx
    # NOTE(review): only ref_fund_weightages is actually returned; ref_bm_data /
    # ref_cat_avg_data feed pending_bm / pending_cat_avg which are never read
    # again below β looks like leftover state from an earlier merge strategy.
    ref_bm_data, ref_cat_avg_data, ref_fund_weightages = extract_reference_data(DEFAULT_REFERENCE_PATH)

    funds: List[Fund] = []
    current_category = "Unknown"  # carries the most recent category header row
    bm_data: Dict[str, Dict[str, Any]] = {}
    cat_avg_data: Dict[str, Dict[str, Any]] = {}

    with open(csv_path, encoding='utf-8-sig', errors='replace') as f:
        reader = csv.reader(f)
        rows = list(reader)

    # DYNAMIC COLUMN DETECTION - Read header row first
    if not rows:
        raise ValueError("CSV file is empty")

    header = [str(col).strip() for col in rows[0]]
    col_map = {name: idx for idx, name in enumerate(header)}

    print(f"Detected CSV format with {len(header)} columns")

    # Detect format based on column names
    has_category_col = 'Category' in col_map
    has_scheme_code = 'Scheme Code' in col_map

    if has_category_col and has_scheme_code:
        print(" Format: NEW (36 columns with Category column)")
    else:
        print(" Format: OLD (35 columns without Category column)")

    pending_bm: Dict[str, Dict[str, Any]] = {}
    pending_cat_avg: Dict[str, Dict[str, Any]] = {}
    seen_fund_category: set[tuple[str, str]] = set()  # dedupe key: (fund, category), lowercased
    deduped_rows = 0

    # Helper to get column index safely
    def get_col_idx(col_name: str) -> Optional[int]:
        return col_map.get(col_name)

    for row_idx, row in enumerate(rows):
        if row_idx == 0:  # Skip header row
            continue

        if not row:
            continue

        col0 = str(row[0]).strip()

        # Category header - detect by checking if most columns are empty
        # Category headers are standalone rows with category name in col0 and empty data columns
        # This catches: "Equity: Large Cap", "Childrens Fund", "ETFs", "Retirement Fund", etc.
        # But NOT "BM Index" or "Category Average" rows
        if col0 not in ('BM Index', 'Category Average', '', 'nan'):
            # Check if this looks like a category header (columns 2-10 are empty)
            # For old format: check columns 2-10 (Benchmark Type is col 1, so skip it)
            # For new format: check columns 2-10 (Category is col 1, so skip it)
            check_cols = row[2:11] if len(row) > 10 else row[2:6]
            non_empty_count = sum(1 for cell in check_cols if str(cell).strip() not in ('', 'nan', 'None', '-'))

            if non_empty_count == 0 and len(col0) > 3:  # All checked columns are empty - this is a category header
                current_category = col0

                # Use reference data if available, otherwise use CSV data (which may be empty)
                # NOTE(review): pending_bm / pending_cat_avg are written here but never
                # read later in this function β confirm before relying on them.
                if current_category in ref_bm_data:
                    pending_bm[current_category] = ref_bm_data[current_category]
                else:
                    pending_bm[current_category] = None

                if current_category in ref_cat_avg_data:
                    pending_cat_avg[current_category] = ref_cat_avg_data[current_category]
                else:
                    pending_cat_avg[current_category] = None
                continue

        # BM Index row - skip, we're using reference data
        if col0 == 'BM Index':
            continue

        # Category Average row - skip, we're using reference data
        if col0 == 'Category Average':
            continue

        # Skip header rows (repeated headers in CSV)
        if col0 == 'Fund' and len(row) > 1:
            # Check if this is a header row by looking at column 1
            col1 = str(row[1]).strip() if len(row) > 1 else ''
            if col1 in ('Benchmark Type', 'Category'):
                continue

        if col0 in ('', 'nan'):
            continue

        # Parse fund using dynamic column mapping.
        # g(): numeric cell for this row by header name; None when the column
        # is absent, the row is short, or the value is a placeholder.
        def g(col_name: str) -> Optional[float]:
            idx = get_col_idx(col_name)
            if idx is None:
                return None
            try:
                return _to_float(row[idx])
            except (IndexError, TypeError):
                return None

        # get_str(): raw trimmed string cell for this row by header name.
        def get_str(col_name: str) -> str:
            idx = get_col_idx(col_name)
            if idx is None:
                return ""
            try:
                return str(row[idx]).strip()
            except (IndexError, TypeError):
                return ""

        # Get category - either from Category column or from current_category
        if has_category_col:
            fund_category = get_str('Category') or current_category
        else:
            fund_category = current_category

        # Get benchmark
        benchmark = get_str('Benchmark Type')

        # Get TER and Turnover with special parsing
        ter_idx = get_col_idx('TER')
        ter_val = _parse_ter(row[ter_idx]) if ter_idx is not None and len(row) > ter_idx else None

        turnover_idx = get_col_idx('Turn over (%)')
        turnover_val = _parse_turnover(row[turnover_idx]) if turnover_idx is not None and len(row) > turnover_idx else None

        # Drop exact duplicates of the same fund within the same category.
        dedupe_key = (col0.strip().lower(), fund_category.strip().lower())
        if dedupe_key in seen_fund_category:
            deduped_rows += 1
            continue
        seen_fund_category.add(dedupe_key)

        fund = Fund(
            name=col0,
            category=fund_category,
            benchmark=benchmark,
            ter=ter_val,
            turnover=turnover_val,
            mean=g('Mean'),
            cagr_1y=g('1 Year CAGR'),
            cagr_1y_cat=g('1 Year Category CAGR'),
            cagr_1y_bm=g('1 Year Benchmark CAGR'),
            cagr_3y=g('3 Years CAGR'),
            cagr_3y_cat=g('3 Years Category CAGR'),
            cagr_3y_bm=g('3 Years Benchmark CAGR'),
            cagr_5y=g('5 Years CAGR'),
            cagr_5y_cat=g('5 Years Category CAGR'),
            cagr_5y_bm=g('5 Years Benchmark CAGR'),
            cagr_10y=g('10 Years CAGR'),
            cagr_10y_cat=g('10 Years Category CAGR'),
            cagr_10y_bm=g('10 Years Benchmark CAGR'),
            cagr_inception=g('CAGR Since Inception'),
            nav=g('NAV'),
            pe_ratio=g('P/E Ratio'),
            pb_ratio=g('P/B Ratio'),
            alpha=g('Alpha'),
            beta=g('Beta'),
            std_dev=g('Standard Deviation'),
            sharpe=g('Sharpe Ratio'),
            volatility=g('Volatility'),
            sortino=g('Sortino Ratio'),
            up_capture=g('Up Market Capture\nRatio') or g('Up Market Capture'),
            down_capture=g('Down Market Capture\nRatio') or g('Down Market Capture'),
            max_drawdown=g('Maximum Drawdown'),
            r_squared=g('R-Squared'),
            info_ratio=g('Information Ratio'),
            aum=g('Total Assets (in Cr)'),
            fill_status=get_str('Fill Status') or None,
        )
        # Preserve scheme code for downstream NAV / drawdown fixes
        scheme_code_str = get_str('Scheme Code')
        if scheme_code_str:
            setattr(fund, "_scheme_code", scheme_code_str)
        launch_dt = _parse_launch_date(get_str('Launch Date'))
        if launch_dt:
            setattr(fund, "_launch_date", launch_dt)
        fund.order = len(funds)  # Preserve original CSV order for tiebreaker
        funds.append(fund)

    if deduped_rows:
        print(f" Deduplicated {deduped_rows} rows by (Fund, Category) at ingest")

    # Calculate category averages from fund data
    calculated_cat_avg = _calculate_category_averages(funds)

    # Calculate BM Index from fund-level benchmark data
    calculated_bm = _calculate_benchmark_index(funds)

    # Assign BM and Category Average data - ONLY use calculated data from CSV
    # DO NOT use reference data from Processed_data.xlsx
    for cat_name in set(f.category for f in funds):
        # BM Index: Always use calculated data from fund benchmark values
        bm_data[cat_name] = calculated_bm.get(cat_name, {})

        # Category Average: Always use calculated data from fund category values
        cat_avg_data[cat_name] = calculated_cat_avg.get(cat_name, {})

    return funds, bm_data, cat_avg_data, ref_fund_weightages
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
def _fmt(val, decimals=2) -> Optional[float]:
|
| 484 |
+
"""Return rounded float or None."""
|
| 485 |
+
if val is None:
|
| 486 |
+
return None
|
| 487 |
+
try:
|
| 488 |
+
return round(float(val), decimals)
|
| 489 |
+
except (ValueError, TypeError):
|
| 490 |
+
return None
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
def _quartile_band_for_position(pos: int, total: int) -> Optional[int]:
|
| 494 |
+
"""
|
| 495 |
+
Return quartile band by positional rank (0-based) after sorting by score desc.
|
| 496 |
+
|
| 497 |
+
Band mapping:
|
| 498 |
+
- 0: Top quartile (Green)
|
| 499 |
+
- 1: Upper-middle quartile (Yellow)
|
| 500 |
+
- 2: Lower-middle quartile (Orange)
|
| 501 |
+
- 3: Bottom quartile (Red)
|
| 502 |
+
|
| 503 |
+
Uses rank-positioning (not score thresholds), so ties do not distort quartile sizes.
|
| 504 |
+
"""
|
| 505 |
+
if total <= 0 or pos < 0 or pos >= total:
|
| 506 |
+
return None
|
| 507 |
+
|
| 508 |
+
# Keep intuitive behavior for tiny categories.
|
| 509 |
+
if total == 1:
|
| 510 |
+
return 0
|
| 511 |
+
if total == 2:
|
| 512 |
+
return 0 if pos == 0 else 3
|
| 513 |
+
if total == 3:
|
| 514 |
+
if pos == 0:
|
| 515 |
+
return 0
|
| 516 |
+
if pos == 1:
|
| 517 |
+
return 1
|
| 518 |
+
return 3
|
| 519 |
+
|
| 520 |
+
q1_end = math.ceil(total * 0.25)
|
| 521 |
+
q2_end = math.ceil(total * 0.50)
|
| 522 |
+
q3_end = math.ceil(total * 0.75)
|
| 523 |
+
|
| 524 |
+
if pos < q1_end:
|
| 525 |
+
return 0
|
| 526 |
+
if pos < q2_end:
|
| 527 |
+
return 1
|
| 528 |
+
if pos < q3_end:
|
| 529 |
+
return 2
|
| 530 |
+
return 3
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
def _calculate_weightage(fund: Fund, cat_avg_vals: Dict[str, Any]) -> int:
    """
    DEPRECATED: Legacy CAGR-based weightage calculation.
    Use compute_scores() from weightage.py for AI-suggested model.

    Period-weighted score against the Category Average: a fund earns
    2 / 3 / 4 / 5 points for beating the category average over
    1 / 3 / 5 / 10 years respectively (max 14). A value of 0 or None
    on either side counts as "no data" and the comparison is skipped.
    """
    score = 0
    for attr, points in (('cagr_1y', 2), ('cagr_3y', 3), ('cagr_5y', 4), ('cagr_10y', 5)):
        fund_value = getattr(fund, attr, None)
        avg_value = cat_avg_vals.get(attr) if cat_avg_vals else None

        # Either side missing (0 / None) => no comparison possible.
        if fund_value is None or fund_value == 0:
            continue
        if avg_value is None or avg_value == 0:
            continue

        if fund_value > avg_value:
            score += points
    return score
|
| 574 |
+
|
| 575 |
+
|
| 576 |
+
def _calculate_green_cell_weightage(fund: Fund, all_funds_in_category: List[Fund]) -> int:
    """
    Weightage = number of GREEN (top-10) cells the fund earns in Excel.

    Mirrors the workbook's conditional formatting: only "higher is better"
    metrics receive green highlighting, so only those are counted here
    (bottom-10 / red metrics never contribute). Max possible score: 11.
    """
    # Columns with a Top-10 = Green rule: F, G, H, I, J, N, R, S, T, X, Y.
    green_metrics = (
        'cagr_1y',         # Column F
        'cagr_3y',         # Column G
        'cagr_5y',         # Column H
        'cagr_10y',        # Column I
        'cagr_inception',  # Column J
        'alpha',           # Column N
        'sharpe',          # Column R
        'sortino',         # Column S
        'up_capture',      # Column T
        'info_ratio',      # Column X
        'aum',             # Column Y (Assets)
    )
    return sum(
        1
        for metric in green_metrics
        if _is_in_top_10(fund, all_funds_in_category, metric, higher_is_better=True)
    )
|
| 613 |
+
|
| 614 |
+
|
| 615 |
+
def _is_in_top_10(fund: Fund, all_funds: List[Fund], metric: str, higher_is_better: bool) -> bool:
    """
    Whether *fund* ranks in the top 10 of its category for *metric*.

    Values of None/0 are treated as missing data and excluded. With fewer
    than 10 valid data points the fund must instead land in the better
    half (and at least two data points are required). Ties share a rank:
    a fund is top-10 when at most nine peers are STRICTLY better.
    """
    own_value = getattr(fund, metric, None)
    if own_value is None or own_value == 0:
        return False

    # Valid peer values for this metric across the category (zeros excluded).
    peer_values = [
        v for v in (getattr(f, metric, None) for f in all_funds)
        if v is not None and v != 0
    ]

    if len(peer_values) < 10:
        # Small field: require the fund to be in the better half.
        if len(peer_values) < 2:
            return False
        peer_values.sort(reverse=higher_is_better)
        cutoff = peer_values[len(peer_values) // 2]
        return own_value >= cutoff if higher_is_better else own_value <= cutoff

    # Full field: top-10 means at most nine peers are strictly better.
    if higher_is_better:
        strictly_better = sum(1 for v in peer_values if v > own_value)
    else:
        strictly_better = sum(1 for v in peer_values if v < own_value)
    return strictly_better <= 9
|
| 664 |
+
|
| 665 |
+
|
| 666 |
+
def _get_cagr_font_color() -> Font:
    """
    Return the plain black default font, always.

    Spec requirement: "CRITICAL: NO green/red font coloring anywhere" β
    CAGR cells are never tinted, so every caller receives FONT_DEFAULT.
    """
    return FONT_DEFAULT
|
| 672 |
+
|
| 673 |
+
|
| 674 |
+
def _apply_conditional_formatting(ws, start_row: int, end_row: int, cat_avg_vals: Dict[str, Any]):
    """
    Apply conditional formatting rules per MF_Scoring_Model.md

    Light Green (C6EFCE) + Dark Green Text (006100) for:
    - Top 10: CAGR (all periods), Alpha, Sharpe, Sortino, Up Capture, R-Squared, Info Ratio, Total Assets, CAGR Since Inception
    - Bottom 10: TER, Turnover, Beta, Std Dev, Down Capture, P/E, P/B, Max Drawdown

    Light Red (FFC7CE) for threshold violations:
    - Alpha < 1
    - Info Ratio < 0
    - CAGR < Category Average (all periods)

    Rule-order matters: red threshold rules are added first with
    stopIfTrue=True so a violation suppresses the green top-10 rule
    for the same cell.
    """
    # NOTE(review): the docstring lists P/B (col M) and Max Drawdown among the
    # bottom-10 green metrics, but no rule is added for column M below and
    # column V uses a custom non-zero formula instead β confirm intent.
    if start_row >= end_row:
        return

    # Define colors for conditional formatting
    green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
    green_font = Font(color="006100")
    red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
    red_font = Font(color="9C0006")

    # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    # DUAL-CONDITION COLUMNS (Green for Top 10, Red for threshold violations)
    # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

    # CAGR columns: Green for Top 10, Red if < Category Average
    # Mapping: column letter -> (1-based column number, category-average value)
    cagr_cols = {
        'F': (6, cat_avg_vals.get('cagr_1y')),   # 1 Year CAGR
        'G': (7, cat_avg_vals.get('cagr_3y')),   # 3 Years CAGR
        'H': (8, cat_avg_vals.get('cagr_5y')),   # 5 Years CAGR
        'I': (9, cat_avg_vals.get('cagr_10y')),  # 10 Years CAGR
    }

    for col_letter, (col_num, cat_avg) in cagr_cols.items():
        range_str = f"{col_letter}{start_row}:{col_letter}{end_row}"

        # Rule 1: Red if < Category Average (higher priority)
        if cat_avg is not None:
            rule_red = CellIsRule(
                operator='lessThan',
                formula=[str(cat_avg)],
                stopIfTrue=True,  # Stop if red applies
                fill=red_fill,
                font=red_font
            )
            ws.conditional_formatting.add(range_str, rule_red)

        # Rule 2: Green for Top 10
        rule_green = Rule(
            type='top10',
            rank=10,
            stopIfTrue=False
        )
        rule_green.dxf = DifferentialStyle(fill=green_fill, font=green_font)
        ws.conditional_formatting.add(range_str, rule_green)

    # Alpha (Col N = 14): Green for Top 10, Red if < 1
    range_str = f"N{start_row}:N{end_row}"
    rule_red = CellIsRule(
        operator='lessThan',
        formula=['1'],
        stopIfTrue=True,
        fill=red_fill,
        font=red_font
    )
    ws.conditional_formatting.add(range_str, rule_red)

    rule_green = Rule(type='top10', rank=10, stopIfTrue=False)
    rule_green.dxf = DifferentialStyle(fill=green_fill, font=green_font)
    ws.conditional_formatting.add(range_str, rule_green)

    # Information Ratio (Col X = 24): Green for Top 10, Red if < 0
    range_str = f"X{start_row}:X{end_row}"
    rule_red = CellIsRule(
        operator='lessThan',
        formula=['0'],
        stopIfTrue=True,
        fill=red_fill,
        font=red_font
    )
    ws.conditional_formatting.add(range_str, rule_red)

    rule_green = Rule(type='top10', rank=10, stopIfTrue=False)
    rule_green.dxf = DifferentialStyle(fill=green_fill, font=green_font)
    ws.conditional_formatting.add(range_str, rule_green)

    # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    # TOP 10 COLUMNS (Green - Higher is Better)
    # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

    # Values are human-readable labels only; just the keys drive the rules.
    top10_cols = {
        'J': 'CAGR Since Inception',
        'R': 'Sharpe Ratio',
        'S': 'Sortino Ratio',
        'T': 'Up Market Capture',
        'W': 'R-Squared',
        'Y': 'Total Assets'
    }

    for col_letter, name in top10_cols.items():
        range_str = f"{col_letter}{start_row}:{col_letter}{end_row}"
        rule = Rule(type='top10', rank=10, stopIfTrue=False)
        rule.dxf = DifferentialStyle(fill=green_fill, font=green_font)
        ws.conditional_formatting.add(range_str, rule)

    # Maximum Drawdown (Col V): Top 10 among NON-ZERO values only.
    # This keeps zeros as "no data" and avoids green highlighting for zero entries.
    v_range = f"V{start_row}:V{end_row}"
    # Guard against text placeholders like "NA": Excel treats "NA" <> 0 as TRUE,
    # which can incorrectly qualify the cell for highlighting. Only numeric values participate.
    v_formula = (
        f'AND('
        f'ISNUMBER(V{start_row}),'
        f'V{start_row}<>0,'
        f'COUNTIFS($V${start_row}:$V${end_row},\">\"&V{start_row},$V${start_row}:$V${end_row},\"<>0\")<10'
        f')'
    )
    v_rule = FormulaRule(formula=[v_formula], stopIfTrue=False, fill=green_fill, font=green_font)
    ws.conditional_formatting.add(v_range, v_rule)

    # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    # BOTTOM 10 COLUMNS (Green - Lower is Better)
    # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ

    # Values are human-readable labels only; just the keys drive the rules.
    bottom10_cols = {
        'C': 'TER',
        'D': 'Turnover',
        'L': 'P/E Ratio',
        'P': 'Beta',
        'Q': 'Standard Deviation',
        'U': 'Down Market Capture'
    }

    for col_letter, name in bottom10_cols.items():
        range_str = f"{col_letter}{start_row}:{col_letter}{end_row}"
        rule = Rule(
            type='top10',
            rank=10,
            bottom=True,  # Bottom 10 = lowest values
            stopIfTrue=False
        )
        rule.dxf = DifferentialStyle(fill=green_fill, font=green_font)
        ws.conditional_formatting.add(range_str, rule)
|
| 818 |
+
|
| 819 |
+
|
| 820 |
+
def export_excel(funds: List[Fund], output_path: str,
                 bm_data: Optional[Dict[str, Dict[str, Any]]] = None,
                 cat_avg_data: Optional[Dict[str, Dict[str, Any]]] = None) -> str:
    """Build the processed Excel matching target format exactly.

    Layout per category: a repeated column-header row (for every category after
    the first), a merged category-title row, a "BM Index" row, a
    "Category Average" row, then one row per fund sorted by score descending.
    Conditional formatting is applied to each category's fund-row span.

    Args:
        funds: Scored Fund objects; grouped by ``fund.category`` in first-seen order.
        output_path: Destination .xlsx path; parent directories are created.
        bm_data: Per-category benchmark-index metrics keyed by category name.
        cat_avg_data: Per-category average metrics keyed by category name.

    Returns:
        The saved workbook path as a string. Side effect: when any cell was
        rendered as "NA", an audit file ``<stem>_na_audit.txt`` is written
        next to the workbook explaining each NA decision.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if bm_data is None:
        bm_data = {}
    if cat_avg_data is None:
        cat_avg_data = {}

    wb = Workbook()
    ws = wb.active
    ws.title = "Sheet2"
    na_audit_rows: List[str] = []  # one tab-separated line per NA decision

    # Apply NA policy to all numeric export columns.
    # Exclusions are text/derived columns that should stay as-is.
    na_on_zero_attrs = {
        attr for _, attr, _, _, _ in XLSX_COLUMNS
        if attr and attr not in {"name", "benchmark", "weightage"}
    }
    # CAGR attr -> minimum fund age (in years) required for that period's value to exist.
    cagr_period_by_attr = {
        "cagr_1y": 1,
        "cagr_3y": 3,
        "cagr_5y": 5,
        "cagr_10y": 10,
    }

    def _years_since_launch(fund_obj: Fund) -> Optional[float]:
        # Fund age in years, or None when the private launch date is absent
        # or not a datetime instance.
        launch_dt = getattr(fund_obj, "_launch_date", None)
        if not isinstance(launch_dt, datetime):
            return None
        return max(0.0, (datetime.now() - launch_dt).days / 365.25)

    def _audit_na(row_type: str, category: str, fund_name: str, attr: str, reason: str) -> None:
        # Record why a cell was rendered as "NA"; dumped to the audit file at the end.
        na_audit_rows.append(
            f"{row_type}\t{category}\t{fund_name}\t{attr}\t{reason}"
        )

    def _display_numeric_or_na(
        *,
        attr: str,
        value: Any,
        row_type: str,
        category: str,
        fund_obj: Optional[Fund] = None,
        fund_name: str = "",
        decimals: int = 2,
    ) -> Any:
        """
        Convert numeric value to rounded float or 'NA' for missing/invalid values.
        Also appends NA decisions to audit rows.
        Category Average: PE and PB show blank (not NA) when missing.
        """
        # Category Average row: PE and PB stay blank when missing
        if row_type == "CATEGORY_AVG" and attr in ("pe_ratio", "pb_ratio"):
            if value is None:
                return None
            try:
                num = float(value)
                return round(num, decimals) if num != 0 else None
            except (TypeError, ValueError):
                return None

        if attr in na_on_zero_attrs:
            if value is None:
                _audit_na(row_type, category, fund_name, attr, "missing value")
                return "NA"
            try:
                num = float(value)
            except (TypeError, ValueError):
                _audit_na(row_type, category, fund_name, attr, "non-numeric value")
                return "NA"

            if num == 0:
                # Duration-aware reason for CAGR periods when launch date exists.
                if fund_obj is not None and attr in cagr_period_by_attr:
                    years = _years_since_launch(fund_obj)
                    period = cagr_period_by_attr[attr]
                    if years is not None and years < period:
                        _audit_na(
                            row_type,
                            category,
                            fund_name,
                            attr,
                            f"fund age {years:.2f}y < required {period}y",
                        )
                    else:
                        _audit_na(row_type, category, fund_name, attr, "source value is 0")
                else:
                    _audit_na(row_type, category, fund_name, attr, "source value is 0")
                return "NA"

            return round(num, decimals)

        # Non-NA-managed attributes use existing behavior.
        if value is None:
            return None
        try:
            return round(float(value), decimals)
        except (TypeError, ValueError):
            return value

    # ── Row 1: Column headers (include weight hints for scored metrics) ─────
    ws.row_dimensions[1].height = 36
    for col_idx, (header, attr, width, _, _) in enumerate(XLSX_COLUMNS, start=1):
        # If this column participates in the scoring model, append its weight
        # so the advisor can see weights even when scrolled deep into a category.
        weight_hint = WEIGHT_REF_ROW.get(attr)
        if weight_hint:
            header_value = f"{header}\n({weight_hint})"
        else:
            header_value = header

        cell = ws.cell(row=1, column=col_idx, value=header_value)
        cell.fill = FILL_HEADER
        cell.font = FONT_HEADER
        cell.border = BORDER_THIN
        cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
        ws.column_dimensions[get_column_letter(col_idx)].width = width

    # Freeze col A + row 1 so fund names and headers stay visible while scrolling
    ws.freeze_panes = "B2"

    # ── Group funds by category ──────────────────────────────────────────────
    categories: Dict[str, List[Fund]] = {}
    category_order = []  # preserves first-seen category order for output
    for fund in funds:
        if fund.category not in categories:
            category_order.append(fund.category)
        categories.setdefault(fund.category, []).append(fund)

    current_row = 2

    for idx, cat_name in enumerate(category_order):
        cat_funds = categories[cat_name]

        # Sort by score (displayed value) descending so Weightage column is strictly largest-to-lowest
        sorted_funds = sorted(
            cat_funds,
            key=lambda f: (-(f.score or 0), (f.name or "").lower(), getattr(f, 'order', 0)),
        )

        # Quartiles by positional rank, not by score thresholds.
        # This guarantees consistent quartile sizing even when many funds share the same score.
        quartile_by_fund_id: Dict[int, int] = {}
        for pos, fund in enumerate(sorted_funds):
            band = _quartile_band_for_position(pos, len(sorted_funds))
            if band is not None:
                quartile_by_fund_id[id(fund)] = band

        # ── Header row (repeat before each category except first) ─────────
        if idx > 0:
            ws.row_dimensions[current_row].height = 32
            for col_idx, (header, _, _, _, _) in enumerate(XLSX_COLUMNS, start=1):
                cell = ws.cell(row=current_row, column=col_idx, value=header)
                cell.fill = FILL_HEADER
                cell.font = FONT_HEADER
                cell.border = BORDER_THIN
                cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
            current_row += 1

        # ── Category header row ───────────────────────────────────────────
        ws.row_dimensions[current_row].height = 20
        for col_idx in range(1, NUM_COLS + 1):
            cell = ws.cell(row=current_row, column=col_idx)
            cell.fill = FILL_WHITE
            cell.border = BORDER_THIN
        cat_cell = ws.cell(row=current_row, column=1, value=cat_name)
        cat_cell.font = FONT_CAT_HEADER
        cat_cell.alignment = Alignment(horizontal="left", vertical="center", wrap_text=True)
        ws.merge_cells(start_row=current_row, start_column=1,
                       end_row=current_row, end_column=NUM_COLS - 1)
        current_row += 1

        # ── BM Index row ───────────────────────────────────────────────────
        bm_vals = bm_data.get(cat_name, {})
        ws.row_dimensions[current_row].height = 14
        for col_idx, (header, attr, _, _, _) in enumerate(XLSX_COLUMNS, start=1):
            val = None
            if col_idx == 1:
                val = "BM Index"
            elif attr in bm_vals:
                val = _display_numeric_or_na(
                    attr=attr,
                    value=bm_vals[attr],
                    row_type="BM_INDEX",
                    category=cat_name,
                    fund_name="BM Index",
                    decimals=2,
                )

            cell = ws.cell(row=current_row, column=col_idx, value=val)
            if col_idx == 1:
                cell.fill = FILL_BM_ROW
            elif col_idx in [6, 7, 8, 9]:
                # CAGR columns get the dedicated benchmark-CAGR tint
                cell.fill = FILL_BM_CAGR
            else:
                cell.fill = FILL_WHITE
            cell.font = FONT_DEFAULT_BOLD
            cell.border = BORDER_THIN
            cell.alignment = Alignment(
                horizontal="right" if col_idx > 2 else "left",
                vertical="center", wrap_text=(col_idx == 1)
            )
        current_row += 1

        # ── Category Average row ──────────────────────────────────────────
        cat_avg_vals = cat_avg_data.get(cat_name, {})
        ws.row_dimensions[current_row].height = 14
        for col_idx, (header, attr, _, _, _) in enumerate(XLSX_COLUMNS, start=1):
            val = None
            if col_idx == 1:
                val = "Category Average"
            elif attr in cat_avg_vals:
                val = _display_numeric_or_na(
                    attr=attr,
                    value=cat_avg_vals[attr],
                    row_type="CATEGORY_AVG",
                    category=cat_name,
                    fund_name="Category Average",
                    decimals=2,
                )

            cell = ws.cell(row=current_row, column=col_idx, value=val)
            if col_idx == 1:
                cell.fill = FILL_CAT_AVG
            elif col_idx in [6, 7, 8, 9, 12, 13]:
                # CAGR + valuation columns get the category-average tint
                cell.fill = FILL_CAT_CAGR
            else:
                cell.fill = FILL_WHITE
            cell.font = FONT_DEFAULT_BOLD
            cell.border = BORDER_THIN
            cell.alignment = Alignment(
                horizontal="right" if col_idx > 2 else "left",
                vertical="center", wrap_text=(col_idx == 1)
            )
        current_row += 1

        # ── Fund rows ─────────────────────────────────────────────────────
        fund_start_row = current_row

        top_5_fund_ids = {id(f) for f in sorted_funds[:5]}

        for fund in sorted_funds:
            # 36pt height = comfortable 2-line display for long fund names
            # without the advisor needing to drag rows on macOS or Windows
            ws.row_dimensions[current_row].height = 36

            weightage = fund.score or 0
            score_val = round(weightage, 3)
            is_top_5 = id(fund) in top_5_fund_ids

            for col_idx, (header, attr, _, _, decimals) in enumerate(XLSX_COLUMNS, start=1):
                if attr == "weightage":
                    val = score_val
                    cell_font = FONT_DEFAULT_BOLD if is_top_5 else FONT_DEFAULT
                elif attr:
                    raw_val = getattr(fund, attr, None)
                    if attr in ('name', 'benchmark'):
                        # Text columns: blank (None) rather than empty string.
                        val = raw_val if raw_val else None
                        cell_font = FONT_DEFAULT_BOLD if (col_idx == 1 and is_top_5) else FONT_DEFAULT
                    else:
                        val = _display_numeric_or_na(
                            attr=attr,
                            value=raw_val,
                            row_type="FUND",
                            category=fund.category,
                            fund_obj=fund,
                            fund_name=fund.name,
                            decimals=decimals,
                        )
                        cell_font = FONT_DEFAULT
                else:
                    val = None
                    cell_font = FONT_DEFAULT

                cell = ws.cell(row=current_row, column=col_idx, value=val)

                if is_top_5 and col_idx == 1:
                    cell.fill = FILL_WEIGHTED_YELLOW
                elif attr == "weightage":
                    # Quartile color comes from the positional band computed above.
                    quartile_band = quartile_by_fund_id.get(id(fund))
                    if quartile_band == 0: cell.fill = FILL_QUARTILE_GREEN
                    elif quartile_band == 1: cell.fill = FILL_QUARTILE_YELLOW
                    elif quartile_band == 2: cell.fill = FILL_QUARTILE_ORANGE
                    elif quartile_band == 3: cell.fill = FILL_QUARTILE_RED
                    else: cell.fill = FILL_WHITE
                else:
                    cell.fill = FILL_WHITE

                cell.font = cell_font
                cell.border = BORDER_THIN
                cell.alignment = Alignment(
                    horizontal="left" if col_idx <= 2 else "right",
                    vertical="top",  # top-align so wrapped text reads naturally
                    wrap_text=True,  # prevents truncation on any screen or zoom level
                )

                # Columns C/D (TER, Turnover) display as percentages; Weightage gets 3 decimals.
                if col_idx == 3: cell.number_format = '0.00%'
                elif col_idx == 4: cell.number_format = '0.00%'
                elif attr == "weightage": cell.number_format = '0.000'

            current_row += 1

        # Apply conditional formatting to this section's fund rows
        # (skipped when the category has no fund rows or no category averages).
        fund_end_row = current_row - 1
        if fund_end_row >= fund_start_row and cat_avg_vals:
            _apply_conditional_formatting(ws, fund_start_row, fund_end_row, cat_avg_vals)

    wb.save(str(output_path))
    if na_audit_rows:
        # Write a human-readable trace of every NA decision next to the workbook.
        audit_path = output_path.with_name(f"{output_path.stem}_na_audit.txt")
        lines = [
            "NA AUDIT TRACE",
            f"Generated: {datetime.now().isoformat()}",
            "Columns: row_type<TAB>category<TAB>fund_name<TAB>metric_attr<TAB>reason",
            "-" * 80,
            *na_audit_rows,
        ]
        audit_path.write_text("\n".join(lines), encoding="utf-8")
        print(f"NA audit trace written: {audit_path}")
    return str(output_path)
|
| 1145 |
+
|
| 1146 |
+
|
| 1147 |
+
def _avg(values: List[Optional[float]]) -> Optional[float]:
|
| 1148 |
+
"""Compute average of non-None values."""
|
| 1149 |
+
valid = [v for v in values if v is not None]
|
| 1150 |
+
if not valid:
|
| 1151 |
+
return None
|
| 1152 |
+
return round(sum(valid) / len(valid), 2)
|
| 1153 |
+
|
| 1154 |
+
|
| 1155 |
+
# βββ Pipeline entry ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1156 |
+
|
| 1157 |
+
def run_data_engine(csv_path: str,
                    output_path: str = "output/fund_analysis.xlsx",
                    use_comprehensive_scoring: bool = True) -> List[Fund]:
    """
    Full pipeline: load -> score -> export Excel.

    Args:
        csv_path: Path to the fund-stats CSV file
        output_path: Path to save the output Excel file
        use_comprehensive_scoring: If True, uses AI-suggested model (10-point scale with Top/Bottom 10).
                                   If False, uses legacy CAGR-based weightage.

    Returns:
        The scored Fund list (same objects, with ``score`` and ``weightage`` populated).
    """
    print(f"Loading fund data from: {csv_path}")
    # NOTE(review): ref_fund_weightages is returned by the loader but not used here.
    funds, bm_data, cat_avg_data, ref_fund_weightages = load_fund_csv(csv_path)
    print(f" Loaded {len(funds)} fund schemes")

    # Proactively fix zero / missing drawdown cells using live NAV history
    # so Maximum Drawdown can participate in scoring instead of staying at 0.
    try:
        fixed_mdd = drawdown_zero_fix(funds, verbose=True)
        if fixed_mdd:
            print(f" Fixed {fixed_mdd} zero/missing drawdown cells via NAV engine")
    except Exception as exc:
        # Best-effort: a NAV-engine failure must not block the export.
        print(f" WARNING: drawdown_zero_fix failed: {exc}")

    if use_comprehensive_scoring:
        # Use AI-suggested model (10-point scale)
        print(" Using AI-suggested scoring model (10-point scale with Top/Bottom 10)...")

        # Import and use the new compute_scores function
        funds = compute_scores(funds)

        # Copy score to weightage field for Excel export compatibility
        for fund in funds:
            fund.weightage = int(round(fund.score)) if fund.score else 0

        with_highlight = sum(1 for f in funds if (f.score or 0) > 8)
        print(f" Calculated AI-suggested weightage. {with_highlight} funds have score > 8")
    else:
        # Use legacy CAGR-based weightage
        print(" Using legacy CAGR-based weightage...")
        for fund in funds:
            cat_avg_vals = cat_avg_data.get(fund.category, {})
            fund.weightage = _calculate_weightage(fund, cat_avg_vals)
            # Keep score in sync so downstream sorting/quartiles work either way.
            fund.score = float(fund.weightage)

        with_highlight = sum(1 for f in funds if (f.weightage or 0) > 8)
        print(f" Calculated weightage. {with_highlight} funds have weightage > 8")

    print(f"Exporting processed Excel to: {output_path}")
    path = export_excel(funds, output_path, bm_data, cat_avg_data)
    print(f"Done! Saved: {path}")

    return funds
|
src/index_fund_ingest.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Index Fund Ingest β capture index funds the same way as raw CSV (mftool/AMFI).
|
| 3 |
+
|
| 4 |
+
Two sources:
|
| 5 |
+
- mftool (default): Same as raw CSV under PS β AMFI category 38 (Index Funds/ETFs).
|
| 6 |
+
Returns only the schemes AMFI lists under that category (curated, ~same count as
|
| 7 |
+
your fund-stats CSV Index Fund section). Output format matches PS: "Index Fund",
|
| 8 |
+
hyphenated fund names.
|
| 9 |
+
- mfapi: Search mfapi.in and filter by index; use when you need more schemes.
|
| 10 |
+
|
| 11 |
+
Usage:
|
| 12 |
+
python -m src.index_fund_ingest [--output index_funds.csv] # default: mftool
|
| 13 |
+
python -m src.index_fund_ingest --source mfapi [--limit 100] # mfapi search
|
| 14 |
+
Then: enrich the output CSV, merge into main fund CSV, run data_engine as usual.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import argparse
|
| 20 |
+
import csv
|
| 21 |
+
import re
|
| 22 |
+
import time
|
| 23 |
+
from datetime import datetime, timedelta
|
| 24 |
+
from pathlib import Path
|
| 25 |
+
|
| 26 |
+
import requests
|
| 27 |
+
|
| 28 |
+
# Same AMFI gateway as mftool (get_open_ended_other_scheme_performance)
|
| 29 |
+
AMFI_FUND_PERFORMANCE_URL = "https://www.amfiindia.com/gateway/pollingsebi/api/amfi/fundperformance"
|
| 30 |
+
AMFI_CATEGORY_OTHER = 5
|
| 31 |
+
AMFI_SUBCATEGORY_INDEX_FUNDS = 38 # "Index Funds/ETFs"
|
| 32 |
+
|
| 33 |
+
MFAPI_LIST = "https://api.mfapi.in/mf"
|
| 34 |
+
MFAPI_SEARCH = "https://api.mfapi.in/mf/search"
|
| 35 |
+
MFAPI_NAV = "https://api.mfapi.in/mf/{scheme_code}"
|
| 36 |
+
SLEEP = 0.3 # polite delay between API calls
|
| 37 |
+
|
| 38 |
+
# CSV headers matching project fund-stats CSV (must match data_engine / csv_enrichment)
|
| 39 |
+
FUND_CSV_HEADERS = [
|
| 40 |
+
"Fund", "Category", "Scheme Code", "Launch Date", "Total Assets (in Cr)",
|
| 41 |
+
"TER", "Turn over (%)", "CAGR Since Inception",
|
| 42 |
+
"1 Year CAGR", "1 Year Category CAGR", "1 Year Benchmark CAGR",
|
| 43 |
+
"3 Years CAGR", "3 Years Category CAGR", "3 Years Benchmark CAGR",
|
| 44 |
+
"5 Years CAGR", "5 Years Category CAGR", "5 Years Benchmark CAGR",
|
| 45 |
+
"10 Years CAGR", "10 Years Category CAGR", "10 Years Benchmark CAGR",
|
| 46 |
+
"Benchmark Type", "NAV", "Alpha", "Beta", "Standard Deviation",
|
| 47 |
+
"Sharpe Ratio", "Volatility", "Mean", "Sortino Ratio",
|
| 48 |
+
"Up Market Capture\nRatio", "Down Market Capture\nRatio",
|
| 49 |
+
"Maximum Drawdown", "R-Squared", "Information Ratio", "P/E Ratio", "P/B Ratio",
|
| 50 |
+
]
|
| 51 |
+
|
| 52 |
+
# Raw CSV under PS uses "Index Fund" (no "Equity:" prefix) for this category
|
| 53 |
+
INDEX_FUND_CATEGORY_PS = "Index Fund"
|
| 54 |
+
|
| 55 |
+
# mfapi scheme_category (from NAV meta) -> our Category label
|
| 56 |
+
CATEGORY_MAP = {
|
| 57 |
+
"index fund": "Equity: Index Fund",
|
| 58 |
+
"index funds": "Equity: Index Fund",
|
| 59 |
+
"equity scheme - index fund": "Equity: Index Fund",
|
| 60 |
+
"equity scheme - index funds": "Equity: Index Fund",
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _to_hyphenated(name: str) -> str:
|
| 65 |
+
"""Convert scheme name to hyphenated form like raw CSV under PS (e.g. DSP-Nifty-50-Index-Fund-Regular-Plan-Growth)."""
|
| 66 |
+
if not name:
|
| 67 |
+
return ""
|
| 68 |
+
# Replace spaces and multiple hyphens with single hyphen, strip
|
| 69 |
+
s = re.sub(r"[\s_]+", "-", name.strip())
|
| 70 |
+
return re.sub(r"-+", "-", s).strip("-")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _get_amfi_report_date() -> str:
|
| 74 |
+
"""DD-MMM-YYYY for AMFI API. Use last weekday (API returns empty for weekend dates)."""
|
| 75 |
+
today = datetime.now().date()
|
| 76 |
+
d = today
|
| 77 |
+
for _ in range(7):
|
| 78 |
+
if d.weekday() < 5: # Mon=0 .. Fri=4
|
| 79 |
+
break
|
| 80 |
+
d -= timedelta(days=1)
|
| 81 |
+
return d.strftime("%d-%b-%Y")
|
| 82 |
+
|
| 83 |
+
# Scheme name fragments -> Benchmark Type (for nav_metrics_engine)
|
| 84 |
+
# Order matters: more specific (e.g. Nifty 500) before generic (Nifty 50)
|
| 85 |
+
BENCHMARK_INFER = [
|
| 86 |
+
(r"nifty\s*500|nifty500", "Nifty 500"),
|
| 87 |
+
(r"nifty\s*200|nifty200", "Nifty 200"),
|
| 88 |
+
(r"nifty\s*100|nifty100", "Nifty 100"),
|
| 89 |
+
(r"nifty\s*next\s*50|nifty\s*junior|niftyjr", "Nifty Next 50"),
|
| 90 |
+
(r"nifty\s*50|nifty50", "Nifty 50"),
|
| 91 |
+
(r"nifty\s*midcap\s*150|midcap\s*150", "Nifty Midcap 150"),
|
| 92 |
+
(r"nifty\s*smallcap\s*250|smallcap\s*250", "Nifty Smallcap 250"),
|
| 93 |
+
(r"sensex|bse\s*sensex", "BSE Sensex"),
|
| 94 |
+
(r"bse\s*100", "BSE 100"),
|
| 95 |
+
(r"bse\s*500", "BSE 500"),
|
| 96 |
+
]
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _normalize_category(meta_category: str | None) -> str:
    """Map an mfapi scheme_category to our Category label; defaults to 'Equity: Index Fund'."""
    if not meta_category:
        return "Equity: Index Fund"
    lowered = meta_category.strip().lower()
    # First fragment match in CATEGORY_MAP wins.
    for fragment, label in CATEGORY_MAP.items():
        if fragment in lowered:
            return label
    # Anything mentioning "index" is still treated as an index fund.
    return "Equity: Index Fund" if "index" in lowered else meta_category.strip()
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def _infer_benchmark(scheme_name: str) -> str:
    """Infer Benchmark Type from scheme-name fragments; falls back to Nifty 50."""
    lowered = (scheme_name or "").lower()
    # BENCHMARK_INFER is ordered most-specific first; take the first match.
    match = next(
        (bench for pattern, bench in BENCHMARK_INFER if re.search(pattern, lowered)),
        None,
    )
    return match if match is not None else "Nifty 50"
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _search_mfapi(query: str, limit: int = 200) -> list[dict]:
    """Return list of {schemeCode, schemeName} from mfapi search."""
    try:
        response = requests.get(MFAPI_SEARCH, params={"q": query}, timeout=15)
        response.raise_for_status()
        payload = response.json()
    except Exception as e:
        # Best-effort: a failed search just yields no candidates.
        print(f" [search] error for '{query}': {e}")
        return []
    # The API may return an error object instead of a list; guard against that.
    if not isinstance(payload, list):
        return []
    return payload[:limit]
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def _fetch_nav_meta(scheme_code: str) -> dict | None:
    """Fetch NAV endpoint and return meta only (scheme_name, scheme_category)."""
    try:
        response = requests.get(
            MFAPI_NAV.format(scheme_code=scheme_code),
            params={"limit": 1},
            timeout=15,
        )
        response.raise_for_status()
        meta = response.json().get("meta") or {}
    except Exception as e:
        # Best-effort: a missing/unreachable scheme is reported and skipped.
        print(f" [nav meta] {scheme_code}: {e}")
        return None
    return {
        "scheme_name": meta.get("scheme_name") or "",
        "scheme_category": meta.get("scheme_category") or "",
        "fund_house": meta.get("fund_house") or "",
    }
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def get_index_funds_via_mftool(verbose: bool = True) -> list[dict]:
    """
    Fetch index funds from the same AMFI API used by mftool (category 5, subCategory 38).

    Returns the same curated list as would appear in the raw CSV under PS
    (not the full 10k-scheme universe). Each item has: scheme_name,
    benchmark_type, scheme_code (left blank; enrichment resolves it), category.

    Tries today's report date first, then walks back up to 8 calendar days
    (skipping weekends) until AMFI returns data.
    """
    out: list[dict] = []
    base_date = datetime.now().date()
    for day_back in range(8):  # try up to 8 days back to get a date with data
        d = base_date - timedelta(days=day_back)
        if d.weekday() >= 5:  # skip weekend
            continue
        report_date = d.strftime("%d-%b-%Y")
        payload = {
            "maturityType": 1,
            "category": AMFI_CATEGORY_OTHER,
            "subCategory": AMFI_SUBCATEGORY_INDEX_FUNDS,
            "mfid": 0,
            "reportDate": report_date,
        }
        try:
            resp = requests.post(
                AMFI_FUND_PERFORMANCE_URL,
                headers={"User-Agent": "Mozilla/5.0"},
                json=payload,
                timeout=25,
            )
            resp.raise_for_status()
            data = resp.json()
            raw_list = data.get("data") or []
            for item in raw_list:
                name = (item.get("schemeName") or "").strip()
                if not name:
                    continue
                # Exclude ETFs so we match raw CSV (Index Fund section has open-ended funds only)
                if " ETF" in name or name.endswith(" ETF"):
                    continue
                benchmark = (item.get("benchmark") or "").strip() or "Nifty 50"
                out.append({
                    "scheme_name": name,
                    "benchmark_type": benchmark,
                    "scheme_code": "",  # AMFI API doesn't return code; enrichment resolves
                    "category": INDEX_FUND_CATEGORY_PS,
                })
            if out:
                # First report date that yields schemes wins; stop retrying.
                if verbose:
                    print(f"[mftool] AMFI category 38 (Index Funds/ETFs): {len(out)} schemes (report date {report_date})")
                break
        except Exception as e:
            # Only the first failure is reported to avoid log spam during retries.
            if verbose and day_back == 0:
                print(f"[mftool] AMFI request failed for {report_date}: {e}")
            continue
    if not out and verbose:
        print("[mftool] No schemes returned (tried several weekdays). Check AMFI API.")
    return out
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def _is_index_scheme(meta_category: str, scheme_name: str) -> bool:
|
| 209 |
+
"""True if this scheme should be treated as index fund."""
|
| 210 |
+
cat = (meta_category or "").lower()
|
| 211 |
+
name = (scheme_name or "").lower()
|
| 212 |
+
if "index" in cat:
|
| 213 |
+
return True
|
| 214 |
+
if "index" in name and ("fund" in name or "etf" not in name):
|
| 215 |
+
return True
|
| 216 |
+
# Explicit index benchmarks in name
|
| 217 |
+
if re.search(r"nifty\s*50|nifty\s*next\s*50|sensex|nifty\s*100|nifty\s*500", name):
|
| 218 |
+
return True
|
| 219 |
+
return False
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def discover_index_schemes(
    search_queries: list[str] | None = None,
    limit_per_query: int = 150,
    require_index_category: bool = True,
    verbose: bool = True,
) -> list[dict]:
    """
    Discover index fund schemes via mfapi search and NAV meta.

    For each search query, candidates are fetched from the mfapi search
    endpoint, then each candidate's NAV meta is fetched (with a polite SLEEP
    between calls) to confirm its category and canonical name. Duplicate
    scheme codes across queries are skipped.

    Args:
        search_queries: Queries to send to mfapi search; defaults to a set of
            index-related terms.
        limit_per_query: Max candidates taken per search query.
        require_index_category: When True, drop schemes that do not look like
            index funds per _is_index_scheme.
        verbose: Print progress to stdout.

    Returns list of dicts: scheme_code, scheme_name, category, benchmark_type.
    """
    if search_queries is None:
        search_queries = ["Index", "Index Fund", "Nifty 50", "Nifty Next 50", "Sensex"]
    seen_codes: set[int] = set()  # dedupe across overlapping search queries
    out: list[dict] = []

    for q in search_queries:
        if verbose:
            print(f"[discover] search q={q!r} …")
        candidates = _search_mfapi(q, limit=limit_per_query)
        for item in candidates:
            code = item.get("schemeCode")
            if code is None or code in seen_codes:
                continue
            name = item.get("schemeName") or ""
            # Throttle before each NAV-meta call to stay polite to mfapi.
            time.sleep(SLEEP)
            meta = _fetch_nav_meta(str(code))
            if not meta:
                continue
            cat = meta.get("scheme_category") or ""
            if require_index_category and not _is_index_scheme(cat, name):
                continue
            seen_codes.add(code)
            category = _normalize_category(cat)
            # Prefer the canonical name from NAV meta when inferring the benchmark.
            benchmark = _infer_benchmark(meta.get("scheme_name") or name)
            out.append({
                "scheme_code": str(code),
                "scheme_name": meta.get("scheme_name") or name,
                "category": category,
                "benchmark_type": benchmark,
            })
            if verbose:
                print(f" + {meta.get('scheme_name', name)[:55]} | {category} | {benchmark}")
    return out
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def write_fund_csv(rows: list[dict], path: str | Path) -> None:
    """Write CSV with FUND_CSV_HEADERS; each row is a dict with those keys (blank = '')."""
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # utf-8-sig so Excel opens the file with correct encoding detection.
    with open(target, "w", encoding="utf-8-sig", newline="") as handle:
        writer = csv.DictWriter(
            handle,
            fieldnames=FUND_CSV_HEADERS,
            restval="",
            extrasaction="ignore",
        )
        writer.writeheader()
        writer.writerows(rows)
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def build_csv_rows(schemes: list[dict], use_ps_format: bool = False) -> list[dict]:
    """Convert discover output to CSV row dicts (metrics blank).
    use_ps_format: when True, Fund = hyphenated name, Category = 'Index Fund' (matches raw CSV under PS).
    """
    out: list[dict] = []
    for scheme in schemes:
        name = scheme.get("scheme_name") or ""
        # Start from an all-blank row so every header column is present.
        row = dict.fromkeys(FUND_CSV_HEADERS, "")
        row["Fund"] = _to_hyphenated(name) if use_ps_format else name.replace(",", " ")
        row["Category"] = scheme.get("category") or ("Index Fund" if use_ps_format else "Equity: Index Fund")
        row["Scheme Code"] = scheme.get("scheme_code") or ""
        row["Benchmark Type"] = scheme.get("benchmark_type") or "Nifty 50"
        out.append(row)
    return out
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def run_ingest(
|
| 295 |
+
output_path: str | Path = "index_funds.csv",
|
| 296 |
+
source: str = "mftool",
|
| 297 |
+
search_queries: list[str] | None = None,
|
| 298 |
+
limit_per_query: int = 150,
|
| 299 |
+
verbose: bool = True,
|
| 300 |
+
) -> tuple[list[dict], Path]:
|
| 301 |
+
"""
|
| 302 |
+
Discover index schemes, build CSV rows, write CSV.
|
| 303 |
+
|
| 304 |
+
source: "mftool" = same as raw CSV (AMFI category 38, curated list). "mfapi" = search mfapi.
|
| 305 |
+
Returns (list of scheme dicts, output path).
|
| 306 |
+
"""
|
| 307 |
+
if source.lower() == "mftool":
|
| 308 |
+
schemes = get_index_funds_via_mftool(verbose=verbose)
|
| 309 |
+
use_ps_format = True
|
| 310 |
+
else:
|
| 311 |
+
schemes = discover_index_schemes(
|
| 312 |
+
search_queries=search_queries,
|
| 313 |
+
limit_per_query=limit_per_query,
|
| 314 |
+
require_index_category=True,
|
| 315 |
+
verbose=verbose,
|
| 316 |
+
)
|
| 317 |
+
use_ps_format = False
|
| 318 |
+
rows = build_csv_rows(schemes, use_ps_format=use_ps_format)
|
| 319 |
+
out = Path(output_path)
|
| 320 |
+
write_fund_csv(rows, out)
|
| 321 |
+
if verbose:
|
| 322 |
+
print(f"\n[ingest] Wrote {len(rows)} rows to {out.absolute()} (source={source})")
|
| 323 |
+
print(" Next: run CSV enrichment on this file, then merge into main fund CSV.")
|
| 324 |
+
return schemes, out
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
def main() -> None:
|
| 328 |
+
ap = argparse.ArgumentParser(
|
| 329 |
+
description="Index fund ingest β same list as raw CSV (mftool/AMFI) or mfapi search"
|
| 330 |
+
)
|
| 331 |
+
ap.add_argument("--output", "-o", default="index_funds.csv", help="Output CSV path")
|
| 332 |
+
ap.add_argument(
|
| 333 |
+
"--source",
|
| 334 |
+
choices=("mftool", "mfapi"),
|
| 335 |
+
default="mftool",
|
| 336 |
+
help="mftool = AMFI category 38 (same as raw CSV under PS). mfapi = search (more schemes).",
|
| 337 |
+
)
|
| 338 |
+
ap.add_argument("--search", "-s", action="append", default=None,
|
| 339 |
+
help="[mfapi only] Search query (repeatable). Default: Index, Index Fund, ...")
|
| 340 |
+
ap.add_argument("--limit", "-n", type=int, default=150,
|
| 341 |
+
help="[mfapi only] Max schemes per search query")
|
| 342 |
+
ap.add_argument("--quiet", "-q", action="store_true", help="Less output")
|
| 343 |
+
args = ap.parse_args()
|
| 344 |
+
run_ingest(
|
| 345 |
+
output_path=args.output,
|
| 346 |
+
source=args.source,
|
| 347 |
+
search_queries=args.search,
|
| 348 |
+
limit_per_query=args.limit,
|
| 349 |
+
verbose=not args.quiet,
|
| 350 |
+
)
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
if __name__ == "__main__":
|
| 354 |
+
main()
|
src/models.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data models for MF Portfolio Analysis Tool.
|
| 3 |
+
"""
|
| 4 |
+
from dataclasses import dataclass, field
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass
|
| 9 |
+
class Fund:
|
| 10 |
+
"""Represents a single mutual fund scheme from the fund universe CSV."""
|
| 11 |
+
name: str
|
| 12 |
+
category: str
|
| 13 |
+
benchmark: str
|
| 14 |
+
|
| 15 |
+
# Cost
|
| 16 |
+
ter: Optional[float] = None # Total Expense Ratio (%)
|
| 17 |
+
turnover: Optional[float] = None # Portfolio Turnover (%)
|
| 18 |
+
|
| 19 |
+
# Returns
|
| 20 |
+
mean: Optional[float] = None
|
| 21 |
+
cagr_1y: Optional[float] = None
|
| 22 |
+
cagr_1y_cat: Optional[float] = None
|
| 23 |
+
cagr_1y_bm: Optional[float] = None
|
| 24 |
+
cagr_3y: Optional[float] = None
|
| 25 |
+
cagr_3y_cat: Optional[float] = None
|
| 26 |
+
cagr_3y_bm: Optional[float] = None
|
| 27 |
+
cagr_5y: Optional[float] = None
|
| 28 |
+
cagr_5y_cat: Optional[float] = None
|
| 29 |
+
cagr_5y_bm: Optional[float] = None
|
| 30 |
+
cagr_10y: Optional[float] = None
|
| 31 |
+
cagr_10y_cat: Optional[float] = None
|
| 32 |
+
cagr_10y_bm: Optional[float] = None
|
| 33 |
+
cagr_inception: Optional[float] = None
|
| 34 |
+
nav: Optional[float] = None
|
| 35 |
+
|
| 36 |
+
# Valuation
|
| 37 |
+
pe_ratio: Optional[float] = None
|
| 38 |
+
pb_ratio: Optional[float] = None
|
| 39 |
+
|
| 40 |
+
# Risk metrics
|
| 41 |
+
alpha: Optional[float] = None
|
| 42 |
+
beta: Optional[float] = None
|
| 43 |
+
std_dev: Optional[float] = None
|
| 44 |
+
sharpe: Optional[float] = None
|
| 45 |
+
volatility: Optional[float] = None
|
| 46 |
+
sortino: Optional[float] = None
|
| 47 |
+
up_capture: Optional[float] = None
|
| 48 |
+
down_capture: Optional[float] = None
|
| 49 |
+
max_drawdown: Optional[float] = None
|
| 50 |
+
r_squared: Optional[float] = None
|
| 51 |
+
info_ratio: Optional[float] = None
|
| 52 |
+
aum: Optional[float] = None
|
| 53 |
+
fill_status: Optional[str] = None
|
| 54 |
+
|
| 55 |
+
# Scoring (computed)
|
| 56 |
+
score: Optional[float] = None
|
| 57 |
+
rank_in_category: Optional[int] = None
|
| 58 |
+
is_top_quartile: bool = False
|
| 59 |
+
weightage: Optional[int] = None # Number of periods beating benchmark
|
| 60 |
+
order: int = 0 # Preserves original CSV insertion order for sort tiebreaker
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
@dataclass
|
| 64 |
+
class ClientHolding:
|
| 65 |
+
"""Represents a single mutual fund holding in a client's portfolio."""
|
| 66 |
+
scheme_name: str
|
| 67 |
+
current_value: float
|
| 68 |
+
invested_amount: Optional[float] = None
|
| 69 |
+
sip_amount: Optional[float] = None
|
| 70 |
+
sip_frequency: Optional[str] = None # Monthly / Quarterly etc.
|
| 71 |
+
|
| 72 |
+
# Matched fund data
|
| 73 |
+
fund: Optional[Fund] = None
|
| 74 |
+
|
| 75 |
+
# Computed
|
| 76 |
+
allocation_pct: float = 0.0
|
| 77 |
+
xirr: Optional[float] = None
|
| 78 |
+
is_underperforming: bool = False
|
| 79 |
+
|
| 80 |
+
# Advisory
|
| 81 |
+
suggested_fund: Optional[Fund] = None
|
| 82 |
+
switch_reason: Optional[str] = None
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@dataclass
|
| 86 |
+
class Client:
|
| 87 |
+
"""Client details."""
|
| 88 |
+
name: str
|
| 89 |
+
age: Optional[int] = None
|
| 90 |
+
email: Optional[str] = None
|
| 91 |
+
mobile: Optional[str] = None
|
| 92 |
+
pan: Optional[str] = None
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
@dataclass
|
| 96 |
+
class Advisor:
|
| 97 |
+
"""Financial advisor details."""
|
| 98 |
+
name: str = "RAVICHANDRAN"
|
| 99 |
+
phone: str = "9281364703"
|
| 100 |
+
email: str = "c4c.ravi@gmail.com"
|
| 101 |
+
arn: str = "ARN-243354"
|
| 102 |
+
location: str = "Chennai"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
@dataclass
|
| 106 |
+
class PortfolioReport:
|
| 107 |
+
"""The complete portfolio analysis report for a client."""
|
| 108 |
+
client: Client
|
| 109 |
+
advisor: Advisor
|
| 110 |
+
holdings: list = field(default_factory=list)
|
| 111 |
+
|
| 112 |
+
# Portfolio-level metrics
|
| 113 |
+
total_current_value: float = 0.0
|
| 114 |
+
total_invested: float = 0.0
|
| 115 |
+
unrealized_gain: float = 0.0
|
| 116 |
+
portfolio_xirr: Optional[float] = None
|
| 117 |
+
sharpe: Optional[float] = None
|
| 118 |
+
alpha: Optional[float] = None
|
| 119 |
+
beta: Optional[float] = None
|
| 120 |
+
std_dev: Optional[float] = None
|
| 121 |
+
|
| 122 |
+
# Exposure warnings
|
| 123 |
+
amc_exposure: dict = field(default_factory=dict) # AMC -> pct
|
| 124 |
+
scheme_exposure: dict = field(default_factory=dict) # scheme -> pct
|
| 125 |
+
exposure_warnings: list = field(default_factory=list) # list of warning strings
|
| 126 |
+
|
| 127 |
+
# Allocation
|
| 128 |
+
market_cap_allocation: dict = field(default_factory=dict) # Large/Mid/Small/Other -> pct
|
| 129 |
+
sector_allocation: dict = field(default_factory=dict) # sector -> pct
|
| 130 |
+
|
| 131 |
+
# Wealth projection
|
| 132 |
+
wealth_projection: dict = field(default_factory=dict) # years -> projected value
|
src/nav_metrics_engine.py
ADDED
|
@@ -0,0 +1,1005 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import sqlite3
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
from dataclasses import dataclass, field
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
import numpy as np
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import requests
|
| 15 |
+
import yfinance as yf
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
TRADING_DAYS = 252
|
| 19 |
+
RF_RATE = 0.06
|
| 20 |
+
TRAILING_YEARS = 3
|
| 21 |
+
NAV_STALE_DAYS = 30
|
| 22 |
+
|
| 23 |
+
# ββ Disk cache config βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
+
# NAV history is refreshed if older than 7 days; benchmark index once a day.
|
| 25 |
+
_CACHE_DB_PATH = Path.home() / ".mf_nav_cache.db"
|
| 26 |
+
_NAV_TTL_SECS = 7 * 86_400 # 7 days
|
| 27 |
+
_BENCH_TTL_SECS = 1 * 86_400 # 1 day
|
| 28 |
+
_DB_LOCK = threading.Lock() # one writer at a time across threads
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
OUTPUT_METRICS: tuple[str, ...] = (
|
| 32 |
+
"Alpha",
|
| 33 |
+
"Beta",
|
| 34 |
+
"Standard Deviation",
|
| 35 |
+
"Volatility",
|
| 36 |
+
"Mean",
|
| 37 |
+
"Sharpe Ratio",
|
| 38 |
+
"Sortino Ratio",
|
| 39 |
+
"Up Market Capture\nRatio",
|
| 40 |
+
"Down Market Capture\nRatio",
|
| 41 |
+
"Maximum Drawdown",
|
| 42 |
+
"R-Squared",
|
| 43 |
+
"Information Ratio",
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
NAV_ONLY_METRICS: set[str] = {
|
| 47 |
+
"Standard Deviation",
|
| 48 |
+
"Volatility",
|
| 49 |
+
"Mean",
|
| 50 |
+
"Sharpe Ratio",
|
| 51 |
+
"Sortino Ratio",
|
| 52 |
+
"Maximum Drawdown",
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
BENCHMARK_DEPENDENT_METRICS: set[str] = {
|
| 56 |
+
"Alpha",
|
| 57 |
+
"Beta",
|
| 58 |
+
"Up Market Capture\nRatio",
|
| 59 |
+
"Down Market Capture\nRatio",
|
| 60 |
+
"R-Squared",
|
| 61 |
+
"Information Ratio",
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Common Indian benchmark labels -> Yahoo Finance ticker
|
| 66 |
+
# Last verified: March 2026
|
| 67 |
+
# ^NIFTYJR was delisted β correct ticker for Nifty Next 50 is now ^NSMIDCP
|
| 68 |
+
BENCHMARK_MAP: dict[str, str] = {
|
| 69 |
+
# ββ Nifty broad indices ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 70 |
+
"nifty 50": "^NSEI",
|
| 71 |
+
"nifty50": "^NSEI",
|
| 72 |
+
"nifty 50 tri": "^NSEI",
|
| 73 |
+
"nifty next 50": "^NSMIDCP",
|
| 74 |
+
"nifty next 50 tri": "^NSMIDCP",
|
| 75 |
+
"nifty junior": "^NSMIDCP",
|
| 76 |
+
"nifty 100": "^CNX100",
|
| 77 |
+
"nifty 100 tri": "^CNX100",
|
| 78 |
+
"nifty 100 (tri)": "^CNX100",
|
| 79 |
+
"nifty 200": "^CNX200",
|
| 80 |
+
"nifty 500": "^CRSLDX",
|
| 81 |
+
"nifty 500 tri": "^CRSLDX",
|
| 82 |
+
"nifty 500 (tri)": "^CRSLDX",
|
| 83 |
+
"nifty500": "^CRSLDX",
|
| 84 |
+
"nifty500 multicap 50:25:25 tri": "NIFTY500_MULTICAP_50_25_25.NS",
|
| 85 |
+
"nifty500 multicap 50:25:25 (tri)": "NIFTY500_MULTICAP_50_25_25.NS",
|
| 86 |
+
"nifty 500 multicap 50:25:25 (tri)": "NIFTY500_MULTICAP_50_25_25.NS",
|
| 87 |
+
"nifty500 multicap momentum quality 50 tri": "NIFTY500_MULTICAP_50_25_25.NS",
|
| 88 |
+
# ββ Nifty midcap / smallcap ββββββββββββββββββββββββββββββββββββββββββββ
|
| 89 |
+
"nifty midcap 150": "NIFTY_MIDCAP_100.NS",
|
| 90 |
+
"nifty midcap 150 tri": "NIFTY_MIDCAP_100.NS",
|
| 91 |
+
"nifty midcap 150 index (tri)": "NIFTY_MIDCAP_100.NS",
|
| 92 |
+
"nifty midcap 100": "NIFTY_MIDCAP_100.NS",
|
| 93 |
+
"nifty midcap 50": "^NSEMDCP50",
|
| 94 |
+
"nifty midcap": "NIFTY_MIDCAP_100.NS",
|
| 95 |
+
"nifty large midcap 250 tri": "NIFTY_LARGEMIDCAP_250.NS",
|
| 96 |
+
"nifty large midcap 250": "NIFTY_LARGEMIDCAP_250.NS",
|
| 97 |
+
"nifty large - midcap 250 index": "NIFTY_LARGEMIDCAP_250.NS",
|
| 98 |
+
"nifty large - midcap 250": "NIFTY_LARGEMIDCAP_250.NS",
|
| 99 |
+
"nifty smallcap 250": "NIFTYSMLCAP250.NS",
|
| 100 |
+
"nifty smallcap 250 tri": "NIFTYSMLCAP250.NS",
|
| 101 |
+
"nifty small cap 250 (tri)": "NIFTYSMLCAP250.NS",
|
| 102 |
+
"nifty smallcap 100": "^CNXSC",
|
| 103 |
+
"nifty smallcap": "NIFTYSMLCAP250.NS",
|
| 104 |
+
# ββ BSE βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 105 |
+
"sensex": "^BSESN",
|
| 106 |
+
"bse sensex": "^BSESN",
|
| 107 |
+
"bse 100": "^BSE100",
|
| 108 |
+
"bse 200": "^BSE100",
|
| 109 |
+
"bse 500": "^BSE500",
|
| 110 |
+
"s&p bse liquid rate index": "^NSEI", # no direct Yahoo ticker; use Nifty as proxy
|
| 111 |
+
# ββ Sector / thematic βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 112 |
+
"nifty bank": "^NSEBANK",
|
| 113 |
+
"nifty bank tri": "^NSEBANK",
|
| 114 |
+
"nifty bank (tri)": "^NSEBANK",
|
| 115 |
+
"nifty private bank": "NIFTY_PVT_BANK.NS",
|
| 116 |
+
"nifty private bank tri": "NIFTY_PVT_BANK.NS",
|
| 117 |
+
"nifty it": "^CNXIT",
|
| 118 |
+
"nifty it tri": "^CNXIT",
|
| 119 |
+
"nifty financial services": "NIFTY_FIN_SERVICE.NS",
|
| 120 |
+
"nifty financial services tri": "NIFTY_FIN_SERVICE.NS",
|
| 121 |
+
"nifty financial services index (tri)": "NIFTY_FIN_SERVICE.NS",
|
| 122 |
+
"nifty financial services ex-bank tri": "NIFTY_FIN_SERVICE.NS",
|
| 123 |
+
"nifty pharma": "^CNXPHARMA",
|
| 124 |
+
"nifty pharma tri": "^CNXPHARMA",
|
| 125 |
+
"nifty healthcare": "NIFTY_HEALTHCARE.NS",
|
| 126 |
+
"nifty healthcare tri": "NIFTY_HEALTHCARE.NS",
|
| 127 |
+
"nifty healthcare tri.": "NIFTY_HEALTHCARE.NS", # trailing dot variant
|
| 128 |
+
"nifty fmcg": "^CNXFMCG",
|
| 129 |
+
"nifty fmcg tri": "^CNXFMCG",
|
| 130 |
+
"nifty infrastructure": "^CNXINFRA",
|
| 131 |
+
"nifty infrastructure tri": "^CNXINFRA",
|
| 132 |
+
"nifty india consumption": "NIFTY_INDIA_CONSUMPTION.NS",
|
| 133 |
+
"nifty india consumption tri": "NIFTY_INDIA_CONSUMPTION.NS",
|
| 134 |
+
"nifty india consumption index (tri)": "NIFTY_INDIA_CONSUMPTION.NS",
|
| 135 |
+
"nifty india manufacturing tri": "NIFTY_INDIA_MANUFACTURING.NS",
|
| 136 |
+
"nifty india defence tri": "NIFTY_INDIA_DEFENCE.NS",
|
| 137 |
+
"nifty housing tri": "NIFTY_HOUSING.NS",
|
| 138 |
+
"nifty cpse tri": "NIFTY_CPSE.NS",
|
| 139 |
+
"nifty mnc tri": "NIFTY_MNC.NS",
|
| 140 |
+
"nifty commodities tri": "^CNXCMDT",
|
| 141 |
+
"nifty 100 esg tri": "NIFTY100_ESG.NS",
|
| 142 |
+
"nifty 100 low volatility 30 tri": "NIFTY100_LOWVOL30.NS",
|
| 143 |
+
"nifty ipo tri": "NIFTY_IPO.NS",
|
| 144 |
+
# ββ Factor / strategy βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 145 |
+
"nifty 200 momentum 30 tri": "NIFTY200_MOMENTUM_30.NS",
|
| 146 |
+
# ββ Debt / liquid / overnight β use Nifty 1D rate / GSec proxies ββββββ
|
| 147 |
+
"nifty 1d rate index": "^NSEI", # overnight / liquid funds; no direct Yahoo
|
| 148 |
+
"nifty 1d rate": "^NSEI",
|
| 149 |
+
"crisil liquid overnight index": "^NSEI",
|
| 150 |
+
"nifty 3 year sdl": "^NSEI",
|
| 151 |
+
"nifty 4-8 yr g-sec index": "^NSEI",
|
| 152 |
+
"nifty composite g-sec index": "^NSEI",
|
| 153 |
+
# ββ Hybrid / balanced βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 154 |
+
# AK = AdvisorKhoj composite benchmarks β no direct Yahoo ticker
|
| 155 |
+
# Mapped to closest equity index proxy based on fund category
|
| 156 |
+
"ak hybrid balanced tri": "^NSEI", # Dynamic Asset Allocation β Nifty 50
|
| 157 |
+
"ak hybrid aggressive tri": "^NSEI", # Aggressive Hybrid β Nifty 50
|
| 158 |
+
"ak hybrid conservative tri": "^NSEI", # Conservative Hybrid β Nifty 50
|
| 159 |
+
"ak multi asset allocation tri": "^CRSLDX", # Multi Asset β Nifty 500
|
| 160 |
+
"ak equity savings tri": "^NSEI", # Equity Savings β Nifty 50
|
| 161 |
+
# ββ Global ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 162 |
+
"msci acwi tri": "URTH", # iShares MSCI ACWI ETF as proxy
|
| 163 |
+
"s&p global 1200 tri": "URTH",
|
| 164 |
+
"nifty 50 arbitrage index": "^NSEI", # arbitrage funds; Nifty proxy
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# ββ Cache backend: SQLite (local) or Neon/Postgres (production) ββββββββββββββ
|
| 169 |
+
#
|
| 170 |
+
# Your Neon DSN (pooler endpoint β correct for serverless/HuggingFace):
|
| 171 |
+
# postgresql://neondb_owner:npg_b0JC5rvQlGPN@ep-damp-river-advc7q1j-pooler.c-2.us-east-1.aws.neon.tech/neondb?sslmode=require&channel_binding=require
|
| 172 |
+
#
|
| 173 |
+
# How to switch backends (zero code change needed):
|
| 174 |
+
#
|
| 175 |
+
# LOCAL TESTING (SQLite, default β no setup):
|
| 176 |
+
# β Do NOT set DATABASE_URL in your local .env. Uses ~/.mf_nav_cache.db.
|
| 177 |
+
#
|
| 178 |
+
# NEON / HUGGINGFACE SPACES:
|
| 179 |
+
# β Add to your .env OR HuggingFace Space Secret:
|
| 180 |
+
# DATABASE_URL=postgresql://neondb_owner:npg_b0JC5rvQlGPN@ep-damp-river-advc7q1j-pooler.c-2.us-east-1.aws.neon.tech/neondb?sslmode=require&channel_binding=require
|
| 181 |
+
# β Add to requirements.txt:
|
| 182 |
+
# psycopg2-binary
|
| 183 |
+
# β Done. Code detects DATABASE_URL and uses Neon automatically.
|
| 184 |
+
#
|
| 185 |
+
# WHY POOLER ENDPOINT (not direct):
|
| 186 |
+
# HuggingFace Spaces can spin up many workers concurrently.
|
| 187 |
+
# Pooler endpoint (ep-...-pooler.c-2...) handles connection bursts safely.
|
| 188 |
+
# Direct endpoint (ep-... without -pooler) has a hard cap of ~100 connections.
|
| 189 |
+
#
|
| 190 |
+
# WHY channel_binding=require:
|
| 191 |
+
# Your Neon project enforces channel binding. psycopg2 supports it via libpq >= 14.
|
| 192 |
+
# The param is passed through the DSN string β no extra code needed.
|
| 193 |
+
#
|
| 194 |
+
# Table schema (identical for SQLite and Postgres):
|
| 195 |
+
# nav_cache(key TEXT PRIMARY KEY, data TEXT NOT NULL, ts DOUBLE PRECISION NOT NULL)
|
| 196 |
+
|
| 197 |
+
import os as _os
|
| 198 |
+
|
| 199 |
+
_DATABASE_URL = _os.environ.get("DATABASE_URL", "")
|
| 200 |
+
_USE_POSTGRES = bool(_DATABASE_URL)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
# ββ Thread-local Postgres connection pool βββββββββββββββββββββββββββββββββββββ
|
| 204 |
+
# Opening a new psycopg2 connection per cache query costs ~100-200ms on Neon
|
| 205 |
+
# (TLS handshake + auth). With 12 parallel workers Γ 2 queries/fund Γ 478 funds
|
| 206 |
+
# that is ~1000 round-trips. Fix: one persistent connection per thread, reused
|
| 207 |
+
# across all queries that thread handles.
|
| 208 |
+
import threading as _threading
|
| 209 |
+
_tls = _threading.local()
|
| 210 |
+
|
| 211 |
+
def _get_pg_conn():
|
| 212 |
+
"""
|
| 213 |
+
Return a thread-local persistent Neon connection, creating one if needed.
|
| 214 |
+
Falls back to a fresh connection if the cached one has gone away.
|
| 215 |
+
"""
|
| 216 |
+
import psycopg2 # type: ignore
|
| 217 |
+
|
| 218 |
+
conn = getattr(_tls, "pg_conn", None)
|
| 219 |
+
if conn is not None:
|
| 220 |
+
try:
|
| 221 |
+
# Lightweight liveness check β closed flag or dead socket
|
| 222 |
+
if not conn.closed:
|
| 223 |
+
conn.cursor().execute("SELECT 1")
|
| 224 |
+
return conn
|
| 225 |
+
except Exception:
|
| 226 |
+
pass # Connection is dead β fall through to re-create
|
| 227 |
+
|
| 228 |
+
conn = psycopg2.connect(
|
| 229 |
+
_DATABASE_URL,
|
| 230 |
+
connect_timeout=10,
|
| 231 |
+
keepalives=1,
|
| 232 |
+
keepalives_idle=30,
|
| 233 |
+
keepalives_interval=10,
|
| 234 |
+
keepalives_count=3,
|
| 235 |
+
)
|
| 236 |
+
_tls.pg_conn = conn
|
| 237 |
+
return conn
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def _init_cache_db() -> None:
|
| 241 |
+
"""Create cache table if it doesn't exist (idempotent, works for both backends)."""
|
| 242 |
+
if _USE_POSTGRES:
|
| 243 |
+
try:
|
| 244 |
+
conn = _get_pg_conn()
|
| 245 |
+
with conn:
|
| 246 |
+
with conn.cursor() as cur:
|
| 247 |
+
cur.execute("""
|
| 248 |
+
CREATE TABLE IF NOT EXISTS nav_cache (
|
| 249 |
+
key TEXT PRIMARY KEY,
|
| 250 |
+
data TEXT NOT NULL,
|
| 251 |
+
ts DOUBLE PRECISION NOT NULL
|
| 252 |
+
)
|
| 253 |
+
""")
|
| 254 |
+
conn.close()
|
| 255 |
+
except Exception as e:
|
| 256 |
+
print(f"[cache] Postgres init warning: {e}")
|
| 257 |
+
else:
|
| 258 |
+
with _DB_LOCK, sqlite3.connect(_CACHE_DB_PATH) as db:
|
| 259 |
+
db.execute("""
|
| 260 |
+
CREATE TABLE IF NOT EXISTS nav_cache (
|
| 261 |
+
key TEXT PRIMARY KEY,
|
| 262 |
+
data TEXT NOT NULL,
|
| 263 |
+
ts REAL NOT NULL
|
| 264 |
+
)
|
| 265 |
+
""")
|
| 266 |
+
db.commit()
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def _cache_get(key: str, ttl: float) -> pd.DataFrame | None:
|
| 270 |
+
"""Return cached DataFrame if fresh, else None. Works for SQLite and Neon."""
|
| 271 |
+
# Check bulk preload first β zero network cost
|
| 272 |
+
if key in _PRELOAD_CACHE:
|
| 273 |
+
return _PRELOAD_CACHE[key]
|
| 274 |
+
try:
|
| 275 |
+
if _USE_POSTGRES:
|
| 276 |
+
conn = _get_pg_conn()
|
| 277 |
+
with conn.cursor() as cur:
|
| 278 |
+
cur.execute(
|
| 279 |
+
"SELECT data, ts FROM nav_cache WHERE key = %s", (key,)
|
| 280 |
+
)
|
| 281 |
+
row = cur.fetchone()
|
| 282 |
+
# Do NOT close β thread-local connection is reused
|
| 283 |
+
else:
|
| 284 |
+
with sqlite3.connect(_CACHE_DB_PATH) as db:
|
| 285 |
+
row = db.execute(
|
| 286 |
+
"SELECT data, ts FROM nav_cache WHERE key = ?", (key,)
|
| 287 |
+
).fetchone()
|
| 288 |
+
|
| 289 |
+
if row and (time.time() - row[1]) < ttl:
|
| 290 |
+
import io as _sio
|
| 291 |
+
return pd.read_json(_sio.StringIO(row[0]), orient="split")
|
| 292 |
+
except Exception:
|
| 293 |
+
pass
|
| 294 |
+
return None
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def _cache_set(key: str, df: pd.DataFrame) -> None:
|
| 298 |
+
"""Persist DataFrame. Works for SQLite and Neon. Write failures are non-fatal."""
|
| 299 |
+
try:
|
| 300 |
+
serialised = df.to_json(orient="split", date_format="iso")
|
| 301 |
+
if _USE_POSTGRES:
|
| 302 |
+
conn = _get_pg_conn()
|
| 303 |
+
with conn.cursor() as cur:
|
| 304 |
+
cur.execute("""
|
| 305 |
+
INSERT INTO nav_cache (key, data, ts)
|
| 306 |
+
VALUES (%s, %s, %s)
|
| 307 |
+
ON CONFLICT (key) DO UPDATE
|
| 308 |
+
SET data = EXCLUDED.data,
|
| 309 |
+
ts = EXCLUDED.ts
|
| 310 |
+
""", (key, serialised, time.time()))
|
| 311 |
+
conn.commit()
|
| 312 |
+
# Do NOT close β thread-local connection is reused
|
| 313 |
+
else:
|
| 314 |
+
with _DB_LOCK, sqlite3.connect(_CACHE_DB_PATH) as db:
|
| 315 |
+
db.execute(
|
| 316 |
+
"INSERT OR REPLACE INTO nav_cache (key, data, ts) VALUES (?, ?, ?)",
|
| 317 |
+
(key, serialised, time.time()),
|
| 318 |
+
)
|
| 319 |
+
db.commit()
|
| 320 |
+
except Exception:
|
| 321 |
+
pass # cache write failure is non-fatal
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
# Initialise at import time (fast, idempotent).
|
| 325 |
+
try:
|
| 326 |
+
_init_cache_db()
|
| 327 |
+
except Exception:
|
| 328 |
+
pass
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
# ββ In-process cache (lives for the duration of one run) βββββββββββββββββββββ
|
| 332 |
+
|
| 333 |
+
@dataclass
|
| 334 |
+
class NavEngineCache:
|
| 335 |
+
"""
|
| 336 |
+
Two-level cache:
|
| 337 |
+
L1 β in-process dict (zero latency within a run, thread-safe via dict GIL)
|
| 338 |
+
L2 β SQLite on disk (persists across runs; TTL-based)
|
| 339 |
+
"""
|
| 340 |
+
nav_history: dict[str, pd.DataFrame | None] = field(default_factory=dict)
|
| 341 |
+
benchmark_history: dict[str, pd.DataFrame | None] = field(default_factory=dict)
|
| 342 |
+
_lock: threading.Lock = field(default_factory=threading.Lock, repr=False)
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
def _normalize_benchmark_name(name: str) -> str:
|
| 346 |
+
return " ".join((name or "").lower().replace("-", " ").replace("_", " ").split())
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
def resolve_benchmark_ticker(benchmark_type: str) -> str:
|
| 350 |
+
# Guard against corrupt scraper artifacts (Java object toString strings)
|
| 351 |
+
raw = (benchmark_type or "").strip()
|
| 352 |
+
if raw.startswith("com.") or "@" in raw:
|
| 353 |
+
return "^NSEI" # fallback for corrupt benchmark strings
|
| 354 |
+
normalized = _normalize_benchmark_name(raw)
|
| 355 |
+
if not normalized:
|
| 356 |
+
return "^NSEI"
|
| 357 |
+
if normalized in BENCHMARK_MAP:
|
| 358 |
+
ticker = BENCHMARK_MAP[normalized]
|
| 359 |
+
else:
|
| 360 |
+
ticker = "^NSEI"
|
| 361 |
+
for key, t in BENCHMARK_MAP.items():
|
| 362 |
+
if key in normalized:
|
| 363 |
+
ticker = t
|
| 364 |
+
break
|
| 365 |
+
|
| 366 |
+
# Second-level fallback: some NSE index tickers resolve from BENCHMARK_MAP
|
| 367 |
+
# but are not available on yfinance (delisted/unavailable symbols).
|
| 368 |
+
# Map them to the nearest available proxy so _prewarm_benchmarks doesn't fail.
|
| 369 |
+
_YF_UNAVAILABLE: dict[str, str] = {
|
| 370 |
+
"NIFTY_CPSE.NS": "^NSEI", # PSU index β broad market
|
| 371 |
+
"NIFTYSMLCAP250.NS": "^CNXSC", # Smallcap 250 β Smallcap 100
|
| 372 |
+
"NIFTY_IPO.NS": "^NSEI", # IPO index β no yf equivalent
|
| 373 |
+
"NIFTY200_MOMENTUM_30.NS": "^NSEI", # momentum factor β broad market
|
| 374 |
+
"NIFTY_HOUSING.NS": "^NSEI",
|
| 375 |
+
"NIFTY_LARGEMIDCAP_250.NS": "^NSEI",
|
| 376 |
+
"NIFTY_INDIA_CONSUMPTION.NS": "^NSEI",
|
| 377 |
+
"NIFTY_HEALTHCARE.NS": "^NSEI",
|
| 378 |
+
"NIFTY100_ESG.NS": "^NSEI",
|
| 379 |
+
"NIFTY100_LOWVOL30.NS": "^NSEI",
|
| 380 |
+
"NIFTY_MNC.NS": "^NSEI",
|
| 381 |
+
"NIFTY_INDIA_MANUFACTURING.NS": "^NSEI",
|
| 382 |
+
"NIFTY500_MULTICAP_50_25_25.NS": "^NSEI",
|
| 383 |
+
}
|
| 384 |
+
return _YF_UNAVAILABLE.get(ticker, ticker)
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def _safe_float(value: Any) -> float | None:
|
| 388 |
+
if value is None:
|
| 389 |
+
return None
|
| 390 |
+
text = str(value).strip().replace(",", "")
|
| 391 |
+
if text in {"", "-", "β", "N/A", "N/A*", "na", "nan", "None"}:
|
| 392 |
+
return None
|
| 393 |
+
try:
|
| 394 |
+
return float(text)
|
| 395 |
+
except ValueError:
|
| 396 |
+
return None
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def _request_json_with_retries(
    url: str, max_retries: int = 3, timeout: int = 20
) -> dict[str, Any] | None:
    """
    GET `url` and return the parsed JSON body, or None on persistent failure.

    Retries up to `max_retries` times on any error (connection failure,
    non-2xx status, invalid JSON). A short linear backoff is slept between
    attempts so transient upstream hiccups are not hammered back-to-back.
    Never raises — callers treat None as "data unavailable".
    """
    for attempt in range(1, max_retries + 1):
        try:
            resp = requests.get(url, timeout=timeout)
            resp.raise_for_status()
            return resp.json()
        except Exception:
            if attempt == max_retries:
                return None
            time.sleep(0.5 * attempt)  # 0.5s, 1.0s, ... between attempts
    return None  # unreachable; kept as a defensive default
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
# ── Bulk preload cache ────────────────────────────────────────────────────────
# Populated once (by _bulk_preload_cache) before parallel workers start.
# _cache_get checks here first, avoiding per-fund Neon round-trips on warm
# cache runs. Keys follow the same format as the DB cache:
# "nav:<scheme_code>" and "bench:<ticker>".
_PRELOAD_CACHE: dict[str, "pd.DataFrame"] = {}
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
def _bulk_preload_cache(scheme_codes: list[str], benchmark_tickers: list[str]) -> None:
    """
    Preload all NAV + benchmark cache entries from Postgres (Neon) into
    _PRELOAD_CACHE with a single IN-list query.

    Call once before the ThreadPoolExecutor starts so workers hit the
    in-process dict instead of issuing one Neon round-trip per fund.
    Entries older than their TTL are skipped. SQLite backend is local/fast,
    so this is a no-op there. Best-effort: any failure is logged and callers
    silently fall back to per-key queries via _cache_get.
    """
    import io as _sio
    global _PRELOAD_CACHE

    if not _USE_POSTGRES:
        return

    # Build the full key list up front; empty inputs mean nothing to load.
    nav_keys = [f"nav:{c}" for c in scheme_codes if c]
    bench_keys = [f"bench:{t}" for t in benchmark_tickers if t]
    all_keys = nav_keys + bench_keys
    if not all_keys:
        return

    try:
        conn = _get_pg_conn()
        now = time.time()
        # One parameterized IN-list query fetches every requested entry.
        placeholders = ",".join(["%s"] * len(all_keys))
        with conn.cursor() as cur:
            cur.execute(
                f"SELECT key, data, ts FROM nav_cache WHERE key IN ({placeholders})",
                all_keys,
            )
            rows_fetched = cur.fetchall()

        loaded_nav = loaded_bench = 0
        for key, data, ts in rows_fetched:
            # TTL differs by entry kind: NAV entries vs benchmark entries.
            ttl = _NAV_TTL_SECS if key.startswith("nav:") else _BENCH_TTL_SECS
            if (now - ts) >= ttl:
                continue  # expired — let the per-key path refresh it
            try:
                df = pd.read_json(_sio.StringIO(data), orient="split")
                # Normalise dates — the JSON round-trip strips tz info, so
                # force naive midnight timestamps for consistent merging.
                if "date" in df.columns:
                    df["date"] = pd.to_datetime(df["date"]).dt.tz_localize(None).dt.normalize()
            except Exception:
                continue  # corrupt cache payload — skip rather than fail the batch
            _PRELOAD_CACHE[key] = df
            if key.startswith("nav:"):
                loaded_nav += 1
            else:
                loaded_bench += 1

        print(f"[cache] Bulk preload: {loaded_nav} NAV + {loaded_bench} benchmark entries from Neon")

    except Exception as e:
        # Non-fatal: workers simply query Neon per key instead.
        print(f"[cache] Bulk preload failed (falling back to per-query): {e}")
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
def _prewarm_benchmarks(benchmark_tickers: list[str]) -> None:
    """
    Download all unique benchmark tickers in parallel BEFORE workers start.

    Complexity: O(B) where B = number of unique benchmarks.
    Each already-cached ticker hits _PRELOAD_CACHE in O(1) — zero network.
    Each cold ticker downloads once via yfinance (through
    _fetch_benchmark_history, which also stores it in the Neon cache and
    _PRELOAD_CACHE). Workers then get O(1) cache hits for every benchmark
    lookup. Failures are logged per-ticker and never raise.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    # dict.fromkeys: dedupe while preserving first-seen order.
    unique = list(dict.fromkeys(t for t in benchmark_tickers if t))
    if not unique:
        return

    # Only fetch tickers not already in the preload cache.
    cold = [t for t in unique if f"bench:{t}" not in _PRELOAD_CACHE]
    warm = len(unique) - len(cold)
    if warm:
        print(f"[bench-prewarm] {warm}/{len(unique)} already in cache")
    if not cold:
        return

    print(f"[bench-prewarm] Downloading {len(cold)} cold benchmark tickers in parallelβ¦")

    def _fetch_one(ticker: str) -> tuple[str, bool]:
        # _fetch_benchmark_history handles cache_set + _PRELOAD_CACHE population.
        df = _fetch_benchmark_history(ticker)
        return ticker, df is not None

    ok = failed = 0
    # Cap at 20 concurrent downloads to avoid tripping yfinance rate limits.
    with ThreadPoolExecutor(max_workers=min(len(cold), 20)) as ex:
        futures = {ex.submit(_fetch_one, t): t for t in cold}
        for fut in as_completed(futures):
            ticker, success = fut.result()
            if success:
                ok += 1
            else:
                failed += 1
                print(f"  [bench-prewarm] WARN: could not fetch {ticker}")

    print(f"[bench-prewarm] Done: {ok} fetched, {failed} failed, {warm} from cache")
|
| 512 |
+
|
| 513 |
+
|
| 514 |
+
def _fetch_nav_history(scheme_code: str) -> pd.DataFrame | None:
    """Fetch from Neon cache first, then mfapi."""
    cache_key = f"nav:{scheme_code}"
    hit = _cache_get(cache_key, _NAV_TTL_SECS)
    if hit is not None:
        return hit

    # Cache miss: pull the scheme's full NAV history from mfapi.
    url = f"https://api.mfapi.in/mf/{scheme_code}"
    payload = _request_json_with_retries(url)
    if not payload or "data" not in payload:
        return None

    try:
        raw = pd.DataFrame(payload["data"])
        if raw.empty or "date" not in raw or "nav" not in raw:
            return None
        # Parse dates (DD-MM-YYYY from mfapi) and NAVs, dropping bad rows.
        parsed = raw.assign(
            date=pd.to_datetime(raw["date"], dayfirst=True, errors="coerce").dt.tz_localize(None).dt.normalize(),
            nav=pd.to_numeric(raw["nav"], errors="coerce"),
        ).dropna(subset=["date", "nav"]).sort_values("date")
        if parsed.empty:
            return None
        result = parsed[["date", "nav"]]
        _cache_set(cache_key, result)
        return result
    except Exception:
        return None
|
| 540 |
+
|
| 541 |
+
|
| 542 |
+
def _fetch_benchmark_history(ticker: str) -> pd.DataFrame | None:
    """Return benchmark history, consulting the cache before hitting yfinance."""
    key = f"bench:{ticker}"
    hit = _cache_get(key, _BENCH_TTL_SECS)
    if hit is not None:
        return hit

    # Cache miss: download fresh data and store it for subsequent callers.
    fresh = _download_benchmark(ticker)
    if fresh is not None:
        _cache_set(key, fresh)
    return fresh
|
| 553 |
+
|
| 554 |
+
|
| 555 |
+
def _download_benchmark(ticker: str) -> pd.DataFrame | None:
    """
    Raw yfinance download (no caching logic here).

    Returns a sorted DataFrame with columns ["date", "benchmark"] (naive
    midnight timestamps, numeric prices) or None if no usable history
    (>= 60 rows) could be obtained.

    Parallel workers hitting yfinance simultaneously can get 401 Invalid Crumb
    errors because yfinance refreshes its session cookie lazily. Fix:
    - Retry up to 4 times with exponential backoff (0.5s, 1s, 2s)
    - Each retry creates a fresh Ticker session, which re-fetches the crumb
    - Suppress noisy 'possibly delisted' stderr from yfinance
    """
    import contextlib, io as _io

    def _suppress_yf_stderr(fn, *args, **kwargs):
        """Run fn suppressing yfinance's noisy stderr warnings."""
        with contextlib.redirect_stderr(_io.StringIO()):
            return fn(*args, **kwargs)

    # Primary path: yf.download with retries + exponential backoff.
    for attempt in range(4):
        if attempt > 0:
            time.sleep(0.5 * (2 ** (attempt - 1)))  # 0.5s, 1s, 2s

        try:
            bench = _suppress_yf_stderr(
                yf.download,
                ticker,
                start="2000-01-01",
                progress=False,
                auto_adjust=False,
                threads=False,
            )
            if bench is None or bench.empty:
                continue
            # Single-ticker downloads can still return MultiIndex columns;
            # flatten to the first level ("Close", "Adj Close", ...).
            if isinstance(bench.columns, pd.MultiIndex):
                bench.columns = [str(col[0]) for col in bench.columns]
            bench = bench.reset_index()
            # Prefer adjusted close when available.
            price_col = "Adj Close" if "Adj Close" in bench.columns else "Close"
            if price_col not in bench.columns:
                continue
            bench = bench[["Date", price_col]].rename(
                columns={"Date": "date", price_col: "benchmark"}
            )
            # Normalise to naive midnight timestamps for consistent merging.
            bench["date"] = pd.to_datetime(bench["date"], errors="coerce").dt.tz_localize(None).dt.normalize()
            bench["benchmark"] = pd.to_numeric(bench["benchmark"], errors="coerce")
            bench = bench.dropna(subset=["date", "benchmark"]).sort_values("date")
            # Require ~3 months of data before accepting the result.
            if len(bench) >= 60:
                return bench
        except Exception:
            continue

    # Secondary fallback: Ticker().history() uses a separate session/crumb path
    for attempt in range(3):
        if attempt > 0:
            time.sleep(0.5 * attempt)
        try:
            hist = _suppress_yf_stderr(
                yf.Ticker(ticker).history,
                period="10y",
                auto_adjust=False,
            )
            if hist is None or hist.empty:
                continue
            hist = hist.reset_index()
            price_col = "Adj Close" if "Adj Close" in hist.columns else "Close"
            if price_col not in hist.columns:
                continue
            hist = hist[["Date", price_col]].rename(
                columns={"Date": "date", price_col: "benchmark"}
            )
            hist["date"] = pd.to_datetime(hist["date"], errors="coerce").dt.tz_localize(None).dt.normalize()
            hist["benchmark"] = pd.to_numeric(hist["benchmark"], errors="coerce")
            hist = hist.dropna(subset=["date", "benchmark"]).sort_values("date")
            if len(hist) >= 60:
                return hist
        except Exception:
            continue

    return None
|
| 632 |
+
|
| 633 |
+
|
| 634 |
+
def _trailing_3y_window(df: pd.DataFrame) -> pd.DataFrame:
|
| 635 |
+
if df.empty:
|
| 636 |
+
return df
|
| 637 |
+
max_date = df["date"].max()
|
| 638 |
+
if pd.isna(max_date):
|
| 639 |
+
return pd.DataFrame(columns=df.columns)
|
| 640 |
+
cutoff = max_date - pd.DateOffset(years=TRAILING_YEARS)
|
| 641 |
+
return df[df["date"] >= cutoff].copy()
|
| 642 |
+
|
| 643 |
+
|
| 644 |
+
def _nav_history_is_stale(nav_df: pd.DataFrame) -> bool:
|
| 645 |
+
if nav_df is None or nav_df.empty or "date" not in nav_df.columns:
|
| 646 |
+
return True
|
| 647 |
+
latest = pd.to_datetime(nav_df["date"], errors="coerce").max()
|
| 648 |
+
if pd.isna(latest):
|
| 649 |
+
return True
|
| 650 |
+
latest = pd.Timestamp(latest).tz_localize(None).normalize()
|
| 651 |
+
cutoff = pd.Timestamp.now().tz_localize(None).normalize() - pd.Timedelta(days=NAV_STALE_DAYS)
|
| 652 |
+
return latest < cutoff
|
| 653 |
+
|
| 654 |
+
|
| 655 |
+
def _compute_nav_only_metrics(
    nav_df: pd.DataFrame,
    needed_metrics: list[str],
    benchmark_reason: str,
) -> tuple[dict[str, float | None], dict[str, str]]:
    """
    Fallback metric computation from NAV history alone (no benchmark series).

    Used when benchmark data is unavailable. Benchmark-dependent metrics are
    skipped with `benchmark_reason`; NAV-only metrics (std dev, volatility,
    mean, Sharpe, Sortino, max drawdown) are computed from the trailing-3Y
    daily NAV returns. Returns (metrics, skip_reasons) where metrics maps each
    requested metric to a float or None, and skip_reasons explains the Nones.
    """
    needed = [m for m in needed_metrics if m in OUTPUT_METRICS]
    out = {m: None for m in needed}
    skip: dict[str, str] = {}
    if not needed:
        return out, skip

    # Anything requiring a benchmark series cannot be computed here.
    for m in needed:
        if m in BENCHMARK_DEPENDENT_METRICS:
            skip[m] = benchmark_reason

    window = _trailing_3y_window(nav_df[["date", "nav"]].copy())
    if window.empty:
        for m in needed:
            if m in NAV_ONLY_METRICS:
                skip[m] = "less than 3 years of NAV history"
        return out, skip

    returns = window["nav"].pct_change().dropna()
    if len(returns) < 30:
        # Too few observations for meaningful annualized statistics.
        for m in needed:
            if m in NAV_ONLY_METRICS:
                skip[m] = f"fewer than 30 NAV return points ({len(returns)})"
        return out, skip

    # Annualize daily mean/std with the trading-day count.
    mean_daily = returns.mean()
    mean_annual = mean_daily * TRADING_DAYS
    vol = returns.std(ddof=1) * np.sqrt(TRADING_DAYS)

    if pd.notna(vol):
        if "Standard Deviation" in out:
            out["Standard Deviation"] = float(vol * 100)
        if "Volatility" in out:
            out["Volatility"] = float(vol * 100)
    if "Mean" in out and pd.notna(mean_annual):
        out["Mean"] = float(mean_annual * 100)

    if "Sharpe Ratio" in out:
        if pd.notna(vol) and vol > 0:
            sharpe = (mean_annual - RF_RATE) / vol
            out["Sharpe Ratio"] = float(sharpe) if pd.notna(sharpe) else None
        if out["Sharpe Ratio"] is None:
            skip["Sharpe Ratio"] = "volatility is zero or NaN (NAV-only fallback)"

    if "Sortino Ratio" in out:
        # Sortino uses downside deviation; when there are no negative returns
        # (or downside std is degenerate), fall back to the Sharpe value.
        downside = returns[returns < 0]
        if not downside.empty:
            downside_std = downside.std(ddof=1) * np.sqrt(TRADING_DAYS)
            if pd.notna(downside_std) and downside_std > 0:
                sortino = (mean_annual - RF_RATE) / downside_std
                out["Sortino Ratio"] = float(sortino) if pd.notna(sortino) else None
            elif out.get("Sharpe Ratio") is not None:
                out["Sortino Ratio"] = float(out["Sharpe Ratio"])
        elif out.get("Sharpe Ratio") is not None:
            out["Sortino Ratio"] = float(out["Sharpe Ratio"])
        if out["Sortino Ratio"] is None:
            skip["Sortino Ratio"] = "no valid downside deviation (NAV-only fallback)"

    if "Maximum Drawdown" in out:
        # Max drawdown = worst peak-to-trough decline of the cumulative curve.
        cumulative = (1 + returns).cumprod()
        peak = cumulative.cummax()
        drawdown = (cumulative - peak) / peak
        if not drawdown.empty:
            max_drawdown = drawdown.min()
            out["Maximum Drawdown"] = (
                float(max_drawdown * 100) if pd.notna(max_drawdown) else None
            )
        if out["Maximum Drawdown"] is None:
            skip["Maximum Drawdown"] = "unable to compute NAV-only drawdown"

    return out, skip
|
| 730 |
+
|
| 731 |
+
|
| 732 |
+
def _compute_metrics(
    returns_df: pd.DataFrame,
) -> tuple[dict[str, float | None], dict[str, str]]:
    """
    Compute the full benchmark-relative metric set from merged daily returns.

    `returns_df` must contain "fund_return" and "benchmark_return" columns
    (daily pct-change values over the trailing window). Returns
    (result, skip) where result maps every OUTPUT_METRICS key to a float or
    None, and skip maps each unresolved metric to a human-readable reason.
    Requires at least 30 joined data points.
    """
    skip: dict[str, str] = {}

    if returns_df.empty:
        for k in OUTPUT_METRICS:
            skip[k] = "empty returns dataframe after merge/window"
        return {k: None for k in OUTPUT_METRICS}, skip

    fund = returns_df["fund_return"]
    bench = returns_df["benchmark_return"]
    result: dict[str, float | None] = {k: None for k in OUTPUT_METRICS}

    if len(fund) < 30:
        for k in OUTPUT_METRICS:
            skip[k] = f"fewer than 30 data points ({len(fund)}) after join"
        return result, skip

    # Annualize daily means with the trading-day count.
    mean_daily = fund.mean()
    bench_mean_daily = bench.mean()
    mean_annual = mean_daily * TRADING_DAYS
    bench_annual = bench_mean_daily * TRADING_DAYS

    # Volatility / std dev (annualized, reported in percent).
    vol = fund.std(ddof=1) * np.sqrt(TRADING_DAYS)
    if pd.notna(vol):
        result["Standard Deviation"] = float(vol * 100)
        result["Volatility"] = float(vol * 100)
    result["Mean"] = float(mean_annual * 100) if pd.notna(mean_annual) else None

    # Beta = cov(fund, bench) / var(bench); needs non-degenerate benchmark.
    bench_var = bench.var(ddof=1)
    beta = None
    if pd.notna(bench_var) and bench_var and bench_var > 0:
        cov = np.cov(fund, bench)[0, 1]
        beta = cov / bench_var
    result["Beta"] = float(beta) if beta is not None and pd.notna(beta) else None
    if result["Beta"] is None:
        skip["Beta"] = (
            "benchmark variance is zero or NaN"
            if not (pd.notna(bench_var) and bench_var and bench_var > 0)
            else "beta computation returned NaN"
        )

    # CAPM alpha (annualized, percent); requires a valid beta.
    if beta is not None and pd.notna(mean_annual):
        alpha = mean_annual - (RF_RATE + beta * (bench_annual - RF_RATE))
        result["Alpha"] = float(alpha * 100) if pd.notna(alpha) else None
    if result["Alpha"] is None:
        skip["Alpha"] = (
            "Beta is None β Alpha requires Beta"
            if result["Beta"] is None
            else "Alpha computation returned NaN"
        )

    # Sharpe = (annual excess return) / annual volatility.
    if vol and vol > 0:
        sharpe = (mean_annual - RF_RATE) / vol
        result["Sharpe Ratio"] = float(sharpe) if pd.notna(sharpe) else None
    if result["Sharpe Ratio"] is None:
        skip["Sharpe Ratio"] = "volatility is zero or NaN"

    # Sortino uses downside deviation; degenerate cases fall back to Sharpe.
    downside = fund[fund < 0]
    if not downside.empty:
        downside_std = downside.std(ddof=1) * np.sqrt(TRADING_DAYS)
        if pd.notna(downside_std) and downside_std > 0:
            sortino = (mean_annual - RF_RATE) / downside_std
            result["Sortino Ratio"] = float(sortino) if pd.notna(sortino) else None
        elif result["Sharpe Ratio"] is not None:
            result["Sortino Ratio"] = float(result["Sharpe Ratio"])
        else:
            skip["Sortino Ratio"] = "downside std dev is zero and Sharpe fallback unavailable"
    elif result["Sharpe Ratio"] is not None:
        result["Sortino Ratio"] = float(result["Sharpe Ratio"])
    else:
        skip["Sortino Ratio"] = (
            "no negative daily returns in 3Y window and Sharpe fallback unavailable"
        )

    # Maximum drawdown: worst peak-to-trough fall of the cumulative curve.
    cumulative = (1 + fund).cumprod()
    peak = cumulative.cummax()
    drawdown = (cumulative - peak) / peak
    if not drawdown.empty:
        max_drawdown = drawdown.min()
        result["Maximum Drawdown"] = (
            float(max_drawdown * 100) if pd.notna(max_drawdown) else None
        )

    # R-squared = squared fund/benchmark correlation.
    corr = fund.corr(bench)
    if pd.notna(corr):
        result["R-Squared"] = float(corr ** 2)
    else:
        skip["R-Squared"] = "fund/benchmark correlation is NaN"

    # Information ratio = annualized active return / tracking error.
    active = fund - bench
    tracking_error = active.std(ddof=1) * np.sqrt(TRADING_DAYS)
    if pd.notna(tracking_error) and tracking_error > 0:
        info_ratio = (mean_annual - bench_annual) / tracking_error
        result["Information Ratio"] = (
            float(info_ratio) if pd.notna(info_ratio) else None
        )
    else:
        skip["Information Ratio"] = (
            "tracking error is zero β fund mirrors benchmark"
            if (pd.notna(tracking_error) and tracking_error == 0)
            else "tracking error is NaN"
        )

    # Up-market capture: fund's mean return on benchmark up-days vs benchmark.
    up = returns_df[returns_df["benchmark_return"] > 0]
    if not up.empty:
        up_bench = up["benchmark_return"].mean()
        if pd.notna(up_bench) and up_bench != 0:
            up_capture = (up["fund_return"].mean() / up_bench) * 100
            result["Up Market Capture\nRatio"] = (
                float(up_capture) if pd.notna(up_capture) else None
            )
        else:
            skip["Up Market Capture\nRatio"] = "benchmark mean on up-days is zero or NaN"
    else:
        skip["Up Market Capture\nRatio"] = "no benchmark up-days in 3Y window"

    # Down-market capture: same idea on benchmark down-days.
    down = returns_df[returns_df["benchmark_return"] < 0]
    if not down.empty:
        down_bench = down["benchmark_return"].mean()
        if pd.notna(down_bench) and down_bench != 0:
            down_capture = (down["fund_return"].mean() / down_bench) * 100
            result["Down Market Capture\nRatio"] = (
                float(down_capture) if pd.notna(down_capture) else None
            )
        else:
            skip["Down Market Capture\nRatio"] = "benchmark mean on down-days is zero or NaN"
    else:
        skip["Down Market Capture\nRatio"] = "no benchmark down-days in 3Y window"

    return result, skip
|
| 864 |
+
|
| 865 |
+
|
| 866 |
+
def compute_nav_metrics_for_scheme(
    *,
    scheme_code: str,
    benchmark_type: str,
    needed_metrics: list[str],
    cache: NavEngineCache,
) -> tuple[dict[str, float | None], dict[str, str]]:
    """
    Compute trailing-3Y risk metrics for a scheme.

    Pipeline: fetch NAV history (L1 dict then L2 cache/API), resolve and fetch
    the benchmark series (with NIFTY 50 fallback), align NAV onto the
    benchmark's trading-day calendar, then compute benchmark-joined metrics,
    topping up NAV-only metrics when the benchmark path fails.

    Returns (metrics, skip_reasons): metrics maps each requested metric to a
    float or None; skip_reasons explains every None.

    Thread-safe: uses NavEngineCache._lock to serialise L1 dict writes so
    concurrent ThreadPoolExecutor workers don't race on the same key.
    """
    needed = [m for m in needed_metrics if m in OUTPUT_METRICS]
    if not needed:
        return {}, {}

    code = str(scheme_code or "").strip()
    if not code:
        reason = "no scheme code β category header or unresolved scheme"
        return {m: None for m in needed}, {m: reason for m in needed}

    # ── NAV history (L1 check then L2 fetch) ──────────────────────────────
    with cache._lock:
        if code not in cache.nav_history:
            cache.nav_history[code] = None  # sentinel prevents duplicate fetches
        nav_df = cache.nav_history.get(code)
    if nav_df is None and cache.nav_history.get(code) is None:
        # Cold key: fetch outside the lock, publish under it.
        fetched = _fetch_nav_history(code)
        with cache._lock:
            cache.nav_history[code] = fetched
        nav_df = fetched
    elif nav_df is None:
        # Defensive branch: another worker replaced the sentinel concurrently.
        nav_df = _fetch_nav_history(code)
        with cache._lock:
            cache.nav_history[code] = nav_df

    if nav_df is None or nav_df.empty:
        reason = f"MFAPI returned no NAV history for scheme code {code}"
        return {m: None for m in needed}, {m: reason for m in needed}
    if _nav_history_is_stale(nav_df):
        latest = pd.to_datetime(nav_df["date"], errors="coerce").max()
        latest_str = (
            pd.Timestamp(latest).tz_localize(None).normalize().strftime("%Y-%m-%d")
            if pd.notna(latest) else "unknown"
        )
        reason = f"NAV history is stale for scheme code {code} (latest NAV {latest_str})"
        return {m: None for m in needed}, {m: reason for m in needed}

    # ── Benchmark history (L1 check then L2 fetch) ────────────────────────
    ticker = resolve_benchmark_ticker(benchmark_type)

    def _ensure_benchmark(t: str) -> pd.DataFrame | None:
        # Same sentinel/lock pattern as the NAV cache above.
        with cache._lock:
            if t not in cache.benchmark_history:
                cache.benchmark_history[t] = None
            bench = cache.benchmark_history.get(t)
        if bench is None:
            fetched_b = _fetch_benchmark_history(t)
            with cache._lock:
                cache.benchmark_history[t] = fetched_b
            return fetched_b
        return bench

    bench_df = _ensure_benchmark(ticker)
    # Require ~3 months of benchmark rows; otherwise fall back to NIFTY 50.
    if (bench_df is None or bench_df.empty or len(bench_df) < 60) and ticker != "^NSEI":
        bench_df = _ensure_benchmark("^NSEI")
    if bench_df is None or bench_df.empty:
        reason = f"benchmark history unavailable for ticker={ticker} and NIFTY 50 fallback also failed"
        return _compute_nav_only_metrics(nav_df, needed, reason)

    # ── Merge + compute ───────────────────────────────────────────────────
    # Strip tz from both sides — yfinance returns UTC-aware, JSON cache is naive
    nav_df = nav_df.copy()
    bench_df = bench_df.copy()
    nav_df["date"] = pd.to_datetime(nav_df["date"]).dt.tz_localize(None).dt.normalize()
    bench_df["date"] = pd.to_datetime(bench_df["date"]).dt.tz_localize(None).dt.normalize()

    # Debt funds (Liquid, Overnight, Ultra Short etc.) publish NAV every calendar
    # day including weekends/holidays, while equity benchmarks only publish on
    # trading days. A naive inner-join on date yields almost no matching rows
    # (<30) causing all metrics to return None.
    # Fix: forward-fill NAV to the benchmark's trading-day calendar so the merge
    # always produces a full 3Y of matched rows regardless of fund type.
    bench_dates = bench_df[["date"]].drop_duplicates().sort_values("date")
    nav_reindexed = (
        nav_df.set_index("date")
        .reindex(bench_dates["date"])
        .ffill()  # carry last known NAV forward
        .dropna()
        .reset_index()
        .rename(columns={"index": "date"})
    )
    merged = pd.merge(nav_reindexed, bench_df, on="date", how="inner")
    if merged.empty:
        reason = f"no overlapping dates between NAV (scheme={code}) and benchmark (ticker={ticker})"
        return _compute_nav_only_metrics(nav_df, needed, reason)

    merged = _trailing_3y_window(merged)
    if merged.empty:
        reason = f"less than 3 years of overlapping data for scheme={code}"
        return {m: None for m in needed}, {m: reason for m in needed}

    merged["fund_return"] = merged["nav"].pct_change()
    merged["benchmark_return"] = merged["benchmark"].pct_change()
    merged = merged.dropna(subset=["fund_return", "benchmark_return"]).copy()
    if merged.empty:
        reason = "all rows dropped after computing benchmark-joined returns"
        return _compute_nav_only_metrics(nav_df, needed, reason)

    all_metrics, all_skip = _compute_metrics(merged)
    metrics = {m: all_metrics.get(m) for m in needed}
    skip_reasons = {
        m: all_skip[m]
        for m in needed
        if m in all_skip and metrics.get(m) is None
    }

    # Defensive top-up for NAV-only metrics: if the benchmark-joined pass left
    # any NAV-only metric unresolved, recompute it from NAV history alone.
    if any(m in NAV_ONLY_METRICS and metrics.get(m) is None for m in needed):
        nav_only, nav_only_skip = _compute_nav_only_metrics(
            nav_df, needed, "benchmark-dependent metric unavailable"
        )
        for m in needed:
            if (
                m in NAV_ONLY_METRICS
                and metrics.get(m) is None
                and nav_only.get(m) is not None
            ):
                metrics[m] = nav_only[m]
                skip_reasons.pop(m, None)
            elif (
                metrics.get(m) is None
                and m not in skip_reasons
                and m in nav_only_skip
            ):
                skip_reasons[m] = nav_only_skip[m]

    return metrics, skip_reasons
|
src/pdf_generator.py
ADDED
|
@@ -0,0 +1,560 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PDF Generator: Produces the investor portfolio review PDF.
|
| 3 |
+
|
| 4 |
+
Layout (matching the sample investor-portfolio-review PDF):
|
| 5 |
+
Page 1:
|
| 6 |
+
- Header (Advisor + Client details)
|
| 7 |
+
- Executive Summary (total value, gain, metrics)
|
| 8 |
+
- Holdings table (all schemes with score)
|
| 9 |
+
- Market Cap Allocation pie
|
| 10 |
+
- Sector Allocation bar
|
| 11 |
+
|
| 12 |
+
Page 2+:
|
| 13 |
+
- Per-scheme detail block (fund metrics vs top quartile vs benchmark)
|
| 14 |
+
- Underperforming flags
|
| 15 |
+
- Switch suggestion (if any)
|
| 16 |
+
- Capital gains estimate (if switch suggested)
|
| 17 |
+
|
| 18 |
+
Final Page:
|
| 19 |
+
- Wealth Projection chart
|
| 20 |
+
- Disclaimer
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import io
|
| 24 |
+
import os
|
| 25 |
+
from pathlib import Path
|
| 26 |
+
from datetime import datetime
|
| 27 |
+
from typing import Optional, List
|
| 28 |
+
from reportlab.lib.pagesizes import A4
|
| 29 |
+
from reportlab.lib import colors
|
| 30 |
+
from reportlab.lib.units import mm
|
| 31 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 32 |
+
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT
|
| 33 |
+
from reportlab.platypus import (
|
| 34 |
+
SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
|
| 35 |
+
HRFlowable, PageBreak, Image, KeepTogether
|
| 36 |
+
)
|
| 37 |
+
from reportlab.platypus.flowables import Flowable
|
| 38 |
+
from reportlab.graphics.shapes import Drawing, Rect, String
|
| 39 |
+
import matplotlib
|
| 40 |
+
matplotlib.use('Agg')
|
| 41 |
+
|
| 42 |
+
from src.models import PortfolioReport, ClientHolding, Fund
|
| 43 |
+
from src import charts as ch
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# --- Theme -------------------------------------------------------------------
# Colour palette shared by every section builder in this module.
DARK_BLUE = colors.HexColor("#1F3864")   # headings, table header rows
MID_BLUE = colors.HexColor("#2E75B6")    # accents, secondary headers
LIGHT_BLUE = colors.HexColor("#BDD7EE")  # highlight/total-row backgrounds
GREEN = colors.HexColor("#2ECC71")       # positive / on-track indicators
ORANGE = colors.HexColor("#E67E22")
RED = colors.HexColor("#E74C3C")         # negative / underperforming indicators
GREY_BG = colors.HexColor("#F5F5F5")     # zebra-stripe row background
LIGHT_GREY = colors.HexColor("#D9D9D9")  # grid lines, thin rules
WHITE = colors.white
BLACK = colors.black

# Page geometry: A4 dimensions in points, and the uniform page margin.
W, H = A4
MARGIN = 15 * mm

# Base stylesheet; custom paragraph styles derive from its 'Normal' entry.
styles = getSampleStyleSheet()
|
| 63 |
+
|
| 64 |
+
def S(name, **kwargs):
    """Quick style builder.

    Returns a ParagraphStyle called *name* inheriting from the sample
    'Normal' style, with any keyword overrides (fontSize, textColor, ...)
    applied on top.
    """
    return ParagraphStyle(name, parent=styles['Normal'], **kwargs)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# --- Style Definitions --------------------------------------------------------
# Named paragraph styles used throughout the report body.
STYLE_TITLE = S("Title", fontSize=18, textColor=DARK_BLUE, fontName="Helvetica-Bold",
                spaceAfter=2, alignment=TA_CENTER)
STYLE_SUBTITLE = S("Subtitle", fontSize=9, textColor=MID_BLUE, fontName="Helvetica",
                   spaceAfter=4, alignment=TA_CENTER)
STYLE_H1 = S("H1", fontSize=11, textColor=DARK_BLUE, fontName="Helvetica-Bold",
             spaceAfter=3, spaceBefore=6)
STYLE_H2 = S("H2", fontSize=9, textColor=DARK_BLUE, fontName="Helvetica-Bold",
             spaceAfter=2, spaceBefore=4)
STYLE_BODY = S("Body", fontSize=8, textColor=BLACK,
               spaceAfter=2)
STYLE_SMALL = S("Small", fontSize=7, textColor=colors.HexColor("#555555"),
                spaceAfter=1)
# Red/green emphasis styles for warnings and positive notes.
STYLE_WARN = S("Warn", fontSize=8, textColor=colors.HexColor("#C0392B"),
               fontName="Helvetica-Bold")
STYLE_OK = S("OK", fontSize=8, textColor=colors.HexColor("#27AE60"),
             fontName="Helvetica-Bold")
# Small leading keeps the long legal text compact on the final page.
STYLE_DISCLAIMER = S("Disc", fontSize=6, textColor=colors.HexColor("#666666"),
                     spaceAfter=2, leading=8)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _fmt_inr(value: float) -> str:
|
| 92 |
+
"""Format as Indian currency string."""
|
| 93 |
+
if value is None:
|
| 94 |
+
return "N/A"
|
| 95 |
+
if abs(value) >= 1e7:
|
| 96 |
+
return f"βΉ{value/1e7:.2f} Cr"
|
| 97 |
+
if abs(value) >= 1e5:
|
| 98 |
+
return f"βΉ{value/1e5:.2f} L"
|
| 99 |
+
return f"βΉ{value:,.0f}"
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _fmt_pct(value: Optional[float], decimals: int = 2) -> str:
|
| 103 |
+
if value is None:
|
| 104 |
+
return "N/A"
|
| 105 |
+
return f"{value:.{decimals}f}%"
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _fmt_float(value: Optional[float], decimals: int = 2) -> str:
|
| 109 |
+
if value is None:
|
| 110 |
+
return "N/A"
|
| 111 |
+
return f"{value:.{decimals}f}"
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _img_from_buf(buf: io.BytesIO, width_mm: float, height_mm: float) -> Image:
    """Wrap an in-memory image buffer in a ReportLab Image flowable.

    The drawing box is scaled to the requested millimetre dimensions,
    independent of the buffer's native pixel size.
    """
    picture = Image(buf)
    picture.drawWidth, picture.drawHeight = width_mm * mm, height_mm * mm
    return picture
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _table_style(header_color=DARK_BLUE, row_alt=GREY_BG):
    """Shared TableStyle for data tables.

    Produces a coloured bold header row, zebra-striped body rows (white
    alternating with *row_alt*), thin grid lines, right-aligned data
    columns with a left-aligned first column, and compact padding.
    """
    return TableStyle([
        # Header row
        ('BACKGROUND', (0, 0), (-1, 0), header_color),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 7),
        ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
        # Body rows
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, row_alt]),
        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 1), (-1, -1), 7),
        ('ALIGN', (1, 1), (-1, -1), 'RIGHT'),   # numeric columns
        ('ALIGN', (0, 1), (0, -1), 'LEFT'),     # name/label column
        ('GRID', (0, 0), (-1, -1), 0.3, LIGHT_GREY),
        ('TOPPADDING', (0, 0), (-1, -1), 3),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
        ('LEFTPADDING', (0, 0), (-1, -1), 4),
        ('RIGHTPADDING', (0, 0), (-1, -1), 4),
    ])
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
# βββ Section Builders ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 143 |
+
|
| 144 |
+
def _build_header(report: PortfolioReport) -> List:
    """Build the page-1 header: advisor branding, report title, client info box.

    Args:
        report: Populated PortfolioReport; only .advisor and .client are read.

    Returns:
        List of ReportLab flowables for the top of the story.
    """
    adv = report.advisor
    cli = report.client
    today = datetime.now().strftime("%d %B %Y")  # e.g. "05 March 2025"

    elements = []

    # Top bar (advisor on left, date on right). Paragraph mini-HTML
    # (<b>, <font>, <para>) provides inline styling inside each cell.
    header_data = [[
        Paragraph(f"<b>{adv.name}</b><br/>"
                  f"<font size='8' color='#2E75B6'>{adv.location} | {adv.phone} | {adv.email}</font><br/>"
                  f"<font size='7' color='#888888'>{adv.arn} | AMFI Registered Mutual Fund Distributor</font>",
                  S("adv", fontName='Helvetica-Bold', fontSize=10, textColor=DARK_BLUE)),
        Paragraph(f"<para align='right'><font size='8' color='#888888'>"
                  f"Date: {today}</font></para>",
                  STYLE_SMALL),
    ]]
    header_table = Table(header_data, colWidths=[120*mm, 60*mm])
    header_table.setStyle(TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('ALIGN', (1, 0), (1, 0), 'RIGHT'),
    ]))
    elements.append(header_table)
    elements.append(HRFlowable(width="100%", thickness=2, color=MID_BLUE, spaceAfter=4))

    # Report title
    elements.append(Paragraph("Investor Portfolio Review", STYLE_TITLE))
    elements.append(Paragraph("Confidential | Prepared exclusively for the client", STYLE_SUBTITLE))
    elements.append(HRFlowable(width="100%", thickness=0.5, color=LIGHT_GREY, spaceAfter=6))

    # Client info box: label/value pairs laid out two per row.
    # NOTE(review): an age of 0 would display as "N/A" due to the `or`
    # fallback — presumably acceptable for adult clients.
    client_info = [
        ["Client Name", cli.name, "Age", str(cli.age or "N/A")],
        ["Mobile", cli.mobile or "N/A", "Email", cli.email or "N/A"],
        ["PAN", cli.pan or "N/A", "", ""],
    ]
    ct = Table(client_info, colWidths=[30*mm, 55*mm, 25*mm, 70*mm])
    ct.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),  # label columns in bold blue
        ('FONTNAME', (2, 0), (2, -1), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('TEXTCOLOR', (0, 0), (0, -1), DARK_BLUE),
        ('TEXTCOLOR', (2, 0), (2, -1), DARK_BLUE),
        ('ROWBACKGROUNDS', (0, 0), (-1, -1), [GREY_BG, WHITE]),
        ('TOPPADDING', (0, 0), (-1, -1), 3),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 3),
        ('LEFTPADDING', (0, 0), (-1, -1), 4),
        ('GRID', (0, 0), (-1, -1), 0.3, LIGHT_GREY),
    ]))
    elements.append(ct)
    elements.append(Spacer(1, 4*mm))
    return elements
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def _build_summary(report: PortfolioReport) -> List:
    """Build the portfolio snapshot: a single 6-column card row of key figures.

    Shows current value, invested amount, unrealised gain (colour-coded by
    sign) and the portfolio-level Sharpe/Alpha/Beta metrics.
    """
    elements = [Paragraph("π Portfolio Snapshot", STYLE_H1)]

    gain = report.unrealized_gain
    gain_color = "#27AE60" if gain >= 0 else "#E74C3C"  # green / red
    gain_sign = "+" if gain >= 0 else ""  # negative sign comes from the number itself

    summary_data = [
        ["Current Value", "Total Invested", "Unrealised Gain", "Sharpe Ratio", "Alpha", "Beta"],
        [
            _fmt_inr(report.total_current_value),
            _fmt_inr(report.total_invested),
            f"<font color='{gain_color}'>{gain_sign}{_fmt_inr(gain)}</font>",
            _fmt_float(report.sharpe),
            _fmt_pct(report.alpha),
            _fmt_float(report.beta),
        ],
    ]

    def para_cells(row):
        # Wrap every cell in a Paragraph so the <font> markup in the gain
        # cell is parsed (plain Table strings would show it literally).
        return [Paragraph(str(c), S("sc", fontSize=8, fontName='Helvetica-Bold' if i < 1 else 'Helvetica',
                                    alignment=TA_CENTER, textColor=DARK_BLUE))
                for i, c in enumerate(row)]

    tbl = Table(
        [para_cells(summary_data[0]), para_cells(summary_data[1])],
        colWidths=[30*mm] * 6
    )
    tbl.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), DARK_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [LIGHT_BLUE]),
        ('GRID', (0, 0), (-1, -1), 0.3, LIGHT_GREY),
        ('TOPPADDING', (0, 0), (-1, -1), 5),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 5),
    ]))
    elements.append(tbl)
    elements.append(Spacer(1, 4*mm))
    return elements
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _build_holdings_table(report: PortfolioReport) -> List:
    """Build the main holdings table plus any exposure alerts.

    One row per holding with value, allocation %, score and a colour-coded
    status; a bold total row closes the table.

    Fixes:
    - The total-row label was "<b>TOTAL</b>" inside a plain Table cell;
      plain cells are not parsed as mini-HTML, so the tags rendered
      literally. Bolding already comes from the FONTNAME style applied to
      that row, so the label is now plain "TOTAL".
    - A fund score of exactly 0 displayed as "N/A" because of a truthiness
      test; an explicit None check now shows "0.00".
    - Restores the mojibake-corrupted warning/check-mark glyphs in the
      status strings.
    """
    elements = [Paragraph("π Existing Portfolio Holdings", STYLE_H1)]

    rows = [["#", "Scheme Name", "Current Value", "Allocation", "Score", "Status"]]
    for i, h in enumerate(report.holdings, 1):
        has_score = h.fund is not None and h.fund.score is not None
        score = _fmt_float(h.fund.score) if has_score else "N/A"
        status = "⚠️ Underperforms" if h.is_underperforming else "✅ On Track"
        rows.append([
            str(i),
            h.scheme_name[:45],  # truncate long scheme names to fit the column
            _fmt_inr(h.current_value),
            _fmt_pct(h.allocation_pct),
            score,
            status,
        ])
    rows.append(["", "TOTAL", _fmt_inr(report.total_current_value), "100%", "", ""])

    tbl = Table(rows, colWidths=[8*mm, 80*mm, 28*mm, 18*mm, 14*mm, 32*mm])
    style = _table_style()

    # Red for underperformers, green for on-track (status column only)
    for i, h in enumerate(report.holdings, 1):
        if h.is_underperforming:
            style.add('TEXTCOLOR', (5, i), (5, i), RED)
        else:
            style.add('TEXTCOLOR', (5, i), (5, i), GREEN)

    # Bold, highlighted total row
    style.add('FONTNAME', (0, len(rows)-1), (-1, len(rows)-1), 'Helvetica-Bold')
    style.add('BACKGROUND', (0, len(rows)-1), (-1, len(rows)-1), LIGHT_BLUE)

    tbl.setStyle(style)
    elements.append(tbl)
    elements.append(Spacer(1, 3*mm))

    # Exposure warnings (AMC / scheme concentration breaches)
    if report.exposure_warnings:
        elements.append(Paragraph("⚠️ Exposure Alerts", STYLE_H2))
        for warn in report.exposure_warnings:
            elements.append(Paragraph(warn, STYLE_WARN))
        elements.append(Spacer(1, 2*mm))

    return elements
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def _build_allocation_charts(report: PortfolioReport) -> List:
    """Build the allocation section: fund-wise and market-cap pies side by
    side, plus an optional sector bar chart."""
    elements = [Paragraph("π Portfolio Allocation Analysis", STYLE_H1)]

    # Fund-wise holdings pie
    holdings_data = {h.scheme_name: h.current_value for h in report.holdings}
    pie_buf = ch.holdings_pie_chart(holdings_data, "Fund-wise Allocation")

    # Market cap pie. NOTE(review): falls back to hard-coded placeholder
    # percentages when market_cap_allocation is empty — confirm this
    # placeholder is acceptable in client-facing output.
    mc_data = report.market_cap_allocation or {
        "Large Cap": 10, "Mid Cap": 45, "Small Cap": 40, "Others": 5
    }
    mc_buf = ch.market_cap_pie(mc_data)

    # Borderless layout table places the two pies side by side.
    chart_table = Table(
        [[_img_from_buf(pie_buf, 85, 70), _img_from_buf(mc_buf, 80, 70)]],
        colWidths=[90*mm, 90*mm]
    )
    chart_table.setStyle(TableStyle([('VALIGN', (0, 0), (-1, -1), 'TOP')]))
    elements.append(chart_table)
    elements.append(Spacer(1, 3*mm))

    # Sector chart (only when sector data is available)
    if report.sector_allocation:
        sec_buf = ch.sector_bar_chart(report.sector_allocation)
        elements.append(Paragraph("π Sector Allocation", STYLE_H2))
        elements.append(_img_from_buf(sec_buf, 170, 65))
        elements.append(Spacer(1, 3*mm))

    return elements
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def _build_scheme_details(report: PortfolioReport) -> List:
    """Build the per-scheme analysis pages (page 2 onward).

    For each holding with a matched fund: a status-coloured header, a
    fund-vs-category-vs-benchmark CAGR table next to a comparison chart,
    a risk-metrics strip, and — when a better fund was identified — a
    side-by-side switch-suggestion table.
    """
    elements = [PageBreak(), Paragraph("π Individual Scheme Analysis", STYLE_H1)]

    for h in report.holdings:
        fund = h.fund
        if not fund:
            # Holding could not be matched to the fund universe; skip it.
            continue

        # Scheme header, coloured by performance status
        elements.append(Spacer(1, 3*mm))
        status_color = "#E74C3C" if h.is_underperforming else "#27AE60"
        status_text = "Underperforming vs Benchmark" if h.is_underperforming else "Performing Well"

        elements.append(Paragraph(
            f"<b>{h.scheme_name}</b> "
            f"<font color='{status_color}' size='8'>[{status_text}]</font>",
            STYLE_H2
        ))

        # CAGR comparison table. Each cagr_vals entry is
        # [fund CAGR, category average, benchmark] for the matching period.
        periods = ["1 Year", "3 Year", "5 Year", "10 Year"]
        cagr_vals = [
            [fund.cagr_1y, fund.cagr_1y_cat, fund.cagr_1y_bm],
            [fund.cagr_3y, fund.cagr_3y_cat, fund.cagr_3y_bm],
            [fund.cagr_5y, fund.cagr_5y_cat, fund.cagr_5y_bm],
            [fund.cagr_10y, fund.cagr_10y_cat, fund.cagr_10y_bm],
        ]

        cagr_header = ["Period", "Fund CAGR", "Category Avg", "Benchmark"]
        cagr_rows = [cagr_header]
        for period, (f_cagr, cat_cagr, bm_cagr) in zip(periods, cagr_vals):
            cagr_rows.append([
                period,
                _fmt_pct(f_cagr),
                _fmt_pct(cat_cagr),
                _fmt_pct(bm_cagr),
            ])

        cagr_tbl = Table(cagr_rows, colWidths=[30*mm, 30*mm, 30*mm, 30*mm])
        cagr_style = _table_style(header_color=MID_BLUE)
        # Colour the fund CAGR cell red when it trails the benchmark,
        # green otherwise; rows with missing data keep the default colour.
        for row_i, (_, (f_cagr, _, bm_cagr)) in enumerate(zip(periods, cagr_vals), 1):
            if f_cagr is not None and bm_cagr is not None:
                color = RED if f_cagr < bm_cagr else GREEN
                cagr_style.add('TEXTCOLOR', (1, row_i), (1, row_i), color)
        cagr_tbl.setStyle(cagr_style)

        # Risk metrics strip (single header + single data row)
        risk_header = ["Alpha", "Beta", "Sharpe", "Std Dev", "Sortino", "Max DD", "Score"]
        risk_vals = [
            _fmt_pct(fund.alpha), _fmt_float(fund.beta),
            _fmt_float(fund.sharpe), _fmt_pct(fund.std_dev),
            _fmt_float(fund.sortino), _fmt_pct(fund.max_drawdown),
            _fmt_float(fund.score),
        ]
        risk_tbl = Table(
            [risk_header, risk_vals],
            colWidths=[25*mm, 20*mm, 20*mm, 20*mm, 20*mm, 20*mm, 15*mm]
        )
        risk_tbl.setStyle(_table_style(header_color=colors.HexColor("#34495E")))

        # Fund-vs-benchmark-vs-category bar chart for this scheme
        cagr_chart_data = {
            "1Y": {"fund": fund.cagr_1y, "benchmark": fund.cagr_1y_bm, "category": fund.cagr_1y_cat},
            "3Y": {"fund": fund.cagr_3y, "benchmark": fund.cagr_3y_bm, "category": fund.cagr_3y_cat},
            "5Y": {"fund": fund.cagr_5y, "benchmark": fund.cagr_5y_bm, "category": fund.cagr_5y_cat},
            "10Y": {"fund": fund.cagr_10y, "benchmark": fund.cagr_10y_bm, "category": fund.cagr_10y_cat},
        }
        chart_buf = ch.holding_vs_benchmark_chart(fund.name, cagr_chart_data)

        # CAGR table on the left, chart on the right
        row_layout = Table(
            [[cagr_tbl, _img_from_buf(chart_buf, 80, 55)]],
            colWidths=[100*mm, 80*mm]
        )
        row_layout.setStyle(TableStyle([('VALIGN', (0, 0), (-1, -1), 'TOP')]))

        # Keep the table+chart and the risk strip on the same page.
        block = KeepTogether([
            row_layout,
            Spacer(1, 2*mm),
            risk_tbl,
        ])

        # Switch suggestion: current fund vs suggested fund, metric by metric.
        if h.suggested_fund:
            sf = h.suggested_fund
            elements.append(block)
            elements.append(Paragraph(
                f"π‘ <b>Suggested Switch:</b> {h.scheme_name} β <b>{sf.name}</b>",
                STYLE_H2
            ))
            comp_data = [
                ["Metric", "Current Fund", "Suggested Fund"],
                ["3Y CAGR", _fmt_pct(fund.cagr_3y), _fmt_pct(sf.cagr_3y)],
                ["5Y CAGR", _fmt_pct(fund.cagr_5y), _fmt_pct(sf.cagr_5y)],
                ["Alpha", _fmt_pct(fund.alpha), _fmt_pct(sf.alpha)],
                ["Sharpe", _fmt_float(fund.sharpe), _fmt_float(sf.sharpe)],
                ["TER", _fmt_pct(fund.ter), _fmt_pct(sf.ter)],
                ["Score", _fmt_float(fund.score), _fmt_float(sf.score)],
            ]
            comp_tbl = Table(comp_data, colWidths=[40*mm, 60*mm, 60*mm])
            comp_style = _table_style(header_color=colors.HexColor("#8E44AD"))
            comp_tbl.setStyle(comp_style)
            elements.append(comp_tbl)
        else:
            elements.append(block)

        # Thin rule separates consecutive scheme blocks
        elements.append(HRFlowable(width="100%", thickness=0.5, color=LIGHT_GREY, spaceAfter=2))

    return elements
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
def _build_wealth_projection(report: PortfolioReport) -> List:
    """Build the wealth projection page: horizon table plus growth chart.

    Returns just the page break + heading when the report carries no
    projection data.
    """
    elements = [PageBreak(), Paragraph("π° Wealth Projection @ 12% p.a.", STYLE_H1)]

    proj = report.wealth_projection
    if not proj:
        return elements

    # One row per horizon, sorted by year; growth is relative to today's value.
    proj_data = [["Time Horizon", "Projected Value", "Approx. Growth"]]
    current = report.total_current_value
    for yr, val in sorted(proj.items()):
        growth = ((val - current) / current * 100) if current else 0  # guard zero portfolio
        proj_data.append([f"{yr} Years", _fmt_inr(val), f"+{growth:.1f}%"])

    proj_tbl = Table(proj_data, colWidths=[40*mm, 60*mm, 40*mm])
    proj_tbl.setStyle(_table_style())
    elements.append(proj_tbl)
    elements.append(Spacer(1, 4*mm))

    # Projection chart
    wc_buf = ch.wealth_projection_chart(proj, current)
    elements.append(_img_from_buf(wc_buf, 160, 70))
    elements.append(Spacer(1, 4*mm))
    return elements
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
# Standard distributor disclaimer printed verbatim at the end of every report.
DISCLAIMER_TEXT = (
    "Disclaimer: We have gathered all the data, information, and statistics from sources believed to be "
    "highly reliable and true. All necessary precautions have been taken to avoid any error, lapse or "
    "insufficiency; however, no representations or warranties are made (express or implied) as to the "
    "reliability, accuracy or completeness of such information. We cannot be held liable for any loss "
    "arising directly or indirectly from the use of, or any action taken on, any information appearing herein. "
    "The user is advised to verify the contents of the report independently. It is not an Investment recommendation "
    "or personal financial, Investment or professional advice and should not be treated as such. The Risk Level of "
    "any of the schemes must always be commensurate with the risk profile, Investment objective or financial goals "
    "of the investor concerned. Returns less than 1 year are in absolute (%) and greater than 1 year are compounded "
    "annualised (CAGR %). SIP returns are shown in XIRR (%). Mutual Fund Investments are subject to market risks, "
    "read all scheme related documents carefully. Past performance may or may not be sustained in the future."
)
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
# βββ Main Generator ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 476 |
+
|
| 477 |
+
def _build_quartile_section(report: PortfolioReport) -> List:
    """
    Quartile Analysis Grid — based on the senior advisor's handwritten sketch.

    Shows BM / Category / Scheme rows x 1Y/3Y/5Y/10Y columns per holding.
    The scheme row is colour-coded by quartile: Q1(green)/Q2(blue)/
    Q3(yellow)/Q4(red). Rendering is delegated to ch.quartile_analysis_grid.
    """
    elements = [Paragraph("π Quartile Analysis β Scheme vs Benchmark & Category", STYLE_H1)]
    elements.append(Paragraph(
        "Each scheme is compared against its Benchmark Index and Category Average "
        "across 1Y/3Y/5Y/10Y periods. The Scheme row shows CAGR and is color-coded "
        "by quartile rank (Q1=Top, Q4=Bottom). β = Fund beats Benchmark that period.",
        STYLE_SMALL
    ))
    elements.append(Spacer(1, 2*mm))

    # Flatten each matched holding into the dict shape the chart helper expects.
    grid_data = []
    for h in report.holdings:
        f = h.fund
        if not f:
            continue
        rank = f.rank_in_category or 1
        # NOTE(review): category size is approximated as rank*4 — placeholder
        # until the real fund universe count is passed in; verify before
        # relying on the quartile boundaries.
        total = rank * 4  # approximate β will be corrected when fund_universe passed
        grid_data.append({
            "scheme_name": h.scheme_name,
            "rank_in_category": rank,
            "total_in_category": total,
            "cagr_1y": f.cagr_1y, "cagr_1y_bm": f.cagr_1y_bm, "cagr_1y_cat": f.cagr_1y_cat,
            "cagr_3y": f.cagr_3y, "cagr_3y_bm": f.cagr_3y_bm, "cagr_3y_cat": f.cagr_3y_cat,
            "cagr_5y": f.cagr_5y, "cagr_5y_bm": f.cagr_5y_bm, "cagr_5y_cat": f.cagr_5y_cat,
            "cagr_10y": f.cagr_10y, "cagr_10y_bm": f.cagr_10y_bm, "cagr_10y_cat": f.cagr_10y_cat,
        })

    if grid_data:
        grid_buf = ch.quartile_analysis_grid(grid_data)
        # Scale chart height with the number of holdings, capped to fit a page.
        n = len(grid_data)
        chart_h = max(75, n * 28)
        elements.append(_img_from_buf(grid_buf, 175, min(chart_h, 210)))
    else:
        elements.append(Paragraph("No matched fund data available.", STYLE_BODY))

    elements.append(Spacer(1, 3*mm))
    return elements
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
def generate_pdf(report: PortfolioReport, output_path: str) -> str:
    """
    Generate the complete investor portfolio review PDF.

    Assembles the section builders in page order (header/summary/holdings,
    quartile grid, allocation charts, per-scheme details, wealth projection,
    disclaimer) and renders them with ReportLab's Platypus engine.

    Args:
        report: Fully-populated PortfolioReport.
        output_path: Destination file path; parent directories are created.

    Returns:
        The path to the generated PDF as a string.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    doc = SimpleDocTemplate(
        str(output_path),
        pagesize=A4,
        leftMargin=MARGIN,
        rightMargin=MARGIN,
        topMargin=MARGIN,
        bottomMargin=MARGIN,
    )

    story = []

    # -- Page 1 ----------------------------------------------------------
    story += _build_header(report)
    story += _build_summary(report)
    story += _build_holdings_table(report)
    story += _build_quartile_section(report)
    story += _build_allocation_charts(report)

    # -- Per-scheme details (page 2+) -------------------------------------
    story += _build_scheme_details(report)

    # -- Wealth projection (final page) -----------------------------------
    story += _build_wealth_projection(report)

    # -- Disclaimer --------------------------------------------------------
    story.append(HRFlowable(width="100%", thickness=0.5, color=LIGHT_GREY))
    story.append(Spacer(1, 3*mm))
    story.append(Paragraph("Disclaimer", STYLE_H2))
    story.append(Paragraph(DISCLAIMER_TEXT, STYLE_DISCLAIMER))

    doc.build(story)
    return str(output_path)
src/portfolio_engine.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Portfolio Engine: -p mode
|
| 3 |
+
|
| 4 |
+
Loads a client CSV, matches holdings to the fund universe,
|
| 5 |
+
computes portfolio metrics, exposure checks, and wealth projection.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import csv
|
| 9 |
+
import numpy as np
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import List, Optional, Dict
|
| 12 |
+
from src.models import Fund, Client, ClientHolding, Advisor, PortfolioReport
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# βββ Client CSV Loader βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 16 |
+
|
| 17 |
+
def load_client_csv(csv_path: str) -> tuple[Client, List[ClientHolding]]:
    """
    Load client data from CSV.

    Expected CSV format:
        Line 1:  Name, Age, Email, Mobile[, PAN]
        Line 2+: Scheme Name, Current Value, Invested Amount, SIP Amount, SIP Frequency

    Example:
        Parthiban,45,parthiban@gmail.com,9876543210,ABCDE1234F
        Nippon India Small Cap Fund,280923,200000,5000,Monthly

    Raises:
        FileNotFoundError: if *csv_path* does not exist.
        ValueError: if the file contains no non-empty rows.

    Fixes:
    - `safe_float` was redefined on every loop iteration; it is now defined
      once before the loop.
    - A holdings row containing only a scheme name no longer raises
      IndexError (current value falls back to 0.0).
    """
    csv_path = Path(csv_path)
    if not csv_path.exists():
        raise FileNotFoundError(f"Client CSV not found: {csv_path}")

    # utf-8-sig strips a BOM from Excel exports; errors='replace' tolerates
    # stray bytes rather than failing the whole import.
    with open(csv_path, encoding='utf-8-sig', errors='replace') as f:
        reader = csv.reader(f)
        rows = [r for r in reader if any(c.strip() for c in r)]

    if not rows:
        raise ValueError("Client CSV is empty")

    def safe_float(v):
        # Tolerates thousands separators and blank cells; None on failure.
        try:
            return float(str(v).replace(',', '').strip())
        except (ValueError, TypeError):
            return None

    # Parse client info from the first row; missing trailing fields -> None.
    info = rows[0]
    client = Client(
        name=info[0].strip() if len(info) > 0 else "Unknown",
        age=int(info[1]) if len(info) > 1 and info[1].strip().isdigit() else None,
        email=info[2].strip() if len(info) > 2 else None,
        mobile=info[3].strip() if len(info) > 3 else None,
        pan=info[4].strip() if len(info) > 4 else None,
    )

    # Parse holdings from the remaining rows.
    holdings: List[ClientHolding] = []
    for row in rows[1:]:
        if not row or not row[0].strip():
            continue
        # Skip header-like rows pasted in by mistake.
        if row[0].strip().lower() in ('scheme name', 'fund', 'scheme'):
            continue

        holding = ClientHolding(
            scheme_name=row[0].strip(),
            current_value=(safe_float(row[1]) if len(row) > 1 else None) or 0.0,
            invested_amount=safe_float(row[2]) if len(row) > 2 else None,
            sip_amount=safe_float(row[3]) if len(row) > 3 else None,
            sip_frequency=row[4].strip() if len(row) > 4 else None,
        )
        holdings.append(holding)

    return client, holdings
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# βββ Fund Matcher ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 79 |
+
|
| 80 |
+
def match_holdings_to_funds(holdings: List[ClientHolding], funds: List[Fund]) -> List[ClientHolding]:
    """
    Fuzzy-match each client holding to a fund in the universe.

    Similarity is the Jaccard index over lowercased name tokens after
    stripping boilerplate words ("fund", "plan", "growth", ...). A holding
    is linked to its best match only when similarity exceeds 0.15; the
    list is mutated in place and returned.
    """
    boilerplate = {'fund', 'regular', 'plan', 'growth', 'option', 'direct',
                   'idcw', 'div', 'dividend', '-', 'the', 'india', 'of'}

    def tokenize(name: str) -> set:
        return set(name.lower().replace('-', ' ').split()) - boilerplate

    universe = [(fund, tokenize(fund.name)) for fund in funds]

    for holding in holdings:
        wanted = tokenize(holding.scheme_name)
        if not wanted:
            continue

        best, best_sim = None, 0
        for fund, owned in universe:
            if not owned:
                continue
            overlap = len(wanted & owned)
            combined = len(wanted | owned)
            similarity = overlap / combined if combined else 0
            if similarity > best_sim:
                best, best_sim = fund, similarity

        if best_sim > 0.15:  # minimum match threshold
            holding.fund = best

    return holdings
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# βββ Portfolio Analysis ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 119 |
+
|
| 120 |
+
def compute_allocation(holdings: List[ClientHolding]) -> List[ClientHolding]:
    """Populate each holding's allocation_pct (% of total current value).

    Holdings are mutated in place and the same list is returned. When the
    portfolio total is zero, nothing is changed.
    """
    total = sum(h.current_value for h in holdings)
    if total:
        for h in holdings:
            h.allocation_pct = round((h.current_value / total) * 100, 2)
    return holdings
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def check_exposure(holdings: List[ClientHolding]) -> tuple[Dict, Dict, List[str]]:
    """
    Check AMC-level and scheme-level concentration.

    Requires allocation_pct to be populated first (see compute_allocation).

    Returns:
        (amc_exposure, scheme_exposure, warnings) where the exposure dicts
        map name -> allocation % and warnings lists every breach of the
        20% concentration threshold. All empty when the portfolio total
        is zero.
    """
    if sum(h.current_value for h in holdings) == 0:
        return {}, {}, []

    THRESHOLD = 20.0
    amc_exposure: Dict[str, float] = {}
    scheme_exposure: Dict[str, float] = {}

    for h in holdings:
        share = h.allocation_pct
        scheme_exposure[h.scheme_name] = share
        # AMC name = text before the first "-" in the scheme name
        house = h.scheme_name.split('-')[0].strip()
        amc_exposure[house] = amc_exposure.get(house, 0) + share

    warnings: List[str] = []
    for amc, pct in amc_exposure.items():
        if pct > THRESHOLD:
            warnings.append(f"β οΈ AMC Exposure Alert: {amc} = {pct:.1f}% (>{THRESHOLD}% threshold)")
    for scheme, pct in scheme_exposure.items():
        if pct > THRESHOLD:
            warnings.append(f"β οΈ Scheme Exposure Alert: {scheme} = {pct:.1f}% (>{THRESHOLD}% threshold)")

    return amc_exposure, scheme_exposure, warnings
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def compute_portfolio_metrics(holdings: List[ClientHolding]) -> Dict:
    """
    Compute portfolio-level weighted average risk metrics.

    Each matched fund's sharpe/alpha/beta/std_dev contributes in proportion
    to the holding's share of total portfolio value; holdings without a
    matched fund (or with missing metrics) simply contribute nothing.
    Returns {} when total portfolio value is zero.
    """
    total = sum(h.current_value for h in holdings)
    if total == 0:
        return {}

    accumulated = {"sharpe": 0.0, "alpha": 0.0, "beta": 0.0, "std_dev": 0.0}

    for holding in holdings:
        fund = holding.fund
        if not fund:
            continue
        weight = holding.current_value / total
        for metric_name in accumulated:
            value = getattr(fund, metric_name, None)
            if value is not None:
                accumulated[metric_name] += weight * value

    return {name: round(acc, 4) for name, acc in accumulated.items()}
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def flag_underperformers(holdings: List[ClientHolding]) -> List[ClientHolding]:
    """
    Flag a holding as underperforming if its fund's CAGR fails to beat
    EITHER the BM Index OR the Category Average across multiple periods.

    Rule (from senior advisor's framework):
        A fund's CAGR should:
        1. Outperform the BM Index across time periods (1Y, 3Y, 5Y)
        2. Outperform the category average across time periods
        3. Have superior risk metrics (handled separately via score)

    A fund is flagged when it underperforms on 2+ of the 3 periods against
    either the benchmark or the category average. Periods with no fund CAGR
    are skipped entirely.
    """
    horizons = ("1y", "3y", "5y")

    for holding in holdings:
        fund = holding.fund
        if not fund:
            continue

        lag_vs_bm = 0
        lag_vs_cat = 0
        periods_with_data = 0

        for horizon in horizons:
            own_cagr = getattr(fund, f"cagr_{horizon}", None)
            if own_cagr is None:
                continue
            periods_with_data += 1
            benchmark = getattr(fund, f"cagr_{horizon}_bm", None)
            category = getattr(fund, f"cagr_{horizon}_cat", None)
            if benchmark is not None and own_cagr < benchmark:
                lag_vs_bm += 1
            if category is not None and own_cagr < category:
                lag_vs_cat += 1

        # Flag if the fund lags BM on 2+ periods OR lags category on 2+ periods.
        if periods_with_data and (lag_vs_bm >= 2 or lag_vs_cat >= 2):
            holding.is_underperforming = True

    return holdings
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def compute_wealth_projection(total_value: float, years_list: Optional[List[int]] = None,
                              rate: float = 0.12) -> Dict:
    """Project portfolio value at a fixed annual compounding rate.

    Args:
        total_value: Current portfolio value.
        years_list: Horizons (in years) to project. Defaults to [5, 10, 15, 20].
            The default is None (not a list literal) to avoid Python's shared
            mutable-default-argument pitfall.
        rate: Assumed annual return (0.12 == 12%).

    Returns:
        Mapping of horizon (years) -> projected value, rounded to 2 decimals.
    """
    if years_list is None:
        years_list = [5, 10, 15, 20]
    return {
        yr: round(total_value * ((1 + rate) ** yr), 2)
        for yr in years_list
    }
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
# βββ Main entry ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 248 |
+
|
| 249 |
+
def run_portfolio_engine(
    client_csv: str,
    fund_universe: List[Fund],
    advisor: Optional[Advisor] = None,
) -> PortfolioReport:
    """
    Full pipeline: load client -> match funds -> analyse -> build report object.

    Args:
        client_csv: Path to the client holdings CSV.
        fund_universe: All known funds, used to enrich matched holdings.
        advisor: Advisor stamped on the report; a default Advisor() is used
            when omitted.

    Returns:
        A populated PortfolioReport (allocations, AMC/scheme exposures with
        warnings, weighted risk metrics, underperformance flags, and a
        wealth projection at the default 12% rate).
    """
    if advisor is None:
        advisor = Advisor()

    print(f"📂 Loading client data from: {client_csv}")
    client, holdings = load_client_csv(client_csv)
    print(f"   Client: {client.name} | Holdings: {len(holdings)}")

    print("🔍 Matching holdings to fund universe...")
    holdings = match_holdings_to_funds(holdings, fund_universe)
    matched = sum(1 for h in holdings if h.fund is not None)
    print(f"   Matched {matched}/{len(holdings)} holdings")

    # Analysis steps mutate holdings in place (allocation %, underperformance flags).
    holdings = compute_allocation(holdings)
    amc_exp, scheme_exp, warnings = check_exposure(holdings)
    holdings = flag_underperformers(holdings)
    metrics = compute_portfolio_metrics(holdings)

    total_current = sum(h.current_value for h in holdings)
    # invested_amount may be None for some holdings; `or 0` treats it as zero.
    total_invested = sum(h.invested_amount or 0 for h in holdings)

    wealth_projection = compute_wealth_projection(total_current)

    report = PortfolioReport(
        client=client,
        advisor=advisor,
        holdings=holdings,
        total_current_value=total_current,
        total_invested=total_invested,
        unrealized_gain=total_current - total_invested,
        sharpe=metrics.get("sharpe"),
        alpha=metrics.get("alpha"),
        beta=metrics.get("beta"),
        std_dev=metrics.get("std_dev"),
        amc_exposure=amc_exp,
        scheme_exposure=scheme_exp,
        exposure_warnings=warnings,
        wealth_projection=wealth_projection,
    )

    # Surface concentration warnings on the console as well as in the report.
    if warnings:
        print("\n".join(warnings))

    return report
|
src/reference_data.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Reference data extractor from Processed_data.xlsx
|
| 3 |
+
|
| 4 |
+
This module extracts BM Index, Category Average, and fund weightage data that the advisor
|
| 5 |
+
has manually filled in Processed_data.xlsx, so we can use it when processing
|
| 6 |
+
raw CSV files that have blank BM/Category rows.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import openpyxl
|
| 10 |
+
from typing import Dict, Any, Optional, Tuple
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def extract_reference_data(processed_xlsx_path: str) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]], Dict[str, int]]:
    """
    Extract BM Index, Category Average, and fund weightage data from Processed_data.xlsx.

    The sheet is scanned top-to-bottom: a "category header" row (column A
    contains ':' plus one of Equity/Debt/Hybrid/Solution/Other) sets the
    category context for every row beneath it until the next header.

    Returns:
        (bm_data, cat_avg_data, fund_weightages) where:
        - bm_data: dict mapping category name to CAGR values for BM Index
        - cat_avg_data: dict mapping category name to CAGR/values for Category Average
        - fund_weightages: dict mapping fund name to manually adjusted weightage value
        All three are empty dicts when the reference file does not exist.
    """
    xlsx_path = Path(processed_xlsx_path)
    if not xlsx_path.exists():
        # Missing reference file is non-fatal: callers just get empty lookups.
        print(f"Warning: Reference file not found: {processed_xlsx_path}")
        return {}, {}, {}

    wb = openpyxl.load_workbook(str(xlsx_path))
    ws = wb.active

    bm_data = {}
    cat_avg_data = {}
    fund_weightages = {}
    current_category = None  # most recent category header seen while scanning

    # Find the Weightage column index by scanning the header row
    weightage_col_idx = None
    for col_idx in range(1, ws.max_column + 1):
        header_val = ws.cell(1, col_idx).value
        if header_val and 'Weightage' in str(header_val):
            weightage_col_idx = col_idx
            break

    for i in range(1, ws.max_row + 1):
        cell_val = ws.cell(i, 1).value

        # Check if it's a category header
        if cell_val and ':' in str(cell_val) and any(x in str(cell_val) for x in ['Equity', 'Debt', 'Hybrid', 'Solution', 'Other']):
            current_category = cell_val

        # Check if it's BM Index row
        elif cell_val == 'BM Index' and current_category:
            # NOTE(review): columns 6-9 are assumed to hold the 1Y/3Y/5Y/10Y
            # CAGR values — confirm against the sheet layout if it changes.
            bm_1y = ws.cell(i, 6).value
            bm_3y = ws.cell(i, 7).value
            bm_5y = ws.cell(i, 8).value
            bm_10y = ws.cell(i, 9).value

            # Only store if at least one value is present
            # (NB: any([...]) treats a literal 0 CAGR as "missing" here)
            if any([bm_1y, bm_3y, bm_5y, bm_10y]):
                bm_data[current_category] = {
                    'cagr_1y': bm_1y,
                    'cagr_3y': bm_3y,
                    'cagr_5y': bm_5y,
                    'cagr_10y': bm_10y
                }

        # Check if it's Category Average row
        elif cell_val == 'Category Average' and current_category:
            cat_1y = ws.cell(i, 6).value
            cat_3y = ws.cell(i, 7).value
            cat_5y = ws.cell(i, 8).value
            cat_10y = ws.cell(i, 9).value
            pe = ws.cell(i, 12).value
            pb = ws.cell(i, 13).value

            # Only store if at least one CAGR value is present
            if any([cat_1y, cat_3y, cat_5y, cat_10y]):
                cat_avg_data[current_category] = {
                    'cagr_1y': cat_1y,
                    'cagr_3y': cat_3y,
                    'cagr_5y': cat_5y,
                    'cagr_10y': cat_10y,
                    'pe_ratio': pe,
                    'pb_ratio': pb
                }

        # Check if it's a fund row (not category header, BM Index, or Category Average)
        elif cell_val and cell_val not in ['BM Index', 'Category Average', 'Fund'] and current_category:
            # Extract fund name
            fund_name = str(cell_val).strip()

            # Extract weightage if we found the Weightage column
            if weightage_col_idx:
                weightage_val = ws.cell(i, weightage_col_idx).value
                if weightage_val is not None:
                    try:
                        # Convert to int if possible, otherwise round float to nearest int
                        if isinstance(weightage_val, float):
                            fund_weightages[fund_name] = int(round(weightage_val))
                        else:
                            fund_weightages[fund_name] = int(weightage_val)
                    except (ValueError, TypeError):
                        # If conversion fails, skip this fund
                        pass

    wb.close()

    print(f"Loaded reference data: {len(bm_data)} categories with BM Index, {len(cat_avg_data)} with Category Average, {len(fund_weightages)} fund weightages")

    return bm_data, cat_avg_data, fund_weightages
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def get_fund_weightage_from_reference(fund_name: str, fund_weightages: Dict[str, int]) -> Optional[int]:
    """
    Look up the manually adjusted weightage for a fund in the reference data.

    An exact name match wins; failing that, the first reference entry whose
    name contains (or is contained by) the requested name, compared
    case-insensitively, is returned.

    Args:
        fund_name: Name of the fund.
        fund_weightages: Fund-name -> weightage mapping from Processed_data.xlsx.

    Returns:
        The weightage if a match is found, otherwise None.
    """
    if fund_name in fund_weightages:
        return fund_weightages[fund_name]

    # Fall back to a bidirectional case-insensitive substring match to
    # absorb slight naming differences between the CSV and the reference.
    target = fund_name.lower()
    for reference_name, weightage in fund_weightages.items():
        candidate = reference_name.lower()
        if target in candidate or candidate in target:
            return weightage

    return None
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# Default location of the advisor-maintained reference workbook.
# (Previously this constant was defined twice back-to-back; the duplicate
# definition has been removed.)
DEFAULT_REFERENCE_PATH = "PS/Processed data.xlsx"
|
src/scheme_resolver.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Scheme Code Resolver
|
| 2 |
+
======================
|
| 3 |
+
Resolves missing AMFI scheme codes by fuzzy-matching the fund name from the
|
| 4 |
+
CSV against mfapi.in's /mf/search endpoint.
|
| 5 |
+
|
| 6 |
+
This runs as a PRE-TRIAGE step so that the NAV engine can fire for funds whose
|
| 7 |
+
scheme code was absent from the CSV.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import difflib
|
| 13 |
+
import re
|
| 14 |
+
import time
|
| 15 |
+
|
| 16 |
+
import requests
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
MFAPI_SEARCH = "https://api.mfapi.in/mf/search"  # mfapi.in fund-name search endpoint
MATCH_CUTOFF = 0.52  # minimum SequenceMatcher ratio to accept a fuzzy match
SLEEP_BETWEEN = 0.25  # seconds between API calls (polite rate limit)
# NOTE(review): SLEEP_BETWEEN is not referenced anywhere in this module's
# visible code (resolution runs via a thread pool) — confirm and remove if dead.
|
| 22 |
+
|
| 23 |
+
# Manual overrides for schemes that mfapi's search endpoint does not
|
| 24 |
+
# currently return, but whose AMFI codes are known and stable. Keys are
|
| 25 |
+
# normalized fund names (see _normalize).
|
| 26 |
+
SCHEME_OVERRIDES: dict[str, str] = {
|
| 27 |
+
# ββ Pre-verified from AMFI NAV master (portal.amfiindia.com) ββββββββββββββ
|
| 28 |
+
# These funds have empty scheme codes in source CSV and cannot be reliably
|
| 29 |
+
# resolved via mfapi fuzzy search. Codes are Regular Plan - Growth only.
|
| 30 |
+
|
| 31 |
+
# Existing override
|
| 32 |
+
"kotak tax saver scheme growth": "109234",
|
| 33 |
+
|
| 34 |
+
# ββ Debt: Banking and PSU βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
"hdfc banking and psu debt fund growth option": "128628",
|
| 36 |
+
"icici prudential banking and psu debt fund growth": "112342",
|
| 37 |
+
"kotak banking and psu debt growth": "123690",
|
| 38 |
+
"invesco india banking and psu fund growth option": "118232",
|
| 39 |
+
"sundaram banking psu fund formerly known as sundaram banking and psu debt fund regular plan growth": "100784",
|
| 40 |
+
"hsbc banking and psu debt fund regular growth": "151104",
|
| 41 |
+
"iti banking psu debt fund regular plan growth option": "148535",
|
| 42 |
+
|
| 43 |
+
# ββ Debt: Liquid ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 44 |
+
"dsp liquidity fund regular plan growth": "119120",
|
| 45 |
+
"invesco india liquid fund growth": "104488",
|
| 46 |
+
"invesco india liquid fund regular growth": "118769",
|
| 47 |
+
"union liquid fund growth option": "115398",
|
| 48 |
+
"parag parikh liquid fund regular plan growth": "149038",
|
| 49 |
+
"motilal oswal liquid fund regular growth": "147622",
|
| 50 |
+
"iti liquid fund regular plan growth option": "147153",
|
| 51 |
+
"quantum liquid fund regular plan growth option": "103504",
|
| 52 |
+
"lic mf liquid fund regular plan growth": "120716",
|
| 53 |
+
"icici prudential liquid fund growth": "120593",
|
| 54 |
+
"aditya birla sun life liquid fund retail growth": "100042",
|
| 55 |
+
"aditya birla sun life liquid fund growth": "100047",
|
| 56 |
+
"edelweiss liquid fund regular plan growth option": "140182",
|
| 57 |
+
"edelweiss liquid fund retail plan growth option": "119114",
|
| 58 |
+
"axis liquid fund retail plan growth option": "112090",
|
| 59 |
+
"sbi liquid fund regular plan growth": "119822",
|
| 60 |
+
"nippon india liquid fund retail option growth plan": "100837",
|
| 61 |
+
|
| 62 |
+
# ββ Debt: Overnight βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 63 |
+
"uti overnight fund regular plan growth option": "100814",
|
| 64 |
+
"canara robeco overnight fund regular plan growth option": "147534",
|
| 65 |
+
"dsp overnight fund regular plan growth": "146061",
|
| 66 |
+
"franklin india overnight fund growth": "146210",
|
| 67 |
+
"bandhan overnight fund regular plan growth": "146187",
|
| 68 |
+
"iti overnight fund regular plan growth option": "148529",
|
| 69 |
+
"union overnight fund regular plan growth option": "146997",
|
| 70 |
+
"icici prudential overnight fund growth": "145811",
|
| 71 |
+
"edelweiss overnight fund regular plan growth": "147569",
|
| 72 |
+
"lic mf overnight fund regular plan growth": "146065",
|
| 73 |
+
"hdfc overnight fund growth option": "145822",
|
| 74 |
+
|
| 75 |
+
# ββ Debt: Ultra Short Duration ββββββββββββββββββββββββββββββββββββββββββββ
|
| 76 |
+
"icici prudential ultra short term fund growth": "120505",
|
| 77 |
+
"invesco india ultra short duration fund growth": "117825",
|
| 78 |
+
"uti ultra short duration fund regular plan growth option": "102532",
|
| 79 |
+
"aditya birla sun life savings fund growth regular plan": "119293",
|
| 80 |
+
"aditya birla sun life savings fund retail growth": "119293",
|
| 81 |
+
"hdfc ultra short term fund growth option": "145539",
|
| 82 |
+
"aditya birla sun life savings fund discipline advantage plan": "112016",
|
| 83 |
+
"pgim india ultra short duration fund growth": "100474",
|
| 84 |
+
"iti ultra short duration fund regular plan growth option": "148533",
|
| 85 |
+
"motilal oswal ultra short term fund mofustf regular plan growth": "124233",
|
| 86 |
+
"tata ultra short term fund regular plan growth": "146070",
|
| 87 |
+
"kotak savings fund growth": "119270",
|
| 88 |
+
"lic mf ultra short duration fund regular plan growth": "147770",
|
| 89 |
+
"canara robeco ultra short term fund regular plan growth option": "119671",
|
| 90 |
+
"sundaram ultra short duration fund formerly known as principal ultra short term fund growth option": "120826",
|
| 91 |
+
"bank of india ultra short duration fund regular plan growth": "109269",
|
| 92 |
+
|
| 93 |
+
# ββ Debt: Short Duration ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 94 |
+
"hdfc short term debt fund growth option": "119247",
|
| 95 |
+
"icici prudential short term fund growth option": "101758",
|
| 96 |
+
"sbi short horizon debt fund short term fund retail growth": "106227",
|
| 97 |
+
"sbi short term debt fund regular plan growth": "119831",
|
| 98 |
+
"kotak bond short term plan growth": "101373",
|
| 99 |
+
"dsp short term fund regular plan growth": "119598",
|
| 100 |
+
"lic mf short duration fund regular plan growth": "145952",
|
| 101 |
+
"mirae asset short duration fund regular plan growth": "148416",
|
| 102 |
+
"invesco india short duration fund growth": "105185",
|
| 103 |
+
"canara robeco short duration fund regular plan growth option": "119675",
|
| 104 |
+
"groww short duration fund formerly known as indiabulls short term fund regular plan growth option": "123708",
|
| 105 |
+
"tata short term bond fund regular plan growth option": "119802",
|
| 106 |
+
|
| 107 |
+
# ββ Debt: Medium Duration βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 108 |
+
"aditya birla sun life medium term plan growth regular plan": "111803",
|
| 109 |
+
"axis strategic bond fund regular plan growth option": "116894",
|
| 110 |
+
"icici prudential medium term bond fund growth": "120841",
|
| 111 |
+
"hdfc medium term debt fund growth option": "119238",
|
| 112 |
+
"kotak medium term fund growth": "119281",
|
| 113 |
+
"dsp bond fund growth": "100078",
|
| 114 |
+
"sundaram medium duration fund formerly known as sundaram medium term bond fund regular plan growth": "100603",
|
| 115 |
+
|
| 116 |
+
# ββ ETFs ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 117 |
+
"hdfc nifty100 low volatility 30 etf growth option": "145748",
|
| 118 |
+
"hdfc nifty200 momentum 30 etf growth option": "146058",
|
| 119 |
+
"hdfc nifty it etf growth option": "120493",
|
| 120 |
+
"hdfc nifty private bank etf growth option": "145696",
|
| 121 |
+
|
| 122 |
+
# ββ Index Funds βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 123 |
+
"dsp nifty next 50 index fund regular plan growth": "143669",
|
| 124 |
+
"uti nifty next 50 index fund regular plan growth option": "120713",
|
| 125 |
+
"motilal oswal nifty smallcap 250 index regular plan": "147960",
|
| 126 |
+
"icici prudential nifty pharma index fund growth": "143874",
|
| 127 |
+
"dsp nifty 50 index fund regular plan growth": "143537",
|
| 128 |
+
"motilal oswal nifty midcap 150 index fund regular plan": "147068",
|
| 129 |
+
"sbi nifty index fund regular plan growth": "135818",
|
| 130 |
+
"motilal oswal nifty bank index regular plan": "145552",
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def _normalize(name: str) -> str:
|
| 135 |
+
"""Convert hyphenated CSV name to a clean lowercase string."""
|
| 136 |
+
return re.sub(r"[-_]+", " ", name).strip().lower()
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _search_query(name: str) -> str:
    """Build a focused search query from the first 6 normalized tokens of *name*."""
    tokens = _normalize(name).split()
    return " ".join(tokens[:6])
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def _search_mfapi(query: str) -> list[dict]:
    """Query mfapi's /mf/search endpoint; return [] on any network/HTTP/JSON error."""
    try:
        response = requests.get(MFAPI_SEARCH, params={"q": query}, timeout=15)
        response.raise_for_status()
        return response.json()
    except Exception as exc:  # best-effort: the resolver degrades to "no match"
        print(f"  [resolver] search error for '{query}': {exc}")
        return []
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def _best_match(candidates: list[dict], target_name: str) -> dict | None:
    """Return the candidate whose schemeName is closest to *target_name*.

    Similarity is difflib.SequenceMatcher over normalized names; candidates
    scoring below MATCH_CUTOFF are rejected (returns None). Ties keep the
    earliest candidate.
    """
    if not candidates:
        return None
    target = _normalize(target_name)

    def _similarity(item: dict) -> float:
        candidate = _normalize(item.get("schemeName", ""))
        return difflib.SequenceMatcher(None, target, candidate).ratio()

    winner = max(candidates, key=_similarity)
    return winner if _similarity(winner) >= MATCH_CUTOFF else None
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def _is_valid_scheme_code(code: str) -> bool:
|
| 172 |
+
"""AMFI scheme codes are purely numeric (e.g. 120586). Platform codes like GROWWEH are invalid."""
|
| 173 |
+
return bool(code and code.isdigit())
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def resolve_scheme_code_for_fund_name(
    fund_name: str,
) -> tuple[str | None, str | None]:
    """
    Resolve the AMFI scheme code for one fund name.

    Resolution order:
    1. Exact normalized-name hit in SCHEME_OVERRIDES -> (code, "override")
    2. mfapi search + fuzzy best-match -> (code, matched scheme name)
    Falls through to (None, None) when neither source matches.
    """
    known_code = SCHEME_OVERRIDES.get(_normalize(fund_name))
    if known_code:
        return known_code, "override"

    hit = _best_match(_search_mfapi(_search_query(fund_name)), fund_name)
    if not hit:
        return None, None
    return str(hit["schemeCode"]), hit.get("schemeName", "")
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def resolve_missing_scheme_codes(
    rows: list[dict[str, str]],
    *,
    verbose: bool = True,
) -> tuple[list[dict[str, str]], dict[str, str]]:
    """
    Resolve blank scheme codes and also correct any exact-name rows whose
    current numeric code disagrees with SCHEME_OVERRIDES.

    Blank/invalid codes are resolved via SCHEME_OVERRIDES (O(1) dict lookup)
    first, then mfapi search in parallel. Rows are mutated in place; the
    same ``rows`` list is returned together with a fund-name -> code map of
    everything that was resolved or corrected.

    Complexity: O(N) time, O(N) space where N = funds with missing codes.
    Network I/O parallelised with ThreadPoolExecutor(20) - pure I/O bound.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    resolved: dict[str, str] = {}
    corrected_existing = 0

    # -- Collect rows that need resolution --------------------------------
    target_rows: list[dict[str, str]] = []
    for row in rows:
        fund_name = (row.get("Fund") or "").strip()
        # Skip headers/aggregate rows: real CSV fund names are hyphenated
        # (>= 2 hyphens) and never contain ':' (category headers do).
        if not fund_name or fund_name.count("-") < 2 or ":" in fund_name:
            continue
        norm = _normalize(fund_name)
        raw_code = (row.get("Scheme Code") or "").strip()
        override_code = SCHEME_OVERRIDES.get(norm)

        # Future-proofing: if we know the canonical code for this exact fund name,
        # correct it even when the CSV already contains a numeric but stale code.
        if override_code and raw_code != override_code:
            row["Scheme Code"] = override_code
            resolved[fund_name] = override_code
            corrected_existing += 1
            continue

        if _is_valid_scheme_code(raw_code):
            continue
        if raw_code and not _is_valid_scheme_code(raw_code):
            row["Scheme Code"] = ""  # clear invalid platform codes e.g. GROWWEH
        target_rows.append(row)

    total_missing = len(target_rows)
    if total_missing == 0:
        if verbose:
            if corrected_existing:
                print(f"[resolver] Corrected {corrected_existing} existing scheme codes via override table.")
            else:
                print("[resolver] No missing scheme codes found.")
        return rows, resolved

    if verbose:
        print(f"[resolver] Resolving {total_missing} missing scheme codes (parallel)…")

    # -- Phase A: Override table - O(1) per fund, no network --------------
    mfapi_needed: list[dict[str, str]] = []
    override_count = 0

    for row in target_rows:
        fund_name = (row.get("Fund") or "").strip()
        norm = _normalize(fund_name)
        code = SCHEME_OVERRIDES.get(norm)
        if code:
            row["Scheme Code"] = code
            resolved[fund_name] = code
            override_count += 1
        else:
            mfapi_needed.append(row)

    if verbose and override_count:
        print(f"  [resolver] {override_count} resolved via override table (instant)")
    if verbose and corrected_existing:
        print(f"  [resolver] {corrected_existing} existing codes corrected via override table")

    # -- Phase B: mfapi search - parallel ThreadPoolExecutor --------------
    if not mfapi_needed:
        if verbose:
            print(f"[resolver] Done. {len(resolved)}/{total_missing} resolved.")
        return rows, resolved

    # Lock guards the shared progress counter / resolved dict.
    # NOTE(review): the as_completed loop below runs entirely on the calling
    # thread, so the lock mainly documents intent — confirm before removing.
    lock = __import__("threading").Lock()
    completed = [0]  # boxed int so the counter is mutable under the lock

    def _resolve_one(row: dict[str, str]) -> tuple[str, str | None, str | None]:
        """Returns (fund_name, scheme_code_or_None, matched_name_or_None)."""
        fund_name = (row.get("Fund") or "").strip()
        query = _search_query(fund_name)
        candidates = _search_mfapi(query)
        match = _best_match(candidates, fund_name)
        if match:
            return fund_name, str(match["schemeCode"]), match.get("schemeName", "")
        return fund_name, None, None

    # 20 workers: mfapi is pure REST, stateless, handles concurrency fine
    with ThreadPoolExecutor(max_workers=20) as executor:
        future_to_row = {executor.submit(_resolve_one, row): row for row in mfapi_needed}
        for future in as_completed(future_to_row):
            row = future_to_row[future]
            fund_name = (row.get("Fund") or "").strip()
            try:
                _, code, matched_name = future.result()
            except Exception:
                # A failed worker is treated the same as "no match".
                code = matched_name = None

            with lock:
                completed[0] += 1
                n = completed[0]
                total_mfapi = len(mfapi_needed)
                if code:
                    row["Scheme Code"] = code
                    resolved[fund_name] = code
                    if verbose:
                        print(f"  [{n}/{total_mfapi}] OK {fund_name[:55]}")
                        print(f"       -> [{code}] {(matched_name or '')[:55]}")
                else:
                    if verbose:
                        print(f"  [{n}/{total_mfapi}] NO {fund_name[:55]} -- no match")

    if verbose:
        print(f"[resolver] Done. {len(resolved)}/{total_missing} resolved "
              f"({override_count} overrides + {len(resolved)-override_count-corrected_existing} mfapi"
              f"{f', {corrected_existing} corrected existing codes' if corrected_existing else ''}).")
    return rows, resolved
|
src/weightage.py
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Weightage scoring algorithm for mutual fund schemes.
|
| 3 |
+
|
| 4 |
+
Scoring method: Sum of column weights where cell qualifies for Light Green (Top/Bottom 10)
|
| 5 |
+
AND is NOT overridden by Light Red fill (threshold violations).
|
| 6 |
+
|
| 7 |
+
Weight Distribution (Advisor-revised, March 2026):
|
| 8 |
+
1. Sortino Ratio: 1.300 (Top 10, higher is better)
|
| 9 |
+
2. Sharpe Ratio: 1.200 (Top 10, higher is better)
|
| 10 |
+
3. Information Ratio: 1.000 (Top 10, higher is better, Light Red if < 0)
|
| 11 |
+
4. Alpha: 1.000 (Top 10, higher is better, Light Red if < 1)
|
| 12 |
+
5. Maximum Drawdown: 1.350 (Top 10, closest to 0 is better)
|
| 13 |
+
6. Down Market Capture: 1.000 (Bottom 10, lower is better)
|
| 14 |
+
7. Standard Deviation: 1.000 (Bottom 10, lower is better)
|
| 15 |
+
8. 10 Years CAGR: 0.750 (Top 10, higher is better, Light Red if < Category Avg)
|
| 16 |
+
9. 5 Years CAGR: 0.600 (Top 10, higher is better, Light Red if < Category Avg)
|
| 17 |
+
10. 3 Years CAGR: 0.400 (Top 10, higher is better, Light Red if < Category Avg)
|
| 18 |
+
11. P/E Ratio: 0.150 (Bottom 10, lower is better)
|
| 19 |
+
12. TER: 0.150 (Bottom 10, lower is better)
|
| 20 |
+
13. Turnover (%): 0.100 (Bottom 10, lower is better)
|
| 21 |
+
|
| 22 |
+
Total: 10.000
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
import math
|
| 26 |
+
from typing import List, Optional, Dict
|
| 27 |
+
from src.models import Fund
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ——— Weight map (Advisor-revised March 2026) ————————————————————————————————
# Column weights applied by compute_scores(): a fund earns a metric's full
# weight only when it qualifies as "Light Green" (Top/Bottom-10 within its
# category) AND is not overridden by a "Light Red" rule (DUAL_CONDITION_RULES).
WEIGHTS: Dict[str, float] = {
    "sortino": 1.30,
    "sharpe": 1.20,
    "info_ratio": 1.00,
    "alpha": 1.00,
    "max_drawdown": 1.35,
    "down_capture": 1.00,
    "std_dev": 1.00,
    "cagr_10y": 0.75,
    "cagr_5y": 0.60,
    "cagr_3y": 0.40,
    "pe_ratio": 0.15,
    "ter": 0.15,
    "turnover": 0.10,
}

# Sanity-check at import time: the weights define a 10-point score scale,
# so they must sum to exactly 10.000 (rounded to mask float noise).
_TOTAL = round(sum(WEIGHTS.values()), 3)
assert _TOTAL == 10.000, f"WEIGHTS do not sum to 10.000 β got {_TOTAL}"

# Metrics where higher is better -> ranked via "Top 10" within category.
# NOTE: max_drawdown is listed here but is routed to _top_n_drawdown()
# by compute_scores() ("closest to 0" still sorts descending).
TOP_10_METRICS = [
    "sharpe", "sortino", "alpha",
    "info_ratio", "max_drawdown",
    "cagr_3y", "cagr_5y", "cagr_10y",
]

# Metrics where lower is better -> ranked via "Bottom 10" within category.
BOTTOM_10_METRICS = [
    "ter", "turnover", "std_dev",
    "down_capture", "pe_ratio",
]

# Dual-condition metrics: a cell may qualify for green yet still be
# overridden by a Light Red fill.  Values are (rule_type, threshold);
# threshold is None when the comparison is against the category average,
# which is computed at scoring time.
DUAL_CONDITION_RULES: Dict[str, tuple] = {
    "alpha": ("below_value", 1),  # Light Red if alpha < 1%
    "info_ratio": ("below_value", 0),  # Light Red if IR < 0
    "cagr_3y": ("below_category_avg", None),  # Light Red if < category avg
    "cagr_5y": ("below_category_avg", None),
    "cagr_10y": ("below_category_avg", None),
}
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# βββ Value helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 75 |
+
|
| 76 |
+
def _is_valid(v) -> bool:
|
| 77 |
+
"""True if v is a real, non-zero, non-NaN number."""
|
| 78 |
+
if v is None:
|
| 79 |
+
return False
|
| 80 |
+
if isinstance(v, float) and (v != v): # NaN check
|
| 81 |
+
return False
|
| 82 |
+
# 0.0 is treated as missing/not-applicable for risk metrics
|
| 83 |
+
if v == 0:
|
| 84 |
+
return False
|
| 85 |
+
return True
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _is_valid_drawdown(v) -> bool:
|
| 89 |
+
"""
|
| 90 |
+
For Maximum Drawdown specifically: 0.0 is a genuine data-quality gap
|
| 91 |
+
(overnight/liquid funds sometimes publish 0 when the real figure was never
|
| 92 |
+
fetched). Treat 0 as invalid so that only funds with a real (negative)
|
| 93 |
+
drawdown value compete in the ranking.
|
| 94 |
+
"""
|
| 95 |
+
if v is None:
|
| 96 |
+
return False
|
| 97 |
+
if isinstance(v, float) and v != v: # NaN
|
| 98 |
+
return False
|
| 99 |
+
if v == 0:
|
| 100 |
+
return False # β exact zero excluded; see drawdown_zero_fix() below
|
| 101 |
+
return True
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
# βββ Ranking helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 105 |
+
|
| 106 |
+
def _top_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
|
| 107 |
+
"""
|
| 108 |
+
Return True if fund is in the top-N (highest values) for metric.
|
| 109 |
+
|
| 110 |
+
Special case:
|
| 111 |
+
- For Information Ratio we allow a value of exactly 0.0 to participate
|
| 112 |
+
in ranking (Excel treats 0 as a valid value; only < 0 is "red").
|
| 113 |
+
"""
|
| 114 |
+
fund_val = getattr(fund, metric, None)
|
| 115 |
+
|
| 116 |
+
def _valid_for_rank(v):
|
| 117 |
+
if metric == "info_ratio":
|
| 118 |
+
# Treat 0 as a real value; only None/NaN are invalid here.
|
| 119 |
+
if v is None:
|
| 120 |
+
return False
|
| 121 |
+
if isinstance(v, float) and (v != v):
|
| 122 |
+
return False
|
| 123 |
+
return True
|
| 124 |
+
return _is_valid(v)
|
| 125 |
+
|
| 126 |
+
if not _valid_for_rank(fund_val):
|
| 127 |
+
return False
|
| 128 |
+
|
| 129 |
+
valid = [getattr(f, metric, None) for f in peers
|
| 130 |
+
if _valid_for_rank(getattr(f, metric, None))]
|
| 131 |
+
if len(valid) < 2:
|
| 132 |
+
return False
|
| 133 |
+
|
| 134 |
+
# Match Excel's TOP 10 conditional formatting:
|
| 135 |
+
# "Top N items", with N capped at the number of valid funds.
|
| 136 |
+
effective_n = min(n, len(valid))
|
| 137 |
+
valid.sort(reverse=True)
|
| 138 |
+
return fund_val >= valid[effective_n - 1]
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def _top_n_drawdown(fund: Fund, peers: List[Fund], n: int = 10) -> bool:
|
| 142 |
+
"""
|
| 143 |
+
Special top-N for Maximum Drawdown.
|
| 144 |
+
|
| 145 |
+
"Closest to 0" = highest value among negatives.
|
| 146 |
+
-5% is better than -20%, so we still sort descending.
|
| 147 |
+
Only non-zero, non-None values participate (see _is_valid_drawdown).
|
| 148 |
+
Uses strict-N (no 50% fallback) so a single liquid fund with a real
|
| 149 |
+
drawdown doesn't accidentally qualify just because of category size.
|
| 150 |
+
"""
|
| 151 |
+
fund_val = getattr(fund, "max_drawdown", None)
|
| 152 |
+
if not _is_valid_drawdown(fund_val):
|
| 153 |
+
return False
|
| 154 |
+
|
| 155 |
+
valid = [getattr(f, "max_drawdown", None) for f in peers
|
| 156 |
+
if _is_valid_drawdown(getattr(f, "max_drawdown", None))]
|
| 157 |
+
if not valid:
|
| 158 |
+
return False
|
| 159 |
+
|
| 160 |
+
effective_n = min(n, len(valid))
|
| 161 |
+
valid.sort(reverse=True) # -5 > -20 β -5 is rank-1
|
| 162 |
+
return fund_val >= valid[effective_n - 1]
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def _bottom_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
|
| 166 |
+
"""Return True if fund is in the bottom-N (lowest values) for metric."""
|
| 167 |
+
fund_val = getattr(fund, metric, None)
|
| 168 |
+
if not _is_valid(fund_val):
|
| 169 |
+
return False
|
| 170 |
+
|
| 171 |
+
valid = [getattr(f, metric, None) for f in peers
|
| 172 |
+
if _is_valid(getattr(f, metric, None))]
|
| 173 |
+
if len(valid) < 2:
|
| 174 |
+
return False
|
| 175 |
+
|
| 176 |
+
# Match Excel's BOTTOM 10 conditional formatting:
|
| 177 |
+
# "Bottom N items", with N capped at the number of valid funds.
|
| 178 |
+
effective_n = min(n, len(valid))
|
| 179 |
+
valid.sort()
|
| 180 |
+
return fund_val <= valid[effective_n - 1]
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def _category_avg(peers: List[Fund], metric: str) -> Optional[float]:
|
| 184 |
+
"""Arithmetic mean of valid metric values across peers."""
|
| 185 |
+
vals = [getattr(f, metric, None) for f in peers
|
| 186 |
+
if _is_valid(getattr(f, metric, None))]
|
| 187 |
+
return sum(vals) / len(vals) if vals else None
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def _light_red(fund: "Fund", metric: str, cat_avg: Optional[float]) -> bool:
    """
    True when *metric* triggers a Light Red override for *fund*, zeroing
    out a weight the fund would otherwise have earned.

    Two rule types exist (see DUAL_CONDITION_RULES):
      - "below_value":        value < fixed threshold (alpha < 1, IR < 0)
      - "below_category_avg": value < *cat_avg* (the CAGR columns)

    Metrics without a rule, or with an invalid value, never trigger red.
    """
    rule = DUAL_CONDITION_RULES.get(metric)
    if rule is None:
        return False

    value = getattr(fund, metric, None)
    if not _is_valid(value):
        return False

    rule_type, threshold = rule
    if rule_type == "below_value":
        return value < threshold
    if rule_type == "below_category_avg":
        return cat_avg is not None and value < cat_avg
    return False
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
# βββ Drawdown zero-cell fix βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 206 |
+
|
| 207 |
+
def drawdown_zero_fix(
    funds: List[Fund],
    *,
    verbose: bool = True,
) -> int:
    """
    Detect funds whose max_drawdown is exactly 0 (data-quality gap) and
    recompute it from live NAV history via the NAV engine.

    Strategy
    --------
    1. Collect every fund where max_drawdown is 0 or None, excluding
       debt-style categories (tiny/no drawdown), funds younger than 3
       years (no 3Y NAV history), and funds already attempted by the
       csv_enrichment NAV phase.

    2. For each such fund with a scheme code, call
       compute_nav_metrics_for_scheme() requesting only
       ["Maximum Drawdown"], in parallel.

    3. If a real non-zero value comes back, write it to fund.max_drawdown.

    Returns the count of cells successfully fixed.

    NOTE: This function requires network access (mfapi.in + yfinance).
          It is intentionally separated from compute_scores() so callers
          can opt in only when enrichment is desired.
    """
    # Import here to avoid circular dependency at module level; if the NAV
    # engine is not installed, the whole fix is a no-op.
    try:
        from src.nav_metrics_engine import NavEngineCache, compute_nav_metrics_for_scheme
    except ImportError:
        if verbose:
            print("[drawdown_fix] nav_metrics_engine not available β skipping.")
        return 0

    # Category prefixes treated as debt-style: these legitimately have
    # tiny/no drawdowns, so a 0 cell is not a data-quality gap for them.
    DEBT_PREFIXES = ("debt", "liquid", "overnight", "money market", "gilt",
                     "fixed maturity", "interval", "fmp")

    from datetime import datetime as _dt
    _now = _dt.now()  # NOTE(review): naive local time — assumes _launch_date is also naive; confirm

    def _fund_age_years(f) -> float | None:
        # Age in years from the fund's private _launch_date attribute;
        # None when the attribute is absent or not a datetime.
        ld = getattr(f, "_launch_date", None)
        if not isinstance(ld, _dt):
            return None
        return (_now - ld).days / 365.25

    # Funds already attempted by the csv_enrichment NAV phase: if that pass
    # couldn't fill MDD, a second pass won't either, so skip them.
    try:
        from src.csv_enrichment import _NAV_ATTEMPTED_FUNDS as _nav_attempted
    except Exception:
        _nav_attempted = set()

    zero_funds = [
        f for f in funds
        if (
            # Only target funds where drawdown is truly missing (0 or None)
            (f.max_drawdown == 0 or f.max_drawdown is None)
            # AND only equity/hybrid β debt funds have tiny/no drawdown, skip them
            and not any(f.category.lower().startswith(pfx) for pfx in DEBT_PREFIXES)
            # AND fund must be β₯3 years old (unknown age is allowed through)
            and (_fund_age_years(f) is None or _fund_age_years(f) >= 3.0)
            # AND skip funds already attempted by csv_enrichment NAV phase
            and f.name not in _nav_attempted
        )
    ]

    if not zero_funds:
        if verbose:
            print("[drawdown_fix] No zero/missing drawdown cells found.")
        return 0

    if verbose:
        print(f"[drawdown_fix] Attempting to fix {len(zero_funds)} drawdown cells β¦")

    from concurrent.futures import ThreadPoolExecutor, as_completed as _as_completed
    import threading as _threading

    # Bulk-preload cache before parallel workers start (2 SQL queries instead of N)
    try:
        from src.nav_metrics_engine import _bulk_preload_cache, resolve_benchmark_ticker
        _scheme_codes = [getattr(f, "_scheme_code", None) or "" for f in zero_funds]
        _bench_tickers = [resolve_benchmark_ticker(getattr(f, "benchmark", "") or "") for f in zero_funds]
        _bulk_preload_cache(_scheme_codes, _bench_tickers)
    except Exception:
        pass  # graceful degradation β workers will fall back to per-query

    cache = NavEngineCache()
    fixed = 0
    # NOTE(review): results are consumed in the main thread via as_completed,
    # so this lock looks redundant — kept for safety; confirm before removing.
    _lock = _threading.Lock()

    # Partition into funds we can look up (have a scheme code) and those we can't.
    with_code = [
        (f, getattr(f, "_scheme_code", None) or "", getattr(f, "benchmark", "") or "")
        for f in zero_funds
        if (getattr(f, "_scheme_code", None) or "").strip()
    ]
    no_code = [f for f in zero_funds if not (getattr(f, "_scheme_code", None) or "").strip()]

    if verbose:
        for f in no_code:
            print(f" SKIP {f.name[:55]} β no scheme code available")

    def _fix_one(args):
        # Worker: fetch Maximum Drawdown for one fund; returns
        # (fund, mdd-or-None, skip-reason).
        fund, scheme_code, benchmark = args
        metrics, skip = compute_nav_metrics_for_scheme(
            scheme_code=scheme_code,
            benchmark_type=benchmark,
            needed_metrics=["Maximum Drawdown"],
            cache=cache,
        )
        mdd = metrics.get("Maximum Drawdown")
        reason = skip.get("Maximum Drawdown", "unknown")
        return fund, mdd, reason

    with ThreadPoolExecutor(max_workers=12) as executor:
        futures = {executor.submit(_fix_one, item): item for item in with_code}
        for fut in _as_completed(futures):
            try:
                fund, mdd, reason = fut.result()
            except Exception as e:
                # NOTE(review): worker errors are silently dropped and `e` is
                # unused — consider logging when verbose; confirm intent.
                continue
            if mdd is not None and mdd != 0:
                with _lock:
                    fund.max_drawdown = mdd
                    fixed += 1
                if verbose:
                    print(f" FIXED {fund.name[:55]} β MDD = {mdd:.3f}%")
            else:
                if verbose:
                    print(f" MISS {fund.name[:55]} β {reason}")

    if verbose:
        print(f"[drawdown_fix] Done. Fixed {fixed}/{len(zero_funds)} cells.")

    return fixed
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
# βββ Main scoring engine ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 354 |
+
|
| 355 |
+
def compute_scores(funds: List["Fund"]) -> List["Fund"]:
    """
    Score and rank all funds within their categories.

    Algorithm
    ---------
    For every metric carrying a weight:
      1. Check whether the fund is Top-N (higher-is-better metrics,
         drawdown via the special closest-to-zero ranking) or Bottom-N
         (lower-is-better metrics) within its category -> "Light Green".
      2. If Light Green but a dual-condition rule fires -> "Light Red"
         override: the weight contributes nothing.
      3. Otherwise, Light Green adds the metric's weight to the score.

    fund.score is capped at 10.0 (the model scale).

    Also sets:
        fund.rank_in_category   -> 1 = best within category
        fund.is_top_quartile    -> True for the top ceil(N/4) funds

    Returns the same list (mutated in-place) for convenience.
    """
    # Bucket funds by category; scoring is purely peer-relative.
    by_category: Dict[str, List["Fund"]] = {}
    for f in funds:
        by_category.setdefault(f.category, []).append(f)

    for peer_group in by_category.values():

        # Category averages needed by the CAGR dual-condition rules.
        avg_cache = {
            m: _category_avg(peer_group, m)
            for m in ("cagr_3y", "cagr_5y", "cagr_10y")
        }

        # —— Score each fund ————————————————————————————————————————————
        for f in peer_group:
            total = 0.0

            for metric, weight in WEIGHTS.items():
                # Green check: drawdown has its own ranking rule.
                if metric == "max_drawdown":
                    qualifies = _top_n_drawdown(f, peer_group)
                elif metric in TOP_10_METRICS:
                    qualifies = _top_n(f, peer_group, metric)
                elif metric in BOTTOM_10_METRICS:
                    qualifies = _bottom_n(f, peer_group, metric)
                else:
                    qualifies = False

                # Light Red override zeroes the contribution.
                if qualifies and metric in DUAL_CONDITION_RULES:
                    if _light_red(f, metric, avg_cache.get(metric)):
                        qualifies = False

                if qualifies:
                    total += weight

            f.score = round(min(total, 10.0), 3)

        # —— Rank within category ———————————————————————————————————————
        # Ties broken by case-insensitive name, then original order.
        ordered = sorted(
            peer_group,
            key=lambda f: (-(f.score or 0), (f.name or "").lower(), getattr(f, "order", 0)),
        )
        cutoff = max(1, math.ceil(len(ordered) / 4))

        for position, f in enumerate(ordered, start=1):
            f.rank_in_category = position
            f.is_top_quartile = (position <= cutoff)

    return funds
|