Spaces:
Build error
Build error
| """ | |
| analysis.py - NLG reports, industry impact, evaluation metrics, ablation experiments | |
| ==================================================================================== | |
| Analysis and evaluation module split out of v2_championship.py. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.linear_model import QuantileRegressor | |
| from sklearn.preprocessing import StandardScaler | |
| from config import FACTOR_GROUPS, INDUSTRIES, INDUSTRY_ZH | |
| # =========================================================== | |
| # INDUSTRY IMPACT RULE ENGINE | |
| # =========================================================== | |
# Fallback verdict for an industry when none of its rules fire (or when the
# industry has no rule set at all).
_ROUTINE_VERDICT = ('Low', 'Routine monitoring')


def _rule_aviation(high, medium, upward, downward, wide, top_fac):
    """Return (risk, action) for Aviation; the action gets a top-factor suffix."""
    if high:
        if upward:
            risk, action = 'High', 'Increase hedging coverage; review fuel cost budget'
        else:
            risk, action = 'High', 'Elevated volatility; prepare contingency liquidity'
    elif upward:
        risk, action = 'Medium-High', 'Monitor fuel cost exposure; consider forward contracts'
    elif medium:
        risk, action = 'Medium', 'Review quarterly fuel hedging strategy'
    else:
        risk, action = _ROUTINE_VERDICT

    # The factor note is appended whatever the risk verdict above was.
    # NOTE(review): the dash-like character in these literals is mis-encoded
    # in this copy of the file; it is reproduced as found.
    if top_fac == 'Demand':
        action += '; demand-driven โ cost pressure may persist'
    elif top_fac == 'Supply':
        action += '; supply-driven โ watch OPEC decisions'
    elif top_fac == 'Risk_Geo':
        action += '; geopolitical risk โ event monitoring'
    return risk, action


def _rule_logistics(high, medium, upward, downward, wide, top_fac):
    """Return (risk, action) for Logistics."""
    if high:
        if upward:
            return 'Medium-High', 'Review transport cost pass-through; working capital buffer'
        return 'Medium', 'Monitor diesel/freight cost exposure'
    if medium and upward:
        return 'Medium', 'Review fuel surcharge mechanisms'
    return _ROUTINE_VERDICT


def _rule_chemicals(high, medium, upward, downward, wide, top_fac):
    """Return (risk, action) for Chemicals."""
    if upward and top_fac == 'Supply':
        return 'High', 'Feedstock cost pressure; margin compression likely'
    if high:
        return 'Medium-High', 'Monitor naphtha/ethylene spread; review procurement'
    if wide:
        return 'Medium', 'Profit uncertainty elevated; scenario planning advised'
    return _ROUTINE_VERDICT


def _rule_manufacturing(high, medium, upward, downward, wide, top_fac):
    """Return (risk, action) for Manufacturing."""
    if high:
        if upward:
            return 'High', 'Energy cost surge risk; review energy hedging'
        return 'Medium', 'Elevated input cost volatility'
    if medium:
        return 'Medium', 'Monitor energy procurement costs'
    return _ROUTINE_VERDICT


def _rule_upstream_og(high, medium, upward, downward, wide, top_fac):
    """Return (risk, action) for Upstream_OG (always yields an explicit verdict)."""
    if downward:
        if high:
            return 'High', 'Revenue decline risk; review covenant compliance & liquidity'
        return 'Medium-High', 'Downside tail expanding; monitor cash flow coverage'
    if upward:
        return 'Low', 'Revenue tailwind; capex commitment review advised'
    return 'Low-Medium', 'Balanced outlook'


# Industry name -> rule function; all rule functions share one signature.
_INDUSTRY_RULES = {
    'Aviation': _rule_aviation,
    'Logistics': _rule_logistics,
    'Chemicals': _rule_chemicals,
    'Manufacturing': _rule_manufacturing,
    'Upstream_OG': _rule_upstream_og,
}


def apply_industry_rules(rec):
    """Derive a risk level and recommended action for every industry.

    Args:
        rec: mapping-like record (anything with ``.get``). Reads
            'risk_level' (default 'Low'), 'risk_bias' (default 'Balanced'),
            'vol_ratio' (default 1.0) and 'top_factor' (default 'Unknown').

    Returns:
        dict with two keys per entry of INDUSTRIES:
        '<industry>_risk' and '<industry>_action'. Industries without a
        dedicated rule set get ('Low', 'Routine monitoring').
    """
    level = rec.get('risk_level', 'Low')
    bias = rec.get('risk_bias', 'Balanced')
    signals = (
        level == 'High',
        level == 'Medium',
        bias == 'Upward',
        bias == 'Downward',
        rec.get('vol_ratio', 1.0) > 1.3,   # "wide" band flag
        rec.get('top_factor', 'Unknown'),
    )

    verdicts = {}
    for industry in INDUSTRIES:
        rule = _INDUSTRY_RULES.get(industry)
        risk, action = rule(*signals) if rule is not None else _ROUTINE_VERDICT
        verdicts[f'{industry}_risk'] = risk
        verdicts[f'{industry}_action'] = action
    return verdicts
| # =========================================================== | |
| # REGIME ECONOMIC NARRATIVES | |
| # =========================================================== | |
# Narrative text per market regime, keyed by regime label. Each entry holds
# three strings: 'history', 'implication' and 'hedge_advice'.
# generate_nlg_report() looks an entry up with the row's 'regime_match' value
# and prints all three fields; labels with no entry here simply get no
# narrative section.
# NOTE(review): every non-ASCII literal below (keys included) is mis-encoded
# (mojibake) in this copy of the file. They are reproduced as found; restore
# them from the original UTF-8 source, and make sure the keys still match the
# labels written into 'regime_match' upstream.
REGIME_NARRATIVES = {
    '2008 ้่ๅฑๆบ': {
        'history': '2008ๅนดๅ จ็้่ๅฑๆบๆ้ด๏ผๆฒนไปทไป$147ๆด่ท่ณ$32๏ผ้ๅน 78%ใ้ๆฑ็ซฏๅดฉๅกๆฏไธปๅ โโๅ จ็GDPๆถ็ผฉใ่ดธๆ้ๅใๅถ้ ไธPMIๆฎ้่ท็ ด40ใ',
        'implication': '้ๆฑๅดฉๅกๆ ผๅฑไธ๏ผไธๆธธๆๆฌ็ซฏไผไธ็ญๆๅ็ไบไฝๆฒนไปท๏ผไฝๆป้ๆฑ่็ผฉๆ็ดฏๆดไฝ่ฅๆถใไธๆธธไผไธ้ขไธดๆๅคงๅฒๅปใ',
        'hedge_advice': 'ไธๆธธไผไธๅบ็ซๅณ้ๅฎ่ฟๆ้ๅฎไปทๆ ผ๏ผไธๆธธไผไธๅฏ้ขไฝๅปบไป๏ผ้ๅฎไฝไปทๅๆใ',
    },
    '2014 ้กตๅฒฉๆฒนๅฒๅป': {
        'history': '2014-16ๅนด็พๅฝ้กตๅฒฉๆฒนไบง้็ๅ๏ผ+400ไธๆกถ/ๆฅ๏ผ๏ผๅ ๅ OPECๆ็ปๅไบง๏ผๆฒนไปทไป$110่ท่ณ$26ใไพ็ป่ฟๅฉไธปๅฏผใ',
        'implication': 'ไพ็ป่ฟๅฉๆ ผๅฑๆ็ปญๆถ้ด้ฟ๏ผ18ไธชๆไปฅไธ๏ผ๏ผไธๆธธไผไธๆๆฌไผๅฟๅฏๆ็ปญ๏ผไธๆธธไผไธ้่ฆ้็ปๅบๅกใ็ผฉๅ่ตๆฌๅผๆฏใ',
        'hedge_advice': '้็นๅ ณๆณจ่ฟๆๆฒ็บฟ็ปๆ๏ผcontangoๅ ๆทฑ๏ผ๏ผๅฉ็จๆ่ดง้ไปท็ชๅฃใไธๆธธไผไธๅปถ้ฟ้่ดญๅ็บฆๆ้ใ',
    },
    '2020 COVID': {
        'history': '2020ๅนดCOVID-19ๅฏผ่ดๅ จ็้ๆฑๆดๅ2000ไธๆกถ/ๆฅ๏ผWTIๆ่ดงๅๅฒๆง่ท่ณ่ดๅผใ้ๆฑๅฒๅป+ไปๅจๅฑๆบๅ้ๆๅปใ',
        'implication': 'ๆ็ซฏ้ๆฑๅฒๅปไธ๏ผ่ช็ฉบไธๅฎข่ฟ้้90%+๏ผ็ฉๆต้พไธญๆญใไฝๅค่้ๅบฆๅฏ่ฝ่ถ ้ขๆโโVๅๅๅผนๆฏๅๅฒๅธธๆใ',
        'hedge_advice': '็ญๆ๏ผไฟๆ็ฐ้ๆตๅผนๆง๏ผ้ฟๅ ่ฟๅบฆๅฅไฟใไธญๆ๏ผๅ ณๆณจOPEC+ๅ่ฐๅไบงไฟกๅท๏ผ้ขไฝๅปบ็ซๅคๅคดๅคดๅฏธใ',
    },
    '2022 ไฟไนๅฒ็ช': {
        'history': '2022ๅนดไฟไนๅฒ็ชๅฏผ่ดไฟ็ฝๆฏๅๆฒนๅบๅฃๅๅถ่ฃ๏ผไพ็ป็ผบๅฃ+ๅฐ็ผๆบขไปทๆจๅๅธไผฆ็น่ณ$130+ใๅฐ็ผๆฟๆฒป+ไพ็ปๅ้ๅฒๅปใ',
        'implication': 'ๅฐ็ผ้ฉฑๅจ็ไปทๆ ผ้ฃๅ้ๅธธ็ช็ถไฝ็ญๆ๏ผ3-6ไธชๆ๏ผ๏ผ้ๅๅถ่ฃ้ๅบๅๆฟไปฃไพ็ป้ๆญฅๆถๅๆบขไปทใ',
        'hedge_advice': 'ไบไปถ้ฉฑๅจ่กๆ ไธญ๏ผๆๆ็ญ็ฅไผไบๆ่ดงโโไนฐๅ ฅ็ๆถจๆๆ้ๅฎไธ้ๆๆฌ๏ผไฟ็ไปทๆ ผๅ่ฝ็ๆถ็็ฉบ้ดใ',
    },
    '2023 OPECๅไบง': {
        'history': '2023ๅนดOPEC+ไธปๅจๅไบง200ไธๆกถ/ๆฅ๏ผๆๅบๆฒนไปทๅจ$70-90ๅบ้ดใไพ็ป็ฎก็ๅๅธๅบ๏ผไปทๆ ผๆณขๅจ็่พไฝใ',
        'implication': 'OPECๅไบงๆ ผๅฑไธ๏ผไปทๆ ผๅบ้ดๅฏ้ขๆตๆง่พ้ซ๏ผไฝไธ่ก้ฃ้ฉๆฅ่ชๅไบงๆง่ก็ไธๆปๅ้OPECๅขไบงใ',
        'hedge_advice': 'ไฝๆณขๅจ็ฏๅข้ๅไฝฟ็จ้ถๆๆฌ้ข๏ผcollar๏ผ็ญ็ฅ๏ผ้ๅฎ็ชไปทๆ ผๅธฆใ',
    },
    'ๅธธๆ/ไฝๆณขๅจ': {
        'history': 'ๆฒนไปทๅคไบๅธธๆๆณขๅจๅบ้ด๏ผๆ ๆๆพๅไธๅ ๅญไธปๅฏผใๅธๅบๅคไบไพ้ๅบๆฌๅนณ่กก็ถๆใ',
        'implication': 'ๅธธๆไธๅ ณๆณจ็ปๆๆงๅๅไฟกๅทโโOPECไผ่ฎฎๅณ็ญใ็พๅฝ้ปไบๆฐ่ถๅฟใไธญๅฝPMI่ตฐๅใ',
        'hedge_advice': 'ๅธธๆไธๅฏนๅฒๆฏไพๅฏ้ๅฝ้ไฝ๏ผ25-40%๏ผ๏ผไฝฟ็จไฝๆๆฌๆ่ดง้ไปทๅณๅฏใ',
    },
}
| # =========================================================== | |
| # NLG REPORT GENERATION (ENHANCED) | |
| # =========================================================== | |
def generate_nlg_report(row):
    """Build the single-month risk-assessment text report for one result row.

    The report is assembled in four parts: headline judgement with the 1M
    (and, when present, 3M and CQR) intervals; regime match with its
    narrative from REGIME_NARRATIVES; industries grouped by risk grade; and
    the stress-test scenarios.

    Args:
        row: mapping-like record (supports ``[]`` and ``.get``). Required
            keys: 'test_date', 'risk_level', 'risk_bias', 'pred_q10_1m',
            'pred_q50_1m', 'pred_q90_1m', 'pred_vol'. All other keys are
            optional and read with ``.get``.

    Returns:
        The report as one newline-separated string.

    NOTE(review): all non-ASCII literals in this function are mis-encoded
    (mojibake) in this copy of the file and are reproduced as found. In
    particular the 'Upward' and 'Downward' labels render identically here;
    confirm against the original UTF-8 source.
    """
    month_label = pd.Timestamp(row['test_date']).strftime('%Yๅนด%mๆ')
    level = row['risk_level']
    bias = row['risk_bias']
    driver = row.get('top_factor', 'Unknown')
    lo_1m = row['pred_q10_1m'] * 100
    mid_1m = row['pred_q50_1m'] * 100
    hi_1m = row['pred_q90_1m'] * 100
    vol_pct = row['pred_vol'] * 100

    # Display labels; unknown values fall through unchanged.
    level_labels = {'Low': 'ไฝ', 'Medium': 'ไธญ็ญ', 'High': '้ซ'}
    bias_labels = {'Upward': 'ๅไธ่ก', 'Downward': 'ๅไธ่ก', 'Balanced': 'ๅ่กก'}
    driver_labels = {
        'Price': 'ไปทๆ ผ่ๅจ', 'Supply': 'ไพ็ป็ซฏ', 'Demand': '้ๆฑ็ซฏ',
        'Risk_Geo': 'ๅฐ็ผๆฟๆฒป/้ฃ้ฉ', 'Technical': 'ๆๆฏ้ข',
    }
    level_zh = level_labels.get(level, level)
    bias_zh = bias_labels.get(bias, bias)
    driver_zh = driver_labels.get(driver, driver)

    # -- Part 1: headline judgement --
    pieces = [
        f"ใ{month_label}ๆฒนไปท้ฃ้ฉ็ ๅคใ\n",
        f"โ ๆ ธๅฟๅคๆญ๏ผ้ฃ้ฉ็ญ็บง{level_zh}๏ผๆนๅ{bias_zh}๏ผ็ฑ{driver_zh}ๅ ๅญไธปๅฏผใ\n",
        f"โ 1M้ขๆตๅบ้ด๏ผ[{lo_1m:+.1f}%, {hi_1m:+.1f}%]๏ผไธญๆข{mid_1m:+.1f}%๏ผๆณขๅจ็{vol_pct:.1f}%ใ\n",
    ]

    # 3M interval, only when a 3M forecast exists for this row.
    if pd.notna(row.get('pred_q10_3m')):
        lo_3m = row['pred_q10_3m'] * 100
        hi_3m = row['pred_q90_3m'] * 100
        mid_3m = row['pred_q50_3m'] * 100
        pieces.append(
            f"โ 3M้ขๆตๅบ้ด๏ผ[{lo_3m:+.1f}%, {hi_3m:+.1f}%]๏ผไธญๆข{mid_3m:+.1f}%ใ\n"
        )

    # CQR-calibrated interval, only when available.
    cqr_low = row.get('cqr_q10_1m', None)
    if cqr_low is not None and pd.notna(cqr_low):
        cqr_low_pct = cqr_low * 100
        cqr_high_pct = row['cqr_q90_1m'] * 100
        pieces.append(
            f"โ CQRๆ กๅๅบ้ด๏ผ[{cqr_low_pct:+.1f}%, {cqr_high_pct:+.1f}%]๏ผๅๅธ่ช็ฑ่ฆ็ไฟ่ฏ๏ผใ\n"
        )

    # -- Part 2: regime narrative --
    regime = row.get('regime_match', '')
    regime_sim = row.get('regime_similarity', 0)
    if regime and regime != 'Unknown':
        similarity_pct = regime_sim * 100
        pieces.append(
            f"\nโถ ๆ ผๅฑ่ฏๅซ๏ผๅฝๅๆๆฅ่ฟใ{regime}ใ๏ผ็ธไผผๅบฆ{similarity_pct:.0f}%๏ผ\n"
        )
        story = REGIME_NARRATIVES.get(regime, {})
        if story:
            pieces.append(f" ๅๅฒๅ็ ง๏ผ{story['history']}\n")
            pieces.append(f" ๅฝๅๅฏ็คบ๏ผ{story['implication']}\n")
            pieces.append(f" ๅฏนๅฒๅปบ่ฎฎ๏ผ{story['hedge_advice']}\n")

    # -- Part 3: industry impact --
    # Substring match: any grade containing 'High' (e.g. 'Medium-High')
    # counts as high; otherwise any grade containing 'Medium' counts as medium.
    graded = [(ind, str(row.get(f'{ind}_risk', 'Low'))) for ind in INDUSTRIES]
    elevated = [INDUSTRY_ZH.get(ind, ind) for ind, grade in graded if 'High' in grade]
    moderate = [
        INDUSTRY_ZH.get(ind, ind)
        for ind, grade in graded
        if 'High' not in grade and 'Medium' in grade
    ]
    if elevated:
        elevated_names = 'ใ'.join(elevated)
        pieces.append(
            f"\nโถ ้ซ้ฃ้ฉ่กไธ๏ผ{elevated_names}โโๅปบ่ฎฎๆๅๅฅไฟ่ฆ็็่ณ60-80%ใ\n"
        )
    if moderate:
        moderate_names = 'ใ'.join(moderate)
        pieces.append(
            f"โถ ไธญ้ฃ้ฉ่กไธ๏ผ{moderate_names}โโๅปบ่ฎฎ็ปดๆ25-50%ๅฅไฟ่ฆ็ใ\n"
        )
    if not (elevated or moderate):
        pieces.append("\nโถ ๅ่กไธ้ฃ้ฉๅๅคไบๅฏๆงๆฐดๅนณ๏ผๅปบ่ฎฎ็ปดๆๅธธ่งๅฅไฟๆฏไพใ\n")

    # -- Part 4: stress test --
    base_pct = row.get('scenario_base', 0) * 100
    vix_pct = row.get('scenario_vix_shock', 0) * 100
    supply_pct = row.get('scenario_supply_cut', 0) * 100
    demand_pct = row.get('scenario_demand_crash', 0) * 100
    # Worst case is taken over the supply-cut and demand-crash scenarios only.
    worst_pct = min(supply_pct, demand_pct)
    pieces.append(
        f"\nโถ ๅๅๆต่ฏ๏ผๅบๅ{base_pct:+.1f}% | VIX็ฟปๅ{vix_pct:+.1f}% | "
        f"ไพ็ปไธญๆญ{supply_pct:+.1f}% | ้ๆฑๅดฉๅก{demand_pct:+.1f}%\n"
        f" ๆๅคงไธ่ก้ฃ้ฉ๏ผ{worst_pct:+.1f}%๏ผๅปบ่ฎฎ้ข็็ธๅบๆตๅจๆง็ผๅฒใ\n"
    )
    return ''.join(pieces)
def generate_all_reports(results):
    """Generate one NLG report per row of *results*.

    Args:
        results: DataFrame whose rows carry a 'test_date' plus the fields
            generate_nlg_report() reads.

    Returns:
        dict mapping each row's 'YYYY-MM' month string to its report text.
        Rows that share a month overwrite earlier ones.
    """
    return {
        pd.Timestamp(record['test_date']).strftime('%Y-%m'): generate_nlg_report(record)
        for _, record in results.iterrows()
    }
| # =========================================================== | |
| # EVALUATION METRICS | |
| # =========================================================== | |
def _interval_score(lower, upper, actual, alpha=0.2):
    """Return the mean interval score of [lower, upper] against *actual*.

    Score per observation = interval width + (2 / alpha) * miss distance on
    whichever side the actual value fell outside. With the default
    alpha=0.2 the penalty factor is 10, as used for the q10-q90 intervals.
    """
    penalty = 2 / alpha
    return (
        (upper - lower)
        + penalty * np.maximum(lower - actual, 0)
        + penalty * np.maximum(actual - upper, 0)
    ).mean()


def evaluate_results(results):
    """Compute the full set of evaluation metrics and print them.

    Only rows with a realised 1M return ('actual_ret_1m' not NaN) are
    evaluated, which excludes live-forecast rows. Prints, in order: 1M
    interval coverage / high-vol coverage / interval score per model plus a
    naive unconditional-quantile baseline, 3M interval metrics (when more
    than 10 rows have a realised 3M return), volatility RMSE and
    correlation, mean realised vol per risk level, top-factor frequencies,
    mean absolute SHAP per factor group, the scenario values of the last
    evaluated row, and that row's NLG report.

    Args:
        results: DataFrame of walk-forward results.

    Returns:
        None. Output goes to stdout. When no row has a realised 1M return
        the header and a notice are printed and the function returns early.
    """
    # Evaluate only months with a realised value (drops live forecasts).
    results = results[results['actual_ret_1m'].notna()].copy()
    n = len(results)

    print(f"\n{'='*65}")
    # NOTE(review): the dash-like character in this title is mis-encoded in
    # this copy of the file; it is reproduced as found.
    print("V2 CHAMPIONSHIP โ EVALUATION")
    print("=" * 65)

    if n == 0:
        # Nothing below is defined on an empty sample (quantiles, iloc[-1]).
        print(" No rows with a realized 1M return; nothing to evaluate.")
        return

    ar = results['actual_ret_1m'].values
    av = results['actual_vol'].values

    # 1M interval
    print(f"\n--- 1M INTERVAL ---")
    models = [('QR (vol-adapt)', 'pred'), ('LightGBM', 'lgb')]
    if 'cqr_q10_1m' in results.columns:
        models.append(('Conformal QR', 'cqr'))
    # High-volatility subset: realised vol above the sample median. This is
    # model-independent, so it is computed once.
    hi = av > np.median(av)
    for model_name, prefix in models:
        q10 = results[f'{prefix}_q10_1m'].values
        q90 = results[f'{prefix}_q90_1m'].values
        cov = ((ar >= q10) & (ar <= q90)).mean()
        wis = _interval_score(q10, q90, ar)
        cov_hi = ((ar[hi] >= q10[hi]) & (ar[hi] <= q90[hi])).mean() if hi.any() else 0
        print(f" {model_name:<18} Cov={cov:.1%} HiCov={cov_hi:.1%} WIS={wis:.4f}")
    # Naive baseline: constant interval at the sample's own 10%/90% quantiles.
    nq10 = np.full(n, np.quantile(ar, 0.10))
    nq90 = np.full(n, np.quantile(ar, 0.90))
    naive_wis = _interval_score(nq10, nq90, ar)
    print(f" {'Naive':<18} WIS={naive_wis:.4f}")

    # 3M interval
    print(f"\n--- 3M INTERVAL ---")
    m3 = results['actual_ret_3m'].notna()
    if m3.sum() > 10:
        ar3 = results.loc[m3, 'actual_ret_3m'].values
        q10_3 = results.loc[m3, 'pred_q10_3m'].values
        q90_3 = results.loc[m3, 'pred_q90_3m'].values
        cov3 = ((ar3 >= q10_3) & (ar3 <= q90_3)).mean()
        wis3 = _interval_score(q10_3, q90_3, ar3)
        print(f" QR 3M (vol-adapt) Cov={cov3:.1%} WIS={wis3:.4f} n={m3.sum()}")

    # Volatility forecasts
    print(f"\n--- VOL SCORE ---")
    pv = results['pred_vol'].values
    bl = results['baseline_ewma'].values
    for nm, prd in [('V2 BL+Resid', pv), ('EWMA', bl)]:
        # RMSE computed directly; no per-iteration import needed.
        rmse = np.sqrt(np.mean((av - prd) ** 2))
        corr = np.corrcoef(av, prd)[0, 1]
        print(f" {nm:<18} RMSE={rmse:.4f} corr={corr:+.3f}")

    # Mean realised vol per predicted risk level
    print(f"\n--- RISK LEVELS ---")
    for lvl in ['Low', 'Medium', 'High']:
        mask = (results['risk_level'] == lvl).values
        if mask.sum() > 0:
            print(f" {lvl:<8}: vol={av[mask].mean():.4f} n={mask.sum()}")

    # Top-factor frequency
    print(f"\n--- FACTOR FREQ ---")
    if 'top_factor' in results.columns:
        for fac, cnt in results['top_factor'].value_counts().items():
            print(f" {fac:<12}: {cnt} ({cnt/n:.1%})")

    # Mean absolute SHAP per factor group
    print(f"\n--- SHAP (avg) ---")
    for g in FACTOR_GROUPS:
        col = f'shap_{g}'
        if col in results.columns:
            print(f" {g:<12}: {results[col].abs().mean():.4f}")

    # NOTE(review): "latest" below is the last row that has a realised 1M
    # return, because live-forecast rows were dropped at the top - confirm
    # this is the intended row.
    print(f"\n--- SCENARIO (latest) ---")
    lat = results.iloc[-1]
    print(f" Base: {lat['scenario_base']*100:+.1f}%")
    print(f" VIX shock: {lat['scenario_vix_shock']*100:+.1f}%")
    print(f" Supply cut: {lat['scenario_supply_cut']*100:+.1f}%")
    print(f" Demand crash: {lat['scenario_demand_crash']*100:+.1f}%")

    # NLG sample
    print(f"\n--- NLG REPORT (latest) ---")
    print(generate_nlg_report(lat))
| # =========================================================== | |
| # ABLATION EXPERIMENTS | |
| # =========================================================== | |
def _run_qr_eval(panel, feat_list, train_window=120):
    """Walk-forward q10/q90 quantile-regression evaluation on a feature subset.

    For every row i in ``range(train_window, len(panel) - 1)`` the preceding
    ``train_window`` rows are used to fit two QuantileRegressor models
    (quantiles 0.10 and 0.90, alpha=0.1, 'highs' solver) on standardised,
    median-imputed features; the predicted interval for row i is then scored
    against that row's 'target_ret_1m'.

    Args:
        panel: DataFrame with a 'target_ret_1m' column and feature columns.
        feat_list: candidate feature names. In each window only features
            present in the panel and more than 80% non-null are used; the
            step is skipped when fewer than two remain.
        train_window: number of trailing rows in each training window.

    Returns:
        (cov, wis, totals): empirical coverage of the [q10, q90] interval
        (0 when nothing was scored), mean interval score (999 when nothing
        was scored), and the number of scored steps.
    """
    hits, totals, wis_list = 0, 0, []
    for i in range(train_window, len(panel) - 1):
        train_df = panel.iloc[max(0, i - train_window):i]
        test_df = panel.iloc[i:i + 1]
        avail = [f for f in feat_list
                 if f in train_df.columns and train_df[f].notna().mean() > 0.8]
        if len(avail) < 2:
            continue

        # Nothing to score without a realised target; skip before fitting.
        actual = panel['target_ret_1m'].iloc[i]
        if pd.isna(actual):
            continue

        # Impute train and test with the training-window medians.
        medians = train_df[avail].median()
        X_tr = train_df[avail].fillna(medians)
        X_te = test_df[avail].fillna(medians)

        sc = StandardScaler()
        X_tr_s = sc.fit_transform(X_tr)
        X_te_s = sc.transform(X_te)

        # Keep exactly the training rows that have a target, so features and
        # targets stay row-aligned even when a missing target sits in the
        # middle of the window (truncating X to len(y) would shift them).
        has_target = train_df['target_ret_1m'].notna().to_numpy()
        if not has_target.any():
            continue
        X_fit = X_tr_s[has_target]
        y = train_df['target_ret_1m'].to_numpy()[has_target]

        try:
            qr10 = QuantileRegressor(quantile=0.10, alpha=0.1, solver='highs')
            qr90 = QuantileRegressor(quantile=0.90, alpha=0.1, solver='highs')
            qr10.fit(X_fit, y)
            qr90.fit(X_fit, y)
            p10, p90 = qr10.predict(X_te_s)[0], qr90.predict(X_te_s)[0]
        except Exception:
            # Best-effort: a step whose fit/predict fails is skipped. Unlike
            # a bare except, this lets KeyboardInterrupt/SystemExit through.
            continue

        # Repair quantile crossing so the interval is well-formed.
        if p10 > p90:
            p10, p90 = p90, p10
        hits += 1 if p10 <= actual <= p90 else 0
        totals += 1
        wis_list.append(
            (p90 - p10) + (2 / 0.2) * max(p10 - actual, 0) + (2 / 0.2) * max(actual - p90, 0)
        )

    cov = hits / totals if totals > 0 else 0
    wis = np.mean(wis_list) if wis_list else 999
    return cov, wis, totals
def run_ablation(panel, features):
    """Run the ablation study: training-window sweep + factor-group leave-one-out.

    Part 1 evaluates the full feature list with training windows of 84, 120
    and 180 rows. Part 2 takes the 120-row run as the baseline and, for each
    group in FACTOR_GROUPS, re-evaluates with that group's features removed
    and reports the change in coverage and interval score. Groups whose
    removal leaves fewer than two features are skipped.

    Args:
        panel: DataFrame passed through to _run_qr_eval.
        features: full list of feature column names.

    Returns:
        list of dicts, one per experiment, with keys 'type', 'param',
        'param_label', 'cov', 'wis', 'n' and, for leave-one-out rows,
        'delta_cov' and 'delta_wis'. Progress is also printed to stdout.

    NOTE(review): the non-ASCII literals in this function are mis-encoded
    (mojibake) in this copy of the file and are reproduced as found.
    """
    baseline_window = 120  # same value as _run_qr_eval's default train_window

    print(f"\n--- ABLATION EXPERIMENTS ---")
    ablation_results = []

    # -- Part 1: window ablation --
    print(" [็ชๅฃๆถ่]")
    window_scores = {}
    for w in [84, 120, 180]:
        cov, wis, n_ab = _run_qr_eval(panel, features, train_window=w)
        window_scores[w] = (cov, wis, n_ab)
        ablation_results.append({
            'type': 'window', 'param': w, 'param_label': f'{w}ๆ',
            'cov': round(cov, 3), 'wis': round(wis, 4), 'n': n_ab
        })
        print(f" Window={w:>3}: Cov={cov:.1%} WIS={wis:.4f} n={n_ab}")

    # -- Part 2: factor-group leave-one-out --
    print(" [ๅ ๅญ็ปๆถ่ โ Leave-One-Out]")
    # Baseline with all factors. The walk-forward evaluation is expensive,
    # so reuse the identical run from the window sweep instead of repeating it.
    if baseline_window in window_scores:
        base_cov, base_wis, base_n = window_scores[baseline_window]
    else:
        base_cov, base_wis, base_n = _run_qr_eval(
            panel, features, train_window=baseline_window)
    ablation_results.append({
        'type': 'factor_group', 'param': 'ALL', 'param_label': 'ๅ จ้จๅ ๅญ',
        'cov': round(base_cov, 3), 'wis': round(base_wis, 4), 'n': base_n
    })
    print(f" ALL (baseline): Cov={base_cov:.1%} WIS={base_wis:.4f}")

    group_zh = {'Price': 'ไปทๆ ผ่ๅจ', 'Supply': 'ไพ็ป็ซฏ', 'Demand': '้ๆฑ็ซฏ',
                'Risk_Geo': 'ๅฐ็ผ/้ฃ้ฉ', 'Technical': 'ๆๆฏ้ข'}
    for group, members in FACTOR_GROUPS.items():
        # Drop this group's features and re-evaluate.
        excluded = set(members)
        reduced = [f for f in features if f not in excluded]
        if len(reduced) < 2:
            continue
        cov, wis, n_ab = _run_qr_eval(panel, reduced, train_window=baseline_window)
        delta_cov = cov - base_cov
        delta_wis = wis - base_wis
        ablation_results.append({
            'type': 'factor_group', 'param': group,
            'param_label': f'ๅป้ค{group_zh.get(group, group)}',
            'cov': round(cov, 3), 'wis': round(wis, 4), 'n': n_ab,
            'delta_cov': round(delta_cov, 3), 'delta_wis': round(delta_wis, 4),
        })
        print(f" -{group:<12}: Cov={cov:.1%} (ฮ{delta_cov:+.1%}) "
              f"WIS={wis:.4f} (ฮ{delta_wis:+.4f})")
    return ablation_results