# oilverse-api/core/analysis.py
# Author: ๅญ™ๅฎถๆ˜Ž
# deploy: OilVerse for HuggingFace (Node.js 18 fix)
# Commit: fab9847
"""
analysis.py โ€” NLGๆŠฅๅ‘Šใ€่กŒไธšๅฝฑๅ“ใ€่ฏ„ไผฐๆŒ‡ๆ ‡ใ€ๆถˆ่žๅฎž้ชŒ
====================================================
ไปŽ v2_championship.py ๆ‹†ๅ‡บ็š„ๅˆ†ๆžไธŽ่ฏ„ไผฐๆจกๅ—ใ€‚
"""
import pandas as pd
import numpy as np
from sklearn.linear_model import QuantileRegressor
from sklearn.preprocessing import StandardScaler
from config import FACTOR_GROUPS, INDUSTRIES, INDUSTRY_ZH
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# INDUSTRY IMPACT RULE ENGINE
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
def apply_industry_rules(rec):
    """Map one forecast record to per-industry risk grades and action advice.

    Parameters
    ----------
    rec : dict-like
        Forecast record; reads 'risk_level', 'risk_bias', 'vol_ratio' and
        'top_factor' (all optional, with conservative defaults).

    Returns
    -------
    dict
        For every industry in ``INDUSTRIES``, two keys:
        '<industry>_risk' (grade string) and '<industry>_action' (advice).
    """
    level = rec.get('risk_level', 'Low')
    bias = rec.get('risk_bias', 'Balanced')
    ratio = rec.get('vol_ratio', 1.0)
    factor = rec.get('top_factor', 'Unknown')
    high = level == 'High'
    medium = level == 'Medium'
    up = bias == 'Upward'
    down = bias == 'Downward'
    wide_band = ratio > 1.3  # interval noticeably wider than usual
    out = {}
    for name in INDUSTRIES:
        # Defaults apply when no rule below matches.
        grade, action = 'Low', 'Routine monitoring'
        if name == 'Aviation':
            if high and up:
                grade, action = 'High', 'Increase hedging coverage; review fuel cost budget'
            elif high:
                grade, action = 'High', 'Elevated volatility; prepare contingency liquidity'
            elif up:
                grade, action = 'Medium-High', 'Monitor fuel cost exposure; consider forward contracts'
            elif medium:
                grade, action = 'Medium', 'Review quarterly fuel hedging strategy'
            # Aviation advice gets a driver-specific suffix.
            suffix = {
                'Demand': '; demand-driven โ†’ cost pressure may persist',
                'Supply': '; supply-driven โ†’ watch OPEC decisions',
                'Risk_Geo': '; geopolitical risk โ†’ event monitoring',
            }.get(factor)
            if suffix is not None:
                action += suffix
        elif name == 'Logistics':
            if high and up:
                grade, action = 'Medium-High', 'Review transport cost pass-through; working capital buffer'
            elif high:
                grade, action = 'Medium', 'Monitor diesel/freight cost exposure'
            elif medium and up:
                grade, action = 'Medium', 'Review fuel surcharge mechanisms'
        elif name == 'Chemicals':
            if up and factor == 'Supply':
                grade, action = 'High', 'Feedstock cost pressure; margin compression likely'
            elif high:
                grade, action = 'Medium-High', 'Monitor naphtha/ethylene spread; review procurement'
            elif wide_band:
                grade, action = 'Medium', 'Profit uncertainty elevated; scenario planning advised'
        elif name == 'Manufacturing':
            if high and up:
                grade, action = 'High', 'Energy cost surge risk; review energy hedging'
            elif high:
                grade, action = 'Medium', 'Elevated input cost volatility'
            elif medium:
                grade, action = 'Medium', 'Monitor energy procurement costs'
        elif name == 'Upstream_OG':
            # Producers: downside in price is the revenue risk, upside a tailwind.
            if down and high:
                grade, action = 'High', 'Revenue decline risk; review covenant compliance & liquidity'
            elif down:
                grade, action = 'Medium-High', 'Downside tail expanding; monitor cash flow coverage'
            elif up:
                grade, action = 'Low', 'Revenue tailwind; capex commitment review advised'
            else:
                grade, action = 'Low-Medium', 'Balanced outlook'
        out[f'{name}_risk'] = grade
        out[f'{name}_action'] = action
    return out
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# REGIME ECONOMIC NARRATIVES
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# Historical regime playbook. For each macro regime label produced upstream
# (row['regime_match']), a narrative triple spliced verbatim into the NLG
# report: 'history' (what happened), 'implication' (what it means now) and
# 'hedge_advice' (recommended hedging posture). Keys must match the regime
# labels emitted by the matching step exactly.
REGIME_NARRATIVES = {
    '2008 ้‡‘่žๅฑๆœบ': {
        'history': '2008ๅนดๅ…จ็ƒ้‡‘่žๅฑๆœบๆœŸ้—ด๏ผŒๆฒนไปทไปŽ$147ๆšด่ทŒ่‡ณ$32๏ผŒ้™ๅน…78%ใ€‚้œ€ๆฑ‚็ซฏๅดฉๅกŒๆ˜ฏไธปๅ› โ€”โ€”ๅ…จ็ƒGDPๆ”ถ็ผฉใ€่ดธๆ˜“้”ๅ‡ใ€ๅˆถ้€ ไธšPMIๆ™ฎ้่ทŒ็ ด40ใ€‚',
        'implication': '้œ€ๆฑ‚ๅดฉๅกŒๆ ผๅฑ€ไธ‹๏ผŒไธ‹ๆธธๆˆๆœฌ็ซฏไผไธš็ŸญๆœŸๅ—็›ŠไบŽไฝŽๆฒนไปท๏ผŒไฝ†ๆ€ป้œ€ๆฑ‚่Ž็ผฉๆ‹–็ดฏๆ•ดไฝ“่ฅๆ”ถใ€‚ไธŠๆธธไผไธš้ขไธดๆœ€ๅคงๅ†ฒๅ‡ปใ€‚',
        'hedge_advice': 'ไธŠๆธธไผไธšๅบ”็ซ‹ๅณ้”ๅฎš่ฟœๆœŸ้”€ๅ”ฎไปทๆ ผ๏ผ›ไธ‹ๆธธไผไธšๅฏ้€ขไฝŽๅปบไป“๏ผŒ้”ๅฎšไฝŽไปทๅŽŸๆ–™ใ€‚',
    },
    '2014 ้กตๅฒฉๆฒนๅ†ฒๅ‡ป': {
        'history': '2014-16ๅนด็พŽๅ›ฝ้กตๅฒฉๆฒนไบง้‡็ˆ†ๅ‘๏ผˆ+400ไธ‡ๆกถ/ๆ—ฅ๏ผ‰๏ผŒๅ ๅŠ OPECๆ‹’็ปๅ‡ไบง๏ผŒๆฒนไปทไปŽ$110่ทŒ่‡ณ$26ใ€‚ไพ›็ป™่ฟ‡ๅ‰ฉไธปๅฏผใ€‚',
        'implication': 'ไพ›็ป™่ฟ‡ๅ‰ฉๆ ผๅฑ€ๆŒ็ปญๆ—ถ้—ด้•ฟ๏ผˆ18ไธชๆœˆไปฅไธŠ๏ผ‰๏ผŒไธ‹ๆธธไผไธšๆˆๆœฌไผ˜ๅŠฟๅฏๆŒ็ปญ๏ผ›ไธŠๆธธไผไธš้œ€่ฆ้‡็ป„ๅ€บๅŠกใ€็ผฉๅ‡่ต„ๆœฌๅผ€ๆ”ฏใ€‚',
        'hedge_advice': '้‡็‚นๅ…ณๆณจ่ฟœๆœŸๆ›ฒ็บฟ็ป“ๆž„๏ผˆcontangoๅŠ ๆทฑ๏ผ‰๏ผŒๅˆฉ็”จๆœŸ่ดง้”ไปท็ช—ๅฃใ€‚ไธ‹ๆธธไผไธšๅปถ้•ฟ้‡‡่ดญๅˆ็บฆๆœŸ้™ใ€‚',
    },
    '2020 COVID': {
        'history': '2020ๅนดCOVID-19ๅฏผ่‡ดๅ…จ็ƒ้œ€ๆฑ‚ๆšดๅ‡2000ไธ‡ๆกถ/ๆ—ฅ๏ผŒWTIๆœŸ่ดงๅކๅฒๆ€ง่ทŒ่‡ณ่ดŸๅ€ผใ€‚้œ€ๆฑ‚ๅ†ฒๅ‡ป+ไป“ๅ‚จๅฑๆœบๅŒ้‡ๆ‰“ๅ‡ปใ€‚',
        'implication': 'ๆž็ซฏ้œ€ๆฑ‚ๅ†ฒๅ‡ปไธ‹๏ผŒ่ˆช็ฉบไธšๅฎข่ฟ้‡้™90%+๏ผŒ็‰ฉๆต้“พไธญๆ–ญใ€‚ไฝ†ๅค่‹้€Ÿๅบฆๅฏ่ƒฝ่ถ…้ข„ๆœŸโ€”โ€”Vๅž‹ๅๅผนๆ˜ฏๅކๅฒๅธธๆ€ใ€‚',
        'hedge_advice': '็ŸญๆœŸ๏ผšไฟๆŒ็Žฐ้‡‘ๆตๅผนๆ€ง๏ผŒ้ฟๅ…่ฟ‡ๅบฆๅฅ—ไฟใ€‚ไธญๆœŸ๏ผšๅ…ณๆณจOPEC+ๅ่ฐƒๅ‡ไบงไฟกๅท๏ผŒ้€ขไฝŽๅปบ็ซ‹ๅคšๅคดๅคดๅฏธใ€‚',
    },
    '2022 ไฟ„ไนŒๅ†ฒ็ช': {
        'history': '2022ๅนดไฟ„ไนŒๅ†ฒ็ชๅฏผ่‡ดไฟ„็ฝ—ๆ–ฏๅŽŸๆฒนๅ‡บๅฃๅ—ๅˆถ่ฃ๏ผŒไพ›็ป™็ผบๅฃ+ๅœฐ็ผ˜ๆบขไปทๆŽจๅ‡ๅธƒไผฆ็‰น่‡ณ$130+ใ€‚ๅœฐ็ผ˜ๆ”ฟๆฒป+ไพ›็ป™ๅŒ้‡ๅ†ฒๅ‡ปใ€‚',
        'implication': 'ๅœฐ็ผ˜้ฉฑๅŠจ็š„ไปทๆ ผ้ฃ™ๅ‡้€šๅธธ็ช็„ถไฝ†็Ÿญๆš‚๏ผˆ3-6ไธชๆœˆ๏ผ‰๏ผŒ้šๅŽๅˆถ่ฃ้€‚ๅบ”ๅ’Œๆ›ฟไปฃไพ›็ป™้€ๆญฅๆถˆๅŒ–ๆบขไปทใ€‚',
        'hedge_advice': 'ไบ‹ไปถ้ฉฑๅŠจ่กŒๆƒ…ไธญ๏ผŒๆœŸๆƒ็ญ–็•ฅไผ˜ไบŽๆœŸ่ดงโ€”โ€”ไนฐๅ…ฅ็œ‹ๆถจๆœŸๆƒ้”ๅฎšไธŠ้™ๆˆๆœฌ๏ผŒไฟ็•™ไปทๆ ผๅ›ž่ฝ็š„ๆ”ถ็›Š็ฉบ้—ดใ€‚',
    },
    '2023 OPECๅ‡ไบง': {
        'history': '2023ๅนดOPEC+ไธปๅŠจๅ‡ไบง200ไธ‡ๆกถ/ๆ—ฅ๏ผŒๆ‰˜ๅบ•ๆฒนไปทๅœจ$70-90ๅŒบ้—ดใ€‚ไพ›็ป™็ฎก็†ๅž‹ๅธ‚ๅœบ๏ผŒไปทๆ ผๆณขๅŠจ็އ่พƒไฝŽใ€‚',
        'implication': 'OPECๅ‡ไบงๆ ผๅฑ€ไธ‹๏ผŒไปทๆ ผๅŒบ้—ดๅฏ้ข„ๆต‹ๆ€ง่พƒ้ซ˜๏ผŒไฝ†ไธ‹่กŒ้ฃŽ้™ฉๆฅ่‡ชๅ‡ไบงๆ‰ง่กŒ็އไธ‹ๆป‘ๅ’Œ้žOPECๅขžไบงใ€‚',
        'hedge_advice': 'ไฝŽๆณขๅŠจ็Žฏๅขƒ้€‚ๅˆไฝฟ็”จ้›ถๆˆๆœฌ้ข†๏ผˆcollar๏ผ‰็ญ–็•ฅ๏ผŒ้”ๅฎš็ช„ไปทๆ ผๅธฆใ€‚',
    },
    'ๅธธๆ€/ไฝŽๆณขๅŠจ': {
        'history': 'ๆฒนไปทๅค„ไบŽๅธธๆ€ๆณขๅŠจๅŒบ้—ด๏ผŒๆ— ๆ˜Žๆ˜พๅ•ไธ€ๅ› ๅญไธปๅฏผใ€‚ๅธ‚ๅœบๅค„ไบŽไพ›้œ€ๅŸบๆœฌๅนณ่กก็Šถๆ€ใ€‚',
        'implication': 'ๅธธๆ€ไธ‹ๅ…ณๆณจ็ป“ๆž„ๆ€งๅ˜ๅŒ–ไฟกๅทโ€”โ€”OPECไผš่ฎฎๅ†ณ็ญ–ใ€็พŽๅ›ฝ้’ปไบ•ๆ•ฐ่ถ‹ๅŠฟใ€ไธญๅ›ฝPMI่ตฐๅ‘ใ€‚',
        'hedge_advice': 'ๅธธๆ€ไธ‹ๅฏนๅ†ฒๆฏ”ไพ‹ๅฏ้€‚ๅฝ“้™ไฝŽ๏ผˆ25-40%๏ผ‰๏ผŒไฝฟ็”จไฝŽๆˆๆœฌๆœŸ่ดง้”ไปทๅณๅฏใ€‚',
    },
}
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# NLG REPORT GENERATION (ENHANCED)
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
def generate_nlg_report(row):
    """Generate one month's in-depth risk-assessment report (Chinese NLG text).

    Assembles a multi-part summary string from a single walk-forward result
    row: headline quantile forecasts, a matched historical-regime narrative,
    per-industry risk flags, and stress-test scenarios.

    Parameters
    ----------
    row : pd.Series or dict-like
        One result row. Required keys: 'test_date', 'risk_level',
        'risk_bias', 'pred_q10_1m', 'pred_q50_1m', 'pred_q90_1m',
        'pred_vol'. Optional keys ('pred_*_3m', 'cqr_*_1m', 'regime_match',
        'regime_similarity', '<industry>_risk', 'scenario_*') enrich the
        report when present.

    Returns
    -------
    str
        The formatted multi-line report.
    """
    date = pd.Timestamp(row['test_date']).strftime('%Yๅนด%mๆœˆ')
    rl = row['risk_level']
    rb = row['risk_bias']
    top = row.get('top_factor', 'Unknown')
    # Return/vol fields are stored as fractions; convert to percent for display.
    q10 = row['pred_q10_1m'] * 100
    q50 = row['pred_q50_1m'] * 100
    q90 = row['pred_q90_1m'] * 100
    vol = row['pred_vol'] * 100
    # English-to-Chinese display labels; fall back to the raw value if unmapped.
    rl_zh = {'Low': 'ไฝŽ', 'Medium': 'ไธญ็ญ‰', 'High': '้ซ˜'}.get(rl, rl)
    rb_zh = {'Upward': 'ๅไธŠ่กŒ', 'Downward': 'ๅไธ‹่กŒ', 'Balanced': 'ๅ‡่กก'}.get(rb, rb)
    factor_zh = {
        'Price': 'ไปทๆ ผ่”ๅŠจ', 'Supply': 'ไพ›็ป™็ซฏ', 'Demand': '้œ€ๆฑ‚็ซฏ',
        'Risk_Geo': 'ๅœฐ็ผ˜ๆ”ฟๆฒป/้ฃŽ้™ฉ', 'Technical': 'ๆŠ€ๆœฏ้ข',
    }.get(top, top)
    # -- Part 1: headline assessment (risk grade, bias, 1M interval) --
    summary = (
        f"ใ€{date}ๆฒนไปท้ฃŽ้™ฉ็ ”ๅˆคใ€‘\n"
        f"โ–  ๆ ธๅฟƒๅˆคๆ–ญ๏ผš้ฃŽ้™ฉ็ญ‰็บง{rl_zh}๏ผŒๆ–นๅ‘{rb_zh}๏ผŒ็”ฑ{factor_zh}ๅ› ๅญไธปๅฏผใ€‚\n"
        f"โ–  1M้ข„ๆต‹ๅŒบ้—ด๏ผš[{q10:+.1f}%, {q90:+.1f}%]๏ผŒไธญๆžข{q50:+.1f}%๏ผŒๆณขๅŠจ็އ{vol:.1f}%ใ€‚\n"
    )
    # 3M interval: only shown when the 3M forecast exists for this month.
    if pd.notna(row.get('pred_q10_3m')):
        summary += f"โ–  3M้ข„ๆต‹ๅŒบ้—ด๏ผš[{row['pred_q10_3m']*100:+.1f}%, {row['pred_q90_3m']*100:+.1f}%]๏ผŒไธญๆžข{row['pred_q50_3m']*100:+.1f}%ใ€‚\n"
    # Conformalized (CQR) interval: shown when calibration columns are present.
    cqr_lo = row.get('cqr_q10_1m', None)
    if cqr_lo is not None and pd.notna(cqr_lo):
        summary += f"โ–  CQRๆ กๅ‡†ๅŒบ้—ด๏ผš[{cqr_lo*100:+.1f}%, {row['cqr_q90_1m']*100:+.1f}%]๏ผˆๅˆ†ๅธƒ่‡ช็”ฑ่ฆ†็›–ไฟ่ฏ๏ผ‰ใ€‚\n"
    # -- Part 2: matched historical regime + its economic narrative --
    regime = row.get('regime_match', '')
    regime_sim = row.get('regime_similarity', 0)
    if regime and regime != 'Unknown':
        summary += f"\nโ–ถ ๆ ผๅฑ€่ฏ†ๅˆซ๏ผšๅฝ“ๅ‰ๆœ€ๆŽฅ่ฟ‘ใ€Œ{regime}ใ€๏ผˆ็›ธไผผๅบฆ{regime_sim*100:.0f}%๏ผ‰\n"
        narr = REGIME_NARRATIVES.get(regime, {})
        if narr:
            summary += f" ๅކๅฒๅ‚็…ง๏ผš{narr['history']}\n"
            summary += f" ๅฝ“ๅ‰ๅฏ็คบ๏ผš{narr['implication']}\n"
            summary += f" ๅฏนๅ†ฒๅปบ่ฎฎ๏ผš{narr['hedge_advice']}\n"
    # -- Part 3: industry impact, grouped by severity of the '<ind>_risk' flags --
    high_risk = []
    med_risk = []
    for ind in INDUSTRIES:
        risk = str(row.get(f'{ind}_risk', 'Low'))
        # 'Medium-High' contains 'High' and is intentionally counted as high.
        if 'High' in risk:
            high_risk.append(INDUSTRY_ZH.get(ind, ind))
        elif 'Medium' in risk:
            med_risk.append(INDUSTRY_ZH.get(ind, ind))
    if high_risk:
        summary += f"\nโ–ถ ้ซ˜้ฃŽ้™ฉ่กŒไธš๏ผš{'ใ€'.join(high_risk)}โ€”โ€”ๅปบ่ฎฎๆๅ‡ๅฅ—ไฟ่ฆ†็›–็އ่‡ณ60-80%ใ€‚\n"
    if med_risk:
        summary += f"โ–ถ ไธญ้ฃŽ้™ฉ่กŒไธš๏ผš{'ใ€'.join(med_risk)}โ€”โ€”ๅปบ่ฎฎ็ปดๆŒ25-50%ๅฅ—ไฟ่ฆ†็›–ใ€‚\n"
    if not high_risk and not med_risk:
        summary += f"\nโ–ถ ๅ„่กŒไธš้ฃŽ้™ฉๅ‡ๅค„ไบŽๅฏๆŽงๆฐดๅนณ๏ผŒๅปบ่ฎฎ็ปดๆŒๅธธ่ง„ๅฅ—ไฟๆฏ”ไพ‹ใ€‚\n"
    # -- Part 4: stress-test scenarios --
    base = row.get('scenario_base', 0) * 100
    vix = row.get('scenario_vix_shock', 0) * 100
    supply = row.get('scenario_supply_cut', 0) * 100
    demand = row.get('scenario_demand_crash', 0) * 100
    # NOTE(review): worst case considers only the supply/demand scenarios;
    # the VIX shock is excluded — confirm this is intended.
    worst = min(supply, demand)
    summary += (
        f"\nโ–ถ ๅŽ‹ๅŠ›ๆต‹่ฏ•๏ผšๅŸบๅ‡†{base:+.1f}% | VIX็ฟปๅ€{vix:+.1f}% | "
        f"ไพ›็ป™ไธญๆ–ญ{supply:+.1f}% | ้œ€ๆฑ‚ๅดฉๅกŒ{demand:+.1f}%\n"
        f" ๆœ€ๅคงไธ‹่กŒ้ฃŽ้™ฉ๏ผš{worst:+.1f}%๏ผŒๅปบ่ฎฎ้ข„็•™็›ธๅบ”ๆตๅŠจๆ€ง็ผ“ๅ†ฒใ€‚\n"
    )
    return summary
def generate_all_reports(results):
    """Build the monthly NLG report for every row of *results*.

    Parameters
    ----------
    results : pd.DataFrame
        Walk-forward results table with a 'test_date' column.

    Returns
    -------
    dict
        'YYYY-MM' date string -> report text.
    """
    return {
        pd.Timestamp(rec['test_date']).strftime('%Y-%m'): generate_nlg_report(rec)
        for _, rec in results.iterrows()
    }
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# EVALUATION METRICS
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
def evaluate_results(results):
    """Compute and print the full evaluation suite for walk-forward results.

    Sections printed: 1M/3M interval coverage and interval (Winkler) scores
    per model vs. a naive unconditional-quantile benchmark, volatility
    forecast RMSE/correlation, realized vol per risk level, top-factor
    frequencies, mean absolute SHAP per factor group, the latest stress-test
    scenario, and a sample NLG report for the most recent month.

    Parameters
    ----------
    results : pd.DataFrame
        Walk-forward output table. Rows without a realized 1M return
        (live forecasts) are excluded from every metric.
    """
    # Only evaluate months with realized outcomes (drop live forecasts).
    eval_mask = results['actual_ret_1m'].notna()
    results = results[eval_mask].copy()
    if results.empty:
        # Nothing realized yet — every metric below would fail on empty arrays.
        print("No realized months to evaluate.")
        return
    ar = results['actual_ret_1m'].values
    av = results['actual_vol'].values
    n = len(results)
    # Interval-score penalty for an 80% central interval (alpha = 0.2):
    # each violation is charged 2/alpha = 10x its magnitude.
    pen = 2 / 0.2
    print(f"\n{'='*65}")
    print("V2 CHAMPIONSHIP โ€” EVALUATION")
    print("=" * 65)
    # 1M interval: coverage (overall and in high-vol months) and mean score.
    print(f"\n--- 1M INTERVAL ---")
    models = [('QR (vol-adapt)', 'pred'), ('LightGBM', 'lgb')]
    if 'cqr_q10_1m' in results.columns:
        models.append(('Conformal QR', 'cqr'))
    for model_name, prefix in models:
        q10 = results[f'{prefix}_q10_1m'].values
        q90 = results[f'{prefix}_q90_1m'].values
        cov = ((ar >= q10) & (ar <= q90)).mean()
        wis = ((q90 - q10) + pen * np.maximum(q10 - ar, 0) + pen * np.maximum(ar - q90, 0)).mean()
        # High-vol subset: months whose realized vol exceeds the median.
        vm = np.median(av)
        hi = av > vm
        cov_hi = ((ar[hi] >= q10[hi]) & (ar[hi] <= q90[hi])).mean() if hi.sum() > 0 else 0
        print(f" {model_name:<18} Cov={cov:.1%} HiCov={cov_hi:.1%} WIS={wis:.4f}")
    # Naive benchmark: unconditional empirical quantiles of realized returns.
    nq10 = np.full(n, np.quantile(ar, 0.10))
    nq90 = np.full(n, np.quantile(ar, 0.90))
    naive_wis = ((nq90 - nq10) + pen * np.maximum(nq10 - ar, 0) + pen * np.maximum(ar - nq90, 0)).mean()
    print(f" {'Naive':<18} WIS={naive_wis:.4f}")
    # 3M interval: only when enough months have a realized 3M return.
    print(f"\n--- 3M INTERVAL ---")
    m3 = results['actual_ret_3m'].notna()
    if m3.sum() > 10:
        ar3 = results.loc[m3, 'actual_ret_3m'].values
        q10_3 = results.loc[m3, 'pred_q10_3m'].values
        q90_3 = results.loc[m3, 'pred_q90_3m'].values
        cov3 = ((ar3 >= q10_3) & (ar3 <= q90_3)).mean()
        wis3 = ((q90_3 - q10_3) + pen * np.maximum(q10_3 - ar3, 0) + pen * np.maximum(ar3 - q90_3, 0)).mean()
        print(f" QR 3M (vol-adapt) Cov={cov3:.1%} WIS={wis3:.4f} n={m3.sum()}")
    # Volatility forecast: RMSE/correlation vs. the EWMA baseline.
    print(f"\n--- VOL SCORE ---")
    pv = results['pred_vol'].values
    bl = results['baseline_ewma'].values
    for nm, prd in [('V2 BL+Resid', pv), ('EWMA', bl)]:
        # RMSE in plain numpy (avoids re-importing sklearn on every iteration).
        rmse = np.sqrt(np.mean((av - prd) ** 2))
        corr = np.corrcoef(av, prd)[0, 1]
        print(f" {nm:<18} RMSE={rmse:.4f} corr={corr:+.3f}")
    # Risk-level calibration: mean realized vol within each predicted level.
    print(f"\n--- RISK LEVELS ---")
    for lvl in ['Low', 'Medium', 'High']:
        mask = results['risk_level'] == lvl
        if mask.sum() > 0:
            print(f" {lvl:<8}: vol={av[mask].mean():.4f} n={mask.sum()}")
    # How often each factor group was the dominant driver.
    print(f"\n--- FACTOR FREQ ---")
    if 'top_factor' in results.columns:
        for fac, cnt in results['top_factor'].value_counts().items():
            print(f" {fac:<12}: {cnt} ({cnt/n:.1%})")
    # Mean absolute SHAP attribution per factor group.
    print(f"\n--- SHAP (avg) ---")
    for g in FACTOR_GROUPS:
        col = f'shap_{g}'
        if col in results.columns:
            print(f" {g:<12}: {results[col].abs().mean():.4f}")
    # Stress-test scenario outputs for the most recent evaluated month.
    print(f"\n--- SCENARIO (latest) ---")
    lat = results.iloc[-1]
    print(f" Base: {lat['scenario_base']*100:+.1f}%")
    print(f" VIX shock: {lat['scenario_vix_shock']*100:+.1f}%")
    print(f" Supply cut: {lat['scenario_supply_cut']*100:+.1f}%")
    print(f" Demand crash: {lat['scenario_demand_crash']*100:+.1f}%")
    # Sample NLG report for the most recent evaluated month.
    print(f"\n--- NLG REPORT (latest) ---")
    print(generate_nlg_report(results.iloc[-1]))
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
# ABLATION EXPERIMENTS
# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
def _run_qr_eval(panel, feat_list, train_window=120):
    """Walk-forward quantile-regression evaluation on a feature subset.

    For each month after the warm-up window, fits 10%/90% quantile
    regressions on the trailing window and scores the resulting interval
    against the realized 1M return.

    Parameters
    ----------
    panel : pd.DataFrame
        Monthly panel with feature columns and a 'target_ret_1m' column.
    feat_list : list[str]
        Candidate feature names; features missing from the panel or with
        more than 20% NaN in the window are dropped per step.
    train_window : int, optional
        Trailing window length in months (default 120).

    Returns
    -------
    tuple
        (coverage, mean interval score, number of evaluated months).
    """
    hits, totals, wis_list = 0, 0, []
    for i in range(train_window, len(panel) - 1):
        train_df = panel.iloc[max(0, i - train_window):i]
        test_df = panel.iloc[i:i + 1]
        avail = [f for f in feat_list
                 if f in train_df.columns and train_df[f].notna().mean() > 0.8]
        if len(avail) < 2:
            continue
        actual = panel['target_ret_1m'].iloc[i]
        if np.isnan(actual):
            continue
        # BUGFIX: select rows with an observed target BEFORE scaling, so X and
        # y stay aligned by index. The old code dropped NaN targets from y and
        # then truncated X positionally (X_tr_s[:len(y)]), which mis-paired
        # rows whenever NaN targets were not at the end of the window.
        valid = train_df['target_ret_1m'].notna()
        if valid.sum() < 2:
            continue
        med = train_df[avail].median()  # impute from the full window, as before
        X_tr = train_df.loc[valid, avail].fillna(med)
        X_te = test_df[avail].fillna(med)
        y = train_df.loc[valid, 'target_ret_1m']
        sc = StandardScaler()
        X_tr_s = sc.fit_transform(X_tr)
        X_te_s = sc.transform(X_te)
        try:
            qr10 = QuantileRegressor(quantile=0.10, alpha=0.1, solver='highs')
            qr90 = QuantileRegressor(quantile=0.90, alpha=0.1, solver='highs')
            qr10.fit(X_tr_s, y)
            qr90.fit(X_tr_s, y)
            p10, p90 = qr10.predict(X_te_s)[0], qr90.predict(X_te_s)[0]
            # Quantile crossing can occur with separately-fit models; reorder.
            if p10 > p90:
                p10, p90 = p90, p10
            hits += 1 if p10 <= actual <= p90 else 0
            totals += 1
            # Interval (Winkler) score for an 80% interval: width plus
            # 2/alpha = 10x penalty for each side's violation.
            wis_list.append((p90 - p10)
                            + (2 / 0.2) * max(p10 - actual, 0)
                            + (2 / 0.2) * max(actual - p90, 0))
        except Exception:
            # Solver failure on a degenerate window: skip this month.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            continue
    cov = hits / totals if totals > 0 else 0
    wis = np.mean(wis_list) if wis_list else 999
    return cov, wis, totals
def run_ablation(panel, features):
    """Run the ablation study: training-window sweep + factor-group leave-one-out.

    Parameters
    ----------
    panel : pd.DataFrame
        Monthly feature/target panel passed through to ``_run_qr_eval``.
    features : list[str]
        Full feature set used as the baseline configuration.

    Returns
    -------
    list[dict]
        One record per configuration, with coverage/WIS and (for the
        leave-one-out runs) deltas relative to the all-factor baseline.
    """
    print(f"\n--- ABLATION EXPERIMENTS ---")
    records = []
    # Part 1: sensitivity to the walk-forward training-window length.
    print(" [็ช—ๅฃๆถˆ่ž]")
    for window in (84, 120, 180):
        cov, wis, count = _run_qr_eval(panel, features, train_window=window)
        records.append({
            'type': 'window', 'param': window, 'param_label': f'{window}ๆœˆ',
            'cov': round(cov, 3), 'wis': round(wis, 4), 'n': count,
        })
        print(f" Window={window:>3}: Cov={cov:.1%} WIS={wis:.4f} n={count}")
    # Part 2: leave each factor group out and compare against the full set.
    print(" [ๅ› ๅญ็ป„ๆถˆ่ž โ€” Leave-One-Out]")
    base_cov, base_wis, base_n = _run_qr_eval(panel, features)
    records.append({
        'type': 'factor_group', 'param': 'ALL', 'param_label': 'ๅ…จ้ƒจๅ› ๅญ',
        'cov': round(base_cov, 3), 'wis': round(base_wis, 4), 'n': base_n,
    })
    print(f" ALL (baseline): Cov={base_cov:.1%} WIS={base_wis:.4f}")
    group_zh = {'Price': 'ไปทๆ ผ่”ๅŠจ', 'Supply': 'ไพ›็ป™็ซฏ', 'Demand': '้œ€ๆฑ‚็ซฏ',
                'Risk_Geo': 'ๅœฐ็ผ˜/้ฃŽ้™ฉ', 'Technical': 'ๆŠ€ๆœฏ้ข'}
    for group, members in FACTOR_GROUPS.items():
        # Feature set with this group removed; skip degenerate subsets.
        kept = [f for f in features if f not in members]
        if len(kept) < 2:
            continue
        cov, wis, count = _run_qr_eval(panel, kept)
        d_cov = cov - base_cov
        d_wis = wis - base_wis
        records.append({
            'type': 'factor_group', 'param': group,
            'param_label': f'ๅŽป้™ค{group_zh.get(group, group)}',
            'cov': round(cov, 3), 'wis': round(wis, 4), 'n': count,
            'delta_cov': round(d_cov, 3), 'delta_wis': round(d_wis, 4),
        })
        print(f" -{group:<12}: Cov={cov:.1%} (ฮ”{d_cov:+.1%}) "
              f"WIS={wis:.4f} (ฮ”{d_wis:+.4f})")
    return records