# oilverse-api / core / engine.py
# Author: 孙家明
# Commit fab9847 — "deploy: OilVerse for HuggingFace (Node.js 18 fix)"
"""
engine.py — Walk-Forward 预测引擎
=====================================
核心组件:
1. 数据加载与目标构建
2. Walk-Forward 主循环
├─ 特征选择 (MI)
├─ 波动率预测 (EWMA + Ridge residual)
├─ 区间预测 (QR + LightGBM) × 1M/3M
├─ Vol-Adaptive 宽度调整
├─ LightGBM 特征重要性
├─ 风险等级 / 偏置推断
├─ Regime 检测 (余弦相似度)
└─ 因子归因 (Ridge)
"""
import pandas as pd
import numpy as np
import warnings
from sklearn.linear_model import Ridge, QuantileRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import mutual_info_regression
import lightgbm as lgb
# TFT (Temporal Fusion Transformer) — frontier deep learning model
try:
from core.tft_model import tft_predict_step
_TFT_AVAILABLE = True
except ImportError:
_TFT_AVAILABLE = False
from config import (
FEATURES, FACTOR_GROUPS, PRICE_COL, TRAIN_WINDOW,
MAX_FEATURES, MIN_TRAIN_SAMPLES,
REGIME_SIGNATURES, REGIME_TYPE_MAP,
)
warnings.filterwarnings('ignore')
# ═══════════════════════════════════════════════════════════
# DATA LOADING
# ═══════════════════════════════════════════════════════════
def load_panel(panel_path, price_col=None):
    """Load the monthly panel, attach geopolitical features, and build targets.

    Args:
        panel_path: CSV path of the monthly feature panel (dates in column 0).
        price_col: optional price column to forecast (defaults to
            config.PRICE_COL). When the target is switched (e.g. to Brent),
            that column is removed from the feature list automatically to
            prevent target leakage.

    Returns:
        (panel, features): trimmed DataFrame (all rows kept, including the
        target-less latest months used for live forecasting) and the list of
        usable feature names.
    """
    target_col = PRICE_COL if price_col is None else price_col
    df = pd.read_csv(panel_path, index_col=0, parse_dates=True)

    # Geopolitical shock features — best effort: on any failure fall back to
    # all-zero columns so downstream code never sees missing columns.
    try:
        geo = pd.read_csv('data/csv_raw/geopolitical_shocks.csv')
        meta_cols = {'date_str', 'date_num', 'year', 'month', 'day'}
        event_cols = [c for c in geo.columns if c not in meta_cols]
        geo['date'] = pd.to_datetime(geo[['year', 'month', 'day']])
        by_month = geo.set_index('date')[event_cols].resample('ME').sum()
        by_month['geo_shock_count'] = by_month[event_cols].sum(axis=1)
        by_month['geo_active_events'] = (by_month[event_cols] > 0).sum(axis=1)
        df = df.merge(
            by_month[['geo_shock_count', 'geo_active_events']],
            left_index=True, right_index=True, how='left',
        )
        for gcol in ('geo_shock_count', 'geo_active_events'):
            df[gcol] = df[gcol].fillna(0)
        print("✓ 地缘政治特征已添加")
    except Exception as e:
        print(f"⚠ GPR 数据未加载: {e}")
        df['geo_shock_count'] = 0
        df['geo_active_events'] = 0

    # Forward-looking targets derived from the chosen price column.
    ret_1m = df[target_col].pct_change(1)
    df['target_ret_1m'] = ret_1m.shift(-1)
    df['target_ret_3m'] = df[target_col].pct_change(3).shift(-3)
    df['target_abs_ret_1m'] = df['target_ret_1m'].abs()
    df['target_abs_ret_3m'] = df['target_ret_3m'].abs()
    df['target_up_1m'] = (df['target_ret_1m'] > 0).astype(int)
    df['ewma_vol'] = ret_1m.abs().ewm(alpha=0.06).mean()

    # Remove the target price column itself from the features (leakage guard).
    usable = [f for f in FEATURES if f in df.columns and f != target_col]
    # If the target is not WTI, WTI_spot becomes a legitimate predictor.
    if target_col != 'WTI_spot' and 'WTI_spot' in df.columns and 'WTI_spot' not in usable:
        usable.append('WTI_spot')

    target_names = ['target_ret_1m', 'target_ret_3m', 'target_abs_ret_1m',
                    'target_abs_ret_3m', 'target_up_1m', 'ewma_vol']
    keep = [c for c in usable + target_names + [target_col] if c in df.columns]
    panel = df[keep].copy()

    # Keep every row (including target-less latest months, used for live forecasts).
    n_with_target = panel['target_ret_1m'].notna().sum()
    n_total = len(panel)
    print(f"面板 [{target_col}]: {n_total} 月 ({n_with_target} 有目标, {n_total - n_with_target} live), {len(usable)} 特征")
    return panel, usable
# ═══════════════════════════════════════════════════════════
# REGIME DETECTION
# ═══════════════════════════════════════════════════════════
def detect_regime(sel, coefs, x_vals, pred_vol):
    """Build a factor-contribution regime vector and cosine-match it against
    historical regime signatures.

    Args:
        sel: selected feature names for the current step.
        coefs: {feature: ridge coefficient} from the attribution fit.
        x_vals: {feature: standardized value} at the test point.
        pred_vol: predicted 1M volatility level.

    Returns:
        dict with the best-matching regime name, its similarity, its type
        (via REGIME_TYPE_MAP), and the similarity for every known regime.
    """
    # Feature buckets that feed each regime dimension.
    buckets = {
        'supply_stress': {'rig_count_us_new', 'supply_saudi', 'us_oil_inventory_total'},
        'demand_stress': {'pmi_us_mfg', 'usd_index', 'nonfarm_us', 'ipi_us'},
        'geopolitical_stress': {'vix_lag1', 'vix_lag2', 'geo_shock_count', 'geo_active_events'},
        'price_momentum': {'mom1m_lag1', 'hist_vol_12m', 'rsi12m'},
    }
    # Insertion order must mirror REGIME_SIGNATURES so the vectors align.
    vec = {
        'supply_stress': 0, 'demand_stress': 0,
        'geopolitical_stress': 0, 'price_momentum': 0, 'vol_level': 0,
    }
    for feat in sel:
        weight = abs(coefs.get(feat, 0) * x_vals.get(feat, 0))
        for bucket, members in buckets.items():
            if feat in members:
                vec[bucket] += weight
                break
    vec['vol_level'] = pred_vol

    # Cosine similarity against each historical regime signature.
    curr = np.array(list(vec.values()))
    curr_norm = np.linalg.norm(curr)
    best_name, best_score = '常态/低波动', 0
    sims = {}
    for name, signature in REGIME_SIGNATURES.items():
        ref = np.array(list(signature.values()))
        ref_norm = np.linalg.norm(ref)
        if curr_norm > 0 and ref_norm > 0:
            score = float(np.dot(curr, ref) / (curr_norm * ref_norm))
        else:
            score = 0
        sims[name] = score
        if score > best_score:
            best_name, best_score = name, score

    return {
        'regime_match': best_name,
        'regime_similarity': round(best_score, 3),
        'regime_type': REGIME_TYPE_MAP.get(best_name, 'normal'),
        'regime_sims': sims,
    }
# ═══════════════════════════════════════════════════════════
# WALK-FORWARD ENGINE
# ═══════════════════════════════════════════════════════════
def run_walk_forward(panel, features, train_window=TRAIN_WINDOW):
    """Run the complete walk-forward forecasting loop.

    For each month that has a realized 1M target, every model head is fit
    using only data prior to that month, and predictions plus diagnostics
    are recorded. After the loop, "live" forecasts are produced for the
    trailing months that have no realized target yet.

    Args:
        panel: monthly DataFrame from load_panel() (features + target columns).
        features: candidate feature column names.
        train_window: rolling-window length in months for the QR/Ridge heads
            (LightGBM uses an expanding window instead).

    Returns:
        (results_df, shap_records): per-month DataFrame of predictions and
        diagnostics, plus a list of per-month LightGBM importance records.
    """
    results = []
    shap_records = []
    # Walk-forward range: only months with a realized target are evaluated.
    last_target_idx = panel['target_ret_1m'].last_valid_index()
    last_target_pos = panel.index.get_loc(last_target_idx) if last_target_idx is not None else len(panel) - 4
    wf_end = min(last_target_pos + 1, len(panel))
    for i in range(train_window, wf_end):
        train_df = panel.iloc[max(0, i - train_window):i]  # QR: rolling window
        train_expand = panel.iloc[:i]  # LGB: expanding window (all history)
        test_df = panel.iloc[i:i + 1]
        test_date = panel.index[i]
        # Keep only features that are >80% non-missing in this training window.
        avail = [f for f in features if train_df[f].notna().mean() > 0.8]
        if len(avail) < 3:
            continue
        # ── Impute + Scale ──
        # Median-impute from TRAIN only (same medians applied to test),
        # then standardize with a scaler fit on train only.
        X_tr = train_df[avail].copy()
        X_te = test_df[avail].copy()
        for col in avail:
            med = X_tr[col].median()
            X_tr[col] = X_tr[col].fillna(med)
            X_te[col] = X_te[col].fillna(med)
        scaler = StandardScaler()
        X_tr_s = pd.DataFrame(scaler.fit_transform(X_tr), columns=avail, index=train_df.index)
        X_te_s = pd.DataFrame(scaler.transform(X_te), columns=avail, index=test_df.index)
        # ── Feature selection (MI, inner split) ──
        # MI is computed on the first 80% of the window against |1M return|.
        inner_n = int(len(X_tr_s) * 0.8)
        y_sel = train_df['target_abs_ret_1m'].iloc[:inner_n]
        X_sel = X_tr_s.iloc[:inner_n]
        valid = y_sel.notna() & X_sel.notna().all(axis=1)
        if valid.sum() < MIN_TRAIN_SAMPLES:
            continue
        try:
            mi = mutual_info_regression(X_sel.loc[valid], y_sel.loc[valid], random_state=42, n_neighbors=5)
        except:
            # Fallback: uniform scores keep all features equally ranked.
            mi = np.ones(len(avail))
        K = min(MAX_FEATURES, len(avail))
        sel = [avail[j] for j in np.argsort(mi)[-K:]]  # top-K by MI
        X_tr_f = X_tr_s[sel]
        X_te_f = X_te_s[sel]
        # ── Actuals ──
        actual_ret_1m = panel['target_ret_1m'].iloc[i]
        actual_ret_3m = panel['target_ret_3m'].iloc[i] if pd.notna(panel['target_ret_3m'].iloc[i]) else np.nan
        actual_vol = panel['target_abs_ret_1m'].iloc[i]
        actual_up = panel['target_up_1m'].iloc[i]
        ewma_now = test_df['ewma_vol'].values[0]
        rec = {
            'test_date': test_date,
            'actual_ret_1m': actual_ret_1m,
            'actual_ret_3m': actual_ret_3m,
            'actual_vol': actual_vol,
            'actual_up': actual_up,
        }
        y_ret = train_df['target_ret_1m']
        y_ret_3m = train_df['target_ret_3m']
        y_vol = train_df['target_abs_ret_1m']
        mask_r = y_ret.notna()
        mask_3m = y_ret_3m.notna()
        # ════ VOL HEAD ════
        # Baseline is current EWMA vol; a Ridge on log(actual/EWMA) predicts
        # a multiplicative correction, clipped to keep the forecast sane.
        baseline_vol = float(ewma_now) if not np.isnan(ewma_now) else 0.05
        rec['baseline_ewma'] = baseline_vol
        valid_vol = y_vol.notna() & train_df['ewma_vol'].notna() & (train_df['ewma_vol'] > 0.001)
        if valid_vol.sum() > MIN_TRAIN_SAMPLES:
            log_ratio = np.log(y_vol[valid_vol] / train_df['ewma_vol'][valid_vol])
            log_ratio = log_ratio.replace([np.inf, -np.inf], np.nan).dropna()
            if len(log_ratio) > 15:
                try:
                    ridge = Ridge(alpha=10.0)
                    ridge.fit(X_tr_f.loc[log_ratio.index], log_ratio)
                    plr = np.clip(ridge.predict(X_te_f)[0], -2.0, 2.0)
                    rec['pred_vol'] = float(np.clip(baseline_vol * np.exp(plr), 0.005, 0.50))
                except:
                    rec['pred_vol'] = baseline_vol
            else:
                rec['pred_vol'] = baseline_vol
        else:
            rec['pred_vol'] = baseline_vol
        # ════ INTERVAL HEAD: 1M (QR + LightGBM) ════
        for q, alpha_q in [('q10', 0.10), ('q50', 0.50), ('q90', 0.90)]:
            try:
                qr = QuantileRegressor(quantile=alpha_q, alpha=0.1, solver='highs')
                qr.fit(X_tr_f.loc[mask_r], y_ret.loc[mask_r])
                rec[f'qr_{q}_1m'] = qr.predict(X_te_f)[0]
            except:
                # Fallback: unconditional empirical quantile of the window.
                rec[f'qr_{q}_1m'] = float(y_ret.quantile(alpha_q))
            # LGB: expanding window (all history → more samples → better trees)
            # NOTE(review): the scaler was fit on the rolling window only;
            # it is reused here to transform the full expanding history —
            # presumably intentional for consistency with X_te_f. Confirm.
            try:
                X_tr_lgb = train_expand[avail].copy()
                for col in avail:
                    X_tr_lgb[col] = X_tr_lgb[col].fillna(X_tr_lgb[col].median())
                X_tr_lgb_s = pd.DataFrame(scaler.transform(X_tr_lgb), columns=avail, index=train_expand.index)
                y_ret_lgb = train_expand['target_ret_1m']
                mask_lgb = y_ret_lgb.notna() & X_tr_lgb_s[sel].notna().all(axis=1)
                lgb_params = {
                    'objective': 'quantile', 'alpha': alpha_q,
                    'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05,
                    'num_leaves': 8,
                    'min_child_samples': max(10, int(mask_lgb.sum() * 0.05)),
                    'subsample': 0.8, 'colsample_bytree': 0.8,
                    'reg_alpha': 0.3, 'reg_lambda': 0.5,
                    'verbose': -1, 'random_state': 42,
                }
                lgb_model = lgb.LGBMRegressor(**lgb_params)
                lgb_model.fit(X_tr_lgb_s[sel].loc[mask_lgb], y_ret_lgb.loc[mask_lgb])
                rec[f'lgb_{q}_1m'] = lgb_model.predict(X_te_f)[0]
            except:
                rec[f'lgb_{q}_1m'] = rec[f'qr_{q}_1m']
        # Enforce monotonicity (q10 <= q50 <= q90) per model family.
        for pfx in ['qr', 'lgb']:
            vals = [rec[f'{pfx}_q10_1m'], rec[f'{pfx}_q50_1m'], rec[f'{pfx}_q90_1m']]
            rec[f'{pfx}_q10_1m'] = min(vals)
            rec[f'{pfx}_q50_1m'] = np.median(vals)
            rec[f'{pfx}_q90_1m'] = max(vals)
        # ════ TFT HEAD (Temporal Fusion Transformer) ════
        # Optional deep-learning head; falls back to the QR quantiles when
        # the module is unavailable, history is short, or prediction fails.
        tft_ok = False
        if _TFT_AVAILABLE and i >= train_window + 30:  # Need enough history
            try:
                tft_result = tft_predict_step(
                    train_df, test_df, sel, y_col='target_ret_1m'
                )
                if tft_result is not None:
                    rec['tft_q10_1m'] = tft_result['tft_q10_1m']
                    rec['tft_q50_1m'] = tft_result['tft_q50_1m']
                    rec['tft_q90_1m'] = tft_result['tft_q90_1m']
                    tft_ok = True
            except:
                pass
        if not tft_ok:
            rec['tft_q10_1m'] = rec['qr_q10_1m']
            rec['tft_q50_1m'] = rec['qr_q50_1m']
            rec['tft_q90_1m'] = rec['qr_q90_1m']
        # ════ INTERVAL HEAD: 3M ════
        for q, alpha_q in [('q10', 0.10), ('q50', 0.50), ('q90', 0.90)]:
            try:
                qr3 = QuantileRegressor(quantile=alpha_q, alpha=0.1, solver='highs')
                qr3.fit(X_tr_f.loc[mask_3m], y_ret_3m.loc[mask_3m])
                rec[f'qr_{q}_3m'] = qr3.predict(X_te_f)[0]
            except:
                rec[f'qr_{q}_3m'] = float(y_ret_3m.quantile(alpha_q)) if mask_3m.sum() > 5 else 0.0
        # Monotonicity for the 3M quantiles.
        vals3 = [rec['qr_q10_3m'], rec['qr_q50_3m'], rec['qr_q90_3m']]
        rec['qr_q10_3m'] = min(vals3)
        rec['qr_q50_3m'] = np.median(vals3)
        rec['qr_q90_3m'] = max(vals3)
        # ════ QR + LGB + TFT ENSEMBLE ════
        # Three-way blend: QR(50%) + LGB(25%) + TFT(25%)
        w_qr, w_lgb, w_tft = 0.50, 0.25, 0.25
        ens_q10 = w_qr * rec['qr_q10_1m'] + w_lgb * rec['lgb_q10_1m'] + w_tft * rec['tft_q10_1m']
        ens_q50 = w_qr * rec['qr_q50_1m'] + w_lgb * rec['lgb_q50_1m'] + w_tft * rec['tft_q50_1m']
        ens_q90 = w_qr * rec['qr_q90_1m'] + w_lgb * rec['lgb_q90_1m'] + w_tft * rec['tft_q90_1m']
        # ════ VOL-ADAPTIVE WIDTH (on blended ensemble) ════
        # Scale interval half-widths by predicted vol relative to the
        # training-window median vol.
        center = ens_q50
        raw_upper = ens_q90 - center
        raw_lower = center - ens_q10
        train_vol_median = float(y_vol.median())
        vol_ratio = rec['pred_vol'] / train_vol_median if train_vol_median > 0.001 else 1.0
        vol_ratio = np.clip(vol_ratio, 0.85, 2.5)  # grid-searched optimal
        rec['pred_q10_1m'] = center - raw_lower * vol_ratio
        rec['pred_q50_1m'] = center
        rec['pred_q90_1m'] = center + raw_upper * vol_ratio
        rec['interval_width_1m'] = (raw_upper + raw_lower) * vol_ratio
        rec['vol_ratio'] = vol_ratio
        # ════ 3M: INDEPENDENT VOL-ADAPTIVE (lb=1.0, doesn't narrow) ════
        center3 = rec['qr_q50_3m']
        raw_upper3 = rec['qr_q90_3m'] - center3
        raw_lower3 = center3 - rec['qr_q10_3m']
        vol_ratio_3m = np.clip(vol_ratio, 1.0, 2.5)  # 3M keeps lb=1.0
        rec['pred_q10_3m'] = center3 - raw_lower3 * vol_ratio_3m
        rec['pred_q50_3m'] = center3
        rec['pred_q90_3m'] = center3 + raw_upper3 * vol_ratio_3m
        rec['interval_width_3m'] = (raw_upper3 + raw_lower3) * vol_ratio_3m
        # ════ CONFORMAL QR (CQR) ════
        # Split training into proper_train + calibration for conformity scores
        cal_size = max(15, int(len(X_tr_f) * 0.2))
        proper_n = len(X_tr_f) - cal_size
        if proper_n > MIN_TRAIN_SAMPLES:
            X_proper = X_tr_f.iloc[:proper_n]
            X_cal = X_tr_f.iloc[proper_n:]
            y_proper = y_ret.iloc[:proper_n]
            y_cal = y_ret.iloc[proper_n:]
            mask_proper = y_proper.notna()
            mask_cal = y_cal.notna()
            try:
                # Fit QR on proper training set
                qr_lo = QuantileRegressor(quantile=0.10, alpha=0.1, solver='highs')
                qr_hi = QuantileRegressor(quantile=0.90, alpha=0.1, solver='highs')
                qr_lo.fit(X_proper.loc[mask_proper], y_proper.loc[mask_proper])
                qr_hi.fit(X_proper.loc[mask_proper], y_proper.loc[mask_proper])
                # Conformity scores on calibration set
                cal_lo = qr_lo.predict(X_cal.loc[mask_cal])
                cal_hi = qr_hi.predict(X_cal.loc[mask_cal])
                y_cal_vals = y_cal.loc[mask_cal].values
                scores = np.maximum(cal_lo - y_cal_vals, y_cal_vals - cal_hi)
                # Quantile of scores for coverage guarantee
                alpha = 0.20  # Desired miscoverage = 20% → 80% coverage
                n_cal = len(scores)
                q_level = min(1.0, (1 - alpha) * (1 + 1 / n_cal))
                Q_hat = np.quantile(scores, q_level)
                # Apply to test point: widen both sides by Q_hat.
                cqr_lo = qr_lo.predict(X_te_f)[0] - Q_hat
                cqr_hi = qr_hi.predict(X_te_f)[0] + Q_hat
                cqr_md = (cqr_lo + cqr_hi) / 2
                rec['cqr_q10_1m'] = float(cqr_lo)
                rec['cqr_q50_1m'] = float(cqr_md)
                rec['cqr_q90_1m'] = float(cqr_hi)
                rec['cqr_Q_hat'] = float(Q_hat)
            except:
                rec['cqr_q10_1m'] = rec['pred_q10_1m']
                rec['cqr_q50_1m'] = rec['pred_q50_1m']
                rec['cqr_q90_1m'] = rec['pred_q90_1m']
                rec['cqr_Q_hat'] = 0.0
        else:
            rec['cqr_q10_1m'] = rec['pred_q10_1m']
            rec['cqr_q50_1m'] = rec['pred_q50_1m']
            rec['cqr_q90_1m'] = rec['pred_q90_1m']
            rec['cqr_Q_hat'] = 0.0
        # Save pre-blend QR intervals for risk_bias (CQR should not affect directional signal)
        qr_q10_prebend = rec['pred_q10_1m']
        qr_q50_prebend = rec['pred_q50_1m']
        qr_q90_prebend = rec['pred_q90_1m']
        # ════ BLEND: 80% QR(VA) + 20% CQR → final 1M intervals ════
        # CQR boosts hi-vol coverage (+5pp) while keeping WIS optimal
        w_cqr = 0.20
        rec['pred_q10_1m'] = (1 - w_cqr) * rec['pred_q10_1m'] + w_cqr * rec['cqr_q10_1m']
        rec['pred_q90_1m'] = (1 - w_cqr) * rec['pred_q90_1m'] + w_cqr * rec['cqr_q90_1m']
        rec['interval_width_1m'] = rec['pred_q90_1m'] - rec['pred_q10_1m']
        # ════ FEATURE IMPORTANCE (LightGBM) ════
        # A median-quantile LGBM provides split-count importances, aggregated
        # into factor groups and normalized to sum to 1.
        try:
            lgb50 = lgb.LGBMRegressor(
                objective='quantile', alpha=0.50, n_estimators=100,
                max_depth=3, learning_rate=0.05, num_leaves=8,
                min_child_samples=10, verbose=-1, random_state=42
            )
            lgb50.fit(X_tr_f.loc[mask_r], y_ret.loc[mask_r])
            importances = lgb50.feature_importances_
            imp_dict = dict(zip(sel, importances))
            total_imp = sum(importances) if sum(importances) > 0 else 1
            for group, members in FACTOR_GROUPS.items():
                group_imp = sum(imp_dict.get(f, 0) for f in members if f in imp_dict)
                rec[f'shap_{group}'] = float(group_imp / total_imp)
            rec['shap_base'] = float(lgb50.predict(X_te_f)[0])
            shap_records.append({
                'date': str(test_date),
                'features': sel,
                'importances': importances.tolist(),
                'normalized': (importances / total_imp).tolist(),
                'lgb_pred': float(lgb50.predict(X_te_f)[0]),
            })
        except:
            for g in FACTOR_GROUPS:
                rec[f'shap_{g}'] = 0.0
            rec['shap_base'] = 0.0
        # ════ RISK LEVEL ════
        # Use pre-blend QR intervals for bias (CQR widens symmetrically, distorts bias signal)
        upside_tail = qr_q90_prebend - qr_q50_prebend
        downside_tail = qr_q50_prebend - qr_q10_prebend
        vol_pctile = (y_vol < rec['pred_vol']).mean()
        rec['risk_level'] = 'High' if vol_pctile >= 0.70 else ('Medium' if vol_pctile >= 0.40 else 'Low')
        rec['risk_bias'] = 'Upward' if upside_tail > 1.15 * downside_tail else (
            'Downward' if downside_tail > 1.15 * upside_tail else 'Balanced')
        rec['vol_pctile'] = vol_pctile
        # ════ FACTOR ATTRIBUTION (Ridge) ════
        # Map each selected feature to its factor group, then attribute the
        # Ridge prediction as coef * standardized value, summed per group.
        factor_map = {}
        for f in sel:
            for group, members in FACTOR_GROUPS.items():
                if f in members:
                    factor_map.setdefault(group, []).append(f)
                    break
        attr_window = min(60, len(X_tr_f))
        X_attr = X_tr_f.iloc[-attr_window:]
        y_attr = y_ret.iloc[-attr_window:]
        valid_attr = y_attr.notna()
        coefs = {}
        x_vals = {}
        if valid_attr.sum() > MIN_TRAIN_SAMPLES:
            try:
                lr_attr = Ridge(alpha=1.0)
                lr_attr.fit(X_attr.loc[valid_attr], y_attr.loc[valid_attr])
                coefs = dict(zip(sel, lr_attr.coef_))
                x_vals = dict(X_te_f.iloc[0])
                for group in FACTOR_GROUPS:
                    gf = factor_map.get(group, [])
                    contrib = sum(coefs.get(f, 0) * x_vals.get(f, 0) for f in gf)
                    rec[f'factor_{group}'] = float(contrib)
                abs_contribs = {g: abs(rec.get(f'factor_{g}', 0)) for g in factor_map}
                rec['top_factor'] = max(abs_contribs, key=abs_contribs.get) if abs_contribs else 'Unknown'
            except:
                rec['top_factor'] = 'Unknown'
        else:
            rec['top_factor'] = 'Unknown'
        # ════ REGIME DETECTION ════
        regime_info = detect_regime(sel, coefs, x_vals, rec['pred_vol'])
        rec['regime_match'] = regime_info['regime_match']
        rec['regime_similarity'] = regime_info['regime_similarity']
        rec['regime_type'] = regime_info['regime_type']
        # ════ SCENARIOS ════
        # Stress the standardized test row by fixed z-score shifts and re-score
        # with the importance-head LGBM.
        # NOTE(review): lgb50 comes from the FEATURE IMPORTANCE try-block; if
        # that fit failed, this either reuses a previous iteration's model or
        # raises NameError (caught below, falling back to pred_q50_1m). Confirm.
        scenarios = {}
        for scen_name, modifications in [
            ('vix_shock', {'vix_lag1': 2.0, 'vix_lag2': 1.5}),
            ('supply_cut', {'supply_saudi': -1.5, 'rig_count_us_new': -1.0}),
            ('demand_crash', {'pmi_us_mfg': -2.0, 'ipi_us': -1.5}),
        ]:
            X_stress = X_te_f.copy()
            for feat, mult in modifications.items():
                if feat in X_stress.columns:
                    X_stress[feat] = X_stress[feat] + mult
            try:
                stress_q50 = lgb50.predict(X_stress)[0]
                scenarios[scen_name] = float(stress_q50)
            except:
                scenarios[scen_name] = rec.get('pred_q50_1m', 0.0)
        rec['scenario_base'] = rec['pred_q50_1m']
        rec['scenario_vix_shock'] = scenarios.get('vix_shock', 0.0)
        rec['scenario_supply_cut'] = scenarios.get('supply_cut', 0.0)
        rec['scenario_demand_crash'] = scenarios.get('demand_crash', 0.0)
        results.append(rec)
    # ════ LIVE FORECAST: predict the latest months that have no target ════
    # NOTE(review): `sel` and `last_rec` carry over from the final walk-forward
    # iteration; the `len(results) > 0` guard ensures they exist.
    if wf_end < len(panel) and len(results) > 0:
        print(f"\n Live Forecast: {len(panel) - wf_end} 个最新月份 (无实际值)")
        last_rec = results[-1]
        for li in range(wf_end, len(panel)):
            test_df = panel.iloc[li:li + 1]
            test_date = panel.index[li]
            train_df = panel.iloc[max(0, li - train_window):li]
            avail = [f for f in features if train_df[f].notna().mean() > 0.8]
            if len(avail) < 3:
                continue
            # Same impute + scale procedure as the main loop.
            X_tr = train_df[avail].copy()
            X_te = test_df[avail].copy()
            for col in avail:
                med = X_tr[col].median()
                X_tr[col] = X_tr[col].fillna(med)
                X_te[col] = X_te[col].fillna(med)
            scaler = StandardScaler()
            X_tr_s = pd.DataFrame(scaler.fit_transform(X_tr), columns=avail, index=train_df.index)
            X_te_s = pd.DataFrame(scaler.transform(X_te), columns=avail, index=test_df.index)
            # Reuse the fixed feature set from the last walk-forward step.
            sel_live = [f for f in sel if f in avail]
            if len(sel_live) < 2:
                sel_live = avail[:MAX_FEATURES]
            X_tr_f = X_tr_s[sel_live]
            X_te_f = X_te_s[sel_live]
            ewma_now = test_df['ewma_vol'].values[0] if 'ewma_vol' in test_df.columns else 0.05
            baseline_vol = float(ewma_now) if not np.isnan(ewma_now) else 0.05
            live_rec = {
                'test_date': test_date,
                'actual_ret_1m': np.nan,
                'actual_ret_3m': np.nan,
                'actual_vol': np.nan,
                'actual_up': np.nan,
                'baseline_ewma': baseline_vol,
                'is_live': True,
            }
            # Refit lightweight models on the most recent training data.
            y_ret = train_df['target_ret_1m'].dropna()
            y_vol = train_df['target_abs_ret_1m'].dropna()
            try:
                # QR 1M
                # NOTE(review): quantiles 0.05/0.95 are stored under the
                # q10/q90 keys here, while the main loop uses 0.10/0.90 —
                # live intervals are wider than backtest ones. Confirm intent.
                for alpha, suffix in [(0.05, 'q10'), (0.5, 'q50'), (0.95, 'q90')]:
                    qr = QuantileRegressor(quantile=alpha, alpha=0.1, solver='highs')
                    mask_qr = y_ret.index.isin(X_tr_f.index)
                    y_qr = y_ret[mask_qr]
                    X_qr = X_tr_f.loc[y_qr.index]
                    if len(X_qr) > 10:
                        qr.fit(X_qr, y_qr)
                        val = float(qr.predict(X_te_f)[0])
                        live_rec[f'pred_{suffix}_1m'] = val
                # LightGBM
                mask_lgb = y_ret.index.isin(X_tr_f.index)
                y_lgb = y_ret[mask_lgb]
                X_lgb = X_tr_f.loc[y_lgb.index]
                if len(X_lgb) > 10:
                    lgb50 = lgb.LGBMRegressor(objective='quantile', alpha=0.5,
                                              n_estimators=200, max_depth=4, verbose=-1)
                    lgb50.fit(X_lgb, y_lgb)
                    live_rec['lgb_q50'] = float(lgb50.predict(X_te_f)[0])
                # Vol
                # NOTE(review): unlike the main loop (Ridge on log-ratio of
                # vol/EWMA), this fits Ridge directly on |returns| and floors
                # at half the EWMA baseline — simpler by design, presumably.
                mask_vol = y_vol.index.isin(X_tr_f.index)
                y_v = y_vol[mask_vol]
                X_v = X_tr_f.loc[y_v.index]
                if len(X_v) > 10:
                    ridge = Ridge(alpha=1.0)
                    ridge.fit(X_v, y_v)
                    live_rec['pred_vol'] = max(float(ridge.predict(X_te_f)[0]), baseline_vol * 0.5)
                else:
                    live_rec['pred_vol'] = baseline_vol
            except Exception as e:
                live_rec['pred_vol'] = baseline_vol
            # Risk level + bias, derived from interval width instead of the
            # vol percentile used in the main loop (no realized vol here).
            lo = live_rec.get('pred_q10_1m', -0.05)
            hi = live_rec.get('pred_q90_1m', 0.05)
            q50 = live_rec.get('pred_q50_1m', 0.0)
            width = abs(hi - lo)
            live_rec['risk_level'] = 'High' if width > 0.15 else 'Medium' if width > 0.08 else 'Low'
            live_rec['risk_score'] = width
            upside = hi - q50
            downside = q50 - lo
            live_rec['risk_bias'] = 'Upward' if upside > 1.15 * downside else (
                'Downward' if downside > 1.15 * upside else 'Balanced')
            live_rec['vol_pctile'] = 0.5  # cannot be computed live; default to medium
            # Copy structure from the last backtest record for missing keys.
            for key in ['regime_match', 'regime_similarity', 'regime_type',
                        'scenario_base', 'scenario_vix_shock', 'scenario_supply_cut',
                        'scenario_demand_crash', 'top_factor']:
                if key not in live_rec and key in last_rec:
                    live_rec[key] = last_rec[key]
            # Regime detection (no attribution fit → empty coefs, so only the
            # vol_level dimension of the regime vector is non-zero).
            x_vals = {f: float(X_te_f[f].values[0]) for f in sel_live}
            coefs = {}
            regime_info = detect_regime(sel_live, coefs, x_vals, live_rec.get('pred_vol', 0.05))
            live_rec.update(regime_info)
            # Scenarios (same z-score stresses as the main loop).
            for scen_name, modifications in [
                ('vix_shock', {'vix_lag1': 2.0, 'vix_lag2': 1.5}),
                ('supply_cut', {'supply_saudi': -1.5, 'rig_count_us_new': -1.0}),
                ('demand_crash', {'pmi_us_mfg': -2.0, 'ipi_us': -1.5}),
            ]:
                X_stress = X_te_f.copy()
                for feat, mult in modifications.items():
                    if feat in X_stress.columns:
                        X_stress[feat] = X_stress[feat] + mult
                try:
                    # lgb50 may be undefined if the LightGBM fit was skipped;
                    # the except below falls back to the live median forecast.
                    live_rec[f'scenario_{scen_name}'] = float(lgb50.predict(X_stress)[0])
                except:
                    live_rec[f'scenario_{scen_name}'] = live_rec.get('pred_q50_1m', 0.0)
            live_rec['scenario_base'] = live_rec.get('pred_q50_1m', 0.0)
            # Factor attribution (simplified: carried over from last backtest step).
            for group in FACTOR_GROUPS:
                live_rec[f'factor_{group}'] = last_rec.get(f'factor_{group}', 0.0)
            results.append(live_rec)
            print(f" ✓ {test_date.strftime('%Y-%m')}: Q50={live_rec.get('pred_q50_1m', 0):.3f}, "
                  f"Risk={live_rec['risk_level']}")
    return pd.DataFrame(results), shap_records