oilverse-api / output /v2_feature_selection.json
孙家明
deploy: OilVerse for HuggingFace (Node.js 18 fix)
fab9847
{
"total_raw": 949,
"n_csv_files": 33,
"stages": [
{
"stage": 0,
"name": "原始指标",
"count": 949,
"rule": "33个CSV文件"
},
{
"stage": 1,
"name": "数据可用性",
"count": 136,
"rule": "缺失率<30%, 覆盖>120月"
},
{
"stage": 2,
"name": "单变量相关性",
"count": 82,
"rule": "|corr(feature, return)|>0.03"
},
{
"stage": 3,
"name": "共线性去重",
"count": 70,
"rule": "组内pair |corr|<0.85"
},
{
"stage": 4,
"name": "MI非线性筛选",
"count": 70,
"rule": "MI(feature; return)排序"
},
{
"stage": 5,
"name": "最终选择",
"count": 18,
"rule": "经济学意义+因子组分配"
}
],
"final_features": [
{
"feature": "Brent_spot",
"factor_group": "Price",
"mi_score": 0,
"corr": 0
},
{
"feature": "natgas_spot_henry",
"factor_group": "Price",
"mi_score": 0,
"corr": 0
},
{
"feature": "iron_ore_spot",
"factor_group": "Price",
"mi_score": 0,
"corr": 0
},
{
"feature": "rig_count_us_new",
"factor_group": "Supply",
"mi_score": 0.0041,
"corr": -0.0327
},
{
"feature": "supply_saudi",
"factor_group": "Supply",
"mi_score": 0,
"corr": 0
},
{
"feature": "us_oil_inventory_total",
"factor_group": "Supply",
"mi_score": 0,
"corr": 0
},
{
"feature": "pmi_us_mfg",
"factor_group": "Demand",
"mi_score": 0,
"corr": 0
},
{
"feature": "usd_index",
"factor_group": "Demand",
"mi_score": 0.0205,
"corr": -0.0446
},
{
"feature": "nonfarm_us",
"factor_group": "Demand",
"mi_score": 0,
"corr": 0
},
{
"feature": "ipi_us",
"factor_group": "Demand",
"mi_score": 0,
"corr": 0
},
{
"feature": "vix_lag1",
"factor_group": "Risk_Geo",
"mi_score": 0,
"corr": 0
},
{
"feature": "vix_lag2",
"factor_group": "Risk_Geo",
"mi_score": 0.0661,
"corr": 0.1591
},
{
"feature": "mom1m_lag1",
"factor_group": "Technical",
"mi_score": 0.0341,
"corr": 0.0906
},
{
"feature": "hist_vol_12m",
"factor_group": "Technical",
"mi_score": 0.065,
"corr": 0.2184
},
{
"feature": "rsi12m",
"factor_group": "Technical",
"mi_score": 0.0901,
"corr": 0.2111
},
{
"feature": "news_oil_sentiment",
"factor_group": "Alternative",
"mi_score": 0.0,
"corr": 0.0311
},
{
"feature": "news_geo_tone",
"factor_group": "Alternative",
"mi_score": 0.0566,
"corr": -0.0511
},
{
"feature": "news_article_volume",
"factor_group": "Alternative",
"mi_score": 0.017,
"corr": 0.2619
}
],
"rejected_examples": [
"mom1m",
"bb_pct",
"mom3m",
"sota_ts_residual",
"mom6m",
"natgas_index",
"cpi_japan",
"hibor_3m",
"gdp_australia_yoy",
"bb_lower"
],
"file_inventory": [
{
"file": "commodities.csv",
"n_cols": 31,
"cols": [
"指标名称",
"国际现货价格:黄金",
"国际现货价格:白银",
"国际现货价格:铜(A级)",
"国际现货价格:铝",
"国际现货价格:铅",
"国际现货价格:锌",
"国际现货价格:镍",
"实际市场价:橡胶:全球:当月值",
"实际市场价:可可豆:全球:当月值"
]
},
{
"file": "commodity_extended.csv",
"n_cols": 38,
"cols": [
"natgas_index",
"agri_prices_橡胶",
"agri_prices_可可豆",
"agri_prices_罗布斯塔咖啡(豆)",
"agri_prices_大豆",
"agri_prices_大麦",
"agri_prices_燕麦",
"agri_prices_猪的肉",
"agri_prices_家禽",
"agri_prices_小麦"
]
},
{
"file": "financial_markets.csv",
"n_cols": 22,
"cols": [
"指标名称",
"CBOE:波动率指数(VIX):收盘",
"CBOE:波动性指数(VVIX):开盘",
"标准普尔500波动率指数(VIX)",
"指标名称.1",
"美元兑欧元",
"美元兑人民币",
"美元兑日元",
"美元兑瑞士法郎",
"美元兑英镑"
]
},
{
"file": "geopolitical_shocks.csv",
"n_cols": 27,
"cols": [
"death_of_king",
"pingjin",
"crisis_of_iran",
"turkey_to_iraq",
"us_to_iran",
"dep_of_dollar",
"finance_crisis",
"QE1",
"debt_of_pigs",
"QE2"
]
},
{
"file": "global_gdp.csv",
"n_cols": 14,
"cols": [
"gdp_china",
"gdp_china_growth",
"gdp_us",
"gdp_japan",
"gdp_germany",
"gdp_uk",
"gdp_france",
"gdp_australia",
"gdp_newzealand",
"gdp_india_latam_国内生产总值[亿]"
]
},
{
"file": "iea_energy_policy.csv",
"n_cols": 15,
"cols": [
"title",
"description",
"status",
"jurisdiction",
"source",
"datePromulgated",
"yearEnded",
"learnMore",
"learnMoreLanguage",
"dateModified"
]
},
{
"file": "live_market_data.csv",
"n_cols": 16,
"cols": [
"wti_futures_live",
"brent_futures_live",
"oil_vix_live",
"vix_live",
"uso_etf_live",
"xle_etf_live",
"xop_etf_live",
"gold_futures_live",
"natgas_futures_live",
"heating_oil_live"
]
},
{
"file": "llm_event_scores.csv",
"n_cols": 3,
"cols": [
"event_name",
"llm_intensity",
"llm_sentiment"
]
},
{
"file": "macro_economics.csv",
"n_cols": 41,
"cols": [
"指标名称",
"中国:制造业PMI",
"美国:ISM:制造业PMI",
"欧洲:制造业PMI",
"中国:非制造业PMI:服务业",
"美国:ISM:服务业PMI",
"美国:工业生产指数:非季调",
"欧洲:工业生产指数",
"中国:工业生产指数(生产加权):季调",
"美国:非农就业人数:季调:环比"
]
},
{
"file": "macro_extended.csv",
"n_cols": 20,
"cols": [
"cpi_china",
"pmi_china",
"ppi_china",
"pmi_china_nonsvc",
"cpi_us",
"pmi_us",
"ppi_us_同比",
"ppi_us_同比.1",
"pmi_us_svc",
"employment_us_季调"
]
},
{
"file": "oil_composite_features.csv",
"n_cols": 12,
"cols": [
"指标名称",
"us_oil_consumption",
"india_oil_consumption",
"china_oil_consumption",
"china_oil_import",
"china_refinery_yoy",
"global_reserves",
"murban_futures",
"rig_count_us_new",
"rig_count_opec_new"
]
},
{
"file": "oil_demand_by_product.csv",
"n_cols": 42,
"cols": [
"指标名称",
"石油需求量:欧洲四国:液化石油气:当月值",
"石油需求量:欧洲四国:汽油:当月值",
"Unnamed: 3",
"指标名称.1",
"石油需求量:欧洲四国:石脑油:当月值",
"Unnamed: 6",
"指标名称.2",
"石油需求量:欧洲四国:汽油:当月值.1",
"石油需求量:欧洲四国:航空煤油:当月值"
]
},
{
"file": "oil_futures_positioning.csv",
"n_cols": 39,
"cols": [
"指标名称",
"期货收盘价(连续):Murban原油:ICE",
"ICE:原油:期货和期权:净持仓多头前8名占比",
"ICE:原油:期货:掉期商:多头持仓:交易者数量",
"ICE:原油:期货:净持仓多头前4名占比",
"ICE:原油:期货和期权:掉期商:多头持仓",
"ICE:原油:期货:管理基金:多头持仓:持仓数量占比",
"ICE:原油:期货和期权:管理基金:套利持仓",
"ICE:原油:期货:掉期商:多头持仓",
"ICE:原油:期货和期权:管理基金:空头持仓:持仓数量占比"
]
},
{
"file": "oil_market_extended.csv",
"n_cols": 28,
"cols": [
"wti_futures_settlement",
"brent_futures_settlement",
"opec_basket",
"micex_brent",
"ice_brent",
"oil_futures_close",
"ice_positioning_1_ICE低硫轻质原油(WTI)期货期权持仓数量占比净持仓前8名多头",
"ice_positioning_1_ICE低硫轻质原油(WTI)期货期权持仓数量占比净持仓前4名多头",
"ice_positioning_1_ICE低硫轻质原油(WTI)期货期权持仓交易者数量报告头寸商业交易商多头",
"ice_positioning_1_ICE低硫轻质原油(WTI)期货期权持仓数量占比非报告头寸空头总计"
]
},
{
"file": "oil_prices.csv",
"n_cols": 23,
"cols": [
"指标名称",
"现货价:WTI原油",
"Unnamed: 2",
"Unnamed: 3",
"指标名称.1",
"现货价:Brent原油",
"Unnamed: 6",
"Unnamed: 7",
"指标名称.2",
"市场价(亚洲FOB):原油:阿曼"
]
}
]
}