# oilverse-api / scripts /fetch_cftc.py
# 孙家明
# deploy: OilVerse for HuggingFace (Node.js 18 fix)
# fab9847
"""
For GitHub Actions: download the COT positioning reports from the CFTC.
"""
import datetime
import io
import os
import zipfile

import pandas as pd
import requests
OUTPUT = 'data/cloud'

# Yearly archive URL template; ``{year}`` is the four-digit calendar year.
CFTC_URL = "https://www.cftc.gov/files/dea/history/fut_disagg_xls_{year}.zip"

# Regex alternation matched case-insensitively against the market-name column.
OIL_PATTERN = 'CRUDE OIL|PETROLEUM|BRENT|WTI'

# Column of the CFTC spreadsheet that holds the market / exchange name.
MARKET_COLUMN = 'Market_and_Exchange_Names'


def report_years(today=None):
    """Return ``[previous_year, current_year]`` relative to *today*.

    Args:
        today: A ``datetime.date`` used as the reference day. Defaults to
            the current local date, so the script never needs its year
            list edited by hand.

    Returns:
        A two-element list of ints, oldest year first.
    """
    if today is None:
        today = datetime.date.today()
    return [today.year - 1, today.year]


def filter_oil_rows(df):
    """Return the rows of *df* whose market name matches ``OIL_PATTERN``.

    Rows with a missing market name are treated as non-matching.

    Raises:
        KeyError: If *df* has no ``Market_and_Exchange_Names`` column.
    """
    names = df[MARKET_COLUMN]
    return df[names.str.contains(OIL_PATTERN, case=False, na=False)]


def iter_oil_frames(year):
    """Download the archive for *year* and yield its oil-related rows.

    One DataFrame is yielded per spreadsheet in the zip that contains at
    least one matching row. A non-200 response is reported and yields
    nothing. Network, zip and parsing errors propagate to the caller.

    This is a generator so that frames already produced for a year are kept
    by the caller even if a later spreadsheet in the same archive fails.
    """
    url = CFTC_URL.format(year=year)
    resp = requests.get(url, timeout=60)
    if resp.status_code != 200:
        # Report the skip so a missing year is visible in the CI log.
        print(f" ✗ {year}: HTTP {resp.status_code}")
        return

    # Context managers make sure the archive and member handles are closed.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as archive:
        for fname in archive.namelist():
            if not fname.endswith(('.xls', '.xlsx')):
                continue
            with archive.open(fname) as sheet:
                df = pd.read_excel(sheet)
            # Keep only crude-oil related markets.
            oil = filter_oil_rows(df)
            if len(oil) > 0:
                print(f" ✓ {year}: {len(oil)} rows from {fname}")
                yield oil


def main():
    """Fetch last year's and this year's reports and write them to one CSV.

    The output goes to ``cftc_positioning.csv`` under ``OUTPUT``. Each year
    is best-effort: a failure is printed and the remaining years are still
    processed. If nothing was fetched, a message is printed and no file is
    written.
    """
    os.makedirs(OUTPUT, exist_ok=True)
    print("Fetching CFTC COT reports...")

    all_data = []
    for year in report_years():
        try:
            # Append one frame at a time so partial results survive an
            # error raised part-way through the year's archive.
            for oil in iter_oil_frames(year):
                all_data.append(oil)
        except Exception as e:
            # Top-level boundary: log and move on to the next year.
            print(f" ✗ {year}: {str(e)[:50]}")

    if all_data:
        df_all = pd.concat(all_data, ignore_index=True)
        outpath = os.path.join(OUTPUT, 'cftc_positioning.csv')
        df_all.to_csv(outpath, index=False)
        print(f"✓ Saved {len(df_all)} rows to {outpath}")
    else:
        print("✗ No CFTC data fetched")


if __name__ == "__main__":
    main()