# oilverse-api/scripts/fetch_census.py
# 孙家明
# deploy: OilVerse for HuggingFace (Node.js 18 fix)
# fab9847
"""
For GitHub Actions: download petroleum trade data from the US Census Bureau.
v3: use the correct API endpoint.
"""
import pandas as pd
import requests, os
OUTPUT = 'data/cloud'


def build_param_sets(flow, val_field):
    """Return the query-parameter sets to try for one trade flow, in order.

    The sets go from the most specific commodity level (HS6 ``270900``) to the
    broadest (HS2 chapter ``27``); the caller stops at the first one that
    returns rows.

    Args:
        flow: ``'exports'`` or ``'imports'``; selects the commodity predicate.
        val_field: name of the monthly value variable to request for this flow.

    Returns:
        A list of three dicts suitable for ``requests.get(..., params=...)``.
    """
    # Census International Trade API - correct parameter names.
    # Docs: https://api.census.gov/data/timeseries/intltrade/exports/hs.html
    # The exports dataset names its commodity predicate E_COMMODITY, the
    # imports dataset I_COMMODITY; sending the wrong one is rejected.
    commodity_field = 'E_COMMODITY' if flow == 'exports' else 'I_COMMODITY'
    columns = f'CTY_CODE,CTY_NAME,{val_field}'
    levels = (('HS6', '270900'), ('HS4', '2709'), ('HS2', '27'))
    return [
        {
            'get': columns,
            'COMM_LVL': level,
            commodity_field: code,
            'time': 'from 2024-01',
        }
        for level, code in levels
    ]


def log_available_variables(url, flow):
    """Print the first ten variable names the endpoint advertises.

    Purely diagnostic: any failure is reported and otherwise ignored so the
    actual data fetch still runs.
    """
    try:
        var_resp = requests.get(f"{url}/variables.json", timeout=10)
        if var_resp.status_code != 200:
            return
        vars_data = var_resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f" {flow} variable lookup failed: {str(e)[:50]}")
        return
    if not isinstance(vars_data, dict):
        return
    variables = vars_data.get('variables', {})
    if not isinstance(variables, dict):
        return
    avail = list(variables.keys())[:10]
    print(f" {flow} available vars: {avail}")


def fetch_flow(flow, val_field):
    """Fetch one trade flow, trying each parameter set until one yields rows.

    Args:
        flow: ``'exports'`` or ``'imports'``.
        val_field: monthly value variable to request for this flow.

    Returns:
        A DataFrame with the API's columns plus a ``flow`` column, or ``None``
        when every attempt failed or returned no rows.
    """
    url = f"https://api.census.gov/data/timeseries/intltrade/{flow}/hs"
    # Check which variables are available first.
    log_available_variables(url, flow)

    # Try the different parameter combinations.
    for i, params in enumerate(build_param_sets(flow, val_field)):
        try:
            resp = requests.get(url, params=params, timeout=30)
            if resp.status_code != 200:
                if i == 0:  # only log first failure
                    print(f" ✗ {flow} attempt {i+1}: HTTP {resp.status_code} - {resp.text[:100]}")
                continue
            data = resp.json()
            # Expected shape: a list whose first row is the header.
            if not isinstance(data, list) or len(data) <= 1:
                continue
            df = pd.DataFrame(data[1:], columns=data[0])
        except (requests.RequestException, ValueError, TypeError) as e:
            print(f" ✗ {flow} attempt {i+1}: {str(e)[:50]}")
            continue
        df['flow'] = flow
        print(f" ✓ {flow} attempt {i+1}: {len(df)} rows")
        return df
    return None


def main():
    """Download both trade flows and write them to ``census_oil_trade.csv``.

    When nothing could be fetched an empty placeholder file is written so the
    output path always exists.
    """
    os.makedirs(OUTPUT, exist_ok=True)
    print("Fetching US Census trade data...")

    trades = []
    for flow, val_field in [('exports', 'ALL_VAL_MO'), ('imports', 'GEN_VAL_MO')]:
        df = fetch_flow(flow, val_field)
        if df is not None:
            trades.append(df)

    outpath = os.path.join(OUTPUT, 'census_oil_trade.csv')
    if trades:
        df_all = pd.concat(trades, ignore_index=True)
        df_all.to_csv(outpath, index=False)
        print(f"\n✓ Saved {len(df_all)} rows to {outpath}")
    else:
        pd.DataFrame().to_csv(outpath)
        print("\n⚠ No trade data, saved empty placeholder")


if __name__ == "__main__":
    main()