Spaces:

WHUCS-Jiaming
/

oilverse-api

Build error

oilverse-api / scripts /fetch_worldbank.py

孙家明

deploy: OilVerse for HuggingFace (Node.js 18 fix)

fab9847 24 days ago

1.57 kB

	"""
	GitHub Actions 用: 从 World Bank 下载商品价格 Pink Sheet
	v3: 不依赖日期解析，直接保存所有数据
	"""
	import pandas as pd
	import os

	OUTPUT = 'data/cloud'
	os.makedirs(OUTPUT, exist_ok=True)

	print("Fetching World Bank Pink Sheet...")
	url = "https://thedocs.worldbank.org/en/doc/5d903e848db1d1b83e0ec8f744e55570-0350012021/related/CMO-Historical-Data-Monthly.xlsx"

	try:
	xls = pd.ExcelFile(url, engine='openpyxl')
	print(f" Sheets: {xls.sheet_names}")

	# 读 Monthly Prices，跳过标题行
	df = pd.read_excel(xls, sheet_name='Monthly Prices', skiprows=4, index_col=0)
	print(f" Raw: {df.shape}")
	print(f" Index samples: {list(df.index[:3])}, type: {type(df.index[0])}")

	# 日期在 index 里，可能是 "1960M01" 格式
	# 直接保留原始 index，不做 to_datetime
	# 只保留有数值的行（去掉纯文字行）
	first_col = df.iloc[:, 0]
	numeric_mask = pd.to_numeric(first_col, errors='coerce').notna()
	df = df[numeric_mask]
	print(f" After numeric filter: {df.shape}")

	# 转数值
	df_out = df.apply(pd.to_numeric, errors='coerce')
	df_out = df_out.dropna(how='all')

	outpath = os.path.join(OUTPUT, 'worldbank_commodities.csv')
	df_out.to_csv(outpath)
	print(f"✓ Saved {len(df_out)} rows × {len(df_out.columns)} cols")
	print(f" Last 3 rows index: {list(df_out.index[-3:])}")
	print(f" Columns: {list(df_out.columns[:10])}...")

	except Exception as e:
	print(f"✗ Failed: {e}")
	import traceback
	traceback.print_exc()