Spaces:
Build error
Build error
"""
For GitHub Actions: download the World Bank commodity price "Pink Sheet".

v3: does not depend on date parsing; every data row is saved as-is, keyed by
its original index label.
"""
import os
import traceback

import pandas as pd

OUTPUT = 'data/cloud'
URL = "https://thedocs.worldbank.org/en/doc/5d903e848db1d1b83e0ec8f744e55570-0350012021/related/CMO-Historical-Data-Monthly.xlsx"


def keep_numeric_rows(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of *df* whose first column parses as a number.

    Used to drop text-only rows from the raw sheet. Only the first column is
    inspected; the other columns are returned untouched (not yet converted).
    """
    first_col_numeric = pd.to_numeric(df.iloc[:, 0], errors='coerce')
    return df[first_col_numeric.notna()]


def to_numeric_table(df: pd.DataFrame) -> pd.DataFrame:
    """Convert every column of *df* to numbers and drop fully empty rows.

    Cells that cannot be parsed become NaN; a row is removed only when all of
    its cells are NaN after conversion.
    """
    converted = df.apply(pd.to_numeric, errors='coerce')
    return converted.dropna(how='all')


def main() -> None:
    """Download the Pink Sheet workbook and save its monthly prices as CSV.

    The result is written to ``OUTPUT/worldbank_commodities.csv``. Any failure
    is reported on stdout together with a traceback, but is not re-raised:
    the script stays best-effort and does not change the process exit code.
    """
    os.makedirs(OUTPUT, exist_ok=True)
    print("Fetching World Bank Pink Sheet...")

    try:
        # The context manager closes the workbook handle even if parsing fails.
        with pd.ExcelFile(URL, engine='openpyxl') as xls:
            print(f" Sheets: {xls.sheet_names}")
            # Read "Monthly Prices", skipping the title rows above the header.
            df = pd.read_excel(xls, sheet_name='Monthly Prices', skiprows=4, index_col=0)

        print(f" Raw: {df.shape}")
        # Guard the sample lookup so an empty sheet does not raise IndexError.
        first_label_type = type(df.index[0]) if len(df.index) else None
        print(f" Index samples: {list(df.index[:3])}, type: {first_label_type}")

        # The dates live in the index, possibly in "1960M01" format. The raw
        # index is kept as-is on purpose; no to_datetime conversion is done.
        # Keep only rows that hold numeric values (drop text-only rows).
        df = keep_numeric_rows(df)
        print(f" After numeric filter: {df.shape}")

        # Convert everything to numbers.
        df_out = to_numeric_table(df)

        outpath = os.path.join(OUTPUT, 'worldbank_commodities.csv')
        df_out.to_csv(outpath)
        print(f"✓ Saved {len(df_out)} rows × {len(df_out.columns)} cols")
        print(f" Last 3 rows index: {list(df_out.index[-3:])}")
        print(f" Columns: {list(df_out.columns[:10])}...")
    except Exception as e:
        # Top-level boundary: log the failure with a traceback and carry on.
        print(f"✗ Failed: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()