"""
upload_data.py
--------------
Uploads the ODIN runtime data to Hugging Face Hub (run this ONCE as the repo owner).

Uploads:
  data/processed/       — cleaned DDR / WITSML / EDM CSVs
  data/knowledge_base/  — Volve history ChromaDB vector store
  data/viking_context/  — OpenViking ChromaDB vector store

Usage:
  huggingface-cli login          # authenticate first
  python scripts/upload_data.py

Requirements:
  pip install huggingface_hub
"""
| import sys |
| from pathlib import Path |
|
|
# Hugging Face dataset repository that receives the uploaded folders.
HF_REPO_ID = "KoopaK/OdinDB"

# Two levels above this file; upload paths in the repo are computed relative
# to this directory.
ROOT = Path(__file__).parent.parent

# Local data folders to publish, in upload order.
_DATA_FOLDER_NAMES = ("processed", "knowledge_base", "viking_context")
UPLOAD_DIRS = [ROOT / "data" / name for name in _DATA_FOLDER_NAMES]
|
|
def main():
    """Create the dataset repo if needed and upload every existing data folder.

    Each directory in ``UPLOAD_DIRS`` that exists locally is uploaded to the
    ``HF_REPO_ID`` dataset repo, under the same path it has relative to
    ``ROOT``. Directories that do not exist are skipped with a notice.

    Exits with status 1 when ``huggingface_hub`` is not installed, or when
    none of the configured directories exist (so nothing was uploaded).
    Errors raised by ``upload_folder`` are not caught and propagate to the
    caller.
    """
    # Imported here so a missing dependency produces an install hint instead
    # of a traceback at module import time.
    try:
        from huggingface_hub import HfApi, create_repo
    except ImportError:
        print("huggingface_hub not installed. Run: pip install huggingface_hub")
        sys.exit(1)

    api = HfApi()

    # Best-effort: a failure here is only reported; the upload calls below
    # will raise if the repo is genuinely unusable.
    try:
        create_repo(HF_REPO_ID, repo_type="dataset", exist_ok=True, private=False)
        print(f"Dataset repo ready: https://huggingface.co/datasets/{HF_REPO_ID}\n")
    except Exception as e:
        print(f"Repo creation warning (may already exist): {e}")

    uploaded = 0
    for folder in UPLOAD_DIRS:
        if not folder.exists():
            print(f"Skipping {folder} (not found)")
            continue
        # Repo paths are '/'-separated; str(Path) would yield backslashes on
        # Windows, so convert explicitly.
        hf_path = folder.relative_to(ROOT).as_posix()
        print(f"Uploading {folder} → {hf_path} …")
        api.upload_folder(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            folder_path=str(folder),
            path_in_repo=hf_path,
        )
        print(f" ✓ {hf_path} uploaded\n")
        uploaded += 1

    # Do not claim success when every folder was skipped.
    if not uploaded:
        print("Nothing uploaded: none of the data folders were found.")
        sys.exit(1)

    print("All done. Judges can now download with:")
    print(" python scripts/download_data.py")
|
|
# Script entry point: run the upload only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
|