Odin / scripts /upload_data.py
ODIN
Update HF dataset repo ID to KoopaK/OdinDB
9e6cc8a
"""
upload_data.py
--------------
Uploads the ODIN runtime data to Hugging Face Hub (run this ONCE as the repo owner).
Uploads:
data/processed/ — cleaned DDR / WITSML / EDM CSVs
data/knowledge_base/ — Volve history ChromaDB vector store
data/viking_context/ — OpenViking ChromaDB vector store
Usage:
huggingface-cli login # authenticate first
python scripts/upload_data.py
Requirements:
pip install huggingface_hub
"""
import sys
from pathlib import Path
HF_REPO_ID = "KoopaK/OdinDB"
ROOT = Path(__file__).parent.parent
UPLOAD_DIRS = [
ROOT / "data" / "processed",
ROOT / "data" / "knowledge_base",
ROOT / "data" / "viking_context",
]
def main():
try:
from huggingface_hub import HfApi, create_repo
except ImportError:
print("huggingface_hub not installed. Run: pip install huggingface_hub")
sys.exit(1)
api = HfApi()
# Create dataset repo if it doesn't exist
try:
create_repo(HF_REPO_ID, repo_type="dataset", exist_ok=True, private=False)
print(f"Dataset repo ready: https://huggingface.co/datasets/{HF_REPO_ID}\n")
except Exception as e:
print(f"Repo creation warning (may already exist): {e}")
for folder in UPLOAD_DIRS:
if not folder.exists():
print(f"Skipping {folder} (not found)")
continue
hf_path = folder.relative_to(ROOT) # e.g. data/processed
print(f"Uploading {folder}{hf_path} …")
api.upload_folder(
repo_id = HF_REPO_ID,
repo_type = "dataset",
folder_path = str(folder),
path_in_repo= str(hf_path),
)
print(f" ✓ {hf_path} uploaded\n")
print("All done. Judges can now download with:")
print(" python scripts/download_data.py")
if __name__ == "__main__":
main()