"""
upload_data.py
--------------
Uploads the ODIN runtime data to Hugging Face Hub (run this ONCE as the repo owner).
Uploads:
data/processed/ — cleaned DDR / WITSML / EDM CSVs
data/knowledge_base/ — Volve history ChromaDB vector store
data/viking_context/ — OpenViking ChromaDB vector store
Usage:
huggingface-cli login # authenticate first
python scripts/upload_data.py
Requirements:
pip install huggingface_hub
"""
import sys
from pathlib import Path
# Hugging Face dataset repo that will host the ODIN runtime data.
HF_REPO_ID = "KoopaK/OdinDB"
# Project root: this script lives in <root>/scripts/, so go up two levels.
ROOT = Path(__file__).parent.parent
# Local folders mirrored into the dataset repo, each uploaded under its
# path relative to ROOT (e.g. data/processed).
UPLOAD_DIRS = [
    ROOT / "data" / "processed",
    ROOT / "data" / "knowledge_base",
    ROOT / "data" / "viking_context",
]
def main():
    """Create the HF dataset repo (idempotent) and upload each data folder.

    Requires a prior `huggingface-cli login`; exits with status 1 if
    huggingface_hub is not installed.
    """
    # Import lazily so we can give a friendly hint instead of a traceback.
    try:
        from huggingface_hub import HfApi, create_repo
    except ImportError:
        print("huggingface_hub not installed. Run: pip install huggingface_hub")
        sys.exit(1)

    hub = HfApi()

    # exist_ok=True makes repeated runs safe; any other failure (e.g. auth)
    # is reported but does not abort — the uploads below will surface it.
    try:
        create_repo(HF_REPO_ID, repo_type="dataset", exist_ok=True, private=False)
        print(f"Dataset repo ready: https://huggingface.co/datasets/{HF_REPO_ID}\n")
    except Exception as exc:
        print(f"Repo creation warning (may already exist): {exc}")

    for src_dir in UPLOAD_DIRS:
        # Skip folders the owner has not generated locally.
        if not src_dir.exists():
            print(f"Skipping {src_dir} (not found)")
            continue

        # Mirror the folder under its ROOT-relative path, e.g. data/processed.
        repo_path = src_dir.relative_to(ROOT)
        print(f"Uploading {src_dir} → {repo_path} …")
        hub.upload_folder(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            folder_path=str(src_dir),
            path_in_repo=str(repo_path),
        )
        print(f" ✓ {repo_path} uploaded\n")

    print("All done. Judges can now download with:")
    print(" python scripts/download_data.py")
# Script entry point: run the one-time upload when executed directly.
if __name__ == "__main__":
    main()