File size: 1,868 Bytes
67e93c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e6cc8a
67e93c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
upload_data.py
--------------
Uploads the ODIN runtime data to Hugging Face Hub (run this ONCE as the repo owner).

Uploads:
  data/processed/      — cleaned DDR / WITSML / EDM CSVs
  data/knowledge_base/ — Volve history ChromaDB vector store
  data/viking_context/ — OpenViking ChromaDB vector store

Usage:
    huggingface-cli login          # authenticate first
    python scripts/upload_data.py

Requirements:
    pip install huggingface_hub
"""
import sys
from pathlib import Path

# Hugging Face dataset repository that receives the uploaded folders.
HF_REPO_ID = "KoopaK/OdinDB"

# Directory two levels above this file; upload paths are made relative to it.
ROOT = Path(__file__).parent.parent

# Local folders to upload, each located under ROOT / "data".
UPLOAD_DIRS = [
    ROOT / "data" / name
    for name in ("processed", "knowledge_base", "viking_context")
]

def main():
    """Create the dataset repo if needed and upload every folder in UPLOAD_DIRS.

    Exits with status 1 when ``huggingface_hub`` cannot be imported. An error
    raised while creating the repo is printed and the uploads are still
    attempted. Folders that do not exist locally are skipped with a message.
    Each remaining folder is uploaded to the same path it has relative to ROOT.
    """
    # Imported here so a missing dependency produces an install hint
    # instead of an import traceback.
    try:
        from huggingface_hub import HfApi, create_repo
    except ImportError:
        print("huggingface_hub not installed. Run: pip install huggingface_hub")
        sys.exit(1)

    api = HfApi()

    # Create dataset repo if it doesn't exist
    try:
        create_repo(HF_REPO_ID, repo_type="dataset", exist_ok=True, private=False)
        print(f"Dataset repo ready: https://huggingface.co/datasets/{HF_REPO_ID}\n")
    except Exception as e:
        # Deliberately best-effort: report the problem and still try to upload.
        print(f"Repo creation warning (may already exist): {e}")

    for folder in UPLOAD_DIRS:
        if not folder.exists():
            print(f"Skipping {folder} (not found)")
            continue
        # Build the repo path with forward slashes; str() of a relative Path
        # would use backslashes on Windows (e.g. data\processed).
        hf_path = folder.relative_to(ROOT).as_posix()   # e.g. data/processed
        print(f"Uploading {folder} → {hf_path} …")
        api.upload_folder(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            folder_path=str(folder),
            path_in_repo=hf_path,
        )
        print(f"  ✓ {hf_path} uploaded\n")

    print("All done. Judges can now download with:")
    print("  python scripts/download_data.py")

# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()