feat: created script to push datasets to huggingface
Browse files- scripts/push_dataset.py +29 -2
scripts/push_dataset.py
CHANGED
|
@@ -5,9 +5,28 @@ import os
|
|
| 5 |
import tempfile
|
| 6 |
from pathlib import Path
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from huggingface_hub import HfApi
|
| 9 |
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def push_dataset(jsonl_path: str, repo_id: str, token: str):
|
| 12 |
api = HfApi(token=token)
|
| 13 |
api.create_repo(
|
|
@@ -84,8 +103,16 @@ if __name__ == "__main__":
|
|
| 84 |
parser = argparse.ArgumentParser()
|
| 85 |
parser.add_argument("--data", default="data/episodes_v2.jsonl")
|
| 86 |
parser.add_argument("--repo", default="sh4shv4t/parlay-episodes")
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
args = parser.parse_args()
|
| 89 |
if not args.token:
|
| 90 |
-
raise ValueError(
|
|
|
|
|
|
|
|
|
|
| 91 |
push_dataset(args.data, args.repo, args.token)
|
|
|
|
| 5 |
import tempfile
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
+
# Load .env from project root (same pattern as the rest of the app)
|
| 9 |
+
_ROOT = Path(__file__).resolve().parents[1]
|
| 10 |
+
try:
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
load_dotenv(_ROOT / ".env")
|
| 14 |
+
load_dotenv(_ROOT / ".env.local")
|
| 15 |
+
except ImportError:
|
| 16 |
+
pass
|
| 17 |
+
|
| 18 |
from huggingface_hub import HfApi
|
| 19 |
|
| 20 |
|
| 21 |
+
def _resolve_hf_token() -> str | None:
|
| 22 |
+
"""HF_TOKEN, HUGGING_FACE_HUB_TOKEN, or huggingface-cli default (hub reads env)."""
|
| 23 |
+
for key in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
|
| 24 |
+
v = os.environ.get(key)
|
| 25 |
+
if v and str(v).strip():
|
| 26 |
+
return str(v).strip()
|
| 27 |
+
return None
|
| 28 |
+
|
| 29 |
+
|
| 30 |
def push_dataset(jsonl_path: str, repo_id: str, token: str):
|
| 31 |
api = HfApi(token=token)
|
| 32 |
api.create_repo(
|
|
|
|
| 103 |
parser = argparse.ArgumentParser()
|
| 104 |
parser.add_argument("--data", default="data/episodes_v2.jsonl")
|
| 105 |
parser.add_argument("--repo", default="sh4shv4t/parlay-episodes")
|
| 106 |
+
default_tok = _resolve_hf_token()
|
| 107 |
+
parser.add_argument(
|
| 108 |
+
"--token",
|
| 109 |
+
default=default_tok,
|
| 110 |
+
help="Hugging Face token (or set HF_TOKEN / HUGGING_FACE_HUB_TOKEN, or .env)",
|
| 111 |
+
)
|
| 112 |
args = parser.parse_args()
|
| 113 |
if not args.token:
|
| 114 |
+
raise ValueError(
|
| 115 |
+
"No Hugging Face token found. Add HF_TOKEN=... to .env in the project root, "
|
| 116 |
+
"export HF_TOKEN (or HUGGING_FACE_HUB_TOKEN), or pass --token."
|
| 117 |
+
)
|
| 118 |
push_dataset(args.data, args.repo, args.token)
|