CAJAL-4B / upload_to_hf.py
Agnuxo's picture
Add upload_to_hf.py
574b462 verified
#!/usr/bin/env python3
"""
Upload CAJAL-4B models and documentation to HuggingFace.
No emojis in console output (Windows compatibility).
"""
import os, sys, subprocess, json
from pathlib import Path
# ─── Config ──────────────────────────────────────────────────────────────
def token_from_argv(argv):
    """Return the HuggingFace token given on the command line, or None.

    Accepted forms (``argv[0]`` is the program name and is ignored):

    * ``--token VALUE``
    * ``--token=VALUE``
    * ``VALUE`` as the first argument (legacy positional form)

    Args:
        argv: Full argument vector, normally ``sys.argv``.

    Returns:
        The token string, or None when no usable token was supplied.
    """
    args = list(argv[1:])
    for index, arg in enumerate(args):
        if arg == "--token":
            # The flag must be followed by a value; a trailing flag is "no token".
            return args[index + 1] if index + 1 < len(args) else None
        if arg.startswith("--token="):
            return arg.split("=", 1)[1] or None
    # Legacy form: the first argument is the token itself. Anything that looks
    # like an option is not a token, so it is ignored here.
    if args and not args[0].startswith("-"):
        return args[0]
    return None


# The environment variable wins over the command line.
HF_TOKEN = os.environ.get("HF_TOKEN") or token_from_argv(sys.argv)
HF_REPO_ID = "Agnuxo/CAJAL-4B"
MODEL_DIR = Path(r"D:\PROJECTS\CAJAL\outputs\CAJAL-4B")
# ──────────────────────────────────────────────────────────────────────────
def ensure_hf_hub():
    """Return the ``HfApi`` class, installing ``huggingface_hub`` if needed.

    If the package cannot be imported, it is installed with pip into the
    interpreter running this script and the import is retried.

    Returns:
        The ``huggingface_hub.HfApi`` class (not an instance).

    Raises:
        subprocess.CalledProcessError: If the pip installation fails.
        ImportError: If the package is still not importable after installing.
    """
    try:
        from huggingface_hub import HfApi
    except ImportError:
        import importlib

        print("[INSTALL] Installing huggingface_hub...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "huggingface_hub", "-q"]
        )
        # The path finders cache directory listings, so a package installed
        # while the interpreter is running may not be visible until the
        # caches are invalidated.
        importlib.invalidate_caches()
        from huggingface_hub import HfApi
    return HfApi
def _upload_one(api, local_path, path_in_repo, commit_message, failure_label):
    """Upload one local file to ``HF_REPO_ID`` and report the outcome on stdout.

    Args:
        api: Client object exposing ``upload_file`` (the ``HfApi`` instance
            created in ``main``).
        local_path: Path of the file on disk.
        path_in_repo: Destination path inside the repository.
        commit_message: Commit message recorded for the upload.
        failure_label: Text printed in front of the exception when the upload
            fails, e.g. ``"[ERROR] model.gguf failed"``.

    Returns:
        True when the upload succeeded, False when it raised.
    """
    try:
        api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=path_in_repo,
            repo_id=HF_REPO_ID,
            repo_type="model",
            commit_message=commit_message,
        )
    except Exception as e:
        # Best effort: one failed file must not prevent the remaining uploads.
        print(f"{failure_label}: {e}")
        return False
    print(f"[OK] {path_in_repo} uploaded")
    return True


def main():
    """Upload the model card, GGUF files, harness files and docs to the Hub.

    Reads the module-level ``HF_TOKEN``, ``HF_REPO_ID`` and ``MODEL_DIR``.
    Every upload is attempted independently; files missing from ``MODEL_DIR``
    are skipped. Console output is kept ASCII-only.

    Raises:
        SystemExit: With status 1 when no token is configured, or when the
            README or any model file failed to upload.
    """
    if not HF_TOKEN:
        print("ERROR: No HF_TOKEN provided.")
        # The token is read as the first command-line argument.
        print("Usage: python upload_to_hf.py YOUR_HF_TOKEN")
        print(" or: set HF_TOKEN env var then python upload_to_hf.py")
        sys.exit(1)

    HfApi = ensure_hf_hub()
    api = HfApi(token=HF_TOKEN)

    # Uploads reported at [ERROR] level that failed; drives the exit status.
    failed = []

    # 1. Create repo (exist_ok=True makes this a no-op if it already exists)
    print("[PACKAGE] Accessing repo:", HF_REPO_ID)
    try:
        api.create_repo(repo_id=HF_REPO_ID, repo_type="model", exist_ok=True)
    except Exception as e:
        # A 401 says the token may not create repos; it does not prove the
        # repo exists. The repo is expected to have been created manually,
        # so the uploads are still attempted and will report their own errors.
        if "401" in str(e) or "Unauthorized" in str(e):
            print(
                "[WARN] Not authorized to create repo; assuming it already "
                "exists (proceeding with uploads)"
            )
        else:
            print("[WARN] Repo check:", e)

    # 2. README (Model Card)
    readme = MODEL_DIR / "README.md"
    if readme.exists():
        print("[UPLOAD] README.md...")
        if not _upload_one(
            api,
            readme,
            "README.md",
            "Add professional Model Card with harness results",
            "[ERROR] README upload failed",
        ):
            failed.append("README.md")
    else:
        print("[WARN] README.md not found - skipping")

    # 3. Model files (GGUF)
    model_files = [
        ("CAJAL-4B-f16.gguf", "Full precision FP16"),
        ("CAJAL-4B-q8_0.gguf", "8-bit quantization"),
        ("CAJAL-4B-q4_k_m.gguf", "4-bit q4_k_m quantization"),
    ]
    for fname, desc in model_files:
        fpath = MODEL_DIR / fname
        if not fpath.exists():
            print(f"[WARN] Missing {fname} - skipping")
            continue
        size_mb = fpath.stat().st_size / (1024 * 1024)
        print(f"[UPLOAD] {fname} ({size_mb:.1f} MB) - {desc}")
        if not _upload_one(
            api, fpath, fname, f"Upload {fname}: {desc}", f"[ERROR] {fname} failed"
        ):
            failed.append(fname)

    # 4. Harness & results (reproducibility); failures here are warnings only
    aux_files = [
        ("harness.py", "Production paper-generation harness (fixed)"),
        ("harness_results.jsonl", "Results log"),
        ("harness_best.json", "Best paper record (score 7.0)"),
        ("analyze_topics.py", "Topic overlap analysis"),
        ("publish_hf.py", "HF publication script"),
        ("upload_to_hf.py", "Simple uploader (this script)"),
    ]
    for fname, desc in aux_files:
        fpath = MODEL_DIR / fname
        if not fpath.exists():
            continue
        print(f"[UPLOAD] {fname} - {desc}")
        _upload_one(
            api, fpath, fname, f"Add {fname}: {desc}", f"[WARN] {fname} skipped"
        )

    # 5. Docs directory (top-level files only); failures are warnings only
    docs_dir = MODEL_DIR / "docs"
    # is_dir() rather than exists(): iterdir() raises on a regular file.
    if docs_dir.is_dir():
        # Sorted so the upload order is the same on every run.
        for doc in sorted(docs_dir.iterdir()):
            if not doc.is_file():
                continue
            target = f"docs/{doc.name}"
            print(f"[UPLOAD] {target}")
            _upload_one(api, doc, target, f"Add {target}", f"[WARN] {target} skipped")

    if failed:
        print(
            f"\n[COMPLETE] Publication finished with {len(failed)} failed "
            "upload(s): " + ", ".join(failed)
        )
    else:
        print("\n[COMPLETE] Publication finished!")
    print("URL: https://huggingface.co/" + HF_REPO_ID)
    print("GitHub: https://github.com/Agnuxo1/CAJAL")
    if failed:
        # Let callers (shell scripts, CI) see that the publication is incomplete.
        sys.exit(1)
# Run the upload only when executed as a script, not when imported.
if __name__ == "__main__":
    main()