ASTERIZER committed on
Commit
aba5a87
·
verified ·
1 Parent(s): aee6168

Upload upload_full_sft_to_hf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. upload_full_sft_to_hf.py +57 -0
upload_full_sft_to_hf.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from huggingface_hub import HfApi
6
+
7
+
8
def parse_args(argv=None):
    """Parse the command-line options for the full SFT upload script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            original behavior; passing an explicit list lets callers and
            tests drive the parser without touching the process arguments.

    Returns:
        argparse.Namespace with the attributes ``repo_id``, ``folder``,
        ``path_in_repo`` (strings) and ``private`` (bool).
    """
    parser = argparse.ArgumentParser(description="Upload a full SFT run folder to a Hugging Face model repo")
    parser.add_argument(
        "--repo-id",
        default="ASTERIZER/LUNA-100M",
        help="Target Hugging Face model repo id (default: %(default)s)",
    )
    parser.add_argument(
        "--folder",
        default="Base/out/sft/rag_mcp_full_sft",
        help="Local SFT run folder to upload (default: %(default)s)",
    )
    parser.add_argument(
        "--path-in-repo",
        default="rag_mcp_full_sft",
        help="Destination directory inside the repo (default: %(default)s)",
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Pass private=True when creating the repo",
    )
    # argv=None makes argparse fall back to sys.argv[1:], so existing
    # zero-argument callers behave exactly as before.
    return parser.parse_args(argv)
15
+
16
+
17
def main():
    """Validate a local full SFT run folder and upload it to the Hub.

    Steps, in order: parse the CLI options, read the access token from the
    ``HF_TOKEN`` environment variable, confirm the run folder exists and is
    a directory, confirm ``final/model.pth`` and ``latest.pt`` exist inside
    it, create the model repo (tolerating an existing one), upload the
    folder, and print a one-line summary including the repo tree URL.

    Raises:
        RuntimeError: ``HF_TOKEN`` is unset or empty.
        FileNotFoundError: the folder, or one of the expected files, is
            missing.
        NotADirectoryError: the given path exists but is not a directory.
    """
    args = parse_args()

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise RuntimeError("HF_TOKEN is required in the environment to upload the full SFT folder")

    folder = Path(args.folder)
    if not folder.exists():
        raise FileNotFoundError(f"Full SFT output folder not found: {folder}")
    if not folder.is_dir():
        raise NotADirectoryError(f"Expected a folder, got: {folder}")

    # Refuse to upload a run that lacks either of the two expected artifacts.
    absent = []
    for expected in (folder / "final" / "model.pth", folder / "latest.pt"):
        if not expected.exists():
            absent.append(str(expected))
    if absent:
        raise FileNotFoundError("Missing expected full SFT files: " + ", ".join(absent))

    # Both Hub calls address the same model repo.
    repo_target = {"repo_id": args.repo_id, "repo_type": "model"}
    hub = HfApi(token=hf_token)
    hub.create_repo(private=args.private, exist_ok=True, **repo_target)
    hub.upload_folder(folder_path=str(folder), path_in_repo=args.path_in_repo, **repo_target)

    summary = (
        "uploaded_full_sft "
        f"repo_id={args.repo_id} "
        f"folder={folder} "
        f"path_in_repo={args.path_in_repo} "
        f"url=https://huggingface.co/{args.repo_id}/tree/main/{args.path_in_repo}"
    )
    print(summary)
54
+
55
+
56
# Script entry point: call main() only when this file is executed directly.
+ if __name__ == "__main__":
57
+ main()