""" Push LUNA training code + config to a Hugging Face model repo. Uploads everything needed to run LoRA SFT on a GPU instance. Usage: HF_TOKEN=hf_xxx python push_code_to_hf.py """ import os from huggingface_hub import HfApi, create_repo HF_REPO = "ASTERIZER/LUNA-Training" TOKEN = os.environ.get("HF_TOKEN") FILES_TO_PUSH = [ # Core training scripts "sft_train.py", "lora_sft_train.py", "upload_lora_to_hf.py", "upload_full_sft_to_hf.py", "chat_full_sft.py", "train.py", "chat.py", "generate.py", # Configs "rag_mcp_lora_config.yaml", "rag_mcp_full_sft_config.yaml", "sft_config.yaml", "train_config.yaml", # Requirements "requirements.txt", # Validation / benchmarking "validate_sft.py", "check_sft_alignment.py", "validate_and_quantize.py", # Dataset builder "Base/Datasets/rag_mcp_sft/build_rag_mcp_sft_dataset.py", "Base/Datasets/rag_mcp_sft/push_to_hf.py", "Base/Datasets/rag_mcp_sft/BUILD_REPORT.md", "Base/Datasets/rag_mcp_sft/FINETUNE_COMMANDS.md", "Base/Datasets/rag_mcp_sft/README.md", "Base/Datasets/rag_mcp_sft/source_manifest.json", "Base/Datasets/rag_mcp_sft/sample_preview.json", # Tokenizer config (small files only) "Base/checkpoints/EleutherAI/pythia-160m/config.json", "Base/checkpoints/EleutherAI/pythia-160m/tokenizer_config.json", "Base/checkpoints/EleutherAI/pythia-160m/tokenizer.json", # Shell scripts "setup_and_sft.sh", "setup_and_train.sh", # GPU run script "gpu_train.sh", "gpu_full_sft.sh", # README "README.md", ] def main(): if not TOKEN: raise RuntimeError("Set HF_TOKEN environment variable") api = HfApi(token=TOKEN) create_repo( repo_id=HF_REPO, token=TOKEN, repo_type="model", exist_ok=True, private=False, ) print(f"Repo ready: https://huggingface.co/{HF_REPO}") pushed = 0 for fpath in FILES_TO_PUSH: if not os.path.exists(fpath): print(f" SKIP (not found): {fpath}") continue api.upload_file( path_or_fileobj=fpath, path_in_repo=fpath, repo_id=HF_REPO, token=TOKEN, ) print(f" OK: {fpath}") pushed += 1 print(f"\nPushed {pushed}/{len(FILES_TO_PUSH)} files to https://huggingface.co/{HF_REPO}") if __name__ == "__main__": main()