"""
Publish a trained model and its supporting codebase to the Hugging Face Hub.
"""
import argparse
import os
import sys
from pathlib import Path
from typing import Optional, Sequence

from huggingface_hub import HfApi, create_repo, upload_folder, upload_file
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
| |
# Put the parent of this script's directory at the front of the module search
# path. NOTE(review): presumably this is so project-local packages can be
# imported when the script is run directly; no such import is visible in this
# file -- confirm it is still needed.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.insert(0, PROJECT_ROOT)
|
|
|
|
# Individual files, relative to the project root, published next to the model.
_CODE_FILES = (
    "train.py",
    "inference.py",
    "config.yaml",
    "requirements.txt",
    "setup.py",
    "README.md",
    "MODEL_CARD.md",
    "LICENSE",
    ".gitignore",
)

# Directories, relative to the project root, uploaded recursively.
_CODE_DIRS = ("src", "scripts")

# huggingface_hub matches ignore patterns fnmatch-style against each file's
# path relative to the uploaded folder, so a bare directory or file name only
# matches at the top level. The "*/..." variants cover nested occurrences.
_IGNORE_PATTERNS = [
    "__pycache__/*",
    "*/__pycache__/*",
    "*.pyc",
    ".DS_Store",
    "*/.DS_Store",
]


def _create_repository(repo_id: str, private: bool) -> bool:
    """Create the model repository if needed; return False when that fails."""
    try:
        create_repo(
            repo_id=repo_id,
            repo_type="model",
            exist_ok=True,
            private=private,
        )
    except Exception as e:  # boundary: report any failure and let the caller abort
        print(f"✗ Error creating repository: {e}")
        print("\nMake sure you're logged in:")
        print(" huggingface-cli login")
        return False
    print(f"✓ Repository ready: {repo_id}")
    return True


def _upload_model(model_path: str, repo_id: str) -> bool:
    """Load model and tokenizer from ``model_path`` and push both to ``repo_id``.

    Returns True only when both pushes completed. A missing path or any error
    is reported on stdout and yields False; nothing is raised.
    """
    if not os.path.exists(model_path):
        print(f"✗ Model path not found: {model_path}")
        print(" Skipping model upload. You can upload it later.")
        return False
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        model.push_to_hub(repo_id)
        tokenizer.push_to_hub(repo_id)
    except Exception as e:  # best-effort: the remaining steps still run
        print(f"✗ Error uploading model: {e}")
        print(" You can upload the model separately later.")
        return False
    print("✓ Model and tokenizer uploaded")
    return True


def _upload_code(repo_id: str) -> list:
    """Upload the known code files and directories that exist on disk.

    Entries that do not exist are skipped silently. Returns the names of the
    entries whose upload raised an error.
    """
    # Resolve against an absolute script path so the project root is correct
    # even when ``__file__`` is relative.
    repo_root = Path(os.path.abspath(__file__)).parent.parent

    uploaded_count = 0
    failed = []

    for file_name in _CODE_FILES:
        file_path = repo_root / file_name
        if not file_path.exists():
            continue
        try:
            upload_file(
                path_or_fileobj=str(file_path),
                path_in_repo=file_name,
                repo_id=repo_id,
                repo_type="model",
            )
        except Exception as e:  # one bad file must not stop the others
            print(f" ⚠ Could not upload {file_name}: {e}")
            failed.append(file_name)
        else:
            print(f" ✓ Uploaded {file_name}")
            uploaded_count += 1

    for dir_name in _CODE_DIRS:
        dir_path = repo_root / dir_name
        if not dir_path.is_dir():
            continue
        try:
            upload_folder(
                folder_path=str(dir_path),
                path_in_repo=dir_name,
                repo_id=repo_id,
                repo_type="model",
                ignore_patterns=_IGNORE_PATTERNS,
            )
        except Exception as e:  # one bad directory must not stop the others
            print(f" ⚠ Could not upload {dir_name}/: {e}")
            failed.append(f"{dir_name}/")
        else:
            print(f" ✓ Uploaded {dir_name}/")
            uploaded_count += 1

    print(f"\n✓ Uploaded {uploaded_count} code files/directories")
    return failed


def publish_to_hub(
    model_path: str,
    repo_id: str,
    private: bool = False,
    upload_code: bool = True,
    upload_model: bool = True
) -> bool:
    """
    Publish model and codebase to Hugging Face Hub.

    The repository is created (or confirmed to exist) first. Model and code
    uploads are best-effort: their failures are reported on stdout and listed
    in the final summary, but they do not change the return value.

    Args:
        model_path: Path to the trained model
        repo_id: Full repository ID (e.g., "username/repo-name")
        private: Whether to make the repository private
        upload_code: Whether to upload code files
        upload_model: Whether to upload the model

    Returns:
        False if the repository could not be created or verified, True otherwise.
    """
    print("=" * 70)
    print("Publishing to Hugging Face Hub")
    print("=" * 70)
    print(f"\nRepository: {repo_id}")
    print(f"Private: {private}")
    print(f"Upload Model: {upload_model}")
    print(f"Upload Code: {upload_code}")

    print("\n[1/4] Creating/verifying repository...")
    if not _create_repository(repo_id, private):
        return False

    problems = []
    model_uploaded = False

    if upload_model:
        print("\n[2/4] Uploading model and tokenizer...")
        model_uploaded = _upload_model(model_path, repo_id)
        if not model_uploaded:
            problems.append("model and tokenizer were not uploaded")
    else:
        print("\n[2/4] Skipping model upload (--no-model flag)")

    if upload_code:
        print("\n[3/4] Uploading code files...")
        try:
            failed = _upload_code(repo_id)
        except Exception as e:  # e.g. filesystem errors while probing paths
            print(f"✗ Error uploading code: {e}")
            problems.append("code upload was aborted")
        else:
            problems.extend(f"could not upload {name}" for name in failed)
    else:
        print("\n[3/4] Skipping code upload (--no-code flag)")

    print("\n[4/4] Publishing complete!")
    print("\n" + "=" * 70)
    if problems:
        # Do not claim success when something above failed.
        print("Finished with warnings:")
        for problem in problems:
            print(f" - {problem}")
    else:
        print("Success! 🎉")
    print("=" * 70)

    if model_uploaded:
        print("\nYour model is now available at:")
    else:
        print("\nYour repository is available at:")
    print(f"https://huggingface.co/{repo_id}")

    # Only advertise the usage snippet when this run actually pushed a model.
    if model_uploaded:
        print("\nTo use your model:")
        print("\n".join([
            "",
            "from transformers import pipeline",
            "",
            f'classifier = pipeline("text-classification", model="{repo_id}")',
            "",
            "# Classify a comment",
            'result = classifier("This function uses dynamic programming for O(n) time complexity")',
            "print(result)",
            "",
        ]))

    return True
|
|
|
|
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the publishing script."""
    parser = argparse.ArgumentParser(
        description="Publish model and codebase to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Publish everything (model + code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier

# Publish only code (no model)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-model

# Publish only model (no code)
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --no-code

# Private repository
python scripts/publish_to_hub.py --repo-id Snaseem2026/code-comment-classifier --private
"""
    )
    parser.add_argument(
        "--model-path",
        type=str,
        default="./results/final_model",
        help="Path to the trained model"
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        default="Snaseem2026/code-comment-classifier",
        help="Full repository ID (e.g., 'username/repo-name')"
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Make the repository private"
    )
    parser.add_argument(
        "--no-code",
        action="store_true",
        help="Skip uploading code files"
    )
    parser.add_argument(
        "--no-model",
        action="store_true",
        help="Skip uploading model files"
    )
    parser.add_argument(
        "--yes",
        action="store_true",
        help="Skip confirmation prompt"
    )
    return parser


def _confirm_publish(repo_id: str) -> bool:
    """Ask on stdin whether to proceed; only "yes"/"y" (any case) counts as consent."""
    print("\n" + "=" * 70)
    try:
        response = input(f"\nProceed with publishing to {repo_id}? (yes/no): ")
    except EOFError:
        # stdin is closed or empty (e.g. piped or CI run without --yes):
        # treat it as a refusal instead of crashing with a traceback.
        print()
        return False
    return response.strip().lower() in ("yes", "y")


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the publishing CLI and return the process exit code.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv``.

    Returns:
        0 when publishing succeeded or the user cancelled, 1 when
        ``publish_to_hub`` reported failure.
    """
    args = build_arg_parser().parse_args(argv)

    print("\n" + "=" * 70)
    print("Hugging Face Hub Publishing")
    print("=" * 70)
    print("\nBefore publishing, make sure you:")
    print("1. Have a Hugging Face account")
    print("2. Are logged in: huggingface-cli login")
    print("3. Have reviewed MODEL_CARD.md and README.md")
    print(f"4. Model path exists: {args.model_path} ({'✓' if os.path.exists(args.model_path) else '✗'})")

    if not args.yes and not _confirm_publish(args.repo_id):
        print("Publishing cancelled.")
        return 0

    success = publish_to_hub(
        model_path=args.model_path,
        repo_id=args.repo_id,
        private=args.private,
        upload_code=not args.no_code,
        upload_model=not args.no_model
    )
    return 0 if success else 1


if __name__ == "__main__":
    sys.exit(main())
|
|