infy / scripts /download_lightweight_models.py
shourya
Update app docs, setup scripts, and utils
62a67da
#!/usr/bin/env python3
"""
Download lightweight/quantized models and save them locally for committing to git.

The models are stored in the repo, so no network access is needed during demos.
"""
import os
import sys
from pathlib import Path
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
import shutil
# Directory that receives the downloaded models. It is a relative path, so it
# is resolved against the current working directory at run time.
MODELS_DIR = Path("models")

# Models to fetch, keyed by the name of the sub-directory they are saved under.
# NOTE(review): every entry -- including "tokenizer" -- is loaded through
# AutoModelForSequenceClassification below, so full model weights are saved for
# it as well. Confirm whether only the tokenizer files are needed for that entry.
MODELS = {
    "sentiment": {
        "model_id": "distilbert-base-uncased-finetuned-sst-2-english",
        "desc": "Sentiment Analysis (DistilBERT - small & fast)",
    },
    "tokenizer": {
        "model_id": "bert-base-uncased",
        "desc": "Tokenizer (BERT base)",
    },
}

_BYTES_PER_MB = 1024 * 1024
_BANNER = "=" * 70


def directory_size_mb(directory):
    """Return the combined size, in MiB, of all regular files under *directory*.

    Directory entries themselves are skipped: their own ``st_size`` is
    filesystem bookkeeping and would inflate the reported total.
    A missing directory counts as 0.0.
    """
    directory = Path(directory)
    if not directory.is_dir():
        return 0.0
    total_bytes = sum(
        entry.stat().st_size for entry in directory.rglob("*") if entry.is_file()
    )
    return total_bytes / _BYTES_PER_MB


def download_model(name, config):
    """Download one entry of ``MODELS`` and save it under ``MODELS_DIR / name``.

    The tokenizer is written to ``<name>/tokenizer`` and the model to
    ``<name>/model``. Any exception raised by the download or the save
    propagates to the caller.

    Returns:
        The size of the saved files in MiB.
    """
    model_id = config["model_id"]
    model_path = MODELS_DIR / name

    # Download from the hub (or the local cache) ...
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)

    # ... and write self-contained copies into the repository tree.
    tokenizer.save_pretrained(model_path / "tokenizer")
    model.save_pretrained(model_path / "model")

    return directory_size_mb(model_path)


def main():
    """Download every configured model and print a summary.

    A failure for one model does not stop the remaining downloads.

    Returns:
        0 when every model was saved, 1 when at least one download failed.
    """
    print(_BANNER)
    print("🤗 Downloading Lightweight Models for Local Storage")
    print(_BANNER)

    MODELS_DIR.mkdir(exist_ok=True)
    print(f"\n📁 Saving models to: {MODELS_DIR.absolute()}\n")

    failed = []
    for name, config in MODELS.items():
        desc = config["desc"]
        print(f"⏳ Downloading {desc}...")
        try:
            size_mb = download_model(name, config)
        except Exception as e:
            # Best effort per model: report the problem and carry on, but
            # remember it so the final summary and exit code are truthful.
            print(f"❌ Error downloading {name}: {e}\n")
            failed.append(name)
        else:
            print(f"✅ {desc}: {size_mb:.1f} MB\n")

    print(_BANNER)
    if failed:
        failed_names = ", ".join(failed)
        print(f"❌ {len(failed)} of {len(MODELS)} model(s) failed: {failed_names}")
    else:
        print("✅ Models downloaded!")
    print(_BANNER)

    total_size = directory_size_mb(MODELS_DIR)
    print(f"\n📊 Total size: {total_size:.1f} MB")
    print(f"📁 Location: {MODELS_DIR.absolute()}")

    if failed:
        # Do not suggest committing an incomplete model set.
        return 1

    print("\n💡 Next: Commit these models to git")
    print(" git add models/")
    print(" git commit -m 'Add pre-downloaded models for offline use'")
    print(" git push origin main")
    return 0


if __name__ == "__main__":
    sys.exit(main())