"""
extract_head.py
===============

Run this ONCE on your local machine (where torch is installed):

    cd D:\CoE\deploy
    python extract_head.py

Reads best_model_phase1.pt (1.1 GB) and saves ONLY the fine-tuned layers:
  - fusion.*            (attention + FFN + norms)  ~12 MB
  - classifier.*        (final classification head)
  - uncertainty_head.*
  - *_proj.*            (lightweight projection adapters)

These total ~25 MB — well within HF's 1 GB limit.  The four backbone
encoders (CLIP, ViT, ResNet, EfficientNet) are NOT saved because app.py
downloads them from HF Hub at runtime for free.
"""
import os

import torch

CHECKPOINT = os.path.join(
    os.path.dirname(__file__), "..", "universal_vision_checkpoints", "best_model_phase1.pt"
)
OUTPUT = os.path.join(os.path.dirname(__file__), "head_weights.pt")

# These are the BACKBONE prefixes — we drop them (loaded from HF Hub instead)
BACKBONE_PREFIXES = ("clip_model.", "vit.", "resnet.", "efficientnet.")


def filter_head_state(state):
    """Return only the fine-tuned (non-backbone) entries of a state dict.

    A key is kept unless it starts with one of BACKBONE_PREFIXES.
    """
    return {
        k: v
        for k, v in state.items()
        if not any(k.startswith(p) for p in BACKBONE_PREFIXES)
    }


def size_mb(state):
    """Total size in MiB of every tensor in *state* (numel x element size)."""
    return sum(v.numel() * v.element_size() for v in state.values()) / 1024**2


def main():
    """Load the full checkpoint, strip the backbones, save the head weights."""
    print(f"Loading: {os.path.abspath(CHECKPOINT)}")

    # Prefer the safe loader: weights_only=True refuses to unpickle arbitrary
    # Python objects (torch.load with weights_only=False can execute code
    # embedded in a malicious pickle).  Fall back to full unpickling only when
    # the checkpoint contains objects the safe loader rejects — and say so.
    try:
        ckpt = torch.load(CHECKPOINT, map_location="cpu", weights_only=True)
    except Exception:
        print("  weights_only=True failed; falling back to full unpickling "
              "(only safe for a checkpoint you trust)")
        ckpt = torch.load(CHECKPOINT, map_location="cpu", weights_only=False)

    # Training checkpoints wrap the weights in "model_state_dict"; a bare
    # state dict is accepted as-is.
    state = ckpt.get("model_state_dict", ckpt)

    head_state = filter_head_state(state)
    if not head_state:
        # Saving an empty dict would silently produce a useless file.
        raise SystemExit(
            "No head tensors found — check BACKBONE_PREFIXES against the "
            "checkpoint's keys."
        )

    print(f"\nFull checkpoint : {size_mb(state):.1f} MB ({len(state)} tensors)")
    print(f"Head only       : {size_mb(head_state):.2f} MB ({len(head_state)} tensors)")
    print("\nSaved keys:")
    for k, v in head_state.items():
        kb = v.numel() * v.element_size() / 1024
        print(f"  {k:55s} {str(tuple(v.shape)):25s} {kb:.1f} KB")

    torch.save({"model_state_dict": head_state}, OUTPUT)
    print(f"\nāœ… Saved to: {os.path.abspath(OUTPUT)}")
    print(f"   Size: {os.path.getsize(OUTPUT)/1024**2:.2f} MB")
    print("\nNext step: push head_weights.pt to your HF Space repo (no LFS needed).")


if __name__ == "__main__":
    main()