| """Local model loader — loads encoder models from local cache, falls back to HF. |
| |
| Model directories (saved via model.save_pretrained()): |
| dinov2-small/ — facebook/dinov2-small (21M params, 384-dim) vision |
| vit-base/ — google/vit-base-patch16-224 (86M, 768-dim) vision fallback |
| moonshine-base/ — UsefulSensors/moonshine-base (62M, 416-dim) audio |
| pig-vae/ — Wan2.1 VAE checkpoint (84M params) video latent codec |
| |
| Usage: |
| from arbitor.encoders.models import load_encoder, load_processor |
| |
| model = load_encoder("dinov2-small") |
| processor = load_processor("dinov2-small", "image") |
| |
| Download models: |
| python -m arbitor.encoders.models.download |
| """ |
| import os |
|
|
# Directory that contains this module; saved model folders are looked up
# as sub-directories of it.
_MODELS_DIR = os.path.dirname(os.path.abspath(__file__))

# Short model name -> where to find it.
#   "local": directory under _MODELS_DIR that is used when it exists on disk
#   "hf":    HuggingFace hub identifier used when the local directory is absent
#   "type":  loader kind; every entry is "auto" and the loaders in this
#            section do not read it
REGISTRY = {
    short_name: {
        "local": os.path.join(_MODELS_DIR, short_name),
        "hf": hub_id,
        "type": "auto",
    }
    for short_name, hub_id in (
        ("dinov2-small", "facebook/dinov2-small"),
        ("vit-base", "google/vit-base-patch16-224"),
        ("moonshine-base", "UsefulSensors/moonshine-base"),
    )
}
|
|
|
|
def resolve_path(name: str) -> tuple[str, dict]:
    """Map a short model name to the location it should be loaded from.

    Args:
        name: Key into ``REGISTRY``.

    Returns:
        A ``(source, entry)`` pair. ``source`` is the entry's local directory
        when that directory exists on disk, otherwise its HuggingFace
        identifier; ``entry`` is the registry dict itself.

    Raises:
        ValueError: If ``name`` is not registered.
    """
    spec = REGISTRY.get(name)
    if spec is None:
        raise ValueError(f"Unknown model: {name}. Options: {list(REGISTRY.keys())}")

    # Prefer the on-disk copy; only fall back to the hub id when it is missing.
    local_dir = spec["local"]
    source = local_dir if os.path.isdir(local_dir) else spec["hf"]
    return source, spec
|
|
|
|
def load_encoder(name: str, device=None, **kwargs):
    """Load a model from the local cache, falling back to HuggingFace.

    Args:
        name: Short name ("dinov2-small", "vit-base", "moonshine-base").
        device: Optional device to move the model to (e.g. "cuda", "cpu", or
            a device index such as 0). ``None`` or an empty string skips the
            move and leaves the model where ``from_pretrained`` put it.
        **kwargs: Extra keyword arguments forwarded to
            ``AutoModel.from_pretrained``. ``low_cpu_mem_usage`` defaults to
            ``True`` and can be overridden here.

    Returns:
        Loaded model in eval mode.

    Raises:
        ValueError: If ``name`` is not a registered model (from
            ``resolve_path``).
    """
    # Function-scope import: transformers is only loaded when a model is
    # actually requested.
    from transformers import AutoModel

    path, _ = resolve_path(name)

    # Use setdefault instead of a hard-coded keyword so a caller passing
    # low_cpu_mem_usage explicitly does not hit "multiple values for keyword".
    kwargs.setdefault("low_cpu_mem_usage", True)
    model = AutoModel.from_pretrained(path, **kwargs)
    model.eval()

    # Compare against None rather than testing truthiness: a device index of
    # 0 is falsy and would otherwise be ignored. The empty string is still
    # treated as "no device" to match the previous behaviour.
    if device is not None and device != "":
        model = model.to(device)
    return model
|
|
|
|
def load_processor(name: str, modality: str = "image"):
    """Load the preprocessing object that pairs with a registered model.

    Args:
        name: Short model name.
        modality: "audio" selects ``AutoFeatureExtractor``; any other value
            (including the default "image") selects ``AutoImageProcessor``.

    Returns:
        Processor instance created with ``from_pretrained`` on the path
        resolved for ``name``.
    """
    source, _ = resolve_path(name)

    # Import only the class that is needed, and only once the name resolved.
    if modality != "audio":
        from transformers import AutoImageProcessor as processor_cls
    else:
        from transformers import AutoFeatureExtractor as processor_cls

    return processor_cls.from_pretrained(source)
|
|