# NOTE(review): removed web-scrape residue ("Spaces: / Sleeping / Sleeping")
# that preceded the module — it was HuggingFace Spaces page chrome, not code.
| """ | |
| finetune_owlv2.py — CLI for fine-tuning OWLv2 on a COCO-format dataset. | |
| Usage: | |
| uv run python scripts/finetune_owlv2.py | |
| uv run python scripts/finetune_owlv2.py --epochs 20 --lr 5e-5 | |
| uv run python scripts/finetune_owlv2.py --unfreeze-vision --backbone-lr 1e-5 | |
| uv run python scripts/finetune_owlv2.py --resume models/owlv2-finetuned/checkpoint-epoch-005 | |
| Recommended hardware: | |
| CUDA (Windows/Linux) — use fp16 for speed, set --device cuda | |
| MPS (Apple Silicon) — fp32 only, slower but functional for small datasets | |
| CPU — very slow, only for tiny sanity-check runs | |
| Typical first run: | |
| 1. make export # build data/labeled/coco_export.json | |
| 2. make finetune # train with defaults | |
| 3. Update app.py to load from models/owlv2-finetuned/best | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from pathlib import Path | |
| import click | |
| import torch | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from autolabel.config import settings | |
| from autolabel.finetune import FinetuneConfig, run_finetune | |
| from autolabel.utils import setup_logging | |
| PROJECT_ROOT = Path(__file__).resolve().parent.parent | |
| DEFAULT_OUTPUT = PROJECT_ROOT / "models" / "owlv2-finetuned" | |
def _default_device() -> str:
    """Pick the best available torch device string for this machine."""
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"


# NOTE(review): the @click decorators below were missing from the recovered
# source even though `main` is invoked with zero arguments under
# `if __name__ == "__main__"` and the module docstring documents these exact
# flags — without them the call raises TypeError. Option names mirror the
# parameter names; numeric DEFAULTS are reconstructed from the docstring
# examples and should be confirmed against FinetuneConfig's own defaults.
@click.command(context_settings={"show_default": True})
@click.option(
    "--coco-json",
    type=click.Path(path_type=Path),
    default=PROJECT_ROOT / "data" / "labeled" / "coco_export.json",
    help="COCO-format annotations produced by `make export`.",
)
@click.option(
    "--image-dir",
    type=click.Path(path_type=Path),
    default=PROJECT_ROOT / "data" / "labeled",
    help="Directory containing the images referenced by the COCO JSON.",
)
@click.option(
    "--output-dir",
    type=click.Path(path_type=Path),
    default=DEFAULT_OUTPUT,
    help="Where checkpoints and the final model are written.",
)
@click.option("--model", default="google/owlv2-base-patch16-ensemble", help="HF model id or local path.")
@click.option("--epochs", type=int, default=10, help="Number of training epochs.")
@click.option("--batch-size", type=int, default=4, help="Per-step batch size.")
@click.option("--grad-accum", type=int, default=2, help="Gradient accumulation steps.")
@click.option("--lr", type=float, default=1e-4, help="Learning rate for the detection heads.")
@click.option("--val-split", type=float, default=0.1, help="Fraction of data held out for validation.")
@click.option("--warmup-steps", type=int, default=100, help="LR warmup steps.")
@click.option("--save-every", type=int, default=1, help="Save a checkpoint every N epochs.")
@click.option("--unfreeze-vision", is_flag=True, default=False, help="Also train the ViT backbone.")
@click.option("--backbone-lr", type=float, default=1e-5, help="Backbone LR (used only with --unfreeze-vision).")
@click.option(
    "--resume",
    type=click.Path(path_type=Path),
    default=None,
    help="Checkpoint directory to resume training from.",
)
@click.option("--device", default=_default_device, help="Torch device: cuda, mps, or cpu.")
@click.option("--verbose", is_flag=True, default=False, help="Enable DEBUG logging.")
def main(
    coco_json: Path,
    image_dir: Path,
    output_dir: Path,
    model: str,
    epochs: int,
    batch_size: int,
    grad_accum: int,
    lr: float,
    val_split: float,
    warmup_steps: int,
    save_every: int,
    unfreeze_vision: bool,
    backbone_lr: float,
    resume: Path | None,
    device: str,
    verbose: bool,
) -> None:
    """Fine-tune OWLv2 on your labeled COCO dataset."""
    setup_logging(logging.DEBUG if verbose else logging.INFO)

    # fp16 only on CUDA; MPS/CPU stay fp32 (see module docstring).
    dtype = torch.float16 if device == "cuda" else torch.float32

    cfg = FinetuneConfig(
        coco_json=coco_json,
        image_dir=image_dir,
        output_dir=output_dir,
        model_name=model,
        device=device,
        torch_dtype=dtype,
        epochs=epochs,
        batch_size=batch_size,
        grad_accum_steps=grad_accum,
        lr=lr,
        # Backbone LR of 0.0 signals "frozen" when --unfreeze-vision is off.
        backbone_lr=backbone_lr if unfreeze_vision else 0.0,
        val_split=val_split,
        warmup_steps=warmup_steps,
        save_every=save_every,
        unfreeze_vision=unfreeze_vision,
        resume_from=resume,
    )

    click.echo(f"Fine-tuning OWLv2 on {coco_json}")
    click.echo(f"  device       : {device} ({dtype})")
    click.echo(f"  epochs       : {epochs}")
    click.echo(f"  effective bs : {batch_size * grad_accum}")
    click.echo(f"  heads lr     : {lr}")
    click.echo(f"  unfreeze ViT : {unfreeze_vision}")
    click.echo(f"  output       : {output_dir}")
    click.echo()

    run_finetune(cfg)


if __name__ == "__main__":
    main()