# LabelPlayground / scripts / finetune_owlv2.py
# Erick
# Upload folder using huggingface_hub
# 47cb9bd verified
"""
finetune_owlv2.py — CLI for fine-tuning OWLv2 on a COCO-format dataset.
Usage:
uv run python scripts/finetune_owlv2.py
uv run python scripts/finetune_owlv2.py --epochs 20 --lr 5e-5
uv run python scripts/finetune_owlv2.py --unfreeze-vision --backbone-lr 1e-5
uv run python scripts/finetune_owlv2.py --resume models/owlv2-finetuned/checkpoint-epoch-005
Recommended hardware:
CUDA (Windows/Linux) — use fp16 for speed, set --device cuda
MPS (Apple Silicon) — fp32 only, slower but functional for small datasets
CPU — very slow, only for tiny sanity-check runs
Typical first run:
1. make export # build data/labeled/coco_export.json
2. make finetune # train with defaults
3. Update app.py to load from models/owlv2-finetuned/best
"""
from __future__ import annotations
import logging
from pathlib import Path
import click
import torch
from dotenv import load_dotenv
load_dotenv()
from autolabel.config import settings
from autolabel.finetune import FinetuneConfig, run_finetune
from autolabel.utils import setup_logging
# Repository root: this script is invoked as scripts/finetune_owlv2.py,
# so the root is two levels above the resolved file path.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Default value for --output-dir (checkpoints and the best model).
DEFAULT_OUTPUT = PROJECT_ROOT.joinpath("models", "owlv2-finetuned")
@click.command()
@click.option(
    "--coco-json",
    default=str(settings.labeled_dir / "coco_export.json"),
    show_default=True,
    # Must be an existing *file*: a directory would pass `exists=True` alone.
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="COCO JSON file produced by `make export`.",
)
@click.option(
    "--image-dir",
    default=str(settings.raw_dir),
    show_default=True,
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    help="Directory containing the source images (matched by file_name in COCO JSON).",
)
@click.option(
    "--output-dir",
    default=str(DEFAULT_OUTPUT),
    show_default=True,
    type=click.Path(file_okay=False, path_type=Path),
    help="Directory to save checkpoints and the best model.",
)
@click.option("--model", default=settings.model, show_default=True,
              help="Base model to fine-tune.")
# Counts are range-checked at the CLI boundary so that bad values are rejected
# with a usage error instead of surfacing somewhere inside the training run.
@click.option("--epochs", default=10, show_default=True,
              type=click.IntRange(min=1))
@click.option("--batch-size", default=1, show_default=True,
              type=click.IntRange(min=1),
              help="Images per forward pass. Keep at 1 for OWLv2-large on ≤8 GB VRAM.")
@click.option("--grad-accum", default=4, show_default=True,
              type=click.IntRange(min=1),
              help="Gradient accumulation steps. Effective batch = batch_size * grad_accum.")
@click.option("--lr", default=1e-4, show_default=True, type=float,
              help="Learning rate for detection heads.")
# A fraction: 0.0 is accepted, 1.0 is not (it would leave nothing to train on).
@click.option("--val-split", default=0.2, show_default=True,
              type=click.FloatRange(min=0.0, max=1.0, max_open=True),
              help="Fraction of data to use for validation.")
@click.option("--warmup-steps", default=50, show_default=True,
              type=click.IntRange(min=0))
@click.option("--save-every", default=1, show_default=True,
              type=click.IntRange(min=1),
              help="Save a checkpoint every N epochs.")
@click.option(
    "--unfreeze-vision", is_flag=True, default=False,
    help="Also fine-tune the ViT image encoder (needs more VRAM, slower).",
)
@click.option(
    "--backbone-lr", default=1e-5, show_default=True, type=float,
    help="LR for the vision encoder when --unfreeze-vision is set.",
)
@click.option(
    "--resume",
    default=None,
    # Like the other input paths, the checkpoint must already exist; it may be
    # a file or a directory.
    type=click.Path(exists=True, path_type=Path),
    help="Path to a saved checkpoint to resume from.",
)
@click.option(
    "--device",
    default=settings.device,
    show_default=True,
    help="Torch device: cuda | mps | cpu.",
)
@click.option("--verbose", "-v", is_flag=True, default=False)
def main(
    coco_json: Path,
    image_dir: Path,
    output_dir: Path,
    model: str,
    epochs: int,
    batch_size: int,
    grad_accum: int,
    lr: float,
    val_split: float,
    warmup_steps: int,
    save_every: int,
    unfreeze_vision: bool,
    backbone_lr: float,
    resume: Path | None,
    device: str,
    verbose: bool,
) -> None:
    """Fine-tune OWLv2 on your labeled COCO dataset."""
    # NOTE: the docstring above is rendered by click as the --help text, so it
    # is kept to a single line; per-option documentation lives in the `help=`
    # strings of the decorators.
    #
    # Flow: configure logging, pick the tensor dtype from the device, pack all
    # CLI values into a FinetuneConfig, print a short summary, then hand the
    # config to run_finetune().
    setup_logging(logging.DEBUG if verbose else logging.INFO)

    # fp16 on any CUDA device, fp32 elsewhere. `startswith` rather than `==`
    # so that indexed devices such as "cuda:0" also get fp16.
    dtype = torch.float16 if device.startswith("cuda") else torch.float32

    cfg = FinetuneConfig(
        coco_json=coco_json,
        image_dir=image_dir,
        output_dir=output_dir,
        model_name=model,
        device=device,
        torch_dtype=dtype,
        epochs=epochs,
        batch_size=batch_size,
        grad_accum_steps=grad_accum,
        lr=lr,
        # --backbone-lr only takes effect together with --unfreeze-vision;
        # otherwise 0.0 is passed.
        backbone_lr=backbone_lr if unfreeze_vision else 0.0,
        val_split=val_split,
        warmup_steps=warmup_steps,
        save_every=save_every,
        unfreeze_vision=unfreeze_vision,
        resume_from=resume,
    )

    # Summary of the run configuration, printed before training starts.
    click.echo(f"Fine-tuning OWLv2 on {coco_json}")
    click.echo(f" device : {device} ({dtype})")
    click.echo(f" epochs : {epochs}")
    click.echo(f" effective bs : {batch_size * grad_accum}")
    click.echo(f" heads lr : {lr}")
    click.echo(f" unfreeze ViT : {unfreeze_vision}")
    click.echo(f" output : {output_dir}")
    click.echo()

    run_finetune(cfg)
# Script entry point: invoke the click command only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()