Spaces:

bytestream89
/

LabelPlayground

Sleeping

LabelPlayground / scripts /finetune_owlv2.py

Erick

Upload folder using huggingface_hub

47cb9bd verified about 2 months ago

5.02 kB

	"""
	finetune_owlv2.py — CLI for fine-tuning OWLv2 on a COCO-format dataset.

	Usage:
	uv run python scripts/finetune_owlv2.py
	uv run python scripts/finetune_owlv2.py --epochs 20 --lr 5e-5
	uv run python scripts/finetune_owlv2.py --unfreeze-vision --backbone-lr 1e-5
	uv run python scripts/finetune_owlv2.py --resume models/owlv2-finetuned/checkpoint-epoch-005

	Recommended hardware:
	CUDA (Windows/Linux) — use fp16 for speed, set --device cuda
	MPS (Apple Silicon) — fp32 only, slower but functional for small datasets
	CPU — very slow, only for tiny sanity-check runs

	Typical first run:
	1. make export # build data/labeled/coco_export.json
	2. make finetune # train with defaults
	3. Update app.py to load from models/owlv2-finetuned/best
	"""

	from __future__ import annotations

	import logging
	from pathlib import Path

	import click
	import torch
	from dotenv import load_dotenv

	load_dotenv()

	from autolabel.config import settings
	from autolabel.finetune import FinetuneConfig, run_finetune
	from autolabel.utils import setup_logging

	# Directory holding this script; the project root is one level above it.
	_SCRIPT_DIR = Path(__file__).resolve().parent
	PROJECT_ROOT = _SCRIPT_DIR.parent
	# Used below as the default value of the --output-dir option.
	DEFAULT_OUTPUT = PROJECT_ROOT.joinpath("models", "owlv2-finetuned")


	# CLI entry point. Each option below maps 1:1 onto a parameter of `main`;
	# defaults for paths, model and device are read from `autolabel.config.settings`.
	@click.command()
	@click.option(
	    "--coco-json",
	    default=str(settings.labeled_dir / "coco_export.json"),
	    show_default=True,
	    type=click.Path(exists=True, path_type=Path),
	    help="COCO JSON file produced by `make export`.",
	)
	@click.option(
	    "--image-dir",
	    default=str(settings.raw_dir),
	    show_default=True,
	    type=click.Path(exists=True, file_okay=False, path_type=Path),
	    help="Directory containing the source images (matched by file_name in COCO JSON).",
	)
	@click.option(
	    "--output-dir",
	    default=str(DEFAULT_OUTPUT),
	    show_default=True,
	    type=click.Path(file_okay=False, path_type=Path),
	    help="Directory to save checkpoints and the best model.",
	)
	@click.option(
	    "--model",
	    default=settings.model,
	    show_default=True,
	    help="Base model to fine-tune.",
	)
	@click.option("--epochs", default=10, show_default=True, type=int)
	@click.option(
	    "--batch-size",
	    default=1,
	    show_default=True,
	    type=int,
	    help="Images per forward pass. Keep at 1 for OWLv2-large on ≤8 GB VRAM.",
	)
	@click.option(
	    "--grad-accum",
	    default=4,
	    show_default=True,
	    type=int,
	    help="Gradient accumulation steps. Effective batch = batch_size * grad_accum.",
	)
	@click.option(
	    "--lr",
	    default=1e-4,
	    show_default=True,
	    type=float,
	    help="Learning rate for detection heads.",
	)
	@click.option(
	    "--val-split",
	    default=0.2,
	    show_default=True,
	    type=float,
	    help="Fraction of data to use for validation.",
	)
	@click.option("--warmup-steps", default=50, show_default=True, type=int)
	@click.option(
	    "--save-every",
	    default=1,
	    show_default=True,
	    type=int,
	    help="Save a checkpoint every N epochs.",
	)
	@click.option(
	    "--unfreeze-vision",
	    is_flag=True,
	    default=False,
	    help="Also fine-tune the ViT image encoder (needs more VRAM, slower).",
	)
	@click.option(
	    "--backbone-lr",
	    default=1e-5,
	    show_default=True,
	    type=float,
	    help="LR for the vision encoder when --unfreeze-vision is set.",
	)
	@click.option(
	    "--resume",
	    default=None,
	    type=click.Path(path_type=Path),
	    help="Path to a saved checkpoint to resume from.",
	)
	@click.option(
	    "--device",
	    default=settings.device,
	    show_default=True,
	    # Plain "|" separators: a backslash-escaped pipe is an invalid escape
	    # sequence and would print a literal backslash in --help.
	    help="Torch device: cuda | mps | cpu.",
	)
	@click.option("--verbose", "-v", is_flag=True, default=False)
	def main(
	    coco_json: Path,
	    image_dir: Path,
	    output_dir: Path,
	    model: str,
	    epochs: int,
	    batch_size: int,
	    grad_accum: int,
	    lr: float,
	    val_split: float,
	    warmup_steps: int,
	    save_every: int,
	    unfreeze_vision: bool,
	    backbone_lr: float,
	    resume: Path | None,
	    device: str,
	    verbose: bool,
	) -> None:
	    """Fine-tune OWLv2 on your labeled COCO dataset."""
	    # NOTE: the docstring above doubles as the click --help text, so the
	    # implementation notes live in comments instead.
	    setup_logging(logging.DEBUG if verbose else logging.INFO)

	    # Half precision is used on CUDA only. Match on the prefix so indexed
	    # devices such as "cuda:0" get fp16 as well, not just the bare "cuda".
	    on_cuda = device.startswith("cuda")
	    dtype = torch.float16 if on_cuda else torch.float32

	    cfg = FinetuneConfig(
	        coco_json=coco_json,
	        image_dir=image_dir,
	        output_dir=output_dir,
	        model_name=model,
	        device=device,
	        torch_dtype=dtype,
	        epochs=epochs,
	        batch_size=batch_size,
	        grad_accum_steps=grad_accum,
	        lr=lr,
	        # The backbone LR is forced to 0.0 unless --unfreeze-vision is set.
	        backbone_lr=backbone_lr if unfreeze_vision else 0.0,
	        val_split=val_split,
	        warmup_steps=warmup_steps,
	        save_every=save_every,
	        unfreeze_vision=unfreeze_vision,
	        resume_from=resume,
	    )

	    # Echo a short summary of the run before handing off to the trainer.
	    click.echo(f"Fine-tuning OWLv2 on {coco_json}")
	    click.echo(f" device : {device} ({dtype})")
	    click.echo(f" epochs : {epochs}")
	    click.echo(f" effective bs : {batch_size * grad_accum}")
	    click.echo(f" heads lr : {lr}")
	    click.echo(f" unfreeze ViT : {unfreeze_vision}")
	    click.echo(f" output : {output_dir}")
	    click.echo()

	    run_finetune(cfg)


	# Invoke the click command only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()