v4: Fix 429 — use snapshot_download with exact allow_patterns, no raw HTTP. 500 scenes in 80s, zero errors.
train_bokehflow.ipynb · CHANGED · +102 −167
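The core of the fix, distilled below as a minimal sketch (the repo id and local dir are the ones the notebook uses; the two-file list is illustrative): instead of issuing one raw GET per file against `.../resolve/main`, which is what drew the HTTP 429s, the notebook now hands an exact file list to `huggingface_hub.snapshot_download`, which brings retries, bounded parallelism, and local caching with it.

```python
# Sketch of the pattern this commit adopts; the two-file list is illustrative.
from huggingface_hub import snapshot_download

wanted = ['train/in/1_f22.JPG', 'train/gt/1/1_f2.0.JPG']  # exact repo paths
snapshot_download(
    'timseizinger/RealBokeh_3MP',
    repo_type='dataset',
    local_dir='/tmp/realbokeh',
    allow_patterns=wanted,  # only matching files are fetched; re-runs hit the cache
)
```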
Removed, old side of the diff (the page extraction truncated many removed lines; truncations are marked with `…`):

@@ -5,18 +5,15 @@ (header cell)
- ## …
- | … (old subset table rows, truncated)
- | All 3958 | 7918 images | ~4.5 GB | ~25 min |
- **Just run all cells.**

@@ -35,10 +32,10 @@ (Step 1)
- #@title Step 1: Download BokehFlow code
- print('✓ BokehFlow …

@@ -47,28 +44,28 @@ (Step 2)
- #@title Step 2: Config
- # Data
- 'max_scenes': 500, # 200=…
- 'target_fstop': 2.0,
- 'data_dir': '/tmp/realbokeh', …
- 'batch_size': 4, # 4 for T4, 8 for A100
- 'num_workers': 2, …
- import torch, os

@@ -84,123 +81,81 @@ (Step 3: the raw-HTTP, ThreadPoolExecutor downloader this commit removes)
- #@title Step 3: …
- import asyncio, aiohttp …
- from pathlib import Path
- from concurrent.futures import ThreadPoolExecutor, as_completed
- from tqdm.auto import tqdm
- # --- Phase 1: Fetch metadata …
- print('Phase 1: Fetching metadata...')
- async def _fetch_metas(concurrency=…
-             url = f'{HF_BASE}/train/metadata/{i}.json'
-             async with session.get(…
- print(f'  {len(…
- # Build …
- for m in …
-     gt_path = None
-     pairs.append({
-         'input_rel': m['source_image'],  # e.g. 'in/1_f22.JPG'
-         'gt_rel': gt_path,               # e.g. 'gt/1/1_f2.0.JPG'
-         'f_number': CONFIG['target_fstop'],
-         'focal_mm': float(m.get('focal_length', 50)),
-         'focus_m': float(m.get('focus_plane_distance', 2.0)),
-     })
- random.shuffle(pairs)
- if CONFIG['max_scenes']:
-     pairs = pairs[:CONFIG['max_scenes']]
- print(f'  {len(pairs)} pairs selected for download')
- … (session-builder lines truncated)
-     if hf_token:
-         s.headers['Authorization'] = f'Bearer {hf_token}'
-     return s
- def _download_file(rel_path, session):
-     """Download one file to DATA/train/{rel_path}. Skips if exists."""
-     local = DATA / 'train' / rel_path
-     if local.exists() and local.stat().st_size > 1000:
-         return 'cached'
-     local.parent.mkdir(parents=True, exist_ok=True)
-     url = f'{HF_BASE}/train/{rel_path}'
-     r = session.get(url, timeout=60)
-     r.raise_for_status()
-     local.write_bytes(r.content)
-     return 'downloaded'
- all_files = set()
- for p in pairs:
-     all_files.add(p['input_rel'])
-     all_files.add(p['gt_rel'])
- … (ThreadPoolExecutor fan-out, truncated)
-         for fut in as_completed(futures):
-             result = fut.result()
-             if result == 'cached': cached += 1
-             else: downloaded += 1
-             pbar.update(1)
- pbar.close()
- elapsed = time.time() - t0
- print(f'\n✓ Done in {elapsed:.0f}s: {downloaded} downloaded, {cached} cached')
- print(f'  Disk usage: ~{sum(f.stat().st_size for f in DATA.rglob("*.JPG"))/1e6:.0f} MB')

@@ -209,34 +164,29 @@ (Step 4)
- #@title Step 4: …
- """Reads pre-downloaded image pairs from disk. Zero network at training time."""
- … (old path attributes and verification block, truncated)
- print(f'  Dataset: {len(pairs)} pairs, reading from disk (fast)')
- def __len__(self):
-     return len(self.pairs)
- inp = Image.open(self.…
- gt = Image.open(self.…
- # Synchronized random crop + flip
- cs = self.crop_size

@@ -257,21 +206,17 @@ (Step 4, loader: one-kwarg-per-line DataLoader call, now condensed)
- train_ds = RealBokehDisk(…
-     train_ds,
-     num_workers=CONFIG['num_workers'],
-     pin_memory=True,
-     drop_last=True,
-     persistent_workers=True,
- print(f'✓ …
- … (sanity-check lines truncated)

@@ -288,9 +233,7 @@ (Step 5)
- n_params = sum(p.numel() for p in model.parameters())
- print(f'✓ BokehFlow-{CONFIG["variant"].capitalize()}: {n_params:,} params on {DEVICE}')

@@ -299,7 +242,9 @@ … @@ -335,14 +279,12 @@ (Step 6: progress-bar label, loss call, and checkpoint path reworked)
- #@title Step 6: Train
- pbar = tqdm(train_loader, desc=f'…
- loss = losses['total']
- print(f'…
- torch.save({'epoch': epoch+1, 'model': state, 'loss': avg}, ckpt)
- print(f'  ✓ {ckpt}')
- print('\n✓ …

@@ -353,24 +295,17 @@ (Step 7)
- … (forward-pass lines truncated)
- fig, ax = plt.subplots(1, 3, figsize=(15, 5))
- ax[0].imshow(s['input'].permute(1,2,0).cpu()); ax[0].set_title('Input (f/22)')
- ax[1].imshow(out['bokeh'][0].permute(1,2,0).cpu().clamp(0,1)); ax[1].set_title('BokehFlow')
- ax[2].imshow(s['target'].permute(1,2,0).cpu()); ax[2].set_title('GT (f/2.0)')
- plt.tight_layout(); plt.savefig('result.png' …
- print('✓ Done!')
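For contrast with the removed `_download_file` above: if one did keep a raw `requests` session, the standard mitigation for 429s is a retry adapter with backoff. A sketch only, not part of the commit, which avoids the problem entirely by switching transports:

```python
# Sketch: a backoff-wrapped session for the removed raw-HTTP path.
import requests
from requests.adapters import HTTPAdapter, Retry

session = requests.Session()
retry = Retry(
    total=5,                          # give up after 5 attempts
    backoff_factor=1.0,               # exponential sleep between attempts
    status_forcelist=[429, 500, 502, 503],
    respect_retry_after_header=True,  # honor Retry-After on 429s
)
session.mount('https://', HTTPAdapter(max_retries=retry))
# session.get(...) now retries with backoff instead of failing fast on 429.
```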
Resulting notebook, new side (`+` = added line, unmarked = unchanged context, `@@` = unchanged lines omitted by the diff view):

@@ -5,18 +5,15 @@ (header cell, markdown)
  # 🎬 BokehFlow Training Notebook
+ ## ~90s download → train from disk. No 429 errors.

+ | Subset | Download | Disk | Train time/epoch (T4) |
+ |--------|----------|------|-----------------------|
+ | 200 scenes | ~30s | ~320 MB | ~3 min |
+ | **500 scenes** | **~80s** | **~800 MB** | **~7 min** |
+ | All 3958 | ~10 min | ~4.5 GB | ~45 min |

+ **Just run all cells. Default = 500 scenes.**
@@ -35,10 +32,10 @@ (Step 1)
+ #@title Step 1: Download BokehFlow model code
  from huggingface_hub import hf_hub_download
  hf_hub_download(repo_id='asdf98/BokehFlow', filename='bokehflow.py', local_dir='.')
+ print('✓ BokehFlow ready')
@@ -47,28 +44,28 @@ (Step 2)
+ #@title Step 2: Config — change max_scenes to control download size
  CONFIG = {
      # Model
      'variant': 'nano',      # 'nano'=583K, 'small'=3.1M, 'base'=12M

+     # Data — controls download size
+     'max_scenes': 500,      # 200=~30s download, 500=~80s, None=all ~10min
+     'target_fstop': 2.0,    # Which bokeh level to train on
      'crop_size': 256,
+     'data_dir': '/tmp/realbokeh',

      # Training
+     'batch_size': 4,        # 4 for T4 16GB, 8 for A100
      'num_epochs': 10,
      'lr': 3e-4,
      'weight_decay': 0.05,
      'max_grad_norm': 1.0,
+     'num_workers': 2,
      'output_dir': './checkpoints',
  }

+ import torch, os, time, random, json
  NUM_GPUS = torch.cuda.device_count()
  DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  print(f'Device: {DEVICE}' + (f' ({torch.cuda.get_device_name(0)})' if torch.cuda.is_available() else ''))
@@ -84,123 +81,81 @@ (Step 3)
+ #@title Step 3: Download data — ~80s for 500 scenes, cached on re-run
+ import asyncio, aiohttp
  import nest_asyncio; nest_asyncio.apply()
+ from pathlib import Path
+ from huggingface_hub import snapshot_download

  HF_BASE = 'https://huggingface.co/datasets/timseizinger/RealBokeh_3MP/resolve/main'
  DATA = Path(CONFIG['data_dir'])

+ # ---- Phase 1: Fetch metadata async (3-5s) ----
+ print('Phase 1/2: Fetching metadata...')
  t0 = time.time()

+ async def _fetch_metas(concurrency=30):
      sem = asyncio.Semaphore(concurrency)
      conn = aiohttp.TCPConnector(limit=concurrency)
      async def fetch(session, i):
          async with sem:
              try:
+                 async with session.get(f'{HF_BASE}/train/metadata/{i}.json') as r:
                      if r.status == 200: return await r.json(content_type=None)
              except: pass
          return None
      async with aiohttp.ClientSession(connector=conn) as s:
          return await asyncio.gather(*[fetch(s, i) for i in range(1, 3961)])

+ all_metas = [m for m in asyncio.run(_fetch_metas()) if m]
+ print(f'  {len(all_metas)} scenes indexed in {time.time()-t0:.1f}s')

+ # ---- Build pairs + download patterns ----
+ scene_pairs = []  # (meta, gt_rel_path)
+ for m in all_metas:
      for tp, av in zip(m['target_images'], m['target_avs']):
          if abs(av - CONFIG['target_fstop']) < 0.05:
+             scene_pairs.append((m, tp))
+             break

+ random.shuffle(scene_pairs)
+ if CONFIG['max_scenes']:
+     scene_pairs = scene_pairs[:CONFIG['max_scenes']]

+ # Build exact file list for snapshot_download
+ allow_patterns = []
+ training_pairs = []
+ for m, gt_rel in scene_pairs:
+     inp_rel = m['source_image']  # e.g. 'in/1_f22.JPG'
+     allow_patterns.append(f'train/{inp_rel}')
+     allow_patterns.append(f'train/{gt_rel}')
+     training_pairs.append({
+         'input_rel': inp_rel,
+         'gt_rel': gt_rel,
+         'f_number': CONFIG['target_fstop'],
+         'focal_mm': float(m.get('focal_length', 50)),
+         'focus_m': float(m.get('focus_plane_distance', 2.0)),
+     })

+ print(f'  {len(training_pairs)} pairs → {len(allow_patterns)} files to download')

+ # ---- Phase 2: Download via snapshot_download (uses HF optimized transfer, no 429) ----
+ print(f'\nPhase 2/2: Downloading images (skip if cached)...')
  t0 = time.time()
+ snapshot_download(
+     'timseizinger/RealBokeh_3MP',
+     repo_type='dataset',
+     local_dir=str(DATA),
+     allow_patterns=allow_patterns,
+ )
+ dt = time.time() - t0

+ # Verify
+ n_files = sum(1 for f in (DATA/'train').rglob('*.JPG'))
+ total_mb = sum(f.stat().st_size for f in (DATA/'train').rglob('*.JPG')) / 1e6
+ print(f'\n✓ {n_files} files ({total_mb:.0f} MB) ready in {dt:.0f}s')
+ if dt < 2:
+     print('  (cached from previous run)')
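A note on knobs (these are standard `snapshot_download` parameters, not shown in the cell above): transient failures are retried internally, `max_workers` caps per-file parallelism if throttling ever reappears, and a token is only needed for gated or private repos. The same call could even replace the `aiohttp` metadata loop, for example:

```python
# Sketch: fetch the scene metadata via snapshot_download instead of aiohttp.
# max_workers and token are standard parameters; the values are illustrative.
import os
from huggingface_hub import snapshot_download

snapshot_download(
    'timseizinger/RealBokeh_3MP',
    repo_type='dataset',
    local_dir='/tmp/realbokeh',
    allow_patterns=['train/metadata/*.json'],  # glob over all scene metadata
    max_workers=4,                             # lower if rate-limited (default 8)
    token=os.environ.get('HF_TOKEN'),          # gated/private repos only
)
```

The downloaded JSONs can then be read locally, with no per-request error handling at all.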
|
|
|
@@ -209,34 +164,29 @@ (Step 4)
+ #@title Step 4: Create DataLoader (reads from disk — fast)
  from torch.utils.data import Dataset, DataLoader
  from torchvision import transforms
  from PIL import Image

  class RealBokehDisk(Dataset):
      def __init__(self, pairs, data_dir, crop_size=256):
          self.pairs = pairs
+         self.root = Path(data_dir) / 'train'
+         self.cs = crop_size
          self.to_tensor = transforms.ToTensor()
+         # Verify
+         ok = sum(1 for p in pairs if (self.root/p['input_rel']).exists() and (self.root/p['gt_rel']).exists())
+         print(f'  Dataset: {ok}/{len(pairs)} pairs verified on disk')
+         self.pairs = [p for p in pairs if (self.root/p['input_rel']).exists() and (self.root/p['gt_rel']).exists()]

+     def __len__(self): return len(self.pairs)

      def __getitem__(self, idx):
          p = self.pairs[idx]
+         inp = Image.open(self.root / p['input_rel']).convert('RGB')
+         gt = Image.open(self.root / p['gt_rel']).convert('RGB')
+         cs = self.cs
          w, h = inp.size
          if w >= cs and h >= cs:
              x, y = random.randint(0, w-cs), random.randint(0, h-cs)
@@ -248,7 +198,6 @@
          if random.random() > 0.5:
              inp = inp.transpose(Image.FLIP_LEFT_RIGHT)
              gt = gt.transpose(Image.FLIP_LEFT_RIGHT)
          return {
              'input': self.to_tensor(inp),
              'target': self.to_tensor(gt),
@@ -257,21 +206,17 @@
              'focus_distance_m': torch.tensor(p['focus_m'], dtype=torch.float32),
          }

+ train_ds = RealBokehDisk(training_pairs, CONFIG['data_dir'], CONFIG['crop_size'])
  train_loader = DataLoader(
+     train_ds, batch_size=CONFIG['batch_size'], shuffle=True,
+     num_workers=CONFIG['num_workers'], pin_memory=True,
+     drop_last=True, persistent_workers=True,
  )
+ print(f'✓ {len(train_loader)} batches/epoch')

+ # Sanity check
+ b = next(iter(train_loader))
+ print(f'  input={b["input"].shape} target={b["target"].shape}')
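To put a number on "reads from disk, fast", a quick throughput probe over a few batches is cheap. Illustrative only; it is not a cell in the notebook:

```python
# Sketch: measure loader throughput over the first 20 batches.
import time

n = 20
t0 = time.time()
for _, batch in zip(range(n), train_loader):
    pass  # touch nothing; we only time host-side loading
print(f'{n * CONFIG["batch_size"] / (time.time() - t0):.1f} images/s')
```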
|
|
|
@@ -288,9 +233,7 @@ (Step 5)
  if NUM_GPUS > 1:
      model = torch.nn.DataParallel(model)
  model = model.to(DEVICE)
+ print(f'✓ BokehFlow-{CONFIG["variant"].capitalize()}: {sum(p.numel() for p in model.parameters()):,} params')
|
|
|
@@ -299,7 +242,9 @@ (Step 6)
+ #@title Step 6: Train!
+ from tqdm.auto import tqdm

  optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=CONFIG['weight_decay'])
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['num_epochs']*len(train_loader))
  criterion = BokehFlowLoss(lambda_depth=0.5)
@@ -311,7 +256,7 @@
      model.train()
      total_loss = 0.0
      t0 = time.time()
+     pbar = tqdm(train_loader, desc=f'Ep {epoch+1}/{CONFIG["num_epochs"]}')

      for batch in pbar:
          inp = batch['input'].to(DEVICE)
@@ -321,8 +266,7 @@
          focus = batch['focus_distance_m'].to(DEVICE)

          out = model(inp, f_num, focal, focus)
+         loss = criterion(out, {'bokeh_gt': tgt})['total']

          optimizer.zero_grad()
          loss.backward()
@@ -335,14 +279,12 @@
      avg = total_loss / len(train_loader)
      dt = time.time() - t0
+     print(f'  loss={avg:.4f} time={dt:.0f}s')

      state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()
+     torch.save({'epoch': epoch+1, 'model': state, 'loss': avg}, f'{CONFIG["output_dir"]}/ep{epoch+1}.pt')

+ print('\n✓ Done!')
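Since each checkpoint saved above carries `epoch`, `model`, and `loss`, resuming takes only a few lines. A hypothetical helper matching that format, not part of the notebook:

```python
# Sketch: resume from the newest ep{N}.pt checkpoint written by Step 6.
import glob
import torch

ckpts = glob.glob(f"{CONFIG['output_dir']}/ep*.pt")
if ckpts:
    latest = max(ckpts, key=lambda p: int(p.rsplit('ep', 1)[1].split('.')[0]))
    ck = torch.load(latest, map_location=DEVICE)
    target = model.module if hasattr(model, 'module') else model
    target.load_state_dict(ck['model'])
    print(f"resumed from epoch {ck['epoch']} (loss {ck['loss']:.4f})")
```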
|
|
|
@@ -353,24 +295,17 @@ (Step 7)
  #@title Step 7: Visualize
  import matplotlib.pyplot as plt
  model.eval()
  s = train_ds[0]
  with torch.no_grad():
+     o = model(s['input'].unsqueeze(0).to(DEVICE), s['f_number'].unsqueeze(0).to(DEVICE),
+               s['focal_length_mm'].unsqueeze(0).to(DEVICE), s['focus_distance_m'].unsqueeze(0).to(DEVICE))
+ fig, ax = plt.subplots(1, 3, figsize=(15, 5))
+ ax[0].imshow(s['input'].permute(1,2,0).cpu()); ax[0].set_title('Input f/22')
+ ax[1].imshow(o['bokeh'][0].permute(1,2,0).cpu().clamp(0,1)); ax[1].set_title('BokehFlow')
+ ax[2].imshow(s['target'].permute(1,2,0).cpu()); ax[2].set_title('GT f/2.0')
  for a in ax: a.axis('off')
+ plt.tight_layout(); plt.savefig('result.png'); plt.show()