asdf98
/

BokehFlow

@@ -5,15 +5,17 @@
    "metadata": {},
    "source": [
     "# 🎬 BokehFlow Training Notebook\n",
-    "## Train on Free Colab T4 or Kaggle Dual-GPU\n",
     "\n",
-    "**Just run all cells.** Default config trains BokehFlow-Nano on RealBokeh dataset.\n",
     "\n",
-    "| Platform | GPU | VRAM | Expected Time (1 epoch) |\n",
-    "|----------|-----|------|------------------------|\n",
-    "| Colab Free | T4 | 16GB | ~45 min |\n",
-    "| Kaggle | 2×T4 | 2×16GB | ~25 min |\n",
-    "| Colab Pro | A100 | 40GB | ~10 min |"
    ]
   },
   {
@@ -22,10 +24,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ============================================================\n",
-    "# STEP 0: Install dependencies\n",
-    "# ============================================================\n",
-    "!pip install -q torch torchvision Pillow huggingface_hub tqdm"
    ]
   },
   {
@@ -34,9 +34,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ============================================================\n",
-    "# STEP 1: Download BokehFlow architecture\n",
-    "# ============================================================\n",
     "from huggingface_hub import hf_hub_download\n",
     "hf_hub_download(repo_id='asdf98/BokehFlow', filename='bokehflow.py', local_dir='.')\n",
     "print('✓ BokehFlow downloaded')"
@@ -48,74 +46,28 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ============================================================\n",
-    "# STEP 2: Configuration — CHANGE THESE IF YOU WANT\n",
-    "# ============================================================\n",
     "CONFIG = {\n",
-    "    # Model\n",
-    "    'variant': 'nano',        # 'nano'=583K params, 'small'=3.1M, 'base'=12M\n",
-    "    \n",
-    "    # Training\n",
-    "    'batch_size': 4,          # 4 for T4 16GB, 8 for A100\n",
-    "    'crop_size': 256,         # 256x256 random crops\n",
-    "    'num_epochs': 5,          # 5 epochs for demo, 50+ for full training\n",
     "    'lr': 3e-4,\n",
     "    'weight_decay': 0.05,\n",
     "    'max_grad_norm': 1.0,\n",
-    "    \n",
-    "    # Data\n",
-    "    'num_workers': 2,         # 2 for Colab, 4 for Kaggle\n",
-    "    'max_train_samples': 500, # Limit for quick test. Set None for full dataset.\n",
-    "    \n",
-    "    # Target f-stop (train on f/2.0 bokeh)\n",
-    "    'target_fstop': 2.0,\n",
-    "    \n",
-    "    # Save\n",
-    "    'save_every': 1,          # Save checkpoint every N epochs\n",
     "    'output_dir': './checkpoints',\n",
     "}\n",
     "\n",
-    "# Auto-detect Kaggle dual GPU\n",
     "import torch\n",
     "NUM_GPUS = torch.cuda.device_count()\n",
-    "print(f'GPUs: {NUM_GPUS}, Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"}')\n",
     "if NUM_GPUS > 1:\n",
-    "    print(f'Kaggle dual-GPU detected! Will use DataParallel.')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# ============================================================\n",
-    "# STEP 3: Dataset — Download RealBokeh (raw images, ~19GB)\n",
-    "# For free Colab/Kaggle, we use the HF Hub API to stream\n",
-    "# ============================================================\n",
-    "import os, json, re, glob\n",
-    "from pathlib import Path\n",
-    "from huggingface_hub import snapshot_download\n",
-    "\n",
-    "# Only download the train split input images + f/2.0 GT + metadata\n",
-    "# This saves bandwidth vs full 19GB\n",
-    "DATA_DIR = './realbokeh'\n",
-    "\n",
-    "if not os.path.exists(f'{DATA_DIR}/train/in'):\n",
-    "    print('Downloading RealBokeh train split (input + metadata)...')\n",
-    "    print('This downloads ~5GB. On Colab it takes ~3-5 minutes.')\n",
-    "    snapshot_download(\n",
-    "        repo_id='timseizinger/RealBokeh_3MP',\n",
-    "        repo_type='dataset',\n",
-    "        local_dir=DATA_DIR,\n",
-    "        allow_patterns=['train/in/*', 'train/metadata/*', 'train/gt/*/f2.0*',\n",
-    "                        'train/gt/*/*_f2.0*',\n",
-    "                        'validation/in/*', 'validation/metadata/*', \n",
-    "                        'validation/gt/*/*_f2.0*'],\n",
-    "    )\n",
-    "    print('✓ Dataset downloaded')\n",
-    "else:\n",
-    "    print('✓ Dataset already exists')"
    ]
   },
   {
@@ -124,128 +76,135 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ============================================================\n",
-    "# STEP 4: PyTorch Dataset class for RealBokeh\n",
-    "# ============================================================\n",
-    "import torch\n",
     "from torch.utils.data import Dataset, DataLoader\n",
     "from torchvision import transforms\n",
-    "from PIL import Image\n",
-    "import random\n",
     "\n",
-    "class RealBokehDataset(Dataset):\n",
-    "    \"\"\"RealBokeh dataset for BokehFlow training.\n",
-    "    \n",
-    "    Each sample returns:\n",
-    "        input_img: (3, crop_size, crop_size) sharp f/22 image\n",
-    "        target_img: (3, crop_size, crop_size) bokeh GT at target f-stop\n",
-    "        f_number: scalar f-stop value\n",
-    "        focal_length_mm: scalar focal length\n",
-    "        focus_distance_m: scalar focus distance in meters\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(self, data_dir, split='train', crop_size=256, \n",
-    "                 target_fstop=2.0, max_samples=None):\n",
-    "        self.data_dir = Path(data_dir) / split\n",
-    "        self.crop_size = crop_size\n",
-    "        self.target_fstop = target_fstop\n",
-    "        \n",
-    "        # Load metadata\n",
-    "        self.samples = []\n",
-    "        meta_dir = self.data_dir / 'metadata'\n",
-    "        if not meta_dir.exists():\n",
-    "            raise FileNotFoundError(f'No metadata at {meta_dir}')\n",
-    "        \n",
-    "        for meta_file in sorted(meta_dir.glob('*.json')):\n",
-    "            with open(meta_file) as f:\n",
-    "                meta = json.load(f)\n",
-    "            \n",
-    "            # Find target f-stop image\n",
-    "            fstop_str = f'f{target_fstop}'\n",
-    "            gt_path = None\n",
-    "            for img, av in zip(meta['target_images'], meta['target_avs']):\n",
-    "                if abs(av - target_fstop) < 0.01:\n",
-    "                    gt_path = self.data_dir / img\n",
-    "                    break\n",
-    "            \n",
-    "            if gt_path is None or not gt_path.exists():\n",
-    "                continue\n",
-    "            \n",
-    "            input_path = self.data_dir / meta['source_image']\n",
-    "            if not input_path.exists():\n",
     "                continue\n",
-    "            \n",
-    "            self.samples.append({\n",
-    "                'input': str(input_path),\n",
-    "                'target': str(gt_path),\n",
-    "                'f_number': target_fstop,\n",
-    "                'focal_length_mm': float(meta['focal_length']),\n",
-    "                'focus_distance_m': float(meta['focus_plane_distance']),\n",
     "            })\n",
-    "        \n",
     "        if max_samples:\n",
-    "            self.samples = self.samples[:max_samples]\n",
-    "        \n",
-    "        print(f'{split}: {len(self.samples)} paired samples found')\n",
-    "        \n",
     "        self.to_tensor = transforms.ToTensor()\n",
-    "    \n",
     "    def __len__(self):\n",
-    "        return len(self.samples)\n",
-    "    \n",
     "    def __getitem__(self, idx):\n",
-    "        s = self.samples[idx]\n",
-    "        \n",
-    "        # Load images\n",
-    "        inp = Image.open(s['input']).convert('RGB')\n",
-    "        tgt = Image.open(s['target']).convert('RGB')\n",
-    "        \n",
-    "        # Random crop (same crop for both)\n",
-    "        w, h = inp.size\n",
     "        cs = self.crop_size\n",
     "        if w >= cs and h >= cs:\n",
-    "            x = random.randint(0, w - cs)\n",
-    "            y = random.randint(0, h - cs)\n",
     "            inp = inp.crop((x, y, x+cs, y+cs))\n",
-    "            tgt = tgt.crop((x, y, x+cs, y+cs))\n",
     "        else:\n",
     "            inp = inp.resize((cs, cs), Image.LANCZOS)\n",
-    "            tgt = tgt.resize((cs, cs), Image.LANCZOS)\n",
-    "        \n",
-    "        # Random horizontal flip\n",
     "        if random.random() > 0.5:\n",
     "            inp = inp.transpose(Image.FLIP_LEFT_RIGHT)\n",
-    "            tgt = tgt.transpose(Image.FLIP_LEFT_RIGHT)\n",
-    "        \n",
-    "        inp_t = self.to_tensor(inp)  # [0,1] range\n",
-    "        tgt_t = self.to_tensor(tgt)\n",
-    "        \n",
     "        return {\n",
-    "            'input': inp_t,\n",
-    "            'target': tgt_t,\n",
-    "            'f_number': torch.tensor(s['f_number'], dtype=torch.float32),\n",
-    "            'focal_length_mm': torch.tensor(s['focal_length_mm'], dtype=torch.float32),\n",
-    "            'focus_distance_m': torch.tensor(s['focus_distance_m'], dtype=torch.float32),\n",
     "        }\n",
     "\n",
-    "# Create datasets\n",
-    "train_ds = RealBokehDataset(\n",
-    "    DATA_DIR, split='train', \n",
     "    crop_size=CONFIG['crop_size'],\n",
     "    target_fstop=CONFIG['target_fstop'],\n",
-    "    max_samples=CONFIG['max_train_samples'],\n",
     ")\n",
     "\n",
     "train_loader = DataLoader(\n",
-    "    train_ds, \n",
     "    batch_size=CONFIG['batch_size'],\n",
     "    shuffle=True,\n",
     "    num_workers=CONFIG['num_workers'],\n",
-    "    pin_memory=True,\n",
     "    drop_last=True,\n",
     ")\n",
-    "\n",
-    "print(f'\\n✓ DataLoader ready: {len(train_loader)} batches per epoch')"
    ]
   },
   {
@@ -254,24 +213,38 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ============================================================\n",
-    "# STEP 5: Create model\n",
-    "# ============================================================\n",
     "from bokehflow import BokehFlow, BokehFlowConfig, BokehFlowLoss, model_summary\n",
     "\n",
     "config = BokehFlowConfig(variant=CONFIG['variant'])\n",
     "model = BokehFlow(config)\n",
     "\n",
-    "# Multi-GPU support for Kaggle\n",
-    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
     "if NUM_GPUS > 1:\n",
     "    model = torch.nn.DataParallel(model)\n",
-    "    print(f'Using DataParallel on {NUM_GPUS} GPUs')\n",
-    "model = model.to(device)\n",
     "\n",
-    "# Print summary\n",
-    "print(model_summary(config))\n",
-    "print(f'Device: {device}')"
    ]
   },
   {
@@ -280,85 +253,53 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ============================================================\n",
-    "# STEP 6: Training loop\n",
-    "# ============================================================\n",
-    "import torch.nn.functional as F\n",
     "from tqdm.auto import tqdm\n",
-    "import time\n",
-    "\n",
-    "optimizer = torch.optim.AdamW(\n",
-    "    model.parameters(), \n",
-    "    lr=CONFIG['lr'], \n",
-    "    weight_decay=CONFIG['weight_decay']\n",
-    ")\n",
-    "\n",
-    "scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
-    "    optimizer, T_max=CONFIG['num_epochs'] * len(train_loader)\n",
-    ")\n",
     "\n",
     "criterion = BokehFlowLoss(lambda_depth=0.5)\n",
-    "\n",
     "os.makedirs(CONFIG['output_dir'], exist_ok=True)\n",
     "\n",
-    "# Training\n",
-    "print(f'\\n{\"=\"*60}')\n",
-    "print(f'Starting training: {CONFIG[\"num_epochs\"]} epochs')\n",
-    "print(f'{\"=\"*60}\\n')\n",
     "\n",
     "for epoch in range(CONFIG['num_epochs']):\n",
     "    model.train()\n",
-    "    epoch_loss = 0.0\n",
-    "    epoch_start = time.time()\n",
-    "    \n",
     "    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{CONFIG[\"num_epochs\"]}')\n",
     "    for step, batch in enumerate(pbar):\n",
-    "        # Move to device\n",
-    "        inp = batch['input'].to(device)\n",
-    "        tgt = batch['target'].to(device)\n",
-    "        f_num = batch['f_number'].to(device)\n",
-    "        focal = batch['focal_length_mm'].to(device)\n",
-    "        focus = batch['focus_distance_m'].to(device)\n",
-    "        \n",
-    "        # Forward\n",
-    "        output = model(inp, f_num, focal, focus)\n",
-    "        \n",
-    "        # Loss\n",
-    "        losses = criterion(\n",
-    "            output if not isinstance(output, dict) else output,\n",
-    "            {'bokeh_gt': tgt}\n",
-    "        )\n",
     "        loss = losses['total']\n",
-    "        \n",
-    "        # Backward\n",
     "        optimizer.zero_grad()\n",
     "        loss.backward()\n",
     "        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])\n",
     "        optimizer.step()\n",
     "        scheduler.step()\n",
-    "        \n",
-    "        epoch_loss += loss.item()\n",
-    "        pbar.set_postfix({\n",
-    "            'loss': f'{loss.item():.4f}',\n",
-    "            'lr': f'{scheduler.get_last_lr()[0]:.2e}',\n",
-    "        })\n",
-    "    \n",
-    "    avg_loss = epoch_loss / len(train_loader)\n",
-    "    elapsed = time.time() - epoch_start\n",
-    "    print(f'Epoch {epoch+1}: avg_loss={avg_loss:.4f}, time={elapsed:.0f}s')\n",
-    "    \n",
     "    # Save checkpoint\n",
-    "    if (epoch + 1) % CONFIG['save_every'] == 0:\n",
-    "        ckpt_path = f'{CONFIG[\"output_dir\"]}/bokehflow_{CONFIG[\"variant\"]}_epoch{epoch+1}.pt'\n",
-    "        state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()\n",
-    "        torch.save({\n",
-    "            'epoch': epoch + 1,\n",
-    "            'model_state_dict': state,\n",
-    "            'optimizer_state_dict': optimizer.state_dict(),\n",
-    "            'loss': avg_loss,\n",
-    "            'config': CONFIG,\n",
-    "        }, ckpt_path)\n",
-    "        print(f'  ✓ Saved checkpoint: {ckpt_path}')\n",
     "\n",
     "print(f'\\n✓ Training complete!')"
    ]
@@ -369,34 +310,48 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ============================================================\n",
-    "# STEP 7: Quick inference test\n",
-    "# ============================================================\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "model.eval()\n",
     "with torch.no_grad():\n",
-    "    sample = train_ds[0]\n",
-    "    inp = sample['input'].unsqueeze(0).to(device)\n",
     "    out = model(\n",
-    "        inp,\n",
-    "        sample['f_number'].unsqueeze(0).to(device),\n",
-    "        sample['focal_length_mm'].unsqueeze(0).to(device),\n",
-    "        sample['focus_distance_m'].unsqueeze(0).to(device),\n",
     "    )\n",
     "\n",
     "fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
-    "axes[0].imshow(sample['input'].permute(1,2,0).numpy())\n",
-    "axes[0].set_title('Input (f/22)')\n",
-    "axes[1].imshow(out['bokeh'][0].cpu().permute(1,2,0).clamp(0,1).numpy())\n",
-    "axes[1].set_title('BokehFlow Output')\n",
-    "axes[2].imshow(sample['target'].permute(1,2,0).numpy())\n",
-    "axes[2].set_title('Ground Truth (f/2.0)')\n",
     "for ax in axes: ax.axis('off')\n",
     "plt.tight_layout()\n",
-    "plt.savefig('result.png', dpi=100)\n",
     "plt.show()\n",
-    "print('✓ Inference test complete')"
    ]
   }
  ],
@@ -409,7 +364,8 @@
   "language_info": {
    "name": "python",
    "version": "3.10.0"
-  }
  },
  "nbformat": 4,
  "nbformat_minor": 4

    "metadata": {},
    "source": [
     "# 🎬 BokehFlow Training Notebook\n",
+    "## Zero-download streaming — starts training in ~5 seconds\n",
     "\n",
+    "**How it works:** Metadata (3960 tiny JSONs) fetched async in 3s. Images streamed on-demand via HTTP during training. **Zero disk usage, zero wait.**\n",
     "\n",
+    "| Platform | GPU | Batch/s | Notes |\n",
+    "|----------|-----|---------|-------|\n",
+    "| Colab Free | T4 16GB | ~2-3s | 4 workers, prefetch hides latency |\n",
+    "| Kaggle | 2×T4 | ~1.5s | DataParallel + 8 workers |\n",
+    "| Colab Pro | A100 | ~1s | 8 workers |\n",
+    "\n",
+    "**Just run all cells. No config changes needed.**"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 0: Install (15s)\n",
+    "!pip install -q torch torchvision Pillow huggingface_hub tqdm aiohttp"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 1: Download BokehFlow model code (2s)\n",
     "from huggingface_hub import hf_hub_download\n",
     "hf_hub_download(repo_id='asdf98/BokehFlow', filename='bokehflow.py', local_dir='.')\n",
     "print('✓ BokehFlow downloaded')"
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 2: Config\n",
     "CONFIG = {\n",
+    "    'variant': 'nano',        # 'nano'=583K, 'small'=3.1M, 'base'=12M\n",
+    "    'batch_size': 4,          # 4 for T4, 8 for A100\n",
+    "    'crop_size': 256,         # Training crop size\n",
+    "    'num_epochs': 5,\n",
     "    'lr': 3e-4,\n",
     "    'weight_decay': 0.05,\n",
     "    'max_grad_norm': 1.0,\n",
+    "    'num_workers': 4,         # 4 for Colab, 8 for Kaggle\n",
+    "    'target_fstop': 2.0,      # Train on max bokeh (f/2.0)\n",
+    "    'max_samples': None,      # None=all 3958, or set 200 for quick test\n",
     "    'output_dir': './checkpoints',\n",
     "}\n",
     "\n",
     "import torch\n",
     "NUM_GPUS = torch.cuda.device_count()\n",
+    "DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "print(f'Device: {DEVICE}' + (f' ({torch.cuda.get_device_name(0)})' if torch.cuda.is_available() else ''))\n",
     "if NUM_GPUS > 1:\n",
+    "    CONFIG['num_workers'] = 8\n",
+    "    print(f'Kaggle dual-GPU detected → {NUM_GPUS} GPUs, {CONFIG[\"num_workers\"]} workers')"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 3: Streaming Dataset — NO download, starts in ~3s\n",
+    "import asyncio, aiohttp, json, io, os, random, time, requests\n",
+    "from PIL import Image\n",
     "from torch.utils.data import Dataset, DataLoader\n",
     "from torchvision import transforms\n",
+    "from concurrent.futures import ThreadPoolExecutor\n",
     "\n",
+    "HF_BASE = 'https://huggingface.co/datasets/timseizinger/RealBokeh_3MP/resolve/main'\n",
+    "\n",
+    "# ---- Async metadata fetch (3960 JSONs in ~3s) ----\n",
+    "async def _fetch_all_metadata(split='train', concurrency=50):\n",
+    "    split_counts = {'train': 3960, 'validation': 220, 'test': 220}\n",
+    "    n = split_counts.get(split, 220)\n",
+    "    async def fetch_one(session, sem, sid):\n",
+    "        async with sem:\n",
+    "            url = f'{HF_BASE}/{split}/metadata/{sid}.json'\n",
+    "            try:\n",
+    "                async with session.get(url) as r:\n",
+    "                    if r.status == 200:\n",
+    "                        return await r.json(content_type=None)\n",
+    "            except:\n",
+    "                pass\n",
+    "            return None\n",
+    "    sem = asyncio.Semaphore(concurrency)\n",
+    "    conn = aiohttp.TCPConnector(limit=concurrency, force_close=False)\n",
+    "    async with aiohttp.ClientSession(connector=conn) as session:\n",
+    "        results = await asyncio.gather(*[fetch_one(session, sem, i) for i in range(1, n+1)])\n",
+    "    return [r for r in results if r is not None]\n",
+    "\n",
+    "def _build_pairs(metas, split, target_fstop=None):\n",
+    "    pairs = []\n",
+    "    for m in metas:\n",
+    "        for tgt_path, tgt_av in zip(m['target_images'], m['target_avs']):\n",
+    "            if target_fstop is not None and abs(tgt_av - target_fstop) > 0.05:\n",
     "                continue\n",
+    "            pairs.append({\n",
+    "                'input_path': f\"{split}/{m['source_image']}\",\n",
+    "                'gt_path':    f'{split}/{tgt_path}',\n",
+    "                'f_number':   tgt_av,\n",
+    "                'focal_mm':   float(m.get('focal_length', 50)),\n",
+    "                'focus_m':    float(m.get('focus_plane_distance', 2.0)),\n",
     "            })\n",
+    "    return pairs\n",
+    "\n",
+    "def _fetch_img(path):\n",
+    "    \"\"\"HTTP fetch image → PIL. No disk write.\"\"\"\n",
+    "    r = requests.get(f'{HF_BASE}/{path}', timeout=30)\n",
+    "    r.raise_for_status()\n",
+    "    return Image.open(io.BytesIO(r.content)).convert('RGB')\n",
+    "\n",
+    "class RealBokehStream(Dataset):\n",
+    "    \"\"\"Streaming dataset. Zero disk. Images fetched on-demand via HTTP.\"\"\"\n",
+    "    def __init__(self, split='train', crop_size=256, target_fstop=2.0, max_samples=None):\n",
+    "        t0 = time.time()\n",
+    "        # Async fetch all metadata (~3s)\n",
+    "        try:\n",
+    "            loop = asyncio.get_event_loop()\n",
+    "            if loop.is_running():  # Colab/Jupyter has running loop\n",
+    "                import nest_asyncio; nest_asyncio.apply()\n",
+    "        except RuntimeError:\n",
+    "            pass\n",
+    "        metas = asyncio.run(_fetch_all_metadata(split))\n",
+    "        self.pairs = _build_pairs(metas, split, target_fstop)\n",
+    "        random.shuffle(self.pairs)\n",
     "        if max_samples:\n",
+    "            self.pairs = self.pairs[:max_samples]\n",
+    "        self.crop_size = crop_size\n",
     "        self.to_tensor = transforms.ToTensor()\n",
+    "        print(f'  {split}: {len(self.pairs)} pairs ready in {time.time()-t0:.1f}s (zero disk)')\n",
+    "\n",
     "    def __len__(self):\n",
+    "        return len(self.pairs)\n",
+    "\n",
     "    def __getitem__(self, idx):\n",
+    "        p = self.pairs[idx]\n",
+    "        # Fetch input + GT concurrently (2 threads)\n",
+    "        with ThreadPoolExecutor(2) as ex:\n",
+    "            f1 = ex.submit(_fetch_img, p['input_path'])\n",
+    "            f2 = ex.submit(_fetch_img, p['gt_path'])\n",
+    "            inp, gt = f1.result(), f2.result()\n",
+    "\n",
+    "        # Synchronized random crop + flip on both images\n",
     "        cs = self.crop_size\n",
+    "        w, h = inp.size\n",
     "        if w >= cs and h >= cs:\n",
+    "            x, y = random.randint(0, w-cs), random.randint(0, h-cs)\n",
     "            inp = inp.crop((x, y, x+cs, y+cs))\n",
+    "            gt  = gt.crop((x, y, x+cs, y+cs))\n",
     "        else:\n",
     "            inp = inp.resize((cs, cs), Image.LANCZOS)\n",
+    "            gt  = gt.resize((cs, cs), Image.LANCZOS)\n",
     "        if random.random() > 0.5:\n",
     "            inp = inp.transpose(Image.FLIP_LEFT_RIGHT)\n",
+    "            gt  = gt.transpose(Image.FLIP_LEFT_RIGHT)\n",
+    "\n",
     "        return {\n",
+    "            'input':  self.to_tensor(inp),\n",
+    "            'target': self.to_tensor(gt),\n",
+    "            'f_number':        torch.tensor(p['f_number'], dtype=torch.float32),\n",
+    "            'focal_length_mm': torch.tensor(p['focal_mm'], dtype=torch.float32),\n",
+    "            'focus_distance_m':torch.tensor(p['focus_m'],  dtype=torch.float32),\n",
     "        }\n",
     "\n",
+    "# ---- Create dataset + loader ----\n",
+    "print('Fetching metadata (no images downloaded yet)...')\n",
+    "try:\n",
+    "    import nest_asyncio; nest_asyncio.apply()  # needed for Jupyter\n",
+    "except ImportError:\n",
+    "    !pip install -q nest_asyncio\n",
+    "    import nest_asyncio; nest_asyncio.apply()\n",
+    "\n",
+    "train_ds = RealBokehStream(\n",
+    "    split='train',\n",
     "    crop_size=CONFIG['crop_size'],\n",
     "    target_fstop=CONFIG['target_fstop'],\n",
+    "    max_samples=CONFIG['max_samples'],\n",
     ")\n",
     "\n",
     "train_loader = DataLoader(\n",
+    "    train_ds,\n",
     "    batch_size=CONFIG['batch_size'],\n",
     "    shuffle=True,\n",
     "    num_workers=CONFIG['num_workers'],\n",
+    "    prefetch_factor=2,\n",
+    "    persistent_workers=True,\n",
     "    drop_last=True,\n",
     ")\n",
+    "print(f'✓ DataLoader: {len(train_loader)} batches/epoch, {CONFIG[\"num_workers\"]} workers')\n",
+    "print(f'  Images streamed on-the-fly. Disk usage: 0 MB')"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 4: Sanity check — fetch 1 batch\n",
+    "import time\n",
+    "t0 = time.time()\n",
+    "batch = next(iter(train_loader))\n",
+    "t1 = time.time()\n",
+    "print(f'First batch fetched in {t1-t0:.1f}s')\n",
+    "print(f'  input:  {batch[\"input\"].shape}')\n",
+    "print(f'  target: {batch[\"target\"].shape}')\n",
+    "print(f'  f_number: {batch[\"f_number\"]}')\n",
+    "print(f'  focal_mm: {batch[\"focal_length_mm\"]}')\n",
+    "print(f'  focus_m:  {batch[\"focus_distance_m\"]}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@title Step 5: Create model\n",
     "from bokehflow import BokehFlow, BokehFlowConfig, BokehFlowLoss, model_summary\n",
     "\n",
     "config = BokehFlowConfig(variant=CONFIG['variant'])\n",
     "model = BokehFlow(config)\n",
     "\n",
     "if NUM_GPUS > 1:\n",
     "    model = torch.nn.DataParallel(model)\n",
+    "    print(f'DataParallel on {NUM_GPUS} GPUs')\n",
+    "model = model.to(DEVICE)\n",
     "\n",
+    "total_params = sum(p.numel() for p in model.parameters())\n",
+    "print(f'\\n✓ BokehFlow-{CONFIG[\"variant\"].capitalize()}: {total_params:,} params on {DEVICE}')"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 6: Train!\n",
     "from tqdm.auto import tqdm\n",
+    "import torch.nn.functional as F\n",
     "\n",
+    "optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=CONFIG['weight_decay'])\n",
+    "scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['num_epochs'] * len(train_loader))\n",
     "criterion = BokehFlowLoss(lambda_depth=0.5)\n",
     "os.makedirs(CONFIG['output_dir'], exist_ok=True)\n",
     "\n",
+    "print(f'Training: {CONFIG[\"num_epochs\"]} epochs × {len(train_loader)} batches')\n",
+    "print(f'Images streamed from HF Hub — no disk needed\\n')\n",
     "\n",
     "for epoch in range(CONFIG['num_epochs']):\n",
     "    model.train()\n",
+    "    running_loss = 0.0\n",
+    "    t_epoch = time.time()\n",
     "    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{CONFIG[\"num_epochs\"]}')\n",
+    "\n",
     "    for step, batch in enumerate(pbar):\n",
+    "        inp = batch['input'].to(DEVICE)\n",
+    "        tgt = batch['target'].to(DEVICE)\n",
+    "        f_num = batch['f_number'].to(DEVICE)\n",
+    "        focal = batch['focal_length_mm'].to(DEVICE)\n",
+    "        focus = batch['focus_distance_m'].to(DEVICE)\n",
+    "\n",
+    "        out = model(inp, f_num, focal, focus)\n",
+    "        losses = criterion(out, {'bokeh_gt': tgt})\n",
     "        loss = losses['total']\n",
+    "\n",
     "        optimizer.zero_grad()\n",
     "        loss.backward()\n",
     "        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])\n",
     "        optimizer.step()\n",
     "        scheduler.step()\n",
+    "\n",
+    "        running_loss += loss.item()\n",
+    "        pbar.set_postfix(loss=f'{loss.item():.4f}', lr=f'{scheduler.get_last_lr()[0]:.1e}')\n",
+    "\n",
+    "    avg = running_loss / len(train_loader)\n",
+    "    elapsed = time.time() - t_epoch\n",
+    "    print(f'  → avg_loss={avg:.4f}  time={elapsed:.0f}s  ({elapsed/len(train_loader):.1f}s/batch)')\n",
+    "\n",
     "    # Save checkpoint\n",
+    "    state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()\n",
+    "    ckpt = f'{CONFIG[\"output_dir\"]}/bokehflow_{CONFIG[\"variant\"]}_ep{epoch+1}.pt'\n",
+    "    torch.save({'epoch': epoch+1, 'model': state, 'loss': avg, 'config': CONFIG}, ckpt)\n",
+    "    print(f'  ✓ Saved {ckpt}')\n",
     "\n",
     "print(f'\\n✓ Training complete!')"
    ]
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 7: Visualize result\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "model.eval()\n",
+    "sample = train_ds[0]\n",
     "with torch.no_grad():\n",
     "    out = model(\n",
+    "        sample['input'].unsqueeze(0).to(DEVICE),\n",
+    "        sample['f_number'].unsqueeze(0).to(DEVICE),\n",
+    "        sample['focal_length_mm'].unsqueeze(0).to(DEVICE),\n",
+    "        sample['focus_distance_m'].unsqueeze(0).to(DEVICE),\n",
     "    )\n",
     "\n",
     "fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
+    "axes[0].imshow(sample['input'].permute(1,2,0).cpu().numpy())\n",
+    "axes[0].set_title('Input (f/22 sharp)')\n",
+    "axes[1].imshow(out['bokeh'][0].permute(1,2,0).cpu().clamp(0,1).numpy())\n",
+    "axes[1].set_title('BokehFlow output')\n",
+    "axes[2].imshow(sample['target'].permute(1,2,0).cpu().numpy())\n",
+    "axes[2].set_title('Ground truth (f/2.0)')\n",
     "for ax in axes: ax.axis('off')\n",
     "plt.tight_layout()\n",
+    "plt.savefig('result.png', dpi=100, bbox_inches='tight')\n",
     "plt.show()\n",
+    "print('✓ Done!')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@title (Optional) Push trained model to HuggingFace Hub\n",
+    "# from huggingface_hub import HfApi, login\n",
+    "# login()  # paste your HF token\n",
+    "# api = HfApi()\n",
+    "# api.upload_file(\n",
+    "#     path_or_fileobj=f'{CONFIG[\"output_dir\"]}/bokehflow_{CONFIG[\"variant\"]}_ep{CONFIG[\"num_epochs\"]}.pt',\n",
+    "#     path_in_repo=f'checkpoints/bokehflow_{CONFIG[\"variant\"]}.pt',\n",
+    "#     repo_id='YOUR_USERNAME/BokehFlow-trained',\n",
+    "# )"
    ]
   }
  ],
   "language_info": {
    "name": "python",
    "version": "3.10.0"
+  },
+  "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 4