asdf98 committed
Commit 800dcfc · verified · Parent(s): 373e0ae

Add Colab/Kaggle training notebook — just run all cells

Files changed (1)
  1. train_bokehflow.ipynb +414 -0
train_bokehflow.ipynb ADDED
@@ -0,0 +1,414 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 🎬 BokehFlow Training Notebook\n",
+ "## Train on Free Colab T4 or Kaggle Dual-GPU\n",
+ "\n",
+ "**Just run all cells.** The default config trains BokehFlow-Nano on the RealBokeh dataset.\n",
+ "\n",
+ "| Platform | GPU | VRAM | Expected Time (1 epoch) |\n",
+ "|----------|-----|------|------------------------|\n",
+ "| Colab Free | T4 | 16GB | ~45 min |\n",
+ "| Kaggle | 2×T4 | 2×16GB | ~25 min |\n",
+ "| Colab Pro | A100 | 40GB | ~10 min |"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 0: Install dependencies (matplotlib is used in STEP 7)\n",
+ "# ============================================================\n",
+ "!pip install -q torch torchvision Pillow matplotlib huggingface_hub tqdm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 1: Download BokehFlow architecture\n",
+ "# ============================================================\n",
+ "from huggingface_hub import hf_hub_download\n",
+ "hf_hub_download(repo_id='asdf98/BokehFlow', filename='bokehflow.py', local_dir='.')\n",
+ "print('✓ BokehFlow downloaded')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 2: Configuration — CHANGE THESE IF YOU WANT\n",
+ "# ============================================================\n",
+ "CONFIG = {\n",
+ "    # Model\n",
+ "    'variant': 'nano',  # 'nano'=583K params, 'small'=3.1M, 'base'=12M\n",
+ "\n",
+ "    # Training\n",
+ "    'batch_size': 4,  # 4 for T4 16GB, 8 for A100\n",
+ "    'crop_size': 256,  # 256x256 random crops\n",
+ "    'num_epochs': 5,  # 5 epochs for demo, 50+ for full training\n",
+ "    'lr': 3e-4,\n",
+ "    'weight_decay': 0.05,\n",
+ "    'max_grad_norm': 1.0,\n",
+ "\n",
+ "    # Data\n",
+ "    'num_workers': 2,  # 2 for Colab, 4 for Kaggle\n",
+ "    'max_train_samples': 500,  # Limit for quick test. Set None for full dataset.\n",
+ "\n",
+ "    # Target f-stop (train on f/2.0 bokeh)\n",
+ "    'target_fstop': 2.0,\n",
+ "\n",
+ "    # Save\n",
+ "    'save_every': 1,  # Save checkpoint every N epochs\n",
+ "    'output_dir': './checkpoints',\n",
+ "}\n",
+ "\n",
+ "# Auto-detect Kaggle dual GPU\n",
+ "import torch\n",
+ "NUM_GPUS = torch.cuda.device_count()\n",
+ "print(f'GPUs: {NUM_GPUS}, Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU\"}')\n",
+ "if NUM_GPUS > 1:\n",
+ "    print('Kaggle dual-GPU detected! Will use DataParallel.')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 3: Dataset — Download RealBokeh (raw images, ~19GB)\n",
+ "# On free Colab/Kaggle we download only the files we need\n",
+ "# ============================================================\n",
+ "import os, json\n",
+ "from pathlib import Path\n",
+ "from huggingface_hub import snapshot_download\n",
+ "\n",
+ "# Only download the train split input images + f/2.0 GT + metadata\n",
+ "# This saves bandwidth vs the full 19GB\n",
+ "DATA_DIR = './realbokeh'\n",
+ "\n",
+ "if not os.path.exists(f'{DATA_DIR}/train/in'):\n",
+ "    print('Downloading RealBokeh train split (input + metadata)...')\n",
+ "    print('This downloads ~5GB. On Colab it takes ~3-5 minutes.')\n",
+ "    snapshot_download(\n",
+ "        repo_id='timseizinger/RealBokeh_3MP',\n",
+ "        repo_type='dataset',\n",
+ "        local_dir=DATA_DIR,\n",
+ "        allow_patterns=['train/in/*', 'train/metadata/*', 'train/gt/*/f2.0*',\n",
+ "                        'train/gt/*/*_f2.0*',\n",
+ "                        'validation/in/*', 'validation/metadata/*',\n",
+ "                        'validation/gt/*/*_f2.0*'],\n",
+ "    )\n",
+ "    print('✓ Dataset downloaded')\n",
+ "else:\n",
+ "    print('✓ Dataset already exists')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 4: PyTorch Dataset class for RealBokeh\n",
+ "# ============================================================\n",
+ "import torch\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "from torchvision import transforms\n",
+ "from PIL import Image\n",
+ "import random\n",
+ "\n",
+ "class RealBokehDataset(Dataset):\n",
+ "    \"\"\"RealBokeh dataset for BokehFlow training.\n",
+ "\n",
+ "    Each sample returns:\n",
+ "        input_img: (3, crop_size, crop_size) sharp f/22 image\n",
+ "        target_img: (3, crop_size, crop_size) bokeh GT at target f-stop\n",
+ "        f_number: scalar f-stop value\n",
+ "        focal_length_mm: scalar focal length\n",
+ "        focus_distance_m: scalar focus distance in meters\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, data_dir, split='train', crop_size=256,\n",
+ "                 target_fstop=2.0, max_samples=None):\n",
+ "        self.data_dir = Path(data_dir) / split\n",
+ "        self.crop_size = crop_size\n",
+ "        self.target_fstop = target_fstop\n",
+ "\n",
+ "        # Load metadata\n",
+ "        self.samples = []\n",
+ "        meta_dir = self.data_dir / 'metadata'\n",
+ "        if not meta_dir.exists():\n",
+ "            raise FileNotFoundError(f'No metadata at {meta_dir}')\n",
+ "\n",
+ "        for meta_file in sorted(meta_dir.glob('*.json')):\n",
+ "            with open(meta_file) as f:\n",
+ "                meta = json.load(f)\n",
+ "\n",
+ "            # Find the GT image at the target f-stop\n",
+ "            gt_path = None\n",
+ "            for img, av in zip(meta['target_images'], meta['target_avs']):\n",
+ "                if abs(av - target_fstop) < 0.01:\n",
+ "                    gt_path = self.data_dir / img\n",
+ "                    break\n",
+ "\n",
+ "            if gt_path is None or not gt_path.exists():\n",
+ "                continue\n",
+ "\n",
+ "            input_path = self.data_dir / meta['source_image']\n",
+ "            if not input_path.exists():\n",
+ "                continue\n",
+ "\n",
+ "            self.samples.append({\n",
+ "                'input': str(input_path),\n",
+ "                'target': str(gt_path),\n",
+ "                'f_number': target_fstop,\n",
+ "                'focal_length_mm': float(meta['focal_length']),\n",
+ "                'focus_distance_m': float(meta['focus_plane_distance']),\n",
+ "            })\n",
+ "\n",
+ "        if max_samples:\n",
+ "            self.samples = self.samples[:max_samples]\n",
+ "\n",
+ "        print(f'{split}: {len(self.samples)} paired samples found')\n",
+ "\n",
+ "        self.to_tensor = transforms.ToTensor()\n",
+ "\n",
+ "    def __len__(self):\n",
+ "        return len(self.samples)\n",
+ "\n",
+ "    def __getitem__(self, idx):\n",
+ "        s = self.samples[idx]\n",
+ "\n",
+ "        # Load images\n",
+ "        inp = Image.open(s['input']).convert('RGB')\n",
+ "        tgt = Image.open(s['target']).convert('RGB')\n",
+ "\n",
+ "        # Random crop (same crop for both)\n",
+ "        w, h = inp.size\n",
+ "        cs = self.crop_size\n",
+ "        if w >= cs and h >= cs:\n",
+ "            x = random.randint(0, w - cs)\n",
+ "            y = random.randint(0, h - cs)\n",
+ "            inp = inp.crop((x, y, x+cs, y+cs))\n",
+ "            tgt = tgt.crop((x, y, x+cs, y+cs))\n",
+ "        else:\n",
+ "            inp = inp.resize((cs, cs), Image.LANCZOS)\n",
+ "            tgt = tgt.resize((cs, cs), Image.LANCZOS)\n",
+ "\n",
+ "        # Random horizontal flip\n",
+ "        if random.random() > 0.5:\n",
+ "            inp = inp.transpose(Image.FLIP_LEFT_RIGHT)\n",
+ "            tgt = tgt.transpose(Image.FLIP_LEFT_RIGHT)\n",
+ "\n",
+ "        inp_t = self.to_tensor(inp)  # [0,1] range\n",
+ "        tgt_t = self.to_tensor(tgt)\n",
+ "\n",
+ "        return {\n",
+ "            'input': inp_t,\n",
+ "            'target': tgt_t,\n",
+ "            'f_number': torch.tensor(s['f_number'], dtype=torch.float32),\n",
+ "            'focal_length_mm': torch.tensor(s['focal_length_mm'], dtype=torch.float32),\n",
+ "            'focus_distance_m': torch.tensor(s['focus_distance_m'], dtype=torch.float32),\n",
+ "        }\n",
+ "\n",
+ "# Create datasets\n",
+ "train_ds = RealBokehDataset(\n",
+ "    DATA_DIR, split='train',\n",
+ "    crop_size=CONFIG['crop_size'],\n",
+ "    target_fstop=CONFIG['target_fstop'],\n",
+ "    max_samples=CONFIG['max_train_samples'],\n",
+ ")\n",
+ "\n",
+ "train_loader = DataLoader(\n",
+ "    train_ds,\n",
+ "    batch_size=CONFIG['batch_size'],\n",
+ "    shuffle=True,\n",
+ "    num_workers=CONFIG['num_workers'],\n",
+ "    pin_memory=True,\n",
+ "    drop_last=True,\n",
+ ")\n",
+ "\n",
+ "print(f'\\n✓ DataLoader ready: {len(train_loader)} batches per epoch')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 5: Create model\n",
+ "# ============================================================\n",
+ "from bokehflow import BokehFlow, BokehFlowConfig, BokehFlowLoss, model_summary\n",
+ "\n",
+ "config = BokehFlowConfig(variant=CONFIG['variant'])\n",
+ "model = BokehFlow(config)\n",
+ "\n",
+ "# Multi-GPU support for Kaggle\n",
+ "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+ "if NUM_GPUS > 1:\n",
+ "    model = torch.nn.DataParallel(model)\n",
+ "    print(f'Using DataParallel on {NUM_GPUS} GPUs')\n",
+ "model = model.to(device)\n",
+ "\n",
+ "# Print summary\n",
+ "print(model_summary(config))\n",
+ "print(f'Device: {device}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 6: Training loop\n",
+ "# ============================================================\n",
+ "from tqdm.auto import tqdm\n",
+ "import time\n",
+ "\n",
+ "optimizer = torch.optim.AdamW(\n",
+ "    model.parameters(),\n",
+ "    lr=CONFIG['lr'],\n",
+ "    weight_decay=CONFIG['weight_decay']\n",
+ ")\n",
+ "\n",
+ "scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
+ "    optimizer, T_max=CONFIG['num_epochs'] * len(train_loader)\n",
+ ")\n",
+ "\n",
+ "criterion = BokehFlowLoss(lambda_depth=0.5)\n",
+ "\n",
+ "os.makedirs(CONFIG['output_dir'], exist_ok=True)\n",
+ "\n",
+ "# Training\n",
+ "print(f'\\n{\"=\"*60}')\n",
+ "print(f'Starting training: {CONFIG[\"num_epochs\"]} epochs')\n",
+ "print(f'{\"=\"*60}\\n')\n",
+ "\n",
+ "for epoch in range(CONFIG['num_epochs']):\n",
+ "    model.train()\n",
+ "    epoch_loss = 0.0\n",
+ "    epoch_start = time.time()\n",
+ "\n",
+ "    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{CONFIG[\"num_epochs\"]}')\n",
+ "    for step, batch in enumerate(pbar):\n",
+ "        # Move to device\n",
+ "        inp = batch['input'].to(device)\n",
+ "        tgt = batch['target'].to(device)\n",
+ "        f_num = batch['f_number'].to(device)\n",
+ "        focal = batch['focal_length_mm'].to(device)\n",
+ "        focus = batch['focus_distance_m'].to(device)\n",
+ "\n",
+ "        # Forward\n",
+ "        output = model(inp, f_num, focal, focus)\n",
+ "\n",
+ "        # Loss (wrap a raw tensor output in a dict for the criterion)\n",
+ "        losses = criterion(\n",
+ "            output if isinstance(output, dict) else {'bokeh': output},\n",
+ "            {'bokeh_gt': tgt}\n",
+ "        )\n",
+ "        loss = losses['total']\n",
+ "\n",
+ "        # Backward\n",
+ "        optimizer.zero_grad()\n",
+ "        loss.backward()\n",
+ "        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])\n",
+ "        optimizer.step()\n",
+ "        scheduler.step()\n",
+ "\n",
+ "        epoch_loss += loss.item()\n",
+ "        pbar.set_postfix({\n",
+ "            'loss': f'{loss.item():.4f}',\n",
+ "            'lr': f'{scheduler.get_last_lr()[0]:.2e}',\n",
+ "        })\n",
+ "\n",
+ "    avg_loss = epoch_loss / len(train_loader)\n",
+ "    elapsed = time.time() - epoch_start\n",
+ "    print(f'Epoch {epoch+1}: avg_loss={avg_loss:.4f}, time={elapsed:.0f}s')\n",
+ "\n",
+ "    # Save checkpoint\n",
+ "    if (epoch + 1) % CONFIG['save_every'] == 0:\n",
+ "        ckpt_path = f'{CONFIG[\"output_dir\"]}/bokehflow_{CONFIG[\"variant\"]}_epoch{epoch+1}.pt'\n",
+ "        state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()\n",
+ "        torch.save({\n",
+ "            'epoch': epoch + 1,\n",
+ "            'model_state_dict': state,\n",
+ "            'optimizer_state_dict': optimizer.state_dict(),\n",
+ "            'loss': avg_loss,\n",
+ "            'config': CONFIG,\n",
+ "        }, ckpt_path)\n",
+ "        print(f'  ✓ Saved checkpoint: {ckpt_path}')\n",
+ "\n",
+ "print('\\n✓ Training complete!')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# STEP 7: Quick inference test\n",
+ "# ============================================================\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ "    sample = train_ds[0]\n",
+ "    inp = sample['input'].unsqueeze(0).to(device)\n",
+ "    out = model(\n",
+ "        inp,\n",
+ "        sample['f_number'].unsqueeze(0).to(device),\n",
+ "        sample['focal_length_mm'].unsqueeze(0).to(device),\n",
+ "        sample['focus_distance_m'].unsqueeze(0).to(device),\n",
+ "    )\n",
+ "\n",
+ "fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
+ "axes[0].imshow(sample['input'].permute(1,2,0).numpy())\n",
+ "axes[0].set_title('Input (f/22)')\n",
+ "axes[1].imshow(out['bokeh'][0].cpu().permute(1,2,0).clamp(0,1).numpy())\n",
+ "axes[1].set_title('BokehFlow Output')\n",
+ "axes[2].imshow(sample['target'].permute(1,2,0).numpy())\n",
+ "axes[2].set_title('Ground Truth (f/2.0)')\n",
+ "for ax in axes: ax.axis('off')\n",
+ "plt.tight_layout()\n",
+ "plt.savefig('result.png', dpi=100)\n",
+ "plt.show()\n",
+ "print('✓ Inference test complete')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python",
+ "version": "3.10.0"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+ }
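
After training, the checkpoints written by STEP 6 can be reloaded outside the notebook. Below is a minimal sketch, assuming the bokehflow.py API the notebook itself uses (BokehFlow, BokehFlowConfig, and a dict output with a 'bokeh' key) and the checkpoint layout saved in STEP 6; the checkpoint path and lens parameters are illustrative, not taken from the commit:

# Minimal sketch: reload a STEP 6 checkpoint for inference.
# Assumes bokehflow.py (downloaded in STEP 1) is importable and that the
# checkpoint follows the dict layout torch.save'd in STEP 6.
import torch
from bokehflow import BokehFlow, BokehFlowConfig

ckpt_path = './checkpoints/bokehflow_nano_epoch5.pt'  # illustrative path
ckpt = torch.load(ckpt_path, map_location='cpu')

# STEP 6 stores the notebook's CONFIG dict alongside the weights.
model = BokehFlow(BokehFlowConfig(variant=ckpt['config']['variant']))
model.load_state_dict(ckpt['model_state_dict'])
model.eval()

# Same conditioning inputs as training: image in [0,1], f-number,
# focal length (mm), focus distance (m).
img = torch.rand(1, 3, 256, 256)  # stand-in for a real photo tensor
with torch.no_grad():
    out = model(img, torch.tensor([2.0]), torch.tensor([50.0]),
                torch.tensor([3.0]))
bokeh = out['bokeh'] if isinstance(out, dict) else out
print(bokeh.shape)  # expect torch.Size([1, 3, 256, 256])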