# config.yaml
# Neural Sinkhorn Gradient Flow (NSGF++) Configuration
# Based on arXiv:2401.14069
---
# ============================================================
# 2D Synthetic Experiments (Section 5.1, Appendix E.1)
# ============================================================
experiment_2d:
  # Datasets: 8gaussians, moons, scurve, checkerboard, 8gaussians_moons
  dataset: "8gaussians"
  source: "gaussian"  # source distribution: standard Gaussian N(0, I)
  # MLP Architecture (Appendix E.1: 3 hidden layers, 256 hidden units)
  model:
    input_dim: 2
    hidden_dim: 256
    num_hidden_layers: 3
    time_emb_dim: 64
    activation: "silu"
  # Sinkhorn gradient flow parameters
  sinkhorn:
    epsilon: 0.1  # regularization coefficient ε
    blur: 0.5  # GeomLoss blur parameter (blur^p ~ ε)
    scaling: 0.80  # GeomLoss multiscale scaling
    eta: 1.0  # gradient flow step size η
    num_steps: 10  # T: number of gradient flow time steps
    batch_size: 256  # n: minibatch size for Sinkhorn flow
  # Trajectory pool
  pool:
    num_batches: 200  # number of batches to build pool
    experience_replay: true
  # Velocity field matching training
  training:
    num_iterations: 20000
    batch_size: 256
    learning_rate: 0.001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0
  # Inference / Sampling
  inference:
    num_euler_steps: 10  # 10 or 100 Euler steps (uniform schedule)
    num_samples: 1024  # samples for evaluation
  # Evaluation
  evaluation:
    num_test_samples: 1024  # W2 computed against 1024 test samples
    metric: "w2"  # 2-Wasserstein distance
# ============================================================
# Image Benchmark Experiments (Section 5.2, Appendix E.2)
# ============================================================
experiment_mnist:
  dataset: "mnist"
  image_size: 28
  in_channels: 1
  # UNet Architecture (Appendix E.2, Dhariwal & Nichol 2021)
  unet:
    model_channels: 32  # base channels
    num_res_blocks: 1  # depth = 1
    channel_mult: [1, 2, 2]
    num_heads: 1
    num_head_channels: -1  # use num_heads instead
    attention_resolutions: [16]
    dropout: 0.0
    use_scale_shift_norm: true  # AdaGN
  # Sinkhorn gradient flow (Phase 1)
  sinkhorn:
    blur: 0.5
    scaling: 0.80
    eta: 1.0
    num_steps: 5  # T <= 5 for NSGF phase
    batch_size: 256
  # Trajectory pool (Appendix E.2: 256 batch * 1500 batches * 5 steps < 20GB)
  pool:
    num_batches: 1500
    storage_limit_gb: 20
  # Velocity field matching training (NSGF model)
  nsgf_training:
    num_iterations: 100000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0
  # Neural Straight Flow (Phase 2)
  nsf_training:
    num_iterations: 100000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0
  # Phase-transition time predictor (CNN)
  time_predictor:
    conv_channels: [32, 64, 128, 256]
    kernel_size: 3
    stride: 1
    padding: 1
    pool_size: 2
    num_iterations: 40000
    learning_rate: 0.0001
    batch_size: 128
  # Inference
  inference:
    nsgf_steps: 5  # 5-step Euler in NSGF phase
    nsf_steps: 55  # remaining steps for straight flow
    total_nfe: 60  # total NFE = nsgf_steps + nsf_steps
  # Evaluation (Appendix E.2: FID between 10K gen and test)
  evaluation:
    num_generated: 10000
    metrics: ["fid"]
experiment_cifar10:
  dataset: "cifar10"
  image_size: 32
  in_channels: 3
  # UNet Architecture (Appendix E.2)
  unet:
    model_channels: 128  # base channels
    num_res_blocks: 2  # depth = 2
    channel_mult: [1, 2, 2, 2]
    num_heads: 4
    num_head_channels: 64
    attention_resolutions: [16]
    dropout: 0.0
    use_scale_shift_norm: true
  # Sinkhorn gradient flow (Phase 1)
  # NOTE: batch_size reduced from paper's 128 to 32 for T4 16GB VRAM.
  # Sinkhorn on 3072-dim flattened vectors (3x32x32) with tensorized backend
  # uses O(N^2 * D) memory. 128 samples OOMs on T4; 32 fits comfortably.
  # Compensate by increasing pool batches (32 * 10000 = 320K ≈ 128 * 2500).
  sinkhorn:
    blur: 1.0
    scaling: 0.85
    eta: 1.0
    num_steps: 5
    batch_size: 32
  # Trajectory pool — adjusted for smaller Sinkhorn batch
  # 32 batch * 10000 batches * 5 steps = 1.6M entries (same order as paper)
  pool:
    num_batches: 10000
    storage_limit_gb: 45
  # Velocity field matching training (NSGF model)
  nsgf_training:
    num_iterations: 200000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0
  # Neural Straight Flow (Phase 2)
  nsf_training:
    num_iterations: 200000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0
  # Phase-transition time predictor (same CNN architecture)
  time_predictor:
    conv_channels: [32, 64, 128, 256]
    kernel_size: 3
    stride: 1
    padding: 1
    pool_size: 2
    num_iterations: 40000
    learning_rate: 0.0001
    batch_size: 128
  # Inference
  inference:
    nsgf_steps: 5
    nsf_steps: 54
    total_nfe: 59  # paper reports NFE=59 for CIFAR-10
  # Evaluation
  evaluation:
    num_generated: 10000
    metrics: ["fid", "is"]
    # Paper target: FID=5.55, IS=8.86