rogermt committed · Commit 370da72 · verified · 1 Parent(s): 39fb7ad

Upload config.yaml

Files changed (1): config.yaml +199 -0
config.yaml ADDED
## config.yaml
## Neural Sinkhorn Gradient Flow (NSGF++) Configuration
## Based on arXiv:2401.14069

# ============================================================
# 2D Synthetic Experiments (Section 5.1, Appendix E.1)
# ============================================================
experiment_2d:
  # Datasets: 8gaussians, moons, scurve, checkerboard, 8gaussians_moons
  dataset: "8gaussians"
  source: "gaussian"  # source distribution: standard Gaussian N(0, I)

  # MLP Architecture (Appendix E.1: 3 hidden layers, 256 hidden units)
  model:
    input_dim: 2
    hidden_dim: 256
    num_hidden_layers: 3
    time_emb_dim: 64
    activation: "silu"

  # Sinkhorn gradient flow parameters
  sinkhorn:
    epsilon: 0.1     # regularization coefficient ε
    blur: 0.5        # GeomLoss blur parameter (blur^p ~ ε)
    scaling: 0.80    # GeomLoss multiscale scaling
    eta: 1.0         # gradient flow step size η
    num_steps: 10    # T: number of gradient flow time steps
    batch_size: 256  # n: minibatch size for Sinkhorn flow

  # Trajectory pool
  pool:
    num_batches: 200  # number of batches to build pool
    experience_replay: true

  # Velocity field matching training
  training:
    num_iterations: 20000
    batch_size: 256
    learning_rate: 0.001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0

  # Inference / Sampling
  inference:
    num_euler_steps: 10  # 10 or 100 Euler steps (uniform schedule)
    num_samples: 1024    # samples for evaluation

  # Evaluation
  evaluation:
    num_test_samples: 1024  # W2 computed against 1024 test samples
    metric: "w2"            # 2-Wasserstein distance
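For reference, the sinkhorn block above maps directly onto GeomLoss. Below is a minimal sketch of one way the Phase-1 flow could be run with those hyperparameters; SamplesLoss is the actual GeomLoss API, while sinkhorn_flow and its trajectory bookkeeping are illustrative, not the paper's code:

import torch
from geomloss import SamplesLoss

# Sinkhorn divergence with the blur/scaling values from the config above.
loss_fn = SamplesLoss(loss="sinkhorn", p=2, blur=0.5, scaling=0.80)

def sinkhorn_flow(source, target, eta=1.0, num_steps=10):
    # Move source particles along the Sinkhorn divergence gradient flow,
    # recording each intermediate state (the trajectories that fill the pool).
    x = source.clone().requires_grad_(True)
    trajectory = [x.detach().clone()]
    for _ in range(num_steps):
        loss = loss_fn(x, target)
        (g,) = torch.autograd.grad(loss, (x,))
        # SamplesLoss averages over particles, so the per-particle update is
        # rescaled by n, as in the GeomLoss gradient-flow examples.
        x = (x - eta * len(x) * g).detach().requires_grad_(True)
        trajectory.append(x.detach().clone())
    return trajectory

# e.g. flow 256 Gaussian source points toward a 2D target batch:
# traj = sinkhorn_flow(torch.randn(256, 2), target_batch, eta=1.0, num_steps=10)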
# ============================================================
# Image Benchmark Experiments (Section 5.2, Appendix E.2)
# ============================================================
experiment_mnist:
  dataset: "mnist"
  image_size: 28
  in_channels: 1

  # UNet Architecture (Appendix E.2, Dhariwal & Nichol 2021)
  unet:
    model_channels: 32     # base channels
    num_res_blocks: 1      # depth = 1
    channel_mult: [1, 2, 2]
    num_heads: 1
    num_head_channels: -1  # use num_heads instead
    attention_resolutions: [16]
    dropout: 0.0
    use_scale_shift_norm: true  # AdaGN

  # Sinkhorn gradient flow (Phase 1)
  sinkhorn:
    blur: 0.5
    scaling: 0.80
    eta: 1.0
    num_steps: 5    # T <= 5 for NSGF phase
    batch_size: 256

  # Trajectory pool (Appendix E.2: 256 batch * 1500 batches * 5 steps < 20GB)
  pool:
    num_batches: 1500
    storage_limit_gb: 20

  # Velocity field matching training (NSGF model)
  nsgf_training:
    num_iterations: 100000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0

  # Neural Straight Flow (Phase 2)
  nsf_training:
    num_iterations: 100000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0

  # Phase-transition time predictor (CNN)
  time_predictor:
    conv_channels: [32, 64, 128, 256]
    kernel_size: 3
    stride: 1
    padding: 1
    pool_size: 2
    num_iterations: 40000
    learning_rate: 0.0001
    batch_size: 128

  # Inference
  inference:
    nsgf_steps: 5   # 5-step Euler in NSGF phase
    nsf_steps: 55   # remaining steps for straight flow
    total_nfe: 60   # total NFE = nsgf_steps + nsf_steps

  # Evaluation (Appendix E.2: FID between 10K generated and test samples)
  evaluation:
    num_generated: 10000
    metrics: ["fid"]
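The inference block above implies a simple two-phase Euler sampler: 5 NFE with the NSGF velocity field, then 55 with the straight-flow field. A hedged sketch follows; v_nsgf, v_nsf, and the uniform time grids are assumptions, and the paper's handoff via the predicted phase-transition time is elided here:

import torch

@torch.no_grad()
def sample_two_phase(v_nsgf, v_nsf, x0, nsgf_steps=5, nsf_steps=55):
    x = x0
    # Phase 1: Euler steps with the NSGF velocity field.
    for i in range(nsgf_steps):
        t = torch.full((x.shape[0],), i / nsgf_steps, device=x.device)
        x = x + v_nsgf(x, t) / nsgf_steps
    # Phase 2: Euler steps with the Neural Straight Flow field.
    for i in range(nsf_steps):
        t = torch.full((x.shape[0],), i / nsf_steps, device=x.device)
        x = x + v_nsf(x, t) / nsf_steps
    return x  # total NFE = nsgf_steps + nsf_steps = 60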
experiment_cifar10:
  dataset: "cifar10"
  image_size: 32
  in_channels: 3

  # UNet Architecture (Appendix E.2)
  unet:
    model_channels: 128    # base channels
    num_res_blocks: 2      # depth = 2
    channel_mult: [1, 2, 2, 2]
    num_heads: 4
    num_head_channels: 64
    attention_resolutions: [16]
    dropout: 0.0
    use_scale_shift_norm: true

  # Sinkhorn gradient flow (Phase 1)
  sinkhorn:
    blur: 1.0
    scaling: 0.85
    eta: 1.0
    num_steps: 5
    batch_size: 128

  # Trajectory pool (Appendix E.2: 128 batch * 2500 batches * 5 steps ~ 45GB)
  pool:
    num_batches: 2500
    storage_limit_gb: 45

  # Velocity field matching training (NSGF model)
  nsgf_training:
    num_iterations: 200000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0

  # Neural Straight Flow (Phase 2)
  nsf_training:
    num_iterations: 200000
    batch_size: 128
    learning_rate: 0.0001
    optimizer: "adam"
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.0

  # Phase-transition time predictor (same CNN architecture)
  time_predictor:
    conv_channels: [32, 64, 128, 256]
    kernel_size: 3
    stride: 1
    padding: 1
    pool_size: 2
    num_iterations: 40000
    learning_rate: 0.0001
    batch_size: 128

  # Inference
  inference:
    nsgf_steps: 5
    nsf_steps: 54
    total_nfe: 59  # paper reports NFE=59 for CIFAR-10

  # Evaluation
  evaluation:
    num_generated: 10000
    metrics: ["fid", "is"]
    # Paper target: FID=5.55, IS=8.86
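
The file parses as standard YAML; a quick smoke test with PyYAML, using the file name as uploaded and the key paths defined above:

import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# e.g. pull the CIFAR-10 Sinkhorn settings
sk = cfg["experiment_cifar10"]["sinkhorn"]
print(sk["blur"], sk["scaling"], sk["num_steps"])  # -> 1.0 0.85 5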