KadirYilmaz committed on
Commit
8ee3bf4
·
verified ·
1 Parent(s): fdbec2a

Add joint_training_small outputs

Browse files
.gitattributes CHANGED
@@ -37,3 +37,8 @@ Volt_experiments/single_dataset/nuscenes/train.log filter=lfs diff=lfs merge=lfs
37
  Volt_experiments/single_dataset/scannet/train.log filter=lfs diff=lfs merge=lfs -text
38
  Volt_experiments/single_dataset/scannet200/train.log filter=lfs diff=lfs merge=lfs -text
39
  Volt_experiments/single_dataset/waymo/train.log filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
37
  Volt_experiments/single_dataset/scannet/train.log filter=lfs diff=lfs merge=lfs -text
38
  Volt_experiments/single_dataset/scannet200/train.log filter=lfs diff=lfs merge=lfs -text
39
  Volt_experiments/single_dataset/waymo/train.log filter=lfs diff=lfs merge=lfs -text
40
+ Volt_experiments/joint_training_small/nuscenes/train.log filter=lfs diff=lfs merge=lfs -text
41
+ Volt_experiments/joint_training_small/scannet/train.log filter=lfs diff=lfs merge=lfs -text
42
+ Volt_experiments/joint_training_small/scannet200/train.log filter=lfs diff=lfs merge=lfs -text
43
+ Volt_experiments/joint_training_small/semantic_kitti/train.log filter=lfs diff=lfs merge=lfs -text
44
+ Volt_experiments/joint_training_small/waymo/train.log filter=lfs diff=lfs merge=lfs -text
Volt_experiments/joint_training_small/nuscenes/config.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 12644538
6
+ save_path = 'exp/nuscenes/2026-05-04_005622'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 40
13
+ eval_epoch = 40
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ dataset_ratios = None
18
+ sync_bn = False
19
+ enable_amp = True
20
+ amp_dtype = 'float16'
21
+ empty_cache = False
22
+ empty_cache_per_epoch = False
23
+ find_unused_parameters = True
24
+ enable_wandb = True
25
+ wandb_project = 'Volt'
26
+ wandb_key = None
27
+ mix_prob = 0.2
28
+ param_dicts = None
29
+ hooks = [
30
+ dict(type='CheckpointLoader'),
31
+ dict(type='ModelHook'),
32
+ dict(type='IterationTimer', warmup_iter=2),
33
+ dict(type='InformationWriter'),
34
+ dict(type='SemSegEvaluator'),
35
+ dict(type='CheckpointSaver', save_freq=None),
36
+ dict(type='PreciseEvaluator', test_last=False)
37
+ ]
38
+ train = dict(type='MultiDatasetTrainer')
39
+ test = dict(type='SemSegTester', verbose=True)
40
+ model = dict(
41
+ type='DefaultSegmentorV2',
42
+ backbone_out_channels=128,
43
+ backbone=dict(
44
+ type='Volt',
45
+ in_channels=4,
46
+ embed_dim=384,
47
+ depth=12,
48
+ num_heads=6,
49
+ mlp_ratio=4,
50
+ init_values=None,
51
+ qk_norm=True,
52
+ drop_path=0.3,
53
+ stride=5,
54
+ kernel_size=5,
55
+ increase_drop_path=True,
56
+ up_mlp_dim=128),
57
+ teacher=dict(
58
+ type='DefaultSegmentorV2',
59
+ backbone=dict(
60
+ type='SpUNet-v1m3',
61
+ in_channels=4,
62
+ num_classes=0,
63
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
64
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
65
+ enc_mode=False,
66
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
67
+ zero_init=False,
68
+ norm_decouple=True,
69
+ norm_adaptive=False,
70
+ norm_affine=True),
71
+ backbone_out_channels=96,
72
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
73
+ num_classes=(16, 19, 22)),
74
+ teacher_weights='exp/nuscenes/2026-02-25_010147/model/model_best.pth',
75
+ criteria=[
76
+ dict(
77
+ type='CrossEntropyLoss',
78
+ loss_weight=1.0,
79
+ label_smoothing=0.1,
80
+ ignore_index=-1),
81
+ dict(
82
+ type='LovaszLoss',
83
+ mode='multiclass',
84
+ loss_weight=1.0,
85
+ ignore_index=-1)
86
+ ],
87
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
88
+ num_classes=(16, 19, 22))
89
+ optimizer = dict(type='AdamW', lr=0.002, weight_decay=0.05)
90
+ scheduler = dict(
91
+ type='OneCycleLR',
92
+ max_lr=0.002,
93
+ pct_start=0.04,
94
+ anneal_strategy='cos',
95
+ div_factor=10.0,
96
+ final_div_factor=100.0)
97
+ ignore_index = -1
98
+ names = [
99
+ 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle', 'motorcycle',
100
+ 'pedestrian', 'traffic_cone', 'trailer', 'truck', 'driveable_surface',
101
+ 'other_flat', 'sidewalk', 'terrain', 'manmade', 'vegetation'
102
+ ]
103
+ data = dict(
104
+ num_classes=16,
105
+ ignore_index=-1,
106
+ names=[
107
+ 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
108
+ 'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck',
109
+ 'driveable_surface', 'other_flat', 'sidewalk', 'terrain', 'manmade',
110
+ 'vegetation'
111
+ ],
112
+ train=dict(
113
+ type='ConcatDataset',
114
+ datasets=[
115
+ dict(
116
+ type='NuScenesDataset',
117
+ split='train',
118
+ data_root='data/nuscenes',
119
+ transform=[
120
+ dict(
121
+ type='RandomRotate',
122
+ angle=[-1, 1],
123
+ axis='z',
124
+ center=[0, 0, 0],
125
+ p=0.5),
126
+ dict(
127
+ type='RandomRotate',
128
+ angle=[-0.015625, 0.015625],
129
+ axis='x',
130
+ p=0.5),
131
+ dict(
132
+ type='RandomRotate',
133
+ angle=[-0.015625, 0.015625],
134
+ axis='y',
135
+ p=0.5),
136
+ dict(
137
+ type='PointClipDistance',
138
+ max_dist=70.0,
139
+ z_min=-4.0,
140
+ z_max=2.0),
141
+ dict(type='RandomScale', scale=[0.9, 1.1]),
142
+ dict(type='RandomFlip', p=0.5),
143
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
144
+ dict(
145
+ type='GridSample',
146
+ grid_size=0.05,
147
+ hash_type='fnv',
148
+ mode='train',
149
+ return_grid_coord=True),
150
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
151
+ dict(type='Update', keys_dict=dict(condition='nuScenes')),
152
+ dict(type='ToTensor'),
153
+ dict(
154
+ type='Collect',
155
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
156
+ feat_keys=('coord', 'strength'))
157
+ ],
158
+ test_mode=False,
159
+ ignore_index=-1,
160
+ loop=2),
161
+ dict(
162
+ type='SemanticKITTIDataset',
163
+ split='train',
164
+ data_root='data/semantic_kitti',
165
+ transform=[
166
+ dict(
167
+ type='RandomDropout',
168
+ dropout_ratio=0.2,
169
+ dropout_application_ratio=0.2),
170
+ dict(
171
+ type='InstanceCutMix',
172
+ db_path='data/semantic_kitti_instances/train.h5'),
173
+ dict(
174
+ type='RandomRotate',
175
+ angle=[-1, 1],
176
+ axis='z',
177
+ center=[0, 0, 0],
178
+ p=0.5),
179
+ dict(
180
+ type='RandomRotate',
181
+ angle=[-0.015625, 0.015625],
182
+ axis='x',
183
+ p=0.5),
184
+ dict(
185
+ type='RandomRotate',
186
+ angle=[-0.015625, 0.015625],
187
+ axis='y',
188
+ p=0.5),
189
+ dict(
190
+ type='PointClipDistance',
191
+ max_dist=50.0,
192
+ z_min=-4.0,
193
+ z_max=2.0),
194
+ dict(type='RandomScale', scale=[0.9, 1.1]),
195
+ dict(
196
+ type='RandomShift',
197
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
198
+ dict(type='RandomFlip', p=0.5),
199
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
200
+ dict(type='InstanceShift', p=0.5, shift_range=[4, 4, 0.5]),
201
+ dict(
202
+ type='InstanceRotate',
203
+ p=0.5,
204
+ axis='z',
205
+ angle=[-0.5, 0.5]),
206
+ dict(type='InstanceScale', p=0.5, scale=[0.9, 1.1]),
207
+ dict(
208
+ type='GridSample',
209
+ grid_size=0.05,
210
+ hash_type='fnv',
211
+ mode='train',
212
+ return_grid_coord=True),
213
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
214
+ dict(
215
+ type='Update',
216
+ keys_dict=dict(condition='SemanticKITTI')),
217
+ dict(type='ToTensor'),
218
+ dict(
219
+ type='Collect',
220
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
221
+ feat_keys=('coord', 'strength'))
222
+ ],
223
+ test_mode=False,
224
+ ignore_index=-1,
225
+ loop=1),
226
+ dict(
227
+ type='WaymoDataset',
228
+ split='training',
229
+ data_root='data/waymo',
230
+ transform=[
231
+ dict(
232
+ type='RandomDropout',
233
+ dropout_ratio=0.2,
234
+ dropout_application_ratio=0.2),
235
+ dict(
236
+ type='RandomRotate',
237
+ angle=[-1, 1],
238
+ axis='z',
239
+ center=[0, 0, 0],
240
+ p=0.5),
241
+ dict(
242
+ type='RandomRotate',
243
+ angle=[-0.015625, 0.015625],
244
+ axis='x',
245
+ p=0.5),
246
+ dict(
247
+ type='RandomRotate',
248
+ angle=[-0.015625, 0.015625],
249
+ axis='y',
250
+ p=0.5),
251
+ dict(
252
+ type='PointClipDistance',
253
+ max_dist=75.0,
254
+ z_min=-4.0,
255
+ z_max=2.0),
256
+ dict(type='RandomScale', scale=[0.9, 1.1]),
257
+ dict(
258
+ type='RandomShift',
259
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
260
+ dict(type='RandomFlip', p=0.5),
261
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
262
+ dict(
263
+ type='GridSample',
264
+ grid_size=0.05,
265
+ hash_type='fnv',
266
+ mode='train',
267
+ return_grid_coord=True),
268
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
269
+ dict(type='SphereCrop', point_max=102400, mode='random'),
270
+ dict(type='Update', keys_dict=dict(condition='Waymo')),
271
+ dict(type='ToTensor'),
272
+ dict(
273
+ type='Collect',
274
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
275
+ feat_keys=('coord', 'strength'))
276
+ ],
277
+ test_mode=False,
278
+ ignore_index=-1,
279
+ loop=1)
280
+ ],
281
+ loop=1),
282
+ val=dict(
283
+ type='NuScenesDataset',
284
+ split='val',
285
+ data_root='data/nuscenes',
286
+ transform=[
287
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
288
+ dict(
289
+ type='PointClipDistance', max_dist=70.0, z_min=-4.0,
290
+ z_max=2.0),
291
+ dict(
292
+ type='GridSample',
293
+ grid_size=0.05,
294
+ hash_type='fnv',
295
+ mode='train',
296
+ return_grid_coord=True,
297
+ return_inverse=True),
298
+ dict(type='Update', keys_dict=dict(condition='nuScenes')),
299
+ dict(type='ToTensor'),
300
+ dict(
301
+ type='Collect',
302
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
303
+ 'inverse', 'condition'),
304
+ feat_keys=('coord', 'strength'))
305
+ ],
306
+ test_mode=False,
307
+ ignore_index=-1),
308
+ test=dict(
309
+ type='NuScenesDataset',
310
+ split='val',
311
+ data_root='data/nuscenes',
312
+ transform=[
313
+ dict(
314
+ type='PointClipDistance', max_dist=70.0, z_min=-4.0,
315
+ z_max=2.0),
316
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
317
+ dict(
318
+ type='GridSample',
319
+ grid_size=0.025,
320
+ hash_type='fnv',
321
+ mode='train',
322
+ return_inverse=True)
323
+ ],
324
+ test_mode=True,
325
+ test_cfg=dict(
326
+ voxelize=dict(
327
+ type='GridSample',
328
+ grid_size=0.05,
329
+ hash_type='fnv',
330
+ mode='test',
331
+ return_grid_coord=True),
332
+ crop=None,
333
+ post_transform=[
334
+ dict(type='Update', keys_dict=dict(condition='nuScenes')),
335
+ dict(type='ToTensor'),
336
+ dict(
337
+ type='Collect',
338
+ keys=('coord', 'grid_coord', 'index', 'condition'),
339
+ feat_keys=('coord', 'strength'))
340
+ ],
341
+ aug_transform=[[{
342
+ 'type': 'RandomScale',
343
+ 'scale': [0.9, 0.9]
344
+ }], [{
345
+ 'type': 'RandomScale',
346
+ 'scale': [0.95, 0.95]
347
+ }], [{
348
+ 'type': 'RandomScale',
349
+ 'scale': [1, 1]
350
+ }], [{
351
+ 'type': 'RandomScale',
352
+ 'scale': [1.05, 1.05]
353
+ }], [{
354
+ 'type': 'RandomScale',
355
+ 'scale': [1.1, 1.1]
356
+ }],
357
+ [{
358
+ 'type': 'RandomScale',
359
+ 'scale': [0.9, 0.9]
360
+ }, {
361
+ 'type': 'RandomFlip',
362
+ 'p': 1
363
+ }],
364
+ [{
365
+ 'type': 'RandomScale',
366
+ 'scale': [0.95, 0.95]
367
+ }, {
368
+ 'type': 'RandomFlip',
369
+ 'p': 1
370
+ }],
371
+ [{
372
+ 'type': 'RandomScale',
373
+ 'scale': [1, 1]
374
+ }, {
375
+ 'type': 'RandomFlip',
376
+ 'p': 1
377
+ }],
378
+ [{
379
+ 'type': 'RandomScale',
380
+ 'scale': [1.05, 1.05]
381
+ }, {
382
+ 'type': 'RandomFlip',
383
+ 'p': 1
384
+ }],
385
+ [{
386
+ 'type': 'RandomScale',
387
+ 'scale': [1.1, 1.1]
388
+ }, {
389
+ 'type': 'RandomFlip',
390
+ 'p': 1
391
+ }]]),
392
+ ignore_index=-1))
Volt_experiments/joint_training_small/nuscenes/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:380a2380442baf0706cac41db68f050fa8a70dcb6d623e33bde0dcd50873d4dd
3
+ size 378004481
Volt_experiments/joint_training_small/nuscenes/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be555278472ebc6cebc57203370ba9fd5917fa5f9ce2a427d722517f011b3578
3
+ size 378004481
Volt_experiments/joint_training_small/nuscenes/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41e6c0824bd49d5a6569bf6f89752a6bfdb6f92c1553a94c57ece22f2e42a0a
3
+ size 33798056
Volt_experiments/joint_training_small/scannet/config.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 10598737
6
+ save_path = 'exp/scannet/2026-05-03_012618'
7
+ num_worker = 96
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 500
13
+ eval_epoch = 100
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ dataset_ratios = [1, 1, 1, 1]
18
+ sync_bn = False
19
+ enable_amp = True
20
+ amp_dtype = 'float16'
21
+ empty_cache = False
22
+ empty_cache_per_epoch = False
23
+ find_unused_parameters = True
24
+ enable_wandb = True
25
+ wandb_project = 'Volt'
26
+ wandb_key = None
27
+ mix_prob = 0.85
28
+ param_dicts = None
29
+ hooks = [
30
+ dict(type='CheckpointLoader'),
31
+ dict(type='ModelHook'),
32
+ dict(type='IterationTimer', warmup_iter=2),
33
+ dict(type='InformationWriter'),
34
+ dict(type='SemSegEvaluator'),
35
+ dict(type='CheckpointSaver', save_freq=None),
36
+ dict(type='PreciseEvaluator', test_last=False)
37
+ ]
38
+ train = dict(type='DefaultTrainer')
39
+ test = dict(type='SemSegTester', verbose=True)
40
+ model = dict(
41
+ type='DefaultSegmentorV2',
42
+ backbone_out_channels=128,
43
+ backbone=dict(
44
+ type='Volt',
45
+ in_channels=6,
46
+ embed_dim=384,
47
+ depth=12,
48
+ num_heads=6,
49
+ mlp_ratio=4,
50
+ init_values=None,
51
+ qk_norm=True,
52
+ drop_path=0.3,
53
+ stride=5,
54
+ kernel_size=5,
55
+ increase_drop_path=True,
56
+ up_mlp_dim=128),
57
+ teacher=dict(
58
+ type='DefaultSegmentorV2',
59
+ backbone=dict(
60
+ type='SpUNet-v1m1',
61
+ in_channels=6,
62
+ num_classes=0,
63
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
64
+ layers=(2, 3, 4, 6, 2, 2, 2, 2)),
65
+ backbone_out_channels=96,
66
+ conditions=('ScanNet', 'ScanNet200', 'ScanNet++', 'ARKitScenes'),
67
+ num_classes=(20, 200, 100, 185)),
68
+ teacher_weights='exp/multi/2026-01-21_214020/model/model_best.pth',
69
+ criteria=[
70
+ dict(
71
+ type='CrossEntropyLoss',
72
+ loss_weight=1.0,
73
+ label_smoothing=0.1,
74
+ ignore_index=-1),
75
+ dict(
76
+ type='LovaszLoss',
77
+ mode='multiclass',
78
+ loss_weight=1.0,
79
+ ignore_index=-1)
80
+ ],
81
+ conditions=('ScanNet', 'ScanNet200', 'ScanNet++', 'ARKitScenes'),
82
+ num_classes=(20, 200, 100, 185))
83
+ optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.05)
84
+ scheduler = dict(
85
+ type='OneCycleLR',
86
+ max_lr=0.001,
87
+ pct_start=0.05,
88
+ anneal_strategy='cos',
89
+ div_factor=10.0,
90
+ final_div_factor=1000.0)
91
+ ignore_index = -1
92
+ data = dict(
93
+ num_classes=20,
94
+ ignore_index=-1,
95
+ names=[
96
+ 'wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
97
+ 'window', 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
98
+ 'refridgerator', 'shower curtain', 'toilet', 'sink', 'bathtub',
99
+ 'otherfurniture'
100
+ ],
101
+ train=dict(
102
+ type='ConcatDataset',
103
+ datasets=[
104
+ dict(
105
+ type='ScanNetDataset',
106
+ split='train',
107
+ data_root='data/scannet',
108
+ transform=[
109
+ dict(type='CenterShift', apply_z=True),
110
+ dict(
111
+ type='RandomDropout',
112
+ dropout_ratio=0.2,
113
+ dropout_application_ratio=0.2),
114
+ dict(
115
+ type='RandomRotate',
116
+ angle=[-1, 1],
117
+ axis='z',
118
+ center=[0, 0, 0],
119
+ p=0.5),
120
+ dict(
121
+ type='RandomRotate',
122
+ angle=[-0.015625, 0.015625],
123
+ axis='x',
124
+ p=0.5),
125
+ dict(
126
+ type='RandomRotate',
127
+ angle=[-0.015625, 0.015625],
128
+ axis='y',
129
+ p=0.5),
130
+ dict(type='RandomScale', scale=[0.9, 1.1]),
131
+ dict(type='RandomFlip', p=0.5),
132
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
133
+ dict(
134
+ type='ElasticDistortion',
135
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
136
+ dict(
137
+ type='ChromaticAutoContrast', p=0.2,
138
+ blend_factor=None),
139
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
140
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
141
+ dict(
142
+ type='InstanceShift',
143
+ p=0.2,
144
+ shift_range=[0.1, 0.1, 0.1]),
145
+ dict(
146
+ type='InstanceRotate',
147
+ p=0.2,
148
+ axis='z',
149
+ angle=[-0.25, 0.25]),
150
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
151
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
152
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
153
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
154
+ dict(type='SwapInstances', p=0.2),
155
+ dict(
156
+ type='GridSample',
157
+ grid_size=0.02,
158
+ hash_type='fnv',
159
+ mode='train',
160
+ return_grid_coord=True),
161
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
162
+ dict(type='CenterShift', apply_z=False),
163
+ dict(type='NormalizeColor'),
164
+ dict(type='Update', keys_dict=dict(condition='ScanNet')),
165
+ dict(type='ToTensor'),
166
+ dict(
167
+ type='Collect',
168
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
169
+ feat_keys=('color', 'normal'))
170
+ ],
171
+ test_mode=False,
172
+ ignore_index=-1,
173
+ loop=1),
174
+ dict(
175
+ type='ScanNet200Dataset',
176
+ split='train',
177
+ data_root='data/scannet',
178
+ transform=[
179
+ dict(type='CenterShift', apply_z=True),
180
+ dict(
181
+ type='RandomDropout',
182
+ dropout_ratio=0.2,
183
+ dropout_application_ratio=0.2),
184
+ dict(
185
+ type='RandomRotate',
186
+ angle=[-1, 1],
187
+ axis='z',
188
+ center=[0, 0, 0],
189
+ p=0.5),
190
+ dict(
191
+ type='RandomRotate',
192
+ angle=[-0.015625, 0.015625],
193
+ axis='x',
194
+ p=0.5),
195
+ dict(
196
+ type='RandomRotate',
197
+ angle=[-0.015625, 0.015625],
198
+ axis='y',
199
+ p=0.5),
200
+ dict(type='RandomScale', scale=[0.9, 1.1]),
201
+ dict(type='RandomFlip', p=0.5),
202
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
203
+ dict(
204
+ type='ElasticDistortion',
205
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
206
+ dict(
207
+ type='ChromaticAutoContrast', p=0.2,
208
+ blend_factor=None),
209
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
210
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
211
+ dict(
212
+ type='InstanceShift',
213
+ p=0.2,
214
+ shift_range=[0.1, 0.1, 0.1]),
215
+ dict(
216
+ type='InstanceRotate',
217
+ p=0.2,
218
+ axis='z',
219
+ angle=[-0.25, 0.25]),
220
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
221
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
222
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
223
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
224
+ dict(type='SwapInstances', p=0.2),
225
+ dict(
226
+ type='GridSample',
227
+ grid_size=0.02,
228
+ hash_type='fnv',
229
+ mode='train',
230
+ return_grid_coord=True),
231
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
232
+ dict(type='CenterShift', apply_z=False),
233
+ dict(type='NormalizeColor'),
234
+ dict(
235
+ type='Update', keys_dict=dict(condition='ScanNet200')),
236
+ dict(type='ToTensor'),
237
+ dict(
238
+ type='Collect',
239
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
240
+ feat_keys=('color', 'normal'))
241
+ ],
242
+ test_mode=False,
243
+ ignore_index=-1,
244
+ loop=1),
245
+ dict(
246
+ type='ScanNetPPDataset',
247
+ split='train',
248
+ data_root='data/scannetpp',
249
+ transform=[
250
+ dict(type='SphereCrop', point_max=1000000, mode='random'),
251
+ dict(type='CenterShift', apply_z=True),
252
+ dict(
253
+ type='RandomDropout',
254
+ dropout_ratio=0.2,
255
+ dropout_application_ratio=0.2),
256
+ dict(
257
+ type='RandomRotate',
258
+ angle=[-1, 1],
259
+ axis='z',
260
+ center=[0, 0, 0],
261
+ p=0.5),
262
+ dict(
263
+ type='RandomRotate',
264
+ angle=[-0.015625, 0.015625],
265
+ axis='x',
266
+ p=0.5),
267
+ dict(
268
+ type='RandomRotate',
269
+ angle=[-0.015625, 0.015625],
270
+ axis='y',
271
+ p=0.5),
272
+ dict(type='RandomScale', scale=[0.9, 1.1]),
273
+ dict(type='RandomFlip', p=0.5),
274
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
275
+ dict(
276
+ type='ChromaticAutoContrast', p=0.2,
277
+ blend_factor=None),
278
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
279
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
280
+ dict(
281
+ type='InstanceShift',
282
+ p=0.2,
283
+ shift_range=[0.1, 0.1, 0.1]),
284
+ dict(
285
+ type='InstanceRotate',
286
+ p=0.2,
287
+ axis='z',
288
+ angle=[-0.25, 0.25]),
289
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
290
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
291
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
292
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
293
+ dict(type='SwapInstances', p=0.2),
294
+ dict(
295
+ type='GridSample',
296
+ grid_size=0.02,
297
+ hash_type='fnv',
298
+ mode='train',
299
+ return_grid_coord=True),
300
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
301
+ dict(type='SphereCrop', point_max=204800, mode='random'),
302
+ dict(type='CenterShift', apply_z=False),
303
+ dict(type='NormalizeColor'),
304
+ dict(type='Update', keys_dict=dict(condition='ScanNet++')),
305
+ dict(type='ToTensor'),
306
+ dict(
307
+ type='Collect',
308
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
309
+ feat_keys=('color', 'normal'))
310
+ ],
311
+ test_mode=False,
312
+ ignore_index=-1,
313
+ loop=1),
314
+ dict(
315
+ type='ARKitScenesLabelMakerDataset',
316
+ split=['train', 'val'],
317
+ data_root='data/arkitscenes',
318
+ transform=[
319
+ dict(type='CenterShift', apply_z=True),
320
+ dict(
321
+ type='RandomDropout',
322
+ dropout_ratio=0.2,
323
+ dropout_application_ratio=0.2),
324
+ dict(
325
+ type='RandomRotate',
326
+ angle=[-1, 1],
327
+ axis='z',
328
+ center=[0, 0, 0],
329
+ p=0.5),
330
+ dict(
331
+ type='RandomRotate',
332
+ angle=[-0.015625, 0.015625],
333
+ axis='x',
334
+ p=0.5),
335
+ dict(
336
+ type='RandomRotate',
337
+ angle=[-0.015625, 0.015625],
338
+ axis='y',
339
+ p=0.5),
340
+ dict(type='RandomScale', scale=[0.9, 1.1]),
341
+ dict(type='RandomFlip', p=0.5),
342
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
343
+ dict(
344
+ type='ElasticDistortion',
345
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
346
+ dict(
347
+ type='ChromaticAutoContrast', p=0.2,
348
+ blend_factor=None),
349
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
350
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
351
+ dict(
352
+ type='GridSample',
353
+ grid_size=0.02,
354
+ hash_type='fnv',
355
+ mode='train',
356
+ return_grid_coord=True),
357
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
358
+ dict(type='SphereCrop', point_max=102400, mode='random'),
359
+ dict(type='CenterShift', apply_z=False),
360
+ dict(type='NormalizeColor'),
361
+ dict(
362
+ type='Update',
363
+ keys_dict=dict(condition='ARKitScenes')),
364
+ dict(type='ToTensor'),
365
+ dict(
366
+ type='Collect',
367
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
368
+ feat_keys=('color', 'normal'))
369
+ ],
370
+ test_mode=False,
371
+ ignore_index=-1,
372
+ loop=1)
373
+ ],
374
+ loop=5),
375
+ val=dict(
376
+ type='ScanNetDataset',
377
+ split='val',
378
+ data_root='data/scannet',
379
+ transform=[
380
+ dict(type='CenterShift', apply_z=True),
381
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
382
+ dict(
383
+ type='GridSample',
384
+ grid_size=0.02,
385
+ hash_type='fnv',
386
+ mode='train',
387
+ return_grid_coord=True,
388
+ return_inverse=True),
389
+ dict(type='CenterShift', apply_z=False),
390
+ dict(type='NormalizeColor'),
391
+ dict(type='Update', keys_dict=dict(condition='ScanNet')),
392
+ dict(type='ToTensor'),
393
+ dict(
394
+ type='Collect',
395
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
396
+ 'inverse', 'condition'),
397
+ feat_keys=('color', 'normal'))
398
+ ],
399
+ test_mode=False),
400
+ test=dict(
401
+ type='ScanNetDataset',
402
+ split='val',
403
+ data_root='data/scannet',
404
+ transform=[
405
+ dict(type='CenterShift', apply_z=True),
406
+ dict(type='NormalizeColor')
407
+ ],
408
+ test_mode=True,
409
+ test_cfg=dict(
410
+ voxelize=dict(
411
+ type='GridSample',
412
+ grid_size=0.02,
413
+ hash_type='fnv',
414
+ mode='test',
415
+ return_grid_coord=True),
416
+ crop=None,
417
+ post_transform=[
418
+ dict(type='CenterShift', apply_z=False),
419
+ dict(type='Update', keys_dict=dict(condition='ScanNet')),
420
+ dict(type='ToTensor'),
421
+ dict(
422
+ type='Collect',
423
+ keys=('coord', 'grid_coord', 'index', 'condition'),
424
+ feat_keys=('color', 'normal'))
425
+ ],
426
+ aug_transform=[[{
427
+ 'type': 'RandomRotateTargetAngle',
428
+ 'angle': [0],
429
+ 'axis': 'z',
430
+ 'center': [0, 0, 0],
431
+ 'p': 1
432
+ }],
433
+ [{
434
+ 'type': 'RandomRotateTargetAngle',
435
+ 'angle': [0.5],
436
+ 'axis': 'z',
437
+ 'center': [0, 0, 0],
438
+ 'p': 1
439
+ }],
440
+ [{
441
+ 'type': 'RandomRotateTargetAngle',
442
+ 'angle': [1],
443
+ 'axis': 'z',
444
+ 'center': [0, 0, 0],
445
+ 'p': 1
446
+ }],
447
+ [{
448
+ 'type': 'RandomRotateTargetAngle',
449
+ 'angle': [1.5],
450
+ 'axis': 'z',
451
+ 'center': [0, 0, 0],
452
+ 'p': 1
453
+ }],
454
+ [{
455
+ 'type': 'RandomRotateTargetAngle',
456
+ 'angle': [0],
457
+ 'axis': 'z',
458
+ 'center': [0, 0, 0],
459
+ 'p': 1
460
+ }, {
461
+ 'type': 'RandomScale',
462
+ 'scale': [0.95, 0.95]
463
+ }],
464
+ [{
465
+ 'type': 'RandomRotateTargetAngle',
466
+ 'angle': [0.5],
467
+ 'axis': 'z',
468
+ 'center': [0, 0, 0],
469
+ 'p': 1
470
+ }, {
471
+ 'type': 'RandomScale',
472
+ 'scale': [0.95, 0.95]
473
+ }],
474
+ [{
475
+ 'type': 'RandomRotateTargetAngle',
476
+ 'angle': [1],
477
+ 'axis': 'z',
478
+ 'center': [0, 0, 0],
479
+ 'p': 1
480
+ }, {
481
+ 'type': 'RandomScale',
482
+ 'scale': [0.95, 0.95]
483
+ }],
484
+ [{
485
+ 'type': 'RandomRotateTargetAngle',
486
+ 'angle': [1.5],
487
+ 'axis': 'z',
488
+ 'center': [0, 0, 0],
489
+ 'p': 1
490
+ }, {
491
+ 'type': 'RandomScale',
492
+ 'scale': [0.95, 0.95]
493
+ }],
494
+ [{
495
+ 'type': 'RandomRotateTargetAngle',
496
+ 'angle': [0],
497
+ 'axis': 'z',
498
+ 'center': [0, 0, 0],
499
+ 'p': 1
500
+ }, {
501
+ 'type': 'RandomScale',
502
+ 'scale': [1.05, 1.05]
503
+ }],
504
+ [{
505
+ 'type': 'RandomRotateTargetAngle',
506
+ 'angle': [0.5],
507
+ 'axis': 'z',
508
+ 'center': [0, 0, 0],
509
+ 'p': 1
510
+ }, {
511
+ 'type': 'RandomScale',
512
+ 'scale': [1.05, 1.05]
513
+ }],
514
+ [{
515
+ 'type': 'RandomRotateTargetAngle',
516
+ 'angle': [1],
517
+ 'axis': 'z',
518
+ 'center': [0, 0, 0],
519
+ 'p': 1
520
+ }, {
521
+ 'type': 'RandomScale',
522
+ 'scale': [1.05, 1.05]
523
+ }],
524
+ [{
525
+ 'type': 'RandomRotateTargetAngle',
526
+ 'angle': [1.5],
527
+ 'axis': 'z',
528
+ 'center': [0, 0, 0],
529
+ 'p': 1
530
+ }, {
531
+ 'type': 'RandomScale',
532
+ 'scale': [1.05, 1.05]
533
+ }], [{
534
+ 'type': 'RandomFlip',
535
+ 'p': 1
536
+ }]])))
Volt_experiments/joint_training_small/scannet/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcfa0f94fbe13292c059307cc87a521fbd47fc375c35b96e0e6d43fd476fba2a
3
+ size 381395577
Volt_experiments/joint_training_small/scannet/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:624a426a8500997ad087e40e34d67869efbe4457b0eff49f8b61005b6eaf1680
3
+ size 381395641
Volt_experiments/joint_training_small/scannet/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd326e149192d080dbf3ddba37d4d81795d6f765822f55f9c43f2a4601fc7cc3
3
+ size 25353593
Volt_experiments/joint_training_small/scannet200/config.py ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 50190337
6
+ save_path = 'exp/scannet200/2026-05-03_105701'
7
+ num_worker = 96
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 600
13
+ eval_epoch = 100
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ dataset_ratios = [1, 1, 1]
18
+ sync_bn = False
19
+ enable_amp = True
20
+ amp_dtype = 'float16'
21
+ empty_cache = False
22
+ empty_cache_per_epoch = False
23
+ find_unused_parameters = True
24
+ enable_wandb = True
25
+ wandb_project = 'Volt'
26
+ wandb_key = None
27
+ mix_prob = 0.85
28
+ param_dicts = None
29
+ hooks = [
30
+ dict(type='CheckpointLoader'),
31
+ dict(type='ModelHook'),
32
+ dict(type='IterationTimer', warmup_iter=2),
33
+ dict(type='InformationWriter'),
34
+ dict(type='SemSegEvaluator'),
35
+ dict(type='CheckpointSaver', save_freq=None),
36
+ dict(type='PreciseEvaluator', test_last=False)
37
+ ]
38
+ train = dict(type='DefaultTrainer')
39
+ test = dict(type='SemSegTester', verbose=True)
40
+ CLASS_LABELS_200 = (
41
+ 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf',
42
+ 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window',
43
+ 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair',
44
+ 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet', 'towel',
45
+ 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion',
46
+ 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard',
47
+ 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand', 'whiteboard',
48
+ 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave',
49
+ 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench',
50
+ 'board', 'washing machine', 'mirror', 'copier', 'basket', 'sofa chair',
51
+ 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person',
52
+ 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard',
53
+ 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container',
54
+ 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand',
55
+ 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar',
56
+ 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder',
57
+ 'bathroom stall', 'shower wall', 'cup', 'jacket', 'storage bin',
58
+ 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat',
59
+ 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board',
60
+ 'fireplace', 'soap dish', 'kitchen counter', 'doorframe',
61
+ 'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball',
62
+ 'hat', 'shower curtain rod', 'water cooler', 'paper cutter', 'tray',
63
+ 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse',
64
+ 'toilet seat cover dispenser', 'furniture', 'cart', 'storage container',
65
+ 'scale', 'tissue box', 'light switch', 'crate', 'power outlet',
66
+ 'decoration', 'sign', 'projector', 'closet door', 'vacuum cleaner',
67
+ 'candle', 'plunger', 'stuffed animal', 'headphones', 'dish rack', 'broom',
68
+ 'guitar case', 'range hood', 'dustpan', 'hair dryer', 'water bottle',
69
+ 'handicap bar', 'purse', 'vent', 'shower floor', 'water pitcher',
70
+ 'mailbox', 'bowl', 'paper bag', 'alarm clock', 'music stand',
71
+ 'projector screen', 'divider', 'laundry detergent', 'bathroom counter',
72
+ 'object', 'bathroom vanity', 'closet wall', 'laundry hamper',
73
+ 'bathroom stall door', 'ceiling light', 'trash bin', 'dumbbell',
74
+ 'stair rail', 'tube', 'bathroom cabinet', 'cd case', 'closet rod',
75
+ 'coffee kettle', 'structure', 'shower head', 'keyboard piano',
76
+ 'case of water bottles', 'coat rack', 'storage organizer', 'folded chair',
77
+ 'fire alarm', 'power strip', 'calendar', 'poster', 'potted plant',
78
+ 'luggage', 'mattress')
79
+ model = dict(
80
+ type='DefaultSegmentorV2',
81
+ backbone_out_channels=128,
82
+ backbone=dict(
83
+ type='Volt',
84
+ in_channels=6,
85
+ embed_dim=384,
86
+ depth=12,
87
+ num_heads=6,
88
+ mlp_ratio=4,
89
+ init_values=None,
90
+ qk_norm=True,
91
+ drop_path=0.3,
92
+ stride=5,
93
+ kernel_size=5,
94
+ increase_drop_path=True,
95
+ up_mlp_dim=128),
96
+ teacher=dict(
97
+ type='DefaultSegmentorV2',
98
+ backbone=dict(
99
+ type='SpUNet-v1m1',
100
+ in_channels=6,
101
+ num_classes=0,
102
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
103
+ layers=(2, 3, 4, 6, 2, 2, 2, 2)),
104
+ backbone_out_channels=96,
105
+ conditions=('ScanNet', 'ScanNet200', 'ScanNet++', 'ARKitScenes'),
106
+ num_classes=(20, 200, 100, 185)),
107
+ teacher_weights='exp/multi/2026-01-21_214020/model/model_best.pth',
108
+ criteria=[
109
+ dict(
110
+ type='CrossEntropyLoss',
111
+ loss_weight=1.0,
112
+ label_smoothing=0.1,
113
+ ignore_index=-1),
114
+ dict(
115
+ type='LovaszLoss',
116
+ mode='multiclass',
117
+ loss_weight=1.0,
118
+ ignore_index=-1)
119
+ ],
120
+ conditions=('ScanNet', 'ScanNet200', 'ScanNet++', 'ARKitScenes'),
121
+ num_classes=(20, 200, 100, 185))
122
+ optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.05)
123
+ scheduler = dict(
124
+ type='OneCycleLR',
125
+ max_lr=0.001,
126
+ pct_start=0.05,
127
+ anneal_strategy='cos',
128
+ div_factor=10.0,
129
+ final_div_factor=1000.0)
130
+ ignore_index = -1
131
+ data = dict(
132
+ num_classes=200,
133
+ ignore_index=-1,
134
+ names=(
135
+ 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf',
136
+ 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window',
137
+ 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair',
138
+ 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet',
139
+ 'towel', 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool',
140
+ 'cushion', 'plant', 'ceiling', 'bathtub', 'end table', 'dining table',
141
+ 'keyboard', 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand',
142
+ 'whiteboard', 'blanket', 'shower curtain', 'trash can', 'closet',
143
+ 'stairs', 'microwave', 'stove', 'shoe', 'computer tower', 'bottle',
144
+ 'bin', 'ottoman', 'bench', 'board', 'washing machine', 'mirror',
145
+ 'copier', 'basket', 'sofa chair', 'file cabinet', 'fan', 'laptop',
146
+ 'shower', 'paper', 'person', 'paper towel dispenser', 'oven', 'blinds',
147
+ 'rack', 'plate', 'blackboard', 'piano', 'suitcase', 'rail', 'radiator',
148
+ 'recycling bin', 'container', 'wardrobe', 'soap dispenser',
149
+ 'telephone', 'bucket', 'clock', 'stand', 'light', 'laundry basket',
150
+ 'pipe', 'clothes dryer', 'guitar', 'toilet paper holder', 'seat',
151
+ 'speaker', 'column', 'bicycle', 'ladder', 'bathroom stall',
152
+ 'shower wall', 'cup', 'jacket', 'storage bin', 'coffee maker',
153
+ 'dishwasher', 'paper towel roll', 'machine', 'mat', 'windowsill',
154
+ 'bar', 'toaster', 'bulletin board', 'ironing board', 'fireplace',
155
+ 'soap dish', 'kitchen counter', 'doorframe', 'toilet paper dispenser',
156
+ 'mini fridge', 'fire extinguisher', 'ball', 'hat',
157
+ 'shower curtain rod', 'water cooler', 'paper cutter', 'tray',
158
+ 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse',
159
+ 'toilet seat cover dispenser', 'furniture', 'cart',
160
+ 'storage container', 'scale', 'tissue box', 'light switch', 'crate',
161
+ 'power outlet', 'decoration', 'sign', 'projector', 'closet door',
162
+ 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal', 'headphones',
163
+ 'dish rack', 'broom', 'guitar case', 'range hood', 'dustpan',
164
+ 'hair dryer', 'water bottle', 'handicap bar', 'purse', 'vent',
165
+ 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag',
166
+ 'alarm clock', 'music stand', 'projector screen', 'divider',
167
+ 'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity',
168
+ 'closet wall', 'laundry hamper', 'bathroom stall door',
169
+ 'ceiling light', 'trash bin', 'dumbbell', 'stair rail', 'tube',
170
+ 'bathroom cabinet', 'cd case', 'closet rod', 'coffee kettle',
171
+ 'structure', 'shower head', 'keyboard piano', 'case of water bottles',
172
+ 'coat rack', 'storage organizer', 'folded chair', 'fire alarm',
173
+ 'power strip', 'calendar', 'poster', 'potted plant', 'luggage',
174
+ 'mattress'),
175
+ train=dict(
176
+ type='ConcatDataset',
177
+ datasets=[
178
+ dict(
179
+ type='ScanNet200Dataset',
180
+ split='train',
181
+ data_root='data/scannet',
182
+ transform=[
183
+ dict(type='CenterShift', apply_z=True),
184
+ dict(
185
+ type='RandomDropout',
186
+ dropout_ratio=0.2,
187
+ dropout_application_ratio=0.2),
188
+ dict(
189
+ type='RandomRotate',
190
+ angle=[-1, 1],
191
+ axis='z',
192
+ center=[0, 0, 0],
193
+ p=0.5),
194
+ dict(
195
+ type='RandomRotate',
196
+ angle=[-0.015625, 0.015625],
197
+ axis='x',
198
+ p=0.5),
199
+ dict(
200
+ type='RandomRotate',
201
+ angle=[-0.015625, 0.015625],
202
+ axis='y',
203
+ p=0.5),
204
+ dict(type='RandomScale', scale=[0.9, 1.1]),
205
+ dict(type='RandomFlip', p=0.5),
206
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
207
+ dict(
208
+ type='ElasticDistortion',
209
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
210
+ dict(
211
+ type='ChromaticAutoContrast', p=0.2,
212
+ blend_factor=None),
213
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
214
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
215
+ dict(
216
+ type='InstanceShift',
217
+ p=0.2,
218
+ shift_range=[0.1, 0.1, 0.1]),
219
+ dict(
220
+ type='InstanceRotate',
221
+ p=0.2,
222
+ axis='z',
223
+ angle=[-0.25, 0.25]),
224
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
225
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
226
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
227
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
228
+ dict(type='SwapInstances', p=0.2),
229
+ dict(
230
+ type='GridSample',
231
+ grid_size=0.02,
232
+ hash_type='fnv',
233
+ mode='train',
234
+ return_grid_coord=True),
235
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
236
+ dict(type='CenterShift', apply_z=False),
237
+ dict(type='NormalizeColor'),
238
+ dict(
239
+ type='Update', keys_dict=dict(condition='ScanNet200')),
240
+ dict(type='ToTensor'),
241
+ dict(
242
+ type='Collect',
243
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
244
+ feat_keys=('color', 'normal'))
245
+ ],
246
+ test_mode=False,
247
+ ignore_index=-1,
248
+ loop=1),
249
+ dict(
250
+ type='ScanNetPPDataset',
251
+ split='train',
252
+ data_root='data/scannetpp',
253
+ transform=[
254
+ dict(type='SphereCrop', point_max=1000000, mode='random'),
255
+ dict(type='CenterShift', apply_z=True),
256
+ dict(
257
+ type='RandomDropout',
258
+ dropout_ratio=0.2,
259
+ dropout_application_ratio=0.2),
260
+ dict(
261
+ type='RandomRotate',
262
+ angle=[-1, 1],
263
+ axis='z',
264
+ center=[0, 0, 0],
265
+ p=0.5),
266
+ dict(
267
+ type='RandomRotate',
268
+ angle=[-0.015625, 0.015625],
269
+ axis='x',
270
+ p=0.5),
271
+ dict(
272
+ type='RandomRotate',
273
+ angle=[-0.015625, 0.015625],
274
+ axis='y',
275
+ p=0.5),
276
+ dict(type='RandomScale', scale=[0.9, 1.1]),
277
+ dict(type='RandomFlip', p=0.5),
278
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
279
+ dict(
280
+ type='ChromaticAutoContrast', p=0.2,
281
+ blend_factor=None),
282
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
283
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
284
+ dict(
285
+ type='InstanceShift',
286
+ p=0.2,
287
+ shift_range=[0.1, 0.1, 0.1]),
288
+ dict(
289
+ type='InstanceRotate',
290
+ p=0.2,
291
+ axis='z',
292
+ angle=[-0.25, 0.25]),
293
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
294
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
295
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
296
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
297
+ dict(type='SwapInstances', p=0.2),
298
+ dict(
299
+ type='GridSample',
300
+ grid_size=0.02,
301
+ hash_type='fnv',
302
+ mode='train',
303
+ return_grid_coord=True),
304
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
305
+ dict(type='SphereCrop', point_max=204800, mode='random'),
306
+ dict(type='CenterShift', apply_z=False),
307
+ dict(type='NormalizeColor'),
308
+ dict(type='Update', keys_dict=dict(condition='ScanNet++')),
309
+ dict(type='ToTensor'),
310
+ dict(
311
+ type='Collect',
312
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
313
+ feat_keys=('color', 'normal'))
314
+ ],
315
+ test_mode=False,
316
+ ignore_index=-1,
317
+ loop=1),
318
+ dict(
319
+ type='ARKitScenesLabelMakerDataset',
320
+ split=['train', 'val'],
321
+ data_root='data/arkitscenes',
322
+ transform=[
323
+ dict(type='CenterShift', apply_z=True),
324
+ dict(
325
+ type='RandomDropout',
326
+ dropout_ratio=0.2,
327
+ dropout_application_ratio=0.2),
328
+ dict(
329
+ type='RandomRotate',
330
+ angle=[-1, 1],
331
+ axis='z',
332
+ center=[0, 0, 0],
333
+ p=0.5),
334
+ dict(
335
+ type='RandomRotate',
336
+ angle=[-0.015625, 0.015625],
337
+ axis='x',
338
+ p=0.5),
339
+ dict(
340
+ type='RandomRotate',
341
+ angle=[-0.015625, 0.015625],
342
+ axis='y',
343
+ p=0.5),
344
+ dict(type='RandomScale', scale=[0.9, 1.1]),
345
+ dict(type='RandomFlip', p=0.5),
346
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
347
+ dict(
348
+ type='ElasticDistortion',
349
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
350
+ dict(
351
+ type='ChromaticAutoContrast', p=0.2,
352
+ blend_factor=None),
353
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
354
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
355
+ dict(
356
+ type='GridSample',
357
+ grid_size=0.02,
358
+ hash_type='fnv',
359
+ mode='train',
360
+ return_grid_coord=True),
361
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
362
+ dict(type='SphereCrop', point_max=102400, mode='random'),
363
+ dict(type='CenterShift', apply_z=False),
364
+ dict(type='NormalizeColor'),
365
+ dict(
366
+ type='Update',
367
+ keys_dict=dict(condition='ARKitScenes')),
368
+ dict(type='ToTensor'),
369
+ dict(
370
+ type='Collect',
371
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
372
+ feat_keys=('color', 'normal'))
373
+ ],
374
+ test_mode=False,
375
+ ignore_index=-1,
376
+ loop=1)
377
+ ],
378
+ loop=6),
379
+ val=dict(
380
+ type='ScanNet200Dataset',
381
+ split='val',
382
+ data_root='data/scannet',
383
+ transform=[
384
+ dict(type='CenterShift', apply_z=True),
385
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
386
+ dict(
387
+ type='GridSample',
388
+ grid_size=0.02,
389
+ hash_type='fnv',
390
+ mode='train',
391
+ return_grid_coord=True,
392
+ return_inverse=True),
393
+ dict(type='CenterShift', apply_z=False),
394
+ dict(type='NormalizeColor'),
395
+ dict(type='Update', keys_dict=dict(condition='ScanNet200')),
396
+ dict(type='ToTensor'),
397
+ dict(
398
+ type='Collect',
399
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
400
+ 'inverse', 'condition'),
401
+ feat_keys=('color', 'normal'))
402
+ ],
403
+ test_mode=False),
404
+ test=dict(
405
+ type='ScanNet200Dataset',
406
+ split='val',
407
+ data_root='data/scannet',
408
+ transform=[
409
+ dict(type='CenterShift', apply_z=True),
410
+ dict(type='NormalizeColor')
411
+ ],
412
+ test_mode=True,
413
+ test_cfg=dict(
414
+ voxelize=dict(
415
+ type='GridSample',
416
+ grid_size=0.02,
417
+ hash_type='fnv',
418
+ mode='test',
419
+ return_grid_coord=True),
420
+ crop=None,
421
+ post_transform=[
422
+ dict(type='CenterShift', apply_z=False),
423
+ dict(type='Update', keys_dict=dict(condition='ScanNet200')),
424
+ dict(type='ToTensor'),
425
+ dict(
426
+ type='Collect',
427
+ keys=('coord', 'grid_coord', 'index', 'condition'),
428
+ feat_keys=('color', 'normal'))
429
+ ],
430
+ aug_transform=[[{
431
+ 'type': 'RandomRotateTargetAngle',
432
+ 'angle': [0],
433
+ 'axis': 'z',
434
+ 'center': [0, 0, 0],
435
+ 'p': 1
436
+ }],
437
+ [{
438
+ 'type': 'RandomRotateTargetAngle',
439
+ 'angle': [0.5],
440
+ 'axis': 'z',
441
+ 'center': [0, 0, 0],
442
+ 'p': 1
443
+ }],
444
+ [{
445
+ 'type': 'RandomRotateTargetAngle',
446
+ 'angle': [1],
447
+ 'axis': 'z',
448
+ 'center': [0, 0, 0],
449
+ 'p': 1
450
+ }],
451
+ [{
452
+ 'type': 'RandomRotateTargetAngle',
453
+ 'angle': [1.5],
454
+ 'axis': 'z',
455
+ 'center': [0, 0, 0],
456
+ 'p': 1
457
+ }],
458
+ [{
459
+ 'type': 'RandomRotateTargetAngle',
460
+ 'angle': [0],
461
+ 'axis': 'z',
462
+ 'center': [0, 0, 0],
463
+ 'p': 1
464
+ }, {
465
+ 'type': 'RandomScale',
466
+ 'scale': [0.95, 0.95]
467
+ }],
468
+ [{
469
+ 'type': 'RandomRotateTargetAngle',
470
+ 'angle': [0.5],
471
+ 'axis': 'z',
472
+ 'center': [0, 0, 0],
473
+ 'p': 1
474
+ }, {
475
+ 'type': 'RandomScale',
476
+ 'scale': [0.95, 0.95]
477
+ }],
478
+ [{
479
+ 'type': 'RandomRotateTargetAngle',
480
+ 'angle': [1],
481
+ 'axis': 'z',
482
+ 'center': [0, 0, 0],
483
+ 'p': 1
484
+ }, {
485
+ 'type': 'RandomScale',
486
+ 'scale': [0.95, 0.95]
487
+ }],
488
+ [{
489
+ 'type': 'RandomRotateTargetAngle',
490
+ 'angle': [1.5],
491
+ 'axis': 'z',
492
+ 'center': [0, 0, 0],
493
+ 'p': 1
494
+ }, {
495
+ 'type': 'RandomScale',
496
+ 'scale': [0.95, 0.95]
497
+ }],
498
+ [{
499
+ 'type': 'RandomRotateTargetAngle',
500
+ 'angle': [0],
501
+ 'axis': 'z',
502
+ 'center': [0, 0, 0],
503
+ 'p': 1
504
+ }, {
505
+ 'type': 'RandomScale',
506
+ 'scale': [1.05, 1.05]
507
+ }],
508
+ [{
509
+ 'type': 'RandomRotateTargetAngle',
510
+ 'angle': [0.5],
511
+ 'axis': 'z',
512
+ 'center': [0, 0, 0],
513
+ 'p': 1
514
+ }, {
515
+ 'type': 'RandomScale',
516
+ 'scale': [1.05, 1.05]
517
+ }],
518
+ [{
519
+ 'type': 'RandomRotateTargetAngle',
520
+ 'angle': [1],
521
+ 'axis': 'z',
522
+ 'center': [0, 0, 0],
523
+ 'p': 1
524
+ }, {
525
+ 'type': 'RandomScale',
526
+ 'scale': [1.05, 1.05]
527
+ }],
528
+ [{
529
+ 'type': 'RandomRotateTargetAngle',
530
+ 'angle': [1.5],
531
+ 'axis': 'z',
532
+ 'center': [0, 0, 0],
533
+ 'p': 1
534
+ }, {
535
+ 'type': 'RandomScale',
536
+ 'scale': [1.05, 1.05]
537
+ }], [{
538
+ 'type': 'RandomFlip',
539
+ 'p': 1
540
+ }]])))
541
+ num_worker_per_gpu = 12
542
+ batch_size_per_gpu = 2
543
+ batch_size_val_per_gpu = 1
544
+ batch_size_test_per_gpu = 1
Volt_experiments/joint_training_small/scannet200/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd634d0d1b5918668bc3a02bd5e4737d8a2ac66fd303cf3acbfaac7c6130e0f1
3
+ size 381350961
Volt_experiments/joint_training_small/scannet200/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b7e8cdfaa83bfa45911682002244774601a82d3d86c42072baa781da92ebf9d
3
+ size 381350961
Volt_experiments/joint_training_small/scannet200/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db212fb19e5aa51ebc6d1a1b3a0809fc6c7c0fddb963247315140b6c67d193f
3
+ size 25176805
Volt_experiments/joint_training_small/semantic_kitti/config.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 53039635
6
+ save_path = 'exp/semantic_kitti/2026-04-28_232453'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 30
13
+ eval_epoch = 30
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ sync_bn = False
18
+ enable_amp = True
19
+ amp_dtype = 'float16'
20
+ empty_cache = False
21
+ empty_cache_per_epoch = False
22
+ find_unused_parameters = True
23
+ enable_wandb = True
24
+ wandb_project = 'Volt'
25
+ wandb_key = None
26
+ mix_prob = 0.5
27
+ param_dicts = None
28
+ hooks = [
29
+ dict(type='CheckpointLoader'),
30
+ dict(type='ModelHook'),
31
+ dict(type='IterationTimer', warmup_iter=2),
32
+ dict(type='InformationWriter'),
33
+ dict(type='SemSegEvaluator'),
34
+ dict(type='CheckpointSaver', save_freq=None),
35
+ dict(type='PreciseEvaluator', test_last=False)
36
+ ]
37
+ train = dict(type='MultiDatasetTrainer')
38
+ test = dict(type='SemSegTester', verbose=True)
39
+ model = dict(
40
+ type='DefaultSegmentorV2',
41
+ backbone_out_channels=128,
42
+ backbone=dict(
43
+ type='Volt',
44
+ in_channels=4,
45
+ embed_dim=384,
46
+ depth=12,
47
+ num_heads=6,
48
+ mlp_ratio=4,
49
+ init_values=None,
50
+ qk_norm=True,
51
+ drop_path=0.3,
52
+ stride=5,
53
+ kernel_size=5,
54
+ increase_drop_path=True,
55
+ up_mlp_dim=128),
56
+ teacher=dict(
57
+ type='DefaultSegmentorV2',
58
+ backbone=dict(
59
+ type='SpUNet-v1m3',
60
+ in_channels=4,
61
+ num_classes=0,
62
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
63
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
64
+ enc_mode=False,
65
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
66
+ zero_init=False,
67
+ norm_decouple=True,
68
+ norm_adaptive=False,
69
+ norm_affine=True),
70
+ backbone_out_channels=96,
71
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
72
+ num_classes=(16, 19, 22)),
73
+ teacher_weights='exp/nuscenes/2026-02-25_010147/model/model_best.pth',
74
+ criteria=[
75
+ dict(
76
+ type='CrossEntropyLoss',
77
+ loss_weight=1.0,
78
+ label_smoothing=0.1,
79
+ ignore_index=-1),
80
+ dict(
81
+ type='LovaszLoss',
82
+ mode='multiclass',
83
+ loss_weight=1.0,
84
+ ignore_index=-1)
85
+ ],
86
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
87
+ num_classes=(16, 19, 22))
88
+ optimizer = dict(type='AdamW', lr=0.002, weight_decay=0.05)
89
+ scheduler = dict(
90
+ type='OneCycleLR',
91
+ max_lr=0.002,
92
+ pct_start=0.04,
93
+ anneal_strategy='cos',
94
+ div_factor=10.0,
95
+ final_div_factor=100.0)
96
+ ignore_index = -1
97
+ names = [
98
+ 'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person',
99
+ 'bicyclist', 'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground',
100
+ 'building', 'fence', 'vegetation', 'trunk', 'terrain', 'pole',
101
+ 'traffic-sign'
102
+ ]
103
+ data = dict(
104
+ num_classes=19,
105
+ ignore_index=-1,
106
+ names=[
107
+ 'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person',
108
+ 'bicyclist', 'motorcyclist', 'road', 'parking', 'sidewalk',
109
+ 'other-ground', 'building', 'fence', 'vegetation', 'trunk', 'terrain',
110
+ 'pole', 'traffic-sign'
111
+ ],
112
+ train=dict(
113
+ type='ConcatDataset',
114
+ datasets=[
115
+ dict(
116
+ type='SemanticKITTIDataset',
117
+ split='train',
118
+ data_root='data/semantic_kitti',
119
+ transform=[
120
+ dict(
121
+ type='RandomDropout',
122
+ dropout_ratio=0.2,
123
+ dropout_application_ratio=0.2),
124
+ dict(
125
+ type='InstanceCutMix',
126
+ db_path='data/semantic_kitti_instances/train.h5'),
127
+ dict(
128
+ type='RandomRotate',
129
+ angle=[-1, 1],
130
+ axis='z',
131
+ center=[0, 0, 0],
132
+ p=0.5),
133
+ dict(
134
+ type='RandomRotate',
135
+ angle=[-0.015625, 0.015625],
136
+ axis='x',
137
+ p=0.5),
138
+ dict(
139
+ type='RandomRotate',
140
+ angle=[-0.015625, 0.015625],
141
+ axis='y',
142
+ p=0.5),
143
+ dict(
144
+ type='PointClipDistance',
145
+ max_dist=50.0,
146
+ z_min=-4.0,
147
+ z_max=2.0),
148
+ dict(type='RandomScale', scale=[0.9, 1.1]),
149
+ dict(
150
+ type='RandomShift',
151
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
152
+ dict(type='RandomFlip', p=0.5),
153
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
154
+ dict(type='InstanceShift', p=0.5, shift_range=[4, 4, 0.5]),
155
+ dict(
156
+ type='InstanceRotate',
157
+ p=0.5,
158
+ axis='z',
159
+ angle=[-0.5, 0.5]),
160
+ dict(type='InstanceScale', p=0.5, scale=[0.9, 1.1]),
161
+ dict(
162
+ type='GridSample',
163
+ grid_size=0.05,
164
+ hash_type='fnv',
165
+ mode='train',
166
+ return_grid_coord=True),
167
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
168
+ dict(
169
+ type='Update',
170
+ keys_dict=dict(condition='SemanticKITTI')),
171
+ dict(type='ToTensor'),
172
+ dict(
173
+ type='Collect',
174
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
175
+ feat_keys=('coord', 'strength'))
176
+ ],
177
+ test_mode=False,
178
+ ignore_index=-1,
179
+ loop=1),
180
+ dict(
181
+ type='WaymoDataset',
182
+ split='training',
183
+ data_root='data/waymo',
184
+ transform=[
185
+ dict(
186
+ type='RandomDropout',
187
+ dropout_ratio=0.2,
188
+ dropout_application_ratio=0.2),
189
+ dict(
190
+ type='RandomRotate',
191
+ angle=[-1, 1],
192
+ axis='z',
193
+ center=[0, 0, 0],
194
+ p=0.5),
195
+ dict(
196
+ type='RandomRotate',
197
+ angle=[-0.015625, 0.015625],
198
+ axis='x',
199
+ p=0.5),
200
+ dict(
201
+ type='RandomRotate',
202
+ angle=[-0.015625, 0.015625],
203
+ axis='y',
204
+ p=0.5),
205
+ dict(
206
+ type='PointClipDistance',
207
+ max_dist=75.0,
208
+ z_min=-4.0,
209
+ z_max=2.0),
210
+ dict(type='RandomScale', scale=[0.9, 1.1]),
211
+ dict(
212
+ type='RandomShift',
213
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
214
+ dict(type='RandomFlip', p=0.5),
215
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
216
+ dict(
217
+ type='GridSample',
218
+ grid_size=0.05,
219
+ hash_type='fnv',
220
+ mode='train',
221
+ return_grid_coord=True),
222
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
223
+ dict(type='SphereCrop', point_max=102400, mode='random'),
224
+ dict(type='Update', keys_dict=dict(condition='Waymo')),
225
+ dict(type='ToTensor'),
226
+ dict(
227
+ type='Collect',
228
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
229
+ feat_keys=('coord', 'strength'))
230
+ ],
231
+ test_mode=False,
232
+ ignore_index=-1,
233
+ loop=1),
234
+ dict(
235
+ type='NuScenesDataset',
236
+ split='train',
237
+ data_root='data/nuscenes',
238
+ transform=[
239
+ dict(
240
+ type='RandomRotateTargetAngle',
241
+ angle=[-0.5],
242
+ axis='z',
243
+ center=[0, 0, 0],
244
+ p=1),
245
+ dict(
246
+ type='RandomRotate',
247
+ angle=[-1, 1],
248
+ axis='z',
249
+ center=[0, 0, 0],
250
+ p=0.5),
251
+ dict(
252
+ type='RandomRotate',
253
+ angle=[-0.015625, 0.015625],
254
+ axis='x',
255
+ p=0.5),
256
+ dict(
257
+ type='RandomRotate',
258
+ angle=[-0.015625, 0.015625],
259
+ axis='y',
260
+ p=0.5),
261
+ dict(
262
+ type='PointClipDistance',
263
+ max_dist=70.0,
264
+ z_min=-4.0,
265
+ z_max=2.0),
266
+ dict(type='RandomScale', scale=[0.9, 1.1]),
267
+ dict(type='RandomFlip', p=0.5),
268
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
269
+ dict(
270
+ type='GridSample',
271
+ grid_size=0.05,
272
+ hash_type='fnv',
273
+ mode='train',
274
+ return_grid_coord=True),
275
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
276
+ dict(type='Update', keys_dict=dict(condition='nuScenes')),
277
+ dict(type='ToTensor'),
278
+ dict(
279
+ type='Collect',
280
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
281
+ feat_keys=('coord', 'strength'))
282
+ ],
283
+ test_mode=False,
284
+ ignore_index=-1,
285
+ loop=2)
286
+ ],
287
+ loop=1),
288
+ val=dict(
289
+ type='SemanticKITTIDataset',
290
+ split='val',
291
+ data_root='data/semantic_kitti',
292
+ transform=[
293
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
294
+ dict(
295
+ type='PointClipDistance', max_dist=50.0, z_min=-4.0,
296
+ z_max=2.0),
297
+ dict(
298
+ type='GridSample',
299
+ grid_size=0.05,
300
+ hash_type='fnv',
301
+ mode='train',
302
+ return_grid_coord=True,
303
+ return_inverse=True),
304
+ dict(type='Update', keys_dict=dict(condition='SemanticKITTI')),
305
+ dict(type='ToTensor'),
306
+ dict(
307
+ type='Collect',
308
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
309
+ 'inverse', 'condition'),
310
+ feat_keys=('coord', 'strength'))
311
+ ],
312
+ test_mode=False,
313
+ ignore_index=-1),
314
+ test=dict(
315
+ type='SemanticKITTIDataset',
316
+ split='val',
317
+ data_root='data/semantic_kitti',
318
+ transform=[
319
+ dict(
320
+ type='PointClipDistance', max_dist=50.0, z_min=-4.0,
321
+ z_max=2.0),
322
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
323
+ dict(
324
+ type='GridSample',
325
+ grid_size=0.025,
326
+ hash_type='fnv',
327
+ mode='train',
328
+ return_inverse=True)
329
+ ],
330
+ test_mode=True,
331
+ test_cfg=dict(
332
+ voxelize=dict(
333
+ type='GridSample',
334
+ grid_size=0.05,
335
+ hash_type='fnv',
336
+ mode='test',
337
+ return_grid_coord=True),
338
+ crop=None,
339
+ post_transform=[
340
+ dict(type='Update', keys_dict=dict(condition='SemanticKITTI')),
341
+ dict(type='ToTensor'),
342
+ dict(
343
+ type='Collect',
344
+ keys=('coord', 'grid_coord', 'index', 'condition'),
345
+ feat_keys=('coord', 'strength'))
346
+ ],
347
+ aug_transform=[[{
348
+ 'type': 'RandomScale',
349
+ 'scale': [0.9, 0.9]
350
+ }], [{
351
+ 'type': 'RandomScale',
352
+ 'scale': [0.95, 0.95]
353
+ }], [{
354
+ 'type': 'RandomScale',
355
+ 'scale': [1, 1]
356
+ }], [{
357
+ 'type': 'RandomScale',
358
+ 'scale': [1.05, 1.05]
359
+ }], [{
360
+ 'type': 'RandomScale',
361
+ 'scale': [1.1, 1.1]
362
+ }],
363
+ [{
364
+ 'type': 'RandomScale',
365
+ 'scale': [0.9, 0.9]
366
+ }, {
367
+ 'type': 'RandomFlip',
368
+ 'p': 1
369
+ }],
370
+ [{
371
+ 'type': 'RandomScale',
372
+ 'scale': [0.95, 0.95]
373
+ }, {
374
+ 'type': 'RandomFlip',
375
+ 'p': 1
376
+ }],
377
+ [{
378
+ 'type': 'RandomScale',
379
+ 'scale': [1, 1]
380
+ }, {
381
+ 'type': 'RandomFlip',
382
+ 'p': 1
383
+ }],
384
+ [{
385
+ 'type': 'RandomScale',
386
+ 'scale': [1.05, 1.05]
387
+ }, {
388
+ 'type': 'RandomFlip',
389
+ 'p': 1
390
+ }],
391
+ [{
392
+ 'type': 'RandomScale',
393
+ 'scale': [1.1, 1.1]
394
+ }, {
395
+ 'type': 'RandomFlip',
396
+ 'p': 1
397
+ }]]),
398
+ ignore_index=-1))
Volt_experiments/joint_training_small/semantic_kitti/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d326a7b2e62f94a1021d18f5dad9eb13070f42b65877812fe68b6b3ff101f50
3
+ size 378004481
Volt_experiments/joint_training_small/semantic_kitti/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4145f6ec18427f30ed583f21d38d1a7bf6a6a47d169400b255c6cff95314dbac
3
+ size 378006849
Volt_experiments/joint_training_small/semantic_kitti/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04c18271095423c75f6087c67ff1325ad911cba3de867726fb9b36e94db1433
3
+ size 29370538
Volt_experiments/joint_training_small/waymo/config.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 56946903
6
+ save_path = 'exp/waymo/2026-05-05_095255'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 25
13
+ eval_epoch = 25
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ dataset_ratios = None
18
+ sync_bn = False
19
+ enable_amp = True
20
+ amp_dtype = 'float16'
21
+ empty_cache = False
22
+ empty_cache_per_epoch = False
23
+ find_unused_parameters = True
24
+ enable_wandb = True
25
+ wandb_project = 'Volt'
26
+ wandb_key = None
27
+ mix_prob = 0.2
28
+ param_dicts = None
29
+ hooks = [
30
+ dict(type='CheckpointLoader'),
31
+ dict(type='ModelHook'),
32
+ dict(type='IterationTimer', warmup_iter=2),
33
+ dict(type='InformationWriter'),
34
+ dict(type='SemSegEvaluator'),
35
+ dict(type='CheckpointSaver', save_freq=None),
36
+ dict(type='PreciseEvaluator', test_last=False)
37
+ ]
38
+ train = dict(type='MultiDatasetTrainer')
39
+ test = dict(type='SemSegTester', verbose=True)
40
+ model = dict(
41
+ type='DefaultSegmentorV2',
42
+ backbone_out_channels=128,
43
+ backbone=dict(
44
+ type='Volt',
45
+ in_channels=4,
46
+ embed_dim=384,
47
+ depth=12,
48
+ num_heads=6,
49
+ mlp_ratio=4,
50
+ init_values=None,
51
+ qk_norm=True,
52
+ drop_path=0.3,
53
+ stride=5,
54
+ kernel_size=5,
55
+ increase_drop_path=True,
56
+ up_mlp_dim=128),
57
+ teacher=dict(
58
+ type='DefaultSegmentorV2',
59
+ backbone=dict(
60
+ type='SpUNet-v1m3',
61
+ in_channels=4,
62
+ num_classes=0,
63
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
64
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
65
+ enc_mode=False,
66
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
67
+ zero_init=False,
68
+ norm_decouple=True,
69
+ norm_adaptive=False,
70
+ norm_affine=True),
71
+ backbone_out_channels=96,
72
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
73
+ num_classes=(16, 19, 22)),
74
+ teacher_weights='exp/nuscenes/2026-02-25_010147/model/model_best.pth',
75
+ criteria=[
76
+ dict(
77
+ type='CrossEntropyLoss',
78
+ loss_weight=1.0,
79
+ label_smoothing=0.1,
80
+ ignore_index=-1),
81
+ dict(
82
+ type='LovaszLoss',
83
+ mode='multiclass',
84
+ loss_weight=1.0,
85
+ ignore_index=-1)
86
+ ],
87
+ conditions=('nuScenes', 'SemanticKITTI', 'Waymo'),
88
+ num_classes=(16, 19, 22))
89
+ optimizer = dict(type='AdamW', lr=0.002, weight_decay=0.05)
90
+ scheduler = dict(
91
+ type='OneCycleLR',
92
+ max_lr=0.002,
93
+ pct_start=0.04,
94
+ anneal_strategy='cos',
95
+ div_factor=10.0,
96
+ final_div_factor=100.0)
97
+ ignore_index = -1
98
+ names = [
99
+ 'Car', 'Truck', 'Bus', 'Other Vehicle', 'Motorcyclist', 'Bicyclist',
100
+ 'Pedestrian', 'Sign', 'Traffic Light', 'Pole', 'Construction Cone',
101
+ 'Bicycle', 'Motorcycle', 'Building', 'Vegetation', 'Tree Trunk', 'Curb',
102
+ 'Road', 'Lane Marker', 'Other Ground', 'Walkable', 'Sidewalk'
103
+ ]
104
+ data = dict(
105
+ num_classes=22,
106
+ ignore_index=-1,
107
+ names=[
108
+ 'Car', 'Truck', 'Bus', 'Other Vehicle', 'Motorcyclist', 'Bicyclist',
109
+ 'Pedestrian', 'Sign', 'Traffic Light', 'Pole', 'Construction Cone',
110
+ 'Bicycle', 'Motorcycle', 'Building', 'Vegetation', 'Tree Trunk',
111
+ 'Curb', 'Road', 'Lane Marker', 'Other Ground', 'Walkable', 'Sidewalk'
112
+ ],
113
+ train=dict(
114
+ type='ConcatDataset',
115
+ datasets=[
116
+ dict(
117
+ type='WaymoDataset',
118
+ split='training',
119
+ data_root='data/waymo',
120
+ transform=[
121
+ dict(
122
+ type='RandomDropout',
123
+ dropout_ratio=0.2,
124
+ dropout_application_ratio=0.2),
125
+ dict(
126
+ type='RandomRotate',
127
+ angle=[-1, 1],
128
+ axis='z',
129
+ center=[0, 0, 0],
130
+ p=0.5),
131
+ dict(
132
+ type='RandomRotate',
133
+ angle=[-0.015625, 0.015625],
134
+ axis='x',
135
+ p=0.5),
136
+ dict(
137
+ type='RandomRotate',
138
+ angle=[-0.015625, 0.015625],
139
+ axis='y',
140
+ p=0.5),
141
+ dict(
142
+ type='PointClipDistance',
143
+ max_dist=75.0,
144
+ z_min=-4.0,
145
+ z_max=2.0),
146
+ dict(type='RandomScale', scale=[0.9, 1.1]),
147
+ dict(
148
+ type='RandomShift',
149
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
150
+ dict(type='RandomFlip', p=0.5),
151
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
152
+ dict(
153
+ type='GridSample',
154
+ grid_size=0.05,
155
+ hash_type='fnv',
156
+ mode='train',
157
+ return_grid_coord=True),
158
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
159
+ dict(type='Update', keys_dict=dict(condition='Waymo')),
160
+ dict(type='ToTensor'),
161
+ dict(
162
+ type='Collect',
163
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
164
+ feat_keys=('coord', 'strength'))
165
+ ],
166
+ test_mode=False,
167
+ ignore_index=-1,
168
+ loop=1),
169
+ dict(
170
+ type='SemanticKITTIDataset',
171
+ split='train',
172
+ data_root='data/semantic_kitti',
173
+ transform=[
174
+ dict(
175
+ type='RandomDropout',
176
+ dropout_ratio=0.2,
177
+ dropout_application_ratio=0.2),
178
+ dict(
179
+ type='InstanceCutMix',
180
+ db_path='data/semantic_kitti_instances/train.h5'),
181
+ dict(
182
+ type='RandomRotate',
183
+ angle=[-1, 1],
184
+ axis='z',
185
+ center=[0, 0, 0],
186
+ p=0.5),
187
+ dict(
188
+ type='RandomRotate',
189
+ angle=[-0.015625, 0.015625],
190
+ axis='x',
191
+ p=0.5),
192
+ dict(
193
+ type='RandomRotate',
194
+ angle=[-0.015625, 0.015625],
195
+ axis='y',
196
+ p=0.5),
197
+ dict(
198
+ type='PointClipDistance',
199
+ max_dist=50.0,
200
+ z_min=-4.0,
201
+ z_max=2.0),
202
+ dict(type='RandomScale', scale=[0.9, 1.1]),
203
+ dict(
204
+ type='RandomShift',
205
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
206
+ dict(type='RandomFlip', p=0.5),
207
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
208
+ dict(type='InstanceShift', p=0.5, shift_range=[4, 4, 0.5]),
209
+ dict(
210
+ type='InstanceRotate',
211
+ p=0.5,
212
+ axis='z',
213
+ angle=[-0.5, 0.5]),
214
+ dict(type='InstanceScale', p=0.5, scale=[0.9, 1.1]),
215
+ dict(
216
+ type='GridSample',
217
+ grid_size=0.05,
218
+ hash_type='fnv',
219
+ mode='train',
220
+ return_grid_coord=True),
221
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
222
+ dict(
223
+ type='Update',
224
+ keys_dict=dict(condition='SemanticKITTI')),
225
+ dict(type='ToTensor'),
226
+ dict(
227
+ type='Collect',
228
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
229
+ feat_keys=('coord', 'strength'))
230
+ ],
231
+ test_mode=False,
232
+ ignore_index=-1,
233
+ loop=1),
234
+ dict(
235
+ type='NuScenesDataset',
236
+ split='train',
237
+ data_root='data/nuscenes',
238
+ transform=[
239
+ dict(
240
+ type='RandomRotateTargetAngle',
241
+ angle=[-0.5],
242
+ axis='z',
243
+ center=[0, 0, 0],
244
+ p=1),
245
+ dict(
246
+ type='RandomRotate',
247
+ angle=[-1, 1],
248
+ axis='z',
249
+ center=[0, 0, 0],
250
+ p=0.5),
251
+ dict(
252
+ type='RandomRotate',
253
+ angle=[-0.015625, 0.015625],
254
+ axis='x',
255
+ p=0.5),
256
+ dict(
257
+ type='RandomRotate',
258
+ angle=[-0.015625, 0.015625],
259
+ axis='y',
260
+ p=0.5),
261
+ dict(
262
+ type='PointClipDistance',
263
+ max_dist=70.0,
264
+ z_min=-4.0,
265
+ z_max=2.0),
266
+ dict(type='RandomScale', scale=[0.9, 1.1]),
267
+ dict(type='RandomFlip', p=0.5),
268
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
269
+ dict(
270
+ type='GridSample',
271
+ grid_size=0.05,
272
+ hash_type='fnv',
273
+ mode='train',
274
+ return_grid_coord=True),
275
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
276
+ dict(type='Update', keys_dict=dict(condition='nuScenes')),
277
+ dict(type='ToTensor'),
278
+ dict(
279
+ type='Collect',
280
+ keys=('coord', 'grid_coord', 'segment', 'condition'),
281
+ feat_keys=('coord', 'strength'))
282
+ ],
283
+ test_mode=False,
284
+ ignore_index=-1,
285
+ loop=2)
286
+ ],
287
+ loop=1),
288
+ val=dict(
289
+ type='WaymoDataset',
290
+ split='validation',
291
+ data_root='data/waymo',
292
+ transform=[
293
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
294
+ dict(
295
+ type='PointClipDistance', max_dist=75.0, z_min=-4.0,
296
+ z_max=2.0),
297
+ dict(
298
+ type='GridSample',
299
+ grid_size=0.05,
300
+ hash_type='fnv',
301
+ mode='train',
302
+ return_grid_coord=True,
303
+ return_inverse=True),
304
+ dict(type='Update', keys_dict=dict(condition='Waymo')),
305
+ dict(type='ToTensor'),
306
+ dict(
307
+ type='Collect',
308
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
309
+ 'inverse', 'condition'),
310
+ feat_keys=('coord', 'strength'))
311
+ ],
312
+ test_mode=False,
313
+ ignore_index=-1),
314
+ test=dict(
315
+ type='WaymoDataset',
316
+ split='validation',
317
+ data_root='data/waymo',
318
+ transform=[
319
+ dict(
320
+ type='PointClipDistance', max_dist=75.0, z_min=-4.0,
321
+ z_max=2.0),
322
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
323
+ dict(
324
+ type='GridSample',
325
+ grid_size=0.025,
326
+ hash_type='fnv',
327
+ mode='train',
328
+ return_inverse=True)
329
+ ],
330
+ test_mode=True,
331
+ test_cfg=dict(
332
+ voxelize=dict(
333
+ type='GridSample',
334
+ grid_size=0.05,
335
+ hash_type='fnv',
336
+ mode='test',
337
+ return_grid_coord=True),
338
+ crop=None,
339
+ post_transform=[
340
+ dict(type='Update', keys_dict=dict(condition='Waymo')),
341
+ dict(type='ToTensor'),
342
+ dict(
343
+ type='Collect',
344
+ keys=('coord', 'grid_coord', 'index', 'condition'),
345
+ feat_keys=('coord', 'strength'))
346
+ ],
347
+ aug_transform=[[{
348
+ 'type': 'RandomScale',
349
+ 'scale': [0.9, 0.9]
350
+ }], [{
351
+ 'type': 'RandomScale',
352
+ 'scale': [0.95, 0.95]
353
+ }], [{
354
+ 'type': 'RandomScale',
355
+ 'scale': [1, 1]
356
+ }], [{
357
+ 'type': 'RandomScale',
358
+ 'scale': [1.05, 1.05]
359
+ }], [{
360
+ 'type': 'RandomScale',
361
+ 'scale': [1.1, 1.1]
362
+ }],
363
+ [{
364
+ 'type': 'RandomScale',
365
+ 'scale': [0.9, 0.9]
366
+ }, {
367
+ 'type': 'RandomFlip',
368
+ 'p': 1
369
+ }],
370
+ [{
371
+ 'type': 'RandomScale',
372
+ 'scale': [0.95, 0.95]
373
+ }, {
374
+ 'type': 'RandomFlip',
375
+ 'p': 1
376
+ }],
377
+ [{
378
+ 'type': 'RandomScale',
379
+ 'scale': [1, 1]
380
+ }, {
381
+ 'type': 'RandomFlip',
382
+ 'p': 1
383
+ }],
384
+ [{
385
+ 'type': 'RandomScale',
386
+ 'scale': [1.05, 1.05]
387
+ }, {
388
+ 'type': 'RandomFlip',
389
+ 'p': 1
390
+ }],
391
+ [{
392
+ 'type': 'RandomScale',
393
+ 'scale': [1.1, 1.1]
394
+ }, {
395
+ 'type': 'RandomFlip',
396
+ 'p': 1
397
+ }]]),
398
+ ignore_index=-1))
Volt_experiments/joint_training_small/waymo/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92b545fa79952b16728dac82f77f3d5ee9a1bed751e2b7c0d191f94ae696da2f
3
+ size 378006849
Volt_experiments/joint_training_small/waymo/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92b545fa79952b16728dac82f77f3d5ee9a1bed751e2b7c0d191f94ae696da2f
3
+ size 378006849
Volt_experiments/joint_training_small/waymo/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63eb6ca8c684fd5004a21a96ed3760614fd40b919891a4e7b640ca318b4a281d
3
+ size 35180982