KadirYilmaz committed on
Commit
b520356
·
verified ·
1 Parent(s): 04d6a69

Add single-dataset Volt experiment outputs

Browse files
Files changed (25) hide show
  1. .gitattributes +4 -0
  2. Volt_experiments/single_dataset/nuscenes/config.py +254 -0
  3. Volt_experiments/single_dataset/nuscenes/model/model_best.pth +3 -0
  4. Volt_experiments/single_dataset/nuscenes/model/model_last.pth +3 -0
  5. Volt_experiments/single_dataset/nuscenes/train.log +3 -0
  6. Volt_experiments/single_dataset/scannet/config.py +315 -0
  7. Volt_experiments/single_dataset/scannet/model/model_best.pth +3 -0
  8. Volt_experiments/single_dataset/scannet/model/model_last.pth +3 -0
  9. Volt_experiments/single_dataset/scannet/train.log +3 -0
  10. Volt_experiments/single_dataset/scannet200/config.py +389 -0
  11. Volt_experiments/single_dataset/scannet200/model/model_best.pth +3 -0
  12. Volt_experiments/single_dataset/scannet200/model/model_last.pth +3 -0
  13. Volt_experiments/single_dataset/scannet200/train.log +3 -0
  14. Volt_experiments/single_dataset/scannetpp/config.py +354 -0
  15. Volt_experiments/single_dataset/scannetpp/model/model_best.pth +3 -0
  16. Volt_experiments/single_dataset/scannetpp/model/model_last.pth +3 -0
  17. Volt_experiments/single_dataset/scannetpp/train.log +0 -0
  18. Volt_experiments/single_dataset/semantic_kitti/config.py +273 -0
  19. Volt_experiments/single_dataset/semantic_kitti/model/model_best.pth +3 -0
  20. Volt_experiments/single_dataset/semantic_kitti/model/model_last.pth +3 -0
  21. Volt_experiments/single_dataset/semantic_kitti/train.log +0 -0
  22. Volt_experiments/single_dataset/waymo/config.py +263 -0
  23. Volt_experiments/single_dataset/waymo/model/model_best.pth +3 -0
  24. Volt_experiments/single_dataset/waymo/model/model_last.pth +3 -0
  25. Volt_experiments/single_dataset/waymo/train.log +3 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Volt_experiments/single_dataset/nuscenes/train.log filter=lfs diff=lfs merge=lfs -text
37
+ Volt_experiments/single_dataset/scannet/train.log filter=lfs diff=lfs merge=lfs -text
38
+ Volt_experiments/single_dataset/scannet200/train.log filter=lfs diff=lfs merge=lfs -text
39
+ Volt_experiments/single_dataset/waymo/train.log filter=lfs diff=lfs merge=lfs -text
Volt_experiments/single_dataset/nuscenes/config.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 38847342
6
+ save_path = 'exp/nuscenes/2026-04-24_120759'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 50
13
+ eval_epoch = 50
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ sync_bn = False
18
+ enable_amp = True
19
+ amp_dtype = 'float16'
20
+ empty_cache = False
21
+ empty_cache_per_epoch = False
22
+ find_unused_parameters = False
23
+ enable_wandb = True
24
+ wandb_project = 'Volt'
25
+ wandb_key = None
26
+ mix_prob = 0.85
27
+ param_dicts = None
28
+ hooks = [
29
+ dict(type='CheckpointLoader'),
30
+ dict(type='ModelHook'),
31
+ dict(type='IterationTimer', warmup_iter=2),
32
+ dict(type='InformationWriter'),
33
+ dict(type='SemSegEvaluator'),
34
+ dict(type='CheckpointSaver', save_freq=None),
35
+ dict(type='PreciseEvaluator', test_last=False)
36
+ ]
37
+ train = dict(type='DefaultTrainer')
38
+ test = dict(type='SemSegTester', verbose=True)
39
+ model = dict(
40
+ type='DefaultSegmentorV2',
41
+ num_classes=16,
42
+ backbone_out_channels=128,
43
+ backbone=dict(
44
+ type='Volt',
45
+ in_channels=4,
46
+ embed_dim=384,
47
+ depth=12,
48
+ num_heads=6,
49
+ mlp_ratio=4,
50
+ init_values=None,
51
+ qk_norm=True,
52
+ drop_path=0.3,
53
+ stride=5,
54
+ kernel_size=5,
55
+ increase_drop_path=True,
56
+ up_mlp_dim=128),
57
+ teacher=dict(
58
+ type='DefaultSegmentor',
59
+ backbone=dict(
60
+ type='SpUNet-v1m1',
61
+ in_channels=4,
62
+ num_classes=16,
63
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
64
+ layers=(2, 3, 4, 6, 2, 2, 2, 2))),
65
+ teacher_weights='weights/teacher_weights/nuscenes_unet_teacher.pth',
66
+ criteria=[
67
+ dict(
68
+ type='CrossEntropyLoss',
69
+ loss_weight=1.0,
70
+ label_smoothing=0.1,
71
+ ignore_index=-1),
72
+ dict(
73
+ type='LovaszLoss',
74
+ mode='multiclass',
75
+ loss_weight=1.0,
76
+ ignore_index=-1)
77
+ ])
78
+ optimizer = dict(type='AdamW', lr=0.002, weight_decay=0.05)
79
+ scheduler = dict(
80
+ type='OneCycleLR',
81
+ max_lr=0.002,
82
+ pct_start=0.04,
83
+ anneal_strategy='cos',
84
+ div_factor=10.0,
85
+ final_div_factor=100.0)
86
+ dataset_type = 'NuScenesDataset'
87
+ data_root = 'data/nuscenes'
88
+ ignore_index = -1
89
+ names = [
90
+ 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle', 'motorcycle',
91
+ 'pedestrian', 'traffic_cone', 'trailer', 'truck', 'driveable_surface',
92
+ 'other_flat', 'sidewalk', 'terrain', 'manmade', 'vegetation'
93
+ ]
94
+ data = dict(
95
+ num_classes=16,
96
+ ignore_index=-1,
97
+ names=[
98
+ 'barrier', 'bicycle', 'bus', 'car', 'construction_vehicle',
99
+ 'motorcycle', 'pedestrian', 'traffic_cone', 'trailer', 'truck',
100
+ 'driveable_surface', 'other_flat', 'sidewalk', 'terrain', 'manmade',
101
+ 'vegetation'
102
+ ],
103
+ train=dict(
104
+ type='NuScenesDataset',
105
+ split='train',
106
+ data_root='data/nuscenes',
107
+ transform=[
108
+ dict(
109
+ type='RandomRotate',
110
+ angle=[-1, 1],
111
+ axis='z',
112
+ center=[0, 0, 0],
113
+ p=0.5),
114
+ dict(
115
+ type='RandomRotate',
116
+ angle=[-0.015625, 0.015625],
117
+ axis='x',
118
+ p=0.5),
119
+ dict(
120
+ type='RandomRotate',
121
+ angle=[-0.015625, 0.015625],
122
+ axis='y',
123
+ p=0.5),
124
+ dict(
125
+ type='PointClipDistance', max_dist=70.0, z_min=-4.0,
126
+ z_max=2.0),
127
+ dict(type='RandomScale', scale=[0.9, 1.1]),
128
+ dict(type='RandomFlip', p=0.5),
129
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
130
+ dict(
131
+ type='GridSample',
132
+ grid_size=0.05,
133
+ hash_type='fnv',
134
+ mode='train',
135
+ return_grid_coord=True),
136
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
137
+ dict(type='ToTensor'),
138
+ dict(
139
+ type='Collect',
140
+ keys=('coord', 'grid_coord', 'segment'),
141
+ feat_keys=('coord', 'strength'))
142
+ ],
143
+ test_mode=False,
144
+ ignore_index=-1,
145
+ loop=1),
146
+ val=dict(
147
+ type='NuScenesDataset',
148
+ split='val',
149
+ data_root='data/nuscenes',
150
+ transform=[
151
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
152
+ dict(
153
+ type='PointClipDistance', max_dist=70.0, z_min=-4.0,
154
+ z_max=2.0),
155
+ dict(
156
+ type='GridSample',
157
+ grid_size=0.05,
158
+ hash_type='fnv',
159
+ mode='train',
160
+ return_grid_coord=True,
161
+ return_inverse=True),
162
+ dict(type='ToTensor'),
163
+ dict(
164
+ type='Collect',
165
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
166
+ 'inverse'),
167
+ feat_keys=('coord', 'strength'))
168
+ ],
169
+ test_mode=False,
170
+ ignore_index=-1),
171
+ test=dict(
172
+ type='NuScenesDataset',
173
+ split='val',
174
+ data_root='data/nuscenes',
175
+ transform=[
176
+ dict(
177
+ type='PointClipDistance', max_dist=70.0, z_min=-4.0,
178
+ z_max=2.0),
179
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
180
+ dict(
181
+ type='GridSample',
182
+ grid_size=0.025,
183
+ hash_type='fnv',
184
+ mode='train',
185
+ return_inverse=True)
186
+ ],
187
+ test_mode=True,
188
+ test_cfg=dict(
189
+ voxelize=dict(
190
+ type='GridSample',
191
+ grid_size=0.05,
192
+ hash_type='fnv',
193
+ mode='test',
194
+ return_grid_coord=True),
195
+ crop=None,
196
+ post_transform=[
197
+ dict(type='ToTensor'),
198
+ dict(
199
+ type='Collect',
200
+ keys=('coord', 'grid_coord', 'index'),
201
+ feat_keys=('coord', 'strength'))
202
+ ],
203
+ aug_transform=[[{
204
+ 'type': 'RandomScale',
205
+ 'scale': [0.9, 0.9]
206
+ }], [{
207
+ 'type': 'RandomScale',
208
+ 'scale': [0.95, 0.95]
209
+ }], [{
210
+ 'type': 'RandomScale',
211
+ 'scale': [1, 1]
212
+ }], [{
213
+ 'type': 'RandomScale',
214
+ 'scale': [1.05, 1.05]
215
+ }], [{
216
+ 'type': 'RandomScale',
217
+ 'scale': [1.1, 1.1]
218
+ }],
219
+ [{
220
+ 'type': 'RandomScale',
221
+ 'scale': [0.9, 0.9]
222
+ }, {
223
+ 'type': 'RandomFlip',
224
+ 'p': 1
225
+ }],
226
+ [{
227
+ 'type': 'RandomScale',
228
+ 'scale': [0.95, 0.95]
229
+ }, {
230
+ 'type': 'RandomFlip',
231
+ 'p': 1
232
+ }],
233
+ [{
234
+ 'type': 'RandomScale',
235
+ 'scale': [1, 1]
236
+ }, {
237
+ 'type': 'RandomFlip',
238
+ 'p': 1
239
+ }],
240
+ [{
241
+ 'type': 'RandomScale',
242
+ 'scale': [1.05, 1.05]
243
+ }, {
244
+ 'type': 'RandomFlip',
245
+ 'p': 1
246
+ }],
247
+ [{
248
+ 'type': 'RandomScale',
249
+ 'scale': [1.1, 1.1]
250
+ }, {
251
+ 'type': 'RandomFlip',
252
+ 'p': 1
253
+ }]]),
254
+ ignore_index=-1))
Volt_experiments/single_dataset/nuscenes/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5d9b77ef6e49a5a90109434520067ebae61662e1b7bb8f62f4bf69ec01f7eb6
3
+ size 377822993
Volt_experiments/single_dataset/nuscenes/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5d9b77ef6e49a5a90109434520067ebae61662e1b7bb8f62f4bf69ec01f7eb6
3
+ size 377822993
Volt_experiments/single_dataset/nuscenes/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efec167baaa8737cf58c483a3d80aef94a39229d428ad27c1f8b616a7f942ac1
3
+ size 25950324
Volt_experiments/single_dataset/scannet/config.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 37898871
6
+ save_path = 'exp/scannet/2026-04-24_120048'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 800
13
+ eval_epoch = 100
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ sync_bn = False
18
+ enable_amp = True
19
+ amp_dtype = 'float16'
20
+ empty_cache = False
21
+ empty_cache_per_epoch = False
22
+ find_unused_parameters = False
23
+ enable_wandb = True
24
+ wandb_project = 'Volt'
25
+ wandb_key = None
26
+ mix_prob = 0.85
27
+ param_dicts = None
28
+ hooks = [
29
+ dict(type='CheckpointLoader'),
30
+ dict(type='ModelHook'),
31
+ dict(type='IterationTimer', warmup_iter=2),
32
+ dict(type='InformationWriter'),
33
+ dict(type='SemSegEvaluator'),
34
+ dict(type='CheckpointSaver', save_freq=None),
35
+ dict(type='PreciseEvaluator', test_last=False)
36
+ ]
37
+ train = dict(type='DefaultTrainer')
38
+ test = dict(type='SemSegTester', verbose=True)
39
+ model = dict(
40
+ type='DefaultSegmentorV2',
41
+ num_classes=20,
42
+ backbone_out_channels=128,
43
+ backbone=dict(
44
+ type='Volt',
45
+ in_channels=6,
46
+ embed_dim=384,
47
+ depth=12,
48
+ num_heads=6,
49
+ mlp_ratio=4,
50
+ init_values=None,
51
+ qk_norm=True,
52
+ drop_path=0.3,
53
+ stride=5,
54
+ kernel_size=5,
55
+ increase_drop_path=True,
56
+ up_mlp_dim=128),
57
+ teacher=dict(
58
+ type='DefaultSegmentor',
59
+ backbone=dict(
60
+ type='SpUNet-v1m1',
61
+ in_channels=6,
62
+ num_classes=20,
63
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
64
+ layers=(2, 3, 4, 6, 2, 2, 2, 2))),
65
+ teacher_weights='weights/teacher_weights/scannet_unet_teacher.pth',
66
+ criteria=[
67
+ dict(
68
+ type='CrossEntropyLoss',
69
+ loss_weight=1.0,
70
+ label_smoothing=0.1,
71
+ ignore_index=-1),
72
+ dict(
73
+ type='LovaszLoss',
74
+ mode='multiclass',
75
+ loss_weight=1.0,
76
+ ignore_index=-1)
77
+ ])
78
+ optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.05)
79
+ scheduler = dict(
80
+ type='OneCycleLR',
81
+ max_lr=0.001,
82
+ pct_start=0.05,
83
+ anneal_strategy='cos',
84
+ div_factor=10.0,
85
+ final_div_factor=1000.0)
86
+ dataset_type = 'ScanNetDataset'
87
+ data_root = 'data/scannet'
88
+ data = dict(
89
+ num_classes=20,
90
+ ignore_index=-1,
91
+ names=[
92
+ 'wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
93
+ 'window', 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
94
+ 'refridgerator', 'shower curtain', 'toilet', 'sink', 'bathtub',
95
+ 'otherfurniture'
96
+ ],
97
+ train=dict(
98
+ type='ScanNetDataset',
99
+ split='train',
100
+ data_root='data/scannet',
101
+ transform=[
102
+ dict(type='CenterShift', apply_z=True),
103
+ dict(
104
+ type='RandomDropout',
105
+ dropout_ratio=0.2,
106
+ dropout_application_ratio=0.2),
107
+ dict(
108
+ type='RandomRotate',
109
+ angle=[-1, 1],
110
+ axis='z',
111
+ center=[0, 0, 0],
112
+ p=0.5),
113
+ dict(
114
+ type='RandomRotate',
115
+ angle=[-0.015625, 0.015625],
116
+ axis='x',
117
+ p=0.5),
118
+ dict(
119
+ type='RandomRotate',
120
+ angle=[-0.015625, 0.015625],
121
+ axis='y',
122
+ p=0.5),
123
+ dict(type='RandomScale', scale=[0.9, 1.1]),
124
+ dict(type='RandomFlip', p=0.5),
125
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
126
+ dict(
127
+ type='ElasticDistortion',
128
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
129
+ dict(type='ChromaticAutoContrast', p=0.2, blend_factor=None),
130
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
131
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
132
+ dict(type='InstanceShift', p=0.2, shift_range=[0.1, 0.1, 0.1]),
133
+ dict(type='InstanceRotate', p=0.2, axis='z', angle=[-0.25, 0.25]),
134
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
135
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
136
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
137
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
138
+ dict(type='SwapInstances', p=0.2),
139
+ dict(
140
+ type='GridSample',
141
+ grid_size=0.02,
142
+ hash_type='fnv',
143
+ mode='train',
144
+ return_grid_coord=True),
145
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
146
+ dict(type='CenterShift', apply_z=False),
147
+ dict(type='NormalizeColor'),
148
+ dict(type='ToTensor'),
149
+ dict(
150
+ type='Collect',
151
+ keys=('coord', 'grid_coord', 'segment'),
152
+ feat_keys=('color', 'normal'))
153
+ ],
154
+ test_mode=False,
155
+ loop=8),
156
+ val=dict(
157
+ type='ScanNetDataset',
158
+ split='val',
159
+ data_root='data/scannet',
160
+ transform=[
161
+ dict(type='CenterShift', apply_z=True),
162
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
163
+ dict(
164
+ type='GridSample',
165
+ grid_size=0.02,
166
+ hash_type='fnv',
167
+ mode='train',
168
+ return_grid_coord=True,
169
+ return_inverse=True),
170
+ dict(type='CenterShift', apply_z=False),
171
+ dict(type='NormalizeColor'),
172
+ dict(type='ToTensor'),
173
+ dict(
174
+ type='Collect',
175
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
176
+ 'inverse'),
177
+ feat_keys=('color', 'normal'))
178
+ ],
179
+ test_mode=False),
180
+ test=dict(
181
+ type='ScanNetDataset',
182
+ split='val',
183
+ data_root='data/scannet',
184
+ transform=[
185
+ dict(type='CenterShift', apply_z=True),
186
+ dict(type='NormalizeColor')
187
+ ],
188
+ test_mode=True,
189
+ test_cfg=dict(
190
+ voxelize=dict(
191
+ type='GridSample',
192
+ grid_size=0.02,
193
+ hash_type='fnv',
194
+ mode='test',
195
+ return_grid_coord=True),
196
+ crop=None,
197
+ post_transform=[
198
+ dict(type='CenterShift', apply_z=False),
199
+ dict(type='ToTensor'),
200
+ dict(
201
+ type='Collect',
202
+ keys=('coord', 'grid_coord', 'index'),
203
+ feat_keys=('color', 'normal'))
204
+ ],
205
+ aug_transform=[[{
206
+ 'type': 'RandomRotateTargetAngle',
207
+ 'angle': [0],
208
+ 'axis': 'z',
209
+ 'center': [0, 0, 0],
210
+ 'p': 1
211
+ }],
212
+ [{
213
+ 'type': 'RandomRotateTargetAngle',
214
+ 'angle': [0.5],
215
+ 'axis': 'z',
216
+ 'center': [0, 0, 0],
217
+ 'p': 1
218
+ }],
219
+ [{
220
+ 'type': 'RandomRotateTargetAngle',
221
+ 'angle': [1],
222
+ 'axis': 'z',
223
+ 'center': [0, 0, 0],
224
+ 'p': 1
225
+ }],
226
+ [{
227
+ 'type': 'RandomRotateTargetAngle',
228
+ 'angle': [1.5],
229
+ 'axis': 'z',
230
+ 'center': [0, 0, 0],
231
+ 'p': 1
232
+ }],
233
+ [{
234
+ 'type': 'RandomRotateTargetAngle',
235
+ 'angle': [0],
236
+ 'axis': 'z',
237
+ 'center': [0, 0, 0],
238
+ 'p': 1
239
+ }, {
240
+ 'type': 'RandomScale',
241
+ 'scale': [0.95, 0.95]
242
+ }],
243
+ [{
244
+ 'type': 'RandomRotateTargetAngle',
245
+ 'angle': [0.5],
246
+ 'axis': 'z',
247
+ 'center': [0, 0, 0],
248
+ 'p': 1
249
+ }, {
250
+ 'type': 'RandomScale',
251
+ 'scale': [0.95, 0.95]
252
+ }],
253
+ [{
254
+ 'type': 'RandomRotateTargetAngle',
255
+ 'angle': [1],
256
+ 'axis': 'z',
257
+ 'center': [0, 0, 0],
258
+ 'p': 1
259
+ }, {
260
+ 'type': 'RandomScale',
261
+ 'scale': [0.95, 0.95]
262
+ }],
263
+ [{
264
+ 'type': 'RandomRotateTargetAngle',
265
+ 'angle': [1.5],
266
+ 'axis': 'z',
267
+ 'center': [0, 0, 0],
268
+ 'p': 1
269
+ }, {
270
+ 'type': 'RandomScale',
271
+ 'scale': [0.95, 0.95]
272
+ }],
273
+ [{
274
+ 'type': 'RandomRotateTargetAngle',
275
+ 'angle': [0],
276
+ 'axis': 'z',
277
+ 'center': [0, 0, 0],
278
+ 'p': 1
279
+ }, {
280
+ 'type': 'RandomScale',
281
+ 'scale': [1.05, 1.05]
282
+ }],
283
+ [{
284
+ 'type': 'RandomRotateTargetAngle',
285
+ 'angle': [0.5],
286
+ 'axis': 'z',
287
+ 'center': [0, 0, 0],
288
+ 'p': 1
289
+ }, {
290
+ 'type': 'RandomScale',
291
+ 'scale': [1.05, 1.05]
292
+ }],
293
+ [{
294
+ 'type': 'RandomRotateTargetAngle',
295
+ 'angle': [1],
296
+ 'axis': 'z',
297
+ 'center': [0, 0, 0],
298
+ 'p': 1
299
+ }, {
300
+ 'type': 'RandomScale',
301
+ 'scale': [1.05, 1.05]
302
+ }],
303
+ [{
304
+ 'type': 'RandomRotateTargetAngle',
305
+ 'angle': [1.5],
306
+ 'axis': 'z',
307
+ 'center': [0, 0, 0],
308
+ 'p': 1
309
+ }, {
310
+ 'type': 'RandomScale',
311
+ 'scale': [1.05, 1.05]
312
+ }], [{
313
+ 'type': 'RandomFlip',
314
+ 'p': 1
315
+ }]])))
Volt_experiments/single_dataset/scannet/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062858262f8f9dcd375b71786d16963e37ba5dc522028f9bf4954e987351054c
3
+ size 379375377
Volt_experiments/single_dataset/scannet/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9d0a9d58b7de88f3e6f64f7b4d9f975bcdf3a51e63b8f2836d02ca02d49faf
3
+ size 379375377
Volt_experiments/single_dataset/scannet/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1683dd1394c272eda246ed91f2a60da377feebd8d2af908aee3046290a1de47
3
+ size 10735269
Volt_experiments/single_dataset/scannet200/config.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 7980693
6
+ save_path = 'exp/scannet200/2026-04-23_205124'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 800
13
+ eval_epoch = 100
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ sync_bn = False
18
+ enable_amp = True
19
+ amp_dtype = 'float16'
20
+ empty_cache = False
21
+ empty_cache_per_epoch = False
22
+ find_unused_parameters = False
23
+ enable_wandb = True
24
+ wandb_project = 'Volt'
25
+ wandb_key = None
26
+ mix_prob = 0.85
27
+ param_dicts = None
28
+ hooks = [
29
+ dict(type='CheckpointLoader'),
30
+ dict(type='ModelHook'),
31
+ dict(type='IterationTimer', warmup_iter=2),
32
+ dict(type='InformationWriter'),
33
+ dict(type='SemSegEvaluator'),
34
+ dict(type='CheckpointSaver', save_freq=None),
35
+ dict(type='PreciseEvaluator', test_last=False)
36
+ ]
37
+ train = dict(type='DefaultTrainer')
38
+ test = dict(type='SemSegTester', verbose=True)
39
+ CLASS_LABELS_200 = (
40
+ 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf',
41
+ 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window',
42
+ 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair',
43
+ 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet', 'towel',
44
+ 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool', 'cushion',
45
+ 'plant', 'ceiling', 'bathtub', 'end table', 'dining table', 'keyboard',
46
+ 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand', 'whiteboard',
47
+ 'blanket', 'shower curtain', 'trash can', 'closet', 'stairs', 'microwave',
48
+ 'stove', 'shoe', 'computer tower', 'bottle', 'bin', 'ottoman', 'bench',
49
+ 'board', 'washing machine', 'mirror', 'copier', 'basket', 'sofa chair',
50
+ 'file cabinet', 'fan', 'laptop', 'shower', 'paper', 'person',
51
+ 'paper towel dispenser', 'oven', 'blinds', 'rack', 'plate', 'blackboard',
52
+ 'piano', 'suitcase', 'rail', 'radiator', 'recycling bin', 'container',
53
+ 'wardrobe', 'soap dispenser', 'telephone', 'bucket', 'clock', 'stand',
54
+ 'light', 'laundry basket', 'pipe', 'clothes dryer', 'guitar',
55
+ 'toilet paper holder', 'seat', 'speaker', 'column', 'bicycle', 'ladder',
56
+ 'bathroom stall', 'shower wall', 'cup', 'jacket', 'storage bin',
57
+ 'coffee maker', 'dishwasher', 'paper towel roll', 'machine', 'mat',
58
+ 'windowsill', 'bar', 'toaster', 'bulletin board', 'ironing board',
59
+ 'fireplace', 'soap dish', 'kitchen counter', 'doorframe',
60
+ 'toilet paper dispenser', 'mini fridge', 'fire extinguisher', 'ball',
61
+ 'hat', 'shower curtain rod', 'water cooler', 'paper cutter', 'tray',
62
+ 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse',
63
+ 'toilet seat cover dispenser', 'furniture', 'cart', 'storage container',
64
+ 'scale', 'tissue box', 'light switch', 'crate', 'power outlet',
65
+ 'decoration', 'sign', 'projector', 'closet door', 'vacuum cleaner',
66
+ 'candle', 'plunger', 'stuffed animal', 'headphones', 'dish rack', 'broom',
67
+ 'guitar case', 'range hood', 'dustpan', 'hair dryer', 'water bottle',
68
+ 'handicap bar', 'purse', 'vent', 'shower floor', 'water pitcher',
69
+ 'mailbox', 'bowl', 'paper bag', 'alarm clock', 'music stand',
70
+ 'projector screen', 'divider', 'laundry detergent', 'bathroom counter',
71
+ 'object', 'bathroom vanity', 'closet wall', 'laundry hamper',
72
+ 'bathroom stall door', 'ceiling light', 'trash bin', 'dumbbell',
73
+ 'stair rail', 'tube', 'bathroom cabinet', 'cd case', 'closet rod',
74
+ 'coffee kettle', 'structure', 'shower head', 'keyboard piano',
75
+ 'case of water bottles', 'coat rack', 'storage organizer', 'folded chair',
76
+ 'fire alarm', 'power strip', 'calendar', 'poster', 'potted plant',
77
+ 'luggage', 'mattress')
78
+ model = dict(
79
+ type='DefaultSegmentorV2',
80
+ num_classes=200,
81
+ backbone_out_channels=128,
82
+ backbone=dict(
83
+ type='Volt',
84
+ in_channels=6,
85
+ embed_dim=384,
86
+ depth=12,
87
+ num_heads=6,
88
+ mlp_ratio=4,
89
+ init_values=None,
90
+ qk_norm=True,
91
+ drop_path=0.3,
92
+ stride=5,
93
+ kernel_size=5,
94
+ increase_drop_path=True,
95
+ up_mlp_dim=128),
96
+ teacher=dict(
97
+ type='DefaultSegmentor',
98
+ backbone=dict(
99
+ type='SpUNet-v1m1',
100
+ in_channels=6,
101
+ num_classes=200,
102
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
103
+ layers=(2, 3, 4, 6, 2, 2, 2, 2))),
104
+ teacher_weights='weights/teacher_weights/scannet200_unet_teacher.pth',
105
+ criteria=[
106
+ dict(
107
+ type='CrossEntropyLoss',
108
+ loss_weight=1.0,
109
+ label_smoothing=0.1,
110
+ ignore_index=-1),
111
+ dict(
112
+ type='LovaszLoss',
113
+ mode='multiclass',
114
+ loss_weight=1.0,
115
+ ignore_index=-1)
116
+ ])
117
+ optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.05)
118
+ scheduler = dict(
119
+ type='OneCycleLR',
120
+ max_lr=0.001,
121
+ pct_start=0.05,
122
+ anneal_strategy='cos',
123
+ div_factor=10.0,
124
+ final_div_factor=1000.0)
125
+ dataset_type = 'ScanNet200Dataset'
126
+ data_root = 'data/scannet'
127
+ data = dict(
128
+ num_classes=200,
129
+ ignore_index=-1,
130
+ names=(
131
+ 'wall', 'chair', 'floor', 'table', 'door', 'couch', 'cabinet', 'shelf',
132
+ 'desk', 'office chair', 'bed', 'pillow', 'sink', 'picture', 'window',
133
+ 'toilet', 'bookshelf', 'monitor', 'curtain', 'book', 'armchair',
134
+ 'coffee table', 'box', 'refrigerator', 'lamp', 'kitchen cabinet',
135
+ 'towel', 'clothes', 'tv', 'nightstand', 'counter', 'dresser', 'stool',
136
+ 'cushion', 'plant', 'ceiling', 'bathtub', 'end table', 'dining table',
137
+ 'keyboard', 'bag', 'backpack', 'toilet paper', 'printer', 'tv stand',
138
+ 'whiteboard', 'blanket', 'shower curtain', 'trash can', 'closet',
139
+ 'stairs', 'microwave', 'stove', 'shoe', 'computer tower', 'bottle',
140
+ 'bin', 'ottoman', 'bench', 'board', 'washing machine', 'mirror',
141
+ 'copier', 'basket', 'sofa chair', 'file cabinet', 'fan', 'laptop',
142
+ 'shower', 'paper', 'person', 'paper towel dispenser', 'oven', 'blinds',
143
+ 'rack', 'plate', 'blackboard', 'piano', 'suitcase', 'rail', 'radiator',
144
+ 'recycling bin', 'container', 'wardrobe', 'soap dispenser',
145
+ 'telephone', 'bucket', 'clock', 'stand', 'light', 'laundry basket',
146
+ 'pipe', 'clothes dryer', 'guitar', 'toilet paper holder', 'seat',
147
+ 'speaker', 'column', 'bicycle', 'ladder', 'bathroom stall',
148
+ 'shower wall', 'cup', 'jacket', 'storage bin', 'coffee maker',
149
+ 'dishwasher', 'paper towel roll', 'machine', 'mat', 'windowsill',
150
+ 'bar', 'toaster', 'bulletin board', 'ironing board', 'fireplace',
151
+ 'soap dish', 'kitchen counter', 'doorframe', 'toilet paper dispenser',
152
+ 'mini fridge', 'fire extinguisher', 'ball', 'hat',
153
+ 'shower curtain rod', 'water cooler', 'paper cutter', 'tray',
154
+ 'shower door', 'pillar', 'ledge', 'toaster oven', 'mouse',
155
+ 'toilet seat cover dispenser', 'furniture', 'cart',
156
+ 'storage container', 'scale', 'tissue box', 'light switch', 'crate',
157
+ 'power outlet', 'decoration', 'sign', 'projector', 'closet door',
158
+ 'vacuum cleaner', 'candle', 'plunger', 'stuffed animal', 'headphones',
159
+ 'dish rack', 'broom', 'guitar case', 'range hood', 'dustpan',
160
+ 'hair dryer', 'water bottle', 'handicap bar', 'purse', 'vent',
161
+ 'shower floor', 'water pitcher', 'mailbox', 'bowl', 'paper bag',
162
+ 'alarm clock', 'music stand', 'projector screen', 'divider',
163
+ 'laundry detergent', 'bathroom counter', 'object', 'bathroom vanity',
164
+ 'closet wall', 'laundry hamper', 'bathroom stall door',
165
+ 'ceiling light', 'trash bin', 'dumbbell', 'stair rail', 'tube',
166
+ 'bathroom cabinet', 'cd case', 'closet rod', 'coffee kettle',
167
+ 'structure', 'shower head', 'keyboard piano', 'case of water bottles',
168
+ 'coat rack', 'storage organizer', 'folded chair', 'fire alarm',
169
+ 'power strip', 'calendar', 'poster', 'potted plant', 'luggage',
170
+ 'mattress'),
171
+ train=dict(
172
+ type='ScanNet200Dataset',
173
+ split='train',
174
+ data_root='data/scannet',
175
+ transform=[
176
+ dict(type='CenterShift', apply_z=True),
177
+ dict(
178
+ type='RandomDropout',
179
+ dropout_ratio=0.2,
180
+ dropout_application_ratio=0.2),
181
+ dict(
182
+ type='RandomRotate',
183
+ angle=[-1, 1],
184
+ axis='z',
185
+ center=[0, 0, 0],
186
+ p=0.5),
187
+ dict(
188
+ type='RandomRotate',
189
+ angle=[-0.015625, 0.015625],
190
+ axis='x',
191
+ p=0.5),
192
+ dict(
193
+ type='RandomRotate',
194
+ angle=[-0.015625, 0.015625],
195
+ axis='y',
196
+ p=0.5),
197
+ dict(type='RandomScale', scale=[0.9, 1.1]),
198
+ dict(type='RandomFlip', p=0.5),
199
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
200
+ dict(
201
+ type='ElasticDistortion',
202
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
203
+ dict(type='ChromaticAutoContrast', p=0.2, blend_factor=None),
204
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
205
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
206
+ dict(type='InstanceShift', p=0.2, shift_range=[0.1, 0.1, 0.1]),
207
+ dict(type='InstanceRotate', p=0.2, axis='z', angle=[-0.25, 0.25]),
208
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
209
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
210
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
211
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
212
+ dict(type='SwapInstances', p=0.2),
213
+ dict(
214
+ type='GridSample',
215
+ grid_size=0.02,
216
+ hash_type='fnv',
217
+ mode='train',
218
+ return_grid_coord=True),
219
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
220
+ dict(type='CenterShift', apply_z=False),
221
+ dict(type='NormalizeColor'),
222
+ dict(type='ToTensor'),
223
+ dict(
224
+ type='Collect',
225
+ keys=('coord', 'grid_coord', 'segment'),
226
+ feat_keys=('color', 'normal'))
227
+ ],
228
+ test_mode=False,
229
+ loop=8),
230
+ val=dict(
231
+ type='ScanNet200Dataset',
232
+ split='val',
233
+ data_root='data/scannet',
234
+ transform=[
235
+ dict(type='CenterShift', apply_z=True),
236
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
237
+ dict(
238
+ type='GridSample',
239
+ grid_size=0.02,
240
+ hash_type='fnv',
241
+ mode='train',
242
+ return_grid_coord=True,
243
+ return_inverse=True),
244
+ dict(type='CenterShift', apply_z=False),
245
+ dict(type='NormalizeColor'),
246
+ dict(type='ToTensor'),
247
+ dict(
248
+ type='Collect',
249
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
250
+ 'inverse'),
251
+ feat_keys=('color', 'normal'))
252
+ ],
253
+ test_mode=False),
254
+ test=dict(
255
+ type='ScanNet200Dataset',
256
+ split='val',
257
+ data_root='data/scannet',
258
+ transform=[
259
+ dict(type='CenterShift', apply_z=True),
260
+ dict(type='NormalizeColor')
261
+ ],
262
+ test_mode=True,
263
+ test_cfg=dict(
264
+ voxelize=dict(
265
+ type='GridSample',
266
+ grid_size=0.02,
267
+ hash_type='fnv',
268
+ mode='test',
269
+ return_grid_coord=True),
270
+ crop=None,
271
+ post_transform=[
272
+ dict(type='CenterShift', apply_z=False),
273
+ dict(type='ToTensor'),
274
+ dict(
275
+ type='Collect',
276
+ keys=('coord', 'grid_coord', 'index'),
277
+ feat_keys=('color', 'normal'))
278
+ ],
279
+ aug_transform=[[{
280
+ 'type': 'RandomRotateTargetAngle',
281
+ 'angle': [0],
282
+ 'axis': 'z',
283
+ 'center': [0, 0, 0],
284
+ 'p': 1
285
+ }],
286
+ [{
287
+ 'type': 'RandomRotateTargetAngle',
288
+ 'angle': [0.5],
289
+ 'axis': 'z',
290
+ 'center': [0, 0, 0],
291
+ 'p': 1
292
+ }],
293
+ [{
294
+ 'type': 'RandomRotateTargetAngle',
295
+ 'angle': [1],
296
+ 'axis': 'z',
297
+ 'center': [0, 0, 0],
298
+ 'p': 1
299
+ }],
300
+ [{
301
+ 'type': 'RandomRotateTargetAngle',
302
+ 'angle': [1.5],
303
+ 'axis': 'z',
304
+ 'center': [0, 0, 0],
305
+ 'p': 1
306
+ }],
307
+ [{
308
+ 'type': 'RandomRotateTargetAngle',
309
+ 'angle': [0],
310
+ 'axis': 'z',
311
+ 'center': [0, 0, 0],
312
+ 'p': 1
313
+ }, {
314
+ 'type': 'RandomScale',
315
+ 'scale': [0.95, 0.95]
316
+ }],
317
+ [{
318
+ 'type': 'RandomRotateTargetAngle',
319
+ 'angle': [0.5],
320
+ 'axis': 'z',
321
+ 'center': [0, 0, 0],
322
+ 'p': 1
323
+ }, {
324
+ 'type': 'RandomScale',
325
+ 'scale': [0.95, 0.95]
326
+ }],
327
+ [{
328
+ 'type': 'RandomRotateTargetAngle',
329
+ 'angle': [1],
330
+ 'axis': 'z',
331
+ 'center': [0, 0, 0],
332
+ 'p': 1
333
+ }, {
334
+ 'type': 'RandomScale',
335
+ 'scale': [0.95, 0.95]
336
+ }],
337
+ [{
338
+ 'type': 'RandomRotateTargetAngle',
339
+ 'angle': [1.5],
340
+ 'axis': 'z',
341
+ 'center': [0, 0, 0],
342
+ 'p': 1
343
+ }, {
344
+ 'type': 'RandomScale',
345
+ 'scale': [0.95, 0.95]
346
+ }],
347
+ [{
348
+ 'type': 'RandomRotateTargetAngle',
349
+ 'angle': [0],
350
+ 'axis': 'z',
351
+ 'center': [0, 0, 0],
352
+ 'p': 1
353
+ }, {
354
+ 'type': 'RandomScale',
355
+ 'scale': [1.05, 1.05]
356
+ }],
357
+ [{
358
+ 'type': 'RandomRotateTargetAngle',
359
+ 'angle': [0.5],
360
+ 'axis': 'z',
361
+ 'center': [0, 0, 0],
362
+ 'p': 1
363
+ }, {
364
+ 'type': 'RandomScale',
365
+ 'scale': [1.05, 1.05]
366
+ }],
367
+ [{
368
+ 'type': 'RandomRotateTargetAngle',
369
+ 'angle': [1],
370
+ 'axis': 'z',
371
+ 'center': [0, 0, 0],
372
+ 'p': 1
373
+ }, {
374
+ 'type': 'RandomScale',
375
+ 'scale': [1.05, 1.05]
376
+ }],
377
+ [{
378
+ 'type': 'RandomRotateTargetAngle',
379
+ 'angle': [1.5],
380
+ 'axis': 'z',
381
+ 'center': [0, 0, 0],
382
+ 'p': 1
383
+ }, {
384
+ 'type': 'RandomScale',
385
+ 'scale': [1.05, 1.05]
386
+ }], [{
387
+ 'type': 'RandomFlip',
388
+ 'p': 1
389
+ }]])))
Volt_experiments/single_dataset/scannet200/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f8c308b72ff776e2277dd4083ce3e489d20c044341ecac312169a154f352e65
3
+ size 380118289
Volt_experiments/single_dataset/scannet200/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3280f9c0d6f2ef4933719ca8216701825d06333de6b5d7b6af136654fcc3fe77
3
+ size 380118289
Volt_experiments/single_dataset/scannet200/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5be54d0f583b1c6510e96ec06e41192c9d055146635b2f41e884940d00dc3b0
3
+ size 12823628
Volt_experiments/single_dataset/scannetpp/config.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 20286765
6
+ save_path = 'exp/scannetpp/2026-04-22_112552'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 800
13
+ eval_epoch = 100
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ sync_bn = False
18
+ enable_amp = True
19
+ amp_dtype = 'float16'
20
+ empty_cache = False
21
+ empty_cache_per_epoch = False
22
+ find_unused_parameters = False
23
+ enable_wandb = True
24
+ wandb_project = 'Volt'
25
+ wandb_key = None
26
+ mix_prob = 0.85
27
+ param_dicts = None
28
+ hooks = [
29
+ dict(type='CheckpointLoader'),
30
+ dict(type='ModelHook'),
31
+ dict(type='IterationTimer', warmup_iter=2),
32
+ dict(type='InformationWriter'),
33
+ dict(type='SemSegEvaluator'),
34
+ dict(type='CheckpointSaver', save_freq=None),
35
+ dict(type='PreciseEvaluator', test_last=False)
36
+ ]
37
+ train = dict(type='DefaultTrainer')
38
+ test = dict(type='SemSegTester', verbose=True)
39
+ class_names = [
40
+ 'wall', 'ceiling', 'floor', 'table', 'door', 'ceiling lamp', 'cabinet',
41
+ 'blinds', 'curtain', 'chair', 'storage cabinet', 'office chair',
42
+ 'bookshelf', 'whiteboard', 'window', 'box', 'window frame', 'monitor',
43
+ 'shelf', 'doorframe', 'pipe', 'heater', 'kitchen cabinet', 'sofa',
44
+ 'windowsill', 'bed', 'shower wall', 'trash can', 'book', 'plant',
45
+ 'blanket', 'tv', 'computer tower', 'kitchen counter', 'refrigerator',
46
+ 'jacket', 'electrical duct', 'sink', 'bag', 'picture', 'pillow', 'towel',
47
+ 'suitcase', 'backpack', 'crate', 'keyboard', 'rack', 'toilet', 'paper',
48
+ 'printer', 'poster', 'painting', 'microwave', 'board', 'shoes', 'socket',
49
+ 'bottle', 'bucket', 'cushion', 'basket', 'shoe rack', 'telephone',
50
+ 'file folder', 'cloth', 'blind rail', 'laptop', 'plant pot', 'exhaust fan',
51
+ 'cup', 'coat hanger', 'light switch', 'speaker', 'table lamp', 'air vent',
52
+ 'clothes hanger', 'kettle', 'smoke detector', 'container', 'power strip',
53
+ 'slippers', 'paper bag', 'mouse', 'cutting board', 'toilet paper',
54
+ 'paper towel', 'pot', 'clock', 'pan', 'tap', 'jar', 'soap dispenser',
55
+ 'binder', 'bowl', 'tissue box', 'whiteboard eraser', 'toilet brush',
56
+ 'spray bottle', 'headphones', 'stapler', 'marker'
57
+ ]
58
+ data = dict(
59
+ names=[
60
+ 'wall', 'ceiling', 'floor', 'table', 'door', 'ceiling lamp', 'cabinet',
61
+ 'blinds', 'curtain', 'chair', 'storage cabinet', 'office chair',
62
+ 'bookshelf', 'whiteboard', 'window', 'box', 'window frame', 'monitor',
63
+ 'shelf', 'doorframe', 'pipe', 'heater', 'kitchen cabinet', 'sofa',
64
+ 'windowsill', 'bed', 'shower wall', 'trash can', 'book', 'plant',
65
+ 'blanket', 'tv', 'computer tower', 'kitchen counter', 'refrigerator',
66
+ 'jacket', 'electrical duct', 'sink', 'bag', 'picture', 'pillow',
67
+ 'towel', 'suitcase', 'backpack', 'crate', 'keyboard', 'rack', 'toilet',
68
+ 'paper', 'printer', 'poster', 'painting', 'microwave', 'board',
69
+ 'shoes', 'socket', 'bottle', 'bucket', 'cushion', 'basket',
70
+ 'shoe rack', 'telephone', 'file folder', 'cloth', 'blind rail',
71
+ 'laptop', 'plant pot', 'exhaust fan', 'cup', 'coat hanger',
72
+ 'light switch', 'speaker', 'table lamp', 'air vent', 'clothes hanger',
73
+ 'kettle', 'smoke detector', 'container', 'power strip', 'slippers',
74
+ 'paper bag', 'mouse', 'cutting board', 'toilet paper', 'paper towel',
75
+ 'pot', 'clock', 'pan', 'tap', 'jar', 'soap dispenser', 'binder',
76
+ 'bowl', 'tissue box', 'whiteboard eraser', 'toilet brush',
77
+ 'spray bottle', 'headphones', 'stapler', 'marker'
78
+ ],
79
+ num_classes=100,
80
+ ignore_index=-1,
81
+ train=dict(
82
+ type='ScanNetPPDataset',
83
+ split='train',
84
+ data_root='data/scannetpp',
85
+ transform=[
86
+ dict(type='SphereCrop', point_max=1000000, mode='random'),
87
+ dict(type='CenterShift', apply_z=True),
88
+ dict(
89
+ type='RandomDropout',
90
+ dropout_ratio=0.2,
91
+ dropout_application_ratio=0.2),
92
+ dict(
93
+ type='RandomRotate',
94
+ angle=[-1, 1],
95
+ axis='z',
96
+ center=[0, 0, 0],
97
+ p=0.5),
98
+ dict(
99
+ type='RandomRotate',
100
+ angle=[-0.015625, 0.015625],
101
+ axis='x',
102
+ p=0.5),
103
+ dict(
104
+ type='RandomRotate',
105
+ angle=[-0.015625, 0.015625],
106
+ axis='y',
107
+ p=0.5),
108
+ dict(type='RandomScale', scale=[0.9, 1.1]),
109
+ dict(type='RandomFlip', p=0.5),
110
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
111
+ dict(type='ChromaticAutoContrast', p=0.2, blend_factor=None),
112
+ dict(type='ChromaticTranslation', p=0.95, ratio=0.05),
113
+ dict(type='ChromaticJitter', p=0.95, std=0.05),
114
+ dict(type='InstanceShift', p=0.2, shift_range=[0.1, 0.1, 0.1]),
115
+ dict(type='InstanceRotate', p=0.2, axis='z', angle=[-0.25, 0.25]),
116
+ dict(type='InstanceFlip', p=0.2, flip_prob=0.5),
117
+ dict(type='InstanceScale', p=0.2, scale=[0.9, 1.1]),
118
+ dict(type='InstanceDropOut', p=0.1, drop_ratio=0.5),
119
+ dict(type='InstanceColorDropout', p=0.2, drop_value=0),
120
+ dict(type='SwapInstances', p=0.2),
121
+ dict(
122
+ type='GridSample',
123
+ grid_size=0.02,
124
+ hash_type='fnv',
125
+ mode='train',
126
+ return_grid_coord=True),
127
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
128
+ dict(type='SphereCrop', point_max=204800, mode='random'),
129
+ dict(type='CenterShift', apply_z=False),
130
+ dict(type='NormalizeColor'),
131
+ dict(type='ToTensor'),
132
+ dict(
133
+ type='Collect',
134
+ keys=('coord', 'grid_coord', 'segment'),
135
+ feat_keys=('color', 'normal'))
136
+ ],
137
+ test_mode=False,
138
+ loop=8),
139
+ val=dict(
140
+ type='ScanNetPPDataset',
141
+ split='val',
142
+ data_root='data/scannetpp',
143
+ transform=[
144
+ dict(type='CenterShift', apply_z=True),
145
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
146
+ dict(
147
+ type='GridSample',
148
+ grid_size=0.02,
149
+ hash_type='fnv',
150
+ mode='train',
151
+ return_grid_coord=True,
152
+ return_inverse=True),
153
+ dict(type='CenterShift', apply_z=False),
154
+ dict(type='NormalizeColor'),
155
+ dict(type='ToTensor'),
156
+ dict(
157
+ type='Collect',
158
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
159
+ 'inverse'),
160
+ feat_keys=('color', 'normal'))
161
+ ],
162
+ test_mode=False),
163
+ test=dict(
164
+ type='ScanNetPPDataset',
165
+ split='val',
166
+ data_root='data/scannetpp',
167
+ transform=[
168
+ dict(type='CenterShift', apply_z=True),
169
+ dict(type='NormalizeColor'),
170
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
171
+ dict(
172
+ type='GridSample',
173
+ grid_size=0.01,
174
+ hash_type='fnv',
175
+ mode='train',
176
+ return_inverse=True)
177
+ ],
178
+ test_mode=True,
179
+ test_cfg=dict(
180
+ voxelize=dict(
181
+ type='GridSample',
182
+ grid_size=0.02,
183
+ hash_type='fnv',
184
+ mode='test',
185
+ return_grid_coord=True),
186
+ crop=None,
187
+ post_transform=[
188
+ dict(type='CenterShift', apply_z=False),
189
+ dict(type='ToTensor'),
190
+ dict(
191
+ type='Collect',
192
+ keys=('coord', 'grid_coord', 'index'),
193
+ feat_keys=('color', 'normal'))
194
+ ],
195
+ aug_transform=[[{
196
+ 'type': 'RandomRotateTargetAngle',
197
+ 'angle': [0],
198
+ 'axis': 'z',
199
+ 'center': [0, 0, 0],
200
+ 'p': 1
201
+ }],
202
+ [{
203
+ 'type': 'RandomRotateTargetAngle',
204
+ 'angle': [0.5],
205
+ 'axis': 'z',
206
+ 'center': [0, 0, 0],
207
+ 'p': 1
208
+ }],
209
+ [{
210
+ 'type': 'RandomRotateTargetAngle',
211
+ 'angle': [1],
212
+ 'axis': 'z',
213
+ 'center': [0, 0, 0],
214
+ 'p': 1
215
+ }],
216
+ [{
217
+ 'type': 'RandomRotateTargetAngle',
218
+ 'angle': [1.5],
219
+ 'axis': 'z',
220
+ 'center': [0, 0, 0],
221
+ 'p': 1
222
+ }],
223
+ [{
224
+ 'type': 'RandomRotateTargetAngle',
225
+ 'angle': [0],
226
+ 'axis': 'z',
227
+ 'center': [0, 0, 0],
228
+ 'p': 1
229
+ }, {
230
+ 'type': 'RandomScale',
231
+ 'scale': [0.95, 0.95]
232
+ }],
233
+ [{
234
+ 'type': 'RandomRotateTargetAngle',
235
+ 'angle': [0.5],
236
+ 'axis': 'z',
237
+ 'center': [0, 0, 0],
238
+ 'p': 1
239
+ }, {
240
+ 'type': 'RandomScale',
241
+ 'scale': [0.95, 0.95]
242
+ }],
243
+ [{
244
+ 'type': 'RandomRotateTargetAngle',
245
+ 'angle': [1],
246
+ 'axis': 'z',
247
+ 'center': [0, 0, 0],
248
+ 'p': 1
249
+ }, {
250
+ 'type': 'RandomScale',
251
+ 'scale': [0.95, 0.95]
252
+ }],
253
+ [{
254
+ 'type': 'RandomRotateTargetAngle',
255
+ 'angle': [1.5],
256
+ 'axis': 'z',
257
+ 'center': [0, 0, 0],
258
+ 'p': 1
259
+ }, {
260
+ 'type': 'RandomScale',
261
+ 'scale': [0.95, 0.95]
262
+ }],
263
+ [{
264
+ 'type': 'RandomRotateTargetAngle',
265
+ 'angle': [0],
266
+ 'axis': 'z',
267
+ 'center': [0, 0, 0],
268
+ 'p': 1
269
+ }, {
270
+ 'type': 'RandomScale',
271
+ 'scale': [1.05, 1.05]
272
+ }],
273
+ [{
274
+ 'type': 'RandomRotateTargetAngle',
275
+ 'angle': [0.5],
276
+ 'axis': 'z',
277
+ 'center': [0, 0, 0],
278
+ 'p': 1
279
+ }, {
280
+ 'type': 'RandomScale',
281
+ 'scale': [1.05, 1.05]
282
+ }],
283
+ [{
284
+ 'type': 'RandomRotateTargetAngle',
285
+ 'angle': [1],
286
+ 'axis': 'z',
287
+ 'center': [0, 0, 0],
288
+ 'p': 1
289
+ }, {
290
+ 'type': 'RandomScale',
291
+ 'scale': [1.05, 1.05]
292
+ }],
293
+ [{
294
+ 'type': 'RandomRotateTargetAngle',
295
+ 'angle': [1.5],
296
+ 'axis': 'z',
297
+ 'center': [0, 0, 0],
298
+ 'p': 1
299
+ }, {
300
+ 'type': 'RandomScale',
301
+ 'scale': [1.05, 1.05]
302
+ }], [{
303
+ 'type': 'RandomFlip',
304
+ 'p': 1
305
+ }]])))
306
+ model = dict(
307
+ type='DefaultSegmentorV2',
308
+ num_classes=100,
309
+ backbone_out_channels=128,
310
+ backbone=dict(
311
+ type='Volt',
312
+ in_channels=6,
313
+ embed_dim=384,
314
+ depth=12,
315
+ num_heads=6,
316
+ mlp_ratio=4,
317
+ init_values=None,
318
+ qk_norm=True,
319
+ drop_path=0.3,
320
+ stride=5,
321
+ kernel_size=5,
322
+ increase_drop_path=True,
323
+ up_mlp_dim=128),
324
+ teacher=dict(
325
+ type='DefaultSegmentor',
326
+ backbone=dict(
327
+ type='SpUNet-v1m1',
328
+ in_channels=6,
329
+ num_classes=100,
330
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
331
+ layers=(2, 3, 4, 6, 2, 2, 2, 2))),
332
+ teacher_weights='weights/scannetpp_unet_teacher.pth',
333
+ criteria=[
334
+ dict(
335
+ type='CrossEntropyLoss',
336
+ loss_weight=1.0,
337
+ label_smoothing=0.1,
338
+ ignore_index=-1),
339
+ dict(
340
+ type='LovaszLoss',
341
+ mode='multiclass',
342
+ loss_weight=1.0,
343
+ ignore_index=-1)
344
+ ])
345
+ optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.05)
346
+ scheduler = dict(
347
+ type='OneCycleLR',
348
+ max_lr=0.001,
349
+ pct_start=0.05,
350
+ anneal_strategy='cos',
351
+ div_factor=10.0,
352
+ final_div_factor=1000.0)
353
+ dataset_type = 'ScanNetPPDataset'
354
+ data_root = 'data/scannetpp'
Volt_experiments/single_dataset/scannetpp/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0da7d6f4d167c6fe507f615714705663d37c22323382d1b98a3b7f56561d27c7
3
+ size 379705617
Volt_experiments/single_dataset/scannetpp/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26943a9888c67420443d79edcb42e12daef2ea3aed7fb9b93b959b759481a87
3
+ size 379705617
Volt_experiments/single_dataset/scannetpp/train.log ADDED
The diff for this file is too large to render. See raw diff
 
Volt_experiments/single_dataset/semantic_kitti/config.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 34972726
6
+ save_path = 'exp/semantic_kitti/2026-04-24_202427'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 20
13
+ eval_epoch = 20
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ sync_bn = False
18
+ enable_amp = True
19
+ amp_dtype = 'float16'
20
+ empty_cache = False
21
+ empty_cache_per_epoch = False
22
+ find_unused_parameters = False
23
+ enable_wandb = True
24
+ wandb_project = 'Volt'
25
+ wandb_key = None
26
+ mix_prob = 0.85
27
+ param_dicts = None
28
+ hooks = [
29
+ dict(type='CheckpointLoader'),
30
+ dict(type='ModelHook'),
31
+ dict(type='IterationTimer', warmup_iter=2),
32
+ dict(type='InformationWriter'),
33
+ dict(type='SemSegEvaluator'),
34
+ dict(type='CheckpointSaver', save_freq=None),
35
+ dict(type='PreciseEvaluator', test_last=False)
36
+ ]
37
+ train = dict(type='DefaultTrainer')
38
+ test = dict(type='SemSegTester', verbose=True)
39
+ model = dict(
40
+ type='DefaultSegmentorV2',
41
+ num_classes=19,
42
+ backbone_out_channels=128,
43
+ backbone=dict(
44
+ type='Volt',
45
+ in_channels=4,
46
+ embed_dim=384,
47
+ depth=12,
48
+ num_heads=6,
49
+ mlp_ratio=4,
50
+ init_values=None,
51
+ qk_norm=True,
52
+ drop_path=0.3,
53
+ stride=5,
54
+ kernel_size=5,
55
+ increase_drop_path=True,
56
+ up_mlp_dim=128),
57
+ teacher=dict(
58
+ type='DefaultSegmentor',
59
+ backbone=dict(
60
+ type='SpUNet-v1m1',
61
+ in_channels=4,
62
+ num_classes=19,
63
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
64
+ layers=(2, 3, 4, 6, 2, 2, 2, 2))),
65
+ teacher_weights='weights/teacher_weights/semantic_kitti_unet_teacher.pth',
66
+ criteria=[
67
+ dict(
68
+ type='CrossEntropyLoss',
69
+ weight=[
70
+ 3.1557, 8.7029, 7.8281, 6.1354, 6.3161, 7.9937, 8.9704,
71
+ 10.1922, 1.6155, 4.2187, 1.9385, 5.5455, 2.0198, 2.6261,
72
+ 1.3212, 5.1102, 2.5492, 5.8585, 7.3929
73
+ ],
74
+ loss_weight=1.0,
75
+ label_smoothing=0.1,
76
+ ignore_index=-1),
77
+ dict(
78
+ type='LovaszLoss',
79
+ mode='multiclass',
80
+ loss_weight=1.0,
81
+ ignore_index=-1)
82
+ ])
83
+ optimizer = dict(type='AdamW', lr=0.002, weight_decay=0.05)
84
+ scheduler = dict(
85
+ type='OneCycleLR',
86
+ max_lr=0.002,
87
+ pct_start=0.04,
88
+ anneal_strategy='cos',
89
+ div_factor=10.0,
90
+ final_div_factor=100.0)
91
+ dataset_type = 'SemanticKITTIDataset'
92
+ data_root = 'data/semantic_kitti'
93
+ ignore_index = -1
94
+ names = [
95
+ 'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person',
96
+ 'bicyclist', 'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground',
97
+ 'building', 'fence', 'vegetation', 'trunk', 'terrain', 'pole',
98
+ 'traffic-sign'
99
+ ]
100
+ data = dict(
101
+ num_classes=19,
102
+ ignore_index=-1,
103
+ names=[
104
+ 'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person',
105
+ 'bicyclist', 'motorcyclist', 'road', 'parking', 'sidewalk',
106
+ 'other-ground', 'building', 'fence', 'vegetation', 'trunk', 'terrain',
107
+ 'pole', 'traffic-sign'
108
+ ],
109
+ train=dict(
110
+ type='SemanticKITTIDataset',
111
+ split='train',
112
+ data_root='data/semantic_kitti',
113
+ transform=[
114
+ dict(
115
+ type='RandomDropout',
116
+ dropout_ratio=0.2,
117
+ dropout_application_ratio=0.2),
118
+ dict(
119
+ type='InstanceCutMix',
120
+ db_path='data/semantic_kitti_instances/train.h5'),
121
+ dict(
122
+ type='RandomRotate',
123
+ angle=[-1, 1],
124
+ axis='z',
125
+ center=[0, 0, 0],
126
+ p=0.5),
127
+ dict(
128
+ type='RandomRotate',
129
+ angle=[-0.015625, 0.015625],
130
+ axis='x',
131
+ p=0.5),
132
+ dict(
133
+ type='RandomRotate',
134
+ angle=[-0.015625, 0.015625],
135
+ axis='y',
136
+ p=0.5),
137
+ dict(
138
+ type='PointClipDistance', max_dist=50.0, z_min=-4.0,
139
+ z_max=2.0),
140
+ dict(type='RandomScale', scale=[0.9, 1.1]),
141
+ dict(
142
+ type='RandomShift',
143
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
144
+ dict(type='RandomFlip', p=0.5),
145
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
146
+ dict(type='InstanceShift', p=0.5, shift_range=[4, 4, 0.5]),
147
+ dict(type='InstanceRotate', p=0.5, axis='z', angle=[-0.5, 0.5]),
148
+ dict(type='InstanceScale', p=0.5, scale=[0.9, 1.1]),
149
+ dict(
150
+ type='GridSample',
151
+ grid_size=0.05,
152
+ hash_type='fnv',
153
+ mode='train',
154
+ return_grid_coord=True),
155
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
156
+ dict(type='ToTensor'),
157
+ dict(
158
+ type='Collect',
159
+ keys=('coord', 'grid_coord', 'segment'),
160
+ feat_keys=('coord', 'strength'))
161
+ ],
162
+ test_mode=False,
163
+ ignore_index=-1,
164
+ loop=1),
165
+ val=dict(
166
+ type='SemanticKITTIDataset',
167
+ split='val',
168
+ data_root='data/semantic_kitti',
169
+ transform=[
170
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
171
+ dict(
172
+ type='PointClipDistance', max_dist=50.0, z_min=-4.0,
173
+ z_max=2.0),
174
+ dict(
175
+ type='GridSample',
176
+ grid_size=0.05,
177
+ hash_type='fnv',
178
+ mode='train',
179
+ return_grid_coord=True,
180
+ return_inverse=True),
181
+ dict(type='ToTensor'),
182
+ dict(
183
+ type='Collect',
184
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
185
+ 'inverse'),
186
+ feat_keys=('coord', 'strength'))
187
+ ],
188
+ test_mode=False,
189
+ ignore_index=-1),
190
+ test=dict(
191
+ type='SemanticKITTIDataset',
192
+ split='val',
193
+ data_root='data/semantic_kitti',
194
+ transform=[
195
+ dict(
196
+ type='PointClipDistance', max_dist=50.0, z_min=-4.0,
197
+ z_max=2.0),
198
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
199
+ dict(
200
+ type='GridSample',
201
+ grid_size=0.025,
202
+ hash_type='fnv',
203
+ mode='train',
204
+ return_inverse=True)
205
+ ],
206
+ test_mode=True,
207
+ test_cfg=dict(
208
+ voxelize=dict(
209
+ type='GridSample',
210
+ grid_size=0.05,
211
+ hash_type='fnv',
212
+ mode='test',
213
+ return_grid_coord=True),
214
+ crop=None,
215
+ post_transform=[
216
+ dict(type='ToTensor'),
217
+ dict(
218
+ type='Collect',
219
+ keys=('coord', 'grid_coord', 'index'),
220
+ feat_keys=('coord', 'strength'))
221
+ ],
222
+ aug_transform=[[{
223
+ 'type': 'RandomScale',
224
+ 'scale': [0.9, 0.9]
225
+ }], [{
226
+ 'type': 'RandomScale',
227
+ 'scale': [0.95, 0.95]
228
+ }], [{
229
+ 'type': 'RandomScale',
230
+ 'scale': [1, 1]
231
+ }], [{
232
+ 'type': 'RandomScale',
233
+ 'scale': [1.05, 1.05]
234
+ }], [{
235
+ 'type': 'RandomScale',
236
+ 'scale': [1.1, 1.1]
237
+ }],
238
+ [{
239
+ 'type': 'RandomScale',
240
+ 'scale': [0.9, 0.9]
241
+ }, {
242
+ 'type': 'RandomFlip',
243
+ 'p': 1
244
+ }],
245
+ [{
246
+ 'type': 'RandomScale',
247
+ 'scale': [0.95, 0.95]
248
+ }, {
249
+ 'type': 'RandomFlip',
250
+ 'p': 1
251
+ }],
252
+ [{
253
+ 'type': 'RandomScale',
254
+ 'scale': [1, 1]
255
+ }, {
256
+ 'type': 'RandomFlip',
257
+ 'p': 1
258
+ }],
259
+ [{
260
+ 'type': 'RandomScale',
261
+ 'scale': [1.05, 1.05]
262
+ }, {
263
+ 'type': 'RandomFlip',
264
+ 'p': 1
265
+ }],
266
+ [{
267
+ 'type': 'RandomScale',
268
+ 'scale': [1.1, 1.1]
269
+ }, {
270
+ 'type': 'RandomFlip',
271
+ 'p': 1
272
+ }]]),
273
+ ignore_index=-1))
Volt_experiments/single_dataset/semantic_kitti/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66906d466e4ffd5cb97fd5aa26be03ec17387b40561f67c60ef11b4ff639a26b
3
+ size 377835281
Volt_experiments/single_dataset/semantic_kitti/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f60b09ac30d42d9ed5d5ff44a0cf91ed2aae091e5fc7b8b619f6a732f1beede
3
+ size 377835281
Volt_experiments/single_dataset/semantic_kitti/train.log ADDED
The diff for this file is too large to render. See raw diff
 
Volt_experiments/single_dataset/waymo/config.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None
2
+ resume = False
3
+ evaluate = True
4
+ test_only = False
5
+ seed = 47918425
6
+ save_path = 'exp/waymo/2026-04-24_121009'
7
+ num_worker = 24
8
+ batch_size = 16
9
+ gradient_accumulation_steps = 1
10
+ batch_size_val = None
11
+ batch_size_test = None
12
+ epoch = 30
13
+ eval_epoch = 30
14
+ clip_grad = None
15
+ use_ema = True
16
+ ema_decay = 0.999
17
+ sync_bn = False
18
+ enable_amp = True
19
+ amp_dtype = 'float16'
20
+ empty_cache = False
21
+ empty_cache_per_epoch = False
22
+ find_unused_parameters = False
23
+ enable_wandb = True
24
+ wandb_project = 'Volt'
25
+ wandb_key = None
26
+ mix_prob = 0.2
27
+ param_dicts = None
28
+ hooks = [
29
+ dict(type='CheckpointLoader'),
30
+ dict(type='ModelHook'),
31
+ dict(type='IterationTimer', warmup_iter=2),
32
+ dict(type='InformationWriter'),
33
+ dict(type='SemSegEvaluator'),
34
+ dict(type='CheckpointSaver', save_freq=None),
35
+ dict(type='PreciseEvaluator', test_last=False)
36
+ ]
37
+ train = dict(type='DefaultTrainer')
38
+ test = dict(type='SemSegTester', verbose=True)
39
+ model = dict(
40
+ type='DefaultSegmentorV2',
41
+ num_classes=22,
42
+ backbone_out_channels=128,
43
+ backbone=dict(
44
+ type='Volt',
45
+ in_channels=4,
46
+ embed_dim=384,
47
+ depth=12,
48
+ num_heads=6,
49
+ mlp_ratio=4,
50
+ init_values=None,
51
+ qk_norm=True,
52
+ drop_path=0.3,
53
+ stride=5,
54
+ kernel_size=5,
55
+ increase_drop_path=True,
56
+ up_mlp_dim=128),
57
+ teacher=dict(
58
+ type='DefaultSegmentor',
59
+ backbone=dict(
60
+ type='SpUNet-v1m1',
61
+ in_channels=4,
62
+ num_classes=22,
63
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
64
+ layers=(2, 3, 4, 6, 2, 2, 2, 2))),
65
+ teacher_weights='weights/teacher_weights/waymo_unet_teacher.pth',
66
+ criteria=[
67
+ dict(
68
+ type='CrossEntropyLoss',
69
+ loss_weight=1.0,
70
+ label_smoothing=0.1,
71
+ ignore_index=-1),
72
+ dict(
73
+ type='LovaszLoss',
74
+ mode='multiclass',
75
+ loss_weight=1.0,
76
+ ignore_index=-1)
77
+ ])
78
+ optimizer = dict(type='AdamW', lr=0.002, weight_decay=0.05)
79
+ scheduler = dict(
80
+ type='OneCycleLR',
81
+ max_lr=0.002,
82
+ pct_start=0.04,
83
+ anneal_strategy='cos',
84
+ div_factor=10.0,
85
+ final_div_factor=100.0)
86
+ dataset_type = 'WaymoDataset'
87
+ data_root = 'data/waymo'
88
+ ignore_index = -1
89
+ names = [
90
+ 'Car', 'Truck', 'Bus', 'Other Vehicle', 'Motorcyclist', 'Bicyclist',
91
+ 'Pedestrian', 'Sign', 'Traffic Light', 'Pole', 'Construction Cone',
92
+ 'Bicycle', 'Motorcycle', 'Building', 'Vegetation', 'Tree Trunk', 'Curb',
93
+ 'Road', 'Lane Marker', 'Other Ground', 'Walkable', 'Sidewalk'
94
+ ]
95
+ data = dict(
96
+ num_classes=22,
97
+ ignore_index=-1,
98
+ names=[
99
+ 'Car', 'Truck', 'Bus', 'Other Vehicle', 'Motorcyclist', 'Bicyclist',
100
+ 'Pedestrian', 'Sign', 'Traffic Light', 'Pole', 'Construction Cone',
101
+ 'Bicycle', 'Motorcycle', 'Building', 'Vegetation', 'Tree Trunk',
102
+ 'Curb', 'Road', 'Lane Marker', 'Other Ground', 'Walkable', 'Sidewalk'
103
+ ],
104
+ train=dict(
105
+ type='WaymoDataset',
106
+ split='training',
107
+ data_root='data/waymo',
108
+ transform=[
109
+ dict(
110
+ type='RandomDropout',
111
+ dropout_ratio=0.2,
112
+ dropout_application_ratio=0.2),
113
+ dict(
114
+ type='RandomRotate',
115
+ angle=[-1, 1],
116
+ axis='z',
117
+ center=[0, 0, 0],
118
+ p=0.5),
119
+ dict(
120
+ type='RandomRotate',
121
+ angle=[-0.015625, 0.015625],
122
+ axis='x',
123
+ p=0.5),
124
+ dict(
125
+ type='RandomRotate',
126
+ angle=[-0.015625, 0.015625],
127
+ axis='y',
128
+ p=0.5),
129
+ dict(
130
+ type='PointClipDistance', max_dist=75.0, z_min=-4.0,
131
+ z_max=2.0),
132
+ dict(type='RandomScale', scale=[0.9, 1.1]),
133
+ dict(
134
+ type='RandomShift',
135
+ shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
136
+ dict(type='RandomFlip', p=0.5),
137
+ dict(type='RandomJitter', sigma=0.005, clip=0.02),
138
+ dict(
139
+ type='GridSample',
140
+ grid_size=0.05,
141
+ hash_type='fnv',
142
+ mode='train',
143
+ return_grid_coord=True),
144
+ dict(type='SphereCrop', sample_rate=0.6, mode='random'),
145
+ dict(type='SphereCrop', point_max=102400, mode='random'),
146
+ dict(type='ToTensor'),
147
+ dict(
148
+ type='Collect',
149
+ keys=('coord', 'grid_coord', 'segment'),
150
+ feat_keys=('coord', 'strength'))
151
+ ],
152
+ test_mode=False,
153
+ ignore_index=-1,
154
+ loop=1),
155
+ val=dict(
156
+ type='WaymoDataset',
157
+ split='validation',
158
+ data_root='data/waymo',
159
+ transform=[
160
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
161
+ dict(
162
+ type='PointClipDistance', max_dist=75.0, z_min=-4.0,
163
+ z_max=2.0),
164
+ dict(
165
+ type='GridSample',
166
+ grid_size=0.05,
167
+ hash_type='fnv',
168
+ mode='train',
169
+ return_grid_coord=True,
170
+ return_inverse=True),
171
+ dict(type='ToTensor'),
172
+ dict(
173
+ type='Collect',
174
+ keys=('coord', 'grid_coord', 'segment', 'origin_segment',
175
+ 'inverse'),
176
+ feat_keys=('coord', 'strength'))
177
+ ],
178
+ test_mode=False,
179
+ ignore_index=-1),
180
+ test=dict(
181
+ type='WaymoDataset',
182
+ split='validation',
183
+ data_root='data/waymo',
184
+ transform=[
185
+ dict(
186
+ type='PointClipDistance', max_dist=75.0, z_min=-4.0,
187
+ z_max=2.0),
188
+ dict(type='Copy', keys_dict=dict(segment='origin_segment')),
189
+ dict(
190
+ type='GridSample',
191
+ grid_size=0.025,
192
+ hash_type='fnv',
193
+ mode='train',
194
+ return_inverse=True)
195
+ ],
196
+ test_mode=True,
197
+ test_cfg=dict(
198
+ voxelize=dict(
199
+ type='GridSample',
200
+ grid_size=0.05,
201
+ hash_type='fnv',
202
+ mode='test',
203
+ return_grid_coord=True),
204
+ crop=None,
205
+ post_transform=[
206
+ dict(type='ToTensor'),
207
+ dict(
208
+ type='Collect',
209
+ keys=('coord', 'grid_coord', 'index'),
210
+ feat_keys=('coord', 'strength'))
211
+ ],
212
+ aug_transform=[[{
213
+ 'type': 'RandomScale',
214
+ 'scale': [0.9, 0.9]
215
+ }], [{
216
+ 'type': 'RandomScale',
217
+ 'scale': [0.95, 0.95]
218
+ }], [{
219
+ 'type': 'RandomScale',
220
+ 'scale': [1, 1]
221
+ }], [{
222
+ 'type': 'RandomScale',
223
+ 'scale': [1.05, 1.05]
224
+ }], [{
225
+ 'type': 'RandomScale',
226
+ 'scale': [1.1, 1.1]
227
+ }],
228
+ [{
229
+ 'type': 'RandomScale',
230
+ 'scale': [0.9, 0.9]
231
+ }, {
232
+ 'type': 'RandomFlip',
233
+ 'p': 1
234
+ }],
235
+ [{
236
+ 'type': 'RandomScale',
237
+ 'scale': [0.95, 0.95]
238
+ }, {
239
+ 'type': 'RandomFlip',
240
+ 'p': 1
241
+ }],
242
+ [{
243
+ 'type': 'RandomScale',
244
+ 'scale': [1, 1]
245
+ }, {
246
+ 'type': 'RandomFlip',
247
+ 'p': 1
248
+ }],
249
+ [{
250
+ 'type': 'RandomScale',
251
+ 'scale': [1.05, 1.05]
252
+ }, {
253
+ 'type': 'RandomFlip',
254
+ 'p': 1
255
+ }],
256
+ [{
257
+ 'type': 'RandomScale',
258
+ 'scale': [1.1, 1.1]
259
+ }, {
260
+ 'type': 'RandomFlip',
261
+ 'p': 1
262
+ }]]),
263
+ ignore_index=-1))
Volt_experiments/single_dataset/waymo/model/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f819421fae6c6ab7f05b70e899d85b8c5b2dfd1bdb565185241cb52b79768a13
3
+ size 377847569
Volt_experiments/single_dataset/waymo/model/model_last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace562fdadeb8351171d47fcf65e8bb6b02a3986fb3dc4b51822effbc7e929f8
3
+ size 377847569
Volt_experiments/single_dataset/waymo/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6597f76dd17c450f9a3d670c2f4cc0d93179b8940800d97d1a6a32bf5e4e5cda
3
+ size 18631068