Bavantha11 committed on
Commit
81933f4
·
verified ·
1 Parent(s): 42a4af4

Upload configs/scannet/m2h_mx_b.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. configs/scannet/m2h_mx_b.yaml +92 -0
configs/scannet/m2h_mx_b.yaml ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset:
2
+ name: ScanNet
3
+ root: data/scannet
4
+ image_size: [480, 640]
5
+ num_classes: 40 # keep head compatible with NYUDv2 fine-tuning
6
+ min_depth: 0.1
7
+ max_depth: 10.0
8
+ visual_min_depth: 0.1
9
+ visual_max_depth: 10.0
10
+ augment:
11
+ random_scale: [0.9, 1.15]
12
+ random_crop: true
13
+ horizontal_flip: true
14
+ color_jitter: {brightness: 0.25, contrast: 0.25, saturation: 0.2, hue: 0.1}
15
+ erase_prob: 0.15
16
+
17
+ training:
18
+ epochs: 160
19
+ batch_size: 8
20
+ eval_batch_size: 12
21
+ num_workers: 8
22
+ device: cuda
23
+ mixed_precision: true
24
+ log_interval: 200
25
+ ckpt_interval: 1
26
+ grad_clip: 1.0
27
+ output_dir: outputs/scannet_m2h_mx_b
28
+ ema_decay: 0.999
29
+ eval_use_ema: false
30
+
31
+ optimization:
32
+ lr: 1.0e-4
33
+ weight_decay: 0.05
34
+ betas: [0.9, 0.999]
35
+ warmup_epochs: 0
36
+ scheduler:
37
+ type: onecycle
38
+ max_lr_factor: 3.0
39
+ pct_start: 0.1
40
+ div_factor: 5.0
41
+ final_div_factor: 25.0
42
+
43
+ tasks:
44
+ include_semseg: true
45
+ include_depth: true
46
+ include_edge: false
47
+ include_normals: false
48
+ include_plane: false
49
+ include_confidence: false
50
+
51
+ loss:
52
+ weights:
53
+ semseg: 2.0
54
+ depth_si: 3.0
55
+ focal_for_edges: false
56
+ depth_scale_weight: 0.1
57
+ depth_coarse_weight: 0.3
58
+ depth_offset_weight: 0.15
59
+ depth_bin_weight: 0.3
60
+ use_uncertainty_balancer: false
61
+
62
+ model:
63
+ arch: m2h_mx_b
64
+ num_classes: 40
65
+ min_depth: 0.1
66
+ max_depth: 10.0
67
+
68
+ m2h_mx:
69
+ decoder_dim: 256
70
+ num_seg_classes: 40
71
+ backbone_lr_scale: 0.05
72
+ ltc_window_size: 4
73
+ hm_d_state: 32
74
+ hm_drop_path: 0.1
75
+ gtf_extra_levels: 2
76
+ train_last_n_blocks: 2
77
+ intermediate_layer_indices: [2, 5, 8, 11]
78
+ num_register_tokens: 4
79
+ use_lora: true
80
+ lora_rank: 16
81
+ lora_alpha: 32.0
82
+ lora_dropout: 0.05
83
+ backbone_name: facebook/dinov3-vitb16-pretrain-lvd1689m
84
+ depth_bins: 64
85
+ depth_aux_weight: 0.4
86
+ aux_weights:
87
+ semseg: 0.4
88
+ depth: 0.4
89
+
90
+ validation:
91
+ interval_steps: 1000
92
+ save_best_on: ["sem_mIoU", "dep_AbsRel"]