Bavantha11 committed on
Commit
1f5a2a0
·
verified ·
1 Parent(s): 6f7be77

Upload configs/cityscapes/m2h_mx_l.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. configs/cityscapes/m2h_mx_l.yaml +96 -0
configs/cityscapes/m2h_mx_l.yaml ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset:
2
+ name: Cityscapes
3
+ root: data/cityscapes
4
+ disparity_subdir: crestereo_disparity
5
+ crop_bottom: 224
6
+ image_size: [800, 2048]
7
+ num_classes: 19
8
+ min_depth: 0.0
9
+ max_depth: 80.0
10
+ visual_min_depth: 0.0
11
+ visual_max_depth: 80.0
12
+ augment:
13
+ random_scale: [0.8, 1.2]
14
+ random_crop: true
15
+ horizontal_flip: true
16
+ color_jitter: {brightness: 0.2, contrast: 0.2, saturation: 0.2, hue: 0.1}
17
+ erase_prob: 0.05
18
+ blur_prob: 0.08
19
+ noise_std: 0.01
20
+ gamma: [0.9, 1.1]
21
+
22
+ training:
23
+ epochs: 160
24
+ batch_size: 2
25
+ eval_batch_size: 4
26
+ num_workers: 8
27
+ device: cuda
28
+ mixed_precision: true
29
+ log_interval: 25
30
+ ckpt_interval: 1
31
+ grad_clip: 1.0
32
+ output_dir: outputs/cityscapes_m2h_mx_l
33
+ ema_decay: 0.999
34
+ eval_use_ema: false
35
+ use_static_graph: false
36
+ finetune: true # reset scheduler/steps when resuming from ScanNet weights
37
+
38
+ optimization:
39
+ lr: 7.0e-5 # lower for stability; warmup added
40
+ weight_decay: 0.02
41
+ betas: [0.9, 0.999]
42
+ warmup_epochs: 5
43
+ scheduler:
44
+ type: cosine
45
+ min_lr: 5.0e-6
46
+
47
+ tasks:
48
+ include_semseg: true
49
+ include_depth: true
50
+ include_edge: false
51
+ include_normals: false
52
+ include_plane: false
53
+ include_confidence: false
54
+
55
+ loss:
56
+ weights:
57
+ semseg: 2.5 # emphasize semantics
58
+ depth_si: 2.0 # keep depth learning strong
59
+ focal_for_edges: false
60
+ depth_scale_weight: 0.05
61
+ depth_coarse_weight: 0.25
62
+ depth_offset_weight: 0.2
63
+ depth_bin_weight: 0.25
64
+ use_uncertainty_balancer: false
65
+
66
+ model:
67
+ arch: m2h_mx_l
68
+ num_classes: 19
69
+ min_depth: 0.0
70
+ max_depth: 80.0
71
+
72
+ m2h_mx:
73
+ decoder_dim: 256
74
+ num_seg_classes: 19
75
+ backbone_lr_scale: 0.03 # lower backbone LR when all blocks are unfrozen
76
+ ltc_window_size: 4
77
+ hm_d_state: 32
78
+ hm_drop_path: 0.1
79
+ gtf_extra_levels: 2
80
+ train_last_n_blocks: 24 # DINOv3-L has 24 blocks; unfreeze all for fine-tuning
81
+ intermediate_layer_indices: [5, 11, 17, 23]
82
+ num_register_tokens: 4
83
+ use_lora: true
84
+ lora_rank: 16
85
+ lora_alpha: 32.0
86
+ lora_dropout: 0.05
87
+ backbone_name: facebook/dinov3-vitl16-pretrain-lvd1689m
88
+ depth_bins: 64
89
+ depth_aux_weight: 0.2
90
+ aux_weights:
91
+ semseg: 0.5
92
+ depth: 0.2
93
+
94
+ validation:
95
+ interval_steps: 100
96
+ save_best_on: ["sem_mIoU", "dep_AbsRel"]