sabertoaster committed (verified)
Commit 4dd2544 · Parent(s): 6526584

Upload folder using huggingface_hub
output/two_stage_encoding/config.yaml ADDED
@@ -0,0 +1,137 @@
+ out_dir: output/two_stage_encoding
+ seed: 3315
+ overwrite: false
+ device: cuda
+ batch_size: 16
+ stage1:
+   epochs: 10
+   lr: 0.0003
+   weight_decay: 0.1
+   model:
+     embed_dim: 192
+     encoder_kernel_size: 45
+     decoder_kernel_size: 0
+     hidden_model: null
+     global_pool: avg
+     encoder_causal: false
+     encoder_positive: false
+     encoder_blockwise: false
+     pool_num_heads: 3
+     with_shared_decoder: true
+     with_subject_decoders: true
+     transformer:
+       num_heads: 3
+       depth: 6
+       mlp_ratio: 4.0
+     conv1dnext:
+       depth: 6
+       kernel_size: 11
+       causal: false
+ stage2:
+   epochs: 10
+   lr: 0.0001
+   weight_decay: 0.01
+   cfm:
+     solver: euler
+     sigma_min: 0.0001
+   decoder:
+     channels:
+     - 256
+     - 256
+     dropout: 0.05
+     attention_head_dim: 64
+     n_blocks: 1
+     num_mid_blocks: 2
+     num_heads: 2
+     act_fn: snakebeta
+     down_block_type: transformer
+     mid_block_type: transformer
+     up_block_type: transformer
+ subjects:
+ - 1
+ - 2
+ - 3
+ - 5
+ features:
+   internvl3_8b:
+     model: InternVL3_8B
+     layers:
+       layers.20: language_model.model.layers.20.post_attention_layernorm
+   internvl3_14b:
+     model: InternVL3_14B
+     layers:
+       layers.20: language_model.model.layers.20.post_attention_layernorm
+       layers.30: language_model.model.layers.30.post_attention_layernorm
+   qwen-2-5-omni-3b:
+     model: qwen2-5_3B
+     layers:
+       layers.10: model.layers.10.post_attention_layernorm
+       layers.15: model.layers.15.post_attention_layernorm
+       layers.20: model.layers.20.post_attention_layernorm
+     norm: model.norm
+   qwen-2-5-omni-7b:
+     model: qwen-2-5-omni-7b
+     layers:
+       layers.5: model.layers.5.post_attention_layernorm
+       layers.10: model.layers.10.post_attention_layernorm
+       layers.15: model.layers.15.post_attention_layernorm
+       layers.20: model.layers.20.post_attention_layernorm
+     norm: model.norm
+   whisper:
+     model: whisper
+     layers:
+       layers.12: layers.12.fc2
+       layers.25: layers.25.fc2
+       layers.31: layers.31.fc2
+     norm: layer_norm
+   llama_3.2_1b:
+     model: Llama-3.2-1B
+     layers:
+       layers.7: model.layers.7
+       layers.11: model.layers.11
+       layers.15: model.layers.15
+   llama_3.2_3b:
+     model: Llama-3.2-3B
+     layers:
+       layers.7: model.layers.7
+       layers.11: model.layers.11
+       layers.15: model.layers.15
+       layers.19: model.layers.19
+       layers.23: model.layers.23
+   vjepa2:
+     model: vjepa2_avg_feat
+     layers:
+       layers.5: encoder.layer.5.norm1_avg
+       layers.15: encoder.layer.15.norm1_avg
+       layers.25: encoder.layer.25.norm1_avg
+       layers.35: encoder.layer.35.norm1_avg
+     norm: encoder.layernorm_avg
+ include_features:
+ - internvl3_14b/layers.30
+ datasets:
+   train:
+     filter:
+       seasons:
+       - 1
+     sample_length: 64
+     num_samples: 2000
+     shuffle: true
+     seed: 42
+   val_s6:
+     filter:
+       seasons:
+       - 6
+       movies: []
+     sample_length: null
+     num_samples: null
+     shuffle: false
+   val_figures:
+     filter:
+       seasons: []
+       movies:
+       - figures
+     sample_length: null
+     num_samples: null
+     shuffle: false
+ val_set_name: val_figures
+ datasets_root: /workspace/data
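For reference, the file above is plain YAML, so it loads directly with PyYAML. A minimal sketch of reading it and resolving the single active feature in `include_features` back to its module path (the commented hook registration is hypothetical; the loaded backbone `model` is not part of this commit):

```python
import yaml

# Load the training config committed above.
with open("output/two_stage_encoding/config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["stage1"]["lr"])   # 0.0003
print(cfg["subjects"])       # [1, 2, 3, 5]

# Each entry under `features` maps a short layer alias to a module path
# inside the corresponding backbone; only paths listed in
# `include_features` are active, e.g. "internvl3_14b/layers.30".
for spec in cfg["include_features"]:
    feat_name, layer_alias = spec.split("/")
    module_path = cfg["features"][feat_name]["layers"][layer_alias]
    print(feat_name, layer_alias, "->", module_path)

# Hypothetical: with a loaded backbone `model`, the path resolves via
# torch.nn.Module.get_submodule and can be tapped with a forward hook:
#   module = model.get_submodule(module_path)
#   handle = module.register_forward_hook(lambda m, i, o: feats.append(o))
```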
output/two_stage_encoding/heatmaps/stage1_sub1.png ADDED
output/two_stage_encoding/heatmaps/stage1_sub2.png ADDED
output/two_stage_encoding/heatmaps/stage1_sub3.png ADDED
output/two_stage_encoding/heatmaps/stage1_sub5.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep0_sub1.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep0_sub2.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep0_sub3.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep0_sub5.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep5_sub1.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep5_sub2.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep5_sub3.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep5_sub5.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep9_sub1.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep9_sub2.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep9_sub3.png ADDED
output/two_stage_encoding/heatmaps/stage2_ep9_sub5.png ADDED
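The heatmap filenames appear to follow a `stage{N}[_ep{E}]_sub{S}.png` scheme (stage 1 has no epoch field; stage 2 snapshots epochs 0, 5, and 9). A small sketch for grouping them, assuming that convention holds:

```python
import re
from pathlib import Path

PATTERN = re.compile(
    r"stage(?P<stage>\d+)(?:_ep(?P<epoch>\d+))?_sub(?P<subject>\d+)\.png"
)

# Collect (stage, epoch, subject) from the committed heatmaps directory.
for path in sorted(Path("output/two_stage_encoding/heatmaps").glob("*.png")):
    m = PATTERN.fullmatch(path.name)
    if m is None:
        continue  # skip files outside the naming scheme
    stage = int(m["stage"])
    epoch = int(m["epoch"]) if m["epoch"] is not None else None
    subject = int(m["subject"])
    print(stage, epoch, subject, path.name)
```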
output/two_stage_encoding/stage1_best.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:98179fd2307efe54791f2b97ed5cf4efbb722aa292ab269c592e75a9b06513a5
+ size 7833845
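The `.pt` checkpoints are committed as Git LFS pointers: a `version` line, the `oid sha256:` of the real blob, and its `size` in bytes. A minimal sketch for verifying a resolved file against its pointer (the paths and helper names are illustrative):

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Parse the key/value lines of a Git LFS pointer file."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify(pointer_path: str, blob_path: str) -> bool:
    """Check that the downloaded blob matches the pointer's size and hash."""
    ptr = parse_lfs_pointer(Path(pointer_path).read_text())
    blob = Path(blob_path).read_bytes()
    return (len(blob) == ptr["size"]
            and hashlib.sha256(blob).hexdigest() == ptr["oid"])
```

For the ~240 MB stage-2 checkpoints below, hashing in fixed-size chunks would avoid reading the whole blob into memory at once.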
output/two_stage_encoding/stage1_loss_curve.png ADDED
output/two_stage_encoding/stage2_epoch_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4013c39fc311e3cb8ae3efb9745e96a68ecc9779a25e18885640e0d5e2ded13
+ size 240515294
output/two_stage_encoding/stage2_epoch_5.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0477043b81bc838b582451d42a41c362e7ebaff7c5b4a8078e50720090701e9d
+ size 240515294
output/two_stage_encoding/stage2_epoch_9.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a72612fcfce5d9b8866b66d9f5eb275d342422719c97f969ca7fcd510047b00
+ size 240515294
output/two_stage_encoding/stage2_loss_curve.png ADDED
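Once the LFS blobs are resolved (e.g. via `git lfs pull` or a `huggingface_hub` download), the checkpoints can be inspected. A sketch assuming they are standard PyTorch serialized objects; whether each file holds a bare state dict or a fuller training-state dict is not documented in this commit:

```python
import torch

# Load on CPU so no GPU is required for inspection. Recent PyTorch
# defaults to weights_only=True; pass weights_only=False only if you
# trust the file and it contains pickled Python objects.
ckpt = torch.load("output/two_stage_encoding/stage1_best.pt",
                  map_location="cpu")

if isinstance(ckpt, dict) and all(hasattr(v, "shape") for v in ckpt.values()):
    # Looks like a plain state dict: list parameter names and shapes.
    for name, tensor in ckpt.items():
        print(name, tuple(tensor.shape))
elif isinstance(ckpt, dict):
    # Otherwise show the top-level keys of the training state.
    print(list(ckpt.keys()))
else:
    print(type(ckpt))
```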