zixianma02 commited on
Commit
d2d7839
·
verified ·
1 Parent(s): 6080213

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +49 -0
  2. config.yaml +831 -0
  3. model_and_optim/__0_14.distcp +3 -0
  4. model_and_optim/__0_5.distcp +3 -0
  5. model_and_optim/__0_6.distcp +3 -0
  6. model_and_optim/__10_11.distcp +3 -0
  7. model_and_optim/__10_14.distcp +3 -0
  8. model_and_optim/__10_2.distcp +3 -0
  9. model_and_optim/__10_4.distcp +3 -0
  10. model_and_optim/__12_1.distcp +3 -0
  11. model_and_optim/__12_8.distcp +3 -0
  12. model_and_optim/__13_3.distcp +3 -0
  13. model_and_optim/__14_14.distcp +3 -0
  14. model_and_optim/__14_2.distcp +3 -0
  15. model_and_optim/__15_1.distcp +3 -0
  16. model_and_optim/__15_11.distcp +3 -0
  17. model_and_optim/__15_4.distcp +3 -0
  18. model_and_optim/__16_7.distcp +3 -0
  19. model_and_optim/__18_14.distcp +3 -0
  20. model_and_optim/__19_14.distcp +3 -0
  21. model_and_optim/__1_14.distcp +3 -0
  22. model_and_optim/__1_4.distcp +3 -0
  23. model_and_optim/__1_8.distcp +3 -0
  24. model_and_optim/__1_9.distcp +3 -0
  25. model_and_optim/__20_10.distcp +3 -0
  26. model_and_optim/__21_0.distcp +3 -0
  27. model_and_optim/__22_9.distcp +3 -0
  28. model_and_optim/__23_15.distcp +3 -0
  29. model_and_optim/__23_3.distcp +3 -0
  30. model_and_optim/__23_6.distcp +3 -0
  31. model_and_optim/__24_4.distcp +3 -0
  32. model_and_optim/__25_10.distcp +3 -0
  33. model_and_optim/__25_15.distcp +3 -0
  34. model_and_optim/__25_6.distcp +3 -0
  35. model_and_optim/__26_1.distcp +3 -0
  36. model_and_optim/__27_0.distcp +3 -0
  37. model_and_optim/__27_2.distcp +3 -0
  38. model_and_optim/__28_5.distcp +3 -0
  39. model_and_optim/__29_9.distcp +3 -0
  40. model_and_optim/__30_15.distcp +3 -0
  41. model_and_optim/__31_10.distcp +3 -0
  42. model_and_optim/__31_2.distcp +3 -0
  43. model_and_optim/__31_7.distcp +3 -0
  44. model_and_optim/__5_12.distcp +3 -0
  45. model_and_optim/__5_13.distcp +3 -0
  46. model_and_optim/__6_7.distcp +3 -0
  47. model_and_optim/__6_9.distcp +3 -0
  48. model_and_optim/__7_0.distcp +3 -0
  49. model_and_optim/__7_11.distcp +3 -0
  50. model_and_optim/__8_13.distcp +3 -0
.gitattributes CHANGED
@@ -33,3 +33,52 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_and_optim/__12_1.distcp filter=lfs diff=lfs merge=lfs -text
37
+ model_and_optim/__15_11.distcp filter=lfs diff=lfs merge=lfs -text
38
+ model_and_optim/__6_7.distcp filter=lfs diff=lfs merge=lfs -text
39
+ model_and_optim/__26_1.distcp filter=lfs diff=lfs merge=lfs -text
40
+ model_and_optim/__31_2.distcp filter=lfs diff=lfs merge=lfs -text
41
+ model_and_optim/__29_9.distcp filter=lfs diff=lfs merge=lfs -text
42
+ model_and_optim/__13_3.distcp filter=lfs diff=lfs merge=lfs -text
43
+ model_and_optim/__27_0.distcp filter=lfs diff=lfs merge=lfs -text
44
+ model_and_optim/__5_13.distcp filter=lfs diff=lfs merge=lfs -text
45
+ model_and_optim/__15_1.distcp filter=lfs diff=lfs merge=lfs -text
46
+ model_and_optim/__30_15.distcp filter=lfs diff=lfs merge=lfs -text
47
+ model_and_optim/__12_8.distcp filter=lfs diff=lfs merge=lfs -text
48
+ model_and_optim/__1_8.distcp filter=lfs diff=lfs merge=lfs -text
49
+ model_and_optim/__25_10.distcp filter=lfs diff=lfs merge=lfs -text
50
+ model_and_optim/__27_2.distcp filter=lfs diff=lfs merge=lfs -text
51
+ model_and_optim/__10_11.distcp filter=lfs diff=lfs merge=lfs -text
52
+ model_and_optim/__14_2.distcp filter=lfs diff=lfs merge=lfs -text
53
+ model_and_optim/__24_4.distcp filter=lfs diff=lfs merge=lfs -text
54
+ model_and_optim/__22_9.distcp filter=lfs diff=lfs merge=lfs -text
55
+ model_and_optim/__1_14.distcp filter=lfs diff=lfs merge=lfs -text
56
+ model_and_optim/__25_15.distcp filter=lfs diff=lfs merge=lfs -text
57
+ model_and_optim/__5_12.distcp filter=lfs diff=lfs merge=lfs -text
58
+ model_and_optim/__19_14.distcp filter=lfs diff=lfs merge=lfs -text
59
+ model_and_optim/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
60
+ model_and_optim/__23_3.distcp filter=lfs diff=lfs merge=lfs -text
61
+ model_and_optim/__31_7.distcp filter=lfs diff=lfs merge=lfs -text
62
+ model_and_optim/__14_14.distcp filter=lfs diff=lfs merge=lfs -text
63
+ model_and_optim/__16_7.distcp filter=lfs diff=lfs merge=lfs -text
64
+ model_and_optim/__0_5.distcp filter=lfs diff=lfs merge=lfs -text
65
+ model_and_optim/__10_14.distcp filter=lfs diff=lfs merge=lfs -text
66
+ model_and_optim/__23_6.distcp filter=lfs diff=lfs merge=lfs -text
67
+ model_and_optim/__15_4.distcp filter=lfs diff=lfs merge=lfs -text
68
+ model_and_optim/__31_10.distcp filter=lfs diff=lfs merge=lfs -text
69
+ model_and_optim/__21_0.distcp filter=lfs diff=lfs merge=lfs -text
70
+ model_and_optim/__1_4.distcp filter=lfs diff=lfs merge=lfs -text
71
+ model_and_optim/__6_9.distcp filter=lfs diff=lfs merge=lfs -text
72
+ model_and_optim/__28_5.distcp filter=lfs diff=lfs merge=lfs -text
73
+ model_and_optim/__25_6.distcp filter=lfs diff=lfs merge=lfs -text
74
+ model_and_optim/__20_10.distcp filter=lfs diff=lfs merge=lfs -text
75
+ model_and_optim/__18_14.distcp filter=lfs diff=lfs merge=lfs -text
76
+ model_and_optim/__8_4.distcp filter=lfs diff=lfs merge=lfs -text
77
+ model_and_optim/__10_4.distcp filter=lfs diff=lfs merge=lfs -text
78
+ model_and_optim/__0_14.distcp filter=lfs diff=lfs merge=lfs -text
79
+ model_and_optim/__1_9.distcp filter=lfs diff=lfs merge=lfs -text
80
+ model_and_optim/__10_2.distcp filter=lfs diff=lfs merge=lfs -text
81
+ model_and_optim/__8_13.distcp filter=lfs diff=lfs merge=lfs -text
82
+ model_and_optim/__23_15.distcp filter=lfs diff=lfs merge=lfs -text
83
+ model_and_optim/__7_11.distcp filter=lfs diff=lfs merge=lfs -text
84
+ model_and_optim/__0_6.distcp filter=lfs diff=lfs merge=lfs -text
config.yaml ADDED
@@ -0,0 +1,831 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: multitask_train
2
+ model:
3
+ model_name: molmo
4
+ llm:
5
+ d_model: 4096
6
+ n_heads: 32
7
+ n_kv_heads: 8
8
+ head_dim: null
9
+ qkv_bias: false
10
+ clip_qkv: null
11
+ n_layers: 36
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 24576
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ rope: true
17
+ rope_full_precision: true
18
+ rope_theta: 1000000.0
19
+ rope_type: default
20
+ rope_factor: null
21
+ rope_high_freq_factor: null
22
+ rope_low_freq_factor: null
23
+ rope_original_max_position_embeddings: null
24
+ attention_type: sdpa
25
+ float32_attention: true
26
+ attention_dropout: 0.0
27
+ attention_layer_norm: true
28
+ attention_layer_norm_type: qwen3
29
+ residual_dropout: 0.1
30
+ response_residual_dropout: 0.0
31
+ layer_norm_type: rms
32
+ layer_norm_with_affine: true
33
+ layer_norm_eps: 1.0e-06
34
+ attention_layer_norm_with_affine: true
35
+ max_sequence_length: 4096
36
+ max_position_embeddings: null
37
+ include_bias: false
38
+ bias_for_layer_norm: null
39
+ norm_after: false
40
+ moe_num_experts: 8
41
+ moe_top_k: 2
42
+ moe_mlp_impl: sparse
43
+ moe_log_expert_assignment: false
44
+ moe_shared_expert: false
45
+ moe_lbl_in_fp32: false
46
+ moe_interleave: false
47
+ moe_loss_weight: 0.1
48
+ moe_zloss_weight: null
49
+ moe_dropless: true
50
+ moe_capacity_factor: 1.25
51
+ embedding_dropout: 0.0
52
+ scale_logits: false
53
+ vocab_size: 151936
54
+ additional_vocab_size: 128
55
+ weight_tying: false
56
+ embedding_size: 151936
57
+ use_position_ids: true
58
+ tokenizer:
59
+ identifier: Qwen/Qwen3-8B
60
+ tokenizer_dir: null
61
+ init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen3-8b.pt
62
+ init_incremental: null
63
+ new_embedding_init_range: 0.02
64
+ initializer_range: 0.02
65
+ normalize_input_embeds: false
66
+ activation_checkpoint: whole_layer
67
+ compile: blocks
68
+ fix_pad_tokenizer: false
69
+ init_std: 0.02
70
+ init_fn: normal
71
+ init_cutoff_factor: null
72
+ vision_backbone:
73
+ vit:
74
+ image_model_type: siglip
75
+ image_default_input_size:
76
+ - 378
77
+ - 378
78
+ image_patch_size: 14
79
+ image_pos_patch_size: 14
80
+ image_emb_dim: 1152
81
+ image_num_heads: 16
82
+ image_num_key_value_heads: 16
83
+ image_num_layers: 27
84
+ image_head_dim: 72
85
+ image_mlp_dim: 4304
86
+ image_mlp_activations: gelu_pytorch_tanh
87
+ image_dropout_rate: 0.0
88
+ image_num_pos: 729
89
+ image_norm_eps: 1.0e-06
90
+ attention_dropout: 0.0
91
+ residual_dropout: 0.0
92
+ initializer_range: 0.02
93
+ float32_attention: true
94
+ attention_type: sdpa
95
+ activation_checkpointing: true
96
+ init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
97
+ resize_mode: siglip
98
+ pad_value: 0.0
99
+ normalize: siglip
100
+ image_pooling_2d: attention_meanq
101
+ pooling_attention_mask: false
102
+ image_projector: mlp
103
+ image_padding_embed: null
104
+ vit_layers:
105
+ - -3
106
+ - -9
107
+ skip_unused_layers: true
108
+ image_feature_dropout: 0.0
109
+ connector_activation_checkpointing: true
110
+ compile_vit: blocks
111
+ compile_connector: dynamic
112
+ normalize_on_gpu: false
113
+ data_formatter:
114
+ prompt_templates: uber_model
115
+ message_format: role
116
+ system_prompt: demo_or_style
117
+ always_start_with_space: false
118
+ default_inference_len: 65
119
+ select_answer: best
120
+ debug: false
121
+ image_last: false
122
+ format_message_list: null
123
+ p_one_message: 0.0
124
+ eval_system_prompt_mapping: null
125
+ timestamp_mode: 50-percent-seconds
126
+ p_choice_content_in_mc: 1.0
127
+ mm_preprocessor:
128
+ crop_mode: overlap-and-resize-c2
129
+ use_col_tokens: true
130
+ max_crops: 8
131
+ pooling_w: 2
132
+ pooling_h: 2
133
+ overlap_margins:
134
+ - 4
135
+ - 4
136
+ max_images: null
137
+ max_multi_image_crops: 4
138
+ max_answer_len: null
139
+ last_message_loss_only: false
140
+ loss_token_weighting: root_subsegments
141
+ max_text_tokens: null
142
+ image_padding_mask: false
143
+ legacy_image_mask: false
144
+ bi_directional_attn: null
145
+ seed: 6198
146
+ epoch: null
147
+ dry_run: false
148
+ ft_llm: true
149
+ ft_vit: true
150
+ ft_connector: true
151
+ ft_embedding: lm_head
152
+ optimizer:
153
+ name: adamw
154
+ learning_rate: 0.0001
155
+ weight_decay: 0.01
156
+ betas:
157
+ - 0.9
158
+ - 0.95
159
+ eps: 1.0e-05
160
+ connector_learning_rate: 5.0e-06
161
+ vit_learning_rate: 5.0e-06
162
+ llm_learning_rate: 1.0e-05
163
+ frame_selector_learning_rate: 0.0001
164
+ temporal_token_scorer_learning_rate: 0.0001
165
+ connector_weight_decay: 0.0
166
+ vit_weight_decay: 0.0
167
+ llm_weight_decay: 0.0
168
+ frame_selector_weight_decay: 0.01
169
+ temporal_token_scorer_weight_decay: 0.01
170
+ connector_betas:
171
+ - 0.9
172
+ - 0.95
173
+ vit_betas:
174
+ - 0.9
175
+ - 0.95
176
+ llm_betas:
177
+ - 0.9
178
+ - 0.95
179
+ frame_selector_betas:
180
+ - 0.9
181
+ - 0.95
182
+ temporal_token_scorer_betas:
183
+ - 0.9
184
+ - 0.95
185
+ connector_eps: 1.0e-06
186
+ vit_eps: 1.0e-06
187
+ llm_eps: 1.0e-06
188
+ frame_selector_eps: 1.0e-06
189
+ temporal_token_scorer_eps: 1.0e-06
190
+ metrics_log_interval: -1
191
+ scheduler:
192
+ name: multimodal
193
+ units: steps
194
+ t_warmup: 100
195
+ t_max: null
196
+ alpha_f: 0.1
197
+ connector_t_warmup: 200
198
+ vit_t_warmup: 200
199
+ llm_t_warmup: 200
200
+ frame_selector_t_warmup: 200
201
+ temporal_token_scorer_t_warmup: 200
202
+ grad_clip_warmup_steps: null
203
+ grad_clip_warmup_factor: null
204
+ warmup_min_lr: 0.0
205
+ data:
206
+ dataset: null
207
+ mixture: null
208
+ root_size_mixture:
209
+ - rate: 0.15
210
+ mixture:
211
+ pixmo_ask_model_anything: null
212
+ pixmo_cap: 50000.0
213
+ pixmo_cap_qa_as_user_qa: null
214
+ pixmo_pointing_explanations: null
215
+ - rate: 0.5
216
+ mixture:
217
+ coco_2014_vqa_multi: null
218
+ text_vqa: null
219
+ okvqa: null
220
+ chart_qa_weighted: null
221
+ doc_qa: null
222
+ info_qa: null
223
+ ai2_diagram_v2_mix_transparent: null
224
+ a_okvqa_mc: null
225
+ a_okvqa_da: null
226
+ android_control: null
227
+ science_qa_img: null
228
+ tabwmp_da: null
229
+ st_qa: null
230
+ tally_qa: null
231
+ pixmo_clocks: 250000.0
232
+ dv_qa: 10000.0
233
+ figure_qa: 10000.0
234
+ plot_qa: 20000.0
235
+ cosyn_chart_exp: null
236
+ cosyn_chemical_exp: null
237
+ cosyn_diagram_exp: null
238
+ cosyn_document: null
239
+ cosyn_math_exp: null
240
+ cosyn_music_exp: null
241
+ cosyn_table_exp: null
242
+ - rate: 0.35
243
+ mixture:
244
+ pixmo_points_train: null
245
+ pixmo_count_train: null
246
+ pixmo_points_high_freq_train: null
247
+ cosyn_point: null
248
+ kwargs_mixture: null
249
+ split: train
250
+ seed: 50189
251
+ pad: to_max
252
+ sequence_length: 2304
253
+ max_text_seq_len: null
254
+ shuffle: true
255
+ start_index: 0
256
+ packing: null
257
+ num_workers: 2
258
+ drop_last: true
259
+ pin_memory: true
260
+ prefetch_factor: null
261
+ persistent_workers: false
262
+ timeout: 0
263
+ restore_dataloader: true
264
+ fast_forward_batches: null
265
+ evaluators: []
266
+ eval_interval: 2000
267
+ inf_evaluators:
268
+ - label: chart_qa
269
+ data:
270
+ dataset: chart_qa
271
+ mixture: null
272
+ root_size_mixture: null
273
+ kwargs_mixture: null
274
+ split: validation
275
+ seed: 691203
276
+ pad: to_max
277
+ sequence_length: 1792
278
+ max_text_seq_len: null
279
+ shuffle: true
280
+ start_index: 0
281
+ packing: null
282
+ num_workers: 2
283
+ drop_last: true
284
+ pin_memory: true
285
+ prefetch_factor: null
286
+ persistent_workers: true
287
+ timeout: 0
288
+ evaluator:
289
+ n_to_log: 0
290
+ num_wandb_examples: 32
291
+ save_predictions: null
292
+ save_tokens: false
293
+ vqa_eval: relaxed_correctness,scifi_relaxed_correctness,em
294
+ pointing_eval: false
295
+ count_eval: false
296
+ point_count_eval: false
297
+ android_eval: false
298
+ clock_eval: false
299
+ clock_bench_eval: false
300
+ math_vista_eval: false
301
+ temp_compass_eval: ''
302
+ temp_compass_disable_api: false
303
+ video_mme_eval: ''
304
+ mlvu_gen_eval: false
305
+ long_video_bench_eval: false
306
+ plm_fgqa_eval: false
307
+ long_video_bench_caption_eval: false
308
+ vinoground_eval: false
309
+ vixmo_caption_eval: false
310
+ refexp_eval: false
311
+ coco_caption_eval: false
312
+ qv_highlights_eval: false
313
+ tomato: false
314
+ temporal_bench: false
315
+ max_new_tokens: 12
316
+ device_batch_size: 4
317
+ subset_num_batches: null
318
+ max_examples: 2048
319
+ console_log_interval: 20
320
+ include_image: false
321
+ - label: chart_qa_exp
322
+ data:
323
+ dataset: chart_qa_exp
324
+ mixture: null
325
+ root_size_mixture: null
326
+ kwargs_mixture: null
327
+ split: validation
328
+ seed: 691203
329
+ pad: to_max
330
+ sequence_length: 1792
331
+ max_text_seq_len: null
332
+ shuffle: true
333
+ start_index: 0
334
+ packing: null
335
+ num_workers: 2
336
+ drop_last: true
337
+ pin_memory: true
338
+ prefetch_factor: null
339
+ persistent_workers: true
340
+ timeout: 0
341
+ evaluator:
342
+ n_to_log: 0
343
+ num_wandb_examples: 32
344
+ save_predictions: null
345
+ save_tokens: false
346
+ vqa_eval: relaxed_correctness,scifi_relaxed_correctness,em
347
+ pointing_eval: false
348
+ count_eval: false
349
+ point_count_eval: false
350
+ android_eval: false
351
+ clock_eval: false
352
+ clock_bench_eval: false
353
+ math_vista_eval: false
354
+ temp_compass_eval: ''
355
+ temp_compass_disable_api: false
356
+ video_mme_eval: ''
357
+ mlvu_gen_eval: false
358
+ long_video_bench_eval: false
359
+ plm_fgqa_eval: false
360
+ long_video_bench_caption_eval: false
361
+ vinoground_eval: false
362
+ vixmo_caption_eval: false
363
+ refexp_eval: false
364
+ coco_caption_eval: false
365
+ qv_highlights_eval: false
366
+ tomato: false
367
+ temporal_bench: false
368
+ max_new_tokens: 256
369
+ device_batch_size: 4
370
+ subset_num_batches: null
371
+ max_examples: 2048
372
+ console_log_interval: 20
373
+ include_image: false
374
+ - label: info_qa
375
+ data:
376
+ dataset: info_qa
377
+ mixture: null
378
+ root_size_mixture: null
379
+ kwargs_mixture: null
380
+ split: validation
381
+ seed: 691203
382
+ pad: to_max
383
+ sequence_length: 1792
384
+ max_text_seq_len: null
385
+ shuffle: true
386
+ start_index: 0
387
+ packing: null
388
+ num_workers: 2
389
+ drop_last: true
390
+ pin_memory: true
391
+ prefetch_factor: null
392
+ persistent_workers: true
393
+ timeout: 0
394
+ evaluator:
395
+ n_to_log: 0
396
+ num_wandb_examples: 32
397
+ save_predictions: null
398
+ save_tokens: false
399
+ vqa_eval: ansl,em
400
+ pointing_eval: false
401
+ count_eval: false
402
+ point_count_eval: false
403
+ android_eval: false
404
+ clock_eval: false
405
+ clock_bench_eval: false
406
+ math_vista_eval: false
407
+ temp_compass_eval: ''
408
+ temp_compass_disable_api: false
409
+ video_mme_eval: ''
410
+ mlvu_gen_eval: false
411
+ long_video_bench_eval: false
412
+ plm_fgqa_eval: false
413
+ long_video_bench_caption_eval: false
414
+ vinoground_eval: false
415
+ vixmo_caption_eval: false
416
+ refexp_eval: false
417
+ coco_caption_eval: false
418
+ qv_highlights_eval: false
419
+ tomato: false
420
+ temporal_bench: false
421
+ max_new_tokens: 12
422
+ device_batch_size: 4
423
+ subset_num_batches: null
424
+ max_examples: 2048
425
+ console_log_interval: 20
426
+ include_image: false
427
+ - label: doc_qa
428
+ data:
429
+ dataset: doc_qa
430
+ mixture: null
431
+ root_size_mixture: null
432
+ kwargs_mixture: null
433
+ split: validation
434
+ seed: 691203
435
+ pad: to_max
436
+ sequence_length: 1792
437
+ max_text_seq_len: null
438
+ shuffle: true
439
+ start_index: 0
440
+ packing: null
441
+ num_workers: 2
442
+ drop_last: true
443
+ pin_memory: true
444
+ prefetch_factor: null
445
+ persistent_workers: true
446
+ timeout: 0
447
+ evaluator:
448
+ n_to_log: 0
449
+ num_wandb_examples: 32
450
+ save_predictions: null
451
+ save_tokens: false
452
+ vqa_eval: ansl,em
453
+ pointing_eval: false
454
+ count_eval: false
455
+ point_count_eval: false
456
+ android_eval: false
457
+ clock_eval: false
458
+ clock_bench_eval: false
459
+ math_vista_eval: false
460
+ temp_compass_eval: ''
461
+ temp_compass_disable_api: false
462
+ video_mme_eval: ''
463
+ mlvu_gen_eval: false
464
+ long_video_bench_eval: false
465
+ plm_fgqa_eval: false
466
+ long_video_bench_caption_eval: false
467
+ vinoground_eval: false
468
+ vixmo_caption_eval: false
469
+ refexp_eval: false
470
+ coco_caption_eval: false
471
+ qv_highlights_eval: false
472
+ tomato: false
473
+ temporal_bench: false
474
+ max_new_tokens: 12
475
+ device_batch_size: 4
476
+ subset_num_batches: null
477
+ max_examples: 2048
478
+ console_log_interval: 20
479
+ include_image: false
480
+ - label: ai2_diagram
481
+ data:
482
+ dataset: ai2_diagram_v2_mix_transparent
483
+ mixture: null
484
+ root_size_mixture: null
485
+ kwargs_mixture: null
486
+ split: validation
487
+ seed: 691203
488
+ pad: to_max
489
+ sequence_length: 1792
490
+ max_text_seq_len: null
491
+ shuffle: true
492
+ start_index: 0
493
+ packing: null
494
+ num_workers: 2
495
+ drop_last: true
496
+ pin_memory: true
497
+ prefetch_factor: null
498
+ persistent_workers: true
499
+ timeout: 0
500
+ evaluator:
501
+ n_to_log: 0
502
+ num_wandb_examples: 32
503
+ save_predictions: null
504
+ save_tokens: false
505
+ vqa_eval: mc_ai2d_opaque,mc_ai2d_transparent
506
+ pointing_eval: false
507
+ count_eval: false
508
+ point_count_eval: false
509
+ android_eval: false
510
+ clock_eval: false
511
+ clock_bench_eval: false
512
+ math_vista_eval: false
513
+ temp_compass_eval: ''
514
+ temp_compass_disable_api: false
515
+ video_mme_eval: ''
516
+ mlvu_gen_eval: false
517
+ long_video_bench_eval: false
518
+ plm_fgqa_eval: false
519
+ long_video_bench_caption_eval: false
520
+ vinoground_eval: false
521
+ vixmo_caption_eval: false
522
+ refexp_eval: false
523
+ coco_caption_eval: false
524
+ qv_highlights_eval: false
525
+ tomato: false
526
+ temporal_bench: false
527
+ max_new_tokens: 12
528
+ device_batch_size: 4
529
+ subset_num_batches: null
530
+ max_examples: 2048
531
+ console_log_interval: 20
532
+ include_image: false
533
+ - label: coco_2014_vqa
534
+ data:
535
+ dataset: coco_2014_vqa
536
+ mixture: null
537
+ root_size_mixture: null
538
+ kwargs_mixture: null
539
+ split: validation
540
+ seed: 691203
541
+ pad: to_max
542
+ sequence_length: 1792
543
+ max_text_seq_len: null
544
+ shuffle: true
545
+ start_index: 0
546
+ packing: null
547
+ num_workers: 2
548
+ drop_last: true
549
+ pin_memory: true
550
+ prefetch_factor: null
551
+ persistent_workers: true
552
+ timeout: 0
553
+ evaluator:
554
+ n_to_log: 0
555
+ num_wandb_examples: 32
556
+ save_predictions: null
557
+ save_tokens: false
558
+ vqa_eval: vqa_score
559
+ pointing_eval: false
560
+ count_eval: false
561
+ point_count_eval: false
562
+ android_eval: false
563
+ clock_eval: false
564
+ clock_bench_eval: false
565
+ math_vista_eval: false
566
+ temp_compass_eval: ''
567
+ temp_compass_disable_api: false
568
+ video_mme_eval: ''
569
+ mlvu_gen_eval: false
570
+ long_video_bench_eval: false
571
+ plm_fgqa_eval: false
572
+ long_video_bench_caption_eval: false
573
+ vinoground_eval: false
574
+ vixmo_caption_eval: false
575
+ refexp_eval: false
576
+ coco_caption_eval: false
577
+ qv_highlights_eval: false
578
+ tomato: false
579
+ temporal_bench: false
580
+ max_new_tokens: 12
581
+ device_batch_size: 4
582
+ subset_num_batches: null
583
+ max_examples: 2048
584
+ console_log_interval: 20
585
+ include_image: false
586
+ - label: pixmo_clocks
587
+ data:
588
+ dataset: pixmo_clocks
589
+ mixture: null
590
+ root_size_mixture: null
591
+ kwargs_mixture: null
592
+ split: validation
593
+ seed: 691203
594
+ pad: to_max
595
+ sequence_length: 1792
596
+ max_text_seq_len: null
597
+ shuffle: true
598
+ start_index: 0
599
+ packing: null
600
+ num_workers: 2
601
+ drop_last: true
602
+ pin_memory: true
603
+ prefetch_factor: null
604
+ persistent_workers: true
605
+ timeout: 0
606
+ evaluator:
607
+ n_to_log: 0
608
+ num_wandb_examples: 32
609
+ save_predictions: null
610
+ save_tokens: false
611
+ vqa_eval: ''
612
+ pointing_eval: false
613
+ count_eval: false
614
+ point_count_eval: false
615
+ android_eval: false
616
+ clock_eval: true
617
+ clock_bench_eval: false
618
+ math_vista_eval: false
619
+ temp_compass_eval: ''
620
+ temp_compass_disable_api: false
621
+ video_mme_eval: ''
622
+ mlvu_gen_eval: false
623
+ long_video_bench_eval: false
624
+ plm_fgqa_eval: false
625
+ long_video_bench_caption_eval: false
626
+ vinoground_eval: false
627
+ vixmo_caption_eval: false
628
+ refexp_eval: false
629
+ coco_caption_eval: false
630
+ qv_highlights_eval: false
631
+ tomato: false
632
+ temporal_bench: false
633
+ max_new_tokens: 12
634
+ device_batch_size: 4
635
+ subset_num_batches: null
636
+ max_examples: 2048
637
+ console_log_interval: 20
638
+ include_image: false
639
+ - label: android_control_ll
640
+ data:
641
+ dataset: android_control_ll
642
+ mixture: null
643
+ root_size_mixture: null
644
+ kwargs_mixture: null
645
+ split: validation
646
+ seed: 691203
647
+ pad: to_max
648
+ sequence_length: 1792
649
+ max_text_seq_len: null
650
+ shuffle: true
651
+ start_index: 0
652
+ packing: null
653
+ num_workers: 2
654
+ drop_last: true
655
+ pin_memory: true
656
+ prefetch_factor: null
657
+ persistent_workers: true
658
+ timeout: 0
659
+ evaluator:
660
+ n_to_log: 0
661
+ num_wandb_examples: 32
662
+ save_predictions: null
663
+ save_tokens: false
664
+ vqa_eval: ''
665
+ pointing_eval: false
666
+ count_eval: false
667
+ point_count_eval: false
668
+ android_eval: true
669
+ clock_eval: false
670
+ clock_bench_eval: false
671
+ math_vista_eval: false
672
+ temp_compass_eval: ''
673
+ temp_compass_disable_api: false
674
+ video_mme_eval: ''
675
+ mlvu_gen_eval: false
676
+ long_video_bench_eval: false
677
+ plm_fgqa_eval: false
678
+ long_video_bench_caption_eval: false
679
+ vinoground_eval: false
680
+ vixmo_caption_eval: false
681
+ refexp_eval: false
682
+ coco_caption_eval: false
683
+ qv_highlights_eval: false
684
+ tomato: false
685
+ temporal_bench: false
686
+ max_new_tokens: 16
687
+ device_batch_size: 4
688
+ subset_num_batches: null
689
+ max_examples: 2048
690
+ console_log_interval: 20
691
+ include_image: false
692
+ - label: pointing_eval
693
+ data:
694
+ dataset: pointing_eval
695
+ mixture: null
696
+ root_size_mixture: null
697
+ kwargs_mixture: null
698
+ split: test
699
+ seed: 691203
700
+ pad: to_max
701
+ sequence_length: 1792
702
+ max_text_seq_len: null
703
+ shuffle: true
704
+ start_index: 0
705
+ packing: null
706
+ num_workers: 2
707
+ drop_last: true
708
+ pin_memory: true
709
+ prefetch_factor: null
710
+ persistent_workers: true
711
+ timeout: 0
712
+ evaluator:
713
+ n_to_log: 0
714
+ num_wandb_examples: 32
715
+ save_predictions: null
716
+ save_tokens: false
717
+ vqa_eval: ''
718
+ pointing_eval: true
719
+ count_eval: false
720
+ point_count_eval: false
721
+ android_eval: false
722
+ clock_eval: false
723
+ clock_bench_eval: false
724
+ math_vista_eval: false
725
+ temp_compass_eval: ''
726
+ temp_compass_disable_api: false
727
+ video_mme_eval: ''
728
+ mlvu_gen_eval: false
729
+ long_video_bench_eval: false
730
+ plm_fgqa_eval: false
731
+ long_video_bench_caption_eval: false
732
+ vinoground_eval: false
733
+ vixmo_caption_eval: false
734
+ refexp_eval: false
735
+ coco_caption_eval: false
736
+ qv_highlights_eval: false
737
+ tomato: false
738
+ temporal_bench: false
739
+ max_new_tokens: 192
740
+ device_batch_size: 4
741
+ subset_num_batches: null
742
+ max_examples: 2048
743
+ console_log_interval: 20
744
+ include_image: false
745
+ inf_eval_interval: 2000
746
+ eval_on_last_step: true
747
+ eval_on_load: false
748
+ save_folder: /weka/oe-training-default/sanghol/molmo/models/uber-v1/uber3.4-synthetic-siglip2-qwen3_8b
749
+ checkpointer_config:
750
+ save_thread_count: null
751
+ load_thread_count: null
752
+ pre_download: false
753
+ work_dir: null
754
+ throttle_uploads: false
755
+ canceled_check_interval: 50
756
+ save_interval: 1000
757
+ save_at: null
758
+ save_final_optim: true
759
+ save_num_checkpoints_to_keep: 1
760
+ save_final_unsharded_checkpoint: false
761
+ save_interval_ephemeral: null
762
+ save_overwrite: true
763
+ load_path: null
764
+ reset_optimizer_state: false
765
+ reset_trainer_state: false
766
+ initial_model_checkpoint: /weka/oe-training-default/sanghol/molmo/models/dense-cap-v1/captioner-qwen3_8b/step22347
767
+ allow_resume: true
768
+ max_duration: 30000
769
+ global_train_batch_size: 256
770
+ device_train_microbatch_size: 4
771
+ max_grad_norm: 1.0
772
+ multi_component_grad_norm: true
773
+ batch_divisor: global_batch
774
+ max_grad_norm_ratio: null
775
+ precision: amp_bf16
776
+ wandb:
777
+ project: molmo2-dev
778
+ entity: prior-ai2
779
+ group: uber-v1
780
+ name: uber3.4-synthetic-siglip2-qwen3_8b
781
+ tags:
782
+ - watching
783
+ log_artifacts: false
784
+ rank_zero_only: true
785
+ log_interval: 20
786
+ allow_resume: false
787
+ beaker_log_interval: 50
788
+ speed_monitor:
789
+ window_size: 20
790
+ gpu_flops_available: null
791
+ console_log_interval: 20
792
+ gen1_gc_interval: 1
793
+ compile:
794
+ mode: default
795
+ fullgraph: false
796
+ dynamic: false
797
+ backend: inductor
798
+ activation_checkpointing: true
799
+ fsdp:
800
+ fsdp2: true
801
+ precision: float
802
+ use_orig_params: true
803
+ wrapping_strategy: by_block_and_size
804
+ sharding_strategy: FULL_SHARD
805
+ hybrid_sharding_num_model_replicas: null
806
+ softmax_auxiliary_loss: true
807
+ softmax_auxiliary_loss_scale: 0.0001
808
+ saliency_score_loss_wt: null
809
+ frame_score_loss_wt: null
810
+ frame_score_loss_type: mse
811
+ frame_score_loss_target: 0.7
812
+ time_limit: null
813
+ extra_steps_after_cancel: 10
814
+ python_profiling: false
815
+ torch_profiling: false
816
+ stop_at: 30000
817
+ stop_after: null
818
+ fused_loss: false
819
+ compile_loss: true
820
+ runtime_data:
821
+ args: /gantry-runtime/launch_scripts/train_multitask_model.py 3.4-synthetic /weka/oe-training-default/sanghol/molmo/models/dense-cap-v1/captioner-qwen3_8b/step22347
822
+ --save_overwrite --save_interval=1000 --wandb.group=uber-v1 --wandb.name=uber3.4-synthetic-siglip2-qwen3_8b
823
+ --save_folder=/weka/oe-training-default/sanghol/molmo/models/uber-v1/uber3.4-synthetic-siglip2-qwen3_8b
824
+ hostname: jupiter-cs-aus-109.reviz.ai2.in
825
+ date: 09/13/2025, 07:25
826
+ world_size: 32
827
+ resuming_from: null
828
+ beaker_experiment_id: 01K50Z8VKCVN9Z4X917WEEP9VQ
829
+ beaker_experiment_url: https://beaker.org/ex/01K50Z8VKCVN9Z4X917WEEP9VQ
830
+ wandb_id: xhs8xbpk
831
+ wandb_url: https://wandb.ai/prior-ai2/molmo2-dev/runs/xhs8xbpk
model_and_optim/__0_14.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ebf7580a878e6b6b25f9bf03dc8597d8ecf9bfbd0c3dd4bb0aa193860bb2c1
3
+ size 193681551
model_and_optim/__0_5.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4d8ee380591eda64ade07e10fbc2915838318d75ebc412993b85fcaed0c229
3
+ size 193561739
model_and_optim/__0_6.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccbdb8a6f3a059c2a3da60e3320b5f0f19af2315b3302d6367f5fe0c0d2dd5e1
3
+ size 193581259
model_and_optim/__10_11.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e14545b3df6bb160f9b7348806c2d8e711277125ecda2d9dc57725eaecb99e3c
3
+ size 193704827
model_and_optim/__10_14.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:148c41bb91946fae036bc2b2f9f882c677035354a45141ae05cfdb5fc3684a5f
3
+ size 193578101
model_and_optim/__10_2.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb7f0caa2ef2eeb61931f0bbf6974c13f1c07d5f8601285a9162e01d2352d4a9
3
+ size 193461027
model_and_optim/__10_4.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c6f6820e1adb60e03289103e69762c7acb878fb01e5a96382551a83b26b508f
3
+ size 193461636
model_and_optim/__12_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b07b57f992282ab233063cd08032a803ce6c6021b5cfa6811782d98b6592d2
3
+ size 193467212
model_and_optim/__12_8.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b856c459d4c6897d915b713cf70df4e8dffb0a49a276d3399be8f14c25418558
3
+ size 193401014
model_and_optim/__13_3.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466278110b0058f39efbf5fe08cb40453f5b6173563e9f01caa48befb99e0e1b
3
+ size 193451811
model_and_optim/__14_14.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a70b1182d865ca32ef338c936bdc259c44d25c2cf6ef09ff85a5a94ecae6853a
3
+ size 193578101
model_and_optim/__14_2.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed0c549dee6732cd6f129c43e6455eb7e6fc55c78fa9a4d3321883c8d95e0648
3
+ size 193461027
model_and_optim/__15_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35bec598dee2e6c081d1623745a076f867c727dbd5630e06ad3bf05b2699770c
3
+ size 193467212
model_and_optim/__15_11.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e79da0e258b60db86b8025c07b993ed35c2ebc79dd99f00da0a2e74d08820f4
3
+ size 193704827
model_and_optim/__15_4.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d82d9a894c446de2bfc3d9ba35dfd32bc15e0c0194db01967d1bcada94e2c568
3
+ size 193461636
model_and_optim/__16_7.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec66a54d4ef5180acba44c094e6110a175ffdc7b4187ed7ef257d5c6f74d2682
3
+ size 193440900
model_and_optim/__18_14.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71d00cfee908af783bb5c157595795986a93e59a5a8b4cdc4d4252cf4267cd8
3
+ size 193578101
model_and_optim/__19_14.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75826ba0bcab9c19075e4f4c1b40b8d904c1db80c8873fff3b968a42c3887cd2
3
+ size 193578101
model_and_optim/__1_14.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aa839e1d98da1656b85afa368d740a95c62311670936111f7d321b470b6a195
3
+ size 193578101
model_and_optim/__1_4.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8dface0174b05bb668161ac4e61e7bb64abc6ad4c8ca7d3f122c84aa18c5a9c
3
+ size 193461636
model_and_optim/__1_8.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bbcc23e3f583f537bb5b40706bf137304efb040091b8e52a7186a2db12481a7
3
+ size 193401014
model_and_optim/__1_9.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3f6682e1dba102c645e0c6ccc9a9c758b9d4854a715d13fc9bc3dbff7920e97
3
+ size 193394102
model_and_optim/__20_10.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9c17d9af2524a42c9a9bdd1b531bdf5d75ecc6e5b4688377ae4f4a7f17de5da
3
+ size 193427326
model_and_optim/__21_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfff6720de93ee9c2ff8f1efde3229d6070dcedd51f9dfa07d45a6075acf1f9c
3
+ size 193405798
model_and_optim/__22_9.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbc6f09fd3b0c8d36f73710f8188c71ae3600e2c0434d609345c244c5951c66a
3
+ size 193394102
model_and_optim/__23_15.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0f98a106de9cad78141f361b19b51c21b341b41aac1c70c2df0a18cb938590
3
+ size 193567308
model_and_optim/__23_3.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d87d535f75281a382008936bc27387d2ab2d4ff3465382bcff7b61be6d58a783
3
+ size 193451811
model_and_optim/__23_6.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e93f60ea866036c45c0929faeaa65668d3a5b1a25f4cb2c14cc5bd71e08b5c1
3
+ size 193445508
model_and_optim/__24_4.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f410c1c76b8672a70b374d69ea6678f4b82b78c56468acc07ddd570a83712604
3
+ size 193461636
model_and_optim/__25_10.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e99a2f1f661117de8cfdf2298d02e6b863ae48fa36c19ee7ff6f8fa67cf40f0
3
+ size 193427326
model_and_optim/__25_15.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d75adac4dca2715d8d8b252ac39afd9f6b8a268ad7a4156e32ed7f23184dba
3
+ size 193567308
model_and_optim/__25_6.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9786ada91a5609b13330d09ca950716c1fb4280b5e106626e68b97444784fb1e
3
+ size 193445508
model_and_optim/__26_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:916dd8cfcbe1200ce346c68d070fb00ca9e28c573a841546c792282b4899adf1
3
+ size 193467212
model_and_optim/__27_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d059390b063cdbb68876351d35f82feb83799490b8fae054395e0b0cb3d13225
3
+ size 193405798
model_and_optim/__27_2.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d51c741c4cfad841a1d5f37c6f7c90c3ff95f63fea30984d28937312460b4cdc
3
+ size 193461027
model_and_optim/__28_5.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84aef054ca4035e2ca0032722a7058de8ba4f64030338524812070a0fadcb3e5
3
+ size 193457028
model_and_optim/__29_9.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4655890b30b0ce7ff2af03b8378cfa84d5ad7237b7abf063aae8efde98bbb61
3
+ size 193394102
model_and_optim/__30_15.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23822029b0e6d4a7f6e74bda0831e7b64df8a546afb1e9f651037a087a2902ac
3
+ size 193567308
model_and_optim/__31_10.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff5295ce59390a8b2842e1e879a1c774f67bec154f8dfb2813982a18ccd5b895
3
+ size 192763774
model_and_optim/__31_2.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cfa6c1a8fe5add6ab11d8fa4c4b3a3010684f866cccbd2cb34668a8257b5f06
3
+ size 193166115
model_and_optim/__31_7.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81cce0c5804b272b2efa38e2fc4c4335f5e09c695463f60616a5f66f240e7f03
3
+ size 193440900
model_and_optim/__5_12.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5cf25e01585e94eb927d3082bad45dec8f94f2f7a8643b9c163bc198447e1d
3
+ size 193697915
model_and_optim/__5_13.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5eacba755bb6c4756617c3653321edd3229d4de5d44b89a1d83aef2828faad5
3
+ size 193691003
model_and_optim/__6_7.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d9d1f8a6bca0796bce4c7315b14f585ba088e68efb2d44696dae03c55b57836
3
+ size 193440900
model_and_optim/__6_9.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9454663677d49650300a9b109ed9b85a5d782aa49375ac3d515ed8e0458f375c
3
+ size 193394102
model_and_optim/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0b38a1471fa25b831aa8f8f4bbaae15614b7ce3985e6283a0990670e7e59bcb
3
+ size 193405798
model_and_optim/__7_11.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:245c3ef961fa27a43f67a434e3dd2fbd6cf139dbdeb0952ace67bf82d223b2df
3
+ size 193704827
model_and_optim/__8_13.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875775552bc1c90ca51ffb4624128576a63d49a3dc709dd9946adbf5b1418438
3
+ size 193691003