diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..8da0b8c1f3ac1a836c0a3c28abe67985c9dc29a0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,52 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model_and_optim/__12_1.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__15_11.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__6_7.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__26_1.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__31_2.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__29_9.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__13_3.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__27_0.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__5_13.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__15_1.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__30_15.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__12_8.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__1_8.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__25_10.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__27_2.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__10_11.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__14_2.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__24_4.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__22_9.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__1_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__25_15.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__5_12.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__19_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__23_3.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__31_7.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__14_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__16_7.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__0_5.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__10_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__23_6.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__15_4.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__31_10.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__21_0.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__1_4.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__6_9.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__28_5.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__25_6.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__20_10.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__18_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__8_4.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__10_4.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__0_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__1_9.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__10_2.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__8_13.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__23_15.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__7_11.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__0_6.distcp filter=lfs diff=lfs merge=lfs -text diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d4c133422604d5afcb7cb542986fe4b174548b3 --- /dev/null +++ b/config.yaml @@ -0,0 +1,831 @@ +run_name: multitask_train +model: + model_name: molmo + llm: + d_model: 4096 + n_heads: 32 + n_kv_heads: 8 + head_dim: null + qkv_bias: false + clip_qkv: null + n_layers: 36 + mlp_ratio: 4 + mlp_hidden_size: 24576 + activation_type: swiglu + block_type: sequential + rope: true + rope_full_precision: true + rope_theta: 1000000.0 + rope_type: default + rope_factor: null + rope_high_freq_factor: null + rope_low_freq_factor: null + rope_original_max_position_embeddings: null + attention_type: sdpa + float32_attention: true + attention_dropout: 0.0 + attention_layer_norm: true + attention_layer_norm_type: qwen3 + residual_dropout: 0.1 + response_residual_dropout: 0.0 + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-06 + attention_layer_norm_with_affine: true + max_sequence_length: 4096 + max_position_embeddings: null + include_bias: false + bias_for_layer_norm: null + norm_after: false + moe_num_experts: 8 + moe_top_k: 2 + moe_mlp_impl: sparse + moe_log_expert_assignment: false + moe_shared_expert: false + moe_lbl_in_fp32: false + moe_interleave: false + moe_loss_weight: 0.1 + moe_zloss_weight: null + moe_dropless: true + moe_capacity_factor: 1.25 + embedding_dropout: 0.0 + scale_logits: false + vocab_size: 151936 + additional_vocab_size: 128 + weight_tying: false + embedding_size: 151936 + use_position_ids: true + tokenizer: + identifier: Qwen/Qwen3-8B + tokenizer_dir: null + init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen3-8b.pt + init_incremental: null + new_embedding_init_range: 0.02 + initializer_range: 0.02 + normalize_input_embeds: false + activation_checkpoint: whole_layer + compile: blocks + fix_pad_tokenizer: false + init_std: 0.02 + init_fn: normal + init_cutoff_factor: null + vision_backbone: + vit: + image_model_type: siglip + image_default_input_size: + - 378 + - 378 + image_patch_size: 14 + image_pos_patch_size: 14 + image_emb_dim: 1152 + image_num_heads: 16 + image_num_key_value_heads: 16 + image_num_layers: 27 + image_head_dim: 72 + image_mlp_dim: 4304 + image_mlp_activations: gelu_pytorch_tanh + image_dropout_rate: 0.0 + image_num_pos: 729 + image_norm_eps: 1.0e-06 + attention_dropout: 0.0 + residual_dropout: 0.0 + initializer_range: 0.02 + float32_attention: true + attention_type: sdpa + activation_checkpointing: true + init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt + resize_mode: siglip + pad_value: 0.0 + normalize: siglip + image_pooling_2d: attention_meanq + pooling_attention_mask: false + image_projector: mlp + image_padding_embed: null + vit_layers: + - -3 + - -9 + skip_unused_layers: true + image_feature_dropout: 0.0 + connector_activation_checkpointing: true + compile_vit: blocks + compile_connector: dynamic + normalize_on_gpu: false + data_formatter: + prompt_templates: uber_model + message_format: role + system_prompt: demo_or_style + always_start_with_space: false + default_inference_len: 65 + select_answer: best + debug: false + image_last: false + format_message_list: null + p_one_message: 0.0 + eval_system_prompt_mapping: null + timestamp_mode: 50-percent-seconds + p_choice_content_in_mc: 1.0 + mm_preprocessor: + crop_mode: overlap-and-resize-c2 + use_col_tokens: true + max_crops: 8 + pooling_w: 2 + pooling_h: 2 + overlap_margins: + - 4 + - 4 + max_images: null + max_multi_image_crops: 4 + max_answer_len: null + last_message_loss_only: false + loss_token_weighting: root_subsegments + max_text_tokens: null + image_padding_mask: false + legacy_image_mask: false + bi_directional_attn: null +seed: 6198 +epoch: null +dry_run: false +ft_llm: true +ft_vit: true +ft_connector: true +ft_embedding: lm_head +optimizer: + name: adamw + learning_rate: 0.0001 + weight_decay: 0.01 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + connector_learning_rate: 5.0e-06 + vit_learning_rate: 5.0e-06 + llm_learning_rate: 1.0e-05 + frame_selector_learning_rate: 0.0001 + temporal_token_scorer_learning_rate: 0.0001 + connector_weight_decay: 0.0 + vit_weight_decay: 0.0 + llm_weight_decay: 0.0 + frame_selector_weight_decay: 0.01 + temporal_token_scorer_weight_decay: 0.01 + connector_betas: + - 0.9 + - 0.95 + vit_betas: + - 0.9 + - 0.95 + llm_betas: + - 0.9 + - 0.95 + frame_selector_betas: + - 0.9 + - 0.95 + temporal_token_scorer_betas: + - 0.9 + - 0.95 + connector_eps: 1.0e-06 + vit_eps: 1.0e-06 + llm_eps: 1.0e-06 + frame_selector_eps: 1.0e-06 + temporal_token_scorer_eps: 1.0e-06 + metrics_log_interval: -1 +scheduler: + name: multimodal + units: steps + t_warmup: 100 + t_max: null + alpha_f: 0.1 + connector_t_warmup: 200 + vit_t_warmup: 200 + llm_t_warmup: 200 + frame_selector_t_warmup: 200 + temporal_token_scorer_t_warmup: 200 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: 0.0 +data: + dataset: null + mixture: null + root_size_mixture: + - rate: 0.15 + mixture: + pixmo_ask_model_anything: null + pixmo_cap: 50000.0 + pixmo_cap_qa_as_user_qa: null + pixmo_pointing_explanations: null + - rate: 0.5 + mixture: + coco_2014_vqa_multi: null + text_vqa: null + okvqa: null + chart_qa_weighted: null + doc_qa: null + info_qa: null + ai2_diagram_v2_mix_transparent: null + a_okvqa_mc: null + a_okvqa_da: null + android_control: null + science_qa_img: null + tabwmp_da: null + st_qa: null + tally_qa: null + pixmo_clocks: 250000.0 + dv_qa: 10000.0 + figure_qa: 10000.0 + plot_qa: 20000.0 + cosyn_chart_exp: null + cosyn_chemical_exp: null + cosyn_diagram_exp: null + cosyn_document: null + cosyn_math_exp: null + cosyn_music_exp: null + cosyn_table_exp: null + - rate: 0.35 + mixture: + pixmo_points_train: null + pixmo_count_train: null + pixmo_points_high_freq_train: null + cosyn_point: null + kwargs_mixture: null + split: train + seed: 50189 + pad: to_max + sequence_length: 2304 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: false + timeout: 0 +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 2000 +inf_evaluators: +- label: chart_qa + data: + dataset: chart_qa + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: relaxed_correctness,scifi_relaxed_correctness,em + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: chart_qa_exp + data: + dataset: chart_qa_exp + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: relaxed_correctness,scifi_relaxed_correctness,em + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 256 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: info_qa + data: + dataset: info_qa + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: ansl,em + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: doc_qa + data: + dataset: doc_qa + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: ansl,em + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: ai2_diagram + data: + dataset: ai2_diagram_v2_mix_transparent + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: mc_ai2d_opaque,mc_ai2d_transparent + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: coco_2014_vqa + data: + dataset: coco_2014_vqa + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: vqa_score + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: pixmo_clocks + data: + dataset: pixmo_clocks + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: '' + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: true + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: android_control_ll + data: + dataset: android_control_ll + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: '' + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: true + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 16 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: pointing_eval + data: + dataset: pointing_eval + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: test + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: '' + pointing_eval: true + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 192 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +inf_eval_interval: 2000 +eval_on_last_step: true +eval_on_load: false +save_folder: /weka/oe-training-default/sanghol/molmo/models/uber-v1/uber3.4-synthetic-siglip2-qwen3_8b +checkpointer_config: + save_thread_count: null + load_thread_count: null + pre_download: false + work_dir: null + throttle_uploads: false +canceled_check_interval: 50 +save_interval: 1000 +save_at: null +save_final_optim: true +save_num_checkpoints_to_keep: 1 +save_final_unsharded_checkpoint: false +save_interval_ephemeral: null +save_overwrite: true +load_path: null +reset_optimizer_state: false +reset_trainer_state: false +initial_model_checkpoint: /weka/oe-training-default/sanghol/molmo/models/dense-cap-v1/captioner-qwen3_8b/step22347 +allow_resume: true +max_duration: 30000 +global_train_batch_size: 256 +device_train_microbatch_size: 4 +max_grad_norm: 1.0 +multi_component_grad_norm: true +batch_divisor: global_batch +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: + project: molmo2-dev + entity: prior-ai2 + group: uber-v1 + name: uber3.4-synthetic-siglip2-qwen3_8b + tags: + - watching + log_artifacts: false + rank_zero_only: true + log_interval: 20 + allow_resume: false +beaker_log_interval: 50 +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 20 +gen1_gc_interval: 1 +compile: + mode: default + fullgraph: false + dynamic: false + backend: inductor +activation_checkpointing: true +fsdp: + fsdp2: true + precision: float + use_orig_params: true + wrapping_strategy: by_block_and_size + sharding_strategy: FULL_SHARD + hybrid_sharding_num_model_replicas: null +softmax_auxiliary_loss: true +softmax_auxiliary_loss_scale: 0.0001 +saliency_score_loss_wt: null +frame_score_loss_wt: null +frame_score_loss_type: mse +frame_score_loss_target: 0.7 +time_limit: null +extra_steps_after_cancel: 10 +python_profiling: false +torch_profiling: false +stop_at: 30000 +stop_after: null +fused_loss: false +compile_loss: true +runtime_data: + args: /gantry-runtime/launch_scripts/train_multitask_model.py 3.4-synthetic /weka/oe-training-default/sanghol/molmo/models/dense-cap-v1/captioner-qwen3_8b/step22347 + --save_overwrite --save_interval=1000 --wandb.group=uber-v1 --wandb.name=uber3.4-synthetic-siglip2-qwen3_8b + --save_folder=/weka/oe-training-default/sanghol/molmo/models/uber-v1/uber3.4-synthetic-siglip2-qwen3_8b + hostname: jupiter-cs-aus-109.reviz.ai2.in + date: 09/13/2025, 07:25 + world_size: 32 + resuming_from: null + beaker_experiment_id: 01K50Z8VKCVN9Z4X917WEEP9VQ + beaker_experiment_url: https://beaker.org/ex/01K50Z8VKCVN9Z4X917WEEP9VQ + wandb_id: xhs8xbpk + wandb_url: https://wandb.ai/prior-ai2/molmo2-dev/runs/xhs8xbpk diff --git a/model_and_optim/__0_14.distcp b/model_and_optim/__0_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9ee9161eba379da8675e24d80b9f04075735396a --- /dev/null +++ b/model_and_optim/__0_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ebf7580a878e6b6b25f9bf03dc8597d8ecf9bfbd0c3dd4bb0aa193860bb2c1 +size 193681551 diff --git a/model_and_optim/__0_5.distcp b/model_and_optim/__0_5.distcp new file mode 100644 index 0000000000000000000000000000000000000000..270b732140361719a30e76719cfb179f7cbd7fb9 --- /dev/null +++ b/model_and_optim/__0_5.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4d8ee380591eda64ade07e10fbc2915838318d75ebc412993b85fcaed0c229 +size 193561739 diff --git a/model_and_optim/__0_6.distcp b/model_and_optim/__0_6.distcp new file mode 100644 index 0000000000000000000000000000000000000000..195010dd7f0eece899ac10a0c3d7cb378f96eab0 --- /dev/null +++ b/model_and_optim/__0_6.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbdb8a6f3a059c2a3da60e3320b5f0f19af2315b3302d6367f5fe0c0d2dd5e1 +size 193581259 diff --git a/model_and_optim/__10_11.distcp b/model_and_optim/__10_11.distcp new file mode 100644 index 0000000000000000000000000000000000000000..00c4725ee754eab531696db3bfef5441538d46bf --- /dev/null +++ b/model_and_optim/__10_11.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14545b3df6bb160f9b7348806c2d8e711277125ecda2d9dc57725eaecb99e3c +size 193704827 diff --git a/model_and_optim/__10_14.distcp b/model_and_optim/__10_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..cef2f5289c71f6f59c29c3a28e66cedfc6ca5b5d --- /dev/null +++ b/model_and_optim/__10_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148c41bb91946fae036bc2b2f9f882c677035354a45141ae05cfdb5fc3684a5f +size 193578101 diff --git a/model_and_optim/__10_2.distcp b/model_and_optim/__10_2.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0e4ecc5a2ec255e2a6e0c6a41e7d33a0242ffacd --- /dev/null +++ b/model_and_optim/__10_2.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7f0caa2ef2eeb61931f0bbf6974c13f1c07d5f8601285a9162e01d2352d4a9 +size 193461027 diff --git a/model_and_optim/__10_4.distcp b/model_and_optim/__10_4.distcp new file mode 100644 index 0000000000000000000000000000000000000000..db5ae166adeca474f80b8fd38220adc79ccf0ae0 --- /dev/null +++ b/model_and_optim/__10_4.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6f6820e1adb60e03289103e69762c7acb878fb01e5a96382551a83b26b508f +size 193461636 diff --git a/model_and_optim/__12_1.distcp b/model_and_optim/__12_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..45a3390d984c9ea1ffedf00a9386bc847898df45 --- /dev/null +++ b/model_and_optim/__12_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73b07b57f992282ab233063cd08032a803ce6c6021b5cfa6811782d98b6592d2 +size 193467212 diff --git a/model_and_optim/__12_8.distcp b/model_and_optim/__12_8.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b57882dbfb1acaa09fe4f43bfe2e1530f8d77cd9 --- /dev/null +++ b/model_and_optim/__12_8.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b856c459d4c6897d915b713cf70df4e8dffb0a49a276d3399be8f14c25418558 +size 193401014 diff --git a/model_and_optim/__13_3.distcp b/model_and_optim/__13_3.distcp new file mode 100644 index 0000000000000000000000000000000000000000..958538fba01c38d12006b5d25520431e517bccd3 --- /dev/null +++ b/model_and_optim/__13_3.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466278110b0058f39efbf5fe08cb40453f5b6173563e9f01caa48befb99e0e1b +size 193451811 diff --git a/model_and_optim/__14_14.distcp b/model_and_optim/__14_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f1464556b15b6ac3ec1fa049bea8fbd179c9f240 --- /dev/null +++ b/model_and_optim/__14_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a70b1182d865ca32ef338c936bdc259c44d25c2cf6ef09ff85a5a94ecae6853a +size 193578101 diff --git a/model_and_optim/__14_2.distcp b/model_and_optim/__14_2.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6f02b53b99b40b6e7c6ea90dbd11f7c8083003d1 --- /dev/null +++ b/model_and_optim/__14_2.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0c549dee6732cd6f129c43e6455eb7e6fc55c78fa9a4d3321883c8d95e0648 +size 193461027 diff --git a/model_and_optim/__15_1.distcp b/model_and_optim/__15_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..428ff2a60e883f4f0650f70dfd50b956449180d1 --- /dev/null +++ b/model_and_optim/__15_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35bec598dee2e6c081d1623745a076f867c727dbd5630e06ad3bf05b2699770c +size 193467212 diff --git a/model_and_optim/__15_11.distcp b/model_and_optim/__15_11.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7b960412621c3f4d58eaba083921fbf6b7c441f2 --- /dev/null +++ b/model_and_optim/__15_11.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e79da0e258b60db86b8025c07b993ed35c2ebc79dd99f00da0a2e74d08820f4 +size 193704827 diff --git a/model_and_optim/__15_4.distcp b/model_and_optim/__15_4.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fee8b6dc6b253288cb4e25752da3162168dbbbd3 --- /dev/null +++ b/model_and_optim/__15_4.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82d9a894c446de2bfc3d9ba35dfd32bc15e0c0194db01967d1bcada94e2c568 +size 193461636 diff --git a/model_and_optim/__16_7.distcp b/model_and_optim/__16_7.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0a74374dc2f6b2d7ef1958383633ccd7b7932fa7 --- /dev/null +++ b/model_and_optim/__16_7.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec66a54d4ef5180acba44c094e6110a175ffdc7b4187ed7ef257d5c6f74d2682 +size 193440900 diff --git a/model_and_optim/__18_14.distcp b/model_and_optim/__18_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..cf81b5f928359cd738b2da28a98f02bebe877f38 --- /dev/null +++ b/model_and_optim/__18_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71d00cfee908af783bb5c157595795986a93e59a5a8b4cdc4d4252cf4267cd8 +size 193578101 diff --git a/model_and_optim/__19_14.distcp b/model_and_optim/__19_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..54de8502e245511a02e7363e07588adb77dbca3b --- /dev/null +++ b/model_and_optim/__19_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75826ba0bcab9c19075e4f4c1b40b8d904c1db80c8873fff3b968a42c3887cd2 +size 193578101 diff --git a/model_and_optim/__1_14.distcp b/model_and_optim/__1_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e913cc07c16845655bce18435fd76f68b8c5a3cc --- /dev/null +++ b/model_and_optim/__1_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa839e1d98da1656b85afa368d740a95c62311670936111f7d321b470b6a195 +size 193578101 diff --git a/model_and_optim/__1_4.distcp b/model_and_optim/__1_4.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8cb85027057647fbb4c1e08f5f6a982f532a67ce --- /dev/null +++ b/model_and_optim/__1_4.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8dface0174b05bb668161ac4e61e7bb64abc6ad4c8ca7d3f122c84aa18c5a9c +size 193461636 diff --git a/model_and_optim/__1_8.distcp b/model_and_optim/__1_8.distcp new file mode 100644 index 0000000000000000000000000000000000000000..879d9f0082aaa0cad02026fed35b0822ef4c2880 --- /dev/null +++ b/model_and_optim/__1_8.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bbcc23e3f583f537bb5b40706bf137304efb040091b8e52a7186a2db12481a7 +size 193401014 diff --git a/model_and_optim/__1_9.distcp b/model_and_optim/__1_9.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5219c1a4ee729b9dcd63d4e354f6660fa5f42eab --- /dev/null +++ b/model_and_optim/__1_9.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f6682e1dba102c645e0c6ccc9a9c758b9d4854a715d13fc9bc3dbff7920e97 +size 193394102 diff --git a/model_and_optim/__20_10.distcp b/model_and_optim/__20_10.distcp new file mode 100644 index 0000000000000000000000000000000000000000..43097d6da744c6dc63d92a6931bda42b21e4aad3 --- /dev/null +++ b/model_and_optim/__20_10.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c17d9af2524a42c9a9bdd1b531bdf5d75ecc6e5b4688377ae4f4a7f17de5da +size 193427326 diff --git a/model_and_optim/__21_0.distcp b/model_and_optim/__21_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..408cc5ec27fde00666b0a4265f9ba5e54ae91027 --- /dev/null +++ b/model_and_optim/__21_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfff6720de93ee9c2ff8f1efde3229d6070dcedd51f9dfa07d45a6075acf1f9c +size 193405798 diff --git a/model_and_optim/__22_9.distcp b/model_and_optim/__22_9.distcp new file mode 100644 index 0000000000000000000000000000000000000000..86dd8390ccce7dfcb89944e9b14fb6625b81eb96 --- /dev/null +++ b/model_and_optim/__22_9.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc6f09fd3b0c8d36f73710f8188c71ae3600e2c0434d609345c244c5951c66a +size 193394102 diff --git a/model_and_optim/__23_15.distcp b/model_and_optim/__23_15.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6a6bb8aeeb2d400073d271af273a584cc4bf3f83 --- /dev/null +++ b/model_and_optim/__23_15.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0f98a106de9cad78141f361b19b51c21b341b41aac1c70c2df0a18cb938590 +size 193567308 diff --git a/model_and_optim/__23_3.distcp b/model_and_optim/__23_3.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d6982bbeddd7dd8a71b7257ea54acdf854e14252 --- /dev/null +++ b/model_and_optim/__23_3.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d87d535f75281a382008936bc27387d2ab2d4ff3465382bcff7b61be6d58a783 +size 193451811 diff --git a/model_and_optim/__23_6.distcp b/model_and_optim/__23_6.distcp new file mode 100644 index 0000000000000000000000000000000000000000..25f12a920a4158ea4a8f491e311ba979f82e0788 --- /dev/null +++ b/model_and_optim/__23_6.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e93f60ea866036c45c0929faeaa65668d3a5b1a25f4cb2c14cc5bd71e08b5c1 +size 193445508 diff --git a/model_and_optim/__24_4.distcp b/model_and_optim/__24_4.distcp new file mode 100644 index 0000000000000000000000000000000000000000..309c6b810afedca08712904e7a2bfee083c580c2 --- /dev/null +++ b/model_and_optim/__24_4.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f410c1c76b8672a70b374d69ea6678f4b82b78c56468acc07ddd570a83712604 +size 193461636 diff --git a/model_and_optim/__25_10.distcp b/model_and_optim/__25_10.distcp new file mode 100644 index 0000000000000000000000000000000000000000..884c53f3243ee8fe3e58137f21a9e216b3c7b94b --- /dev/null +++ b/model_and_optim/__25_10.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e99a2f1f661117de8cfdf2298d02e6b863ae48fa36c19ee7ff6f8fa67cf40f0 +size 193427326 diff --git a/model_and_optim/__25_15.distcp b/model_and_optim/__25_15.distcp new file mode 100644 index 0000000000000000000000000000000000000000..63fc00835fc20c1f52975e8d492348fff4360fda --- /dev/null +++ b/model_and_optim/__25_15.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d75adac4dca2715d8d8b252ac39afd9f6b8a268ad7a4156e32ed7f23184dba +size 193567308 diff --git a/model_and_optim/__25_6.distcp b/model_and_optim/__25_6.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7f34d93dbee5110457315c0d8921c4ddd2638b0c --- /dev/null +++ b/model_and_optim/__25_6.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9786ada91a5609b13330d09ca950716c1fb4280b5e106626e68b97444784fb1e +size 193445508 diff --git a/model_and_optim/__26_1.distcp b/model_and_optim/__26_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0c6cddc59745c1d32c325a590271734a9c5bce18 --- /dev/null +++ b/model_and_optim/__26_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:916dd8cfcbe1200ce346c68d070fb00ca9e28c573a841546c792282b4899adf1 +size 193467212 diff --git a/model_and_optim/__27_0.distcp b/model_and_optim/__27_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..76f5d737686d6697ada64e74918952042f2c1c6d --- /dev/null +++ b/model_and_optim/__27_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d059390b063cdbb68876351d35f82feb83799490b8fae054395e0b0cb3d13225 +size 193405798 diff --git a/model_and_optim/__27_2.distcp b/model_and_optim/__27_2.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5e10bba8cc1044dab2ed95ee9d6c810051bea357 --- /dev/null +++ b/model_and_optim/__27_2.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d51c741c4cfad841a1d5f37c6f7c90c3ff95f63fea30984d28937312460b4cdc +size 193461027 diff --git a/model_and_optim/__28_5.distcp b/model_and_optim/__28_5.distcp new file mode 100644 index 0000000000000000000000000000000000000000..78d60acd5bcf858c7002cf197832ff0f302141a1 --- /dev/null +++ b/model_and_optim/__28_5.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84aef054ca4035e2ca0032722a7058de8ba4f64030338524812070a0fadcb3e5 +size 193457028 diff --git a/model_and_optim/__29_9.distcp b/model_and_optim/__29_9.distcp new file mode 100644 index 0000000000000000000000000000000000000000..628e31997e2767bccb166e8b7212f5af9bc3fc0a --- /dev/null +++ b/model_and_optim/__29_9.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4655890b30b0ce7ff2af03b8378cfa84d5ad7237b7abf063aae8efde98bbb61 +size 193394102 diff --git a/model_and_optim/__30_15.distcp b/model_and_optim/__30_15.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0b113aab99f5c2f1e9129231e931ffa048135200 --- /dev/null +++ b/model_and_optim/__30_15.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23822029b0e6d4a7f6e74bda0831e7b64df8a546afb1e9f651037a087a2902ac +size 193567308 diff --git a/model_and_optim/__31_10.distcp b/model_and_optim/__31_10.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2fa21f3d4f6ebb59ed1b675c0c0a258f78cffae5 --- /dev/null +++ b/model_and_optim/__31_10.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5295ce59390a8b2842e1e879a1c774f67bec154f8dfb2813982a18ccd5b895 +size 192763774 diff --git a/model_and_optim/__31_2.distcp b/model_and_optim/__31_2.distcp new file mode 100644 index 0000000000000000000000000000000000000000..15b2905d050f5563683f22fd6f6eef2183685387 --- /dev/null +++ b/model_and_optim/__31_2.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfa6c1a8fe5add6ab11d8fa4c4b3a3010684f866cccbd2cb34668a8257b5f06 +size 193166115 diff --git a/model_and_optim/__31_7.distcp b/model_and_optim/__31_7.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5d53d330efc6f66d9e44551c5d126f9bea83f07e --- /dev/null +++ b/model_and_optim/__31_7.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81cce0c5804b272b2efa38e2fc4c4335f5e09c695463f60616a5f66f240e7f03 +size 193440900 diff --git a/model_and_optim/__5_12.distcp b/model_and_optim/__5_12.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f7a9c63fbb2e60e2ac3202ca2637c689986e095d --- /dev/null +++ b/model_and_optim/__5_12.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d5cf25e01585e94eb927d3082bad45dec8f94f2f7a8643b9c163bc198447e1d +size 193697915 diff --git a/model_and_optim/__5_13.distcp b/model_and_optim/__5_13.distcp new file mode 100644 index 0000000000000000000000000000000000000000..dcbec083261360b7735738080b556d8b3f7129c8 --- /dev/null +++ b/model_and_optim/__5_13.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5eacba755bb6c4756617c3653321edd3229d4de5d44b89a1d83aef2828faad5 +size 193691003 diff --git a/model_and_optim/__6_7.distcp b/model_and_optim/__6_7.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a0a399cd24d7b6d7af4595a00ed50c9c40c372fb --- /dev/null +++ b/model_and_optim/__6_7.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9d1f8a6bca0796bce4c7315b14f585ba088e68efb2d44696dae03c55b57836 +size 193440900 diff --git a/model_and_optim/__6_9.distcp b/model_and_optim/__6_9.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f19b75ec040a72e169e8cbfab80fdc77af7ba655 --- /dev/null +++ b/model_and_optim/__6_9.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9454663677d49650300a9b109ed9b85a5d782aa49375ac3d515ed8e0458f375c +size 193394102 diff --git a/model_and_optim/__7_0.distcp b/model_and_optim/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ab8c9bef2632d421c7c0660aabf3cf83aec5621c --- /dev/null +++ b/model_and_optim/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b38a1471fa25b831aa8f8f4bbaae15614b7ce3985e6283a0990670e7e59bcb +size 193405798 diff --git a/model_and_optim/__7_11.distcp b/model_and_optim/__7_11.distcp new file mode 100644 index 0000000000000000000000000000000000000000..67e4171f2c6241b1d755ffd12a1d0e2dfd70196a --- /dev/null +++ b/model_and_optim/__7_11.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245c3ef961fa27a43f67a434e3dd2fbd6cf139dbdeb0952ace67bf82d223b2df +size 193704827 diff --git a/model_and_optim/__8_13.distcp b/model_and_optim/__8_13.distcp new file mode 100644 index 0000000000000000000000000000000000000000..330168a5efbe51b060716386143f55a28fa2120c --- /dev/null +++ b/model_and_optim/__8_13.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875775552bc1c90ca51ffb4624128576a63d49a3dc709dd9946adbf5b1418438 +size 193691003 diff --git a/model_and_optim/__8_4.distcp b/model_and_optim/__8_4.distcp new file mode 100644 index 0000000000000000000000000000000000000000..56863430ef251b5c221d8015c9327be44b985492 --- /dev/null +++ b/model_and_optim/__8_4.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65454079e8329991c5a4ff17ef5bff264f465eb32ae8e4e25b99ba54714ccdad +size 193461636