diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..7f6703f5edc05413181c24a239a183301248480a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,36 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +model_and_optim/__3_13.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__7_1.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__8_1.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__4_9.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__14_6.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__2_2.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__3_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__12_0.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__5_2.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__1_12.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__15_8.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__6_7.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__15_3.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__9_9.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__1_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__11_0.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__14_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__7_7.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__5_1.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__11_5.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__5_3.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__8_6.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__13_1.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__9_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__3_6.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__7_13.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__6_8.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__2_8.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__13_14.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__4_2.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__9_3.distcp filter=lfs diff=lfs merge=lfs -text +model_and_optim/__9_1.distcp filter=lfs diff=lfs merge=lfs -text diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..907897fe70e5d99fce9ad0c9c34340c699d4097c --- /dev/null +++ b/config.yaml @@ -0,0 +1,629 @@ +run_name: multitask_train +model: + model_name: molmo + llm: + d_model: 2560 + n_heads: 32 + n_kv_heads: 8 + head_dim: 128 + qkv_bias: false + clip_qkv: null + n_layers: 36 + mlp_ratio: 4 + mlp_hidden_size: 19456 + activation_type: swiglu + block_type: sequential + rope: true + rope_full_precision: true + rope_theta: 1000000.0 + rope_type: default + rope_factor: null + rope_high_freq_factor: null + rope_low_freq_factor: null + rope_original_max_position_embeddings: null + attention_type: sdpa + float32_attention: true + attention_dropout: 0.0 + attention_layer_norm: true + attention_layer_norm_type: qwen3 + residual_dropout: 0.1 + response_residual_dropout: 0.0 + layer_norm_type: rms + layer_norm_with_affine: true + layer_norm_eps: 1.0e-06 + attention_layer_norm_with_affine: true + max_sequence_length: 4096 + max_position_embeddings: null + include_bias: false + bias_for_layer_norm: null + norm_after: false + moe_num_experts: 8 + moe_top_k: 2 + moe_mlp_impl: sparse + moe_log_expert_assignment: false + moe_shared_expert: false + moe_lbl_in_fp32: false + moe_interleave: false + moe_loss_weight: 0.1 + moe_zloss_weight: null + moe_dropless: true + moe_capacity_factor: 1.25 + embedding_dropout: 0.0 + scale_logits: false + vocab_size: 151936 + additional_vocab_size: 128 + weight_tying: true + embedding_size: 151936 + use_position_ids: true + tokenizer: + identifier: Qwen/Qwen3-4B + tokenizer_dir: null + init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen3-4b.pt + init_incremental: null + new_embedding_init_range: 0.02 + initializer_range: 0.02 + normalize_input_embeds: false + activation_checkpoint: whole_layer + compile: blocks + fix_pad_tokenizer: false + init_std: 0.02 + init_fn: normal + init_cutoff_factor: null + vision_backbone: + vit: + image_model_type: siglip + image_default_input_size: + - 378 + - 378 + image_patch_size: 14 + image_pos_patch_size: 14 + image_emb_dim: 1152 + image_num_heads: 16 + image_num_key_value_heads: 16 + image_num_layers: 27 + image_head_dim: 72 + image_mlp_dim: 4304 + image_mlp_activations: gelu_pytorch_tanh + image_dropout_rate: 0.0 + image_num_pos: 729 + image_norm_eps: 1.0e-06 + attention_dropout: 0.0 + residual_dropout: 0.0 + initializer_range: 0.02 + float32_attention: true + attention_type: sdpa + activation_checkpointing: true + init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt + resize_mode: siglip + pad_value: 0.0 + normalize: siglip + image_pooling_2d: attention_meanq + pooling_attention_mask: false + image_projector: mlp + image_padding_embed: null + vit_layers: + - -3 + - -9 + skip_unused_layers: true + image_feature_dropout: 0.0 + connector_activation_checkpointing: true + compile_vit: blocks + compile_connector: dynamic + normalize_on_gpu: false + data_formatter: + prompt_templates: uber_model + message_format: role + system_prompt: demo_or_style + always_start_with_space: false + default_inference_len: 65 + select_answer: best + debug: false + image_last: false + format_message_list: null + p_one_message: 0.0 + eval_system_prompt_mapping: null + timestamp_mode: 50-percent-seconds + p_choice_content_in_mc: 1.0 + mm_preprocessor: + crop_mode: overlap-and-resize-c2 + use_col_tokens: true + max_crops: 8 + pooling_w: 2 + pooling_h: 2 + overlap_margins: + - 4 + - 4 + max_images: null + max_multi_image_crops: 4 + max_answer_len: null + last_message_loss_only: false + loss_token_weighting: root_subsegments + max_text_tokens: null + image_padding_mask: false + legacy_image_mask: false + bi_directional_attn: null +seed: 6198 +epoch: null +dry_run: false +ft_llm: true +ft_vit: true +ft_connector: true +ft_embedding: lm_head +optimizer: + name: adamw + learning_rate: 0.0001 + weight_decay: 0.01 + betas: + - 0.9 + - 0.95 + eps: 1.0e-05 + connector_learning_rate: 5.0e-06 + vit_learning_rate: 5.0e-06 + llm_learning_rate: 1.0e-05 + frame_selector_learning_rate: 0.0001 + temporal_token_scorer_learning_rate: 0.0001 + connector_weight_decay: 0.0 + vit_weight_decay: 0.0 + llm_weight_decay: 0.0 + frame_selector_weight_decay: 0.01 + temporal_token_scorer_weight_decay: 0.01 + connector_betas: + - 0.9 + - 0.95 + vit_betas: + - 0.9 + - 0.95 + llm_betas: + - 0.9 + - 0.95 + frame_selector_betas: + - 0.9 + - 0.95 + temporal_token_scorer_betas: + - 0.9 + - 0.95 + connector_eps: 1.0e-06 + vit_eps: 1.0e-06 + llm_eps: 1.0e-06 + frame_selector_eps: 1.0e-06 + temporal_token_scorer_eps: 1.0e-06 + metrics_log_interval: -1 +scheduler: + name: multimodal + units: steps + t_warmup: 100 + t_max: null + alpha_f: 0.1 + connector_t_warmup: 200 + vit_t_warmup: 200 + llm_t_warmup: 200 + frame_selector_t_warmup: 200 + temporal_token_scorer_t_warmup: 200 + grad_clip_warmup_steps: null + grad_clip_warmup_factor: null + warmup_min_lr: 0.0 +data: + dataset: null + mixture: null + root_size_mixture: + - rate: 0.15 + mixture: + pixmo_ask_model_anything: null + pixmo_cap: 50000.0 + pixmo_cap_qa_as_user_qa: null + pixmo_pointing_explanations: null + - rate: 0.5 + mixture: + coco_2014_vqa_multi: null + text_vqa: null + okvqa: null + chart_qa_weighted: null + doc_qa: null + info_qa: null + ai2_diagram_v2_mix_transparent: null + a_okvqa_mc: null + a_okvqa_da: null + android_control: null + science_qa_img: null + tabwmp_da: null + st_qa: null + tally_qa: null + pixmo_clocks: 250000.0 + dv_qa: 10000.0 + figure_qa: 10000.0 + plot_qa: 20000.0 + cosyn_chart_exp: null + cosyn_chemical_exp: null + cosyn_diagram_exp: null + cosyn_document: null + cosyn_math_exp: null + cosyn_music_exp: null + cosyn_table_exp: null + - rate: 0.35 + mixture: + pixmo_points_train: null + pixmo_count_train: null + pixmo_points_high_freq_train: null + cosyn_point: null + kwargs_mixture: null + split: train + seed: 50189 + pad: to_max + sequence_length: 2304 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: false + timeout: 0 +restore_dataloader: true +fast_forward_batches: null +evaluators: [] +eval_interval: 2000 +inf_evaluators: +- label: chart_qa_exp + data: + dataset: chart_qa_exp + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: relaxed_correctness,scifi_relaxed_correctness,em + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mme_videoocr_mc_eval: false + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + dream1k_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 256 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: info_qa + data: + dataset: info_qa + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: ansl,em + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mme_videoocr_mc_eval: false + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + dream1k_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: doc_qa + data: + dataset: doc_qa + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: ansl,em + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mme_videoocr_mc_eval: false + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + dream1k_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: ai2_diagram + data: + dataset: ai2_diagram_v2_mix_transparent + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: mc_ai2d_opaque,mc_ai2d_transparent + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mme_videoocr_mc_eval: false + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + dream1k_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 32 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +- label: coco_2014_vqa + data: + dataset: coco_2014_vqa + mixture: null + root_size_mixture: null + kwargs_mixture: null + split: validation + seed: 691203 + pad: to_max + sequence_length: 1792 + max_text_seq_len: null + shuffle: true + start_index: 0 + packing: null + num_workers: 2 + drop_last: true + pin_memory: true + prefetch_factor: null + persistent_workers: true + timeout: 0 + evaluator: + n_to_log: 0 + num_wandb_examples: 32 + save_predictions: null + save_tokens: false + vqa_eval: vqa_score + pointing_eval: false + count_eval: false + point_count_eval: false + android_eval: false + clock_eval: false + clock_bench_eval: false + math_vista_eval: false + temp_compass_eval: '' + temp_compass_disable_api: false + video_mme_eval: '' + mme_videoocr_mc_eval: false + mlvu_gen_eval: false + long_video_bench_eval: false + plm_fgqa_eval: false + long_video_bench_caption_eval: false + vinoground_eval: false + vixmo_caption_eval: false + dream1k_caption_eval: false + refexp_eval: false + coco_caption_eval: false + qv_highlights_eval: false + tomato: false + temporal_bench: false + max_new_tokens: 12 + device_batch_size: 4 + subset_num_batches: null + max_examples: 2048 + console_log_interval: 20 + include_image: false +inf_eval_interval: 2000 +eval_on_last_step: true +eval_on_load: false +save_folder: /weka/oe-training-default/sanghol/molmo/models/uber-v1/uber3.4-synthetic-siglip2-qwen3_4b +checkpointer_config: + save_thread_count: null + load_thread_count: null + pre_download: false + work_dir: null + throttle_uploads: false +canceled_check_interval: 50 +save_interval: 1000 +save_at: null +save_final_optim: true +save_num_checkpoints_to_keep: 1 +save_final_unsharded_checkpoint: false +save_interval_ephemeral: null +save_overwrite: true +load_path: null +reset_optimizer_state: false +reset_trainer_state: false +initial_model_checkpoint: /weka/oe-training-default/chrisk/molmo/models/dense-cap-v1/captioner-siglip2-qwen3_4b/step22347 +allow_resume: true +max_duration: 30000 +global_train_batch_size: 256 +device_train_microbatch_size: 4 +max_grad_norm: 1.0 +multi_component_grad_norm: true +batch_divisor: global_batch +max_grad_norm_ratio: null +precision: amp_bf16 +wandb: + project: molmo2-dev + entity: prior-ai2 + group: uber-v1 + name: uber3.4-synthetic-siglip2-qwen3_4b + tags: + - watching + log_artifacts: false + rank_zero_only: true + log_interval: 20 + allow_resume: false +beaker_log_interval: 50 +speed_monitor: + window_size: 20 + gpu_flops_available: null +console_log_interval: 20 +gen1_gc_interval: 1 +compile: + mode: default + fullgraph: false + dynamic: false + backend: inductor +activation_checkpointing: true +fsdp: + fsdp2: true + precision: float + use_orig_params: true + wrapping_strategy: by_block_and_size + sharding_strategy: FULL_SHARD + hybrid_sharding_num_model_replicas: null +softmax_auxiliary_loss: true +softmax_auxiliary_loss_scale: 0.0001 +saliency_score_loss_wt: null +frame_score_loss_wt: null +frame_score_loss_type: mse +frame_score_loss_target: 0.7 +time_limit: null +extra_steps_after_cancel: 10 +python_profiling: false +torch_profiling: false +stop_at: 30000 +stop_after: null +fused_loss: false +compile_loss: true +runtime_data: + args: /gantry-runtime/launch_scripts/train_multitask_model.py 3.4-synthetic /weka/oe-training-default/chrisk/molmo/models/dense-cap-v1/captioner-siglip2-qwen3_4b + --save_overwrite --save_interval=1000 --wandb.group=uber-v1 --wandb.name=uber3.4-synthetic-siglip2-qwen3_4b + --save_folder=/weka/oe-training-default/sanghol/molmo/models/uber-v1/uber3.4-synthetic-siglip2-qwen3_4b + hostname: jupiter-cs-aus-112.reviz.ai2.in + date: 09/17/2025, 10:56 + world_size: 16 + resuming_from: /weka/oe-training-default/sanghol/molmo/models/uber-v1/uber3.4-synthetic-siglip2-qwen3_4b/step27000 + beaker_experiment_id: 01K567APCW88M30XRRXSACWXPR + beaker_experiment_url: https://beaker.org/ex/01K567APCW88M30XRRXSACWXPR + wandb_id: 5307t37m + wandb_url: https://wandb.ai/prior-ai2/molmo2-dev/runs/5307t37m diff --git a/model_and_optim/__11_0.distcp b/model_and_optim/__11_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d547fff9accc405eebbda7e644cf91ddd8a4b2de --- /dev/null +++ b/model_and_optim/__11_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036d3530bc0f84e4b39bb554e3f5366508408a07682acc89ce4c6b6c25ff25d8 +size 197127554 diff --git a/model_and_optim/__11_5.distcp b/model_and_optim/__11_5.distcp new file mode 100644 index 0000000000000000000000000000000000000000..69988f6861237623369de085e53fd354b4f5a507 --- /dev/null +++ b/model_and_optim/__11_5.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d89b473d945cd8aa4afda5ac21e4c174ce99fb6dc2d28b5ed59d26f354fa2e +size 197309771 diff --git a/model_and_optim/__12_0.distcp b/model_and_optim/__12_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..11b1af3822aaba8124248e3f5aeb30f5942423f4 --- /dev/null +++ b/model_and_optim/__12_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3867052c128477976fa5fce2163827afa499d476da8e75f5cad7ab330de1733d +size 197127554 diff --git a/model_and_optim/__13_1.distcp b/model_and_optim/__13_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7eb6bc01d5a58e8ad5e0971025ca175e213aa85c --- /dev/null +++ b/model_and_optim/__13_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6cfa136d98a316a592271f724b5e92cc85e791164f1972f0b65e89c15604fb +size 197124856 diff --git a/model_and_optim/__13_14.distcp b/model_and_optim/__13_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..68e9750c109869979003bf94501f12713974d980 --- /dev/null +++ b/model_and_optim/__13_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544981e85717915dccd2cd32953a82ba670de251c7e51ba1c905d36bd8f4d618 +size 197319955 diff --git a/model_and_optim/__14_14.distcp b/model_and_optim/__14_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..646238580e22d48f15a107d888314fd9fc6f7037 --- /dev/null +++ b/model_and_optim/__14_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e0b83932a8388f8f75c991e177747007d8d22ee3f4721cc3bb1855befe8356 +size 197319955 diff --git a/model_and_optim/__14_6.distcp b/model_and_optim/__14_6.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1f2836ddaac06b7e33ebf6c2a760fd157e969d80 --- /dev/null +++ b/model_and_optim/__14_6.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e3fdfb3854204248eeaaf824c438c02cd2940942e7196329598dbe982c7527 +size 197308194 diff --git a/model_and_optim/__15_3.distcp b/model_and_optim/__15_3.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a92d8b372cc26bc235580940b8a845dda0abc2ee --- /dev/null +++ b/model_and_optim/__15_3.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e1f8771208100081d129c3312f300125293afeb5555b9ea2530019f6b8ce00 +size 197124856 diff --git a/model_and_optim/__15_8.distcp b/model_and_optim/__15_8.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4102f7117e43ea3af6e01e0abaf04a2f4e12d319 --- /dev/null +++ b/model_and_optim/__15_8.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d9a3f125531fabe7c86738b0109c19e2359935a5a22b14ac81268a0352ff4c +size 197195331 diff --git a/model_and_optim/__1_12.distcp b/model_and_optim/__1_12.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9f6018eca05f7fae0b74145677670354ea7361ce --- /dev/null +++ b/model_and_optim/__1_12.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ce2d6636e08b88487346486793577ea5759c1fa02e5a2dc0238af77d447a7b +size 197145891 diff --git a/model_and_optim/__1_14.distcp b/model_and_optim/__1_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3d99fbfbd7def9a3598c3595a4cefd35011e3572 --- /dev/null +++ b/model_and_optim/__1_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10db0f47254174128fabd4f06b9f325d0b76b95da216c16fbd9044f55419bd1f +size 197319955 diff --git a/model_and_optim/__2_0.distcp b/model_and_optim/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7fb23768b22d5cc6b3ff4fc77a774466be994fce --- /dev/null +++ b/model_and_optim/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1822b1376bd567b8a5eeebf12b54f4961ad33c20a5f61825a0c931cb0c44626 +size 197127554 diff --git a/model_and_optim/__2_2.distcp b/model_and_optim/__2_2.distcp new file mode 100644 index 0000000000000000000000000000000000000000..94bbd110cad5dc496cb9e90e11110142667d2cfa --- /dev/null +++ b/model_and_optim/__2_2.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b5c9561b5b294105cc426c204ba94aa969af192ea31541317dc0f522978fd03 +size 197124856 diff --git a/model_and_optim/__2_8.distcp b/model_and_optim/__2_8.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c4510b79e188206d350c97fc8b3bfe5325262ca9 --- /dev/null +++ b/model_and_optim/__2_8.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:511fe5311412e0c60695113c12fdb49de202320f988d281bd5b11abcb0eacb92 +size 197195331 diff --git a/model_and_optim/__3_13.distcp b/model_and_optim/__3_13.distcp new file mode 100644 index 0000000000000000000000000000000000000000..721ea57551cba0340059343309a9de3b33bb67ad --- /dev/null +++ b/model_and_optim/__3_13.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e8eea91aa896c0c0929dd40bd6af91d37364ffdc39d0b1153355dd012626dba +size 197319955 diff --git a/model_and_optim/__3_14.distcp b/model_and_optim/__3_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a2f1c810ff2d76b006de0da547a44de83d664bb2 --- /dev/null +++ b/model_and_optim/__3_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd757baf9d4dc3823fb5684477d6cc9c95d845086fd366e3d2b48a6439ef7f7 +size 197319955 diff --git a/model_and_optim/__3_6.distcp b/model_and_optim/__3_6.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3409c7794e47feb15e1995f4be8220867cb7483c --- /dev/null +++ b/model_and_optim/__3_6.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18bd08d0b52978d27846f8ba3c73ca54faa6ba35ac85b0105c3af4f47cde0dfa +size 197308194 diff --git a/model_and_optim/__4_2.distcp b/model_and_optim/__4_2.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0ef78bba4d8214be1c004bd0c10e388afdf883fa --- /dev/null +++ b/model_and_optim/__4_2.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa030e27ea0e133e2a904a68107813863ed72b8534f9dda9b30a2387f4fa8b4d +size 197124856 diff --git a/model_and_optim/__4_9.distcp b/model_and_optim/__4_9.distcp new file mode 100644 index 0000000000000000000000000000000000000000..72abdd661b65370334675b357efa9d14ca2c27cb --- /dev/null +++ b/model_and_optim/__4_9.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5039f31d76b1530e3db4ae1d283e297e592d75d869244647e07f05acb584fd4a +size 197182971 diff --git a/model_and_optim/__5_1.distcp b/model_and_optim/__5_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..34d81e45276151ccdd6385d61f1a6344fa5a7053 --- /dev/null +++ b/model_and_optim/__5_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0714aaff801c1f87f68c2e865a8c92806864c716f59e3c2e387c66debb9bb020 +size 197124856 diff --git a/model_and_optim/__5_2.distcp b/model_and_optim/__5_2.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1dfd395c026da9a6132925ae5e993b11eefbb8e5 --- /dev/null +++ b/model_and_optim/__5_2.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0942f55880e603dfcfd2dde9eb9246bd19b03d977cb5799a530cd5a3f3880eaa +size 197124856 diff --git a/model_and_optim/__5_3.distcp b/model_and_optim/__5_3.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4bdbf27cd6f21c0443eaa9be841940dd220ea55c --- /dev/null +++ b/model_and_optim/__5_3.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:235fa65eda6e32eee2534977603d237baf9726bd3555e3d228b3882c777e4a00 +size 197124856 diff --git a/model_and_optim/__6_7.distcp b/model_and_optim/__6_7.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c6321bc5d5cb7b92f1f0d80ae6e93bfc89c8a7e3 --- /dev/null +++ b/model_and_optim/__6_7.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f2bfca1f88be1bfdbe97c9cc96cc8baed131fc024b578214f857a2da140fc5 +size 197182971 diff --git a/model_and_optim/__6_8.distcp b/model_and_optim/__6_8.distcp new file mode 100644 index 0000000000000000000000000000000000000000..10387ad631fbcbd5453dc338e26758e6b8d6c4ac --- /dev/null +++ b/model_and_optim/__6_8.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f17b8747158fada7b747868de492d9698885baa2beedbefe2b0d21990013c1d +size 197195331 diff --git a/model_and_optim/__7_1.distcp b/model_and_optim/__7_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8ebe8a9d3332f4d3567130aba1e538a2914e5b7d --- /dev/null +++ b/model_and_optim/__7_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4eb9bf8637ea2bb7c39dc4720e39dbd3ef5109742c8c4a59f8e2c99271a861 +size 197124856 diff --git a/model_and_optim/__7_13.distcp b/model_and_optim/__7_13.distcp new file mode 100644 index 0000000000000000000000000000000000000000..bf85e30aee6e5bca4ee21cef8572a832c9ef6596 --- /dev/null +++ b/model_and_optim/__7_13.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a1d8c511e834e147bba36295cec622a4b76ec7ac8f893511ec03d66d46de61 +size 197319955 diff --git a/model_and_optim/__7_7.distcp b/model_and_optim/__7_7.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b3aae200766204cd4696599a687f8b4d23bd69cc --- /dev/null +++ b/model_and_optim/__7_7.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26405a974e58bf62834ec8b429b584633892ff41f47f14d07c14b278b76a39e4 +size 197182971 diff --git a/model_and_optim/__8_1.distcp b/model_and_optim/__8_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..318b369e512cb062fa17360b411abe3a5f19d196 --- /dev/null +++ b/model_and_optim/__8_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4bb5740b18cf7c1574195df8f44a2aab2d5731eadb900a7a04e86fa37756eb5 +size 197124856 diff --git a/model_and_optim/__8_6.distcp b/model_and_optim/__8_6.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7098c7f1c09334e32b4c6eae6d1edb3d6e9b794a --- /dev/null +++ b/model_and_optim/__8_6.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ef9e41bd3eeff3b2856205661404f9c6aae7ec3bd53ad1cceffa6a1935d188 +size 197308194 diff --git a/model_and_optim/__9_1.distcp b/model_and_optim/__9_1.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1214c08dd8bda070618622c1b79adf1677eef1b1 --- /dev/null +++ b/model_and_optim/__9_1.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9984326b5354a57ca982982b56426d1046c24a2e68f02d3e7597587438383e8e +size 197124856 diff --git a/model_and_optim/__9_14.distcp b/model_and_optim/__9_14.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1247fcb15a7060ec7d1c9c0a0a2c416a679a0163 --- /dev/null +++ b/model_and_optim/__9_14.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f766bc24548057646be5e3e26380b50ab67d5cc7f67c1ee03a869c80d5aa681f +size 197319955 diff --git a/model_and_optim/__9_3.distcp b/model_and_optim/__9_3.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8ddb1559f3df0c1850c70dac5b7524e1e2e8292a --- /dev/null +++ b/model_and_optim/__9_3.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23942e76b2fcc80ba1c6d7153c7cd5f14c481b4a109ba1983f2151576265b825 +size 197124856 diff --git a/model_and_optim/__9_9.distcp b/model_and_optim/__9_9.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e8074bef9a887cb02967efbbb49287fba1448695 --- /dev/null +++ b/model_and_optim/__9_9.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6efa678dd4a9c00f71f39a9029b0bbf46743630e6c29a1c6c7bfa82d9beee0df +size 197182971 diff --git a/train/rank0.pt b/train/rank0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2eaf217852ecce1fe8e2c044ec7bed0e1e0e269d --- /dev/null +++ b/train/rank0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790482fab49a0c3d2c64a9984b8fdc8b0115b669feac925eb8a2c2d51bdf85fc +size 14997 diff --git a/train/rank1.pt b/train/rank1.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb893b3350940352a422dc8a7b21045b31019792 --- /dev/null +++ b/train/rank1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565ca8a17981b2bec78be27fbc6a592fabe408801a1b870c8b28ecda63e2ef99 +size 15061 diff --git a/train/rank10.pt b/train/rank10.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad859e2762d79b7300b355885e6f237609eeb2fb --- /dev/null +++ b/train/rank10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26290f02251c772bfafa80337265b84771ace54a31ca80d81e4b043e7b3b6738 +size 15069 diff --git a/train/rank11.pt b/train/rank11.pt new file mode 100644 index 0000000000000000000000000000000000000000..7864e14666ec8bbb224ffa4a2ba36f77a7cb6b28 --- /dev/null +++ b/train/rank11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ebdc2c5fb99cd077b786716c1ff9774704307c37d9e82eb8108976052b54e62 +size 15069 diff --git a/train/rank12.pt b/train/rank12.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5fdacd136b1cfd8aadacd8ed811ee001d98a537 --- /dev/null +++ b/train/rank12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bd2bdf4e4829c6af91be63447a01eeb27b6e7909405f1de9e089cb9b88c240 +size 15069 diff --git a/train/rank13.pt b/train/rank13.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d6c3d974be9d5fda3452efc6e9ca428e84647f5 --- /dev/null +++ b/train/rank13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:106b7bf9c85649c99541d2bd79eddee0109332b21094df28f18a0d62e69aa622 +size 15069 diff --git a/train/rank14.pt b/train/rank14.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b7928714293c680d8ba849a49689f71e5981a2b --- /dev/null +++ b/train/rank14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14fb7452113d272c5674e2630da1e79ae9f8bc87ac85234f8024c17e1b287991 +size 15069 diff --git a/train/rank15.pt b/train/rank15.pt new file mode 100644 index 0000000000000000000000000000000000000000..1012a45308e1bc07233196d116a1a86d64ecfa3e --- /dev/null +++ b/train/rank15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d533fd5701673ab6de194178693854038b5f6b570c5fcc56555f68365103217b +size 15069 diff --git a/train/rank2.pt b/train/rank2.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d35e398812ccb434ab6a977ad47209c5056fd62 --- /dev/null +++ b/train/rank2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f7076eb17dffd55535f64c004c2cd1a0af952f1d92ece76edc645d41fe7fde9 +size 15061 diff --git a/train/rank3.pt b/train/rank3.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea56d2f9f90b49fe564f592c22e2e9b471f43753 --- /dev/null +++ b/train/rank3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6257eb07a70516fae3192d00df2c9b9a1b5515edab68e25bab7703446a8a2f78 +size 15061 diff --git a/train/rank4.pt b/train/rank4.pt new file mode 100644 index 0000000000000000000000000000000000000000..581683565508a4583e8ad2c42fad41116c0d0226 --- /dev/null +++ b/train/rank4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74dc3674cfa577eaa5d0e9968ab31069e33b3eca5f003cc41af41970880d8661 +size 15061 diff --git a/train/rank5.pt b/train/rank5.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc427dbe7e9bee186ab174df9c38c5077e4b5eb3 --- /dev/null +++ b/train/rank5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66aebfe1843cbdb2ec33d9db1d8ff39815fb59da3074fd26b2f4ad1fa8d36bc +size 15061 diff --git a/train/rank6.pt b/train/rank6.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e6ac5d67756e8668a95564c6114f300b660d949 --- /dev/null +++ b/train/rank6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220671d73c8066bb3775b27ca91e81897d22c081f7532875a7d7dd0463b39ddb +size 15061 diff --git a/train/rank7.pt b/train/rank7.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3c86ec1c5099551ebd7bcdb7cba55ed8d80e425 --- /dev/null +++ b/train/rank7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baaac80ef69d25c6e82f184cc612948bc4fac1ecbe31ddc187c594eea901be59 +size 15061 diff --git a/train/rank8.pt b/train/rank8.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5bf31a43d5cc9c6921a41d7b3909f0b96f660e0 --- /dev/null +++ b/train/rank8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28511be4e382e3117f3f6951da76b80c10fbe282321987c50251bfa14b7ef20 +size 15061 diff --git a/train/rank9.pt b/train/rank9.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b89b19e70fe77130b15628dc63c62f3d3f4ab00 --- /dev/null +++ b/train/rank9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146f3de4264077ec894af571eb8be19c9bfc94f25f17a822ba1f7bde37bae8ef +size 15061