MgGladys commited on Feb 9

Commit

43feefe

verified ·

1 Parent(s): 2acd7e7

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

assets/example.jpg +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json +24 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja +7 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json +24 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja +7 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/merges.txt +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json +29 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json +31 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json +208 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json +734 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/vocab.json +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json +24 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja +7 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/merges.txt +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json +29 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json +31 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json +208 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/trainer_state.json +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/vocab.json +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/added_tokens.json +24 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/chat_template.jinja +7 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/merges.txt +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/preprocessor_config.json +29 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/special_tokens_map.json +31 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/tokenizer_config.json +208 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/trainer_state.json +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/added_tokens.json +24 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/chat_template.jinja +7 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/preprocessor_config.json +29 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/special_tokens_map.json +31 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/tokenizer_config.json +208 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/trainer_state.json +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/merges.txt +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json +29 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/special_tokens_map.json +31 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json +208 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/vocab.json +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/train_cls.log +0 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug-internal.log +6 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug.log +22 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/output.log +96 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/requirements.txt +147 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/wandb-metadata.json +83 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-core.log +7 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log +6 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log +22 -0
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/run-autp4ou6.wandb +0 -0
experiments/examples/llava_next/demo.py +46 -0
experiments/examples/llava_next/run_eval.sh +12 -0

assets/example.jpg ADDED Viewed

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,7 @@

+{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+You are a helpful assistant.<|im_end|>
+{% endif %}<|im_start|>{{ message['role'] }}
+{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,7 @@

+{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+You are a helpful assistant.<|im_end|>
+{% endif %}<|im_start|>{{ message['role'] }}
+{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2_5_VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 1003520,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2_5_VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "max_pixels": 1003520,
+    "min_pixels": 3136
+  },
+  "temporal_patch_size": 2
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Qwen2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json ADDED Viewed

	@@ -0,0 +1,734 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.026048450117218024,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.00026048450117218026,
+      "grad_norm": 6.6370320320129395,
+      "learning_rate": 0.0,
+      "loss": 1.5389,
+      "step": 1
+    },
+    {
+      "epoch": 0.0005209690023443605,
+      "grad_norm": 7.1702704429626465,
+      "learning_rate": 5e-06,
+      "loss": 1.3234,
+      "step": 2
+    },
+    {
+      "epoch": 0.0007814535035165407,
+      "grad_norm": 8.348443984985352,
+      "learning_rate": 1e-05,
+      "loss": 1.2501,
+      "step": 3
+    },
+    {
+      "epoch": 0.001041938004688721,
+      "grad_norm": 7.599966526031494,
+      "learning_rate": 1.5e-05,
+      "loss": 1.4676,
+      "step": 4
+    },
+    {
+      "epoch": 0.0013024225058609013,
+      "grad_norm": 5.925275802612305,
+      "learning_rate": 2e-05,
+      "loss": 1.1892,
+      "step": 5
+    },
+    {
+      "epoch": 0.0015629070070330815,
+      "grad_norm": 7.288003921508789,
+      "learning_rate": 2.5e-05,
+      "loss": 1.492,
+      "step": 6
+    },
+    {
+      "epoch": 0.0018233915082052619,
+      "grad_norm": 7.2629218101501465,
+      "learning_rate": 3e-05,
+      "loss": 1.3525,
+      "step": 7
+    },
+    {
+      "epoch": 0.002083876009377442,
+      "grad_norm": 9.106829643249512,
+      "learning_rate": 3.5000000000000004e-05,
+      "loss": 1.6033,
+      "step": 8
+    },
+    {
+      "epoch": 0.0023443605105496223,
+      "grad_norm": 8.875594139099121,
+      "learning_rate": 4e-05,
+      "loss": 1.5238,
+      "step": 9
+    },
+    {
+      "epoch": 0.0026048450117218025,
+      "grad_norm": 7.383709907531738,
+      "learning_rate": 4.4999999999999996e-05,
+      "loss": 1.4859,
+      "step": 10
+    },
+    {
+      "epoch": 0.0028653295128939827,
+      "grad_norm": 7.463179111480713,
+      "learning_rate": 5e-05,
+      "loss": 1.3504,
+      "step": 11
+    },
+    {
+      "epoch": 0.003125814014066163,
+      "grad_norm": 7.137135028839111,
+      "learning_rate": 5.5e-05,
+      "loss": 1.358,
+      "step": 12
+    },
+    {
+      "epoch": 0.003386298515238343,
+      "grad_norm": 5.68809175491333,
+      "learning_rate": 6e-05,
+      "loss": 1.2017,
+      "step": 13
+    },
+    {
+      "epoch": 0.0036467830164105238,
+      "grad_norm": 6.024169921875,
+      "learning_rate": 6.500000000000001e-05,
+      "loss": 1.1491,
+      "step": 14
+    },
+    {
+      "epoch": 0.003907267517582704,
+      "grad_norm": 5.510103225708008,
+      "learning_rate": 7.000000000000001e-05,
+      "loss": 1.1809,
+      "step": 15
+    },
+    {
+      "epoch": 0.004167752018754884,
+      "grad_norm": 6.086293697357178,
+      "learning_rate": 7.5e-05,
+      "loss": 1.2069,
+      "step": 16
+    },
+    {
+      "epoch": 0.004428236519927064,
+      "grad_norm": 5.8847551345825195,
+      "learning_rate": 8e-05,
+      "loss": 1.2933,
+      "step": 17
+    },
+    {
+      "epoch": 0.004688721021099245,
+      "grad_norm": 5.263647079467773,
+      "learning_rate": 8.5e-05,
+      "loss": 1.0476,
+      "step": 18
+    },
+    {
+      "epoch": 0.004949205522271425,
+      "grad_norm": 5.684865951538086,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 1.1442,
+      "step": 19
+    },
+    {
+      "epoch": 0.005209690023443605,
+      "grad_norm": 4.671970844268799,
+      "learning_rate": 9.5e-05,
+      "loss": 1.0422,
+      "step": 20
+    },
+    {
+      "epoch": 0.005470174524615785,
+      "grad_norm": 7.935784816741943,
+      "learning_rate": 0.0001,
+      "loss": 1.1025,
+      "step": 21
+    },
+    {
+      "epoch": 0.0057306590257879654,
+      "grad_norm": 4.634947299957275,
+      "learning_rate": 0.000105,
+      "loss": 0.9849,
+      "step": 22
+    },
+    {
+      "epoch": 0.005991143526960146,
+      "grad_norm": 4.8161516189575195,
+      "learning_rate": 0.00011,
+      "loss": 0.9843,
+      "step": 23
+    },
+    {
+      "epoch": 0.006251628028132326,
+      "grad_norm": 4.3339762687683105,
+      "learning_rate": 0.000115,
+      "loss": 0.8714,
+      "step": 24
+    },
+    {
+      "epoch": 0.006512112529304506,
+      "grad_norm": 3.4047181606292725,
+      "learning_rate": 0.00012,
+      "loss": 0.8898,
+      "step": 25
+    },
+    {
+      "epoch": 0.006772597030476686,
+      "grad_norm": 4.15224552154541,
+      "learning_rate": 0.000125,
+      "loss": 1.0079,
+      "step": 26
+    },
+    {
+      "epoch": 0.0070330815316488665,
+      "grad_norm": 3.5006914138793945,
+      "learning_rate": 0.00013000000000000002,
+      "loss": 0.8013,
+      "step": 27
+    },
+    {
+      "epoch": 0.0072935660328210476,
+      "grad_norm": 2.773101806640625,
+      "learning_rate": 0.000135,
+      "loss": 0.7086,
+      "step": 28
+    },
+    {
+      "epoch": 0.007554050533993228,
+      "grad_norm": 2.4410135746002197,
+      "learning_rate": 0.00014000000000000001,
+      "loss": 0.7198,
+      "step": 29
+    },
+    {
+      "epoch": 0.007814535035165408,
+      "grad_norm": 2.5674309730529785,
+      "learning_rate": 0.000145,
+      "loss": 0.6359,
+      "step": 30
+    },
+    {
+      "epoch": 0.008075019536337588,
+      "grad_norm": 2.310837984085083,
+      "learning_rate": 0.00015,
+      "loss": 0.6039,
+      "step": 31
+    },
+    {
+      "epoch": 0.008335504037509768,
+      "grad_norm": 2.4884161949157715,
+      "learning_rate": 0.000155,
+      "loss": 0.8962,
+      "step": 32
+    },
+    {
+      "epoch": 0.008595988538681949,
+      "grad_norm": 5.428861141204834,
+      "learning_rate": 0.00016,
+      "loss": 0.5576,
+      "step": 33
+    },
+    {
+      "epoch": 0.008856473039854129,
+      "grad_norm": 2.035452127456665,
+      "learning_rate": 0.000165,
+      "loss": 0.5866,
+      "step": 34
+    },
+    {
+      "epoch": 0.009116957541026309,
+      "grad_norm": 4.757160663604736,
+      "learning_rate": 0.00017,
+      "loss": 0.4413,
+      "step": 35
+    },
+    {
+      "epoch": 0.00937744204219849,
+      "grad_norm": 2.8071913719177246,
+      "learning_rate": 0.000175,
+      "loss": 0.7013,
+      "step": 36
+    },
+    {
+      "epoch": 0.00963792654337067,
+      "grad_norm": 3.3390369415283203,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.348,
+      "step": 37
+    },
+    {
+      "epoch": 0.00989841104454285,
+      "grad_norm": 2.469451665878296,
+      "learning_rate": 0.000185,
+      "loss": 0.72,
+      "step": 38
+    },
+    {
+      "epoch": 0.01015889554571503,
+      "grad_norm": 2.7830817699432373,
+      "learning_rate": 0.00019,
+      "loss": 0.671,
+      "step": 39
+    },
+    {
+      "epoch": 0.01041938004688721,
+      "grad_norm": 3.005566358566284,
+      "learning_rate": 0.00019500000000000002,
+      "loss": 0.5808,
+      "step": 40
+    },
+    {
+      "epoch": 0.01067986454805939,
+      "grad_norm": 2.8901026248931885,
+      "learning_rate": 0.0002,
+      "loss": 0.6022,
+      "step": 41
+    },
+    {
+      "epoch": 0.01094034904923157,
+      "grad_norm": 2.004911422729492,
+      "learning_rate": 0.000205,
+      "loss": 0.525,
+      "step": 42
+    },
+    {
+      "epoch": 0.01120083355040375,
+      "grad_norm": 2.9986109733581543,
+      "learning_rate": 0.00021,
+      "loss": 0.6073,
+      "step": 43
+    },
+    {
+      "epoch": 0.011461318051575931,
+      "grad_norm": 3.4304168224334717,
+      "learning_rate": 0.000215,
+      "loss": 0.5203,
+      "step": 44
+    },
+    {
+      "epoch": 0.011721802552748111,
+      "grad_norm": 2.295295000076294,
+      "learning_rate": 0.00022,
+      "loss": 0.3148,
+      "step": 45
+    },
+    {
+      "epoch": 0.011982287053920291,
+      "grad_norm": 3.9490885734558105,
+      "learning_rate": 0.00022500000000000002,
+      "loss": 0.5378,
+      "step": 46
+    },
+    {
+      "epoch": 0.012242771555092472,
+      "grad_norm": 2.3454151153564453,
+      "learning_rate": 0.00023,
+      "loss": 0.3085,
+      "step": 47
+    },
+    {
+      "epoch": 0.012503256056264652,
+      "grad_norm": 2.9150779247283936,
+      "learning_rate": 0.000235,
+      "loss": 0.432,
+      "step": 48
+    },
+    {
+      "epoch": 0.012763740557436832,
+      "grad_norm": 2.1253578662872314,
+      "learning_rate": 0.00024,
+      "loss": 0.1773,
+      "step": 49
+    },
+    {
+      "epoch": 0.013024225058609012,
+      "grad_norm": 3.5161190032958984,
+      "learning_rate": 0.000245,
+      "loss": 0.581,
+      "step": 50
+    },
+    {
+      "epoch": 0.013284709559781192,
+      "grad_norm": 1.8895039558410645,
+      "learning_rate": 0.00025,
+      "loss": 0.4554,
+      "step": 51
+    },
+    {
+      "epoch": 0.013545194060953373,
+      "grad_norm": 1.1252281665802002,
+      "learning_rate": 0.000255,
+      "loss": 0.3409,
+      "step": 52
+    },
+    {
+      "epoch": 0.013805678562125553,
+      "grad_norm": 2.6543619632720947,
+      "learning_rate": 0.00026000000000000003,
+      "loss": 0.594,
+      "step": 53
+    },
+    {
+      "epoch": 0.014066163063297733,
+      "grad_norm": 3.3003315925598145,
+      "learning_rate": 0.00026500000000000004,
+      "loss": 0.5753,
+      "step": 54
+    },
+    {
+      "epoch": 0.014326647564469915,
+      "grad_norm": 2.486830234527588,
+      "learning_rate": 0.00027,
+      "loss": 0.2907,
+      "step": 55
+    },
+    {
+      "epoch": 0.014587132065642095,
+      "grad_norm": 1.5953302383422852,
+      "learning_rate": 0.000275,
+      "loss": 0.4006,
+      "step": 56
+    },
+    {
+      "epoch": 0.014847616566814275,
+      "grad_norm": 1.8115977048873901,
+      "learning_rate": 0.00028000000000000003,
+      "loss": 0.2677,
+      "step": 57
+    },
+    {
+      "epoch": 0.015108101067986456,
+      "grad_norm": 2.282597780227661,
+      "learning_rate": 0.000285,
+      "loss": 0.6526,
+      "step": 58
+    },
+    {
+      "epoch": 0.015368585569158636,
+      "grad_norm": 1.4348944425582886,
+      "learning_rate": 0.00029,
+      "loss": 0.4291,
+      "step": 59
+    },
+    {
+      "epoch": 0.015629070070330816,
+      "grad_norm": 2.0866997241973877,
+      "learning_rate": 0.000295,
+      "loss": 0.1811,
+      "step": 60
+    },
+    {
+      "epoch": 0.015889554571502994,
+      "grad_norm": 1.6576564311981201,
+      "learning_rate": 0.0003,
+      "loss": 0.4088,
+      "step": 61
+    },
+    {
+      "epoch": 0.016150039072675176,
+      "grad_norm": 1.635674238204956,
+      "learning_rate": 0.000305,
+      "loss": 0.4438,
+      "step": 62
+    },
+    {
+      "epoch": 0.016410523573847355,
+      "grad_norm": 1.140415072441101,
+      "learning_rate": 0.00031,
+      "loss": 0.3589,
+      "step": 63
+    },
+    {
+      "epoch": 0.016671008075019537,
+      "grad_norm": 1.7953686714172363,
+      "learning_rate": 0.000315,
+      "loss": 0.2778,
+      "step": 64
+    },
+    {
+      "epoch": 0.016931492576191715,
+      "grad_norm": 2.2324233055114746,
+      "learning_rate": 0.00032,
+      "loss": 0.5049,
+      "step": 65
+    },
+    {
+      "epoch": 0.017191977077363897,
+      "grad_norm": 2.036297559738159,
+      "learning_rate": 0.00032500000000000004,
+      "loss": 0.2636,
+      "step": 66
+    },
+    {
+      "epoch": 0.017452461578536076,
+      "grad_norm": 2.2596747875213623,
+      "learning_rate": 0.00033,
+      "loss": 0.3743,
+      "step": 67
+    },
+    {
+      "epoch": 0.017712946079708258,
+      "grad_norm": 1.6777313947677612,
+      "learning_rate": 0.000335,
+      "loss": 0.3978,
+      "step": 68
+    },
+    {
+      "epoch": 0.017973430580880436,
+      "grad_norm": 1.6452847719192505,
+      "learning_rate": 0.00034,
+      "loss": 0.1836,
+      "step": 69
+    },
+    {
+      "epoch": 0.018233915082052618,
+      "grad_norm": 1.7216978073120117,
+      "learning_rate": 0.000345,
+      "loss": 0.4191,
+      "step": 70
+    },
+    {
+      "epoch": 0.018494399583224796,
+      "grad_norm": 1.7111387252807617,
+      "learning_rate": 0.00035,
+      "loss": 0.1812,
+      "step": 71
+    },
+    {
+      "epoch": 0.01875488408439698,
+      "grad_norm": 1.6676584482192993,
+      "learning_rate": 0.000355,
+      "loss": 0.4526,
+      "step": 72
+    },
+    {
+      "epoch": 0.019015368585569157,
+      "grad_norm": 0.9286651611328125,
+      "learning_rate": 0.00035999999999999997,
+      "loss": 0.2746,
+      "step": 73
+    },
+    {
+      "epoch": 0.01927585308674134,
+      "grad_norm": 3.234783411026001,
+      "learning_rate": 0.000365,
+      "loss": 0.5224,
+      "step": 74
+    },
+    {
+      "epoch": 0.01953633758791352,
+      "grad_norm": 1.3695653676986694,
+      "learning_rate": 0.00037,
+      "loss": 0.3308,
+      "step": 75
+    },
+    {
+      "epoch": 0.0197968220890857,
+      "grad_norm": 2.9995968341827393,
+      "learning_rate": 0.000375,
+      "loss": 0.4817,
+      "step": 76
+    },
+    {
+      "epoch": 0.02005730659025788,
+      "grad_norm": 1.8912553787231445,
+      "learning_rate": 0.00038,
+      "loss": 0.2722,
+      "step": 77
+    },
+    {
+      "epoch": 0.02031779109143006,
+      "grad_norm": 1.3702706098556519,
+      "learning_rate": 0.00038500000000000003,
+      "loss": 0.1064,
+      "step": 78
+    },
+    {
+      "epoch": 0.02057827559260224,
+      "grad_norm": 0.9273198246955872,
+      "learning_rate": 0.00039000000000000005,
+      "loss": 0.1233,
+      "step": 79
+    },
+    {
+      "epoch": 0.02083876009377442,
+      "grad_norm": 0.8209530711174011,
+      "learning_rate": 0.000395,
+      "loss": 0.2622,
+      "step": 80
+    },
+    {
+      "epoch": 0.021099244594946602,
+      "grad_norm": 1.4749599695205688,
+      "learning_rate": 0.0004,
+      "loss": 0.2999,
+      "step": 81
+    },
+    {
+      "epoch": 0.02135972909611878,
+      "grad_norm": 1.1133017539978027,
+      "learning_rate": 0.00040500000000000003,
+      "loss": 0.2929,
+      "step": 82
+    },
+    {
+      "epoch": 0.021620213597290962,
+      "grad_norm": 1.235826015472412,
+      "learning_rate": 0.00041,
+      "loss": 0.3106,
+      "step": 83
+    },
+    {
+      "epoch": 0.02188069809846314,
+      "grad_norm": 0.9904353022575378,
+      "learning_rate": 0.000415,
+      "loss": 0.2101,
+      "step": 84
+    },
+    {
+      "epoch": 0.022141182599635323,
+      "grad_norm": 1.2953742742538452,
+      "learning_rate": 0.00042,
+      "loss": 0.1131,
+      "step": 85
+    },
+    {
+      "epoch": 0.0224016671008075,
+      "grad_norm": 1.10429048538208,
+      "learning_rate": 0.000425,
+      "loss": 0.2727,
+      "step": 86
+    },
+    {
+      "epoch": 0.022662151601979683,
+      "grad_norm": 1.048660159111023,
+      "learning_rate": 0.00043,
+      "loss": 0.1082,
+      "step": 87
+    },
+    {
+      "epoch": 0.022922636103151862,
+      "grad_norm": 0.857686460018158,
+      "learning_rate": 0.000435,
+      "loss": 0.1475,
+      "step": 88
+    },
+    {
+      "epoch": 0.023183120604324044,
+      "grad_norm": 0.9353561401367188,
+      "learning_rate": 0.00044,
+      "loss": 0.2123,
+      "step": 89
+    },
+    {
+      "epoch": 0.023443605105496222,
+      "grad_norm": 1.4590015411376953,
+      "learning_rate": 0.00044500000000000003,
+      "loss": 0.2988,
+      "step": 90
+    },
+    {
+      "epoch": 0.023704089606668404,
+      "grad_norm": 0.9181132316589355,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.2321,
+      "step": 91
+    },
+    {
+      "epoch": 0.023964574107840583,
+      "grad_norm": 0.7688923478126526,
+      "learning_rate": 0.000455,
+      "loss": 0.16,
+      "step": 92
+    },
+    {
+      "epoch": 0.024225058609012765,
+      "grad_norm": 1.0974979400634766,
+      "learning_rate": 0.00046,
+      "loss": 0.2135,
+      "step": 93
+    },
+    {
+      "epoch": 0.024485543110184943,
+      "grad_norm": 1.083938717842102,
+      "learning_rate": 0.000465,
+      "loss": 0.1931,
+      "step": 94
+    },
+    {
+      "epoch": 0.024746027611357125,
+      "grad_norm": 0.5162568688392639,
+      "learning_rate": 0.00047,
+      "loss": 0.0853,
+      "step": 95
+    },
+    {
+      "epoch": 0.025006512112529303,
+      "grad_norm": 0.8454329967498779,
+      "learning_rate": 0.000475,
+      "loss": 0.1723,
+      "step": 96
+    },
+    {
+      "epoch": 0.025266996613701485,
+      "grad_norm": 0.9237842559814453,
+      "learning_rate": 0.00048,
+      "loss": 0.1858,
+      "step": 97
+    },
+    {
+      "epoch": 0.025527481114873664,
+      "grad_norm": 0.8391311168670654,
+      "learning_rate": 0.00048499999999999997,
+      "loss": 0.1558,
+      "step": 98
+    },
+    {
+      "epoch": 0.025787965616045846,
+      "grad_norm": 0.7986068725585938,
+      "learning_rate": 0.00049,
+      "loss": 0.2043,
+      "step": 99
+    },
+    {
+      "epoch": 0.026048450117218024,
+      "grad_norm": 0.3467917740345001,
+      "learning_rate": 0.000495,
+      "loss": 0.0386,
+      "step": 100
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 256,
+  "trial_name": null,
+  "trial_params": null
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,7 @@

+{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+You are a helpful assistant.<|im_end|>
+{% endif %}<|im_start|>{{ message['role'] }}
+{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2_5_VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 1003520,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2_5_VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "max_pixels": 1003520,
+    "min_pixels": 3136
+  },
+  "temporal_patch_size": 2
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Qwen2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,7 @@

+{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+You are a helpful assistant.<|im_end|>
+{% endif %}<|im_start|>{{ message['role'] }}
+{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2_5_VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 1003520,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2_5_VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "max_pixels": 1003520,
+    "min_pixels": 3136
+  },
+  "temporal_patch_size": 2
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Qwen2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,7 @@

+{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+You are a helpful assistant.<|im_end|>
+{% endif %}<|im_start|>{{ message['role'] }}
+{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2_5_VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 1003520,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2_5_VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "max_pixels": 1003520,
+    "min_pixels": 3136
+  },
+  "temporal_patch_size": 2
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Qwen2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "Qwen2_5_VLImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "max_pixels": 1003520,
+  "merge_size": 2,
+  "min_pixels": 3136,
+  "patch_size": 14,
+  "processor_class": "Qwen2_5_VLProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "max_pixels": 1003520,
+    "min_pixels": 3136
+  },
+  "temporal_patch_size": 2
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "processor_class": "Qwen2_5_VLProcessor",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/train_cls.log ADDED Viewed

The diff for this file is too large to render. See raw diff

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-12-24T07:14:07.358028809Z","level":"INFO","msg":"stream: starting","core version":"0.22.2"}
+{"time":"2025-12-24T07:14:07.535812656Z","level":"INFO","msg":"stream: created new stream","id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535864052Z","level":"INFO","msg":"handler: started","stream_id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535912974Z","level":"INFO","msg":"stream: started","id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535933442Z","level":"INFO","msg":"writer: started","stream_id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535937845Z","level":"INFO","msg":"sender: started","stream_id":"autp4ou6"}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Current SDK version is 0.22.2
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Configure stats pid to 3703561
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/.config/wandb/settings
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/code/VLM2Vec/wandb/settings
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from environment variables
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:setup_run_log_directory():705] Logging user logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:setup_run_log_directory():706] Logging internal logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:init():832] calling init triggers
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:init():837] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:init():880] starting backend
+2025-12-24 07:14:07,352 INFO    MainThread:3703561 [wandb_init.py:init():883] sending inform_init request
+2025-12-24 07:14:07,356 INFO    MainThread:3703561 [wandb_init.py:init():891] backend started and connected
+2025-12-24 07:14:07,358 INFO    MainThread:3703561 [wandb_init.py:init():961] updated telemetry
+2025-12-24 07:14:07,363 INFO    MainThread:3703561 [wandb_init.py:init():985] communicating run to backend with 90.0 second timeout
+2025-12-24 07:14:07,809 INFO    MainThread:3703561 [wandb_init.py:init():1036] starting run threads in backend
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_console_start():2509] atexit reg
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_redirect():2357] redirect: wrap_raw
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_redirect():2426] Wrapping output streams.
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_redirect():2449] Redirects installed.
+2025-12-24 07:14:07,915 INFO    MainThread:3703561 [wandb_init.py:init():1076] run started, returning control to user process
+2025-12-24 07:14:07,916 INFO    MainThread:3703561 [wandb_run.py:_config_callback():1392] config_cb None None {'output_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0005, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 1000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 100, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/runs/Dec24_07-04-56_GCRAZGDL1688', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': True, 'eval_steps': None, 'dataloader_num_workers': 1, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': True, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'image_encoder_freeze': False, 'resume_from': 'none', 'project_name': None, 'grad_cache': False, 'gc_q_chunk_size': 2, 'gc_p_chunk_size': 2, 'interleave_stopping_strategy': 'all_exhausted', 'interleave_batch_size': 0}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/output.log ADDED Viewed

	@@ -0,0 +1,96 @@

+  0%|          | 2/1000 [00:16<2:09:56,  7.81s/it]
+[DBG][env] AOP_ENABLED=1 APPLY=both LAYER=12 SELECTION=attention KEEP_T=0.5 KEEP_V=1 VPOOL_ENABLED=1 VPOOL_LAYER=1
+[RANK][mid] top1=0.78% top5=6.25% top10=10.16% dist={'min': 1, 'p25': 34, 'med': 59, 'p75': 93, 'max': 127}
+[RANK][last] top1=10.94% top5=15.62% top10=21.09% dist={'min': 1, 'p25': 16, 'med': 56, 'p75': 93, 'max': 118}
+[WARN] last layer top1 < 40%. 建议先 AOP_ENABLED=0/VPOOL_ENABLED=0 进行对照，确认基座检索能力。
+[Probe Step 0] Loss: 0.6135
+  - Pred Probs (need_last=1): mean=0.4198, std=0.4385
+  - Labels: need_last=0.1016, safe=0.8984
+  - mid_hit: 0.78%, last_hit: 10.94%
+  - both_correct: 0.78%, both_wrong: 89.06%
+[Gradient Check After Backward - Step 0]
+  - Total Grad Norm: 2.564083
+  - Has Gradient: True
+{'loss': 0.6135, 'grad_norm': 2.5640830993652344, 'learning_rate': 0.0, 'epoch': 0.0}
+[DBG][env] AOP_ENABLED=1 APPLY=both LAYER=12 SELECTION=attention KEEP_T=0.5 KEEP_V=1 VPOOL_ENABLED=1 VPOOL_LAYER=1
+[RANK][mid] top1=1.56% top5=5.47% top10=9.38% dist={'min': 1, 'p25': 31, 'med': 63, 'p75': 101, 'max': 128}
+[RANK][last] top1=11.72% top5=19.53% top10=23.44% dist={'min': 1, 'p25': 13, 'med': 48, 'p75': 87, 'max': 128}
+[WARN] last layer top1 < 40%. 建议先 AOP_ENABLED=0/VPOOL_ENABLED=0 进行对照，确认基座检索能力。
+[Probe Step 1] Loss: 0.8630
+  - Pred Probs (need_last=1): mean=0.3642, std=0.4361
+  - Labels: need_last=0.1016, safe=0.8984
+  - mid_hit: 1.56%, last_hit: 11.72%
+  - both_correct: 1.56%, both_wrong: 88.28%
+[Gradient Check After Backward - Step 1]
+  - Total Grad Norm: 2.883063
+  - Has Gradient: True
+{'loss': 0.863, 'grad_norm': 2.8830628395080566, 'learning_rate': 5e-06, 'epoch': 0.0}
+[DBG][env] AOP_ENABLED=1 APPLY=both LAYER=12 SELECTION=attention KEEP_T=0.5 KEEP_V=1 VPOOL_ENABLED=1 VPOOL_LAYER=1
+[RANK][mid] top1=0.00% top5=7.03% top10=11.72% dist={'min': 2, 'p25': 28, 'med': 55, 'p75': 87, 'max': 127}
+[RANK][last] top1=7.03% top5=14.06% top10=18.75% dist={'min': 1, 'p25': 15, 'med': 42, 'p75': 69, 'max': 125}
+[WARN] last layer top1 < 40%. 建议先 AOP_ENABLED=0/VPOOL_ENABLED=0 进行对照，确认基座检索能力。
+[Probe Step 2] Loss: 0.4115
+  - Pred Probs (need_last=1): mean=0.4447, std=0.4333
+  - Labels: need_last=0.0703, safe=0.9297
+  - mid_hit: 0.00%, last_hit: 7.03%
+  - both_correct: 0.00%, both_wrong: 92.97%
+[Gradient Check After Backward - Step 2]
+  - Total Grad Norm: 4.093573
+  - Has Gradient: True
+{'loss': 0.4115, 'grad_norm': 4.093572616577148, 'learning_rate': 1e-05, 'epoch': 0.0}
+[Probe Step 3] Loss: 0.6938
+  - Pred Probs (need_last=1): mean=0.3258, std=0.4012
+  - Labels: need_last=0.1016, safe=0.8984
+  - mid_hit: 0.78%, last_hit: 10.94%
+  - both_correct: 0.78%, both_wrong: 89.06%
+{'loss': 0.6938, 'grad_norm': 2.6786575317382812, 'learning_rate': 1.5e-05, 'epoch': 0.0}
+[Probe Step 4] Loss: 0.5551
+  - Pred Probs (need_last=1): mean=0.3782, std=0.4288
+  - Labels: need_last=0.0703, safe=0.9297
+  - mid_hit: 1.56%, last_hit: 8.59%
+  - both_correct: 1.56%, both_wrong: 91.41%
+{'loss': 0.5551, 'grad_norm': 2.923421621322632, 'learning_rate': 2e-05, 'epoch': 0.0}
+[Probe Step 5] Loss: 0.3958
+  - Pred Probs (need_last=1): mean=0.3890, std=0.4212
+  - Labels: need_last=0.0625, safe=0.9375
+  - mid_hit: 1.56%, last_hit: 6.25%
+  - both_correct: 0.00%, both_wrong: 92.19%
+{'loss': 0.3958, 'grad_norm': 3.3633787631988525, 'learning_rate': 2.5e-05, 'epoch': 0.0}
+[Probe Step 6] Loss: 0.4503
+  - Pred Probs (need_last=1): mean=0.3575, std=0.3862
+  - Labels: need_last=0.1406, safe=0.8594
+  - mid_hit: 0.78%, last_hit: 14.84%
+  - both_correct: 0.78%, both_wrong: 85.16%
+{'loss': 0.4503, 'grad_norm': 2.4892380237579346, 'learning_rate': 3e-05, 'epoch': 0.0}
+[Probe Step 7] Loss: 2.2221
+  - Pred Probs (need_last=1): mean=0.2568, std=0.3742
+  - Labels: need_last=0.5547, safe=0.4453
+  - mid_hit: 40.62%, last_hit: 95.31%
+  - both_correct: 39.84%, both_wrong: 3.91%
+{'loss': 2.2221, 'grad_norm': 19.515727996826172, 'learning_rate': 3.5000000000000004e-05, 'epoch': 0.0}
+[Probe Step 8] Loss: 0.3645
+  - Pred Probs (need_last=1): mean=0.3536, std=0.4082
+  - Labels: need_last=0.0547, safe=0.9453
+  - mid_hit: 1.56%, last_hit: 7.03%
+  - both_correct: 1.56%, both_wrong: 92.97%
+{'loss': 0.3645, 'grad_norm': 2.7149243354797363, 'learning_rate': 4e-05, 'epoch': 0.0}
+[Probe Step 9] Loss: 0.4055
+  - Pred Probs (need_last=1): mean=0.3187, std=0.3982
+  - Labels: need_last=0.0859, safe=0.9141
+  - mid_hit: 0.78%, last_hit: 9.38%
+  - both_correct: 0.78%, both_wrong: 90.62%
+{'loss': 0.4055, 'grad_norm': 2.0654330253601074, 'learning_rate': 4.4999999999999996e-05, 'epoch': 0.0}
+{'loss': 0.23, 'grad_norm': 2.538350820541382, 'learning_rate': 5e-05, 'epoch': 0.0}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,147 @@

+dill==0.3.8
+ray==2.50.0
+torchvision==0.23.0
+nvidia-cublas-cu12==12.8.4.1
+pandas==2.3.3
+nvidia-cusparse-cu12==12.5.8.93
+stack_data==0.6.3
+widgetsnbextension==4.0.15
+huggingface-hub==0.35.3
+nest_asyncio==1.6.0
+pydantic==2.12.0
+tifffile==2025.5.10
+threadpoolctl==3.6.0
+ninja==1.13.0
+imgkit==1.2.3
+gitdb==4.0.12
+regex==2025.9.18
+requests==2.32.5
+propcache==0.4.1
+nvidia-cufft-cu12==11.3.3.83
+ipython==8.37.0
+aiohappyeyeballs==2.6.1
+exceptiongroup==1.3.0
+tzdata==2025.2
+pyarrow==21.0.0
+av==15.1.0
+pure_eval==0.2.3
+contourpy==1.3.2
+nvidia-cudnn-cu12==9.10.2.21
+datasets==3.3.0
+sentry-sdk==2.41.0
+platformdirs==4.5.0
+charset-normalizer==3.4.3
+ipykernel==7.1.0
+tokenizers==0.21.4
+traitlets==5.14.3
+accelerate==1.10.1
+typing-inspection==0.4.2
+pillow==11.3.0
+lazy_loader==0.4
+multidict==6.7.0
+packaging==25.0
+sympy==1.14.0
+asttokens==3.0.0
+scikit-learn==1.7.2
+py-cpuinfo==9.0.0
+Pygments==2.19.2
+aiohttp==3.13.0
+PyYAML==6.0.3
+Markdown==3.10
+matplotlib-inline==0.2.1
+Jinja2==3.1.6
+torch==2.8.0
+setuptools==80.9.0
+nvidia-cusolver-cu12==11.7.3.90
+GitPython==3.1.45
+MarkupSafe==3.0.3
+importlib_metadata==8.7.0
+tornado==6.5.1
+opencv-contrib-python==4.11.0.86
+qwen-vl-utils==0.0.8
+six==1.17.0
+frozenlist==1.8.0
+hjson==3.1.0
+networkx==3.4.2
+comm==0.2.3
+referencing==0.36.2
+wcwidth==0.2.14
+nvidia-nvtx-cu12==12.8.90
+protobuf==6.32.1
+pip==25.2
+flash_attn==2.8.3
+pexpect==4.9.0
+hnswlib==0.8.0
+nvidia-cuda-nvrtc-cu12==12.8.93
+pytz==2025.2
+pyparsing==3.2.5
+wrapt==1.17.3
+click==8.2.1
+hf-xet==1.1.10
+parso==0.8.5
+nvidia-curand-cu12==10.3.9.90
+typing_extensions==4.15.0
+triton==3.4.0
+nvidia-cufile-cu12==1.13.1.3
+idna==3.10
+nvidia-cusparselt-cu12==0.7.1
+jupyter_client==8.6.3
+einops==0.8.1
+scikit-image==0.25.2
+decord==0.6.0
+smmap==5.0.2
+urllib3==2.5.0
+psutil==5.9.1
+psutil==7.1.0
+imageio==2.37.0
+peft==0.17.1
+seaborn==0.13.2
+safetensors==0.6.2
+timm==1.0.20
+certifi==2025.10.5
+ipywidgets==8.1.8
+xxhash==3.6.0
+jsonschema-specifications==2025.9.1
+attrs==25.4.0
+jsonschema==4.25.1
+filelock==3.20.0
+ptyprocess==0.7.0
+debugpy==1.8.16
+jedi==0.19.2
+prompt_toolkit==3.0.52
+nvidia-cuda-runtime-cu12==12.8.90
+jupyterlab_widgets==3.0.16
+pyzmq==27.1.0
+executing==2.2.1
+cycler==0.12.1
+nvidia-cuda-cupti-cu12==12.8.90
+wandb==0.22.2
+opencv-python==4.11.0.86
+fsspec==2024.12.0
+async-timeout==5.0.1
+mpmath==1.3.0
+pydantic_core==2.41.1
+tqdm==4.67.1
+annotated-types==0.7.0
+transformers==4.52.3
+decorator==5.2.1
+zipp==3.23.0
+wheel==0.45.1
+numpy==1.26.4
+multiprocess==0.70.16
+pytrec_eval==0.5
+yarl==1.22.0
+aiosignal==1.4.0
+sentencepiece==0.2.1
+scipy==1.15.3
+matplotlib==3.10.7
+msgpack==1.1.2
+joblib==1.5.2
+fonttools==4.60.1
+rpds-py==0.27.1
+kiwisolver==1.4.9
+nvidia-nccl-cu12==2.27.3
+pickleshare==0.7.5
+jupyter_core==5.9.1
+python-dateutil==2.9.0.post0
+nvidia-nvjitlink-cu12==12.8.93

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "os": "Linux-6.14.0-1014-azure-x86_64-with-glibc2.39",
+  "python": "CPython 3.10.18",
+  "startedAt": "2025-12-24T07:14:07.144808Z",
+  "args": [
+    "--model_name",
+    "Qwen/Qwen2.5-VL-3B-Instruct",
+    "--checkpoint_path",
+    "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4/checkpoint-5000",
+    "--bf16",
+    "--pooling",
+    "eos",
+    "--normalize",
+    "True",
+    "--temperature",
+    "0.02",
+    "--dataloader_num_workers",
+    "1",
+    "--dataset_config",
+    "/home/v-menggao/code/VLM2Vec/experiments/public/train/train_image.yaml",
+    "--run_name",
+    "Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new",
+    "--output_dir",
+    "/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new",
+    "--per_device_train_batch_size",
+    "128",
+    "--dataloader_drop_last",
+    "True",
+    "--learning_rate",
+    "5e-4",
+    "--max_grad_norm",
+    "1.0",
+    "--max_steps",
+    "1000",
+    "--warmup_steps",
+    "100",
+    "--save_steps",
+    "100",
+    "--logging_steps",
+    "1",
+    "--save_safetensors",
+    "True",
+    "--remove_unused_columns",
+    "False",
+    "--report_to",
+    "wandb"
+  ],
+  "program": "/home/v-menggao/code/VLM2Vec/train_early_exit_AOP_pooling_new.py",
+  "codePath": "train_early_exit_AOP_pooling_new.py",
+  "codePathLocal": "train_early_exit_AOP_pooling_new.py",
+  "git": {
+    "remote": "https://github.com/GaoMengGladys/Code.git",
+    "commit": "108c5660f7ad8147a02f61f938da28accbf76578"
+  },
+  "email": "2646894013@qq.com",
+  "root": "/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new",
+  "host": "GCRAZGDL1688",
+  "executable": "/home/v-menggao/miniconda3/envs/VLMtoVec/bin/python3.10",
+  "cpu_count": 24,
+  "cpu_count_logical": 24,
+  "gpu": "NVIDIA A100 80GB PCIe",
+  "gpu_count": 1,
+  "disk": {
+    "/": {
+      "total": "1063956480000",
+      "used": "859166535680"
+    }
+  },
+  "memory": {
+    "total": "232208756736"
+  },
+  "gpu_nvidia": [
+    {
+      "name": "NVIDIA A100 80GB PCIe",
+      "memoryTotal": "85899345920",
+      "cudaCores": 6912,
+      "architecture": "Ampere",
+      "uuid": "GPU-67f1e796-8888-cac0-c0cf-8d69ebc02416"
+    }
+  ],
+  "cudaVersion": "13.0",
+  "writerId": "szqm7qad1kv1te65zdsv0jerv7e0jmdv"
+}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,7 @@

+{"time":"2025-12-24T07:14:07.162478266Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqjzxgwgh/port-3703561.txt","pid":3703561,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-12-24T07:14:07.163050755Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3703561}
+{"time":"2025-12-24T07:14:07.163022417Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3703561-3723985-3254824786/socket","Net":"unix"}}
+{"time":"2025-12-24T07:14:07.352777492Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-12-24T07:14:07.357943239Z","level":"INFO","msg":"handleInformInit: received","streamId":"autp4ou6","id":"1(@)"}
+{"time":"2025-12-24T07:14:07.535919206Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"autp4ou6","id":"1(@)"}
+{"time":"2025-12-24T07:14:58.627974022Z","level":"INFO","msg":"server: parent process exited, terminating service process"}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-12-24T07:14:07.358028809Z","level":"INFO","msg":"stream: starting","core version":"0.22.2"}
+{"time":"2025-12-24T07:14:07.535812656Z","level":"INFO","msg":"stream: created new stream","id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535864052Z","level":"INFO","msg":"handler: started","stream_id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535912974Z","level":"INFO","msg":"stream: started","id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535933442Z","level":"INFO","msg":"writer: started","stream_id":"autp4ou6"}
+{"time":"2025-12-24T07:14:07.535937845Z","level":"INFO","msg":"sender: started","stream_id":"autp4ou6"}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Current SDK version is 0.22.2
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Configure stats pid to 3703561
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/.config/wandb/settings
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/code/VLM2Vec/wandb/settings
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from environment variables
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:setup_run_log_directory():705] Logging user logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:setup_run_log_directory():706] Logging internal logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:init():832] calling init triggers
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:init():837] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-12-24 07:14:07,146 INFO    MainThread:3703561 [wandb_init.py:init():880] starting backend
+2025-12-24 07:14:07,352 INFO    MainThread:3703561 [wandb_init.py:init():883] sending inform_init request
+2025-12-24 07:14:07,356 INFO    MainThread:3703561 [wandb_init.py:init():891] backend started and connected
+2025-12-24 07:14:07,358 INFO    MainThread:3703561 [wandb_init.py:init():961] updated telemetry
+2025-12-24 07:14:07,363 INFO    MainThread:3703561 [wandb_init.py:init():985] communicating run to backend with 90.0 second timeout
+2025-12-24 07:14:07,809 INFO    MainThread:3703561 [wandb_init.py:init():1036] starting run threads in backend
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_console_start():2509] atexit reg
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_redirect():2357] redirect: wrap_raw
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_redirect():2426] Wrapping output streams.
+2025-12-24 07:14:07,912 INFO    MainThread:3703561 [wandb_run.py:_redirect():2449] Redirects installed.
+2025-12-24 07:14:07,915 INFO    MainThread:3703561 [wandb_init.py:init():1076] run started, returning control to user process
+2025-12-24 07:14:07,916 INFO    MainThread:3703561 [wandb_run.py:_config_callback():1392] config_cb None None {'output_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0005, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 1000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 100, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/runs/Dec24_07-04-56_GCRAZGDL1688', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': True, 'eval_steps': None, 'dataloader_num_workers': 1, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': True, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'image_encoder_freeze': False, 'resume_from': 'none', 'project_name': None, 'grad_cache': False, 'gc_q_chunk_size': 2, 'gc_p_chunk_size': 2, 'interleave_stopping_strategy': 'all_exhausted', 'interleave_batch_size': 0}

experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/run-autp4ou6.wandb ADDED Viewed

Binary file (32.8 kB). View file

experiments/examples/llava_next/demo.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from src.model import MMEBModel
+from src.arguments import ModelArguments
+from src.utils import load_processor
+import torch
+from transformers import HfArgumentParser, AutoProcessor
+from PIL import Image
+import numpy as np
+model_args = ModelArguments(
+    model_name='TIGER-Lab/VLM2Vec-LLaVa-Next',
+    pooling='last',
+    normalize=True,
+    model_backbone='llava_next')
+processor = load_processor(model_args)
+model = MMEBModel.load(model_args, is_trainable=False)
+model.eval()
+model = model.to('cuda', dtype=torch.bfloat16)
+# Image + Text -> Text
+inputs = processor(text='<image> Represent the given image with the following question: What is in the image',
+                   images=Image.open('figures/example.jpg'),
+                   return_tensors="pt")
+inputs = {key: value.to('cuda') for key, value in inputs.items()}
+qry_output = model(qry=inputs)["qry_reps"]
+string = 'A cat and a dog'
+inputs = processor(text=string,
+                   images=None,
+                   return_tensors="pt")
+inputs = {key: value.to('cuda') for key, value in inputs.items()}
+tgt_output = model(tgt=inputs)["tgt_reps"]
+print(string, '=', model.compute_similarity(qry_output, tgt_output))
+## A cat and a dog = tensor([[0.4414]], device='cuda:0', dtype=torch.bfloat16)
+string = 'A cat and a tiger'
+inputs = processor(text=string,
+                   images=None,
+                   return_tensors="pt")
+inputs = {key: value.to('cuda') for key, value in inputs.items()}
+tgt_output = model(tgt=inputs)["tgt_reps"]
+print(string, '=', model.compute_similarity(qry_output, tgt_output))
+## A cat and a tiger = tensor([[0.3555]], device='cuda:0', dtype=torch.bfloat16)

experiments/examples/llava_next/run_eval.sh ADDED Viewed

	@@ -0,0 +1,12 @@

+export PYTHONPATH=../VLM2Vec/:$PYTHONPATH  # prepend the ../VLM2Vec/ checkout (relative to the current directory) so eval.py can import its modules
+CUDA_VISIBLE_DEVICES=0 python eval.py \
+  --model_name TIGER-Lab/VLM2Vec-LLaVA-v1.6-LoRA \
+  --image_dir DATA_DIR/MMEB_test/MMEB_Test_1K_New/images/ \
+  --encode_output_path OUTPUT_DIR/MMEB_eval/VLM2Vec-Full/ \
+  --pooling eos --normalize True \
+  --dataset_name TIGER-Lab/MMEB-eval \
+  --dataset_split test \
+  --subset_name N24News ImageNet-A ImageNet-R WebQA GQA Visual7W \
+  --image_resolution high \
+  --per_device_eval_batch_size 64  # NOTE(review): DATA_DIR and OUTPUT_DIR above look like placeholders to replace with real paths — confirm before running