MgGladys commited on
Commit
43feefe
·
verified ·
1 Parent(s): 2acd7e7

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. assets/example.jpg +0 -0
  2. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json +24 -0
  3. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja +7 -0
  4. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json +24 -0
  5. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja +7 -0
  6. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/merges.txt +0 -0
  7. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json +29 -0
  8. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json +31 -0
  9. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json +208 -0
  10. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json +734 -0
  11. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/vocab.json +0 -0
  12. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json +24 -0
  13. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja +7 -0
  14. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/merges.txt +0 -0
  15. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json +29 -0
  16. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json +31 -0
  17. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json +208 -0
  18. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/trainer_state.json +0 -0
  19. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/vocab.json +0 -0
  20. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/added_tokens.json +24 -0
  21. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/chat_template.jinja +7 -0
  22. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/merges.txt +0 -0
  23. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/preprocessor_config.json +29 -0
  24. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/special_tokens_map.json +31 -0
  25. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/tokenizer_config.json +208 -0
  26. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/trainer_state.json +0 -0
  27. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/added_tokens.json +24 -0
  28. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/chat_template.jinja +7 -0
  29. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/preprocessor_config.json +29 -0
  30. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/special_tokens_map.json +31 -0
  31. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/tokenizer_config.json +208 -0
  32. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/trainer_state.json +0 -0
  33. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/merges.txt +0 -0
  34. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json +29 -0
  35. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/special_tokens_map.json +31 -0
  36. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json +208 -0
  37. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log +0 -0
  38. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/vocab.json +0 -0
  39. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/train_cls.log +0 -0
  40. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug-internal.log +6 -0
  41. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug.log +22 -0
  42. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/output.log +96 -0
  43. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/requirements.txt +147 -0
  44. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/wandb-metadata.json +83 -0
  45. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-core.log +7 -0
  46. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log +6 -0
  47. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log +22 -0
  48. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/run-autp4ou6.wandb +0 -0
  49. experiments/examples/llava_next/demo.py +46 -0
  50. experiments/examples/llava_next/run_eval.sh +12 -0
assets/example.jpg ADDED
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2_5_VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 1003520,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2_5_VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "max_pixels": 1003520,
26
+ "min_pixels": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "processor_class": "Qwen2_5_VLProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.026048450117218024,
6
+ "eval_steps": 500,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.00026048450117218026,
14
+ "grad_norm": 6.6370320320129395,
15
+ "learning_rate": 0.0,
16
+ "loss": 1.5389,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 0.0005209690023443605,
21
+ "grad_norm": 7.1702704429626465,
22
+ "learning_rate": 5e-06,
23
+ "loss": 1.3234,
24
+ "step": 2
25
+ },
26
+ {
27
+ "epoch": 0.0007814535035165407,
28
+ "grad_norm": 8.348443984985352,
29
+ "learning_rate": 1e-05,
30
+ "loss": 1.2501,
31
+ "step": 3
32
+ },
33
+ {
34
+ "epoch": 0.001041938004688721,
35
+ "grad_norm": 7.599966526031494,
36
+ "learning_rate": 1.5e-05,
37
+ "loss": 1.4676,
38
+ "step": 4
39
+ },
40
+ {
41
+ "epoch": 0.0013024225058609013,
42
+ "grad_norm": 5.925275802612305,
43
+ "learning_rate": 2e-05,
44
+ "loss": 1.1892,
45
+ "step": 5
46
+ },
47
+ {
48
+ "epoch": 0.0015629070070330815,
49
+ "grad_norm": 7.288003921508789,
50
+ "learning_rate": 2.5e-05,
51
+ "loss": 1.492,
52
+ "step": 6
53
+ },
54
+ {
55
+ "epoch": 0.0018233915082052619,
56
+ "grad_norm": 7.2629218101501465,
57
+ "learning_rate": 3e-05,
58
+ "loss": 1.3525,
59
+ "step": 7
60
+ },
61
+ {
62
+ "epoch": 0.002083876009377442,
63
+ "grad_norm": 9.106829643249512,
64
+ "learning_rate": 3.5000000000000004e-05,
65
+ "loss": 1.6033,
66
+ "step": 8
67
+ },
68
+ {
69
+ "epoch": 0.0023443605105496223,
70
+ "grad_norm": 8.875594139099121,
71
+ "learning_rate": 4e-05,
72
+ "loss": 1.5238,
73
+ "step": 9
74
+ },
75
+ {
76
+ "epoch": 0.0026048450117218025,
77
+ "grad_norm": 7.383709907531738,
78
+ "learning_rate": 4.4999999999999996e-05,
79
+ "loss": 1.4859,
80
+ "step": 10
81
+ },
82
+ {
83
+ "epoch": 0.0028653295128939827,
84
+ "grad_norm": 7.463179111480713,
85
+ "learning_rate": 5e-05,
86
+ "loss": 1.3504,
87
+ "step": 11
88
+ },
89
+ {
90
+ "epoch": 0.003125814014066163,
91
+ "grad_norm": 7.137135028839111,
92
+ "learning_rate": 5.5e-05,
93
+ "loss": 1.358,
94
+ "step": 12
95
+ },
96
+ {
97
+ "epoch": 0.003386298515238343,
98
+ "grad_norm": 5.68809175491333,
99
+ "learning_rate": 6e-05,
100
+ "loss": 1.2017,
101
+ "step": 13
102
+ },
103
+ {
104
+ "epoch": 0.0036467830164105238,
105
+ "grad_norm": 6.024169921875,
106
+ "learning_rate": 6.500000000000001e-05,
107
+ "loss": 1.1491,
108
+ "step": 14
109
+ },
110
+ {
111
+ "epoch": 0.003907267517582704,
112
+ "grad_norm": 5.510103225708008,
113
+ "learning_rate": 7.000000000000001e-05,
114
+ "loss": 1.1809,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 0.004167752018754884,
119
+ "grad_norm": 6.086293697357178,
120
+ "learning_rate": 7.5e-05,
121
+ "loss": 1.2069,
122
+ "step": 16
123
+ },
124
+ {
125
+ "epoch": 0.004428236519927064,
126
+ "grad_norm": 5.8847551345825195,
127
+ "learning_rate": 8e-05,
128
+ "loss": 1.2933,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 0.004688721021099245,
133
+ "grad_norm": 5.263647079467773,
134
+ "learning_rate": 8.5e-05,
135
+ "loss": 1.0476,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 0.004949205522271425,
140
+ "grad_norm": 5.684865951538086,
141
+ "learning_rate": 8.999999999999999e-05,
142
+ "loss": 1.1442,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 0.005209690023443605,
147
+ "grad_norm": 4.671970844268799,
148
+ "learning_rate": 9.5e-05,
149
+ "loss": 1.0422,
150
+ "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.005470174524615785,
154
+ "grad_norm": 7.935784816741943,
155
+ "learning_rate": 0.0001,
156
+ "loss": 1.1025,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.0057306590257879654,
161
+ "grad_norm": 4.634947299957275,
162
+ "learning_rate": 0.000105,
163
+ "loss": 0.9849,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.005991143526960146,
168
+ "grad_norm": 4.8161516189575195,
169
+ "learning_rate": 0.00011,
170
+ "loss": 0.9843,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.006251628028132326,
175
+ "grad_norm": 4.3339762687683105,
176
+ "learning_rate": 0.000115,
177
+ "loss": 0.8714,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.006512112529304506,
182
+ "grad_norm": 3.4047181606292725,
183
+ "learning_rate": 0.00012,
184
+ "loss": 0.8898,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.006772597030476686,
189
+ "grad_norm": 4.15224552154541,
190
+ "learning_rate": 0.000125,
191
+ "loss": 1.0079,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.0070330815316488665,
196
+ "grad_norm": 3.5006914138793945,
197
+ "learning_rate": 0.00013000000000000002,
198
+ "loss": 0.8013,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.0072935660328210476,
203
+ "grad_norm": 2.773101806640625,
204
+ "learning_rate": 0.000135,
205
+ "loss": 0.7086,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.007554050533993228,
210
+ "grad_norm": 2.4410135746002197,
211
+ "learning_rate": 0.00014000000000000001,
212
+ "loss": 0.7198,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.007814535035165408,
217
+ "grad_norm": 2.5674309730529785,
218
+ "learning_rate": 0.000145,
219
+ "loss": 0.6359,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 0.008075019536337588,
224
+ "grad_norm": 2.310837984085083,
225
+ "learning_rate": 0.00015,
226
+ "loss": 0.6039,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 0.008335504037509768,
231
+ "grad_norm": 2.4884161949157715,
232
+ "learning_rate": 0.000155,
233
+ "loss": 0.8962,
234
+ "step": 32
235
+ },
236
+ {
237
+ "epoch": 0.008595988538681949,
238
+ "grad_norm": 5.428861141204834,
239
+ "learning_rate": 0.00016,
240
+ "loss": 0.5576,
241
+ "step": 33
242
+ },
243
+ {
244
+ "epoch": 0.008856473039854129,
245
+ "grad_norm": 2.035452127456665,
246
+ "learning_rate": 0.000165,
247
+ "loss": 0.5866,
248
+ "step": 34
249
+ },
250
+ {
251
+ "epoch": 0.009116957541026309,
252
+ "grad_norm": 4.757160663604736,
253
+ "learning_rate": 0.00017,
254
+ "loss": 0.4413,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 0.00937744204219849,
259
+ "grad_norm": 2.8071913719177246,
260
+ "learning_rate": 0.000175,
261
+ "loss": 0.7013,
262
+ "step": 36
263
+ },
264
+ {
265
+ "epoch": 0.00963792654337067,
266
+ "grad_norm": 3.3390369415283203,
267
+ "learning_rate": 0.00017999999999999998,
268
+ "loss": 0.348,
269
+ "step": 37
270
+ },
271
+ {
272
+ "epoch": 0.00989841104454285,
273
+ "grad_norm": 2.469451665878296,
274
+ "learning_rate": 0.000185,
275
+ "loss": 0.72,
276
+ "step": 38
277
+ },
278
+ {
279
+ "epoch": 0.01015889554571503,
280
+ "grad_norm": 2.7830817699432373,
281
+ "learning_rate": 0.00019,
282
+ "loss": 0.671,
283
+ "step": 39
284
+ },
285
+ {
286
+ "epoch": 0.01041938004688721,
287
+ "grad_norm": 3.005566358566284,
288
+ "learning_rate": 0.00019500000000000002,
289
+ "loss": 0.5808,
290
+ "step": 40
291
+ },
292
+ {
293
+ "epoch": 0.01067986454805939,
294
+ "grad_norm": 2.8901026248931885,
295
+ "learning_rate": 0.0002,
296
+ "loss": 0.6022,
297
+ "step": 41
298
+ },
299
+ {
300
+ "epoch": 0.01094034904923157,
301
+ "grad_norm": 2.004911422729492,
302
+ "learning_rate": 0.000205,
303
+ "loss": 0.525,
304
+ "step": 42
305
+ },
306
+ {
307
+ "epoch": 0.01120083355040375,
308
+ "grad_norm": 2.9986109733581543,
309
+ "learning_rate": 0.00021,
310
+ "loss": 0.6073,
311
+ "step": 43
312
+ },
313
+ {
314
+ "epoch": 0.011461318051575931,
315
+ "grad_norm": 3.4304168224334717,
316
+ "learning_rate": 0.000215,
317
+ "loss": 0.5203,
318
+ "step": 44
319
+ },
320
+ {
321
+ "epoch": 0.011721802552748111,
322
+ "grad_norm": 2.295295000076294,
323
+ "learning_rate": 0.00022,
324
+ "loss": 0.3148,
325
+ "step": 45
326
+ },
327
+ {
328
+ "epoch": 0.011982287053920291,
329
+ "grad_norm": 3.9490885734558105,
330
+ "learning_rate": 0.00022500000000000002,
331
+ "loss": 0.5378,
332
+ "step": 46
333
+ },
334
+ {
335
+ "epoch": 0.012242771555092472,
336
+ "grad_norm": 2.3454151153564453,
337
+ "learning_rate": 0.00023,
338
+ "loss": 0.3085,
339
+ "step": 47
340
+ },
341
+ {
342
+ "epoch": 0.012503256056264652,
343
+ "grad_norm": 2.9150779247283936,
344
+ "learning_rate": 0.000235,
345
+ "loss": 0.432,
346
+ "step": 48
347
+ },
348
+ {
349
+ "epoch": 0.012763740557436832,
350
+ "grad_norm": 2.1253578662872314,
351
+ "learning_rate": 0.00024,
352
+ "loss": 0.1773,
353
+ "step": 49
354
+ },
355
+ {
356
+ "epoch": 0.013024225058609012,
357
+ "grad_norm": 3.5161190032958984,
358
+ "learning_rate": 0.000245,
359
+ "loss": 0.581,
360
+ "step": 50
361
+ },
362
+ {
363
+ "epoch": 0.013284709559781192,
364
+ "grad_norm": 1.8895039558410645,
365
+ "learning_rate": 0.00025,
366
+ "loss": 0.4554,
367
+ "step": 51
368
+ },
369
+ {
370
+ "epoch": 0.013545194060953373,
371
+ "grad_norm": 1.1252281665802002,
372
+ "learning_rate": 0.000255,
373
+ "loss": 0.3409,
374
+ "step": 52
375
+ },
376
+ {
377
+ "epoch": 0.013805678562125553,
378
+ "grad_norm": 2.6543619632720947,
379
+ "learning_rate": 0.00026000000000000003,
380
+ "loss": 0.594,
381
+ "step": 53
382
+ },
383
+ {
384
+ "epoch": 0.014066163063297733,
385
+ "grad_norm": 3.3003315925598145,
386
+ "learning_rate": 0.00026500000000000004,
387
+ "loss": 0.5753,
388
+ "step": 54
389
+ },
390
+ {
391
+ "epoch": 0.014326647564469915,
392
+ "grad_norm": 2.486830234527588,
393
+ "learning_rate": 0.00027,
394
+ "loss": 0.2907,
395
+ "step": 55
396
+ },
397
+ {
398
+ "epoch": 0.014587132065642095,
399
+ "grad_norm": 1.5953302383422852,
400
+ "learning_rate": 0.000275,
401
+ "loss": 0.4006,
402
+ "step": 56
403
+ },
404
+ {
405
+ "epoch": 0.014847616566814275,
406
+ "grad_norm": 1.8115977048873901,
407
+ "learning_rate": 0.00028000000000000003,
408
+ "loss": 0.2677,
409
+ "step": 57
410
+ },
411
+ {
412
+ "epoch": 0.015108101067986456,
413
+ "grad_norm": 2.282597780227661,
414
+ "learning_rate": 0.000285,
415
+ "loss": 0.6526,
416
+ "step": 58
417
+ },
418
+ {
419
+ "epoch": 0.015368585569158636,
420
+ "grad_norm": 1.4348944425582886,
421
+ "learning_rate": 0.00029,
422
+ "loss": 0.4291,
423
+ "step": 59
424
+ },
425
+ {
426
+ "epoch": 0.015629070070330816,
427
+ "grad_norm": 2.0866997241973877,
428
+ "learning_rate": 0.000295,
429
+ "loss": 0.1811,
430
+ "step": 60
431
+ },
432
+ {
433
+ "epoch": 0.015889554571502994,
434
+ "grad_norm": 1.6576564311981201,
435
+ "learning_rate": 0.0003,
436
+ "loss": 0.4088,
437
+ "step": 61
438
+ },
439
+ {
440
+ "epoch": 0.016150039072675176,
441
+ "grad_norm": 1.635674238204956,
442
+ "learning_rate": 0.000305,
443
+ "loss": 0.4438,
444
+ "step": 62
445
+ },
446
+ {
447
+ "epoch": 0.016410523573847355,
448
+ "grad_norm": 1.140415072441101,
449
+ "learning_rate": 0.00031,
450
+ "loss": 0.3589,
451
+ "step": 63
452
+ },
453
+ {
454
+ "epoch": 0.016671008075019537,
455
+ "grad_norm": 1.7953686714172363,
456
+ "learning_rate": 0.000315,
457
+ "loss": 0.2778,
458
+ "step": 64
459
+ },
460
+ {
461
+ "epoch": 0.016931492576191715,
462
+ "grad_norm": 2.2324233055114746,
463
+ "learning_rate": 0.00032,
464
+ "loss": 0.5049,
465
+ "step": 65
466
+ },
467
+ {
468
+ "epoch": 0.017191977077363897,
469
+ "grad_norm": 2.036297559738159,
470
+ "learning_rate": 0.00032500000000000004,
471
+ "loss": 0.2636,
472
+ "step": 66
473
+ },
474
+ {
475
+ "epoch": 0.017452461578536076,
476
+ "grad_norm": 2.2596747875213623,
477
+ "learning_rate": 0.00033,
478
+ "loss": 0.3743,
479
+ "step": 67
480
+ },
481
+ {
482
+ "epoch": 0.017712946079708258,
483
+ "grad_norm": 1.6777313947677612,
484
+ "learning_rate": 0.000335,
485
+ "loss": 0.3978,
486
+ "step": 68
487
+ },
488
+ {
489
+ "epoch": 0.017973430580880436,
490
+ "grad_norm": 1.6452847719192505,
491
+ "learning_rate": 0.00034,
492
+ "loss": 0.1836,
493
+ "step": 69
494
+ },
495
+ {
496
+ "epoch": 0.018233915082052618,
497
+ "grad_norm": 1.7216978073120117,
498
+ "learning_rate": 0.000345,
499
+ "loss": 0.4191,
500
+ "step": 70
501
+ },
502
+ {
503
+ "epoch": 0.018494399583224796,
504
+ "grad_norm": 1.7111387252807617,
505
+ "learning_rate": 0.00035,
506
+ "loss": 0.1812,
507
+ "step": 71
508
+ },
509
+ {
510
+ "epoch": 0.01875488408439698,
511
+ "grad_norm": 1.6676584482192993,
512
+ "learning_rate": 0.000355,
513
+ "loss": 0.4526,
514
+ "step": 72
515
+ },
516
+ {
517
+ "epoch": 0.019015368585569157,
518
+ "grad_norm": 0.9286651611328125,
519
+ "learning_rate": 0.00035999999999999997,
520
+ "loss": 0.2746,
521
+ "step": 73
522
+ },
523
+ {
524
+ "epoch": 0.01927585308674134,
525
+ "grad_norm": 3.234783411026001,
526
+ "learning_rate": 0.000365,
527
+ "loss": 0.5224,
528
+ "step": 74
529
+ },
530
+ {
531
+ "epoch": 0.01953633758791352,
532
+ "grad_norm": 1.3695653676986694,
533
+ "learning_rate": 0.00037,
534
+ "loss": 0.3308,
535
+ "step": 75
536
+ },
537
+ {
538
+ "epoch": 0.0197968220890857,
539
+ "grad_norm": 2.9995968341827393,
540
+ "learning_rate": 0.000375,
541
+ "loss": 0.4817,
542
+ "step": 76
543
+ },
544
+ {
545
+ "epoch": 0.02005730659025788,
546
+ "grad_norm": 1.8912553787231445,
547
+ "learning_rate": 0.00038,
548
+ "loss": 0.2722,
549
+ "step": 77
550
+ },
551
+ {
552
+ "epoch": 0.02031779109143006,
553
+ "grad_norm": 1.3702706098556519,
554
+ "learning_rate": 0.00038500000000000003,
555
+ "loss": 0.1064,
556
+ "step": 78
557
+ },
558
+ {
559
+ "epoch": 0.02057827559260224,
560
+ "grad_norm": 0.9273198246955872,
561
+ "learning_rate": 0.00039000000000000005,
562
+ "loss": 0.1233,
563
+ "step": 79
564
+ },
565
+ {
566
+ "epoch": 0.02083876009377442,
567
+ "grad_norm": 0.8209530711174011,
568
+ "learning_rate": 0.000395,
569
+ "loss": 0.2622,
570
+ "step": 80
571
+ },
572
+ {
573
+ "epoch": 0.021099244594946602,
574
+ "grad_norm": 1.4749599695205688,
575
+ "learning_rate": 0.0004,
576
+ "loss": 0.2999,
577
+ "step": 81
578
+ },
579
+ {
580
+ "epoch": 0.02135972909611878,
581
+ "grad_norm": 1.1133017539978027,
582
+ "learning_rate": 0.00040500000000000003,
583
+ "loss": 0.2929,
584
+ "step": 82
585
+ },
586
+ {
587
+ "epoch": 0.021620213597290962,
588
+ "grad_norm": 1.235826015472412,
589
+ "learning_rate": 0.00041,
590
+ "loss": 0.3106,
591
+ "step": 83
592
+ },
593
+ {
594
+ "epoch": 0.02188069809846314,
595
+ "grad_norm": 0.9904353022575378,
596
+ "learning_rate": 0.000415,
597
+ "loss": 0.2101,
598
+ "step": 84
599
+ },
600
+ {
601
+ "epoch": 0.022141182599635323,
602
+ "grad_norm": 1.2953742742538452,
603
+ "learning_rate": 0.00042,
604
+ "loss": 0.1131,
605
+ "step": 85
606
+ },
607
+ {
608
+ "epoch": 0.0224016671008075,
609
+ "grad_norm": 1.10429048538208,
610
+ "learning_rate": 0.000425,
611
+ "loss": 0.2727,
612
+ "step": 86
613
+ },
614
+ {
615
+ "epoch": 0.022662151601979683,
616
+ "grad_norm": 1.048660159111023,
617
+ "learning_rate": 0.00043,
618
+ "loss": 0.1082,
619
+ "step": 87
620
+ },
621
+ {
622
+ "epoch": 0.022922636103151862,
623
+ "grad_norm": 0.857686460018158,
624
+ "learning_rate": 0.000435,
625
+ "loss": 0.1475,
626
+ "step": 88
627
+ },
628
+ {
629
+ "epoch": 0.023183120604324044,
630
+ "grad_norm": 0.9353561401367188,
631
+ "learning_rate": 0.00044,
632
+ "loss": 0.2123,
633
+ "step": 89
634
+ },
635
+ {
636
+ "epoch": 0.023443605105496222,
637
+ "grad_norm": 1.4590015411376953,
638
+ "learning_rate": 0.00044500000000000003,
639
+ "loss": 0.2988,
640
+ "step": 90
641
+ },
642
+ {
643
+ "epoch": 0.023704089606668404,
644
+ "grad_norm": 0.9181132316589355,
645
+ "learning_rate": 0.00045000000000000004,
646
+ "loss": 0.2321,
647
+ "step": 91
648
+ },
649
+ {
650
+ "epoch": 0.023964574107840583,
651
+ "grad_norm": 0.7688923478126526,
652
+ "learning_rate": 0.000455,
653
+ "loss": 0.16,
654
+ "step": 92
655
+ },
656
+ {
657
+ "epoch": 0.024225058609012765,
658
+ "grad_norm": 1.0974979400634766,
659
+ "learning_rate": 0.00046,
660
+ "loss": 0.2135,
661
+ "step": 93
662
+ },
663
+ {
664
+ "epoch": 0.024485543110184943,
665
+ "grad_norm": 1.083938717842102,
666
+ "learning_rate": 0.000465,
667
+ "loss": 0.1931,
668
+ "step": 94
669
+ },
670
+ {
671
+ "epoch": 0.024746027611357125,
672
+ "grad_norm": 0.5162568688392639,
673
+ "learning_rate": 0.00047,
674
+ "loss": 0.0853,
675
+ "step": 95
676
+ },
677
+ {
678
+ "epoch": 0.025006512112529303,
679
+ "grad_norm": 0.8454329967498779,
680
+ "learning_rate": 0.000475,
681
+ "loss": 0.1723,
682
+ "step": 96
683
+ },
684
+ {
685
+ "epoch": 0.025266996613701485,
686
+ "grad_norm": 0.9237842559814453,
687
+ "learning_rate": 0.00048,
688
+ "loss": 0.1858,
689
+ "step": 97
690
+ },
691
+ {
692
+ "epoch": 0.025527481114873664,
693
+ "grad_norm": 0.8391311168670654,
694
+ "learning_rate": 0.00048499999999999997,
695
+ "loss": 0.1558,
696
+ "step": 98
697
+ },
698
+ {
699
+ "epoch": 0.025787965616045846,
700
+ "grad_norm": 0.7986068725585938,
701
+ "learning_rate": 0.00049,
702
+ "loss": 0.2043,
703
+ "step": 99
704
+ },
705
+ {
706
+ "epoch": 0.026048450117218024,
707
+ "grad_norm": 0.3467917740345001,
708
+ "learning_rate": 0.000495,
709
+ "loss": 0.0386,
710
+ "step": 100
711
+ }
712
+ ],
713
+ "logging_steps": 1,
714
+ "max_steps": 1000,
715
+ "num_input_tokens_seen": 0,
716
+ "num_train_epochs": 1,
717
+ "save_steps": 100,
718
+ "stateful_callbacks": {
719
+ "TrainerControl": {
720
+ "args": {
721
+ "should_epoch_stop": false,
722
+ "should_evaluate": false,
723
+ "should_log": false,
724
+ "should_save": true,
725
+ "should_training_stop": false
726
+ },
727
+ "attributes": {}
728
+ }
729
+ },
730
+ "total_flos": 0.0,
731
+ "train_batch_size": 256,
732
+ "trial_name": null,
733
+ "trial_params": null
734
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2_5_VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 1003520,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2_5_VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "max_pixels": 1003520,
26
+ "min_pixels": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "processor_class": "Qwen2_5_VLProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2_5_VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 1003520,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2_5_VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "max_pixels": 1003520,
26
+ "min_pixels": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "processor_class": "Qwen2_5_VLProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-700/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2_5_VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 1003520,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2_5_VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "max_pixels": 1003520,
26
+ "min_pixels": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "processor_class": "Qwen2_5_VLProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-900/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2_5_VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 1003520,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2_5_VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "max_pixels": 1003520,
26
+ "min_pixels": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "processor_class": "Qwen2_5_VLProcessor",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/train_cls.log ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-12-24T07:14:07.358028809Z","level":"INFO","msg":"stream: starting","core version":"0.22.2"}
2
+ {"time":"2025-12-24T07:14:07.535812656Z","level":"INFO","msg":"stream: created new stream","id":"autp4ou6"}
3
+ {"time":"2025-12-24T07:14:07.535864052Z","level":"INFO","msg":"handler: started","stream_id":"autp4ou6"}
4
+ {"time":"2025-12-24T07:14:07.535912974Z","level":"INFO","msg":"stream: started","id":"autp4ou6"}
5
+ {"time":"2025-12-24T07:14:07.535933442Z","level":"INFO","msg":"writer: started","stream_id":"autp4ou6"}
6
+ {"time":"2025-12-24T07:14:07.535937845Z","level":"INFO","msg":"sender: started","stream_id":"autp4ou6"}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Current SDK version is 0.22.2
2
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Configure stats pid to 3703561
3
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/.config/wandb/settings
4
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/code/VLM2Vec/wandb/settings
5
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:setup_run_log_directory():705] Logging user logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log
7
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:setup_run_log_directory():706] Logging internal logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log
8
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:init():832] calling init triggers
9
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:init():837] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:init():880] starting backend
12
+ 2025-12-24 07:14:07,352 INFO MainThread:3703561 [wandb_init.py:init():883] sending inform_init request
13
+ 2025-12-24 07:14:07,356 INFO MainThread:3703561 [wandb_init.py:init():891] backend started and connected
14
+ 2025-12-24 07:14:07,358 INFO MainThread:3703561 [wandb_init.py:init():961] updated telemetry
15
+ 2025-12-24 07:14:07,363 INFO MainThread:3703561 [wandb_init.py:init():985] communicating run to backend with 90.0 second timeout
16
+ 2025-12-24 07:14:07,809 INFO MainThread:3703561 [wandb_init.py:init():1036] starting run threads in backend
17
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_console_start():2509] atexit reg
18
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_redirect():2357] redirect: wrap_raw
19
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_redirect():2426] Wrapping output streams.
20
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_redirect():2449] Redirects installed.
21
+ 2025-12-24 07:14:07,915 INFO MainThread:3703561 [wandb_init.py:init():1076] run started, returning control to user process
22
+ 2025-12-24 07:14:07,916 INFO MainThread:3703561 [wandb_run.py:_config_callback():1392] config_cb None None {'output_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0005, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 1000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 100, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/runs/Dec24_07-04-56_GCRAZGDL1688', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': True, 'eval_steps': None, 'dataloader_num_workers': 1, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': True, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'image_encoder_freeze': False, 'resume_from': 'none', 'project_name': None, 'grad_cache': False, 'gc_q_chunk_size': 2, 'gc_p_chunk_size': 2, 'interleave_stopping_strategy': 'all_exhausted', 'interleave_batch_size': 0}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/output.log ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0%| | 2/1000 [00:16<2:09:56, 7.81s/it]
2
+ [DBG][env] AOP_ENABLED=1 APPLY=both LAYER=12 SELECTION=attention KEEP_T=0.5 KEEP_V=1 VPOOL_ENABLED=1 VPOOL_LAYER=1
3
+ [RANK][mid] top1=0.78% top5=6.25% top10=10.16% dist={'min': 1, 'p25': 34, 'med': 59, 'p75': 93, 'max': 127}
4
+ [RANK][last] top1=10.94% top5=15.62% top10=21.09% dist={'min': 1, 'p25': 16, 'med': 56, 'p75': 93, 'max': 118}
5
+ [WARN] last layer top1 < 40%. 建议先 AOP_ENABLED=0/VPOOL_ENABLED=0 进行对照,确认基座检索能力。
6
+
7
+ [Probe Step 0] Loss: 0.6135
8
+ - Pred Probs (need_last=1): mean=0.4198, std=0.4385
9
+ - Labels: need_last=0.1016, safe=0.8984
10
+ - mid_hit: 0.78%, last_hit: 10.94%
11
+ - both_correct: 0.78%, both_wrong: 89.06%
12
+
13
+ [Gradient Check After Backward - Step 0]
14
+ - Total Grad Norm: 2.564083
15
+ - Has Gradient: True
16
+ {'loss': 0.6135, 'grad_norm': 2.5640830993652344, 'learning_rate': 0.0, 'epoch': 0.0}
17
+ [DBG][env] AOP_ENABLED=1 APPLY=both LAYER=12 SELECTION=attention KEEP_T=0.5 KEEP_V=1 VPOOL_ENABLED=1 VPOOL_LAYER=1
18
+ [RANK][mid] top1=1.56% top5=5.47% top10=9.38% dist={'min': 1, 'p25': 31, 'med': 63, 'p75': 101, 'max': 128}
19
+ [RANK][last] top1=11.72% top5=19.53% top10=23.44% dist={'min': 1, 'p25': 13, 'med': 48, 'p75': 87, 'max': 128}
20
+ [WARN] last layer top1 < 40%. 建议先 AOP_ENABLED=0/VPOOL_ENABLED=0 进行对照,确认基座检索能力。
21
+
22
+ [Probe Step 1] Loss: 0.8630
23
+ - Pred Probs (need_last=1): mean=0.3642, std=0.4361
24
+ - Labels: need_last=0.1016, safe=0.8984
25
+ - mid_hit: 1.56%, last_hit: 11.72%
26
+ - both_correct: 1.56%, both_wrong: 88.28%
27
+
28
+ [Gradient Check After Backward - Step 1]
29
+ - Total Grad Norm: 2.883063
30
+ - Has Gradient: True
31
+ {'loss': 0.863, 'grad_norm': 2.8830628395080566, 'learning_rate': 5e-06, 'epoch': 0.0}
32
+ [DBG][env] AOP_ENABLED=1 APPLY=both LAYER=12 SELECTION=attention KEEP_T=0.5 KEEP_V=1 VPOOL_ENABLED=1 VPOOL_LAYER=1
33
+ [RANK][mid] top1=0.00% top5=7.03% top10=11.72% dist={'min': 2, 'p25': 28, 'med': 55, 'p75': 87, 'max': 127}
34
+ [RANK][last] top1=7.03% top5=14.06% top10=18.75% dist={'min': 1, 'p25': 15, 'med': 42, 'p75': 69, 'max': 125}
35
+ [WARN] last layer top1 < 40%. 建议先 AOP_ENABLED=0/VPOOL_ENABLED=0 进行对照,确认基座检索能力。
36
+
37
+ [Probe Step 2] Loss: 0.4115
38
+ - Pred Probs (need_last=1): mean=0.4447, std=0.4333
39
+ - Labels: need_last=0.0703, safe=0.9297
40
+ - mid_hit: 0.00%, last_hit: 7.03%
41
+ - both_correct: 0.00%, both_wrong: 92.97%
42
+
43
+ [Gradient Check After Backward - Step 2]
44
+ - Total Grad Norm: 4.093573
45
+ - Has Gradient: True
46
+ {'loss': 0.4115, 'grad_norm': 4.093572616577148, 'learning_rate': 1e-05, 'epoch': 0.0}
47
+
48
+ [Probe Step 3] Loss: 0.6938
49
+ - Pred Probs (need_last=1): mean=0.3258, std=0.4012
50
+ - Labels: need_last=0.1016, safe=0.8984
51
+ - mid_hit: 0.78%, last_hit: 10.94%
52
+ - both_correct: 0.78%, both_wrong: 89.06%
53
+ {'loss': 0.6938, 'grad_norm': 2.6786575317382812, 'learning_rate': 1.5e-05, 'epoch': 0.0}
54
+
55
+ [Probe Step 4] Loss: 0.5551
56
+ - Pred Probs (need_last=1): mean=0.3782, std=0.4288
57
+ - Labels: need_last=0.0703, safe=0.9297
58
+ - mid_hit: 1.56%, last_hit: 8.59%
59
+ - both_correct: 1.56%, both_wrong: 91.41%
60
+ {'loss': 0.5551, 'grad_norm': 2.923421621322632, 'learning_rate': 2e-05, 'epoch': 0.0}
61
+
62
+ [Probe Step 5] Loss: 0.3958
63
+ - Pred Probs (need_last=1): mean=0.3890, std=0.4212
64
+ - Labels: need_last=0.0625, safe=0.9375
65
+ - mid_hit: 1.56%, last_hit: 6.25%
66
+ - both_correct: 0.00%, both_wrong: 92.19%
67
+ {'loss': 0.3958, 'grad_norm': 3.3633787631988525, 'learning_rate': 2.5e-05, 'epoch': 0.0}
68
+
69
+ [Probe Step 6] Loss: 0.4503
70
+ - Pred Probs (need_last=1): mean=0.3575, std=0.3862
71
+ - Labels: need_last=0.1406, safe=0.8594
72
+ - mid_hit: 0.78%, last_hit: 14.84%
73
+ - both_correct: 0.78%, both_wrong: 85.16%
74
+ {'loss': 0.4503, 'grad_norm': 2.4892380237579346, 'learning_rate': 3e-05, 'epoch': 0.0}
75
+
76
+ [Probe Step 7] Loss: 2.2221
77
+ - Pred Probs (need_last=1): mean=0.2568, std=0.3742
78
+ - Labels: need_last=0.5547, safe=0.4453
79
+ - mid_hit: 40.62%, last_hit: 95.31%
80
+ - both_correct: 39.84%, both_wrong: 3.91%
81
+ {'loss': 2.2221, 'grad_norm': 19.515727996826172, 'learning_rate': 3.5000000000000004e-05, 'epoch': 0.0}
82
+
83
+ [Probe Step 8] Loss: 0.3645
84
+ - Pred Probs (need_last=1): mean=0.3536, std=0.4082
85
+ - Labels: need_last=0.0547, safe=0.9453
86
+ - mid_hit: 1.56%, last_hit: 7.03%
87
+ - both_correct: 1.56%, both_wrong: 92.97%
88
+ {'loss': 0.3645, 'grad_norm': 2.7149243354797363, 'learning_rate': 4e-05, 'epoch': 0.0}
89
+
90
+ [Probe Step 9] Loss: 0.4055
91
+ - Pred Probs (need_last=1): mean=0.3187, std=0.3982
92
+ - Labels: need_last=0.0859, safe=0.9141
93
+ - mid_hit: 0.78%, last_hit: 9.38%
94
+ - both_correct: 0.78%, both_wrong: 90.62%
95
+ {'loss': 0.4055, 'grad_norm': 2.0654330253601074, 'learning_rate': 4.4999999999999996e-05, 'epoch': 0.0}
96
+ {'loss': 0.23, 'grad_norm': 2.538350820541382, 'learning_rate': 5e-05, 'epoch': 0.0}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/requirements.txt ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dill==0.3.8
2
+ ray==2.50.0
3
+ torchvision==0.23.0
4
+ nvidia-cublas-cu12==12.8.4.1
5
+ pandas==2.3.3
6
+ nvidia-cusparse-cu12==12.5.8.93
7
+ stack_data==0.6.3
8
+ widgetsnbextension==4.0.15
9
+ huggingface-hub==0.35.3
10
+ nest_asyncio==1.6.0
11
+ pydantic==2.12.0
12
+ tifffile==2025.5.10
13
+ threadpoolctl==3.6.0
14
+ ninja==1.13.0
15
+ imgkit==1.2.3
16
+ gitdb==4.0.12
17
+ regex==2025.9.18
18
+ requests==2.32.5
19
+ propcache==0.4.1
20
+ nvidia-cufft-cu12==11.3.3.83
21
+ ipython==8.37.0
22
+ aiohappyeyeballs==2.6.1
23
+ exceptiongroup==1.3.0
24
+ tzdata==2025.2
25
+ pyarrow==21.0.0
26
+ av==15.1.0
27
+ pure_eval==0.2.3
28
+ contourpy==1.3.2
29
+ nvidia-cudnn-cu12==9.10.2.21
30
+ datasets==3.3.0
31
+ sentry-sdk==2.41.0
32
+ platformdirs==4.5.0
33
+ charset-normalizer==3.4.3
34
+ ipykernel==7.1.0
35
+ tokenizers==0.21.4
36
+ traitlets==5.14.3
37
+ accelerate==1.10.1
38
+ typing-inspection==0.4.2
39
+ pillow==11.3.0
40
+ lazy_loader==0.4
41
+ multidict==6.7.0
42
+ packaging==25.0
43
+ sympy==1.14.0
44
+ asttokens==3.0.0
45
+ scikit-learn==1.7.2
46
+ py-cpuinfo==9.0.0
47
+ Pygments==2.19.2
48
+ aiohttp==3.13.0
49
+ PyYAML==6.0.3
50
+ Markdown==3.10
51
+ matplotlib-inline==0.2.1
52
+ Jinja2==3.1.6
53
+ torch==2.8.0
54
+ setuptools==80.9.0
55
+ nvidia-cusolver-cu12==11.7.3.90
56
+ GitPython==3.1.45
57
+ MarkupSafe==3.0.3
58
+ importlib_metadata==8.7.0
59
+ tornado==6.5.1
60
+ opencv-contrib-python==4.11.0.86
61
+ qwen-vl-utils==0.0.8
62
+ six==1.17.0
63
+ frozenlist==1.8.0
64
+ hjson==3.1.0
65
+ networkx==3.4.2
66
+ comm==0.2.3
67
+ referencing==0.36.2
68
+ wcwidth==0.2.14
69
+ nvidia-nvtx-cu12==12.8.90
70
+ protobuf==6.32.1
71
+ pip==25.2
72
+ flash_attn==2.8.3
73
+ pexpect==4.9.0
74
+ hnswlib==0.8.0
75
+ nvidia-cuda-nvrtc-cu12==12.8.93
76
+ pytz==2025.2
77
+ pyparsing==3.2.5
78
+ wrapt==1.17.3
79
+ click==8.2.1
80
+ hf-xet==1.1.10
81
+ parso==0.8.5
82
+ nvidia-curand-cu12==10.3.9.90
83
+ typing_extensions==4.15.0
84
+ triton==3.4.0
85
+ nvidia-cufile-cu12==1.13.1.3
86
+ idna==3.10
87
+ nvidia-cusparselt-cu12==0.7.1
88
+ jupyter_client==8.6.3
89
+ einops==0.8.1
90
+ scikit-image==0.25.2
91
+ decord==0.6.0
92
+ smmap==5.0.2
93
+ urllib3==2.5.0
94
+ psutil==5.9.1
95
+ psutil==7.1.0
96
+ imageio==2.37.0
97
+ peft==0.17.1
98
+ seaborn==0.13.2
99
+ safetensors==0.6.2
100
+ timm==1.0.20
101
+ certifi==2025.10.5
102
+ ipywidgets==8.1.8
103
+ xxhash==3.6.0
104
+ jsonschema-specifications==2025.9.1
105
+ attrs==25.4.0
106
+ jsonschema==4.25.1
107
+ filelock==3.20.0
108
+ ptyprocess==0.7.0
109
+ debugpy==1.8.16
110
+ jedi==0.19.2
111
+ prompt_toolkit==3.0.52
112
+ nvidia-cuda-runtime-cu12==12.8.90
113
+ jupyterlab_widgets==3.0.16
114
+ pyzmq==27.1.0
115
+ executing==2.2.1
116
+ cycler==0.12.1
117
+ nvidia-cuda-cupti-cu12==12.8.90
118
+ wandb==0.22.2
119
+ opencv-python==4.11.0.86
120
+ fsspec==2024.12.0
121
+ async-timeout==5.0.1
122
+ mpmath==1.3.0
123
+ pydantic_core==2.41.1
124
+ tqdm==4.67.1
125
+ annotated-types==0.7.0
126
+ transformers==4.52.3
127
+ decorator==5.2.1
128
+ zipp==3.23.0
129
+ wheel==0.45.1
130
+ numpy==1.26.4
131
+ multiprocess==0.70.16
132
+ pytrec_eval==0.5
133
+ yarl==1.22.0
134
+ aiosignal==1.4.0
135
+ sentencepiece==0.2.1
136
+ scipy==1.15.3
137
+ matplotlib==3.10.7
138
+ msgpack==1.1.2
139
+ joblib==1.5.2
140
+ fonttools==4.60.1
141
+ rpds-py==0.27.1
142
+ kiwisolver==1.4.9
143
+ nvidia-nccl-cu12==2.27.3
144
+ pickleshare==0.7.5
145
+ jupyter_core==5.9.1
146
+ python-dateutil==2.9.0.post0
147
+ nvidia-nvjitlink-cu12==12.8.93
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/files/wandb-metadata.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.14.0-1014-azure-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-12-24T07:14:07.144808Z",
5
+ "args": [
6
+ "--model_name",
7
+ "Qwen/Qwen2.5-VL-3B-Instruct",
8
+ "--checkpoint_path",
9
+ "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4/checkpoint-5000",
10
+ "--bf16",
11
+ "--pooling",
12
+ "eos",
13
+ "--normalize",
14
+ "True",
15
+ "--temperature",
16
+ "0.02",
17
+ "--dataloader_num_workers",
18
+ "1",
19
+ "--dataset_config",
20
+ "/home/v-menggao/code/VLM2Vec/experiments/public/train/train_image.yaml",
21
+ "--run_name",
22
+ "Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new",
23
+ "--output_dir",
24
+ "/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new",
25
+ "--per_device_train_batch_size",
26
+ "128",
27
+ "--dataloader_drop_last",
28
+ "True",
29
+ "--learning_rate",
30
+ "5e-4",
31
+ "--max_grad_norm",
32
+ "1.0",
33
+ "--max_steps",
34
+ "1000",
35
+ "--warmup_steps",
36
+ "100",
37
+ "--save_steps",
38
+ "100",
39
+ "--logging_steps",
40
+ "1",
41
+ "--save_safetensors",
42
+ "True",
43
+ "--remove_unused_columns",
44
+ "False",
45
+ "--report_to",
46
+ "wandb"
47
+ ],
48
+ "program": "/home/v-menggao/code/VLM2Vec/train_early_exit_AOP_pooling_new.py",
49
+ "codePath": "train_early_exit_AOP_pooling_new.py",
50
+ "codePathLocal": "train_early_exit_AOP_pooling_new.py",
51
+ "git": {
52
+ "remote": "https://github.com/GaoMengGladys/Code.git",
53
+ "commit": "108c5660f7ad8147a02f61f938da28accbf76578"
54
+ },
55
+ "email": "2646894013@qq.com",
56
+ "root": "/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new",
57
+ "host": "GCRAZGDL1688",
58
+ "executable": "/home/v-menggao/miniconda3/envs/VLMtoVec/bin/python3.10",
59
+ "cpu_count": 24,
60
+ "cpu_count_logical": 24,
61
+ "gpu": "NVIDIA A100 80GB PCIe",
62
+ "gpu_count": 1,
63
+ "disk": {
64
+ "/": {
65
+ "total": "1063956480000",
66
+ "used": "859166535680"
67
+ }
68
+ },
69
+ "memory": {
70
+ "total": "232208756736"
71
+ },
72
+ "gpu_nvidia": [
73
+ {
74
+ "name": "NVIDIA A100 80GB PCIe",
75
+ "memoryTotal": "85899345920",
76
+ "cudaCores": 6912,
77
+ "architecture": "Ampere",
78
+ "uuid": "GPU-67f1e796-8888-cac0-c0cf-8d69ebc02416"
79
+ }
80
+ ],
81
+ "cudaVersion": "13.0",
82
+ "writerId": "szqm7qad1kv1te65zdsv0jerv7e0jmdv"
83
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-24T07:14:07.162478266Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpqjzxgwgh/port-3703561.txt","pid":3703561,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-24T07:14:07.163050755Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3703561}
3
+ {"time":"2025-12-24T07:14:07.163022417Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3703561-3723985-3254824786/socket","Net":"unix"}}
4
+ {"time":"2025-12-24T07:14:07.352777492Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-24T07:14:07.357943239Z","level":"INFO","msg":"handleInformInit: received","streamId":"autp4ou6","id":"1(@)"}
6
+ {"time":"2025-12-24T07:14:07.535919206Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"autp4ou6","id":"1(@)"}
7
+ {"time":"2025-12-24T07:14:58.627974022Z","level":"INFO","msg":"server: parent process exited, terminating service process"}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-12-24T07:14:07.358028809Z","level":"INFO","msg":"stream: starting","core version":"0.22.2"}
2
+ {"time":"2025-12-24T07:14:07.535812656Z","level":"INFO","msg":"stream: created new stream","id":"autp4ou6"}
3
+ {"time":"2025-12-24T07:14:07.535864052Z","level":"INFO","msg":"handler: started","stream_id":"autp4ou6"}
4
+ {"time":"2025-12-24T07:14:07.535912974Z","level":"INFO","msg":"stream: started","id":"autp4ou6"}
5
+ {"time":"2025-12-24T07:14:07.535933442Z","level":"INFO","msg":"writer: started","stream_id":"autp4ou6"}
6
+ {"time":"2025-12-24T07:14:07.535937845Z","level":"INFO","msg":"sender: started","stream_id":"autp4ou6"}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Current SDK version is 0.22.2
2
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Configure stats pid to 3703561
3
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/.config/wandb/settings
4
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from /home/v-menggao/code/VLM2Vec/wandb/settings
5
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:setup_run_log_directory():705] Logging user logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug.log
7
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:setup_run_log_directory():706] Logging internal logs to /home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/logs/debug-internal.log
8
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:init():832] calling init triggers
9
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:init():837] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-12-24 07:14:07,146 INFO MainThread:3703561 [wandb_init.py:init():880] starting backend
12
+ 2025-12-24 07:14:07,352 INFO MainThread:3703561 [wandb_init.py:init():883] sending inform_init request
13
+ 2025-12-24 07:14:07,356 INFO MainThread:3703561 [wandb_init.py:init():891] backend started and connected
14
+ 2025-12-24 07:14:07,358 INFO MainThread:3703561 [wandb_init.py:init():961] updated telemetry
15
+ 2025-12-24 07:14:07,363 INFO MainThread:3703561 [wandb_init.py:init():985] communicating run to backend with 90.0 second timeout
16
+ 2025-12-24 07:14:07,809 INFO MainThread:3703561 [wandb_init.py:init():1036] starting run threads in backend
17
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_console_start():2509] atexit reg
18
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_redirect():2357] redirect: wrap_raw
19
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_redirect():2426] Wrapping output streams.
20
+ 2025-12-24 07:14:07,912 INFO MainThread:3703561 [wandb_run.py:_redirect():2449] Redirects installed.
21
+ 2025-12-24 07:14:07,915 INFO MainThread:3703561 [wandb_init.py:init():1076] run started, returning control to user process
22
+ 2025-12-24 07:14:07,916 INFO MainThread:3703561 [wandb_run.py:_config_callback():1392] config_cb None None {'output_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'eval_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0005, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': 1000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 100, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/runs/Dec24_07-04-56_GCRAZGDL1688', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': True, 'eval_steps': None, 'dataloader_num_workers': 1, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': True, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 
'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'image_encoder_freeze': False, 'resume_from': 'none', 'project_name': None, 'grad_cache': False, 'gc_q_chunk_size': 2, 'gc_p_chunk_size': 2, 'interleave_stopping_strategy': 'all_exhausted', 'interleave_batch_size': 0}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128_new/wandb/run-20251224_071407-autp4ou6/run-autp4ou6.wandb ADDED
Binary file (32.8 kB). View file
 
experiments/examples/llava_next/demo.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal VLM2Vec demo: embed an (image + text) query with LLaVa-Next and
# score it against two candidate captions via cosine-style similarity.
from src.model import MMEBModel
from src.arguments import ModelArguments
from src.utils import load_processor

import torch
from transformers import HfArgumentParser, AutoProcessor
from PIL import Image
import numpy as np


model_args = ModelArguments(
    model_name='TIGER-Lab/VLM2Vec-LLaVa-Next',
    pooling='last',
    normalize=True,
    model_backbone='llava_next')

processor = load_processor(model_args)

# Load frozen weights for inference only, in bf16 on the GPU.
model = MMEBModel.load(model_args, is_trainable=False)
model.eval()
model = model.to('cuda', dtype=torch.bfloat16)


def _encode_target_text(text):
    """Embed a text-only target and return its representation tensor.

    Args:
        text: candidate caption to embed (no image).

    Returns:
        The model's ``tgt_reps`` tensor for ``text``.
    """
    inputs = processor(text=text,
                       images=None,
                       return_tensors="pt")
    inputs = {key: value.to('cuda') for key, value in inputs.items()}
    return model(tgt=inputs)["tgt_reps"]


# Image + Text -> Text: build the multimodal query representation once.
inputs = processor(text='<image> Represent the given image with the following question: What is in the image',
                   images=Image.open('figures/example.jpg'),
                   return_tensors="pt")
inputs = {key: value.to('cuda') for key, value in inputs.items()}
qry_output = model(qry=inputs)["qry_reps"]

# Score each candidate caption against the same query.
for string in ('A cat and a dog', 'A cat and a tiger'):
    tgt_output = _encode_target_text(string)
    print(string, '=', model.compute_similarity(qry_output, tgt_output))
## A cat and a dog = tensor([[0.4414]], device='cuda:0', dtype=torch.bfloat16)
## A cat and a tiger = tensor([[0.3555]], device='cuda:0', dtype=torch.bfloat16)
experiments/examples/llava_next/run_eval.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Evaluate TIGER-Lab/VLM2Vec-LLaVA-v1.6-LoRA on a subset of MMEB-eval.
# Replace the DATA_DIR / OUTPUT_DIR placeholders before running.

# Prepend the repo to PYTHONPATH. Only append ':$PYTHONPATH' when it is
# already set and non-empty; otherwise the trailing ':' leaves an empty
# path entry, which Python treats as the current working directory.
export PYTHONPATH="../VLM2Vec/${PYTHONPATH:+:$PYTHONPATH}"

CUDA_VISIBLE_DEVICES=0 python eval.py \
  --model_name TIGER-Lab/VLM2Vec-LLaVA-v1.6-LoRA \
  --image_dir DATA_DIR/MMEB_test/MMEB_Test_1K_New/images/ \
  --encode_output_path OUTPUT_DIR/MMEB_eval/VLM2Vec-Full/ \
  --pooling eos --normalize True \
  --dataset_name TIGER-Lab/MMEB-eval \
  --dataset_split test \
  --subset_name N24News ImageNet-A ImageNet-R WebQA GQA Visual7W \
  --image_resolution high \
  --per_device_eval_batch_size 64