MgGladys committed on
Commit 3902497 · verified · 1 Parent(s): 1f923cf

Add files using upload-large-folder tool

Files changed (50)
  1. experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/merges.txt +0 -0
  2. experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/vocab.json +0 -0
  3. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/merges.txt +0 -0
  4. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/vocab.json +0 -0
  5. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-300/vocab.json +0 -0
  6. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json +24 -0
  7. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja +7 -0
  8. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json +24 -0
  9. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja +7 -0
  10. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json +29 -0
  11. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json +31 -0
  12. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json +208 -0
  13. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json +734 -0
  14. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json +24 -0
  15. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja +7 -0
  16. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json +29 -0
  17. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json +31 -0
  18. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json +208 -0
  19. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/merges.txt +0 -0
  20. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json +29 -0
  21. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json +208 -0
  22. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log +0 -0
  23. experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/vocab.json +0 -0
  24. experiments/public/eval/eval_1gpu.sh +81 -0
  25. experiments/public/eval/eval_1gpu_aop.sh +79 -0
  26. experiments/public/eval/eval_1gpu_cut_layer.sh +76 -0
  27. experiments/public/eval/eval_1gpu_cut_layer_AOP_text.sh +103 -0
  28. experiments/public/eval/eval_1gpu_cut_layer_unified_new.sh +131 -0
  29. experiments/public/eval/eval_1gpu_early_exit_classifier.sh +70 -0
  30. experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_attn_pooling.sh +102 -0
  31. experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling.sh +212 -0
  32. experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling_new.sh +102 -0
  33. experiments/public/eval/eval_1gpu_early_exit_classifier_V5.sh +88 -0
  34. experiments/public/eval/eval_1gpu_early_exit_classifier_V5_new.sh +99 -0
  35. experiments/public/eval/eval_1gpu_multilayer_AOP_attn_pooling.sh +108 -0
  36. experiments/public/eval/eval_1gpu_multilayer_AOP_new.sh +106 -0
  37. experiments/public/eval/eval_1gpu_output_attn.sh +83 -0
  38. experiments/public/eval/eval_vlm2vecv1_8gpu.sh +71 -0
  39. experiments/public/eval/image_retrival.yaml +101 -0
  40. experiments/public/eval/mieb_any2any_retrieval_lite.yaml +55 -0
  41. experiments/public/eval/mieb_any2any_retrieval_lite2.yaml +55 -0
  42. experiments/public/eval/run_batch_benchmark.sh +112 -0
  43. experiments/public/eval/scan_threshold.sh +176 -0
  44. experiments/public/eval/visdoc_retrival.yaml +141 -0
  45. experiments/public/train/train_alltasks.yaml +395 -0
  46. experiments/public/train/train_image.yaml +161 -0
  47. experiments/public/train/train_image1.yaml +160 -0
  48. experiments/public/train/train_v2-gp.sh +103 -0
  49. experiments/public/train/train_v2-qwen2vl-2B_imageonly_add_CRD.sh +109 -0
  50. experiments/public/train/train_v2-qwen2vl-2B_imageonly_layer_prune.sh +99 -0
experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-200/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-200/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz1024/checkpoint-300/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
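As a quick sanity check on these 22 added vocabulary entries (IDs 151643-151664), a minimal sketch assuming the transformers library and a local copy of the folder uploaded above (with the remaining tokenizer files alongside):

from transformers import AutoTokenizer

# Folder uploaded in this commit; assumes vocab.json / merges.txt sit next to added_tokens.json.
ckpt = "experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128"
tok = AutoTokenizer.from_pretrained(ckpt)

# Each entry in added_tokens.json should round-trip to its recorded ID.
assert tok.convert_tokens_to_ids("<|image_pad|>") == 151655
assert tok.convert_tokens_to_ids("<|vision_start|>") == 151652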
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+ You are a helpful assistant.<|im_end|>
+ {% endif %}<|im_start|>{{ message['role'] }}
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+ {% endif %}
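This template is what apply_chat_template renders: a default system turn when none is given, vision placeholders wrapped in <|vision_start|>...<|vision_end|>, and an open assistant header when add_generation_prompt is set. A minimal usage sketch, assuming the transformers AutoProcessor for Qwen2.5-VL (message content is illustrative):

from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
messages = [{
    "role": "user",
    "content": [
        {"type": "image", "image": "example.jpg"},  # hypothetical image reference
        {"type": "text", "text": "Describe the image."},
    ],
}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Expected shape: default system turn, then
# <|vision_start|><|image_pad|><|vision_end|>Describe the image.<|im_end|>,
# then an open <|im_start|>assistant header.
print(prompt)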
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+ You are a helpful assistant.<|im_end|>
+ {% endif %}<|im_start|>{{ message['role'] }}
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+ {% endif %}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "Qwen2_5_VLImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "max_pixels": 1003520,
+   "merge_size": 2,
+   "min_pixels": 3136,
+   "patch_size": 14,
+   "processor_class": "Qwen2_5_VLProcessor",
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "max_pixels": 1003520,
+     "min_pixels": 3136
+   },
+   "temporal_patch_size": 2
+ }
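The geometry here fixes the visual-token budget: one merged visual token covers a (patch_size * merge_size)^2 = 28x28 = 784-pixel area, so the min_pixels/max_pixels bounds hold each image to between 3136/784 = 4 and 1003520/784 = 1280 visual tokens. Pure arithmetic, values copied from the config above:

# Visual-token budget implied by preprocessor_config.json.
patch_size, merge_size = 14, 2
min_pixels, max_pixels = 3136, 1003520
pixels_per_token = (patch_size * merge_size) ** 2   # 784 pixels per merged 2x2 patch group
print(min_pixels // pixels_per_token)   # 4 visual tokens at the floor
print(max_pixels // pixels_per_token)   # 1280 visual tokens at the cap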
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151645": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151646": {
+       "content": "<|object_ref_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151647": {
+       "content": "<|object_ref_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151648": {
+       "content": "<|box_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151649": {
+       "content": "<|box_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151650": {
+       "content": "<|quad_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151651": {
+       "content": "<|quad_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151652": {
+       "content": "<|vision_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151653": {
+       "content": "<|vision_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151654": {
+       "content": "<|vision_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151655": {
+       "content": "<|image_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151656": {
+       "content": "<|video_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151657": {
+       "content": "<tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151658": {
+       "content": "</tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151659": {
+       "content": "<|fim_prefix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151660": {
+       "content": "<|fim_middle|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151661": {
+       "content": "<|fim_suffix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151662": {
+       "content": "<|fim_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151663": {
+       "content": "<|repo_name|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151664": {
+       "content": "<|file_sep|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "bos_token": null,
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "processor_class": "Qwen2_5_VLProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
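Worth noting in this config: eos_token is <|im_end|> while pad_token is <|endoftext|>, which is what the "--pooling eos" setting in the eval scripts further down relies on. A quick check, assuming transformers and a local copy of the checkpoint directory above:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100"
)
print(tok.eos_token, tok.eos_token_id)   # <|im_end|> 151645
print(tok.pad_token, tok.pad_token_id)   # <|endoftext|> 151643
print(tok.model_max_length)              # 131072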
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,734 @@
+ {
+   "best_global_step": null,
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.013017443374121323,
+   "eval_steps": 500,
+   "global_step": 100,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.00013017443374121324,
+       "grad_norm": 9.027134895324707,
+       "learning_rate": 0.0,
+       "loss": 1.7121,
+       "step": 1
+     },
+     {
+       "epoch": 0.0002603488674824265,
+       "grad_norm": 6.82881498336792,
+       "learning_rate": 5e-06,
+       "loss": 1.3212,
+       "step": 2
+     },
+     {
+       "epoch": 0.00039052330122363966,
+       "grad_norm": 9.780951499938965,
+       "learning_rate": 1e-05,
+       "loss": 1.3029,
+       "step": 3
+     },
+     {
+       "epoch": 0.000520697734964853,
+       "grad_norm": 6.956725597381592,
+       "learning_rate": 1.5e-05,
+       "loss": 1.3596,
+       "step": 4
+     },
+     {
+       "epoch": 0.0006508721687060661,
+       "grad_norm": 7.1865010261535645,
+       "learning_rate": 2e-05,
+       "loss": 1.0132,
+       "step": 5
+     },
+     {
+       "epoch": 0.0007810466024472793,
+       "grad_norm": 6.9718475341796875,
+       "learning_rate": 2.5e-05,
+       "loss": 1.0072,
+       "step": 6
+     },
+     {
+       "epoch": 0.0009112210361884926,
+       "grad_norm": 7.56270170211792,
+       "learning_rate": 3e-05,
+       "loss": 0.9878,
+       "step": 7
+     },
+     {
+       "epoch": 0.001041395469929706,
+       "grad_norm": 9.146328926086426,
+       "learning_rate": 3.5000000000000004e-05,
+       "loss": 1.0033,
+       "step": 8
+     },
+     {
+       "epoch": 0.001171569903670919,
+       "grad_norm": 7.27562952041626,
+       "learning_rate": 4e-05,
+       "loss": 1.2293,
+       "step": 9
+     },
+     {
+       "epoch": 0.0013017443374121322,
+       "grad_norm": 7.683845520019531,
+       "learning_rate": 4.4999999999999996e-05,
+       "loss": 1.0291,
+       "step": 10
+     },
+     {
+       "epoch": 0.0014319187711533455,
+       "grad_norm": 6.271151542663574,
+       "learning_rate": 5e-05,
+       "loss": 0.9344,
+       "step": 11
+     },
+     {
+       "epoch": 0.0015620932048945586,
+       "grad_norm": 7.351341247558594,
+       "learning_rate": 5.5e-05,
+       "loss": 1.1735,
+       "step": 12
+     },
+     {
+       "epoch": 0.001692267638635772,
+       "grad_norm": 9.452715873718262,
+       "learning_rate": 6e-05,
+       "loss": 1.6768,
+       "step": 13
+     },
+     {
+       "epoch": 0.0018224420723769851,
+       "grad_norm": 4.848631858825684,
+       "learning_rate": 6.500000000000001e-05,
+       "loss": 1.1856,
+       "step": 14
+     },
+     {
+       "epoch": 0.0019526165061181985,
+       "grad_norm": 4.508008003234863,
+       "learning_rate": 7.000000000000001e-05,
+       "loss": 1.1299,
+       "step": 15
+     },
+     {
+       "epoch": 0.002082790939859412,
+       "grad_norm": 7.280736446380615,
+       "learning_rate": 7.5e-05,
+       "loss": 1.1765,
+       "step": 16
+     },
+     {
+       "epoch": 0.0022129653736006247,
+       "grad_norm": 5.3748979568481445,
+       "learning_rate": 8e-05,
+       "loss": 0.7167,
+       "step": 17
+     },
+     {
+       "epoch": 0.002343139807341838,
+       "grad_norm": 4.538390636444092,
+       "learning_rate": 8.5e-05,
+       "loss": 0.8025,
+       "step": 18
+     },
+     {
+       "epoch": 0.0024733142410830514,
+       "grad_norm": 5.350919723510742,
+       "learning_rate": 8.999999999999999e-05,
+       "loss": 1.0602,
+       "step": 19
+     },
+     {
+       "epoch": 0.0026034886748242643,
+       "grad_norm": 7.6904802322387695,
+       "learning_rate": 9.5e-05,
+       "loss": 0.9535,
+       "step": 20
+     },
+     {
+       "epoch": 0.0027336631085654777,
+       "grad_norm": 3.215092658996582,
+       "learning_rate": 0.0001,
+       "loss": 0.6306,
+       "step": 21
+     },
+     {
+       "epoch": 0.002863837542306691,
+       "grad_norm": 3.4990460872650146,
+       "learning_rate": 0.000105,
+       "loss": 0.8273,
+       "step": 22
+     },
+     {
+       "epoch": 0.0029940119760479044,
+       "grad_norm": 6.226487159729004,
+       "learning_rate": 0.00011,
+       "loss": 0.7278,
+       "step": 23
+     },
+     {
+       "epoch": 0.0031241864097891173,
+       "grad_norm": 8.820632934570312,
+       "learning_rate": 0.000115,
+       "loss": 1.1691,
+       "step": 24
+     },
+     {
+       "epoch": 0.0032543608435303306,
+       "grad_norm": 4.559078216552734,
+       "learning_rate": 0.00012,
+       "loss": 0.7181,
+       "step": 25
+     },
+     {
+       "epoch": 0.003384535277271544,
+       "grad_norm": 4.220932960510254,
+       "learning_rate": 0.000125,
+       "loss": 1.1495,
+       "step": 26
+     },
+     {
+       "epoch": 0.003514709711012757,
+       "grad_norm": 3.457106590270996,
+       "learning_rate": 0.00013000000000000002,
+       "loss": 0.6497,
+       "step": 27
+     },
+     {
+       "epoch": 0.0036448841447539702,
+       "grad_norm": 4.938692092895508,
+       "learning_rate": 0.000135,
+       "loss": 0.8021,
+       "step": 28
+     },
+     {
+       "epoch": 0.0037750585784951836,
+       "grad_norm": 4.849185943603516,
+       "learning_rate": 0.00014000000000000001,
+       "loss": 0.4922,
+       "step": 29
+     },
+     {
+       "epoch": 0.003905233012236397,
+       "grad_norm": 2.6389944553375244,
+       "learning_rate": 0.000145,
+       "loss": 0.7901,
+       "step": 30
+     },
+     {
+       "epoch": 0.00403540744597761,
+       "grad_norm": 3.179384231567383,
+       "learning_rate": 0.00015,
+       "loss": 0.4517,
+       "step": 31
+     },
+     {
+       "epoch": 0.004165581879718824,
+       "grad_norm": 3.68798828125,
+       "learning_rate": 0.000155,
+       "loss": 0.7674,
+       "step": 32
+     },
+     {
+       "epoch": 0.004295756313460036,
+       "grad_norm": 3.3014638423919678,
+       "learning_rate": 0.00016,
+       "loss": 0.6232,
+       "step": 33
+     },
+     {
+       "epoch": 0.0044259307472012495,
+       "grad_norm": 5.8319993019104,
+       "learning_rate": 0.000165,
+       "loss": 0.7051,
+       "step": 34
+     },
+     {
+       "epoch": 0.004556105180942463,
+       "grad_norm": 5.789146423339844,
+       "learning_rate": 0.00017,
+       "loss": 0.9646,
+       "step": 35
+     },
+     {
+       "epoch": 0.004686279614683676,
+       "grad_norm": 3.3160910606384277,
+       "learning_rate": 0.000175,
+       "loss": 0.7404,
+       "step": 36
+     },
+     {
+       "epoch": 0.0048164540484248895,
+       "grad_norm": 2.0886712074279785,
+       "learning_rate": 0.00017999999999999998,
+       "loss": 0.4553,
+       "step": 37
+     },
+     {
+       "epoch": 0.004946628482166103,
+       "grad_norm": 3.526718854904175,
+       "learning_rate": 0.000185,
+       "loss": 0.6724,
+       "step": 38
+     },
+     {
+       "epoch": 0.005076802915907316,
+       "grad_norm": 1.9652310609817505,
+       "learning_rate": 0.00019,
+       "loss": 0.4729,
+       "step": 39
+     },
+     {
+       "epoch": 0.005206977349648529,
+       "grad_norm": 3.9210290908813477,
+       "learning_rate": 0.00019500000000000002,
+       "loss": 0.9257,
+       "step": 40
+     },
+     {
+       "epoch": 0.005337151783389742,
+       "grad_norm": 2.2785885334014893,
+       "learning_rate": 0.0002,
+       "loss": 0.3922,
+       "step": 41
+     },
+     {
+       "epoch": 0.005467326217130955,
+       "grad_norm": 5.556844711303711,
+       "learning_rate": 0.000205,
+       "loss": 0.8272,
+       "step": 42
+     },
+     {
+       "epoch": 0.005597500650872169,
+       "grad_norm": 1.7946547269821167,
+       "learning_rate": 0.00021,
+       "loss": 0.2776,
+       "step": 43
+     },
+     {
+       "epoch": 0.005727675084613382,
+       "grad_norm": 1.6659146547317505,
+       "learning_rate": 0.000215,
+       "loss": 0.2818,
+       "step": 44
+     },
+     {
+       "epoch": 0.005857849518354595,
+       "grad_norm": 2.9105308055877686,
+       "learning_rate": 0.00022,
+       "loss": 0.733,
+       "step": 45
+     },
+     {
+       "epoch": 0.005988023952095809,
+       "grad_norm": 1.707923173904419,
+       "learning_rate": 0.00022500000000000002,
+       "loss": 0.2418,
+       "step": 46
+     },
+     {
+       "epoch": 0.006118198385837021,
+       "grad_norm": 1.9957884550094604,
+       "learning_rate": 0.00023,
+       "loss": 0.6494,
+       "step": 47
+     },
+     {
+       "epoch": 0.006248372819578235,
+       "grad_norm": 2.3562097549438477,
+       "learning_rate": 0.000235,
+       "loss": 0.4304,
+       "step": 48
+     },
+     {
+       "epoch": 0.006378547253319448,
+       "grad_norm": 2.6113295555114746,
+       "learning_rate": 0.00024,
+       "loss": 0.6548,
+       "step": 49
+     },
+     {
+       "epoch": 0.006508721687060661,
+       "grad_norm": 2.3105404376983643,
+       "learning_rate": 0.000245,
+       "loss": 0.5747,
+       "step": 50
+     },
+     {
+       "epoch": 0.006638896120801875,
+       "grad_norm": 2.4724414348602295,
+       "learning_rate": 0.00025,
+       "loss": 0.3501,
+       "step": 51
+     },
+     {
+       "epoch": 0.006769070554543088,
+       "grad_norm": 2.129112482070923,
+       "learning_rate": 0.000255,
+       "loss": 0.3983,
+       "step": 52
+     },
+     {
+       "epoch": 0.006899244988284301,
+       "grad_norm": 1.734704852104187,
+       "learning_rate": 0.00026000000000000003,
+       "loss": 0.4274,
+       "step": 53
+     },
+     {
+       "epoch": 0.007029419422025514,
+       "grad_norm": 1.7710378170013428,
+       "learning_rate": 0.00026500000000000004,
+       "loss": 0.2733,
+       "step": 54
+     },
+     {
+       "epoch": 0.007159593855766727,
+       "grad_norm": 3.876213788986206,
+       "learning_rate": 0.00027,
+       "loss": 0.2934,
+       "step": 55
+     },
+     {
+       "epoch": 0.0072897682895079405,
+       "grad_norm": 1.6544724702835083,
+       "learning_rate": 0.000275,
+       "loss": 0.42,
+       "step": 56
+     },
+     {
+       "epoch": 0.007419942723249154,
+       "grad_norm": 4.511378288269043,
+       "learning_rate": 0.00028000000000000003,
+       "loss": 0.7193,
+       "step": 57
+     },
+     {
+       "epoch": 0.007550117156990367,
+       "grad_norm": 1.969791293144226,
+       "learning_rate": 0.000285,
+       "loss": 0.2931,
+       "step": 58
+     },
+     {
+       "epoch": 0.0076802915907315805,
+       "grad_norm": 1.4399250745773315,
+       "learning_rate": 0.00029,
+       "loss": 0.2678,
+       "step": 59
+     },
+     {
+       "epoch": 0.007810466024472794,
+       "grad_norm": 2.075308084487915,
+       "learning_rate": 0.000295,
+       "loss": 0.5184,
+       "step": 60
+     },
+     {
+       "epoch": 0.007940640458214007,
+       "grad_norm": 2.092390775680542,
+       "learning_rate": 0.0003,
+       "loss": 0.501,
+       "step": 61
+     },
+     {
+       "epoch": 0.00807081489195522,
+       "grad_norm": 1.3803796768188477,
+       "learning_rate": 0.000305,
+       "loss": 0.0933,
+       "step": 62
+     },
+     {
+       "epoch": 0.008200989325696434,
+       "grad_norm": 2.6716833114624023,
+       "learning_rate": 0.00031,
+       "loss": 0.4907,
+       "step": 63
+     },
+     {
+       "epoch": 0.008331163759437647,
+       "grad_norm": 2.602332353591919,
+       "learning_rate": 0.000315,
+       "loss": 0.5355,
+       "step": 64
+     },
+     {
+       "epoch": 0.008461338193178859,
+       "grad_norm": 1.9427075386047363,
+       "learning_rate": 0.00032,
+       "loss": 0.2417,
+       "step": 65
+     },
+     {
+       "epoch": 0.008591512626920072,
+       "grad_norm": 2.076782703399658,
+       "learning_rate": 0.00032500000000000004,
+       "loss": 0.2974,
+       "step": 66
+     },
+     {
+       "epoch": 0.008721687060661286,
+       "grad_norm": 1.6976258754730225,
+       "learning_rate": 0.00033,
+       "loss": 0.2383,
+       "step": 67
+     },
+     {
+       "epoch": 0.008851861494402499,
+       "grad_norm": 1.6441351175308228,
+       "learning_rate": 0.000335,
+       "loss": 0.2017,
+       "step": 68
+     },
+     {
+       "epoch": 0.008982035928143712,
+       "grad_norm": 2.251415252685547,
+       "learning_rate": 0.00034,
+       "loss": 0.3529,
+       "step": 69
+     },
+     {
+       "epoch": 0.009112210361884926,
+       "grad_norm": 1.3723615407943726,
+       "learning_rate": 0.000345,
+       "loss": 0.2136,
+       "step": 70
+     },
+     {
+       "epoch": 0.009242384795626139,
+       "grad_norm": 2.3022258281707764,
+       "learning_rate": 0.00035,
+       "loss": 0.3552,
+       "step": 71
+     },
+     {
+       "epoch": 0.009372559229367352,
+       "grad_norm": 1.7158514261245728,
+       "learning_rate": 0.000355,
+       "loss": 0.2876,
+       "step": 72
+     },
+     {
+       "epoch": 0.009502733663108566,
+       "grad_norm": 2.0729708671569824,
+       "learning_rate": 0.00035999999999999997,
+       "loss": 0.3345,
+       "step": 73
+     },
+     {
+       "epoch": 0.009632908096849779,
+       "grad_norm": 0.8926207423210144,
+       "learning_rate": 0.000365,
+       "loss": 0.145,
+       "step": 74
+     },
+     {
+       "epoch": 0.009763082530590992,
+       "grad_norm": 1.281984806060791,
+       "learning_rate": 0.00037,
+       "loss": 0.2553,
+       "step": 75
+     },
+     {
+       "epoch": 0.009893256964332206,
+       "grad_norm": 2.1244750022888184,
+       "learning_rate": 0.000375,
+       "loss": 0.4454,
+       "step": 76
+     },
+     {
+       "epoch": 0.010023431398073419,
+       "grad_norm": 2.00681209564209,
+       "learning_rate": 0.00038,
+       "loss": 0.2888,
+       "step": 77
+     },
+     {
+       "epoch": 0.010153605831814632,
+       "grad_norm": 2.414694309234619,
+       "learning_rate": 0.00038500000000000003,
+       "loss": 0.3445,
+       "step": 78
+     },
+     {
+       "epoch": 0.010283780265555844,
+       "grad_norm": 1.4376050233840942,
+       "learning_rate": 0.00039000000000000005,
+       "loss": 0.3805,
+       "step": 79
+     },
+     {
+       "epoch": 0.010413954699297057,
+       "grad_norm": 1.5109490156173706,
+       "learning_rate": 0.000395,
+       "loss": 0.298,
+       "step": 80
+     },
+     {
+       "epoch": 0.01054412913303827,
+       "grad_norm": 1.4980159997940063,
+       "learning_rate": 0.0004,
+       "loss": 0.3296,
+       "step": 81
+     },
+     {
+       "epoch": 0.010674303566779484,
+       "grad_norm": 0.8917379379272461,
+       "learning_rate": 0.00040500000000000003,
+       "loss": 0.2573,
+       "step": 82
+     },
+     {
+       "epoch": 0.010804478000520697,
+       "grad_norm": 1.4543973207473755,
+       "learning_rate": 0.00041,
+       "loss": 0.3317,
+       "step": 83
+     },
+     {
+       "epoch": 0.01093465243426191,
+       "grad_norm": 1.2531291246414185,
+       "learning_rate": 0.000415,
+       "loss": 0.3687,
+       "step": 84
+     },
+     {
+       "epoch": 0.011064826868003124,
+       "grad_norm": 1.4232031106948853,
+       "learning_rate": 0.00042,
+       "loss": 0.1944,
+       "step": 85
+     },
+     {
+       "epoch": 0.011195001301744337,
+       "grad_norm": 1.066874384880066,
+       "learning_rate": 0.000425,
+       "loss": 0.2827,
+       "step": 86
+     },
+     {
+       "epoch": 0.01132517573548555,
+       "grad_norm": 1.0397121906280518,
+       "learning_rate": 0.00043,
+       "loss": 0.2561,
+       "step": 87
+     },
+     {
+       "epoch": 0.011455350169226764,
+       "grad_norm": 1.2276612520217896,
+       "learning_rate": 0.000435,
+       "loss": 0.0961,
+       "step": 88
+     },
+     {
+       "epoch": 0.011585524602967977,
+       "grad_norm": 1.4861217737197876,
+       "learning_rate": 0.00044,
+       "loss": 0.2329,
+       "step": 89
+     },
+     {
+       "epoch": 0.01171569903670919,
+       "grad_norm": 1.859115481376648,
+       "learning_rate": 0.00044500000000000003,
+       "loss": 0.2767,
+       "step": 90
+     },
+     {
+       "epoch": 0.011845873470450404,
+       "grad_norm": 1.5194251537322998,
+       "learning_rate": 0.00045000000000000004,
+       "loss": 0.2665,
+       "step": 91
+     },
+     {
+       "epoch": 0.011976047904191617,
+       "grad_norm": 1.2869577407836914,
+       "learning_rate": 0.000455,
+       "loss": 0.128,
+       "step": 92
+     },
+     {
+       "epoch": 0.01210622233793283,
+       "grad_norm": 1.3539648056030273,
+       "learning_rate": 0.00046,
+       "loss": 0.2405,
+       "step": 93
+     },
+     {
+       "epoch": 0.012236396771674042,
+       "grad_norm": 1.1017889976501465,
+       "learning_rate": 0.000465,
+       "loss": 0.2318,
+       "step": 94
+     },
+     {
+       "epoch": 0.012366571205415256,
+       "grad_norm": 1.0330371856689453,
+       "learning_rate": 0.00047,
+       "loss": 0.2629,
+       "step": 95
+     },
+     {
+       "epoch": 0.01249674563915647,
+       "grad_norm": 1.0031756162643433,
+       "learning_rate": 0.000475,
+       "loss": 0.152,
+       "step": 96
+     },
+     {
+       "epoch": 0.012626920072897682,
+       "grad_norm": 0.9949682950973511,
+       "learning_rate": 0.00048,
+       "loss": 0.2203,
+       "step": 97
+     },
+     {
+       "epoch": 0.012757094506638896,
+       "grad_norm": 1.5362247228622437,
+       "learning_rate": 0.00048499999999999997,
+       "loss": 0.2322,
+       "step": 98
+     },
+     {
+       "epoch": 0.01288726894038011,
+       "grad_norm": 1.273103952407837,
+       "learning_rate": 0.00049,
+       "loss": 0.2898,
+       "step": 99
+     },
+     {
+       "epoch": 0.013017443374121323,
+       "grad_norm": 0.6677097678184509,
+       "learning_rate": 0.000495,
+       "loss": 0.1565,
+       "step": 100
+     }
+   ],
+   "logging_steps": 1,
+   "max_steps": 1000,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 100,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0.0,
+   "train_batch_size": 128,
+   "trial_name": null,
+   "trial_params": null
+ }
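The log_history above records a linear warmup of 5e-6 per step (lr 0.0 at step 1, 0.000495 at step 100, on the way to the 1000-step schedule) while the loss falls from about 1.71 to about 0.16. A minimal sketch for inspecting it offline, assuming only this JSON file at a local (hypothetical) relative path:

import json

with open("checkpoint-100/trainer_state.json") as f:  # hypothetical relative path
    state = json.load(f)

# Print every 10th logged step: step, learning rate, loss.
for rec in state["log_history"][::10]:
    print(f"step {rec['step']:4d}  lr {rec['learning_rate']:.2e}  loss {rec['loss']:.4f}")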
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+ You are a helpful assistant.<|im_end|>
+ {% endif %}<|im_start|>{{ message['role'] }}
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+ {% endif %}
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "Qwen2_5_VLImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "max_pixels": 1003520,
+   "merge_size": 2,
+   "min_pixels": 3136,
+   "patch_size": 14,
+   "processor_class": "Qwen2_5_VLProcessor",
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "max_pixels": 1003520,
+     "min_pixels": 3136
+   },
+   "temporal_patch_size": 2
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151645": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151646": {
+       "content": "<|object_ref_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151647": {
+       "content": "<|object_ref_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151648": {
+       "content": "<|box_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151649": {
+       "content": "<|box_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151650": {
+       "content": "<|quad_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151651": {
+       "content": "<|quad_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151652": {
+       "content": "<|vision_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151653": {
+       "content": "<|vision_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151654": {
+       "content": "<|vision_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151655": {
+       "content": "<|image_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151656": {
+       "content": "<|video_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151657": {
+       "content": "<tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151658": {
+       "content": "</tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151659": {
+       "content": "<|fim_prefix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151660": {
+       "content": "<|fim_middle|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151661": {
+       "content": "<|fim_suffix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151662": {
+       "content": "<|fim_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151663": {
+       "content": "<|repo_name|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151664": {
+       "content": "<|file_sep|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "bos_token": null,
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "processor_class": "Qwen2_5_VLProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "Qwen2_5_VLImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "max_pixels": 1003520,
+   "merge_size": 2,
+   "min_pixels": 3136,
+   "patch_size": 14,
+   "processor_class": "Qwen2_5_VLProcessor",
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "max_pixels": 1003520,
+     "min_pixels": 3136
+   },
+   "temporal_patch_size": 2
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151645": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151646": {
+       "content": "<|object_ref_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151647": {
+       "content": "<|object_ref_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151648": {
+       "content": "<|box_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151649": {
+       "content": "<|box_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151650": {
+       "content": "<|quad_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151651": {
+       "content": "<|quad_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151652": {
+       "content": "<|vision_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151653": {
+       "content": "<|vision_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151654": {
+       "content": "<|vision_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151655": {
+       "content": "<|image_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151656": {
+       "content": "<|video_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151657": {
+       "content": "<tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151658": {
+       "content": "</tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151659": {
+       "content": "<|fim_prefix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151660": {
+       "content": "<|fim_middle|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151661": {
+       "content": "<|fim_suffix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151662": {
+       "content": "<|fim_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151663": {
+       "content": "<|repo_name|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151664": {
+       "content": "<|file_sep|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "bos_token": null,
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "processor_class": "Qwen2_5_VLProcessor",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/train_cls.log ADDED
The diff for this file is too large to render. See raw diff
 
experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/public/eval/eval_1gpu.sh ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ echo "==> Environment"
4
+ echo "conda location: $(which conda)"
5
+ echo "Python location: $(which python)"
6
+ echo "Python version: $(python --version)"
7
+ echo ""
8
+
9
+ cd VLM2Vec/ || exit
10
+
11
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=1
+ # MODALITIES=("image_retrival" "video_retrival")
+ # MODALITIES=("image_retrival")
+ MODALITIES=("mieb_any2any_retrieval_lite")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2vl_2B.add_mlp_try1/checkpoint-500" #_qry_cand_diff_ratio
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000_DART_2_0.75_0_0" #_qry_cand_diff_ratio
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/try_add_mlp_try/stage1" #_qry_cand_diff_ratio
+
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
+ # MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "Qwen/Qwen2.5-VL-3B-Instruct;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2.5VL-3B" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret;lamra;$OUTPUT_BASEDIR/LamRA-Ret" )
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
+ # MODEL_SPECS+=( "vidore/colpali-v1.3;colpali;$OUTPUT_BASEDIR/colpali-v1.3" )
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+ # Parse the model name, backbone, and base output path from the spec string
+ IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+ echo "================================================="
+ echo "🚀 Processing Model: $MODEL_NAME"
+ echo "================================================="
+
+ # Loop through each modality for the current model
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+ echo "-------------------------------------------------"
+ echo " - Modality: $MODALITY"
+ echo " - Output Path: $OUTPUT_PATH"
+
+ # Ensure the output directory exists
+ mkdir -p "$OUTPUT_PATH"
+
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_BACKBONE\" \
+ --model_name \"$MODEL_NAME\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ # echo "$cmd" # Uncomment for debugging the exact command
+ eval "$cmd"
+ echo " - Done."
+ echo "-------------------------------------------------"
+ done
+ done
+
+ echo "✅ All jobs completed."
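+
+ # Usage sketch (added note; the spec string below is hypothetical). Each
+ # MODEL_SPECS entry packs three ';'-separated fields that the loop splits
+ # with `IFS=';' read`:
+ #   spec="Qwen/Qwen2.5-VL-3B-Instruct;qwen2_5_vl;/tmp/out"
+ #   IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+ #   echo "$MODEL_NAME | $MODEL_BACKBONE | $BASE_OUTPUT_PATH"
+ #   # -> Qwen/Qwen2.5-VL-3B-Instruct | qwen2_5_vl | /tmp/out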
experiments/public/eval/eval_1gpu_aop.sh ADDED
@@ -0,0 +1,79 @@
+ #!/bin/bash
+
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=1
+ # MODALITIES=("image_retrival" "video_retrival")
+ MODALITIES=("image_retrival")
+ # MODALITIES=("visdoc_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_2_5_analysis_aop" #_qry_cand_diff_ratio
+
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ # MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
+ # MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_single_node_alltask_online_doc_data_12_11_h100;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2.5VL-7B" ) # Qwen/Qwen2.5-VL-3B-Instruct
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret;lamra;$OUTPUT_BASEDIR/LamRA-Ret" )
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
+ # MODEL_SPECS+=( "vidore/colpali-v1.3;colpali;$OUTPUT_BASEDIR/colpali-v1.3" )
+
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+ # Parse the model name, backbone, and base output path from the spec string
+ IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+ echo "================================================="
+ echo "🚀 Processing Model: $MODEL_NAME"
+ echo "================================================="
+
+ # Loop through each modality for the current model
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+ echo "-------------------------------------------------"
+ echo " - Modality: $MODALITY"
+ echo " - Output Path: $OUTPUT_PATH"
+
+ # Ensure the output directory exists
+ mkdir -p "$OUTPUT_PATH"
+
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_aop.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_BACKBONE\" \
+ --model_name \"$MODEL_NAME\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ # echo "$cmd" # Uncomment for debugging the exact command
+ eval "$cmd"
+ echo " - Done."
+ echo "-------------------------------------------------"
+ done
+ done
+
+ echo "✅ All jobs completed."
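+
+ # Debugging sketch (added note): to inspect the exact command string without
+ # running it, uncomment the `echo "$cmd"` line above and comment out
+ # `eval "$cmd"`; to trace the whole script while it executes:
+ #   bash -x experiments/public/eval/eval_1gpu_aop.sh 2>&1 | tee aop_trace.log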
experiments/public/eval/eval_1gpu_cut_layer.sh ADDED
@@ -0,0 +1,76 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=4
+ MODALITIES=("image_retrival")
+ # MODALITIES=("image_retrival" "video_retrival" "visdoc_retrival")
+ # MODALITIES=("video_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec/cut_layer" #_qry_cand_diff_ratio
+ OUTPUT_BASEDIR="/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/result" #_qry_cand_diff_ratio
+
+
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ # MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ MODEL_SPECS+=( "/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/checkpoint-900;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
+ # MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B_single_node_image_ret_10_30_h100/checkpoint-1200;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/vlm2vec_train_2.5_3b_multilayer_distill_add_weight_image_ret_11_18_a100_2/checkpoint-1000;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+ # Parse the model name, backbone, and base output path from the spec string
+ IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+ echo "================================================="
+ echo "🚀 Processing Model: $MODEL_NAME"
+ echo "================================================="
+
+ # Loop through each modality for the current model
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+ echo "-------------------------------------------------"
+ echo " - Modality: $MODALITY"
+ echo " - Output Path: $OUTPUT_PATH"
+
+ # Ensure the output directory exists
+ mkdir -p "$OUTPUT_PATH"
+
+ cmd="LM_LAYERS='4,8,12,16,20,last' CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_cut_layer.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_BACKBONE\" \
+ --model_name \"$MODEL_NAME\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ # echo "$cmd" # Uncomment for debugging the exact command
+ eval "$cmd"
+ echo " - Done."
+ echo "-------------------------------------------------"
+ done
+ done
+
+ echo "✅ All jobs completed."
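+
+ # Added note: LM_LAYERS is set only for the python child process via the
+ # inline prefix inside $cmd. A quick way to see how the comma-separated list
+ # is parsed on the Python side (illustrative one-liner, not project code):
+ #   LM_LAYERS='4,8,12,16,20,last' python -c "import os; print(os.environ['LM_LAYERS'].split(','))"
+ #   # -> ['4', '8', '12', '16', '20', 'last']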
experiments/public/eval/eval_1gpu_cut_layer_AOP_text.sh ADDED
@@ -0,0 +1,103 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=64
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_AOP/t_0.5_8_i_0.5_16_both_l12_bsz64" #_qry_cand_diff_ratio
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000/t_0.5_both_l10_bsz64_new" #_qry_cand_diff_ratio
+
+ # export AOP_ENABLED=1
+ # export AOP_APPLY=qry
+ # export AOP_LAYER=8
+ # export AOP_MODE=delta
+ # export AOP_DELTA=0.12
+ # export AOP_KHAT=1.6
+ # export AOP_MIN_KEEP=64
+ # export AOP_USE_BIAS=1
+
+ export AOP_ENABLED=1
+ export AOP_APPLY=both
+ export AOP_LAYER=10
+ export AOP_MODE=ratio
+ # export AOP_KEEP_RATIO=0.1
+ export AOP_MIN_KEEP=0
+ # export AOP_DEBUG=1
+ export AOP_SELECTION=random # key switch: aop | attention | random
+
+ export AOP_PRUNE_VISION=0
+ export AOP_PRUNE_TEXT=1
+ # Ratio mode: per-modality keep ratios (both 0.5 here)
+ export AOP_KEEP_RATIO_VISION=0.5
+ export AOP_KEEP_RATIO_TEXT=0.5
+ # Floors: minimum number of tokens to keep
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_MIN_KEEP_TEXT=8
+ # Text protection
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+ export AOP_RANDOM_SEED=42
+ export AOP_DEBUG=1
+
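+ # Hedged sketch (added note): the exact semantics live in the Python eval
+ # script; under ratio mode the token budget is presumably something like
+ #   keep_n = max(int(n_tokens * AOP_KEEP_RATIO_TEXT), AOP_MIN_KEEP_TEXT)
+ # e.g. 120 text tokens at ratio 0.5 with floor 8 -> keep 60 tokens.
+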
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ # MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
+ MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_single_node_image_ret_10_29_h100/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+ # Parse the model name, backbone, and base output path from the spec string
+ IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+ echo "================================================="
+ echo "🚀 Processing Model: $MODEL_NAME"
+ echo "================================================="
+
+ # Loop through each modality for the current model
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+ echo "-------------------------------------------------"
+ echo " - Modality: $MODALITY"
+ echo " - Output Path: $OUTPUT_PATH"
+
+ # Ensure the output directory exists
+ mkdir -p "$OUTPUT_PATH"
+
+ cmd="LM_LAYERS='last' CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_cut_layer_AOP_add_text_cut.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_BACKBONE\" \
+ --model_name \"$MODEL_NAME\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ # echo "$cmd" # Uncomment for debugging the exact command
+ eval "$cmd"
+ echo " - Done."
+ echo "-------------------------------------------------"
+ done
+ done
+
+ echo "✅ All jobs completed."
experiments/public/eval/eval_1gpu_cut_layer_unified_new.sh ADDED
@@ -0,0 +1,131 @@
+ #!/bin/bash
+ set -e
+
+ echo "==> Environment"
+ echo "conda: $(which conda)"
+ echo "python: $(which python)"
+ python --version
+ echo ""
+
+ # Make sure we are inside the VLM2Vec directory.
+ # Note: the "cd VLM2Vec/" line was removed here, because the cluster invocation clones Code/ and then cd's into Code/.
+ # If VLM2Vec lives under Code/, uncomment the line below.
+ # cd VLM2Vec/ || exit 1
+ echo "Current directory: $(pwd)"
+
+
+ # ==============================================================================
+ # Config (Read from Environment Variables or use Defaults)
+ # ==============================================================================
+ echo "==> Loading Configuration..."
+
+ # --- Basic configuration ---
+ CUDA_VISIBLE_DEVICES="${EVAL_CUDA_DEVICES:-"0,1,2,3,4,5,6,7"}"
+ BATCH_SIZE="${EVAL_BATCH_SIZE:-64}"
+
+ # --- Modality configuration (read from the space-separated string EVAL_MODALITIES) ---
+ MODALITIES_STR="${EVAL_MODALITIES:-"image_retrival video_retrival visdoc_retrival"}"
+ read -r -a MODALITIES <<< "$MODALITIES_STR"
+
+ # --- Path configuration ---
+ DATA_BASEDIR="${EVAL_DATA_BASEDIR:-"~/data/vlm2vec_eval/MMEB-V2"}"
+ OUTPUT_BASEDIR="${EVAL_OUTPUT_BASEDIR:-"~/exps/vlm2vec_unified_eval"}"
+
+ # --- Model list ---
+ # (Important) The default model spec is set *after* OUTPUT_BASEDIR is defined, so its path resolves correctly
+ DEFAULT_MODEL_SPEC="VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B"
+ MODEL_SPEC_TO_ADD="${EVAL_MODEL_SPEC:-"$DEFAULT_MODEL_SPEC"}"
+
+ declare -a MODEL_SPECS
+ MODEL_SPECS+=("$MODEL_SPEC_TO_ADD")
+
+ # ==============================================================================
+ # Cut-layer configuration (exported for Python)
+ # ==============================================================================
+ export LM_LAYERS="${EVAL_LM_LAYERS:-16,20,24,last}"
+
+ # ==============================================================================
+ # VisionZip configuration (exported for Python)
+ # ==============================================================================
+ export ZIP_ENABLED=${EVAL_ZIP_ENABLED:-0}
+ export ZIP_APPLY="${EVAL_ZIP_APPLY:-"both"}"
+ export ZIP_METHOD="${EVAL_ZIP_METHOD:-"visionzip"}"
+ export ZIP_KEEP_DOM=${EVAL_ZIP_KEEP_DOM:-0.90}
+ export ZIP_KEEP_CTX=${EVAL_ZIP_KEEP_CTX:-0.10}
+
+ # ==============================================================================
+ # AOP configuration (exported for Python)
+ # ==============================================================================
+ export AOP_ENABLED=${EVAL_AOP_ENABLED:-0}
+ export AOP_APPLY="${EVAL_AOP_APPLY:-"both"}"
+ export AOP_LAYER=${EVAL_AOP_LAYER:-20}
+ export AOP_MODE="${EVAL_AOP_MODE:-"ratio"}"
+ export AOP_KEEP_RATIO=${EVAL_AOP_KEEP_RATIO:-0.10}
+ export AOP_MIN_KEEP=${EVAL_AOP_MIN_KEEP:-64}
+ export AOP_DELTA=${EVAL_AOP_DELTA:-0.10}
+ export AOP_KHAT=${EVAL_AOP_KHAT:-1.0}
+ export AOP_USE_BIAS=${EVAL_AOP_USE_BIAS:-1}
+ export AOP_ATTN_IMPL="${EVAL_AOP_ATTN_IMPL:-"sdpa"}"
+ export AOP_DEBUG=${EVAL_AOP_DEBUG:-0} # debug off by default
+
+ # ==============================================================================
+ # Print the final configuration
+ # ==============================================================================
+ echo "--- Final Configuration ---"
+ echo "CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
+ echo "BATCH_SIZE: $BATCH_SIZE"
+ echo "MODALITIES: ${MODALITIES[*]}"
+ echo "DATA_BASEDIR: $DATA_BASEDIR"
+ echo "OUTPUT_BASEDIR: $OUTPUT_BASEDIR"
+ echo "MODEL_SPECS: ${MODEL_SPECS[*]}"
+ echo "LM_LAYERS: $LM_LAYERS"
+ echo "ZIP_ENABLED: $ZIP_ENABLED"
+ echo "AOP_ENABLED: $AOP_ENABLED"
+ if [ "$ZIP_ENABLED" -ne 0 ]; then
+ echo "ZIP_APPLY: $ZIP_APPLY, ZIP_METHOD: $ZIP_METHOD, ZIP_KEEP_DOM: $ZIP_KEEP_DOM, ZIP_KEEP_CTX: $ZIP_KEEP_CTX"
+ fi
+ if [ "$AOP_ENABLED" -ne 0 ]; then
+ echo "AOP_APPLY: $AOP_APPLY, AOP_LAYER: $AOP_LAYER, AOP_MODE: $AOP_MODE, AOP_KEEP_RATIO: $AOP_KEEP_RATIO"
+ fi
+ echo "---------------------------"
+
+ # ==============================================================================
+ # Run
+ # ==============================================================================
+ for spec in "${MODEL_SPECS[@]}"; do
+ IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+ echo "================================================="
+ echo "🚀 Model: $MODEL_NAME"
+ echo "================================================="
+
+ for MODALITY in "${MODALITIES[@]}"; do
+ # Assumes the VLM2Vec directory is the Code/ directory (or the script runs from Code/)
+ # and that experiments/ lives under Code/
+ DATA_CONFIG_PATH="experiments/public/eval/${MODALITY}.yaml"
+ OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+ mkdir -p "$OUTPUT_PATH"
+
+ echo "-------------------------------------------------"
+ echo " - Modality: $MODALITY"
+ echo " - Output: $OUTPUT_PATH"
+ echo " - Config: $DATA_CONFIG_PATH"
+
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_cut_layer_unified.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_BACKBONE\" \
+ --model_name \"$MODEL_NAME\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing..."
+ # echo "$cmd" # Debug: print the full command
+ eval "$cmd"
+ echo " - Done."
+ done
+ done
+
+ echo "✅ All jobs completed."
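+
+ # Example invocation (added note; values are illustrative — any EVAL_* variable
+ # falls back to the defaults above when unset):
+ #   EVAL_MODALITIES="image_retrival" EVAL_BATCH_SIZE=32 EVAL_AOP_ENABLED=1 \
+ #   EVAL_LM_LAYERS="12,last" \
+ #   bash experiments/public/eval/eval_1gpu_cut_layer_unified_new.sh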
experiments/public/eval/eval_1gpu_early_exit_classifier.sh ADDED
@@ -0,0 +1,70 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0"
+ BATCH_SIZE=32
+
+ # [AOP config] Token pruning - disabled for now to match the baseline
+ export AOP_ENABLED=0
+ # export AOP_APPLY=qry
+ # export AOP_LAYER=12 # layer at which AOP pruning happens (must be < EE_LAYER)
+ # export AOP_MODE=ratio
+ # export AOP_KEEP_RATIO_VISION=1.0 # keep 100% of vision tokens
+ # export AOP_KEEP_RATIO_TEXT=1.0 # do not prune text tokens
+ # export AOP_SELECTION=aop # use attention-based selection
+
+ # [EE config] Early exit via classifier
+ export EE_ENABLED=1
+ export EE_LAYER=12 # layer at which the early-exit decision is made
+ export EE_METHOD=classifier # use a classifier
+ # export EE_DEBUG_MODE=1
+ export EE_THRESHOLD=0.99 # early-exit threshold
+ export EE_TOPK=200
+ # Classifier path: either a checkpoint directory or a .pt file
+ export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/~/experiments/checkpoint-600"
+
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_Classifier_Eval_AOP_Fused_0.99"
+
+ # Model path (VLM)
+ MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000"
+ # Note: MODEL_NAME holds the backbone id here; MODEL_CHECKPOINT is passed as --model_name below
+ MODEL_NAME="qwen2_5_vl"
+
+ echo "================================================="
+ echo "🚀 Pipeline: AOP + Early Exit Classifier"
+ echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ echo "🚀 Threshold: $EE_THRESHOLD"
+ echo "================================================="
+
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
+
+ mkdir -p "$OUTPUT_PATH"
+
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_NAME\" \
+ --model_name \"$MODEL_CHECKPOINT\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ eval "$cmd"
+ echo " - Done."
+ done
+
+ echo "✅ All jobs completed."
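+
+ # Hedged reading of the EE knobs above (added note; the exact semantics live
+ # in eval_test_time_with_classifier.py): at layer EE_LAYER the classifier
+ # scores each sample, and samples whose confidence reaches EE_THRESHOLD
+ # (0.99 here) exit early instead of running the remaining layers, presumably
+ # something like:
+ #   exit_mask = classifier_prob >= float(os.environ["EE_THRESHOLD"])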
experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_attn_pooling.sh ADDED
@@ -0,0 +1,102 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0"
+ BATCH_SIZE=64
+
+ # [AOP config] Token pruning
+ export AOP_ENABLED=1
+ export AOP_APPLY=both # "qry" prunes only the query; "both" also prunes candidates
+ export AOP_LAYER=12
+ export AOP_MODE=ratio
+ export AOP_SELECTION=attention # use attention as the importance score
+ export AOP_ATTENTION_AGG=mean # head aggregation: mean/max/sum
+
+ # Enable vision / text pruning
+ export AOP_PRUNE_VISION=1
+ export AOP_PRUNE_TEXT=1
+
+ # Optional: sensible keep ratios and floors
+ export AOP_KEEP_RATIO_VISION=1.0 # leave vision unpruned for now; set to e.g. 0.5 to prune
+ export AOP_KEEP_RATIO_TEXT=0.5
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_MIN_KEEP_TEXT=8
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+ export AOP_MONITOR=0
+
+ # [VPOOL config] Vision token pooling
+ export VPOOL_ENABLED=1
+ export VPOOL_APPLY=both # qry | tgt | both
+ export VPOOL_LAYER=1 # layer at which pooling happens (before entering layer 1)
+ export VPOOL_KERNEL=2 # 2x2
+ export VPOOL_STRIDE=2
+ export VPOOL_METHOD=attn # NEW: attn pooling
+ export VPOOL_ATTN_TAU=1.0 # NEW: attn pooling temperature
+ export VPOOL_ONLY_VISION=1
+ export VPOOL_PROTECT_CLS=1
+ export VPOOL_MONITOR=1
+
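+ # Hedged sketch of attn pooling (added note; the real implementation lives in
+ # the Python eval script). With kernel=2, stride=2 each 2x2 window of vision
+ # tokens collapses to one token, so the vision sequence shrinks roughly 4x;
+ # presumably the window tokens are combined with weights
+ # softmax(score_i / VPOOL_ATTN_TAU):
+ #   pooled = sum_i softmax(s / tau)_i * token_i,  i = 1..4
+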
+ # To see debug output, add:
+ # export AOP_DEBUG=1
+
+ # [EE config] Early exit via classifier
+ export EE_ENABLED=1
+ export EE_LAYER=12
+ export EE_METHOD=classifier
+ export EE_DEBUG_MODE=1
+ # Fill in the threshold from the V5 analysis results, or keep the default
+ export EE_THRESHOLD=0
+ export EE_TOPK=200
+ export EE_PROFILE=1
+ export EE_TOPK_EMB=10
+ export EE_SKIP_LM_HEAD=1
+
+ # [Key] Classifier path (replace with your V5-trained checkpoint path)
+ export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000"
+
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # [Key] Change the output directory
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000_0.45_try2"
+
+ MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_12_attn_pooling_new_all_12_26_h100_4_node_sigma4/checkpoint-1300"
+ MODEL_NAME="qwen2_5_vl"
+
+ echo "================================================="
+ echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
+ echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ echo "================================================="
+
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
+
+ mkdir -p "$OUTPUT_PATH"
+
+ # [Key] Invoke the V5 script
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_attn_pooling.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_NAME\" \
+ --model_name \"$MODEL_CHECKPOINT\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ eval "$cmd"
+ echo " - Done."
+ done
+
+ echo "✅ All jobs completed."
experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling.sh ADDED
@@ -0,0 +1,212 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0"
+ BATCH_SIZE=64
+
+ # [AOP config] Token pruning
+ export AOP_ENABLED=0
+ export AOP_APPLY=both # "qry" prunes only the query; "both" also prunes candidates
+ export AOP_LAYER=12
+ export AOP_MODE=ratio
+ export AOP_SELECTION=attention # use attention as the importance score
+ export AOP_ATTENTION_AGG=mean # head aggregation: mean/max/sum
+
+ # Enable vision / text pruning
+ export AOP_PRUNE_VISION=0
+ export AOP_PRUNE_TEXT=1
+
+ # Optional: sensible keep ratios and floors
+ export AOP_KEEP_RATIO_VISION=1.0 # leave vision unpruned for now; set to e.g. 0.5 to prune
+ export AOP_KEEP_RATIO_TEXT=0.5
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_MIN_KEEP_TEXT=8
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+ export AOP_MONITOR=0
+
+ # [VPOOL config] Vision token pooling
+ export VPOOL_ENABLED=0
+ export VPOOL_APPLY=both # qry | tgt | both
+ export VPOOL_LAYER=1 # layer at which pooling happens (before entering layer 1)
+ export VPOOL_KERNEL=2 # 2x2
+ export VPOOL_STRIDE=2
+ export VPOOL_METHOD=avg # avg | max | linear | conv
+ export VPOOL_ONLY_VISION=1 # applies to vision tokens only
+ export VPOOL_PROTECT_CLS=1
+ export VPOOL_MONITOR=0 # set to 1 to print length changes
+
+ # To see debug output, add:
+ # export AOP_DEBUG=1
+
+ # [EE config] Early exit via classifier
+ export EE_ENABLED=0
+ export EE_LAYER=12
+ export EE_METHOD=classifier
+ export EE_DEBUG_MODE=1
+ # Fill in the threshold from the V5 analysis results, or keep the default
+ export EE_THRESHOLD=0
+ export EE_TOPK=200
+ export EE_PROFILE=1
+ export VPOOL_COMPILE=1
+ export EE_TORCH_PROFILE=1
+ export EE_TOPK_EMB=10
+
+ export STAGE_PROFILE=1
+ export STAGE_PROFILE_PRINT=1 # optional: print at the end of each dataset
+
+ # [Key] Classifier path (replace with your V5-trained checkpoint path)
+ export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000"
+
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # [Key] Change the output directory
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000_optimaized_wo"
+
+ MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_pooling_all_12_10_h100_4/checkpoint-5000"
+ MODEL_NAME="qwen2_5_vl"
+
+ echo "================================================="
+ echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
+ echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ echo "================================================="
+
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
+
+ mkdir -p "$OUTPUT_PATH"
+
+ # [Key] Invoke the V5 script
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_pooling.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_NAME\" \
+ --model_name \"$MODEL_CHECKPOINT\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ eval "$cmd"
+ echo " - Done."
+ done
+
+ echo "✅ All jobs completed."
+
+
+ # #!/bin/bash
+ # echo "==> Environment"
+ # echo "conda location: $(which conda)"
+ # echo "Python location: $(which python)"
+ # echo "Python version: $(python --version)"
+ # echo ""
+
+ # cd VLM2Vec/ || exit
+
+ # # ==============================================================================
+ # # Configuration
+ # # ==============================================================================
+ # CUDA_VISIBLE_DEVICES="0"
+ # BATCH_SIZE=64
+
+ # # [AOP config] Token pruning
+ # export AOP_ENABLED=1
+ # export AOP_APPLY=both # "qry" prunes only the query; "both" also prunes candidates
+ # export AOP_LAYER=12
+ # export AOP_MODE=ratio
+ # export AOP_SELECTION=attention # use attention as the importance score
+ # export AOP_ATTENTION_AGG=mean # head aggregation: mean/max/sum
+
+ # # Enable vision / text pruning
+ # export AOP_PRUNE_VISION=0
+ # export AOP_PRUNE_TEXT=1
+
+ # # Optional: sensible keep ratios and floors
+ # export AOP_KEEP_RATIO_VISION=1.0 # leave vision unpruned for now; set to e.g. 0.5 to prune
+ # export AOP_KEEP_RATIO_TEXT=0.5
+ # export AOP_MIN_KEEP_VISION=8
+ # export AOP_MIN_KEEP_TEXT=8
+ # export AOP_PROTECT_TEXT_LAST=8
+ # export AOP_PROTECT_SPECIAL=1
+ # export AOP_MONITOR=0
+
+ # # [VPOOL config] Vision token pooling
+ # export VPOOL_ENABLED=1
+ # export VPOOL_APPLY=both # qry | tgt | both
+ # export VPOOL_LAYER=1 # layer at which pooling happens (before entering layer 1)
+ # export VPOOL_KERNEL=2 # 2x2
+ # export VPOOL_STRIDE=2
+ # export VPOOL_METHOD=avg # avg | max | linear | conv
+ # export VPOOL_ONLY_VISION=1 # applies to vision tokens only
+ # export VPOOL_PROTECT_CLS=1
+ # export VPOOL_MONITOR=0 # set to 1 to print length changes
+
+ # # To see debug output, add:
+ # # export AOP_DEBUG=1
+
+ # # [EE config] Early exit via classifier
+ # export EE_ENABLED=1
+ # export EE_LAYER=12
+ # export EE_METHOD=classifier
+ # export EE_DEBUG_MODE=1
+ # # Fill in the threshold from the V5 analysis results, or keep the default
+ # export EE_THRESHOLD=0
+ # export EE_TOPK=200
+ # export EE_PROFILE=1
+ # export VPOOL_COMPILE=1
+ # export EE_TORCH_PROFILE=1
+ # export EE_TOPK_EMB=10
+
+ # export STAGE_PROFILE=1
+ # export STAGE_PROFILE_PRINT=1 # optional: print at the end of each dataset
+
+ # # [Key] Classifier path (replace with your V5-trained checkpoint path)
+ # export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_7B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz512/checkpoint-1000"
+
+ # MODALITIES=("image_retrival")
+ # DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # # [Key] Change the output directory
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_7B_multilayer_distill_AOP_pooling_all_12_10_h100_4_Classifier_Layer12_V5_i_ret_bsz512/checkpoint-1000_new"
+
+ # MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_multilayer_distill_AOP_pooling_all_12_10_h100_4/checkpoint-5000"
+ # MODEL_NAME="qwen2_5_vl"
+
+ # echo "================================================="
+ # echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
+ # echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ # echo "================================================="
+
+ # for MODALITY in "${MODALITIES[@]}"; do
+ # DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ # OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
+
+ # mkdir -p "$OUTPUT_PATH"
+
+ # # [Key] Invoke the V5 script
+ # cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_pooling.py \
+ # --pooling eos \
+ # --normalize true \
+ # --per_device_eval_batch_size $BATCH_SIZE \
+ # --model_backbone \"$MODEL_NAME\" \
+ # --model_name \"$MODEL_CHECKPOINT\" \
+ # --dataset_config \"$DATA_CONFIG_PATH\" \
+ # --encode_output_path \"$OUTPUT_PATH\" \
+ # --data_basedir \"$DATA_BASEDIR\""
+
+ # echo " - Executing command..."
+ # eval "$cmd"
+ # echo " - Done."
+ # done
+
+ # echo "✅ All jobs completed."
experiments/public/eval/eval_1gpu_early_exit_classifier_AOP_pooling_new.sh ADDED
@@ -0,0 +1,102 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0"
+ BATCH_SIZE=64
+
+ # [AOP config] Token pruning
+ export AOP_ENABLED=0
+ export AOP_APPLY=both # "qry" prunes only the query; "both" also prunes candidates
+ export AOP_LAYER=10
+ export AOP_MODE=ratio
+ export AOP_SELECTION=attention # use attention as the importance score
+ export AOP_ATTENTION_AGG=mean # head aggregation: mean/max/sum
+
+ # Enable vision / text pruning
+ export AOP_PRUNE_VISION=1
+ export AOP_PRUNE_TEXT=1
+
+ # Optional: sensible keep ratios and floors
+ export AOP_KEEP_RATIO_VISION=1.0 # leave vision unpruned for now; set to e.g. 0.5 to prune
+ export AOP_KEEP_RATIO_TEXT=0.5
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_MIN_KEEP_TEXT=8
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+ export AOP_MONITOR=0
+
+ # [VPOOL config] Vision token pooling
+ export VPOOL_ENABLED=1
+ export VPOOL_APPLY=both # qry | tgt | both
+ export VPOOL_LAYER=1 # layer at which pooling happens (before entering layer 1)
+ export VPOOL_KERNEL=2 # 2x2
+ export VPOOL_STRIDE=2
+ export VPOOL_METHOD=avg # avg | max | linear | conv
+ export VPOOL_ONLY_VISION=1 # applies to vision tokens only
+ export VPOOL_PROTECT_CLS=1
+ export VPOOL_MONITOR=0 # set to 1 to print length changes
+
+ # To see debug output, add:
+ # export AOP_DEBUG=1
+
+ # [EE config] Early exit via classifier
+ export EE_ENABLED=1
+ export EE_LAYER=12
+ export EE_METHOD=classifier
+ export EE_DEBUG_MODE=1
+ # Fill in the threshold from the V5 analysis results, or keep the default
+ export EE_THRESHOLD=0
+ export EE_TOPK=200
+ export EE_PROFILE=1
+ export EE_TOPK_EMB=10
+ export EE_SKIP_LM_HEAD=1
+
+ # [Key] Classifier path (replace with your V5-trained checkpoint path)
+ # export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node_Classifier_L12_i_ret_bsz512/checkpoint-1000"
+ export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000"
+
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # [Key] Change the output directory
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_multilayer_distill_AOP_10_pooling_12_26_a100_multinode_Classifier_Layer12_V5_i_ret_bsz128/checkpoint-1000_only_Vision"
+
+ MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node/checkpoint-5000"
+ MODEL_NAME="qwen2_5_vl"
+
+ echo "================================================="
+ echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
+ echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ echo "================================================="
+
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
+
+ mkdir -p "$OUTPUT_PATH"
+
+ # [Key] Invoke the V5 script
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_AOP_pooling_new.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_NAME\" \
+ --model_name \"$MODEL_CHECKPOINT\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ eval "$cmd"
+ echo " - Done."
+ done
+
+ echo "✅ All jobs completed."
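+
+ # Added note (assumption about intent): EE_SKIP_LM_HEAD=1 presumably skips the
+ # vocabulary projection (lm_head) at the exit layer, since retrieval only needs
+ # the pooled hidden-state embedding rather than token logits, saving a large
+ # [hidden_dim x vocab] matmul per forward pass.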
experiments/public/eval/eval_1gpu_early_exit_classifier_V5.sh ADDED
@@ -0,0 +1,88 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0"
+ BATCH_SIZE=64
+
+ # [AOP config] Token pruning
+ export AOP_ENABLED=1
+ export AOP_APPLY=qry # "qry" prunes only the query; set "both" to also prune candidates
+ export AOP_LAYER=12
+ export AOP_MODE=ratio
+ export AOP_SELECTION=attention # use attention as the importance score
+ export AOP_ATTENTION_AGG=mean # head aggregation: mean/max/sum
+
+ # Enable vision / text pruning
+ export AOP_PRUNE_VISION=1
+ export AOP_PRUNE_TEXT=1
+
+ # Optional: sensible keep ratios and floors
+ export AOP_KEEP_RATIO_VISION=1.0 # leave vision unpruned for now; set to e.g. 0.5 to prune
+ export AOP_KEEP_RATIO_TEXT=0.5
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_MIN_KEEP_TEXT=8
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+
+ # To see debug output, add:
+ # export AOP_DEBUG=1
+
+ # [EE config] Early exit via classifier
+ export EE_ENABLED=1
+ export EE_LAYER=12
+ export EE_METHOD=classifier
+ export EE_DEBUG_MODE=1
+ # Fill in the threshold from the V5 analysis results, or keep the default
+ export EE_THRESHOLD=0
+ export EE_TOPK=200
+ export EE_PROFILE=1
+ export EE_TOPK_EMB=10
+
+ # [Key] Classifier path (replace with your V5-trained checkpoint path)
+ export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800"
+
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # [Key] Change the output directory
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800_0.3"
+
+ MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_h100/checkpoint-5000"
+ MODEL_NAME="qwen2_5_vl"
+
+ echo "================================================="
+ echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
+ echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ echo "================================================="
+
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
+
+ mkdir -p "$OUTPUT_PATH"
+
+ # [Key] Invoke the V5 script
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_V5.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_NAME\" \
+ --model_name \"$MODEL_CHECKPOINT\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ eval "$cmd"
+ echo " - Done."
+ done
+
+ echo "✅ All jobs completed."
experiments/public/eval/eval_1gpu_early_exit_classifier_V5_new.sh ADDED
@@ -0,0 +1,99 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0"
+ BATCH_SIZE=64
+
+ # [VPOOL config] Vision token pooling (set to 1 to enable)
+ export VPOOL_ENABLED=1
+ export VPOOL_APPLY=both # qry|cand|both
+ export VPOOL_LAYER=1 # pool before entering layer 1
+ export VPOOL_KERNEL=2
+ export VPOOL_STRIDE=2
+ export VPOOL_METHOD=avg # avg|max|linear|conv
+ export VPOOL_PROTECT_CLS=1
+ export VPOOL_ONLY_VISION=1
+ export VPOOL_MONITOR=1
+
+ # GPU memory optimization (optional)
+ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+ # [AOP config] Token pruning
+ export AOP_ENABLED=1
+ export AOP_APPLY=qry # "qry" prunes only the query; set "both" to also prune candidates
+ export AOP_LAYER=12
+ export AOP_MODE=ratio
+ export AOP_SELECTION=attention # use attention as the importance score
+ export AOP_ATTENTION_AGG=mean # head aggregation: mean/max/sum
+
+ # Enable vision / text pruning
+ export AOP_PRUNE_TEXT=1
+ export AOP_PRUNE_VISION=0
+ export AOP_KEEP_RATIO_TEXT=0.5
+ export AOP_KEEP_RATIO_VISION=0.5
+ export AOP_MIN_KEEP_TEXT=16
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+ export AOP_RANDOM_SEED=42
+ export AOP_MONITOR=1
+
+ # [EE config] Early exit via classifier
+ export EE_ENABLED=1
+ export EE_LAYER=12
+ export EE_METHOD=classifier
+ export EE_DEBUG_MODE=1
+ # Fill in the threshold from the V5 analysis results, or keep the default
+ export EE_THRESHOLD=0
+ export EE_TOPK=200
+ export EE_PROFILE=1
+ export EE_TOPK_EMB=10
+
+ # [Key] Classifier path (replace with your V5-trained checkpoint path)
+ export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800"
+
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # [Key] Change the output directory
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_add_distill_0.2_0.6_DISTILL_FLOOR_0_12_3_h100_3_Classifier_Layer12_V5_i_ret/checkpoint-800_0"
+
+ MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_h100/checkpoint-5000"
+ MODEL_NAME="qwen2_5_vl"
+
+ echo "================================================="
+ echo "🚀 Pipeline: AOP + Early Exit Classifier (V5)"
+ echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ echo "================================================="
+
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$OUTPUT_BASEDIR/$MODALITY/"
+
+ mkdir -p "$OUTPUT_PATH"
+
+ # [Key] Invoke the V5 script
+ cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier_V5_new.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_NAME\" \
+ --model_name \"$MODEL_CHECKPOINT\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ eval "$cmd"
+ echo " - Done."
+ done
+
+ echo "✅ All jobs completed."
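+
+ # Added note: PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True asks PyTorch's
+ # CUDA caching allocator to grow allocations as expandable segments, which
+ # typically reduces fragmentation when activation sizes vary across batches,
+ # as they do here after pooling/pruning changes the sequence lengths.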
experiments/public/eval/eval_1gpu_multilayer_AOP_attn_pooling.sh ADDED
@@ -0,0 +1,108 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=64
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_AOP/t_0.5_8_i_0.5_16_both_l12_bsz64" #_qry_cand_diff_ratio
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/checkpoint-20" #/t_0.5_8_i_0.5_16_both_l12_bsz64 #_qry_cand_diff_ratio
+
+ # ==== AOP + Vision Token Pooling (enabled) ====
+ export VPOOL_ENABLED=1
+ export VPOOL_APPLY=both
+ export VPOOL_LAYER=1
+ export VPOOL_KERNEL=2
+ export VPOOL_STRIDE=2
+
+ # === Change here: enable attention pooling ===
+ export VPOOL_METHOD=attn # attn | avg | max | ...
+ export VPOOL_ATTN_TAU=1.0 # NEW: attention pooling temperature
+
+ export VPOOL_PROTECT_CLS=1
+ export VPOOL_ONLY_VISION=1
+ export VPOOL_MONITOR=1
+
+ export AOP_ENABLED=1
+ export AOP_APPLY=both # qry|cand|both
+ export AOP_LAYER=13
+ export AOP_MODE=ratio
+ export AOP_SELECTION=attention
+ export AOP_ATTENTION_AGG=mean
+
+ export AOP_PRUNE_TEXT=1
+ export AOP_PRUNE_VISION=0
+ export AOP_KEEP_RATIO_TEXT=0.5
+ export AOP_KEEP_RATIO_VISION=0.5
+ export AOP_MIN_KEEP_TEXT=16
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+ export AOP_RANDOM_SEED=42
+ export AOP_MONITOR=1
+
+ export EE_SKIP_LM_HEAD=1
+
+ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ # MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
+ MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/exps/checkpoint-20;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+ # Parse the model name, backbone, and base output path from the spec string
+ IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+ echo "================================================="
+ echo "🚀 Processing Model: $MODEL_NAME"
+ echo "================================================="
+
+ # Loop through each modality for the current model
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+ echo "-------------------------------------------------"
+ echo " - Modality: $MODALITY"
+ echo " - Output Path: $OUTPUT_PATH"
+
+ # Ensure the output directory exists
+ mkdir -p "$OUTPUT_PATH"
+
+ cmd="LM_LAYERS='last' CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_multilayer_AOP_attn_pooling.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_BACKBONE\" \
+ --model_name \"$MODEL_NAME\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ # echo "$cmd" # Uncomment for debugging the exact command
+ eval "$cmd"
+ echo " - Done."
+ echo "-------------------------------------------------"
+ done
+ done
+
+ echo "✅ All jobs completed."
experiments/public/eval/eval_1gpu_multilayer_AOP_new.sh ADDED
@@ -0,0 +1,106 @@
+ #!/bin/bash
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=64
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec_AOP/t_0.5_8_i_0.5_16_both_l12_bsz64" #_qry_cand_diff_ratio
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node/checkpoint-5000/aop_t_0.5_8_both_l6_bsz64" #_qry_cand_diff_ratio
+ # OUTPUT_BASEDIR="/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/result" #_qry_cand_diff_ratio
+
+ # ==== AOP + Vision Token Pooling (enabled) ====
+ export VPOOL_ENABLED=1
+ export VPOOL_APPLY=both # qry|cand|both
+ export VPOOL_LAYER=1 # pool before entering layer 1
+ export VPOOL_KERNEL=2 # 2x2 pooling
+ export VPOOL_STRIDE=2
+ export VPOOL_METHOD=avg # avg|max|linear|conv
+ export VPOOL_PROTECT_CLS=1
+ export VPOOL_ONLY_VISION=1
+ export VPOOL_MONITOR=0 # set to 1 to print lengths before/after pooling
+
+ export AOP_ENABLED=1
+ export AOP_APPLY=both # qry|cand|both
+ export AOP_LAYER=10
+ export AOP_MODE=ratio
+ export AOP_SELECTION=aop #attention
+ export AOP_ATTENTION_AGG=mean
+
+ export AOP_PRUNE_TEXT=1
+ export AOP_PRUNE_VISION=0
+ export AOP_KEEP_RATIO_TEXT=0.5
+ export AOP_KEEP_RATIO_VISION=0.5
+ export AOP_MIN_KEEP_TEXT=8
+ export AOP_MIN_KEEP_VISION=8
+ export AOP_PROTECT_TEXT_LAST=8
+ export AOP_PROTECT_SPECIAL=1
+ export AOP_RANDOM_SEED=42
+ export AOP_MONITOR=0
+
+ export EE_SKIP_LM_HEAD=1
+
+ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ # MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
+ MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_7B_multilayer_distill_aop_10_pooling_i_ret_12_30_h100_2_node/checkpoint-5000;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "/home/v-menggao/code/vlmvector_qwen25vl_train_multi_layer_distill_AOP_pooling_layer8_ablation_1230/checkpoint-900;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+ # Parse the model name, backbone, and base output path from the spec string
+ IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+ echo "================================================="
+ echo "🚀 Processing Model: $MODEL_NAME"
+ echo "================================================="
+
+ # Loop through each modality for the current model
+ for MODALITY in "${MODALITIES[@]}"; do
+ DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+ OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+ echo "-------------------------------------------------"
+ echo " - Modality: $MODALITY"
+ echo " - Output Path: $OUTPUT_PATH"
+
+ # Ensure the output directory exists
+ mkdir -p "$OUTPUT_PATH"
+
+ cmd="LM_LAYERS='12,last' CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_multilayer_AOP_new.py \
+ --pooling eos \
+ --normalize true \
+ --per_device_eval_batch_size $BATCH_SIZE \
+ --model_backbone \"$MODEL_BACKBONE\" \
+ --model_name \"$MODEL_NAME\" \
+ --dataset_config \"$DATA_CONFIG_PATH\" \
+ --encode_output_path \"$OUTPUT_PATH\" \
+ --data_basedir \"$DATA_BASEDIR\""
+
+ echo " - Executing command..."
+ # echo "$cmd" # Uncomment for debugging the exact command
+ eval "$cmd"
+ echo " - Done."
+ echo "-------------------------------------------------"
+ done
+ done
+
+ echo "✅ All jobs completed."
experiments/public/eval/eval_1gpu_output_attn.sh ADDED
@@ -0,0 +1,83 @@
+ #!/bin/bash
+
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=64
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+ # OUTPUT_BASEDIR="~/exps/vlm2vec_bsz128"
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2vl_2B.add_mlp_try1/checkpoint-500" #_qry_cand_diff_ratio
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/VLM2Vec-output-attn/VLM2Vec-V2.0" #_qry_cand_diff_ratio
+ # OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/try_add_mlp_try/stage1" #_qry_cand_diff_ratio
+
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ # MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.image/checkpoint-1000
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
+ MODEL_SPECS+=( "VLM2Vec/VLM2Vec-V2.0;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "Qwen/Qwen2.5-VL-3B-Instruct;qwen2_5_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2.5VL-3B" )
+ # MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.add_mlp_try1/checkpoint-500;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.add_mlp_8000_16000/checkpoint-7200;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2vl_2B.add_mlp/stage1;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V2.0-Qwen2VL-2B" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-2B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-2B-Instruct" )
+ # MODEL_SPECS+=( "Alibaba-NLP/gme-Qwen2-VL-7B-Instruct;gme;$OUTPUT_BASEDIR/gme-Qwen2-VL-7B-Instruct" )
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret;lamra;$OUTPUT_BASEDIR/LamRA-Ret" )
+ # MODEL_SPECS+=( "code-kunkun/LamRA-Ret-Qwen2.5VL-7b;lamra_qwen25;$OUTPUT_BASEDIR/LamRA-Ret-Qwen2.5VL-7b" )
+ # MODEL_SPECS+=( "vidore/colpali-v1.3;colpali;$OUTPUT_BASEDIR/colpali-v1.3" )
+
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+     # Parse the model name, backbone, and base output path from the spec string
+     IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+     echo "================================================="
+     echo "🚀 Processing Model: $MODEL_NAME"
+     echo "================================================="
+
+     # Loop through each modality for the current model
+     for MODALITY in "${MODALITIES[@]}"; do
+         DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+         OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+         echo "-------------------------------------------------"
+         echo " - Modality: $MODALITY"
+         echo " - Output Path: $OUTPUT_PATH"
+
+         # Ensure the output directory exists
+         mkdir -p "$OUTPUT_PATH"
+
+         cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time.py \
+             --pooling eos \
+             --normalize true \
+             --per_device_eval_batch_size $BATCH_SIZE \
+             --model_backbone \"$MODEL_BACKBONE\" \
+             --model_name \"$MODEL_NAME\" \
+             --dataset_config \"$DATA_CONFIG_PATH\" \
+             --encode_output_path \"$OUTPUT_PATH\" \
+             --data_basedir \"$DATA_BASEDIR\""
+
+         echo " - Executing command..."
+         # echo "$cmd"  # Uncomment to debug the exact command
+         eval "$cmd"
+         echo " - Done."
+         echo "-------------------------------------------------"
+     done
+ done
+
+ echo "✅ All jobs completed."
experiments/public/eval/eval_vlm2vecv1_8gpu.sh ADDED
@@ -0,0 +1,71 @@
+ #!/bin/bash
+
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd projects/VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+ BATCH_SIZE=32
+ MODALITIES=("image" "video" "visdoc")
+ DATA_BASEDIR="~/data/vlm2vec_eval"
+ OUTPUT_BASEDIR="~/exps/vlm2vec/"
+
+ # ==> Define models and their base output paths here
+ # Format: "MODEL_NAME;MODEL_BACKBONE;BASE_OUTPUT_PATH"
+ declare -a MODEL_SPECS
+ MODEL_SPECS+=( "TIGER-Lab/VLM2Vec-Qwen2VL-2B;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V1-Qwen2VL-2B" )
+ MODEL_SPECS+=( "TIGER-Lab/VLM2Vec-Qwen2VL-7B;qwen2_vl;$OUTPUT_BASEDIR/VLM2Vec-V1-Qwen2VL-7B" )
+
+
+ # ==============================================================================
+ # Main Execution Loop
+ # ==============================================================================
+ # Loop through each model specification
+ for spec in "${MODEL_SPECS[@]}"; do
+     # Parse the model name, backbone, and base output path from the spec string
+     IFS=';' read -r MODEL_NAME MODEL_BACKBONE BASE_OUTPUT_PATH <<< "$spec"
+
+     echo "================================================="
+     echo "🚀 Processing Model: $MODEL_NAME"
+     echo "================================================="
+
+     # Loop through each modality for the current model
+     for MODALITY in "${MODALITIES[@]}"; do
+         DATA_CONFIG_PATH="experiments/release/eval/$MODALITY.yaml"
+         OUTPUT_PATH="$BASE_OUTPUT_PATH/$MODALITY/"
+
+         echo "-------------------------------------------------"
+         echo " - Modality: $MODALITY"
+         echo " - Output Path: $OUTPUT_PATH"
+
+         # Ensure the output directory exists
+         mkdir -p "$OUTPUT_PATH"
+
+         cmd="CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES torchrun --nproc_per_node=8 --master_port=2233 --max_restarts=0 eval.py \
+             --pooling eos \
+             --normalize true \
+             --per_device_eval_batch_size $BATCH_SIZE \
+             --model_backbone \"$MODEL_BACKBONE\" \
+             --model_name \"$MODEL_NAME\" \
+             --resize_use_processor false \
+             --image_resolution high \
+             --dataset_config \"$DATA_CONFIG_PATH\" \
+             --encode_output_path \"$OUTPUT_PATH\" \
+             --data_basedir \"$DATA_BASEDIR\""
+
+         echo " - Executing command..."
+         # echo "$cmd"  # Uncomment to debug the exact command
+         eval "$cmd"
+         echo " - Done."
+         echo "-------------------------------------------------"
+     done
+ done
+
+ echo "✅ All jobs completed."
experiments/public/eval/image_retrival.yaml ADDED
@@ -0,0 +1,101 @@
+ # RET i -> i
+ CIRR:
+   dataset_parser: image_i2i_vg
+   dataset_name: CIRR
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ NIGHTS:
+   dataset_parser: image_i2i_vg
+   dataset_name: NIGHTS
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ OVEN:
+   dataset_parser: image_i2i_vg
+   dataset_name: OVEN
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ FashionIQ:
+   dataset_parser: image_i2i_vg
+   dataset_name: FashionIQ
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+
+ # RET i -> t
+ MSCOCO_i2t:
+   dataset_parser: image_i2t
+   dataset_name: MSCOCO_i2t
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ VisualNews_i2t:
+   dataset_parser: image_i2t
+   dataset_name: VisualNews_i2t
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ # RET t -> i
+ VisDial:
+   dataset_parser: image_t2i
+   dataset_name: VisDial
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ MSCOCO_t2i:
+   dataset_parser: image_t2i
+   dataset_name: MSCOCO_t2i
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ VisualNews_t2i:
+   dataset_parser: image_t2i
+   dataset_name: VisualNews_t2i
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ WebQA:
+   dataset_parser: image_t2i
+   dataset_name: WebQA
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ EDIS:
+   dataset_parser: image_t2i
+   dataset_name: EDIS
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ Wiki-SS-NQ:
+   dataset_parser: image_t2i
+   dataset_name: Wiki-SS-NQ
+   dataset_split: test
+   image_root: image-tasks/MMEB
+   eval_type: local
+ # # RET i -> i
+ # CIRR:
+ #   dataset_parser: image_i2i_vg
+ #   dataset_name: CIRR
+ #   dataset_split: test
+ #   image_root: image-tasks/MMEB
+ #   eval_type: local
+ # NIGHTS:
+ #   dataset_parser: image_i2i_vg
+ #   dataset_name: NIGHTS
+ #   dataset_split: test
+ #   image_root: image-tasks/MMEB
+ #   eval_type: local
+ # OVEN:
+ #   dataset_parser: image_i2i_vg
+ #   dataset_name: OVEN
+ #   dataset_split: test
+ #   image_root: image-tasks/MMEB
+ #   eval_type: local
+ # FashionIQ:
+ #   dataset_parser: image_i2i_vg
+ #   dataset_name: FashionIQ
+ #   dataset_split: test
+ #   image_root: image-tasks/MMEB
+ #   eval_type: local
experiments/public/eval/mieb_any2any_retrieval_lite.yaml ADDED
@@ -0,0 +1,55 @@
+ # CIRRIT2IRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: CIRRIT2IRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ # CUB200I2IRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: CUB200I2IRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ # Fashion200kI2TRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: Fashion200kI2TRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ # HatefulMemesI2TRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: HatefulMemesI2TRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ InfoSeekIT2TRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: InfoSeekIT2TRetrieval
+   dataset_split: test
+   eval_type: global
+ NIGHTSI2IRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: NIGHTSI2IRetrieval
+   dataset_split: test
+   eval_type: global
+ OVENIT2TRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: OVENIT2TRetrieval
+   dataset_split: test
+   eval_type: global
+ RP2kI2IRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: RP2kI2IRetrieval
+   dataset_split: test
+   eval_type: global
+ VisualNewsI2TRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: VisualNewsI2TRetrieval
+   dataset_split: test
+   eval_type: global
+ VQA2IT2TRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: VQA2IT2TRetrieval
+   dataset_split: test
+   eval_type: global
+ WebQAT2ITRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: WebQAT2ITRetrieval
+   dataset_split: test
+   eval_type: global
experiments/public/eval/mieb_any2any_retrieval_lite2.yaml ADDED
@@ -0,0 +1,55 @@
+ # CIRRIT2IRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: CIRRIT2IRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ # CUB200I2IRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: CUB200I2IRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ # Fashion200kI2TRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: Fashion200kI2TRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ # HatefulMemesI2TRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: HatefulMemesI2TRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ # InfoSeekIT2TRetrieval:
+ #   dataset_parser: mieb_any2any_retrieval
+ #   dataset_name: InfoSeekIT2TRetrieval
+ #   dataset_split: test
+ #   eval_type: global
+ NIGHTSI2IRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: NIGHTSI2IRetrieval
+   dataset_split: test
+   eval_type: global
+ OVENIT2TRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: OVENIT2TRetrieval
+   dataset_split: test
+   eval_type: global
+ RP2kI2IRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: RP2kI2IRetrieval
+   dataset_split: test
+   eval_type: global
+ VisualNewsI2TRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: VisualNewsI2TRetrieval
+   dataset_split: test
+   eval_type: global
+ VQA2IT2TRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: VQA2IT2TRetrieval
+   dataset_split: test
+   eval_type: global
+ WebQAT2ITRetrieval:
+   dataset_parser: mieb_any2any_retrieval
+   dataset_name: WebQAT2ITRetrieval
+   dataset_split: test
+   eval_type: global
experiments/public/eval/run_batch_benchmark.sh ADDED
@@ -0,0 +1,112 @@
+ #!/bin/bash
+
+ # ======================= Configuration =======================
+ CUDA_VISIBLE_DEVICES="0"
+
+ # Model path
+ MODEL_PATH="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_h100/checkpoint-5000"
+ # Classifier path
+ CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/experiments/Qwen2_5vl_3B_add_distill_0.2_0.6_11_23_Classifier_Layer12_ImgText_V5_i_ret/checkpoint-500"
+
+ # Paths for saving results
+ RESULT_CSV="final_speedup_results.csv"
+ RESULT_LOG="final_benchmark.log"
+
+ # Datasets to benchmark
+ DATASETS=(
+     "CIRR"
+     "EDIS"
+     "FashionIQ"
+     "NIGHTS"
+     "OVEN"
+     "VisDial"
+     "MSCOCO_i2t"
+     "MSCOCO_t2i"
+     "VisualNews_i2t"
+     "VisualNews_t2i"
+     "WebQA"
+     "Wiki-SS-NQ"
+ )
+
+ # Directory holding the eval config templates
+ CONFIG_DIR="/home/v-menggao/code/VLM2Vec/experiments/public/eval"
+ # ========================================================
+
+ # Initialize the result files
+ echo "Dataset,Baseline_Latency(ms),Ours_Latency(ms),Speedup_Ratio,Exit_Rate" > $RESULT_CSV
+ echo "================= Benchmark Started at $(date) =================" > $RESULT_LOG
+
+ echo "🚀 Starting Batch Benchmark on ${#DATASETS[@]} datasets..."
+ echo "📄 Results will be saved to: $RESULT_CSV"
+
+ # Loop over every dataset
+ for DATASET in "${DATASETS[@]}"; do
+     CONFIG_PATH="$CONFIG_DIR/$DATASET.yaml"
+
+     if [ ! -f "$CONFIG_PATH" ]; then
+         echo "⚠️ Config not found for $DATASET, skipping..." | tee -a $RESULT_LOG
+         continue
+     fi
+
+     echo ""
+     echo "----------------------------------------------------------------"
+     echo "📊 Benchmarking Dataset: $DATASET"
+     echo "----------------------------------------------------------------"
+
+     # 1. Run the baseline (full forward pass)
+     echo " 🐢 Running Baseline..."
+     export EE_ENABLED=0
+     export AOP_ENABLED=0
+
+     LOG_BASE=$(CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_benchmark_V5.py \
+         --model_name "$MODEL_PATH" \
+         --dataset_config "$CONFIG_PATH" \
+         --per_device_eval_batch_size 64 \
+         --dataloader_num_workers 4 \
+         2>>$RESULT_LOG)
+
+     # Extract the baseline latency (via grep and sed)
+     LAT_BASE=$(echo "$LOG_BASE" | grep "\[BENCHMARK_RESULT\]" | sed -n 's/.*Latency=\([0-9.]*\)ms.*/\1/p')
+
+     # 2. Run ours (early exit)
+     echo " 🚀 Running Ours (Ratio=0.5)..."
+     export EE_ENABLED=1
+     export AOP_ENABLED=1
+     export EE_LAYER=12
+     export EE_CLASSIFIER_PATH="$CLASSIFIER_PATH"
+     export EE_THRESHOLD=0.3  # Pin the ratio so the theoretical speedup can be measured
+
+     LOG_OURS=$(CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_benchmark_V5.py \
+         --model_name "$MODEL_PATH" \
+         --dataset_config "$CONFIG_PATH" \
+         --per_device_eval_batch_size 64 \
+         --dataloader_num_workers 4 \
+         2>>$RESULT_LOG)
+
+     # Extract our latency
+     LAT_OURS=$(echo "$LOG_OURS" | grep "\[BENCHMARK_RESULT\]" | sed -n 's/.*Latency=\([0-9.]*\)ms.*/\1/p')
+
+     # 3. Compute and record
+     if [ -z "$LAT_BASE" ] || [ -z "$LAT_OURS" ]; then
+         echo " ❌ Error: Failed to parse latency for $DATASET." | tee -a $RESULT_LOG
+         echo "$DATASET,ERROR,ERROR,0,0" >> $RESULT_CSV
+     else
+         # Use Python for the floating-point division (more robust than shell bc)
+         SPEEDUP=$(python -c "print(f'{float($LAT_BASE)/float($LAT_OURS):.2f}')")
+
+         echo " ✅ Result: Base=${LAT_BASE}ms | Ours=${LAT_OURS}ms | Speedup=${SPEEDUP}x"
+
+         # Append the row to the CSV
+         echo "$DATASET,$LAT_BASE,$LAT_OURS,$SPEEDUP,0.5" >> $RESULT_CSV
+     fi
+ done
+
+ echo ""
+ echo "================================================================"
+ echo "🎉 Batch Benchmark Completed!"
+ echo "📄 Final Data: $RESULT_CSV"
+ echo "================================================================"
+
+ # Print the final table to the terminal for inspection
+ echo ""
+ column -s, -t $RESULT_CSV
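The whole benchmark hinges on one grep/sed pipeline per run plus a Python one-liner for the ratio. A standalone sketch of just that extraction and division; the exact `[BENCHMARK_RESULT]` line format is an assumption inferred from the sed pattern the script uses:

#!/bin/bash
# Parse "Latency=<float>ms" out of a benchmark line and compute the speedup.
LOG_BASE='[BENCHMARK_RESULT] Latency=182.4ms'
LOG_OURS='[BENCHMARK_RESULT] Latency=96.7ms'
LAT_BASE=$(echo "$LOG_BASE" | grep "\[BENCHMARK_RESULT\]" | sed -n 's/.*Latency=\([0-9.]*\)ms.*/\1/p')
LAT_OURS=$(echo "$LOG_OURS" | grep "\[BENCHMARK_RESULT\]" | sed -n 's/.*Latency=\([0-9.]*\)ms.*/\1/p')
SPEEDUP=$(python -c "print(f'{float($LAT_BASE)/float($LAT_OURS):.2f}')")
echo "Base=${LAT_BASE}ms | Ours=${LAT_OURS}ms | Speedup=${SPEEDUP}x"   # Speedup=1.89x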
experiments/public/eval/scan_threshold.sh ADDED
@@ -0,0 +1,176 @@
+ #!/bin/bash
+ # =============================================================================
+ # Threshold Scanning Script for Early Exit Classifier
+ # Automatically scans performance across different thresholds
+ # =============================================================================
+
+ echo "==> Environment"
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+ echo ""
+
+ cd VLM2Vec/ || exit
+
+ # ==============================================================================
+ # Configuration
+ # ==============================================================================
+ CUDA_VISIBLE_DEVICES="0"
+ BATCH_SIZE=32
+
+ # [AOP config] Token pruning - disabled to match the baseline
+ export AOP_ENABLED=0
+
+ # [EE config] Early exit via the classifier
+ export EE_ENABLED=1
+ export EE_LAYER=12
+ export EE_METHOD=classifier
+ export EE_DEBUG_MODE=0  # Disable debug mode to speed things up
+ export EE_TOPK=200
+
+ # Classifier path
+ export EE_CLASSIFIER_PATH="/home/v-menggao/code/VLM2Vec/~/experiments/checkpoint-600"
+
+ # Dataset configuration
+ MODALITIES=("image_retrival")
+ DATA_BASEDIR="~/data/vlm2vec_eval/MMEB-V2"
+
+ # Model path
+ MODEL_CHECKPOINT="/home/v-menggao/code/VLM2Vec/~/experiments/Qwen2_5vl_3B_multi_layer_12_-1_0.1_0.9/checkpoint-5000"
+ MODEL_NAME="qwen2_5_vl"
+
+ # [Key] Threshold scan range
+ # Based on earlier results, scan the 0.95-1.0 interval densely
+ THRESHOLDS=(0.95 0.96 0.97 0.975 0.98 0.985 0.99 0.995 1.0)
+
+ # Base output directory
+ OUTPUT_BASEDIR="/home/v-menggao/code/VLM2Vec/~/exps/Qwen2_5vl_3B_Classifier_ThresholdScan"
+
+ echo "================================================="
+ echo "🚀 Threshold Scanning for Early Exit Classifier"
+ echo "🚀 Classifier: $EE_CLASSIFIER_PATH"
+ echo "🚀 Threshold Range: ${THRESHOLDS[@]}"
+ echo "🚀 Total Runs: ${#THRESHOLDS[@]}"
+ echo "================================================="
+ echo ""
+
+ # Create the results summary file
+ SUMMARY_FILE="${OUTPUT_BASEDIR}/threshold_scan_summary.txt"
+ mkdir -p "$OUTPUT_BASEDIR"
+
+ # Write the header
+ echo "=================================================" > "$SUMMARY_FILE"
+ echo "Threshold Scanning Results" >> "$SUMMARY_FILE"
+ echo "Date: $(date)" >> "$SUMMARY_FILE"
+ echo "Classifier: $EE_CLASSIFIER_PATH" >> "$SUMMARY_FILE"
+ echo "=================================================" >> "$SUMMARY_FILE"
+ echo "" >> "$SUMMARY_FILE"
+ printf "%-12s %-15s %-10s %-10s %-10s %-10s\n" "Threshold" "Dataset" "Hit@1" "Hit@5" "Hit@10" "Exit Rate" >> "$SUMMARY_FILE"
+ echo "--------------------------------------------------------------------------------" >> "$SUMMARY_FILE"
+
+ # Scan over each threshold
+ for THRESHOLD in "${THRESHOLDS[@]}"; do
+     echo ""
+     echo "=========================================="
+     echo "Testing Threshold: $THRESHOLD"
+     echo "=========================================="
+
+     # Set the current threshold
+     export EE_THRESHOLD=$THRESHOLD
+
+     # Set the output path (encodes the threshold)
+     OUTPUT_PATH="${OUTPUT_BASEDIR}/threshold_${THRESHOLD}"
+
+     for MODALITY in "${MODALITIES[@]}"; do
+         DATA_CONFIG_PATH="/home/v-menggao/code/VLM2Vec/experiments/public/eval/$MODALITY.yaml"
+
+         # Create the output directory
+         MODALITY_OUTPUT_PATH="${OUTPUT_PATH}/${MODALITY}"
+         mkdir -p "$MODALITY_OUTPUT_PATH"
+
+         echo " - Running $MODALITY with threshold=$THRESHOLD..."
+
+         # Run the evaluation
+         CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python eval_test_time_with_classifier.py \
+             --pooling eos \
+             --normalize true \
+             --per_device_eval_batch_size $BATCH_SIZE \
+             --model_backbone "$MODEL_NAME" \
+             --model_name "$MODEL_CHECKPOINT" \
+             --dataset_config "$DATA_CONFIG_PATH" \
+             --encode_output_path "$MODALITY_OUTPUT_PATH" \
+             --data_basedir "$DATA_BASEDIR" 2>&1 | tee "${OUTPUT_PATH}/${MODALITY}_log.txt"
+
+         # Extract results (parsed from the log)
+         LOG_FILE="${OUTPUT_PATH}/${MODALITY}_log.txt"
+
+         # Parse each dataset's results
+         if [ -f "$LOG_FILE" ]; then
+             # Use a Python heredoc to parse the JSON results quickly
+             python3 << EOF >> "$SUMMARY_FILE"
+ import json
+ import re
+ import sys
+ import os
+ import glob
+
+ log_file = "${LOG_FILE}"
+ threshold = ${THRESHOLD}
+
+ try:
+     with open(log_file, 'r') as f:
+         content = f.read()
+
+     # Extract the early-exit stats
+     exit_match = re.search(r'Early Exit Stats: Exit=(\d+)/(\d+)', content)
+     if exit_match:
+         exit_count = int(exit_match.group(1))
+         total_count = int(exit_match.group(2))
+         exit_rate = exit_count / total_count if total_count > 0 else 0.0
+     else:
+         exit_rate = -1.0
+
+     # Find each dataset's result JSON file
+     result_files = glob.glob("${OUTPUT_PATH}/${MODALITY}/*_score_earlyexit.json")
+
+     for result_file in result_files:
+         dataset_name = os.path.basename(result_file).replace("_score_earlyexit.json", "")
+         with open(result_file, 'r') as rf:
+             results = json.load(rf)
+
+         hit1 = results.get('hit@1', -1)
+         hit5 = results.get('hit@5', -1)
+         hit10 = results.get('hit@10', -1)
+
+         print(f"{threshold:<12.3f} {dataset_name:<15s} {hit1:<10.4f} {hit5:<10.4f} {hit10:<10.4f} {exit_rate:<10.2%}")
+
+ except Exception as e:
+     print(f"{threshold:<12.3f} {'ERROR':<15s} {'-':<10s} {'-':<10s} {'-':<10s} {'-':<10s}", file=sys.stderr)
+     print(f"Error: {e}", file=sys.stderr)
+ EOF
+         fi
+     done
+
+     echo " ✓ Threshold $THRESHOLD completed"
+ done
+
+ echo "" >> "$SUMMARY_FILE"
+ echo "=================================================" >> "$SUMMARY_FILE"
+ echo "Scan completed at $(date)" >> "$SUMMARY_FILE"
+ echo "=================================================" >> "$SUMMARY_FILE"
+
+ echo ""
+ echo "=========================================="
+ echo "✅ All threshold scanning completed!"
+ echo "=========================================="
+ echo ""
+ echo "📊 Results summary saved to:"
+ echo " $SUMMARY_FILE"
+ echo ""
+ echo "📁 Detailed results in:"
+ echo " $OUTPUT_BASEDIR"
+ echo ""
+
+ # Show the summary
+ echo "📈 Quick Summary:"
+ cat "$SUMMARY_FILE"
experiments/public/eval/visdoc_retrival.yaml ADDED
@@ -0,0 +1,141 @@
+ ViDoRe_arxivqa:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_arxivqa
+   image_root: visdoc-tasks/ViDoRe_arxivqa
+   eval_type: global
+ ViDoRe_docvqa:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_docvqa
+   image_root: visdoc-tasks/ViDoRe_docvqa
+   eval_type: global
+ ViDoRe_infovqa:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_infovqa
+   image_root: visdoc-tasks/ViDoRe_infovqa
+   eval_type: global
+ ViDoRe_tabfquad:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_tabfquad
+   image_root: visdoc-tasks/ViDoRe_tabfquad
+   eval_type: global
+ ViDoRe_tatdqa:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_tatdqa
+   image_root: visdoc-tasks/ViDoRe_tatdqa
+   eval_type: global
+ ViDoRe_shiftproject:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_shiftproject
+   image_root: visdoc-tasks/ViDoRe_shiftproject
+   eval_type: global
+ ViDoRe_syntheticDocQA_artificial_intelligence:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_syntheticDocQA_artificial_intelligence
+   image_root: visdoc-tasks/ViDoRe_syntheticDocQA_artificial_intelligence
+   eval_type: global
+ ViDoRe_syntheticDocQA_energy:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_syntheticDocQA_energy
+   image_root: visdoc-tasks/ViDoRe_syntheticDocQA_energy
+   eval_type: global
+ ViDoRe_syntheticDocQA_government_reports:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_syntheticDocQA_government_reports
+   image_root: visdoc-tasks/ViDoRe_syntheticDocQA_government_reports
+   eval_type: global
+ ViDoRe_syntheticDocQA_healthcare_industry:
+   dataset_parser: vidore
+   dataset_name: ViDoRe_syntheticDocQA_healthcare_industry
+   image_root: visdoc-tasks/ViDoRe_syntheticDocQA_healthcare_industry
+   eval_type: global
+
+
+ # ViDoRe_esg_reports_human_labeled_v2:
+ #   dataset_parser: vidore
+ #   dataset_name: ViDoRe_esg_reports_human_labeled_v2
+ #   image_root: visdoc-tasks/esg_reports_human_labeled_v2
+ #   eval_type: global
+ # ViDoRe_biomedical_lectures_v2:
+ #   dataset_parser: vidore
+ #   dataset_name: ViDoRe_biomedical_lectures_v2
+ #   image_root: visdoc-tasks/biomedical_lectures_v2
+ #   eval_type: global
+ # ViDoRe_biomedical_lectures_v2_multilingual:
+ #   dataset_parser: vidore
+ #   dataset_name: ViDoRe_biomedical_lectures_v2_multilingual
+ #   image_root: visdoc-tasks/biomedical_lectures_v2_multilingual
+ #   eval_type: global
+ # ViDoRe_economics_reports_v2:
+ #   dataset_parser: vidore
+ #   dataset_name: ViDoRe_economics_reports_v2
+ #   image_root: visdoc-tasks/economics_reports_v2
+ #   eval_type: global
+ # ViDoRe_economics_reports_v2_multilingual:
+ #   dataset_parser: vidore
+ #   dataset_name: ViDoRe_economics_reports_v2_multilingual
+ #   image_root: visdoc-tasks/economics_reports_v2_multilingual
+ #   eval_type: global
+ # ViDoRe_esg_reports_v2:
+ #   dataset_parser: vidore
+ #   dataset_name: ViDoRe_esg_reports_v2
+ #   image_root: visdoc-tasks/esg_reports_v2
+ #   eval_type: global
+ # ViDoRe_esg_reports_v2_multilingual:
+ #   dataset_parser: vidore
+ #   dataset_name: ViDoRe_esg_reports_v2_multilingual
+ #   image_root: visdoc-tasks/esg_reports_v2_multilingual
+ #   eval_type: global
+
+
+ VisRAG_ArxivQA:
+   dataset_parser: visrag
+   dataset_name: VisRAG_ArxivQA
+   image_root: visdoc-tasks/VisRAG_ArxivQA
+   eval_type: global
+ VisRAG_ChartQA:
+   dataset_parser: visrag
+   dataset_name: VisRAG_ChartQA
+   image_root: visdoc-tasks/VisRAG_ChartQA
+   eval_type: global
+ VisRAG_MP-DocVQA:
+   dataset_parser: visrag
+   dataset_name: VisRAG_MP-DocVQA
+   image_root: visdoc-tasks/VisRAG_MP-DocVQA
+   eval_type: global
+ VisRAG_SlideVQA:
+   dataset_parser: visrag
+   dataset_name: VisRAG_SlideVQA
+   image_root: visdoc-tasks/VisRAG_SlideVQA
+   eval_type: global
+ VisRAG_InfoVQA:
+   dataset_parser: visrag
+   dataset_name: VisRAG_InfoVQA
+   image_root: visdoc-tasks/VisRAG_InfoVQA
+   eval_type: global
+ VisRAG_PlotQA:
+   dataset_parser: visrag
+   dataset_name: VisRAG_PlotQA
+   image_root: visdoc-tasks/VisRAG_PlotQA
+   eval_type: global
+
+ ViDoSeek-page:
+   dataset_parser: vidore
+   dataset_name: ViDoSeek-page
+   image_root: visdoc-tasks/ViDoSeek-page
+   eval_type: global
+ ViDoSeek-doc:
+   dataset_parser: vidore
+   dataset_name: ViDoSeek-doc
+   image_root: visdoc-tasks/ViDoSeek-doc
+   eval_type: global
+
+ MMLongBench-doc:
+   dataset_parser: vidore
+   dataset_name: MMLongBench-doc
+   image_root: visdoc-tasks/MMLongBench-doc
+   eval_type: global
+ MMLongBench-page:
+   dataset_parser: vidore
+   dataset_name: MMLongBench-page
+   image_root: visdoc-tasks/MMLongBench-page
+   eval_type: global
experiments/public/train/train_alltasks.yaml ADDED
@@ -0,0 +1,395 @@
+ # ImageNet_1K:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: ImageNet_1K
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 100000
+ #   weight: 1
+ # N24News:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: N24News
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 50000
+ #   weight: 1
+ # HatefulMemes:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: HatefulMemes
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 10000
+ #   weight: 0.5
+ # VOC2007:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: VOC2007
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 10000
+ #   weight: 0.5
+ # SUN397:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: SUN397
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 20000
+ #   weight: 0.5
+ # OK-VQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: OK-VQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 10000
+ #   weight: 0.5
+ # A-OKVQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: A-OKVQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 20000
+ #   weight: 0.5
+ # DocVQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: DocVQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 40000
+ #   weight: 1
+ # InfographicsVQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: InfographicsVQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 25000
+ #   weight: 0.5
+ # ChartQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: ChartQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 28000
+ #   weight: 0.5
+ # Visual7W:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: Visual7W
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 70000
+ #   weight: 1
+ # VisDial:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: VisDial
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 130000
+ #   weight: 1
+ # CIRR:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: CIRR
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 30000
+ #   weight: 0.5
+ # VisualNews_t2i:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: VisualNews_t2i
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 100000
+ #   weight: 1
+ # VisualNews_i2t:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: VisualNews_i2t
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 100000
+ #   weight: 1
+ # MSCOCO_t2i:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: MSCOCO_t2i
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 100000
+ #   weight: 1
+ # MSCOCO_i2t:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: MSCOCO_i2t
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 120000
+ #   weight: 1
+ # NIGHTS:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: NIGHTS
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 20000
+ #   weight: 0.5
+ # WebQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: WebQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 20000
+ #   weight: 0.5
+ # MSCOCO:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: MSCOCO
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 100000
+ #   weight: 1
+
+ # colpali_train_set:
+ #   dataset_parser: vidore
+ #   dataset_name: vidore/colpali_train_set
+ #   weight: 10
+ # visrag-indomain:
+ #   dataset_parser: visrag
+ #   dataset_name: openbmb/VisRAG-Ret-Train-In-domain-data
+ #   global_dataset_name: VisRAG-Indomain-data
+ #   weight: 12
+
+ # video_caption_300k:
+ #   dataset_parser: llavahound_caption
+ #   dataset_name: video_caption_300k
+ #   dataset_path: vlm2vec_train/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
+ #   video_frame_basedir: vlm2vec_train/train_video_and_instruction/train_300k
+ #   weight: 5
+ #   num_rows: 300_000
+ #   num_frames: 8
+ #   data_mode: caption_retrieval
+ # video_caption_300k-video:
+ #   dataset_parser: llavahound_caption
+ #   dataset_name: video_caption_300k
+ #   dataset_path: vlm2vec_train/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
+ #   video_frame_basedir: vlm2vec_train/train_video_and_instruction/train_300k
+ #   weight: 5
+ #   num_rows: 300_000
+ #   num_frames: 8
+ #   data_mode: video_retrieval
+ # video_qa_240k:
+ #   dataset_parser: llavahound_qa
+ #   dataset_name: video_qa_240k
+ #   dataset_path: vlm2vec_train/train_video_and_instruction/video_instruction/train/sft/video_240k_caption_15k.jsonl
+ #   video_frame_basedir: vlm2vec_train/train_video_and_instruction/train_300k
+ #   weight: 5
+ #   num_rows: 240_000
+ #   num_frames: 8
+
+ ImageNet_1K:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: ImageNet_1K
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ N24News:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: N24News
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 50000
+   weight: 1
+ HatefulMemes:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: HatefulMemes
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 10000
+   weight: 0.5
+ VOC2007:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VOC2007
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 10000
+   weight: 0.5
+ SUN397:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: SUN397
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 0.5
+ OK-VQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: OK-VQA
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 10000
+   weight: 0.5
+ A-OKVQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: A-OKVQA
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 0.5
+ DocVQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: DocVQA
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 40000
+   weight: 1
+ InfographicsVQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: InfographicsVQA
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 25000
+   weight: 0.5
+ ChartQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: ChartQA
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 28000
+   weight: 0.5
+ Visual7W:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: Visual7W
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 70000
+   weight: 1
+ VisDial:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisDial
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 130000
+   weight: 1
+ CIRR:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: CIRR
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 30000
+   weight: 0.5
+ VisualNews_t2i:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisualNews_t2i
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ VisualNews_i2t:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisualNews_i2t
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ MSCOCO_t2i:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO_t2i
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ MSCOCO_i2t:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO_i2t
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 120000
+   weight: 1
+ NIGHTS:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: NIGHTS
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 0.5
+ WebQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: WebQA
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 0.5
+ MSCOCO:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO
+   dataset_split: original
+   image_dir: /home/v-menggao/code/data/codenew/code/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+
+ colpali_train_set:
+   dataset_parser: vidore
+   dataset_name: vidore/colpali_train_set
+   weight: 10
+ visrag-indomain:
+   dataset_parser: visrag
+   dataset_name: openbmb/VisRAG-Ret-Train-In-domain-data
+   global_dataset_name: VisRAG-Indomain-data
+   weight: 12
+
+ video_caption_300k:
+   dataset_parser: llavahound_caption
+   dataset_name: video_caption_300k
+   dataset_path: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
+   video_frame_basedir: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/train_300k_extracted
+   weight: 5
+   num_rows: 300_000
+   num_frames: 8
+   data_mode: caption_retrieval
+ video_caption_300k-video:
+   dataset_parser: llavahound_caption
+   dataset_name: video_caption_300k
+   dataset_path: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/video_instruction/train/sft/video_caption_300k.jsonl
+   video_frame_basedir: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/train_300k_extracted
+   weight: 5
+   num_rows: 300_000
+   num_frames: 8
+   data_mode: video_retrieval
+ video_qa_240k:
+   dataset_parser: llavahound_qa
+   dataset_name: video_qa_240k
+   dataset_path: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/video_instruction/train/sft/video_240k_caption_15k.jsonl
+   video_frame_basedir: /home/v-menggao/code/data/codenew/code/data/train_video_and_instruction/train_300k_extracted
+   weight: 5
+   num_rows: 240_000
+   num_frames: 8
experiments/public/train/train_image.yaml ADDED
@@ -0,0 +1,161 @@
+ # ImageNet_1K:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: ImageNet_1K
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 100000
+ #   weight: 1
+ # N24News:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: N24News
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 50000
+ #   weight: 1
+ # HatefulMemes:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: HatefulMemes
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 10000
+ #   weight: 1
+ # VOC2007:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: VOC2007
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 10000
+ #   weight: 1
+ # SUN397:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: SUN397
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 20000
+ #   weight: 1
+ # OK-VQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: OK-VQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 10000
+ #   weight: 1
+ # A-OKVQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: A-OKVQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 20000
+ #   weight: 1
+ # DocVQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: DocVQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 40000
+ #   weight: 1
+ # InfographicsVQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: InfographicsVQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 25000
+ #   weight: 1
+ # ChartQA:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: ChartQA
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 28000
+ #   weight: 1
+ # Visual7W:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: Visual7W
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 70000
+ #   weight: 1
+ VisDial:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisDial
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 130000
+   weight: 1
+ CIRR:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: CIRR
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 30000
+   weight: 1
+ VisualNews_t2i:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisualNews_t2i
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ VisualNews_i2t:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisualNews_i2t
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ MSCOCO_t2i:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO_t2i
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ MSCOCO_i2t:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO_i2t
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 120000
+   weight: 1
+ NIGHTS:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: NIGHTS
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 1
+ WebQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: WebQA
+   dataset_split: original
+   image_dir: /home/v-menggao/code/VLM2Vec/~/data/vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 1
+
+ # MSCOCO:
+ #   dataset_parser: mmeb
+ #   dataset_name: TIGER-Lab/MMEB-train
+ #   subset_name: MSCOCO
+ #   dataset_split: original
+ #   image_dir: vlm2vec_train/MMEB-train/image
+ #   num_sample_per_subset: 100000
+ #   weight: 1
experiments/public/train/train_image1.yaml ADDED
@@ -0,0 +1,160 @@
+ ImageNet_1K:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: ImageNet_1K
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ N24News:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: N24News
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 50000
+   weight: 1
+ HatefulMemes:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: HatefulMemes
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 10000
+   weight: 1
+ VOC2007:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VOC2007
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 10000
+   weight: 1
+ SUN397:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: SUN397
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 1
+ OK-VQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: OK-VQA
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 10000
+   weight: 1
+ A-OKVQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: A-OKVQA
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 1
+ DocVQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: DocVQA
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 40000
+   weight: 1
+ InfographicsVQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: InfographicsVQA
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 25000
+   weight: 1
+ ChartQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: ChartQA
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 28000
+   weight: 1
+ Visual7W:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: Visual7W
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 70000
+   weight: 1
+ VisDial:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisDial
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 130000
+   weight: 1
+ CIRR:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: CIRR
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 30000
+   weight: 1
+ VisualNews_t2i:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisualNews_t2i
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ VisualNews_i2t:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: VisualNews_i2t
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ MSCOCO_t2i:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO_t2i
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
+ MSCOCO_i2t:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO_i2t
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 120000
+   weight: 1
+ NIGHTS:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: NIGHTS
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 1
+ WebQA:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: WebQA
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 20000
+   weight: 1
+ MSCOCO:
+   dataset_parser: mmeb
+   dataset_name: TIGER-Lab/MMEB-train
+   subset_name: MSCOCO
+   dataset_split: original
+   image_dir: vlm2vec_train/MMEB-train/image
+   num_sample_per_subset: 100000
+   weight: 1
experiments/public/train/train_v2-gp.sh ADDED
@@ -0,0 +1,103 @@
+ # #!/bin/bash
+ # # NOTE: replace ... with actual paths
+ # export LD_LIBRARY_PATH=...
+ # export PATH=...
+ # echo "conda location: $(which conda)"
+ # echo "Python location: $(which python)"
+ # echo "Python version: $(python --version)"
+
+ # export HF_DATASETS_CACHE=...
+ # export HF_HOME=...
+ # export WANDB_DISABLED=false
+ # export WANDB_PROJECT=...
+ # export WANDB_API_KEY=...
+ # export HUGGING_FACE_HUB_TOKEN=...
+ # export WANDB_RUN_GROUP=...
+ # export EXP_NAME=Qwen2vl_2B.image+visdoc+video.autoresize.lora16.BS1024.IB64.GCq8p8.NormTemp002.lr5e5.step5kwarm100.8H100
+
+ # export WANDB_NAME=$EXP_NAME
+ # export EXP_DIR=.../$EXP_NAME
+ # export WANDB_DIR=$EXP_DIR
+ # echo $EXP_DIR
+
+ # mkdir -p $EXP_DIR/wandb
+ # rm -rf $EXP_DIR/wandb/*
+
+ # cd PATH_TO_VLM2VEC_REPO
+ # cmd="CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=2207 --max_restarts=0 train.py --lora --lora_r 16 --model_name Qwen/Qwen2-VL-2B-Instruct --bf16 --pooling eos --normalize True --temperature 0.02 --dataloader_num_workers 8 --dataset_config experiments/release/train/train_image.yaml --run_name $EXP_NAME --output_dir $EXP_DIR --grad_cache True --per_device_train_batch_size 128 --gc_q_chunk_size 8 --gc_p_chunk_size 8 --interleave_batch_size 64 --lr_scheduler_type linear --learning_rate 5e-5 --max_steps 5000 --warmup_steps 100 --save_steps 50 --logging_steps 1 --save_safetensors True --remove_unused_columns False --resume_from auto --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
+
+ # echo $cmd
+ # eval $cmd
+
+
+ #!/bin/bash
+
+ # 1. CUDA dynamic library path
+ export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
+
+ # 2. Prepend the conda environment's bin directories to PATH
+ export PATH=/home/v-menggao/miniconda3/envs/VLMtoVec/bin:/home/v-menggao/miniconda3/condabin:$PATH
+
+ # Print current environment info
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+
+ # 3. HuggingFace cache paths
+ export HF_DATASETS_CACHE=/home/v-menggao/.cache/huggingface/datasets
+ export HF_HOME=/home/v-menggao/.cache/huggingface
+
+ # 4. W&B settings (fill in your own project and API key; do not commit real credentials)
+ timestamp=$(date +%Y%m%d_%H%M%S)
+ export WANDB_DISABLED=false
+ export WANDB_PROJECT=vlm2vec_gp_${timestamp}
+ export WANDB_API_KEY=... # obtain from https://wandb.ai/settings
+ export HUGGING_FACE_HUB_TOKEN=... # obtain from https://huggingface.co/settings/tokens
+ export WANDB_RUN_GROUP=baseline_test
+
+ # 5. Experiment name and directory
+ export EXP_NAME=Qwen2.5vl_gp_try
+ export WANDB_DIR=$EXP_DIR
+ export WANDB_NAME=$EXP_NAME
+ export EXP_DIR=/home/v-menggao/code/VLM2Vec/~/experiments_try_gp/$EXP_NAME # make sure this directory is on a disk with free space (e.g. /mnt/data)
+ echo $EXP_DIR
+
+ mkdir -p $EXP_DIR/wandb
+ rm -rf $EXP_DIR/wandb/*
+
+ # 6. Change into the code repository
+ cd /home/v-menggao/code/VLM2Vec
+
+ # 7. Assemble the training command  # --lora --lora_r 16 \
+ cmd="CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=2207 --max_restarts=0 train_gp.py \
+ --model_name /home/v-menggao/code/VLM2Vec/~/experiments/Qwen2.5vl_3B.all/checkpoint-1500 \
+ --new_modules_dir ashun989/GlimpsePrune_Qwen2.5-VL-3B-Instruct \
+ --gp_do_selection False \
+ --gp_use_chat_processing True \
+ --gp_aux_config /home/v-menggao/code/GlimpsePrune/dataset_configs/gqa_rand.yaml \
+ --bf16 \
+ --pooling eos \
+ --normalize True \
+ --temperature 0.02 \
+ --dataloader_num_workers 8 \
+ --dataset_config /home/v-menggao/code/VLM2Vec/experiments/public/train/train_image.yaml \
+ --run_name $EXP_NAME \
+ --output_dir $EXP_DIR \
+ --grad_cache True \
+ --per_device_train_batch_size 32 \
+ --gc_q_chunk_size 4 --gc_p_chunk_size 4 \
+ --interleave_batch_size 64 \
+ --lr_scheduler_type linear \
+ --learning_rate 5e-5 \
+ --max_steps 1000 --warmup_steps 20 \
+ --save_steps 100 --logging_steps 1 \
+ --save_safetensors True \
+ --remove_unused_columns False \
+ --image_encoder_freeze True \
+ --loc_weight 1.0 --le_weight 1.0 --loc_dice_weight 1.0 --loc_bce_weight 0.1 \
+ --resume_from auto \
+ --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
+
+ echo $cmd
+ eval $cmd
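The run above combines --grad_cache True with --per_device_train_batch_size 32 and --gc_q_chunk_size 4 / --gc_p_chunk_size 4, so each device's batch is encoded in small sub-batches while the contrastive loss still sees the full batch. A quick sanity check, assuming GradCache splits each batch into ceil(batch_size / chunk_size) chunks (its usual convention):

#!/bin/bash
# Hedged sketch: estimate the effective contrastive batch and the number of
# GradCache sub-batches per device, assuming ceil(batch / chunk) chunking.
BATCH=32; Q_CHUNK=4; P_CHUNK=4; NPROC=1
echo "effective contrastive batch: $((BATCH * NPROC))"
echo "query sub-batches per device: $(( (BATCH + Q_CHUNK - 1) / Q_CHUNK ))"
echo "passage sub-batches per device: $(( (BATCH + P_CHUNK - 1) / P_CHUNK ))"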
experiments/public/train/train_v2-qwen2vl-2B_imageonly_add_CRD.sh ADDED
@@ -0,0 +1,111 @@
+ # #!/bin/bash
+ # # NOTE: replace ... with actual paths
+ # export LD_LIBRARY_PATH=...
+ # export PATH=...
+ # echo "conda location: $(which conda)"
+ # echo "Python location: $(which python)"
+ # echo "Python version: $(python --version)"
+
+ # export HF_DATASETS_CACHE=...
+ # export HF_HOME=...
+ # export WANDB_DISABLED=false
+ # export WANDB_PROJECT=...
+ # export WANDB_API_KEY=...
+ # export HUGGING_FACE_HUB_TOKEN=...
+ # export WANDB_RUN_GROUP=...
+ # export EXP_NAME=Qwen2vl_2B.image+visdoc+video.autoresize.lora16.BS1024.IB64.GCq8p8.NormTemp002.lr5e5.step5kwarm100.8H100
+
+ # export WANDB_NAME=$EXP_NAME
+ # export EXP_DIR=.../$EXP_NAME
+ # export WANDB_DIR=$EXP_DIR
+ # echo $EXP_DIR
+
+ # mkdir -p $EXP_DIR/wandb
+ # rm -rf $EXP_DIR/wandb/*
+
+ # cd PATH_TO_VLM2VEC_REPO
+ # cmd="CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=2207 --max_restarts=0 train.py --lora --lora_r 16 --model_name Qwen/Qwen2-VL-2B-Instruct --bf16 --pooling eos --normalize True --temperature 0.02 --dataloader_num_workers 8 --dataset_config experiments/release/train/train_image.yaml --run_name $EXP_NAME --output_dir $EXP_DIR --grad_cache True --per_device_train_batch_size 128 --gc_q_chunk_size 8 --gc_p_chunk_size 8 --interleave_batch_size 64 --lr_scheduler_type linear --learning_rate 5e-5 --max_steps 5000 --warmup_steps 100 --save_steps 50 --logging_steps 1 --save_safetensors True --remove_unused_columns False --resume_from auto --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
+
+ # echo $cmd
+ # eval $cmd
+
+
+ #!/bin/bash
+
+ # Resolve the directory containing this script (so code paths resolve no matter where it is launched from)
+ SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+ CODE_DIR=$(realpath "$SCRIPT_DIR/../../..") # repository root (this script lives in experiments/public/train/)
+ HOME_DIR=$HOME # current user's HOME directory
+ EXP_ROOT="$CODE_DIR/experiments" # root directory for experiments
+
+ # 1. CUDA dynamic library path
+ export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
+
+ # 2. Prepend the conda environment's bin directories to PATH
+ export PATH="$HOME_DIR/miniconda3/envs/VLMtoVec/bin:$HOME_DIR/miniconda3/condabin:$PATH"
+
+ # Print current environment info
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+
+ # 3. HuggingFace cache paths
+ export HF_DATASETS_CACHE="$HOME_DIR/.cache/huggingface/datasets"
+ export HF_HOME="$HOME_DIR/.cache/huggingface"
+
+ # 4. W&B settings (fill in your own project and API key; do not commit real credentials)
+ export WANDB_DISABLED=false
+ export WANDB_PROJECT=vlm2vec_layer_prune
+ export WANDB_API_KEY=... # obtain from https://wandb.ai/settings
+ export HUGGING_FACE_HUB_TOKEN=... # obtain from https://huggingface.co/settings/tokens
+ export WANDB_RUN_GROUP=baseline_test
+
+ # 5. Experiment name and directory
+ export EXP_NAME=Qwen2vl_2B.add_CRD_try
+ export WANDB_NAME=$EXP_NAME
+ export EXP_DIR="$EXP_ROOT/$EXP_NAME"
+ export WANDB_DIR=$EXP_DIR
+
+ echo "Experiment directory: $EXP_DIR"
+ mkdir -p "$EXP_DIR/wandb"
+ rm -rf "$EXP_DIR"/wandb/* # the glob must stay outside the quotes, or it will not expand
+
+ # 6. Change into the code repository (the relative --dataset_config path below depends on it)
+ cd "$CODE_DIR"
+
+ # 7. Assemble the training command
+ cmd="CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=2207 --max_restarts=0 train_add_CRD_warmup.py \
+ --lora --lora_r 16 \
+ --model_name VLM2Vec/VLM2Vec-V2.0 \
+ --supervise_layers 20,-1 \
+ --supervise_weights 1,0 \
+ --use_crd true \
+ --crd_weight 0.2 \
+ --crd_temperature 0.07 \
+ --crd_layers 0,1 \
+ --crd_warmup_steps 200 \
+ --crd_detach_teacher true \
+ --crd_debug_every 0 \
+ --bf16 \
+ --pooling eos \
+ --normalize True \
+ --temperature 0.02 \
+ --dataloader_num_workers 8 \
+ --dataset_config experiments/public/train/train_image.yaml \
+ --run_name $EXP_NAME \
+ --output_dir $EXP_DIR \
+ --grad_cache True \
+ --per_device_train_batch_size 128 \
+ --gc_q_chunk_size 1 --gc_p_chunk_size 1 \
+ --interleave_batch_size 64 \
+ --lr_scheduler_type linear \
+ --learning_rate 5e-5 \
+ --max_steps 500 --warmup_steps 10 \
+ --save_steps 100 --logging_steps 1 \
+ --save_safetensors True \
+ --remove_unused_columns False \
+ --resume_from auto \
+ --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
+
+ echo $cmd
+ eval $cmd
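The CRD run above sets --crd_weight 0.2 with --crd_warmup_steps 200. If the warmup linearly ramps the CRD loss weight from 0 to its target value (an assumption; train_add_CRD_warmup.py defines the actual schedule), the effective weight at a given step would look like this sketch:

#!/bin/bash
# Hedged sketch of a linear CRD-weight warmup; the real schedule lives in
# train_add_CRD_warmup.py and may differ.
crd_weight_at_step() {
  local step=$1 warmup=$2 target=$3
  awk -v s="$step" -v w="$warmup" -v t="$target" \
    'BEGIN { r = s / w; if (r > 1) r = 1; printf "%.4f\n", t * r }'
}
crd_weight_at_step 50 200 0.2   # -> 0.0500 (still ramping)
crd_weight_at_step 300 200 0.2  # -> 0.2000 (warmup finished)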
experiments/public/train/train_v2-qwen2vl-2B_imageonly_layer_prune.sh ADDED
@@ -0,0 +1,98 @@
+ # #!/bin/bash
+ # # NOTE: replace ... with actual paths
+ # export LD_LIBRARY_PATH=...
+ # export PATH=...
+ # echo "conda location: $(which conda)"
+ # echo "Python location: $(which python)"
+ # echo "Python version: $(python --version)"
+
+ # export HF_DATASETS_CACHE=...
+ # export HF_HOME=...
+ # export WANDB_DISABLED=false
+ # export WANDB_PROJECT=...
+ # export WANDB_API_KEY=...
+ # export HUGGING_FACE_HUB_TOKEN=...
+ # export WANDB_RUN_GROUP=...
+ # export EXP_NAME=Qwen2vl_2B.image+visdoc+video.autoresize.lora16.BS1024.IB64.GCq8p8.NormTemp002.lr5e5.step5kwarm100.8H100
+
+ # export WANDB_NAME=$EXP_NAME
+ # export EXP_DIR=.../$EXP_NAME
+ # export WANDB_DIR=$EXP_DIR
+ # echo $EXP_DIR
+
+ # mkdir -p $EXP_DIR/wandb
+ # rm -rf $EXP_DIR/wandb/*
+
+ # cd PATH_TO_VLM2VEC_REPO
+ # cmd="CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=2207 --max_restarts=0 train.py --lora --lora_r 16 --model_name Qwen/Qwen2-VL-2B-Instruct --bf16 --pooling eos --normalize True --temperature 0.02 --dataloader_num_workers 8 --dataset_config experiments/release/train/train_image.yaml --run_name $EXP_NAME --output_dir $EXP_DIR --grad_cache True --per_device_train_batch_size 128 --gc_q_chunk_size 8 --gc_p_chunk_size 8 --interleave_batch_size 64 --lr_scheduler_type linear --learning_rate 5e-5 --max_steps 5000 --warmup_steps 100 --save_steps 50 --logging_steps 1 --save_safetensors True --remove_unused_columns False --resume_from auto --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
+
+ # echo $cmd
+ # eval $cmd
+
+
+ #!/bin/bash
+
+ # 1. CUDA dynamic library path
+ export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
+
+ # 2. Prepend the conda environment's bin directories to PATH
+ export PATH=/home/v-menggao/miniconda3/envs/VLMtoVec/bin:/home/v-menggao/miniconda3/condabin:$PATH
+
+ # Print current environment info
+ echo "conda location: $(which conda)"
+ echo "Python location: $(which python)"
+ echo "Python version: $(python --version)"
+
+ # 3. HuggingFace cache paths
+ export HF_DATASETS_CACHE=/home/v-menggao/.cache/huggingface/datasets
+ export HF_HOME=/home/v-menggao/.cache/huggingface
+
+ # 4. W&B settings (fill in your own project and API key; do not commit real credentials)
+ export WANDB_DISABLED=false
+ export WANDB_PROJECT=vlm2vec_layer_prune
+ export WANDB_API_KEY=... # obtain from https://wandb.ai/settings
+ export HUGGING_FACE_HUB_TOKEN=... # obtain from https://huggingface.co/settings/tokens
+ export WANDB_RUN_GROUP=baseline_test
+
+ # 5. Experiment name and directory
+ export EXP_NAME=Qwen2vl_2B.image_qry_16_none+cand_16_none_0.1_try
+ export WANDB_NAME=$EXP_NAME
+ export EXP_DIR=/home/v-menggao/code/VLM2Vec/~/experiments/$EXP_NAME # make sure this directory is on a disk with free space (e.g. /mnt/data)
+ export WANDB_DIR=$EXP_DIR
+ echo $EXP_DIR
+
+ mkdir -p $EXP_DIR/wandb
+ rm -rf $EXP_DIR/wandb/*
+
+ # 6. Change into the code repository
+ cd /home/v-menggao/code/VLM2Vec
+
+ # 7. Assemble the training command
+ cmd="CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=2207 --max_restarts=0 train_layer_prune.py \
+ --lora --lora_r 16 \
+ --model_name VLM2Vec/VLM2Vec-V2.0 \
+ --dual_layer_idx 16 \
+ --dual_alpha 0.1 \
+ --bf16 \
+ --pooling eos \
+ --normalize True \
+ --temperature 0.02 \
+ --dataloader_num_workers 8 \
+ --dataset_config /home/v-menggao/code/VLM2Vec/experiments/public/train/train_image.yaml \
+ --run_name $EXP_NAME \
+ --output_dir $EXP_DIR \
+ --grad_cache True \
+ --per_device_train_batch_size 128 \
+ --gc_q_chunk_size 8 --gc_p_chunk_size 8 \
+ --interleave_batch_size 64 \
+ --lr_scheduler_type linear \
+ --learning_rate 7e-5 \
+ --max_steps 500 --warmup_steps 10 \
+ --save_steps 100 --logging_steps 1 \
+ --save_safetensors True \
+ --remove_unused_columns False \
+ --resume_from auto \
+ --report_to wandb 2>&1 | tee $EXP_DIR/train.log"
+
+ echo $cmd
+ eval $cmd
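--dual_layer_idx 16 and --dual_alpha 0.1 control how strongly the intermediate-layer objective is mixed with the final-layer one (the exact loss combination is defined in train_layer_prune.py), so a small sweep over alpha is the natural follow-up experiment. A hedged usage sketch reusing the flags from the script above; the flag semantics are assumed from their names:

#!/bin/bash
# Hedged sketch: sweep --dual_alpha over a few values, one run per value.
cd /home/v-menggao/code/VLM2Vec
for alpha in 0.05 0.1 0.2; do
  EXP_NAME="Qwen2vl_2B.image_dual16_alpha${alpha}"
  EXP_DIR="/home/v-menggao/code/VLM2Vec/~/experiments/$EXP_NAME"
  mkdir -p "$EXP_DIR"
  CUDA_VISIBLE_DEVICES=0 torchrun --nproc_per_node=1 --master_port=2207 --max_restarts=0 \
    train_layer_prune.py --lora --lora_r 16 \
    --model_name VLM2Vec/VLM2Vec-V2.0 \
    --dual_layer_idx 16 --dual_alpha "$alpha" \
    --bf16 --pooling eos --normalize True --temperature 0.02 \
    --dataloader_num_workers 8 \
    --dataset_config experiments/public/train/train_image.yaml \
    --run_name "$EXP_NAME" --output_dir "$EXP_DIR" \
    --grad_cache True --per_device_train_batch_size 128 \
    --gc_q_chunk_size 8 --gc_p_chunk_size 8 --interleave_batch_size 64 \
    --lr_scheduler_type linear --learning_rate 7e-5 \
    --max_steps 500 --warmup_steps 10 --save_steps 100 --logging_steps 1 \
    --save_safetensors True --remove_unused_columns False \
    2>&1 | tee "$EXP_DIR/train.log"
done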