keentomato committed on
Commit
5045efe
·
verified ·
1 Parent(s): d6282d6

Upload SFT checkpoint step=4578

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ license: apache-2.0
4
+ tags:
5
+ - human-behavior
6
+ - multimodal
7
+ - qwen2.5-omni
8
+ datasets:
9
+ - keentomato/human_behaviour_atlas
10
+ ---
11
+
12
+ # OmniSapiens SFT
13
+
14
+ Fine-tuned [Qwen2.5-Omni-7B](https://huggingface.co/Qwen/Qwen2.5-Omni-7B) for human behavior understanding.
15
+
16
+ ## Benchmark
17
+ Evaluated on [keentomato/human_behaviour_atlas](https://huggingface.co/datasets/keentomato/human_behaviour_atlas).
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|AUDIO|>": 151646,
5
+ "<|IMAGE|>": 151655,
6
+ "<|VIDEO|>": 151656,
7
+ "<|audio_bos|>": 151647,
8
+ "<|audio_eos|>": 151648,
9
+ "<|box_end|>": 151649,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|quad_end|>": 151651,
19
+ "<|quad_start|>": 151650,
20
+ "<|repo_name|>": 151663,
21
+ "<|vision_bos|>": 151652,
22
+ "<|vision_eos|>": 151653,
23
+ "<|vision_pad|>": 151654
24
+ }
chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set audio_count = namespace(value=0) %}{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_bos|><|IMAGE|><|vision_eos|>{% elif content['type'] == 'audio' or 'audio' in content or 'audio_url' in content %}{% set audio_count.value = audio_count.value + 1 %}{% if add_audio_id %}Audio {{ audio_count.value }}: {% endif %}<|audio_bos|><|AUDIO|><|audio_eos|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_bos|><|VIDEO|><|vision_eos|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
config.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "architectures": [
4
+ "Qwen2_5OmniThinkerForConditionalGeneration"
5
+ ],
6
+ "audio_config": {
7
+ "_attn_implementation_autoset": true,
8
+ "activation_dropout": 0.0,
9
+ "activation_function": "gelu",
10
+ "attention_dropout": 0.0,
11
+ "d_model": 1280,
12
+ "dropout": 0.0,
13
+ "encoder_attention_heads": 20,
14
+ "encoder_ffn_dim": 5120,
15
+ "encoder_layerdrop": 0.0,
16
+ "encoder_layers": 32,
17
+ "init_std": 0.02,
18
+ "initializer_range": 0.02,
19
+ "max_source_positions": 1500,
20
+ "model_type": "qwen2_5_omni_audio_encoder",
21
+ "n_window": 100,
22
+ "num_hidden_layers": 32,
23
+ "num_mel_bins": 128,
24
+ "output_dim": 3584,
25
+ "scale_embedding": false,
26
+ "torch_dtype": "float16"
27
+ },
28
+ "audio_end_token_id": 151648,
29
+ "audio_start_token_id": 151647,
30
+ "audio_token_index": 151646,
31
+ "bos_token_id": 151644,
32
+ "eos_token_id": 151645,
33
+ "ignore_index": -100,
34
+ "image_token_index": 151655,
35
+ "init_std": 0.02,
36
+ "initializer_range": 0.02,
37
+ "model_type": "qwen2_5_omni_thinker",
38
+ "pad_token_id": 151643,
39
+ "position_id_per_seconds": 25,
40
+ "seconds_per_chunk": 2,
41
+ "text_config": {
42
+ "attention_dropout": 0.0,
43
+ "hidden_act": "silu",
44
+ "hidden_size": 3584,
45
+ "init_std": 0.02,
46
+ "initializer_range": 0.02,
47
+ "intermediate_size": 18944,
48
+ "layer_types": [
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention",
72
+ "full_attention",
73
+ "full_attention",
74
+ "full_attention",
75
+ "full_attention",
76
+ "full_attention"
77
+ ],
78
+ "max_position_embeddings": 32768,
79
+ "max_window_layers": 28,
80
+ "model_type": "qwen2_5_omni_text",
81
+ "num_attention_heads": 28,
82
+ "num_hidden_layers": 28,
83
+ "num_key_value_heads": 4,
84
+ "rms_norm_eps": 1e-06,
85
+ "rope_scaling": {
86
+ "mrope_section": [
87
+ 16,
88
+ 24,
89
+ 24
90
+ ],
91
+ "rope_type": "default",
92
+ "type": "default"
93
+ },
94
+ "rope_theta": 1000000.0,
95
+ "sliding_window": null,
96
+ "torch_dtype": "float16",
97
+ "use_cache": true,
98
+ "use_sliding_window": false,
99
+ "vocab_size": 152064
100
+ },
101
+ "torch_dtype": "float16",
102
+ "transformers_version": "4.55.2",
103
+ "user_token_id": 872,
104
+ "video_token_index": 151656,
105
+ "vision_config": {
106
+ "_attn_implementation_autoset": true,
107
+ "depth": 32,
108
+ "embed_dim": 1280,
109
+ "fullatt_block_indexes": [
110
+ 7,
111
+ 15,
112
+ 23,
113
+ 31
114
+ ],
115
+ "hidden_act": "silu",
116
+ "hidden_size": 1280,
117
+ "in_channels": 3,
118
+ "in_chans": 3,
119
+ "init_std": 0.02,
120
+ "initializer_range": 0.02,
121
+ "intermediate_size": 3420,
122
+ "model_type": "qwen2_5_omni_vision_encoder",
123
+ "num_heads": 16,
124
+ "out_hidden_size": 3584,
125
+ "patch_size": 14,
126
+ "spatial_merge_size": 2,
127
+ "spatial_patch_size": 14,
128
+ "temporal_patch_size": 2,
129
+ "tokens_per_second": 25,
130
+ "torch_dtype": "float16",
131
+ "window_size": 112
132
+ },
133
+ "vision_end_token_id": 151653,
134
+ "vision_start_token_id": 151652,
135
+ "vision_token_id": 151654
136
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.55.2"
4
+ }
heads.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa7ac16e49e13be618de31d2064963f5da7a0c1360d4e105a422b88592d612fe
3
+ size 363217
label_scheme.json ADDED
@@ -0,0 +1,816 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label_mapping": {
3
+ "chsimsv2_neutral": 3,
4
+ "chsimsv2_strongly negative": 0,
5
+ "chsimsv2_strongly positive": 6,
6
+ "chsimsv2_weakly negative": 2,
7
+ "chsimsv2_weakly positive": 4,
8
+ "cremad_anger": 7,
9
+ "cremad_disgust": 8,
10
+ "cremad_fear": 9,
11
+ "cremad_happy": 10,
12
+ "cremad_neutral": 11,
13
+ "cremad_sad": 13,
14
+ "daicwoz_false": 17,
15
+ "daicwoz_true": 18,
16
+ "meld_emotion_anger": 7,
17
+ "meld_emotion_disgust": 8,
18
+ "meld_emotion_fear": 9,
19
+ "meld_emotion_joy": 10,
20
+ "meld_emotion_neutral": 11,
21
+ "meld_emotion_sadness": 13,
22
+ "meld_emotion_surprise": 14,
23
+ "meld_senti_negative": 1,
24
+ "meld_senti_neutral": 3,
25
+ "meld_senti_positive": 5,
26
+ "mmpsy_anxiety_anxiety": 20,
27
+ "mmpsy_anxiety_no anxiety": 19,
28
+ "mmpsy_depression_depression": 18,
29
+ "mmpsy_depression_no depression": 17,
30
+ "mmsd_false": 21,
31
+ "mmsd_true": 22,
32
+ "mosei_emotion_anger": 7,
33
+ "mosei_emotion_disgust": 8,
34
+ "mosei_emotion_fear": 9,
35
+ "mosei_emotion_happy": 10,
36
+ "mosei_emotion_sad": 13,
37
+ "mosei_emotion_surprise": 14,
38
+ "mosei_senti_highly negative": 0,
39
+ "mosei_senti_highly positive": 6,
40
+ "mosei_senti_negative": 1,
41
+ "mosei_senti_neutral": 3,
42
+ "mosei_senti_positive": 5,
43
+ "mosei_senti_weakly negative": 2,
44
+ "mosei_senti_weakly positive": 4,
45
+ "ptsd_in_the_wild_No PTSD": 15,
46
+ "ptsd_in_the_wild_PTSD": 16,
47
+ "ravdess_angry": 7,
48
+ "ravdess_calm": 12,
49
+ "ravdess_disgust": 8,
50
+ "ravdess_fearful": 9,
51
+ "ravdess_happy": 10,
52
+ "ravdess_neutral": 11,
53
+ "ravdess_sad": 13,
54
+ "ravdess_surprised": 14,
55
+ "tess_angry": 7,
56
+ "tess_disgust": 8,
57
+ "tess_fear": 9,
58
+ "tess_happy": 10,
59
+ "tess_neutral": 11,
60
+ "tess_pleasant surprise": 14,
61
+ "tess_sad": 13,
62
+ "urfunny_false": 23,
63
+ "urfunny_true": 24
64
+ },
65
+ "dataset_labels": {
66
+ "cremad": [
67
+ "anger",
68
+ "disgust",
69
+ "fear",
70
+ "happy",
71
+ "neutral",
72
+ "sad"
73
+ ],
74
+ "ptsd_in_the_wild": [
75
+ "No PTSD",
76
+ "PTSD"
77
+ ],
78
+ "ravdess": [
79
+ "angry",
80
+ "calm",
81
+ "disgust",
82
+ "fearful",
83
+ "happy",
84
+ "neutral",
85
+ "sad",
86
+ "surprised"
87
+ ],
88
+ "mmpsy_depression": [
89
+ "depression",
90
+ "no depression"
91
+ ],
92
+ "mmpsy_anxiety": [
93
+ "anxiety",
94
+ "no anxiety"
95
+ ],
96
+ "tess": [
97
+ "angry",
98
+ "disgust",
99
+ "fear",
100
+ "happy",
101
+ "neutral",
102
+ "pleasant surprise",
103
+ "sad"
104
+ ],
105
+ "chsimsv2": [
106
+ "neutral",
107
+ "strongly negative",
108
+ "strongly positive",
109
+ "weakly negative",
110
+ "weakly positive"
111
+ ],
112
+ "meld_emotion": [
113
+ "anger",
114
+ "disgust",
115
+ "fear",
116
+ "joy",
117
+ "neutral",
118
+ "sadness",
119
+ "surprise"
120
+ ],
121
+ "meld_senti": [
122
+ "negative",
123
+ "neutral",
124
+ "positive"
125
+ ],
126
+ "mmsd": [
127
+ "false",
128
+ "true"
129
+ ],
130
+ "mosei_senti": [
131
+ "highly negative",
132
+ "highly positive",
133
+ "negative",
134
+ "neutral",
135
+ "positive",
136
+ "weakly negative",
137
+ "weakly positive"
138
+ ],
139
+ "mosei_emotion": [
140
+ "anger",
141
+ "disgust",
142
+ "fear",
143
+ "happy",
144
+ "sad",
145
+ "surprise"
146
+ ],
147
+ "urfunny": [
148
+ "false",
149
+ "true"
150
+ ],
151
+ "daicwoz": [
152
+ "false",
153
+ "true"
154
+ ]
155
+ },
156
+ "num_classes": 25,
157
+ "datasets": [
158
+ "chsimsv2",
159
+ "cremad",
160
+ "daicwoz",
161
+ "meld_emotion",
162
+ "meld_senti",
163
+ "mmpsy_anxiety",
164
+ "mmpsy_depression",
165
+ "mmsd",
166
+ "mosei_emotion",
167
+ "mosei_senti",
168
+ "ptsd_in_the_wild",
169
+ "ravdess",
170
+ "tess",
171
+ "urfunny"
172
+ ],
173
+ "meta": {
174
+ "created_at": "2025-09-12T00:47:34.467264Z",
175
+ "domains": {
176
+ "sentiment_intensity": [
177
+ "highly negative",
178
+ "negative",
179
+ "weakly negative",
180
+ "neutral",
181
+ "weakly positive",
182
+ "positive",
183
+ "highly positive"
184
+ ],
185
+ "emotion": [
186
+ "anger",
187
+ "disgust",
188
+ "fear",
189
+ "happy",
190
+ "neutral",
191
+ "calm",
192
+ "sad",
193
+ "surprise"
194
+ ],
195
+ "mental_health_ptsd": [
196
+ "no ptsd",
197
+ "ptsd"
198
+ ],
199
+ "mental_health_depression": [
200
+ "no depression",
201
+ "depression"
202
+ ],
203
+ "mental_health_anxiety": [
204
+ "no anxiety",
205
+ "anxiety"
206
+ ],
207
+ "sarcasm": [
208
+ "not sarcasm",
209
+ "sarcasm"
210
+ ],
211
+ "humour": [
212
+ "not humour",
213
+ "humour"
214
+ ]
215
+ },
216
+ "synonyms": {
217
+ "emotion": {
218
+ "angry": "anger",
219
+ "happiness": "happy",
220
+ "joy": "happy",
221
+ "sadness": "sad",
222
+ "fearful": "fear",
223
+ "surprised": "surprise",
224
+ "pleasant surprise": "surprise"
225
+ },
226
+ "sentiment_intensity": {
227
+ "strongly negative": "highly negative",
228
+ "negative": "negative",
229
+ "weakly negative": "weakly negative",
230
+ "neutral": "neutral",
231
+ "weakly positive": "weakly positive",
232
+ "positive": "positive",
233
+ "strongly positive": "highly positive"
234
+ }
235
+ },
236
+ "dataset_domain": {
237
+ "chsimsv2": "sentiment_intensity",
238
+ "mosei_senti": "sentiment_intensity",
239
+ "meld_senti": "sentiment_intensity",
240
+ "cremad": "emotion",
241
+ "einterface": "emotion",
242
+ "expw": "emotion",
243
+ "meld_emotion": "emotion",
244
+ "mosei_emotion": "emotion",
245
+ "ravdess": "emotion",
246
+ "tess": "emotion",
247
+ "ptsd_in_the_wild": "mental_health_ptsd",
248
+ "mmpsy_depression": "mental_health_depression",
249
+ "mmpsy_anxiety": "mental_health_anxiety",
250
+ "daicwoz": "mental_health_depression",
251
+ "mmsd": "sarcasm",
252
+ "urfunny": "humour"
253
+ },
254
+ "global_classes": {
255
+ "sentiment_intensity": [
256
+ {
257
+ "index": 0,
258
+ "label": "highly negative"
259
+ },
260
+ {
261
+ "index": 1,
262
+ "label": "negative"
263
+ },
264
+ {
265
+ "index": 2,
266
+ "label": "weakly negative"
267
+ },
268
+ {
269
+ "index": 3,
270
+ "label": "neutral"
271
+ },
272
+ {
273
+ "index": 4,
274
+ "label": "weakly positive"
275
+ },
276
+ {
277
+ "index": 5,
278
+ "label": "positive"
279
+ },
280
+ {
281
+ "index": 6,
282
+ "label": "highly positive"
283
+ }
284
+ ],
285
+ "emotion": [
286
+ {
287
+ "index": 7,
288
+ "label": "anger"
289
+ },
290
+ {
291
+ "index": 8,
292
+ "label": "disgust"
293
+ },
294
+ {
295
+ "index": 9,
296
+ "label": "fear"
297
+ },
298
+ {
299
+ "index": 10,
300
+ "label": "happy"
301
+ },
302
+ {
303
+ "index": 11,
304
+ "label": "neutral"
305
+ },
306
+ {
307
+ "index": 12,
308
+ "label": "calm"
309
+ },
310
+ {
311
+ "index": 13,
312
+ "label": "sad"
313
+ },
314
+ {
315
+ "index": 14,
316
+ "label": "surprise"
317
+ }
318
+ ],
319
+ "mental_health_ptsd": [
320
+ {
321
+ "index": 15,
322
+ "label": "no ptsd"
323
+ },
324
+ {
325
+ "index": 16,
326
+ "label": "ptsd"
327
+ }
328
+ ],
329
+ "mental_health_depression": [
330
+ {
331
+ "index": 17,
332
+ "label": "no depression"
333
+ },
334
+ {
335
+ "index": 18,
336
+ "label": "depression"
337
+ }
338
+ ],
339
+ "mental_health_anxiety": [
340
+ {
341
+ "index": 19,
342
+ "label": "no anxiety"
343
+ },
344
+ {
345
+ "index": 20,
346
+ "label": "anxiety"
347
+ }
348
+ ],
349
+ "sarcasm": [
350
+ {
351
+ "index": 21,
352
+ "label": "not sarcasm"
353
+ },
354
+ {
355
+ "index": 22,
356
+ "label": "sarcasm"
357
+ }
358
+ ],
359
+ "humour": [
360
+ {
361
+ "index": 23,
362
+ "label": "not humour"
363
+ },
364
+ {
365
+ "index": 24,
366
+ "label": "humour"
367
+ }
368
+ ]
369
+ },
370
+ "original_label_ids": {
371
+ "chsimsv2_neutral": 0,
372
+ "chsimsv2_strongly negative": 1,
373
+ "chsimsv2_strongly positive": 2,
374
+ "chsimsv2_weakly negative": 3,
375
+ "chsimsv2_weakly positive": 4,
376
+ "cremad_anger": 5,
377
+ "cremad_disgust": 6,
378
+ "cremad_fear": 7,
379
+ "cremad_happy": 8,
380
+ "cremad_neutral": 9,
381
+ "cremad_sad": 10,
382
+ "daicwoz_false": 11,
383
+ "daicwoz_true": 12,
384
+ "meld_emotion_anger": 13,
385
+ "meld_emotion_disgust": 14,
386
+ "meld_emotion_fear": 15,
387
+ "meld_emotion_joy": 16,
388
+ "meld_emotion_neutral": 17,
389
+ "meld_emotion_sadness": 18,
390
+ "meld_emotion_surprise": 19,
391
+ "meld_senti_negative": 20,
392
+ "meld_senti_neutral": 21,
393
+ "meld_senti_positive": 22,
394
+ "mmpsy_anxiety_anxiety": 23,
395
+ "mmpsy_anxiety_no anxiety": 24,
396
+ "mmpsy_depression_depression": 25,
397
+ "mmpsy_depression_no depression": 26,
398
+ "mmsd_false": 27,
399
+ "mmsd_true": 28,
400
+ "mosei_emotion_anger": 29,
401
+ "mosei_emotion_disgust": 30,
402
+ "mosei_emotion_fear": 31,
403
+ "mosei_emotion_happy": 32,
404
+ "mosei_emotion_sad": 33,
405
+ "mosei_emotion_surprise": 34,
406
+ "mosei_senti_highly negative": 35,
407
+ "mosei_senti_highly positive": 36,
408
+ "mosei_senti_negative": 37,
409
+ "mosei_senti_neutral": 38,
410
+ "mosei_senti_positive": 39,
411
+ "mosei_senti_weakly negative": 40,
412
+ "mosei_senti_weakly positive": 41,
413
+ "ptsd_in_the_wild_No PTSD": 42,
414
+ "ptsd_in_the_wild_PTSD": 43,
415
+ "ravdess_angry": 44,
416
+ "ravdess_calm": 45,
417
+ "ravdess_disgust": 46,
418
+ "ravdess_fearful": 47,
419
+ "ravdess_happy": 48,
420
+ "ravdess_neutral": 49,
421
+ "ravdess_sad": 50,
422
+ "ravdess_surprised": 51,
423
+ "tess_angry": 52,
424
+ "tess_disgust": 53,
425
+ "tess_fear": 54,
426
+ "tess_happy": 55,
427
+ "tess_neutral": 56,
428
+ "tess_pleasant surprise": 57,
429
+ "tess_sad": 58,
430
+ "urfunny_false": 59,
431
+ "urfunny_true": 60
432
+ },
433
+ "unified_mapping": {
434
+ "chsimsv2_neutral": {
435
+ "domain": "sentiment_intensity",
436
+ "unified_label": "neutral",
437
+ "global_index": 3,
438
+ "notes": ""
439
+ },
440
+ "chsimsv2_strongly negative": {
441
+ "domain": "sentiment_intensity",
442
+ "unified_label": "highly negative",
443
+ "global_index": 0,
444
+ "notes": ""
445
+ },
446
+ "chsimsv2_strongly positive": {
447
+ "domain": "sentiment_intensity",
448
+ "unified_label": "highly positive",
449
+ "global_index": 6,
450
+ "notes": ""
451
+ },
452
+ "chsimsv2_weakly negative": {
453
+ "domain": "sentiment_intensity",
454
+ "unified_label": "weakly negative",
455
+ "global_index": 2,
456
+ "notes": ""
457
+ },
458
+ "chsimsv2_weakly positive": {
459
+ "domain": "sentiment_intensity",
460
+ "unified_label": "weakly positive",
461
+ "global_index": 4,
462
+ "notes": ""
463
+ },
464
+ "cremad_anger": {
465
+ "domain": "emotion",
466
+ "unified_label": "anger",
467
+ "global_index": 7,
468
+ "notes": ""
469
+ },
470
+ "cremad_disgust": {
471
+ "domain": "emotion",
472
+ "unified_label": "disgust",
473
+ "global_index": 8,
474
+ "notes": ""
475
+ },
476
+ "cremad_fear": {
477
+ "domain": "emotion",
478
+ "unified_label": "fear",
479
+ "global_index": 9,
480
+ "notes": ""
481
+ },
482
+ "cremad_happy": {
483
+ "domain": "emotion",
484
+ "unified_label": "happy",
485
+ "global_index": 10,
486
+ "notes": ""
487
+ },
488
+ "cremad_neutral": {
489
+ "domain": "emotion",
490
+ "unified_label": "neutral",
491
+ "global_index": 11,
492
+ "notes": ""
493
+ },
494
+ "cremad_sad": {
495
+ "domain": "emotion",
496
+ "unified_label": "sad",
497
+ "global_index": 13,
498
+ "notes": ""
499
+ },
500
+ "daicwoz_false": {
501
+ "domain": "mental_health_depression",
502
+ "unified_label": "no depression",
503
+ "global_index": 17,
504
+ "notes": "Depression direct binary (boolean false)"
505
+ },
506
+ "daicwoz_true": {
507
+ "domain": "mental_health_depression",
508
+ "unified_label": "depression",
509
+ "global_index": 18,
510
+ "notes": "Depression direct binary (boolean true)"
511
+ },
512
+ "meld_emotion_anger": {
513
+ "domain": "emotion",
514
+ "unified_label": "anger",
515
+ "global_index": 7,
516
+ "notes": ""
517
+ },
518
+ "meld_emotion_disgust": {
519
+ "domain": "emotion",
520
+ "unified_label": "disgust",
521
+ "global_index": 8,
522
+ "notes": ""
523
+ },
524
+ "meld_emotion_fear": {
525
+ "domain": "emotion",
526
+ "unified_label": "fear",
527
+ "global_index": 9,
528
+ "notes": ""
529
+ },
530
+ "meld_emotion_joy": {
531
+ "domain": "emotion",
532
+ "unified_label": "happy",
533
+ "global_index": 10,
534
+ "notes": ""
535
+ },
536
+ "meld_emotion_neutral": {
537
+ "domain": "emotion",
538
+ "unified_label": "neutral",
539
+ "global_index": 11,
540
+ "notes": ""
541
+ },
542
+ "meld_emotion_sadness": {
543
+ "domain": "emotion",
544
+ "unified_label": "sad",
545
+ "global_index": 13,
546
+ "notes": ""
547
+ },
548
+ "meld_emotion_surprise": {
549
+ "domain": "emotion",
550
+ "unified_label": "surprise",
551
+ "global_index": 14,
552
+ "notes": ""
553
+ },
554
+ "meld_senti_negative": {
555
+ "domain": "sentiment_intensity",
556
+ "unified_label": "negative",
557
+ "global_index": 1,
558
+ "notes": ""
559
+ },
560
+ "meld_senti_neutral": {
561
+ "domain": "sentiment_intensity",
562
+ "unified_label": "neutral",
563
+ "global_index": 3,
564
+ "notes": ""
565
+ },
566
+ "meld_senti_positive": {
567
+ "domain": "sentiment_intensity",
568
+ "unified_label": "positive",
569
+ "global_index": 5,
570
+ "notes": ""
571
+ },
572
+ "mmpsy_anxiety_anxiety": {
573
+ "domain": "mental_health_anxiety",
574
+ "unified_label": "anxiety",
575
+ "global_index": 20,
576
+ "notes": "Anxiety direct binary"
577
+ },
578
+ "mmpsy_anxiety_no anxiety": {
579
+ "domain": "mental_health_anxiety",
580
+ "unified_label": "no anxiety",
581
+ "global_index": 19,
582
+ "notes": "Anxiety direct binary"
583
+ },
584
+ "mmpsy_depression_depression": {
585
+ "domain": "mental_health_depression",
586
+ "unified_label": "depression",
587
+ "global_index": 18,
588
+ "notes": "Depression direct binary"
589
+ },
590
+ "mmpsy_depression_no depression": {
591
+ "domain": "mental_health_depression",
592
+ "unified_label": "no depression",
593
+ "global_index": 17,
594
+ "notes": "Depression direct binary"
595
+ },
596
+ "mmsd_false": {
597
+ "domain": "sarcasm",
598
+ "unified_label": "not sarcasm",
599
+ "global_index": 21,
600
+ "notes": "Sarcasm direct binary (true/false)"
601
+ },
602
+ "mmsd_true": {
603
+ "domain": "sarcasm",
604
+ "unified_label": "sarcasm",
605
+ "global_index": 22,
606
+ "notes": "Sarcasm direct binary (true/false)"
607
+ },
608
+ "mosei_emotion_anger": {
609
+ "domain": "emotion",
610
+ "unified_label": "anger",
611
+ "global_index": 7,
612
+ "notes": ""
613
+ },
614
+ "mosei_emotion_disgust": {
615
+ "domain": "emotion",
616
+ "unified_label": "disgust",
617
+ "global_index": 8,
618
+ "notes": ""
619
+ },
620
+ "mosei_emotion_fear": {
621
+ "domain": "emotion",
622
+ "unified_label": "fear",
623
+ "global_index": 9,
624
+ "notes": ""
625
+ },
626
+ "mosei_emotion_happy": {
627
+ "domain": "emotion",
628
+ "unified_label": "happy",
629
+ "global_index": 10,
630
+ "notes": ""
631
+ },
632
+ "mosei_emotion_sad": {
633
+ "domain": "emotion",
634
+ "unified_label": "sad",
635
+ "global_index": 13,
636
+ "notes": ""
637
+ },
638
+ "mosei_emotion_surprise": {
639
+ "domain": "emotion",
640
+ "unified_label": "surprise",
641
+ "global_index": 14,
642
+ "notes": ""
643
+ },
644
+ "mosei_senti_highly negative": {
645
+ "domain": "sentiment_intensity",
646
+ "unified_label": "highly negative",
647
+ "global_index": 0,
648
+ "notes": ""
649
+ },
650
+ "mosei_senti_highly positive": {
651
+ "domain": "sentiment_intensity",
652
+ "unified_label": "highly positive",
653
+ "global_index": 6,
654
+ "notes": ""
655
+ },
656
+ "mosei_senti_negative": {
657
+ "domain": "sentiment_intensity",
658
+ "unified_label": "negative",
659
+ "global_index": 1,
660
+ "notes": ""
661
+ },
662
+ "mosei_senti_neutral": {
663
+ "domain": "sentiment_intensity",
664
+ "unified_label": "neutral",
665
+ "global_index": 3,
666
+ "notes": ""
667
+ },
668
+ "mosei_senti_positive": {
669
+ "domain": "sentiment_intensity",
670
+ "unified_label": "positive",
671
+ "global_index": 5,
672
+ "notes": ""
673
+ },
674
+ "mosei_senti_weakly negative": {
675
+ "domain": "sentiment_intensity",
676
+ "unified_label": "weakly negative",
677
+ "global_index": 2,
678
+ "notes": ""
679
+ },
680
+ "mosei_senti_weakly positive": {
681
+ "domain": "sentiment_intensity",
682
+ "unified_label": "weakly positive",
683
+ "global_index": 4,
684
+ "notes": ""
685
+ },
686
+ "ptsd_in_the_wild_No PTSD": {
687
+ "domain": "mental_health_ptsd",
688
+ "unified_label": "no ptsd",
689
+ "global_index": 15,
690
+ "notes": "PTSD direct binary"
691
+ },
692
+ "ptsd_in_the_wild_PTSD": {
693
+ "domain": "mental_health_ptsd",
694
+ "unified_label": "ptsd",
695
+ "global_index": 16,
696
+ "notes": "PTSD direct binary"
697
+ },
698
+ "ravdess_angry": {
699
+ "domain": "emotion",
700
+ "unified_label": "anger",
701
+ "global_index": 7,
702
+ "notes": ""
703
+ },
704
+ "ravdess_calm": {
705
+ "domain": "emotion",
706
+ "unified_label": "calm",
707
+ "global_index": 12,
708
+ "notes": ""
709
+ },
710
+ "ravdess_disgust": {
711
+ "domain": "emotion",
712
+ "unified_label": "disgust",
713
+ "global_index": 8,
714
+ "notes": ""
715
+ },
716
+ "ravdess_fearful": {
717
+ "domain": "emotion",
718
+ "unified_label": "fear",
719
+ "global_index": 9,
720
+ "notes": ""
721
+ },
722
+ "ravdess_happy": {
723
+ "domain": "emotion",
724
+ "unified_label": "happy",
725
+ "global_index": 10,
726
+ "notes": ""
727
+ },
728
+ "ravdess_neutral": {
729
+ "domain": "emotion",
730
+ "unified_label": "neutral",
731
+ "global_index": 11,
732
+ "notes": ""
733
+ },
734
+ "ravdess_sad": {
735
+ "domain": "emotion",
736
+ "unified_label": "sad",
737
+ "global_index": 13,
738
+ "notes": ""
739
+ },
740
+ "ravdess_surprised": {
741
+ "domain": "emotion",
742
+ "unified_label": "surprise",
743
+ "global_index": 14,
744
+ "notes": ""
745
+ },
746
+ "tess_angry": {
747
+ "domain": "emotion",
748
+ "unified_label": "anger",
749
+ "global_index": 7,
750
+ "notes": ""
751
+ },
752
+ "tess_disgust": {
753
+ "domain": "emotion",
754
+ "unified_label": "disgust",
755
+ "global_index": 8,
756
+ "notes": ""
757
+ },
758
+ "tess_fear": {
759
+ "domain": "emotion",
760
+ "unified_label": "fear",
761
+ "global_index": 9,
762
+ "notes": ""
763
+ },
764
+ "tess_happy": {
765
+ "domain": "emotion",
766
+ "unified_label": "happy",
767
+ "global_index": 10,
768
+ "notes": ""
769
+ },
770
+ "tess_neutral": {
771
+ "domain": "emotion",
772
+ "unified_label": "neutral",
773
+ "global_index": 11,
774
+ "notes": ""
775
+ },
776
+ "tess_pleasant surprise": {
777
+ "domain": "emotion",
778
+ "unified_label": "surprise",
779
+ "global_index": 14,
780
+ "notes": ""
781
+ },
782
+ "tess_sad": {
783
+ "domain": "emotion",
784
+ "unified_label": "sad",
785
+ "global_index": 13,
786
+ "notes": ""
787
+ },
788
+ "urfunny_false": {
789
+ "domain": "humour",
790
+ "unified_label": "not humour",
791
+ "global_index": 23,
792
+ "notes": "Humour direct binary (true/false)"
793
+ },
794
+ "urfunny_true": {
795
+ "domain": "humour",
796
+ "unified_label": "humour",
797
+ "global_index": 24,
798
+ "notes": "Humour direct binary (true/false)"
799
+ }
800
+ },
801
+ "mental_health_notes": [
802
+ "Mental health split into PTSD/Depression/Anxiety sub-domains with binary labels only.",
803
+ "Routing is purely dataset-based (mmpsy_*, daicwoz, and ptsd_in_the_wild)."
804
+ ],
805
+ "binary_domain_notes": {
806
+ "sarcasm_binary": {
807
+ "mmsd_false": "not sarcasm",
808
+ "mmsd_true": "sarcasm"
809
+ },
810
+ "humour_binary": {
811
+ "urfunny_false": "not humour",
812
+ "urfunny_true": "humour"
813
+ }
814
+ }
815
+ }
816
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b23263d80740203786a9b9848018cf9a184b831799fe63dba2f9cbd42506419
3
+ size 4985046168
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aafa0787d313f3c71830aa2c02b6499847212a16b72851568bb83da69a801dba
3
+ size 4991495656
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc320a70463d10173a163ab6bfd8edc93a998721e9cf22884cffa50ad91d70c
3
+ size 4991495752
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5231589069ec54f709b610748fec8c066f5450badf7124e0b35eb9966f1c015a
3
+ size 2895739680
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 300,
3
+ "dither": 0.0,
4
+ "feature_extractor_type": "WhisperFeatureExtractor",
5
+ "feature_size": 128,
6
+ "hop_length": 160,
7
+ "image_mean": [
8
+ 0.48145466,
9
+ 0.4578275,
10
+ 0.40821073
11
+ ],
12
+ "image_processor_type": "Qwen2VLImageProcessor",
13
+ "image_std": [
14
+ 0.26862954,
15
+ 0.26130258,
16
+ 0.27577711
17
+ ],
18
+ "max_pixels": 12845056,
19
+ "merge_size": 2,
20
+ "min_pixels": 3136,
21
+ "n_fft": 400,
22
+ "n_samples": 4800000,
23
+ "nb_max_frames": 30000,
24
+ "padding_side": "right",
25
+ "padding_value": 0.0,
26
+ "patch_size": 14,
27
+ "processor_class": "Qwen2_5OmniProcessor",
28
+ "return_attention_mask": true,
29
+ "sampling_rate": 16000,
30
+ "temporal_patch_size": 2
31
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|AUDIO|>",
6
+ "<|audio_bos|>",
7
+ "<|audio_eos|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_bos|>",
12
+ "<|vision_eos|>",
13
+ "<|vision_pad|>",
14
+ "<|IMAGE|>",
15
+ "<|VIDEO|>"
16
+ ],
17
+ "audio_bos_token": "<|audio_bos|>",
18
+ "audio_eos_token": "<|audio_eos|>",
19
+ "audio_token": "<|AUDIO|>",
20
+ "eos_token": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "image_token": "<|IMAGE|>",
28
+ "pad_token": {
29
+ "content": "<|endoftext|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ },
35
+ "video_token": "<|VIDEO|>",
36
+ "vision_bos_token": "<|vision_bos|>",
37
+ "vision_eos_token": "<|vision_eos|>"
38
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8441917e39ae0244e06d704b95b3124795cec478e297f9afac39ba670d7e9d99
3
+ size 11421870
tokenizer_config.json ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|AUDIO|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|audio_bos|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|audio_eos|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_bos|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_eos|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|IMAGE|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|VIDEO|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "151657": {
117
+ "content": "<tool_call>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "151658": {
125
+ "content": "</tool_call>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "151659": {
133
+ "content": "<|fim_prefix|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "151660": {
141
+ "content": "<|fim_middle|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "151661": {
149
+ "content": "<|fim_suffix|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "151662": {
157
+ "content": "<|fim_pad|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "151663": {
165
+ "content": "<|repo_name|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "151664": {
173
+ "content": "<|file_sep|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ }
180
+ },
181
+ "additional_special_tokens": [
182
+ "<|im_start|>",
183
+ "<|im_end|>",
184
+ "<|AUDIO|>",
185
+ "<|audio_bos|>",
186
+ "<|audio_eos|>",
187
+ "<|box_end|>",
188
+ "<|quad_start|>",
189
+ "<|quad_end|>",
190
+ "<|vision_bos|>",
191
+ "<|vision_eos|>",
192
+ "<|vision_pad|>",
193
+ "<|IMAGE|>",
194
+ "<|VIDEO|>"
195
+ ],
196
+ "audio_bos_token": "<|audio_bos|>",
197
+ "audio_eos_token": "<|audio_eos|>",
198
+ "audio_token": "<|AUDIO|>",
199
+ "bos_token": null,
200
+ "clean_up_tokenization_spaces": false,
201
+ "eos_token": "<|im_end|>",
202
+ "errors": "replace",
203
+ "extra_special_tokens": {
204
+ "audio_bos_token": "<|audio_bos|>",
205
+ "audio_eos_token": "<|audio_eos|>",
206
+ "audio_token": "<|AUDIO|>",
207
+ "image_token": "<|IMAGE|>",
208
+ "video_token": "<|VIDEO|>",
209
+ "vision_bos_token": "<|vision_bos|>",
210
+ "vision_eos_token": "<|vision_eos|>"
211
+ },
212
+ "image_token": "<|IMAGE|>",
213
+ "model_max_length": 32768,
214
+ "pad_token": "<|endoftext|>",
215
+ "processor_class": "Qwen2_5OmniProcessor",
216
+ "split_special_tokens": false,
217
+ "tokenizer_class": "Qwen2Tokenizer",
218
+ "unk_token": null,
219
+ "video_token": "<|VIDEO|>",
220
+ "vision_bos_token": "<|vision_bos|>",
221
+ "vision_eos_token": "<|vision_eos|>"
222
+ }
training_meta.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0,
3
+ "global_step": 4578,
4
+ "len_train_dataloader": 4578,
5
+ "training_strategy": "lora",
6
+ "saved_at_unix": 1758202923.0941703
7
+ }
video_preprocessor_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 300,
3
+ "crop_size": null,
4
+ "data_format": "channels_first",
5
+ "default_to_square": true,
6
+ "device": null,
7
+ "dither": 0.0,
8
+ "do_center_crop": null,
9
+ "do_convert_rgb": true,
10
+ "do_normalize": true,
11
+ "do_pad": null,
12
+ "do_rescale": true,
13
+ "do_resize": true,
14
+ "do_sample_frames": false,
15
+ "feature_extractor_type": "WhisperFeatureExtractor",
16
+ "feature_size": 128,
17
+ "fps": null,
18
+ "hop_length": 160,
19
+ "image_mean": [
20
+ 0.48145466,
21
+ 0.4578275,
22
+ 0.40821073
23
+ ],
24
+ "image_std": [
25
+ 0.26862954,
26
+ 0.26130258,
27
+ 0.27577711
28
+ ],
29
+ "input_data_format": null,
30
+ "max_frames": 768,
31
+ "max_pixels": 12845056,
32
+ "merge_size": 2,
33
+ "min_frames": 4,
34
+ "min_pixels": 3136,
35
+ "n_fft": 400,
36
+ "n_samples": 4800000,
37
+ "nb_max_frames": 30000,
38
+ "num_frames": null,
39
+ "padding_side": "right",
40
+ "padding_value": 0.0,
41
+ "patch_size": 14,
42
+ "processor_class": "Qwen2_5OmniProcessor",
43
+ "resample": 3,
44
+ "rescale_factor": 0.00392156862745098,
45
+ "return_attention_mask": true,
46
+ "sampling_rate": 16000,
47
+ "size": {
48
+ "longest_edge": 12845056,
49
+ "shortest_edge": 3136
50
+ },
51
+ "size_divisor": null,
52
+ "temporal_patch_size": 2,
53
+ "video_metadata": null,
54
+ "video_processor_type": "Qwen2VLVideoProcessor"
55
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff