owaski commited on
Commit
a589c99
·
verified ·
1 Parent(s): 4ec7a4f

Upload folder using huggingface_hub

Browse files
args.json CHANGED
@@ -39,7 +39,7 @@
39
  "response_prefix": null,
40
  "template_backend": "swift",
41
  "dataset": [
42
- "/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
43
  ],
44
  "val_dataset": [],
45
  "split_dataset_ratio": 0.01,
@@ -164,7 +164,7 @@
164
  "deterministic_mode": false,
165
  "train_iters": null,
166
  "log_interval": 10,
167
- "tensorboard_dir": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final-bsz4/v1-20260122-010007/runs",
168
  "no_masked_softmax_fusion": false,
169
  "no_bias_dropout_fusion": false,
170
  "no_bias_swiglu_fusion": false,
@@ -198,7 +198,7 @@
198
  "adam_beta2": 0.95,
199
  "adam_eps": 1e-08,
200
  "sgd_momentum": 0.9,
201
- "save": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final-bsz4/v1-20260122-010007",
202
  "save_interval": 200,
203
  "no_save_optim": true,
204
  "no_save_rng": true,
@@ -316,10 +316,10 @@
316
  "local_world_size": 4,
317
  "model_suffix": "Qwen3-Omni-30B-A3B-Instruct",
318
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
319
- "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7f5bbce56a20>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
320
  "model_dir": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct",
321
  "hub": "<class 'swift.hub.hub.MSHub'>",
322
- "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7f5b25ee9300>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7f5b25ee9260>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7f5b25eb19e0>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7f5b25eb07c0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)",
323
  "extra_args": {
324
  "model": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct/",
325
  "model_type": "qwen3_omni",
@@ -360,7 +360,7 @@
360
  "response_prefix": null,
361
  "template_backend": "swift",
362
  "dataset": [
363
- "/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
364
  ],
365
  "val_dataset": [],
366
  "split_dataset_ratio": 0.01,
@@ -474,7 +474,7 @@
474
  "mrope_interleaved": true,
475
  "add_version": true,
476
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
477
- "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7f5bbce56a20>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
478
- "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7f5b25ee9300>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7f5b25ee9260>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7f5b25eb19e0>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7f5b25eb07c0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)"
479
  }
480
  }
 
39
  "response_prefix": null,
40
  "template_backend": "swift",
41
  "dataset": [
42
+ "/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests_rag/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
43
  ],
44
  "val_dataset": [],
45
  "split_dataset_ratio": 0.01,
 
164
  "deterministic_mode": false,
165
  "train_iters": null,
166
  "log_interval": 10,
167
+ "tensorboard_dir": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final-bsz4/v3-20260122-141104/runs",
168
  "no_masked_softmax_fusion": false,
169
  "no_bias_dropout_fusion": false,
170
  "no_bias_swiglu_fusion": false,
 
198
  "adam_beta2": 0.95,
199
  "adam_eps": 1e-08,
200
  "sgd_momentum": 0.9,
201
+ "save": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final-bsz4/v3-20260122-141104",
202
  "save_interval": 200,
203
  "no_save_optim": true,
204
  "no_save_rng": true,
 
316
  "local_world_size": 4,
317
  "model_suffix": "Qwen3-Omni-30B-A3B-Instruct",
318
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
319
+ "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7fb1558e2a20>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
320
  "model_dir": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct",
321
  "hub": "<class 'swift.hub.hub.MSHub'>",
322
+ "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7fb0be86d300>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7fb0be86d260>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7fb0be8359e0>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7fb0be8347c0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)",
323
  "extra_args": {
324
  "model": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct/",
325
  "model_type": "qwen3_omni",
 
360
  "response_prefix": null,
361
  "template_backend": "swift",
362
  "dataset": [
363
+ "/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests_rag/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
364
  ],
365
  "val_dataset": [],
366
  "split_dataset_ratio": 0.01,
 
474
  "mrope_interleaved": true,
475
  "add_version": true,
476
  "model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
477
+ "model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7fb1558e2a20>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
478
+ "megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7fb0be86d300>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7fb0be86d260>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7fb0be8359e0>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7fb0be8347c0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)"
479
  }
480
  }
model-00001-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bbefbaabaa42d520d19b3441ae2cd4349b27fa660523d218d5e4759c8036fbb
3
  size 4997899632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8376532d4b18ce804e7b8fb357bbfcccc857d2eabfb559c41db56463a4daf8
3
  size 4997899632
model-00002-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3bf5e7353b85a3eb61b9008d11743615c94820a68590d6db3ea6da10ec822c1
3
  size 4997754216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bdc404ace6c7c761ad9881a2948adf82326d06c069a4d040fede08c8a4f24d
3
  size 4997754216
model-00003-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e9cee3681be29718aa19efb70a8f5a7fb17a009386e6686adebcb890629e2a5
3
  size 4997754216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb0a493fa120430b2ef9e737841b67202181e9fd9817ab7bc13e7ceb21fce33a
3
  size 4997754216
model-00004-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f43c34e34c675a4736b12e29e1d9b9f0eb66e75e5fefbe189822739404aaf5ef
3
  size 4997755648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94a1e396fbbc6234c9d582f1589b7f5fa5b64dfc69eb3fa305c8d4d9c4514c4
3
  size 4997755648
model-00005-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79a07b08e093ac0266f4e0b899ef2f2dbeea9e0f9cd831c27db9881fcc2de1b7
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61a7c8e4505596c3a8e4909dcb13be416e47218c65909d2d3a94181017766a4
3
  size 4997755792
model-00006-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa61b5f72af42b0a8465620b2aa3c4683e6102c416f6fd957ec072db4f75a6d8
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2be80c74fffdf7aa511d249296a34322f429d7fcb53726003499bf75f496366a
3
  size 4997755792
model-00007-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d725785427572acaad9c0946173857b6fcb4316a78dcd7e37f3743d433288fbb
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cdc948a78324f22c94fc55e7b0bf41b62732e7021eff370d31480f220b8a41b
3
  size 4997755792
model-00008-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c71416d5a560b00e61de786e81353eb1f6457dd4fd60698a0128114fed6f5ff
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce728acf33f5c6f05058a8af02d24142c2487ee4ca0525d0599aa20df7e0d5c6
3
  size 4997755792
model-00009-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59534c45f1b39a4ee16b12db7e5a955fd2d02491f40cc404eec4b84b218e7b12
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:840db579c9c61e945789e2d4749ad2ac94b2065331692e97dd7a8dfa4f8034f4
3
  size 4997755792
model-00010-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7762cf8c4e905681d8dcdfccc6b37a5d7a8bfbdc9732ac96d5313a2dcfdf96a9
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91aa124446c12071b2c69fb448dbde64182f638480140d7e4dd605f54468d3d5
3
  size 4997755792
model-00011-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:107a23b8c358dc611544486ed8981d1f1035480e67ebfcefedd69cfbdc459298
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa8e6c3c07d6392a0145e5ea0e4d322feb0a10a2fb4f4c531f55491b1d376f0
3
  size 4997755792
model-00012-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef845c191019fc58fa30d5a6b26c95782059421c4d56e0d589263c98f414dd35
3
  size 4997755792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:370ecd374aea7da98614862b04e035badc074c092ca73caf0471b12ad1591aff
3
  size 4997755792
model-00013-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:361f0e17e531d9350f9ddcaa834a2343e43140e4db6d6d8a158d6d0e07b1f343
3
  size 4999771808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:175d0b41426e60a92b5581b03349a07b69518f5fd87a3fe70b30980d275364b6
3
  size 4999771808