Upload folder using huggingface_hub
Browse files- args.json +12 -16
- model-00001-of-00015.safetensors +1 -1
- model-00002-of-00015.safetensors +1 -1
- model-00003-of-00015.safetensors +1 -1
- model-00004-of-00015.safetensors +1 -1
- model-00005-of-00015.safetensors +1 -1
- model-00006-of-00015.safetensors +1 -1
- model-00007-of-00015.safetensors +1 -1
- model-00008-of-00015.safetensors +1 -1
- model-00009-of-00015.safetensors +1 -1
- model-00010-of-00015.safetensors +1 -1
- model-00011-of-00015.safetensors +1 -1
- model-00012-of-00015.safetensors +1 -1
- model-00013-of-00015.safetensors +1 -1
args.json
CHANGED
|
@@ -41,10 +41,8 @@
|
|
| 41 |
"dataset": [
|
| 42 |
"/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
|
| 43 |
],
|
| 44 |
-
"val_dataset": [
|
| 45 |
-
|
| 46 |
-
],
|
| 47 |
-
"split_dataset_ratio": 0.0,
|
| 48 |
"data_seed": 42,
|
| 49 |
"dataset_num_proc": 8,
|
| 50 |
"load_from_cache_file": true,
|
|
@@ -166,7 +164,7 @@
|
|
| 166 |
"deterministic_mode": false,
|
| 167 |
"train_iters": null,
|
| 168 |
"log_interval": 10,
|
| 169 |
-
"tensorboard_dir": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final/
|
| 170 |
"no_masked_softmax_fusion": false,
|
| 171 |
"no_bias_dropout_fusion": false,
|
| 172 |
"no_bias_swiglu_fusion": false,
|
|
@@ -200,7 +198,7 @@
|
|
| 200 |
"adam_beta2": 0.95,
|
| 201 |
"adam_eps": 1e-08,
|
| 202 |
"sgd_momentum": 0.9,
|
| 203 |
-
"save": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final/
|
| 204 |
"save_interval": 200,
|
| 205 |
"no_save_optim": true,
|
| 206 |
"no_save_rng": true,
|
|
@@ -314,14 +312,14 @@
|
|
| 314 |
"extra_megatron_kwargs": {},
|
| 315 |
"add_version": true,
|
| 316 |
"rank": 0,
|
| 317 |
-
"global_world_size":
|
| 318 |
-
"local_world_size":
|
| 319 |
"model_suffix": "Qwen3-Omni-30B-A3B-Instruct",
|
| 320 |
"model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
|
| 321 |
-
"model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at
|
| 322 |
"model_dir": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct",
|
| 323 |
"hub": "<class 'swift.hub.hub.MSHub'>",
|
| 324 |
-
"megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at
|
| 325 |
"extra_args": {
|
| 326 |
"model": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct/",
|
| 327 |
"model_type": "qwen3_omni",
|
|
@@ -364,10 +362,8 @@
|
|
| 364 |
"dataset": [
|
| 365 |
"/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
|
| 366 |
],
|
| 367 |
-
"val_dataset": [
|
| 368 |
-
|
| 369 |
-
],
|
| 370 |
-
"split_dataset_ratio": 0.0,
|
| 371 |
"data_seed": 42,
|
| 372 |
"dataset_num_proc": 8,
|
| 373 |
"load_from_cache_file": true,
|
|
@@ -478,7 +474,7 @@
|
|
| 478 |
"mrope_interleaved": true,
|
| 479 |
"add_version": true,
|
| 480 |
"model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
|
| 481 |
-
"model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at
|
| 482 |
-
"megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at
|
| 483 |
}
|
| 484 |
}
|
|
|
|
| 41 |
"dataset": [
|
| 42 |
"/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
|
| 43 |
],
|
| 44 |
+
"val_dataset": [],
|
| 45 |
+
"split_dataset_ratio": 0.01,
|
|
|
|
|
|
|
| 46 |
"data_seed": 42,
|
| 47 |
"dataset_num_proc": 8,
|
| 48 |
"load_from_cache_file": true,
|
|
|
|
| 164 |
"deterministic_mode": false,
|
| 165 |
"train_iters": null,
|
| 166 |
"log_interval": 10,
|
| 167 |
+
"tensorboard_dir": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final/v1-20260106-064237/runs",
|
| 168 |
"no_masked_softmax_fusion": false,
|
| 169 |
"no_bias_dropout_fusion": false,
|
| 170 |
"no_bias_swiglu_fusion": false,
|
|
|
|
| 198 |
"adam_beta2": 0.95,
|
| 199 |
"adam_eps": 1e-08,
|
| 200 |
"sgd_momentum": 0.9,
|
| 201 |
+
"save": "/data/user_data/siqiouya/ckpts/infinisst-omni/gigaspeech-zh-s_v4_ner_baseline_aligned_rate1.0_k20_final/v1-20260106-064237",
|
| 202 |
"save_interval": 200,
|
| 203 |
"no_save_optim": true,
|
| 204 |
"no_save_rng": true,
|
|
|
|
| 312 |
"extra_megatron_kwargs": {},
|
| 313 |
"add_version": true,
|
| 314 |
"rank": 0,
|
| 315 |
+
"global_world_size": 4,
|
| 316 |
+
"local_world_size": 4,
|
| 317 |
"model_suffix": "Qwen3-Omni-30B-A3B-Instruct",
|
| 318 |
"model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
|
| 319 |
+
"model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7f622c92ea20>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
|
| 320 |
"model_dir": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct",
|
| 321 |
"hub": "<class 'swift.hub.hub.MSHub'>",
|
| 322 |
+
"megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7f619c309300>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7f619c309260>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7f619c2dd9e0>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7f619c2dc7c0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)",
|
| 323 |
"extra_args": {
|
| 324 |
"model": "/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct/",
|
| 325 |
"model_type": "qwen3_omni",
|
|
|
|
| 362 |
"dataset": [
|
| 363 |
"/data/group_data/li_lab/siqiouya/datasets/gigaspeech/manifests/train_s_zh_v4_ner_baseline_aligned_rate1.0_k20_final.jsonl"
|
| 364 |
],
|
| 365 |
+
"val_dataset": [],
|
| 366 |
+
"split_dataset_ratio": 0.01,
|
|
|
|
|
|
|
| 367 |
"data_seed": 42,
|
| 368 |
"dataset_num_proc": 8,
|
| 369 |
"load_from_cache_file": true,
|
|
|
|
| 474 |
"mrope_interleaved": true,
|
| 475 |
"add_version": true,
|
| 476 |
"model_info": "ModelInfo(model_type='qwen3_omni', model_dir='/data/user_data/siqiouya/ckpts/pretrained/llm/Qwen3-Omni-30B-A3B-Instruct', torch_dtype=torch.bfloat16, max_model_len=65536, quant_method=None, quant_bits=None, rope_scaling={'interleaved': True, 'mrope_section': [24, 20, 20], 'rope_type': 'default', 'type': 'default'}, is_moe_model=True, config=None, task_type='causal_lm', num_labels=None)",
|
| 477 |
+
"model_meta": "ModelMeta(model_type='qwen3_omni', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Thinking', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', hf_model_id='Qwen/Qwen3-Omni-30B-A3B-Captioner', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3_omni', get_function=<function get_model_tokenizer_qwen3_omni at 0x7f622c92ea20>, model_arch=MultiModelKeys(arch_name='qwen3_omni', embedding=None, module_list=None, lm_head=None, q_proj=None, k_proj=None, v_proj=None, o_proj=None, attention=None, mlp=None, down_proj=None, qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None, language_model=['thinker.model'], aligner=['thinker.audio_tower.proj1', 'thinker.audio_tower.proj2', 'thinker.visual.merger', 'thinker.visual.merger_list'], vision_tower=['thinker.audio_tower', 'thinker.visual'], generator=['talker', 'token2wav']), architectures=['Qwen3OmniMoeForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.57.dev0', 'soundfile', 'decord', 'qwen_omni_utils'], tags=['vision', 'video', 'audio'])",
|
| 478 |
+
"megatron_model_meta": "MMGPTMegatronModelMeta(megatron_model_type='qwen3_omni', model_types=['qwen3_omni'], convert_mcore2hf=<function convert_mcore2hf_qwen3_omni at 0x7f619c309300>, convert_hf2mcore=<function convert_hf2mcore_qwen3_omni at 0x7f619c309260>, model_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3VLGPTModel'>, convert_hf_config=<function convert_gpt_hf_config at 0x7f619c2dd9e0>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7f619c2dc7c0>, visual_cls=<class 'swift.megatron.model.mm_gpt.qwen3_vl.Qwen3Omni_Vit'>, extra_args_provider=None)"
|
| 479 |
}
|
| 480 |
}
|
model-00001-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997899632
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d81fc6df86edd04a50d4a05cd30eed81f6360b7fb31bcbcb0ff1e62b1c16d736
|
| 3 |
size 4997899632
|
model-00002-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997754216
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0d25dae8b7014deef9d92b0f1de9339f8f6a00ec5917904cf3513cc14ef5bc4
|
| 3 |
size 4997754216
|
model-00003-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997754216
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:139f2bceeed6974fdc24048d8d14a5a873956ccfb50554d46802b42246d08f6f
|
| 3 |
size 4997754216
|
model-00004-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b64443f2559ad8b1042d27567d5277c87e61b1437aa84687090ec1c4798a0ec
|
| 3 |
size 4997755648
|
model-00005-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:128fb0751124ab837657148b11c3b55f2462c21d906986205fb554b2398da268
|
| 3 |
size 4997755792
|
model-00006-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c8b50b104e7dd4d6780f6cb6e4e3b8c8f36667dc58064f021c616acef0a3bd8
|
| 3 |
size 4997755792
|
model-00007-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:236d0bf86f5da060de8910360c3d4b681a8eb53835f192b368374ad4273b2379
|
| 3 |
size 4997755792
|
model-00008-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:522d713e1f9fd180ae98c285a3d31effced73ae5ec0206801a03dc71e5511a90
|
| 3 |
size 4997755792
|
model-00009-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2735c64bc1a2efc1fadd424e51fa0095ff4c4af81331ba1b596ad49ec2705d64
|
| 3 |
size 4997755792
|
model-00010-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1ca1726bad25ef9cbfa8ba3b655c78a85732579912924d7cb130010b90032f0
|
| 3 |
size 4997755792
|
model-00011-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcb298abc815975dcf3bd4b5db748e2e9aa26424b5bb45d2a29101e7ade79721
|
| 3 |
size 4997755792
|
model-00012-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4997755792
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b14ae3d6846555c05e6f101a5641c8d8910754266a2772b878080745def4f1d
|
| 3 |
size 4997755792
|
model-00013-of-00015.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999771808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de313835b565efa533faccdc4f9050e5c3b2d36af9ef4e6bbbf02305fb483826
|
| 3 |
size 4999771808
|