| import os |
|
|
os.environ['CUDA_VISIBLE_DEVICES'] = '0'


# Shared training defaults applied to every test via **kwargs.
# Individual tests that need different values copy this dict first
# (see test_epoch); mutating it in place would leak into later tests.
kwargs = {
    'per_device_train_batch_size': 2,
    'per_device_eval_batch_size': 2,
    'save_steps': 5,
    'gradient_accumulation_steps': 4,
    'num_train_epochs': 1,
}
|
|
|
|
def test_llm_ddp():
    """Train Qwen2-7B-Instruct with LoRA over two GPUs (DDP), then run inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        gradient_checkpointing_kwargs={'use_reentrant': False},
        target_modules=['all-linear', 'all-embedding'],
        modules_to_save=['all-embedding', 'all-norm'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
def test_unsloth():
    """Fine-tune with the unsloth tuner backend, resume once, then infer."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    first_run = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            max_steps=5,
            tuner_backend='unsloth',
            **kwargs))
    ckpt = first_run['last_model_checkpoint']
    # Resume from the saved checkpoint and continue training up to step 10.
    second_run = sft_main(TrainArguments(resume_from_checkpoint=ckpt, load_data_args=True, max_steps=10))
    ckpt = second_run['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
def test_mllm_mp():
    """LoRA-tune a multimodal model with model parallelism across four GPUs."""
    os.environ['MAX_PIXELS'] = '100352'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='bytedance-research/Valley-Eagle-7B',
        dataset=['modelscope/coco_2014_caption:validation#20'],
        train_type='lora',
        target_modules=['all-linear'],
        freeze_aligner=False,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
|
|
|
|
def test_llm_streaming():
    """Train on a streaming text dataset for a fixed step budget, then infer."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct', dataset=['swift/chinese-c4'], streaming=True, max_steps=16, **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
|
|
|
|
def test_mllm_streaming():
    """Streaming multimodal training for a fixed number of steps, then inference."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
|
|
|
|
def test_mllm_zero3():
    """Train Qwen2-VL under DeepSpeed ZeRO-3 on two GPUs (training only, no inference)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # infer_main/InferArguments were imported but never used; import only what is needed.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['modelscope/coco_2014_caption:validation#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            deepspeed='zero3',
            **kwargs))
|
|
|
|
def test_qwen_vl():
    """Smoke-test Qwen-VL-Chat fine-tuning on small OCR/caption subsets."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # infer_main/InferArguments were imported but never used; import only what is needed.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen-VL-Chat',
            dataset=['AI-ModelScope/LaTeX_OCR#40', 'modelscope/coco_2014_caption:validation#40'],
            **kwargs))
|
|
|
|
def test_qwen2_audio():
    """Full-parameter training with everything frozen except the audio tower."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # infer_main/InferArguments were imported but never used; import only what is needed.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-Audio-7B-Instruct',
            dataset=['speech_asr/speech_asr_aishell1_trainsets:validation#200'],
            freeze_parameters_ratio=1,
            trainable_parameters=['audio_tower'],
            train_type='full',
            **kwargs))
|
|
|
|
def test_llm_gptq():
    """Fine-tune a GPTQ-Int4 quantized model, then infer from the checkpoint."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
def test_llm_awq():
    """Fine-tune an AWQ quantized model, then infer from the checkpoint."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-AWQ',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
def test_mllm_streaming_zero3():
    """Streaming multimodal training under DeepSpeed ZeRO-3 (no inference step)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # infer_main/InferArguments were imported but never used; import only what is needed.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
            streaming=True,
            max_steps=16,
            deepspeed='zero3',
            **kwargs))
|
|
|
|
def test_mllm_streaming_mp_ddp():
    """Streaming multimodal training with model-parallel + DDP across four GPUs."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    # infer_main/InferArguments were imported but never used; import only what is needed.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
            streaming=True,
            max_steps=16,
            gradient_checkpointing_kwargs={'use_reentrant': False},
            **kwargs))
|
|
|
|
def test_llm_hqq():
    """Train with on-the-fly HQQ 4-bit quantization, then run inference."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='hqq',
        quant_bits=4,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
def test_llm_bnb():
    """Train with on-the-fly bitsandbytes 4-bit quantization, then run inference."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='bnb',
        quant_bits=4,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
def test_moe():
    """Fine-tune a quantized MoE model and run inference with the adapter."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
def test_resume_from_checkpoint():
    """Train on a streaming dataset, resume from the saved checkpoint, then infer."""
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    first = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            max_steps=5,
            streaming=True,
            **kwargs))
    # Continue training from the checkpoint up to step 10.
    resume_args = TrainArguments(
        resume_from_checkpoint=first['last_model_checkpoint'],
        streaming=True,
        load_data_args=True,
        max_steps=10,
    )
    second = sft_main(resume_args)
    infer_main(InferArguments(adapters=second['last_model_checkpoint'], load_data_args=True))
|
|
|
|
def test_resume_only_model():
    """Save only model weights under ZeRO-3, then resume with resume_only_model."""
    # `os` is already imported at module level; the redundant local import was
    # removed, along with the unused infer_main/InferArguments imports and the
    # dead `result =` assignment on the second run.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#10', 'AI-ModelScope/alpaca-gpt4-data-en#10'],
            max_steps=20,
            save_only_model=True,
            deepspeed='zero3',
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    # Resume loading only the model weights (optimizer/scheduler state was not saved).
    sft_main(
        TrainArguments(
            resume_from_checkpoint=last_model_checkpoint, load_data_args=True, max_steps=20, resume_only_model=True))
|
|
|
|
def test_llm_transformers_4_33():
    """Smoke-test training a legacy model (Qwen-7B-Chat) on two GPUs."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # infer_main/InferArguments were imported but never used; import only what is needed.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen-7B-Chat',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            **kwargs))
|
|
|
|
def test_predict_with_generate():
    """Train with generation-based evaluation (predict_with_generate)."""
    # `os` is already imported at module level; the redundant local import was
    # removed, along with the unused infer_main/InferArguments imports.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-7B-Instruct',
            dataset=['AI-ModelScope/alpaca-gpt4-data-en#40'],
            predict_with_generate=True,
            split_dataset_ratio=0.5,
            **kwargs))
|
|
|
|
def test_predict_with_generate_zero3():
    """Generation-based evaluation combined with DeepSpeed ZeRO-3 and unfrozen ViT."""
    # `os` is already imported at module level; the redundant local import was
    # removed, along with the unused infer_main/InferArguments imports.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['AI-ModelScope/LaTeX_OCR#40'],
            predict_with_generate=True,
            freeze_vit=False,
            split_dataset_ratio=0.5,
            deepspeed='zero3',
            **kwargs))
|
|
|
|
def test_template():
    """Self-cognition fine-tune; exercises model_name/model_author templating.

    Bug fix: the original declared ``global kwargs`` and rebound the module-level
    dict to a mutated copy (num_train_epochs=3), leaking that override into every
    test executed afterwards in the same process. Use a local copy instead, the
    same pattern test_epoch already uses.
    """
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_kwargs = kwargs.copy()
    train_kwargs['num_train_epochs'] = 3
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['swift/self-cognition#200'],
            model_name=['小黄'],
            model_author=['swift'],
            **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, merge_lora=True))
|
|
|
|
def test_emu3_gen():
    """Train the Emu3-Gen image-generation model, then sample from the checkpoint.

    Bug fix: the original mutated the shared module-level ``kwargs`` in place
    (``kwargs['num_train_epochs'] = 100``), so every test run after this one
    silently trained for 100 epochs. Mutate a local copy instead.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    os.environ['max_position_embeddings'] = '10240'
    os.environ['image_area'] = '518400'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_kwargs = kwargs.copy()
    train_kwargs['num_train_epochs'] = 100
    result = sft_main(TrainArguments(model='BAAI/Emu3-Gen', dataset=['swift/TextCaps#2'], **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    args = InferArguments(
        ckpt_dir=last_model_checkpoint,
        infer_backend='pt',
        stream=False,
        use_chat_template=False,
        top_k=2048,
        max_new_tokens=40960)
    infer_main(args)
|
|
|
|
def test_eval_strategy():
    """Train with in-loop evaluation disabled (eval_strategy='no'), then infer."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        eval_strategy='no',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
def test_epoch():
    """Train for three epochs with per-epoch checkpoints, then infer from the last."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    # Override the shared epoch count on a local copy only.
    epoch_kwargs = kwargs.copy()
    epoch_kwargs['num_train_epochs'] = 3
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#50', 'AI-ModelScope/alpaca-gpt4-data-en#50'],
        save_strategy='epoch',
        **epoch_kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
def test_agent():
    """Agent-style SFT on ToolBench with react loss scaling, then inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['swift/ToolBench#500'],
        loss_scale='react',
        agent_template='toolbench',
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
def test_grounding():
    """Grounding fine-tune on COCO, then streamed inference from the adapter."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import InferArguments, TrainArguments, infer_main, sft_main
    train_args = TrainArguments(
        model='Qwen/Qwen2.5-VL-7B-Instruct', dataset=['AI-ModelScope/coco#200'], dataset_num_proc=4, **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True, stream=True, max_new_tokens=2048))
|
|
|
|
if __name__ == '__main__':
    # Run one test directly; edit the call below to select a different case.
    test_grounding()
|
|