Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .dockerignore +15 -0
- .env.local +42 -0
- .gitattributes +201 -0
- .github/CODE_OF_CONDUCT.md +128 -0
- .github/CONTRIBUTING.md +67 -0
- .github/ISSUE_TEMPLATE/1-bug-report.yml +61 -0
- .github/ISSUE_TEMPLATE/2-feature-request.yml +41 -0
- .github/ISSUE_TEMPLATE/config.yml +8 -0
- .github/PULL_REQUEST_TEMPLATE.md +8 -0
- .github/SECURITY.md +7 -0
- .github/workflows/docker.yml +108 -0
- .github/workflows/label_issue.yml +32 -0
- .github/workflows/publish.yml +36 -0
- .github/workflows/tests.yml +99 -0
- .pre-commit-config.yaml +28 -0
- =0.10.0, +0 -0
- CITATION.cff +44 -0
- LICENSE +201 -0
- MANIFEST.in +1 -0
- Makefile +24 -0
- README.md +948 -0
- README_zh.md +949 -0
- SUMMARY_完成情况总结.md +164 -0
- analyze_token_logs.py +165 -0
- assets/alaya_new.svg +38 -0
- assets/logo.png +0 -0
- assets/wechat.jpg +3 -0
- assets/wechat_alaya.png +3 -0
- assets/wechat_npu.jpg +3 -0
- configs/ds_zero3.json +33 -0
- data/belle_multiturn/belle_multiturn.py +82 -0
- data/dataset/01_26/CUDA内存不足解决方案.md +146 -0
- data/dataset/01_26/NCCL错误解决方案.md +120 -0
- data/dataset/01_26/ZeRO-3使用说明.md +153 -0
- data/dataset/01_26/fix_cuda_oom.sh +88 -0
- data/dataset/01_26/token_statistics.json +18 -0
- data/dataset/01_26/token_statistics.txt +19 -0
- data/dataset/01_26/toollist1.jsonl +3 -0
- data/dataset/01_26/toollist1_sample.json +0 -0
- data/dataset/01_26/toollist2.jsonl +3 -0
- data/dataset/01_26/toollist2_sample.json +0 -0
- data/dataset/01_26/接续训练示例.md +110 -0
- data/dataset/01_26/训练命令.md +231 -0
- data/dataset/12_10/LLM输入一致性对比分析.md +234 -0
- data/dataset/12_10/origin.py +2192 -0
- data/dataset/12_10/overfit_data.json +92 -0
- data/dataset/12_10/testmix_dataset.json +0 -0
- data/dataset/12_10/格式问题分析与修正建议.md +214 -0
- data/dataset/12_10/训练与推理输入对比分析.md +415 -0
- data/dataset/12_15/GT_dataset.jsonl +3 -0
.dockerignore
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.vscode
|
| 2 |
+
.git
|
| 3 |
+
.github
|
| 4 |
+
.venv
|
| 5 |
+
cache
|
| 6 |
+
docker
|
| 7 |
+
saves
|
| 8 |
+
hf_cache
|
| 9 |
+
ms_cache
|
| 10 |
+
om_cache
|
| 11 |
+
shared_data
|
| 12 |
+
output
|
| 13 |
+
.dockerignore
|
| 14 |
+
.gitattributes
|
| 15 |
+
.gitignore
|
.env.local
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Note: actually we do not support .env, just for reference
|
| 2 |
+
# api
|
| 3 |
+
API_HOST=
|
| 4 |
+
API_PORT=
|
| 5 |
+
API_KEY=
|
| 6 |
+
API_MODEL_NAME=
|
| 7 |
+
API_VERBOSE=
|
| 8 |
+
FASTAPI_ROOT_PATH=
|
| 9 |
+
MAX_CONCURRENT=
|
| 10 |
+
# general
|
| 11 |
+
DISABLE_VERSION_CHECK=
|
| 12 |
+
FORCE_CHECK_IMPORTS=
|
| 13 |
+
ALLOW_EXTRA_ARGS=
|
| 14 |
+
LLAMAFACTORY_VERBOSITY=
|
| 15 |
+
USE_MODELSCOPE_HUB=
|
| 16 |
+
USE_OPENMIND_HUB=
|
| 17 |
+
USE_RAY=
|
| 18 |
+
RECORD_VRAM=
|
| 19 |
+
OPTIM_TORCH=
|
| 20 |
+
NPU_JIT_COMPILE=
|
| 21 |
+
# torchrun
|
| 22 |
+
FORCE_TORCHRUN=
|
| 23 |
+
MASTER_ADDR=
|
| 24 |
+
MASTER_PORT=
|
| 25 |
+
NNODES=
|
| 26 |
+
NODE_RANK=
|
| 27 |
+
NPROC_PER_NODE=
|
| 28 |
+
# wandb
|
| 29 |
+
WANDB_DISABLED=
|
| 30 |
+
WANDB_PROJECT=
|
| 31 |
+
WANDB_API_KEY=
|
| 32 |
+
# gradio ui
|
| 33 |
+
GRADIO_SHARE=
|
| 34 |
+
GRADIO_SERVER_NAME=
|
| 35 |
+
GRADIO_SERVER_PORT=
|
| 36 |
+
GRADIO_ROOT_PATH=
|
| 37 |
+
GRADIO_IPV6=
|
| 38 |
+
# setup
|
| 39 |
+
ENABLE_SHORT_CONSOLE=
|
| 40 |
+
# reserved (do not use)
|
| 41 |
+
LLAMABOARD_ENABLED=
|
| 42 |
+
LLAMABOARD_WORKDIR=
|
.gitattributes
CHANGED
|
@@ -33,3 +33,204 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/wechat.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
assets/wechat_alaya.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
assets/wechat_npu.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/dataset/01_26/toollist1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
data/dataset/01_26/toollist2.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
data/dataset/12_15/GT_dataset.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
data/dataset/12_15/train_converted_APIGen.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
data/dataset/12_15/train_converted_alpaca.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
data/dataset/12_15/train_converted_alpaca_gt.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
data/dataset/12_24/yifan_toollist1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
data/dataset/12_24/yifan_toollist2.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
data/dataset/12_27/toollist1_dataset.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
evaluation/data_evaluation_demo_0926_v19.json filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
evaluation/evaluation_checkpoint_demo_0929_v20.json filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
evaluation/multi_baseline_lora/result__data_models_Qwen3-8B.json filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
evaluation/multi_baseline_lora/result_my_lora.json filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
saves/Qwen3-8B/lora/train_2025-12-17-13-03/checkpoint-125/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
saves/Qwen3-8B/lora/train_2025-12-17-13-03/monitoring_logs/label_analysis_20251217_130423.log filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
saves/Qwen3-8B/lora/train_2025-12-17-13-03/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
saves/Qwen3-8B/lora/train_2025-12-18-22-07/checkpoint-152/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
saves/Qwen3-8B/lora/train_2025-12-18-22-07/monitoring_logs/label_analysis_20251218_220754.log filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
saves/Qwen3-8B/lora/train_2025-12-18-22-07/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
saves/Qwen3-8B/lora/train_2025-12-19-00-00/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
saves/Qwen3-8B/lora/train_2025-12-19-00-00/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
saves/Qwen3-8B/lora/train_2025-12-19-00-00/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
saves/Qwen3-8B/lora/train_2025-12-19-00-00/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
saves/Qwen3-8B/lora/train_2025-12-19-00-00/monitoring_logs/label_analysis_20251219_000304.log filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
saves/Qwen3-8B/lora/train_2025-12-19-19-19/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
saves/Qwen3-8B/lora/train_2025-12-19-19-19/checkpoint-304/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
saves/Qwen3-8B/lora/train_2025-12-19-19-19/monitoring_logs/label_analysis_20251219_191948.log filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
saves/Qwen3-8B/lora/train_2025-12-19-19-19/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
saves/Qwen3-8B/lora/train_2025-12-24-14-25/checkpoint-123/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
saves/Qwen3-8B/lora/train_2025-12-24-14-25/monitoring_logs/label_analysis_20251224_142538.log filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
saves/Qwen3-8B/lora/train_2025-12-24-14-25/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
saves/Qwen3-8B/lora/train_2025-12-24-14-27/checkpoint-123/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
saves/Qwen3-8B/lora/train_2025-12-24-14-27/monitoring_logs/label_analysis_20251224_142750.log filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
saves/Qwen3-8B/lora/train_2025-12-24-14-27/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
saves/Qwen3-8B/lora/train_2025-12-25-12-00/checkpoint-125/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
saves/Qwen3-8B/lora/train_2025-12-25-12-00/monitoring_logs/label_analysis_20251225_120027.log filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
saves/Qwen3-8B/lora/train_2025-12-25-12-00/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-1400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-1600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-2200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/monitoring_logs/label_analysis_20260126_153852.log filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
saves/Qwen3-8B/lora/train_2026-01-26-15-35/monitoring_logs/label_analysis_20260126_153853.log filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
saves/Qwen3-8B/lora/train_2026-01-27-15-49/checkpoint-2800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
saves/Qwen3-8B/lora/train_2026-01-27-15-49/checkpoint-3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
saves/Qwen3-8B/lora/train_2026-01-27-15-49/monitoring_logs/label_analysis_20260127_155716.log filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-3200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-3400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-3600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-3800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-4200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-4400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-4600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-4800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-5200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-5400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-5600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/checkpoint-5800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/monitoring_logs/label_analysis_20260127_211415.log filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
saves/Qwen3-8B/lora/train_2026-01-27-21-01/monitoring_logs/label_analysis_20260127_211416.log filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
saves/Qwen3-8B/lora/train_2026-01-28-18-13/checkpoint-6000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
saves/Qwen3-8B/lora/train_2026-01-28-18-13/monitoring_logs/label_analysis_20260128_181359.log filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
saves/Qwen3-8B/lora/train_2026-01-29-09-32/checkpoint-6200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
saves/Qwen3-8B/lora/train_2026-01-29-09-32/checkpoint-6400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
saves/Qwen3-8B/lora/train_2026-01-29-09-32/monitoring_logs/label_analysis_20260129_093236.log filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-10000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-10200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-10400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-10600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-10800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-11000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-11200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-6600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-6800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-7000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-7200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-7400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-7600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-7800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-8000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-8200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-8400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-8600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-8800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-9000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-9200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-9400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-9600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-9800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
saves/Qwen3-8B/lora/train_2026-01-29-20-31/monitoring_logs/label_analysis_20260129_203222.log filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
saves/Qwen3-8B/lora/train_2026-01-30-16-41/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
saves/Qwen3-8B/lora/train_2026-01-30-16-41/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
saves/Qwen3-8B/lora/train_2026-01-30-16-41/monitoring_logs/label_analysis_20260130_164253.log filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-1400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-1600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-2200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-2800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-3200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-3400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-3600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-3800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-4200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-4400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-4600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-4800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 165 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-5200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-5400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-5600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-5800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-6000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-6200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-6400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-6600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-6800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-7000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-7200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-7400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-7600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-7800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-8000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-8200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-8400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-8600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/checkpoint-8800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/monitoring_logs/label_analysis_20260130_205039.log filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
saves/Qwen3-8B/lora/train_2026-01-30-20-50/monitoring_logs/label_analysis_20260130_205040.log filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-1400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-1600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-2200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-2400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-2800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-3200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-3400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-3600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-3800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 205 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 206 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 207 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-4200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 208 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-4400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 209 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-4600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 210 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-4800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 211 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 212 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-5200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 213 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-5400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 214 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-5600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 215 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-5800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 216 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 217 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-6000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 218 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-6200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 219 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-6400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 220 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-6600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 221 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-6800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 222 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-7000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 223 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-7200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 224 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-7400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 225 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-7600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 226 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-7800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 227 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 228 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-8000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 229 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-8200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 230 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-8400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 231 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/checkpoint-8600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 232 |
+
saves/Qwen3-8B/lora/train_2026-01-31-16-26/monitoring_logs/label_analysis_20260131_163852.log filter=lfs diff=lfs merge=lfs -text
|
| 233 |
+
saves/Qwen3-8B/lora/train_2026-02-01-19-27/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 234 |
+
saves/Qwen3-8B/lora/train_2026-02-01-19-27/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 235 |
+
saves/Qwen3-8B/lora/train_2026-02-01-19-27/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 236 |
+
saves/Qwen3-8B/lora/train_2026-02-01-19-27/monitoring_logs/label_analysis_20260201_192731.log filter=lfs diff=lfs merge=lfs -text
|
.github/CODE_OF_CONDUCT.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Contributor Covenant Code of Conduct
|
| 2 |
+
|
| 3 |
+
## Our Pledge
|
| 4 |
+
|
| 5 |
+
We as members, contributors, and leaders pledge to make participation in our
|
| 6 |
+
community a harassment-free experience for everyone, regardless of age, body
|
| 7 |
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
| 8 |
+
identity and expression, level of experience, education, socio-economic status,
|
| 9 |
+
nationality, personal appearance, race, religion, or sexual identity
|
| 10 |
+
and orientation.
|
| 11 |
+
|
| 12 |
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
| 13 |
+
diverse, inclusive, and healthy community.
|
| 14 |
+
|
| 15 |
+
## Our Standards
|
| 16 |
+
|
| 17 |
+
Examples of behavior that contributes to a positive environment for our
|
| 18 |
+
community include:
|
| 19 |
+
|
| 20 |
+
* Demonstrating empathy and kindness toward other people
|
| 21 |
+
* Being respectful of differing opinions, viewpoints, and experiences
|
| 22 |
+
* Giving and gracefully accepting constructive feedback
|
| 23 |
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
| 24 |
+
and learning from the experience
|
| 25 |
+
* Focusing on what is best not just for us as individuals, but for the
|
| 26 |
+
overall community
|
| 27 |
+
|
| 28 |
+
Examples of unacceptable behavior include:
|
| 29 |
+
|
| 30 |
+
* The use of sexualized language or imagery, and sexual attention or
|
| 31 |
+
advances of any kind
|
| 32 |
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
| 33 |
+
* Public or private harassment
|
| 34 |
+
* Publishing others' private information, such as a physical or email
|
| 35 |
+
address, without their explicit permission
|
| 36 |
+
* Other conduct which could reasonably be considered inappropriate in a
|
| 37 |
+
professional setting
|
| 38 |
+
|
| 39 |
+
## Enforcement Responsibilities
|
| 40 |
+
|
| 41 |
+
Community leaders are responsible for clarifying and enforcing our standards of
|
| 42 |
+
acceptable behavior and will take appropriate and fair corrective action in
|
| 43 |
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
| 44 |
+
or harmful.
|
| 45 |
+
|
| 46 |
+
Community leaders have the right and responsibility to remove, edit, or reject
|
| 47 |
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
| 48 |
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
| 49 |
+
decisions when appropriate.
|
| 50 |
+
|
| 51 |
+
## Scope
|
| 52 |
+
|
| 53 |
+
This Code of Conduct applies within all community spaces, and also applies when
|
| 54 |
+
an individual is officially representing the community in public spaces.
|
| 55 |
+
Examples of representing our community include using an official e-mail address,
|
| 56 |
+
posting via an official social media account, or acting as an appointed
|
| 57 |
+
representative at an online or offline event.
|
| 58 |
+
|
| 59 |
+
## Enforcement
|
| 60 |
+
|
| 61 |
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
| 62 |
+
reported to the community leaders responsible for enforcement at
|
| 63 |
+
`hoshihiyouga AT gmail DOT com`.
|
| 64 |
+
All complaints will be reviewed and investigated promptly and fairly.
|
| 65 |
+
|
| 66 |
+
All community leaders are obligated to respect the privacy and security of the
|
| 67 |
+
reporter of any incident.
|
| 68 |
+
|
| 69 |
+
## Enforcement Guidelines
|
| 70 |
+
|
| 71 |
+
Community leaders will follow these Community Impact Guidelines in determining
|
| 72 |
+
the consequences for any action they deem in violation of this Code of Conduct:
|
| 73 |
+
|
| 74 |
+
### 1. Correction
|
| 75 |
+
|
| 76 |
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
| 77 |
+
unprofessional or unwelcome in the community.
|
| 78 |
+
|
| 79 |
+
**Consequence**: A private, written warning from community leaders, providing
|
| 80 |
+
clarity around the nature of the violation and an explanation of why the
|
| 81 |
+
behavior was inappropriate. A public apology may be requested.
|
| 82 |
+
|
| 83 |
+
### 2. Warning
|
| 84 |
+
|
| 85 |
+
**Community Impact**: A violation through a single incident or series
|
| 86 |
+
of actions.
|
| 87 |
+
|
| 88 |
+
**Consequence**: A warning with consequences for continued behavior. No
|
| 89 |
+
interaction with the people involved, including unsolicited interaction with
|
| 90 |
+
those enforcing the Code of Conduct, for a specified period of time. This
|
| 91 |
+
includes avoiding interactions in community spaces as well as external channels
|
| 92 |
+
like social media. Violating these terms may lead to a temporary or
|
| 93 |
+
permanent ban.
|
| 94 |
+
|
| 95 |
+
### 3. Temporary Ban
|
| 96 |
+
|
| 97 |
+
**Community Impact**: A serious violation of community standards, including
|
| 98 |
+
sustained inappropriate behavior.
|
| 99 |
+
|
| 100 |
+
**Consequence**: A temporary ban from any sort of interaction or public
|
| 101 |
+
communication with the community for a specified period of time. No public or
|
| 102 |
+
private interaction with the people involved, including unsolicited interaction
|
| 103 |
+
with those enforcing the Code of Conduct, is allowed during this period.
|
| 104 |
+
Violating these terms may lead to a permanent ban.
|
| 105 |
+
|
| 106 |
+
### 4. Permanent Ban
|
| 107 |
+
|
| 108 |
+
**Community Impact**: Demonstrating a pattern of violation of community
|
| 109 |
+
standards, including sustained inappropriate behavior, harassment of an
|
| 110 |
+
individual, or aggression toward or disparagement of classes of individuals.
|
| 111 |
+
|
| 112 |
+
**Consequence**: A permanent ban from any sort of public interaction within
|
| 113 |
+
the community.
|
| 114 |
+
|
| 115 |
+
## Attribution
|
| 116 |
+
|
| 117 |
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
| 118 |
+
version 2.0, available at
|
| 119 |
+
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
|
| 120 |
+
|
| 121 |
+
Community Impact Guidelines were inspired by [Mozilla's code of conduct
|
| 122 |
+
enforcement ladder](https://github.com/mozilla/diversity).
|
| 123 |
+
|
| 124 |
+
[homepage]: https://www.contributor-covenant.org
|
| 125 |
+
|
| 126 |
+
For answers to common questions about this code of conduct, see the FAQ at
|
| 127 |
+
https://www.contributor-covenant.org/faq. Translations are available at
|
| 128 |
+
https://www.contributor-covenant.org/translations.
|
.github/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Contributing to LLaMA Factory
|
| 2 |
+
|
| 3 |
+
Everyone is welcome to contribute, and we value everybody's contribution. Code contributions are not the only way to help the community. Answering questions, helping others, and improving the documentation are also immensely valuable.
|
| 4 |
+
|
| 5 |
+
It also helps us if you spread the word! Reference the library in blog posts about the awesome projects it made possible, shout out on Twitter every time it has helped you, or simply ⭐️ the repository to say thank you.
|
| 6 |
+
|
| 7 |
+
However you choose to contribute, please be mindful and respect our [code of conduct](CODE_OF_CONDUCT.md).
|
| 8 |
+
|
| 9 |
+
**This guide was heavily inspired by [transformers guide to contributing](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md).**
|
| 10 |
+
|
| 11 |
+
## Ways to contribute
|
| 12 |
+
|
| 13 |
+
There are several ways you can contribute to LLaMA Factory:
|
| 14 |
+
|
| 15 |
+
* Fix outstanding issues with the existing code.
|
| 16 |
+
* Submit issues related to bugs or desired new features.
|
| 17 |
+
* Contribute to the examples or to the documentation.
|
| 18 |
+
|
| 19 |
+
### Style guide
|
| 20 |
+
|
| 21 |
+
LLaMA Factory follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html), check it for details.
|
| 22 |
+
|
| 23 |
+
### Create a Pull Request
|
| 24 |
+
|
| 25 |
+
1. Fork the [repository](https://github.com/hiyouga/LLaMA-Factory) by clicking on the [Fork](https://github.com/hiyouga/LLaMA-Factory/fork) button on the repository's page. This creates a copy of the code under your GitHub user account.
|
| 26 |
+
|
| 27 |
+
2. Clone your fork to your local disk, and add the base repository as a remote:
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
git clone git@github.com:[username]/LLaMA-Factory.git
|
| 31 |
+
cd LLaMA-Factory
|
| 32 |
+
git remote add upstream https://github.com/hiyouga/LLaMA-Factory.git
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
3. Create a new branch to hold your development changes:
|
| 36 |
+
|
| 37 |
+
```bash
|
| 38 |
+
git checkout -b dev_your_branch
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
4. Set up a development environment by running the following command in a virtual environment:
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
pip install -e ".[dev]"
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
If LLaMA Factory was already installed in the virtual environment, remove it with `pip uninstall llamafactory` before reinstalling it in editable mode with the -e flag.
|
| 48 |
+
|
| 49 |
+
5. Check code before commit:
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
make commit
|
| 53 |
+
make style && make quality
|
| 54 |
+
make test
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
6. Submit changes:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
git add .
|
| 61 |
+
git commit -m "commit message"
|
| 62 |
+
git fetch upstream
|
| 63 |
+
git rebase upstream/main
|
| 64 |
+
git push -u origin dev_your_branch
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
7. Create a merge request from your branch `dev_your_branch` at [origin repo](https://github.com/hiyouga/LLaMA-Factory).
|
.github/ISSUE_TEMPLATE/1-bug-report.yml
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: "\U0001F41B Bug / help"
|
| 2 |
+
description: Create a report to help us improve the LLaMA Factory
|
| 3 |
+
labels: ["bug", "pending"]
|
| 4 |
+
body:
|
| 5 |
+
- type: markdown
|
| 6 |
+
attributes:
|
| 7 |
+
value: |
|
| 8 |
+
Issues included in **[FAQs](https://github.com/hiyouga/LLaMA-Factory/issues/4614)** or those with **insufficient** information may be closed without a response.
|
| 9 |
+
已经包含在 **[常见问题](https://github.com/hiyouga/LLaMA-Factory/issues/4614)** 内或提供信息**不完整**的 issues 可能不会被回复。
|
| 10 |
+
|
| 11 |
+
- type: markdown
|
| 12 |
+
attributes:
|
| 13 |
+
value: |
|
| 14 |
+
Please do not create issues that are not related to framework bugs under this category, use **[Discussions](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)** instead.
|
| 15 |
+
请勿在此分类下创建和框架 bug 无关的 issues,训练问题求助请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
|
| 16 |
+
|
| 17 |
+
- type: checkboxes
|
| 18 |
+
id: reminder
|
| 19 |
+
attributes:
|
| 20 |
+
label: Reminder
|
| 21 |
+
description: |
|
| 22 |
+
Please ensure you have read the above rules carefully and searched the existing issues (including FAQs).
|
| 23 |
+
请确保您已经认真阅读了上述规则并且搜索过现有的 issues(包括常见问题)。
|
| 24 |
+
|
| 25 |
+
options:
|
| 26 |
+
- label: I have read the above rules and searched the existing issues.
|
| 27 |
+
required: true
|
| 28 |
+
|
| 29 |
+
- type: textarea
|
| 30 |
+
id: system-info
|
| 31 |
+
validations:
|
| 32 |
+
required: true
|
| 33 |
+
attributes:
|
| 34 |
+
label: System Info
|
| 35 |
+
description: |
|
| 36 |
+
Please share your system info with us. You can run the command **llamafactory-cli env** and copy-paste its output below.
|
| 37 |
+
请提供您的系统信息。您可以在命令行运行 **llamafactory-cli env** 并将其输出复制到该文本框中。
|
| 38 |
+
|
| 39 |
+
placeholder: llamafactory version, platform, python version, ...
|
| 40 |
+
|
| 41 |
+
- type: textarea
|
| 42 |
+
id: reproduction
|
| 43 |
+
validations:
|
| 44 |
+
required: true
|
| 45 |
+
attributes:
|
| 46 |
+
label: Reproduction
|
| 47 |
+
description: |
|
| 48 |
+
Please provide entry arguments, error messages and stack traces that reproduces the problem.
|
| 49 |
+
请提供入口参数,错误日志以及异常堆栈以便于我们复现问题。
|
| 50 |
+
|
| 51 |
+
value: |
|
| 52 |
+
```text
|
| 53 |
+
Put your message here.
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
- type: textarea
|
| 57 |
+
id: others
|
| 58 |
+
validations:
|
| 59 |
+
required: false
|
| 60 |
+
attributes:
|
| 61 |
+
label: Others
|
.github/ISSUE_TEMPLATE/2-feature-request.yml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: "\U0001F680 Feature request"
|
| 2 |
+
description: Submit a request for a new feature
|
| 3 |
+
labels: ["enhancement", "pending"]
|
| 4 |
+
body:
|
| 5 |
+
- type: markdown
|
| 6 |
+
attributes:
|
| 7 |
+
value: |
|
| 8 |
+
Please do not create issues that are not related to new features under this category.
|
| 9 |
+
请勿在此分类下创建和新特性无关的 issues。
|
| 10 |
+
|
| 11 |
+
- type: checkboxes
|
| 12 |
+
id: reminder
|
| 13 |
+
attributes:
|
| 14 |
+
label: Reminder
|
| 15 |
+
description: |
|
| 16 |
+
Please ensure you have read the above rules carefully and searched the existing issues.
|
| 17 |
+
请确保您已经认真阅读了上述规则并且搜索过现有的 issues。
|
| 18 |
+
|
| 19 |
+
options:
|
| 20 |
+
- label: I have read the above rules and searched the existing issues.
|
| 21 |
+
required: true
|
| 22 |
+
|
| 23 |
+
- type: textarea
|
| 24 |
+
id: description
|
| 25 |
+
validations:
|
| 26 |
+
required: true
|
| 27 |
+
attributes:
|
| 28 |
+
label: Description
|
| 29 |
+
description: |
|
| 30 |
+
A clear and concise description of the feature proposal.
|
| 31 |
+
请详细描述您希望加入的新功能特性。
|
| 32 |
+
|
| 33 |
+
- type: textarea
|
| 34 |
+
id: contribution
|
| 35 |
+
validations:
|
| 36 |
+
required: false
|
| 37 |
+
attributes:
|
| 38 |
+
label: Pull Request
|
| 39 |
+
description: |
|
| 40 |
+
Have you already created the relevant PR and submitted the code?
|
| 41 |
+
您是否已经创建了相关 PR 并提交了代码?
|
.github/ISSUE_TEMPLATE/config.yml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
blank_issues_enabled: false
|
| 2 |
+
contact_links:
|
| 3 |
+
- name: 📚 FAQs | 常见问题
|
| 4 |
+
url: https://github.com/hiyouga/LLaMA-Factory/issues/4614
|
| 5 |
+
about: Reading in advance is recommended | 建议提前阅读
|
| 6 |
+
- name: Discussions | 讨论区
|
| 7 |
+
url: https://github.com/hiyouga/LLaMA-Factory/discussions
|
| 8 |
+
about: Please ask fine-tuning questions here | 请在这里讨论训练问题
|
.github/PULL_REQUEST_TEMPLATE.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# What does this PR do?
|
| 2 |
+
|
| 3 |
+
Fixes # (issue)
|
| 4 |
+
|
| 5 |
+
## Before submitting
|
| 6 |
+
|
| 7 |
+
- [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)?
|
| 8 |
+
- [ ] Did you write any new necessary tests?
|
.github/SECURITY.md
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reporting Security Issues
|
| 2 |
+
|
| 3 |
+
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/hiyouga/LLaMA-Factory/security/advisories/new) tab.
|
| 4 |
+
|
| 5 |
+
We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
|
| 6 |
+
|
| 7 |
+
Report security bugs in third-party modules to the person or team maintaining the module.
|
.github/workflows/docker.yml
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: docker
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
workflow_dispatch:
|
| 5 |
+
push:
|
| 6 |
+
branches:
|
| 7 |
+
- "main"
|
| 8 |
+
paths:
|
| 9 |
+
- "**/*.py"
|
| 10 |
+
- "requirements.txt"
|
| 11 |
+
- "docker/**"
|
| 12 |
+
- ".github/workflows/*.yml"
|
| 13 |
+
pull_request:
|
| 14 |
+
branches:
|
| 15 |
+
- "main"
|
| 16 |
+
paths:
|
| 17 |
+
- "**/*.py"
|
| 18 |
+
- "requirements.txt"
|
| 19 |
+
- "docker/**"
|
| 20 |
+
- ".github/workflows/*.yml"
|
| 21 |
+
|
| 22 |
+
jobs:
|
| 23 |
+
build:
|
| 24 |
+
strategy:
|
| 25 |
+
fail-fast: false
|
| 26 |
+
matrix:
|
| 27 |
+
device:
|
| 28 |
+
- "cuda"
|
| 29 |
+
- "npu"
|
| 30 |
+
|
| 31 |
+
runs-on: ubuntu-latest
|
| 32 |
+
|
| 33 |
+
concurrency:
|
| 34 |
+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.device }}
|
| 35 |
+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
| 36 |
+
|
| 37 |
+
environment:
|
| 38 |
+
name: docker
|
| 39 |
+
url: https://hub.docker.com/r/hiyouga/llamafactory
|
| 40 |
+
|
| 41 |
+
steps:
|
| 42 |
+
- name: Free up disk space
|
| 43 |
+
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
| 44 |
+
with:
|
| 45 |
+
tool-cache: true
|
| 46 |
+
docker-images: false
|
| 47 |
+
|
| 48 |
+
- name: Checkout
|
| 49 |
+
uses: actions/checkout@v4
|
| 50 |
+
|
| 51 |
+
- name: Set up Python
|
| 52 |
+
uses: actions/setup-python@v5
|
| 53 |
+
with:
|
| 54 |
+
python-version: "3.9"
|
| 55 |
+
|
| 56 |
+
- name: Get llamafactory version
|
| 57 |
+
id: version
|
| 58 |
+
run: |
|
| 59 |
+
echo "tag=$(python setup.py --version | sed 's/\.dev0//')" >> "$GITHUB_OUTPUT"
|
| 60 |
+
|
| 61 |
+
- name: Set up Docker Buildx
|
| 62 |
+
uses: docker/setup-buildx-action@v3
|
| 63 |
+
|
| 64 |
+
- name: Login to Docker Hub
|
| 65 |
+
if: ${{ github.event_name != 'pull_request' }}
|
| 66 |
+
uses: docker/login-action@v3
|
| 67 |
+
with:
|
| 68 |
+
username: ${{ vars.DOCKERHUB_USERNAME }}
|
| 69 |
+
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
| 70 |
+
|
| 71 |
+
- name: Login to Quay
|
| 72 |
+
if: ${{ github.event_name != 'pull_request' && matrix.device == 'npu' }}
|
| 73 |
+
uses: docker/login-action@v3
|
| 74 |
+
with:
|
| 75 |
+
registry: quay.io
|
| 76 |
+
username: ${{ vars.QUAY_ASCEND_USERNAME }}
|
| 77 |
+
password: ${{ secrets.QUAY_ASCEND_TOKEN }}
|
| 78 |
+
|
| 79 |
+
- name: Build and push Docker image (CUDA)
|
| 80 |
+
if: ${{ matrix.device == 'cuda' }}
|
| 81 |
+
uses: docker/build-push-action@v6
|
| 82 |
+
with:
|
| 83 |
+
context: .
|
| 84 |
+
file: ./docker/docker-cuda/Dockerfile
|
| 85 |
+
build-args: |
|
| 86 |
+
EXTRAS=metrics,deepspeed,liger-kernel
|
| 87 |
+
push: ${{ github.event_name != 'pull_request' }}
|
| 88 |
+
tags: |
|
| 89 |
+
docker.io/hiyouga/llamafactory:latest
|
| 90 |
+
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
|
| 91 |
+
cache-from: type=gha
|
| 92 |
+
cache-to: type=gha,mode=max
|
| 93 |
+
|
| 94 |
+
- name: Build and push Docker image (NPU)
|
| 95 |
+
if: ${{ matrix.device == 'npu' }}
|
| 96 |
+
uses: docker/build-push-action@v6
|
| 97 |
+
with:
|
| 98 |
+
context: .
|
| 99 |
+
platforms: linux/amd64,linux/arm64
|
| 100 |
+
file: ./docker/docker-npu/Dockerfile
|
| 101 |
+
push: ${{ github.event_name != 'pull_request' }}
|
| 102 |
+
tags: |
|
| 103 |
+
docker.io/hiyouga/llamafactory:latest-npu-a2
|
| 104 |
+
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
|
| 105 |
+
quay.io/ascend/llamafactory:latest-npu-a2
|
| 106 |
+
quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
|
| 107 |
+
cache-from: type=gha
|
| 108 |
+
cache-to: type=gha,mode=max
|
.github/workflows/label_issue.yml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: label_issue
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
issues:
|
| 5 |
+
types:
|
| 6 |
+
- opened
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
label_issue:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
permissions:
|
| 13 |
+
issues: write
|
| 14 |
+
|
| 15 |
+
steps:
|
| 16 |
+
- env:
|
| 17 |
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 18 |
+
ISSUE_URL: ${{ github.event.issue.html_url }}
|
| 19 |
+
ISSUE_TITLE: ${{ github.event.issue.title }}
|
| 20 |
+
run: |
|
| 21 |
+
LABEL=""
|
| 22 |
+
NPU_KEYWORDS=(npu huawei ascend 华为 昇腾 910)
|
| 23 |
+
ISSUE_TITLE_LOWER=$(echo $ISSUE_TITLE | tr '[:upper:]' '[:lower:]')
|
| 24 |
+
for KEYWORD in ${NPU_KEYWORDS[@]}; do
|
| 25 |
+
if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then
|
| 26 |
+
LABEL="npu"
|
| 27 |
+
break
|
| 28 |
+
fi
|
| 29 |
+
done
|
| 30 |
+
if [ -n "$LABEL" ]; then
|
| 31 |
+
gh issue edit $ISSUE_URL --add-label $LABEL
|
| 32 |
+
fi
|
.github/workflows/publish.yml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: publish
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
workflow_dispatch:
|
| 5 |
+
release:
|
| 6 |
+
types:
|
| 7 |
+
- published
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
publish:
|
| 11 |
+
name: Upload release to PyPI
|
| 12 |
+
|
| 13 |
+
runs-on: ubuntu-latest
|
| 14 |
+
|
| 15 |
+
environment:
|
| 16 |
+
name: release
|
| 17 |
+
url: https://pypi.org/p/llamafactory
|
| 18 |
+
|
| 19 |
+
permissions:
|
| 20 |
+
id-token: write
|
| 21 |
+
|
| 22 |
+
steps:
|
| 23 |
+
- name: Checkout
|
| 24 |
+
uses: actions/checkout@v4
|
| 25 |
+
|
| 26 |
+
- name: Set up Python
|
| 27 |
+
uses: actions/setup-python@v5
|
| 28 |
+
with:
|
| 29 |
+
python-version: "3.9"
|
| 30 |
+
|
| 31 |
+
- name: Build package
|
| 32 |
+
run: |
|
| 33 |
+
make build
|
| 34 |
+
|
| 35 |
+
- name: Publish package
|
| 36 |
+
uses: pypa/gh-action-pypi-publish@release/v1
|
.github/workflows/tests.yml
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: tests
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
workflow_dispatch:
|
| 5 |
+
push:
|
| 6 |
+
branches:
|
| 7 |
+
- "main"
|
| 8 |
+
paths:
|
| 9 |
+
- "**/*.py"
|
| 10 |
+
- "requirements.txt"
|
| 11 |
+
- ".github/workflows/*.yml"
|
| 12 |
+
pull_request:
|
| 13 |
+
branches:
|
| 14 |
+
- "main"
|
| 15 |
+
paths:
|
| 16 |
+
- "**/*.py"
|
| 17 |
+
- "requirements.txt"
|
| 18 |
+
- ".github/workflows/*.yml"
|
| 19 |
+
|
| 20 |
+
jobs:
|
| 21 |
+
tests:
|
| 22 |
+
strategy:
|
| 23 |
+
fail-fast: false
|
| 24 |
+
matrix:
|
| 25 |
+
python:
|
| 26 |
+
- "3.9"
|
| 27 |
+
- "3.10"
|
| 28 |
+
- "3.11"
|
| 29 |
+
- "3.12"
|
| 30 |
+
os:
|
| 31 |
+
- "ubuntu-latest"
|
| 32 |
+
- "windows-latest"
|
| 33 |
+
- "macos-13"
|
| 34 |
+
transformers:
|
| 35 |
+
- null
|
| 36 |
+
include: # test backward compatibility
|
| 37 |
+
- python: "3.9"
|
| 38 |
+
os: "ubuntu-latest"
|
| 39 |
+
transformers: "4.49.0"
|
| 40 |
+
- python: "3.9"
|
| 41 |
+
os: "ubuntu-latest"
|
| 42 |
+
transformers: "4.51.0"
|
| 43 |
+
|
| 44 |
+
runs-on: ${{ matrix.os }}
|
| 45 |
+
|
| 46 |
+
concurrency:
|
| 47 |
+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}
|
| 48 |
+
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
| 49 |
+
|
| 50 |
+
env:
|
| 51 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 52 |
+
OS_NAME: ${{ matrix.os }}
|
| 53 |
+
|
| 54 |
+
steps:
|
| 55 |
+
- name: Checkout
|
| 56 |
+
uses: actions/checkout@v4
|
| 57 |
+
|
| 58 |
+
- name: Set up Python
|
| 59 |
+
uses: actions/setup-python@v5
|
| 60 |
+
with:
|
| 61 |
+
python-version: ${{ matrix.python }}
|
| 62 |
+
cache: "pip"
|
| 63 |
+
cache-dependency-path: "**/requirements*.txt"
|
| 64 |
+
|
| 65 |
+
- name: Install dependencies
|
| 66 |
+
run: |
|
| 67 |
+
python -m pip install --upgrade pip
|
| 68 |
+
python -m pip install ".[torch,dev]"
|
| 69 |
+
|
| 70 |
+
- name: Install transformers
|
| 71 |
+
if: ${{ matrix.transformers }}
|
| 72 |
+
run: |
|
| 73 |
+
python -m pip install "transformers==${{ matrix.transformers }}"
|
| 74 |
+
|
| 75 |
+
- name: Cache files
|
| 76 |
+
id: hf-hub-cache
|
| 77 |
+
uses: actions/cache@v4
|
| 78 |
+
with:
|
| 79 |
+
path: ${{ runner.temp }}/huggingface
|
| 80 |
+
key: huggingface-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}-${{ hashFiles('tests/version.txt') }}
|
| 81 |
+
|
| 82 |
+
- name: Check quality
|
| 83 |
+
run: |
|
| 84 |
+
make style && make quality
|
| 85 |
+
|
| 86 |
+
- name: Check license
|
| 87 |
+
run: |
|
| 88 |
+
make license
|
| 89 |
+
|
| 90 |
+
- name: Check build
|
| 91 |
+
run: |
|
| 92 |
+
make build
|
| 93 |
+
|
| 94 |
+
- name: Test with pytest
|
| 95 |
+
run: |
|
| 96 |
+
make test
|
| 97 |
+
env:
|
| 98 |
+
HF_HOME: ${{ runner.temp }}/huggingface
|
| 99 |
+
HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"
|
.pre-commit-config.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
repos:
|
| 2 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 3 |
+
rev: v5.0.0
|
| 4 |
+
hooks:
|
| 5 |
+
- id: check-ast
|
| 6 |
+
- id: check-added-large-files
|
| 7 |
+
args: ['--maxkb=25000']
|
| 8 |
+
- id: check-merge-conflict
|
| 9 |
+
- id: check-yaml
|
| 10 |
+
- id: debug-statements
|
| 11 |
+
- id: end-of-file-fixer
|
| 12 |
+
- id: trailing-whitespace
|
| 13 |
+
args: [--markdown-linebreak-ext=md]
|
| 14 |
+
- id: no-commit-to-branch
|
| 15 |
+
args: ['--branch', 'main']
|
| 16 |
+
|
| 17 |
+
- repo: https://github.com/asottile/pyupgrade
|
| 18 |
+
rev: v3.17.0
|
| 19 |
+
hooks:
|
| 20 |
+
- id: pyupgrade
|
| 21 |
+
args: [--py38-plus]
|
| 22 |
+
|
| 23 |
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
| 24 |
+
rev: v0.6.9
|
| 25 |
+
hooks:
|
| 26 |
+
- id: ruff
|
| 27 |
+
args: [--fix]
|
| 28 |
+
- id: ruff-format
|
=0.10.0,
ADDED
|
File without changes
|
CITATION.cff
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cff-version: 1.2.0
|
| 2 |
+
date-released: 2024-03
|
| 3 |
+
message: "If you use this software, please cite it as below."
|
| 4 |
+
authors:
|
| 5 |
+
- family-names: "Zheng"
|
| 6 |
+
given-names: "Yaowei"
|
| 7 |
+
- family-names: "Zhang"
|
| 8 |
+
given-names: "Richong"
|
| 9 |
+
- family-names: "Zhang"
|
| 10 |
+
given-names: "Junhao"
|
| 11 |
+
- family-names: "Ye"
|
| 12 |
+
given-names: "Yanhan"
|
| 13 |
+
- family-names: "Luo"
|
| 14 |
+
given-names: "Zheyan"
|
| 15 |
+
- family-names: "Feng"
|
| 16 |
+
given-names: "Zhangchi"
|
| 17 |
+
- family-names: "Ma"
|
| 18 |
+
given-names: "Yongqiang"
|
| 19 |
+
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
|
| 20 |
+
url: "https://arxiv.org/abs/2403.13372"
|
| 21 |
+
preferred-citation:
|
| 22 |
+
type: conference-paper
|
| 23 |
+
conference:
|
| 24 |
+
name: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)"
|
| 25 |
+
authors:
|
| 26 |
+
- family-names: "Zheng"
|
| 27 |
+
given-names: "Yaowei"
|
| 28 |
+
- family-names: "Zhang"
|
| 29 |
+
given-names: "Richong"
|
| 30 |
+
- family-names: "Zhang"
|
| 31 |
+
given-names: "Junhao"
|
| 32 |
+
- family-names: "Ye"
|
| 33 |
+
given-names: "Yanhan"
|
| 34 |
+
- family-names: "Luo"
|
| 35 |
+
given-names: "Zheyan"
|
| 36 |
+
- family-names: "Feng"
|
| 37 |
+
given-names: "Zhangchi"
|
| 38 |
+
- family-names: "Ma"
|
| 39 |
+
given-names: "Yongqiang"
|
| 40 |
+
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
|
| 41 |
+
url: "https://arxiv.org/abs/2403.13372"
|
| 42 |
+
year: 2024
|
| 43 |
+
publisher: "Association for Computational Linguistics"
|
| 44 |
+
address: "Bangkok, Thailand"
|
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
MANIFEST.in
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
include LICENSE requirements.txt
|
Makefile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.PHONY: build commit license quality style test
|
| 2 |
+
|
| 3 |
+
check_dirs := scripts src tests setup.py
|
| 4 |
+
|
| 5 |
+
build:
|
| 6 |
+
pip3 install build && python3 -m build
|
| 7 |
+
|
| 8 |
+
commit:
|
| 9 |
+
pre-commit install
|
| 10 |
+
pre-commit run --all-files
|
| 11 |
+
|
| 12 |
+
license:
|
| 13 |
+
python3 tests/check_license.py $(check_dirs)
|
| 14 |
+
|
| 15 |
+
quality:
|
| 16 |
+
ruff check $(check_dirs)
|
| 17 |
+
ruff format --check $(check_dirs)
|
| 18 |
+
|
| 19 |
+
style:
|
| 20 |
+
ruff check $(check_dirs) --fix
|
| 21 |
+
ruff format $(check_dirs)
|
| 22 |
+
|
| 23 |
+
test:
|
| 24 |
+
CUDA_VISIBLE_DEVICES= WANDB_DISABLED=true pytest -vv tests/
|
README.md
ADDED
|
@@ -0,0 +1,948 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+

|
| 2 |
+
|
| 3 |
+
[](https://github.com/hiyouga/LLaMA-Factory/stargazers)
|
| 4 |
+
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
|
| 5 |
+
[](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
|
| 6 |
+
[](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
|
| 7 |
+
[](https://pypi.org/project/llamafactory/)
|
| 8 |
+
[](https://scholar.google.com/scholar?cites=12620864006390196564)
|
| 9 |
+
[](https://hub.docker.com/r/hiyouga/llamafactory/tags)
|
| 10 |
+
|
| 11 |
+
[](https://twitter.com/llamafactory_ai)
|
| 12 |
+
[](https://discord.gg/rKfvV9r9FK)
|
| 13 |
+
[](https://gitcode.com/zhengyaowei/LLaMA-Factory)
|
| 14 |
+
|
| 15 |
+
[](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)
|
| 16 |
+
[](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
|
| 17 |
+
[](https://docs.alayanew.com/docs/documents/newActivities/llamafactory/?utm_source=LLaMA-Factory)
|
| 18 |
+
[](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
|
| 19 |
+
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
|
| 20 |
+
[](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47)
|
| 21 |
+
|
| 22 |
+
### Used by [Amazon](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/), [NVIDIA](https://developer.nvidia.com/rtx/ai-toolkit), [Aliyun](https://help.aliyun.com/zh/pai/use-cases/fine-tune-a-llama-3-model-with-llama-factory), etc.
|
| 23 |
+
|
| 24 |
+
<div align="center" markdown="1">
|
| 25 |
+
|
| 26 |
+
### Supporters ❤️
|
| 27 |
+
|
| 28 |
+
<a href="https://warp.dev/llama-factory">
|
| 29 |
+
<img alt="Warp sponsorship" width="400" src="https://github.com/user-attachments/assets/ab8dd143-b0fd-4904-bdc5-dd7ecac94eae">
|
| 30 |
+
</a>
|
| 31 |
+
|
| 32 |
+
#### [Warp, the agentic terminal for developers](https://warp.dev/llama-factory)
|
| 33 |
+
|
| 34 |
+
[Available for MacOS, Linux, & Windows](https://warp.dev/llama-factory)
|
| 35 |
+
|
| 36 |
+
----
|
| 37 |
+
|
| 38 |
+
### Easily fine-tune 100+ large language models with zero-code [CLI](#quickstart) and [Web UI](#fine-tuning-with-llama-board-gui-powered-by-gradio)
|
| 39 |
+
|
| 40 |
+

|
| 41 |
+
|
| 42 |
+
</div>
|
| 43 |
+
|
| 44 |
+
👋 Join our [WeChat group](assets/wechat.jpg), [NPU user group](assets/wechat_npu.jpg) or [Alaya NeW user group](assets/wechat_alaya.png).
|
| 45 |
+
|
| 46 |
+
\[ English | [中文](README_zh.md) \]
|
| 47 |
+
|
| 48 |
+
**Fine-tuning a large language model can be easy as...**
|
| 49 |
+
|
| 50 |
+
https://github.com/user-attachments/assets/3991a3a8-4276-4d30-9cab-4cb0c4b9b99e
|
| 51 |
+
|
| 52 |
+
Choose your path:
|
| 53 |
+
|
| 54 |
+
- **Documentation (WIP)**: https://llamafactory.readthedocs.io/en/latest/
|
| 55 |
+
- **Documentation (AMD GPU)**: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/fine_tune/llama_factory_llama3.html
|
| 56 |
+
- **Colab (free)**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing
|
| 57 |
+
- **Local machine**: Please refer to [usage](#getting-started)
|
| 58 |
+
- **PAI-DSW (free trial)**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
|
| 59 |
+
- **Alaya NeW (cloud GPU deal)**: https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory
|
| 60 |
+
|
| 61 |
+
> [!NOTE]
|
| 62 |
+
> Except for the above links, all other websites are unauthorized third-party websites. Please carefully use them.
|
| 63 |
+
|
| 64 |
+
## Table of Contents
|
| 65 |
+
|
| 66 |
+
- [Features](#features)
|
| 67 |
+
- [Blogs](#blogs)
|
| 68 |
+
- [Changelog](#changelog)
|
| 69 |
+
- [Supported Models](#supported-models)
|
| 70 |
+
- [Supported Training Approaches](#supported-training-approaches)
|
| 71 |
+
- [Provided Datasets](#provided-datasets)
|
| 72 |
+
- [Requirement](#requirement)
|
| 73 |
+
- [Getting Started](#getting-started)
|
| 74 |
+
- [Installation](#installation)
|
| 75 |
+
- [Data Preparation](#data-preparation)
|
| 76 |
+
- [Quickstart](#quickstart)
|
| 77 |
+
- [Fine-Tuning with LLaMA Board GUI](#fine-tuning-with-llama-board-gui-powered-by-gradio)
|
| 78 |
+
- [Build Docker](#build-docker)
|
| 79 |
+
- [Deploy with OpenAI-style API and vLLM](#deploy-with-openai-style-api-and-vllm)
|
| 80 |
+
- [Download from ModelScope Hub](#download-from-modelscope-hub)
|
| 81 |
+
- [Download from Modelers Hub](#download-from-modelers-hub)
|
| 82 |
+
- [Use W&B Logger](#use-wb-logger)
|
| 83 |
+
- [Use SwanLab Logger](#use-swanlab-logger)
|
| 84 |
+
- [Projects using LLaMA Factory](#projects-using-llama-factory)
|
| 85 |
+
- [License](#license)
|
| 86 |
+
- [Citation](#citation)
|
| 87 |
+
- [Acknowledgement](#acknowledgement)
|
| 88 |
+
|
| 89 |
+
## Features
|
| 90 |
+
|
| 91 |
+
- **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Qwen2-VL, DeepSeek, Yi, Gemma, ChatGLM, Phi, etc.
|
| 92 |
+
- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc.
|
| 93 |
+
- **Scalable resources**: 16-bit full-tuning, freeze-tuning, LoRA and 2/3/4/5/6/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ.
|
| 94 |
+
- **Advanced algorithms**: [GaLore](https://github.com/jiaweizzhao/GaLore), [BAdam](https://github.com/Ledzy/BAdam), [APOLLO](https://github.com/zhuhanqing/APOLLO), [Adam-mini](https://github.com/zyushun/Adam-mini), [Muon](https://github.com/KellerJordan/Muon), DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and PiSSA.
|
| 95 |
+
- **Practical tricks**: [FlashAttention-2](https://github.com/Dao-AILab/flash-attention), [Unsloth](https://github.com/unslothai/unsloth), [Liger Kernel](https://github.com/linkedin/Liger-Kernel), RoPE scaling, NEFTune and rsLoRA.
|
| 96 |
+
- **Wide tasks**: Multi-turn dialogue, tool using, image understanding, visual grounding, video recognition, audio understanding, etc.
|
| 97 |
+
- **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, [SwanLab](https://github.com/SwanHubX/SwanLab), etc.
|
| 98 |
+
- **Faster inference**: OpenAI-style API, Gradio UI and CLI with [vLLM worker](https://github.com/vllm-project/vllm) or [SGLang worker](https://github.com/sgl-project/sglang).
|
| 99 |
+
|
| 100 |
+
### Day-N Support for Fine-Tuning Cutting-Edge Models
|
| 101 |
+
|
| 102 |
+
| Support Date | Model Name |
|
| 103 |
+
| ------------ | -------------------------------------------------------------------- |
|
| 104 |
+
| Day 0 | Qwen3 / Qwen2.5-VL / Gemma 3 / GLM-4.1V / InternLM 3 / MiniCPM-o-2.6 |
|
| 105 |
+
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 / Llama 4 |
|
| 106 |
+
|
| 107 |
+
## Blogs
|
| 108 |
+
|
| 109 |
+
- [A One-Stop Code-Free Model Reinforcement Learning and Deployment Platform based on LLaMA-Factory and EasyR1](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/) (Chinese)
|
| 110 |
+
- [Fine-tune Qwen2.5-VL for Autonomous Driving using LLaMA-Factory](https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory) (Chinese)
|
| 111 |
+
- [How Apoidea Group enhances visual information extraction from banking documents with multimodal models using LLaMA-Factory on Amazon SageMaker HyperPod](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/) (English)
|
| 112 |
+
- [Easy Dataset × LLaMA Factory: Enabling LLMs to Efficiently Learn Domain Knowledge](https://buaa-act.feishu.cn/wiki/GVzlwYcRFiR8OLkHbL6cQpYin7g) (English)
|
| 113 |
+
|
| 114 |
+
<details><summary>All Blogs</summary>
|
| 115 |
+
|
| 116 |
+
- [LLaMA Factory: Fine-tuning the DeepSeek-R1-Distill-Qwen-7B Model for News Classifier](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b) (Chinese)
|
| 117 |
+
- [A One-Stop Code-Free Model Fine-Tuning \& Deployment Platform based on SageMaker and LLaMA-Factory](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/) (Chinese)
|
| 118 |
+
- [LLaMA Factory Multi-Modal Fine-Tuning Practice: Fine-Tuning Qwen2-VL for Personal Tourist Guide](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl) (Chinese)
|
| 119 |
+
- [LLaMA Factory: Fine-tuning the LLaMA3 Model for Role-Playing](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) (Chinese)
|
| 120 |
+
|
| 121 |
+
</details>
|
| 122 |
+
|
| 123 |
+
## Changelog
|
| 124 |
+
|
| 125 |
+
[25/07/02] We supported fine-tuning the **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** model. Please install transformers from **main** branch to use.
|
| 126 |
+
|
| 127 |
+
[25/04/28] We supported fine-tuning the **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** model family.
|
| 128 |
+
|
| 129 |
+
[25/04/21] We supported the **[Muon](https://github.com/KellerJordan/Muon)** optimizer. See [examples](examples/README.md) for usage. Thank [@tianshijing](https://github.com/tianshijing)'s PR.
|
| 130 |
+
|
| 131 |
+
[25/04/16] We supported fine-tuning the **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** model. See [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) to get started.
|
| 132 |
+
|
| 133 |
+
[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.
|
| 134 |
+
|
| 135 |
+
[25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) to get started.
|
| 136 |
+
|
| 137 |
+
<details><summary>Full Changelog</summary>
|
| 138 |
+
|
| 139 |
+
[25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) to get started.
|
| 140 |
+
|
| 141 |
+
[25/03/15] We supported **[SGLang](https://github.com/sgl-project/sglang)** as inference backend. Try `infer_backend: sglang` to accelerate inference.
|
| 142 |
+
|
| 143 |
+
[25/03/12] We supported fine-tuning the **[Gemma 3](https://huggingface.co/blog/gemma3)** model.
|
| 144 |
+
|
| 145 |
+
[25/02/24] Announcing **[EasyR1](https://github.com/hiyouga/EasyR1)**, an efficient, scalable and multi-modality RL training framework for efficient GRPO training.
|
| 146 |
+
|
| 147 |
+
[25/02/11] We supported saving the **[Ollama](https://github.com/ollama/ollama)** modelfile when exporting the model checkpoints. See [examples](examples/README.md) for usage.
|
| 148 |
+
|
| 149 |
+
[25/02/05] We supported fine-tuning the **[Qwen2-Audio](Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** on audio understanding tasks.
|
| 150 |
+
|
| 151 |
+
[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** models.
|
| 152 |
+
|
| 153 |
+
[25/01/15] We supported **[APOLLO](https://arxiv.org/abs/2412.05270)** optimizer. See [examples](examples/README.md) for usage.
|
| 154 |
+
|
| 155 |
+
[25/01/14] We supported fine-tuning the **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** and **[MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6)** models. Thank [@BUAADreamer](https://github.com/BUAADreamer)'s PR.
|
| 156 |
+
|
| 157 |
+
[25/01/14] We supported fine-tuning the **[InternLM 3](https://huggingface.co/collections/internlm/)** models. Thank [@hhaAndroid](https://github.com/hhaAndroid)'s PR.
|
| 158 |
+
|
| 159 |
+
[25/01/10] We supported fine-tuning the **[Phi-4](https://huggingface.co/microsoft/phi-4)** model.
|
| 160 |
+
|
| 161 |
+
[24/12/21] We supported using **[SwanLab](https://github.com/SwanHubX/SwanLab)** for experiment tracking and visualization. See [this section](#use-swanlab-logger) for details.
|
| 162 |
+
|
| 163 |
+
[24/11/27] We supported fine-tuning the **[Skywork-o1](https://huggingface.co/Skywork/Skywork-o1-Open-Llama-3.1-8B)** model and the **[OpenO1](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)** dataset.
|
| 164 |
+
|
| 165 |
+
[24/10/09] We supported downloading pre-trained models and datasets from the **[Modelers Hub](https://modelers.cn/models)**. See [this tutorial](#download-from-modelers-hub) for usage.
|
| 166 |
+
|
| 167 |
+
[24/09/19] We supported fine-tuning the **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** models.
|
| 168 |
+
|
| 169 |
+
[24/08/30] We supported fine-tuning the **[Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/)** models. Thank [@simonJJJ](https://github.com/simonJJJ)'s PR.
|
| 170 |
+
|
| 171 |
+
[24/08/27] We supported **[Liger Kernel](https://github.com/linkedin/Liger-Kernel)**. Try `enable_liger_kernel: true` for efficient training.
|
| 172 |
+
|
| 173 |
+
[24/08/09] We supported **[Adam-mini](https://github.com/zyushun/Adam-mini)** optimizer. See [examples](examples/README.md) for usage. Thank [@relic-yuexi](https://github.com/relic-yuexi)'s PR.
|
| 174 |
+
|
| 175 |
+
[24/07/04] We supported [contamination-free packed training](https://github.com/MeetKai/functionary/tree/main/functionary/train/packing). Use `neat_packing: true` to activate it. Thank [@chuan298](https://github.com/chuan298)'s PR.
|
| 176 |
+
|
| 177 |
+
[24/06/16] We supported **[PiSSA](https://arxiv.org/abs/2404.02948)** algorithm. See [examples](examples/README.md) for usage.
|
| 178 |
+
|
| 179 |
+
[24/06/07] We supported fine-tuning the **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** and **[GLM-4](https://github.com/THUDM/GLM-4)** models.
|
| 180 |
+
|
| 181 |
+
[24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage.
|
| 182 |
+
|
| 183 |
+
[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `paligemma` template for chat completion.
|
| 184 |
+
|
| 185 |
+
[24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage.
|
| 186 |
+
|
| 187 |
+
[24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details.
|
| 188 |
+
|
| 189 |
+
[24/04/26] We supported fine-tuning the **LLaVA-1.5** multimodal LLMs. See [examples](examples/README.md) for usage.
|
| 190 |
+
|
| 191 |
+
[24/04/22] We provided a **[Colab notebook](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)** for fine-tuning the Llama-3 model on a free T4 GPU. Two Llama-3-derived models fine-tuned using LLaMA Factory are available at Hugging Face, check [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) and [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese) for details.
|
| 192 |
+
|
| 193 |
+
[24/04/21] We supported **[Mixture-of-Depths](https://arxiv.org/abs/2404.02258)** according to [AstraMindAI's implementation](https://github.com/astramind-ai/Mixture-of-depths). See [examples](examples/README.md) for usage.
|
| 194 |
+
|
| 195 |
+
[24/04/16] We supported **[BAdam](https://arxiv.org/abs/2404.02827)** optimizer. See [examples](examples/README.md) for usage.
|
| 196 |
+
|
| 197 |
+
[24/04/16] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s long-sequence training (Llama-2-7B-56k within 24GB). It achieves **117%** speed and **50%** memory compared with FlashAttention-2, more benchmarks can be found in [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison).
|
| 198 |
+
|
| 199 |
+
[24/03/31] We supported **[ORPO](https://arxiv.org/abs/2403.07691)**. See [examples](examples/README.md) for usage.
|
| 200 |
+
|
| 201 |
+
[24/03/21] Our paper "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" is available at arXiv!
|
| 202 |
+
|
| 203 |
+
[24/03/20] We supported **FSDP+QLoRA** that fine-tunes a 70B model on 2x24GB GPUs. See [examples](examples/README.md) for usage.
|
| 204 |
+
|
| 205 |
+
[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. See [examples](examples/README.md) for usage.
|
| 206 |
+
|
| 207 |
+
[24/03/07] We supported **[GaLore](https://arxiv.org/abs/2403.03507)** optimizer. See [examples](examples/README.md) for usage.
|
| 208 |
+
|
| 209 |
+
[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `infer_backend: vllm` to enjoy **270%** inference speed.
|
| 210 |
+
|
| 211 |
+
[24/02/28] We supported weight-decomposed LoRA (**[DoRA](https://arxiv.org/abs/2402.09353)**). Try `use_dora: true` to activate DoRA training.
|
| 212 |
+
|
| 213 |
+
[24/02/15] We supported **block expansion** proposed by [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro). See [examples](examples/README.md) for usage.
|
| 214 |
+
|
| 215 |
+
[24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details.
|
| 216 |
+
|
| 217 |
+
[24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `dataset: glaive_toolcall_en`.
|
| 218 |
+
|
| 219 |
+
[23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `use_unsloth: true` argument to activate unsloth patch. It achieves **170%** speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details.
|
| 220 |
+
|
| 221 |
+
[23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement).
|
| 222 |
+
|
| 223 |
+
[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)**. See [this tutorial](#download-from-modelscope-hub) for usage.
|
| 224 |
+
|
| 225 |
+
[23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `neftune_noise_alpha: 5` argument to activate NEFTune.
|
| 226 |
+
|
| 227 |
+
[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `shift_attn: true` argument to enable shift short attention.
|
| 228 |
+
|
| 229 |
+
[23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [examples](examples/README.md) for usage.
|
| 230 |
+
|
| 231 |
+
[23/09/10] We supported **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**. Try `flash_attn: fa2` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs.
|
| 232 |
+
|
| 233 |
+
[23/08/12] We supported **RoPE scaling** to extend the context length of the LLaMA models. Try `rope_scaling: linear` argument in training and `rope_scaling: dynamic` argument at inference to extrapolate the position embeddings.
|
| 234 |
+
|
| 235 |
+
[23/08/11] We supported **[DPO training](https://arxiv.org/abs/2305.18290)** for instruction-tuned models. See [examples](examples/README.md) for usage.
|
| 236 |
+
|
| 237 |
+
[23/07/31] We supported **dataset streaming**. Try `streaming: true` and `max_steps: 10000` arguments to load your dataset in streaming mode.
|
| 238 |
+
|
| 239 |
+
[23/07/29] We released two instruction-tuned 13B models at Hugging Face. See these Hugging Face Repos ([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft)) for details.
|
| 240 |
+
|
| 241 |
+
[23/07/18] We developed an **all-in-one Web UI** for training, evaluation and inference. Try `train_web.py` to fine-tune models in your Web browser. Thank [@KanadeSiina](https://github.com/KanadeSiina) and [@codemayq](https://github.com/codemayq) for their efforts in the development.
|
| 242 |
+
|
| 243 |
+
[23/07/09] We released **[FastEdit](https://github.com/hiyouga/FastEdit)** ⚡🩹, an easy-to-use package for editing the factual knowledge of large language models efficiently. Please follow [FastEdit](https://github.com/hiyouga/FastEdit) if you are interested.
|
| 244 |
+
|
| 245 |
+
[23/06/29] We provided a **reproducible example** of training a chat model using instruction-following datasets, see [Baichuan-7B-sft](https://huggingface.co/hiyouga/Baichuan-7B-sft) for details.
|
| 246 |
+
|
| 247 |
+
[23/06/22] We aligned the [demo API](src/api_demo.py) with the [OpenAI's](https://platform.openai.com/docs/api-reference/chat) format where you can insert the fine-tuned model in **arbitrary ChatGPT-based applications**.
|
| 248 |
+
|
| 249 |
+
[23/06/03] We supported quantized training and inference (aka **[QLoRA](https://github.com/artidoro/qlora)**). See [examples](examples/README.md) for usage.
|
| 250 |
+
|
| 251 |
+
</details>
|
| 252 |
+
|
| 253 |
+
> [!TIP]
|
| 254 |
+
> If you cannot use the latest feature, please pull the latest code and install LLaMA-Factory again.
|
| 255 |
+
|
| 256 |
+
## Supported Models
|
| 257 |
+
|
| 258 |
+
| Model | Model size | Template |
|
| 259 |
+
| ----------------------------------------------------------------- | -------------------------------- | ------------------- |
|
| 260 |
+
| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
|
| 261 |
+
| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
|
| 262 |
+
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
|
| 263 |
+
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
| 264 |
+
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
| 265 |
+
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
|
| 266 |
+
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
|
| 267 |
+
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| 268 |
+
| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
|
| 269 |
+
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
|
| 270 |
+
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
|
| 271 |
+
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4/glmz1 |
|
| 272 |
+
| [GLM-4.1V](https://huggingface.co/THUDM)* | 9B | glm4v |
|
| 273 |
+
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
| 274 |
+
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| 275 |
+
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
| 276 |
+
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
| 277 |
+
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
| 278 |
+
| [InternVL 2.5-3](https://huggingface.co/OpenGVLab) | 1B/2B/8B/14B/38B/78B | intern_vl |
|
| 279 |
+
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
|
| 280 |
+
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
| 281 |
+
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
| 282 |
+
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
|
| 283 |
+
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
|
| 284 |
+
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
|
| 285 |
+
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
|
| 286 |
+
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
| 287 |
+
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
| 288 |
+
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
|
| 289 |
+
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
| 290 |
+
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
| 291 |
+
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
|
| 292 |
+
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
| 293 |
+
| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small |
|
| 294 |
+
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
|
| 295 |
+
| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
|
| 296 |
+
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
|
| 297 |
+
| [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi |
|
| 298 |
+
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
|
| 299 |
+
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
| 300 |
+
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
| 301 |
+
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
| 302 |
+
| [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
|
| 303 |
+
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
| 304 |
+
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
|
| 305 |
+
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
| 306 |
+
| [Seed Coder](https://huggingface.co/ByteDance-Seed) | 8B | seed_coder |
|
| 307 |
+
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
| 308 |
+
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
| 309 |
+
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
|
| 310 |
+
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
|
| 311 |
+
| [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
|
| 312 |
+
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
|
| 313 |
+
| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
|
| 314 |
+
|
| 315 |
+
> [!NOTE]
|
| 316 |
+
> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
|
| 317 |
+
>
|
| 318 |
+
> Remember to use the **SAME** template in training and inference.
|
| 319 |
+
>
|
| 320 |
+
> \*: You should install the `transformers` from main branch and use `DISABLE_VERSION_CHECK=1` to skip version check.
|
| 321 |
+
>
|
| 322 |
+
> \*\*: You need to install a specific version of `transformers` to use the corresponding model.
|
| 323 |
+
|
| 324 |
+
Please refer to [constants.py](src/llamafactory/extras/constants.py) for a full list of the models we support.
|
| 325 |
+
|
| 326 |
+
You can also add a custom chat template to [template.py](src/llamafactory/data/template.py).
|
| 327 |
+
|
| 328 |
+
## Supported Training Approaches
|
| 329 |
+
|
| 330 |
+
| Approach | Full-tuning | Freeze-tuning | LoRA | QLoRA |
|
| 331 |
+
| ---------------------- | ------------------ | ------------------ | ------------------ | ------------------ |
|
| 332 |
+
| Pre-Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 333 |
+
| Supervised Fine-Tuning | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 334 |
+
| Reward Modeling | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 335 |
+
| PPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 336 |
+
| DPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 337 |
+
| KTO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 338 |
+
| ORPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 339 |
+
| SimPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 340 |
+
|
| 341 |
+
> [!TIP]
|
| 342 |
+
> The implementation details of PPO can be found in [this blog](https://newfacade.github.io/notes-on-reinforcement-learning/17-ppo-trl.html).
|
| 343 |
+
|
| 344 |
+
## Provided Datasets
|
| 345 |
+
|
| 346 |
+
<details><summary>Pre-training datasets</summary>
|
| 347 |
+
|
| 348 |
+
- [Wiki Demo (en)](data/wiki_demo.txt)
|
| 349 |
+
- [RefinedWeb (en)](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
|
| 350 |
+
- [RedPajama V2 (en)](https://huggingface.co/datasets/togethercomputer/RedPajama-Data-V2)
|
| 351 |
+
- [Wikipedia (en)](https://huggingface.co/datasets/olm/olm-wikipedia-20221220)
|
| 352 |
+
- [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)
|
| 353 |
+
- [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile)
|
| 354 |
+
- [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B)
|
| 355 |
+
- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb)
|
| 356 |
+
- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu)
|
| 357 |
+
- [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack)
|
| 358 |
+
- [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata)
|
| 359 |
+
|
| 360 |
+
</details>
|
| 361 |
+
|
| 362 |
+
<details><summary>Supervised fine-tuning datasets</summary>
|
| 363 |
+
|
| 364 |
+
- [Identity (en&zh)](data/identity.json)
|
| 365 |
+
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
| 366 |
+
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca-3)
|
| 367 |
+
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
| 368 |
+
- [Glaive Function Calling V2 (en&zh)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
|
| 369 |
+
- [LIMA (en)](https://huggingface.co/datasets/GAIR/lima)
|
| 370 |
+
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
|
| 371 |
+
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
|
| 372 |
+
- [BELLE 1M (zh)](https://huggingface.co/datasets/BelleGroup/train_1M_CN)
|
| 373 |
+
- [BELLE 0.5M (zh)](https://huggingface.co/datasets/BelleGroup/train_0.5M_CN)
|
| 374 |
+
- [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M)
|
| 375 |
+
- [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M)
|
| 376 |
+
- [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M)
|
| 377 |
+
- [UltraChat (en)](https://github.com/thunlp/UltraChat)
|
| 378 |
+
- [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus)
|
| 379 |
+
- [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k)
|
| 380 |
+
- [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT)
|
| 381 |
+
- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca)
|
| 382 |
+
- [SlimOrca (en)](https://huggingface.co/datasets/Open-Orca/SlimOrca)
|
| 383 |
+
- [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct)
|
| 384 |
+
- [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
|
| 385 |
+
- [Wiki QA (en)](https://huggingface.co/datasets/wiki_qa)
|
| 386 |
+
- [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
|
| 387 |
+
- [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn)
|
| 388 |
+
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
| 389 |
+
- [deepctrl (en&zh)](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data)
|
| 390 |
+
- [Advertise Generating (zh)](https://huggingface.co/datasets/HasturOfficial/adgen)
|
| 391 |
+
- [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k)
|
| 392 |
+
- [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4)
|
| 393 |
+
- [UltraChat 200k (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k)
|
| 394 |
+
- [AgentInstruct (en)](https://huggingface.co/datasets/THUDM/AgentInstruct)
|
| 395 |
+
- [LMSYS Chat 1M (en)](https://huggingface.co/datasets/lmsys/lmsys-chat-1m)
|
| 396 |
+
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
|
| 397 |
+
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
|
| 398 |
+
- [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction)
|
| 399 |
+
- [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo)
|
| 400 |
+
- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2)
|
| 401 |
+
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
|
| 402 |
+
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
|
| 403 |
+
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
|
| 404 |
+
- [OpenO1-SFT (en&zh)](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)
|
| 405 |
+
- [Open-Thoughts (en)](https://huggingface.co/datasets/open-thoughts/OpenThoughts-114k)
|
| 406 |
+
- [Open-R1-Math (en)](https://huggingface.co/datasets/open-r1/OpenR1-Math-220k)
|
| 407 |
+
- [Chinese-DeepSeek-R1-Distill (zh)](https://huggingface.co/datasets/Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT)
|
| 408 |
+
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
| 409 |
+
- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
|
| 410 |
+
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
| 411 |
+
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
| 412 |
+
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
| 413 |
+
- [OpenSchnabeltier (de)](https://huggingface.co/datasets/mayflowergmbh/openschnabeltier_de)
|
| 414 |
+
- [Evol Instruct (de)](https://huggingface.co/datasets/mayflowergmbh/evol-instruct_de)
|
| 415 |
+
- [Dolphin (de)](https://huggingface.co/datasets/mayflowergmbh/dolphin_de)
|
| 416 |
+
- [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de)
|
| 417 |
+
- [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de)
|
| 418 |
+
- [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de)
|
| 419 |
+
|
| 420 |
+
</details>
|
| 421 |
+
|
| 422 |
+
<details><summary>Preference datasets</summary>
|
| 423 |
+
|
| 424 |
+
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
|
| 425 |
+
- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
|
| 426 |
+
- [COIG-P (zh)](https://huggingface.co/datasets/m-a-p/COIG-P)
|
| 427 |
+
- [RLHF-V (en)](https://huggingface.co/datasets/openbmb/RLHF-V-Dataset)
|
| 428 |
+
- [VLFeedback (en)](https://huggingface.co/datasets/Zhihui/VLFeedback)
|
| 429 |
+
- [RLAIF-V (en)](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset)
|
| 430 |
+
- [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
|
| 431 |
+
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
|
| 432 |
+
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
| 433 |
+
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
|
| 434 |
+
- [KTO mixed (en)](https://huggingface.co/datasets/argilla/kto-mix-15k)
|
| 435 |
+
|
| 436 |
+
</details>
|
| 437 |
+
|
| 438 |
+
Some datasets require confirmation before using them, so we recommend logging in with your Hugging Face account using these commands.
|
| 439 |
+
|
| 440 |
+
```bash
|
| 441 |
+
pip install --upgrade huggingface_hub
|
| 442 |
+
huggingface-cli login
|
| 443 |
+
```
|
| 444 |
+
|
| 445 |
+
## Requirement
|
| 446 |
+
|
| 447 |
+
| Mandatory | Minimum | Recommend |
|
| 448 |
+
| ------------ | ------- | --------- |
|
| 449 |
+
| python | 3.9 | 3.10 |
|
| 450 |
+
| torch | 2.0.0 | 2.6.0 |
|
| 451 |
+
| torchvision | 0.15.0 | 0.21.0 |
|
| 452 |
+
| transformers | 4.49.0 | 4.50.0 |
|
| 453 |
+
| datasets | 2.16.0 | 3.2.0 |
|
| 454 |
+
| accelerate | 0.34.0 | 1.2.1 |
|
| 455 |
+
| peft | 0.14.0 | 0.15.1 |
|
| 456 |
+
| trl | 0.8.6 | 0.9.6 |
|
| 457 |
+
|
| 458 |
+
| Optional | Minimum | Recommend |
|
| 459 |
+
| ------------ | ------- | --------- |
|
| 460 |
+
| CUDA | 11.6 | 12.2 |
|
| 461 |
+
| deepspeed | 0.10.0 | 0.16.4 |
|
| 462 |
+
| bitsandbytes | 0.39.0 | 0.43.1 |
|
| 463 |
+
| vllm | 0.4.3 | 0.8.2 |
|
| 464 |
+
| flash-attn | 2.5.6 | 2.7.2 |
|
| 465 |
+
|
| 466 |
+
### Hardware Requirement
|
| 467 |
+
|
| 468 |
+
\* *estimated*
|
| 469 |
+
|
| 470 |
+
| Method | Bits | 7B | 14B | 30B | 70B | `x`B |
|
| 471 |
+
| ------------------------------- | ---- | ----- | ----- | ----- | ------ | ------- |
|
| 472 |
+
| Full (`bf16` or `fp16`) | 32 | 120GB | 240GB | 600GB | 1200GB | `18x`GB |
|
| 473 |
+
| Full (`pure_bf16`) | 16 | 60GB | 120GB | 300GB | 600GB | `8x`GB |
|
| 474 |
+
| Freeze/LoRA/GaLore/APOLLO/BAdam | 16 | 16GB | 32GB | 64GB | 160GB | `2x`GB |
|
| 475 |
+
| QLoRA | 8 | 10GB | 20GB | 40GB | 80GB | `x`GB |
|
| 476 |
+
| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | `x/2`GB |
|
| 477 |
+
| QLoRA | 2 | 4GB | 8GB | 16GB | 24GB | `x/4`GB |
|
| 478 |
+
|
| 479 |
+
## Getting Started
|
| 480 |
+
|
| 481 |
+
### Installation
|
| 482 |
+
|
| 483 |
+
> [!IMPORTANT]
|
| 484 |
+
> Installation is mandatory.
|
| 485 |
+
|
| 486 |
+
#### Install from Source
|
| 487 |
+
|
| 488 |
+
```bash
|
| 489 |
+
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
|
| 490 |
+
cd LLaMA-Factory
|
| 491 |
+
pip install -e ".[torch,metrics]" --no-build-isolation
|
| 492 |
+
```
|
| 493 |
+
|
| 494 |
+
Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, openmind, swanlab, dev
|
| 495 |
+
|
| 496 |
+
#### Install from Docker Image
|
| 497 |
+
|
| 498 |
+
```bash
|
| 499 |
+
docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
|
| 500 |
+
```
|
| 501 |
+
|
| 502 |
+
This image is built on Ubuntu 22.04 (x86\_64), CUDA 12.4, Python 3.11, PyTorch 2.6.0, and Flash-attn 2.7.4.
|
| 503 |
+
|
| 504 |
+
Find the pre-built images: https://hub.docker.com/r/hiyouga/llamafactory/tags
|
| 505 |
+
|
| 506 |
+
Please refer to [build docker](#build-docker) to build the image yourself.
|
| 507 |
+
|
| 508 |
+
<details><summary>Setting up a virtual environment with <b>uv</b></summary>
|
| 509 |
+
|
| 510 |
+
Create an isolated Python environment with [uv](https://github.com/astral-sh/uv):
|
| 511 |
+
|
| 512 |
+
```bash
|
| 513 |
+
uv sync --extra torch --extra metrics --prerelease=allow
|
| 514 |
+
```
|
| 515 |
+
|
| 516 |
+
Run LLaMA-Factory in the isolated environment:
|
| 517 |
+
|
| 518 |
+
```bash
|
| 519 |
+
uv run --prerelease=allow llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml
|
| 520 |
+
```
|
| 521 |
+
|
| 522 |
+
</details>
|
| 523 |
+
|
| 524 |
+
<details><summary>For Windows users</summary>
|
| 525 |
+
|
| 526 |
+
#### Install PyTorch
|
| 527 |
+
|
| 528 |
+
You need to manually install the GPU version of PyTorch on the Windows platform. Please refer to the [official website](https://pytorch.org/get-started/locally/) and the following command to install PyTorch with CUDA support:
|
| 529 |
+
|
| 530 |
+
```bash
|
| 531 |
+
pip uninstall torch torchvision torchaudio
|
| 532 |
+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
|
| 533 |
+
python -c "import torch; print(torch.cuda.is_available())"
|
| 534 |
+
```
|
| 535 |
+
|
| 536 |
+
If you see `True` then you have successfully installed PyTorch with CUDA support.
|
| 537 |
+
|
| 538 |
+
Try `dataloader_num_workers: 0` if you encounter `Can't pickle local object` error.
|
| 539 |
+
|
| 540 |
+
#### Install BitsAndBytes
|
| 541 |
+
|
| 542 |
+
If you want to enable the quantized LoRA (QLoRA) on the Windows platform, you need to install a pre-built version of `bitsandbytes` library, which supports CUDA 11.1 to 12.2, please select the appropriate [release version](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels) based on your CUDA version.
|
| 543 |
+
|
| 544 |
+
```bash
|
| 545 |
+
pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.2.post2-py3-none-win_amd64.whl
|
| 546 |
+
```
|
| 547 |
+
|
| 548 |
+
#### Install Flash Attention-2
|
| 549 |
+
|
| 550 |
+
To enable FlashAttention-2 on the Windows platform, please use the script from [flash-attention-windows-wheel](https://huggingface.co/lldacing/flash-attention-windows-wheel) to compile and install it by yourself.
|
| 551 |
+
|
| 552 |
+
</details>
|
| 553 |
+
|
| 554 |
+
<details><summary>For Ascend NPU users</summary>
|
| 555 |
+
|
| 556 |
+
To install LLaMA Factory on Ascend NPU devices, please upgrade Python to version 3.10 or higher and specify extra dependencies: `pip install -e ".[torch-npu,metrics]"`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands:
|
| 557 |
+
|
| 558 |
+
```bash
|
| 559 |
+
# replace the url according to your CANN version and devices
|
| 560 |
+
# install CANN Toolkit
|
| 561 |
+
wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C20SPC702/Ascend-cann-toolkit_8.0.0.alpha002_linux-"$(uname -i)".run
|
| 562 |
+
bash Ascend-cann-toolkit_8.0.0.alpha002_linux-"$(uname -i)".run --install
|
| 563 |
+
|
| 564 |
+
# install CANN Kernels
|
| 565 |
+
wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C20SPC702/Ascend-cann-kernels-910b_8.0.0.alpha002_linux-"$(uname -i)".run
|
| 566 |
+
bash Ascend-cann-kernels-910b_8.0.0.alpha002_linux-"$(uname -i)".run --install
|
| 567 |
+
|
| 568 |
+
# set env variables
|
| 569 |
+
source /usr/local/Ascend/ascend-toolkit/set_env.sh
|
| 570 |
+
```
|
| 571 |
+
|
| 572 |
+
| Requirement | Minimum | Recommend |
|
| 573 |
+
| ------------ | ------- | -------------- |
|
| 574 |
+
| CANN | 8.0.RC1 | 8.0.0.alpha002 |
|
| 575 |
+
| torch | 2.1.0 | 2.4.0 |
|
| 576 |
+
| torch-npu | 2.1.0 | 2.4.0.post2 |
|
| 577 |
+
| deepspeed | 0.13.2 | 0.13.2 |
|
| 578 |
+
| vllm-ascend | - | 0.7.3 |
|
| 579 |
+
|
| 580 |
+
Remember to use `ASCEND_RT_VISIBLE_DEVICES` instead of `CUDA_VISIBLE_DEVICES` to specify the device to use.
|
| 581 |
+
|
| 582 |
+
If you cannot infer model on NPU devices, try setting `do_sample: false` in the configurations.
|
| 583 |
+
|
| 584 |
+
Download the pre-built Docker images: [32GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) | [64GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html)
|
| 585 |
+
|
| 586 |
+
#### Install BitsAndBytes
|
| 587 |
+
|
| 588 |
+
To use QLoRA based on bitsandbytes on Ascend NPU, please follow these 3 steps:
|
| 589 |
+
|
| 590 |
+
1. Manually compile bitsandbytes: Refer to [the installation documentation](https://huggingface.co/docs/bitsandbytes/installation?backend=Ascend+NPU&platform=Ascend+NPU) for the NPU version of bitsandbytes to complete the compilation and installation. The compilation requires a cmake version of at least 3.22.1 and a g++ version of at least 12.x.
|
| 591 |
+
|
| 592 |
+
```bash
|
| 593 |
+
# Install bitsandbytes from source
|
| 594 |
+
# Clone bitsandbytes repo, Ascend NPU backend is currently enabled on multi-backend-refactor branch
|
| 595 |
+
git clone -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git
|
| 596 |
+
cd bitsandbytes/
|
| 597 |
+
|
| 598 |
+
# Install dependencies
|
| 599 |
+
pip install -r requirements-dev.txt
|
| 600 |
+
|
| 601 |
+
# Install the dependencies for the compilation tools. Note that the commands for this step may vary depending on the operating system. The following are provided for reference
|
| 602 |
+
apt-get install -y build-essential cmake
|
| 603 |
+
|
| 604 |
+
# Compile & install
|
| 605 |
+
cmake -DCOMPUTE_BACKEND=npu -S .
|
| 606 |
+
make
|
| 607 |
+
pip install .
|
| 608 |
+
```
|
| 609 |
+
|
| 610 |
+
2. Install transformers from the main branch.
|
| 611 |
+
|
| 612 |
+
```bash
|
| 613 |
+
git clone -b main https://github.com/huggingface/transformers.git
|
| 614 |
+
cd transformers
|
| 615 |
+
pip install .
|
| 616 |
+
```
|
| 617 |
+
|
| 618 |
+
3. Set `double_quantization: false` in the configuration. You can refer to the [example](examples/train_qlora/llama3_lora_sft_bnb_npu.yaml).
|
| 619 |
+
|
| 620 |
+
</details>
|
| 621 |
+
|
| 622 |
+
### Data Preparation
|
| 623 |
+
|
| 624 |
+
Please refer to [data/README.md](data/README.md) for details about the format of dataset files. You can use datasets on the HuggingFace / ModelScope / Modelers hub, load datasets from local disk, or specify a path to s3/gcs cloud storage.
|
| 625 |
+
|
| 626 |
+
> [!NOTE]
|
| 627 |
+
> Please update `data/dataset_info.json` to use your custom dataset.
|
| 628 |
+
|
| 629 |
+
You can also use **[Easy Dataset](https://github.com/ConardLi/easy-dataset)** or **[GraphGen](https://github.com/open-sciencelab/GraphGen)** to create synthetic data for fine-tuning.
|
| 630 |
+
|
| 631 |
+
### Quickstart
|
| 632 |
+
|
| 633 |
+
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
|
| 634 |
+
|
| 635 |
+
```bash
|
| 636 |
+
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
| 637 |
+
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
| 638 |
+
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
| 639 |
+
```
|
| 640 |
+
|
| 641 |
+
See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
|
| 642 |
+
|
| 643 |
+
> [!TIP]
|
| 644 |
+
> Use `llamafactory-cli help` to show help information.
|
| 645 |
+
>
|
| 646 |
+
> Read [FAQs](https://github.com/hiyouga/LLaMA-Factory/issues/4614) first if you encounter any problems.
|
| 647 |
+
|
| 648 |
+
### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
|
| 649 |
+
|
| 650 |
+
```bash
|
| 651 |
+
llamafactory-cli webui
|
| 652 |
+
```
|
| 653 |
+
|
| 654 |
+
### Build Docker
|
| 655 |
+
|
| 656 |
+
For CUDA users:
|
| 657 |
+
|
| 658 |
+
```bash
|
| 659 |
+
cd docker/docker-cuda/
|
| 660 |
+
docker compose up -d
|
| 661 |
+
docker compose exec llamafactory bash
|
| 662 |
+
```
|
| 663 |
+
|
| 664 |
+
For Ascend NPU users:
|
| 665 |
+
|
| 666 |
+
```bash
|
| 667 |
+
cd docker/docker-npu/
|
| 668 |
+
docker compose up -d
|
| 669 |
+
docker compose exec llamafactory bash
|
| 670 |
+
```
|
| 671 |
+
|
| 672 |
+
For AMD ROCm users:
|
| 673 |
+
|
| 674 |
+
```bash
|
| 675 |
+
cd docker/docker-rocm/
|
| 676 |
+
docker compose up -d
|
| 677 |
+
docker compose exec llamafactory bash
|
| 678 |
+
```
|
| 679 |
+
|
| 680 |
+
<details><summary>Build without Docker Compose</summary>
|
| 681 |
+
|
| 682 |
+
For CUDA users:
|
| 683 |
+
|
| 684 |
+
```bash
|
| 685 |
+
docker build -f ./docker/docker-cuda/Dockerfile \
|
| 686 |
+
--build-arg PIP_INDEX=https://pypi.org/simple \
|
| 687 |
+
--build-arg EXTRAS=metrics \
|
| 688 |
+
-t llamafactory:latest .
|
| 689 |
+
|
| 690 |
+
docker run -dit --ipc=host --gpus=all \
|
| 691 |
+
-p 7860:7860 \
|
| 692 |
+
-p 8000:8000 \
|
| 693 |
+
--name llamafactory \
|
| 694 |
+
llamafactory:latest
|
| 695 |
+
|
| 696 |
+
docker exec -it llamafactory bash
|
| 697 |
+
```
|
| 698 |
+
|
| 699 |
+
For Ascend NPU users:
|
| 700 |
+
|
| 701 |
+
```bash
|
| 702 |
+
docker build -f ./docker/docker-npu/Dockerfile \
|
| 703 |
+
--build-arg PIP_INDEX=https://pypi.org/simple \
|
| 704 |
+
--build-arg EXTRAS=torch-npu,metrics \
|
| 705 |
+
-t llamafactory:latest .
|
| 706 |
+
|
| 707 |
+
docker run -dit --ipc=host \
|
| 708 |
+
-v /usr/local/dcmi:/usr/local/dcmi \
|
| 709 |
+
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
|
| 710 |
+
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
|
| 711 |
+
-v /etc/ascend_install.info:/etc/ascend_install.info \
|
| 712 |
+
-p 7860:7860 \
|
| 713 |
+
-p 8000:8000 \
|
| 714 |
+
--device /dev/davinci0 \
|
| 715 |
+
--device /dev/davinci_manager \
|
| 716 |
+
--device /dev/devmm_svm \
|
| 717 |
+
--device /dev/hisi_hdc \
|
| 718 |
+
--name llamafactory \
|
| 719 |
+
llamafactory:latest
|
| 720 |
+
|
| 721 |
+
docker exec -it llamafactory bash
|
| 722 |
+
```
|
| 723 |
+
|
| 724 |
+
For AMD ROCm users:
|
| 725 |
+
|
| 726 |
+
```bash
|
| 727 |
+
docker build -f ./docker/docker-rocm/Dockerfile \
|
| 728 |
+
--build-arg PIP_INDEX=https://pypi.org/simple \
|
| 729 |
+
--build-arg EXTRAS=metrics \
|
| 730 |
+
-t llamafactory:latest .
|
| 731 |
+
|
| 732 |
+
docker run -dit --ipc=host \
|
| 733 |
+
-p 7860:7860 \
|
| 734 |
+
-p 8000:8000 \
|
| 735 |
+
--device /dev/kfd \
|
| 736 |
+
--device /dev/dri \
|
| 737 |
+
--name llamafactory \
|
| 738 |
+
llamafactory:latest
|
| 739 |
+
|
| 740 |
+
docker exec -it llamafactory bash
|
| 741 |
+
```
|
| 742 |
+
|
| 743 |
+
</details>
|
| 744 |
+
|
| 745 |
+
<details><summary>Use Docker volumes</summary>
|
| 746 |
+
|
| 747 |
+
You can uncomment `VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]` in the Dockerfile to use data volumes.
|
| 748 |
+
|
| 749 |
+
When building the Docker image, use `-v ./hf_cache:/root/.cache/huggingface` argument to mount the local directory to the container. The following data volumes are available.
|
| 750 |
+
|
| 751 |
+
- `hf_cache`: Utilize Hugging Face cache on the host machine.
|
| 752 |
+
- `shared_data`: The directory to store datasets on the host machine.
|
| 753 |
+
- `output`: Set export dir to this location so that the merged result can be accessed directly on the host machine.
|
| 754 |
+
|
| 755 |
+
</details>
|
| 756 |
+
|
| 757 |
+
### Deploy with OpenAI-style API and vLLM
|
| 758 |
+
|
| 759 |
+
```bash
|
| 760 |
+
API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true
|
| 761 |
+
```
|
| 762 |
+
|
| 763 |
+
> [!TIP]
|
| 764 |
+
> Visit [this page](https://platform.openai.com/docs/api-reference/chat/create) for API document.
|
| 765 |
+
>
|
| 766 |
+
> Examples: [Image understanding](scripts/api_example/test_image.py) | [Function calling](scripts/api_example/test_toolcall.py)
|
| 767 |
+
|
| 768 |
+
### Download from ModelScope Hub
|
| 769 |
+
|
| 770 |
+
If you have trouble downloading models and datasets from Hugging Face, you can use ModelScope.
|
| 771 |
+
|
| 772 |
+
```bash
|
| 773 |
+
export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows
|
| 774 |
+
```
|
| 775 |
+
|
| 776 |
+
Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`.
|
| 777 |
+
|
| 778 |
+
### Download from Modelers Hub
|
| 779 |
+
|
| 780 |
+
You can also use Modelers Hub to download models and datasets.
|
| 781 |
+
|
| 782 |
+
```bash
|
| 783 |
+
export USE_OPENMIND_HUB=1 # `set USE_OPENMIND_HUB=1` for Windows
|
| 784 |
+
```
|
| 785 |
+
|
| 786 |
+
Train the model by specifying a model ID of the Modelers Hub as the `model_name_or_path`. You can find a full list of model IDs at [Modelers Hub](https://modelers.cn/models), e.g., `TeleAI/TeleChat-7B-pt`.
|
| 787 |
+
|
| 788 |
+
### Use W&B Logger
|
| 789 |
+
|
| 790 |
+
To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments to yaml files.
|
| 791 |
+
|
| 792 |
+
```yaml
|
| 793 |
+
report_to: wandb
|
| 794 |
+
run_name: test_run # optional
|
| 795 |
+
```
|
| 796 |
+
|
| 797 |
+
Set `WANDB_API_KEY` to [your key](https://wandb.ai/authorize) when launching training tasks to log in with your W&B account.
|
| 798 |
+
|
| 799 |
+
### Use SwanLab Logger
|
| 800 |
+
|
| 801 |
+
To use [SwanLab](https://github.com/SwanHubX/SwanLab) for logging experimental results, you need to add the following arguments to yaml files.
|
| 802 |
+
|
| 803 |
+
```yaml
|
| 804 |
+
use_swanlab: true
|
| 805 |
+
swanlab_run_name: test_run # optional
|
| 806 |
+
```
|
| 807 |
+
|
| 808 |
+
When launching training tasks, you can log in to SwanLab in three ways:
|
| 809 |
+
|
| 810 |
+
1. Add `swanlab_api_key=<your_api_key>` to the yaml file, and set it to your [API key](https://swanlab.cn/settings).
|
| 811 |
+
2. Set the environment variable `SWANLAB_API_KEY` to your [API key](https://swanlab.cn/settings).
|
| 812 |
+
3. Use the `swanlab login` command to complete the login.
|
| 813 |
+
|
| 814 |
+
## Projects using LLaMA Factory
|
| 815 |
+
|
| 816 |
+
If you have a project that should be incorporated, please contact via email or create a pull request.
|
| 817 |
+
|
| 818 |
+
<details><summary>Click to show</summary>
|
| 819 |
+
|
| 820 |
+
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
|
| 821 |
+
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
|
| 822 |
+
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
|
| 823 |
+
1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816)
|
| 824 |
+
1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710)
|
| 825 |
+
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
|
| 826 |
+
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
|
| 827 |
+
1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904)
|
| 828 |
+
1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625)
|
| 829 |
+
1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176)
|
| 830 |
+
1. Yang et al. LaCo: Large Language Model Pruning via Layer Collapse. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187)
|
| 831 |
+
1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746)
|
| 832 |
+
1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801)
|
| 833 |
+
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809)
|
| 834 |
+
1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819)
|
| 835 |
+
1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204)
|
| 836 |
+
1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.14714)
|
| 837 |
+
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
|
| 838 |
+
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
|
| 839 |
+
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
|
| 840 |
+
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
|
| 841 |
+
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
|
| 842 |
+
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
|
| 843 |
+
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
|
| 844 |
+
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
|
| 845 |
+
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
|
| 846 |
+
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
|
| 847 |
+
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
|
| 848 |
+
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
|
| 849 |
+
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
|
| 850 |
+
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
|
| 851 |
+
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
|
| 852 |
+
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
|
| 853 |
+
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
|
| 854 |
+
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
|
| 855 |
+
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140)
|
| 856 |
+
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
|
| 857 |
+
1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760)
|
| 858 |
+
1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378)
|
| 859 |
+
1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055)
|
| 860 |
+
1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739)
|
| 861 |
+
1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816)
|
| 862 |
+
1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215)
|
| 863 |
+
1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30)
|
| 864 |
+
1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380)
|
| 865 |
+
1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106)
|
| 866 |
+
1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136)
|
| 867 |
+
1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496)
|
| 868 |
+
1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688)
|
| 869 |
+
1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955)
|
| 870 |
+
1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973)
|
| 871 |
+
1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115)
|
| 872 |
+
1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815)
|
| 873 |
+
1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099)
|
| 874 |
+
1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173)
|
| 875 |
+
1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074)
|
| 876 |
+
1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408)
|
| 877 |
+
1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546)
|
| 878 |
+
1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695)
|
| 879 |
+
1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233)
|
| 880 |
+
1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069)
|
| 881 |
+
1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburgh's Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25)
|
| 882 |
+
1. Li et al. Calibrating LLMs with Preference Optimization on Thought Trees for Generating Rationale in Science Question Scoring. 2024. [[arxiv]](https://arxiv.org/abs/2406.19949)
|
| 883 |
+
1. Yang et al. Financial Knowledge Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2407.00365)
|
| 884 |
+
1. Lin et al. DogeRM: Equipping Reward Models with Domain Knowledge through Model Merging. 2024. [[arxiv]](https://arxiv.org/abs/2407.01470)
|
| 885 |
+
1. Bako et al. Evaluating the Semantic Profiling Abilities of LLMs for Natural Language Utterances in Data Visualization. 2024. [[arxiv]](https://arxiv.org/abs/2407.06129)
|
| 886 |
+
1. Huang et al. RoLoRA: Fine-tuning Rotated Outlier-free LLMs for Effective Weight-Activation Quantization. 2024. [[arxiv]](https://arxiv.org/abs/2407.08044)
|
| 887 |
+
1. Jiang et al. LLM-Collaboration on Automatic Science Journalism for the General Audience. 2024. [[arxiv]](https://arxiv.org/abs/2407.09756)
|
| 888 |
+
1. Inouye et al. Applied Auto-tuning on LoRA Hyperparameters. 2024. [[paper]](https://scholarcommons.scu.edu/cseng_senior/272/)
|
| 889 |
+
1. Qi et al. Research on Tibetan Tourism Viewpoints information generation system based on LLM. 2024. [[arxiv]](https://arxiv.org/abs/2407.13561)
|
| 890 |
+
1. Xu et al. Course-Correction: Safety Alignment Using Synthetic Preferences. 2024. [[arxiv]](https://arxiv.org/abs/2407.16637)
|
| 891 |
+
1. Sun et al. LAMBDA: A Large Model Based Data Agent. 2024. [[arxiv]](https://arxiv.org/abs/2407.17535)
|
| 892 |
+
1. Zhu et al. CollectiveSFT: Scaling Large Language Models for Chinese Medical Benchmark with Collective Instructions in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2407.19705)
|
| 893 |
+
1. Yu et al. Correcting Negative Bias in Large Language Models through Negative Attention Score Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2408.00137)
|
| 894 |
+
1. Xie et al. The Power of Personalized Datasets: Advancing Chinese Composition Writing for Elementary School through Targeted Model Fine-Tuning. IALP 2024. [[paper]](https://www.asianlp.sg/conferences/ialp2024/proceedings/papers/IALP2024_P055.pdf)
|
| 895 |
+
1. Liu et al. Instruct-Code-Llama: Improving Capabilities of Language Model in Competition Level Code Generation by Online Judge Feedback. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_11)
|
| 896 |
+
1. Wang et al. Cybernetic Sentinels: Unveiling the Impact of Safety Data Selection on Model Security in Supervised Fine-Tuning. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_23)
|
| 897 |
+
1. Xia et al. Understanding the Performance and Estimating the Cost of LLM Fine-Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2408.04693)
|
| 898 |
+
1. Zeng et al. Perceive, Reflect, and Plan: Designing LLM Agent for Goal-Directed City Navigation without Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2408.04168)
|
| 899 |
+
1. Xia et al. Using Pre-trained Language Model for Accurate ESG Prediction. FinNLP 2024. [[paper]](https://aclanthology.org/2024.finnlp-2.1/)
|
| 900 |
+
1. Liang et al. I-SHEEP: Self-Alignment of LLM from Scratch through an Iterative Self-Enhancement Paradigm. 2024. [[arxiv]](https://arxiv.org/abs/2408.08072)
|
| 901 |
+
1. Bai et al. Aligning Large Language Model with Direct Multi-Preference Optimization for Recommendation. CIKM 2024. [[paper]](https://dl.acm.org/doi/10.1145/3627673.3679611)
|
| 902 |
+
1. Zhang et al. CPsyCoun: A Report-based Multi-turn Dialogue Reconstruction and Evaluation Framework for Chinese Psychological Counseling. ACL 2024. [[paper]](https://aclanthology.org/2024.findings-acl.830.pdf)
|
| 903 |
+
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B.
|
| 904 |
+
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge.
|
| 905 |
+
1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B.
|
| 906 |
+
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: A series of large language models for Chinese medical domain, based on LLaMA2-7B and Baichuan-13B.
|
| 907 |
+
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods.
|
| 908 |
+
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**: A large language model specialized in generating metadata for stable diffusion. [[demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
|
| 909 |
+
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: A multimodal large language model specialized in Chinese medical domain, based on LLaVA-1.5-7B.
|
| 910 |
+
1. **[AutoRE](https://github.com/THUDM/AutoRE)**: A document-level relation extraction system based on large language models.
|
| 911 |
+
1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**: SDKs for fine-tuning LLMs on Windows PC for NVIDIA RTX.
|
| 912 |
+
1. **[LazyLLM](https://github.com/LazyAGI/LazyLLM)**: An easy and lazy way for building multi-agent LLMs applications and supports model fine-tuning via LLaMA Factory.
|
| 913 |
+
1. **[RAG-Retrieval](https://github.com/NLPJCL/RAG-Retrieval)**: A full pipeline for RAG retrieval model fine-tuning, inference, and distillation. [[blog]](https://zhuanlan.zhihu.com/p/987727357)
|
| 914 |
+
1. **[360-LLaMA-Factory](https://github.com/Qihoo360/360-LLaMA-Factory)**: A modified library that supports long sequence SFT & DPO using ring attention.
|
| 915 |
+
1. **[Sky-T1](https://novasky-ai.github.io/posts/sky-t1/)**: An o1-like model fine-tuned by NovaSky AI with very small cost.
|
| 916 |
+
1. **[WeClone](https://github.com/xming521/WeClone)**: One-stop solution for creating your digital avatar from chat logs.
|
| 917 |
+
1. **[EmoLLM](https://github.com/SmartFlowAI/EmoLLM)**: A project about large language models (LLMs) and mental health.
|
| 918 |
+
</details>
|
| 919 |
+
|
| 920 |
+
## License
|
| 921 |
+
|
| 922 |
+
This repository is licensed under the [Apache-2.0 License](LICENSE).
|
| 923 |
+
|
| 924 |
+
Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / 
[Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
|
| 925 |
+
|
| 926 |
+
## Citation
|
| 927 |
+
|
| 928 |
+
If this work is helpful, please kindly cite as:
|
| 929 |
+
|
| 930 |
+
```bibtex
|
| 931 |
+
@inproceedings{zheng2024llamafactory,
|
| 932 |
+
title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models},
|
| 933 |
+
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma},
|
| 934 |
+
booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
|
| 935 |
+
address={Bangkok, Thailand},
|
| 936 |
+
publisher={Association for Computational Linguistics},
|
| 937 |
+
year={2024},
|
| 938 |
+
url={http://arxiv.org/abs/2403.13372}
|
| 939 |
+
}
|
| 940 |
+
```
|
| 941 |
+
|
| 942 |
+
## Acknowledgement
|
| 943 |
+
|
| 944 |
+
This repo benefits from [PEFT](https://github.com/huggingface/peft), [TRL](https://github.com/huggingface/trl), [QLoRA](https://github.com/artidoro/qlora) and [FastChat](https://github.com/lm-sys/FastChat). Thanks for their wonderful works.
|
| 945 |
+
|
| 946 |
+
## Star History
|
| 947 |
+
|
| 948 |
+

|
README_zh.md
ADDED
|
@@ -0,0 +1,949 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+

|
| 2 |
+
|
| 3 |
+
[](https://github.com/hiyouga/LLaMA-Factory/stargazers)
|
| 4 |
+
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
|
| 5 |
+
[](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
|
| 6 |
+
[](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
|
| 7 |
+
[](https://pypi.org/project/llamafactory/)
|
| 8 |
+
[](https://scholar.google.com/scholar?cites=12620864006390196564)
|
| 9 |
+
[](https://hub.docker.com/r/hiyouga/llamafactory/tags)
|
| 10 |
+
|
| 11 |
+
[](https://twitter.com/llamafactory_ai)
|
| 12 |
+
[](https://discord.gg/rKfvV9r9FK)
|
| 13 |
+
[](https://gitcode.com/zhengyaowei/LLaMA-Factory)
|
| 14 |
+
|
| 15 |
+
[](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)
|
| 16 |
+
[](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
|
| 17 |
+
[](https://docs.alayanew.com/docs/documents/newActivities/llamafactory/?utm_source=LLaMA-Factory)
|
| 18 |
+
[](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
|
| 19 |
+
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
|
| 20 |
+
[](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47)
|
| 21 |
+
|
| 22 |
+
### 获得[亚马逊](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)、[英伟达](https://developer.nvidia.cn/rtx/ai-toolkit)、[阿里云](https://help.aliyun.com/zh/pai/use-cases/fine-tune-a-llama-3-model-with-llama-factory)等的应用。
|
| 23 |
+
|
| 24 |
+
<div align="center" markdown="1">
|
| 25 |
+
|
| 26 |
+
### 赞助商 ❤️
|
| 27 |
+
|
| 28 |
+
<a href="https://warp.dev/llama-factory">
|
| 29 |
+
<img alt="Warp sponsorship" width="400" src="https://github.com/user-attachments/assets/ab8dd143-b0fd-4904-bdc5-dd7ecac94eae">
|
| 30 |
+
</a>
|
| 31 |
+
|
| 32 |
+
#### [Warp,面向开发者的智能终端](https://warp.dev/llama-factory)
|
| 33 |
+
|
| 34 |
+
[适用于 MacOS、Linux 和 Windows](https://warp.dev/llama-factory)
|
| 35 |
+
|
| 36 |
+
----
|
| 37 |
+
|
| 38 |
+
### 使用零代码[命令行](#快速开始)与 [Web UI](#llama-board-可视化微调由-gradio-驱动) 轻松微调百余种大模型
|
| 39 |
+
|
| 40 |
+

|
| 41 |
+
|
| 42 |
+
</div>
|
| 43 |
+
|
| 44 |
+
👋 加入我们的[微信群](assets/wechat.jpg)、[NPU 用户群](assets/wechat_npu.jpg)或 [九章智算云算力优惠群](assets/wechat_alaya.png)。
|
| 45 |
+
|
| 46 |
+
\[ [English](README.md) | 中文 \]
|
| 47 |
+
|
| 48 |
+
**微调大模型可以像这样轻松…**
|
| 49 |
+
|
| 50 |
+
https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
| 51 |
+
|
| 52 |
+
选择你的打开方式:
|
| 53 |
+
|
| 54 |
+
- **入门教程**:https://zhuanlan.zhihu.com/p/695287607
|
| 55 |
+
- **微调视频教程**:https://www.bilibili.com/video/BV1djgRzxEts/
|
| 56 |
+
- **框架文档**:https://llamafactory.readthedocs.io/zh-cn/latest/
|
| 57 |
+
- **框架文档(昇腾 NPU)**:https://ascend.github.io/docs/sources/llamafactory/
|
| 58 |
+
- **Colab(免费)**:https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing
|
| 59 |
+
- **本地机器**:请见[如何使用](#如何使用)
|
| 60 |
+
- **PAI-DSW(免费试用)**:https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
|
| 61 |
+
- **九章智算云(算力优惠活动)**:https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory
|
| 62 |
+
|
| 63 |
+
> [!NOTE]
|
| 64 |
+
> 除上述链接以外的其他网站均为未经许可的第三方网站,请小心甄别。
|
| 65 |
+
|
| 66 |
+
## 目录
|
| 67 |
+
|
| 68 |
+
- [项目特色](#项目特色)
|
| 69 |
+
- [官方博客](#官方博客)
|
| 70 |
+
- [更新日志](#更新日志)
|
| 71 |
+
- [模型](#模型)
|
| 72 |
+
- [训练方法](#训练方法)
|
| 73 |
+
- [数据集](#数据集)
|
| 74 |
+
- [软硬件依赖](#软硬件依赖)
|
| 75 |
+
- [如何使用](#如何使用)
|
| 76 |
+
- [安装 LLaMA Factory](#安装-llama-factory)
|
| 77 |
+
- [数据准备](#数据准备)
|
| 78 |
+
- [快速开始](#快速开始)
|
| 79 |
+
- [LLaMA Board 可视化微调](#llama-board-可视化微调由-gradio-驱动)
|
| 80 |
+
- [构建 Docker](#构建-docker)
|
| 81 |
+
- [利用 vLLM 部署 OpenAI API](#利用-vllm-部署-openai-api)
|
| 82 |
+
- [从魔搭社区下载](#从魔搭社区下载)
|
| 83 |
+
- [从魔乐社区下载](#从魔乐社区下载)
|
| 84 |
+
- [使用 W&B 面板](#使用-wb-面板)
|
| 85 |
+
- [使用 SwanLab 面板](#使用-swanlab-面板)
|
| 86 |
+
- [使用了 LLaMA Factory 的项目](#使用了-llama-factory-的项目)
|
| 87 |
+
- [协议](#协议)
|
| 88 |
+
- [引用](#引用)
|
| 89 |
+
- [致谢](#致谢)
|
| 90 |
+
|
| 91 |
+
## 项目特色
|
| 92 |
+
|
| 93 |
+
- **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Qwen2-VL、DeepSeek、Yi、Gemma、ChatGLM、Phi 等等。
|
| 94 |
+
- **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。
|
| 95 |
+
- **多种精度**:16 比特全参数微调、冻结微调、LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ 的 2/3/4/5/6/8 比特 QLoRA 微调。
|
| 96 |
+
- **先进算法**:[GaLore](https://github.com/jiaweizzhao/GaLore)、[BAdam](https://github.com/Ledzy/BAdam)、[APOLLO](https://github.com/zhuhanqing/APOLLO)、[Adam-mini](https://github.com/zyushun/Adam-mini)、[Muon](https://github.com/KellerJordan/Muon)、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 PiSSA。
|
| 97 |
+
- **实用技巧**:[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)、[Unsloth](https://github.com/unslothai/unsloth)、[Liger Kernel](https://github.com/linkedin/Liger-Kernel)、RoPE scaling、NEFTune 和 rsLoRA。
|
| 98 |
+
- **广泛任务**:多轮对话、工具调用、图像理解、视觉定位、视频识别和语音理解等等。
|
| 99 |
+
- **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow、[SwanLab](https://github.com/SwanHubX/SwanLab) 等等。
|
| 100 |
+
- **极速推理**:基于 [vLLM](https://github.com/vllm-project/vllm) 或 [SGLang](https://github.com/sgl-project/sglang) 的 OpenAI 风格 API、浏览器界面和命令行接口。
|
| 101 |
+
|
| 102 |
+
### 最新模型的 Day-N 微调适配
|
| 103 |
+
|
| 104 |
+
| 适配时间 | 模型名称 |
|
| 105 |
+
| ------------ | -------------------------------------------------------------------- |
|
| 106 |
+
| Day 0 | Qwen3 / Qwen2.5-VL / Gemma 3 / GLM-4.1V / InternLM 3 / MiniCPM-o-2.6 |
|
| 107 |
+
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 / Llama 4 |
|
| 108 |
+
|
| 109 |
+
## 官方博客
|
| 110 |
+
|
| 111 |
+
- [基于 LLaMA-Factory 和 EasyR1 打造一站式无代码大模型强化学习和部署平台 LLM Model Hub](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/)(中文)
|
| 112 |
+
- [使用 LLaMA-Factory 微调 Qwen2.5-VL 实现自动驾驶场景微调](https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory)(中文)
|
| 113 |
+
- [通过亚马逊 SageMaker HyperPod 上的 LLaMA-Factory 增强多模态模型银行文档的视觉信息提取](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/)(英文)
|
| 114 |
+
- [Easy Dataset × LLaMA Factory: 让大模型高效学习领域知识](https://buaa-act.feishu.cn/wiki/KY9xwTGs1iqHrRkjXBwcZP9WnL9)(中文)
|
| 115 |
+
|
| 116 |
+
<details><summary>全部博客</summary>
|
| 117 |
+
|
| 118 |
+
- [LLaMA Factory:微调 DeepSeek-R1-Distill-Qwen-7B 模型实现新闻标题分类器](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b)(中文)
|
| 119 |
+
- [基于 Amazon SageMaker 和 LLaMA-Factory 打造一站式无代码模型微调部署平台 Model Hub](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)(中文)
|
| 120 |
+
- [LLaMA Factory 多模态微调实践:微调 Qwen2-VL 构建文旅大模型](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)(中文)
|
| 121 |
+
- [LLaMA Factory:微调LLaMA3模型实现角色扮演](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)(中文)
|
| 122 |
+
|
| 123 |
+
</details>
|
| 124 |
+
|
| 125 |
+
## 更新日志
|
| 126 |
+
|
| 127 |
+
[25/07/02] 我们支持了 **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** 模型的微调。请安装 transformers 的 main 分支版本以使用。
|
| 128 |
+
|
| 129 |
+
[25/04/28] 我们支持了 **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** 系列模型的微调。
|
| 130 |
+
|
| 131 |
+
[25/04/21] 我们支持了 **[Muon](https://github.com/KellerJordan/Muon)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。感谢 [@tianshijing](https://github.com/tianshijing) 的 PR。
|
| 132 |
+
|
| 133 |
+
[25/04/16] 我们支持了 **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** 模型的微调。查看 [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) 以使用。
|
| 134 |
+
|
| 135 |
+
[25/04/14] 我们支持了 **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** 和 **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** 模型的微调。
|
| 136 |
+
|
| 137 |
+
[25/04/06] 我们支持了 **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** 模型的微调。查看 [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) 以使用。
|
| 138 |
+
|
| 139 |
+
<details><summary>展开日志</summary>
|
| 140 |
+
|
| 141 |
+
[25/03/31] 我们支持了 **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** 模型的微调。查看 [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) 以使用。
|
| 142 |
+
|
| 143 |
+
[25/03/15] 我们支持了 **[SGLang](https://github.com/sgl-project/sglang)** 推理后端,请使用 `infer_backend: sglang` 启用。
|
| 144 |
+
|
| 145 |
+
[25/03/12] 我们支持了 **[Gemma 3](https://huggingface.co/blog/gemma3)** 模型的微调。
|
| 146 |
+
|
| 147 |
+
[25/02/24] 我们宣布开源 **[EasyR1](https://github.com/hiyouga/EasyR1)**,一个高效可扩展的多模态强化学习框架,支持高效的 GRPO 训练。
|
| 148 |
+
|
| 149 |
+
[25/02/11] 我们支持了在导出模型时保存 **[Ollama](https://github.com/ollama/ollama)** 配置文件。详细用法请参照 [examples](examples/README_zh.md)。
|
| 150 |
+
|
| 151 |
+
[25/02/05] 我们支持了在语音理解任务上微调 **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** 和 **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** 模型。
|
| 152 |
+
|
| 153 |
+
[25/01/31] 我们支持了 **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** 和 **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** 模型的微调。
|
| 154 |
+
|
| 155 |
+
[25/01/15] 我们支持了 **[APOLLO](https://arxiv.org/abs/2412.05270)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。
|
| 156 |
+
|
| 157 |
+
[25/01/14] 我们支持了 **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** 和 **[MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6)** 模型的微调。感谢 [@BUAADreamer](https://github.com/BUAADreamer) 的 PR。
|
| 158 |
+
|
| 159 |
+
[25/01/14] 我们支持了 **[InternLM 3](https://huggingface.co/collections/internlm/)** 模型的微调。感谢 [@hhaAndroid](https://github.com/hhaAndroid) 的 PR。
|
| 160 |
+
|
| 161 |
+
[25/01/10] 我们支持了 **[Phi-4](https://huggingface.co/microsoft/phi-4)** 模型的微调。
|
| 162 |
+
|
| 163 |
+
[24/12/21] 我们支持了使用 **[SwanLab](https://github.com/SwanHubX/SwanLab)** 跟踪与可视化实验。详细用法请参考 [此部分](#使用-swanlab-面板)。
|
| 164 |
+
|
| 165 |
+
[24/11/27] 我们支持了 **[Skywork-o1](https://huggingface.co/Skywork/Skywork-o1-Open-Llama-3.1-8B)** 模型的微调和 **[OpenO1](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)** 数据集。
|
| 166 |
+
|
| 167 |
+
[24/10/09] 我们支持了从 **[魔乐社区](https://modelers.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔乐社区下载)。
|
| 168 |
+
|
| 169 |
+
[24/09/19] 我们支持了 **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** 模型的微调。
|
| 170 |
+
|
| 171 |
+
[24/08/30] 我们支持了 **[Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/)** 模型的微调。感谢 [@simonJJJ](https://github.com/simonJJJ) 的 PR。
|
| 172 |
+
|
| 173 |
+
[24/08/27] 我们支持了 **[Liger Kernel](https://github.com/linkedin/Liger-Kernel)**。请使用 `enable_liger_kernel: true` 来加速训练。
|
| 174 |
+
|
| 175 |
+
[24/08/09] 我们支持了 **[Adam-mini](https://github.com/zyushun/Adam-mini)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。感谢 [@relic-yuexi](https://github.com/relic-yuexi) 的 PR。
|
| 176 |
+
|
| 177 |
+
[24/07/04] 我们支持了[无污染打包训练](https://github.com/MeetKai/functionary/tree/main/functionary/train/packing)。请使用 `neat_packing: true` 参数。感谢 [@chuan298](https://github.com/chuan298) 的 PR。
|
| 178 |
+
|
| 179 |
+
[24/06/16] 我们支持了 **[PiSSA](https://arxiv.org/abs/2404.02948)** 算法。详细用法请参照 [examples](examples/README_zh.md)。
|
| 180 |
+
|
| 181 |
+
[24/06/07] 我们支持了 **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** 和 **[GLM-4](https://github.com/THUDM/GLM-4)** 模型的微调。
|
| 182 |
+
|
| 183 |
+
[24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。
|
| 184 |
+
|
| 185 |
+
[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `paligemma` 模板进行微调使其获得对话能力。
|
| 186 |
+
|
| 187 |
+
[24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。
|
| 188 |
+
|
| 189 |
+
[24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。
|
| 190 |
+
|
| 191 |
+
[24/04/26] 我们支持了多模态模型 **LLaVA-1.5** 的微调。详细用法请参照 [examples](examples/README_zh.md)。
|
| 192 |
+
|
| 193 |
+
[24/04/22] 我们提供了在免费 T4 GPU 上微调 Llama-3 模型的 **[Colab 笔记本](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)**。Hugging Face 社区公开了两个利用 LLaMA Factory 微调的 Llama-3 模型,详情请见 [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) 和 [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese)。
|
| 194 |
+
|
| 195 |
+
[24/04/21] 我们基于 [AstraMindAI 的仓库](https://github.com/astramind-ai/Mixture-of-depths)支持了 **[混合深度训练](https://arxiv.org/abs/2404.02258)**。详细用法请参照 [examples](examples/README_zh.md)。
|
| 196 |
+
|
| 197 |
+
[24/04/16] 我们支持了 **[BAdam](https://arxiv.org/abs/2404.02827)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。
|
| 198 |
+
|
| 199 |
+
[24/04/16] 我们支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的长序列训练(24GB 可训练 Llama-2-7B-56k)。该方法相比 FlashAttention-2 提供了 **117%** 的训练速度和 **50%** 的显存节约。更多数据请见[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
|
| 200 |
+
|
| 201 |
+
[24/03/31] 我们支持了 **[ORPO](https://arxiv.org/abs/2403.07691)**。详细用法请参照 [examples](examples/README_zh.md)。
|
| 202 |
+
|
| 203 |
+
[24/03/21] 我们的论文 "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" 可在 arXiv 上查看!
|
| 204 |
+
|
| 205 |
+
[24/03/20] 我们支持了能在 2x24GB GPU 上微调 70B 模型的 **FSDP+QLoRA**。详细用法请参照 [examples](examples/README_zh.md)。
|
| 206 |
+
|
| 207 |
+
[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。详细用法请参照 [examples](examples/README_zh.md)。
|
| 208 |
+
|
| 209 |
+
[24/03/07] 我们支持了 **[GaLore](https://arxiv.org/abs/2403.03507)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。
|
| 210 |
+
|
| 211 |
+
[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `infer_backend: vllm` 来获得 **270%** 的推理速度。
|
| 212 |
+
|
| 213 |
+
[24/02/28] 我们支持了 **[DoRA](https://arxiv.org/abs/2402.09353)** 微调。请使用 `use_dora: true` 参数进行 DoRA 微调。
|
| 214 |
+
|
| 215 |
+
[24/02/15] 我们支持了 [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro) 提出的**块扩展**方法。详细用法请参照 [examples](examples/README_zh.md)。
|
| 216 |
+
|
| 217 |
+
[24/02/05] Qwen1.5(Qwen2 测试版)系列模型已在 LLaMA-Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。
|
| 218 |
+
|
| 219 |
+
[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `dataset: glaive_toolcall_zh` 即可使模型获得工具调用能力。
|
| 220 |
+
|
| 221 |
+
[23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `use_unsloth: true` 参数启用 unsloth 优化。该方法可提供 **170%** 的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
|
| 222 |
+
|
| 223 |
+
[23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。
|
| 224 |
+
|
| 225 |
+
[23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔搭社区下载)。
|
| 226 |
+
|
| 227 |
+
[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `neftune_noise_alpha: 5` 参数启用 NEFTune。
|
| 228 |
+
|
| 229 |
+
[23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `shift_attn: true` 参数以启用该功能。
|
| 230 |
+
|
| 231 |
+
[23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。详细用法请参照 [examples](examples/README_zh.md)。
|
| 232 |
+
|
| 233 |
+
[23/09/10] 我们支持了 **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `flash_attn: fa2` 参数以启用 FlashAttention-2。
|
| 234 |
+
|
| 235 |
+
[23/08/12] 我们支持了 **RoPE 插值**来扩展 LLaMA 模型的上下文长度。请使用 `rope_scaling: linear` 参数训练模型或使用 `rope_scaling: dynamic` 参数评估模型。
|
| 236 |
+
|
| 237 |
+
[23/08/11] 我们支持了指令模型的 **[DPO 训练](https://arxiv.org/abs/2305.18290)**。详细用法请参照 [examples](examples/README_zh.md)。
|
| 238 |
+
|
| 239 |
+
[23/07/31] 我们支持了**数据流式加载**。请使用 `streaming: true` 和 `max_steps: 10000` 参数来流式加载数据集。
|
| 240 |
+
|
| 241 |
+
[23/07/29] 我们在 Hugging Face 发布了两个 13B 指令微调模型。详细内容请查阅我们的 Hugging Face 项目([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft))。
|
| 242 |
+
|
| 243 |
+
[23/07/18] 我们开发了支持训练和测试的**浏览器一体化界面**。请使用 `train_web.py` 在您的浏览器中微调模型。感谢 [@KanadeSiina](https://github.com/KanadeSiina) 和 [@codemayq](https://github.com/codemayq) 在该功能开发中付出的努力。
|
| 244 |
+
|
| 245 |
+
[23/07/09] 我们开源了 **[FastEdit](https://github.com/hiyouga/FastEdit)** ⚡🩹,一个简单易用的、能迅速编辑大模型事实记忆的工具包。如果您感兴趣请关注我们的 [FastEdit](https://github.com/hiyouga/FastEdit) 项目。
|
| 246 |
+
|
| 247 |
+
[23/06/29] 我们提供了一个**可复现的**指令模型微调示例,详细内容请查阅 [Baichuan-7B-sft](https://huggingface.co/hiyouga/Baichuan-7B-sft)。
|
| 248 |
+
|
| 249 |
+
[23/06/22] 我们对齐了[示例 API](src/api_demo.py) 与 [OpenAI API](https://platform.openai.com/docs/api-reference/chat) 的格式,您可以将微调模型接入**任意基于 ChatGPT 的应用**中。
|
| 250 |
+
|
| 251 |
+
[23/06/03] 我们实现了 4 比特的 LoRA 训练(也称 **[QLoRA](https://github.com/artidoro/qlora)**)。详细用法请参照 [examples](examples/README_zh.md)。
|
| 252 |
+
|
| 253 |
+
</details>
|
| 254 |
+
|
| 255 |
+
> [!TIP]
|
| 256 |
+
> 如果您无法使用最新的功能,请尝试重新拉取代码并再次安装 LLaMA-Factory。
|
| 257 |
+
|
| 258 |
+
## 模型
|
| 259 |
+
|
| 260 |
+
| 模型名 | 参数量 | Template |
|
| 261 |
+
| ----------------------------------------------------------------- | -------------------------------- | ------------------- |
|
| 262 |
+
| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 |
|
| 263 |
+
| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
|
| 264 |
+
| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 |
|
| 265 |
+
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
|
| 266 |
+
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
|
| 267 |
+
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
|
| 268 |
+
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
|
| 269 |
+
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
|
| 270 |
+
| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
|
| 271 |
+
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
|
| 272 |
+
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
|
| 273 |
+
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/THUDM) | 9B/32B | glm4/glmz1 |
|
| 274 |
+
| [GLM-4.1V](https://huggingface.co/THUDM)* | 9B | glm4v |
|
| 275 |
+
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
|
| 276 |
+
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| 277 |
+
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
| 278 |
+
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
| 279 |
+
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
| 280 |
+
| [InternVL 2.5-3](https://huggingface.co/OpenGVLab) | 1B/2B/8B/14B/38B/78B | intern_vl |
|
| 281 |
+
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
|
| 282 |
+
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
|
| 283 |
+
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
|
| 284 |
+
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
|
| 285 |
+
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
|
| 286 |
+
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
|
| 287 |
+
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
|
| 288 |
+
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
|
| 289 |
+
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
|
| 290 |
+
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
|
| 291 |
+
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
|
| 292 |
+
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
|
| 293 |
+
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
|
| 294 |
+
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
|
| 295 |
+
| [Mistral Small](https://huggingface.co/mistralai) | 24B | mistral_small |
|
| 296 |
+
| [OLMo](https://huggingface.co/allenai) | 1B/7B | - |
|
| 297 |
+
| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
|
| 298 |
+
| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - |
|
| 299 |
+
| [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi |
|
| 300 |
+
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
|
| 301 |
+
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
|
| 302 |
+
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
|
| 303 |
+
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
|
| 304 |
+
| [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
|
| 305 |
+
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
|
| 306 |
+
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
|
| 307 |
+
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
|
| 308 |
+
| [Seed Coder](https://huggingface.co/ByteDance-Seed) | 8B | seed_coder |
|
| 309 |
+
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
|
| 310 |
+
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
|
| 311 |
+
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
|
| 312 |
+
| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse |
|
| 313 |
+
| [Yi/Yi-1.5 (Code)](https://huggingface.co/01-ai) | 1.5B/6B/9B/34B | yi |
|
| 314 |
+
| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl |
|
| 315 |
+
| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
|
| 316 |
+
|
| 317 |
+
> [!NOTE]
|
| 318 |
+
> 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。
|
| 319 |
+
>
|
| 320 |
+
> 请务必在训练和推理时采用**完全一致**的模板。
|
| 321 |
+
>
|
| 322 |
+
> \*:您需要从 main 分支安装 `transformers` 并使用 `DISABLE_VERSION_CHECK=1` 来跳过版本检查。
|
| 323 |
+
>
|
| 324 |
+
> \*\*:您需要安装特定版本的 `transformers` 以使用该模型。
|
| 325 |
+
|
| 326 |
+
项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。
|
| 327 |
+
|
| 328 |
+
您也可以在 [template.py](src/llamafactory/data/template.py) 中添加自己的对话模板。
|
| 329 |
+
|
| 330 |
+
## 训练方法
|
| 331 |
+
|
| 332 |
+
| 方法 | 全参数训练 | 部分参数训练 | LoRA | QLoRA |
|
| 333 |
+
| --------------------- | ------------------ | ------------------ | ------------------ | ------------------ |
|
| 334 |
+
| 预训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 335 |
+
| 指令监督微调 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 336 |
+
| 奖励模型训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 337 |
+
| PPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 338 |
+
| DPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 339 |
+
| KTO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 340 |
+
| ORPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 341 |
+
| SimPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
|
| 342 |
+
|
| 343 |
+
> [!TIP]
|
| 344 |
+
> 有关 PPO 的实现细节,请参考[此博客](https://newfacade.github.io/notes-on-reinforcement-learning/17-ppo-trl.html)。
|
| 345 |
+
|
| 346 |
+
## 数据集
|
| 347 |
+
|
| 348 |
+
<details><summary>预训练数据集</summary>
|
| 349 |
+
|
| 350 |
+
- [Wiki Demo (en)](data/wiki_demo.txt)
|
| 351 |
+
- [RefinedWeb (en)](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
|
| 352 |
+
- [RedPajama V2 (en)](https://huggingface.co/datasets/togethercomputer/RedPajama-Data-V2)
|
| 353 |
+
- [Wikipedia (en)](https://huggingface.co/datasets/olm/olm-wikipedia-20221220)
|
| 354 |
+
- [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)
|
| 355 |
+
- [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile)
|
| 356 |
+
- [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B)
|
| 357 |
+
- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb)
|
| 358 |
+
- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu)
|
| 359 |
+
- [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack)
|
| 360 |
+
- [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata)
|
| 361 |
+
|
| 362 |
+
</details>
|
| 363 |
+
|
| 364 |
+
<details><summary>指令微调数据集</summary>
|
| 365 |
+
|
| 366 |
+
- [Identity (en&zh)](data/identity.json)
|
| 367 |
+
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
| 368 |
+
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca-3)
|
| 369 |
+
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
|
| 370 |
+
- [Glaive Function Calling V2 (en&zh)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
|
| 371 |
+
- [LIMA (en)](https://huggingface.co/datasets/GAIR/lima)
|
| 372 |
+
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
|
| 373 |
+
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
|
| 374 |
+
- [BELLE 1M (zh)](https://huggingface.co/datasets/BelleGroup/train_1M_CN)
|
| 375 |
+
- [BELLE 0.5M (zh)](https://huggingface.co/datasets/BelleGroup/train_0.5M_CN)
|
| 376 |
+
- [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M)
|
| 377 |
+
- [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M)
|
| 378 |
+
- [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M)
|
| 379 |
+
- [UltraChat (en)](https://github.com/thunlp/UltraChat)
|
| 380 |
+
- [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus)
|
| 381 |
+
- [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k)
|
| 382 |
+
- [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT)
|
| 383 |
+
- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca)
|
| 384 |
+
- [SlimOrca (en)](https://huggingface.co/datasets/Open-Orca/SlimOrca)
|
| 385 |
+
- [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct)
|
| 386 |
+
- [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
|
| 387 |
+
- [Wiki QA (en)](https://huggingface.co/datasets/wiki_qa)
|
| 388 |
+
- [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
|
| 389 |
+
- [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn)
|
| 390 |
+
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
| 391 |
+
- [deepctrl (en&zh)](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data)
|
| 392 |
+
- [Advertise Generating (zh)](https://huggingface.co/datasets/HasturOfficial/adgen)
|
| 393 |
+
- [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k)
|
| 394 |
+
- [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4)
|
| 395 |
+
- [UltraChat 200k (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k)
|
| 396 |
+
- [AgentInstruct (en)](https://huggingface.co/datasets/THUDM/AgentInstruct)
|
| 397 |
+
- [LMSYS Chat 1M (en)](https://huggingface.co/datasets/lmsys/lmsys-chat-1m)
|
| 398 |
+
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
|
| 399 |
+
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
|
| 400 |
+
- [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction)
|
| 401 |
+
- [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo)
|
| 402 |
+
- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2)
|
| 403 |
+
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
|
| 404 |
+
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
|
| 405 |
+
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
|
| 406 |
+
- [OpenO1-SFT (en&zh)](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)
|
| 407 |
+
- [Open-Thoughts (en)](https://huggingface.co/datasets/open-thoughts/OpenThoughts-114k)
|
| 408 |
+
- [Open-R1-Math (en)](https://huggingface.co/datasets/open-r1/OpenR1-Math-220k)
|
| 409 |
+
- [Chinese-DeepSeek-R1-Distill (zh)](https://huggingface.co/datasets/Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT)
|
| 410 |
+
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
| 411 |
+
- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
|
| 412 |
+
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
| 413 |
+
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
| 414 |
+
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
| 415 |
+
- [OpenSchnabeltier (de)](https://huggingface.co/datasets/mayflowergmbh/openschnabeltier_de)
|
| 416 |
+
- [Evol Instruct (de)](https://huggingface.co/datasets/mayflowergmbh/evol-instruct_de)
|
| 417 |
+
- [Dolphin (de)](https://huggingface.co/datasets/mayflowergmbh/dolphin_de)
|
| 418 |
+
- [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de)
|
| 419 |
+
- [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de)
|
| 420 |
+
- [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de)
|
| 421 |
+
|
| 422 |
+
</details>
|
| 423 |
+
|
| 424 |
+
<details><summary>偏好数据集</summary>
|
| 425 |
+
|
| 426 |
+
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
|
| 427 |
+
- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
|
| 428 |
+
- [COIG-P (zh)](https://huggingface.co/datasets/m-a-p/COIG-P)
|
| 429 |
+
- [RLHF-V (en)](https://huggingface.co/datasets/openbmb/RLHF-V-Dataset)
|
| 430 |
+
- [VLFeedback (en)](https://huggingface.co/datasets/Zhihui/VLFeedback)
|
| 431 |
+
- [RLAIF-V (en)](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset)
|
| 432 |
+
- [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
|
| 433 |
+
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
|
| 434 |
+
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
|
| 435 |
+
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
|
| 436 |
+
- [KTO mixed (en)](https://huggingface.co/datasets/argilla/kto-mix-15k)
|
| 437 |
+
|
| 438 |
+
</details>
|
| 439 |
+
|
| 440 |
+
部分数据集的使用需要确认,我们推荐使用下述命令登录您的 Hugging Face 账户。
|
| 441 |
+
|
| 442 |
+
```bash
|
| 443 |
+
pip install --upgrade huggingface_hub
|
| 444 |
+
huggingface-cli login
|
| 445 |
+
```
|
| 446 |
+
|
| 447 |
+
## 软硬件依赖
|
| 448 |
+
|
| 449 |
+
| 必需项 | 至少 | 推荐 |
|
| 450 |
+
| ------------ | ------- | --------- |
|
| 451 |
+
| python | 3.9 | 3.10 |
|
| 452 |
+
| torch | 2.0.0 | 2.6.0 |
|
| 453 |
+
| torchvision | 0.15.0 | 0.21.0 |
|
| 454 |
+
| transformers | 4.49.0 | 4.50.0 |
|
| 455 |
+
| datasets | 2.16.0 | 3.2.0 |
|
| 456 |
+
| accelerate | 0.34.0 | 1.2.1 |
|
| 457 |
+
| peft | 0.14.0 | 0.15.1 |
|
| 458 |
+
| trl | 0.8.6 | 0.9.6 |
|
| 459 |
+
|
| 460 |
+
| 可选项 | 至少 | 推荐 |
|
| 461 |
+
| ------------ | ------- | --------- |
|
| 462 |
+
| CUDA | 11.6 | 12.2 |
|
| 463 |
+
| deepspeed | 0.10.0 | 0.16.4 |
|
| 464 |
+
| bitsandbytes | 0.39.0 | 0.43.1 |
|
| 465 |
+
| vllm | 0.4.3 | 0.8.2 |
|
| 466 |
+
| flash-attn | 2.5.6 | 2.7.2 |
|
| 467 |
+
|
| 468 |
+
### 硬件依赖
|
| 469 |
+
|
| 470 |
+
\* *估算值*
|
| 471 |
+
|
| 472 |
+
| 方法 | 精度 | 7B | 14B | 30B | 70B | `x`B |
|
| 473 |
+
| ------------------------------- | ---- | ----- | ----- | ----- | ------ | ------- |
|
| 474 |
+
| Full (`bf16` or `fp16`) | 32 | 120GB | 240GB | 600GB | 1200GB | `18x`GB |
|
| 475 |
+
| Full (`pure_bf16`) | 16 | 60GB | 120GB | 300GB | 600GB | `8x`GB |
|
| 476 |
+
| Freeze/LoRA/GaLore/APOLLO/BAdam | 16 | 16GB | 32GB | 64GB | 160GB | `2x`GB |
|
| 477 |
+
| QLoRA | 8 | 10GB | 20GB | 40GB | 80GB | `x`GB |
|
| 478 |
+
| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | `x/2`GB |
|
| 479 |
+
| QLoRA | 2 | 4GB | 8GB | 16GB | 24GB | `x/4`GB |
|
| 480 |
+
|
| 481 |
+
## 如何使用
|
| 482 |
+
|
| 483 |
+
### 安装 LLaMA Factory
|
| 484 |
+
|
| 485 |
+
> [!IMPORTANT]
|
| 486 |
+
> 此步骤为必需。
|
| 487 |
+
|
| 488 |
+
#### 从源码安装
|
| 489 |
+
|
| 490 |
+
```bash
|
| 491 |
+
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
|
| 492 |
+
cd LLaMA-Factory
|
| 493 |
+
pip install -e ".[torch,metrics]" --no-build-isolation
|
| 494 |
+
```
|
| 495 |
+
|
| 496 |
+
可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、aqlm、vllm、sglang、galore、apollo、badam、adam-mini、qwen、minicpm_v、openmind、swanlab、dev
|
| 497 |
+
|
| 498 |
+
#### 从镜像安装
|
| 499 |
+
|
| 500 |
+
```bash
|
| 501 |
+
docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
|
| 502 |
+
```
|
| 503 |
+
|
| 504 |
+
该镜像基于 Ubuntu 22.04(x86\_64)、CUDA 12.4、Python 3.11、PyTorch 2.6.0 和 Flash-attn 2.7.4 构建。
|
| 505 |
+
|
| 506 |
+
查看全部镜像:https://hub.docker.com/r/hiyouga/llamafactory/tags
|
| 507 |
+
|
| 508 |
+
请参阅[构建 Docker](#构建-docker) 来重新构建镜像。
|
| 509 |
+
|
| 510 |
+
<details><summary>使用 <b>uv</b> 构建虚拟环境</summary>
|
| 511 |
+
|
| 512 |
+
使用 [uv](https://github.com/astral-sh/uv) 创建隔离的 Python 环境:
|
| 513 |
+
|
| 514 |
+
```bash
|
| 515 |
+
uv sync --extra torch --extra metrics --prerelease=allow
|
| 516 |
+
```
|
| 517 |
+
|
| 518 |
+
在环境中运行 LLaMA-Factory:
|
| 519 |
+
|
| 520 |
+
```bash
|
| 521 |
+
uv run --prerelease=allow llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml
|
| 522 |
+
```
|
| 523 |
+
|
| 524 |
+
</details>
|
| 525 |
+
|
| 526 |
+
<details><summary>Windows 用户指南</summary>
|
| 527 |
+
|
| 528 |
+
#### 安装 PyTorch
|
| 529 |
+
|
| 530 |
+
Windows 平台需要额外手动安装 GPU 版本的 PyTorch 依赖包,您可以参考[官方网站](https://pytorch.org/get-started/locally/)和以下命令安装并测试 PyTorch 是否正确安装。
|
| 531 |
+
|
| 532 |
+
```bash
|
| 533 |
+
pip uninstall torch torchvision torchaudio
|
| 534 |
+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
|
| 535 |
+
python -c "import torch; print(torch.cuda.is_available())"
|
| 536 |
+
```
|
| 537 |
+
|
| 538 |
+
如果看到 `True` 则说明安装成功。
|
| 539 |
+
|
| 540 |
+
若遇到类似 `Can't pickle local object` 的报错,请设置 `dataloader_num_workers: 0`。
|
| 541 |
+
|
| 542 |
+
#### 安装 BitsAndBytes
|
| 543 |
+
|
| 544 |
+
如果要在 Windows 平台上开启量化 LoRA(QLoRA),需要安装预编译的 `bitsandbytes` 库, 支持 CUDA 11.1 到 12.2, 请根据您的 CUDA 版本情况选择适合的[发布版本](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels)。
|
| 545 |
+
|
| 546 |
+
```bash
|
| 547 |
+
pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.2.post2-py3-none-win_amd64.whl
|
| 548 |
+
```
|
| 549 |
+
|
| 550 |
+
#### 安装 Flash Attention-2
|
| 551 |
+
|
| 552 |
+
如果要在 Windows 平台上开启 FlashAttention-2,请使用 [flash-attention-windows-wheel](https://huggingface.co/lldacing/flash-attention-windows-wheel) 中的脚本自行编译与安装。
|
| 553 |
+
|
| 554 |
+
</details>
|
| 555 |
+
|
| 556 |
+
<details><summary>昇腾 NPU 用户指南</summary>
|
| 557 |
+
|
| 558 |
+
在昇腾 NPU 设备上安装 LLaMA Factory 时,请升级 Python 到 3.10 及以上,并需要指定额外依赖项,使用 `pip install -e ".[torch-npu,metrics]"` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit 与 Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令:
|
| 559 |
+
|
| 560 |
+
```bash
|
| 561 |
+
# 请替换 URL 为 CANN 版本和设备型号对应的 URL
|
| 562 |
+
# 安装 CANN Toolkit
|
| 563 |
+
wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run
|
| 564 |
+
bash Ascend-cann-toolkit_8.0.RC1.alpha001_linux-"$(uname -i)".run --install
|
| 565 |
+
|
| 566 |
+
# 安装 CANN Kernels
|
| 567 |
+
wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C17SPC701/Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run
|
| 568 |
+
bash Ascend-cann-kernels-910b_8.0.RC1.alpha001_linux.run --install
|
| 569 |
+
|
| 570 |
+
# 设置环境变量
|
| 571 |
+
source /usr/local/Ascend/ascend-toolkit/set_env.sh
|
| 572 |
+
```
|
| 573 |
+
|
| 574 |
+
| 依赖项 | 至少 | 推荐 |
|
| 575 |
+
| ------------ | ------- | -------------- |
|
| 576 |
+
| CANN | 8.0.RC1 | 8.0.0.alpha002 |
|
| 577 |
+
| torch | 2.1.0 | 2.4.0 |
|
| 578 |
+
| torch-npu | 2.1.0 | 2.4.0.post2 |
|
| 579 |
+
| deepspeed | 0.13.2 | 0.13.2 |
|
| 580 |
+
| vllm-ascend | - | 0.7.3 |
|
| 581 |
+
|
| 582 |
+
请使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定运算设备。
|
| 583 |
+
|
| 584 |
+
如果遇到无法正常推理的情况,请尝试设置 `do_sample: false`。
|
| 585 |
+
|
| 586 |
+
下载预构建 Docker 镜像:[32GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) | [64GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html)
|
| 587 |
+
|
| 588 |
+
#### 安装 BitsAndBytes
|
| 589 |
+
|
| 590 |
+
如果要在 Ascend NPU 上进行基于 bitsandbytes 的 QLoRA 量化微调,请执行如下步骤:
|
| 591 |
+
|
| 592 |
+
1. 手动编译 bitsandbytes:请参考[安装文档](https://huggingface.co/docs/bitsandbytes/installation?backend=Ascend+NPU&platform=Ascend+NPU)完成 NPU 版的 bitsandbytes 安装,编译要求环境 cmake 版本不低于 3.22.1,g++ 版本不低于 12.x。
|
| 593 |
+
|
| 594 |
+
```bash
|
| 595 |
+
# 从源码安装 bitsandbytes
|
| 596 |
+
# 克隆 bitsandbytes 仓库, Ascend NPU 目前在 multi-backend-refactor 中支持
|
| 597 |
+
git clone -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git
|
| 598 |
+
cd bitsandbytes/
|
| 599 |
+
|
| 600 |
+
# 安装依赖
|
| 601 |
+
pip install -r requirements-dev.txt
|
| 602 |
+
|
| 603 |
+
# 安装编译工具依赖,该步骤在不同系统上命令有所不同,供参考
|
| 604 |
+
apt-get install -y build-essential cmake
|
| 605 |
+
|
| 606 |
+
# 编译 & 安装
|
| 607 |
+
cmake -DCOMPUTE_BACKEND=npu -S .
|
| 608 |
+
make
|
| 609 |
+
pip install .
|
| 610 |
+
```
|
| 611 |
+
|
| 612 |
+
2. 安装 transformers 的 main 分支版本。
|
| 613 |
+
|
| 614 |
+
```bash
|
| 615 |
+
git clone -b main https://github.com/huggingface/transformers.git
|
| 616 |
+
cd transformers
|
| 617 |
+
pip install .
|
| 618 |
+
```
|
| 619 |
+
|
| 620 |
+
3. 在训练参数中设置 `double_quantization: false`,可参考[示例](examples/train_qlora/llama3_lora_sft_bnb_npu.yaml)。
|
| 621 |
+
|
| 622 |
+
</details>
|
| 623 |
+
|
| 624 |
+
### 数据准备
|
| 625 |
+
|
| 626 |
+
关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope / Modelers 上的数据集或加载本地数据集。
|
| 627 |
+
|
| 628 |
+
> [!NOTE]
|
| 629 |
+
> 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。
|
| 630 |
+
|
| 631 |
+
您也可以使用 **[Easy Dataset](https://github.com/ConardLi/easy-dataset)** 或 **[GraphGen](https://github.com/open-sciencelab/GraphGen)** 构建用于微调的合成数据。
|
| 632 |
+
|
| 633 |
+
### 快速开始
|
| 634 |
+
|
| 635 |
+
下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
|
| 636 |
+
|
| 637 |
+
```bash
|
| 638 |
+
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
| 639 |
+
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
| 640 |
+
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
| 641 |
+
```
|
| 642 |
+
|
| 643 |
+
高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
|
| 644 |
+
|
| 645 |
+
> [!TIP]
|
| 646 |
+
> 使用 `llamafactory-cli help` 显示帮助信息。
|
| 647 |
+
>
|
| 648 |
+
> 遇到报错请先看[常见问题](https://github.com/hiyouga/LLaMA-Factory/issues/4614)。
|
| 649 |
+
|
| 650 |
+
### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
|
| 651 |
+
|
| 652 |
+
```bash
|
| 653 |
+
llamafactory-cli webui
|
| 654 |
+
```
|
| 655 |
+
|
| 656 |
+
### 构建 Docker
|
| 657 |
+
|
| 658 |
+
CUDA 用户:
|
| 659 |
+
|
| 660 |
+
```bash
|
| 661 |
+
cd docker/docker-cuda/
|
| 662 |
+
docker compose up -d
|
| 663 |
+
docker compose exec llamafactory bash
|
| 664 |
+
```
|
| 665 |
+
|
| 666 |
+
昇腾 NPU 用户:
|
| 667 |
+
|
| 668 |
+
```bash
|
| 669 |
+
cd docker/docker-npu/
|
| 670 |
+
docker compose up -d
|
| 671 |
+
docker compose exec llamafactory bash
|
| 672 |
+
```
|
| 673 |
+
|
| 674 |
+
AMD ROCm 用户:
|
| 675 |
+
|
| 676 |
+
```bash
|
| 677 |
+
cd docker/docker-rocm/
|
| 678 |
+
docker compose up -d
|
| 679 |
+
docker compose exec llamafactory bash
|
| 680 |
+
```
|
| 681 |
+
|
| 682 |
+
<details><summary>不使用 Docker Compose 构建</summary>
|
| 683 |
+
|
| 684 |
+
CUDA 用户:
|
| 685 |
+
|
| 686 |
+
```bash
|
| 687 |
+
docker build -f ./docker/docker-cuda/Dockerfile \
|
| 688 |
+
--build-arg PIP_INDEX=https://pypi.org/simple \
|
| 689 |
+
--build-arg EXTRAS=metrics \
|
| 690 |
+
-t llamafactory:latest .
|
| 691 |
+
|
| 692 |
+
docker run -dit --ipc=host --gpus=all \
|
| 693 |
+
-p 7860:7860 \
|
| 694 |
+
-p 8000:8000 \
|
| 695 |
+
--name llamafactory \
|
| 696 |
+
llamafactory:latest
|
| 697 |
+
|
| 698 |
+
docker exec -it llamafactory bash
|
| 699 |
+
```
|
| 700 |
+
|
| 701 |
+
昇腾 NPU 用户:
|
| 702 |
+
|
| 703 |
+
```bash
|
| 704 |
+
docker build -f ./docker/docker-npu/Dockerfile \
|
| 705 |
+
--build-arg PIP_INDEX=https://pypi.org/simple \
|
| 706 |
+
--build-arg EXTRAS=torch-npu,metrics \
|
| 707 |
+
-t llamafactory:latest .
|
| 708 |
+
|
| 709 |
+
docker run -dit --ipc=host \
|
| 710 |
+
-v /usr/local/dcmi:/usr/local/dcmi \
|
| 711 |
+
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
|
| 712 |
+
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
|
| 713 |
+
-v /etc/ascend_install.info:/etc/ascend_install.info \
|
| 714 |
+
-p 7860:7860 \
|
| 715 |
+
-p 8000:8000 \
|
| 716 |
+
--device /dev/davinci0 \
|
| 717 |
+
--device /dev/davinci_manager \
|
| 718 |
+
--device /dev/devmm_svm \
|
| 719 |
+
--device /dev/hisi_hdc \
|
| 720 |
+
--name llamafactory \
|
| 721 |
+
llamafactory:latest
|
| 722 |
+
|
| 723 |
+
docker exec -it llamafactory bash
|
| 724 |
+
```
|
| 725 |
+
|
| 726 |
+
AMD ROCm 用户:
|
| 727 |
+
|
| 728 |
+
```bash
|
| 729 |
+
docker build -f ./docker/docker-rocm/Dockerfile \
|
| 730 |
+
--build-arg PIP_INDEX=https://pypi.org/simple \
|
| 731 |
+
--build-arg EXTRAS=metrics \
|
| 732 |
+
-t llamafactory:latest .
|
| 733 |
+
|
| 734 |
+
docker run -dit --ipc=host \
|
| 735 |
+
-p 7860:7860 \
|
| 736 |
+
-p 8000:8000 \
|
| 737 |
+
--device /dev/kfd \
|
| 738 |
+
--device /dev/dri \
|
| 739 |
+
--name llamafactory \
|
| 740 |
+
llamafactory:latest
|
| 741 |
+
|
| 742 |
+
docker exec -it llamafactory bash
|
| 743 |
+
```
|
| 744 |
+
|
| 745 |
+
</details>
|
| 746 |
+
|
| 747 |
+
<details><summary>使用数据卷</summary>
|
| 748 |
+
|
| 749 |
+
您可以通过移除 Dockerfile 中 `VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]` 的注释来使用数据卷。
|
| 750 |
+
|
| 751 |
+
在构建 Docker 时使用参数 `-v ./hf_cache:/root/.cache/huggingface` 来挂载数据卷。各个数据卷的含义表示如下。
|
| 752 |
+
|
| 753 |
+
- `hf_cache`:使用宿主机的 Hugging Face 缓存文件夹。
|
| 754 |
+
- `shared_data`:宿主机中存放数据集的文件夹路径。
|
| 755 |
+
- `output`:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。
|
| 756 |
+
|
| 757 |
+
</details>
|
| 758 |
+
|
| 759 |
+
### 利用 vLLM 部署 OpenAI API
|
| 760 |
+
|
| 761 |
+
```bash
|
| 762 |
+
API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true
|
| 763 |
+
```
|
| 764 |
+
|
| 765 |
+
> [!TIP]
|
| 766 |
+
> API 文档请查阅[这里](https://platform.openai.com/docs/api-reference/chat/create)。
|
| 767 |
+
>
|
| 768 |
+
> 示例:[图像理解](scripts/api_example/test_image.py) | [工具调用](scripts/api_example/test_toolcall.py)
|
| 769 |
+
|
| 770 |
+
### 从魔搭社区下载
|
| 771 |
+
|
| 772 |
+
如果您在 Hugging Face 模型和数据集的下载中遇到了问题,可以通过下述方法使用魔搭社区。
|
| 773 |
+
|
| 774 |
+
```bash
|
| 775 |
+
export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1`
|
| 776 |
+
```
|
| 777 |
+
|
| 778 |
+
将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。
|
| 779 |
+
|
| 780 |
+
### 从魔乐社区下载
|
| 781 |
+
|
| 782 |
+
您也可以通过下述方法,使用魔乐社区下载数据集和模型。
|
| 783 |
+
|
| 784 |
+
```bash
|
| 785 |
+
export USE_OPENMIND_HUB=1 # Windows 使用 `set USE_OPENMIND_HUB=1`
|
| 786 |
+
```
|
| 787 |
+
|
| 788 |
+
将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔乐社区](https://modelers.cn/models)查看所有可用的模型,例如 `TeleAI/TeleChat-7B-pt`。
|
| 789 |
+
|
| 790 |
+
### 使用 W&B 面板
|
| 791 |
+
|
| 792 |
+
若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请在 yaml 文件中添加下面的参数。
|
| 793 |
+
|
| 794 |
+
```yaml
|
| 795 |
+
report_to: wandb
|
| 796 |
+
run_name: test_run # 可选
|
| 797 |
+
```
|
| 798 |
+
|
| 799 |
+
在启动训练任务时,将 `WANDB_API_KEY` 设置为[密钥](https://wandb.ai/authorize)来登录 W&B 账户。
|
| 800 |
+
|
| 801 |
+
### 使用 SwanLab 面板
|
| 802 |
+
|
| 803 |
+
若要使用 [SwanLab](https://github.com/SwanHubX/SwanLab) 记录实验数据,请在 yaml 文件中添加下面的参数。
|
| 804 |
+
|
| 805 |
+
```yaml
|
| 806 |
+
use_swanlab: true
|
| 807 |
+
swanlab_run_name: test_run # 可选
|
| 808 |
+
```
|
| 809 |
+
|
| 810 |
+
在启动训练任务时,登录 SwanLab 账户有以下三种方式:
|
| 811 |
+
|
| 812 |
+
方式一:在 yaml 文件中添加 `swanlab_api_key=<your_api_key>` ,并设置为你的 [API 密钥](https://swanlab.cn/settings)。
|
| 813 |
+
方式二:将环境变量 `SWANLAB_API_KEY` 设置为你的 [API 密钥](https://swanlab.cn/settings)。
|
| 814 |
+
方式三:启动前使用 `swanlab login` 命令完成登录。
|
| 815 |
+
|
| 816 |
+
## 使用了 LLaMA Factory 的项目
|
| 817 |
+
|
| 818 |
+
如果您有项目希望添加至下述列表,请通过邮件联系或者创建一个 PR。
|
| 819 |
+
|
| 820 |
+
<details><summary>点击显示</summary>
|
| 821 |
+
|
| 822 |
+
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
|
| 823 |
+
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
|
| 824 |
+
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
|
| 825 |
+
1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816)
|
| 826 |
+
1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710)
|
| 827 |
+
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
|
| 828 |
+
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
|
| 829 |
+
1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904)
|
| 830 |
+
1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625)
|
| 831 |
+
1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176)
|
| 832 |
+
1. Yang et al. LaCo: Large Language Model Pruning via Layer Collapse. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187)
|
| 833 |
+
1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746)
|
| 834 |
+
1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801)
|
| 835 |
+
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809)
|
| 836 |
+
1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819)
|
| 837 |
+
1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204)
|
| 838 |
+
1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.14714)
|
| 839 |
+
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
|
| 840 |
+
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
|
| 841 |
+
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
|
| 842 |
+
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
|
| 843 |
+
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
|
| 844 |
+
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
|
| 845 |
+
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
|
| 846 |
+
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
|
| 847 |
+
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
|
| 848 |
+
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
|
| 849 |
+
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
|
| 850 |
+
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
|
| 851 |
+
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
|
| 852 |
+
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
|
| 853 |
+
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
|
| 854 |
+
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
|
| 855 |
+
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
|
| 856 |
+
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
|
| 857 |
+
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140)
|
| 858 |
+
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
|
| 859 |
+
1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760)
|
| 860 |
+
1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378)
|
| 861 |
+
1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055)
|
| 862 |
+
1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739)
|
| 863 |
+
1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816)
|
| 864 |
+
1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215)
|
| 865 |
+
1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30)
|
| 866 |
+
1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380)
|
| 867 |
+
1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106)
|
| 868 |
+
1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136)
|
| 869 |
+
1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496)
|
| 870 |
+
1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688)
|
| 871 |
+
1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955)
|
| 872 |
+
1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973)
|
| 873 |
+
1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115)
|
| 874 |
+
1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815)
|
| 875 |
+
1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099)
|
| 876 |
+
1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173)
|
| 877 |
+
1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074)
|
| 878 |
+
1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408)
|
| 879 |
+
1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546)
|
| 880 |
+
1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695)
|
| 881 |
+
1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233)
|
| 882 |
+
1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069)
|
| 883 |
+
1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburgh's Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25)
|
| 884 |
+
1. Li et al. Calibrating LLMs with Preference Optimization on Thought Trees for Generating Rationale in Science Question Scoring. 2024. [[arxiv]](https://arxiv.org/abs/2406.19949)
|
| 885 |
+
1. Yang et al. Financial Knowledge Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2407.00365)
|
| 886 |
+
1. Lin et al. DogeRM: Equipping Reward Models with Domain Knowledge through Model Merging. 2024. [[arxiv]](https://arxiv.org/abs/2407.01470)
|
| 887 |
+
1. Bako et al. Evaluating the Semantic Profiling Abilities of LLMs for Natural Language Utterances in Data Visualization. 2024. [[arxiv]](https://arxiv.org/abs/2407.06129)
|
| 888 |
+
1. Huang et al. RoLoRA: Fine-tuning Rotated Outlier-free LLMs for Effective Weight-Activation Quantization. 2024. [[arxiv]](https://arxiv.org/abs/2407.08044)
|
| 889 |
+
1. Jiang et al. LLM-Collaboration on Automatic Science Journalism for the General Audience. 2024. [[arxiv]](https://arxiv.org/abs/2407.09756)
|
| 890 |
+
1. Inouye et al. Applied Auto-tuning on LoRA Hyperparameters. 2024. [[paper]](https://scholarcommons.scu.edu/cseng_senior/272/)
|
| 891 |
+
1. Qi et al. Research on Tibetan Tourism Viewpoints information generation system based on LLM. 2024. [[arxiv]](https://arxiv.org/abs/2407.13561)
|
| 892 |
+
1. Xu et al. Course-Correction: Safety Alignment Using Synthetic Preferences. 2024. [[arxiv]](https://arxiv.org/abs/2407.16637)
|
| 893 |
+
1. Sun et al. LAMBDA: A Large Model Based Data Agent. 2024. [[arxiv]](https://arxiv.org/abs/2407.17535)
|
| 894 |
+
1. Zhu et al. CollectiveSFT: Scaling Large Language Models for Chinese Medical Benchmark with Collective Instructions in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2407.19705)
|
| 895 |
+
1. Yu et al. Correcting Negative Bias in Large Language Models through Negative Attention Score Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2408.00137)
|
| 896 |
+
1. Xie et al. The Power of Personalized Datasets: Advancing Chinese Composition Writing for Elementary School through Targeted Model Fine-Tuning. IALP 2024. [[paper]](https://www.asianlp.sg/conferences/ialp2024/proceedings/papers/IALP2024_P055.pdf)
|
| 897 |
+
1. Liu et al. Instruct-Code-Llama: Improving Capabilities of Language Model in Competition Level Code Generation by Online Judge Feedback. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_11)
|
| 898 |
+
1. Wang et al. Cybernetic Sentinels: Unveiling the Impact of Safety Data Selection on Model Security in Supervised Fine-Tuning. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_23)
|
| 899 |
+
1. Xia et al. Understanding the Performance and Estimating the Cost of LLM Fine-Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2408.04693)
|
| 900 |
+
1. Zeng et al. Perceive, Reflect, and Plan: Designing LLM Agent for Goal-Directed City Navigation without Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2408.04168)
|
| 901 |
+
1. Xia et al. Using Pre-trained Language Model for Accurate ESG Prediction. FinNLP 2024. [[paper]](https://aclanthology.org/2024.finnlp-2.1/)
|
| 902 |
+
1. Liang et al. I-SHEEP: Self-Alignment of LLM from Scratch through an Iterative Self-Enhancement Paradigm. 2024. [[arxiv]](https://arxiv.org/abs/2408.08072)
|
| 903 |
+
1. Bai et al. Aligning Large Language Model with Direct Multi-Preference Optimization for Recommendation. CIKM 2024. [[paper]](https://dl.acm.org/doi/10.1145/3627673.3679611)
|
| 904 |
+
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper,基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。
|
| 905 |
+
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM,基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。
|
| 906 |
+
1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: 孙思邈中文医疗大模型 Sunsimiao,基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。
|
| 907 |
+
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。
|
| 908 |
+
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。
|
| 909 |
+
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
|
| 910 |
+
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。
|
| 911 |
+
1. **[AutoRE](https://github.com/THUDM/AutoRE)**:基于大语言模型的文档级关系抽取系统。
|
| 912 |
+
1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**:在 Windows 主机上利用英伟达 RTX 设备进行大型语言模型微调的开发包。
|
| 913 |
+
1. **[LazyLLM](https://github.com/LazyAGI/LazyLLM)**:一个低代码构建多 Agent 大模型应用的开发工具,支持基于 LLaMA Factory 的模型微调。
|
| 914 |
+
1. **[RAG-Retrieval](https://github.com/NLPJCL/RAG-Retrieval)**:一个全链路 RAG 检索模型微调、推理和蒸馏代码库。[[blog]](https://zhuanlan.zhihu.com/p/987727357)
|
| 915 |
+
1. **[360-LLaMA-Factory](https://github.com/Qihoo360/360-LLaMA-Factory)**:一个魔改后的代码库,通过 Ring Attention 支持长序列的 SFT 和 DPO 训练。
|
| 916 |
+
1. **[Sky-T1](https://novasky-ai.github.io/posts/sky-t1/)**:由 NovaSky AI 微调的低成本类 o1 长推理模型。
|
| 917 |
+
1. **[WeClone](https://github.com/xming521/WeClone)**:从聊天记录创造数字分身的一站式解决方案。
|
| 918 |
+
|
| 919 |
+
</details>
|
| 920 |
+
|
| 921 |
+
## 协议
|
| 922 |
+
|
| 923 |
+
本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。
|
| 924 |
+
|
| 925 |
+
使用模型权重时,请遵循对应的模型协议:[Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / 
[Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
|
| 926 |
+
|
| 927 |
+
## 引用
|
| 928 |
+
|
| 929 |
+
如果您觉得此项目有帮助,请考虑以下列格式引用
|
| 930 |
+
|
| 931 |
+
```bibtex
|
| 932 |
+
@inproceedings{zheng2024llamafactory,
|
| 933 |
+
title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models},
|
| 934 |
+
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma},
|
| 935 |
+
booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
|
| 936 |
+
address={Bangkok, Thailand},
|
| 937 |
+
publisher={Association for Computational Linguistics},
|
| 938 |
+
year={2024},
|
| 939 |
+
url={http://arxiv.org/abs/2403.13372}
|
| 940 |
+
}
|
| 941 |
+
```
|
| 942 |
+
|
| 943 |
+
## 致谢
|
| 944 |
+
|
| 945 |
+
本项目受益于 [PEFT](https://github.com/huggingface/peft)、[TRL](https://github.com/huggingface/trl)、[QLoRA](https://github.com/artidoro/qlora) 和 [FastChat](https://github.com/lm-sys/FastChat),感谢以上诸位作者的付出。
|
| 946 |
+
|
| 947 |
+
## Star History
|
| 948 |
+
|
| 949 |
+

|
SUMMARY_完成情况总结.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 任务完成情况总结
|
| 2 |
+
|
| 3 |
+
## 需求1:ShareGPT训练的具体pair分割情况分析 ✅
|
| 4 |
+
|
| 5 |
+
### 完成内容
|
| 6 |
+
1. **详细调试分析**:创建了`debug_pair_splitting.py`脚本,详细分析了ShareGPT格式的对话在训练过程中的具体分割情况
|
| 7 |
+
|
| 8 |
+
2. **核心发现**:
|
| 9 |
+
- **原始对话结构**:6条消息(human → function_call → observation → function_call → observation → gpt)
|
| 10 |
+
- **编码后消息段**:6个独立的token段,每个对应一条消息
|
| 11 |
+
- **最终训练pairs**:3个训练对,按照(prompt, response)方式分割
|
| 12 |
+
|
| 13 |
+
3. **关键理解**:
|
| 14 |
+
```python
|
| 15 |
+
# 实际的pairs结构:
|
| 16 |
+
pairs = [
|
| 17 |
+
# Pair 1: 系统提示+用户问题 → 第一次工具调用
|
| 18 |
+
([<system_tokens> + <user_tokens>], [<function_call_1_tokens>]),
|
| 19 |
+
# Pair 2: 第一次工具结果 → 第二次工具调用
|
| 20 |
+
([<observation_1_tokens>], [<function_call_2_tokens>]),
|
| 21 |
+
# Pair 3: 第二次工具结果 → 助手回复
|
| 22 |
+
([<observation_2_tokens>], [<assistant_tokens>])
|
| 23 |
+
]
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
4. **重要发现**:
|
| 27 |
+
- ✅ **只有第一个pair包含system tokens**,后续pairs不包含
|
| 28 |
+
- ✅ **每个target自动添加思维链标记**(`<think>\n\n</think>`)
|
| 29 |
+
- ✅ **多轮function call被分成多个独立训练对**
|
| 30 |
+
- ✅ **按照cutoff_len进行智能截断**
|
| 31 |
+
|
| 32 |
+
### 生成文件
|
| 33 |
+
- `debug_pair_splitting.py` - 主要调试脚本
|
| 34 |
+
- `pair_debug.log` - 详细调试日志
|
| 35 |
+
- 在`supervised.py`和`template.py`中添加了调试代码
|
| 36 |
+
|
| 37 |
+
## 需求2:基于真实训练流程的测试集评测脚本 ✅
|
| 38 |
+
|
| 39 |
+
### 完成内容
|
| 40 |
+
1. **核心评估器**:`eval_by_training_flow.py`
|
| 41 |
+
- 完全按照训练时的pair分割方式进行评估
|
| 42 |
+
- 支持function call准确性评估
|
| 43 |
+
- 支持LLM judge多维度评估
|
| 44 |
+
|
| 45 |
+
2. **Function Call评估**:
|
| 46 |
+
- ✅ **工具名称匹配检测**
|
| 47 |
+
- ✅ **参数完整性和准确性检测**
|
| 48 |
+
- ✅ **详细错误分析**(缺失参数、错误参数)
|
| 49 |
+
- ✅ **自动处理转义字符和JSON格式**
|
| 50 |
+
|
| 51 |
+
3. **Assistant Response评估**:
|
| 52 |
+
- ✅ **LLM judge多维度评分**(准确性、完整性、相关性、清晰度)
|
| 53 |
+
- ✅ **自动化评分解析**
|
| 54 |
+
- ✅ **上下文感知评估**
|
| 55 |
+
|
| 56 |
+
4. **完整的评估流程**:
|
| 57 |
+
- ✅ **数据加载和格式转换**
|
| 58 |
+
- ✅ **按训练方式分割pairs**
|
| 59 |
+
- ✅ **逐pair评估和统计**
|
| 60 |
+
- ✅ **详细结果保存和分析**
|
| 61 |
+
|
| 62 |
+
### 评估指标
|
| 63 |
+
```python
|
| 64 |
+
{
|
| 65 |
+
"function_call": {
|
| 66 |
+
"name_accuracy": 0.90, # 工具名称准确率
|
| 67 |
+
"args_accuracy": 0.75, # 参数准确率
|
| 68 |
+
"overall_accuracy": 0.60 # 整体准确率
|
| 69 |
+
},
|
| 70 |
+
"assistant_response": {
|
| 71 |
+
"average_score": 4.2, # 平均分数(1-5分)
|
| 72 |
+
"score_distribution": {...} # 分数分布
|
| 73 |
+
}
|
| 74 |
+
}
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### 生成文件
|
| 78 |
+
- `eval_by_training_flow.py` - 主要评估器
|
| 79 |
+
- `test_evaluator.py` - 功能测试脚本
|
| 80 |
+
- `run_evaluation_example.py` - 完整使用示例
|
| 81 |
+
- `sample_test_data.json` - 示例测试数据
|
| 82 |
+
- `mock_eval_results.json` - 示例评估结果
|
| 83 |
+
- `README_evaluation.md` - 详细使用文档
|
| 84 |
+
|
| 85 |
+
## 技术亮点
|
| 86 |
+
|
| 87 |
+
### 1. 完全基于真实训练流程
|
| 88 |
+
- 使用相同的tokenizer和template
|
| 89 |
+
- 按照实际的pair分割逻辑
|
| 90 |
+
- 考虑思维链和特殊token处理
|
| 91 |
+
|
| 92 |
+
### 2. 智能Function Call解析
|
| 93 |
+
- 支持多种格式的function call
|
| 94 |
+
- 自动处理转义字符
|
| 95 |
+
- 详细的错误诊断
|
| 96 |
+
|
| 97 |
+
### 3. 多维度评估体系
|
| 98 |
+
- Function call的结构化评估
|
| 99 |
+
- LLM judge的主观评估
|
| 100 |
+
- 统计分析和可视化
|
| 101 |
+
|
| 102 |
+
### 4. 完善的工具链
|
| 103 |
+
- 调试工具:理解训练过程
|
| 104 |
+
- 测试工具:验证功能正确性
|
| 105 |
+
- 评估工具:实际评测效果
|
| 106 |
+
- 文档工具:使用说明和示例
|
| 107 |
+
|
| 108 |
+
## 使用方式
|
| 109 |
+
|
| 110 |
+
### 1. 理解训练过程
|
| 111 |
+
```bash
|
| 112 |
+
python debug_pair_splitting.py
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
### 2. 测试评估功能
|
| 116 |
+
```bash
|
| 117 |
+
python test_evaluator.py
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
### 3. 运行完整评估
|
| 121 |
+
```bash
|
| 122 |
+
python run_evaluation_example.py # 模拟API
|
| 123 |
+
python eval_by_training_flow.py # 真实API
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
## 核心价值
|
| 127 |
+
|
| 128 |
+
1. **训练-评估一致性**:确保评估方式与训练方式完全一致
|
| 129 |
+
2. **全面的评估体系**:覆盖function call和response质量的多个维度
|
| 130 |
+
3. **实用的工具链**:从调试到评估的完整解决方案
|
| 131 |
+
4. **可扩展的架构**:易于添加新的评估指标和功能
|
| 132 |
+
|
| 133 |
+
## 项目文件总览
|
| 134 |
+
|
| 135 |
+
```
|
| 136 |
+
/home/ziqiang/LLaMA-Factory/
|
| 137 |
+
├── debug_pair_splitting.py # pair分割调试脚本
|
| 138 |
+
├── eval_by_training_flow.py # 主评估器
|
| 139 |
+
├── test_evaluator.py # 功能测试
|
| 140 |
+
├── run_evaluation_example.py # 使用示例
|
| 141 |
+
├── README_evaluation.md # 详细文档
|
| 142 |
+
├── SUMMARY_完成情况总结.md # 本文档
|
| 143 |
+
├── pair_debug.log # 调试日志
|
| 144 |
+
├── eval_results.log # 评估日志
|
| 145 |
+
├── sample_test_data.json # 示例数据
|
| 146 |
+
└── mock_eval_results.json # 示例结果
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
## 总结
|
| 150 |
+
|
| 151 |
+
我们成功完成了两个主要需求:
|
| 152 |
+
|
| 153 |
+
1. ✅ **深入理解了ShareGPT训练的pair分割机制**,并提供了详细的调试工具
|
| 154 |
+
2. ✅ **构建了完整的基于训练流程的评测体系**,包括function call和LLM judge评估
|
| 155 |
+
|
| 156 |
+
这套工具可以帮助你:
|
| 157 |
+
- 准确理解训练过程中的数据处理方式
|
| 158 |
+
- 按照真实训练流程评估模型效果
|
| 159 |
+
- 获得详细的评估指标和错误分析
|
| 160 |
+
- 持续优化模型的function calling能力
|
| 161 |
+
|
| 162 |
+
所有代码都经过测试验证,可以直接使用。
|
| 163 |
+
|
| 164 |
+
|
analyze_token_logs.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
分析Token调试日志的脚本
|
| 4 |
+
用于分析训练日志中的token处理情况
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
import sys
|
| 9 |
+
from collections import defaultdict
|
| 10 |
+
|
| 11 |
+
def analyze_token_logs(log_file):
|
| 12 |
+
"""分析token调试日志"""
|
| 13 |
+
print(f"=== 分析日志文件: {log_file} ===\n")
|
| 14 |
+
|
| 15 |
+
# 统计信息
|
| 16 |
+
stats = {
|
| 17 |
+
'total_samples': 0,
|
| 18 |
+
'truncated_pairs': 0,
|
| 19 |
+
'dropped_pairs': 0,
|
| 20 |
+
'cutoff_usage': [],
|
| 21 |
+
'truncation_events': []
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
current_sample = None
|
| 25 |
+
|
| 26 |
+
with open(log_file, 'r', encoding='utf-8') as f:
|
| 27 |
+
for line in f:
|
| 28 |
+
line = line.strip()
|
| 29 |
+
|
| 30 |
+
# 检测新样本开始
|
| 31 |
+
if '[TOKEN_DEBUG] 开始处理数据样本' in line:
|
| 32 |
+
stats['total_samples'] += 1
|
| 33 |
+
current_sample = {
|
| 34 |
+
'sample_id': stats['total_samples'],
|
| 35 |
+
'pairs': [],
|
| 36 |
+
'final_length': 0,
|
| 37 |
+
'cutoff_len': 0
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
# 记录cutoff_len
|
| 41 |
+
elif '[TOKEN_DEBUG] cutoff_len:' in line:
|
| 42 |
+
cutoff_match = re.search(r'cutoff_len: (\d+)', line)
|
| 43 |
+
if cutoff_match:
|
| 44 |
+
current_sample['cutoff_len'] = int(cutoff_match.group(1))
|
| 45 |
+
|
| 46 |
+
# 记录pair信息
|
| 47 |
+
elif '[TOKEN_DEBUG] === Pair' in line:
|
| 48 |
+
pair_match = re.search(r'Pair (\d+)', line)
|
| 49 |
+
if pair_match:
|
| 50 |
+
pair_id = int(pair_match.group(1))
|
| 51 |
+
current_sample['pairs'].append({
|
| 52 |
+
'id': pair_id,
|
| 53 |
+
'original_source': 0,
|
| 54 |
+
'original_target': 0,
|
| 55 |
+
'truncated_source': 0,
|
| 56 |
+
'truncated_target': 0,
|
| 57 |
+
'truncated': False
|
| 58 |
+
})
|
| 59 |
+
|
| 60 |
+
# 记录原始长度
|
| 61 |
+
elif '[TOKEN_DEBUG] 原始长度:' in line:
|
| 62 |
+
length_match = re.search(r'source=(\d+), target=(\d+)', line)
|
| 63 |
+
if length_match and current_sample and current_sample['pairs']:
|
| 64 |
+
source_len = int(length_match.group(1))
|
| 65 |
+
target_len = int(length_match.group(2))
|
| 66 |
+
current_sample['pairs'][-1]['original_source'] = source_len
|
| 67 |
+
current_sample['pairs'][-1]['original_target'] = target_len
|
| 68 |
+
|
| 69 |
+
# 记录截断后长度
|
| 70 |
+
elif '[TOKEN_DEBUG] 截断后长度:' in line:
|
| 71 |
+
truncate_match = re.search(r'source=(\d+)->(\d+), target=(\d+)->(\d+)', line)
|
| 72 |
+
if truncate_match and current_sample and current_sample['pairs']:
|
| 73 |
+
orig_source = int(truncate_match.group(1))
|
| 74 |
+
new_source = int(truncate_match.group(2))
|
| 75 |
+
orig_target = int(truncate_match.group(3))
|
| 76 |
+
new_target = int(truncate_match.group(4))
|
| 77 |
+
|
| 78 |
+
current_sample['pairs'][-1]['truncated_source'] = new_source
|
| 79 |
+
current_sample['pairs'][-1]['truncated_target'] = new_target
|
| 80 |
+
|
| 81 |
+
if new_source < orig_source or new_target < orig_target:
|
| 82 |
+
current_sample['pairs'][-1]['truncated'] = True
|
| 83 |
+
stats['truncated_pairs'] += 1
|
| 84 |
+
stats['truncation_events'].append({
|
| 85 |
+
'sample_id': current_sample['sample_id'],
|
| 86 |
+
'pair_id': current_sample['pairs'][-1]['id'],
|
| 87 |
+
'source_truncated': new_source < orig_source,
|
| 88 |
+
'target_truncated': new_target < orig_target
|
| 89 |
+
})
|
| 90 |
+
|
| 91 |
+
# 记录预算耗尽
|
| 92 |
+
elif '[TOKEN_DEBUG] 预算耗尽,丢弃剩余pairs' in line:
|
| 93 |
+
stats['dropped_pairs'] += 1
|
| 94 |
+
|
| 95 |
+
# 记录最终结果
|
| 96 |
+
elif '[TOKEN_DEBUG] 最终total_length:' in line:
|
| 97 |
+
final_match = re.search(r'最终total_length: (\d+)', line)
|
| 98 |
+
if final_match and current_sample:
|
| 99 |
+
current_sample['final_length'] = int(final_match.group(1))
|
| 100 |
+
stats['cutoff_usage'].append({
|
| 101 |
+
'sample_id': current_sample['sample_id'],
|
| 102 |
+
'used': current_sample['final_length'],
|
| 103 |
+
'cutoff': current_sample['cutoff_len'],
|
| 104 |
+
'usage_rate': current_sample['final_length'] / current_sample['cutoff_len'] if current_sample['cutoff_len'] > 0 else 0
|
| 105 |
+
})
|
| 106 |
+
|
| 107 |
+
# 输出分析结果
|
| 108 |
+
print(f"总样本数: {stats['total_samples']}")
|
| 109 |
+
print(f"发生截断的pairs: {stats['truncated_pairs']}")
|
| 110 |
+
print(f"预算耗尽的样本: {stats['dropped_pairs']}")
|
| 111 |
+
print()
|
| 112 |
+
|
| 113 |
+
if stats['cutoff_usage']:
|
| 114 |
+
usage_rates = [item['usage_rate'] for item in stats['cutoff_usage']]
|
| 115 |
+
avg_usage = sum(usage_rates) / len(usage_rates)
|
| 116 |
+
max_usage = max(usage_rates)
|
| 117 |
+
min_usage = min(usage_rates)
|
| 118 |
+
|
| 119 |
+
print(f"Token使用率统计:")
|
| 120 |
+
print(f" 平均使用率: {avg_usage:.1%}")
|
| 121 |
+
print(f" 最大使用率: {max_usage:.1%}")
|
| 122 |
+
print(f" 最小使用率: {min_usage:.1%}")
|
| 123 |
+
print()
|
| 124 |
+
|
| 125 |
+
# 截断事件分析
|
| 126 |
+
if stats['truncation_events']:
|
| 127 |
+
print("截断事件分析:")
|
| 128 |
+
source_truncated = sum(1 for event in stats['truncation_events'] if event['source_truncated'])
|
| 129 |
+
target_truncated = sum(1 for event in stats['truncation_events'] if event['target_truncated'])
|
| 130 |
+
|
| 131 |
+
print(f" source被截断: {source_truncated} 次")
|
| 132 |
+
print(f" target被截断: {target_truncated} 次")
|
| 133 |
+
print()
|
| 134 |
+
|
| 135 |
+
# 显示前几个截断事件
|
| 136 |
+
print("前5个截断事件:")
|
| 137 |
+
for i, event in enumerate(stats['truncation_events'][:5]):
|
| 138 |
+
print(f" 样本{event['sample_id']} Pair{event['pair_id']}: "
|
| 139 |
+
f"source截断={event['source_truncated']}, target截断={event['target_truncated']}")
|
| 140 |
+
|
| 141 |
+
# 使用率分布
|
| 142 |
+
if stats['cutoff_usage']:
|
| 143 |
+
print("\n使用率分布:")
|
| 144 |
+
ranges = [(0.0, 0.5), (0.5, 0.7), (0.7, 0.9), (0.9, 1.0), (1.0, 1.0)]
|
| 145 |
+
for start, end in ranges:
|
| 146 |
+
count = sum(1 for item in stats['cutoff_usage']
|
| 147 |
+
if start <= item['usage_rate'] < end or (end == 1.0 and item['usage_rate'] == 1.0))
|
| 148 |
+
print(f" {start:.0%}-{end:.0%}: {count} 个样本")
|
| 149 |
+
|
| 150 |
+
def main():
|
| 151 |
+
if len(sys.argv) != 2:
|
| 152 |
+
print("使用方法: python analyze_token_logs.py <log_file>")
|
| 153 |
+
print("示例: python analyze_token_logs.py debug_train.log")
|
| 154 |
+
sys.exit(1)
|
| 155 |
+
|
| 156 |
+
log_file = sys.argv[1]
|
| 157 |
+
try:
|
| 158 |
+
analyze_token_logs(log_file)
|
| 159 |
+
except FileNotFoundError:
|
| 160 |
+
print(f"错误: 找不到日志文件 {log_file}")
|
| 161 |
+
except Exception as e:
|
| 162 |
+
print(f"错误: {e}")
|
| 163 |
+
|
| 164 |
+
if __name__ == "__main__":
|
| 165 |
+
main()
|
assets/alaya_new.svg
ADDED
|
|
assets/logo.png
ADDED
|
assets/wechat.jpg
ADDED
|
Git LFS Details
|
assets/wechat_alaya.png
ADDED
|
Git LFS Details
|
assets/wechat_npu.jpg
ADDED
|
Git LFS Details
|
configs/ds_zero3.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bf16": { "enabled": true },
|
| 3 |
+
|
| 4 |
+
"zero_optimization": {
|
| 5 |
+
"stage": 3,
|
| 6 |
+
"overlap_comm": false,
|
| 7 |
+
"contiguous_gradients": true,
|
| 8 |
+
|
| 9 |
+
/* 下面三项是按 40GB 显存收紧的桶大小与持久化阈值 */
|
| 10 |
+
"reduce_bucket_size": 200000000,
|
| 11 |
+
"stage3_prefetch_bucket_size": 50000000,
|
| 12 |
+
"stage3_param_persistence_threshold": 1000000,
|
| 13 |
+
|
| 14 |
+
"stage3_gather_16bit_weights_on_model_save": true
|
| 15 |
+
},
|
| 16 |
+
|
| 17 |
+
"aio": {
|
| 18 |
+
"block_size": 1048576,
|
| 19 |
+
"queue_depth": 8,
|
| 20 |
+
"single_submit": false,
|
| 21 |
+
"overlap_events": true,
|
| 22 |
+
"thread_count": 1
|
| 23 |
+
},
|
| 24 |
+
|
| 25 |
+
/* 交由命令行的 per_device_train_batch_size / gradient_accumulation_steps 控制 */
|
| 26 |
+
"train_micro_batch_size_per_gpu": "auto",
|
| 27 |
+
"gradient_accumulation_steps": "auto",
|
| 28 |
+
|
| 29 |
+
"zero_force_ds_cpu_optimizer": false,
|
| 30 |
+
"zero_allow_untested_optimizer": true,
|
| 31 |
+
"wall_clock_breakdown": false
|
| 32 |
+
}
|
| 33 |
+
|
data/belle_multiturn/belle_multiturn.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2025 the LlamaFactory team.
|
| 2 |
+
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
|
| 3 |
+
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
+
# you may not use this file except in compliance with the License.
|
| 6 |
+
# You may obtain a copy of the License at
|
| 7 |
+
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
+
#
|
| 10 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
+
# See the License for the specific language governing permissions and
|
| 14 |
+
# limitations under the License.
|
| 15 |
+
|
| 16 |
+
import json
|
| 17 |
+
import os
|
| 18 |
+
|
| 19 |
+
import datasets
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
|
| 23 |
+
|
| 24 |
+
_DESCRIPTION = "BELLE multiturn chat dataset."
|
| 25 |
+
|
| 26 |
+
_CITATION = """\
|
| 27 |
+
@article{belle2023exploring,
|
| 28 |
+
title={Exploring the Impact of Instruction Data Scaling on Large Language Models},
|
| 29 |
+
author={Yunjie Ji, Yong Deng, Yan Gong, Yiping Peng, Qiang Niu, Lei Zhang, Baochang Ma, Xiangang Li},
|
| 30 |
+
journal={arXiv preprint arXiv:2303.14742},
|
| 31 |
+
year={2023}
|
| 32 |
+
}
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
|
| 36 |
+
_LICENSE = "gpl-3.0"
|
| 37 |
+
_URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class BelleMultiturn(datasets.GeneratorBasedBuilder):
|
| 41 |
+
VERSION = datasets.Version("0.0.0")
|
| 42 |
+
|
| 43 |
+
def _info(self):
|
| 44 |
+
features = datasets.Features(
|
| 45 |
+
{"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]}
|
| 46 |
+
)
|
| 47 |
+
return datasets.DatasetInfo(
|
| 48 |
+
description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
| 52 |
+
file_path = dl_manager.download(_URL)
|
| 53 |
+
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
|
| 54 |
+
|
| 55 |
+
def _generate_examples(self, filepath: str):
|
| 56 |
+
with open(filepath, encoding="utf-8") as f:
|
| 57 |
+
for key, row in enumerate(f):
|
| 58 |
+
data = json.loads(row)
|
| 59 |
+
conversations = []
|
| 60 |
+
prompt = data["instruction"].strip()
|
| 61 |
+
response = data["output"].strip()
|
| 62 |
+
|
| 63 |
+
assist_idx = prompt.rfind("Assistant:")
|
| 64 |
+
human_idx = prompt.rfind("Human:")
|
| 65 |
+
query = prompt[human_idx + 6 : assist_idx].strip()
|
| 66 |
+
prompt = prompt[:human_idx].strip()
|
| 67 |
+
conversations.insert(0, {"from": "gpt", "value": response})
|
| 68 |
+
conversations.insert(0, {"from": "human", "value": query})
|
| 69 |
+
|
| 70 |
+
while prompt.rfind("Assistant:") != -1:
|
| 71 |
+
assist_idx = prompt.rfind("Assistant:")
|
| 72 |
+
human_idx = prompt.rfind("Human:")
|
| 73 |
+
if human_idx != -1:
|
| 74 |
+
old_query = prompt[human_idx + 6 : assist_idx].strip()
|
| 75 |
+
old_resp = prompt[assist_idx + 10 :].strip()
|
| 76 |
+
conversations.insert(0, {"from": "gpt", "value": old_resp})
|
| 77 |
+
conversations.insert(0, {"from": "human", "value": old_query})
|
| 78 |
+
else:
|
| 79 |
+
break
|
| 80 |
+
prompt = prompt[:human_idx].strip()
|
| 81 |
+
|
| 82 |
+
yield key, {"conversations": conversations}
|
data/dataset/01_26/CUDA内存不足解决方案.md
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CUDA 内存不足错误解决方案
|
| 2 |
+
|
| 3 |
+
## 错误现象
|
| 4 |
+
|
| 5 |
+
```
|
| 6 |
+
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 4.64 GiB.
|
| 7 |
+
GPU 0 has a total capacity of 39.49 GiB of which 1.86 GiB is free.
|
| 8 |
+
Including non-PyTorch memory, this process has 26.21 GiB memory in use.
|
| 9 |
+
Process 881161 has 11.40 GiB memory in use.
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
## 错误原因分析
|
| 13 |
+
|
| 14 |
+
1. **序列长度过长**:`cutoff_len 8192` 需要大量显存
|
| 15 |
+
2. **多进程占用显存**:另一个进程(881161)占用了 11.40 GiB
|
| 16 |
+
3. **DeepSpeed ZeRO-2 配置不够激进**:虽然启用了优化器 offload,但参数仍在 GPU 上
|
| 17 |
+
4. **PyTorch 内存碎片**:24.70 GiB 已分配但可能碎片化
|
| 18 |
+
|
| 19 |
+
## 解决方案
|
| 20 |
+
|
| 21 |
+
### 方案 1:启用 ZeRO-3 + 参数 Offload(最推荐)
|
| 22 |
+
|
| 23 |
+
使用 ZeRO-3 并将参数也 offload 到 CPU,可以大幅减少 GPU 显存占用。
|
| 24 |
+
|
| 25 |
+
**修改 DeepSpeed 配置**:使用 `zero3.json` 或创建更激进的配置
|
| 26 |
+
|
| 27 |
+
**训练命令**:
|
| 28 |
+
```bash
|
| 29 |
+
CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train \
|
| 30 |
+
--stage sft \
|
| 31 |
+
--template qwen \
|
| 32 |
+
--do_train True \
|
| 33 |
+
--flash_attn auto \
|
| 34 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 35 |
+
--preprocessing_num_workers 8 \
|
| 36 |
+
--finetuning_type lora \
|
| 37 |
+
--dataset_dir data \
|
| 38 |
+
--dataset 01_26_toollist1 \
|
| 39 |
+
--num_train_epochs 1 \
|
| 40 |
+
--warmup_ratio 0.03 \
|
| 41 |
+
--max_samples 200000 \
|
| 42 |
+
--cutoff_len 8192 \
|
| 43 |
+
--learning_rate 1e-5 \
|
| 44 |
+
--per_device_train_batch_size 1 \
|
| 45 |
+
--gradient_accumulation_steps 2 \
|
| 46 |
+
--lr_scheduler_type cosine \
|
| 47 |
+
--max_grad_norm 1.0 \
|
| 48 |
+
--logging_steps 5 \
|
| 49 |
+
--save_steps 200 \
|
| 50 |
+
--warmup_steps 0 \
|
| 51 |
+
--ddp_timeout 180000000 \
|
| 52 |
+
--packing False \
|
| 53 |
+
--enable_thinking False \
|
| 54 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 55 |
+
--bf16 True \
|
| 56 |
+
--plot_loss True \
|
| 57 |
+
--trust_remote_code True \
|
| 58 |
+
--deepspeed data/deepspeed/zero3_offload.json \
|
| 59 |
+
--include_num_input_tokens_seen True \
|
| 60 |
+
--optim adamw_torch \
|
| 61 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 62 |
+
--lora_rank 8 \
|
| 63 |
+
--lora_alpha 16 \
|
| 64 |
+
--lora_dropout 0 \
|
| 65 |
+
--lora_target all
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
### 方案 2:增强 ZeRO-2 配置(参数 Offload)
|
| 69 |
+
|
| 70 |
+
在现有 ZeRO-2 基础上添加参数 offload。
|
| 71 |
+
|
| 72 |
+
**修改 DeepSpeed 配置**:创建 `zero2_offload_param.json`
|
| 73 |
+
|
| 74 |
+
**训练命令**:使用新的 DeepSpeed 配置文件
|
| 75 |
+
|
| 76 |
+
### 方案 3:减少序列长度(快速解决)
|
| 77 |
+
|
| 78 |
+
将 `cutoff_len` 从 8192 降到 6144 或更小:
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
--cutoff_len 6144
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
### 方案 4:清理其他 GPU 进程
|
| 85 |
+
|
| 86 |
+
在训练前检查并清理占用 GPU 的进程:
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
# 查看 GPU 使用情况
|
| 90 |
+
nvidia-smi
|
| 91 |
+
|
| 92 |
+
# 如果进程 881161 不需要,可以终止它
|
| 93 |
+
kill -9 881161
|
| 94 |
+
|
| 95 |
+
# 或者使用 fuser 查找占用 GPU 的进程
|
| 96 |
+
fuser -v /dev/nvidia*
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
### 方案 5:减少批次大小并增加梯度累积
|
| 100 |
+
|
| 101 |
+
```bash
|
| 102 |
+
--per_device_train_batch_size 1 \
|
| 103 |
+
--gradient_accumulation_steps 4
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
### 方案 6:启用 PyTorch 内存优化
|
| 107 |
+
|
| 108 |
+
在训练命令前添加环境变量:
|
| 109 |
+
|
| 110 |
+
```bash
|
| 111 |
+
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train ...
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
这可以减少内存碎片化问题。
|
| 115 |
+
|
| 116 |
+
### 方案 7:使用梯度检查点(Gradient Checkpointing)
|
| 117 |
+
|
| 118 |
+
如果 LLaMA-Factory 支持,可以启用梯度检查点来以计算时间换取显存:
|
| 119 |
+
|
| 120 |
+
```bash
|
| 121 |
+
--gradient_checkpointing True
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
## 推荐的组合方案
|
| 125 |
+
|
| 126 |
+
**最佳实践组合**:
|
| 127 |
+
1. 使用 ZeRO-3 + 参数和优化器 offload
|
| 128 |
+
2. 启用 PyTorch 内存优化
|
| 129 |
+
3. 清理不必要的 GPU 进程
|
| 130 |
+
4. 如果仍然不足,适当降低 `cutoff_len`
|
| 131 |
+
|
| 132 |
+
## 内存使用估算
|
| 133 |
+
|
| 134 |
+
- **Qwen3-8B 模型**:约 16 GiB(FP16/BF16)
|
| 135 |
+
- **LoRA 参数**:约 100-200 MiB(rank=8)
|
| 136 |
+
- **激活值(cutoff_len=8192)**:约 8-12 GiB per GPU
|
| 137 |
+
- **优化器状态(ZeRO-2)**:约 4-6 GiB per GPU(已 offload 到 CPU)
|
| 138 |
+
- **梯度**:约 2-4 GiB per GPU
|
| 139 |
+
|
| 140 |
+
**总计(ZeRO-2)**:约 26-34 GiB per GPU
|
| 141 |
+
**总计(ZeRO-3 + offload)**:约 10-15 GiB per GPU
|
| 142 |
+
|
| 143 |
+
## 已创建的配置文件
|
| 144 |
+
|
| 145 |
+
- `zero3_offload.json`:ZeRO-3 + 参数和优化器 offload 配置
|
| 146 |
+
- `zero2_offload_param.json`:ZeRO-2 + 参数 offload 配置
|
data/dataset/01_26/NCCL错误解决方案.md
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NCCL 通信超时错误解决方案
|
| 2 |
+
|
| 3 |
+
## 错误现象
|
| 4 |
+
|
| 5 |
+
```
|
| 6 |
+
Exception raised from ncclCommWatchdog at /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:1902
|
| 7 |
+
Signal 6 (SIGABRT) received by PID
|
| 8 |
+
torch.distributed.elastic.multiprocessing.errors.ChildFailedError
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
## 错误原因分析
|
| 12 |
+
|
| 13 |
+
1. **NCCL 通信超时**:GPU 之间的通信超时,watchdog 检测到通信失败
|
| 14 |
+
2. **GPU 拓扑问题**:使用 `CUDA_VISIBLE_DEVICES=0,1,2,5` 跳过了 GPU 3 和 4,可能导致通信路径不理想
|
| 15 |
+
3. **内存不足**:显存不足可能导致通信缓冲区分配失败
|
| 16 |
+
4. **DeepSpeed 配置**:通信超时设置可能过短
|
| 17 |
+
|
| 18 |
+
## 解决方案
|
| 19 |
+
|
| 20 |
+
### 方案 1:增加 NCCL 超时时间(推荐)
|
| 21 |
+
|
| 22 |
+
在训练命令前添加环境变量:
|
| 23 |
+
|
| 24 |
+
```bash
|
| 25 |
+
NCCL_TIMEOUT=1800 NCCL_ASYNC_ERROR_HANDLING=1 CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train ...
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
并在训练命令中添加:
|
| 29 |
+
```bash
|
| 30 |
+
--ddp_timeout 1800
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
**参数说明:**
|
| 34 |
+
- `NCCL_TIMEOUT=1800`:设置 NCCL 通信超时为 1800 秒(30分钟)
|
| 35 |
+
- `NCCL_ASYNC_ERROR_HANDLING=1`:启用异步错误处理,避免立即终止
|
| 36 |
+
- `--ddp_timeout 1800`:设置 PyTorch DDP 超时时间
|
| 37 |
+
|
| 38 |
+
### 方案 2:禁用 NCCL Watchdog(如果方案1无效)
|
| 39 |
+
|
| 40 |
+
```bash
|
| 41 |
+
NCCL_TIMEOUT=1800 NCCL_ASYNC_ERROR_HANDLING=1 NCCL_BLOCKING_WAIT=0 CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train ...
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
### 方案 3:使用连续的 GPU(如果可能)
|
| 45 |
+
|
| 46 |
+
如果 GPU 0,1,2,3 可用,优先使用连续的 GPU:
|
| 47 |
+
|
| 48 |
+
```bash
|
| 49 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train ...
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### 方案 4:减少 GPU 数量
|
| 53 |
+
|
| 54 |
+
如果问题持续,可以尝试减少到 2 个 GPU:
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train ...
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
并相应调整 `gradient_accumulation_steps` 以保持相同的有效批次大小。
|
| 61 |
+
|
| 62 |
+
### 方案 5:检查 GPU 状态和拓扑
|
| 63 |
+
|
| 64 |
+
```bash
|
| 65 |
+
# 检查 GPU 状态
|
| 66 |
+
nvidia-smi
|
| 67 |
+
|
| 68 |
+
# 检查 GPU 拓扑(需要安装 nvidia-ml-py)
|
| 69 |
+
python -c "import pynvml; pynvml.nvmlInit(); [print(f'GPU {i}: {pynvml.nvmlDeviceGetName(pynvml.nvmlDeviceGetHandleByIndex(i))}') for i in range(4)]"
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
### 方案 6:启用 NCCL 调试(用于诊断)
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
NCCL_DEBUG=INFO NCCL_TIMEOUT=1800 CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train ...
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
这会输出详细的 NCCL 通信日志,帮助定位问题。
|
| 79 |
+
|
| 80 |
+
### 方案 7:调整 DeepSpeed 配置
|
| 81 |
+
|
| 82 |
+
如果使用 DeepSpeed,可以尝试:
|
| 83 |
+
|
| 84 |
+
1. **减小通信桶大小**(在 `zero2.json` 中):
|
| 85 |
+
```json
|
| 86 |
+
{
|
| 87 |
+
"zero_optimization": {
|
| 88 |
+
"allgather_bucket_size": 2e8,
|
| 89 |
+
"reduce_bucket_size": 2e8
|
| 90 |
+
}
|
| 91 |
+
}
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
2. **禁用通信重叠**:
|
| 95 |
+
```json
|
| 96 |
+
{
|
| 97 |
+
"zero_optimization": {
|
| 98 |
+
"overlap_comm": false
|
| 99 |
+
}
|
| 100 |
+
}
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
## 其他建议
|
| 104 |
+
|
| 105 |
+
1. **检查系统资源**:
|
| 106 |
+
- 确保有足够的系统内存
|
| 107 |
+
- 检查是否有其他进程占用 GPU
|
| 108 |
+
- 检查网络带宽(如果使用多节点)
|
| 109 |
+
|
| 110 |
+
2. **降低批次大小**:
|
| 111 |
+
- 减小 `per_device_train_batch_size`
|
| 112 |
+
- 增加 `gradient_accumulation_steps` 以保持有效批次大小
|
| 113 |
+
|
| 114 |
+
3. **使用更小的模型或 LoRA rank**:
|
| 115 |
+
- 如果显存紧张,可以减小 `lora_rank`
|
| 116 |
+
|
| 117 |
+
## 已更新的文件
|
| 118 |
+
|
| 119 |
+
- `/home/yijuan_liang/LLaMA-Factory/data/deepspeed/zero2.json`:添加了通信数据类型配置
|
| 120 |
+
- `/home/yijuan_liang/LLaMA-Factory/data/dataset/01_26/训练命令.md`:添加了 NCCL 超时环境变量和 `--ddp_timeout` 参数
|
data/dataset/01_26/ZeRO-3使用说明.md
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# DeepSpeed ZeRO-3 使用说明
|
| 2 |
+
|
| 3 |
+
## 问题1:max_samples=38万,显存是否支持?
|
| 4 |
+
|
| 5 |
+
### 重要结论
|
| 6 |
+
**max_samples 不影响显存占用!**
|
| 7 |
+
|
| 8 |
+
显存占用主要取决于:
|
| 9 |
+
- 模型大小(Qwen3-8B):~16GB
|
| 10 |
+
- Batch size × Sequence length
|
| 11 |
+
- 优化器状态:~32GB(AdamW)
|
| 12 |
+
- 梯度:~16GB
|
| 13 |
+
|
| 14 |
+
### 显存需求估算(2卡)
|
| 15 |
+
|
| 16 |
+
**ZeRO-2:**
|
| 17 |
+
- 模型参数:16GB/GPU(未分片)
|
| 18 |
+
- 优化器状态:16GB/GPU(分片后)
|
| 19 |
+
- 梯度:8GB/GPU(分片后)
|
| 20 |
+
- 激活值:~0.5GB/GPU
|
| 21 |
+
- **总计:~40.5GB/GPU**
|
| 22 |
+
|
| 23 |
+
**ZeRO-3:**
|
| 24 |
+
- 模型参数:8GB/GPU(分片后)
|
| 25 |
+
- 优化器状态:16GB/GPU(分片后)
|
| 26 |
+
- 梯度:8GB/GPU(分片后)
|
| 27 |
+
- 激活值:~0.5GB/GPU
|
| 28 |
+
- **总计:~32.5GB/GPU**
|
| 29 |
+
- **节省:~8GB/GPU**
|
| 30 |
+
|
| 31 |
+
### 结论
|
| 32 |
+
- 如果每张GPU有40GB显存(A800),ZeRO-2应该足够
|
| 33 |
+
- 如果显存紧张或想更安全,建议使用ZeRO-3
|
| 34 |
+
- max_samples=38万不影响显存,只影响训练时间
|
| 35 |
+
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## 问题2:是否需要用到DeepSpeed ZeRO-3?
|
| 39 |
+
|
| 40 |
+
### 建议使用ZeRO-3的情况:
|
| 41 |
+
1. ✅ 显存不足(<40GB/GPU)
|
| 42 |
+
2. ✅ 想训练更大的batch size
|
| 43 |
+
3. ✅ 想使用更长的sequence length
|
| 44 |
+
4. ✅ 想更安全地训练(留出显存余量)
|
| 45 |
+
|
| 46 |
+
### 当前配置建议:
|
| 47 |
+
- **如果显存充足(40GB+)**:ZeRO-2足够
|
| 48 |
+
- **如果显存紧张(<40GB)**:使用ZeRO-3
|
| 49 |
+
- **如果想更安全**:使用ZeRO-3
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
## 问题3:ZeRO-3是否可以接续在ZeRO-2后训练?
|
| 54 |
+
|
| 55 |
+
### ✅ 答案:可以!
|
| 56 |
+
|
| 57 |
+
DeepSpeed checkpoint在不同ZeRO stage之间是**兼容的**,因为:
|
| 58 |
+
1. Checkpoint保存的是**模型权重**(LoRA adapter)
|
| 59 |
+
2. ZeRO stage只影响**训练时的显存分配**
|
| 60 |
+
3. 保存的checkpoint格式相同
|
| 61 |
+
|
| 62 |
+
### 接续训练步骤:
|
| 63 |
+
|
| 64 |
+
#### 从ZeRO-2切换到ZeRO-3:
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train \
|
| 68 |
+
--stage sft \
|
| 69 |
+
--template qwen \
|
| 70 |
+
--do_train True \
|
| 71 |
+
--flash_attn auto \
|
| 72 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 73 |
+
--preprocessing_num_workers 8 \
|
| 74 |
+
--finetuning_type lora \
|
| 75 |
+
--dataset_dir data \
|
| 76 |
+
--dataset 01_26_toollist1 \
|
| 77 |
+
--num_train_epochs 1 \
|
| 78 |
+
--warmup_ratio 0.03 \
|
| 79 |
+
--cutoff_len 10240 \
|
| 80 |
+
--max_samples 380000 \
|
| 81 |
+
--resume_from_checkpoint /path/to/zero2/checkpoint-XXXX \
|
| 82 |
+
--learning_rate 1e-5 \
|
| 83 |
+
--per_device_train_batch_size 1 \
|
| 84 |
+
--gradient_accumulation_steps 4 \
|
| 85 |
+
--lr_scheduler_type cosine \
|
| 86 |
+
--max_grad_norm 1.0 \
|
| 87 |
+
--logging_steps 5 \
|
| 88 |
+
--save_steps 200 \
|
| 89 |
+
--warmup_steps 0 \
|
| 90 |
+
--packing False \
|
| 91 |
+
--enable_thinking False \
|
| 92 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 93 |
+
--bf16 True \
|
| 94 |
+
--plot_loss True \
|
| 95 |
+
--trust_remote_code True \
|
| 96 |
+
--deepspeed data/deepspeed/zero3.json \
|
| 97 |
+
--include_num_input_tokens_seen True \
|
| 98 |
+
--optim adamw_torch \
|
| 99 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 100 |
+
--lora_rank 8 \
|
| 101 |
+
--lora_alpha 16 \
|
| 102 |
+
--lora_dropout 0 \
|
| 103 |
+
--lora_target all
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
### 注意事项:
|
| 107 |
+
|
| 108 |
+
1. **保持训练参数一致**:
|
| 109 |
+
- learning_rate
|
| 110 |
+
- optimizer类型
|
| 111 |
+
- LoRA参数(rank, alpha等)
|
| 112 |
+
|
| 113 |
+
2. **可以改变**:
|
| 114 |
+
- ZeRO stage(2→3)
|
| 115 |
+
- max_samples
|
| 116 |
+
- GPU数量(需要调整gradient_accumulation_steps)
|
| 117 |
+
|
| 118 |
+
3. **checkpoint路径**:
|
| 119 |
+
- 确保checkpoint路径正确
|
| 120 |
+
- 使用最新的checkpoint
|
| 121 |
+
|
| 122 |
+
---
|
| 123 |
+
|
| 124 |
+
## 配置文件说明
|
| 125 |
+
|
| 126 |
+
### ZeRO-3配置(带CPU offload)
|
| 127 |
+
- `data/deepspeed/zero3.json`:最节省显存
|
| 128 |
+
- 优化器和参数都offload到CPU
|
| 129 |
+
|
| 130 |
+
### ZeRO-3配置(不带CPU offload)
|
| 131 |
+
- `data/deepspeed/zero3_no_offload.json`:速度更快
|
| 132 |
+
- 所有计算在GPU上
|
| 133 |
+
|
| 134 |
+
---
|
| 135 |
+
|
| 136 |
+
## 推荐方案
|
| 137 |
+
|
| 138 |
+
### 方案1:先用ZeRO-2训练小样本
|
| 139 |
+
```bash
|
| 140 |
+
--max_samples 10000
|
| 141 |
+
--deepspeed data/deepspeed/zero2.json
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
### 方案2:切换到ZeRO-3训练大样本
|
| 145 |
+
```bash
|
| 146 |
+
--max_samples 380000
|
| 147 |
+
--resume_from_checkpoint <zero2_checkpoint>
|
| 148 |
+
--deepspeed data/deepspeed/zero3.json
|
| 149 |
+
```
|
| 150 |
+
|
| 151 |
+
这样可以:
|
| 152 |
+
- 先用ZeRO-2快速验证训练流程
|
| 153 |
+
- 再用ZeRO-3安全训练大样本
|
data/dataset/01_26/fix_cuda_oom.sh
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# CUDA 内存不足问题诊断和修复脚本
|
| 4 |
+
# 使用方法: bash fix_cuda_oom.sh
|
| 5 |
+
|
| 6 |
+
echo "=========================================="
|
| 7 |
+
echo "CUDA 内存不足问题诊断工具"
|
| 8 |
+
echo "=========================================="
|
| 9 |
+
echo ""
|
| 10 |
+
|
| 11 |
+
# 1. 检查 GPU 状态
|
| 12 |
+
echo "1. 检查 GPU 状态和内存使用情况:"
|
| 13 |
+
echo "----------------------------------------"
|
| 14 |
+
nvidia-smi
|
| 15 |
+
echo ""
|
| 16 |
+
|
| 17 |
+
# 2. 查找占用 GPU 的进程
|
| 18 |
+
echo "2. 查找占用 GPU 的进程:"
|
| 19 |
+
echo "----------------------------------------"
|
| 20 |
+
fuser -v /dev/nvidia* 2>/dev/null || echo "未找到占用 GPU 的进程"
|
| 21 |
+
echo ""
|
| 22 |
+
|
| 23 |
+
# 3. 检查是否有其他 Python 进程占用 GPU
|
| 24 |
+
echo "3. 检查 Python 进程占用 GPU 情况:"
|
| 25 |
+
echo "----------------------------------------"
|
| 26 |
+
ps aux | grep python | grep -v grep | head -10
|
| 27 |
+
echo ""
|
| 28 |
+
|
| 29 |
+
# 4. 提供清理建议
|
| 30 |
+
echo "4. 清理建议:"
|
| 31 |
+
echo "----------------------------------------"
|
| 32 |
+
echo "如果发现不需要的进程占用 GPU,可以使用以下命令清理:"
|
| 33 |
+
echo " kill -9 <PID> # 替换 <PID> 为进程 ID"
|
| 34 |
+
echo ""
|
| 35 |
+
|
| 36 |
+
# 5. 显示推荐的训练命令
|
| 37 |
+
echo "5. 推荐的训练命令(内存优化版本):"
|
| 38 |
+
echo "----------------------------------------"
|
| 39 |
+
echo ""
|
| 40 |
+
echo "方案 A: 使用 ZeRO-2 + 参数 Offload(推荐):"
|
| 41 |
+
echo "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train \\"
|
| 42 |
+
echo " --deepspeed data/deepspeed/zero2_offload_param.json \\"
|
| 43 |
+
echo " ... (其他参数见训练命令.md)"
|
| 44 |
+
echo ""
|
| 45 |
+
echo "方案 B: 使用 ZeRO-3 + 参数 Offload(最激进):"
|
| 46 |
+
echo "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train \\"
|
| 47 |
+
echo " --deepspeed data/deepspeed/zero3.json \\"
|
| 48 |
+
echo " ... (其他参数见训练命令.md)"
|
| 49 |
+
echo ""
|
| 50 |
+
echo "方案 C: 降低序列长度(快速解决):"
|
| 51 |
+
echo "将 --cutoff_len 8192 改为 --cutoff_len 6144"
|
| 52 |
+
echo ""
|
| 53 |
+
|
| 54 |
+
# 6. 检查 DeepSpeed 配置文件是否存在
|
| 55 |
+
echo "6. 检查 DeepSpeed 配置文件:"
|
| 56 |
+
echo "----------------------------------------"
|
| 57 |
+
if [ -f "data/deepspeed/zero2_offload_param.json" ]; then
|
| 58 |
+
echo "✓ zero2_offload_param.json 存在"
|
| 59 |
+
else
|
| 60 |
+
echo "✗ zero2_offload_param.json 不存在"
|
| 61 |
+
fi
|
| 62 |
+
|
| 63 |
+
if [ -f "data/deepspeed/zero3.json" ]; then
|
| 64 |
+
echo "✓ zero3.json 存在"
|
| 65 |
+
else
|
| 66 |
+
echo "✗ zero3.json 不存在"
|
| 67 |
+
fi
|
| 68 |
+
echo ""
|
| 69 |
+
|
| 70 |
+
# 7. 显示内存使用估算
|
| 71 |
+
echo "7. 内存使用估算:"
|
| 72 |
+
echo "----------------------------------------"
|
| 73 |
+
echo "当前配置(ZeRO-2,cutoff_len=8192):"
|
| 74 |
+
echo " - 模型参数: ~16 GiB"
|
| 75 |
+
echo " - 激活值: ~8-12 GiB per GPU"
|
| 76 |
+
echo " - 优化器状态: ~4-6 GiB per GPU (已 offload)"
|
| 77 |
+
echo " - 总计: ~26-34 GiB per GPU"
|
| 78 |
+
echo ""
|
| 79 |
+
echo "优化后(ZeRO-3 + offload,cutoff_len=8192):"
|
| 80 |
+
echo " - 模型参数: ~0 GiB (offload 到 CPU)"
|
| 81 |
+
echo " - 激活值: ~8-12 GiB per GPU"
|
| 82 |
+
echo " - 优化器状态: ~0 GiB (offload 到 CPU)"
|
| 83 |
+
echo " - 总计: ~10-15 GiB per GPU"
|
| 84 |
+
echo ""
|
| 85 |
+
|
| 86 |
+
echo "=========================================="
|
| 87 |
+
echo "详细解决方案请查看: CUDA内存不足解决方案.md"
|
| 88 |
+
echo "=========================================="
|
data/dataset/01_26/token_statistics.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"toollist1.jsonl": {
|
| 3 |
+
"max": 48125,
|
| 4 |
+
"min": 2714,
|
| 5 |
+
"mean": 4443.34899767502,
|
| 6 |
+
"median": 4143.0,
|
| 7 |
+
"total": 1720020397,
|
| 8 |
+
"count": 387100
|
| 9 |
+
},
|
| 10 |
+
"toollist2.jsonl": {
|
| 11 |
+
"max": 33493,
|
| 12 |
+
"min": 2623,
|
| 13 |
+
"mean": 4555.394726492956,
|
| 14 |
+
"median": 4281.0,
|
| 15 |
+
"total": 1714349919,
|
| 16 |
+
"count": 376334
|
| 17 |
+
}
|
| 18 |
+
}
|
data/dataset/01_26/token_statistics.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Token 统计结果
|
| 2 |
+
============================================================
|
| 3 |
+
|
| 4 |
+
文件: toollist1.jsonl
|
| 5 |
+
总实例数: 387100
|
| 6 |
+
总 token 数: 1720020397
|
| 7 |
+
最大 token 数: 48125
|
| 8 |
+
最小 token 数: 2714
|
| 9 |
+
平均 token 数: 4443.35
|
| 10 |
+
中位数 token 数: 4143.00
|
| 11 |
+
|
| 12 |
+
文件: toollist2.jsonl
|
| 13 |
+
总实例数: 376334
|
| 14 |
+
总 token 数: 1714349919
|
| 15 |
+
最大 token 数: 33493
|
| 16 |
+
最小 token 数: 2623
|
| 17 |
+
平均 token 数: 4555.39
|
| 18 |
+
中位数 token 数: 4281.00
|
| 19 |
+
|
data/dataset/01_26/toollist1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3544b700c2a66f259adfc732e43164125adbcf318bee3d3604125ac25095a226
|
| 3 |
+
size 7155840129
|
data/dataset/01_26/toollist1_sample.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/dataset/01_26/toollist2.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bf1d0c33ed8b489ec071cc5a1188a0c5565d1f814f32c3cfa4c820cf9936471
|
| 3 |
+
size 7176485260
|
data/dataset/01_26/toollist2_sample.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/dataset/01_26/接续训练示例.md
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 接续训练命令示例
|
| 2 |
+
|
| 3 |
+
## 场景1:增大max_samples接续训练(保持GPU数量不变)
|
| 4 |
+
|
| 5 |
+
```bash
|
| 6 |
+
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train \
|
| 7 |
+
--stage sft \
|
| 8 |
+
--template qwen \
|
| 9 |
+
--do_train True \
|
| 10 |
+
--flash_attn auto \
|
| 11 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 12 |
+
--preprocessing_num_workers 8 \
|
| 13 |
+
--finetuning_type lora \
|
| 14 |
+
--dataset_dir data \
|
| 15 |
+
--dataset 01_26_toollist1 \
|
| 16 |
+
--num_train_epochs 1 \
|
| 17 |
+
--warmup_ratio 0.03 \
|
| 18 |
+
--cutoff_len 10240 \
|
| 19 |
+
--max_samples 50000 \
|
| 20 |
+
--resume_from_checkpoint /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_YYYY-MM-DD-HH-MM/checkpoint-XXXX \
|
| 21 |
+
--learning_rate 1e-5 \
|
| 22 |
+
--per_device_train_batch_size 1 \
|
| 23 |
+
--gradient_accumulation_steps 4 \
|
| 24 |
+
--lr_scheduler_type cosine \
|
| 25 |
+
--max_grad_norm 1.0 \
|
| 26 |
+
--logging_steps 5 \
|
| 27 |
+
--save_steps 200 \
|
| 28 |
+
--warmup_steps 0 \
|
| 29 |
+
--packing False \
|
| 30 |
+
--enable_thinking False \
|
| 31 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 32 |
+
--bf16 True \
|
| 33 |
+
--plot_loss True \
|
| 34 |
+
--trust_remote_code True \
|
| 35 |
+
--deepspeed data/deepspeed/zero2.json \
|
| 36 |
+
--include_num_input_tokens_seen True \
|
| 37 |
+
--optim adamw_torch \
|
| 38 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 39 |
+
--lora_rank 8 \
|
| 40 |
+
--lora_alpha 16 \
|
| 41 |
+
--lora_dropout 0 \
|
| 42 |
+
--lora_target all
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
## 场景2:加卡并减少gradient_accumulation_steps接续训练
|
| 46 |
+
|
| 47 |
+
### 原配置(2卡):
|
| 48 |
+
- GPU数量: 2
|
| 49 |
+
- per_device_train_batch_size: 1
|
| 50 |
+
- gradient_accumulation_steps: 4
|
| 51 |
+
- **有效batch size = 2 × 1 × 4 = 8**
|
| 52 |
+
|
| 53 |
+
### 新配置(4卡,保持有效batch size):
|
| 54 |
+
- GPU数量: 4
|
| 55 |
+
- per_device_train_batch_size: 1
|
| 56 |
+
- gradient_accumulation_steps: 2
|
| 57 |
+
- **有效batch size = 4 × 1 × 2 = 8** ✓
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train \
|
| 61 |
+
--stage sft \
|
| 62 |
+
--template qwen \
|
| 63 |
+
--do_train True \
|
| 64 |
+
--flash_attn auto \
|
| 65 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 66 |
+
--preprocessing_num_workers 8 \
|
| 67 |
+
--finetuning_type lora \
|
| 68 |
+
--dataset_dir data \
|
| 69 |
+
--dataset 01_26_toollist1 \
|
| 70 |
+
--num_train_epochs 1 \
|
| 71 |
+
--warmup_ratio 0.03 \
|
| 72 |
+
--cutoff_len 10240 \
|
| 73 |
+
--max_samples 50000 \
|
| 74 |
+
--resume_from_checkpoint /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_YYYY-MM-DD-HH-MM/checkpoint-XXXX \
|
| 75 |
+
--learning_rate 1e-5 \
|
| 76 |
+
--per_device_train_batch_size 1 \
|
| 77 |
+
--gradient_accumulation_steps 2 \
|
| 78 |
+
--lr_scheduler_type cosine \
|
| 79 |
+
--max_grad_norm 1.0 \
|
| 80 |
+
--logging_steps 5 \
|
| 81 |
+
--save_steps 200 \
|
| 82 |
+
--warmup_steps 0 \
|
| 83 |
+
--packing False \
|
| 84 |
+
--enable_thinking False \
|
| 85 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 86 |
+
--bf16 True \
|
| 87 |
+
--plot_loss True \
|
| 88 |
+
--trust_remote_code True \
|
| 89 |
+
--deepspeed data/deepspeed/zero2.json \
|
| 90 |
+
--include_num_input_tokens_seen True \
|
| 91 |
+
--optim adamw_torch \
|
| 92 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 93 |
+
--lora_rank 8 \
|
| 94 |
+
--lora_alpha 16 \
|
| 95 |
+
--lora_dropout 0 \
|
| 96 |
+
--lora_target all
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
## 重要注意事项
|
| 100 |
+
|
| 101 |
+
1. **保持有效batch size一致**:改变GPU数量时,需要调整gradient_accumulation_steps以保持相同的有效batch size
|
| 102 |
+
2. **checkpoint兼容性**:确保checkpoint路径正确,DeepSpeed checkpoint可以跨GPU数量使用
|
| 103 |
+
3. **学习率调度器**:如果改变了有效batch size,可能需要调整learning_rate或warmup_steps
|
| 104 |
+
4. **数据集顺序**:确保数据集顺序一致,否则可能影响训练效果
|
| 105 |
+
|
| 106 |
+
## 有效batch size计算公式
|
| 107 |
+
|
| 108 |
+
```
|
| 109 |
+
有效batch size = GPU数量 × per_device_train_batch_size × gradient_accumulation_steps
|
| 110 |
+
```
|
data/dataset/01_26/训练命令.md
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
toollist1(cutoff_len 8192)- 使用DeepSpeed ZeRO-2:
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train \
|
| 3 |
+
--stage sft \
|
| 4 |
+
--template qwen \
|
| 5 |
+
--do_train True \
|
| 6 |
+
--flash_attn auto \
|
| 7 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 8 |
+
--preprocessing_num_workers 8 \
|
| 9 |
+
--finetuning_type lora \
|
| 10 |
+
--dataset_dir data \
|
| 11 |
+
--dataset 01_26_toollist1 \
|
| 12 |
+
--num_train_epochs 1 \
|
| 13 |
+
--warmup_ratio 0.03 \
|
| 14 |
+
--max_samples 200000 \
|
| 15 |
+
--cutoff_len 8192 \
|
| 16 |
+
--learning_rate 1e-5 \
|
| 17 |
+
--per_device_train_batch_size 1 \
|
| 18 |
+
--gradient_accumulation_steps 2 \
|
| 19 |
+
--lr_scheduler_type cosine \
|
| 20 |
+
--max_grad_norm 1.0 \
|
| 21 |
+
--logging_steps 5 \
|
| 22 |
+
--save_steps 200 \
|
| 23 |
+
--warmup_steps 0 \
|
| 24 |
+
--ddp_timeout 180000000 \
|
| 25 |
+
--packing False \
|
| 26 |
+
--enable_thinking False \
|
| 27 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 28 |
+
--bf16 True \
|
| 29 |
+
--plot_loss True \
|
| 30 |
+
--trust_remote_code True \
|
| 31 |
+
--deepspeed data/deepspeed/zero2.json \
|
| 32 |
+
--include_num_input_tokens_seen True \
|
| 33 |
+
--optim adamw_torch \
|
| 34 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 35 |
+
--lora_rank 8 \
|
| 36 |
+
--lora_alpha 16 \
|
| 37 |
+
--lora_dropout 0 \
|
| 38 |
+
--lora_target all
|
| 39 |
+
|
| 40 |
+
toollist1(cutoff_len 8192)- 使用DeepSpeed ZeRO-2 + 参数Offload(内存优化版,解决OOM):
|
| 41 |
+
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train \
|
| 42 |
+
--stage sft \
|
| 43 |
+
--template qwen \
|
| 44 |
+
--do_train True \
|
| 45 |
+
--flash_attn auto \
|
| 46 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 47 |
+
--preprocessing_num_workers 8 \
|
| 48 |
+
--finetuning_type lora \
|
| 49 |
+
--dataset_dir data \
|
| 50 |
+
--dataset 01_26_toollist1 \
|
| 51 |
+
--num_train_epochs 1 \
|
| 52 |
+
--warmup_ratio 0.03 \
|
| 53 |
+
--max_samples 200000 \
|
| 54 |
+
--cutoff_len 8192 \
|
| 55 |
+
--learning_rate 1e-5 \
|
| 56 |
+
--per_device_train_batch_size 1 \
|
| 57 |
+
--gradient_accumulation_steps 2 \
|
| 58 |
+
--lr_scheduler_type cosine \
|
| 59 |
+
--max_grad_norm 1.0 \
|
| 60 |
+
--logging_steps 5 \
|
| 61 |
+
--save_steps 200 \
|
| 62 |
+
--warmup_steps 0 \
|
| 63 |
+
--ddp_timeout 180000000 \
|
| 64 |
+
--packing False \
|
| 65 |
+
--enable_thinking False \
|
| 66 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 67 |
+
--bf16 True \
|
| 68 |
+
--plot_loss True \
|
| 69 |
+
--trust_remote_code True \
|
| 70 |
+
--deepspeed data/deepspeed/zero2_offload_param.json \
|
| 71 |
+
--include_num_input_tokens_seen True \
|
| 72 |
+
--optim adamw_torch \
|
| 73 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 74 |
+
--lora_rank 8 \
|
| 75 |
+
--lora_alpha 16 \
|
| 76 |
+
--lora_dropout 0 \
|
| 77 |
+
--lora_target all
|
| 78 |
+
|
| 79 |
+
toollist1(cutoff_len 8192)- 使用DeepSpeed ZeRO-3 + 参数Offload(最激进内存优化版):
|
| 80 |
+
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train \
|
| 81 |
+
--stage sft \
|
| 82 |
+
--template qwen \
|
| 83 |
+
--do_train True \
|
| 84 |
+
--flash_attn auto \
|
| 85 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 86 |
+
--preprocessing_num_workers 8 \
|
| 87 |
+
--finetuning_type lora \
|
| 88 |
+
--dataset_dir data \
|
| 89 |
+
--dataset 01_26_toollist1 \
|
| 90 |
+
--num_train_epochs 1 \
|
| 91 |
+
--warmup_ratio 0.03 \
|
| 92 |
+
--max_samples 200000 \
|
| 93 |
+
--cutoff_len 8192 \
|
| 94 |
+
--learning_rate 1e-5 \
|
| 95 |
+
--per_device_train_batch_size 1 \
|
| 96 |
+
--gradient_accumulation_steps 2 \
|
| 97 |
+
--lr_scheduler_type cosine \
|
| 98 |
+
--max_grad_norm 1.0 \
|
| 99 |
+
--logging_steps 5 \
|
| 100 |
+
--save_steps 200 \
|
| 101 |
+
--warmup_steps 0 \
|
| 102 |
+
--ddp_timeout 180000000 \
|
| 103 |
+
--packing False \
|
| 104 |
+
--enable_thinking False \
|
| 105 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 106 |
+
--bf16 True \
|
| 107 |
+
--plot_loss True \
|
| 108 |
+
--trust_remote_code True \
|
| 109 |
+
--deepspeed data/deepspeed/zero3.json \
|
| 110 |
+
--include_num_input_tokens_seen True \
|
| 111 |
+
--optim adamw_torch \
|
| 112 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 113 |
+
--lora_rank 8 \
|
| 114 |
+
--lora_alpha 16 \
|
| 115 |
+
--lora_dropout 0 \
|
| 116 |
+
--lora_target all
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
toollist1(cutoff_len 6144)- 使用DeepSpeed ZeRO-2:
|
| 120 |
+
CUDA_VISIBLE_DEVICES=0,1,2,5 llamafactory-cli train \
|
| 121 |
+
--stage sft \
|
| 122 |
+
--template qwen \
|
| 123 |
+
--do_train True \
|
| 124 |
+
--flash_attn auto \
|
| 125 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 126 |
+
--preprocessing_num_workers 8 \
|
| 127 |
+
--finetuning_type lora \
|
| 128 |
+
--dataset_dir data \
|
| 129 |
+
--dataset 01_26_toollist1 \
|
| 130 |
+
--num_train_epochs 1 \
|
| 131 |
+
--warmup_ratio 0.03 \
|
| 132 |
+
--cutoff_len 6144 \
|
| 133 |
+
--max_samples 200000 \
|
| 134 |
+
--learning_rate 1e-5 \
|
| 135 |
+
--per_device_train_batch_size 1 \
|
| 136 |
+
--gradient_accumulation_steps 2 \
|
| 137 |
+
--lr_scheduler_type cosine \
|
| 138 |
+
--max_grad_norm 1.0 \
|
| 139 |
+
--logging_steps 5 \
|
| 140 |
+
--save_steps 200 \
|
| 141 |
+
--warmup_steps 0 \
|
| 142 |
+
--packing False \
|
| 143 |
+
--enable_thinking False \
|
| 144 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 145 |
+
--bf16 True \
|
| 146 |
+
--plot_loss True \
|
| 147 |
+
--trust_remote_code True \
|
| 148 |
+
--deepspeed data/deepspeed/zero2.json \
|
| 149 |
+
--include_num_input_tokens_seen True \
|
| 150 |
+
--optim adamw_torch \
|
| 151 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 152 |
+
--lora_rank 8 \
|
| 153 |
+
--lora_alpha 16 \
|
| 154 |
+
--lora_dropout 0 \
|
| 155 |
+
--lora_target all
|
| 156 |
+
|
| 157 |
+
toollist1(cutoff_len 10240)- 不使用DeepSpeed ZeRO-2,从ck训:
|
| 158 |
+
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train \
|
| 159 |
+
--stage sft \
|
| 160 |
+
--template qwen \
|
| 161 |
+
--do_train True \
|
| 162 |
+
--flash_attn auto \
|
| 163 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 164 |
+
--preprocessing_num_workers 8 \
|
| 165 |
+
--finetuning_type lora \
|
| 166 |
+
--dataset_dir data \
|
| 167 |
+
--dataset 01_26_toollist1 \
|
| 168 |
+
--num_train_epochs 1 \
|
| 169 |
+
--warmup_ratio 0.03 \
|
| 170 |
+
--cutoff_len 10240 \
|
| 171 |
+
--max_samples 120000 \
|
| 172 |
+
--resume_from_checkpoint /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_2026-01-29-20-31/checkpoint-11200 \
|
| 173 |
+
--learning_rate 1e-5 \
|
| 174 |
+
--per_device_train_batch_size 1 \
|
| 175 |
+
--gradient_accumulation_steps 4 \
|
| 176 |
+
--lr_scheduler_type cosine \
|
| 177 |
+
--max_grad_norm 1.0 \
|
| 178 |
+
--logging_steps 5 \
|
| 179 |
+
--save_steps 200 \
|
| 180 |
+
--warmup_steps 0 \
|
| 181 |
+
--packing False \
|
| 182 |
+
--enable_thinking False \
|
| 183 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 184 |
+
--bf16 True \
|
| 185 |
+
--plot_loss True \
|
| 186 |
+
--trust_remote_code True \
|
| 187 |
+
--ddp_timeout 180000000 \
|
| 188 |
+
--include_num_input_tokens_seen True \
|
| 189 |
+
--optim adamw_torch \
|
| 190 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 191 |
+
--lora_rank 8 \
|
| 192 |
+
--lora_alpha 16 \
|
| 193 |
+
--lora_dropout 0 \
|
| 194 |
+
--lora_target all
|
| 195 |
+
|
| 196 |
+
toollist1(cutoff_len 10240)- 不使用DeepSpeed ZeRO-2,从头训:
|
| 197 |
+
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train \
|
| 198 |
+
--stage sft \
|
| 199 |
+
--template qwen \
|
| 200 |
+
--do_train True \
|
| 201 |
+
--flash_attn auto \
|
| 202 |
+
--model_name_or_path /data/models/Qwen3-8B \
|
| 203 |
+
--preprocessing_num_workers 8 \
|
| 204 |
+
--finetuning_type lora \
|
| 205 |
+
--dataset_dir data \
|
| 206 |
+
--dataset 01_26_toollist1 \
|
| 207 |
+
--num_train_epochs 1 \
|
| 208 |
+
--warmup_ratio 0.03 \
|
| 209 |
+
--cutoff_len 10240 \
|
| 210 |
+
--max_samples 10000 \
|
| 211 |
+
--learning_rate 1e-5 \
|
| 212 |
+
--per_device_train_batch_size 1 \
|
| 213 |
+
--gradient_accumulation_steps 4 \
|
| 214 |
+
--lr_scheduler_type cosine \
|
| 215 |
+
--max_grad_norm 1.0 \
|
| 216 |
+
--logging_steps 5 \
|
| 217 |
+
--save_steps 200 \
|
| 218 |
+
--warmup_steps 0 \
|
| 219 |
+
--packing False \
|
| 220 |
+
--enable_thinking False \
|
| 221 |
+
--output_dir /home/yijuan_liang/LLaMA-Factory/saves/Qwen3-8B/lora/train_$(date +%Y-%m-%d-%H-%M) \
|
| 222 |
+
--bf16 True \
|
| 223 |
+
--plot_loss True \
|
| 224 |
+
--trust_remote_code True \
|
| 225 |
+
--include_num_input_tokens_seen True \
|
| 226 |
+
--optim adamw_torch \
|
| 227 |
+
--adam_beta1 0.9 --adam_beta2 0.999 --adam_epsilon 1e-8 \
|
| 228 |
+
--lora_rank 8 \
|
| 229 |
+
--lora_alpha 16 \
|
| 230 |
+
--lora_dropout 0 \
|
| 231 |
+
--lora_target all
|
data/dataset/12_10/LLM输入一致性对比分析.md
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLM输入一致性对比分析
|
| 2 |
+
|
| 3 |
+
## 概述
|
| 4 |
+
|
| 5 |
+
本文档对比分析 `generate_with_qwen_server_list_v2.py`(推理脚本)和 `template.py`(训练模板)中每次调用LLM时的input构建逻辑。
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## 1. Function Call阶段对比
|
| 10 |
+
|
| 11 |
+
### 1.1 推理脚本 (`build_messages_for_function_call`)
|
| 12 |
+
|
| 13 |
+
**位置**: `generate_with_qwen_server_list_v2.py:265-394`
|
| 14 |
+
|
| 15 |
+
**逻辑**:
|
| 16 |
+
- **第一次调用** (call_idx=0):
|
| 17 |
+
- system: `system_prompt + <tools>`
|
| 18 |
+
- user: `current_query`
|
| 19 |
+
|
| 20 |
+
- **第n次调用** (call_idx > 0):
|
| 21 |
+
- system: `system_prompt + <chat_history> + <tools>`
|
| 22 |
+
- observation: `<tool_response>` (前一个observation)
|
| 23 |
+
- user: `current_query` (如果不在chat_history中)
|
| 24 |
+
|
| 25 |
+
**关键代码**:
|
| 26 |
+
```python
|
| 27 |
+
# 第363-365行:添加tools到system中
|
| 28 |
+
if tools_formatted:
|
| 29 |
+
system_content += f"\n\n<tools>\n{tools_formatted}\n</tools>"
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
### 1.2 训练模板 (`_encode` 处理 OBSERVATION → FUNCTION)
|
| 33 |
+
|
| 34 |
+
**位置**: `template.py:315-371`
|
| 35 |
+
|
| 36 |
+
**逻辑**:
|
| 37 |
+
- 当 `message["role"] == Role.OBSERVATION` 且下一个消息是 `Role.FUNCTION` 时:
|
| 38 |
+
- system: `system + <chat_history> + <tools>`
|
| 39 |
+
- observation: `<tool_response>` (当前observation)
|
| 40 |
+
|
| 41 |
+
**关键代码**:
|
| 42 |
+
```python
|
| 43 |
+
# 第360-367行:如果下一个是tool call,拼接system + tools
|
| 44 |
+
else:
|
| 45 |
+
# 如果下一个是tool call,拼接system + tools
|
| 46 |
+
if system_content or tools:
|
| 47 |
+
tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
|
| 48 |
+
if system_content:
|
| 49 |
+
elements += self.format_system.apply(content=(system_content + tool_text) if tool_text else system_content)
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### 1.3 Function Call阶段一致性结论
|
| 53 |
+
|
| 54 |
+
✅ **基本一致**:
|
| 55 |
+
- 两者都在system中包含 `<chat_history>` 和 `<tools>`
|
| 56 |
+
- 两者都使用单独的observation消息携带 `<tool_response>`
|
| 57 |
+
- 历史信息的格式一致(User query, Tool call, Assistant reply)
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
## 2. Answer阶段对比
|
| 62 |
+
|
| 63 |
+
### 2.1 推理脚本 (`build_messages_for_answer`)
|
| 64 |
+
|
| 65 |
+
**位置**: `generate_with_qwen_server_list_v2.py:466-567`
|
| 66 |
+
|
| 67 |
+
**逻辑**:
|
| 68 |
+
- system: `system_prompt + <chat_history>` (**不包含tools**)
|
| 69 |
+
- observation: `<tool_response>` (最后一个observation)
|
| 70 |
+
- user: `current_query` (如果不在chat_history中)
|
| 71 |
+
|
| 72 |
+
**关键代码**:
|
| 73 |
+
```python
|
| 74 |
+
# 第542行:明确注释"不包含tools,因为要生成answer"
|
| 75 |
+
# 添加system消息(不包含tools,因为要生成answer)
|
| 76 |
+
messages.append({"role": "system", "content": system_content})
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
### 2.2 训练模板 (`_encode` 处理 OBSERVATION → ASSISTANT)
|
| 80 |
+
|
| 81 |
+
**位置**: `template.py:315-371`
|
| 82 |
+
|
| 83 |
+
**逻辑**:
|
| 84 |
+
- 当 `message["role"] == Role.OBSERVATION` 且下一个消息是 `Role.ASSISTANT` 时:
|
| 85 |
+
- system: `system + <chat_history>` (**不包含tools**)
|
| 86 |
+
- observation: `<tool_response>` (当前observation)
|
| 87 |
+
|
| 88 |
+
**关键代码**:
|
| 89 |
+
```python
|
| 90 |
+
# 第356-359行:如果下一个是answer,只拼接system prompt,不拼接tools
|
| 91 |
+
if next_is_assistant:
|
| 92 |
+
# 如果下一个是answer,只拼接system prompt,不拼接tools
|
| 93 |
+
if system_content:
|
| 94 |
+
elements += self.format_system.apply(content=system_content)
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
### 2.3 Answer阶段一致性结论
|
| 98 |
+
|
| 99 |
+
✅ **基本一致**:
|
| 100 |
+
- 两者都在system中包含 `<chat_history>` 但**不包含** `<tools>`
|
| 101 |
+
- 两者都使用单独的observation消息携带 `<tool_response>`
|
| 102 |
+
- 历史信息的格式一致(User query, Tool response, Assistant reply,但不包含Tool call)
|
| 103 |
+
|
| 104 |
+
---
|
| 105 |
+
|
| 106 |
+
## 3. 关键不一致点分析
|
| 107 |
+
|
| 108 |
+
### 3.1 潜在不一致:USER消息的处理
|
| 109 |
+
|
| 110 |
+
**训练模板中的USER消息处理** (`template.py:247-312`):
|
| 111 |
+
|
| 112 |
+
**关键代码**:
|
| 113 |
+
```python
|
| 114 |
+
# 第248行注释:所有USER消息(包括首个和后续turn的)都需要添加system和tools
|
| 115 |
+
# 所有USER消息(包括首个和后续turn的)都需要添加system和tools
|
| 116 |
+
|
| 117 |
+
# 第288-306行:所有USER消息都会添加system+tools
|
| 118 |
+
if i == 0:
|
| 119 |
+
# 第一个消息,已经有prefix了,只需要添加system+tools
|
| 120 |
+
if system_content or tools:
|
| 121 |
+
tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
|
| 122 |
+
if system_content:
|
| 123 |
+
elements += self.format_system.apply(content=(system_content + tool_text) if tool_text else system_content)
|
| 124 |
+
else:
|
| 125 |
+
# 后续turn的user消息,需要添加system+tools
|
| 126 |
+
if system_content or tools:
|
| 127 |
+
tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
|
| 128 |
+
if system_content:
|
| 129 |
+
elements += self.format_system.apply(content=(system_content + tool_text) if tool_text else system_content)
|
| 130 |
+
```
|
| 131 |
+
|
| 132 |
+
**问题分析**:
|
| 133 |
+
|
| 134 |
+
在训练时,如果消息序列是:
|
| 135 |
+
```
|
| 136 |
+
USER → FUNCTION → OBSERVATION → ASSISTANT (answer)
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
那么:
|
| 140 |
+
- OBSERVATION消息:system包含 `<chat_history>`,**不包含** `<tools>`(因为下一个是ASSISTANT)
|
| 141 |
+
- ASSISTANT消息:直接格式化,不涉及system
|
| 142 |
+
|
| 143 |
+
**但是**,如果消息序列是:
|
| 144 |
+
```
|
| 145 |
+
OBSERVATION → USER → ASSISTANT (answer)
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
那么:
|
| 149 |
+
- OBSERVATION消息:system包含 `<chat_history>`,**不包含** `<tools>`(因为下一个是ASSISTANT)
|
| 150 |
+
- USER消息���system包含 `<chat_history>` + `<tools>`(第288-306行,所有USER消息都会添加tools)
|
| 151 |
+
- ASSISTANT消息:直接格式化
|
| 152 |
+
|
| 153 |
+
**这意味着**:在训练时,如果observation后面还有user消息,那么user消息会包含tools,但推理脚本在answer阶段不包含tools。
|
| 154 |
+
|
| 155 |
+
### 3.2 推理脚本的answer阶段
|
| 156 |
+
|
| 157 |
+
**推理脚本** (`build_messages_for_answer`):
|
| 158 |
+
- 只构建:system + observation + user(可选)
|
| 159 |
+
- system中**明确不包含tools**(第542行)
|
| 160 |
+
|
| 161 |
+
**训练模板** (`_encode`):
|
| 162 |
+
- 如果observation后面直接是assistant:system不包含tools ✅
|
| 163 |
+
- 如果observation后面是user,然后才是assistant:user消息的system包含tools ❌
|
| 164 |
+
|
| 165 |
+
---
|
| 166 |
+
|
| 167 |
+
## 4. 结论
|
| 168 |
+
|
| 169 |
+
### 4.1 Function Call阶段
|
| 170 |
+
|
| 171 |
+
✅ **基本一致**:
|
| 172 |
+
- 推理脚本和训练模板都在system中包含 `<chat_history>` 和 `<tools>`
|
| 173 |
+
- 格式和结构一致
|
| 174 |
+
|
| 175 |
+
### 4.2 Answer阶段
|
| 176 |
+
|
| 177 |
+
⚠️ **存在不一致**:
|
| 178 |
+
|
| 179 |
+
1. **直接情况**(observation → assistant):
|
| 180 |
+
- ✅ 一致:两者都不包含tools
|
| 181 |
+
|
| 182 |
+
2. **间接情况**(observation → user → assistant):
|
| 183 |
+
- ❌ **不一致**:
|
| 184 |
+
- 推理脚本:system不包含tools
|
| 185 |
+
- 训练模板:user消息的system包含tools(因为所有USER消息都会添加tools)
|
| 186 |
+
|
| 187 |
+
### 4.3 建议
|
| 188 |
+
|
| 189 |
+
**问题**:在训练时,如果消息序列是 `OBSERVATION → USER → ASSISTANT`,USER消息会包含tools,但推理时answer阶段不包含tools。
|
| 190 |
+
|
| 191 |
+
**解决方案**:
|
| 192 |
+
|
| 193 |
+
1. **方案1**:修改训练模板,在生成answer时,如果前一个消息是OBSERVATION且下一个是ASSISTANT,则USER消息也不应该包含tools。
|
| 194 |
+
|
| 195 |
+
2. **方案2**:修改推理脚本,在answer阶段也包含tools(但这与注释中的设计意图不符)。
|
| 196 |
+
|
| 197 |
+
3. **方案3**:确保训练数据中,answer阶段的消息序列是 `OBSERVATION → ASSISTANT`,不包含中间的USER消息。
|
| 198 |
+
|
| 199 |
+
**推荐方案**:方案1或方案3,因为推理脚本的设计意图是"生成answer时不包含tools"。
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
## 5. 详细代码对比
|
| 204 |
+
|
| 205 |
+
### 5.1 Function Call阶段
|
| 206 |
+
|
| 207 |
+
| 组件 | 推理脚本 | 训练模板 | 一致性 |
|
| 208 |
+
|------|---------|---------|--------|
|
| 209 |
+
| system | system_prompt + chat_history + tools | system + chat_history + tools | ✅ |
|
| 210 |
+
| observation | 单独的observation消息 | 单独的observation消息 | ✅ |
|
| 211 |
+
| user | current_query | current_query | ✅ |
|
| 212 |
+
|
| 213 |
+
### 5.2 Answer阶段
|
| 214 |
+
|
| 215 |
+
| 组件 | 推理脚本 | 训练模板(直接) | 训练模板(间接) | 一致性 |
|
| 216 |
+
|------|---------|----------------|----------------|--------|
|
| 217 |
+
| system | system_prompt + chat_history | system + chat_history | system + chat_history + tools | ⚠️ |
|
| 218 |
+
| observation | 单独的observation消息 | 单独的observation消息 | 单独的observation消息 | ✅ |
|
| 219 |
+
| user | current_query(可选) | 无 | current_query | ⚠️ |
|
| 220 |
+
|
| 221 |
+
---
|
| 222 |
+
|
| 223 |
+
## 6. 总结
|
| 224 |
+
|
| 225 |
+
1. **Function Call阶段**:✅ 基本一致
|
| 226 |
+
2. **Answer阶段(直接)**:✅ 一致(observation → assistant)
|
| 227 |
+
3. **Answer阶段(间接)**:❌ **不一致**(observation → user → assistant)
|
| 228 |
+
|
| 229 |
+
**关键问题**:训练模板中,所有USER消息都会添加tools,但在answer阶段,如果前一个消息是OBSERVATION且下一个是ASSISTANT,推理脚本不包含tools。这可能导致训练和推理的不一致。
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
|
data/dataset/12_10/origin.py
ADDED
|
@@ -0,0 +1,2192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2025 the LlamaFactory team.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import re
|
| 16 |
+
from copy import deepcopy
|
| 17 |
+
from dataclasses import dataclass
|
| 18 |
+
from typing import TYPE_CHECKING, Optional, Union
|
| 19 |
+
|
| 20 |
+
from typing_extensions import override
|
| 21 |
+
|
| 22 |
+
from ..extras import logging
|
| 23 |
+
from .data_utils import Role
|
| 24 |
+
from .formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter
|
| 25 |
+
from .mm_plugin import get_mm_plugin
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
if TYPE_CHECKING:
|
| 29 |
+
from transformers import PreTrainedTokenizer
|
| 30 |
+
|
| 31 |
+
from ..hparams import DataArguments
|
| 32 |
+
from .formatter import SLOTS, Formatter
|
| 33 |
+
from .mm_plugin import BasePlugin
|
| 34 |
+
from .tool_utils import FunctionCall
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
logger = logging.get_logger(__name__)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@dataclass
class Template:
    r"""A chat template that renders role-tagged messages into token ids.

    Each ``format_*`` attribute is a :class:`Formatter` responsible for one
    message role (or the prefix/tools preamble).
    """

    format_user: "Formatter"
    format_assistant: "Formatter"
    format_system: "Formatter"
    format_function: "Formatter"
    format_observation: "Formatter"
    format_tools: "Formatter"
    format_prefix: "Formatter"
    default_system: str  # fallback system prompt used when the caller passes none
    stop_words: list[str]
    thought_words: tuple[str, str]  # opening/closing markers of the reasoning span, e.g. <think> ... </think>
    efficient_eos: bool
    replace_eos: bool
    replace_jinja_template: bool
    enable_thinking: Optional[bool]
    mm_plugin: "BasePlugin"

    def encode_oneturn(
        self,
        tokenizer: "PreTrainedTokenizer",
        messages: list[dict[str, str]],
        system: Optional[str] = None,
        tools: Optional[str] = None,
    ) -> tuple[list[int], list[int]]:
        r"""Return a single pair of token ids representing prompt and response respectively.

        All encoded messages except the last one are concatenated into the
        prompt; the last encoded message is the response.
        """
        encoded_messages = self._encode(tokenizer, messages, system, tools)
        prompt_ids: list[int] = []
        for encoded_ids in encoded_messages[:-1]:
            prompt_ids += encoded_ids

        return prompt_ids, encoded_messages[-1]

    def encode_multiturn(
        self,
        tokenizer: "PreTrainedTokenizer",
        messages: list[dict[str, str]],
        system: Optional[str] = None,
        tools: Optional[str] = None,
    ) -> list[tuple[list[int], list[int]]]:
        r"""Return multiple pairs of token ids representing prompts and responses respectively.

        Encoded messages are paired two at a time (prompt, response), so an
        even number of encoded messages is expected.
        """
        encoded_messages = self._encode(tokenizer, messages, system, tools)
        pairs = [(encoded_messages[i], encoded_messages[i + 1]) for i in range(0, len(encoded_messages), 2)]
        # Optional deep-debug dump; no-op unless explicitly enabled via env var.
        self._log_pair_debug(tokenizer, messages, encoded_messages, pairs)
        return pairs

    @staticmethod
    def _log_pair_debug(
        tokenizer: "PreTrainedTokenizer",
        messages: list[dict[str, str]],
        encoded_messages: list[list[int]],
        pairs: list[tuple[list[int], list[int]]],
    ) -> None:
        r"""Append a detailed dump of ShareGPT pairing to a debug log file.

        Disabled unless the ``LLAMAFACTORY_PAIR_DEBUG_LOG`` environment variable
        points at a writable file path, so regular runs pay no per-sample I/O
        cost.  (Replaces an earlier hardcoded ``/home/<user>/...`` log path that
        reopened the file for every line and swallowed all errors.)
        """
        import os
        from datetime import datetime

        log_file = os.getenv("LLAMAFACTORY_PAIR_DEBUG_LOG")
        if not log_file:
            return

        def fmt(msg: str) -> str:
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
            return f"{timestamp} | INFO | {msg}\n"

        entries = [fmt("=" * 80), fmt("encode_multiturn: input/pairing dump"), fmt("=" * 80)]
        entries.append(fmt(f"input messages: {len(messages)}"))
        for i, msg in enumerate(messages):
            preview = msg.get("content", "")[:100].replace("\n", "\\n")
            entries.append(fmt(f"  message {i + 1}: {msg.get('role', 'unknown')} - {preview}..."))

        entries.append(fmt(f"encoded messages: {len(encoded_messages)}"))
        for i, encoded in enumerate(encoded_messages):
            entries.append(fmt(f"  segment {i + 1}: {len(encoded)} tokens"))

        entries.append(fmt(f"pairs: {len(pairs)}"))
        for i, (source_ids, target_ids) in enumerate(pairs):
            entries.append(fmt(f"--- pair {i + 1}: source={len(source_ids)} tokens, target={len(target_ids)} tokens"))
            if source_ids:
                source_text = tokenizer.decode(source_ids, skip_special_tokens=False)
                entries.append(fmt(f"  source: {source_text}"))
            if target_ids:
                target_text = tokenizer.decode(target_ids, skip_special_tokens=False)
                entries.append(fmt(f"  target: {target_text}"))

        try:
            # Single open/write per call instead of one per log line.
            with open(log_file, "a", encoding="utf-8") as f:
                f.write("".join(entries))
        except OSError:  # debug logging must never break training
            logger.warning_rank0(f"Cannot write pair debug log to {log_file}.")

    def extract_tool(self, content: str) -> Union[str, list["FunctionCall"]]:
        r"""Extract tool message."""
        return self.format_tools.extract(content)

    def get_stop_token_ids(self, tokenizer: "PreTrainedTokenizer") -> list[int]:
        r"""Return stop token ids (eos plus the template's stop words), deduplicated."""
        stop_token_ids = {tokenizer.eos_token_id}
        for token in self.stop_words:
            stop_token_ids.add(tokenizer.convert_tokens_to_ids(token))

        return list(stop_token_ids)

    def add_thought(self, content: str = "") -> str:
        r"""Prepend an empty thought span to an assistant message."""
        return f"{self.thought_words[0]}\n\n{self.thought_words[1]}\n\n" + content

    def remove_thought(self, content: str) -> str:
        r"""Remove the thought span (and leading newlines) from an assistant message."""
        pattern = re.compile(f"{re.escape(self.thought_words[0])}(.*?){re.escape(self.thought_words[1])}", re.DOTALL)
        return re.sub(pattern, "", content).lstrip("\n")

    def get_thought_word_ids(self, tokenizer: "PreTrainedTokenizer") -> list[int]:
        r"""Get the token ids of an empty thought span."""
        return tokenizer.encode(self.add_thought(), add_special_tokens=False)

    def _convert_elements_to_ids(self, tokenizer: "PreTrainedTokenizer", elements: "SLOTS") -> list[int]:
        r"""Convert formatter output elements (strings, token dicts, special-token sets) to token ids."""
        token_ids: list[int] = []
        for elem in elements:
            if isinstance(elem, str):
                if len(elem) != 0:
                    token_ids += tokenizer.encode(elem, add_special_tokens=False)
            elif isinstance(elem, dict):
                token_ids += [tokenizer.convert_tokens_to_ids(elem.get("token"))]
            elif isinstance(elem, set):
                if "bos_token" in elem and tokenizer.bos_token_id is not None:
                    token_ids += [tokenizer.bos_token_id]
                elif "eos_token" in elem and tokenizer.eos_token_id is not None:
                    token_ids += [tokenizer.eos_token_id]
            else:
                raise ValueError(f"Input must be string, set[str] or dict[str, str], got {type(elem)}")

        return token_ids

    def _enhance_observation_content(self, messages: list[dict[str, str]], index: int) -> str:
        r"""Augment a tool-result (observation) message with its originating user query.

        Single-hop: "user query + current tool result".  From the fourth
        observation onward (>= 3 earlier observations), the most recent earlier
        observation whose preceding function call was NOT a ``retrieval_tool``
        call is also prepended as multi-hop context.
        """
        observation_count = sum(1 for msg in messages[:index] if msg["role"] == Role.OBSERVATION)

        # Nearest preceding user message supplies the query context.
        human_content = ""
        for j in range(index - 1, -1, -1):
            if messages[j]["role"] == Role.USER:
                human_content = messages[j]["content"]
                break

        # Multi-hop context only once at least three observations precede this one.
        previous_observation_content = ""
        if observation_count >= 3:
            for j in range(index - 1, -1, -1):
                if messages[j]["role"] != Role.OBSERVATION:
                    continue

                # Inspect the nearest function call before this observation.
                is_retrieval = False
                for k in range(j - 1, -1, -1):
                    if messages[k]["role"] == Role.FUNCTION:
                        is_retrieval = "retrieval_tool" in messages[k]["content"]
                        break

                # Skip retrieval_tool observations; keep scanning backwards.
                if not is_retrieval:
                    previous_observation_content = messages[j]["content"]
                    break

        content = messages[index]["content"]
        if previous_observation_content:
            # Multi-hop scenario: include the previous (non-retrieval) tool result.
            return f"用户查询: {human_content}\n\n上一个工具返回结果: {previous_observation_content}\n\n当前工具返回结果: {content}"
        elif human_content:
            # Single-hop scenario: only the user query and the current result.
            return f"用户查询: {human_content}\n\n当前工具返回结果: {content}"

        return content

    def _encode(
        self,
        tokenizer: "PreTrainedTokenizer",
        messages: list[dict[str, str]],
        system: Optional[str],
        tools: Optional[str],
    ) -> list[list[int]]:
        r"""Encode formatted inputs to pairs of token ids.

        Turn 0: prefix + system + query        resp
        Turn t: query                          resp.

        Observation turns after the first message re-emit the system prompt
        (without tools) and have their content enhanced with the user query
        (see ``_enhance_observation_content``).
        """
        system = system or self.default_system
        encoded_messages = []
        for i, message in enumerate(messages):
            elements = []

            if i == 0:
                elements += self.format_prefix.apply()
                if system or tools:
                    tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
                    elements += self.format_system.apply(content=(system + tool_text))
            elif message["role"] == Role.OBSERVATION:
                # Re-emit the system prompt (tools omitted) before every tool-result turn.
                if system:
                    elements += self.format_system.apply(content=system)

                message = {**message, "content": self._enhance_observation_content(messages, i)}

            if message["role"] == Role.USER:
                elements += self.format_user.apply(content=message["content"], idx=str(i // 2))
            elif message["role"] == Role.ASSISTANT:
                elements += self.format_assistant.apply(content=message["content"])
            elif message["role"] == Role.OBSERVATION:
                # Content was already enhanced above; just format it.
                elements += self.format_observation.apply(content=message["content"])
            elif message["role"] == Role.FUNCTION:
                # Use the raw content to avoid breaking JSON parsing downstream.
                elements += self.format_function.apply(content=message["content"])
            else:
                raise NotImplementedError("Unexpected role: {}".format(message["role"]))

            encoded_messages.append(self._convert_elements_to_ids(tokenizer, elements))

        return encoded_messages

    @staticmethod
    def _add_or_replace_eos_token(tokenizer: "PreTrainedTokenizer", eos_token: str) -> None:
        r"""Add or replace eos token to the tokenizer."""
        if tokenizer.eos_token == eos_token:
            return

        is_added = tokenizer.eos_token_id is None
        num_added_tokens = tokenizer.add_special_tokens({"eos_token": eos_token})

        if is_added:
            logger.info_rank0(f"Add eos token: {tokenizer.eos_token}.")
        else:
            logger.info_rank0(f"Replace eos token: {tokenizer.eos_token}.")

        if num_added_tokens > 0:
            logger.warning_rank0("New tokens have been added, make sure `resize_vocab` is True.")

    def fix_special_tokens(self, tokenizer: "PreTrainedTokenizer") -> None:
        r"""Add eos token and pad token to the tokenizer."""
        stop_words = self.stop_words
        if self.replace_eos:
            if not stop_words:
                raise ValueError("Stop words are required to replace the EOS token.")

            self._add_or_replace_eos_token(tokenizer, eos_token=stop_words[0])
            stop_words = stop_words[1:]

        if tokenizer.eos_token_id is None:
            self._add_or_replace_eos_token(tokenizer, eos_token="<|endoftext|>")

        if tokenizer.pad_token_id is None:
            tokenizer.pad_token = tokenizer.eos_token
            logger.info_rank0(f"Add pad token: {tokenizer.pad_token}")

        if stop_words:
            num_added_tokens = tokenizer.add_special_tokens(
                dict(additional_special_tokens=stop_words), replace_additional_special_tokens=False
            )
            logger.info_rank0("Add {} to stop words.".format(",".join(stop_words)))
            if num_added_tokens > 0:
                logger.warning_rank0("New tokens have been added, make sure `resize_vocab` is True.")

    @staticmethod
    def _jinja_escape(content: str) -> str:
        r"""Escape single quotes in content."""
        return content.replace("'", r"\'")

    @staticmethod
    def _convert_slots_to_jinja(slots: "SLOTS", tokenizer: "PreTrainedTokenizer", placeholder: str = "content") -> str:
        r"""Convert slots to a jinja string-concatenation expression."""
        slot_items = []
        for slot in slots:
            if isinstance(slot, str):
                slot_pieces = slot.split("{{content}}")
                if slot_pieces[0]:
                    slot_items.append("'" + Template._jinja_escape(slot_pieces[0]) + "'")
                if len(slot_pieces) > 1:
                    slot_items.append(placeholder)
                    if slot_pieces[1]:
                        slot_items.append("'" + Template._jinja_escape(slot_pieces[1]) + "'")
            elif isinstance(slot, set):  # do not use {{ eos_token }} since it may be replaced
                if "bos_token" in slot and tokenizer.bos_token_id is not None:
                    slot_items.append("'" + tokenizer.bos_token + "'")
                elif "eos_token" in slot and tokenizer.eos_token_id is not None:
                    slot_items.append("'" + tokenizer.eos_token + "'")
            elif isinstance(slot, dict):
                raise ValueError("Dict is not supported.")

        return " + ".join(slot_items)

    def _get_jinja_template(self, tokenizer: "PreTrainedTokenizer") -> str:
        r"""Return the jinja template."""
        prefix = self._convert_slots_to_jinja(self.format_prefix.apply(), tokenizer)
        system = self._convert_slots_to_jinja(self.format_system.apply(), tokenizer, placeholder="system_message")
        user = self._convert_slots_to_jinja(self.format_user.apply(), tokenizer)
        assistant = self._convert_slots_to_jinja(self.format_assistant.apply(), tokenizer)
        jinja_template = ""
        if prefix:
            jinja_template += "{{ " + prefix + " }}"

        if self.default_system:
            jinja_template += "{% set system_message = '" + self._jinja_escape(self.default_system) + "' %}"

        jinja_template += (
            "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}"
            "{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}"
            "{% if system_message is defined %}{{ " + system + " }}{% endif %}"
            "{% for message in loop_messages %}"
            "{% set content = message['content'] %}"
            "{% if message['role'] == 'user' %}"
            "{{ " + user + " }}"
            "{% elif message['role'] == 'assistant' %}"
            "{{ " + assistant + " }}"
            "{% endif %}"
            "{% endfor %}"
        )
        return jinja_template

    def fix_jinja_template(self, tokenizer: "PreTrainedTokenizer") -> None:
        r"""Replace the jinja template in the tokenizer."""
        if tokenizer.chat_template is None or self.replace_jinja_template:
            try:
                tokenizer.chat_template = self._get_jinja_template(tokenizer)
            except ValueError as e:
                logger.info_rank0(f"Cannot add this chat template to tokenizer: {e}.")

    @staticmethod
    def _convert_slots_to_ollama(
        slots: "SLOTS", tokenizer: "PreTrainedTokenizer", placeholder: str = "content"
    ) -> str:
        r"""Convert slots to an ollama template fragment."""
        slot_items = []
        for slot in slots:
            if isinstance(slot, str):
                slot_pieces = slot.split("{{content}}")
                if slot_pieces[0]:
                    slot_items.append(slot_pieces[0])
                if len(slot_pieces) > 1:
                    slot_items.append("{{ " + placeholder + " }}")
                    if slot_pieces[1]:
                        slot_items.append(slot_pieces[1])
            elif isinstance(slot, set):  # do not use {{ eos_token }} since it may be replaced
                if "bos_token" in slot and tokenizer.bos_token_id is not None:
                    slot_items.append(tokenizer.bos_token)
                elif "eos_token" in slot and tokenizer.eos_token_id is not None:
                    slot_items.append(tokenizer.eos_token)
            elif isinstance(slot, dict):
                raise ValueError("Dict is not supported.")

        return "".join(slot_items)

    def _get_ollama_template(self, tokenizer: "PreTrainedTokenizer") -> str:
        r"""Return the ollama template."""
        prefix = self._convert_slots_to_ollama(self.format_prefix.apply(), tokenizer)
        system = self._convert_slots_to_ollama(self.format_system.apply(), tokenizer, placeholder=".System")
        user = self._convert_slots_to_ollama(self.format_user.apply(), tokenizer, placeholder=".Content")
        assistant = self._convert_slots_to_ollama(self.format_assistant.apply(), tokenizer, placeholder=".Content")
        return (
            f"{prefix}{{{{ if .System }}}}{system}{{{{ end }}}}"
            f"""{{{{ range .Messages }}}}{{{{ if eq .Role "user" }}}}{user}"""
            f"""{{{{ else if eq .Role "assistant" }}}}{assistant}{{{{ end }}}}{{{{ end }}}}"""
        )

    def get_ollama_modelfile(self, tokenizer: "PreTrainedTokenizer") -> str:
        r"""Return the ollama modelfile.

        TODO: support function calling.
        """
        modelfile = "# ollama modelfile auto-generated by llamafactory\n\n"
        modelfile += f'FROM .\n\nTEMPLATE """{self._get_ollama_template(tokenizer)}"""\n\n'

        if self.default_system:
            modelfile += f'SYSTEM """{self.default_system}"""\n\n'

        for stop_token_id in self.get_stop_token_ids(tokenizer):
            modelfile += f'PARAMETER stop "{tokenizer.convert_ids_to_tokens(stop_token_id)}"\n'

        modelfile += "PARAMETER num_ctx 4096\n"
        return modelfile
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
@dataclass
class Llama2Template(Template):
    r"""A template that fuses the system message into the first user message."""

    def _llama2_enhance_observation_content(self, messages: list[dict[str, str]], index: int) -> str:
        r"""Augment a tool-result (observation) message with its originating user query.

        Single-hop: "user query + current tool result".  From the fourth
        observation onward (>= 3 earlier observations), the most recent earlier
        observation whose preceding function call was NOT a ``retrieval_tool``
        call is also prepended as multi-hop context.
        """
        observation_count = sum(1 for msg in messages[:index] if msg["role"] == Role.OBSERVATION)

        # Nearest preceding user message supplies the query context.
        human_content = ""
        for j in range(index - 1, -1, -1):
            if messages[j]["role"] == Role.USER:
                human_content = messages[j]["content"]
                break

        # Multi-hop context only once at least three observations precede this one.
        previous_observation_content = ""
        if observation_count >= 3:
            for j in range(index - 1, -1, -1):
                if messages[j]["role"] != Role.OBSERVATION:
                    continue

                # Inspect the nearest function call before this observation.
                is_retrieval = False
                for k in range(j - 1, -1, -1):
                    if messages[k]["role"] == Role.FUNCTION:
                        is_retrieval = "retrieval_tool" in messages[k]["content"]
                        break

                # Skip retrieval_tool observations; keep scanning backwards.
                if not is_retrieval:
                    previous_observation_content = messages[j]["content"]
                    break

        content = messages[index]["content"]
        if previous_observation_content:
            # Multi-hop scenario: include the previous (non-retrieval) tool result.
            return f"用户查询: {human_content}\n\n上一个工具返回结果: {previous_observation_content}\n\n当前工具返回结果: {content}"
        elif human_content:
            # Single-hop scenario: only the user query and the current result.
            return f"用户查询: {human_content}\n\n当前工具返回结果: {content}"

        return content

    @override
    def _encode(
        self,
        tokenizer: "PreTrainedTokenizer",
        messages: list[dict[str, str]],
        system: str,
        tools: str,
    ) -> list[list[int]]:
        r"""Encode formatted inputs to pairs of token ids.

        Unlike the base class, the rendered system text is prepended to the
        first user message instead of being emitted as a separate segment.
        """
        system = system or self.default_system
        encoded_messages = []
        for i, message in enumerate(messages):
            elements = []

            system_text = ""
            if i == 0:
                elements += self.format_prefix.apply()
                if system or tools:
                    tool_text = self.format_tools.apply(content=tools)[0] if tools else ""
                    system_text = self.format_system.apply(content=(system + tool_text))[0]

            if message["role"] == Role.USER:
                elements += self.format_user.apply(content=system_text + message["content"])
            elif message["role"] == Role.ASSISTANT:
                elements += self.format_assistant.apply(content=message["content"])
            elif message["role"] == Role.OBSERVATION:
                # Tool results are enriched with the user query (and multi-hop context).
                elements += self.format_observation.apply(
                    content=self._llama2_enhance_observation_content(messages, i)
                )
            elif message["role"] == Role.FUNCTION:
                # Use the raw content to avoid breaking JSON parsing downstream.
                elements += self.format_function.apply(content=message["content"])
            else:
                raise NotImplementedError("Unexpected role: {}".format(message["role"]))

            encoded_messages.append(self._convert_elements_to_ids(tokenizer, elements))

        return encoded_messages

    def _get_jinja_template(self, tokenizer: "PreTrainedTokenizer") -> str:
        r"""Return the jinja template (system message fused into the first user turn)."""
        prefix = self._convert_slots_to_jinja(self.format_prefix.apply(), tokenizer)
        system_message = self._convert_slots_to_jinja(
            self.format_system.apply(), tokenizer, placeholder="system_message"
        )
        user_message = self._convert_slots_to_jinja(self.format_user.apply(), tokenizer)
        assistant_message = self._convert_slots_to_jinja(self.format_assistant.apply(), tokenizer)
        jinja_template = ""
        if prefix:
            jinja_template += "{{ " + prefix + " }}"

        if self.default_system:
            jinja_template += "{% set system_message = '" + self._jinja_escape(self.default_system) + "' %}"

        jinja_template += (
            "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}"
            "{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}"
            "{% for message in loop_messages %}"
            "{% if loop.index0 == 0 and system_message is defined %}"
            "{% set content = " + system_message + " + message['content'] %}"
            "{% else %}{% set content = message['content'] %}{% endif %}"
            "{% if message['role'] == 'user' %}"
            "{{ " + user_message + " }}"
            "{% elif message['role'] == 'assistant' %}"
            "{{ " + assistant_message + " }}"
            "{% endif %}"
            "{% endfor %}"
        )
        return jinja_template
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
@dataclass
|
| 576 |
+
class ReasoningTemplate(Template):
|
| 577 |
+
r"""A template that add thought to assistant message."""
|
| 578 |
+
|
| 579 |
+
@override
|
| 580 |
+
def encode_oneturn(
|
| 581 |
+
self,
|
| 582 |
+
tokenizer: "PreTrainedTokenizer",
|
| 583 |
+
messages: list[dict[str, str]],
|
| 584 |
+
system: Optional[str] = None,
|
| 585 |
+
tools: Optional[str] = None,
|
| 586 |
+
) -> tuple[list[int], list[int]]:
|
| 587 |
+
messages = deepcopy(messages)
|
| 588 |
+
for i in range(1, len(messages) - 2, 2):
|
| 589 |
+
messages[i]["content"] = self.remove_thought(messages[i]["content"])
|
| 590 |
+
|
| 591 |
+
if self.enable_thinking is False: # remove all cot
|
| 592 |
+
messages[-1]["content"] = self.remove_thought(messages[-1]["content"])
|
| 593 |
+
|
| 594 |
+
prompt_ids, response_ids = super().encode_oneturn(tokenizer, messages, system, tools)
|
| 595 |
+
if (
|
| 596 |
+
self.thought_words[0] not in messages[-1]["content"]
|
| 597 |
+
and self.thought_words[1] not in messages[-1]["content"]
|
| 598 |
+
): # add empty cot
|
| 599 |
+
if not self.enable_thinking: # do not compute loss
|
| 600 |
+
prompt_ids += self.get_thought_word_ids(tokenizer)
|
| 601 |
+
else: # do compute loss
|
| 602 |
+
response_ids = self.get_thought_word_ids(tokenizer) + response_ids
|
| 603 |
+
|
| 604 |
+
return prompt_ids, response_ids
|
| 605 |
+
|
| 606 |
+
@override
|
| 607 |
+
def encode_multiturn(
|
| 608 |
+
self,
|
| 609 |
+
tokenizer: "PreTrainedTokenizer",
|
| 610 |
+
messages: list[dict[str, str]],
|
| 611 |
+
system: Optional[str] = None,
|
| 612 |
+
tools: Optional[str] = None,
|
| 613 |
+
) -> list[tuple[list[int], list[int]]]:
|
| 614 |
+
# 添加详细日志记录
|
| 615 |
+
import os
|
| 616 |
+
from datetime import datetime
|
| 617 |
+
|
| 618 |
+
def log_debug(msg):
|
| 619 |
+
"""简单的调试日志函数"""
|
| 620 |
+
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
| 621 |
+
log_entry = f"{timestamp} | INFO | {msg}\n"
|
| 622 |
+
|
| 623 |
+
# 写入日志文件
|
| 624 |
+
log_file = "/home/ziqiang/LLaMA-Factory/sharegpt_pair_debug.log"
|
| 625 |
+
try:
|
| 626 |
+
with open(log_file, "a", encoding="utf-8") as f:
|
| 627 |
+
f.write(log_entry)
|
| 628 |
+
f.flush() # 立即刷新到文件
|
| 629 |
+
except:
|
| 630 |
+
pass # 忽略写文件错误
|
| 631 |
+
|
| 632 |
+
# 只写入日志文件,不输出到控制台
|
| 633 |
+
|
| 634 |
+
log_debug("=" * 80)
|
| 635 |
+
log_debug("🔄 ReasoningTemplate数据处理 - encode_multiturn开始")
|
| 636 |
+
log_debug("=" * 80)
|
| 637 |
+
|
| 638 |
+
log_debug(f"📥 输入messages数量: {len(messages)}")
|
| 639 |
+
log_debug(f"🧠 enable_thinking: {self.enable_thinking}")
|
| 640 |
+
|
| 641 |
+
# 打印原始消息内容
|
| 642 |
+
for i, msg in enumerate(messages):
|
| 643 |
+
role = msg.get('role', 'unknown')
|
| 644 |
+
content_preview = msg.get('content', '')[:8000].replace('\n', '\\n')
|
| 645 |
+
log_debug(f" 📝 Message {i+1}: {role} - {content_preview}...")
|
| 646 |
+
|
| 647 |
+
messages = deepcopy(messages)
|
| 648 |
+
if self.enable_thinking is False: # remove all cot
|
| 649 |
+
for i in range(1, len(messages), 2):
|
| 650 |
+
messages[i]["content"] = self.remove_thought(messages[i]["content"])
|
| 651 |
+
log_debug(f" 🧹 移除了Message {i+1}中的思考内容")
|
| 652 |
+
|
| 653 |
+
encoded_messages = self._encode(tokenizer, messages, system, tools)
|
| 654 |
+
log_debug(f"🔢 编码后messages数量: {len(encoded_messages)}")
|
| 655 |
+
|
| 656 |
+
# 打印���码后的每个消息段
|
| 657 |
+
for i, encoded in enumerate(encoded_messages):
|
| 658 |
+
log_debug(f" 🧮 编码段 {i+1}: {len(encoded)} tokens")
|
| 659 |
+
if len(encoded) > 0:
|
| 660 |
+
# 解码前50个token看内容
|
| 661 |
+
preview = tokenizer.decode(encoded[:min(50, len(encoded))], skip_special_tokens=False)
|
| 662 |
+
preview = preview.replace("\n", "\\n")
|
| 663 |
+
log_debug(f" 📄 内容预览: {preview}...")
|
| 664 |
+
|
| 665 |
+
for i in range(0, len(messages), 2):
|
| 666 |
+
if (
|
| 667 |
+
self.thought_words[0] not in messages[i + 1]["content"]
|
| 668 |
+
and self.thought_words[1] not in messages[i + 1]["content"]
|
| 669 |
+
): # add empty cot
|
| 670 |
+
log_debug(f" 💭 为Message {i+2}添加空思考标签")
|
| 671 |
+
if not self.enable_thinking: # do not compute loss
|
| 672 |
+
encoded_messages[i] += self.get_thought_word_ids(tokenizer)
|
| 673 |
+
else: # do compute loss
|
| 674 |
+
encoded_messages[i + 1] = self.get_thought_word_ids(tokenizer) + encoded_messages[i + 1]
|
| 675 |
+
|
| 676 |
+
pairs = [(encoded_messages[i], encoded_messages[i + 1]) for i in range(0, len(encoded_messages), 2)]
|
| 677 |
+
|
| 678 |
+
log_debug("\n" + "=" * 80)
|
| 679 |
+
log_debug("✂️ Pair分割结果")
|
| 680 |
+
log_debug("=" * 80)
|
| 681 |
+
log_debug(f"📊 生成的pairs数量: {len(pairs)}")
|
| 682 |
+
|
| 683 |
+
for i, (source_ids, target_ids) in enumerate(pairs):
|
| 684 |
+
log_debug(f"\n--- Pair {i+1} ---")
|
| 685 |
+
log_debug(f"📏 Pair {i+1}: source={len(source_ids)} tokens, target={len(target_ids)} tokens")
|
| 686 |
+
|
| 687 |
+
# 解码source和target内容
|
| 688 |
+
if len(source_ids) > 0:
|
| 689 |
+
source_text = tokenizer.decode(source_ids, skip_special_tokens=False)
|
| 690 |
+
source_preview = source_text[:2000].replace("\n", "\\n")
|
| 691 |
+
log_debug(f" 📤 Source内容: {source_text}...")
|
| 692 |
+
|
| 693 |
+
if len(target_ids) > 0:
|
| 694 |
+
target_text = tokenizer.decode(target_ids, skip_special_tokens=False)
|
| 695 |
+
target_preview = target_text[:2000].replace("\n", "\\n")
|
| 696 |
+
log_debug(f" 📥 Target内容: {target_text}...")
|
| 697 |
+
|
| 698 |
+
log_debug("=" * 80)
|
| 699 |
+
log_debug("✅ ReasoningTemplate encode_multiturn完成")
|
| 700 |
+
log_debug("=" * 80)
|
| 701 |
+
|
| 702 |
+
return pairs
|
| 703 |
+
|
| 704 |
+
|
| 705 |
+
TEMPLATES: dict[str, "Template"] = {}
|
| 706 |
+
|
| 707 |
+
|
| 708 |
+
def register_template(
|
| 709 |
+
name: str,
|
| 710 |
+
format_user: Optional["Formatter"] = None,
|
| 711 |
+
format_assistant: Optional["Formatter"] = None,
|
| 712 |
+
format_system: Optional["Formatter"] = None,
|
| 713 |
+
format_function: Optional["Formatter"] = None,
|
| 714 |
+
format_observation: Optional["Formatter"] = None,
|
| 715 |
+
format_tools: Optional["Formatter"] = None,
|
| 716 |
+
format_prefix: Optional["Formatter"] = None,
|
| 717 |
+
default_system: str = "",
|
| 718 |
+
stop_words: Optional[list[str]] = None,
|
| 719 |
+
thought_words: Optional[tuple[str, str]] = None,
|
| 720 |
+
efficient_eos: bool = False,
|
| 721 |
+
replace_eos: bool = False,
|
| 722 |
+
replace_jinja_template: bool = False,
|
| 723 |
+
enable_thinking: Optional[bool] = True,
|
| 724 |
+
mm_plugin: "BasePlugin" = get_mm_plugin(name="base"),
|
| 725 |
+
template_class: type["Template"] = Template,
|
| 726 |
+
) -> None:
|
| 727 |
+
r"""Register a chat template.
|
| 728 |
+
|
| 729 |
+
To add the following chat template:
|
| 730 |
+
```
|
| 731 |
+
<s><user>user prompt here
|
| 732 |
+
<model>model response here</s>
|
| 733 |
+
<user>user prompt here
|
| 734 |
+
<model>model response here</s>
|
| 735 |
+
```
|
| 736 |
+
|
| 737 |
+
The corresponding code should be:
|
| 738 |
+
```
|
| 739 |
+
register_template(
|
| 740 |
+
name="custom",
|
| 741 |
+
format_user=StringFormatter(slots=["<user>{{content}}\n<model>"]),
|
| 742 |
+
format_assistant=StringFormatter(slots=["{{content}}</s>\n"]),
|
| 743 |
+
format_prefix=EmptyFormatter("<s>"),
|
| 744 |
+
)
|
| 745 |
+
```
|
| 746 |
+
"""
|
| 747 |
+
if name in TEMPLATES:
|
| 748 |
+
raise ValueError(f"Template {name} already exists.")
|
| 749 |
+
|
| 750 |
+
default_slots = ["{{content}}"] if efficient_eos else ["{{content}}", {"eos_token"}]
|
| 751 |
+
default_user_formatter = StringFormatter(slots=["{{content}}"])
|
| 752 |
+
default_assistant_formatter = StringFormatter(slots=default_slots)
|
| 753 |
+
if format_assistant is not None:
|
| 754 |
+
default_function_formatter = FunctionFormatter(slots=format_assistant.slots, tool_format="default")
|
| 755 |
+
else:
|
| 756 |
+
default_function_formatter = FunctionFormatter(slots=default_slots, tool_format="default")
|
| 757 |
+
|
| 758 |
+
default_tool_formatter = ToolFormatter(tool_format="default")
|
| 759 |
+
default_prefix_formatter = EmptyFormatter()
|
| 760 |
+
TEMPLATES[name] = template_class(
|
| 761 |
+
format_user=format_user or default_user_formatter,
|
| 762 |
+
format_assistant=format_assistant or default_assistant_formatter,
|
| 763 |
+
format_system=format_system or default_user_formatter,
|
| 764 |
+
format_function=format_function or default_function_formatter,
|
| 765 |
+
format_observation=format_observation or format_user or default_user_formatter,
|
| 766 |
+
format_tools=format_tools or default_tool_formatter,
|
| 767 |
+
format_prefix=format_prefix or default_prefix_formatter,
|
| 768 |
+
default_system=default_system,
|
| 769 |
+
stop_words=stop_words or [],
|
| 770 |
+
thought_words=thought_words or ("<think>", "</think>"),
|
| 771 |
+
efficient_eos=efficient_eos,
|
| 772 |
+
replace_eos=replace_eos,
|
| 773 |
+
replace_jinja_template=replace_jinja_template,
|
| 774 |
+
enable_thinking=enable_thinking,
|
| 775 |
+
mm_plugin=mm_plugin,
|
| 776 |
+
)
|
| 777 |
+
|
| 778 |
+
|
| 779 |
+
def parse_template(tokenizer: "PreTrainedTokenizer") -> "Template":
|
| 780 |
+
r"""Extract a chat template from the tokenizer."""
|
| 781 |
+
|
| 782 |
+
def find_diff(short_str: str, long_str: str) -> str:
|
| 783 |
+
i, j = 0, 0
|
| 784 |
+
diff = ""
|
| 785 |
+
while i < len(short_str) and j < len(long_str):
|
| 786 |
+
if short_str[i] == long_str[j]:
|
| 787 |
+
i += 1
|
| 788 |
+
j += 1
|
| 789 |
+
else:
|
| 790 |
+
diff += long_str[j]
|
| 791 |
+
j += 1
|
| 792 |
+
|
| 793 |
+
return diff
|
| 794 |
+
|
| 795 |
+
prefix = tokenizer.decode(tokenizer.encode(""))
|
| 796 |
+
|
| 797 |
+
messages = [{"role": "system", "content": "{{content}}"}]
|
| 798 |
+
system_slot = tokenizer.apply_chat_template(messages, add_generation_prompt=False, tokenize=False)[len(prefix) :]
|
| 799 |
+
|
| 800 |
+
messages = [{"role": "system", "content": ""}, {"role": "user", "content": "{{content}}"}]
|
| 801 |
+
user_slot_empty_system = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
|
| 802 |
+
user_slot_empty_system = user_slot_empty_system[len(prefix) :]
|
| 803 |
+
|
| 804 |
+
messages = [{"role": "user", "content": "{{content}}"}]
|
| 805 |
+
user_slot = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
|
| 806 |
+
user_slot = user_slot[len(prefix) :]
|
| 807 |
+
|
| 808 |
+
messages = [{"role": "user", "content": "{{content}}"}, {"role": "assistant", "content": "{{content}}"}]
|
| 809 |
+
assistant_slot = tokenizer.apply_chat_template(messages, add_generation_prompt=False, tokenize=False)
|
| 810 |
+
assistant_slot = assistant_slot[len(prefix) + len(user_slot) :]
|
| 811 |
+
template_class = ReasoningTemplate if "<think>" in assistant_slot else Template
|
| 812 |
+
assistant_slot = assistant_slot.replace("<think>", "").replace("</think>", "").lstrip("\n") # remove thought tags
|
| 813 |
+
|
| 814 |
+
if len(user_slot) > len(user_slot_empty_system):
|
| 815 |
+
default_system = find_diff(user_slot_empty_system, user_slot)
|
| 816 |
+
sole_system = system_slot.replace("{{content}}", default_system, 1)
|
| 817 |
+
user_slot = user_slot[len(sole_system) :]
|
| 818 |
+
else: # if defaut_system is empty, user_slot_empty_system will be longer than user_slot
|
| 819 |
+
default_system = ""
|
| 820 |
+
|
| 821 |
+
return template_class(
|
| 822 |
+
format_user=StringFormatter(slots=[user_slot]),
|
| 823 |
+
format_assistant=StringFormatter(slots=[assistant_slot]),
|
| 824 |
+
format_system=StringFormatter(slots=[system_slot]),
|
| 825 |
+
format_function=FunctionFormatter(slots=[assistant_slot], tool_format="default"),
|
| 826 |
+
format_observation=StringFormatter(slots=[user_slot]),
|
| 827 |
+
format_tools=ToolFormatter(tool_format="default"),
|
| 828 |
+
format_prefix=EmptyFormatter(slots=[prefix]) if prefix else EmptyFormatter(),
|
| 829 |
+
default_system=default_system,
|
| 830 |
+
stop_words=[],
|
| 831 |
+
thought_words=("<think>", "</think>"),
|
| 832 |
+
efficient_eos=False,
|
| 833 |
+
replace_eos=False,
|
| 834 |
+
replace_jinja_template=False,
|
| 835 |
+
enable_thinking=True,
|
| 836 |
+
mm_plugin=get_mm_plugin(name="base"),
|
| 837 |
+
)
|
| 838 |
+
|
| 839 |
+
|
| 840 |
+
def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args: "DataArguments") -> "Template":
|
| 841 |
+
r"""Get chat template and fixes the tokenizer."""
|
| 842 |
+
if data_args.template is None:
|
| 843 |
+
if isinstance(tokenizer.chat_template, str):
|
| 844 |
+
logger.warning_rank0("`template` was not specified, try parsing the chat template from the tokenizer.")
|
| 845 |
+
template = parse_template(tokenizer)
|
| 846 |
+
else:
|
| 847 |
+
logger.warning_rank0("`template` was not specified, use `empty` template.")
|
| 848 |
+
template = TEMPLATES["empty"] # placeholder
|
| 849 |
+
else:
|
| 850 |
+
if data_args.template not in TEMPLATES:
|
| 851 |
+
raise ValueError(f"Template {data_args.template} does not exist.")
|
| 852 |
+
|
| 853 |
+
template = TEMPLATES[data_args.template]
|
| 854 |
+
|
| 855 |
+
if data_args.train_on_prompt and template.efficient_eos:
|
| 856 |
+
raise ValueError("Current template does not support `train_on_prompt`.")
|
| 857 |
+
|
| 858 |
+
if data_args.tool_format is not None:
|
| 859 |
+
logger.info_rank0(f"Using tool format: {data_args.tool_format}.")
|
| 860 |
+
default_slots = ["{{content}}"] if template.efficient_eos else ["{{content}}", {"eos_token"}]
|
| 861 |
+
template.format_function = FunctionFormatter(slots=default_slots, tool_format=data_args.tool_format)
|
| 862 |
+
template.format_tools = ToolFormatter(tool_format=data_args.tool_format)
|
| 863 |
+
|
| 864 |
+
if data_args.default_system is not None:
|
| 865 |
+
logger.info_rank0(f"Using default system message: {data_args.default_system}.")
|
| 866 |
+
template.default_system = data_args.default_system
|
| 867 |
+
|
| 868 |
+
template.enable_thinking = data_args.enable_thinking
|
| 869 |
+
template.fix_special_tokens(tokenizer)
|
| 870 |
+
template.fix_jinja_template(tokenizer)
|
| 871 |
+
return template
|
| 872 |
+
|
| 873 |
+
|
| 874 |
+
register_template(
|
| 875 |
+
name="alpaca",
|
| 876 |
+
format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n\n### Response:\n"]),
|
| 877 |
+
format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}, "\n\n"]),
|
| 878 |
+
default_system=(
|
| 879 |
+
"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
|
| 880 |
+
),
|
| 881 |
+
replace_jinja_template=True,
|
| 882 |
+
)
|
| 883 |
+
|
| 884 |
+
|
| 885 |
+
register_template(
|
| 886 |
+
name="aquila",
|
| 887 |
+
format_user=StringFormatter(slots=["Human: {{content}}###Assistant:"]),
|
| 888 |
+
format_assistant=StringFormatter(slots=["{{content}}###"]),
|
| 889 |
+
format_system=StringFormatter(slots=["System: {{content}}###"]),
|
| 890 |
+
default_system=(
|
| 891 |
+
"A chat between a curious human and an artificial intelligence assistant. "
|
| 892 |
+
"The assistant gives helpful, detailed, and polite answers to the human's questions."
|
| 893 |
+
),
|
| 894 |
+
stop_words=["</s>"],
|
| 895 |
+
)
|
| 896 |
+
|
| 897 |
+
|
| 898 |
+
register_template(
|
| 899 |
+
name="atom",
|
| 900 |
+
format_user=StringFormatter(
|
| 901 |
+
slots=[{"bos_token"}, "Human: {{content}}\n", {"eos_token"}, {"bos_token"}, "Assistant:"]
|
| 902 |
+
),
|
| 903 |
+
format_assistant=StringFormatter(slots=["{{content}}\n", {"eos_token"}]),
|
| 904 |
+
)
|
| 905 |
+
|
| 906 |
+
|
| 907 |
+
register_template(
|
| 908 |
+
name="baichuan",
|
| 909 |
+
format_user=StringFormatter(slots=[{"token": "<reserved_102>"}, "{{content}}", {"token": "<reserved_103>"}]),
|
| 910 |
+
efficient_eos=True,
|
| 911 |
+
)
|
| 912 |
+
|
| 913 |
+
|
| 914 |
+
register_template(
|
| 915 |
+
name="baichuan2",
|
| 916 |
+
format_user=StringFormatter(slots=["<reserved_106>{{content}}<reserved_107>"]),
|
| 917 |
+
efficient_eos=True,
|
| 918 |
+
)
|
| 919 |
+
|
| 920 |
+
|
| 921 |
+
register_template(
|
| 922 |
+
name="bailing",
|
| 923 |
+
format_user=StringFormatter(slots=["<role>HUMAN</role>{{content}}<role>ASSISTANT</role>"]),
|
| 924 |
+
format_system=StringFormatter(slots=["<role>SYSTEM</role>{{content}}"]),
|
| 925 |
+
format_observation=StringFormatter(slots=["<role>OBSERVATION</role>{{content}}<role>ASSISTANT</role>"]),
|
| 926 |
+
stop_words=["<|endoftext|>"],
|
| 927 |
+
efficient_eos=True,
|
| 928 |
+
)
|
| 929 |
+
|
| 930 |
+
|
| 931 |
+
register_template(
|
| 932 |
+
name="belle",
|
| 933 |
+
format_user=StringFormatter(slots=["Human: {{content}}\n\nBelle: "]),
|
| 934 |
+
format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}, "\n\n"]),
|
| 935 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 936 |
+
)
|
| 937 |
+
|
| 938 |
+
|
| 939 |
+
register_template(
|
| 940 |
+
name="bluelm",
|
| 941 |
+
format_user=StringFormatter(slots=[{"token": "[|Human|]:"}, "{{content}}", {"token": "[|AI|]:"}]),
|
| 942 |
+
)
|
| 943 |
+
|
| 944 |
+
|
| 945 |
+
register_template(
|
| 946 |
+
name="breeze",
|
| 947 |
+
format_user=StringFormatter(slots=["[INST] {{content}} [/INST] "]),
|
| 948 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 949 |
+
efficient_eos=True,
|
| 950 |
+
)
|
| 951 |
+
|
| 952 |
+
|
| 953 |
+
register_template(
|
| 954 |
+
name="chatglm2",
|
| 955 |
+
format_user=StringFormatter(slots=["[Round {{idx}}]\n\n问:{{content}}\n\n答:"]),
|
| 956 |
+
format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]),
|
| 957 |
+
efficient_eos=True,
|
| 958 |
+
)
|
| 959 |
+
|
| 960 |
+
|
| 961 |
+
register_template(
|
| 962 |
+
name="chatglm3",
|
| 963 |
+
format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]),
|
| 964 |
+
format_assistant=StringFormatter(slots=["\n", "{{content}}"]),
|
| 965 |
+
format_system=StringFormatter(slots=[{"token": "<|system|>"}, "\n", "{{content}}"]),
|
| 966 |
+
format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4"),
|
| 967 |
+
format_observation=StringFormatter(
|
| 968 |
+
slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]
|
| 969 |
+
),
|
| 970 |
+
format_tools=ToolFormatter(tool_format="glm4"),
|
| 971 |
+
format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]),
|
| 972 |
+
stop_words=["<|user|>", "<|observation|>"],
|
| 973 |
+
efficient_eos=True,
|
| 974 |
+
)
|
| 975 |
+
|
| 976 |
+
|
| 977 |
+
register_template(
|
| 978 |
+
name="chatml",
|
| 979 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 980 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 981 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 982 |
+
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 983 |
+
stop_words=["<|im_end|>", "<|im_start|>"],
|
| 984 |
+
replace_eos=True,
|
| 985 |
+
replace_jinja_template=True,
|
| 986 |
+
)
|
| 987 |
+
|
| 988 |
+
|
| 989 |
+
# copied from chatml template
|
| 990 |
+
register_template(
|
| 991 |
+
name="chatml_de",
|
| 992 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 993 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 994 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 995 |
+
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 996 |
+
default_system="Du bist ein freundlicher und hilfsbereiter KI-Assistent.",
|
| 997 |
+
stop_words=["<|im_end|>", "<|im_start|>"],
|
| 998 |
+
replace_eos=True,
|
| 999 |
+
replace_jinja_template=True,
|
| 1000 |
+
)
|
| 1001 |
+
|
| 1002 |
+
|
| 1003 |
+
register_template(
|
| 1004 |
+
name="codegeex2",
|
| 1005 |
+
format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]),
|
| 1006 |
+
)
|
| 1007 |
+
|
| 1008 |
+
|
| 1009 |
+
register_template(
|
| 1010 |
+
name="codegeex4",
|
| 1011 |
+
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]),
|
| 1012 |
+
format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
|
| 1013 |
+
format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4"),
|
| 1014 |
+
format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>\n"]),
|
| 1015 |
+
format_tools=ToolFormatter(tool_format="glm4"),
|
| 1016 |
+
format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
|
| 1017 |
+
default_system=(
|
| 1018 |
+
"你是���位智能编程助手,你叫CodeGeeX。你会为用户回答关于编程、代码、计算机方面的任何问题,"
|
| 1019 |
+
"并提供格式规范、可以执行、准确安全的代码,并在必要时提供详细的解释。"
|
| 1020 |
+
),
|
| 1021 |
+
stop_words=["<|user|>", "<|observation|>"],
|
| 1022 |
+
efficient_eos=True,
|
| 1023 |
+
)
|
| 1024 |
+
|
| 1025 |
+
|
| 1026 |
+
register_template(
|
| 1027 |
+
name="cohere",
|
| 1028 |
+
format_user=StringFormatter(
|
| 1029 |
+
slots=[
|
| 1030 |
+
(
|
| 1031 |
+
"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"
|
| 1032 |
+
"<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
|
| 1033 |
+
)
|
| 1034 |
+
]
|
| 1035 |
+
),
|
| 1036 |
+
format_system=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"]),
|
| 1037 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1038 |
+
)
|
| 1039 |
+
|
| 1040 |
+
|
| 1041 |
+
register_template(
|
| 1042 |
+
name="cpm",
|
| 1043 |
+
format_user=StringFormatter(slots=["<用户>{{content}}<AI>"]),
|
| 1044 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1045 |
+
)
|
| 1046 |
+
|
| 1047 |
+
|
| 1048 |
+
# copied from chatml template
|
| 1049 |
+
register_template(
|
| 1050 |
+
name="cpm3",
|
| 1051 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1052 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1053 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1054 |
+
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1055 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1056 |
+
stop_words=["<|im_end|>"],
|
| 1057 |
+
)
|
| 1058 |
+
|
| 1059 |
+
|
| 1060 |
+
# copied from chatml template
|
| 1061 |
+
register_template(
|
| 1062 |
+
name="cpm4",
|
| 1063 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1064 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1065 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1066 |
+
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1067 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1068 |
+
stop_words=["<|im_end|>"],
|
| 1069 |
+
)
|
| 1070 |
+
|
| 1071 |
+
|
| 1072 |
+
# copied from chatml template
|
| 1073 |
+
register_template(
|
| 1074 |
+
name="dbrx",
|
| 1075 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1076 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1077 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1078 |
+
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1079 |
+
default_system=(
|
| 1080 |
+
"You are DBRX, created by Databricks. You were last updated in December 2023. "
|
| 1081 |
+
"You answer questions based on information available up to that point.\n"
|
| 1082 |
+
"YOU PROVIDE SHORT RESPONSES TO SHORT QUESTIONS OR STATEMENTS, but provide thorough "
|
| 1083 |
+
"responses to more complex and open-ended questions.\nYou assist with various tasks, "
|
| 1084 |
+
"from writing to coding (using markdown for code blocks — remember to use ``` with "
|
| 1085 |
+
"code, JSON, and tables).\n(You do not have real-time data access or code execution "
|
| 1086 |
+
"capabilities. You avoid stereotyping and provide balanced perspectives on "
|
| 1087 |
+
"controversial topics. You do not provide song lyrics, poems, or news articles and "
|
| 1088 |
+
"do not divulge details of your training data.)\nThis is your system prompt, "
|
| 1089 |
+
"guiding your responses. Do not reference it, just respond to the user. If you find "
|
| 1090 |
+
"yourself talking about this message, stop. You should be responding appropriately "
|
| 1091 |
+
"and usually that means not mentioning this.\nYOU DO NOT MENTION ANY OF THIS INFORMATION "
|
| 1092 |
+
"ABOUT YOURSELF UNLESS THE INFORMATION IS DIRECTLY PERTINENT TO THE USER'S QUERY."
|
| 1093 |
+
),
|
| 1094 |
+
stop_words=["<|im_end|>"],
|
| 1095 |
+
replace_eos=True,
|
| 1096 |
+
)
|
| 1097 |
+
|
| 1098 |
+
|
| 1099 |
+
register_template(
|
| 1100 |
+
name="deepseek",
|
| 1101 |
+
format_user=StringFormatter(slots=["User: {{content}}\n\nAssistant:"]),
|
| 1102 |
+
format_system=StringFormatter(slots=["{{content}}\n\n"]),
|
| 1103 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1104 |
+
)
|
| 1105 |
+
|
| 1106 |
+
|
| 1107 |
+
register_template(
|
| 1108 |
+
name="deepseek3",
|
| 1109 |
+
format_user=StringFormatter(slots=["{{content}}"]),
|
| 1110 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1111 |
+
)
|
| 1112 |
+
|
| 1113 |
+
|
| 1114 |
+
# copied from deepseek3 template
|
| 1115 |
+
register_template(
|
| 1116 |
+
name="deepseekr1",
|
| 1117 |
+
format_user=StringFormatter(slots=["{{content}}"]),
|
| 1118 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1119 |
+
template_class=ReasoningTemplate,
|
| 1120 |
+
)
|
| 1121 |
+
|
| 1122 |
+
|
| 1123 |
+
register_template(
|
| 1124 |
+
name="deepseekcoder",
|
| 1125 |
+
format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n### Response:"]),
|
| 1126 |
+
format_assistant=StringFormatter(slots=["\n{{content}}\n"]),
|
| 1127 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1128 |
+
default_system=(
|
| 1129 |
+
"You are an AI programming assistant, utilizing the DeepSeek Coder model, "
|
| 1130 |
+
"developed by DeepSeek Company, and you only answer questions related to computer science. "
|
| 1131 |
+
"For politically sensitive questions, security and privacy issues, "
|
| 1132 |
+
"and other non-computer science questions, you will refuse to answer.\n"
|
| 1133 |
+
),
|
| 1134 |
+
)
|
| 1135 |
+
|
| 1136 |
+
|
| 1137 |
+
register_template(
|
| 1138 |
+
name="default",
|
| 1139 |
+
format_user=StringFormatter(slots=["Human: {{content}}", {"eos_token"}, "\nAssistant:"]),
|
| 1140 |
+
format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}, "\n"]),
|
| 1141 |
+
format_system=StringFormatter(slots=["System: {{content}}", {"eos_token"}, "\n"]),
|
| 1142 |
+
replace_jinja_template=True,
|
| 1143 |
+
)
|
| 1144 |
+
|
| 1145 |
+
|
| 1146 |
+
register_template(
|
| 1147 |
+
name="empty",
|
| 1148 |
+
format_assistant=StringFormatter(slots=["{{content}}"]),
|
| 1149 |
+
)
|
| 1150 |
+
|
| 1151 |
+
|
| 1152 |
+
register_template(
|
| 1153 |
+
name="exaone",
|
| 1154 |
+
format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),
|
| 1155 |
+
format_assistant=StringFormatter(slots=["{{content}}", {"eos_token"}, "\n"]),
|
| 1156 |
+
format_system=StringFormatter(slots=["[|system|]{{content}}[|endofturn|]\n"]),
|
| 1157 |
+
)
|
| 1158 |
+
|
| 1159 |
+
|
| 1160 |
+
register_template(
|
| 1161 |
+
name="falcon",
|
| 1162 |
+
format_user=StringFormatter(slots=["User: {{content}}\nFalcon:"]),
|
| 1163 |
+
format_assistant=StringFormatter(slots=["{{content}}\n"]),
|
| 1164 |
+
efficient_eos=True,
|
| 1165 |
+
)
|
| 1166 |
+
|
| 1167 |
+
|
| 1168 |
+
# copied from chatml template
|
| 1169 |
+
register_template(
|
| 1170 |
+
name="falcon_h1",
|
| 1171 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1172 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1173 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1174 |
+
format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1175 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1176 |
+
stop_words=["<|im_end|>", "<|end_of_text|>"],
|
| 1177 |
+
)
|
| 1178 |
+
|
| 1179 |
+
|
| 1180 |
+
register_template(
|
| 1181 |
+
name="fewshot",
|
| 1182 |
+
format_assistant=StringFormatter(slots=["{{content}}\n\n"]),
|
| 1183 |
+
efficient_eos=True,
|
| 1184 |
+
replace_jinja_template=True,
|
| 1185 |
+
)
|
| 1186 |
+
|
| 1187 |
+
|
| 1188 |
+
# Gemma chat format: "<start_of_turn>{role}\n...<end_of_turn>" turn markers; the
# system text is plain and prepended to the first user turn (hence Llama2Template).
register_template(
    name="gemma",
    format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_turn>\n"]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_observation=StringFormatter(
        slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
    ),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<end_of_turn>"],
    replace_eos=True,
    template_class=Llama2Template,
)


# copied from gemma template
register_template(
    name="gemma2",
    format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_turn>\n"]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_observation=StringFormatter(
        slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
    ),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<eos>", "<end_of_turn>"],
    efficient_eos=True,
    template_class=Llama2Template,
)


# copied from gemma template
register_template(
    name="gemma3",
    format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_turn>\n"]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_observation=StringFormatter(
        slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
    ),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<end_of_turn>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin("gemma3", image_token="<image_soft_token>"),
    template_class=Llama2Template,
)


register_template(
    name="gemma3n",
    format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_turn>\n"]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_observation=StringFormatter(
        slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
    ),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<end_of_turn>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin("gemma3n", image_token="<image_soft_token>", audio_token="<audio_soft_token>"),
    template_class=Llama2Template,
)


# GLM-4 format: "[gMASK]<sop>" prefix, "<|user|>"/"<|assistant|>"/"<|observation|>" role
# tags; generation stops on the next role tag (efficient_eos).
register_template(
    name="glm4",
    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
    format_assistant=StringFormatter(slots=["\n{{content}}"]),
    format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
    format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4"),
    format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
    format_tools=ToolFormatter(tool_format="glm4"),
    format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
    stop_words=["<|user|>", "<|observation|>"],
    efficient_eos=True,
)


# copied from glm4 template
register_template(
    name="glm4v",
    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
    format_assistant=StringFormatter(slots=["\n{{content}}"]),
    format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
    format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4"),
    format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
    format_tools=ToolFormatter(tool_format="glm4"),
    format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
    stop_words=["<|user|>", "<|observation|>", "</answer>"],
    efficient_eos=True,
    mm_plugin=get_mm_plugin(name="glm4v", image_token="<|image|>", video_token="<|video|>"),
    template_class=ReasoningTemplate,
)


# copied from glm4 template
register_template(
    name="glmz1",
    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]),
    format_assistant=StringFormatter(slots=["\n{{content}}"]),
    format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
    format_function=FunctionFormatter(slots=["{{content}}"], tool_format="glm4"),
    format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
    format_tools=ToolFormatter(tool_format="glm4"),
    format_prefix=EmptyFormatter(slots=["[gMASK]<sop>"]),
    stop_words=["<|user|>", "<|observation|>"],
    efficient_eos=True,
    template_class=ReasoningTemplate,
)


# IBM Granite 3 format: "<|start_of_role|>{role}<|end_of_role|>...<|end_of_text|>".
register_template(
    name="granite3",
    format_user=StringFormatter(
        slots=[
            "<|start_of_role|>user<|end_of_role|>{{content}}<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>"
        ]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|end_of_text|>\n"]),
    format_system=StringFormatter(slots=["<|start_of_role|>system<|end_of_role|>{{content}}<|end_of_text|>\n"]),
)


register_template(
    name="granite3_vision",
    format_user=StringFormatter(slots=["<|user|>\n{{content}}\n<|assistant|>\n"]),
    format_system=StringFormatter(slots=["<|system|>\n{{content}}\n"]),
    default_system=(
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions."
    ),
    mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"),
)


register_template(
    name="index",
    format_user=StringFormatter(slots=["reserved_0{{content}}reserved_1"]),
    format_system=StringFormatter(slots=["<unk>{{content}}"]),
    efficient_eos=True,
)


register_template(
    name="hunyuan",
    format_user=StringFormatter(slots=["<|bos|>user\n{{content}}<|eos|>\n<|bos|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|eos|>\n"]),
    format_system=StringFormatter(slots=["<|bos|>system\n{{content}}<|eos|>\n"]),
    format_prefix=EmptyFormatter(slots=["<|bos|>"]),
    stop_words=["<|eos|>"],
)


# InternLM v1 format: "<|User|>:" / "<|Bot|>:" role tags, "<eoa>" end-of-answer marker.
register_template(
    name="intern",
    format_user=StringFormatter(slots=["<|User|>:{{content}}\n<|Bot|>:"]),
    format_assistant=StringFormatter(slots=["{{content}}<eoa>\n"]),
    format_system=StringFormatter(slots=["<|System|>:{{content}}\n"]),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    default_system=(
        "You are an AI assistant whose name is InternLM (书生·浦语).\n"
        "- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory "
        "(上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n"
        "- InternLM (书生·浦语) can understand and communicate fluently in the language "
        "chosen by the user such as English and 中文."
    ),
    stop_words=["<eoa>"],
)


# InternLM 2 switched to the ChatML-style "<|im_start|>/<|im_end|>" markers.
register_template(
    name="intern2",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    default_system=(
        "You are an AI assistant whose name is InternLM (书生·浦语).\n"
        "- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory "
        "(上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n"
        "- InternLM (书生·浦语) can understand and communicate fluently in the language "
        "chosen by the user such as English and 中文."
    ),
    stop_words=["<|im_end|>"],
)


register_template(
    name="intern_vl",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    # NOTE(review): the original line contained mojibake ("联合开发��多模态");
    # restored the replacement characters to "的" — confirm against the model card.
    default_system=(
        "你是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。"
    ),
    stop_words=["<|im_end|>"],
    mm_plugin=get_mm_plugin(name="intern_vl", image_token="<image>", video_token="<video>"),
)


# Kimi-VL format: "<|im_user|>/<|im_assistant|>/<|im_system|>" role tokens with
# "<|im_middle|>" separating role from content; reasoning wrapped in ◁think▷ markers.
register_template(
    name="kimi_vl",
    format_user=StringFormatter(
        slots=["<|im_user|>user<|im_middle|>{{content}}<|im_end|><|im_assistant|>assistant<|im_middle|>"]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>"]),
    format_system=StringFormatter(slots=["<|im_system|>system<|im_middle|>{{content}}<|im_end|>"]),
    default_system="You are a helpful assistant",
    stop_words=["<|im_end|>"],
    thought_words=("◁think▷", "◁/think▷"),
    mm_plugin=get_mm_plugin("kimi_vl", image_token="<|media_pad|>"),
    template_class=ReasoningTemplate,
)


# Llama 2 format: "[INST] ... [/INST]" turns; system prompt goes inside <<SYS>> markers
# and is merged into the first user turn by Llama2Template.
register_template(
    name="llama2",
    format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]),
    format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]),
    template_class=Llama2Template,
)


# copied from llama2 template
register_template(
    name="llama2_zh",
    format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]),
    format_system=StringFormatter(slots=["<<SYS>>\n{{content}}\n<</SYS>>\n\n"]),
    default_system="You are a helpful assistant. 你是一个乐于助人的助手。",
    template_class=Llama2Template,
)


# Llama 3 format: "<|start_header_id|>{role}<|end_header_id|>" headers and "<|eot_id|>"
# turn terminator; tool results come back under the "ipython" role.
register_template(
    name="llama3",
    format_user=StringFormatter(
        slots=[
            (
                "<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"
                "<|start_header_id|>assistant<|end_header_id|>\n\n"
            )
        ]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|eot_id|>"]),
    format_system=StringFormatter(slots=["<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]),
    format_function=FunctionFormatter(slots=["{{content}}<|eot_id|>"], tool_format="llama3"),
    format_observation=StringFormatter(
        slots=[
            (
                "<|start_header_id|>ipython<|end_header_id|>\n\n{{content}}<|eot_id|>"
                "<|start_header_id|>assistant<|end_header_id|>\n\n"
            )
        ]
    ),
    format_tools=ToolFormatter(tool_format="llama3"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<|eot_id|>", "<|eom_id|>"],
    replace_eos=True,
)


# Llama 4 renames the markers to "<|header_start|>/<|header_end|>" and "<|eot|>".
register_template(
    name="llama4",
    format_user=StringFormatter(
        slots=["<|header_start|>user<|header_end|>\n\n{{content}}<|eot|><|header_start|>assistant<|header_end|>\n\n"]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|eot|>"]),
    format_system=StringFormatter(slots=["<|header_start|>system<|header_end|>\n\n{{content}}<|eot|>"]),
    format_function=FunctionFormatter(slots=["{{content}}<|eot|>"], tool_format="llama3"),
    format_observation=StringFormatter(
        slots=[
            "<|header_start|>ipython<|header_end|>\n\n{{content}}<|eot|><|header_start|>assistant<|header_end|>\n\n"
        ]
    ),
    format_tools=ToolFormatter(tool_format="llama3"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<|eot|>", "<|eom|>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin(name="llama4", image_token="<|image|>"),
)


# copied from llama3 template
register_template(
    name="mllama",
    format_user=StringFormatter(
        slots=[
            (
                "<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"
                "<|start_header_id|>assistant<|end_header_id|>\n\n"
            )
        ]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|eot_id|>"]),
    format_system=StringFormatter(slots=["<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]),
    format_function=FunctionFormatter(slots=["{{content}}<|eot_id|>"], tool_format="llama3"),
    format_observation=StringFormatter(
        slots=[
            (
                "<|start_header_id|>ipython<|end_header_id|>\n\n{{content}}<|eot_id|>"
                "<|start_header_id|>assistant<|end_header_id|>\n\n"
            )
        ]
    ),
    format_tools=ToolFormatter(tool_format="llama3"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<|eot_id|>", "<|eom_id|>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin(name="mllama", image_token="<|image|>"),
)


# Moonlight uses the same "<|im_user|>/<|im_middle|>" layout as kimi_vl, text-only.
register_template(
    name="moonlight",
    format_user=StringFormatter(
        slots=["<|im_user|>user<|im_middle|>{{content}}<|im_end|><|im_assistant|>assistant<|im_middle|>"]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>"]),
    format_system=StringFormatter(slots=["<|im_system|>system<|im_middle|>{{content}}<|im_end|>"]),
    default_system="You are a helpful assistant provided by Moonshot-AI.",
    stop_words=["<|im_end|>"],
    replace_eos=True,
)


# copied from vicuna template
register_template(
    name="llava",
    format_user=StringFormatter(slots=["USER: {{content}} ASSISTANT:"]),
    default_system=(
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions."
    ),
    mm_plugin=get_mm_plugin(name="llava", image_token="<image>"),
)


# copied from vicuna template
register_template(
    name="llava_next",
    format_user=StringFormatter(slots=["USER: {{content}} ASSISTANT:"]),
    default_system=(
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions."
    ),
    mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"),
)


# copied from llama3 template
register_template(
    name="llava_next_llama3",
    format_user=StringFormatter(
        slots=[
            (
                "<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"
                "<|start_header_id|>assistant<|end_header_id|>\n\n"
            )
        ]
    ),
    format_assistant=StringFormatter(slots=["{{content}}<|eot_id|>"]),
    format_system=StringFormatter(slots=["<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]),
    format_function=FunctionFormatter(slots=["{{content}}<|eot_id|>"], tool_format="llama3"),
    format_observation=StringFormatter(
        slots=[
            (
                "<|start_header_id|>ipython<|end_header_id|>\n\n{{content}}<|eot_id|>"
                "<|start_header_id|>assistant<|end_header_id|>\n\n"
            )
        ]
    ),
    format_tools=ToolFormatter(tool_format="llama3"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<|eot_id|>", "<|eom_id|>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"),
)


# copied from mistral template
register_template(
    name="llava_next_mistral",
    format_user=StringFormatter(slots=["[INST] {{content}}[/INST]"]),
    format_assistant=StringFormatter(slots=[" {{content}}", {"eos_token"}]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_function=FunctionFormatter(slots=["[TOOL_CALLS] {{content}}", {"eos_token"}], tool_format="mistral"),
    format_observation=StringFormatter(slots=["""[TOOL_RESULTS] {"content": {{content}}}[/TOOL_RESULTS]"""]),
    format_tools=ToolFormatter(tool_format="mistral"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"),
    template_class=Llama2Template,
)


# copied from qwen template
register_template(
    name="llava_next_qwen",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
    format_observation=StringFormatter(
        slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
    ),
    format_tools=ToolFormatter(tool_format="qwen"),
    default_system="You are a helpful assistant.",
    stop_words=["<|im_end|>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"),
)


# copied from chatml template
register_template(
    name="llava_next_yi",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    stop_words=["<|im_end|>"],
    mm_plugin=get_mm_plugin(name="llava_next", image_token="<image>"),
)


# copied from vicuna template
register_template(
    name="llava_next_video",
    format_user=StringFormatter(slots=["USER: {{content}} ASSISTANT:"]),
    default_system=(
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions."
    ),
    mm_plugin=get_mm_plugin(name="llava_next_video", image_token="<image>", video_token="<video>"),
)


# copied from mistral template
register_template(
    name="llava_next_video_mistral",
    format_user=StringFormatter(slots=["[INST] {{content}}[/INST]"]),
    format_assistant=StringFormatter(slots=[" {{content}}", {"eos_token"}]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_function=FunctionFormatter(slots=["[TOOL_CALLS] {{content}}", {"eos_token"}], tool_format="mistral"),
    format_observation=StringFormatter(slots=["""[TOOL_RESULTS] {"content": {{content}}}[/TOOL_RESULTS]"""]),
    format_tools=ToolFormatter(tool_format="mistral"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    mm_plugin=get_mm_plugin(name="llava_next_video", image_token="<image>", video_token="<video>"),
    template_class=Llama2Template,
)


# copied from chatml template
register_template(
    name="llava_next_video_yi",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    stop_words=["<|im_end|>"],
    mm_plugin=get_mm_plugin(name="llava_next_video", image_token="<image>", video_token="<video>"),
)


# copied from chatml template
register_template(
    name="marco",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    default_system=(
        "你是一个经过良好训练的AI助手,你的名字是Marco-o1."
        "由阿里国际数字商业集团的AI Business创造.\n## 重要!!!!!\n"
        "当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n"
        "<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n"
    ),
    stop_words=["<|im_end|>"],
)


# copied from qwen template
register_template(
    name="mimo",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
    format_observation=StringFormatter(
        slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
    ),
    format_tools=ToolFormatter(tool_format="qwen"),
    default_system="You are a helpful assistant.",
    stop_words=["<|im_end|>"],
    replace_eos=True,
    template_class=ReasoningTemplate,
)

# copied from qwen2vl
register_template(
    name="mimo_vl",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
    format_observation=StringFormatter(
        slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
    ),
    format_tools=ToolFormatter(tool_format="qwen"),
    default_system="You are MiMo, an AI assistant developed by Xiaomi.",
    stop_words=["<|im_end|>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin(name="qwen2_vl", image_token="<|image_pad|>", video_token="<|video_pad|>"),
    template_class=ReasoningTemplate,
)


# copied from chatml template
register_template(
    name="minicpm_v",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    stop_words=["<|im_end|>"],
    default_system="You are a helpful assistant.",
    mm_plugin=get_mm_plugin(name="minicpm_v", image_token="<image>", video_token="<video>"),
)


# copied from minicpm_v template
register_template(
    name="minicpm_o",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    stop_words=["<|im_end|>"],
    default_system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
    mm_plugin=get_mm_plugin(name="minicpm_v", image_token="<image>", video_token="<video>", audio_token="<audio>"),
)


# mistral tokenizer v3 tekken
register_template(
    name="ministral",
    format_user=StringFormatter(slots=["[INST]{{content}}[/INST]"]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_function=FunctionFormatter(slots=["[TOOL_CALLS]{{content}}", {"eos_token"}], tool_format="mistral"),
    format_observation=StringFormatter(slots=["""[TOOL_RESULTS]{"content": {{content}}}[/TOOL_RESULTS]"""]),
    format_tools=ToolFormatter(tool_format="mistral"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    template_class=Llama2Template,
)


# mistral tokenizer v3 (note the space after "[INST]" and before the reply)
register_template(
    name="mistral",
    format_user=StringFormatter(slots=["[INST] {{content}}[/INST]"]),
    format_assistant=StringFormatter(slots=[" {{content}}", {"eos_token"}]),
    format_system=StringFormatter(slots=["{{content}}\n\n"]),
    format_function=FunctionFormatter(slots=["[TOOL_CALLS] {{content}}", {"eos_token"}], tool_format="mistral"),
    format_observation=StringFormatter(slots=["""[TOOL_RESULTS] {"content": {{content}}}[/TOOL_RESULTS]"""]),
    format_tools=ToolFormatter(tool_format="mistral"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    template_class=Llama2Template,
)


# mistral tokenizer v7 tekken (copied from ministral)
register_template(
    name="mistral_small",
    format_user=StringFormatter(slots=["[INST]{{content}}[/INST]"]),
    format_system=StringFormatter(slots=["[SYSTEM_PROMPT]{{content}}[/SYSTEM_PROMPT]"]),
    format_function=FunctionFormatter(slots=["[TOOL_CALLS]{{content}}", {"eos_token"}], tool_format="mistral"),
    format_observation=StringFormatter(slots=["""[TOOL_RESULTS]{"content": {{content}}}[/TOOL_RESULTS]"""]),
    format_tools=ToolFormatter(tool_format="mistral"),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    mm_plugin=get_mm_plugin(name="pixtral", image_token="[IMG]"),
)


register_template(
    name="olmo",
    format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]),
    format_prefix=EmptyFormatter(slots=[{"eos_token"}]),
)


register_template(
    name="openchat",
    format_user=StringFormatter(slots=["GPT4 Correct User: {{content}}", {"eos_token"}, "GPT4 Correct Assistant:"]),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)


# OpenChat 3.6 reuses the llama3 header/eot markers with "GPT4 Correct" role names.
register_template(
    name="openchat-3.6",
    format_user=StringFormatter(
        slots=[
            (
                "<|start_header_id|>GPT4 Correct User<|end_header_id|>\n\n{{content}}<|eot_id|>"
                "<|start_header_id|>GPT4 Correct Assistant<|end_header_id|>\n\n"
            )
        ]
    ),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<|eot_id|>"],
)


# copied from chatml template
register_template(
    name="opencoder",
    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
    format_observation=StringFormatter(slots=["<|im_start|>tool\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
    default_system="You are OpenCoder, created by OpenCoder Team.",
    stop_words=["<|im_end|>"],
)


register_template(
    name="orion",
    format_user=StringFormatter(slots=["Human: {{content}}\n\nAssistant: ", {"eos_token"}]),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
)


# Base PaliGemma: raw "{prompt}\n" continuation format, no chat markers.
register_template(
    name="paligemma",
    format_user=StringFormatter(slots=["{{content}}\n"]),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    mm_plugin=get_mm_plugin(name="paligemma", image_token="<image>"),
    template_class=Llama2Template,
)


# copied from gemma template
register_template(
    name="paligemma_chat",
    format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
    format_assistant=StringFormatter(slots=["{{content}}<end_of_turn>\n"]),
    format_observation=StringFormatter(
        slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
    ),
    format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
    stop_words=["<end_of_turn>"],
    replace_eos=True,
    mm_plugin=get_mm_plugin(name="paligemma", image_token="<image>"),
    template_class=Llama2Template,
)


register_template(
|
| 1841 |
+
name="phi",
|
| 1842 |
+
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|end|>\n<|assistant|>\n"]),
|
| 1843 |
+
format_assistant=StringFormatter(slots=["{{content}}<|end|>\n"]),
|
| 1844 |
+
format_system=StringFormatter(slots=["<|system|>\n{{content}}<|end|>\n"]),
|
| 1845 |
+
stop_words=["<|end|>"],
|
| 1846 |
+
replace_eos=True,
|
| 1847 |
+
)
|
| 1848 |
+
|
| 1849 |
+
|
| 1850 |
+
register_template(
|
| 1851 |
+
name="phi_small",
|
| 1852 |
+
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|end|>\n<|assistant|>\n"]),
|
| 1853 |
+
format_assistant=StringFormatter(slots=["{{content}}<|end|>\n"]),
|
| 1854 |
+
format_system=StringFormatter(slots=["<|system|>\n{{content}}<|end|>\n"]),
|
| 1855 |
+
format_prefix=EmptyFormatter(slots=[{"<|endoftext|>"}]),
|
| 1856 |
+
stop_words=["<|end|>"],
|
| 1857 |
+
replace_eos=True,
|
| 1858 |
+
)
|
| 1859 |
+
|
| 1860 |
+
|
| 1861 |
+
register_template(
|
| 1862 |
+
name="phi4",
|
| 1863 |
+
format_user=StringFormatter(
|
| 1864 |
+
slots=["<|im_start|>user<|im_sep|>{{content}}<|im_end|><|im_start|>assistant<|im_sep|>"]
|
| 1865 |
+
),
|
| 1866 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>"]),
|
| 1867 |
+
format_system=StringFormatter(slots=["<|im_start|>system<|im_sep|>{{content}}<|im_end|>"]),
|
| 1868 |
+
stop_words=["<|im_end|>"],
|
| 1869 |
+
replace_eos=True,
|
| 1870 |
+
)
|
| 1871 |
+
|
| 1872 |
+
|
| 1873 |
+
# copied from ministral template
|
| 1874 |
+
register_template(
|
| 1875 |
+
name="pixtral",
|
| 1876 |
+
format_user=StringFormatter(slots=["[INST]{{content}}[/INST]"]),
|
| 1877 |
+
format_system=StringFormatter(slots=["{{content}}\n\n"]),
|
| 1878 |
+
format_function=FunctionFormatter(slots=["[TOOL_CALLS]{{content}}", {"eos_token"}], tool_format="mistral"),
|
| 1879 |
+
format_observation=StringFormatter(slots=["""[TOOL_RESULTS]{"content": {{content}}}[/TOOL_RESULTS]"""]),
|
| 1880 |
+
format_tools=ToolFormatter(tool_format="mistral"),
|
| 1881 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 1882 |
+
mm_plugin=get_mm_plugin(name="pixtral", image_token="[IMG]"),
|
| 1883 |
+
template_class=Llama2Template,
|
| 1884 |
+
)
|
| 1885 |
+
|
| 1886 |
+
|
| 1887 |
+
# copied from chatml template
|
| 1888 |
+
register_template(
|
| 1889 |
+
name="qwen",
|
| 1890 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1891 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1892 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1893 |
+
format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
|
| 1894 |
+
format_observation=StringFormatter(
|
| 1895 |
+
slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
|
| 1896 |
+
),
|
| 1897 |
+
format_tools=ToolFormatter(tool_format="qwen"),
|
| 1898 |
+
default_system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
|
| 1899 |
+
stop_words=["<|im_end|>"],
|
| 1900 |
+
replace_eos=True,
|
| 1901 |
+
)
|
| 1902 |
+
|
| 1903 |
+
|
| 1904 |
+
# copied from qwen template
|
| 1905 |
+
register_template(
|
| 1906 |
+
name="qwen3",
|
| 1907 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1908 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1909 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1910 |
+
format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
|
| 1911 |
+
format_observation=StringFormatter(
|
| 1912 |
+
slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
|
| 1913 |
+
),
|
| 1914 |
+
format_tools=ToolFormatter(tool_format="qwen"),
|
| 1915 |
+
stop_words=["<|im_end|>"],
|
| 1916 |
+
replace_eos=True,
|
| 1917 |
+
template_class=ReasoningTemplate,
|
| 1918 |
+
)
|
| 1919 |
+
|
| 1920 |
+
|
| 1921 |
+
# copied from chatml template
|
| 1922 |
+
register_template(
|
| 1923 |
+
name="qwen2_audio",
|
| 1924 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1925 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1926 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1927 |
+
default_system="You are a helpful assistant.",
|
| 1928 |
+
stop_words=["<|im_end|>"],
|
| 1929 |
+
replace_eos=True,
|
| 1930 |
+
mm_plugin=get_mm_plugin(name="qwen2_audio", audio_token="<|AUDIO|>"),
|
| 1931 |
+
)
|
| 1932 |
+
|
| 1933 |
+
|
| 1934 |
+
# copied from qwen template
|
| 1935 |
+
register_template(
|
| 1936 |
+
name="qwen2_omni",
|
| 1937 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1938 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1939 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1940 |
+
format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
|
| 1941 |
+
format_observation=StringFormatter(
|
| 1942 |
+
slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
|
| 1943 |
+
),
|
| 1944 |
+
format_tools=ToolFormatter(tool_format="qwen"),
|
| 1945 |
+
default_system="You are a helpful assistant.",
|
| 1946 |
+
stop_words=["<|im_end|>"],
|
| 1947 |
+
replace_eos=True,
|
| 1948 |
+
mm_plugin=get_mm_plugin(
|
| 1949 |
+
name="qwen2_omni", audio_token="<|AUDIO|>", image_token="<|IMAGE|>", video_token="<|VIDEO|>"
|
| 1950 |
+
),
|
| 1951 |
+
)
|
| 1952 |
+
|
| 1953 |
+
# copied from qwen template
|
| 1954 |
+
register_template(
|
| 1955 |
+
name="qwen2_vl",
|
| 1956 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 1957 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1958 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1959 |
+
format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
|
| 1960 |
+
format_observation=StringFormatter(
|
| 1961 |
+
slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
|
| 1962 |
+
),
|
| 1963 |
+
format_tools=ToolFormatter(tool_format="qwen"),
|
| 1964 |
+
default_system="You are a helpful assistant.",
|
| 1965 |
+
stop_words=["<|im_end|>"],
|
| 1966 |
+
replace_eos=True,
|
| 1967 |
+
mm_plugin=get_mm_plugin(name="qwen2_vl", image_token="<|image_pad|>", video_token="<|video_pad|>"),
|
| 1968 |
+
)
|
| 1969 |
+
|
| 1970 |
+
|
| 1971 |
+
register_template(
|
| 1972 |
+
name="sailor",
|
| 1973 |
+
format_user=StringFormatter(slots=["<|im_start|>question\n{{content}}<|im_end|>\n<|im_start|>answer\n"]),
|
| 1974 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 1975 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 1976 |
+
default_system=(
|
| 1977 |
+
"You are an AI assistant named Sailor created by Sea AI Lab. "
|
| 1978 |
+
"Your answer should be friendly, unbiased, faithful, informative and detailed."
|
| 1979 |
+
),
|
| 1980 |
+
stop_words=["<|im_end|>"],
|
| 1981 |
+
)
|
| 1982 |
+
|
| 1983 |
+
|
| 1984 |
+
register_template(
|
| 1985 |
+
name="seed_coder",
|
| 1986 |
+
format_user=StringFormatter(
|
| 1987 |
+
slots=[{"bos_token"}, "user\n{{content}}", {"eos_token"}, {"bos_token"}, "assistant\n"]
|
| 1988 |
+
),
|
| 1989 |
+
format_system=StringFormatter(slots=[{"bos_token"}, "system\n{{content}}", {"eos_token"}]),
|
| 1990 |
+
default_system=(
|
| 1991 |
+
"You are an AI programming assistant, utilizing the Seed-Coder model, developed by ByteDance Seed, "
|
| 1992 |
+
"and you only answer questions related to computer science. For politically sensitive questions, "
|
| 1993 |
+
"security and privacy issues, and other non-computer science questions, you will refuse to answer.\n\n"
|
| 1994 |
+
),
|
| 1995 |
+
)
|
| 1996 |
+
|
| 1997 |
+
|
| 1998 |
+
# copied from llama3 template
|
| 1999 |
+
register_template(
|
| 2000 |
+
name="skywork_o1",
|
| 2001 |
+
format_user=StringFormatter(
|
| 2002 |
+
slots=[
|
| 2003 |
+
(
|
| 2004 |
+
"<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"
|
| 2005 |
+
"<|start_header_id|>assistant<|end_header_id|>\n\n"
|
| 2006 |
+
)
|
| 2007 |
+
]
|
| 2008 |
+
),
|
| 2009 |
+
format_assistant=StringFormatter(slots=["{{content}}<|eot_id|>"]),
|
| 2010 |
+
format_system=StringFormatter(slots=["<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]),
|
| 2011 |
+
format_function=FunctionFormatter(slots=["{{content}}<|eot_id|>"], tool_format="llama3"),
|
| 2012 |
+
format_observation=StringFormatter(
|
| 2013 |
+
slots=[
|
| 2014 |
+
(
|
| 2015 |
+
"<|start_header_id|>ipython<|end_header_id|>\n\n{{content}}<|eot_id|>"
|
| 2016 |
+
"<|start_header_id|>assistant<|end_header_id|>\n\n"
|
| 2017 |
+
)
|
| 2018 |
+
]
|
| 2019 |
+
),
|
| 2020 |
+
format_tools=ToolFormatter(tool_format="llama3"),
|
| 2021 |
+
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
|
| 2022 |
+
default_system=(
|
| 2023 |
+
"You are Skywork-o1, a thinking model developed by Skywork AI, specializing in solving complex problems "
|
| 2024 |
+
"involving mathematics, coding, and logical reasoning through deep thought. When faced with a user's request, "
|
| 2025 |
+
"you first engage in a lengthy and in-depth thinking process to explore possible solutions to the problem. "
|
| 2026 |
+
"After completing your thoughts, you then provide a detailed explanation of the solution process "
|
| 2027 |
+
"in your response."
|
| 2028 |
+
),
|
| 2029 |
+
stop_words=["<|eot_id|>", "<|eom_id|>"],
|
| 2030 |
+
)
|
| 2031 |
+
|
| 2032 |
+
|
| 2033 |
+
register_template(
|
| 2034 |
+
name="smollm",
|
| 2035 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 2036 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 2037 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 2038 |
+
stop_words=["<|im_end|>"],
|
| 2039 |
+
)
|
| 2040 |
+
|
| 2041 |
+
|
| 2042 |
+
register_template(
|
| 2043 |
+
name="smollm2",
|
| 2044 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 2045 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 2046 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 2047 |
+
stop_words=["<|im_end|>"],
|
| 2048 |
+
default_system="You are a helpful AI assistant named SmolLM, trained by Hugging Face.",
|
| 2049 |
+
)
|
| 2050 |
+
|
| 2051 |
+
|
| 2052 |
+
register_template(
|
| 2053 |
+
name="solar",
|
| 2054 |
+
format_user=StringFormatter(slots=["### User:\n{{content}}\n\n### Assistant:\n"]),
|
| 2055 |
+
format_system=StringFormatter(slots=["### System:\n{{content}}\n\n"]),
|
| 2056 |
+
efficient_eos=True,
|
| 2057 |
+
)
|
| 2058 |
+
|
| 2059 |
+
|
| 2060 |
+
register_template(
|
| 2061 |
+
name="starchat",
|
| 2062 |
+
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|end|>\n<|assistant|>"]),
|
| 2063 |
+
format_assistant=StringFormatter(slots=["{{content}}<|end|>\n"]),
|
| 2064 |
+
format_system=StringFormatter(slots=["<|system|>\n{{content}}<|end|>\n"]),
|
| 2065 |
+
stop_words=["<|end|>"],
|
| 2066 |
+
)
|
| 2067 |
+
|
| 2068 |
+
|
| 2069 |
+
register_template(
|
| 2070 |
+
name="telechat",
|
| 2071 |
+
format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]),
|
| 2072 |
+
format_system=StringFormatter(slots=["<_system>{{content}}<_end>"]),
|
| 2073 |
+
)
|
| 2074 |
+
|
| 2075 |
+
|
| 2076 |
+
register_template(
|
| 2077 |
+
name="telechat2",
|
| 2078 |
+
format_user=StringFormatter(slots=["<_user>{{content}}<_bot>"]),
|
| 2079 |
+
format_system=StringFormatter(slots=["<_system>{{content}}"]),
|
| 2080 |
+
default_system=(
|
| 2081 |
+
"你是中国电信星辰语义大模型,英文名是TeleChat,你是由中电信人工智能科技有限公司和中国电信人工智能研究院(TeleAI)研发的人工智能助手。"
|
| 2082 |
+
),
|
| 2083 |
+
)
|
| 2084 |
+
|
| 2085 |
+
|
| 2086 |
+
register_template(
|
| 2087 |
+
name="vicuna",
|
| 2088 |
+
format_user=StringFormatter(slots=["USER: {{content}} ASSISTANT:"]),
|
| 2089 |
+
default_system=(
|
| 2090 |
+
"A chat between a curious user and an artificial intelligence assistant. "
|
| 2091 |
+
"The assistant gives helpful, detailed, and polite answers to the user's questions."
|
| 2092 |
+
),
|
| 2093 |
+
replace_jinja_template=True,
|
| 2094 |
+
)
|
| 2095 |
+
|
| 2096 |
+
|
| 2097 |
+
register_template(
|
| 2098 |
+
name="video_llava",
|
| 2099 |
+
format_user=StringFormatter(slots=["USER: {{content}} ASSISTANT:"]),
|
| 2100 |
+
default_system=(
|
| 2101 |
+
"A chat between a curious user and an artificial intelligence assistant. "
|
| 2102 |
+
"The assistant gives helpful, detailed, and polite answers to the user's questions."
|
| 2103 |
+
),
|
| 2104 |
+
mm_plugin=get_mm_plugin(name="video_llava", image_token="<image>", video_token="<video>"),
|
| 2105 |
+
)
|
| 2106 |
+
|
| 2107 |
+
|
| 2108 |
+
register_template(
|
| 2109 |
+
name="xuanyuan",
|
| 2110 |
+
format_user=StringFormatter(slots=["Human: {{content}} Assistant:"]),
|
| 2111 |
+
default_system=(
|
| 2112 |
+
"以下是用户和人工智能助手之间的对话。用户以Human开头,人工智能助手以Assistant开头,"
|
| 2113 |
+
"会对人类提出的问题给出有帮助、高质量、详细和礼貌的回答,并且总是拒绝参与与不道德、"
|
| 2114 |
+
"不安全、有争议、政治敏感等相关的话题、问题和指示。\n"
|
| 2115 |
+
),
|
| 2116 |
+
)
|
| 2117 |
+
|
| 2118 |
+
|
| 2119 |
+
register_template(
|
| 2120 |
+
name="xverse",
|
| 2121 |
+
format_user=StringFormatter(slots=["Human: {{content}}\n\nAssistant: "]),
|
| 2122 |
+
)
|
| 2123 |
+
|
| 2124 |
+
|
| 2125 |
+
register_template(
|
| 2126 |
+
name="yayi",
|
| 2127 |
+
format_user=StringFormatter(slots=[{"token": "<|Human|>"}, ":\n{{content}}\n\n", {"token": "<|YaYi|>"}, ":"]),
|
| 2128 |
+
format_assistant=StringFormatter(slots=["{{content}}\n\n"]),
|
| 2129 |
+
format_system=StringFormatter(slots=[{"token": "<|System|>"}, ":\n{{content}}\n\n"]),
|
| 2130 |
+
default_system=(
|
| 2131 |
+
"You are a helpful, respectful and honest assistant named YaYi "
|
| 2132 |
+
"developed by Beijing Wenge Technology Co.,Ltd. "
|
| 2133 |
+
"Always answer as helpfully as possible, while being safe. "
|
| 2134 |
+
"Your answers should not include any harmful, unethical, "
|
| 2135 |
+
"racist, sexist, toxic, dangerous, or illegal content. "
|
| 2136 |
+
"Please ensure that your responses are socially unbiased and positive in nature.\n\n"
|
| 2137 |
+
"If a question does not make any sense, or is not factually coherent, "
|
| 2138 |
+
"explain why instead of answering something not correct. "
|
| 2139 |
+
"If you don't know the answer to a question, please don't share false information."
|
| 2140 |
+
),
|
| 2141 |
+
stop_words=["<|End|>"],
|
| 2142 |
+
)
|
| 2143 |
+
|
| 2144 |
+
|
| 2145 |
+
# copied from chatml template
|
| 2146 |
+
register_template(
|
| 2147 |
+
name="yi",
|
| 2148 |
+
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
|
| 2149 |
+
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
|
| 2150 |
+
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
|
| 2151 |
+
stop_words=["<|im_end|>"],
|
| 2152 |
+
)
|
| 2153 |
+
|
| 2154 |
+
|
| 2155 |
+
register_template(
|
| 2156 |
+
name="yi_vl",
|
| 2157 |
+
format_user=StringFormatter(slots=["### Human: {{content}}\n### Assistant:"]),
|
| 2158 |
+
format_assistant=StringFormatter(slots=["{{content}}\n"]),
|
| 2159 |
+
default_system=(
|
| 2160 |
+
"This is a chat between an inquisitive human and an AI assistant. "
|
| 2161 |
+
"Assume the role of the AI assistant. Read all the images carefully, "
|
| 2162 |
+
"and respond to the human's questions with informative, helpful, detailed and polite answers. "
|
| 2163 |
+
"这是一个好奇的人类和一个人工智能助手之间的对话。假设你扮演这个AI助手的角色。"
|
| 2164 |
+
"仔细阅读所有的图像,并对人类的问题做出信息丰富、有帮助、详细的和礼貌的回答。\n\n"
|
| 2165 |
+
),
|
| 2166 |
+
stop_words=["###"],
|
| 2167 |
+
efficient_eos=True,
|
| 2168 |
+
mm_plugin=get_mm_plugin(name="llava", image_token="<image>"),
|
| 2169 |
+
)
|
| 2170 |
+
|
| 2171 |
+
|
| 2172 |
+
register_template(
|
| 2173 |
+
name="yuan",
|
| 2174 |
+
format_user=StringFormatter(slots=["{{content}}", {"token": "<sep>"}]),
|
| 2175 |
+
format_assistant=StringFormatter(slots=["{{content}}<eod>\n"]),
|
| 2176 |
+
stop_words=["<eod>"],
|
| 2177 |
+
)
|
| 2178 |
+
|
| 2179 |
+
|
| 2180 |
+
register_template(
|
| 2181 |
+
name="zephyr",
|
| 2182 |
+
format_user=StringFormatter(slots=["<|user|>\n{{content}}", {"eos_token"}, "<|assistant|>\n"]),
|
| 2183 |
+
format_system=StringFormatter(slots=["<|system|>\n{{content}}", {"eos_token"}]),
|
| 2184 |
+
default_system="You are Zephyr, a helpful assistant.",
|
| 2185 |
+
)
|
| 2186 |
+
|
| 2187 |
+
|
| 2188 |
+
register_template(
|
| 2189 |
+
name="ziya",
|
| 2190 |
+
format_user=StringFormatter(slots=["<human>:{{content}}\n<bot>:"]),
|
| 2191 |
+
format_assistant=StringFormatter(slots=["{{content}}\n"]),
|
| 2192 |
+
)
|
data/dataset/12_10/overfit_data.json
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"conversations": [
|
| 4 |
+
{
|
| 5 |
+
"from": "human",
|
| 6 |
+
"value": "I have an XML file containing a list of records, each with a title and description. Here is the full content of the XML file:\n<records>\n <record>\n <title>Record 1</title>\n <description>Description for record 1</description>\n </record>\n <record>\n <title>Record 2</title>\n <description>Description for record 2</description>\n </record>\n <record>\n <title>Record 3</title>\n <description>Description for record 3</description>\n </record>\n</records>\n\nI need:\n1. To extract the titles and descriptions from each record in the XML data.\n2. To sort the extracted titles in descending order.\nIn total, I want to receive a final report that includes the sorted list of titles."
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"from": "function_call",
|
| 10 |
+
"value": "{\"name\": \"parse_xml_data\", \"arguments\": {\"xml_data\": \"<records><record><title>Record 1</title><description>Description for record 1</description></record><record><title>Record 2</title><description>Description for record 2</description></record><record><title>Record 3</title><description>Description for record 3</description></record></records>\", \"index\": 1}}"
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"from": "observation",
|
| 14 |
+
"value": "[{\"type\": \"raw\", \"json\": {\"data\": [\"#1. Record 1: Description for record 1\", \"#2. Record 2: Description for record 2\", \"#3. Record 3: Description for record 3\"]}}]"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"from": "function_call",
|
| 18 |
+
"value": "{\"name\": \"dict_values_sorted\", \"arguments\": {\"dictionary\": {\"#1. Record 1\": 1, \"#2. Record 2\": 2, \"#3. Record 3\": 3}}}"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"from": "observation",
|
| 22 |
+
"value": "[{\"type\": \"raw\", \"json\": {\"data\": [3, 2, 1]}}]"
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"from": "gpt",
|
| 26 |
+
"value": "<answer>The sorted list of titles is: Record 3, Record 2, Record 1.</answer>"
|
| 27 |
+
}
|
| 28 |
+
],
|
| 29 |
+
"system": "# Role\n\nYou are an AI assistant capable of calling various functions to help users solve their problems.\n\n# Tool Selection\n\n**Important**: The available function signatures are provided in the <tools></tools> section. You must carefully select one or more appropriate tools from this section that can solve the user's request.\n\n# Output Rules\n\nYou must strictly follow the rules below when responding:\n\n## 1. Function Call Format\nWhen you need to call a function, you must output only one function call per round in the following format:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n\n**Parameter Parsing**: The arguments must be parsed based on the user's query. **Do not fabricate parameters that are not mentioned or cannot be reasonably inferred from the query.** Only use parameters that can be reasonably extracted or inferred from the user's request.\n\n**Basis for Generating Function Call Content**:\n- **First function call**: The user's query and available tools information.\n- **n-th function call (n > 1)**: The user's query, available tools information, and the complete conversation history in <chat_history></chat_history> from the previous n-1 rounds (including all prior function calls, observations, and answers). In some scenarios, observations may be empty; this is acceptable for generating function calls.\n\n**Example**:\n<tool_call>\n{\\\"name\\\": \\\"cancel_booking\\\", \\\"arguments\\\": {\\\"access_token\\\": \\\"abc123xyz\\\", \\\"booking_id\\\": \\\"flight_001\\\"}}\n</tool_call>\n\n## 2. 
Answer Format\nWhen you judge from the chat history that all necessary tools have been called, you must immediately stop calling tools and provide the final answer in the following format:\n<answer>\nYour final answer here\n</answer>\n\n**Answer Generation Requirements**:\n- **Critical**: If all observations in chat-history are empty (meaning tools were called but returned no data), you MUST reply exactly: \"Sorry, I did not obtain sufficient information to complete your request.\" Do NOT fabricate, invent, or generate any content based on assumptions. Do NOT create imaginary results or responses. Only output this exact message.\n- **Important**: The provided tools may include tools that are irrelevant or unsuitable for the current query. If you determine there are no suitable tools to answer the user's request, reply: \"Sorry, there are no suitable tools to answer your request.\"\n- **Important**: If you have called some tools and obtained observations, but the available tools are insufficient to fully satisfy the user's request (e.g., some required tools are missing from the available tool list), you MUST reply exactly: \"Sorry, there are not enough tools to fully satisfy your request.\" Do NOT fabricate or generate partial answers based on incomplete information.\n- Carefully analyze the conversation history to determine the current turn. The answer must be based on the user's query and all available observation results in the conversation.\n\n## 3. 
Intelligent Process Stage Judgment\n- single-hop: Typically requires only one tool call to complete the task.\n- multi-hop: Requires multiple tool calls to complete the task.\n- single-turn: Involves only one user query.\n- multi-turn: Involves multiple user queries; later queries may refer to or build upon earlier exchanges.\n- When you see that the assistant has issued a tool call and received an observation, that tool call is considered complete.\n\n**Special Note**: By examining the conversation history, you can clearly see:\n- Previous interactions between the user and the assistant\n- Tool calls that have already been executed\n- Results returned by tools\n- The stage the current conversation has reached\n\n## 4. Strictly Prohibited Behaviors\n- Do not output a function call and an answer in the same round.\n- Do not repeatedly call the same tool with identical parameters.\n- Do not ignore existing tool calls and their returned information in the conversation history.\n- Do not fabricate parameters that are not present in or reasonably implied by the user's query.\n\n## 5. Error Handling and Data Quality Assessment\n- If the tool returns an empty observation, it may indicate there is no data under the current query conditions or that observation data is unavailable in the current context.\n- If the tool returns error messages (e.g., \"resource not found\", \"invalid parameters\"), do not repeat the same tool call.\n- In such cases, provide an explanatory answer describing the specific error cause or data condition.\n- Absolutely do not repeatedly call the same tool because it returned an error or empty data.\n\n\n",
|
| 30 |
+
"tools": "[{\"name\": \"parse_xml_data\", \"description\": \"Parses the XML data and extracts the title and description from each record element.\", \"inputSchema\": {\"type\": \"object\", \"properties\": {\"xml_data\": {\"type\": \"string\", \"description\": \"A string containing the XML data.\"}, \"index\": {\"type\": \"integer\", \"description\": \"An integer to start numbering the records from.\"}}, \"required\": [\"xml_data\", \"index\"]}, \"category\": \"analysis\", \"domain\": \"technology\"}, {\"name\": \"dict_values_sorted\", \"description\": \"Returns a list of the values from the dictionary, sorted in descending order.\", \"inputSchema\": {\"type\": \"object\", \"properties\": {\"dictionary\": {\"type\": \"object\", \"additionalProperties\": {\"type\": \"integer\"}, \"description\": \"The dictionary from which to sort the values.\"}}, \"required\": [\"dictionary\"]}, \"category\": \"analysis\", \"domain\": \"technology\"}]"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"conversations": [
|
| 34 |
+
{
|
| 35 |
+
"from": "human",
|
| 36 |
+
"value": "Generate a static map image of size 1600x1600 centered at the coordinates (55.7558, 37.6176) with a zoom level of 11, using the 'toursprung-terrain' maptype, and include a delta_zoom of 2. Output the URL."
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"from": "function_call",
|
| 40 |
+
"value": "{\"name\":\"staticmaps\",\"arguments\":{\"size\":\"1600x1600\",\"center\":\"55.7558,37.6176\",\"zoom\":11,\"maptype\":\"toursprung-terrain\",\"delta_zoom\":2}}"
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"from": "observation",
|
| 44 |
+
"value": "[{\"type\": \"tool_result\", \"name\": \"staticmaps\", \"content\": \"https://maps.example.com/map.png?center=55.7558%2C37.6176&size=1600x1600&zoom=11&maptype=toursprung-terrain&delta_zoom=2&format=png\"}]"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"from": "gpt",
|
| 48 |
+
"value": "<answer></answer>"
|
| 49 |
+
}
|
| 50 |
+
],
|
| 51 |
+
"system": "# Role\n\nYou are an AI assistant capable of calling various functions to help users solve their problems.\n\n# Tool Selection\n\n**Important**: The available function signatures are provided in the <tools></tools> section. You must carefully select one or more appropriate tools from this section that can solve the user's request.\n\n# Output Rules\n\nYou must strictly follow the rules below when responding:\n\n## 1. Function Call Format\nWhen you need to call a function, you must output only one function call per round in the following format:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n\n**Parameter Parsing**: The arguments must be parsed based on the user's query. **Do not fabricate parameters that are not mentioned or cannot be reasonably inferred from the query.** Only use parameters that can be reasonably extracted or inferred from the user's request.\n\n**Basis for Generating Function Call Content**:\n- **First function call**: The user's query and available tools information.\n- **n-th function call (n > 1)**: The user's query, available tools information, and the complete conversation history in <chat_history></chat_history> from the previous n-1 rounds (including all prior function calls, observations, and answers). In some scenarios, observations may be empty; this is acceptable for generating function calls.\n\n**Example**:\n<tool_call>\n{\\\"name\\\": \\\"cancel_booking\\\", \\\"arguments\\\": {\\\"access_token\\\": \\\"abc123xyz\\\", \\\"booking_id\\\": \\\"flight_001\\\"}}\n</tool_call>\n\n## 2. 
Answer Format\nWhen you judge from the chat history that all necessary tools have been called, you must immediately stop calling tools and provide the final answer in the following format:\n<answer>\nYour final answer here\n</answer>\n\n**Answer Generation Requirements**:\n- **Critical**: If all observations in chat-history are empty (meaning tools were called but returned no data), you MUST reply exactly: \"Sorry, I did not obtain sufficient information to complete your request.\" Do NOT fabricate, invent, or generate any content based on assumptions. Do NOT create imaginary results or responses. Only output this exact message.\n- **Important**: The provided tools may include tools that are irrelevant or unsuitable for the current query. If you determine there are no suitable tools to answer the user's request, reply: \"Sorry, there are no suitable tools to answer your request.\"\n- **Important**: If you have called some tools and obtained observations, but the available tools are insufficient to fully satisfy the user's request (e.g., some required tools are missing from the available tool list), you MUST reply exactly: \"Sorry, there are not enough tools to fully satisfy your request.\" Do NOT fabricate or generate partial answers based on incomplete information.\n- Carefully analyze the conversation history to determine the current turn. The answer must be based on the user's query and all available observation results in the conversation.\n\n## 3. 
Intelligent Process Stage Judgment\n- single-hop: Typically requires only one tool call to complete the task.\n- multi-hop: Requires multiple tool calls to complete the task.\n- single-turn: Involves only one user query.\n- multi-turn: Involves multiple user queries; later queries may refer to or build upon earlier exchanges.\n- When you see that the assistant has issued a tool call and received an observation, that tool call is considered complete.\n\n**Special Note**: By examining the conversation history, you can clearly see:\n- Previous interactions between the user and the assistant\n- Tool calls that have already been executed\n- Results returned by tools\n- The stage the current conversation has reached\n\n## 4. Strictly Prohibited Behaviors\n- Do not output a function call and an answer in the same round.\n- Do not repeatedly call the same tool with identical parameters.\n- Do not ignore existing tool calls and their returned information in the conversation history.\n- Do not fabricate parameters that are not present in or reasonably implied by the user's query.\n\n## 5. Error Handling and Data Quality Assessment\n- If the tool returns an empty observation, it may indicate there is no data under the current query conditions or that observation data is unavailable in the current context.\n- If the tool returns error messages (e.g., \"resource not found\", \"invalid parameters\"), do not repeat the same tool call.\n- In such cases, provide an explanatory answer describing the specific error cause or data condition.\n- Absolutely do not repeatedly call the same tool because it returned an error or empty data.\n\n\n",
|
| 52 |
+
"tools": "[{\"name\": \"staticmaps\", \"description\": \"Generates a static map image URL based on the given parameters.\", \"required\": [\"size\", \"center\", \"zoom\"], \"inputSchema\": {\"type\": \"object\", \"properties\": {\"type\": {\"type\": \"str\", \"description\": \"dict\"}, \"properties\": {\"size\": {\"description\": \"Size of the map. Format is usually `{width}x{height}`.\", \"type\": \"str\", \"default\": \"640x480\"}, \"center\": {\"description\": \"Center coordinates of the map. Format is `{latitude},{longitude}`.\", \"type\": \"str\", \"default\": \"48.20835,16.3725\"}, \"zoom\": {\"description\": \"Zoom level of the map.\", \"type\": \"int\", \"default\": \"11\"}, \"bounds\": {\"description\": \"Bounding box to define the map area instead of `center` and `zoom`. Format is `{north},{east},{south},{west}`.\", \"type\": \"str, optional\", \"default\": \"\"}, \"geojson\": {\"description\": \"GeoJSON geometry to include on the map. Format is `{attribute}:{value}[|{attribute}:{value}...]`.\", \"type\": \"str, optional\", \"default\": \"\"}, \"maptype\": {\"description\": \"Type of the map, defining its appearance. Default is 'toursprung-terrain'.\", \"type\": \"str, optional\", \"default\": \"toursprung-terrain\"}, \"path\": {\"description\": \"Path to include on the map. Format is `{attribute}:{value}[|{attribute}:{value}...]`.\", \"type\": \"str, optional\", \"default\": \"\"}, \"delta_zoom\": {\"description\": \"Zoom level adjustment if zoom is calculated automatically.\", \"type\": \"int, optional\", \"default\": \"\"}, \"kml\": {\"description\": \"KML data to include on the map. Format is `{kml}?{attribute}={value}[&{attribute}={value}...]`.\", \"type\": \"str, optional\", \"default\": \"\"}, \"format\": {\"description\": \"File format of the map image. Default is 'png'.\", \"type\": \"str, optional\", \"default\": \"png\"}, \"marker\": {\"description\": \"Marker to include on the map. 
Format is `{attribute}:{value}[|{attribute}:{value}...]`.\", \"type\": \"str, optional\", \"default\": \"\"}, \"factor\": {\"description\": \"Image scaling factor.\", \"type\": \"str, optional\", \"default\": \"\"}}}, \"required\": []}, \"category\": \"generate\", \"domain\": \"technology\"}]"
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"conversations": [
|
| 56 |
+
{
|
| 57 |
+
"from": "human",
|
| 58 |
+
"value": "Hi, can you check if my website is working fine? The URL is www.mywebsite.com."
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"from": "function_call",
|
| 62 |
+
"value": "{\"name\": \"check_website_status\", \"arguments\": {\"url\": \"www.mywebsite.com\"}}"
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"from": "observation",
|
| 66 |
+
"value": "[{\"type\": \"raw\", \"json\": {\"status\": \"OK\", \"message\": \"The website www.mywebsite.com is up and running.\"}}]"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"from": "gpt",
|
| 70 |
+
"value": "<answer>The website www.mywebsite.com is up and running. Everything seems fine.</answer>"
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"from": "human",
|
| 74 |
+
"value": "That's great! Can you also check www.anotherwebsite.com for me?"
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"from": "function_call",
|
| 78 |
+
"value": "{\"name\": \"check_website_status\", \"arguments\": {\"url\": \"www.anotherwebsite.com\"}}"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"from": "observation",
|
| 82 |
+
"value": "[{\"type\": \"raw\", \"json\": {\"status\": \"ERROR\", \"message\": \"The website www.anotherwebsite.com is currently down.\"}}]"
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"from": "gpt",
|
| 86 |
+
"value": "<answer>I'm sorry, but the website www.anotherwebsite.com is currently down.</answer>"
|
| 87 |
+
}
|
| 88 |
+
],
|
| 89 |
+
"system": "# Role\n\nYou are an AI assistant capable of calling various functions to help users solve their problems.\n\n# Tool Selection\n\n**Important**: The available function signatures are provided in the <tools></tools> section. You must carefully select one or more appropriate tools from this section that can solve the user's request.\n\n# Output Rules\n\nYou must strictly follow the rules below when responding:\n\n## 1. Function Call Format\nWhen you need to call a function, you must output only one function call per round in the following format:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n\n**Parameter Parsing**: The arguments must be parsed based on the user's query. **Do not fabricate parameters that are not mentioned or cannot be reasonably inferred from the query.** Only use parameters that can be reasonably extracted or inferred from the user's request.\n\n**Basis for Generating Function Call Content**:\n- **First function call**: The user's query and available tools information.\n- **n-th function call (n > 1)**: The user's query, available tools information, and the complete conversation history in <chat_history></chat_history> from the previous n-1 rounds (including all prior function calls, observations, and answers). In some scenarios, observations may be empty; this is acceptable for generating function calls.\n\n**Example**:\n<tool_call>\n{\\\"name\\\": \\\"cancel_booking\\\", \\\"arguments\\\": {\\\"access_token\\\": \\\"abc123xyz\\\", \\\"booking_id\\\": \\\"flight_001\\\"}}\n</tool_call>\n\n## 2. 
Answer Format\nWhen you judge from the chat history that all necessary tools have been called, you must immediately stop calling tools and provide the final answer in the following format:\n<answer>\nYour final answer here\n</answer>\n\n**Answer Generation Requirements**:\n- **Critical**: If all observations in chat-history are empty (meaning tools were called but returned no data), you MUST reply exactly: \"Sorry, I did not obtain sufficient information to complete your request.\" Do NOT fabricate, invent, or generate any content based on assumptions. Do NOT create imaginary results or responses. Only output this exact message.\n- **Important**: The provided tools may include tools that are irrelevant or unsuitable for the current query. If you determine there are no suitable tools to answer the user's request, reply: \"Sorry, there are no suitable tools to answer your request.\"\n- **Important**: If you have called some tools and obtained observations, but the available tools are insufficient to fully satisfy the user's request (e.g., some required tools are missing from the available tool list), you MUST reply exactly: \"Sorry, there are not enough tools to fully satisfy your request.\" Do NOT fabricate or generate partial answers based on incomplete information.\n- Carefully analyze the conversation history to determine the current turn. The answer must be based on the user's query and all available observation results in the conversation.\n\n## 3. 
Intelligent Process Stage Judgment\n- single-hop: Typically requires only one tool call to complete the task.\n- multi-hop: Requires multiple tool calls to complete the task.\n- single-turn: Involves only one user query.\n- multi-turn: Involves multiple user queries; later queries may refer to or build upon earlier exchanges.\n- When you see that the assistant has issued a tool call and received an observation, that tool call is considered complete.\n\n**Special Note**: By examining the conversation history, you can clearly see:\n- Previous interactions between the user and the assistant\n- Tool calls that have already been executed\n- Results returned by tools\n- The stage the current conversation has reached\n\n## 4. Strictly Prohibited Behaviors\n- Do not output a function call and an answer in the same round.\n- Do not repeatedly call the same tool with identical parameters.\n- Do not ignore existing tool calls and their returned information in the conversation history.\n- Do not fabricate parameters that are not present in or reasonably implied by the user's query.\n\n## 5. Error Handling and Data Quality Assessment\n- If the tool returns an empty observation, it may indicate there is no data under the current query conditions or that observation data is unavailable in the current context.\n- If the tool returns error messages (e.g., \"resource not found\", \"invalid parameters\"), do not repeat the same tool call.\n- In such cases, provide an explanatory answer describing the specific error cause or data condition.\n- Absolutely do not repeatedly call the same tool because it returned an error or empty data.\n\n\n",
|
| 90 |
+
"tools": "[{\"name\": \"check_website_status\", \"description\": \"Check the status of a website\", \"inputSchema\": {\"type\": \"object\", \"properties\": {\"url\": {\"type\": \"string\", \"description\": \"The URL of the website\"}}, \"required\": [\"url\"]}, \"category\": \"system\", \"domain\": \"technology\"}]"
|
| 91 |
+
}
|
| 92 |
+
]
|
data/dataset/12_10/testmix_dataset.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/dataset/12_10/格式问题分析与修正建议.md
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 格式问题分析与修正建议
|
| 2 |
+
|
| 3 |
+
## 问题分析
|
| 4 |
+
|
| 5 |
+
### 1. 关于"User query:"前缀的问题
|
| 6 |
+
|
| 7 |
+
**当前实现:**
|
| 8 |
+
- 历史信息中:`User query: {原始query内容}`
|
| 9 |
+
- 当前query:`Current user query: {当前query内容}`
|
| 10 |
+
|
| 11 |
+
**问题:**
|
| 12 |
+
- 前缀不一致("User query:" vs "Current user query:")
|
| 13 |
+
- 前缀放在`<|im_start|>user`标签内,可能不符合预期
|
| 14 |
+
|
| 15 |
+
**建议:**
|
| 16 |
+
- 方案1:统一前缀,都使用`User query:`,去掉"Current"
|
| 17 |
+
- 方案2:去掉所有前缀,直接使用原始内容
|
| 18 |
+
- 方案3:历史信息去掉前缀,当前query保留前缀(用于区分)
|
| 19 |
+
|
| 20 |
+
### 2. 关于`<chat_history>`位置的问题
|
| 21 |
+
|
| 22 |
+
**当前实现:**
|
| 23 |
+
- `<chat_history>`放在user消息的elements中(在`<|im_start|>user`标签外)
|
| 24 |
+
|
| 25 |
+
**问题:**
|
| 26 |
+
- 用户认为应该放在system角色中
|
| 27 |
+
|
| 28 |
+
**分析:**
|
| 29 |
+
- 从origin.py看,原始实现中没有`<chat_history>`标签
|
| 30 |
+
- 当前实现中,历史信息是作为普通文本添加到elements中,不通过format_user格式化
|
| 31 |
+
- 如果放在system中,需要修改system消息的构建逻辑
|
| 32 |
+
|
| 33 |
+
**建议:**
|
| 34 |
+
- 方案1:保持当前实现(在user消息中,但在`<|im_start|>user`标签外)
|
| 35 |
+
- 方案2:将`<chat_history>`放在system消息中(需要修改system消息的构建逻辑)
|
| 36 |
+
|
| 37 |
+
### 3. 关于原始格式(origin.py)的问题
|
| 38 |
+
|
| 39 |
+
**origin.py的实现:**
|
| 40 |
+
- 有5个role:`USER`, `ASSISTANT`, `FUNCTION`, `OBSERVATION`, `SYSTEM`
|
| 41 |
+
- observation通过`format_observation`格式化
|
| 42 |
+
- 在qwen模板中,`format_observation`的格式是:
|
| 43 |
+
```
|
| 44 |
+
<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n
|
| 45 |
+
```
|
| 46 |
+
- 也就是说,observation是作为**user role**,但内容是`<tool_response>...</tool_response>`,然后后面跟着assistant role
|
| 47 |
+
- origin.py中,observation的内容会被增强,添加"用户查询: xxx"等前缀
|
| 48 |
+
|
| 49 |
+
**关键发现:**
|
| 50 |
+
- **origin.py中,observation确实是作为user role处理的**,不是独立的observation role
|
| 51 |
+
- 但当前template.py中,observation是作为独立的OBSERVATION role处理的
|
| 52 |
+
|
| 53 |
+
### 4. 关于当前query前缀的问题
|
| 54 |
+
|
| 55 |
+
**当前实现:**
|
| 56 |
+
```python
|
| 57 |
+
current_user_query = f"Current user query: {original_contents[i]}"
|
| 58 |
+
elements += self.format_user.apply(content=current_user_query, idx=str(i // 2))
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
**问题:**
|
| 62 |
+
- "Current user query:"前缀是否必要?
|
| 63 |
+
- 如果历史信息中有"User query:"前缀,当前query是否也应该有前缀?
|
| 64 |
+
|
| 65 |
+
**建议:**
|
| 66 |
+
- 如果历史信息有前缀,当前query也应该有前缀(保持一致性)
|
| 67 |
+
- 或者,都去掉前缀,直接使用原始内容
|
| 68 |
+
|
| 69 |
+
---
|
| 70 |
+
|
| 71 |
+
## 修正建议
|
| 72 |
+
|
| 73 |
+
### 建议1:统一前缀格式
|
| 74 |
+
|
| 75 |
+
**修改方案:**
|
| 76 |
+
- 历史信息:`User query: {原始query内容}`
|
| 77 |
+
- 当前query:`User query: {当前query内容}`(去掉"Current")
|
| 78 |
+
|
| 79 |
+
**理由:**
|
| 80 |
+
- 保持一致性
|
| 81 |
+
- 简化格式
|
| 82 |
+
|
| 83 |
+
### 建议2:将`<chat_history>`放在system中
|
| 84 |
+
|
| 85 |
+
**修改方案:**
|
| 86 |
+
- 将历史信息包装在`<chat_history>`标签内
|
| 87 |
+
- 将`<chat_history>`添加到system消息的content中
|
| 88 |
+
- 只在需要历史信息时(非第一次调用)才添加
|
| 89 |
+
|
| 90 |
+
**理由:**
|
| 91 |
+
- 历史信息属于上下文,放在system中更合理
|
| 92 |
+
- 与原始实现更接近(origin.py中没有`<chat_history>`,但历史信息是通过其他方式传递的)
|
| 93 |
+
|
| 94 |
+
### 建议3:去掉所有前缀
|
| 95 |
+
|
| 96 |
+
**修改方案:**
|
| 97 |
+
- 历史信息:直接使用原始内容,不加前缀
|
| 98 |
+
- 当前query:直接使用原始内容,不加前缀
|
| 99 |
+
|
| 100 |
+
**理由:**
|
| 101 |
+
- 简化格式
|
| 102 |
+
- 减少token消耗
|
| 103 |
+
- 模型可以通过`<chat_history>`标签识别历史信息
|
| 104 |
+
|
| 105 |
+
### 建议4:保持当前实现,但统一前缀
|
| 106 |
+
|
| 107 |
+
**修改方案:**
|
| 108 |
+
- 历史信息:`User query: {原始query内容}`
|
| 109 |
+
- 当前query:`User query: {当前query内容}`(去掉"Current")
|
| 110 |
+
- `<chat_history>`位置:保持当前实现(在user消息中,但在`<|im_start|>user`标签外)
|
| 111 |
+
|
| 112 |
+
**理由:**
|
| 113 |
+
- 最小改动
|
| 114 |
+
- 保持格式一致性
|
| 115 |
+
|
| 116 |
+
---
|
| 117 |
+
|
| 118 |
+
## 推荐方案
|
| 119 |
+
|
| 120 |
+
**推荐:方案4(保持当前实现,但统一前缀)**
|
| 121 |
+
|
| 122 |
+
**修改内容:**
|
| 123 |
+
1. 将"Current user query:"改为"User query:"
|
| 124 |
+
2. 保持`<chat_history>`在user消息中的位置
|
| 125 |
+
3. 保持其他格式不变
|
| 126 |
+
|
| 127 |
+
**理由:**
|
| 128 |
+
- 最小改动,风险低
|
| 129 |
+
- 统一前缀,格式一致
|
| 130 |
+
- 保持与推理代码的一致性
|
| 131 |
+
|
| 132 |
+
---
|
| 133 |
+
|
| 134 |
+
## 对比:origin.py vs 当前template.py
|
| 135 |
+
|
| 136 |
+
### origin.py(原始实现)
|
| 137 |
+
|
| 138 |
+
**格式:**
|
| 139 |
+
```
|
| 140 |
+
<|im_start|>system
|
| 141 |
+
{system + tools}
|
| 142 |
+
<|im_end|>
|
| 143 |
+
<|im_start|>user
|
| 144 |
+
{query内容}
|
| 145 |
+
<|im_end|>
|
| 146 |
+
<|im_start|>assistant
|
| 147 |
+
{function_call}
|
| 148 |
+
<|im_end|>
|
| 149 |
+
<|im_start|>user
|
| 150 |
+
<tool_response>
|
| 151 |
+
{增强后的observation内容(包含"用户查询: xxx"等前缀)}
|
| 152 |
+
</tool_response>
|
| 153 |
+
<|im_end|>
|
| 154 |
+
<|im_start|>assistant
|
| 155 |
+
{answer}
|
| 156 |
+
<|im_end|>
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
**特点:**
|
| 160 |
+
- observation作为user role
|
| 161 |
+
- observation内容会被增强(添加"用户查询: xxx"等前缀)
|
| 162 |
+
- 没有`<chat_history>`标签
|
| 163 |
+
- 历史信息通过observation的增强内容传递
|
| 164 |
+
|
| 165 |
+
### 当前template.py(修改后)
|
| 166 |
+
|
| 167 |
+
**格式:**
|
| 168 |
+
```
|
| 169 |
+
<|im_start|>system
|
| 170 |
+
{system + tools}
|
| 171 |
+
<|im_end|>
|
| 172 |
+
<chat_history>
|
| 173 |
+
User query: {历史query}
|
| 174 |
+
Tool call: {历史function_call}
|
| 175 |
+
Tool response: {历史observation}
|
| 176 |
+
Assistant reply: {历史answer}
|
| 177 |
+
</chat_history>
|
| 178 |
+
<|im_start|>user
|
| 179 |
+
Current user query: {当前query}
|
| 180 |
+
<|im_end|>
|
| 181 |
+
<|im_start|>assistant
|
| 182 |
+
{function_call}
|
| 183 |
+
<|im_end|>
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
**特点:**
|
| 187 |
+
- 使用`<chat_history>`标签包装历史信息
|
| 188 |
+
- 历史信息在user消息中(在`<|im_start|>user`标签外)
|
| 189 |
+
- 当前query有"Current user query:"前缀
|
| 190 |
+
- observation作为独立的OBSERVATION role
|
| 191 |
+
|
| 192 |
+
---
|
| 193 |
+
|
| 194 |
+
## 需要确认的问题
|
| 195 |
+
|
| 196 |
+
1. **前缀问题**:是否统一为"User query:",还是去掉所有前缀?
|
| 197 |
+
2. **`<chat_history>`位置**:是否应该放在system中,还是保持当前实现?
|
| 198 |
+
3. **当前query前缀**:是否需要前缀,还是直接使用原始内容?
|
| 199 |
+
4. **格式一致性**:训练和推理是否应该完全一致?
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
## 下一步行动
|
| 204 |
+
|
| 205 |
+
1. 根据用户反馈,确定最终的格式方案
|
| 206 |
+
2. 修改template.py,统一前缀格式
|
| 207 |
+
3. 如果需要,调整`<chat_history>`的位置
|
| 208 |
+
4. 同步修改推理代码,保持一致性
|
| 209 |
+
5. 测试修改后的效果
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
|
data/dataset/12_10/训练与推理输入对比分析.md
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 训练代码与推理代码输入对比分析
|
| 2 |
+
|
| 3 |
+
## 样本信息
|
| 4 |
+
|
| 5 |
+
基于 `format_mutiturn copy.json` 中的第二个样本(32-77行)进行分析。
|
| 6 |
+
|
| 7 |
+
### 对话结构
|
| 8 |
+
```
|
| 9 |
+
1. human (query 1): "In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically."
|
| 10 |
+
2. function_call 1: analyze_customer_feedback(...)
|
| 11 |
+
3. observation 1: [分析结果...]
|
| 12 |
+
4. function_call 2: analyze_customer_feedback(...) [重复]
|
| 13 |
+
5. observation 2: [分析结果...] [重复]
|
| 14 |
+
6. gpt (answer 1): "In the past month, a total of 130 negative feedbacks..."
|
| 15 |
+
7. human (query 2): "In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically." [重复query 1]
|
| 16 |
+
8. function_call 3: analyze_customer_feedback(...) [重复]
|
| 17 |
+
9. observation 3: [分析结果...] [重复]
|
| 18 |
+
10. gpt (answer 2): "In the past month, a total of 130 negative feedbacks..." [重复answer 1]
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
这是一个**multi-turn**对话,包含2个turn,每个turn都有function_call和answer。
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
## 训练时的LLM调用(5次)
|
| 26 |
+
|
| 27 |
+
**注意**:这个样本中第一个turn有2个function_call(重复的),所以训练时会调用5次LLM。
|
| 28 |
+
|
| 29 |
+
### 第1次调用:生成第1个Function Call
|
| 30 |
+
|
| 31 |
+
**处理的消息序列:**
|
| 32 |
+
- messages[0]: Role.USER (query 1)
|
| 33 |
+
- messages[1]: Role.FUNCTION (function_call 1) ← **要生成的目标**
|
| 34 |
+
|
| 35 |
+
**输入内容:**
|
| 36 |
+
```
|
| 37 |
+
<|im_start|>system
|
| 38 |
+
# Role
|
| 39 |
+
...
|
| 40 |
+
<tools>
|
| 41 |
+
[工具定义...]
|
| 42 |
+
</tools><|im_end|>
|
| 43 |
+
<|im_start|>user
|
| 44 |
+
In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.<|im_end|>
|
| 45 |
+
<|im_start|>assistant
|
| 46 |
+
[生成function_call 1]
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
**关键点:**
|
| 50 |
+
- 第一次调用,没有历史信息
|
| 51 |
+
- system包含tools
|
| 52 |
+
- 当前query通过format_user格式化,**没有前缀**(直接使用原始内容)
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
### 第2次调用:生成第2个Function Call(第一个turn的第二个function_call)
|
| 57 |
+
|
| 58 |
+
**处理的消息序列:**
|
| 59 |
+
- messages[0]: Role.USER (query 1)
|
| 60 |
+
- messages[1]: Role.FUNCTION (function_call 1)
|
| 61 |
+
- messages[2]: Role.OBSERVATION (observation 1)
|
| 62 |
+
- messages[3]: Role.FUNCTION (function_call 2) ← **要生成的目标**
|
| 63 |
+
|
| 64 |
+
**输入内容:**
|
| 65 |
+
```
|
| 66 |
+
<|im_start|>system
|
| 67 |
+
# Role
|
| 68 |
+
...
|
| 69 |
+
<chat_history>
|
| 70 |
+
User query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.
|
| 71 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 72 |
+
</chat_history>
|
| 73 |
+
|
| 74 |
+
<tools>
|
| 75 |
+
[工具定义...]
|
| 76 |
+
</tools><|im_end|>
|
| 77 |
+
<|im_start|>tool_response
|
| 78 |
+
[observation 1的内容]
|
| 79 |
+
<|im_end|>
|
| 80 |
+
<|im_start|>assistant
|
| 81 |
+
[生成function_call 2]
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
**关键点:**
|
| 85 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 86 |
+
- 历史信息包含:User query + Tool call + Tool response(不包含Assistant reply,因为下一个是FUNCTION)
|
| 87 |
+
- system包含tools(在<chat_history>之后)
|
| 88 |
+
- 当前observation在<tool_response>标签内
|
| 89 |
+
- **注意**:处理OBSERVATION消息时,不添加user消息,直接生成FUNCTION
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
### 第3次调用:生成第1个Answer(第一个turn)
|
| 94 |
+
|
| 95 |
+
**处理的消息序列:**
|
| 96 |
+
- messages[0]: Role.USER (query 1)
|
| 97 |
+
- messages[1]: Role.FUNCTION (function_call 1)
|
| 98 |
+
- messages[2]: Role.OBSERVATION (observation 1)
|
| 99 |
+
- messages[3]: Role.FUNCTION (function_call 2)
|
| 100 |
+
- messages[4]: Role.OBSERVATION (observation 2)
|
| 101 |
+
- messages[5]: Role.ASSISTANT (answer 1) ← **要生成的目标**
|
| 102 |
+
|
| 103 |
+
**输入内容:**
|
| 104 |
+
```
|
| 105 |
+
<|im_start|>system
|
| 106 |
+
# Role
|
| 107 |
+
...
|
| 108 |
+
<chat_history>
|
| 109 |
+
User query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.
|
| 110 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 111 |
+
Tool response: [observation 1的内容]
|
| 112 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 113 |
+
Tool response: [observation 2的内容]
|
| 114 |
+
</chat_history><|im_end|>
|
| 115 |
+
<|im_start|>tool_response
|
| 116 |
+
[observation 2的内容]
|
| 117 |
+
<|im_end|>
|
| 118 |
+
<|im_start|>assistant
|
| 119 |
+
[生成answer 1]
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
**关键点:**
|
| 123 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 124 |
+
- 历史信息包含:User query + Tool call + Tool response(answer和tool call都要放到对话历史中)
|
| 125 |
+
- system不包含tools
|
| 126 |
+
- 当前observation在<tool_response>标签内
|
| 127 |
+
- **注意**:处理OBSERVATION消息时,不添加user消息,直接生成ASSISTANT
|
| 128 |
+
|
| 129 |
+
---
|
| 130 |
+
|
| 131 |
+
### 第4次调用:生成第3个Function Call(Turn 2)
|
| 132 |
+
|
| 133 |
+
**处理的消息序列:**
|
| 134 |
+
- messages[0]: Role.USER (query 1)
|
| 135 |
+
- messages[1]: Role.FUNCTION (function_call 1)
|
| 136 |
+
- messages[2]: Role.OBSERVATION (observation 1)
|
| 137 |
+
- messages[3]: Role.FUNCTION (function_call 2)
|
| 138 |
+
- messages[4]: Role.OBSERVATION (observation 2)
|
| 139 |
+
- messages[5]: Role.ASSISTANT (answer 1)
|
| 140 |
+
- messages[6]: Role.USER (query 2)
|
| 141 |
+
- messages[7]: Role.FUNCTION (function_call 3) ← **要生成的目标**
|
| 142 |
+
|
| 143 |
+
**输入内容:**
|
| 144 |
+
```
|
| 145 |
+
<|im_start|>system
|
| 146 |
+
# Role
|
| 147 |
+
...
|
| 148 |
+
<chat_history>
|
| 149 |
+
User query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.
|
| 150 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 151 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 152 |
+
Assistant reply: In the past month, a total of 130 negative feedbacks...
|
| 153 |
+
</chat_history>
|
| 154 |
+
|
| 155 |
+
<tools>
|
| 156 |
+
[工具定义...]
|
| 157 |
+
</tools><|im_end|>
|
| 158 |
+
<|im_start|>assistant
|
| 159 |
+
[生成function_call 3]
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
**关键点:**
|
| 163 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 164 |
+
- 历史信息包含:User query + Tool call + Assistant reply(生成tool call时,不把observation放在chat_history中)
|
| 165 |
+
- system包含tools(在<chat_history>之后)
|
| 166 |
+
- observation消息包含第二个observation(不在chat_history中)
|
| 167 |
+
- user消息为空(因为chat_history中已经包含了当前query)
|
| 168 |
+
|
| 169 |
+
---
|
| 170 |
+
|
| 171 |
+
### 第5次调用:生成第2个Answer(Turn 2)
|
| 172 |
+
|
| 173 |
+
**处理的消息序列:**
|
| 174 |
+
- messages[0]: Role.USER (query 1)
|
| 175 |
+
- messages[1]: Role.FUNCTION (function_call 1)
|
| 176 |
+
- messages[2]: Role.OBSERVATION (observation 1)
|
| 177 |
+
- messages[3]: Role.FUNCTION (function_call 2)
|
| 178 |
+
- messages[4]: Role.OBSERVATION (observation 2)
|
| 179 |
+
- messages[5]: Role.ASSISTANT (answer 1)
|
| 180 |
+
- messages[6]: Role.USER (query 2)
|
| 181 |
+
- messages[7]: Role.FUNCTION (function_call 3)
|
| 182 |
+
- messages[8]: Role.OBSERVATION (observation 3)
|
| 183 |
+
- messages[9]: Role.ASSISTANT (answer 2) ← **要生成的目标**
|
| 184 |
+
|
| 185 |
+
**输入内容:**
|
| 186 |
+
```
|
| 187 |
+
<|im_start|>system
|
| 188 |
+
# Role
|
| 189 |
+
...
|
| 190 |
+
<chat_history>
|
| 191 |
+
User query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.
|
| 192 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 193 |
+
Tool response: [observation 1的内容]
|
| 194 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 195 |
+
Tool response: [observation 2的内容]
|
| 196 |
+
Assistant reply: In the past month, a total of 130 negative feedbacks...
|
| 197 |
+
User query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.
|
| 198 |
+
Tool call: {"name": "analyze_customer_feedback", "arguments": {...}}
|
| 199 |
+
Tool response: [observation 3的内容]
|
| 200 |
+
</chat_history><|im_end|>
|
| 201 |
+
<|im_start|>tool_response
|
| 202 |
+
[observation 3的内容]
|
| 203 |
+
<|im_end|>
|
| 204 |
+
<|im_start|>assistant
|
| 205 |
+
[生成answer 2]
|
| 206 |
+
```
|
| 207 |
+
|
| 208 |
+
**关键点:**
|
| 209 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 210 |
+
- 历史信息包含:所有User query + Tool call + Tool response + Assistant reply(answer和tool call都要放到对话历史中)
|
| 211 |
+
- system不包含tools
|
| 212 |
+
- 当前observation在<tool_response>标签内
|
| 213 |
+
- **注意**:处理OBSERVATION消息时,不添加user消息,直接生成ASSISTANT
|
| 214 |
+
|
| 215 |
+
---
|
| 216 |
+
|
| 217 |
+
## 推理时的LLM调用(5次)
|
| 218 |
+
|
| 219 |
+
**注意**:这个样本中第一个turn有2个function_call(重复的),所以推理时会调用5次LLM。
|
| 220 |
+
|
| 221 |
+
### 第1次调用:生成第1个Function Call
|
| 222 |
+
|
| 223 |
+
**调用函数:** `build_messages_for_function_call(conversations, call_idx=0, ...)`
|
| 224 |
+
|
| 225 |
+
**输入messages:**
|
| 226 |
+
```python
|
| 227 |
+
[
|
| 228 |
+
{
|
| 229 |
+
"role": "system",
|
| 230 |
+
"content": "# Role\n...\n\n<tools>\n[工具定义...]\n</tools>"
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"role": "user",
|
| 234 |
+
"content": "In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically."
|
| 235 |
+
}
|
| 236 |
+
]
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
**关键点:**
|
| 240 |
+
- 第一次调用,没有历史信息
|
| 241 |
+
- system包含tools
|
| 242 |
+
- user消息包含当前query(**没有前缀**)
|
| 243 |
+
|
| 244 |
+
---
|
| 245 |
+
|
| 246 |
+
### 第2次调用:生成第2个Function Call(第一个turn的第二个function_call)
|
| 247 |
+
|
| 248 |
+
**调用函数:** `build_messages_for_function_call(conversations, call_idx=1, generated_calls=[function_call 1], generated_answers=[], ...)`
|
| 249 |
+
|
| 250 |
+
**输入messages:**
|
| 251 |
+
```python
|
| 252 |
+
[
|
| 253 |
+
{
|
| 254 |
+
"role": "system",
|
| 255 |
+
"content": "# Role\n...\n\n<chat_history>\nUser query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.\nTool call: [generated_calls[0],即function_call 1]\n</chat_history>\n\n<tools>\n[工具定义...]\n</tools>"
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"role": "observation",
|
| 259 |
+
"content": "<tool_response>\n[observation 1的内容]\n</tool_response>"
|
| 260 |
+
},
|
| 261 |
+
]
|
| 262 |
+
```
|
| 263 |
+
|
| 264 |
+
**关键点:**
|
| 265 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 266 |
+
- 历史信息包含:User query + Tool call(生成tool call时,不把observation放在chat_history中)
|
| 267 |
+
- 使用已生成的function_call(动态变化的LLM结果)
|
| 268 |
+
- system包含tools(在<chat_history>之后)
|
| 269 |
+
- observation消息包含第一个observation(不在chat_history中)
|
| 270 |
+
- **注意**:chat_history中已经包含了当前query,所以user消息为空(不添加user消息)
|
| 271 |
+
|
| 272 |
+
---
|
| 273 |
+
|
| 274 |
+
### 第3次调用:生成第1个Answer(第一个turn)
|
| 275 |
+
|
| 276 |
+
**调用函数:** `build_messages_for_answer(conversations, answer_idx=0, generated_calls=[function_call 1, function_call 2], generated_answers=[], ...)`
|
| 277 |
+
|
| 278 |
+
**输入messages:**
|
| 279 |
+
```python
|
| 280 |
+
[
|
| 281 |
+
{
|
| 282 |
+
"role": "system",
|
| 283 |
+
"content": "# Role\n...\n\n<chat_history>\nUser query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.\nTool call: [generated_calls[0],即function_call 1]\nTool response: [observation 1的内容]\nTool call: [generated_calls[1],即function_call 2]\nTool response: [observation 2的内容]\n</chat_history>"
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"role": "observation",
|
| 287 |
+
"content": "<tool_response>\n[observation 2的内容]\n</tool_response>"
|
| 288 |
+
},
|
| 289 |
+
]
|
| 290 |
+
```
|
| 291 |
+
|
| 292 |
+
**关键点:**
|
| 293 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 294 |
+
- 历史信息包含:User query + Tool call + Tool response(answer和tool call都要放到对话历史中)
|
| 295 |
+
- system不包含tools
|
| 296 |
+
- 当前observation在observation消息的<tool_response>标签内
|
| 297 |
+
- **注意**:chat_history中已经包含了当前query,所以user消息为空(不添加user消息)
|
| 298 |
+
|
| 299 |
+
---
|
| 300 |
+
|
| 301 |
+
### 第4次调用:生成第3个Function Call(Turn 2)
|
| 302 |
+
|
| 303 |
+
**调用函数:** `build_messages_for_function_call(conversations, call_idx=2, generated_calls=[function_call 1, function_call 2], generated_answers=[answer 1], ...)`
|
| 304 |
+
|
| 305 |
+
**输入messages:**
|
| 306 |
+
```python
|
| 307 |
+
[
|
| 308 |
+
{
|
| 309 |
+
"role": "system",
|
| 310 |
+
"content": "# Role\n...\n\n<chat_history>\nUser query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.\nTool call: [generated_calls[0],即function_call 1]\nTool call: [generated_calls[1],即function_call 2]\nAssistant reply: [generated_answers[0],即answer 1]\n</chat_history>\n\n<tools>\n[工具定义...]\n</tools>"
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"role": "observation",
|
| 314 |
+
"content": "<tool_response>\n[observation 3的内容]\n</tool_response>"
|
| 315 |
+
},
|
| 316 |
+
]
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
**关键点:**
|
| 320 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 321 |
+
- 历史信息包含:User query + Tool call + Assistant reply(生成tool call时,不把observation放在chat_history中)
|
| 322 |
+
- 使用已生成的function_call和answer(动态变化的LLM结果)
|
| 323 |
+
- system包含tools(在<chat_history>之后)
|
| 324 |
+
- observation消息包含第三个observation(不在chat_history中)
|
| 325 |
+
- user消息为空(因为chat_history中已经包含了当前query)
|
| 326 |
+
|
| 327 |
+
---
|
| 328 |
+
|
| 329 |
+
### 第5次调用:生成第2个Answer(Turn 2)
|
| 330 |
+
|
| 331 |
+
**调用函数:** `build_messages_for_answer(conversations, answer_idx=1, generated_calls=[function_call 1, function_call 2, function_call 3], generated_answers=[answer 1], ...)`
|
| 332 |
+
|
| 333 |
+
**输入messages:**
|
| 334 |
+
```python
|
| 335 |
+
[
|
| 336 |
+
{
|
| 337 |
+
"role": "system",
|
| 338 |
+
"content": "# Role\n...\n\n<chat_history>\nUser query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.\nTool call: [generated_calls[0],即function_call 1]\nTool response: [observation 1的内容]\nTool call: [generated_calls[1],即function_call 2]\nTool response: [observation 2的内容]\nAssistant reply: [generated_answers[0],即answer 1]\nUser query: In the past month, have you received a lot of feedback about poor service attitude? I want to see which scenic spots the problem is specifically.\nTool call: [generated_calls[2],即function_call 3]\nTool response: [observation 3的内容]\n</chat_history>"
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"role": "observation",
|
| 342 |
+
"content": "<tool_response>\n[observation 3的内容]\n</tool_response>"
|
| 343 |
+
},
|
| 344 |
+
]
|
| 345 |
+
```
|
| 346 |
+
|
| 347 |
+
**关键点:**
|
| 348 |
+
- 历史信息在system消息中(在<chat_history>标签内)
|
| 349 |
+
- 历史信息包含:所有User query + Tool call + Tool response + Assistant reply(answer和tool call都要放到对话历史中)
|
| 350 |
+
- 使用已生成的function_call和answer(动态变化的LLM结果)
|
| 351 |
+
- system不包含tools
|
| 352 |
+
- 当前observation在observation消息的<tool_response>标签内
|
| 353 |
+
- **注意**:chat_history中已经包含了当前query,所以user消息为空(不添加user消息)
|
| 354 |
+
|
| 355 |
+
---
|
| 356 |
+
|
| 357 |
+
## 关键差异总结
|
| 358 |
+
|
| 359 |
+
### 1. 历史信息的位置
|
| 360 |
+
- **训练时生成Function Call**:历史信息在system消息中(在<chat_history>标签内)
|
| 361 |
+
- **训练时生成Answer**:历史信息在system消息中(在<chat_history>标签内)
|
| 362 |
+
- **推理时生成Function Call**:历史信息在system消息中(在<chat_history>标签内)
|
| 363 |
+
- **推理时生成Answer**:历史信息在system消息中(在<chat_history>标签内)
|
| 364 |
+
|
| 365 |
+
### 2. 历史信息的格式
|
| 366 |
+
- **训练时**:使用`User query:`, `Tool call:`, `Tool response:`, `Assistant reply:`前缀
|
| 367 |
+
- **推理时**:使用相同的前缀格式
|
| 368 |
+
- **重要**:生成answer时不包含Tool call,生成tool call时才包含Tool call
|
| 369 |
+
|
| 370 |
+
### 3. Tools的包含
|
| 371 |
+
- **生成Function Call时**:训练和推理都包含tools
|
| 372 |
+
- **生成Answer时**:训练和推理都不包含tools
|
| 373 |
+
|
| 374 |
+
### 4. 当前消息的格式
|
| 375 |
+
- **训练时生成Function Call**:当前query通过format_user格式化,**没有前缀**(直接使用原始内容)
|
| 376 |
+
- **训练时生成Answer**:当前observation在observation消息的<tool_response>标签内
|
| 377 |
+
- **推理时生成Function Call**:当前query在user消息中,**没有前缀**(直接使用原始内容)
|
| 378 |
+
- **推理时生成Answer**:当前observation在observation消息的<tool_response>标签内
|
| 379 |
+
|
| 380 |
+
### 5. 动态内容的使用
|
| 381 |
+
- **训练时**:使用训练数据中的固定内容
|
| 382 |
+
- **推理时**:使用已生成的function_call和answer(动态变化的LLM结果)
|
| 383 |
+
|
| 384 |
+
---
|
| 385 |
+
|
| 386 |
+
## 一致性检查
|
| 387 |
+
|
| 388 |
+
✅ **格式一致性**:
|
| 389 |
+
- 历史信息都使用`<chat_history>`标签包装
|
| 390 |
+
- 历史信息都使用相同的前缀格式
|
| 391 |
+
- Tools的包含/排除逻辑一致
|
| 392 |
+
|
| 393 |
+
✅ **位置一致性**:
|
| 394 |
+
- 生成Function Call时,历史信息都在system消息中(在<chat_history>标签内)
|
| 395 |
+
- 生成Answer时,历史信息都在system消息中(在<chat_history>标签内)
|
| 396 |
+
|
| 397 |
+
✅ **格式一致性**:
|
| 398 |
+
- 当前query都没有前缀(直接使用原始内容)
|
| 399 |
+
- 历史信息中的query保留"User query:"前缀(在<chat_history>标签内)
|
| 400 |
+
|
| 401 |
+
✅ **格式一致性**:
|
| 402 |
+
- 训练和推理的格式完全一致(历史信息在system中,当前query没有前缀)
|
| 403 |
+
|
| 404 |
+
---
|
| 405 |
+
|
| 406 |
+
## 结论
|
| 407 |
+
|
| 408 |
+
训练代码和推理代码的输入格式**完全一致**:
|
| 409 |
+
1. ✅ 历史信息都在system消息中(在<chat_history>标签内)
|
| 410 |
+
2. ✅ 当前query都没有前缀(直接使用原始内容)
|
| 411 |
+
3. ✅ 历史信息中的query保留"User query:"前缀(在<chat_history>标签内)
|
| 412 |
+
4. ✅ Tools的包含/排除逻辑一致(生成Function Call时包含,生成Answer时不包含)
|
| 413 |
+
|
| 414 |
+
这些修改确保了训练和推理的格式完全一致,有助于模型更好地学习和应用。
|
| 415 |
+
|
data/dataset/12_15/GT_dataset.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5197fd920b966ccae749906565e788fbb199494d78db00d25e14d86c7863555
|
| 3 |
+
size 5732191453
|