#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CodePilot Model Export — export a fine-tuned model to Ollama / LM Studio
========================================================================
Usage:
    # Steps 1+2+3 in one go
    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./my-model

    # Merge only (produce the full merged model)
    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./merged --merge-only

    # GGUF conversion only (merged model already exists)
    python export_model.py --merged-model ./merged --output ./my-model --quantize q4_k_m

    # Register with Ollama automatically
    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./my-model --ollama

    # Push to the HuggingFace Hub
    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./my-model --push-to-hub USERNAME/my-model
"""
import argparse
import os
import shutil
import subprocess
import sys
from pathlib import Path

DEFAULT_BASE_MODEL = "Qwen/Qwen2.5-Coder-3B-Instruct"
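# Heavy third-party imports (torch, transformers, peft, huggingface_hub) are
# deferred into the individual step functions below, so that --help stays fast
# and each step only requires the packages it actually uses.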

def step1_merge_adapter(base_model, adapter_path, output_dir):
    """Step 1: merge the LoRA adapter into the base model."""
    print(f"\n{'='*60}")
    print(f" Step 1: Merge LoRA Adapter")
    print(f" Base:    {base_model}")
    print(f" Adapter: {adapter_path}")
    print(f" Output:  {output_dir}")
    print(f"{'='*60}\n")
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    print("📥 Loading base model...")
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.float16,
        device_map="cpu",  # merge on CPU to save GPU memory
        trust_remote_code=True,
    )
    print("📥 Loading LoRA adapter...")
    model = PeftModel.from_pretrained(model, adapter_path)
    print("🔄 Merging weights...")
    model = model.merge_and_unload()
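    # merge_and_unload() folds the LoRA delta weights into the base model's
    # weight matrices and returns a plain transformers model (the PEFT wrapper
    # is gone), so it can be saved and converted like any ordinary checkpoint.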
| print(f"💾 保存到 {output_dir}...") | |
| os.makedirs(output_dir, exist_ok=True) | |
| model.save_pretrained(output_dir, safe_serialization=True) | |
| tokenizer.save_pretrained(output_dir) | |
| print(f"✅ 合併完成: {output_dir}") | |
| # 顯示大小 | |
| total_size = sum(f.stat().st_size for f in Path(output_dir).rglob("*") if f.is_file()) | |
| print(f" 大小: {total_size / 1024**3:.1f} GB") | |
| return output_dir | |
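
# With safe_serialization=True the merged model is written as safetensors
# shards next to config.json and the tokenizer files, which is the directory
# layout that llama.cpp's convert_hf_to_gguf.py expects as its input.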

def step2_convert_gguf(merged_dir, output_dir, quantize="q4_k_m"):
    """Step 2: convert the merged model to GGUF format."""
    print(f"\n{'='*60}")
    print(f" Step 2: Convert to GGUF")
    print(f" Input:    {merged_dir}")
    print(f" Output:   {output_dir}")
    print(f" Quantize: {quantize}")
    print(f"{'='*60}\n")
    # Check whether llama.cpp is installed
    convert_script = shutil.which("convert_hf_to_gguf.py")
    quantize_bin = shutil.which("llama-quantize")
    if not convert_script:
        # Look for a llama.cpp checkout in common locations
        llama_cpp_paths = [
            os.path.expanduser("~/llama.cpp"),
            "/opt/llama.cpp",
            os.path.expanduser("~/.local/share/llama.cpp"),
        ]
        for p in llama_cpp_paths:
            if os.path.exists(os.path.join(p, "convert_hf_to_gguf.py")):
                convert_script = os.path.join(p, "convert_hf_to_gguf.py")
                quantize_bin = os.path.join(p, "build", "bin", "llama-quantize")
                break
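    # shutil.which() only finds the script if it is executable and on PATH,
    # which is rarely true for a plain git checkout; hence the directory scan
    # above. Extend llama_cpp_paths if your checkout lives elsewhere.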
    if not convert_script:
        print("⚠️  llama.cpp not found. To install it:")
        print()
        print("  git clone https://github.com/ggml-org/llama.cpp")
        print("  cd llama.cpp")
        print("  pip install -r requirements.txt   # deps for convert_hf_to_gguf.py")
        print("  cmake -B build && cmake --build build --config Release")
        print()
        print("  (pip install llama-cpp-python provides only the runtime; the")
        print("   conversion script ships with the llama.cpp source tree.)")
        print()
        print("❌ Cannot convert automatically. Install llama.cpp and retry,")
        print(f"   or use the merged model directly: {merged_dir}")
        return None
    # Convert with llama.cpp
    os.makedirs(output_dir, exist_ok=True)
    fp16_gguf = os.path.join(output_dir, "model-fp16.gguf")
    quant_gguf = os.path.join(output_dir, f"model-{quantize}.gguf")

    # Step 2a: HF → GGUF (fp16)
    print("🔄 Converting HF → GGUF (fp16)...")
    result = subprocess.run(
        [sys.executable, convert_script, merged_dir,
         "--outfile", fp16_gguf, "--outtype", "f16"],
        capture_output=True, text=True)
    if result.returncode != 0:
        print(f"❌ Conversion failed:\n{result.stderr[:500]}")
        return None

    # Step 2b: quantize
    if quantize and quantize != "f16" and quantize_bin and os.path.exists(quantize_bin):
        print(f"🔄 Quantizing → {quantize}...")
        result = subprocess.run(
            [quantize_bin, fp16_gguf, quant_gguf, quantize.upper()],
            capture_output=True, text=True)
        if result.returncode == 0:
            # Delete the fp16 version to save disk space
            os.remove(fp16_gguf)
            gguf_path = quant_gguf
        else:
            print("⚠️  Quantization failed, falling back to the fp16 version")
            gguf_path = fp16_gguf
    else:
        gguf_path = fp16_gguf
    size = os.path.getsize(gguf_path) / 1024**3
    print(f"✅ GGUF done: {gguf_path} ({size:.1f} GB)")
    return gguf_path
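
# The conversion is deliberately two-pass: convert_hf_to_gguf.py first writes
# a full-precision f16 GGUF, then llama-quantize re-packs it into the target
# k-quant (e.g. Q4_K_M, a mixed 4-bit quantization). Quantizing from f16
# confines the quality loss to that single quantization step.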

def step3_register_ollama(gguf_path, model_name="codepilot"):
    """Step 3: register the model with Ollama."""
    print(f"\n{'='*60}")
    print(f" Step 3: Register with Ollama")
    print(f"{'='*60}\n")
    if not shutil.which("ollama"):
        print("❌ Ollama is not installed")
        print("   Install: curl -fsSL https://ollama.ai/install.sh | sh")
        return
    # Create the Modelfile
    modelfile_path = os.path.join(os.path.dirname(gguf_path), "Modelfile")
    modelfile_content = f"""FROM {os.path.abspath(gguf_path)}
TEMPLATE \"\"\"{{{{- if .System }}}}<|im_start|>system
{{{{ .System }}}}<|im_end|>
{{{{- end }}}}
<|im_start|>user
{{{{ .Prompt }}}}<|im_end|>
<|im_start|>assistant
\"\"\"
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|endoftext|>"
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER num_ctx 4096
SYSTEM \"\"\"You are CodePilot, an expert AI programming assistant.
Write clean, efficient, well-documented code.\"\"\"
"""
    Path(modelfile_path).write_text(modelfile_content)
    print(f"📝 Modelfile created: {modelfile_path}")
    # Register with Ollama
    print(f"🔄 ollama create {model_name}...")
    result = subprocess.run(
        ["ollama", "create", model_name, "-f", modelfile_path],
        capture_output=True, text=True)
    if result.returncode == 0:
        print(f"\n✅ Registered with Ollama!")
        print(f"\n   Usage:")
        print(f"   ollama run {model_name}")
        print(f"   ollama run {model_name} 'write a quicksort'")
        print(f"\n   Use it from CodePilot:")
        print(f"   python codepilot_v4.py --provider ollama --cloud-model {model_name}")
    else:
        print(f"❌ Registration failed:\n{result.stderr[:300]}")
        print(f"\n   Register manually:")
        print(f"   ollama create {model_name} -f {modelfile_path}")

def step4_push_to_hub(merged_dir, repo_id):
    """(Optional) push the merged model to the HuggingFace Hub."""
    print(f"\n{'='*60}")
    print(f" Step 4: Push to HuggingFace Hub")
    print(f" Repo: {repo_id}")
    print(f"{'='*60}\n")
    from huggingface_hub import HfApi
    api = HfApi()
    # Create the repo first; upload_folder fails if it does not exist yet
    api.create_repo(repo_id, repo_type="model", exist_ok=True)
    print("📤 Uploading...")
    api.upload_folder(
        folder_path=merged_dir,
        repo_id=repo_id,
        repo_type="model",
        commit_message="CodePilot fine-tuned model",
    )
    print(f"✅ Uploaded: https://huggingface.co/{repo_id}")

def step_lmstudio(gguf_path):
    """Print LM Studio usage instructions."""
    print(f"\n{'='*60}")
    print(f" Using the model in LM Studio")
    print(f"{'='*60}\n")
    print(f" 1. Open LM Studio")
    print(f" 2. Select 'My Models' in the sidebar")
    print(f" 3. Click 'Import Model'")
    print(f" 4. Choose: {os.path.abspath(gguf_path)}")
    print(f" 5. Once loaded, the model is ready to use in LM Studio")
    print(f"\n Or copy the GGUF file into LM Studio's model directory:")
    # LM Studio default model directory (same default path on every platform)
    lm_dir = os.path.expanduser("~/.cache/lm-studio/models")
    dest = os.path.join(lm_dir, "codepilot")
    print(f"   mkdir -p {dest}")
    print(f"   cp {os.path.abspath(gguf_path)} {dest}/")

def main():
    parser = argparse.ArgumentParser(description="Export a model to Ollama / LM Studio")
    parser.add_argument("--base-model", default=DEFAULT_BASE_MODEL, help="Base model")
    parser.add_argument("--adapter", help="Path to the LoRA adapter")
    parser.add_argument("--merged-model", help="Path to an already-merged model (skips Step 1)")
    parser.add_argument("--output", default="./exported_model", help="Output directory")
    parser.add_argument("--quantize", default="q4_k_m",
                        choices=["f16", "q8_0", "q6_k", "q5_k_m", "q4_k_m", "q4_0", "q3_k_m", "q2_k"],
                        help="Quantization level (default: q4_k_m)")
    parser.add_argument("--ollama", action="store_true", help="Register with Ollama automatically")
    parser.add_argument("--ollama-name", default="codepilot", help="Ollama model name")
    parser.add_argument("--merge-only", action="store_true", help="Merge only, skip GGUF conversion")
    parser.add_argument("--push-to-hub", help="Push to the HF Hub (format: username/model-name)")
    args = parser.parse_args()
    print("""
╔════════════════════════════════════════════════════════════╗
║                   CodePilot Model Export                   ║
║          LoRA → Merge → GGUF → Ollama / LM Studio          ║
╚════════════════════════════════════════════════════════════╝
""")
    merged_dir = args.merged_model
    gguf_path = None

    # Step 1: merge
    if not merged_dir:
        if not args.adapter:
            print("❌ Please specify --adapter or --merged-model")
            sys.exit(1)
        merged_dir = os.path.join(args.output, "merged")
        step1_merge_adapter(args.base_model, args.adapter, merged_dir)
    if args.merge_only:
        print(f"\n✅ Merge complete: {merged_dir}")
        return

    # Step 2: GGUF
    gguf_dir = os.path.join(args.output, "gguf")
    gguf_path = step2_convert_gguf(merged_dir, gguf_dir, args.quantize)

    # Step 3: Ollama
    if args.ollama and gguf_path:
        step3_register_ollama(gguf_path, args.ollama_name)

    # LM Studio instructions
    if gguf_path:
        step_lmstudio(gguf_path)

    # Push to the Hub
    if args.push_to_hub:
        step4_push_to_hub(merged_dir, args.push_to_hub)

    print(f"\n{'='*60}")
    print(f" 🎉 Export complete!")
    print(f"{'='*60}")
    print(f" Merged model: {merged_dir}")
    if gguf_path:
        print(f" GGUF: {gguf_path}")
    print()
    print(f" Quantization options:")
    print(f"   f16    — highest quality, largest (~6GB)")
    print(f"   q8_0   — nearly lossless (~3.5GB)")
    print(f"   q6_k   — high quality (~2.8GB)")
    print(f"   q5_k_m — good balance (~2.4GB)")
    print(f"   q4_k_m — recommended default (~2.0GB) ← best quality/size trade-off")
    print(f"   q4_0   — smaller (~1.8GB)")
    print(f"   q3_k_m — very small (~1.5GB)")
    print(f"   q2_k   — smallest, noticeable quality loss (~1.2GB)")

if __name__ == "__main__":
    main()