sandbox-5ca717e4

Sleeping

App Files Files Community

Justin-lee committed 16 days ago

Commit

b65d866

verified ·

1 Parent(s): 1211240

Add model export script for Ollama/LM Studio

Browse files

Files changed (1) hide show

export_model.py +350 -0

export_model.py ADDED Viewed

	@@ -0,0 +1,350 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+CodePilot Model Export — export a trained model for Ollama / LM Studio
+======================================================================
+Usage:
+    # Steps 1+2+3 in one go
+    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./my-model
+    # Merge only (produces the full merged model)
+    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./merged --merge-only
+    # Convert to GGUF only (a merged model already exists)
+    python export_model.py --merged-model ./merged --output ./my-model --quantize q4_k_m
+    # Register with Ollama automatically
+    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./my-model --ollama
+    # Upload to the HuggingFace Hub
+    python export_model.py --adapter ~/.codepilot/adapter_20260423 --output ./my-model --push-to-hub USERNAME/my-model
+"""
+import argparse, os, sys, subprocess, shutil
+from pathlib import Path
+# Base model used for the merge when --base-model is not given on the command line.
+DEFAULT_BASE_MODEL = "Qwen/Qwen2.5-Coder-3B-Instruct"
+def step1_merge_adapter(base_model, adapter_path, output_dir):
+    """Step 1: 合併 LoRA adapter 到基礎模型"""
+    print(f"\n{'='*60}")
+    print(f"  Step 1: 合併 LoRA Adapter")
+    print(f"  Base:    {base_model}")
+    print(f"  Adapter: {adapter_path}")
+    print(f"  Output:  {output_dir}")
+    print(f"{'='*60}\n")
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    from peft import PeftModel
+    print("📥 載入基礎模型...")
+    tokenizer = AutoTokenizer.from_pretrained(base_model)
+    model = AutoModelForCausalLM.from_pretrained(
+        base_model, torch_dtype=torch.float16,
+        device_map="cpu",  # 合併用 CPU，省 GPU 記憶體
+        trust_remote_code=True,
+    )
+    print("📥 載入 LoRA adapter...")
+    model = PeftModel.from_pretrained(model, adapter_path)
+    print("🔄 合併權重...")
+    model = model.merge_and_unload()
+    print(f"💾 保存到 {output_dir}...")
+    os.makedirs(output_dir, exist_ok=True)
+    model.save_pretrained(output_dir, safe_serialization=True)
+    tokenizer.save_pretrained(output_dir)
+    print(f"✅ 合併完成: {output_dir}")
+    # 顯示大小
+    total_size = sum(f.stat().st_size for f in Path(output_dir).rglob("*") if f.is_file())
+    print(f"   大小: {total_size / 1024**3:.1f} GB")
+    return output_dir
+def step2_convert_gguf(merged_dir, output_dir, quantize="q4_k_m"):
+    """Step 2: 轉換成 GGUF 格式"""
+    print(f"\n{'='*60}")
+    print(f"  Step 2: 轉換 GGUF")
+    print(f"  Input:    {merged_dir}")
+    print(f"  Output:   {output_dir}")
+    print(f"  Quantize: {quantize}")
+    print(f"{'='*60}\n")
+    # 檢查 llama.cpp 是否已安裝
+    convert_script = shutil.which("convert_hf_to_gguf.py")
+    quantize_bin = shutil.which("llama-quantize")
+    if not convert_script:
+        # 嘗試找 llama.cpp 目錄
+        llama_cpp_paths = [
+            os.path.expanduser("~/llama.cpp"),
+            "/opt/llama.cpp",
+            os.path.expanduser("~/.local/share/llama.cpp"),
+        ]
+        for p in llama_cpp_paths:
+            if os.path.exists(os.path.join(p, "convert_hf_to_gguf.py")):
+                convert_script = os.path.join(p, "convert_hf_to_gguf.py")
+                quantize_bin = os.path.join(p, "build", "bin", "llama-quantize")
+                break
+    if not convert_script:
+        print("⚠️  llama.cpp 未安裝。安裝方式：")
+        print()
+        print("  # 方式 1: pip（最簡單）")
+        print("  pip install llama-cpp-python")
+        print()
+        print("  # 方式 2: 從原始碼編譯（完整功能）")
+        print("  git clone https://github.com/ggml-org/llama.cpp")
+        print("  cd llama.cpp && make -j")
+        print()
+        print("  # 方式 3: 用 Hugging Face 的轉換工具")
+        print("  pip install transformers[gguf]")
+        print()
+        # 嘗試用 transformers 的 GGUF 導出
+        print("🔄 嘗試用 transformers 導出 GGUF...")
+        try:
+            from transformers import AutoModelForCausalLM, AutoTokenizer
+            os.makedirs(output_dir, exist_ok=True)
+            gguf_path = os.path.join(output_dir, "model.gguf")
+            tokenizer = AutoTokenizer.from_pretrained(merged_dir)
+            model = AutoModelForCausalLM.from_pretrained(merged_dir, torch_dtype="auto")
+            model.save_pretrained(output_dir, safe_serialization=False)
+            # 用 convert script from transformers
+            convert_cmd = [
+                sys.executable, "-c",
+                f"from transformers.convert_slow_tokenizer import convert_gguf; "
+                f"convert_gguf('{merged_dir}', '{gguf_path}')"
+            ]
+            result = subprocess.run(convert_cmd, capture_output=True, text=True)
+            if result.returncode == 0:
+                print(f"✅ GGUF 轉換完成: {gguf_path}")
+                return gguf_path
+        except Exception as e:
+            pass
+        print()
+        print("❌ 自動轉換失敗。請手動安裝 llama.cpp 後重試。")
+        print(f"   或者直接用合併後的模型: {merged_dir}")
+        return None
+    # 用 llama.cpp 轉換
+    os.makedirs(output_dir, exist_ok=True)
+    fp16_gguf = os.path.join(output_dir, "model-fp16.gguf")
+    quant_gguf = os.path.join(output_dir, f"model-{quantize}.gguf")
+    # Step 2a: HF → GGUF (fp16)
+    print("🔄 轉換 HF → GGUF (fp16)...")
+    result = subprocess.run(
+        [sys.executable, convert_script, merged_dir,
+         "--outfile", fp16_gguf, "--outtype", "f16"],
+        capture_output=True, text=True)
+    if result.returncode != 0:
+        print(f"❌ 轉換失敗:\n{result.stderr[:500]}")
+        return None
+    # Step 2b: 量化
+    if quantize and quantize != "f16" and quantize_bin and os.path.exists(quantize_bin):
+        print(f"🔄 量化 → {quantize}...")
+        result = subprocess.run(
+            [quantize_bin, fp16_gguf, quant_gguf, quantize.upper()],
+            capture_output=True, text=True)
+        if result.returncode == 0:
+            # 刪除 fp16 版本節省空間
+            os.remove(fp16_gguf)
+            gguf_path = quant_gguf
+        else:
+            print(f"⚠️  量化失敗，使用 fp16 版本")
+            gguf_path = fp16_gguf
+    else:
+        gguf_path = fp16_gguf
+    size = os.path.getsize(gguf_path) / 1024**3
+    print(f"✅ GGUF 完成: {gguf_path} ({size:.1f} GB)")
+    return gguf_path
+def step3_register_ollama(gguf_path, model_name="codepilot"):
+    """Step 3: 註冊到 Ollama"""
+    print(f"\n{'='*60}")
+    print(f"  Step 3: 註冊到 Ollama")
+    print(f"{'='*60}\n")
+    if not shutil.which("ollama"):
+        print("❌ Ollama 未安裝")
+        print("   安裝: curl -fsSL https://ollama.ai/install.sh | sh")
+        return
+    # 建立 Modelfile
+    modelfile_path = os.path.join(os.path.dirname(gguf_path), "Modelfile")
+    modelfile_content = f"""FROM {os.path.abspath(gguf_path)}
+TEMPLATE \"\"\"{{{{- if .System }}}}<|im_start|>system
+{{{{ .System }}}}<|im_end|>
+{{{{- end }}}}
+<|im_start|>user
+{{{{ .Prompt }}}}<|im_end|>
+<|im_start|>assistant
+\"\"\"
+PARAMETER stop "<|im_end|>"
+PARAMETER stop "<|endoftext|>"
+PARAMETER temperature 0.7
+PARAMETER top_p 0.9
+PARAMETER num_ctx 4096
+SYSTEM \"\"\"You are CodePilot, an expert AI programming assistant.
+Write clean, efficient, well-documented code.\"\"\"
+"""
+    Path(modelfile_path).write_text(modelfile_content)
+    print(f"📝 Modelfile 已建立: {modelfile_path}")
+    # 註冊到 Ollama
+    print(f"🔄 ollama create {model_name}...")
+    result = subprocess.run(
+        ["ollama", "create", model_name, "-f", modelfile_path],
+        capture_output=True, text=True)
+    if result.returncode == 0:
+        print(f"\n✅ 已註冊到 Ollama!")
+        print(f"\n   使用方式:")
+        print(f"   ollama run {model_name}")
+        print(f"   ollama run {model_name} '寫一個快速排序'")
+        print(f"\n   在 CodePilot 中使用:")
+        print(f"   python codepilot_v4.py --provider ollama --cloud-model {model_name}")
+    else:
+        print(f"❌ 註冊失敗:\n{result.stderr[:300]}")
+        print(f"\n   手動註冊:")
+        print(f"   ollama create {model_name} -f {modelfile_path}")
+def step4_push_to_hub(merged_dir, repo_id):
+    """（可選）上傳到 HuggingFace Hub"""
+    print(f"\n{'='*60}")
+    print(f"  Step 4: 上傳到 HuggingFace Hub")
+    print(f"  Repo: {repo_id}")
+    print(f"{'='*60}\n")
+    from huggingface_hub import HfApi
+    api = HfApi()
+    print("📤 上傳中...")
+    api.upload_folder(
+        folder_path=merged_dir,
+        repo_id=repo_id,
+        repo_type="model",
+        commit_message="CodePilot fine-tuned model",
+    )
+    print(f"✅ 已上傳: https://huggingface.co/{repo_id}")
+def step_lmstudio(gguf_path):
+    """顯示 LM Studio 使用說明"""
+    print(f"\n{'='*60}")
+    print(f"  LM Studio 使用方式")
+    print(f"{'='*60}\n")
+    print(f"  1. 打開 LM Studio")
+    print(f"  2. 左側選「My Models」")
+    print(f"  3. 點「Import Model」")
+    print(f"  4. 選擇: {os.path.abspath(gguf_path)}")
+    print(f"  5. 載入後就可以在 LM Studio 中使用")
+    print(f"\n  或者把 GGUF 文件複製到 LM Studio 的模型目錄:")
+    # LM Studio 預設路徑
+    import platform
+    if platform.system() == "Windows":
+        lm_dir = os.path.expanduser("~/.cache/lm-studio/models")
+    elif platform.system() == "Darwin":
+        lm_dir = os.path.expanduser("~/.cache/lm-studio/models")
+    else:
+        lm_dir = os.path.expanduser("~/.cache/lm-studio/models")
+    dest = os.path.join(lm_dir, "codepilot")
+    print(f"  mkdir -p {dest}")
+    print(f"  cp {os.path.abspath(gguf_path)} {dest}/")
+def main():
+    parser = argparse.ArgumentParser(description="匯出模型給 Ollama / LM Studio")
+    parser.add_argument("--base-model", default=DEFAULT_BASE_MODEL, help="基礎模型")
+    parser.add_argument("--adapter", help="LoRA adapter 路徑")
+    parser.add_argument("--merged-model", help="已合併的模型路徑（跳過 Step 1）")
+    parser.add_argument("--output", default="./exported_model", help="輸出目錄")
+    parser.add_argument("--quantize", default="q4_k_m",
+        choices=["f16", "q8_0", "q6_k", "q5_k_m", "q4_k_m", "q4_0", "q3_k_m", "q2_k"],
+        help="量化等級 (預設: q4_k_m)")
+    parser.add_argument("--ollama", action="store_true", help="自動註冊到 Ollama")
+    parser.add_argument("--ollama-name", default="codepilot", help="Ollama 模型名稱")
+    parser.add_argument("--merge-only", action="store_true", help="只合併，不轉 GGUF")
+    parser.add_argument("--push-to-hub", help="上傳到 HF Hub (格式: username/model-name)")
+    args = parser.parse_args()
+    print("""
+    ╔════════════════════════════════════════════════════════════╗
+    ║  CodePilot Model Export                                    ║
+    ║  LoRA → 合併 → GGUF → Ollama / LM Studio                 ║
+    ╚════════════════════════════════════════════════════════════╝
+    """)
+    merged_dir = args.merged_model
+    gguf_path = None
+    # Step 1: 合併
+    if not merged_dir:
+        if not args.adapter:
+            print("❌ 請指定 --adapter 或 --merged-model")
+            sys.exit(1)
+        merged_dir = os.path.join(args.output, "merged")
+        step1_merge_adapter(args.base_model, args.adapter, merged_dir)
+    if args.merge_only:
+        print(f"\n✅ 合併完成: {merged_dir}")
+        return
+    # Step 2: GGUF
+    gguf_dir = os.path.join(args.output, "gguf")
+    gguf_path = step2_convert_gguf(merged_dir, gguf_dir, args.quantize)
+    # Step 3: Ollama
+    if args.ollama and gguf_path:
+        step3_register_ollama(gguf_path, args.ollama_name)
+    # LM Studio 說明
+    if gguf_path:
+        step_lmstudio(gguf_path)
+    # 上傳
+    if args.push_to_hub:
+        step4_push_to_hub(merged_dir, args.push_to_hub)
+    print(f"\n{'='*60}")
+    print(f"  🎉 匯出完成！")
+    print(f"{'='*60}")
+    print(f"  合併模型: {merged_dir}")
+    if gguf_path: print(f"  GGUF:     {gguf_path}")
+    print()
+    print(f"  量化選項說明:")
+    print(f"    f16     — 最高品質，最大 (~6GB)")
+    print(f"    q8_0    — 幾乎無損 (~3.5GB)")
+    print(f"    q6_k    — 高品質 (~2.8GB)")
+    print(f"    q5_k_m  — 好的平衡 (~2.4GB)")
+    print(f"    q4_k_m  — 推薦預設 (~2.0GB)  ← 品質/大小最佳平衡")
+    print(f"    q4_0    — 較小 (~1.8GB)")
+    print(f"    q3_k_m  — 很小 (~1.5GB)")
+    print(f"    q2_k    — 最小，品質有損 (~1.2GB)")
+if __name__ == "__main__":
+    main()