| |
| |
| |
|
|
import os
import shutil
import subprocess
import sys
from pathlib import Path

from huggingface_hub import HfApi, create_repo, snapshot_download
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
|
|
| |
# --- Configuration -----------------------------------------------------------
# Fine-tuned LoRA adapter repo (input) and the repo the GGUF files go to (output).
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
# Base model the adapter was trained on (merged with the LoRA weights below).
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
# Quantization levels to produce: 4-bit (small/fast) and 8-bit (higher quality).
QUANTIZATIONS = ["Q4_K_M", "Q8_0"]

_RULE = "=" * 50
print(_RULE)
print("GGUF Conversion for LM Studio")
print(_RULE)
|
|
| |
# Step 1: pull the LoRA adapter, merge it into the base weights, and save the
# result as a plain HF checkpoint that llama.cpp's converter can read.
print("\n[1/4] Loading adapter and merging with base model...")
# AutoPeftModelForCausalLM resolves the base model named in the adapter config
# and attaches the LoRA weights on top of it.
_load_opts = {"device_map": "auto", "trust_remote_code": True}
model = AutoPeftModelForCausalLM.from_pretrained(ADAPTER_REPO, **_load_opts)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

print("Merging LoRA weights...")
# merge_and_unload() folds the adapter into the base weights and strips the
# PEFT wrapper, leaving a vanilla transformers model.
merged_model = model.merge_and_unload()

# Persist model + tokenizer side by side so the converter sees a complete repo.
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(merged_path)
print("Merged model saved!")
|
|
| |
# Step 2: fetch llama.cpp (shallow clone) — it ships the HF->GGUF converter
# script plus the quantizer binary we build later.
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path),
    ], check=True)

# Install the converter's Python dependencies into *this* interpreter's
# environment. `sys.executable -m pip` (rather than a bare `pip` on PATH)
# guarantees the packages land where the conversion step — which also runs
# sys.executable — will find them; a bare `pip` can target a different venv.
subprocess.run([
    sys.executable, "-m", "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt"),
], check=True, capture_output=True)
|
|
| |
# Step 3: convert the merged HF checkpoint to GGUF (f16), then quantize it
# to each level listed in QUANTIZATIONS.
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)

# First produce an unquantized f16 GGUF; llama-quantize works from this file.
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    # sys.executable, not a bare "python": ensures the converter runs under
    # the same interpreter whose environment received the pip-installed
    # converter requirements above.
    sys.executable, str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16",
], check=True)
print(f"Created: {f16_path}")

# Build only the quantizer target — no need for a full llama.cpp build.
print("\nBuilding llama.cpp for quantization...")
subprocess.run(["make", "-C", str(llama_cpp_path), "llama-quantize"], check=True, capture_output=True)

# Produce one GGUF per requested quantization level; collect paths for upload.
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(llama_cpp_path / "llama-quantize"),
        str(f16_path),
        str(quant_path),
        quant,
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")
|
|
| |
# Step 4: publish the GGUF files to the Hugging Face Hub.
print("\n[4/4] Uploading to Hugging Face Hub...")
# Uses ambient auth (HF_TOKEN env var or cached login) — TODO confirm the
# runtime environment has write credentials for OUTPUT_REPO.
api = HfApi()

# exist_ok=True makes re-runs idempotent when the repo already exists.
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
|
|
| |
# README/model card for the GGUF repo. The leading `---` block is YAML
# front-matter the Hub parses for license, base model, and search tags.
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---

# FFMPEG Command Generator (GGUF)

A fine-tuned model that generates FFMPEG commands from natural language descriptions with chain-of-thought reasoning.

## Usage

### LM Studio
```bash
lms import kingjux/ffmpeg-command-generator-gguf
```

### Ollama
```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```

## Example

**Input:** "Convert video.mp4 to webm format"

**Output:**
```
<think>
Task: Convert MP4 to WebM
- WebM container uses VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding
</think>

ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```

## Files

- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)

## Training

Fine-tuned from Qwen2.5-0.5B-Instruct on 30 FFMPEG command examples with CoT reasoning.
"""

# Write the card next to the GGUF files so it is uploaded from the same dir.
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)
|
|
| |
# Push the model card plus every quantized GGUF file to the output repo.
for artifact in [card_path, *quantized_files]:
    print(f"Uploading {artifact.name}...")
    api.upload_file(
        path_or_fileobj=str(artifact),
        path_in_repo=artifact.name,
        repo_id=OUTPUT_REPO,
        repo_type="model",
    )

# Final summary with copy-pasteable LM Studio import command.
rule = "=" * 50
print("\n" + rule)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f" lms import {OUTPUT_REPO}")
print(rule)
|
|