| |
| |
| |
|
|
import os
import shutil
import subprocess
import sys
from pathlib import Path

from huggingface_hub import HfApi, create_repo, snapshot_download
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
|
|
| |
# --- Configuration -----------------------------------------------------------
# Fine-tuned LoRA adapter repo (input) and the repo the GGUF files go to (output).
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
# Base model the adapter was trained on (merged with the LoRA weights below).
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
# Quantization levels to produce: 4-bit (small/fast) and 8-bit (higher quality).
QUANTIZATIONS = ["Q4_K_M", "Q8_0"]

_RULE = "=" * 50
print(_RULE)
print("GGUF Conversion for LM Studio")
print(_RULE)
|
|
| |
# Step 1: pull the LoRA adapter, merge it into the base weights, and save the
# result as a plain HF checkpoint that llama.cpp's converter can read.
print("\n[1/4] Loading adapter and merging with base model...")
# AutoPeftModelForCausalLM resolves the base model named in the adapter config
# and attaches the LoRA weights on top of it.
_load_opts = {"device_map": "auto", "trust_remote_code": True}
model = AutoPeftModelForCausalLM.from_pretrained(ADAPTER_REPO, **_load_opts)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

print("Merging LoRA weights...")
# merge_and_unload() folds the adapter into the base weights and strips the
# PEFT wrapper, leaving a vanilla transformers model.
merged_model = model.merge_and_unload()

# Persist model + tokenizer side by side so the converter sees a complete repo.
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(merged_path)
print("Merged model saved!")
|
|
| |
# Step 2: fetch llama.cpp (shallow clone) — it ships the HF->GGUF converter
# script plus the quantizer binary we build later.
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path),
    ], check=True)

# Install the converter's Python dependencies into *this* interpreter's
# environment. `sys.executable -m pip` (rather than a bare `pip` on PATH)
# guarantees the packages land where the conversion step — which also runs
# sys.executable — will find them; a bare `pip` can target a different venv.
subprocess.run([
    sys.executable, "-m", "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt"),
], check=True, capture_output=True)
|
|
| |
# Step 3: convert the merged HF checkpoint to GGUF (f16), then quantize it
# to each level listed in QUANTIZATIONS.
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)

# First produce an unquantized f16 GGUF; llama-quantize works from this file.
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    # sys.executable, not a bare "python": ensures the converter runs under
    # the same interpreter whose environment received the pip-installed
    # converter requirements above.
    sys.executable, str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16",
], check=True)
print(f"Created: {f16_path}")

# Build only the quantizer target — no need for a full llama.cpp build.
print("\nBuilding llama.cpp for quantization...")
subprocess.run(["make", "-C", str(llama_cpp_path), "llama-quantize"], check=True, capture_output=True)

# Produce one GGUF per requested quantization level; collect paths for upload.
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(llama_cpp_path / "llama-quantize"),
        str(f16_path),
        str(quant_path),
        quant,
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")
|
|
| |
# Step 4: publish the GGUF files to the Hugging Face Hub.
print("\n[4/4] Uploading to Hugging Face Hub...")
# Uses ambient auth (HF_TOKEN env var or cached login) — TODO confirm the
# runtime environment has write credentials for OUTPUT_REPO.
api = HfApi()

# exist_ok=True makes re-runs idempotent when the repo already exists.
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
|
|
| |
# README/model card for the GGUF repo. The leading `---` block is YAML
# front-matter the Hub parses for license, base model, and search tags.
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---

# FFMPEG Command Generator (GGUF)

A fine-tuned model that generates FFMPEG commands from natural language descriptions with chain-of-thought reasoning.

## Usage

### LM Studio
```bash
lms import kingjux/ffmpeg-command-generator-gguf
```

### Ollama
```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```

## Example

**Input:** "Convert video.mp4 to webm format"

**Output:**
```
<think>
Task: Convert MP4 to WebM
- WebM container uses VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding
</think>

ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```

## Files

- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)

## Training

Fine-tuned from Qwen2.5-0.5B-Instruct on 30 FFMPEG command examples with CoT reasoning.
"""

# Write the card next to the GGUF files so it is uploaded from the same dir.
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)
|
|
| |
# Push the model card plus every quantized GGUF file to the output repo.
for artifact in [card_path, *quantized_files]:
    print(f"Uploading {artifact.name}...")
    api.upload_file(
        path_or_fileobj=str(artifact),
        path_in_repo=artifact.name,
        repo_id=OUTPUT_REPO,
        repo_type="model",
    )

# Final summary with copy-pasteable LM Studio import command.
rule = "=" * 50
print("\n" + rule)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f" lms import {OUTPUT_REPO}")
print(rule)
|
|