| #!/bin/bash |
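# Convert a fine-tuned Hugging Face model to GGUF format using llama.cpp,
# then optionally quantize it (Q4_K_M) and run a quick generation test.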
|
|
|
|
set -e  # abort on the first command that fails
|
|
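# ANSI color codes for status output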
| RED='\033[0;31m' |
| GREEN='\033[0;32m' |
| YELLOW='\033[1;33m' |
| BLUE='\033[0;34m' |
| NC='\033[0m' |
|
|
| echo -e "${BLUE}π Converting Model to GGUF Format${NC}" |
| echo "=====================================" |
|
|
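# Configuration: fine-tuned model directory, output file name, llama.cpp checkout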
| MODEL_DIR="./fine_tuned_model" |
| OUTPUT_FILE="my_custom_model.gguf" |
| LLAMA_CPP_DIR="./llama.cpp" |
|
|
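# Abort early if the fine-tuned model directory is missing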
| if [ ! -d "$MODEL_DIR" ]; then |
| echo -e "${RED}β Fine-tuned model not found at: $MODEL_DIR${NC}" |
| echo "Run fine-tuning first: python create_custom_model.py (option 2)" |
| exit 1 |
| fi |
|
|
| echo -e "${GREEN}β
Found fine-tuned model at: $MODEL_DIR${NC}" |
|
|
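# Clone and build llama.cpp if it is not already present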
| if [ ! -d "$LLAMA_CPP_DIR" ]; then |
| echo -e "${YELLOW}π₯ Cloning llama.cpp...${NC}" |
| git clone https://github.com/ggerganov/llama.cpp.git |
|
|
| echo -e "${YELLOW}π¨ Building llama.cpp...${NC}" |
| cd llama.cpp |
|
|
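# Build with GPU (cuBLAS) support when the CUDA toolkit is available, otherwise CPU-only.
# Note: LLAMA_CUBLAS is the legacy Makefile flag; recent llama.cpp releases build with
# CMake (-DGGML_CUDA=ON) instead, so this step may need adjusting for newer checkouts.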
| if command -v nvcc &> /dev/null; then |
| echo -e "${GREEN}π NVIDIA CUDA detected, building with GPU support${NC}" |
| make LLAMA_CUBLAS=1 -j$(nproc) |
| else |
| echo -e "${YELLOW}β οΈ No CUDA detected, building CPU-only version${NC}" |
| make -j$(nproc) |
| fi |
|
|
| cd .. |
| else |
| echo -e "${GREEN}β
llama.cpp already exists${NC}" |
| fi |
|
|
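# Make sure the Python packages needed for the conversion are installed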
| echo -e "${BLUE}π¦ Checking Python dependencies...${NC}" |
| python3 -c "import torch, transformers, sentencepiece" 2>/dev/null || { |
| echo -e "${YELLOW}β οΈ Installing missing dependencies...${NC}" |
| pip install torch transformers sentencepiece protobuf |
| } |
|
|
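# Convert the model to GGUF at F16 precision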
| echo -e "${BLUE}π Converting to GGUF format...${NC}" |
| echo "This may take several minutes..." |
|
|
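# Pick the conversion script: older llama.cpp checkouts ship convert.py at the repo root,
# newer ones replace it with convert_hf_to_gguf.py (handled in the fallback below).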
| if [ -f "$LLAMA_CPP_DIR/convert.py" ]; then |
| echo -e "${GREEN}Using convert.py${NC}" |
| python3 "$LLAMA_CPP_DIR/convert.py" \ |
| "$MODEL_DIR" \ |
| --outtype f16 \ |
| --outfile "$OUTPUT_FILE" |
else
# convert.py was replaced by convert_hf_to_gguf.py in newer llama.cpp checkouts,
# so re-save the model as safetensors and convert with the newer script.
echo -e "${YELLOW}Using convert_hf_to_gguf.py (convert.py not found)${NC}"
python3 -c "
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

print('Loading model...')
model = AutoModelForCausalLM.from_pretrained('$MODEL_DIR', torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained('$MODEL_DIR')

print('Saving in HF safetensors format...')
model.save_pretrained('./temp_hf_model', safe_serialization=True)
tokenizer.save_pretrained('./temp_hf_model')
print('Conversion to HF format complete')
"

# Convert the re-saved copy, then clean up the temporary directory
if [ -d "./temp_hf_model" ]; then
python3 "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" \
"./temp_hf_model" \
--outtype f16 \
--outfile "$OUTPUT_FILE"
rm -rf ./temp_hf_model
fi
fi
|
|
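# Verify that the conversion produced an output file and report its size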
| if [ -f "$OUTPUT_FILE" ]; then |
| FILE_SIZE=$(du -h "$OUTPUT_FILE" | cut -f1) |
| echo |
| echo -e "${GREEN}π Conversion successful!${NC}" |
| echo -e "${BLUE}π Output file: $OUTPUT_FILE${NC}" |
| echo -e "${BLUE}π File size: $FILE_SIZE${NC}" |
|
|
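# Optionally create a Q4_K_M quantized copy (much smaller file, small quality trade-off)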
| echo |
| echo -e "${YELLOW}π‘ Optional: Create quantized versions?${NC}" |
| read -p "Create Q4_K_M quantized version? (y/N): " -n 1 -r |
| echo |
| if [[ $REPLY =~ ^[Yy]$ ]]; then |
| echo -e "${BLUE}π Creating Q4_K_M quantized version...${NC}" |
| "$LLAMA_CPP_DIR/quantize" "$OUTPUT_FILE" "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" Q4_K_M |
|
|
| if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then |
| QUANT_SIZE=$(du -h "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" | cut -f1) |
| echo -e "${GREEN}β
Quantized version created: ${OUTPUT_FILE%.gguf}_q4_k_m.gguf ($QUANT_SIZE)${NC}" |
| fi |
| fi |
|
|
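# Optionally run a short generation test against the converted model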
| echo |
| echo -e "${YELLOW}π§ͺ Test the converted model?${NC}" |
| read -p "Run a quick test? (y/N): " -n 1 -r |
| echo |
| if [[ $REPLY =~ ^[Yy]$ ]]; then |
| echo -e "${BLUE}π§ͺ Testing model...${NC}" |
| echo "Prompt: 'Hello, how are you?'" |
| echo "Response:" |
| "$LLAMA_CPP_DIR/main" -m "$OUTPUT_FILE" -p "Hello, how are you?" -n 50 --temp 0.7 |
| fi |
|
|
| else |
| echo -e "${RED}β Conversion failed!${NC}" |
| echo "Check the error messages above." |
| exit 1 |
| fi |
|
|
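# Follow-up steps: package the GGUF file as an Ollama model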
| echo |
| echo -e "${GREEN}π― Next Steps:${NC}" |
| echo "1. Create Ollama Modelfile:" |
| echo " python create_custom_model.py # option 4" |
| echo |
| echo "2. Import to Ollama:" |
| echo " ollama create my-custom-model -f Modelfile" |
| echo |
| echo "3. Test in Ollama:" |
| echo " ollama run my-custom-model \"Hello!\"" |
| echo |
| echo "4. Push to Ollama Library:" |
| echo " ollama push my-custom-model" |
| echo |
| echo -e "${BLUE}π Files created:${NC}" |
| echo " β’ $OUTPUT_FILE (F16 version)" |
| if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then |
| echo " β’ ${OUTPUT_FILE%.gguf}_q4_k_m.gguf (Quantized version)" |
| fi |
|
|
| echo |
| echo -e "${GREEN}π GGUF conversion completed successfully!${NC}" |