Add script to convert fine-tuned adapter to Ollama GGUF
scripts/convert_to_ollama.py:
- Downloads base Qwen2.5-7B-Instruct + LoRA adapter from the HF Hub
- Merges adapter into base model (CPU, ~16GB RAM); see the merge sketch below
- Converts to GGUF via llama.cpp (Q4_K_M quantization)
- Creates an Ollama model with system prompt and parameters
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- .gitignore +3 -0
- scripts/convert_to_ollama.py +203 -0
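
For context on the merge step: peft's merge_and_unload() folds each LoRA delta back into the frozen base weights, so the saved model needs no adapter at inference time. A minimal sketch of the underlying arithmetic for a single linear layer (shapes and rank here are illustrative assumptions, not values read from the adapter):

import torch

# Hypothetical shapes/rank for one projection in a 7B model.
d_out, d_in, r, alpha = 4096, 4096, 16, 32

W = torch.randn(d_out, d_in)  # frozen base weight
A = torch.randn(r, d_in)      # LoRA down-projection
B = torch.zeros(d_out, r)     # LoRA up-projection (zero-initialized at train start)

# merge_and_unload() effectively computes W' = W + (alpha / r) * B @ A
# for every adapted layer, then drops the adapter modules entirely.
W_merged = W + (alpha / r) * (B @ A)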
.gitignore CHANGED

@@ -33,6 +33,9 @@ shared_data/order_id.txt
 # Docker volumes / local data
 matcher_data/
 
+# Model files (GGUF, merged weights)
+models/
+
 # Windows artifact
 nul
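models/ is ignored because the conversion artifacts are far too big to commit. A rough back-of-envelope estimate of what lands in that directory (parameter count and bits-per-weight are approximations, not measured values):

# Qwen2.5-7B has roughly 7.6e9 parameters.
params = 7.6e9

f16_gb = params * 2 / 1e9        # 2 bytes/weight   -> ~15 GB intermediate GGUF
q4_gb = params * 4.7 / 8 / 1e9   # ~4.7 bits/weight -> ~4.5 GB for Q4_K_M

print(f"f16 GGUF    ~ {f16_gb:.1f} GB")
print(f"Q4_K_M GGUF ~ {q4_gb:.1f} GB")

The merged fp16 HF checkpoint in models/merged is roughly the same size as the f16 GGUF, so a full run needs on the order of 35 GB of free disk.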
scripts/convert_to_ollama.py ADDED

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""Convert the StockEx CH Trader LoRA adapter to GGUF for Ollama.

Prerequisites:
    pip install torch transformers peft huggingface_hub
    git clone https://github.com/ggerganov/llama.cpp
    cd llama.cpp && pip install -r requirements/requirements-convert_hf_to_gguf.txt

Usage:
    python scripts/convert_to_ollama.py

This script will:
1. Download the base model (Qwen2.5-7B-Instruct)
2. Download the LoRA adapter (RayMelius/stockex-ch-trader)
3. Merge adapter into base model (CPU, ~16GB RAM needed)
4. Convert merged model to GGUF (Q4_K_M quantization)
5. Create and register an Ollama model

After running, use in StockEx with:
    OLLAMA_HOST=http://localhost:11434 OLLAMA_MODEL=stockex-ch-trader
"""

import os
import sys
import shutil
import subprocess
import argparse

BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
ADAPTER_REPO = "RayMelius/stockex-ch-trader"
OLLAMA_MODEL_NAME = "stockex-ch-trader"
QUANT = "Q4_K_M"

WORK_DIR = os.path.join(os.path.dirname(__file__), "..", "models")
MERGED_DIR = os.path.join(WORK_DIR, "merged")
GGUF_PATH = os.path.join(WORK_DIR, f"stockex-ch-trader-{QUANT}.gguf")
MODELFILE_PATH = os.path.join(WORK_DIR, "Modelfile")

SYSTEM_PROMPT = (
    "You are a StockEx clearing house trading agent. "
    "Given a member's financial state and live market data, "
    "you output a single valid JSON trading decision that respects all capital and holdings constraints. "
    "Never output anything other than the JSON object."
)


def step(n, msg):
    print(f"\n{'='*60}")
    print(f" Step {n}: {msg}")
    print(f"{'='*60}\n")


def merge_adapter():
    """Download base model + adapter, merge, save to disk."""
    step(1, f"Merging {ADAPTER_REPO} into {BASE_MODEL}")

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from peft import PeftModel

    print("Loading base model (CPU, float16)...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="cpu",
        trust_remote_code=True,
    )

    print(f"Loading adapter from {ADAPTER_REPO}...")
    model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

    print("Merging adapter weights...")
    model = model.merge_and_unload()

    os.makedirs(MERGED_DIR, exist_ok=True)
    print(f"Saving merged model to {MERGED_DIR}...")
    model.save_pretrained(MERGED_DIR)

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tokenizer.save_pretrained(MERGED_DIR)
    print("Merge complete.")


def convert_to_gguf(llama_cpp_dir):
    """Convert merged HF model to GGUF format."""
    step(2, f"Converting to GGUF ({QUANT})")

    convert_script = os.path.join(llama_cpp_dir, "convert_hf_to_gguf.py")
    if not os.path.exists(convert_script):
        print(f"ERROR: {convert_script} not found.")
        print("Clone llama.cpp first: git clone https://github.com/ggerganov/llama.cpp")
        sys.exit(1)

    # First convert to f16 GGUF
    f16_path = os.path.join(WORK_DIR, "stockex-ch-trader-f16.gguf")
    cmd = [sys.executable, convert_script, MERGED_DIR, "--outfile", f16_path, "--outtype", "f16"]
    print(f"Running: {' '.join(cmd)}")
    subprocess.run(cmd, check=True)

    # Then quantize
    quantize_bin = os.path.join(llama_cpp_dir, "build", "bin", "llama-quantize")
    if not os.path.exists(quantize_bin):
        # Try alternative paths
        for alt in ["llama-quantize", "quantize"]:
            alt_path = os.path.join(llama_cpp_dir, "build", "bin", alt)
            if os.path.exists(alt_path):
                quantize_bin = alt_path
                break
            # Check if it's in PATH
            if shutil.which(alt):
                quantize_bin = alt
                break

    if os.path.exists(quantize_bin) or shutil.which(quantize_bin):
        cmd = [quantize_bin, f16_path, GGUF_PATH, QUANT]
        print(f"Quantizing: {' '.join(cmd)}")
        subprocess.run(cmd, check=True)
        os.remove(f16_path)
        print(f"Quantized GGUF saved to {GGUF_PATH}")
    else:
        # No quantize binary found; keep the f16 file
        os.rename(f16_path, GGUF_PATH)
        print(f"llama-quantize not found, using f16 GGUF: {GGUF_PATH}")
        print(f"To quantize manually: llama-quantize {GGUF_PATH} output.gguf {QUANT}")


def create_ollama_model():
    """Create Ollama Modelfile and register the model."""
    step(3, "Creating Ollama model")

    gguf_abs = os.path.abspath(GGUF_PATH)

    modelfile_content = f"""FROM {gguf_abs}

SYSTEM \"\"\"{SYSTEM_PROMPT}\"\"\"

PARAMETER temperature 0.4
PARAMETER num_predict 100
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|endoftext|>"
"""

    with open(MODELFILE_PATH, "w") as f:
        f.write(modelfile_content)
    print(f"Modelfile written to {MODELFILE_PATH}")

    # Check if Ollama is available
    if not shutil.which("ollama"):
        print("\nOllama not found in PATH. Install from https://ollama.com")
        print("Then run manually:")
        print(f"  ollama create {OLLAMA_MODEL_NAME} -f {os.path.abspath(MODELFILE_PATH)}")
        return

    cmd = ["ollama", "create", OLLAMA_MODEL_NAME, "-f", MODELFILE_PATH]
    print(f"Running: {' '.join(cmd)}")
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode == 0:
        print(f"Ollama model '{OLLAMA_MODEL_NAME}' created successfully!")
        print("\nTest it:")
        print(f"  ollama run {OLLAMA_MODEL_NAME}")
        print("\nUse in StockEx docker-compose.yml:")
        print("  OLLAMA_HOST=http://host.docker.internal:11434")
        print(f"  OLLAMA_MODEL={OLLAMA_MODEL_NAME}")
    else:
        print(f"Ollama create failed: {result.stderr}")
        print(f"Try manually: ollama create {OLLAMA_MODEL_NAME} -f {os.path.abspath(MODELFILE_PATH)}")


def main():
    parser = argparse.ArgumentParser(description="Convert StockEx CH Trader to Ollama GGUF")
    parser.add_argument("--llama-cpp", default=os.path.expanduser("~/llama.cpp"),
                        help="Path to llama.cpp repo (default: ~/llama.cpp)")
    parser.add_argument("--skip-merge", action="store_true",
                        help="Skip merge step (use existing merged model)")
    parser.add_argument("--skip-convert", action="store_true",
                        help="Skip GGUF conversion (use existing GGUF)")
    args = parser.parse_args()

    os.makedirs(WORK_DIR, exist_ok=True)

    if not args.skip_merge:
        merge_adapter()
    else:
        print(f"Skipping merge (using {MERGED_DIR})")

    if not args.skip_convert:
        convert_to_gguf(args.llama_cpp)
    else:
        print(f"Skipping conversion (using {GGUF_PATH})")

    create_ollama_model()

    print(f"\n{'='*60}")
    print(" DONE!")
    print(f"{'='*60}")
    print(f" Merged model : {MERGED_DIR}")
    print(f" GGUF file : {GGUF_PATH}")
    print(f" Ollama model : {OLLAMA_MODEL_NAME}")
    print(f"{'='*60}\n")


if __name__ == "__main__":
    main()
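
After ollama create succeeds, a quick end-to-end check is to hit Ollama's HTTP generate endpoint and confirm the model returns parseable JSON, as the system prompt demands. A minimal sketch (the member-state payload shape here is hypothetical; it assumes Ollama is serving on localhost:11434 with the model registered as stockex-ch-trader):

import json
import urllib.request

# Hypothetical member-state/market payload; the real prompt format is
# whatever the adapter was fine-tuned on.
prompt = json.dumps({"cash": 10000.0, "holdings": {}, "prices": {"NESN": 91.2}})

req = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps({"model": "stockex-ch-trader", "prompt": prompt, "stream": False}).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    reply = json.loads(resp.read())["response"]

decision = json.loads(reply)  # raises if the model emitted anything but JSON
print(decision)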