RayMelius Claude Opus 4.6 committed on
Commit
e725a4e
·
1 Parent(s): 578590d

Add script to convert fine-tuned adapter to Ollama GGUF

Browse files

scripts/convert_to_ollama.py:
- Downloads base Qwen2.5-7B + LoRA adapter from HF Hub
- Merges adapter into base model (CPU, ~16GB RAM)
- Converts to GGUF via llama.cpp (Q4_K_M quantization)
- Creates Ollama model with system prompt and parameters

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. .gitignore +3 -0
  2. scripts/convert_to_ollama.py +203 -0
.gitignore CHANGED
@@ -33,6 +33,9 @@ shared_data/order_id.txt
33
  # Docker volumes / local data
34
  matcher_data/
35
 
 
 
 
36
  # Windows artifact
37
  nul
38
 
 
33
  # Docker volumes / local data
34
  matcher_data/
35
 
36
+ # Model files (GGUF, merged weights)
37
+ models/
38
+
39
  # Windows artifact
40
  nul
41
 
scripts/convert_to_ollama.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Convert the StockEx CH Trader LoRA adapter to GGUF for Ollama.
3
+
4
+ Prerequisites:
5
+ pip install torch transformers peft huggingface_hub
6
+ git clone https://github.com/ggerganov/llama.cpp
7
+ cd llama.cpp && pip install -r requirements/requirements-convert_hf_to_gguf.txt
8
+
9
+ Usage:
10
+ python scripts/convert_to_ollama.py
11
+
12
+ This script will:
13
+ 1. Download the base model (Qwen2.5-7B-Instruct)
14
+ 2. Download the LoRA adapter (RayMelius/stockex-ch-trader)
15
+ 3. Merge adapter into base model (CPU, ~16GB RAM needed)
16
+ 4. Convert merged model to GGUF (Q4_K_M quantization)
17
+ 5. Create and register an Ollama model
18
+
19
+ After running, use in StockEx with:
20
+ OLLAMA_HOST=http://localhost:11434 OLLAMA_MODEL=stockex-ch-trader
21
+ """
22
+
23
+ import os
24
+ import sys
25
+ import shutil
26
+ import subprocess
27
+ import argparse
28
+
29
# --- Conversion configuration -------------------------------------------
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"        # HF repo of the base model
ADAPTER_REPO = "RayMelius/stockex-ch-trader"   # HF repo of the LoRA adapter
OLLAMA_MODEL_NAME = "stockex-ch-trader"        # name registered with Ollama
QUANT = "Q4_K_M"                               # llama.cpp quantization type

# All artifacts land under <repo-root>/models/ (gitignored).
WORK_DIR = os.path.join(os.path.dirname(__file__), "..", "models")
MERGED_DIR = os.path.join(WORK_DIR, "merged")
GGUF_PATH = os.path.join(WORK_DIR, f"stockex-ch-trader-{QUANT}.gguf")
MODELFILE_PATH = os.path.join(WORK_DIR, "Modelfile")

# System prompt baked into the Ollama Modelfile at registration time.
SYSTEM_PROMPT = (
    "You are a StockEx clearing house trading agent. "
    "Given a member's financial state and live market data, "
    "you output a single valid JSON trading decision that respects all capital and holdings constraints. "
    "Never output anything other than the JSON object."
)
45
+
46
+
47
def step(n, msg):
    """Print a numbered, visually separated banner for pipeline step *n*."""
    bar = "=" * 60
    print(f"\n{bar}")
    print(f" Step {n}: {msg}")
    print(f"{bar}\n")
51
+
52
+
53
def merge_adapter():
    """Fetch base model and LoRA adapter, fold the adapter in, save to MERGED_DIR.

    Runs entirely on CPU in float16; expect roughly 16 GB of RAM for a 7B
    model. The tokenizer is copied alongside the merged weights so the
    directory is a self-contained HF checkpoint.
    """
    step(1, f"Merging {ADAPTER_REPO} into {BASE_MODEL}")

    # Heavy ML dependencies are imported lazily so --skip-merge runs
    # don't require them to be installed.
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    print("Loading base model (CPU, float16)...")
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="cpu",
        trust_remote_code=True,
    )

    print(f"Loading adapter from {ADAPTER_REPO}...")
    peft_model = PeftModel.from_pretrained(base, ADAPTER_REPO)

    print("Merging adapter weights...")
    merged = peft_model.merge_and_unload()

    os.makedirs(MERGED_DIR, exist_ok=True)
    print(f"Saving merged model to {MERGED_DIR}...")
    merged.save_pretrained(MERGED_DIR)

    # Ship the base tokenizer with the merged checkpoint.
    tok = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tok.save_pretrained(MERGED_DIR)
    print("Merge complete.")
82
+
83
+
84
def _find_quantize_tool(llama_cpp_dir):
    """Return a usable llama.cpp quantize executable, or None if absent.

    For each candidate name — the current binary name (llama-quantize) and
    the legacy one (quantize) — the build output directory is checked first,
    then $PATH. Search priority matches the original inline logic, without
    re-checking the same path twice.
    """
    for name in ("llama-quantize", "quantize"):
        candidate = os.path.join(llama_cpp_dir, "build", "bin", name)
        if os.path.exists(candidate):
            return candidate
        if shutil.which(name):
            return name
    return None


def convert_to_gguf(llama_cpp_dir):
    """Convert the merged HF model in MERGED_DIR to a GGUF file at GGUF_PATH.

    Two stages:
      1. llama.cpp's convert_hf_to_gguf.py produces an f16 GGUF;
      2. llama-quantize compresses it to QUANT.

    If no quantize binary can be found, the f16 file is kept (renamed to
    GGUF_PATH so downstream steps still work) and manual quantization
    instructions are printed. Exits the process with status 1 when the
    llama.cpp conversion script is missing.
    """
    step(2, f"Converting to GGUF ({QUANT})")

    convert_script = os.path.join(llama_cpp_dir, "convert_hf_to_gguf.py")
    if not os.path.exists(convert_script):
        print(f"ERROR: {convert_script} not found.")
        print(f"Clone llama.cpp first: git clone https://github.com/ggerganov/llama.cpp")
        sys.exit(1)

    # Stage 1: HF checkpoint -> f16 GGUF (no quantization yet).
    f16_path = os.path.join(WORK_DIR, "stockex-ch-trader-f16.gguf")
    cmd = [sys.executable, convert_script, MERGED_DIR, "--outfile", f16_path, "--outtype", "f16"]
    print(f"Running: {' '.join(cmd)}")
    subprocess.run(cmd, check=True)

    # Stage 2: quantize, if the tool is available.
    quantize_bin = _find_quantize_tool(llama_cpp_dir)
    if quantize_bin:
        cmd = [quantize_bin, f16_path, GGUF_PATH, QUANT]
        print(f"Quantizing: {' '.join(cmd)}")
        subprocess.run(cmd, check=True)
        os.remove(f16_path)  # drop the intermediate f16 file
        print(f"Quantized GGUF saved to {GGUF_PATH}")
    else:
        # No quantize binary — keep f16 under the final name so later steps work.
        os.rename(f16_path, GGUF_PATH)
        print(f"llama-quantize not found, using f16 GGUF: {GGUF_PATH}")
        print(f"To quantize manually: llama-quantize {GGUF_PATH} output.gguf {QUANT}")
125
+
126
+
127
def create_ollama_model():
    """Write a Modelfile for the GGUF and register it with `ollama create`.

    When the `ollama` CLI is not installed, the Modelfile is still written
    and the manual command is printed instead of failing.
    """
    step(3, "Creating Ollama model")

    gguf_abs = os.path.abspath(GGUF_PATH)

    # Assemble the Modelfile line by line; trailing "" yields a final newline.
    directives = [
        f"FROM {gguf_abs}",
        "",
        f'SYSTEM """{SYSTEM_PROMPT}"""',
        "",
        "PARAMETER temperature 0.4",
        "PARAMETER num_predict 100",
        'PARAMETER stop "<|im_end|>"',
        'PARAMETER stop "<|endoftext|>"',
        "",
    ]
    modelfile_content = "\n".join(directives)

    with open(MODELFILE_PATH, "w") as f:
        f.write(modelfile_content)
    print(f"Modelfile written to {MODELFILE_PATH}")

    # Guard clause: without the ollama CLI we can only print instructions.
    if shutil.which("ollama") is None:
        print("\nOllama not found in PATH. Install from https://ollama.com")
        print(f"Then run manually:")
        print(f" ollama create {OLLAMA_MODEL_NAME} -f {os.path.abspath(MODELFILE_PATH)}")
        return

    cmd = ["ollama", "create", OLLAMA_MODEL_NAME, "-f", MODELFILE_PATH]
    print(f"Running: {' '.join(cmd)}")
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"Ollama create failed: {result.stderr}")
        print(f"Try manually: ollama create {OLLAMA_MODEL_NAME} -f {os.path.abspath(MODELFILE_PATH)}")
        return

    print(f"Ollama model '{OLLAMA_MODEL_NAME}' created successfully!")
    print(f"\nTest it:")
    print(f" ollama run {OLLAMA_MODEL_NAME}")
    print(f"\nUse in StockEx docker-compose.yml:")
    print(f" OLLAMA_HOST=http://host.docker.internal:11434")
    print(f" OLLAMA_MODEL={OLLAMA_MODEL_NAME}")
167
+
168
+
169
def main():
    """CLI entry point: merge adapter -> convert to GGUF -> register with Ollama.

    --skip-merge and --skip-convert allow resuming a partially completed run
    by reusing artifacts already present under WORK_DIR.
    """
    parser = argparse.ArgumentParser(description="Convert StockEx CH Trader to Ollama GGUF")
    parser.add_argument("--llama-cpp", default=os.path.expanduser("~/llama.cpp"),
                        help="Path to llama.cpp repo (default: ~/llama.cpp)")
    parser.add_argument("--skip-merge", action="store_true",
                        help="Skip merge step (use existing merged model)")
    parser.add_argument("--skip-convert", action="store_true",
                        help="Skip GGUF conversion (use existing GGUF)")
    args = parser.parse_args()

    os.makedirs(WORK_DIR, exist_ok=True)

    if args.skip_merge:
        print(f"Skipping merge (using {MERGED_DIR})")
    else:
        merge_adapter()

    if args.skip_convert:
        print(f"Skipping conversion (using {GGUF_PATH})")
    else:
        convert_to_gguf(args.llama_cpp)

    create_ollama_model()

    # Final summary banner.
    bar = "=" * 60
    print(f"\n{bar}")
    print(f" DONE!")
    print(f"{bar}")
    print(f" Merged model : {MERGED_DIR}")
    print(f" GGUF file : {GGUF_PATH}")
    print(f" Ollama model : {OLLAMA_MODEL_NAME}")
    print(f"{bar}\n")


if __name__ == "__main__":
    main()