# model_tools/chatml_to_mistral.py
# Convert ChatML-tokenizer models (EOS id 32000) to Mistral metadata (EOS id 2).
import json
import os
import argparse
from colorama import init, Fore, Style
init()
def load_json(path):
    """Best-effort JSON reader.

    Returns the parsed object from *path*, or an empty dict when the file
    is missing, unreadable, or not valid JSON (the caller treats all of
    these the same way).
    """
    try:
        with open(path, 'r', encoding='utf-8') as fp:
            parsed = json.load(fp)
    except Exception:
        # Missing file, permission error, malformed JSON: all collapse to {}.
        return {}
    return parsed
def save_json(path, data):
    """Write *data* to *path* as UTF-8 JSON with 2-space indentation."""
    rendered = json.dumps(data, indent=2)
    with open(path, 'w', encoding='utf-8') as out:
        out.write(rendered)
def convert_model(model_path):
    """Convert one model folder from ChatML EOS (id 32000) to Mistral EOS (id 2).

    Patches metadata only — model weights are untouched:
      * generation_config.json: eos_token_id / pad_token_id -> 2
      * tokenizer_config.json:  eos_token -> "</s>", chat_template removed
      * special_tokens_map.json: eos_token -> "</s>"

    Returns True when the folder was converted, False when it was skipped
    (already Mistral format, or not a ChatML/32000 model).
    """
    model_name = os.path.basename(model_path)
    print(f"Scanning: {model_name}...")
    gen_path = os.path.join(model_path, "generation_config.json")
    tok_conf_path = os.path.join(model_path, "tokenizer_config.json")
    # 1. Check if it is actually a ChatML/32000 model
    gen_data = load_json(gen_path)
    current_eos = gen_data.get("eos_token_id")
    # Handle list format (e.g. [32000, 2]); guard the empty list, which
    # previously raised IndexError — treat it as "no EOS configured".
    if isinstance(current_eos, list):
        current_eos = current_eos[0] if current_eos else None
    if str(current_eos) != "32000":
        # If it's already 2, we skip it (unless forced, but let's be safe)
        if str(current_eos) == "2":
            return False  # Already Mistral format
        print(f" Skipping: EOS ID is {current_eos} (Not 32000)")
        return False
    print(f"{Fore.YELLOW} -> Detected ChatML (EOS: 32000). Converting to Mistral format...{Style.RESET_ALL}")
    # 2. Patch Generation Config
    gen_data["eos_token_id"] = 2
    gen_data["pad_token_id"] = 2  # Usually good practice to align pad/eos for base mistral
    save_json(gen_path, gen_data)
    print(f" Fixed generation_config.json (ID: 2)")
    # 3. Patch Tokenizer Config
    if os.path.exists(tok_conf_path):
        tok_data = load_json(tok_conf_path)
        # Change string to </s>
        tok_data["eos_token"] = "</s>"
        # Remove chat_template if it exists (prevents auto-detection issues later)
        if "chat_template" in tok_data:
            del tok_data["chat_template"]
        save_json(tok_conf_path, tok_data)
        print(f" Fixed tokenizer_config.json (Str: </s>)")
    # 4. Patch Special Tokens Map
    spec_path = os.path.join(model_path, "special_tokens_map.json")
    if os.path.exists(spec_path):
        spec_data = load_json(spec_path)
        spec_data["eos_token"] = "</s>"
        save_json(spec_path, spec_data)
        print(f" Fixed special_tokens_map.json")
    print(f"{Fore.GREEN} -> Successfully converted {model_name}{Style.RESET_ALL}")
    return True
def main():
    """CLI entry point: walk base_dir recursively and convert every model folder.

    A directory counts as a model folder when it contains a config.json;
    conversion itself (including the ChatML check) is delegated to
    convert_model().
    """
    parser = argparse.ArgumentParser(description="Convert ChatML models (EOS 32000) to Mistral format (EOS 2)")
    parser.add_argument("base_dir", help="Directory containing the model folders (e.g. B:\\7B)")
    args = parser.parse_args()
    print(f"{Fore.CYAN}--- CHATML TO MISTRAL CONVERTER ---{Style.RESET_ALL}")
    print("This script changes metadata only. It allows ChatML models to be merged")
    print("using 'tokenizer: source: base' without errors.\n")
    if not os.path.exists(args.base_dir):
        print(f"Error: Directory {args.base_dir} does not exist.")
        return
    # Walk through all subdirectories
    count = 0
    for root, dirs, files in os.walk(args.base_dir):
        for name in dirs:
            # We look at every folder, check if it's a model inside convert_model
            full_path = os.path.join(root, name)
            # Simple check if it looks like a model folder
            if os.path.exists(os.path.join(full_path, "config.json")):
                convert_model(full_path)
                count += 1
    print("-" * 60)
    # Report the tally; previously `count` was computed but never shown.
    print(f"Scan complete. Checked {count} model folder(s).")
# Run the converter only when executed as a script, not on import.
if __name__ == "__main__":
    main()