"""Rewrite ChatML model metadata (EOS id 32000) to the Mistral layout (EOS id 2).

Only JSON metadata files are touched; model weights are never read or written.
"""

import argparse
import json
import os

try:
    from colorama import Fore, Style, init
except ImportError:
    # Colour is purely cosmetic, so degrade to plain text when colorama is
    # not installed instead of refusing to run.
    class _PlainText:
        """Stand-in exposing the colour attributes this script uses."""

        YELLOW = GREEN = CYAN = RESET_ALL = ""

    Fore = Style = _PlainText

    def init():
        """No-op replacement for ``colorama.init``."""


init()

# Compared as strings because configs may store the id as int or str.
_CHATML_EOS_ID = "32000"
_MISTRAL_EOS_ID = 2
# The token string that accompanies EOS id 2 in the Mistral vocabulary.
_MISTRAL_EOS_TOKEN = "</s>"

# Sentinel distinguishing "file could not be read" from any valid JSON value.
_UNREADABLE = object()


def _read_json(path):
    """Return the parsed JSON at *path*, or ``_UNREADABLE`` on any failure."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError).
        return _UNREADABLE


def load_json(path):
    """Load JSON from *path*, returning ``{}`` if it is missing or invalid."""
    data = _read_json(path)
    return {} if data is _UNREADABLE else data


def save_json(path, data):
    """Write *data* to *path* as UTF-8 JSON indented by two spaces."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def _patch_eos_token(path, *, drop_chat_template=False):
    """Set ``eos_token`` to ``</s>`` in the JSON object stored at *path*.

    Returns True when the file was rewritten. A missing file is skipped
    silently; a file that does not contain a JSON object is left untouched
    (with a warning) so that a parse failure never destroys its contents.
    """
    if not os.path.exists(path):
        return False

    data = _read_json(path)
    if not isinstance(data, dict):
        print(f" Warning: could not parse {os.path.basename(path)}; left unchanged")
        return False

    data["eos_token"] = _MISTRAL_EOS_TOKEN
    if drop_chat_template:
        # A leftover chat template causes auto-detection issues later.
        data.pop("chat_template", None)
    save_json(path, data)
    return True


def convert_model(model_path):
    """Convert one model folder from ChatML EOS (32000) to Mistral EOS (2).

    Args:
        model_path: Directory holding the model's JSON config files.

    Returns:
        True if the folder was detected as ChatML and its generation config
        was rewritten, False if it was skipped.
    """
    # normpath drops a trailing separator so basename is never empty.
    model_name = os.path.basename(os.path.normpath(model_path))
    print(f"Scanning: {model_name}...")

    gen_path = os.path.join(model_path, "generation_config.json")
    tok_conf_path = os.path.join(model_path, "tokenizer_config.json")
    spec_path = os.path.join(model_path, "special_tokens_map.json")

    # 1. Check if it is actually a ChatML/32000 model
    gen_data = _read_json(gen_path)
    if not isinstance(gen_data, dict):
        # Missing, malformed, or not a JSON object: nothing to detect.
        gen_data = {}
    current_eos = gen_data.get("eos_token_id")

    # Handle list format (e.g. [32000, 2]); an empty list means "no EOS".
    if isinstance(current_eos, list):
        current_eos = current_eos[0] if current_eos else None

    if str(current_eos) != _CHATML_EOS_ID:
        # EOS already 2 means Mistral format: skip silently.
        if str(current_eos) != str(_MISTRAL_EOS_ID):
            print(f" Skipping: EOS ID is {current_eos} (Not 32000)")
        return False

    print(f"{Fore.YELLOW} -> Detected ChatML (EOS: 32000). Converting to Mistral format...{Style.RESET_ALL}")

    # 2. Patch Generation Config
    gen_data["eos_token_id"] = _MISTRAL_EOS_ID
    # Usually good practice to align pad/eos for base mistral
    gen_data["pad_token_id"] = _MISTRAL_EOS_ID
    save_json(gen_path, gen_data)
    print(f" Fixed generation_config.json (ID: 2)")

    # 3. Patch Tokenizer Config (also drops the ChatML chat template)
    if _patch_eos_token(tok_conf_path, drop_chat_template=True):
        print(f" Fixed tokenizer_config.json (Str: {_MISTRAL_EOS_TOKEN})")

    # 4. Patch Special Tokens Map
    if _patch_eos_token(spec_path):
        print(f" Fixed special_tokens_map.json")

    print(f"{Fore.GREEN} -> Successfully converted {model_name}{Style.RESET_ALL}")
    return True


def _iter_model_dirs(base_dir):
    """Yield *base_dir* and every folder below it that holds a config.json."""
    # The base directory itself may be a model folder; os.walk only lists
    # it as a root, never as an entry of ``dirs``.
    if os.path.exists(os.path.join(base_dir, "config.json")):
        yield base_dir

    for root, dirs, _files in os.walk(base_dir):
        for name in dirs:
            full_path = os.path.join(root, name)
            # Simple check if it looks like a model folder
            if os.path.exists(os.path.join(full_path, "config.json")):
                yield full_path


def main():
    """Parse the command line and convert every model folder under base_dir."""
    parser = argparse.ArgumentParser(description="Convert ChatML models (EOS 32000) to Mistral format (EOS 2)")
    parser.add_argument("base_dir", help="Directory containing the model folders (e.g. B:\\7B)")
    args = parser.parse_args()

    print(f"{Fore.CYAN}--- CHATML TO MISTRAL CONVERTER ---{Style.RESET_ALL}")
    print("This script changes metadata only. It allows ChatML models to be merged")
    print("using 'tokenizer: source: base' without errors.\n")

    if not os.path.exists(args.base_dir):
        print(f"Error: Directory {args.base_dir} does not exist.")
        return

    scanned = 0
    converted = 0
    for model_dir in _iter_model_dirs(args.base_dir):
        scanned += 1
        if convert_model(model_dir):
            converted += 1

    print("-" * 60)
    print("Scan complete.")
    print(f"{scanned} model folder(s) checked, {converted} converted.")


if __name__ == "__main__":
    main()