import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "Qwen/Qwen2.5-Coder-7B-Instruct"
ADAPTER_DIR = "./qwen-7b-coder-nl2sql-grpo/final"
OUTPUT_DIR = "./qwen-7b-nl2sql-merged"


def main():
    """Merge the trained LoRA adapters into the base model and save the result.

    Loads the base checkpoint in bfloat16, applies the PEFT adapters found in
    ADAPTER_DIR, folds the adapter weights into the base weights, and writes
    the standalone merged model (together with the tokenizer) to OUTPUT_DIR.
    """
    print("Loading Base Model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    print("Loading Adapters and Merging...")
    # Attach the LoRA adapters and bake them into the base weights in one go,
    # so the saved checkpoint no longer needs peft at inference time.
    merged_model = PeftModel.from_pretrained(base_model, ADAPTER_DIR).merge_and_unload()

    print("Saving Merged Model...")
    merged_model.save_pretrained(OUTPUT_DIR)

    # NOTE(review): tokenizer comes from the base checkpoint — assumes the
    # fine-tune added no new tokens; verify against the training script.
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.save_pretrained(OUTPUT_DIR)

    print(f"Done! Merged model saved to {OUTPUT_DIR}")


if __name__ == "__main__":
    main()