import os
import warnings

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Quiet TensorFlow's C++ startup logging and the UserWarnings that
# transformers' generation utilities emit.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings("ignore", category=UserWarning, module='transformers.generation.utils')

def load_model_and_tokenizer():
    """Load the GPTQ-quantized phi-2 base and attach the fine-tuned LoRA adapter."""
    base_model = "TheBloke/phi-2-GPTQ"
    peft_model_id = "STEM-AI-mtl/phi-2-electrical-engineering"

    # device_map="cuda:0" places the weights on the GPU at load time,
    # so no follow-up .to('cuda') call is needed.
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        device_map="cuda:0",
        return_dict=True,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model)

    # Wrap the base model with the electrical-engineering LoRA adapter.
    model = PeftModel.from_pretrained(model, peft_model_id, trust_remote_code=True)
    return model, tokenizer

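# Optional extra (a sketch, not part of the script above): peft's
# merge_and_unload() folds the LoRA weights into the base model so inference
# no longer goes through the adapter indirection. peft refuses to merge into
# GPTQ-quantized weights like TheBloke/phi-2-GPTQ, so this only applies if you
# load a full-precision base such as "microsoft/phi-2" instead.
def merge_adapter(peft_model):
    # Returns a plain transformers model with the adapter weights baked in.
    return peft_model.merge_and_unload()
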
def generate(instruction, model, tokenizer):
    inputs = tokenizer(instruction, return_tensors="pt", return_attention_mask=False)
    inputs = inputs.to('cuda')
    outputs = model.generate(
        **inputs,
        max_length=350,            # counts the prompt tokens too; raise it for longer answers
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.0,    # 1.0 means no penalty; try ~1.1 to curb repetition
        pad_token_id=tokenizer.eos_token_id,  # phi-2 defines no pad token
    )
    text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return text

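# A streaming variant (an addition, not in the original script): transformers'
# TextStreamer prints tokens to stdout as they are generated, which reads more
# naturally in an interactive loop. Sampling settings mirror generate() above.
def generate_streaming(instruction, model, tokenizer):
    from transformers import TextStreamer

    inputs = tokenizer(instruction, return_tensors="pt", return_attention_mask=False).to('cuda')
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    outputs = model.generate(
        **inputs,
        streamer=streamer,  # decoded text is printed incrementally as a side effect
        max_length=350,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
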
if __name__ == '__main__':
    model, tokenizer = load_model_and_tokenizer()
    while True:
        instruction = input("Enter your instruction: ")
        if not instruction:
            continue
        if instruction.lower() in ["exit", "quit", "exit()", "quit()"]:
            print("Exiting...")
            break

        answer = generate(instruction, model, tokenizer)
        print(f'Answer: {answer}')