ke shen committed on
Commit
daee2c3
·
1 Parent(s): 67aaad9

Change the app.py name

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
2
+ from peft import PeftModel
3
+ import torch
4
+
5
# Hub identifiers of the checkpoints this app depends on.
_BASE_CHECKPOINT = "Qwen/Qwen1.5-1.8B"
_LORA_CHECKPOINT = "Miao025/Qwen-KinderChatbot-LoRA"
_REWARD_CHECKPOINT = "Miao025/Qwen-KinderChatbot-Reward"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Generator: the base causal LM with the LoRA adapter attached on top of it.
base_model = AutoModelForCausalLM.from_pretrained(_BASE_CHECKPOINT)
tokenizer_sft = AutoTokenizer.from_pretrained(_BASE_CHECKPOINT)
sft_model = PeftModel.from_pretrained(base_model, _LORA_CHECKPOINT)
sft_model = sft_model.to(device)

# Reranker: a sequence classifier used to score candidate answers.
tokenizer_reward = AutoTokenizer.from_pretrained(_REWARD_CHECKPOINT)
reward_model = AutoModelForSequenceClassification.from_pretrained(_REWARD_CHECKPOINT)
reward_model = reward_model.to(device)
12
+
13
+ # Generate a list of multiple (default to 5) responses using the fine-tuned model
14
def generate_responses(prompt, n=5, max_new_tokens=256):
    """Sample ``n`` candidate answers to ``prompt`` from the fine-tuned model.

    Args:
        prompt: The user's question as plain text.
        n: How many independent samples to draw (default 5).
        max_new_tokens: Upper bound on the number of tokens generated per
            sample, NOT counting the prompt (default 256).

    Returns:
        A list with ``n`` decoded answers. Each answer contains only the
        newly generated text (the prompt is removed) with leading
        whitespace stripped.
    """
    # "pt" asks the tokenizer for PyTorch tensors, which generate() consumes.
    inputs = tokenizer_sft(prompt, return_tensors="pt", truncation=True).to(device)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # new tokens. Remember where the prompt ends so it can be cut off at the
    # token level instead of guessing with string prefix matching, which
    # breaks when decoding does not reproduce the prompt character-for-
    # character and could swallow the first character of the answer.
    prompt_length = inputs["input_ids"].shape[1]

    outputs = []
    for _ in range(n):
        generated_ids = sft_model.generate(
            **inputs,  # the tokenized prompt
            # Budget for the answer only. The previous ``max_length`` also
            # counted the prompt, so a long question left no room to answer.
            max_new_tokens=max_new_tokens,
            do_sample=True,  # sample instead of greedy decoding so the n answers differ
            top_p=0.9,  # nucleus sampling: keep the smallest token set with cumulative p >= 0.9
            temperature=0.8,  # < 1 sharpens the distribution, i.e. less randomness
        )
        new_token_ids = generated_ids[0][prompt_length:]
        # skip_special_tokens drops markers such as padding / end-of-text.
        out = tokenizer_sft.decode(new_token_ids, skip_special_tokens=True)
        outputs.append(out.lstrip())
    return outputs
31
+
32
+ # Score each response using reward model
33
def score_response(prompt, response):
    """Score one candidate answer with the reward model.

    The prompt and the response are tokenized together as a pair and fed to
    the sequence classifier. The logits are turned into probabilities with a
    softmax and the probability at class index 1 is returned (per the
    original author's note, label 1 is the "chosen" class).

    Args:
        prompt: The user's question.
        response: A candidate answer to that question.

    Returns:
        A Python float: the softmax probability of class index 1.
    """
    encoded = tokenizer_reward(prompt, response, return_tensors="pt", truncation=True)
    encoded = encoded.to(device)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        raw_logits = reward_model(**encoded).logits  # unnormalized class scores
        probabilities = raw_logits.softmax(dim=-1)
        # Row 0 is the only example in the batch; column 1 is the class of interest.
        chosen_probability = probabilities[0, 1].item()
    return chosen_probability
39
+
40
+ # Choose the best response
41
def return_best_response(prompt):
    """Generate several candidate answers and return the highest-scoring one.

    Five candidates are sampled with ``generate_responses`` and each is
    scored with ``score_response``; the candidate with the largest score is
    returned (the first one wins on ties).

    Args:
        prompt: The user's question. May be empty or ``None`` when the UI
            textbox is submitted without any text.

    Returns:
        The best candidate answer as a string, or ``""`` when the prompt is
        missing or contains only whitespace.
    """
    # Nothing to answer: skip tokenization/generation entirely instead of
    # sending an empty sequence to the language model.
    if not prompt or not prompt.strip():
        return ""

    candidates = generate_responses(prompt, n=5)
    return max(candidates, key=lambda candidate: score_response(prompt, candidate))
46
+
47
+ # Gradio deploy
48
import gradio as gr

# Input widget: a three-line textbox for the child's question.
question_box = gr.Textbox(lines=3, label="My sweetie, what is your question?:")
# Output widget: shows the answer selected by the reranker.
answer_box = gr.Textbox(label="AI teacher answers you:")

# Wire the widgets to the generate-then-rerank pipeline.
iface = gr.Interface(
    fn=return_best_response,
    inputs=question_box,
    outputs=answer_box,
    title="SFT + Reward Reranker chatbot Demo",
)

# Start the web server that hosts the demo.
iface.launch()