Eeppa committed on
Commit
2bbf7f0
·
verified ·
1 Parent(s): d0c4bd1

Create model_utils.py

Browse files
Files changed (1) hide show
  1. model_utils.py +162 -0
model_utils.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+ from typing import Dict, Optional, Tuple
4
+ import re
5
+
6
class CodeThinkingAssistant:
    """Coding assistant built on a Hugging Face text-generation pipeline.

    Three generation modes are provided:

    * ``generate_fast`` - a single chat-style completion.
    * ``generate_with_thinking`` - two passes: reasoning first, then code
      conditioned on that reasoning.
    * ``generate_with_chain_of_thought`` - one pass in which the model is asked
      to emit ``Reasoning:`` followed by ``Code:``.
    """

    def __init__(self, model_id: str = "your-username/Llama-3.2-1B-Codex", use_gpu: bool = True):
        """
        Initialize the coding assistant with thinking capabilities.

        Note: Replace "your-username/Llama-3.2-1B-Codex" with your actual model ID.
        For testing before fine-tuning, use: "meta-llama/Llama-3.2-1B-Instruct"

        Args:
            model_id: Hub ID (or local path) of the causal LM and its tokenizer.
            use_gpu: Use CUDA when it is available; otherwise run on CPU.
        """
        self.device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
        on_gpu = self.device == "cuda"

        print(f"Loading model on {self.device}...")

        # bfloat16 + automatic device placement on GPU, plain float32 on CPU.
        # NOTE(review): trust_remote_code=True runs Python code shipped in the
        # model repository; only point model_id at repositories you trust.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16 if on_gpu else torch.float32,
            device_map="auto" if on_gpu else None,
            trust_remote_code=True,
        )

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

        # Fall back to the EOS token when the tokenizer defines no padding token.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Pipeline used by every generate_* method.
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device_map="auto" if on_gpu else None,
        )

        print("Model loaded successfully!")

    def _complete(self, prompt: str, **generation_kwargs) -> str:
        """Run the pipeline on a raw string prompt and return only the new text.

        The text-generation pipeline returns the prompt followed by the
        completion unless told otherwise. Parsing that combined string is
        error-prone because the prompts themselves contain the markers the
        parsers look for, so the echoed prompt is removed here.
        """
        full_text = self.pipe(prompt, **generation_kwargs)[0]['generated_text']
        if full_text.startswith(prompt):
            return full_text[len(prompt):]
        return full_text

    def generate_fast(self, prompt: str, max_tokens: int = 500) -> str:
        """Fast generation without thinking mode.

        Args:
            prompt: The user's coding request.
            max_tokens: Upper bound on newly generated tokens.

        Returns:
            The content of the last message in the conversation returned by
            the pipeline (the assistant's reply).
        """
        messages = [
            {"role": "system", "content": "You are an expert coding assistant. Write clean, efficient code."},
            {"role": "user", "content": prompt},
        ]

        response = self.pipe(
            messages,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            top_p=0.95,
        )

        return response[0]['generated_text'][-1]['content']

    def generate_with_thinking(self, prompt: str, max_thought_tokens: int = 300, max_code_tokens: int = 600) -> Dict[str, str]:
        """Generate with an explicit thinking/reasoning step.

        Args:
            prompt: The user's coding request.
            max_thought_tokens: Token budget for the reasoning pass.
            max_code_tokens: Token budget for the code pass.

        Returns:
            A dict with keys ``"thinking"`` (the reasoning text, including the
            prefilled opening sentence) and ``"code"`` (the second-pass output).
        """
        # Step 1: generate the reasoning. The assistant turn is prefilled so
        # the model continues inside an open <thinking> block.
        think_prefill = "Let me break this down step by step:\n"
        think_prompt = (
            "<|system|>\n"
            "You are a coding assistant. Before writing code, think step by step about the solution.\n"
            "\n"
            "<|user|>\n"
            f"{prompt}\n"
            "\n"
            "<|assistant|>\n"
            "<thinking>\n"
        ) + think_prefill

        thought_completion = self._complete(
            think_prompt,
            max_new_tokens=max_thought_tokens,
            temperature=0.6,
            do_sample=True,
            stop_strings=["</thinking>", "<|eot_id|>"],
        )

        # Keep everything up to the closing tag (if the model emitted one).
        thinking_content = (think_prefill + thought_completion.partition("</thinking>")[0]).strip()

        # Step 2: generate code conditioned on the reasoning.
        code_prompt = (
            "<|system|>\n"
            "You are an expert programmer. Based on your reasoning, write clean, efficient code.\n"
            "\n"
            "<|user|>\n"
            f"{prompt}\n"
            "\n"
            "<|assistant|>\n"
            "<thinking>\n"
            f"{thinking_content}\n"
            "</thinking>\n"
            "\n"
            "Here's the solution:\n"
        )

        # Only the completion is returned, so a model output that repeats
        # "Here's the solution:" can no longer truncate the code.
        code = self._complete(
            code_prompt,
            max_new_tokens=max_code_tokens,
            temperature=0.7,
            do_sample=True,
            top_p=0.95,
        )

        return {
            "thinking": thinking_content,
            "code": code.strip(),
        }

    def generate_with_chain_of_thought(self, prompt: str) -> Dict[str, str]:
        """Alternative: integrated chain-of-thought reasoning in a single pass.

        Args:
            prompt: The user's coding request.

        Returns:
            A dict with keys ``"thinking"`` and ``"code"``. When the model never
            emits a ``Code:`` marker, ``"thinking"`` is ``"No reasoning provided"``
            and ``"code"`` holds the whole completion.
        """
        cot_prompt = (
            "<|system|>\n"
            "You are a coding assistant. Always show your reasoning process before providing code.\n"
            "Use this format:\n"
            "\n"
            "Reasoning: [Your step-by-step thought process]\n"
            "Code: [Your solution]\n"
            "\n"
            "<|user|>\n"
            f"{prompt}\n"
            "\n"
            "<|assistant|>\n"
            "Reasoning:"
        )

        # Parse the completion only: the system prompt above contains its own
        # "Reasoning:" / "Code:" template lines, which would otherwise be
        # matched instead of the model's answer.
        completion = self._complete(
            cot_prompt,
            max_new_tokens=800,
            temperature=0.7,
            do_sample=True,
        )

        # The prompt ends with "Reasoning:", so the completion is the reasoning
        # text followed (ideally) by "Code:" and the solution.
        reasoning_part, marker, code_part = completion.partition("Code:")
        if marker:
            reasoning = reasoning_part.strip() or "No reasoning provided"
            code = code_part.strip()
        else:
            reasoning = "No reasoning provided"
            code = completion.strip()

        return {
            "thinking": reasoning,
            "code": code,
        }
151
+
152
# For testing
if __name__ == "__main__":
    # Smoke test against the base model; swap in the fine-tuned model ID once training is done.
    demo = CodeThinkingAssistant("meta-llama/Llama-3.2-1B-Instruct")

    # Single-pass generation.
    fast_answer = demo.generate_fast("Write a function to calculate fibonacci numbers")
    print("Fast mode:", fast_answer)

    # Two-pass generation: reasoning first, then code.
    outcome = demo.generate_with_thinking("Write a function to check if a number is prime")
    thinking_text, code_text = outcome['thinking'], outcome['code']
    print(f"Thinking:\n{thinking_text}\n\nCode:\n{code_text}")