Eeppa committed on
Commit
ffaba8b
·
verified ·
1 Parent(s): 9dbd0eb

Delete model_utils.py

Browse files
Files changed (1) hide show
  1. model_utils.py +0 -162
model_utils.py DELETED
@@ -1,162 +0,0 @@
1
- import torch
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
- from typing import Dict, Optional, Tuple
4
- import re
5
-
6
class CodeThinkingAssistant:
    """Coding assistant built on a causal LM text-generation pipeline.

    Offers three generation modes:

    * ``generate_fast`` - chat-style request, answer returned directly.
    * ``generate_with_thinking`` - two pipeline calls: reasoning first, then
      code conditioned on that reasoning.
    * ``generate_with_chain_of_thought`` - one pipeline call whose output is
      split into a reasoning part and a code part.
    """

    # Final line of the thinking prompt; it is the start of the assistant's
    # reasoning, so it is kept as the opening of the returned "thinking" text.
    _THINKING_LEAD_IN = "Let me break this down step by step:\n"

    def __init__(self, model_id: str = "your-username/Llama-3.2-1B-Codex", use_gpu: bool = True):
        """
        Initialize the coding assistant with thinking capabilities

        Note: Replace "your-username/Llama-3.2-1B-Codex" with your actual model ID
        For testing before fine-tuning, use: "meta-llama/Llama-3.2-1B-Instruct"

        Args:
            model_id: Model identifier handed to ``from_pretrained`` for both
                the model and the tokenizer.
            use_gpu: Use CUDA when it is available; otherwise (or when False)
                everything runs on CPU.
        """
        self.device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
        on_gpu = self.device == "cuda"

        print(f"Loading model on {self.device}...")

        # Load model with optimizations: bfloat16 + automatic placement on GPU,
        # float32 with default placement on CPU.
        # NOTE(review): trust_remote_code=True lets the model repository run its
        # own Python code at load time - confirm this is needed for model_id.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16 if on_gpu else torch.float32,
            device_map="auto" if on_gpu else None,
            trust_remote_code=True
        )

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

        # Set padding token (fall back to EOS when the tokenizer defines none)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Create pipeline for easy generation
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device_map="auto" if on_gpu else None
        )

        print("Model loaded successfully!")

    def _complete(self, prompt: str, **generation_kwargs: object) -> str:
        """Run the pipeline on a raw string prompt and return only the new text.

        The pipeline is asked not to echo the prompt (``return_full_text=False``).
        If the prompt is echoed anyway, it is stripped here, so callers never
        parse their own prompt template by accident.
        """
        outputs = self.pipe(prompt, return_full_text=False, **generation_kwargs)
        text = outputs[0]['generated_text']
        if prompt and text.startswith(prompt):
            text = text[len(prompt):]
        return text

    @staticmethod
    def _split_reasoning_and_code(generated: str) -> Dict[str, str]:
        """Split a ``<reasoning> Code: <solution>`` continuation into its parts.

        Without a ``Code:`` marker the whole text is returned as code and the
        reasoning is reported as "No reasoning provided".
        """
        body = generated.strip()
        # The prompt already ends with "Reasoning:"; tolerate the model repeating it.
        if body.startswith("Reasoning:"):
            body = body[len("Reasoning:"):]

        reasoning, marker, code = body.partition("Code:")
        if not marker:
            return {"thinking": "No reasoning provided", "code": body.strip()}
        return {
            "thinking": reasoning.strip() or "No reasoning provided",
            "code": code.strip()
        }

    def generate_fast(self, prompt: str, max_tokens: int = 500) -> str:
        """Fast generation without thinking mode

        Sends a system + user message pair to the pipeline and returns the
        content of the last message in the returned conversation.
        """
        messages = [
            {"role": "system", "content": "You are an expert coding assistant. Write clean, efficient code."},
            {"role": "user", "content": prompt}
        ]

        response = self.pipe(
            messages,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            top_p=0.95
        )

        return response[0]['generated_text'][-1]['content']

    def generate_with_thinking(self, prompt: str, max_thought_tokens: int = 300, max_code_tokens: int = 600) -> Dict[str, str]:
        """Generate with explicit thinking/reasoning step

        Args:
            prompt: The user's coding request.
            max_thought_tokens: ``max_new_tokens`` for the reasoning call.
            max_code_tokens: ``max_new_tokens`` for the code call.

        Returns:
            Dict with the stripped reasoning under "thinking" and the stripped
            second-stage output under "code".
        """
        # NOTE(review): the <|system|>/<|user|>/<|assistant|> markers below are
        # plain text in a raw prompt, not produced by the tokenizer's chat
        # template - confirm the target model was trained on this format.

        # Step 1: Generate thinking process
        think_prompt = (
            "<|system|>\n"
            "You are a coding assistant. Before writing code, think step by step about the solution.\n"
            "\n"
            "<|user|>\n"
            f"{prompt}\n"
            "\n"
            "<|assistant|>\n"
            "<thinking>\n"
            + self._THINKING_LEAD_IN
        )

        thoughts = self._complete(
            think_prompt,
            max_new_tokens=max_thought_tokens,
            temperature=0.6,
            do_sample=True,
            stop_strings=["</thinking>", "<|eot_id|>"],
            # generate() needs a tokenizer to evaluate stop_strings.
            tokenizer=self.tokenizer
        )

        # Extract just the thinking part: the matched stop string stays in the
        # output, so cut at the closing tag.
        thoughts = thoughts.split("</thinking>", 1)[0]
        thinking_content = (self._THINKING_LEAD_IN + thoughts).strip()

        # Step 2: Generate code based on thinking
        code_prompt = (
            "<|system|>\n"
            "You are an expert programmer. Based on your reasoning, write clean, efficient code.\n"
            "\n"
            "<|user|>\n"
            f"{prompt}\n"
            "\n"
            "<|assistant|>\n"
            "<thinking>\n"
            f"{thinking_content}\n"
            "</thinking>\n"
            "\n"
            "Here's the solution:\n"
        )

        code = self._complete(
            code_prompt,
            max_new_tokens=max_code_tokens,
            temperature=0.7,
            do_sample=True,
            top_p=0.95
        )

        return {
            "thinking": thinking_content,
            "code": code.strip()
        }

    def generate_with_chain_of_thought(self, prompt: str) -> Dict[str, str]:
        """Alternative: Integrated chain-of-thought reasoning

        A single pipeline call produces reasoning followed by ``Code:`` and the
        solution. Only the generated continuation is parsed, because the system
        prompt itself contains a "Reasoning: ... Code: ..." format example that
        would otherwise be matched instead of the model's answer.

        Returns:
            Dict with "thinking" (reasoning, or "No reasoning provided") and
            "code".
        """
        cot_prompt = (
            "<|system|>\n"
            "You are a coding assistant. Always show your reasoning process before providing code.\n"
            "Use this format:\n"
            "\n"
            "Reasoning: [Your step-by-step thought process]\n"
            "Code: [Your solution]\n"
            "\n"
            "<|user|>\n"
            f"{prompt}\n"
            "\n"
            "<|assistant|>\n"
            "Reasoning:"
        )

        response = self._complete(
            cot_prompt,
            max_new_tokens=800,
            temperature=0.7,
            do_sample=True
        )

        # Parse reasoning and code
        return self._split_reasoning_and_code(response)
151
-
152
# For testing
if __name__ == "__main__":
    # Smoke test against the base model (swap in the fine-tuned model ID after training).
    demo = CodeThinkingAssistant("meta-llama/Llama-3.2-1B-Instruct")

    # Fast mode: direct answer, no reasoning step.
    fast_answer = demo.generate_fast("Write a function to calculate fibonacci numbers")
    print("Fast mode:", fast_answer)

    # Thinking mode: reasoning first, then the code built on it.
    outcome = demo.generate_with_thinking("Write a function to check if a number is prime")
    thinking_text = outcome['thinking']
    code_text = outcome['code']
    print(f"Thinking:\n{thinking_text}\n\nCode:\n{code_text}")