Spaces:

Eeppa
/

Llama-3.2-1B-Codex

Configuration error

App Files Files Community

Eeppa committed 6 days ago

Commit

ffaba8b

verified ·

1 Parent(s): 9dbd0eb

Delete model_utils.py

Browse files

Files changed (1) hide show

model_utils.py +0 -162

model_utils.py DELETED Viewed

@@ -1,162 +0,0 @@
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from typing import Dict, Optional, Tuple
-import re
class CodeThinkingAssistant:
    """Coding assistant built on a causal language model.

    Three generation modes are offered:

    * ``generate_fast`` - one chat-style pass, returns the answer text.
    * ``generate_with_thinking`` - two passes: reasoning first, then code
      conditioned on that reasoning.
    * ``generate_with_chain_of_thought`` - one pass in which the model is
      asked to emit ``Reasoning:`` followed by ``Code:``.
    """

    def __init__(self, model_id: str = "your-username/Llama-3.2-1B-Codex", use_gpu: bool = True):
        """
        Initialize the coding assistant with thinking capabilities.

        Note: Replace "your-username/Llama-3.2-1B-Codex" with your actual model ID
        For testing before fine-tuning, use: "meta-llama/Llama-3.2-1B-Instruct"

        Args:
            model_id: Hub ID or local path handed to ``from_pretrained``.
            use_gpu: When True, CUDA is used if ``torch.cuda.is_available()``;
                otherwise the model is loaded on the CPU.
        """
        self.device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
        print(f"Loading model on {self.device}...")
        on_gpu = self.device == "cuda"
        # Load model with optimizations: bfloat16 + automatic placement on GPU,
        # plain float32 on CPU.
        # NOTE(security): trust_remote_code=True lets the model repository run
        # its own Python code at load time. Kept for backward compatibility;
        # only point model_id at repositories you trust.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16 if on_gpu else torch.float32,
            device_map="auto" if on_gpu else None,
            trust_remote_code=True
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Set padding token: fall back to EOS when the tokenizer defines none.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        # Create pipeline for easy generation
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device_map="auto" if on_gpu else None
        )
        print("Model loaded successfully!")

    @staticmethod
    def _completion(full_text: str, prompt_prefix: str, fallback_marker: str) -> str:
        """Return the part of ``full_text`` that follows the echoed prompt.

        For string prompts the pipeline output is read here as
        "prompt + newly generated text". Parsing that combined text directly
        is fragile because markers that appear in the prompt itself (or that
        the model repeats) get matched instead of the intended ones.

        Args:
            full_text: The ``generated_text`` value returned by the pipeline.
            prompt_prefix: The prompt (or leading part of it) expected at the
                start of ``full_text``.
            fallback_marker: Used only when ``full_text`` does not start with
                ``prompt_prefix``; the text after the last occurrence of this
                marker is returned, or all of ``full_text`` if it is absent.
        """
        if full_text.startswith(prompt_prefix):
            return full_text[len(prompt_prefix):]
        # rpartition returns ('', '', full_text) when the marker is missing,
        # so index 2 is always the right fallback.
        return full_text.rpartition(fallback_marker)[2]

    def generate_fast(self, prompt: str, max_tokens: int = 500) -> str:
        """Fast generation without thinking mode.

        Args:
            prompt: The user's request.
            max_tokens: Upper bound on newly generated tokens.

        Returns:
            The ``content`` of the last message in the returned conversation.
        """
        messages = [
            {"role": "system", "content": "You are an expert coding assistant. Write clean, efficient code."},
            {"role": "user", "content": prompt}
        ]
        response = self.pipe(
            messages,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            top_p=0.95
        )
        # With chat-style input, generated_text is the message list with the
        # model's reply appended as the final entry.
        return response[0]['generated_text'][-1]['content']

    def generate_with_thinking(self, prompt: str, max_thought_tokens: int = 300, max_code_tokens: int = 600) -> Dict[str, str]:
        """Generate with explicit thinking/reasoning step.

        Args:
            prompt: The user's request.
            max_thought_tokens: Token budget for the reasoning pass.
            max_code_tokens: Token budget for the code pass.

        Returns:
            A dict with keys ``"thinking"`` (the reasoning text) and
            ``"code"`` (the text generated after "Here's the solution:").
        """
        # NOTE(review): the <|system|>/<|user|>/<|assistant|> markers below are
        # plain text written by hand, not the tokenizer's chat template -
        # confirm the fine-tuned model was trained on this layout.

        # Step 1: Generate thinking process
        think_header = (
            "<|system|>\n"
            "You are a coding assistant. Before writing code, think step by step about the solution.\n"
            "<|user|>\n"
            f"{prompt}\n"
            "<|assistant|>\n"
            "<thinking>"
        )
        think_prompt = think_header + "\nLet me break this down step by step:\n"
        thoughts = self.pipe(
            think_prompt,
            max_new_tokens=max_thought_tokens,
            temperature=0.6,
            do_sample=True,
            stop_strings=["</thinking>", "<|eot_id|>"],
            # generate() needs the tokenizer to evaluate stop_strings.
            tokenizer=self.tokenizer
        )[0]['generated_text']
        # Extract just the thinking part: everything after the opening tag
        # (this keeps the "Let me break this down" lead-in) up to the closing
        # tag, if the model produced one.
        thinking_content = self._completion(thoughts, think_header, "<thinking>")
        thinking_content = thinking_content.partition("</thinking>")[0]

        # Step 2: Generate code based on thinking
        code_prompt = (
            "<|system|>\n"
            "You are an expert programmer. Based on your reasoning, write clean, efficient code.\n"
            "<|user|>\n"
            f"{prompt}\n"
            "<|assistant|>\n"
            "<thinking>\n"
            f"{thinking_content}\n"
            "</thinking>\n"
            "Here's the solution:\n"
        )
        code_response = self.pipe(
            code_prompt,
            max_new_tokens=max_code_tokens,
            temperature=0.7,
            do_sample=True,
            top_p=0.95
        )[0]['generated_text']
        # Extract code by removing the echoed prompt, so a repeated
        # "Here's the solution:" inside the answer cannot truncate it.
        code = self._completion(code_response, code_prompt, "Here's the solution:")
        return {
            "thinking": thinking_content.strip(),
            "code": code.strip()
        }

    def generate_with_chain_of_thought(self, prompt: str) -> Dict[str, str]:
        """Alternative: Integrated chain-of-thought reasoning.

        Args:
            prompt: The user's request.

        Returns:
            A dict with keys ``"thinking"`` and ``"code"``. When the model
            output has no ``Code:`` marker, ``"thinking"`` is
            ``"No reasoning provided"`` and ``"code"`` is the whole output.
        """
        cot_prompt = (
            "<|system|>\n"
            "You are a coding assistant. Always show your reasoning process before providing code.\n"
            "Use this format:\n"
            "Reasoning: [Your step-by-step thought process]\n"
            "Code: [Your solution]\n"
            "<|user|>\n"
            f"{prompt}\n"
            "<|assistant|>\n"
            "Reasoning:"
        )
        response = self.pipe(
            cot_prompt,
            max_new_tokens=800,
            temperature=0.7,
            do_sample=True
        )[0]['generated_text']
        # Parse reasoning and code from the model's own text only. The system
        # prompt contains "Reasoning:" and "Code:" as a format example, so
        # searching the full echoed text would match that template instead.
        completion = self._completion(response, cot_prompt, "<|assistant|>\nReasoning:")
        reasoning, marker, code = completion.partition("Code:")
        if not marker:
            return {
                "thinking": "No reasoning provided",
                "code": completion.strip()
            }
        return {
            "thinking": reasoning.strip(),
            "code": code.strip()
        }
# Manual smoke test: run this module directly to exercise both modes.
if __name__ == "__main__":
    # The base instruct checkpoint stands in until a fine-tuned model ID exists.
    demo = CodeThinkingAssistant("meta-llama/Llama-3.2-1B-Instruct")

    # Fast mode: single pass, answer text only.
    fast_answer = demo.generate_fast("Write a function to calculate fibonacci numbers")
    print("Fast mode:", fast_answer)

    # Thinking mode: reasoning first, then the code derived from it.
    outcome = demo.generate_with_thinking("Write a function to check if a number is prime")
    thinking_text = outcome['thinking']
    code_text = outcome['code']
    print(f"Thinking:\n{thinking_text}\n\nCode:\n{code_text}")