Spaces:

Eeppa
/

Llama-3.2-1B-Codex

Configuration error

App Files Community

Eeppa committed 6 days ago

Commit

6953393

verified ·

1 Parent(s): 0d19d5e

Update app.py

Browse files

Files changed (1) hide show

app.py +372 -179

app.py CHANGED Viewed

@@ -1,235 +1,428 @@
 import gradio as gr
-from model_utils import CodeThinkingAssistant
 import time
-# Initialize the assistant
-# IMPORTANT: Replace with your fine-tuned model ID after training
-MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # CHANGE THIS to your model after fine-tuning
-print("🚀 Initializing Llama 3.2 Codex Assistant...")
-assistant = CodeThinkingAssistant(MODEL_ID)
-print("✅ Ready to help with coding!")
-# Custom CSS for better UI
 custom_css = """
 <style>
-    .thinking-mode {
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        padding: 2px;
-        border-radius: 10px;
     }
-    .fast-mode {
-        background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
-        padding: 2px;
-        border-radius: 10px;
     }
-    .thinking-box {
-        background-color: #f0f4ff;
-        border-left: 4px solid #667eea;
-        padding: 10px;
-        margin: 10px 0;
-        border-radius: 5px;
         font-family: monospace;
     }
-    .code-box {
-        background-color: #1e1e1e;
-        color: #d4d4d4;
         padding: 15px;
-        border-radius: 5px;
-        font-family: 'Courier New', monospace;
         overflow-x: auto;
     }
-    footer {
-        visibility: visible;
-        text-align: center;
-        margin-top: 20px;
-        font-size: 12px;
     }
 </style>
 """
-def format_code_output(code: str) -> str:
-    """Format code for display"""
-    return f'<div class="code-box"><pre>{code}</pre></div>'
-def format_thinking_output(thinking: str) -> str:
-    """Format thinking process for display"""
-    return f'<div class="thinking-box">💭 <strong>Thinking process:</strong><br>{thinking}</div>'
-def respond(
-    message: str,
-    history: list,
-    thinking_mode: bool,
-    reasoning_style: str,
-    temperature: float,
-    max_tokens: int
-):
-    """Main response function for Gradio chat"""
-    if not message.strip():
-        yield "Please enter a coding question or task."
-        return
-    # Show thinking indicator
-    yield "🤔 Thinking" + "." * 3
-    try:
-        if thinking_mode:
-            if reasoning_style == "Step-by-step thinking":
-                result = assistant.generate_with_thinking(
-                    message,
-                    max_thought_tokens=300,
-                    max_code_tokens=max_tokens
-                )
-            else:  # Chain-of-thought
-                result = assistant.generate_with_chain_of_thought(message)
-            # Format output with both thinking and code
-            formatted_output = ""
-            if result.get("thinking"):
-                formatted_output += format_thinking_output(result["thinking"])
-            if result.get("code"):
-                formatted_output += "\n\n" + format_code_output(result["code"])
-            yield formatted_output
-        else:
-            # Fast mode
-            code = assistant.generate_fast(message, max_tokens=max_tokens)
-            yield format_code_output(code)
-    except Exception as e:
-        yield f"❌ Error: {str(e)}\n\nPlease make sure the model is loaded correctly."
-# Build the Gradio interface
-with gr.Blocks(css=custom_css, title="Llama 3.2 Codex - AI Coding Assistant") as demo:
     gr.Markdown("""
-    # 🤖 Llama 3.2 1B Codex
-    ### Your AI Pair Programmer with Thinking Mode
-    Built with Llama 3.2 - Specialized for code generation with explicit reasoning.
     """)
-    with gr.Row():
-        with gr.Column(scale=2):
-            # Chat interface
             chatbot = gr.Chatbot(
-                label="Code Assistant",
                 height=500,
                 bubble_full_width=False
             )
             with gr.Row():
                 msg = gr.Textbox(
-                    label="Ask for code help",
-                    placeholder="Example: 'Write a function to sort a list of dictionaries by a key' or 'Explain this algorithm...'",
-                    scale=4
                 )
                 send_btn = gr.Button("Send", variant="primary", scale=1)
             with gr.Row():
                 clear_btn = gr.Button("Clear Chat")
-                example_btn = gr.Button("Load Example")
-        with gr.Column(scale=1):
-            gr.Markdown("### ⚙️ Settings")
-            thinking_mode = gr.Checkbox(
-                label="🧠 Enable Thinking Mode",
-                value=True,
-                info="Shows reasoning process before generating code"
-            )
-            reasoning_style = gr.Radio(
-                choices=["Step-by-step thinking", "Chain-of-thought"],
-                value="Step-by-step thinking",
-                label="Reasoning style",
-                visible=True
-            )
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=1.5,
-                value=0.7,
-                step=0.1,
-                label="Temperature (creativity)",
-                info="Lower = more focused, Higher = more creative"
-            )
-            max_tokens = gr.Slider(
-                minimum=100,
-                maximum=1500,
-                value=600,
-                step=50,
-                label="Max response length",
-                info="Maximum tokens in response"
-            )
-            gr.Markdown("""
-            ---
-            ### 📌 Tips
-            - **Thinking Mode ON**: Best for complex problems
-            - **Thinking Mode OFF**: Faster responses for simple code
-            - Be specific in your requests
-            - Ask for explanations or optimizations
-            """)
-    # Example prompts
-    examples = gr.Examples(
-        examples=[
-            "Write a Python function to find the longest common prefix in a list of strings",
-            "Implement a binary search tree with insert and search methods",
-            "Explain the difference between deep and shallow copy in Python with examples",
-            "Write a recursive function to generate all permutations of a string",
-            "Create a decorator that measures function execution time",
-            "Implement a simple URL shortener using dictionary",
-            "Write a function to check if two strings are anagrams",
-            "Create a class for a bank account with deposit, withdraw, and interest calculation"
-        ],
-        inputs=msg,
-        label="Example Prompts (click to try)"
-    )
     # Event handlers
-    def respond_wrapper(message, history, thinking_mode, reasoning_style, temperature, max_tokens):
-        response_generator = respond(message, history, thinking_mode, reasoning_style, temperature, max_tokens)
-        for response in response_generator:
-            history.append((message, response))
-            yield history, ""
-            # Reset after yielding
-            history = []
-            yield history, ""
-    # Wire up the events
     send_btn.click(
         respond,
-        [msg, chatbot, thinking_mode, reasoning_style, temperature, max_tokens],
         [chatbot, msg]
     )
     msg.submit(
         respond,
-        [msg, chatbot, thinking_mode, reasoning_style, temperature, max_tokens],
         [chatbot, msg]
     )
-    clear_btn.click(lambda: None, None, chatbot, queue=False)
-    example_btn.click(
-        lambda: "Write a function to check if a string is a palindrome (ignoring spaces, punctuation, and case)",
-        None,
-        msg
     )
-    # Footer with required attribution
-    gr.Markdown("""
-    ---
-    <footer>
-    <b>Built with Llama</b> • Llama 3.2 1B Codex • <a href="https://llama.meta.com/" target="_blank">Meta Llama 3.2</a><br>
-    Licensed under <a href="/LICENSE.txt">Llama 3.2 Community License</a>
-    </footer>
-    """)
 if __name__ == "__main__":
     demo.launch(share=True)

 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import time
+from typing import Dict, List, Tuple
+from code_shower import CodeShower
+from file_manager import FileManager
+class DualModeAssistant:
+    def __init__(self):
+        print("🔄 Loading Llama 3.2 (General purpose)...")
+        self.llama_model_id = "meta-llama/Llama-3.2-1B-Instruct"
+        self.llama_pipe = pipeline(
+            "text-generation",
+            model=self.llama_model_id,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            token=True  # Uses HF_TOKEN from env if available
+        )
+        print("💻 Loading Maincoder (Code specialist)...")
+        self.codex_model_id = "maincode/maincoder-1b"
+        self.codex_pipe = pipeline(
+            "text-generation",
+            model=self.codex_model_id,
+            torch_dtype=torch.bfloat16,
+            device_map="auto"
+        )
+        self.current_mode = "codex"
+        self.file_manager = FileManager()
+    def generate_with_thinking(self, prompt: str, mode: str, history: List = None) -> Dict:
+        """Generate with thinking process"""
+        self.current_mode = mode
+        # Choose model
+        if mode == "codex":
+            pipe = self.codex_pipe
+            system_prompt = """You are Maincoder, a specialized coding assistant.
+When asked to write code, always output complete files with their filenames as markdown code blocks.
+Example format:
+```python app.py
+print("Hello")
+html
+<h1>Hello</h1>
+```"""
+        else:
+            pipe = self.llama_pipe
+            system_prompt = "You are a helpful general assistant. Answer questions thoroughly."
+        # Build messages
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": prompt}
+        ]
+        # Add conversation history if provided
+        if history:
+            for h in history[-4:]:  # Last 4 exchanges
+                if isinstance(h, dict):
+                    messages.append(h)
+        # Generate thinking (using system prompt to encourage reasoning)
+        full_response = pipe(
+            messages,
+            max_new_tokens=1000,
+            temperature=0.7,
+            do_sample=True,
+            top_p=0.95
+        )[0]['generated_text']
+        # Extract the assistant's response
+        if isinstance(full_response, list):
+            assistant_msg = full_response[-1].get('content', '')
+        else:
+            # Parse the full text
+            assistant_msg = full_response
+        # Detect and extract code blocks for file tree
+        files = self.file_manager.extract_files_from_code(assistant_msg)
+        return {
+            "response": assistant_msg,
+            "model_used": "Codex (Coding Specialist)" if mode == "codex" else "Llama (General)",
+            "files": files
+        }
+# Initialize components
+assistant = DualModeAssistant()
+code_shower = CodeShower()
+# Custom CSS
 custom_css = """
 <style>
+    /* Main layout */
+    .main-container {
+        display: flex;
+        gap: 20px;
+        height: 100vh;
     }
+    .chat-panel {
+        flex: 1;
+        min-width: 400px;
     }
+    .code-panel {
+        width: 450px;
+        border-left: 1px solid #ddd;
+        padding-left: 15px;
+        overflow-y: auto;
+    }
+    /* File tree styling */
+    .file-tree {
+        max-height: 300px;
+        overflow-y: auto;
+        border: 1px solid #e0e0e0;
+        border-radius: 8px;
+        background: #fafafa;
+    }
+    .file-item {
+        display: flex;
+        align-items: center;
+        padding: 8px 12px;
+        border-bottom: 1px solid #eee;
+        cursor: pointer;
+        transition: background 0.2s;
+    }
+    .file-item:hover {
+        background: #f0f0f0;
+    }
+    .file-item.active {
+        background: #e3f2fd;
+        border-left: 3px solid #2196f3;
+    }
+    .file-logo {
+        font-size: 1.2em;
+        margin-right: 10px;
+    }
+    .file-name {
+        flex: 1;
         font-family: monospace;
+        font-size: 0.9em;
+    }
+    .file-badge {
+        font-size: 0.7em;
+        padding: 2px 6px;
+        border-radius: 10px;
+        background: #e0e0e0;
+        margin-left: 8px;
+    }
+    .file-delete {
+        background: none;
+        border: none;
+        cursor: pointer;
+        opacity: 0.5;
+        margin-left: 8px;
     }
+    .file-delete:hover {
+        opacity: 1;
+    }
+    .file-tree-empty {
+        padding: 20px;
+        text-align: center;
+        color: #999;
+    }
+    /* Preview area */
+    .preview-container {
+        border: 1px solid #ddd;
+        border-radius: 8px;
+        overflow: hidden;
+        background: white;
+    }
+    .preview-placeholder, .preview-error {
+        padding: 40px;
+        text-align: center;
+        color: #999;
+        background: #f9f9f9;
+        border-radius: 8px;
+    }
+    /* Code viewer */
+    .code-viewer {
+        background: #1e1e1e;
+        border-radius: 8px;
+        overflow: hidden;
+    }
+    .code-header {
+        display: flex;
+        justify-content: space-between;
+        padding: 8px 12px;
+        background: #2d2d2d;
+        color: white;
+        border-bottom: 1px solid #444;
+    }
+    .code-block {
+        margin: 0;
         padding: 15px;
         overflow-x: auto;
+        font-family: 'Courier New', monospace;
+        font-size: 13px;
+        line-height: 1.4;
     }
+    .copy-btn {
+        background: #007bff;
+        border: none;
+        color: white;
+        padding: 4px 12px;
+        border-radius: 4px;
+        cursor: pointer;
+    }
+    .copy-btn:hover {
+        background: #0056b3;
+    }
+    /* Thinking mode bubble */
+    .thinking-bubble {
+        background: #f0f4ff;
+        border-left: 4px solid #667eea;
+        padding: 10px 15px;
+        margin: 10px 0;
+        border-radius: 8px;
+        font-style: italic;
+        color: #555;
+    }
+    /* Chat messages */
+    .message {
+        margin-bottom: 15px;
+    }
+    .user-message {
+        background: #e3f2fd;
+        padding: 10px;
+        border-radius: 10px;
+        margin-left: 20%;
+    }
+    .assistant-message {
+        background: #f5f5f5;
+        padding: 10px;
+        border-radius: 10px;
+        margin-right: 20%;
+    }
+    /* Responsive */
+    @media (max-width: 800px) {
+        .code-panel {
+            display: none;
+        }
+        .chat-panel {
+            min-width: 100%;
+        }
     }
 </style>
 """
+# Create the Gradio interface
+with gr.Blocks(css=custom_css, title="Llama Codex - Dual Mode Assistant", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🤖 Llama Codex - Dual Mode AI Coding Assistant
+    **Switch between two specialized AI modes:**
+    - 🧠 **Llama Mode**: General conversations, explanations, Q&A
+    - 💻 **Codex Mode**: Specialized coding with file extraction and previews
+    > 💡 Inspired by DeepSeek-R1 - both modes show their reasoning process before responding!
     """)
+    with gr.Row(elem_classes="main-container"):
+        # Left panel: Chat
+        with gr.Column(elem_classes="chat-panel", scale=2):
+            with gr.Row():
+                mode_selector = gr.Radio(
+                    choices=["💻 Codex Mode (Coding Specialist)", "🧠 Llama Mode (General)"],
+                    label="Select AI Mode",
+                    value="💻 Codex Mode (Coding Specialist)",
+                    interactive=True
+                )
+            with gr.Row():
+                thinking_toggle = gr.Checkbox(
+                    label="🧠 Show Thinking Process",
+                    value=True,
+                    info="Shows the AI's reasoning before the final answer"
+                )
             chatbot = gr.Chatbot(
+                label="Assistant",
                 height=500,
                 bubble_full_width=False
             )
             with gr.Row():
                 msg = gr.Textbox(
+                    label="Your message",
+                    placeholder="Ask me to write code, explain concepts, or help debug...",
+                    scale=4,
+                    lines=3
                 )
                 send_btn = gr.Button("Send", variant="primary", scale=1)
             with gr.Row():
                 clear_btn = gr.Button("Clear Chat")
+                gr.Markdown("""
+                **Example prompts:**
+                - "Write a Python function to calculate fibonacci"
+                - "Create an HTML game of Snake"
+                - "Explain how recursion works"
+                - "Debug this: `for i in range(10) print(i)`"
+                """)
+        # Right panel: Code Shower
+        with gr.Column(elem_classes="code-panel", scale=1):
+            code_shower_ui = code_shower.create_ui()
+    # Footer with attribution
+    gr.Markdown("""
+    ---
+    <footer style="text-align: center;">
+    <b>Built with Llama</b> • Llama 3.2 1B + Maincoder 1B • <a href="https://llama.meta.com/" target="_blank">Meta Llama 3.2</a>
+    </footer>
+    """)
+    # State for conversation history
+    conversation_history = gr.State([])
+    # Helper functions
+    def get_model_mode(radio_value: str) -> str:
+        return "codex" if "Codex" in radio_value else "llama"
+    def respond(message, history, mode_radio, show_thinking):
+        if not message.strip():
+            yield history + [("", "Please enter a message.")], ""
+            return
+        # Show thinking indicator
+        thinking_msg = "🤔 Thinking" + "." * 3
+        yield history + [("", thinking_msg)], ""
+        # Get mode
+        mode = get_model_mode(mode_radio)
+        # Generate response
+        result = assistant.generate_with_thinking(message, mode, history)
+        # Format response
+        if show_thinking:
+            # Extract thinking from response (simple heuristic)
+            response_parts = result["response"].split("\n\n")
+            thinking_text = "No explicit thinking shown"
+            # Simple thinking extraction - you can enhance this
+            if "think" in result["response"].lower() or "step" in result["response"].lower():
+                thinking_text = result["response"][:300] + "..."
+            formatted = f"""<div class="thinking-bubble">
+💭 **Thinking process ({result['model_used']}):**
+{thinking_text}
+</div>
+✨ **Response:**
+{result["response"]}"""
+        else:
+            formatted = result["response"]
+        # Update code shower with extracted files
+        if result.get("files") and code_shower:
+            # Update file tree
+            code_shower.current_files = result["files"]
+            file_tree_html = code_shower.update_files_display()
+            # Update code_shower_ui components
+            if result["files"]:
+                first_file = list(result["files"].keys())[0]
+                preview, code_view, code_content = code_shower.display_file(first_file)
+                # Note: In full implementation, update the UI components here
+                # For this example, we'll just update the file tree
+        # Update chat
+        new_history = history + [(message, formatted)]
+        yield new_history, ""
+    def clear_chat():
+        return [], ""
     # Event handlers
     send_btn.click(
         respond,
+        [msg, chatbot, mode_selector, thinking_toggle],
         [chatbot, msg]
     )
     msg.submit(
         respond,
+        [msg, chatbot, mode_selector, thinking_toggle],
         [chatbot, msg]
     )
+    clear_btn.click(clear_chat, None, [chatbot, msg])
+    # Code shower event handlers
+    code_shower_ui["add_file_btn"].click(
+        code_shower.add_new_file,
+        [code_shower_ui["new_lang"], code_shower_ui["new_filename"]],
+        [code_shower_ui["file_tree"], code_shower_ui["preview_area"], code_shower_ui["code_area"], msg]
     )
 if __name__ == "__main__":
     demo.launch(share=True)