# Source: Hugging Face Spaces - Eeppa/Llama-3.2-1B-Codex
# Space status when captured: Configuration error
# File size: 12,182 Bytes

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import time
from typing import Dict, List, Tuple
from code_shower import CodeShower
from file_manager import FileManager

class DualModeAssistant:
    """Route chat prompts to one of two text-generation pipelines.

    ``"codex"`` mode uses the ``maincode/maincoder-1b`` pipeline together with
    a file-oriented coding system prompt; every other mode value uses the
    ``meta-llama/Llama-3.2-1B-Instruct`` pipeline with a general-purpose
    system prompt.
    """

    # How many trailing history entries are forwarded to the model.
    _MAX_HISTORY_ENTRIES = 4

    # NOTE(review): the example in this prompt looks like it lost a closing
    # fence after the Python snippet and an opening fence before the HTML one.
    # The text is kept byte-identical here; confirm the intended format
    # against what FileManager.extract_files_from_code expects.
    _CODEX_SYSTEM_PROMPT = (
        "You are Maincoder, a specialized coding assistant. \n"
        "When asked to write code, always output complete files with their "
        "filenames as markdown code blocks.\n"
        "Example format:\n"
        "```python app.py\n"
        'print("Hello")\n'
        "html\n"
        "<h1>Hello</h1>\n"
        "```"
    )
    _LLAMA_SYSTEM_PROMPT = (
        "You are a helpful general assistant. Answer questions thoroughly."
    )

    def __init__(self):
        """Load both pipelines and create the file manager."""
        print("🔄 Loading Llama 3.2 (General purpose)...")
        self.llama_model_id = "meta-llama/Llama-3.2-1B-Instruct"
        self.llama_pipe = pipeline(
            "text-generation",
            model=self.llama_model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            token=True  # Uses HF_TOKEN from env if available
        )

        print("💻 Loading Maincoder (Code specialist)...")
        self.codex_model_id = "maincode/maincoder-1b"
        self.codex_pipe = pipeline(
            "text-generation",
            model=self.codex_model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto"
        )

        self.current_mode = "codex"
        self.file_manager = FileManager()

    @classmethod
    def _history_to_messages(cls, history) -> List:
        """Convert the trailing history entries into chat-message dicts.

        Accepts entries that are already message dicts (forwarded unchanged)
        and two-item ``(user, assistant)`` pairs, which are expanded into one
        ``user`` and one ``assistant`` message. Empty or non-string halves of
        a pair and entries of any other shape are skipped.
        """
        if not history:
            return []

        messages = []
        for entry in history[-cls._MAX_HISTORY_ENTRIES:]:
            if isinstance(entry, dict):
                messages.append(entry)
            elif isinstance(entry, (list, tuple)) and len(entry) == 2:
                user_text, assistant_text = entry
                if isinstance(user_text, str) and user_text:
                    messages.append({"role": "user", "content": user_text})
                if isinstance(assistant_text, str) and assistant_text:
                    messages.append(
                        {"role": "assistant", "content": assistant_text}
                    )
        return messages

    def generate_with_thinking(self, prompt: str, mode: str, history: List = None) -> Dict:
        """Generate a reply for ``prompt`` with the pipeline selected by ``mode``.

        Args:
            prompt: The current user message.
            mode: ``"codex"`` selects the coding pipeline; any other value
                selects the general Llama pipeline. The value is also stored
                in ``self.current_mode``.
            history: Optional earlier conversation, as message dicts and/or
                ``(user, assistant)`` pairs. Only the last
                ``_MAX_HISTORY_ENTRIES`` entries are used.

        Returns:
            A dict with keys ``"response"`` (the assistant text),
            ``"model_used"`` (a display label) and ``"files"`` (whatever
            ``self.file_manager.extract_files_from_code`` returns for the
            response).
        """
        self.current_mode = mode

        # Choose model
        if mode == "codex":
            pipe = self.codex_pipe
            system_prompt = self._CODEX_SYSTEM_PROMPT
        else:
            pipe = self.llama_pipe
            system_prompt = self._LLAMA_SYSTEM_PROMPT

        # Earlier turns must come before the current prompt so that the
        # prompt is the final message the model is asked to answer.
        messages = [{"role": "system", "content": system_prompt}]
        messages.extend(self._history_to_messages(history))
        messages.append({"role": "user", "content": prompt})

        full_response = pipe(
            messages,
            max_new_tokens=1000,
            temperature=0.7,
            do_sample=True,
            top_p=0.95
        )[0]['generated_text']

        # A list result is treated as a message list whose last entry is the
        # new assistant turn; anything else is used as the reply text as-is.
        if isinstance(full_response, list):
            assistant_msg = full_response[-1].get('content', '')
        else:
            assistant_msg = full_response

        # Detect and extract code blocks for file tree
        files = self.file_manager.extract_files_from_code(assistant_msg)

        return {
            "response": assistant_msg,
            "model_used": "Codex (Coding Specialist)" if mode == "codex" else "Llama (General)",
            "files": files
        }

# Initialize components
# Constructing DualModeAssistant runs its __init__, which builds both
# text-generation pipelines, so the models are loaded once at import time.
assistant = DualModeAssistant()
# CodeShower comes from the local code_shower module; its create_ui() result
# is used further down to build and wire the right-hand panel.
code_shower = CodeShower()

# Custom CSS
# This string is passed to gr.Blocks(css=...), which takes raw CSS rules.
# It must therefore NOT be wrapped in <style>...</style> tags: a leading
# "<style>" becomes part of the first selector and invalidates that rule.
custom_css = """
    /* Main layout */
    .main-container {
        display: flex;
        gap: 20px;
        height: 100vh;
    }

    .chat-panel {
        flex: 1;
        min-width: 400px;
    }

    .code-panel {
        width: 450px;
        border-left: 1px solid #ddd;
        padding-left: 15px;
        overflow-y: auto;
    }

    /* File tree styling */
    .file-tree {
        max-height: 300px;
        overflow-y: auto;
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        background: #fafafa;
    }

    .file-item {
        display: flex;
        align-items: center;
        padding: 8px 12px;
        border-bottom: 1px solid #eee;
        cursor: pointer;
        transition: background 0.2s;
    }

    .file-item:hover {
        background: #f0f0f0;
    }

    .file-item.active {
        background: #e3f2fd;
        border-left: 3px solid #2196f3;
    }

    .file-logo {
        font-size: 1.2em;
        margin-right: 10px;
    }

    .file-name {
        flex: 1;
        font-family: monospace;
        font-size: 0.9em;
    }

    .file-badge {
        font-size: 0.7em;
        padding: 2px 6px;
        border-radius: 10px;
        background: #e0e0e0;
        margin-left: 8px;
    }

    .file-delete {
        background: none;
        border: none;
        cursor: pointer;
        opacity: 0.5;
        margin-left: 8px;
    }

    .file-delete:hover {
        opacity: 1;
    }

    .file-tree-empty {
        padding: 20px;
        text-align: center;
        color: #999;
    }

    /* Preview area */
    .preview-container {
        border: 1px solid #ddd;
        border-radius: 8px;
        overflow: hidden;
        background: white;
    }

    .preview-placeholder, .preview-error {
        padding: 40px;
        text-align: center;
        color: #999;
        background: #f9f9f9;
        border-radius: 8px;
    }

    /* Code viewer */
    .code-viewer {
        background: #1e1e1e;
        border-radius: 8px;
        overflow: hidden;
    }

    .code-header {
        display: flex;
        justify-content: space-between;
        padding: 8px 12px;
        background: #2d2d2d;
        color: white;
        border-bottom: 1px solid #444;
    }

    .code-block {
        margin: 0;
        padding: 15px;
        overflow-x: auto;
        font-family: 'Courier New', monospace;
        font-size: 13px;
        line-height: 1.4;
    }

    .copy-btn {
        background: #007bff;
        border: none;
        color: white;
        padding: 4px 12px;
        border-radius: 4px;
        cursor: pointer;
    }

    .copy-btn:hover {
        background: #0056b3;
    }

    /* Thinking mode bubble */
    .thinking-bubble {
        background: #f0f4ff;
        border-left: 4px solid #667eea;
        padding: 10px 15px;
        margin: 10px 0;
        border-radius: 8px;
        font-style: italic;
        color: #555;
    }

    /* Chat messages */
    .message {
        margin-bottom: 15px;
    }

    .user-message {
        background: #e3f2fd;
        padding: 10px;
        border-radius: 10px;
        margin-left: 20%;
    }

    .assistant-message {
        background: #f5f5f5;
        padding: 10px;
        border-radius: 10px;
        margin-right: 20%;
    }

    /* Responsive */
    @media (max-width: 800px) {
        .code-panel {
            display: none;
        }
        .chat-panel {
            min-width: 100%;
        }
    }
"""

# Create the Gradio interface
# Everything created inside this `with` block is attached to `demo`; the
# nested `with gr.Row()/gr.Column()` blocks define the two-panel layout.
with gr.Blocks(css=custom_css, title="Llama Codex - Dual Mode Assistant", theme=gr.themes.Soft()) as demo:
    # Page header describing the two modes.
    gr.Markdown("""
    # 🤖 Llama Codex - Dual Mode AI Coding Assistant
    
    **Switch between two specialized AI modes:**
    - 🧠 **Llama Mode**: General conversations, explanations, Q&A
    - 💻 **Codex Mode**: Specialized coding with file extraction and previews
    
    > 💡 Inspired by DeepSeek-R1 - both modes show their reasoning process before responding!
    """)
    
    # The elem_classes values match selectors defined in custom_css.
    with gr.Row(elem_classes="main-container"):
        # Left panel: Chat
        with gr.Column(elem_classes="chat-panel", scale=2):
            with gr.Row():
                # get_model_mode() maps the selected label to "codex"/"llama"
                # by looking for the substring "Codex".
                mode_selector = gr.Radio(
                    choices=["💻 Codex Mode (Coding Specialist)", "🧠 Llama Mode (General)"],
                    label="Select AI Mode",
                    value="💻 Codex Mode (Coding Specialist)",
                    interactive=True
                )
            
            with gr.Row():
                # Passed to respond() as `show_thinking`.
                thinking_toggle = gr.Checkbox(
                    label="🧠 Show Thinking Process",
                    value=True,
                    info="Shows the AI's reasoning before the final answer"
                )
            
            # Both an input (current history) and an output of respond().
            chatbot = gr.Chatbot(
                label="Assistant",
                height=500,
                bubble_full_width=False
            )
            
            with gr.Row():
                # Message box; respond() and clear_chat() reset it to "".
                msg = gr.Textbox(
                    label="Your message",
                    placeholder="Ask me to write code, explain concepts, or help debug...",
                    scale=4,
                    lines=3
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)
            
            with gr.Row():
                clear_btn = gr.Button("Clear Chat")
                # Static example prompts shown next to the clear button.
                gr.Markdown("""
                **Example prompts:** 
                - "Write a Python function to calculate fibonacci"
                - "Create an HTML game of Snake"
                - "Explain how recursion works"
                - "Debug this: `for i in range(10) print(i)`"
                """)
        
        # Right panel: Code Shower
        with gr.Column(elem_classes="code-panel", scale=1):
            # The returned object is indexed by string keys in the event
            # wiring ("add_file_btn", "new_lang", "new_filename",
            # "file_tree", "preview_area", "code_area").
            code_shower_ui = code_shower.create_ui()
    
    # Footer with attribution
    gr.Markdown("""
    ---
    <footer style="text-align: center;">
    <b>Built with Llama</b> • Llama 3.2 1B + Maincoder 1B • <a href="https://llama.meta.com/" target="_blank">Meta Llama 3.2</a>
    </footer>
    """)
    
    # State for conversation history
    # NOTE(review): none of the event handlers visible in this file use this
    # state; the chatbot value itself is what gets passed as history.
    conversation_history = gr.State([])
    
    # Helper functions
    def get_model_mode(radio_value: str) -> str:
        """Translate the mode radio label into the assistant's mode key.

        Any label containing the substring ``"Codex"`` maps to ``"codex"``;
        every other label maps to ``"llama"``.
        """
        if "Codex" in radio_value:
            return "codex"
        return "llama"
    
    def respond(message, history, mode_radio, show_thinking):
        if not message.strip():
            yield history + [("", "Please enter a message.")], ""
            return
        
        # Show thinking indicator
        thinking_msg = "🤔 Thinking" + "." * 3
        yield history + [("", thinking_msg)], ""
        
        # Get mode
        mode = get_model_mode(mode_radio)
        
        # Generate response
        result = assistant.generate_with_thinking(message, mode, history)
        
        # Format response
        if show_thinking:
            # Extract thinking from response (simple heuristic)
            response_parts = result["response"].split("\n\n")
            thinking_text = "No explicit thinking shown"
            
            # Simple thinking extraction - you can enhance this
            if "think" in result["response"].lower() or "step" in result["response"].lower():
                thinking_text = result["response"][:300] + "..."
            
            formatted = f"""<div class="thinking-bubble">
💭 **Thinking process ({result['model_used']}):**
{thinking_text}
</div>

✨ **Response:**
{result["response"]}"""
        else:
            formatted = result["response"]
        
        # Update code shower with extracted files
        if result.get("files") and code_shower:
            # Update file tree
            code_shower.current_files = result["files"]
            file_tree_html = code_shower.update_files_display()
            
            # Update code_shower_ui components
            if result["files"]:
                first_file = list(result["files"].keys())[0]
                preview, code_view, code_content = code_shower.display_file(first_file)
                # Note: In full implementation, update the UI components here
                # For this example, we'll just update the file tree
        
        # Update chat
        new_history = history + [(message, formatted)]
        yield new_history, ""
    
    def clear_chat():
        """Return the values that reset the chat: empty history, empty textbox."""
        empty_history = []
        empty_textbox = ""
        return empty_history, empty_textbox
    
    # Event handlers
    # Clicking "Send" and pressing Enter in the textbox both run `respond`
    # with the same inputs and outputs, so register them in one loop.
    for trigger in (send_btn.click, msg.submit):
        trigger(
            fn=respond,
            inputs=[msg, chatbot, mode_selector, thinking_toggle],
            outputs=[chatbot, msg],
        )

    # "Clear Chat" takes no inputs and resets both the chatbot and the textbox.
    clear_btn.click(fn=clear_chat, inputs=None, outputs=[chatbot, msg])

    # Code shower event handlers
    # The add-file button passes the new file's language and name to
    # code_shower.add_new_file and routes its results to the file tree, the
    # preview area, the code area and the message box.
    add_file_inputs = [code_shower_ui[key] for key in ("new_lang", "new_filename")]
    add_file_outputs = [
        code_shower_ui[key] for key in ("file_tree", "preview_area", "code_area")
    ]
    add_file_outputs.append(msg)
    code_shower_ui["add_file_btn"].click(
        fn=code_shower.add_new_file,
        inputs=add_file_inputs,
        outputs=add_file_outputs,
    )

# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    # share=True requests a public share link in addition to the local server.
    demo.launch(share=True)