"""Gradio front end for a dual-mode assistant.

Two text-generation pipelines are loaded at import time: a general-purpose
Llama model and a code-specialised model. The UI lets the user pick a mode,
chat with the selected model, and hands any files extracted from the reply
to the ``CodeShower`` side panel.
"""
import time
from typing import Dict, List, Optional, Tuple

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from code_shower import CodeShower
from file_manager import FileManager

# Number of most recent history entries forwarded to the model.
MAX_HISTORY_EXCHANGES = 4

# NOTE(review): the example below was reconstructed from a markup-mangled
# source (the fences were unbalanced). Confirm the "```<lang> <filename>"
# convention against what FileManager.extract_files_from_code expects.
CODEX_SYSTEM_PROMPT = """You are Maincoder, a specialized coding assistant.
When asked to write code, always output complete files with their filenames as markdown code blocks.

Example format:
```python app.py
print("Hello")
```
```html index.html
<h1>Hello</h1>
```"""

LLAMA_SYSTEM_PROMPT = "You are a helpful general assistant. Answer questions thoroughly."


class DualModeAssistant:
    """Holds both model pipelines and routes a prompt to the selected one."""

    def __init__(self):
        print("🔄 Loading Llama 3.2 (General purpose)...")
        self.llama_model_id = "meta-llama/Llama-3.2-1B-Instruct"
        self.llama_pipe = pipeline(
            "text-generation",
            model=self.llama_model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            token=True,  # Uses HF_TOKEN from env if available
        )

        print("💻 Loading Maincoder (Code specialist)...")
        self.codex_model_id = "maincode/maincoder-1b"
        self.codex_pipe = pipeline(
            "text-generation",
            model=self.codex_model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )

        self.current_mode = "codex"
        self.file_manager = FileManager()

    @staticmethod
    def _history_to_messages(history: Optional[List]) -> List[Dict[str, str]]:
        """Convert the most recent chat history into role/content messages.

        Accepts both history shapes a caller may hand over: ``(user, bot)``
        pairs (what the chat UI in this file passes) and ready-made
        ``{"role": ..., "content": ...}`` dicts. Pairs with an empty side
        (e.g. UI placeholders) are skipped so that no orphaned turn is sent
        to the model.
        """
        messages: List[Dict[str, str]] = []
        if not history:
            return messages

        for entry in history[-MAX_HISTORY_EXCHANGES:]:
            if isinstance(entry, dict):
                role = entry.get("role")
                content = entry.get("content")
                if role in ("user", "assistant") and isinstance(content, str) and content.strip():
                    messages.append({"role": role, "content": content})
            elif isinstance(entry, (list, tuple)) and len(entry) == 2:
                user_text, bot_text = entry
                both_present = (
                    isinstance(user_text, str) and user_text.strip()
                    and isinstance(bot_text, str) and bot_text.strip()
                )
                if both_present:
                    messages.append({"role": "user", "content": user_text})
                    messages.append({"role": "assistant", "content": bot_text})
        return messages

    def generate_with_thinking(self, prompt: str, mode: str, history: Optional[List] = None) -> Dict:
        """Generate a reply with the model selected by *mode*.

        Args:
            prompt: The current user message.
            mode: ``"codex"`` selects the coding pipeline; any other value
                selects the general Llama pipeline.
            history: Optional prior conversation, either ``(user, bot)``
                pairs or role/content dicts. Only the last
                ``MAX_HISTORY_EXCHANGES`` entries are used.

        Returns:
            A dict with keys ``"response"`` (assistant text), ``"model_used"``
            (display label) and ``"files"`` (whatever
            ``FileManager.extract_files_from_code`` returns for the reply).
        """
        self.current_mode = mode

        # Choose model
        if mode == "codex":
            pipe = self.codex_pipe
            system_prompt = CODEX_SYSTEM_PROMPT
            model_label = "Codex (Coding Specialist)"
        else:
            pipe = self.llama_pipe
            system_prompt = LLAMA_SYSTEM_PROMPT
            model_label = "Llama (General)"

        # Order matters: system prompt, then earlier turns, then the new
        # user message last so the model answers the current prompt.
        messages = [{"role": "system", "content": system_prompt}]
        messages.extend(self._history_to_messages(history))
        messages.append({"role": "user", "content": prompt})

        outputs = pipe(
            messages,
            max_new_tokens=1000,
            temperature=0.7,
            do_sample=True,
            top_p=0.95,
        )
        generated = outputs[0]["generated_text"]

        # When the pipeline returns the conversation as a list of messages,
        # the assistant's reply is the last one; otherwise use the raw text.
        if isinstance(generated, list):
            assistant_msg = generated[-1].get("content", "")
        else:
            assistant_msg = generated

        # Detect and extract code blocks for file tree
        files = self.file_manager.extract_files_from_code(assistant_msg)

        return {
            "response": assistant_msg,
            "model_used": model_label,
            "files": files,
        }


# Initialize components
assistant = DualModeAssistant()
code_shower = CodeShower()

# Custom CSS
custom_css = """ """

# Create the Gradio interface
with gr.Blocks(css=custom_css, title="Llama Codex - Dual Mode Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤖 Llama Codex - Dual Mode AI Coding Assistant

    **Switch between two specialized AI modes:**
    - 🧠 **Llama Mode**: General conversations, explanations, Q&A
    - 💻 **Codex Mode**: Specialized coding with file extraction and previews

    > 💡 Inspired by DeepSeek-R1 - both modes show their reasoning process before responding!
    """)

    with gr.Row(elem_classes="main-container"):
        # Left panel: Chat
        with gr.Column(elem_classes="chat-panel", scale=2):
            with gr.Row():
                mode_selector = gr.Radio(
                    choices=["💻 Codex Mode (Coding Specialist)", "🧠 Llama Mode (General)"],
                    label="Select AI Mode",
                    value="💻 Codex Mode (Coding Specialist)",
                    interactive=True,
                )

            with gr.Row():
                thinking_toggle = gr.Checkbox(
                    label="🧠 Show Thinking Process",
                    value=True,
                    info="Shows the AI's reasoning before the final answer",
                )

            chatbot = gr.Chatbot(
                label="Assistant",
                height=500,
                bubble_full_width=False,
            )

            with gr.Row():
                msg = gr.Textbox(
                    label="Your message",
                    placeholder="Ask me to write code, explain concepts, or help debug...",
                    scale=4,
                    lines=3,
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)

            with gr.Row():
                clear_btn = gr.Button("Clear Chat")

            gr.Markdown("""
            **Example prompts:**
            - "Write a Python function to calculate fibonacci"
            - "Create an HTML game of Snake"
            - "Explain how recursion works"
            - "Debug this: `for i in range(10) print(i)`"
            """)

        # Right panel: Code Shower
        with gr.Column(elem_classes="code-panel", scale=1):
            code_shower_ui = code_shower.create_ui()

    # Footer with attribution
    gr.Markdown("""
    ---
    """)

    # State for conversation history
    conversation_history = gr.State([])

    # Helper functions
    def get_model_mode(radio_value: str) -> str:
        """Map the radio button label to the assistant's mode key."""
        return "codex" if "Codex" in radio_value else "llama"

    def respond(message, history, mode_radio, show_thinking):
        """Stream chat updates for one user message.

        Yields ``(chat_history, textbox_value)`` pairs: first a placeholder
        while the model is running, then the final history including the
        formatted reply. The textbox value is always cleared.
        """
        history = history or []

        if not message or not message.strip():
            yield history + [("", "Please enter a message.")], ""
            return

        # Show the user's own message next to the indicator so it does not
        # vanish from the screen while the model is generating.
        thinking_msg = "🤔 Thinking..."
        yield history + [(message, thinking_msg)], ""

        mode = get_model_mode(mode_radio)
        result = assistant.generate_with_thinking(message, mode, history)
        response_text = result["response"]

        if show_thinking:
            thinking_text = "No explicit thinking shown"
            # Simple heuristic: treat the opening of the reply as its
            # "thinking" when it mentions thinking or steps.
            lowered = response_text.lower()
            if "think" in lowered or "step" in lowered:
                thinking_text = response_text[:300] + "..."

            formatted = (
                f"\n\n💭 **Thinking process ({result['model_used']}):** {thinking_text}"
                f"\n\n✨ **Response:** {response_text}"
            )
        else:
            formatted = response_text

        # Update code shower state with extracted files.
        files = result.get("files")
        if files and code_shower:
            code_shower.current_files = files
            code_shower.update_files_display()
            first_file = next(iter(files))
            code_shower.display_file(first_file)
            # NOTE: the values returned by the two calls above are not yet
            # routed to UI components; only the CodeShower object is updated.

        yield history + [(message, formatted)], ""

    def clear_chat():
        """Reset the chat window and the input textbox."""
        return [], ""

    # Event handlers
    send_btn.click(
        respond,
        [msg, chatbot, mode_selector, thinking_toggle],
        [chatbot, msg],
    )

    msg.submit(
        respond,
        [msg, chatbot, mode_selector, thinking_toggle],
        [chatbot, msg],
    )

    clear_btn.click(clear_chat, None, [chatbot, msg])

    # Code shower event handlers
    code_shower_ui["add_file_btn"].click(
        code_shower.add_new_file,
        [code_shower_ui["new_lang"], code_shower_ui["new_filename"]],
        [code_shower_ui["file_tree"], code_shower_ui["preview_area"], code_shower_ui["code_area"], msg],
    )

if __name__ == "__main__":
    demo.launch(share=True)