Spaces:

Eeppa
/

Llama-3.2-1B-Codex

Configuration error

App Files Files Community

Llama-3.2-1B-Codex / app.py

Eeppa

Update app.py

6953393 verified 6 days ago

raw

history blame contribute delete

12.2 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	import time
	from typing import Dict, List, Tuple
	from code_shower import CodeShower
	from file_manager import FileManager

	class DualModeAssistant:
	    """Chat backend that routes prompts to one of two text-generation pipelines.

	    ``"codex"`` mode uses the Maincoder pipeline with a coding system prompt;
	    any other mode value uses the Llama pipeline with a general system prompt.
	    """

	    # Built from single-line literals so that source indentation never leaks
	    # into the prompt text. The example shows two complete, separately fenced
	    # files, each fence carrying "<language> <filename>".
	    _CODEX_SYSTEM_PROMPT = (
	        "You are Maincoder, a specialized coding assistant.\n"
	        "When asked to write code, always output complete files with their "
	        "filenames as markdown code blocks.\n"
	        "Example format:\n"
	        "```python app.py\n"
	        'print("Hello")\n'
	        "```\n"
	        "```html index.html\n"
	        "<h1>Hello</h1>\n"
	        "```"
	    )
	    _LLAMA_SYSTEM_PROMPT = (
	        "You are a helpful general assistant. Answer questions thoroughly."
	    )
	    # Number of trailing history entries forwarded to the model.
	    _HISTORY_WINDOW = 4

	    def __init__(self):
	        """Load both pipelines and create the file manager.

	        Both models are loaded eagerly, so construction is slow.
	        """
	        print("🔄 Loading Llama 3.2 (General purpose)...")
	        self.llama_model_id = "meta-llama/Llama-3.2-1B-Instruct"
	        self.llama_pipe = pipeline(
	            "text-generation",
	            model=self.llama_model_id,
	            torch_dtype=torch.bfloat16,
	            device_map="auto",
	            # NOTE(review): token=True asks the hub client for the locally
	            # stored / HF_TOKEN credential; it appears to fail when none is
	            # configured rather than falling back to anonymous access.
	            token=True,
	        )

	        print("💻 Loading Maincoder (Code specialist)...")
	        self.codex_model_id = "maincode/maincoder-1b"
	        self.codex_pipe = pipeline(
	            "text-generation",
	            model=self.codex_model_id,
	            torch_dtype=torch.bfloat16,
	            device_map="auto",
	        )

	        self.current_mode = "codex"
	        self.file_manager = FileManager()

	    @staticmethod
	    def _history_to_messages(history):
	        """Convert history entries into ``{"role", "content"}`` chat messages.

	        Dict entries are passed through unchanged. Two-item list/tuple entries
	        are treated as ``(user_text, assistant_text)`` pairs; a pair is skipped
	        when its user text is not a non-blank string, and its assistant half is
	        skipped when that is not a non-blank string. Anything else is ignored.
	        """
	        messages = []
	        for entry in history or []:
	            if isinstance(entry, dict):
	                messages.append(entry)
	                continue
	            if not (isinstance(entry, (list, tuple)) and len(entry) == 2):
	                continue
	            user_text, assistant_text = entry
	            if not (isinstance(user_text, str) and user_text.strip()):
	                continue
	            messages.append({"role": "user", "content": user_text})
	            if isinstance(assistant_text, str) and assistant_text.strip():
	                messages.append({"role": "assistant", "content": assistant_text})
	        return messages

	    def generate_with_thinking(self, prompt: str, mode: str, history: List = None) -> Dict:
	        """Generate a reply to ``prompt`` with the pipeline selected by ``mode``.

	        Args:
	            prompt: The new user message.
	            mode: ``"codex"`` selects the coding pipeline; every other value
	                selects the general Llama pipeline.
	            history: Optional earlier conversation. Only the last
	                ``_HISTORY_WINDOW`` entries are used; each entry is either a
	                chat-message dict or a ``(user, assistant)`` pair.

	        Returns:
	            A dict with ``"response"`` (assistant text), ``"model_used"``
	            (display label) and ``"files"`` (whatever
	            ``file_manager.extract_files_from_code`` returns for the response).
	        """
	        self.current_mode = mode

	        if mode == "codex":
	            pipe = self.codex_pipe
	            system_prompt = self._CODEX_SYSTEM_PROMPT
	            model_label = "Codex (Coding Specialist)"
	        else:
	            pipe = self.llama_pipe
	            system_prompt = self._LLAMA_SYSTEM_PROMPT
	            model_label = "Llama (General)"

	        # Order matters: system prompt, then earlier turns, then the new user
	        # prompt last so the model answers the current question.
	        messages = [{"role": "system", "content": system_prompt}]
	        if history:
	            recent = history[-self._HISTORY_WINDOW:]
	            messages.extend(self._history_to_messages(recent))
	        messages.append({"role": "user", "content": prompt})

	        generated = pipe(
	            messages,
	            max_new_tokens=1000,
	            temperature=0.7,
	            do_sample=True,
	            top_p=0.95,
	        )[0]["generated_text"]

	        if isinstance(generated, list):
	            # Chat-style output: the transcript's last message is the reply.
	            assistant_msg = generated[-1].get("content", "") if generated else ""
	        else:
	            # Plain-text output is used as-is.
	            assistant_msg = generated

	        # Detect and extract code blocks for the file tree.
	        files = self.file_manager.extract_files_from_code(assistant_msg)

	        return {
	            "response": assistant_msg,
	            "model_used": model_label,
	            "files": files,
	        }

	# Initialize components
	# Module-level singletons used by the UI code below. Constructing
	# DualModeAssistant builds both text-generation pipelines in its __init__,
	# so this line runs the model loading as soon as the module is executed.
	assistant = DualModeAssistant()
	# Helper object that builds the code panel UI (create_ui) and holds the
	# current file set (current_files) used by the chat handler.
	code_shower = CodeShower()

	# Custom CSS
	custom_css = """
	<style>
	/* Main layout */
	.main-container {
	display: flex;
	gap: 20px;
	height: 100vh;
	}

	.chat-panel {
	flex: 1;
	min-width: 400px;
	}

	.code-panel {
	width: 450px;
	border-left: 1px solid #ddd;
	padding-left: 15px;
	overflow-y: auto;
	}

	/* File tree styling */
	.file-tree {
	max-height: 300px;
	overflow-y: auto;
	border: 1px solid #e0e0e0;
	border-radius: 8px;
	background: #fafafa;
	}

	.file-item {
	display: flex;
	align-items: center;
	padding: 8px 12px;
	border-bottom: 1px solid #eee;
	cursor: pointer;
	transition: background 0.2s;
	}

	.file-item:hover {
	background: #f0f0f0;
	}

	.file-item.active {
	background: #e3f2fd;
	border-left: 3px solid #2196f3;
	}

	.file-logo {
	font-size: 1.2em;
	margin-right: 10px;
	}

	.file-name {
	flex: 1;
	font-family: monospace;
	font-size: 0.9em;
	}

	.file-badge {
	font-size: 0.7em;
	padding: 2px 6px;
	border-radius: 10px;
	background: #e0e0e0;
	margin-left: 8px;
	}

	.file-delete {
	background: none;
	border: none;
	cursor: pointer;
	opacity: 0.5;
	margin-left: 8px;
	}

	.file-delete:hover {
	opacity: 1;
	}

	.file-tree-empty {
	padding: 20px;
	text-align: center;
	color: #999;
	}

	/* Preview area */
	.preview-container {
	border: 1px solid #ddd;
	border-radius: 8px;
	overflow: hidden;
	background: white;
	}

	.preview-placeholder, .preview-error {
	padding: 40px;
	text-align: center;
	color: #999;
	background: #f9f9f9;
	border-radius: 8px;
	}

	/* Code viewer */
	.code-viewer {
	background: #1e1e1e;
	border-radius: 8px;
	overflow: hidden;
	}

	.code-header {
	display: flex;
	justify-content: space-between;
	padding: 8px 12px;
	background: #2d2d2d;
	color: white;
	border-bottom: 1px solid #444;
	}

	.code-block {
	margin: 0;
	padding: 15px;
	overflow-x: auto;
	font-family: 'Courier New', monospace;
	font-size: 13px;
	line-height: 1.4;
	}

	.copy-btn {
	background: #007bff;
	border: none;
	color: white;
	padding: 4px 12px;
	border-radius: 4px;
	cursor: pointer;
	}

	.copy-btn:hover {
	background: #0056b3;
	}

	/* Thinking mode bubble */
	.thinking-bubble {
	background: #f0f4ff;
	border-left: 4px solid #667eea;
	padding: 10px 15px;
	margin: 10px 0;
	border-radius: 8px;
	font-style: italic;
	color: #555;
	}

	/* Chat messages */
	.message {
	margin-bottom: 15px;
	}

	.user-message {
	background: #e3f2fd;
	padding: 10px;
	border-radius: 10px;
	margin-left: 20%;
	}

	.assistant-message {
	background: #f5f5f5;
	padding: 10px;
	border-radius: 10px;
	margin-right: 20%;
	}

	/* Responsive */
	@media (max-width: 800px) {
	.code-panel {
	display: none;
	}
	.chat-panel {
	min-width: 100%;
	}
	}
	</style>
	"""

	# Create the Gradio interface
	# Everything below lives inside the Blocks context: components, the handler
	# functions, and the event wiring (listeners must be attached in-context).
	with gr.Blocks(css=custom_css, title="Llama Codex - Dual Mode Assistant", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("""
	    # 🤖 Llama Codex - Dual Mode AI Coding Assistant

	    Switch between two specialized AI modes:
	    - 🧠 Llama Mode: General conversations, explanations, Q&A
	    - 💻 Codex Mode: Specialized coding with file extraction and previews

	    > 💡 Inspired by DeepSeek-R1 - both modes show their reasoning process before responding!
	    """)

	    with gr.Row(elem_classes="main-container"):
	        # Left panel: Chat
	        with gr.Column(elem_classes="chat-panel", scale=2):
	            with gr.Row():
	                mode_selector = gr.Radio(
	                    choices=["💻 Codex Mode (Coding Specialist)", "🧠 Llama Mode (General)"],
	                    label="Select AI Mode",
	                    value="💻 Codex Mode (Coding Specialist)",
	                    interactive=True,
	                )

	            with gr.Row():
	                thinking_toggle = gr.Checkbox(
	                    label="🧠 Show Thinking Process",
	                    value=True,
	                    info="Shows the AI's reasoning before the final answer",
	                )

	            chatbot = gr.Chatbot(
	                label="Assistant",
	                height=500,
	                bubble_full_width=False,
	            )

	            with gr.Row():
	                msg = gr.Textbox(
	                    label="Your message",
	                    placeholder="Ask me to write code, explain concepts, or help debug...",
	                    scale=4,
	                    lines=3,
	                )
	                send_btn = gr.Button("Send", variant="primary", scale=1)

	            with gr.Row():
	                clear_btn = gr.Button("Clear Chat")
	                gr.Markdown("""
	                Example prompts:
	                - "Write a Python function to calculate fibonacci"
	                - "Create an HTML game of Snake"
	                - "Explain how recursion works"
	                - "Debug this: `for i in range(10) print(i)`"
	                """)

	        # Right panel: Code Shower
	        with gr.Column(elem_classes="code-panel", scale=1):
	            code_shower_ui = code_shower.create_ui()

	    # Footer with attribution
	    gr.Markdown("""
	    ---
	    <footer style="text-align: center;">
	    <b>Built with Llama</b> • Llama 3.2 1B + Maincoder 1B • <a href="https://llama.meta.com/" target="_blank">Meta Llama 3.2</a>
	    </footer>
	    """)

	    # State for conversation history
	    # NOTE(review): not wired to any event below; the chatbot value itself is
	    # what gets passed to the model as history.
	    conversation_history = gr.State([])

	    # Helper functions
	    def get_model_mode(radio_value: str) -> str:
	        """Map the mode radio label to the backend mode key."""
	        return "codex" if "Codex" in radio_value else "llama"

	    def respond(message, history, mode_radio, show_thinking):
	        """Stream chat updates for one user message.

	        Yields ``(chat_history, textbox_value)`` pairs: first a placeholder
	        turn while the model runs, then the final turn. The textbox value is
	        always ``""`` so the input is cleared.
	        """
	        from html import escape

	        history = history or []

	        if not message or not message.strip():
	            yield history + [("", "Please enter a message.")], ""
	            return

	        # Show thinking indicator next to the user's own message so the
	        # message stays visible while the model is generating.
	        thinking_msg = "🤔 Thinking" + "." * 3
	        yield history + [(message, thinking_msg)], ""

	        # Get mode and generate the response
	        mode = get_model_mode(mode_radio)
	        result = assistant.generate_with_thinking(message, mode, history)
	        reply = result["response"]

	        # Format response
	        if show_thinking:
	            # Simple heuristic: when the reply mentions thinking or steps,
	            # show its opening (at most 300 characters) as the "thinking".
	            thinking_text = "No explicit thinking shown"
	            lowered = reply.lower()
	            if "think" in lowered or "step" in lowered:
	                thinking_text = reply[:300] + ("..." if len(reply) > 300 else "")

	            # The excerpt sits inside an HTML element, so escape it to keep
	            # stray tags in model output from breaking the bubble markup.
	            formatted = (
	                '<div class="thinking-bubble">\n'
	                f"💭 Thinking process ({result['model_used']}):\n"
	                f"{escape(thinking_text)}\n"
	                "</div>\n"
	                "\n"
	                "✨ Response:\n"
	                f"{reply}"
	            )
	        else:
	            formatted = reply

	        # Update code shower with extracted files
	        extracted = result.get("files")
	        if extracted and code_shower:
	            code_shower.current_files = extracted
	            code_shower.update_files_display()
	            code_shower.display_file(next(iter(extracted)))
	            # NOTE(review): the values returned by the two calls above are
	            # not sent to any UI component because this handler only outputs
	            # [chatbot, msg]; wiring the code panel still needs to be done.

	        # Update chat
	        yield history + [(message, formatted)], ""

	    def clear_chat():
	        """Reset the chat history and the message box."""
	        return [], ""

	    # Event handlers
	    # The button and Enter-to-submit share the same handler and bindings.
	    chat_inputs = [msg, chatbot, mode_selector, thinking_toggle]
	    chat_outputs = [chatbot, msg]

	    send_btn.click(respond, chat_inputs, chat_outputs)
	    msg.submit(respond, chat_inputs, chat_outputs)

	    clear_btn.click(clear_chat, None, [chatbot, msg])

	    # Code shower event handlers
	    code_shower_ui["add_file_btn"].click(
	        code_shower.add_new_file,
	        [code_shower_ui["new_lang"], code_shower_ui["new_filename"]],
	        [
	            code_shower_ui["file_tree"],
	            code_shower_ui["preview_area"],
	            code_shower_ui["code_area"],
	            msg,
	        ],
	    )

	if __name__ == "__main__":
	    # Start the app only when run as a script; share=True additionally
	    # requests a public share link from Gradio.
	    demo.launch(share=True)