| import gradio as gr |
| import numpy as np |
| import base64 |
| import re |
| import asyncio |
| from groq import Groq |
| from fastrtc import ( |
| Stream, |
| AsyncStreamHandler, |
| AdditionalOutputs, |
| wait_for_item, |
| get_cloudflare_turn_credentials_async, |
| ) |
| from gradio.utils import get_space |
|
|
class VoiceCodingHandler(AsyncStreamHandler):
    """FastRTC stream handler for voice-driven HTML code generation.

    Each accepted audio frame triggers a (currently mocked) transcription.
    The transcribed request is sent to a Llama model through the Groq client
    and progress/results are published as ``AdditionalOutputs`` payloads that
    ``emit`` hands back to the stream.
    """

    def __init__(self, groq_client: Groq):
        """Create a handler bound to ``groq_client``.

        Args:
            groq_client: Groq API client used for chat completions.
        """
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
            input_sample_rate=16000,
        )
        self.groq_client = groq_client
        self.input_queue = asyncio.Queue()
        self.output_queue = asyncio.Queue()
        self.is_active = False

        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"

        self.current_history = [{"role": "system", "content": self.system_prompt}]
        self.current_code = ""

        # Strong references to in-flight tasks: the event loop only keeps weak
        # references, so an unreferenced task may be garbage-collected before
        # it finishes.
        self._background_tasks = set()
        # True while a request is being processed; frames arriving meanwhile
        # are dropped so that one utterance does not fan out into one LLM call
        # per audio frame.
        self._is_processing = False

    def copy(self):
        """Return a fresh handler that shares only the Groq client."""
        return VoiceCodingHandler(self.groq_client)

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        Args:
            text: Raw model output.

        Returns:
            ``None`` when ``text`` is empty, the ``<!DOCTYPE html>…</html>``
            span when one is present (matched case-insensitively, since HTML
            accepts ``<!doctype html>`` as well), otherwise ``text`` unchanged.
        """
        if not text:
            return None
        match = re.search(
            r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
        )
        return match.group(0) if match else text

    async def start_up(self):
        """Mark the handler as active so that ``receive`` accepts frames."""
        self.is_active = True
        print("✅ Voice Coding Handler started")

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Accept one audio frame and start processing it in the background.

        Frames are ignored while the handler is inactive or while a previous
        request is still being processed.

        Args:
            frame: ``(sample_rate, samples)`` pair delivered by the stream.
        """
        if not self.is_active or self._is_processing:
            return

        sample_rate, array = frame
        array = array.squeeze()

        self._is_processing = True
        task = asyncio.create_task(self._process_audio(array, sample_rate))
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)

    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
        """Transcribe the audio and, if a request was heard, generate code.

        Errors are logged rather than raised because this runs as a detached
        background task with no caller to receive the exception.
        """
        try:
            print("🎤 Processing audio for voice coding...")

            transcription = await self._mock_transcribe_audio()

            if transcription:
                print(f"🎯 Received request: {transcription}")

                await self.output_queue.put(AdditionalOutputs({
                    "type": "loading",
                    "message": "🦙 Llama đang code...",
                    # Snapshot: the live list is mutated once generation ends.
                    "history": list(self.current_history),
                    "code": self.current_code,
                }))

                await self._generate_code(transcription)

        except Exception as e:
            print(f"❌ Lỗi xử lý audio: {e}")
        finally:
            # Always re-open the gate, including on cancellation.
            self._is_processing = False

    async def _mock_transcribe_audio(self) -> str:
        """Return a fixed request; placeholder until a real ASR is integrated."""
        return "Tạo trang web hello world với màu nền xanh và chữ màu trắng"

    async def _generate_code(self, user_message: str):
        """Ask the model for code fulfilling ``user_message`` and publish it.

        The user/assistant turns are committed to ``current_history`` only
        after the completion succeeds, so a failed request does not leave a
        dangling user message that would be re-sent with the next request.

        Args:
            user_message: Transcribed (or typed) request text.
        """
        try:
            user_entry = {
                "role": "user",
                "content": self.user_prompt.format(
                    user_message=user_message,
                    code=self.current_code,
                ),
            }
            messages = [*self.current_history, user_entry]

            print("🦙 Generating code with Llama...")
            # The Groq client call is synchronous; run it in a worker thread
            # so the event loop keeps servicing the stream meanwhile.
            response = await asyncio.to_thread(
                self.groq_client.chat.completions.create,
                model="llama-3.1-8b-instant",
                messages=messages,
                temperature=0.7,
                max_tokens=1024,
                top_p=0.9,
                stream=False,
            )

            # The message content may be missing; normalise to an empty string.
            output = response.choices[0].message.content or ""
            print("✅ Code generated successfully")

            # Never store None: current_code is formatted into the next prompt.
            html_code = self.extract_html_content(output) or ""

            self.current_history.append(user_entry)
            self.current_history.append({"role": "assistant", "content": output})
            self.current_code = html_code

            await self.output_queue.put(AdditionalOutputs({
                "type": "code_generated",
                "history": list(self.current_history),
                "code": html_code,
                "message": "✅ Code đã được generate!",
            }))

        except Exception as e:
            print(f"❌ Lỗi generate code: {e}")
            await self.output_queue.put(AdditionalOutputs({
                "type": "error",
                "message": f"❌ Lỗi: {str(e)}",
                "history": list(self.current_history),
                "code": self.current_code,
            }))

    async def emit(self):
        """Return the next queued output via ``wait_for_item``.

        Returns:
            Whatever ``wait_for_item`` yields for the output queue, or ``None``
            if retrieving it raised an exception.
        """
        try:
            return await wait_for_item(self.output_queue)
        except Exception as e:
            print(f"❌ Lỗi emit: {e}")
            return None

    async def shutdown(self):
        """Deactivate the handler and cancel any in-flight background work."""
        self.is_active = False
        # Results produced after shutdown would never be consumed.
        for task in list(self._background_tasks):
            task.cancel()
        print("🛑 Voice Coding Handler stopped")
|
|
class VoiceCodingService:
    """Voice Coding service built on FastRTC.

    Holds the Groq client, the WebRTC (TURN) configuration and the HTML
    snippets used to preview generated code.
    """

    # Returned by extract_html_content when there is nothing to show.
    _NO_CODE_PLACEHOLDER = "<!-- No code generated -->"
    # Wrapper that extract_html_content puts around non-document output;
    # display_in_sandbox strips it again before escaping the text.
    _FALLBACK_OPEN = "<!-- Generated Code -->\n<pre>"
    _FALLBACK_CLOSE = "</pre>"

    def __init__(self, groq_client: Groq):
        """Store the client, fetch TURN credentials and build HTML templates.

        Args:
            groq_client: Groq API client passed on to the stream handler.
        """
        self.groq_client = groq_client

        # asyncio.run() raises RuntimeError when an event loop is already
        # running in this thread; that case also falls back to None below.
        # NOTE(review): credentials are fetched once at construction time -
        # confirm they cannot expire during the lifetime of the service.
        try:
            self.rtc_configuration = asyncio.run(get_cloudflare_turn_credentials_async())
            print("✅ Using Cloudflare TURN servers")
        except Exception as e:
            print(f"⚠️ Cannot get TURN credentials, using None: {e}")
            self.rtc_configuration = None

        self.sandbox_html = """
        <div style="text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 10px;">
        <h3>🎮 Sandbox Preview</h3>
        <p>Code sẽ được hiển thị ở đây sau khi generate</p>
        <p><small>Chức năng voice đang được phát triển. Vui lòng sử dụng text input.</small></p>
        </div>
        """

        self.loading_html = """
        <div style="text-align: center; padding: 20px;">
        <div class="spinner"></div>
        <p>🦙 Llama đang code...</p>
        </div>
        <style>
        .spinner {
        border: 4px solid #f3f3f3;
        border-top: 4px solid #3498db;
        border-radius: 50%;
        width: 40px;
        height: 40px;
        animation: spin 2s linear infinite;
        margin: 0 auto;
        }
        @keyframes spin {
        0% { transform: rotate(0deg); }
        100% { transform: rotate(360deg); }
        }
        </style>
        """

    def extract_html_content(self, text):
        """Return the HTML document contained in ``text``, or a fallback.

        Args:
            text: Raw model output.

        Returns:
            A placeholder comment when ``text`` is empty, the
            ``<!DOCTYPE html>…</html>`` span when one is present (matched
            case-insensitively), otherwise ``text`` wrapped in a ``<pre>``
            element behind a marker comment.
        """
        if not text:
            return self._NO_CODE_PLACEHOLDER
        match = re.search(
            r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
        )
        if match:
            return match.group(0)
        return f"{self._FALLBACK_OPEN}{text}{self._FALLBACK_CLOSE}"

    def create_stream(self):
        """Create the FastRTC audio stream served by a ``VoiceCodingHandler``."""
        return Stream(
            VoiceCodingHandler(self.groq_client),
            modality="audio",
            mode="send-receive",
            rtc_configuration=self.rtc_configuration,
            concurrency_limit=3,
            time_limit=120,
        )

    def display_in_sandbox(self, code):
        """Render ``code`` as preview HTML.

        HTML documents are embedded in an iframe through a base64 ``data:``
        URI. Anything else is shown as plain text, HTML-escaped so that
        markup in the text is displayed rather than interpreted by the page.

        Args:
            code: Generated code or text; may be empty or ``None``.

        Returns:
            An HTML string: the idle placeholder, an iframe, a text panel, or
            an error panel if rendering failed.
        """
        # Local import: keeps this block self-contained without touching the
        # file-level import list.
        import html

        # NOTE(review): the substring test also hides any generated content
        # that merely contains the words "No code" - confirm this is intended.
        if not code or "No code" in code:
            return self.sandbox_html

        try:
            lowered = code.lower()
            if any(tag in lowered for tag in ('<html', '<!doctype', '<body', '<head')):
                encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
                data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
                return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 5px;"></iframe>'

            # Strip the wrapper added by extract_html_content so that only the
            # actual text is escaped and shown.
            text = code
            if text.startswith(self._FALLBACK_OPEN) and text.endswith(self._FALLBACK_CLOSE):
                text = text[len(self._FALLBACK_OPEN):-len(self._FALLBACK_CLOSE)]
            return f'<div style="padding: 20px; background: #f5f5f5; border-radius: 5px;"><h4>Generated Content:</h4><pre style="white-space: pre-wrap;">{html.escape(text)}</pre></div>'
        except Exception as e:
            print(f"❌ Lỗi display sandbox: {e}")
            return f'<div style="color: red; padding: 20px;">Lỗi hiển thị sandbox: {html.escape(str(e))}</div>'