Spaces:

TeszenAI
/

MTP-3space

Sleeping

App Files Files Community

teszenofficial committed 13 days ago

Commit

6517f5f

verified ·

1 Parent(s): d163150

Update app.py

Browse files

Files changed (1) hide show

app.py +218 -453

app.py CHANGED Viewed

@@ -32,99 +32,87 @@ if DEVICE == "cpu":
 torch.set_grad_enabled(False)
 # CAMBIA ESTO POR EL NOMBRE DE TU REPO EN HUGGING FACE
-MODEL_REPO = "TeszenAI/MTP-3.1.1"  # <-- CAMBIA A TU REPO
 # ======================
 # FUNCIONES DE LIMPIEZA Y CONTROL DE CALIDAD
 # ======================
def clean_response(text: str) -> str:
    """
    Clean a raw model reply before it is returned to the user.

    Steps, in order:
      1. Collapse runs of the same word so a word appears at most three
         times in a row.
      2. Shrink runs of five or more identical characters to two, and drop
         characters outside the allowed Spanish text/punctuation set.
      3. Cut the reply right after the first closing question
         ("¿algo más?", "¿necesitas algo más?", "¿en qué más puedo
         ayudarte?"), discarding anything the model rambled afterwards.
      4. Replace replies shorter than 10 characters with a default apology.
      5. Normalise all whitespace to single spaces.

    Args:
        text: Decoded model output.

    Returns:
        The cleaned reply, "" for empty input, or the default apology when
        nothing usable remains.
    """
    if not text:
        return ""

    # 1. Collapse excessive consecutive repetitions of the same word.
    cleaned_words = []
    last_word = None
    repeat_count = 0
    for word in text.split():
        if word == last_word:
            repeat_count += 1
            if repeat_count > 2:  # already emitted three times in a row
                continue
        else:
            last_word = word
            repeat_count = 0
        cleaned_words.append(word)
    text = " ".join(cleaned_words)

    # 2. Remove nonsense patterns (stretched letters, unexpected symbols).
    text = re.sub(r'(.)\1{4,}', r'\1\1', text)  # aaaaa... -> aa
    text = re.sub(r'[^a-zA-ZáéíóúñüÁÉÍÓÚÑÜ0-9\s.,;:!?¿¡()\-"]+', '', text)

    # 3. Cut after the first closing question. The previous implementation
    #    tried end-anchored patterns first; those can only match at the very
    #    end of the text (so they never shortened anything) and their `break`
    #    prevented this truncation whenever the reply ended in punctuation.
    closing = re.search(
        r'(¿algo más\?|¿necesitas algo más\?|¿en qué más puedo ayudarte\?)',
        text,
        re.IGNORECASE,
    )
    if closing:
        text = text[:closing.end()]

    # 4. Too short to be useful: fall back to a default message.
    if len(text.strip()) < 10:
        return "Lo siento, no pude generar una respuesta clara. ¿Podrías reformular tu pregunta?"

    # 5. Collapse multiple spaces / line breaks.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
def should_stop_generation(generated_text: str, min_length: int = 30, max_length: int = 300) -> bool:
    """
    Decide whether text generation should stop, based on the text so far.

    Args:
        generated_text: Text generated so far.
        min_length: Below this many characters, generation continues unless
            the text already ends with '.', '!' or '?'.
        max_length: Above this many characters, generation always stops.

    Returns:
        True when generation should stop: the text exceeds `max_length`,
        contains a closing phrase, or is longer than `min_length` and ends
        with a complete sentence of more than 5 characters. False otherwise.
    """
    # Hard cap on length.
    if len(generated_text) > max_length:
        return True

    # Very short and no final punctuation: keep generating.
    if len(generated_text) < min_length and not re.search(r'[.!?]$', generated_text):
        return False

    # Phrases that signal the reply is already finished.
    stop_signals = [
        r'(gracias por tu pregunta|espero haberte ayudado|¿necesitas algo más\?)',
        r'(hasta luego|adiós|quedo atento|saludos cordiales)',
        r'(fin del mensaje|fin de la conversación)'
    ]
    if any(re.search(signal, generated_text, re.IGNORECASE) for signal in stop_signals):
        return True

    # Stop when the last sentence looks complete and there is enough content.
    # The sentence is extracted together with its terminator; splitting on
    # '.' (as before) discarded the period, so period-terminated text could
    # never satisfy this check.
    stripped = generated_text.strip()
    if len(generated_text) > min_length and re.search(r'[.!?]$', stripped):
        sentences = re.findall(r'[^.!?]+[.!?]+', stripped)
        if sentences and len(sentences[-1].strip()) > 5:
            return True

    return False
 # ======================
 # DEFINIR ARQUITECTURA DEL MODELO (MTP)
 # ======================
@@ -237,11 +225,8 @@ class MTPModel(nn.Module):
         return logits
     def generate(self, input_ids, max_new_tokens=150, temperature=0.8, top_k=50, top_p=0.9, repetition_penalty=1.1):
-        """Método de generación mejorado con detección inteligente de fin"""
         generated = input_ids
-        generated_text = ""
-        min_response_length = 30
-        max_response_length = max_new_tokens * 2
         for step in range(max_new_tokens):
             with torch.no_grad():
@@ -268,17 +253,11 @@ class MTPModel(nn.Module):
             probs = F.softmax(next_logits, dim=-1)
             next_token = torch.multinomial(probs, num_samples=1).item()
-            if next_token == 3:  # EOS ID para SentencePiece
                 break
             generated = torch.cat([generated, torch.tensor([[next_token]], device=generated.device)], dim=1)
-            # Decodificar parcialmente para verificar si debemos parar (solo cada 10 pasos para eficiencia)
-            if step > 10 and step % 10 == 0:
-                # Intentar decodificar tokens generados (esto es aproximado, el tokenizador real está fuera)
-                if len(generated[0]) > 10:
-                    if should_stop_generation(str(generated[0].tolist()), min_response_length, max_response_length):
-                        break
         return generated
@@ -310,6 +289,10 @@ else:
 # Cargar tokenizador
 tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
 sp = spm.SentencePieceProcessor()
 sp.load(tokenizer_path)
 VOCAB_SIZE = sp.get_piece_size()
@@ -330,25 +313,13 @@ model.to(DEVICE)
 model_path = os.path.join(repo_path, "mtp_model.pt")
 if os.path.exists(model_path):
     state_dict = torch.load(model_path, map_location=DEVICE)
-    model.load_state_dict(state_dict)
     print("✅ Pesos del modelo cargados")
 else:
-    print("⚠️ No se encontró mtp_model.pt, usando pesos aleatorios")
 model.eval()
-# Cuantización para CPU
-if DEVICE == "cpu":
-    print("⚡ Aplicando cuantización dinámica para CPU...")
-    try:
-        model = torch.quantization.quantize_dynamic(
-            model,
-            {nn.Linear},
-            dtype=torch.qint8
-        )
-    except Exception as e:
-        print(f"⚠️ No se pudo aplicar cuantización: {e}")
 param_count = sum(p.numel() for p in model.parameters())
 print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
@@ -370,7 +341,7 @@ app.add_middleware(
 class PromptRequest(BaseModel):
     text: str = Field(..., max_length=2000, description="Texto de entrada")
-    max_tokens: int = Field(default=150, ge=10, le=300, description="Tokens máximos a generar")
     temperature: float = Field(default=0.7, ge=0.1, le=2.0, description="Temperatura de muestreo")
     top_k: int = Field(default=50, ge=1, le=100, description="Top-k sampling")
     top_p: float = Field(default=0.9, ge=0.1, le=1.0, description="Top-p (nucleus) sampling")
@@ -413,29 +384,27 @@ async def generate(req: PromptRequest):
     global ACTIVE_REQUESTS
     ACTIVE_REQUESTS += 1
-    dyn_max_tokens = req.max_tokens
-    dyn_temperature = req.temperature
-    if ACTIVE_REQUESTS > 2:
-        print(f"⚠️ Carga alta ({ACTIVE_REQUESTS} requests). Ajustando parámetros.")
-        dyn_max_tokens = min(dyn_max_tokens, 120)
-        dyn_temperature = max(0.5, dyn_temperature * 0.9)
     user_input = req.text.strip()
     if not user_input:
         ACTIVE_REQUESTS -= 1
         return {"reply": "", "tokens_generated": 0}
     full_prompt = build_prompt(user_input)
-    tokens = [tokenizer_wrapper.bos_id()] + tokenizer_wrapper.encode(full_prompt)
     input_ids = torch.tensor([tokens], device=DEVICE)
     try:
         with torch.no_grad():
             output_ids = model.generate(
                 input_ids,
-                max_new_tokens=dyn_max_tokens,
-                temperature=dyn_temperature,
                 top_k=req.top_k,
                 top_p=req.top_p,
                 repetition_penalty=req.repetition_penalty
@@ -443,18 +412,23 @@ async def generate(req: PromptRequest):
         gen_tokens = output_ids[0, len(tokens):].tolist()
-        safe_tokens = [
-            t for t in gen_tokens
-            if 0 <= t < VOCAB_SIZE and t != tokenizer_wrapper.eos_id()
-        ]
-        response = tokenizer_wrapper.decode(safe_tokens).strip()
-        if "###" in response:
-            response = response.split("###")[0].strip()
-        # Aplicar limpieza inteligente a la respuesta
-        response = clean_response(response)
         return {
             "reply": response,
@@ -464,8 +438,12 @@ async def generate(req: PromptRequest):
     except Exception as e:
         print(f"❌ Error durante generación: {e}")
         return {
-            "reply": "Lo siento, ocurrió un error al procesar tu solicitud.",
             "error": str(e)
         }
@@ -499,7 +477,7 @@ def model_info():
     }
 # ======================
-# INTERFAZ WEB (MODERNA CON LOGO INTEGRADO)
 # ======================
 @app.get("/", response_class=HTMLResponse)
 def chat_ui():
@@ -508,410 +486,197 @@ def chat_ui():
 <html lang="es">
 <head>
 <meta charset="UTF-8">
-<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
 <title>MTP - Asistente IA</title>
-<link rel="preconnect" href="https://fonts.googleapis.com">
-<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600&display=swap" rel="stylesheet">
 <style>
-:root {
-    --bg-color: #131314;
-    --surface-color: #1E1F20;
-    --accent-color: #4a9eff;
-    --text-primary: #e3e3e3;
-    --text-secondary: #9aa0a6;
-    --user-bubble: #282a2c;
-}
-* { box-sizing: border-box; outline: none; -webkit-tap-highlight-color: transparent; }
 body {
-    margin: 0;
-    background-color: var(--bg-color);
-    font-family: 'Inter', sans-serif;
-    color: var(--text-primary);
-    height: 100dvh;
     display: flex;
     flex-direction: column;
-    overflow: hidden;
 }
-header {
-    padding: 12px 20px;
-    display: flex;
-    align-items: center;
-    justify-content: space-between;
-    background: rgba(19, 19, 20, 0.85);
-    backdrop-filter: blur(12px);
-    position: fixed;
-    top: 0;
-    width: 100%;
-    z-index: 50;
-    border-bottom: 1px solid rgba(255,255,255,0.05);
 }
-.brand-wrapper {
-    display: flex;
-    align-items: center;
-    gap: 12px;
-    cursor: pointer;
-}
-.brand-logo {
-    width: 32px;
-    height: 32px;
-    border-radius: 50%;
-    background-image: url('https://i.postimg.cc/c4BRhSnR/8F838209-6DD9-4E1C-96BB-621EC3B78E68.png');
-    background-size: cover;
-    background-position: center;
-    background-repeat: no-repeat;
-    border: 1px solid rgba(255,255,255,0.1);
-}
-.brand-text {
     font-weight: 500;
-    font-size: 1.05rem;
-    display: flex;
-    align-items: center;
-    gap: 8px;
 }
-.version-badge {
-    font-size: 0.75rem;
-    background: rgba(74, 158, 255, 0.15);
-    color: #8ab4f8;
-    padding: 2px 8px;
-    border-radius: 12px;
-    font-weight: 600;
-}
-.chat-scroll {
     flex: 1;
     overflow-y: auto;
-    padding: 80px 20px 40px 20px;
     display: flex;
     flex-direction: column;
-    gap: 30px;
-    max-width: 850px;
-    margin: 0 auto;
-    width: 100%;
-    scroll-behavior: smooth;
 }
-.msg-row {
     display: flex;
-    gap: 16px;
-    width: 100%;
-    opacity: 0;
-    transform: translateY(10px);
-    animation: slideUpFade 0.4s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
 }
-.msg-row.user { justify-content: flex-end; }
-.msg-row.bot { justify-content: flex-start; align-items: flex-start; }
-.msg-content {
-    line-height: 1.6;
-    font-size: 1rem;
-    word-wrap: break-word;
-    max-width: 85%;
 }
-.user .msg-content {
-    background-color: var(--user-bubble);
-    padding: 10px 18px;
     border-radius: 18px;
-    border-top-right-radius: 4px;
-    color: #fff;
 }
-.bot .msg-content-wrapper {
-    display: flex;
-    flex-direction: column;
-    gap: 8px;
-    width: 100%;
 }
-.bot .msg-text {
-    padding-top: 6px;
-    color: var(--text-primary);
 }
-.bot-avatar {
-    width: 34px;
-    height: 34px;
-    min-width: 34px;
-    border-radius: 50%;
-    background-image: url('https://i.postimg.cc/c4BRhSnR/8F838209-6DD9-4E1C-96BB-621EC3B78E68.png');
-    background-size: cover;
-    background-position: center;
-    background-repeat: no-repeat;
-    box-shadow: 0 2px 6px rgba(0,0,0,0.2);
 }
-.bot-actions {
     display: flex;
-    gap: 10px;
-    opacity: 0;
-    transition: opacity 0.3s;
-    margin-top: 5px;
-}
-.action-btn {
-    background: transparent;
-    border: none;
-    color: var(--text-secondary);
-    cursor: pointer;
-    padding: 4px;
-    border-radius: 4px;
-    display: flex;
-    align-items: center;
-    transition: color 0.2s, background 0.2s;
-}
-.action-btn:hover {
-    color: var(--text-primary);
-    background: rgba(255,255,255,0.08);
-}
-.action-btn svg { width: 16px; height: 16px; fill: currentColor; }
-.typing-cursor::after {
-    content: '▊';
-    display: inline-block;
-    margin-left: 2px;
-    animation: blink 1s infinite;
-}
-.footer-container {
-    padding: 0 20px 20px 20px;
-    background: linear-gradient(to top, var(--bg-color) 85%, transparent);
-    position: relative;
-    z-index: 60;
-}
-.input-box {
-    max-width: 850px;
     margin: 0 auto;
-    background: var(--surface-color);
-    border-radius: 28px;
-    padding: 8px 10px 8px 20px;
-    display: flex;
-    align-items: center;
-    border: 1px solid rgba(255,255,255,0.1);
-    transition: border-color 0.2s, box-shadow 0.2s;
 }
-.input-box:focus-within {
-    border-color: rgba(74, 158, 255, 0.5);
-    box-shadow: 0 0 0 2px rgba(74, 158, 255, 0.1);
-}
-#userInput {
     flex: 1;
-    background: transparent;
     border: none;
     color: white;
-    font-size: 1rem;
-    font-family: inherit;
-    padding: 10px 0;
 }
-#mainBtn {
-    background: white;
-    color: black;
     border: none;
-    width: 36px;
-    height: 36px;
-    border-radius: 50%;
-    display: flex;
-    align-items: center;
-    justify-content: center;
     cursor: pointer;
-    margin-left: 8px;
-    transition: transform 0.2s;
 }
-#mainBtn:hover { transform: scale(1.05); }
-.disclaimer {
-    text-align: center;
-    font-size: 0.75rem;
-    color: #666;
-    margin-top: 12px;
 }
-@keyframes slideUpFade {
-    from { opacity: 0; transform: translateY(15px); }
-    to { opacity: 1; transform: translateY(0); }
 }
-@keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
-@keyframes pulseAvatar {
-    0% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0.4); }
-    70% { box-shadow: 0 0 0 8px rgba(74, 158, 255, 0); }
-    100% { box-shadow: 0 0 0 0 rgba(74, 158, 255, 0); }
 }
-.pulsing { animation: pulseAvatar 1.5s infinite; }
-::-webkit-scrollbar { width: 8px; }
-::-webkit-scrollbar-track { background: transparent; }
-::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
 </style>
 </head>
 <body>
-<header>
-    <div class="brand-wrapper" onclick="location.reload()">
-        <div class="brand-logo"></div>
-        <div class="brand-text">
-            MTP <span class="version-badge">v1</span>
-        </div>
-    </div>
-</header>
-<div id="chatScroll" class="chat-scroll">
-    <div class="msg-row bot" style="animation-delay: 0.1s;">
-        <div class="bot-avatar"></div>
-        <div class="msg-content-wrapper">
-            <div class="msg-text">
-                ¡Hola! Soy MTP, tu asistente de IA. ¿En qué puedo ayudarte hoy?
-            </div>
-        </div>
-    </div>
 </div>
-<div class="footer-container">
-    <div class="input-box">
-        <input type="text" id="userInput" placeholder="Escribe un mensaje..." autocomplete="off">
-        <button id="mainBtn" onclick="handleBtnClick()">➤</button>
     </div>
-    <div class="disclaimer">
-        MTP puede cometer errores. Considera verificar la información importante.
     </div>
 </div>
 <script>
-const chatScroll = document.getElementById('chatScroll');
-const userInput = document.getElementById('userInput');
-const mainBtn = document.getElementById('mainBtn');
-let isGenerating = false;
-let abortController = null;
-let typingTimeout = null;
-let lastUserPrompt = "";
-function scrollToBottom() {
-    chatScroll.scrollTop = chatScroll.scrollHeight;
-}
-function setBtnState(state) {
-    if (state === 'sending') {
-        mainBtn.innerHTML = '⏹';
-        isGenerating = true;
-    } else {
-        mainBtn.innerHTML = '➤';
-        isGenerating = false;
-        abortController = null;
-    }
 }
-function handleBtnClick() {
-    if (isGenerating) {
-        stopGeneration();
-    } else {
-        sendMessage();
-    }
 }
-function stopGeneration() {
-    if (abortController) abortController.abort();
-    if (typingTimeout) clearTimeout(typingTimeout);
-    const activeCursor = document.querySelector('.typing-cursor');
-    if (activeCursor) activeCursor.classList.remove('typing-cursor');
-    const activeAvatar = document.querySelector('.pulsing');
-    if (activeAvatar) activeAvatar.classList.remove('pulsing');
-    setBtnState('idle');
-    userInput.focus();
 }
-async function sendMessage(textOverride = null) {
-    const text = textOverride || userInput.value.trim();
-    if (!text) return;
-    lastUserPrompt = text;
-    if (!textOverride) {
-        userInput.value = '';
-        addMessage(text, 'user');
-    }
-    setBtnState('sending');
-    abortController = new AbortController();
-    const botRow = document.createElement('div');
-    botRow.className = 'msg-row bot';
-    const avatar = document.createElement('div');
-    avatar.className = 'bot-avatar pulsing';
-    const wrapper = document.createElement('div');
-    wrapper.className = 'msg-content-wrapper';
-    const msgText = document.createElement('div');
-    msgText.className = 'msg-text';
-    wrapper.appendChild(msgText);
-    botRow.appendChild(avatar);
-    botRow.appendChild(wrapper);
-    chatScroll.appendChild(botRow);
-    scrollToBottom();
     try {
         const response = await fetch('/generate', {
             method: 'POST',
             headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ text: text }),
-            signal: abortController.signal
         });
         const data = await response.json();
-        if (!isGenerating) return;
-        avatar.classList.remove('pulsing');
-        const reply = data.reply || "No entendí eso.";
-        await typeWriter(msgText, reply);
-        if (isGenerating) {
-            addActions(wrapper, reply);
-            setBtnState('idle');
-        }
     } catch (error) {
-        if (error.name === 'AbortError') {
-            msgText.textContent += " [Detenido]";
-        } else {
-            avatar.classList.remove('pulsing');
-            msgText.textContent = "Error de conexión.";
-            msgText.style.color = "#ff8b8b";
-            setBtnState('idle');
-        }
     }
 }
-function addMessage(text, sender) {
-    const row = document.createElement('div');
-    row.className = `msg-row ${sender}`;
-    const content = document.createElement('div');
-    content.className = 'msg-content';
-    content.textContent = text;
-    row.appendChild(content);
-    chatScroll.appendChild(row);
-    scrollToBottom();
-}
-function typeWriter(element, text, speed = 12) {
-    return new Promise(resolve => {
-        let i = 0;
-        element.classList.add('typing-cursor');
-        function type() {
-            if (!isGenerating) {
-                element.classList.remove('typing-cursor');
-                resolve();
-                return;
-            }
-            if (i < text.length) {
-                element.textContent += text.charAt(i);
-                i++;
-                scrollToBottom();
-                typingTimeout = setTimeout(type, speed + Math.random() * 5);
-            } else {
-                element.classList.remove('typing-cursor');
-                resolve();
-            }
-        }
-        type();
-    });
-}
-function addActions(wrapperElement, textToCopy) {
-    const actionsDiv = document.createElement('div');
-    actionsDiv.className = 'bot-actions';
-    const copyBtn = document.createElement('button');
-    copyBtn.className = 'action-btn';
-    copyBtn.innerHTML = `<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path></svg>`;
-    copyBtn.onclick = () => {
-        navigator.clipboard.writeText(textToCopy);
-    };
-    const regenBtn = document.createElement('button');
-    regenBtn.className = 'action-btn';
-    regenBtn.innerHTML = `<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M23 4v6h-6"></path><path d="M1 20v-6h6"></path><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"></path></svg>`;
-    regenBtn.onclick = () => {
-        sendMessage(lastUserPrompt);
-    };
-    actionsDiv.appendChild(copyBtn);
-    actionsDiv.appendChild(regenBtn);
-    wrapperElement.appendChild(actionsDiv);
-    requestAnimationFrame(() => actionsDiv.style.opacity = "1");
-    scrollToBottom();
-}
-userInput.addEventListener('keydown', (e) => {
-    if (e.key === 'Enter') handleBtnClick();
 });
-window.onload = () => userInput.focus();
 </script>
 </body>
 </html>

 torch.set_grad_enabled(False)
 # CAMBIA ESTO POR EL NOMBRE DE TU REPO EN HUGGING FACE
+MODEL_REPO = "TeszenAI/MTP-3.1.1"
 # ======================
 # FUNCIONES DE LIMPIEZA Y CONTROL DE CALIDAD
 # ======================
def truncate_greeting_response(text: str) -> str:
    """
    Shorten a greeting reply to its first sentence.

    The cut is made only at the first period ('.'); exclamation and question
    marks are deliberately not treated as sentence ends. When the text has
    no period, it is returned unchanged if it is at most 80 characters long,
    otherwise its first 80 characters followed by "...".

    Args:
        text: Reply text to shorten.

    Returns:
        The shortened reply; falsy input is returned as-is.
    """
    if not text:
        return text

    # Everything up to and including the first period, if there is one.
    head, dot, _rest = text.partition(".")
    if dot:
        return (head + dot).strip()

    # No period at all: cap the length instead.
    return text[:80] + "..." if len(text) > 80 else text
def clean_response(text: str, user_input: str = "") -> str:
    """
    Clean a raw model reply before it is returned to the user.

    Steps, in order:
      1. Collapse runs of the same word so a word appears at most three
         times in a row.
      2. Shrink runs of five or more identical characters to two.
      3. If `user_input` is a plain greeting, keep only the text up to the
         first period; when the reply has no period, cap it at 60 characters.
      4. Replace replies shorter than 5 characters with a default message
         (a greeting when the user greeted, an apology otherwise).
      5. Normalise all whitespace to single spaces.

    Args:
        text: Decoded model output.
        user_input: The user's original message, used only to detect
            greetings. Defaults to "" (never treated as a greeting).

    Returns:
        The cleaned reply, "" for empty input, or a default message when
        nothing usable remains.
    """
    if not text:
        return ""

    # Collapse excessive consecutive repetitions of the same word.
    cleaned_words = []
    last_word = None
    repeat_count = 0
    for word in text.split():
        if word == last_word:
            repeat_count += 1
            if repeat_count > 2:  # already emitted three times in a row
                continue
        else:
            last_word = word
            repeat_count = 0
        cleaned_words.append(word)
    text = " ".join(cleaned_words)

    # Shrink stretched characters (aaaaa... -> aa).
    text = re.sub(r'(.)\1{4,}', r'\1\1', text)

    # Detect whether the user just said hello.
    is_greeting = user_input.lower().strip() in ["hola", "hola!", "hola.", "buenas", "saludos", "hola?"]

    if is_greeting and text:
        # For greetings, cut ONLY at the first period ('.').
        dot = text.find('.')
        if dot != -1:
            text = text[:dot + 1].strip()
        elif len(text) > 60:
            # No period to cut at: enforce the 60-character cap. Previously
            # this cap was unreachable because the "first sentence" of a
            # period-free text is the whole text.
            text = text[:60]

    # Reply too short or empty: use a default message.
    if len(text.strip()) < 5:
        if is_greeting:
            return "¡Hola! ¿En qué puedo ayudarte?"
        return "Lo siento, no pude generar una respuesta clara. ¿Podrías reformular tu pregunta?"

    # Collapse multiple spaces.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
 # ======================
 # DEFINIR ARQUITECTURA DEL MODELO (MTP)
 # ======================
         return logits
     def generate(self, input_ids, max_new_tokens=150, temperature=0.8, top_k=50, top_p=0.9, repetition_penalty=1.1):
+        """Genera texto token por token"""
         generated = input_ids
         for step in range(max_new_tokens):
             with torch.no_grad():
             probs = F.softmax(next_logits, dim=-1)
             next_token = torch.multinomial(probs, num_samples=1).item()
+            # EOS ID común para SentencePiece
+            if next_token == 2 or next_token == 3:
                 break
             generated = torch.cat([generated, torch.tensor([[next_token]], device=generated.device)], dim=1)
         return generated
 # Cargar tokenizador
 tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
+if not os.path.exists(tokenizer_path):
+    print(f"❌ Tokenizador no encontrado en {tokenizer_path}")
+    sys.exit(1)
 sp = spm.SentencePieceProcessor()
 sp.load(tokenizer_path)
 VOCAB_SIZE = sp.get_piece_size()
 model_path = os.path.join(repo_path, "mtp_model.pt")
 if os.path.exists(model_path):
     state_dict = torch.load(model_path, map_location=DEVICE)
+    model.load_state_dict(state_dict, strict=False)
     print("✅ Pesos del modelo cargados")
 else:
+    print(f"⚠️ No se encontró {model_path}, usando pesos aleatorios")
 model.eval()
 param_count = sum(p.numel() for p in model.parameters())
 print(f"✅ Modelo cargado: {param_count:,} parámetros ({param_count/1e6:.1f}M)")
 class PromptRequest(BaseModel):
     text: str = Field(..., max_length=2000, description="Texto de entrada")
+    max_tokens: int = Field(default=100, ge=10, le=200, description="Tokens máximos a generar")
     temperature: float = Field(default=0.7, ge=0.1, le=2.0, description="Temperatura de muestreo")
     top_k: int = Field(default=50, ge=1, le=100, description="Top-k sampling")
     top_p: float = Field(default=0.9, ge=0.1, le=1.0, description="Top-p (nucleus) sampling")
     global ACTIVE_REQUESTS
     ACTIVE_REQUESTS += 1
     user_input = req.text.strip()
     if not user_input:
         ACTIVE_REQUESTS -= 1
         return {"reply": "", "tokens_generated": 0}
+    # Detectar si es un saludo
+    is_greeting = user_input.lower().strip() in ["hola", "hola!", "hola.", "buenas", "saludos", "hola?"]
+    # Si es saludo, usar menos tokens
+    max_tokens = 30 if is_greeting else req.max_tokens
     full_prompt = build_prompt(user_input)
+    tokens = tokenizer_wrapper.encode(full_prompt)
     input_ids = torch.tensor([tokens], device=DEVICE)
     try:
         with torch.no_grad():
             output_ids = model.generate(
                 input_ids,
+                max_new_tokens=max_tokens,
+                temperature=req.temperature,
                 top_k=req.top_k,
                 top_p=req.top_p,
                 repetition_penalty=req.repetition_penalty
         gen_tokens = output_ids[0, len(tokens):].tolist()
+        # Filtrar tokens inválidos
+        safe_tokens = [t for t in gen_tokens if 0 <= t < VOCAB_SIZE]
+        if safe_tokens:
+            response = tokenizer_wrapper.decode(safe_tokens).strip()
+        else:
+            response = ""
+        # Limpiar respuesta
+        response = clean_response(response, user_input)
+        # Si la respuesta sigue vacía o es muy corta, usar respuesta por defecto
+        if len(response) < 3:
+            if is_greeting:
+                response = "¡Hola! ¿En qué puedo ayudarte?"
+            else:
+                response = "Lo siento, no pude generar una respuesta. ¿Podrías reformular tu pregunta?"
         return {
             "reply": response,
     except Exception as e:
         print(f"❌ Error durante generación: {e}")
+        if is_greeting:
+            fallback = "¡Hola! ¿En qué puedo ayudarte?"
+        else:
+            fallback = "Lo siento, ocurrió un error al procesar tu solicitud."
         return {
+            "reply": fallback,
             "error": str(e)
         }
     }
 # ======================
+# INTERFAZ WEB
 # ======================
 @app.get("/", response_class=HTMLResponse)
 def chat_ui():
 <html lang="es">
 <head>
 <meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
 <title>MTP - Asistente IA</title>
 <style>
+* { margin: 0; padding: 0; box-sizing: border-box; }
 body {
+    background: #131314;
+    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+    height: 100vh;
     display: flex;
     flex-direction: column;
 }
+.chat-header {
+    padding: 16px 20px;
+    background: #1E1F20;
+    border-bottom: 1px solid #2a2b2e;
 }
+.chat-header h1 {
+    color: white;
+    font-size: 1.2rem;
     font-weight: 500;
 }
+.chat-messages {
     flex: 1;
     overflow-y: auto;
+    padding: 20px;
     display: flex;
     flex-direction: column;
+    gap: 16px;
 }
+.message {
     display: flex;
+    gap: 12px;
+    max-width: 80%;
 }
+.message.user {
+    align-self: flex-end;
+    flex-direction: row-reverse;
 }
+.message-content {
+    padding: 10px 16px;
     border-radius: 18px;
+    font-size: 0.95rem;
+    line-height: 1.4;
 }
+.user .message-content {
+    background: #4a9eff;
+    color: white;
+    border-radius: 18px 4px 18px 18px;
 }
+.bot .message-content {
+    background: #1E1F20;
+    color: #e3e3e3;
+    border-radius: 4px 18px 18px 18px;
 }
+.chat-input-container {
+    padding: 16px 20px;
+    background: #1E1F20;
+    border-top: 1px solid #2a2b2e;
 }
+.input-wrapper {
     display: flex;
+    gap: 12px;
+    max-width: 800px;
     margin: 0 auto;
 }
+#messageInput {
     flex: 1;
+    padding: 12px 16px;
+    background: #2a2b2e;
     border: none;
+    border-radius: 24px;
     color: white;
+    font-size: 0.95rem;
+    outline: none;
+}
+#messageInput::placeholder {
+    color: #888;
 }
+#sendBtn {
+    padding: 12px 24px;
+    background: #4a9eff;
     border: none;
+    border-radius: 24px;
+    color: white;
+    font-weight: 500;
     cursor: pointer;
+    transition: opacity 0.2s;
 }
+#sendBtn:hover { opacity: 0.9; }
+#sendBtn:disabled {
+    opacity: 0.5;
+    cursor: not-allowed;
 }
+.typing {
+    display: flex;
+    gap: 4px;
+    padding: 10px 16px;
 }
+.typing span {
+    width: 8px;
+    height: 8px;
+    background: #888;
+    border-radius: 50%;
+    animation: bounce 1.4s infinite ease-in-out;
+}
+.typing span:nth-child(1) { animation-delay: -0.32s; }
+.typing span:nth-child(2) { animation-delay: -0.16s; }
+@keyframes bounce {
+    0%, 80%, 100% { transform: scale(0); }
+    40% { transform: scale(1); }
 }
 </style>
 </head>
 <body>
+<div class="chat-header">
+    <h1>🤖 MTP - Asistente IA</h1>
 </div>
+<div class="chat-messages" id="chatMessages">
+    <div class="message bot">
+        <div class="message-content">¡Hola! Soy MTP, tu asistente de IA. ¿En qué puedo ayudarte hoy?</div>
     </div>
+</div>
+<div class="chat-input-container">
+    <div class="input-wrapper">
+        <input type="text" id="messageInput" placeholder="Escribe tu mensaje..." autocomplete="off">
+        <button id="sendBtn">Enviar</button>
     </div>
 </div>
 <script>
+const chatMessages = document.getElementById('chatMessages');
+const messageInput = document.getElementById('messageInput');
+const sendBtn = document.getElementById('sendBtn');
+let isLoading = false;
// Append a chat bubble to the message list and scroll it into view.
//   text   - message text to display (user input or model reply)
//   isUser - true for the user's bubble, false for the bot's
// Returns the created message element.
function addMessage(text, isUser) {
    const div = document.createElement('div');
    div.className = `message ${isUser ? 'user' : 'bot'}`;
    // Build the bubble with textContent rather than innerHTML: both the
    // user's input and the model's reply are untrusted, and interpolating
    // them into markup would let them inject HTML or scripts into the page.
    const content = document.createElement('div');
    content.className = 'message-content';
    content.textContent = text;
    div.appendChild(content);
    chatMessages.appendChild(div);
    chatMessages.scrollTop = chatMessages.scrollHeight;
    return div;
}
+function addTypingIndicator() {
+    const div = document.createElement('div');
+    div.className = 'message bot';
+    div.id = 'typingIndicator';
+    div.innerHTML = `<div class="typing"><span></span><span></span><span></span></div>`;
+    chatMessages.appendChild(div);
+    chatMessages.scrollTop = chatMessages.scrollHeight;
 }
+function removeTypingIndicator() {
+    const indicator = document.getElementById('typingIndicator');
+    if (indicator) indicator.remove();
 }
// Send the current input to the /generate endpoint and render the reply.
// Does nothing when the input is empty or a request is already in flight.
async function sendMessage() {
    const text = messageInput.value.trim();
    if (!text || isLoading) return;
    messageInput.value = '';
    addMessage(text, true);
    isLoading = true;
    sendBtn.disabled = true;
    addTypingIndicator();
    try {
        const response = await fetch('/generate', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: text })
        });
        const data = await response.json();
        removeTypingIndicator();
        // A response body without a `reply` field (for example a validation
        // error) would otherwise be rendered as the literal "undefined".
        const reply = data.reply || 'Lo siento, no pude generar una respuesta. ¿Podrías reformular tu pregunta?';
        addMessage(reply, false);
    } catch (error) {
        removeTypingIndicator();
        addMessage('Error de conexión. Intenta de nuevo.', false);
    } finally {
        // Always re-enable the controls, whether the request succeeded or not.
        isLoading = false;
        sendBtn.disabled = false;
        messageInput.focus();
    }
}
+messageInput.addEventListener('keypress', (e) => {
+    if (e.key === 'Enter') sendMessage();
 });
+sendBtn.addEventListener('click', sendMessage);
+messageInput.focus();
 </script>
 </body>
 </html>