Spaces:

TeszenAI
/

MTP-4

Sleeping

App Files Files Community

teszenofficial committed on 7 days ago

Commit

9ebe5ea

verified ·

1 Parent(s): 09a0bd9

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -80

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
 # -*- coding: utf-8 -*-
 """
 MTP 4 API - ASISTENTE AVANZADO
-- Modelo más grande (384 dims, 6 capas)
-- Temperatura 0.4 para máxima precisión
-- Sistema anti-alucinaciones mejorado
-- Parada inteligente avanzada
 """
 import os
@@ -42,73 +41,55 @@ else:
 torch.set_grad_enabled(False)
-# CAMBIA ESTO POR EL NOMBRE DE TU REPO EN HUGGING FACE
 MODEL_REPO = "TeszenAI/MTP-4"  # Cambia por tu repo
 # ======================
-# SISTEMA ANTI-ALUCINACIONES MEJORADO
 # ======================
 class AntiHallucination:
-    """Sistema para prevenir alucinaciones y respuestas incoherentes"""
     def __init__(self):
         self.uncertainty_words = [
             'no se', 'no lo se', 'no tengo idea', 'no estoy seguro',
             'no puedo responder', 'no sé', 'desconozco'
         ]
         self.empty_patterns = [
             r'^[.,!?;:]+$', r'^[\s]+$', r'^[0-9]+$', r'^[a-zA-Z]{1,3}$',
         ]
         self.repetition_patterns = [
             r'(\b\w+\b)(?:\s+\1){5,}', r'(.)\1{10,}',
         ]
-        self.max_safe_tokens = 120
         self.max_safe_chars = 500
     def is_hallucinating(self, text: str) -> Tuple[bool, str]:
         if not text:
             return True, "Respuesta vacía"
-        text_lower = text.lower().strip()
         if len(text) < 5:
             return True, "Respuesta demasiado corta"
         for pattern in self.empty_patterns:
             if re.match(pattern, text):
                 return True, "Patrón vacío detectado"
         for pattern in self.repetition_patterns:
             if re.search(pattern, text):
                 return True, "Repetición excesiva"
-        words = text_lower.split()[:5]
         for uw in self.uncertainty_words:
             if uw in ' '.join(words):
                 return True, f"Expresa incertidumbre: '{uw}'"
         if len(text) > self.max_safe_chars:
             return True, "Respuesta demasiado larga"
         return False, "OK"
     def is_coherent(self, text: str, question: str) -> Tuple[bool, str]:
         if not text or not question:
             return True, "Sin datos suficientes"
         text_lower = text.lower()
         question_lower = question.lower()
         question_words = set(re.findall(r'\b[a-záéíóúüñ]{3,}\b', question_lower))
         if question_words:
             matches = sum(1 for w in question_words if w in text_lower)
             ratio = matches / len(question_words)
             if len(question_words) >= 2 and ratio < 0.2:
-                return False, f"No responde a la pregunta (solo {matches}/{len(question_words)} palabras clave)"
         return True, "OK"
 # ======================
@@ -121,15 +102,8 @@ class CompletionState(Enum):
 class IntelligentStopper:
     def __init__(self):
-        self.completion_patterns = [
-            r'\.\s*$', r'\!?\s*$', r'\?\s*$', r'\.\.\.\s*$',
-        ]
-        self.continuation_patterns = [
-            r'[,;:]\s*$', r' y $', r' o $', r' pero $', r' porque $',
-            r' además $', r' también $', r' como $',
-        ]
         self.completion_phrases = [
             'gracias', 'saludos', 'adios', 'hasta luego',
             'espero haberte ayudado', 'cualquier otra pregunta',
@@ -139,33 +113,26 @@ class IntelligentStopper:
     def analyze(self, text: str, min_length: int = 40) -> Tuple[CompletionState, str]:
         if not text or len(text) < min_length:
             return CompletionState.INCOMPLETE, "Demasiado corto"
         text = text.strip()
         for pattern in self.continuation_patterns:
             if re.search(pattern, text, re.IGNORECASE):
                 return CompletionState.INCOMPLETE, "Indica continuación"
         text_lower = text.lower()
         for phrase in self.completion_phrases:
             if phrase in text_lower[-80:]:
                 return CompletionState.COMPLETE, "Frase de finalización"
         for pattern in self.completion_patterns:
             if re.search(pattern, text):
                 if len(text) > min_length:
                     return CompletionState.COMPLETE, "Termina naturalmente"
         if len(text) > 350:
             return CompletionState.COMPLETE, "Longitud suficiente"
         return CompletionState.INCOMPLETE, "Puede continuar"
 # ======================
-# ARQUITECTURA MTP 4 (MEJORADA)
 # ======================
 class LayerNorm(nn.Module):
-    __slots__ = ('weight', 'bias', 'eps')
     def __init__(self, d_model, eps=1e-5):
         super().__init__()
         self.weight = nn.Parameter(torch.ones(d_model))
@@ -175,10 +142,10 @@ class LayerNorm(nn.Module):
         return self.weight * (x - x.mean(-1, keepdim=True)) / (x.std(-1, keepdim=True) + self.eps) + self.bias
 class MultiHeadAttention(nn.Module):
-    __slots__ = ('n_heads', 'd_k', 'w_q', 'w_k', 'w_v', 'w_o', 'dropout', 'scale')
     def __init__(self, d_model, n_heads, dropout=0.2):
         super().__init__()
         assert d_model % n_heads == 0
         self.n_heads = n_heads
         self.d_k = d_model // n_heads
         self.w_q = nn.Linear(d_model, d_model)
@@ -200,7 +167,6 @@ class MultiHeadAttention(nn.Module):
         return self.w_o(out)
 class FeedForward(nn.Module):
-    __slots__ = ('linear1', 'linear2', 'dropout')
     def __init__(self, d_model, d_ff, dropout=0.2):
         super().__init__()
         self.linear1 = nn.Linear(d_model, d_ff)
@@ -210,7 +176,6 @@ class FeedForward(nn.Module):
         return self.linear2(self.dropout(F.gelu(self.linear1(x))))
 class TransformerBlock(nn.Module):
-    __slots__ = ('attn', 'ff', 'norm1', 'norm2', 'dropout1', 'dropout2')
     def __init__(self, d_model, n_heads, d_ff, dropout=0.2):
         super().__init__()
         self.attn = MultiHeadAttention(d_model, n_heads, dropout)
@@ -248,6 +213,11 @@ class MTP4Model(nn.Module):
         self.norm = LayerNorm(d_model)
         self.lm_head = nn.Linear(d_model, vocab_size)
         self.dropout = nn.Dropout(dropout)
     def forward(self, x):
         seq_len = x.size(1)
         mask = torch.tril(torch.ones(seq_len, seq_len)).unsqueeze(0).unsqueeze(0).to(x.device)
@@ -261,7 +231,6 @@ class MTP4Model(nn.Module):
     @torch.no_grad()
     def generate(self, input_ids, max_new=120, temperature=0.4, top_k=30, top_p=0.85,
                  repetition_penalty=1.3, stopper=None):
-        """Generación optimizada para MTP 4"""
         generated = input_ids
         eos_id = 3
         last_tokens = []
@@ -363,6 +332,12 @@ config_path = os.path.join(repo_path, "config.json")
 with open(config_path, "r") as f:
     config = json.load(f)
 tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
 sp = spm.SentencePieceProcessor()
 sp.load(tokenizer_path)
@@ -371,16 +346,16 @@ config["vocab_size"] = VOCAB_SIZE
 print(f"🧠 Inicializando MTP 4...")
 print(f"   → Vocabulario: {VOCAB_SIZE}")
-print(f"   → Dimensiones: {config.get('d_model', 384)}")
-print(f"   → Capas: {config.get('n_layers', 6)}")
 print(f"   → Dispositivo: {DEVICE.upper()}")
 model = MTP4Model(**config)
 model.to(DEVICE)
 model_path = os.path.join(repo_path, "mtp_model.pt")
 if os.path.exists(model_path):
     state_dict = torch.load(model_path, map_location=DEVICE)
     model.load_state_dict(state_dict, strict=False)
     print("✅ Pesos del modelo cargados")
@@ -399,8 +374,6 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
 class PromptRequest(BaseModel):
     text: str = Field(..., max_length=2000)
-    max_tokens: int = Field(default=120, ge=20, le=200)
-    temperature: float = Field(default=0.4, ge=0.3, le=1.0)
 def build_prompt(user_input: str) -> str:
     return f"### Instrucción:\n{user_input}\n\n### Respuesta:\n"
@@ -430,8 +403,8 @@ async def generate(req: PromptRequest):
         output_ids = model.generate(
             input_ids,
-            max_new=min(req.max_tokens, 120),
-            temperature=req.temperature,
             top_k=30,
             top_p=0.85,
             repetition_penalty=1.3,
@@ -445,6 +418,7 @@ async def generate(req: PromptRequest):
         response = sp.decode(safe_tokens).strip() if safe_tokens else ""
         is_hallucinating, reason = anti_hallucination.is_hallucinating(response)
         if is_hallucinating:
             print(f"⚠️ Alucinación detectada: {reason}")
@@ -455,6 +429,7 @@ async def generate(req: PromptRequest):
                 if is_hallucinating:
                     response = ""
         is_coherent, _ = anti_hallucination.is_coherent(response, user_input)
         if not is_coherent and len(response) > 20:
             first_sentence = response.split('.')[0] if '.' in response else response[:100]
@@ -475,6 +450,8 @@ async def generate(req: PromptRequest):
     except Exception as e:
         print(f"Error: {e}")
         return {"reply": "Lo siento, ocurrió un error."}
     finally:
@@ -495,13 +472,11 @@ def info():
         "parameters": param_count,
         "parameters_millions": round(param_count / 1e6, 2),
         "device": DEVICE,
-        "vocab_size": VOCAB_SIZE,
-        "d_model": config.get('d_model', 384),
-        "n_layers": config.get('n_layers', 6)
     }
 # ======================
-# INTERFAZ WEB MEJORADA
 # ======================
 @app.get("/", response_class=HTMLResponse)
 def chat_ui():
@@ -511,7 +486,7 @@ def chat_ui():
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>MTP 4 - Asistente IA Avanzado</title>
     <style>
         * { margin: 0; padding: 0; box-sizing: border-box; }
         body {
@@ -527,16 +502,7 @@ def chat_ui():
             backdrop-filter: blur(10px);
             border-bottom: 1px solid rgba(255,255,255,0.1);
         }
-        .header h1 {
-            color: white;
-            font-size: 1.2rem;
-        }
-        .header h1 span {
-            background: linear-gradient(135deg, #4a9eff, #ff6b6b);
-            -webkit-background-clip: text;
-            background-clip: text;
-            color: transparent;
-        }
         .header p { color: #888; font-size: 0.7rem; margin-top: 4px; }
         .messages {
             flex: 1;
@@ -661,25 +627,25 @@ def chat_ui():
 </head>
 <body>
     <div class="header">
-        <h1>🤖 <span>MTP 4</span> - Asistente IA Avanzado</h1>
-        <p>✨ 25M parámetros • Temperatura 0.4 • Anti-alucinaciones • Respuestas precisas</p>
     </div>
     <div class="suggestions">
-        <div class="suggestion">Hola 👋</div>
-        <div class="suggestion">¿Quién eres? 🤖</div>
-        <div class="suggestion">¿Qué puedes hacer? ⚡</div>
-        <div class="suggestion">Explícame la IA 🧠</div>
-        <div class="suggestion">Háblame de BTS 💜</div>
-        <div class="suggestion">¿Qué es un agujero negro? 🌌</div>
-        <div class="suggestion">Dime un chiste 😄</div>
-        <div class="suggestion">Adiós 👋</div>
     </div>
     <div class="messages" id="messages">
-        <div class="message bot">✨ ¡Hola! Soy <strong>MTP 4</strong>, mi asistente de IA avanzado. Tengo 25 millones de parámetros y estoy optimizado para dar respuestas precisas y coherentes. ¿En qué puedo ayudarte hoy? 😊</div>
     </div>
     <div class="input-area">
         <input type="text" id="input" placeholder="Escribe tu pregunta..." autocomplete="off">
-        <button id="send">Enviar 📤</button>
     </div>
     <div class="badge">⚡ MTP 4 | 🌡️ 0.4 | 🛡️ Anti-alucinaciones</div>
     <script>
@@ -757,7 +723,6 @@ if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
     print("\n" + "=" * 60)
     print(f"🚀 MTP 4 en http://0.0.0.0:{port}")
-    print(f"📊 Parámetros: {param_count:,} ({param_count/1e6:.2f}M)")
     print(f"🌡️ Temperatura: 0.4 | 🔁 Repetition penalty: 1.3")
     print("=" * 60)

 # -*- coding: utf-8 -*-
 """
 MTP 4 API - ASISTENTE AVANZADO
+- Modelo: d_model=384, n_layers=6 (25M parámetros)
+- Temperatura 0.4
+- Sistema anti-alucinaciones
 """
 import os
 torch.set_grad_enabled(False)
 MODEL_REPO = "TeszenAI/MTP-4"  # Cambia por tu repo
 # ======================
+# SISTEMA ANTI-ALUCINACIONES
 # ======================
 class AntiHallucination:
     """Heuristic filter that flags degenerate or off-topic model replies.

     All checks are purely textual: length limits, regular expressions and
     keyword overlap between the question and the reply.
     """

     def __init__(self):
         # Phrases treated as an admission of uncertainty when they occur
         # within the first five words of a reply.
         self.uncertainty_words = [
             'no se', 'no lo se', 'no tengo idea', 'no estoy seguro',
             'no puedo responder', 'no sé', 'desconozco'
         ]
         # Whole-string patterns for replies that carry no real content.
         self.empty_patterns = [
             r'^[.,!?;:]+$', r'^[\s]+$', r'^[0-9]+$', r'^[a-zA-Z]{1,3}$',
         ]
         # A word repeated six or more times in a row, or a single character
         # repeated eleven or more times in a row.
         self.repetition_patterns = [
             r'(\b\w+\b)(?:\s+\1){5,}', r'(.)\1{10,}',
         ]
         # Replies longer than this many characters are rejected.
         self.max_safe_chars = 500

     def is_hallucinating(self, text: str) -> Tuple[bool, str]:
         """Check a reply against the emptiness, repetition and length rules.

         Args:
             text: Candidate reply.

         Returns:
             ``(flagged, reason)``. ``flagged`` is True when the reply should
             be rejected; ``reason`` is a short Spanish label ("OK" when the
             reply passes every check).
         """
         if not text:
             return True, "Respuesta vacía"
         if len(text) < 5:
             return True, "Respuesta demasiado corta"
         if any(re.match(pattern, text) for pattern in self.empty_patterns):
             return True, "Patrón vacío detectado"
         if any(re.search(pattern, text) for pattern in self.repetition_patterns):
             return True, "Repetición excesiva"

         # Only the opening five words are inspected; whitespace is collapsed
         # so multi-word phrases match regardless of the original spacing.
         opening = ' '.join(text.lower().split()[:5])
         for uw in self.uncertainty_words:
             # Match on word boundaries: a plain substring test makes 'no se'
             # fire inside "uno se ..." or "no será ...".
             if re.search(r'\b' + re.escape(uw) + r'\b', opening):
                 return True, f"Expresa incertidumbre: '{uw}'"

         if len(text) > self.max_safe_chars:
             return True, "Respuesta demasiado larga"
         return False, "OK"

     def is_coherent(self, text: str, question: str) -> Tuple[bool, str]:
         """Check that a reply shares vocabulary with the question.

         Question keywords are the distinct lowercase runs of three or more
         letters (including Spanish accented letters). The reply is judged
         incoherent when the question has at least two keywords and fewer
         than 20% of them occur (as substrings) in the lowercased reply.

         Args:
             text: Candidate reply.
             question: The user's question.

         Returns:
             ``(coherent, reason)``. Empty input on either side is reported as
             coherent because there is nothing to compare.
         """
         if not text or not question:
             return True, "Sin datos suficientes"
         text_lower = text.lower()
         question_words = set(re.findall(r'\b[a-záéíóúüñ]{3,}\b', question.lower()))
         if len(question_words) >= 2:
             matches = sum(1 for w in question_words if w in text_lower)
             if matches / len(question_words) < 0.2:
                 return False, "No responde a la pregunta"
         return True, "OK"
 # ======================
 class IntelligentStopper:
     def __init__(self):
+        self.completion_patterns = [r'\.\s*$', r'\!?\s*$', r'\?\s*$', r'\.\.\.\s*$']
+        self.continuation_patterns = [r'[,;:]\s*$', r' y $', r' o $', r' pero $', r' porque $']
         self.completion_phrases = [
             'gracias', 'saludos', 'adios', 'hasta luego',
             'espero haberte ayudado', 'cualquier otra pregunta',
     def analyze(self, text: str, min_length: int = 40) -> Tuple[CompletionState, str]:
         """Decide whether the generated text looks finished.

         Args:
             text: Text produced so far.
             min_length: Minimum number of characters, measured after
                 trimming surrounding whitespace, before the text is judged.

         Returns:
             A ``(state, reason)`` pair; ``reason`` is a short Spanish label.
         """
         if not text:
             return CompletionState.INCOMPLETE, "Demasiado corto"
         # Trim before measuring so padding whitespace does not count towards
         # the minimum length.
         text = text.strip()
         if len(text) < min_length:
             return CompletionState.INCOMPLETE, "Demasiado corto"

         # Connector patterns such as ' y $' expect a space after the final
         # word, which strip() has just removed. Probe the text both as-is and
         # with one space re-appended so a dangling connector is still caught.
         probes = (text, text + ' ')
         for pattern in self.continuation_patterns:
             if any(re.search(pattern, probe, re.IGNORECASE) for probe in probes):
                 return CompletionState.INCOMPLETE, "Indica continuación"

         # Closing phrases only count when they appear in the last 80 characters.
         tail = text.lower()[-80:]
         if any(phrase in tail for phrase in self.completion_phrases):
             return CompletionState.COMPLETE, "Frase de finalización"

         # NOTE(review): a completion pattern that can match the empty string
         # at end-of-text (for example one whose punctuation is optional)
         # makes every text longer than min_length COMPLETE here — confirm
         # the pattern list is as intended.
         if len(text) > min_length and any(
             re.search(pattern, text) for pattern in self.completion_patterns
         ):
             return CompletionState.COMPLETE, "Termina naturalmente"

         if len(text) > 350:
             return CompletionState.COMPLETE, "Longitud suficiente"
         return CompletionState.INCOMPLETE, "Puede continuar"
 # ======================
+# ARQUITECTURA MTP 4 (IDÉNTICA AL ENTRENADOR)
 # ======================
 class LayerNorm(nn.Module):
     def __init__(self, d_model, eps=1e-5):
         super().__init__()
         self.weight = nn.Parameter(torch.ones(d_model))
         return self.weight * (x - x.mean(-1, keepdim=True)) / (x.std(-1, keepdim=True) + self.eps) + self.bias
 class MultiHeadAttention(nn.Module):
     def __init__(self, d_model, n_heads, dropout=0.2):
         super().__init__()
         assert d_model % n_heads == 0
+        self.d_model = d_model
         self.n_heads = n_heads
         self.d_k = d_model // n_heads
         self.w_q = nn.Linear(d_model, d_model)
         return self.w_o(out)
 class FeedForward(nn.Module):
     def __init__(self, d_model, d_ff, dropout=0.2):
         super().__init__()
         self.linear1 = nn.Linear(d_model, d_ff)
         return self.linear2(self.dropout(F.gelu(self.linear1(x))))
 class TransformerBlock(nn.Module):
     def __init__(self, d_model, n_heads, d_ff, dropout=0.2):
         super().__init__()
         self.attn = MultiHeadAttention(d_model, n_heads, dropout)
         self.norm = LayerNorm(d_model)
         self.lm_head = nn.Linear(d_model, vocab_size)
         self.dropout = nn.Dropout(dropout)
+        self._init_weights()
+    def _init_weights(self):
+        for p in self.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
     def forward(self, x):
         seq_len = x.size(1)
         mask = torch.tril(torch.ones(seq_len, seq_len)).unsqueeze(0).unsqueeze(0).to(x.device)
     @torch.no_grad()
     def generate(self, input_ids, max_new=120, temperature=0.4, top_k=30, top_p=0.85,
                  repetition_penalty=1.3, stopper=None):
         generated = input_ids
         eos_id = 3
         last_tokens = []
 with open(config_path, "r") as f:
     config = json.load(f)
+print(f"📋 Configuración encontrada:")
+print(f"   → d_model: {config.get('d_model', 'No especificado')}")
+print(f"   → n_layers: {config.get('n_layers', 'No especificado')}")
+print(f"   → n_heads: {config.get('n_heads', 'No especificado')}")
+print(f"   → d_ff: {config.get('d_ff', 'No especificado')}")
 tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
 sp = spm.SentencePieceProcessor()
 sp.load(tokenizer_path)
 print(f"🧠 Inicializando MTP 4...")
 print(f"   → Vocabulario: {VOCAB_SIZE}")
 print(f"   → Dispositivo: {DEVICE.upper()}")
+# Crear modelo con la configuración EXACTA del archivo
 model = MTP4Model(**config)
 model.to(DEVICE)
 model_path = os.path.join(repo_path, "mtp_model.pt")
 if os.path.exists(model_path):
     state_dict = torch.load(model_path, map_location=DEVICE)
+    # Usar strict=False para permitir pequeñas diferencias
     model.load_state_dict(state_dict, strict=False)
     print("✅ Pesos del modelo cargados")
 # Request body accepted by the generation endpoint.
 class PromptRequest(BaseModel):
     # User prompt text: required, limited to 2000 characters.
     text: str = Field(..., max_length=2000)
 def build_prompt(user_input: str) -> str:
     """Wrap the user's text in the instruction/response prompt template.

     Args:
         user_input: Text to place under the instruction heading.

     Returns:
         The prompt string, ending right after the response heading so the
         model's continuation follows it.
     """
     template = "### Instrucción:\n{}\n\n### Respuesta:\n"
     return template.format(user_input)
         output_ids = model.generate(
             input_ids,
+            max_new=100,
+            temperature=0.4,
             top_k=30,
             top_p=0.85,
             repetition_penalty=1.3,
         response = sp.decode(safe_tokens).strip() if safe_tokens else ""
+        # Anti-alucinaciones
         is_hallucinating, reason = anti_hallucination.is_hallucinating(response)
         if is_hallucinating:
             print(f"⚠️ Alucinación detectada: {reason}")
                 if is_hallucinating:
                     response = ""
+        # Verificar coherencia
         is_coherent, _ = anti_hallucination.is_coherent(response, user_input)
         if not is_coherent and len(response) > 20:
             first_sentence = response.split('.')[0] if '.' in response else response[:100]
     except Exception as e:
         print(f"Error: {e}")
+        import traceback
+        traceback.print_exc()
         return {"reply": "Lo siento, ocurrió un error."}
     finally:
         "parameters": param_count,
         "parameters_millions": round(param_count / 1e6, 2),
         "device": DEVICE,
+        "vocab_size": VOCAB_SIZE
     }
 # ======================
+# INTERFAZ WEB
 # ======================
 @app.get("/", response_class=HTMLResponse)
 def chat_ui():
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>MTP 4 - Asistente IA</title>
     <style>
         * { margin: 0; padding: 0; box-sizing: border-box; }
         body {
             backdrop-filter: blur(10px);
             border-bottom: 1px solid rgba(255,255,255,0.1);
         }
+        .header h1 { color: white; font-size: 1.2rem; }
         .header p { color: #888; font-size: 0.7rem; margin-top: 4px; }
         .messages {
             flex: 1;
 </head>
 <body>
     <div class="header">
+        <h1>🤖 MTP 4 - Asistente IA</h1>
+        <p>✨ Temperatura 0.4 | Anti-alucinaciones | Respuestas precisas</p>
     </div>
     <div class="suggestions">
+        <div class="suggestion">Hola</div>
+        <div class="suggestion">¿Quién eres?</div>
+        <div class="suggestion">¿Qué puedes hacer?</div>
+        <div class="suggestion">Explícame la IA</div>
+        <div class="suggestion">Háblame de BTS</div>
+        <div class="suggestion">¿Qué es un agujero negro?</div>
+        <div class="suggestion">Dime un chiste</div>
+        <div class="suggestion">Adiós</div>
     </div>
     <div class="messages" id="messages">
+        <div class="message bot">✨ Hola, soy MTP 4. Estoy optimizado para dar respuestas coherentes y evitar alucinaciones. ¿En qué puedo ayudarte?</div>
     </div>
     <div class="input-area">
         <input type="text" id="input" placeholder="Escribe tu pregunta..." autocomplete="off">
+        <button id="send">Enviar</button>
     </div>
     <div class="badge">⚡ MTP 4 | 🌡️ 0.4 | 🛡️ Anti-alucinaciones</div>
     <script>
     port = int(os.environ.get("PORT", 7860))
     print("\n" + "=" * 60)
     print(f"🚀 MTP 4 en http://0.0.0.0:{port}")
     print(f"🌡️ Temperatura: 0.4 | 🔁 Repetition penalty: 1.3")
     print("=" * 60)