Spaces:

gijl
/

hj

Sleeping

App Files Files Community

gijl committed 14 days ago

Commit

f5c5361

verified ·

1 Parent(s): 60287a3

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -30

app.py CHANGED Viewed

@@ -5,72 +5,75 @@ import torch.nn.functional as F
 from huggingface_hub import hf_hub_download
 from model import MedicalMasterAI
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 with open("tokenizer_config.json", "r", encoding="utf-8") as f:
     vocab = json.load(f)
 stoi = vocab["stoi"]
 itos = vocab["itos"]
 def encode(text):
-    return [stoi.get(c, 0) for c in text]
 def decode(ids):
     return "".join([itos.get(str(i), "") for i in ids])
 try:
     model = MedicalMasterAI(vocab_size=115, n_layer=48, n_head=8, n_embd=768)
-    print("جاري سحب ملف الأوزان من المستودع...")
     model_path = hf_hub_download(repo_id="gijl/Medical-Master-1.5B", filename="pytorch_model.bin")
-    print("تم التحميل بنجاح. جاري قراءة الأوزان...")
     state_dict = torch.load(model_path, map_location=device, weights_only=True)
-    # Pass strict=False to safely ignore the image layers (image_projection)
     model.load_state_dict(state_dict, strict=False)
-    model.to(device)
-    model.eval()
     model_loaded = True
-    print("النموذج جاهز للعمل!")
 except Exception as e:
-    print(f"Error loading model: {e}")
     model_loaded = False
 def medical_chat(message, history):
     if not model_loaded:
-        return "حدث خطأ أثناء تحميل أوزان النموذج. يرجى مراجعة السجلات."
-    prompt = f"Question: {message} Answer:"
     idx = torch.tensor([encode(prompt)], dtype=torch.long).to(device)
-    max_new_chars = 200
-    generated_ids = []
-    with torch.no_grad():
-        for _ in range(max_new_chars):
-            # Changed 1024 to 256 to match the size of the trained weights
             idx_cond = idx[:, -256:]
             logits = model(idx_cond)
-            logits = logits[:, -1, :]
-            temperature = 0.8
-            logits = logits / temperature
             probs = F.softmax(logits, dim=-1)
             idx_next = torch.multinomial(probs, num_samples=1)
             idx = torch.cat((idx, idx_next), dim=1)
-            generated_ids.append(idx_next.item())
-    answer = decode(generated_ids)
-    return answer
 demo = gr.ChatInterface(
     fn=medical_chat,
-    title="Medical Master (Custom PyTorch AI)",
-    description="نموذج مبني من الصفر للإجابة على الاستفسارات (يعمل بالوضع النصي حالياً).",
 )
 if __name__ == "__main__":

 from huggingface_hub import hf_hub_download
 from model import MedicalMasterAI
+# Device setup
+device = torch.device("cpu") # free Spaces run on the CPU
+# 1. Load the tokenizer
 with open("tokenizer_config.json", "r", encoding="utf-8") as f:
     vocab = json.load(f)
 stoi = vocab["stoi"]
 itos = vocab["itos"]
 def encode(text):
+    return [stoi.get(c, 0) for c in text] # id 0 stands in for characters not found in stoi (original note: spaces or unknown symbols)
 def decode(ids):
     return "".join([itos.get(str(i), "") for i in ids])
+# 2. Load the model (once only)
 try:
     model = MedicalMasterAI(vocab_size=115, n_layer=48, n_head=8, n_embd=768)
     model_path = hf_hub_download(repo_id="gijl/Medical-Master-1.5B", filename="pytorch_model.bin")
     state_dict = torch.load(model_path, map_location=device, weights_only=True)
     model.load_state_dict(state_dict, strict=False)
+    model.eval()
     model_loaded = True
 except Exception as e:
+    print(f"خطأ في تحميل النموذج: {e}")
     model_loaded = False
+# 3. Streaming generation function
 def medical_chat(message, history):
     if not model_loaded:
+        yield "النموذج لم يتم تحميله بشكل صحيح."
+        return
+    # Build the prompt
+    prompt = f"Question: {message} Answer:"
     idx = torch.tensor([encode(prompt)], dtype=torch.long).to(device)
+    generated_text = ""
+    # Use inference_mode to speed up execution on the CPU
+    with torch.inference_mode():
+        for _ in range(150): # lower step count for a faster response
+            # Crop to the last 256 ids to fit the position-embedding size (256, per the original note)
             idx_cond = idx[:, -256:]
             logits = model(idx_cond)
+            logits = logits[:, -1, :] / 0.8 # temperature
             probs = F.softmax(logits, dim=-1)
             idx_next = torch.multinomial(probs, num_samples=1)
+            # Append the newly sampled character
             idx = torch.cat((idx, idx_next), dim=1)
+            char = decode([idx_next.item()])
+            generated_text += char
+            # Send the text generated so far to the UI (streaming)
+            yield generated_text
+            # Stop once the model emits a stop marker (here the "." character)
+            if idx_next.item() == stoi.get(".", -1):
+                break
+# 4. Gradio interface
 demo = gr.ChatInterface(
     fn=medical_chat,
+    title="Medical Master 1.5B (Streaming Mode)",
+    description="إذا تأخر الرد، انتظر قليلاً فالنموذج يولد النص حرفاً بحرف.",
 )
 if __name__ == "__main__":