Spaces:

gijl
/

hj

Sleeping

App Files Files Community

gijl committed 13 days ago

Commit

5201445

verified ·

1 Parent(s): 1defb5b

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -37

app.py CHANGED Viewed

@@ -1,79 +1,74 @@
 import gradio as gr
 import torch
 import json
-import torch.nn.functional as F
-from huggingface_hub import hf_hub_download
-from model import MedicalMasterAI
-# إعداد الجهاز
-device = torch.device("cpu") # المساحات المجانية تستخدم المعالج
-# 1. تحميل التوكنايزر
 with open("tokenizer_config.json", "r", encoding="utf-8") as f:
     vocab = json.load(f)
 stoi = vocab["stoi"]
 itos = vocab["itos"]
-def encode(text):
-    return [stoi.get(c, 0) for c in text] # 0 للمسافات أو الرموز غير المعروفة
-def decode(ids):
-    return "".join([itos.get(str(i), "") for i in ids])
-# 2. تحميل النموذج (مرة واحدة فقط)
 try:
-    model = MedicalMasterAI(vocab_size=115, n_layer=48, n_head=8, n_embd=768)
-    model_path = hf_hub_download(repo_id="gijl/Medical-Master-1.5B", filename="pytorch_model.bin")
-    state_dict = torch.load(model_path, map_location=device, weights_only=True)
-    model.load_state_dict(state_dict, strict=False)
     model.eval()
     model_loaded = True
 except Exception as e:
-    print(f"خطأ في تحميل النموذج: {e}")
     model_loaded = False
-# 3. دالة التوليد بنظام Streaming
 def medical_chat(message, history):
     if not model_loaded:
-        yield "النموذج لم يتم تحميله بشكل صحيح."
         return
-    # بناء البرومبت
     prompt = f"Question: {message} Answer:"
-    idx = torch.tensor([encode(prompt)], dtype=torch.long).to(device)
     generated_text = ""
-    # استخدام inference_mode لتسريع المعالج
-    with torch.inference_mode():
-        for _ in range(150): # تقليل العدد لسرعة الاستجابة
-            # القص ليتناسب مع حجم الـ Position Embedding (256)
-            idx_cond = idx[:, -256:]
-            logits = model(idx_cond)
-            logits = logits[:, -1, :] / 0.8 # درجة الحرارة
-            probs = F.softmax(logits, dim=-1)
             idx_next = torch.multinomial(probs, num_samples=1)
-            # إضافة الحرف الجديد
-            idx = torch.cat((idx, idx_next), dim=1)
             char = decode([idx_next.item()])
             generated_text += char
-            # إرسال النص المنتج حتى الآن للواجهة (Streaming)
             yield generated_text
-            # توقف إذا أنتج النموذج علامة توقف (مثل النقطة إذا رغبت)
             if idx_next.item() == stoi.get(".", -1):
                 break
-# 4. واجهة Gradio
 demo = gr.ChatInterface(
     fn=medical_chat,
-    title="Medical Master 1.5B (Streaming Mode)",
-    description="إذا تأخر الرد، انتظر قليلاً فالنموذج يولد النص حرفاً بحرف.",
 )
 if __name__ == "__main__":

 import gradio as gr
 import torch
 import json
+from transformers import AutoModelForCausalLM
+device = torch.device("cpu")
+# 1. تحميل التوكنايزر المخصص الخاص بك (بدون تعديل الملف)
 with open("tokenizer_config.json", "r", encoding="utf-8") as f:
     vocab = json.load(f)
 stoi = vocab["stoi"]
 itos = vocab["itos"]
+def encode(text): return [stoi.get(c, 0) for c in text]
+def decode(ids): return "".join([itos.get(str(i), "") for i in ids])
+# 2. تحميل النموذج باستخدام مكتبة Transformers مباشرة
 try:
+    print("جاري تحميل النموذج من Hugging Face...")
+    # هذا السطر سيقرأ config.json و pytorch_model.bin بشكل صحيح ومطابق 100%
+    model = AutoModelForCausalLM.from_pretrained(
+        "gijl/Medical-Master-1.5B",
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True
+    )
+    model.to(device)
     model.eval()
     model_loaded = True
+    print("تم التحميل بنجاح وتم مطابقة الأوزان!")
 except Exception as e:
+    print(f"Error: {e}")
     model_loaded = False
+# 3. دالة المحادثة (Streaming)
 def medical_chat(message, history):
     if not model_loaded:
+        yield "حدث خطأ في تحميل النموذج."
         return
     prompt = f"Question: {message} Answer:"
+    input_ids = torch.tensor([encode(prompt)], dtype=torch.long).to(device)
     generated_text = ""
+    with torch.no_grad():
+        for _ in range(150): # توليد 150 حرف
+            # الحد الأقصى للسياق هو 256 كما هو في config.json
+            idx_cond = input_ids[:, -256:]
+            # تمرير البيانات لنموذج HF
+            outputs = model(input_ids=idx_cond)
+            logits = outputs.logits[:, -1, :] / 0.7 # حرارة 0.7 لتقليل العشوائية
+            probs = torch.nn.functional.softmax(logits, dim=-1)
             idx_next = torch.multinomial(probs, num_samples=1)
+            input_ids = torch.cat((input_ids, idx_next), dim=1)
             char = decode([idx_next.item()])
             generated_text += char
             yield generated_text
+            # التوقف إذا وصل لنقطة
             if idx_next.item() == stoi.get(".", -1):
                 break
+# 4. بناء الواجهة
 demo = gr.ChatInterface(
     fn=medical_chat,
+    title="Medical Master 1.5B",
+    description="مساعد طبي ذكي يعمل بحروف اللغة العربية والإنجليزية.",
 )
 if __name__ == "__main__":