Spaces:

gijl
/

g

Sleeping

App Files Files Community

gijl committed 13 days ago

Commit

4e023f7

verified ·

1 Parent(s): 72d7222

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -26

app.py CHANGED Viewed

@@ -1,47 +1,55 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
-import os
 model_name = "gijl/gemma-4-E2B-it"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name,
                                              torch_dtype=torch.float16,
                                              device_map="auto")
-pipe = pipeline("text-generation",
-                model=model_name,  # passes the model name, not the already-loaded `model` object
-                tokenizer=tokenizer,
-                max_new_tokens=1500,
-                temperature=0.7)
 def generate_response(message, history):
     messages = [
-    [
-        {
-            "role": "system",
-            "content": [{"type": "text",
-                         "text": "Você é ELIZA, uma terapeuta que responde com empatia e faz perguntas para entender melhor o paciente."},]
-        },
-        {
-            "role": "user",
-            "content": [{"type": "text",
-                         "text": message},]
-        },
-    ],
     ]
-    response = pipe(messages)  # NOTE(review): `response` is not read by the lines that follow
     for new_text in streamer:
-    partial_text += new_text  # NOTE(review): `partial_text` is never initialised in the visible old-side lines
-    yield partial_text  # This is the required line
 demo = gr.ChatInterface(
     generate_response,
-    title="ELIZA (com LLM)",
     description="Compartilhe seus pensamentos e ELIZA irá ajudar você a refletir sobre eles."
 )
-demo.launch()  # called unconditionally at module level

 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextIteratorStreamer
 import torch
+from threading import Thread
 model_name = "gijl/gemma-4-E2B-it"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name,
                                              torch_dtype=torch.float16,
                                              device_map="auto")
+# Keep the pipeline as-is (or use it directly). NOTE(review): `pipe` is not referenced by the visible generate_response.
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def generate_response(message, history):
+    # Build the conversation input: a fixed system prompt plus the current user message (`history` is not used here)
     messages = [
+        {"role": "system", "content": "Você é ELIZA, uma terapeuta que responde com empatia e faz perguntas para entender melhor o paciente."},
+        {"role": "user", "content": message}
     ]
+    # 1. Set up the streamer
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    # 2. Set up the generation settings
+    # Note: tokenizer.apply_chat_template is used to convert the messages into a format the model understands
+    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+    generation_kwargs = dict(
+        input_ids=inputs,
+        streamer=streamer,
+        max_new_tokens=150,
+        temperature=0.7,
+        do_sample=True
+    )
+    # 3. Run generation in a separate Thread so the app does not freeze while streaming
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    # 4. Live streaming: each chunk from the streamer is appended and the accumulated text is yielded
+    partial_text = ""
     for new_text in streamer:
+        partial_text += new_text
+        yield partial_text # sends the text accumulated so far to the UI, piece by piece
 demo = gr.ChatInterface(
     generate_response,
+    title="ELIZA (com LLM Streaming)",
     description="Compartilhe seus pensamentos e ELIZA irá ajudar você a refletir sobre eles."
 )
+if __name__ == "__main__":  # launch only when this file is executed as a script
+    demo.launch()