gijl committed on
Commit
4e023f7
·
verified ·
1 Parent(s): 72d7222

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -26
app.py CHANGED
@@ -1,47 +1,55 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
  import torch
4
- import os
5
 
6
  model_name = "gijl/gemma-4-E2B-it"
7
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
-
10
  model = AutoModelForCausalLM.from_pretrained(model_name,
11
  torch_dtype=torch.float16,
12
  device_map="auto")
13
 
14
- pipe = pipeline("text-generation",
15
- model=model_name,
16
- tokenizer=tokenizer,
17
- max_new_tokens=1500,
18
- temperature=0.7)
19
 
20
  def generate_response(message, history):
 
21
  messages = [
22
- [
23
- {
24
- "role": "system",
25
- "content": [{"type": "text",
26
- "text": "Você é ELIZA, uma terapeuta que responde com empatia e faz perguntas para entender melhor o paciente."},]
27
- },
28
- {
29
- "role": "user",
30
- "content": [{"type": "text",
31
- "text": message},]
32
- },
33
- ],
34
  ]
35
-
36
- response = pipe(messages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  for new_text in streamer:
38
- partial_text += new_text
39
- yield partial_text # هذا هو السطر المطلوب
40
 
41
  demo = gr.ChatInterface(
42
  generate_response,
43
- title="ELIZA (com LLM)",
44
  description="Compartilhe seus pensamentos e ELIZA irá ajudar você a refletir sobre eles."
45
  )
46
 
47
- demo.launch()
 
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextIteratorStreamer
3
  import torch
4
+ from threading import Thread
5
 
6
  model_name = "gijl/gemma-4-E2B-it"
7
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
9
  model = AutoModelForCausalLM.from_pretrained(model_name,
10
  torch_dtype=torch.float16,
11
  device_map="auto")
12
 
13
+ # إبقاء الـ pipeline كما هو أو استخدامه مباشرة
14
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 
 
15
 
16
  def generate_response(message, history):
17
+ # إعداد مدخلات المحادثة
18
  messages = [
19
+ {"role": "system", "content": "Você é ELIZA, uma terapeuta que responde com empatia e faz perguntas para entender melhor o paciente."},
20
+ {"role": "user", "content": message}
 
 
 
 
 
 
 
 
 
 
21
  ]
22
+
23
+ # 1. إعداد الـ Streamer
24
+ streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
25
+
26
+ # 2. إعداد إعدادات التوليد
27
+ # ملاحظة: تم استخدام tokenizer.apply_chat_template لتحويل الرسائل لتنسيق يفهمه النموذج
28
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
29
+
30
+ generation_kwargs = dict(
31
+ input_ids=inputs,
32
+ streamer=streamer,
33
+ max_new_tokens=150,
34
+ temperature=0.7,
35
+ do_sample=True
36
+ )
37
+
38
+ # 3. تشغيل التوليد في Thread منفصل لكي لا يتجمد التطبيق أثناء البث
39
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
40
+ thread.start()
41
+
42
+ # 4. البث المباشر: الـ yield هي السر هنا!
43
+ partial_text = ""
44
  for new_text in streamer:
45
+ partial_text += new_text
46
+ yield partial_text # يرسل النص قطعة قطعة للواجهة
47
 
48
  demo = gr.ChatInterface(
49
  generate_response,
50
+ title="ELIZA (com LLM Streaming)",
51
  description="Compartilhe seus pensamentos e ELIZA irá ajudar você a refletir sobre eles."
52
  )
53
 
54
+ if __name__ == "__main__":
55
+ demo.launch()