# NOTE: removed copy-paste artifacts (file-viewer line-number gutter and git
# blame hashes) that were accidentally captured along with the source code.


import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr


# Hugging Face access token; gated models such as gemma-2 require one.
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    # Warn instead of raising so the app can still start; gated model
    # downloads will fail later without a token.
    print("HUGGINGFACE_TOKEN is not set; add it as a secret if the model is gated")

model_id = "google/gemma-2-9b-it"

# "auto" lets accelerate's device_map place layers on available GPU/CPU.
device = "auto"
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)

print(model_id, device, dtype)
histories = []

model = AutoModelForCausalLM.from_pretrained(
    model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
)
# The pipeline handles placement via device_map; it has no .to(device) method.
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=dtype,
    device_map=device,
)

@spaces.GPU(duration=120)
def generate_text(messages):
    """Generate a reply for a chat given as a list of role/content dicts.

    Args:
        messages: conversation history, a list of
            ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The most recent assistant turn's content, or a fallback string when
        the pipeline output has an unexpected shape or contains no
        assistant turn.
    """
    result = text_generator(
        messages, max_new_tokens=256, do_sample=True, temperature=0.7
    )

    generated_output = result[0]["generated_text"]
    if not isinstance(generated_output, list):
        return "Unexpected output format."

    # The pipeline echoes the whole conversation back; scan from the end to
    # find the newest assistant turn.
    for message in reversed(generated_output):
        if message.get("role") == "assistant":
            return message.get("content", "No content found.")
    return "No assistant response found."



def call_generate_text(message, history):
    """Gradio ChatInterface callback (type="messages").

    Appends the new user message to the conversation and returns the
    model's reply text (empty string on a runtime failure).

    Args:
        message: the new user message (str).
        history: prior turns as a list of ``{"role", "content"}`` dicts.

    NOTE(review): this block contained an unresolved git merge conflict;
    resolved to the 82325f6 side, which correctly generates from the full
    conversation *including* the new user message (the HEAD side passed
    only ``history``, dropping the new turn).
    """
    print(message)
    print(history)

    messages = history + [{"role": "user", "content": message}]
    try:
        text = generate_text(messages)
        history.append({"role": "assistant", "content": text})
        return text
    except RuntimeError as e:
        # GPU/OOM-style failures: log and fall through to an empty reply so
        # the UI does not crash.
        print(f"An unexpected error occurred: {e}")

    return ""

# Chat UI; type="messages" makes gradio pass history as a list of
# {"role", "content"} dicts, matching what call_generate_text expects.
demo = gr.ChatInterface(call_generate_text,type="messages")

if __name__ == "__main__":
    # share=True creates a public gradio link in addition to the local server.
    demo.launch(share=True)