Spaces:
Runtime error
Runtime error
File size: 2,632 Bytes
1dd8d6e 376d532 1dd8d6e 376d532 6adb322 1dd8d6e 376d532 c20ba17 1dd8d6e 376d532 dc37782 376d532 1dd8d6e 8fd5823 1dd8d6e 8fd5823 376d532 82325f6 376d532 1dd8d6e 376d532 1dd8d6e d665e1b 040d697 9c8dc08 040d697 8abaccc 4e3ee48 9c8dc08 1dd8d6e ad0fa67 9c8dc08 4e3ee48 ad0fa67 82325f6 1dd8d6e 82325f6 d665e1b 4e3ee48 1dd8d6e dc37782 8fd5823 dc37782 376d532 8fd5823 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
# Hub access token, read from the environment (the message below suggests it
# is meant to be configured as a secret).
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    # A missing token is only reported; loading is still attempted with
    # token=None.
    print("no HUGGINGFACE_TOKEN if you need set secret ")

model_id = "google/gemma-2-9b-it"
device = "auto"  # forwarded as device_map to the model loader and the pipeline
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
print(model_id, device, dtype)

# Module-level list; nothing in the visible code reads or writes it.
histories = []

# The model and the pipeline are built once at import time and reused by
# every generate_text() call.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=huggingface_token,
    torch_dtype=dtype,
    device_map=device,
)
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=dtype,
    device_map=device,
)
@spaces.GPU(duration=120)
def generate_text(messages):
    """Run the chat pipeline and return the latest assistant reply as text.

    Args:
        messages: Conversation passed straight to ``text_generator``; the
            caller in this file builds it as a list of
            ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The ``"content"`` of the last message whose role is ``"assistant"``
        (``"No content found."`` if that message has no content key),
        ``"No assistant response found."`` if no such message exists, or
        ``"Unexpected output format."`` if the pipeline output is not a list.
    """
    outputs = text_generator(
        messages,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )
    conversation = outputs[0]["generated_text"]

    # Only a list of message dicts can be searched for an assistant turn.
    if not isinstance(conversation, list):
        return "Unexpected output format."

    # Walk backwards so the most recent assistant turn wins.
    latest_reply = next(
        (turn for turn in reversed(conversation) if turn.get("role") == "assistant"),
        None,
    )
    if latest_reply is None:
        return "No assistant response found."
    return latest_reply.get("content", "No content found.")
def call_generate_text(message, history):
    """Chat callback: answer ``message`` in the context of ``history``.

    Resolves the merge conflict that was left in this function. The new user
    message is appended to a copy of the history, and that full conversation
    (not the bare history) is sent to ``generate_text`` so the model sees the
    latest user turn.

    Args:
        message: Text of the new user turn.
        history: Previous turns as a list of ``{"role": ..., "content": ...}``
            dicts. It is not modified here; the reply is only returned.
            NOTE(review): this assumes the chat UI records the returned reply
            in its own history. Confirm against the Gradio version in use.

    Returns:
        The generated assistant text, or ``""`` if generation raised a
        ``RuntimeError``.
    """
    print(message)
    print(history)

    # Concatenation builds a new list, so the caller's history is untouched.
    messages = history + [{"role": "user", "content": message}]

    try:
        text = generate_text(messages)
    except RuntimeError as e:
        # Best effort: log the failure and return an empty reply.
        print(f"An unexpected error occurred: {e}")
        return ""
    return text
# Chat UI backed by call_generate_text, created in "messages" mode.
demo = gr.ChatInterface(
    call_generate_text,
    type="messages",
)

if __name__ == "__main__":
    # Start the app only when this file is run as a script.
    demo.launch(share=True)
|