Spaces:
Running on Zero
Running on Zero
File size: 1,951 Bytes
505f2c1 eb06650 8615e88 785df5e 8615e88 eb06650 d5392ef dba52b2 e0c2ba9 d5392ef 803f51e d5392ef 8615e88 d5492e2 1da3a52 caafc4b 505f2c1 eb06650 dba52b2 d5492e2 505f2c1 eb06650 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from spaces import GPU
# Identifier handed to both from_pretrained calls below.
model_id = "sapientinc/HRM-Text-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights in bfloat16 with the repository's custom model code enabled,
# then move the model to the GPU and put it in evaluation mode.
model = AutoModelForCausalLM.from_pretrained(
    model_id, dtype=torch.bfloat16, trust_remote_code=True
)
model = model.cuda().eval()
# The prompt is marked as a single bidirectional prefix block (PrefixLM mask) inside generate_response.
def _extract_reply(full_output):
    """Return the text between ``<|object_ref_end|>`` and ``<|box_end|>``.

    Falls back gracefully when a marker is missing: without the start marker
    the whole string is searched, and without the end marker everything after
    the start marker is returned.
    """
    # partition() never yields a bogus offset the way find() == -1 does.
    _, found_start, tail = full_output.partition("<|object_ref_end|>")
    if not found_start:
        tail = full_output
    # Only look for the end marker *after* the start marker.
    reply, _, _ = tail.partition("<|box_end|>")
    return reply


@GPU
def generate_response(message, history):
    """Generate a single-turn reply to ``message``.

    Args:
        message: The user's latest chat message; inserted verbatim into the
            prompt.
        history: Earlier chat turns passed in by the chat interface. Ignored:
            every call is answered independently of previous turns.

    Returns:
        The decoded model output found after the first ``<|object_ref_end|>``
        marker and before the following ``<|box_end|>`` marker (or up to the
        end of the output when ``<|box_end|>`` is absent).
    """
    # Condition tokens for the "synth,cot composite" (reasoning / CoT style)
    # mode, per the original author's note.
    condition = "<|quad_end|><|object_ref_end|>"
    prompt = f"<|im_start|>{condition}{message} "
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Mark the prompt as a single bidirectional prefix block (PrefixLM mask).
    inputs["token_type_ids"] = torch.ones_like(inputs["input_ids"])

    with torch.no_grad():
        # Greedy decoding (do_sample=False), capped at 1000 new tokens.
        out = model.generate(**inputs, max_new_tokens=1000, do_sample=False)

    # Special tokens are kept so the markers can be located in the text.
    full_output = tokenizer.decode(out[0], skip_special_tokens=False)

    # NOTE(review): generate() presumably returns prompt + continuation, so the
    # extracted text would start with the echoed user message — confirm that
    # this is intended before stripping it.
    return _extract_reply(full_output)
# Chat UI: every submitted message is answered by generate_response.
chatbot = gr.ChatInterface(
    fn=generate_response,
    title="Simple ZeroGPU Chatbot",
    description="A simple chatbot using HRM-Text for test, running on Hugging Face ZeroGPU. use only one question in any round",
    examples=["Explain why the sky is blue?", "Introduce yourself.", "explain why ship float on water?"],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    chatbot.launch()