# HRM-Text-1B / app.py
# nikravan's picture
# Update app.py
# d5492e2 verified
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from spaces import GPU
# Hub identifier shared by the tokenizer and the model weights.
model_id = "sapientinc/HRM-Text-1B"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# trust_remote_code=True lets transformers run the modelling code shipped in
# the model repository; the weights are loaded in bfloat16.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.bfloat16,
    trust_remote_code=True,
)
# Move the weights to the CUDA device, then switch to inference mode.
model = model.cuda()
model = model.eval()
# Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
@GPU
def generate_response(message, history):
    """Generate a single-turn reply for the chat interface.

    The prompt is built from ``message`` alone, prefixed with the condition
    tokens. Generation runs with sampling disabled, and the decoded output is
    trimmed to the text between the ``<|object_ref_end|>`` marker and the
    first ``<|box_end|>`` marker that follows it.

    Args:
        message: The user's latest chat message.
        history: Earlier turns passed in by the chat interface. Ignored: each
            call is treated as an independent single-question round.

    Returns:
        The decoded text after the condition marker, cut at ``<|box_end|>``
        when that marker is present, otherwise running to the end of the
        decoded output.
    """
    start_marker = "<|object_ref_end|>"
    end_marker = "<|box_end|>"

    # Condition tokens for the "synth,cot" composite (reasoning / CoT style),
    # per the original author's note; other modes are described in the
    # model's Disclaimer.
    condition = "<|quad_end|><|object_ref_end|>"
    prompt = f"<|im_start|>{condition}{message} "

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Flag every prompt token with token type 1 so the prompt is treated as a
    # single bidirectional prefix block (the model's "PrefixLM mask").
    inputs["token_type_ids"] = torch.ones_like(inputs["input_ids"])

    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=1000, do_sample=False)

    # Special tokens are kept because the markers below are special tokens.
    full_output = tokenizer.decode(out[0], skip_special_tokens=False)

    # NOTE(review): the whole sequence is decoded, so the slice below
    # presumably still begins with the prompt text before the generated
    # continuation — confirm whether that echo is intended.
    marker_pos = full_output.find(start_marker)
    # str.find returns -1 when the marker is absent; fall back to the start
    # of the text instead of slicing from an arbitrary offset.
    start = 0 if marker_pos == -1 else marker_pos + len(start_marker)

    # Only an end marker that comes after the start position is meaningful.
    end = full_output.find(end_marker, start)
    if end == -1:  # <|box_end|> was not found
        return full_output[start:]
    return full_output[start:end]
# Chat UI wired to generate_response; the examples appear as one-click prompts.
chatbot = gr.ChatInterface(
    fn=generate_response,
    title="Simple ZeroGPU Chatbot",
    description="A simple chatbot using HRM-Text for test, running on Hugging Face ZeroGPU. use only one question in any round",
    examples=[
        "Explain why the sky is blue?",
        "Introduce yourself.",
        "explain why ship float on water?",
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    chatbot.launch()