HRM-Text-1B

Runtime error

App Files Files Community

HRM-Text-1B / app.py

nikravan

Update app.py

d5492e2 verified about 19 hours ago

raw

history blame contribute delete

1.95 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from spaces import GPU



	model_id = "sapientinc/HRM-Text-1B"
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	model = AutoModelForCausalLM.from_pretrained(
	model_id,
	dtype=torch.bfloat16,
	trust_remote_code=True,
	).cuda().eval()


	# M#ark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.



	@GPU
	def generate_response(message, history):


	# synth,cot composite — reasoning / CoT style (see Disclaimer for other modes)
	conversation = message
	# for user_msg, assistant_msg in history:
	# conversation += f"user: {user_msg}\nassistant: {assistant_msg}\n"
	# conversation += f"user: {message}\nassistant: "

	condition = "<\|quad_end\|><\|object_ref_end\|>"
	prompt = f"<\|im_start\|>{condition}{conversation} "

	inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
	# Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
	inputs["token_type_ids"] = torch.ones_like(inputs["input_ids"])

	with torch.no_grad():
	out = model.generate(**inputs, max_new_tokens=1000, do_sample=False)
	full_output = tokenizer.decode(out[0], skip_special_tokens=False)
	start = full_output.find('<\|object_ref_end\|>') + len('<\|object_ref_end\|>')
	end = full_output.find('<\|box_end\|>')
	if end == -1: # اگر <\|box_end\|> پیدا نشد
	result = full_output[start:]
	else:
	result = full_output[start:end]
	return result
	chatbot = gr.ChatInterface(
	fn=generate_response,
	title="Simple ZeroGPU Chatbot",
	description="A simple chatbot using HRM-Text for test, running on Hugging Face ZeroGPU. use only one question in any round",
	examples=["Explain why the sky is blue?", "Introduce yourself.", "explain why ship float on water?"]
	)

	if __name__ == "__main__":
	chatbot.launch()