HRM-Text-1B

Runtime error

nikravan committed 2 days ago

Commit

8615e88

verified ·

1 Parent(s): eb06650

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,16 +3,29 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from spaces import GPU
-model_name = "microsoft/DialoGPT-small"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
 @GPU
 def generate_response(message, history):
-    input_ids = tokenizer.encode(message + tokenizer.eos_token, return_tensors="pt").to(device)
     chat_history_ids = input_ids
     response_ids = model.generate(
         chat_history_ids,
@@ -21,7 +34,10 @@ def generate_response(message, history):
         do_sample=True,
         temperature=0.7
     )
-    response = tokenizer.decode(response_ids[:, chat_history_ids.shape[-1]:][0], skip_special_tokens=True)
     return response.strip()
 chatbot = gr.ChatInterface(

 from transformers import AutoModelForCausalLM, AutoTokenizer
 from spaces import GPU
+model_id = "sapientinc/HRM-Text-1B"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    dtype=torch.bfloat16,
+    trust_remote_code=True,
+).cuda().eval()
+# synth,cot composite — reasoning / CoT style (see Disclaimer for other modes)
+condition = "<|quad_end|><|object_ref_end|>"
+prompt = f"<|im_start|>{condition}Explain why the sky is blue.<|im_end|>"
+# Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
 @GPU
 def generate_response(message, history):
+    input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
+    input_ids["token_type_ids"] = torch.ones_like(input_ids["input_ids"])
     chat_history_ids = input_ids
     response_ids = model.generate(
         chat_history_ids,
         do_sample=True,
         temperature=0.7
     )
+    with torch.no_grad():
+        out = model.generate(**chat_history_ids, max_new_tokens=256, do_sample=False)
+    response = tokenizer.decode(out[0], skip_special_tokens=False)
     return response.strip()
 chatbot = gr.ChatInterface(