Spaces:

nikravan
/

HRM-Text-1B

Running on Zero

nikravan committed on 1 day ago

Commit

d5392ef

verified ·

1 Parent(s): 8615e88

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ from spaces import GPU
-model_id = "sapientinc/HRM-Text-1B"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
@@ -13,10 +13,6 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
 ).cuda().eval()
-# synth,cot composite — reasoning / CoT style (see Disclaimer for other modes)
-condition = "<|quad_end|><|object_ref_end|>"
-prompt = f"<|im_start|>{condition}Explain why the sky is blue.<|im_end|>"
 # Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
@@ -24,22 +20,19 @@ prompt = f"<|im_start|>{condition}Explain why the sky is blue.<|im_end|>"
 @GPU
 def generate_response(message, history):
-    input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
-    input_ids["token_type_ids"] = torch.ones_like(input_ids["input_ids"])
-    chat_history_ids = input_ids
-    response_ids = model.generate(
-        chat_history_ids,
-        max_length=200,
-        pad_token_id=tokenizer.eos_token_id,
-        do_sample=True,
-        temperature=0.7
-    )
     with torch.no_grad():
-        out = model.generate(**chat_history_ids, max_new_tokens=256, do_sample=False)
-    response = tokenizer.decode(out[0], skip_special_tokens=False)
-    return response.strip()
 chatbot = gr.ChatInterface(
     fn=generate_response,
     title="Simple ZeroGPU Chatbot",

+  model_id = "sapientinc/HRM-Text-1B"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     trust_remote_code=True,
 ).cuda().eval()
 # Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
 @GPU
 def generate_response(message, history):
+    # synth,cot composite — reasoning / CoT style (see Disclaimer for other modes)
+    condition = "<|quad_end|><|object_ref_end|>"
+    prompt = f"<|im_start|>{condition}Explain why the sky is blue.<|im_end|>"
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    # Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
+    inputs["token_type_ids"] = torch.ones_like(inputs["input_ids"])
     with torch.no_grad():
+        out = model.generate(**inputs, max_new_tokens=256, do_sample=False)
+    return tokenizer.decode(out[0], skip_special_tokens=False)
 chatbot = gr.ChatInterface(
     fn=generate_response,
     title="Simple ZeroGPU Chatbot",