Spaces:

nikravan
/

HRM-Text-1B

Running on Zero

File size: 1,951 Bytes

505f2c1
eb06650
 
 
 
 
8615e88
785df5e
8615e88
 
 
 
 
 
 
 
 
 
 
eb06650
 
 
d5392ef
 
 
dba52b2
 
 
 
e0c2ba9
d5392ef
803f51e
d5392ef
 
 
 
 
8615e88
d5492e2
1da3a52
caafc4b
 
 
 
 
 
 
505f2c1
eb06650
 
dba52b2
d5492e2
505f2c1
 
 
eb06650

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from spaces import GPU



# Hugging Face Hub repository used for both the tokenizer and the model weights.
model_id = "sapientinc/HRM-Text-1B"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights in bfloat16; trust_remote_code=True is passed so the
# model implementation published with the checkpoint can be used.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    dtype=torch.bfloat16,
)
# Place the model on the GPU, then switch it to evaluation mode.
model = model.cuda()
model = model.eval()


# NOTE: the prompt is marked as a single bidirectional prefix block (PrefixLM mask) inside generate_response.



# Conditioning tokens inserted right after <|im_start|>. This pair selects the
# "synth,cot" composite (reasoning / CoT style) generation mode.
_CONDITION = "<|quad_end|><|object_ref_end|>"
# Marker that terminates the answer; anything generated after it is discarded.
_ANSWER_END = "<|box_end|>"
# Upper bound on the number of tokens generated per reply.
_MAX_NEW_TOKENS = 1000


@GPU
def generate_response(message: str, history: list) -> str:
    """Generate a single-turn reply to ``message`` with the HRM-Text model.

    Args:
        message: The user's current question. It is inserted verbatim into
            the prompt after the conditioning tokens.
        history: Previous chat turns supplied by ``gr.ChatInterface``. It is
            intentionally ignored: every request is answered as an
            independent single-turn prompt.

    Returns:
        The text produced by the model, cut at the first ``<|box_end|>``
        marker. If the marker was not generated (for example because the
        token limit was reached), everything that was generated is returned.
        The prompt itself is never part of the returned text.
    """
    prompt = f"<|im_start|>{_CONDITION}{message} "

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Mark every prompt token as belonging to one bidirectional prefix block
    # (PrefixLM mask): token_type_ids is all ones over the prompt.
    inputs["token_type_ids"] = torch.ones_like(inputs["input_ids"])
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        out = model.generate(
            **inputs, max_new_tokens=_MAX_NEW_TOKENS, do_sample=False
        )

    # generate() returns prompt + continuation. Decode only the continuation
    # so the user's message is not echoed back and so that marker-like text
    # typed by the user cannot be mistaken for the model's end marker.
    generated = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=False)

    # Special tokens are kept while decoding precisely so the end marker can
    # be located here; partition() returns the full text if it is absent.
    answer, _, _ = generated.partition(_ANSWER_END)
    return answer
# Chat UI: each submitted message is routed to generate_response, and the
# listed examples are offered as ready-made prompts.
chatbot = gr.ChatInterface(
    title="Simple ZeroGPU Chatbot",
    description="A simple chatbot using HRM-Text for test, running on Hugging Face ZeroGPU. use only one question in any round",
    examples=[
        "Explain why the sky is blue?",
        "Introduce yourself.",
        "explain why ship float on water?",
    ],
    fn=generate_response,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    chatbot.launch()