File size: 1,951 Bytes
505f2c1
eb06650
 
 
 
 
8615e88
785df5e
8615e88
 
 
 
 
 
 
 
 
 
 
eb06650
 
 
d5392ef
 
 
dba52b2
 
 
 
e0c2ba9
d5392ef
803f51e
d5392ef
 
 
 
 
8615e88
d5492e2
1da3a52
caafc4b
 
 
 
 
 
 
505f2c1
eb06650
 
dba52b2
d5492e2
505f2c1
 
 
eb06650
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from spaces import GPU



# Hub repository id used for both the tokenizer and the model weights.
model_id = "sapientinc/HRM-Text-1B"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# trust_remote_code=True allows transformers to run the modelling code that
# ships inside the model repository; weights are loaded as bfloat16.
model = AutoModelForCausalLM.from_pretrained(
    model_id, dtype=torch.bfloat16, trust_remote_code=True
)
# Move the weights to the CUDA device, then switch to evaluation mode.
model = model.cuda()
model = model.eval()


# Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.



def _extract_reply(decoded, start_marker="<|object_ref_end|>", end_marker="<|box_end|>"):
    """Return the part of *decoded* between *start_marker* and *end_marker*.

    Args:
        decoded: Full decoded text, special tokens included.
        start_marker: Tag after which the wanted text begins (first occurrence).
        end_marker: Tag that terminates the wanted text (first occurrence
            after *start_marker*).

    Returns:
        The text following *start_marker* up to (not including) *end_marker*.
        If *end_marker* is absent, everything after *start_marker* is returned.
        If *start_marker* is absent, the search for *end_marker* starts at the
        beginning of *decoded* instead of at an arbitrary offset.
    """
    _, found, tail = decoded.partition(start_marker)
    if not found:
        # str.find() would have returned -1 here; adding len(start_marker) to
        # that silently slices from the middle of the text, so fall back to
        # the whole decoded string.
        tail = decoded
    reply, _, _ = tail.partition(end_marker)
    return reply


@GPU
def generate_response(message, history):
    """Generate a single-turn reply to *message* with the HRM-Text model.

    Args:
        message: The text the user just submitted.
        history: Previous turns supplied by the chat UI. Deliberately unused:
            only the current message is placed in the prompt, so every round
            is answered as an independent question.

    Returns:
        The decoded text found between the ``<|object_ref_end|>`` tag of the
        prompt and the first following ``<|box_end|>`` tag (or the end of the
        output when that tag is missing).
        NOTE(review): the slice starts right after the condition tags, so if
        the decoded sequence contains the prompt (as the marker search
        assumes) the returned text begins with the user's message — confirm
        that this echo is intended.
    """
    conversation = message

    # synth,cot composite — reasoning / CoT style (see Disclaimer for other modes)
    condition = "<|quad_end|><|object_ref_end|>"
    prompt = f"<|im_start|>{condition}{conversation} "

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
    inputs["token_type_ids"] = torch.ones_like(inputs["input_ids"])

    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=1000, do_sample=False)

    # Special tokens are kept on purpose: they are the markers used to cut
    # the reply out of the decoded text.
    full_output = tokenizer.decode(out[0], skip_special_tokens=False)
    return _extract_reply(full_output)
# Chat UI wired to generate_response; the example prompts are offered as
# ready-made questions in the interface.
chatbot = gr.ChatInterface(
    fn=generate_response,
    examples=[
        "Explain why the sky is blue?",
        "Introduce yourself.",
        "explain why ship float on water?",
    ],
    title="Simple ZeroGPU Chatbot",
    description="A simple chatbot using HRM-Text for test, running on Hugging Face ZeroGPU. use only one question in any round",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    chatbot.launch()