nikravan committed on
Commit
d5392ef
·
verified ·
1 Parent(s): 8615e88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -20
app.py CHANGED
@@ -5,7 +5,7 @@ from spaces import GPU
5
 
6
 
7
 
8
- model_id = "sapientinc/HRM-Text-1B"
9
  tokenizer = AutoTokenizer.from_pretrained(model_id)
10
  model = AutoModelForCausalLM.from_pretrained(
11
  model_id,
@@ -13,10 +13,6 @@ model = AutoModelForCausalLM.from_pretrained(
13
  trust_remote_code=True,
14
  ).cuda().eval()
15
 
16
- # synth,cot composite — reasoning / CoT style (see Disclaimer for other modes)
17
- condition = "<|quad_end|><|object_ref_end|>"
18
- prompt = f"<|im_start|>{condition}Explain why the sky is blue.<|im_end|>"
19
-
20
 
21
  # Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
22
 
@@ -24,22 +20,19 @@ prompt = f"<|im_start|>{condition}Explain why the sky is blue.<|im_end|>"
24
 
25
  @GPU
26
  def generate_response(message, history):
27
- input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
28
- input_ids["token_type_ids"] = torch.ones_like(input_ids["input_ids"])
29
- chat_history_ids = input_ids
30
- response_ids = model.generate(
31
- chat_history_ids,
32
- max_length=200,
33
- pad_token_id=tokenizer.eos_token_id,
34
- do_sample=True,
35
- temperature=0.7
36
- )
37
-
38
  with torch.no_grad():
39
- out = model.generate(**chat_history_ids, max_new_tokens=256, do_sample=False)
40
- response = tokenizer.decode(out[0], skip_special_tokens=False)
41
- return response.strip()
42
-
43
  chatbot = gr.ChatInterface(
44
  fn=generate_response,
45
  title="Simple ZeroGPU Chatbot",
 
5
 
6
 
7
 
8
+ model_id = "sapientinc/HRM-Text-1B"
9
  tokenizer = AutoTokenizer.from_pretrained(model_id)
10
  model = AutoModelForCausalLM.from_pretrained(
11
  model_id,
 
13
  trust_remote_code=True,
14
  ).cuda().eval()
15
 
 
 
 
 
16
 
17
  # Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
18
 
 
20
 
21
  @GPU
22
  def generate_response(message, history):
23
+
24
+
25
+ # synth,cot composite — reasoning / CoT style (see Disclaimer for other modes)
26
+ condition = "<|quad_end|><|object_ref_end|>"
27
+ prompt = f"<|im_start|>{condition}Explain why the sky is blue.<|im_end|>"
28
+
29
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
30
+ # Mark the prompt as a single bidirectional prefix block — see "PrefixLM mask" below.
31
+ inputs["token_type_ids"] = torch.ones_like(inputs["input_ids"])
32
+
 
33
  with torch.no_grad():
34
+ out = model.generate(**inputs, max_new_tokens=256, do_sample=False)
35
+ return tokenizer.decode(out[0], skip_special_tokens=False)
 
 
36
  chatbot = gr.ChatInterface(
37
  fn=generate_response,
38
  title="Simple ZeroGPU Chatbot",