hutlim
/

hutlim committed on
Commit
83a3c09
·
verified ·
1 Parent(s): 6157eb9

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +9 -4
handler.py CHANGED
@@ -18,7 +18,7 @@ class EndpointHandler:
18
  )
19
  self.model = AutoModelForCausalLM.from_pretrained(
20
  model_dir,
21
- torch_dtype=self.dtype,
22
  trust_remote_code=True,
23
  ).to(self.device).eval()
24
 
@@ -43,22 +43,27 @@ class EndpointHandler:
43
  return f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {document}"
44
 
45
  def _process_inputs(self, pairs: List[str]) -> Dict[str, torch.Tensor]:
 
46
  inputs = self.tokenizer(
47
  pairs,
48
  padding=False,
49
- truncation="longest_first",
50
  return_attention_mask=False,
 
51
  max_length=self.max_length - len(self.prefix_tokens) - len(self.suffix_tokens),
52
  )
53
 
 
54
  for i, ids in enumerate(inputs["input_ids"]):
55
  inputs["input_ids"][i] = self.prefix_tokens + ids + self.suffix_tokens
56
 
 
 
57
  padded = self.tokenizer.pad(
58
  inputs,
59
- padding=True,
60
  return_tensors="pt",
61
- max_length=self.max_length,
62
  )
63
 
64
  for k in padded:
 
18
  )
19
  self.model = AutoModelForCausalLM.from_pretrained(
20
  model_dir,
21
+ dtype=self.dtype,
22
  trust_remote_code=True,
23
  ).to(self.device).eval()
24
 
 
43
  return f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {document}"
44
 
45
  def _process_inputs(self, pairs: List[str]) -> Dict[str, torch.Tensor]:
46
+ # 1. First, encode the text and handle truncation properly
47
  inputs = self.tokenizer(
48
  pairs,
49
  padding=False,
50
+ truncation=True, # Change from "longest_first" to True for simpler logic
51
  return_attention_mask=False,
52
+ # Subtract the length of your prefix and suffix from the limit
53
  max_length=self.max_length - len(self.prefix_tokens) - len(self.suffix_tokens),
54
  )
55
 
56
+ # 2. Manually prepend/append your special tokens
57
  for i, ids in enumerate(inputs["input_ids"]):
58
  inputs["input_ids"][i] = self.prefix_tokens + ids + self.suffix_tokens
59
 
60
+ # 3. FIX: When padding, use 'max_length' if you want a fixed size,
61
+ # or just padding=True to pad to the longest in the batch.
62
  padded = self.tokenizer.pad(
63
  inputs,
64
+ padding=True, # This will pad to the longest sequence in the current batch
65
  return_tensors="pt",
66
+ # Remove max_length here to stop the warning
67
  )
68
 
69
  for k in padded: