hutlim
/

hutlim committed on
Commit
2f3cbcd
·
verified ·
1 Parent(s): 83a3c09

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +55 -19
handler.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from typing import Any, Dict, List
3
 
4
  import torch
@@ -7,34 +8,75 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
8
  class EndpointHandler:
9
  def __init__(self, path: str = ""):
10
- model_dir = path if path else os.getenv("HF_MODEL_DIR", ".")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
12
- self.dtype = torch.float16 if self.device == "cuda" else torch.float32
13
 
14
  self.tokenizer = AutoTokenizer.from_pretrained(
15
- model_dir,
16
  padding_side="left",
17
  trust_remote_code=True,
 
18
  )
 
19
  self.model = AutoModelForCausalLM.from_pretrained(
20
- model_dir,
21
- dtype=self.dtype,
22
  trust_remote_code=True,
 
23
  ).to(self.device).eval()
24
 
25
- self.token_false_id = self.tokenizer.convert_tokens_to_ids("no")
26
- self.token_true_id = self.tokenizer.convert_tokens_to_ids("yes")
 
 
 
 
 
 
 
 
 
27
 
28
  self.max_length = 8192
29
 
30
  self.prefix = (
31
- '<|im_start|>system\n'
32
  'Judge whether the Document meets the requirements based on the Query '
33
  'and the Instruct provided. Note that the answer can only be "yes" or "no".'
34
- '<|im_end|>\n'
35
- '<|im_start|>user\n'
36
  )
37
- self.suffix = '<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n'
38
 
39
  self.prefix_tokens = self.tokenizer.encode(self.prefix, add_special_tokens=False)
40
  self.suffix_tokens = self.tokenizer.encode(self.suffix, add_special_tokens=False)
@@ -43,27 +85,21 @@ class EndpointHandler:
43
  return f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {document}"
44
 
45
  def _process_inputs(self, pairs: List[str]) -> Dict[str, torch.Tensor]:
46
- # 1. First, encode the text and handle truncation properly
47
  inputs = self.tokenizer(
48
  pairs,
49
  padding=False,
50
- truncation=True, # Change from "longest_first" to True for simpler logic
51
  return_attention_mask=False,
52
- # Subtract the length of your prefix and suffix from the limit
53
  max_length=self.max_length - len(self.prefix_tokens) - len(self.suffix_tokens),
54
  )
55
 
56
- # 2. Manually prepend/append your special tokens
57
  for i, ids in enumerate(inputs["input_ids"]):
58
  inputs["input_ids"][i] = self.prefix_tokens + ids + self.suffix_tokens
59
 
60
- # 3. FIX: When padding, use 'max_length' if you want a fixed size,
61
- # or just padding=True to pad to the longest in the batch.
62
  padded = self.tokenizer.pad(
63
  inputs,
64
- padding=True, # This will pad to the longest sequence in the current batch
65
  return_tensors="pt",
66
- # Remove max_length here to stop the warning
67
  )
68
 
69
  for k in padded:
 
1
  import os
2
+ from pathlib import Path
3
  from typing import Any, Dict, List
4
 
5
  import torch
 
8
 
9
  class EndpointHandler:
10
  def __init__(self, path: str = ""):
11
+ model_dir = Path(path or os.getenv("HF_MODEL_DIR", ".")).resolve()
12
+
13
+ if not model_dir.exists():
14
+ raise FileNotFoundError(f"Model directory does not exist: {model_dir}")
15
+
16
+ # Helpful debug info in endpoint logs
17
+ print(f"[handler] loading model from: {model_dir}")
18
+ print(f"[handler] files: {[p.name for p in model_dir.iterdir()]}")
19
+
20
+ required_any = [
21
+ "config.json",
22
+ ]
23
+ missing_required = [f for f in required_any if not (model_dir / f).exists()]
24
+ if missing_required:
25
+ raise FileNotFoundError(
26
+ f"Missing required model files in {model_dir}: {missing_required}"
27
+ )
28
+
29
+ has_weights = any([
30
+ (model_dir / "model.safetensors").exists(),
31
+ (model_dir / "pytorch_model.bin").exists(),
32
+ any(model_dir.glob("model-*.safetensors")),
33
+ any(model_dir.glob("pytorch_model-*.bin")),
34
+ ])
35
+ if not has_weights:
36
+ raise FileNotFoundError(
37
+ f"No model weight file found in {model_dir}. "
38
+ f"Expected model.safetensors, pytorch_model.bin, or sharded weights."
39
+ )
40
+
41
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
42
+ self.torch_dtype = torch.float16 if self.device == "cuda" else torch.float32
43
 
44
  self.tokenizer = AutoTokenizer.from_pretrained(
45
+ str(model_dir),
46
  padding_side="left",
47
  trust_remote_code=True,
48
+ local_files_only=True,
49
  )
50
+
51
  self.model = AutoModelForCausalLM.from_pretrained(
52
+ str(model_dir),
53
+ torch_dtype=self.torch_dtype,
54
  trust_remote_code=True,
55
+ local_files_only=True,
56
  ).to(self.device).eval()
57
 
58
+ # Safer token lookup for decoder LMs: include leading space variants if needed
59
+ yes_ids = self.tokenizer.encode(" yes", add_special_tokens=False)
60
+ no_ids = self.tokenizer.encode(" no", add_special_tokens=False)
61
+ if len(yes_ids) != 1 or len(no_ids) != 1:
62
+ raise ValueError(
63
+ f'Expected single-token " yes"/" no", got yes={yes_ids}, no={no_ids}. '
64
+ "You may need a different scoring method for this tokenizer."
65
+ )
66
+
67
+ self.token_true_id = yes_ids[0]
68
+ self.token_false_id = no_ids[0]
69
 
70
  self.max_length = 8192
71
 
72
  self.prefix = (
73
+ "<|im_start|>system\n"
74
  'Judge whether the Document meets the requirements based on the Query '
75
  'and the Instruct provided. Note that the answer can only be "yes" or "no".'
76
+ "<|im_end|>\n"
77
+ "<|im_start|>user\n"
78
  )
79
+ self.suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
80
 
81
  self.prefix_tokens = self.tokenizer.encode(self.prefix, add_special_tokens=False)
82
  self.suffix_tokens = self.tokenizer.encode(self.suffix, add_special_tokens=False)
 
85
  return f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {document}"
86
 
87
  def _process_inputs(self, pairs: List[str]) -> Dict[str, torch.Tensor]:
 
88
  inputs = self.tokenizer(
89
  pairs,
90
  padding=False,
91
+ truncation=True,
92
  return_attention_mask=False,
 
93
  max_length=self.max_length - len(self.prefix_tokens) - len(self.suffix_tokens),
94
  )
95
 
 
96
  for i, ids in enumerate(inputs["input_ids"]):
97
  inputs["input_ids"][i] = self.prefix_tokens + ids + self.suffix_tokens
98
 
 
 
99
  padded = self.tokenizer.pad(
100
  inputs,
101
+ padding=True,
102
  return_tensors="pt",
 
103
  )
104
 
105
  for k in padded: