ChevalierJoseph committed on
Commit
267fefd
·
verified ·
1 Parent(s): ee72852

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +21 -6
handler.py CHANGED
@@ -19,12 +19,27 @@ class EndpointHandler:
19
 
20
  # Load tokenizer and model
21
  self.tokenizer = AutoTokenizer.from_pretrained(path)
22
- self.model = AutoModelForCausalLM.from_pretrained(
23
- path,
24
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
25
- device_map="auto" if torch.cuda.is_available() else None,
26
- trust_remote_code=True
27
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # Set pad token if it doesn't exist
30
  if self.tokenizer.pad_token is None:
 
19
 
20
  # Load tokenizer and model
21
  self.tokenizer = AutoTokenizer.from_pretrained(path)
22
+
23
+ # Try to load without quantization first
24
+ try:
25
+ self.model = AutoModelForCausalLM.from_pretrained(
26
+ path,
27
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
28
+ device_map="auto" if torch.cuda.is_available() else None,
29
+ trust_remote_code=True,
30
+ load_in_8bit=False,
31
+ load_in_4bit=False
32
+ )
33
+ except Exception as e:
34
+ logger.warning(f"Failed to load without quantization: {e}")
35
+ # Fallback: try with different settings
36
+ self.model = AutoModelForCausalLM.from_pretrained(
37
+ path,
38
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
39
+ device_map="auto" if torch.cuda.is_available() else None,
40
+ trust_remote_code=True,
41
+ use_safetensors=True
42
+ )
43
 
44
  # Set pad token if it doesn't exist
45
  if self.tokenizer.pad_token is None: