maxdougly committed on
Commit
f8b7a6c
·
verified ·
1 Parent(s): 985b94e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,7 +7,7 @@ from peft import AutoPeftModelForCausalLM
7
  from transformers import AutoTokenizer
8
 
9
  # Load the model and tokenizer globally
10
- model = AutoPeftModelForCausalLM.from_pretrained("eforse01/lora_model")
11
  tokenizer = AutoTokenizer.from_pretrained("eforse01/lora_model")
12
 
13
  @spaces.GPU(duration=120) # Decorate the function for ZeroGPU
@@ -29,8 +29,8 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
29
  return_tensors="pt", # Return tensors for PyTorch
30
  )
31
 
32
- # Ensure input_ids is extracted
33
- input_ids = inputs # Directly using tensor returned from apply_chat_template
34
  print("Input IDs shape:", input_ids.shape)
35
 
36
  # Generate response
 
7
  from transformers import AutoTokenizer
8
 
9
  # Load the model and tokenizer globally
10
+ model = AutoPeftModelForCausalLM.from_pretrained("eforse01/lora_model").to("cuda") # Move model to CUDA
11
  tokenizer = AutoTokenizer.from_pretrained("eforse01/lora_model")
12
 
13
  @spaces.GPU(duration=120) # Decorate the function for ZeroGPU
 
29
  return_tensors="pt", # Return tensors for PyTorch
30
  )
31
 
32
+ # Ensure input_ids is moved to the same device as the model
33
+ input_ids = inputs.to("cuda") # Move input_ids to CUDA
34
  print("Input IDs shape:", input_ids.shape)
35
 
36
  # Generate response