Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ from peft import AutoPeftModelForCausalLM
 from transformers import AutoTokenizer
 
 # Load the model and tokenizer globally
-model = AutoPeftModelForCausalLM.from_pretrained("eforse01/lora_model")
+model = AutoPeftModelForCausalLM.from_pretrained("eforse01/lora_model").to("cuda")  # Move model to CUDA
 tokenizer = AutoTokenizer.from_pretrained("eforse01/lora_model")
 
 @spaces.GPU(duration=120)  # Decorate the function for ZeroGPU
@@ -29,8 +29,8 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
         return_tensors="pt",  # Return tensors for PyTorch
     )
 
-    # Ensure input_ids is
-    input_ids = inputs  #
+    # Ensure input_ids is moved to the same device as the model
+    input_ids = inputs.to("cuda")  # Move input_ids to CUDA
     print("Input IDs shape:", input_ids.shape)
 
     # Generate response
|