maxdougly committed on
Commit
f8b7a6c
·
verified ·
1 Parent(s): 985b94e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,7 +7,7 @@ from peft import AutoPeftModelForCausalLM
7
  from transformers import AutoTokenizer
8
 
9
  # Load the model and tokenizer globally
10
- model = AutoPeftModelForCausalLM.from_pretrained("eforse01/lora_model")
11
  tokenizer = AutoTokenizer.from_pretrained("eforse01/lora_model")
12
 
13
  @spaces.GPU(duration=120) # Decorate the function for ZeroGPU
@@ -29,8 +29,8 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
29
  return_tensors="pt", # Return tensors for PyTorch
30
  )
31
 
32
- # Ensure input_ids is extracted
33
- input_ids = inputs # Directly using tensor returned from apply_chat_template
34
  print("Input IDs shape:", input_ids.shape)
35
 
36
  # Generate response
 
7
  from transformers import AutoTokenizer
8
 
9
  # Load the model and tokenizer globally
10
+ model = AutoPeftModelForCausalLM.from_pretrained("eforse01/lora_model").to("cuda") # Move model to CUDA
11
  tokenizer = AutoTokenizer.from_pretrained("eforse01/lora_model")
12
 
13
  @spaces.GPU(duration=120) # Decorate the function for ZeroGPU
 
29
  return_tensors="pt", # Return tensors for PyTorch
30
  )
31
 
32
+ # Ensure input_ids is moved to the same device as the model
33
+ input_ids = inputs.to("cuda") # Move input_ids to CUDA
34
  print("Input IDs shape:", input_ids.shape)
35
 
36
  # Generate response