Spaces:

Lewis7887
/

mys

Sleeping

App Files Files Community

Lewis7887 committed on Feb 26

Commit

95fbd1e

verified ·

1 Parent(s): df5e808

Create app.py

Browse files

Files changed (1) hide show

app.py +67 -0

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Hugging Face Hub identifier of the chat model served by this app.
+model_id = "Qwen/Qwen2.5-7B-Instruct"
+# 1. Load the tokenizer that belongs to the model. It is also used further
+#    down to render the conversation with the model's chat template.
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+# 2. Load the model.
+# torch_dtype=torch.bfloat16 requests 16-bit weights to reduce memory use, and
+# device_map="auto" leaves device placement to the library.
+# NOTE(review): whether the model fits in ZeroGPU memory cannot be verified
+# from this file - confirm on the target hardware.
+model = AutoModelForCausalLM.from_pretrained(
+    model_id, torch_dtype=torch.bfloat16, device_map="auto"
+)
+# 3. The Generation Function
+# The @spaces.GPU decorator is the magic word that gives you free GPU access
+@spaces.GPU
+def generate_response(message, history):
+    # Format the ongoing conversation history for Qwen
+    messages = []
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": bot_msg})
+    # Add the newest message
+    messages.append({"role": "user", "content": message})
+    # Apply Qwen's specific chat template
+    text = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    # Convert text to tokens and send to the GPU
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    # Generate the response
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512,  # Maximum length of the response
+        temperature=0.7,  # Creativity (0.0 is robotic, 1.0 is highly creative)
+    )
+    # Strip away the prompt so we only display the new answer
+    generated_ids = [
+        output_ids[len(input_ids) :]
+        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    # Decode tokens back into readable text
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response
+# 4. Build the web interface: a Gradio chat UI that calls generate_response
+#    with the new message and the conversation history.
+demo = gr.ChatInterface(
+    fn=generate_response,
+    title="My Qwen 2.5 Chatbot",
+    description="Running entirely for free using Hugging Face ZeroGPU.",
+)
+# 5. Launch the app only when this file is executed as a script
+if __name__ == "__main__":
+    demo.launch()