import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# The model's repo ID on the Hugging Face Hub
model_id = "Qwen/Qwen2.5-7B-Instruct"
# 1. Load the Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
# 2. Load the Model
# bfloat16 halves the weight footprint relative to float32, so the 7B model
# fits comfortably in the VRAM a ZeroGPU Space provides; device_map="auto"
# lets accelerate place the weights on the available device.
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
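# Rough sizing: ~7.6B parameters x 2 bytes (bfloat16) is roughly 15 GB of
# weights, versus roughly 30 GB in float32.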
# 3. The Generation Function
# The @spaces.GPU decorator asks ZeroGPU to attach a GPU for the duration of
# each call; without it the Space would run this function on CPU.
@spaces.GPU
def generate_response(message, history):
    # Rebuild the ongoing conversation in the role/content format Qwen expects.
    # (Assumes Gradio's default tuple-style history: a list of [user, bot] pairs.)
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    # Add the newest user message
    messages.append({"role": "user", "content": message})
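    # At this point messages looks like (illustrative values):
    #   [{"role": "user", "content": "Hi"},
    #    {"role": "assistant", "content": "Hello!"},
    #    {"role": "user", "content": "How are you?"}]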
    # Apply Qwen's specific chat template
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
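    # For reference, Qwen's template renders ChatML-style markup, roughly:
    #   <|im_start|>user\nHi<|im_end|>\n<|im_start|>assistant\n
    # add_generation_prompt=True appends that trailing assistant header so the
    # model continues from there.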
    # Tokenize the prompt and move the tensors to the model's device
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Generate the response
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,  # Upper bound on the length of the reply
        do_sample=True,      # Sampling must be enabled for temperature to apply
        temperature=0.7,     # Lower values are more deterministic, higher more varied
    )
    # Slice off the prompt tokens so only the newly generated answer remains
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    # Decode tokens back into readable text
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
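
# Optional: a streaming variant (a sketch, not wired into the ChatInterface
# below). gr.ChatInterface also accepts generator functions: yielding a growing
# string re-renders the reply as it is produced. This uses transformers'
# TextIteratorStreamer; the name generate_response_streaming is ours.
@spaces.GPU
def generate_response_streaming(message, history):
    from threading import Thread
    from transformers import TextIteratorStreamer

    # History formatting omitted for brevity; see generate_response above.
    messages = [{"role": "user", "content": message}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # skip_prompt=True drops the echoed prompt; the streamer yields decoded text
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it in a thread and consume the streamer here
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            **model_inputs,
            streamer=streamer,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
        ),
    )
    thread.start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial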
# 4. Build the Web Interface
demo = gr.ChatInterface(
    fn=generate_response,
    title="My Qwen 2.5 Chatbot",
    description="Running entirely for free using Hugging Face ZeroGPU.",
)
# 5. Launch the app
if __name__ == "__main__":
    demo.launch()
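
# To try this locally: pip install torch transformers accelerate gradio spaces,
# then run `python app.py`. Outside an actual ZeroGPU Space the @spaces.GPU
# decorator should act as a no-op, and device_map="auto" falls back to
# whatever hardware is available.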