| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
| |
# Local directory (or Hugging Face hub id) containing the fine-tuned
# model weights and tokenizer files.
MODEL_PATH = "model_files"


# System prompt prepended to conversations. Uses <|system|> ... </s>
# chat-template markers — presumably matching the base model's expected
# chat format; NOTE(review): confirm against the tokenizer's chat template.
# Runtime string: must not be altered or reflowed.
DEFAULT_PROMPT = """<|system|>
You are a compassionate listener. Respond with:
- Short, natural sentences
- Occasional empathetic sounds ("Oh...", "I see")
- Open-ended questions when appropriate
- Validation before advice
- Clear crisis handoff when needed
Examples of good responses:
1. "That sounds really overwhelming. Can you tell me more about what's been happening?"
2. "I'm hearing a lot of pain in what you're sharing. Have you talked to anyone about this?"
3. "This seems really important. Let's focus on how you're feeling right now."
</s>"""
|
|
def load_model():
    """
    Load the fine-tuned causal-LM model and tokenizer from MODEL_PATH.

    Uses fp16 + 4-bit quantization when CUDA is available, fp32 on CPU.
    Device placement is delegated entirely to accelerate via
    ``device_map="auto"``.

    Returns:
        model: The loaded Hugging Face model, in eval mode.
        tokenizer: The corresponding tokenizer (left-padded, with a
            pad token guaranteed to exist).
        device: "cuda" or "cpu" — the primary device for input tensors.
    """
    print(f"🔍 Loading model from: {MODEL_PATH}")

    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_PATH,
        cache_dir="./cache",
        use_fast=True,
        padding_side="left",  # left padding is required for causal-LM generation
    )
    # Robustness: many causal-LM tokenizers ship without a pad token, and
    # left-padded batching crashes without one. Reuse EOS as pad.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # NOTE(review): load_in_4bit as a direct kwarg is deprecated in recent
    # transformers versions; migrate to quantization_config=BitsAndBytesConfig(
    # load_in_4bit=True) when the dependency floor allows.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        cache_dir="./cache",
        trust_remote_code=True,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        device_map="auto",
        load_in_4bit=use_cuda,
    )

    # BUG FIX: the original called model.to(device) here. With
    # device_map="auto" accelerate has already dispatched the weights, and
    # calling .to() on a 4-bit quantized model raises ValueError at runtime.
    # No manual move is needed — accelerate owns placement.
    model.eval()  # inference only: disable dropout etc.

    print("✅ Model successfully loaded.")
    return model, tokenizer, device
|
|
| |
| if __name__ == "__main__": |
| model, tokenizer, device = load_model() |
| print("Model and tokenizer successfully loaded.") |