egrpo/scripts/evaluation/test_pickscore_score.py

Add files using upload-large-folder tool

b171568 verified 3 months ago

2.95 kB

	from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
	import torch
	from torchvision import transforms
	from PIL import Image
	import os
	from tqdm import tqdm
	from torch.nn import functional as F
	from open_clip import create_model_from_pretrained, get_tokenizer
	from transformers import AutoProcessor, AutoModel

	def initialize_model():
	    """Load the PickScore reward model together with its input processor.

	    The processor is read from ``ckpt/CLIP-ViT-H-14-laion2B-s32B-b79K`` and
	    the model weights from ``ckpt/PickScore_v1``. The model is moved to CUDA
	    when it is available (CPU otherwise) and switched to eval mode.

	    Returns:
	        A ``(model_dict, device)`` tuple. ``model_dict`` maps ``'model'`` to
	        the loaded model and ``'preprocess_val'`` to the processor.
	    """
	    if torch.cuda.is_available():
	        device = torch.device("cuda")
	    else:
	        device = torch.device("cpu")

	    process_path = "ckpt/CLIP-ViT-H-14-laion2B-s32B-b79K"
	    # download from https://huggingface.co/yuvalkirstain/PickScore_v1
	    model_path = "ckpt/PickScore_v1"

	    processor = AutoProcessor.from_pretrained(process_path)
	    reward_model = AutoModel.from_pretrained(model_path)

	    # Place the weights on the target device, then disable training-only
	    # behaviour for inference.
	    on_device = reward_model.to(device)
	    on_device.eval()

	    model_dict = {
	        'model': reward_model,
	        'preprocess_val': processor,
	    }
	    return model_dict, device

	def load_images_from_folder(folder):
	    """Load every ``.png`` file directly inside *folder* as an RGB image.

	    Args:
	        folder: Path of the directory to scan (sub-directories are not
	            searched).

	    Returns:
	        A ``(images, filenames)`` tuple of parallel lists: the RGB-converted
	        images and the file names they were read from, in sorted file-name
	        order.
	    """
	    images = []
	    filenames = []
	    # os.listdir() returns entries in arbitrary order; sorting keeps the
	    # image/prompt sequence reproducible between runs and machines.
	    for filename in sorted(os.listdir(folder)):
	        if not filename.endswith(".png"):
	            continue
	        img_path = os.path.join(folder, filename)
	        # convert() yields an independent in-memory image, so the source
	        # file can be closed right away instead of waiting for GC.
	        with Image.open(img_path) as source:
	            images.append(source.convert("RGB"))
	        filenames.append(filename)
	    return images, filenames

	def main():
	    """Score every image in the image folder with PickScore and print the mean.

	    Each ``.png`` is paired with a prompt taken from its own file name with
	    the extension removed. The per-image score is the model's exponentiated
	    logit scale multiplied by the dot product of the L2-normalized text and
	    image embeddings. Prints ``0`` as the average when no image was scored.
	    """
	    model_dict, device = initialize_model()
	    preprocess_val = model_dict['preprocess_val']

	    # initialize_model() already moved the model and set eval mode; repeating
	    # both here is a no-op that keeps main() safe if that ever changes.
	    reward_model = model_dict['model'].to(device)
	    reward_model.eval()

	    img_folder = "IMAGE_SAVE_FOLDER"
	    images, filenames = load_images_from_folder(img_folder)

	    eval_rewards = []
	    with torch.no_grad():
	        # zip() has no length, so tqdm needs the real item count to draw a
	        # correct progress bar for any folder size.
	        for image_pil, filename in tqdm(zip(images, filenames), total=len(images)):
	            image_inputs = preprocess_val(
	                images=[image_pil],
	                padding=True,
	                truncation=True,
	                max_length=77,
	                return_tensors="pt",
	            ).to(device)

	            prompt = os.path.splitext(filename)[0]  # strip the file extension

	            text_inputs = preprocess_val(
	                text=prompt,
	                padding=True,
	                truncation=True,
	                max_length=77,
	                return_tensors="pt",
	            ).to(device)

	            # Get unit-length embeddings so the dot product is a cosine
	            # similarity.
	            image_embs = reward_model.get_image_features(**image_inputs)
	            image_embs = image_embs / torch.norm(image_embs, dim=-1, keepdim=True)

	            text_embs = reward_model.get_text_features(**text_inputs)
	            text_embs = text_embs / torch.norm(text_embs, dim=-1, keepdim=True)

	            # Calculate scores: one prompt against one image gives a single
	            # value, which .item() turns into a Python float.
	            score = reward_model.logit_scale.exp() * (text_embs @ image_embs.T)[0]
	            eval_rewards.append(score.item())

	    avg_reward = sum(eval_rewards) / len(eval_rewards) if eval_rewards else 0
	    print(f"Average pickscore score: {avg_reward:.4f}")

	# Run the evaluation only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    main()