VARestorer / tools /prompt_rewriter.py

add HF model card and mirror runnable codebase

7f7272e 15 days ago

6.51 kB

	import json
	import os
	import os.path as osp
	import argparse
	import sys
	import re
	import http.client
	import base64
	import tqdm
	import random
	import traceback
	import time
	from io import BytesIO

	from PIL import Image
	import openai

	from conf import GPT_AK


	def encode_image(image_path, size=(512, 512)):
	    """
	    Encode an image file as a Base64 string, optionally resizing it first.

	    Args:
	    - image_path (str): Path to the image file.
	    - size (tuple | None): Target size as (width, height). When None, the
	      file's raw bytes are encoded unchanged and the image is never decoded.

	    Returns:
	    - str: Base64 encoded string of the (possibly resized) image.
	    """
	    if size is None:
	        with open(image_path, "rb") as image_file:
	            return base64.b64encode(image_file.read()).decode("utf-8")

	    with Image.open(image_path) as img:
	        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
	        # under its current name and is also available in older releases.
	        img_resized = img.resize(size, Image.LANCZOS)
	        img_buffer = BytesIO()
	        # Re-encode in the source format; fall back to PNG if Pillow reports none.
	        img_resized.save(img_buffer, format=img.format or "PNG")
	        return base64.b64encode(img_buffer.getvalue()).decode("utf-8")


	# Default system prompt, used by PromptRewriter when no `system` text is supplied.
	# It instructs the model to answer each user request with a single
	# "<prompt:...><cfg:...>" sentence: a detailed image description plus a cfg
	# value (1 when the image is likely to contain a clear face, otherwise 3).
	SYSTEM = """
	You are part of a team of bots that creates images. You work with an assistant bot that will draw anything you say.
	For example, outputting the prompt and parameters like "<prompt:a beautiful morning in the woods with the sun peaking through the trees><cfg:3>" will trigger your partner bot to output an image of a forest morning, as described.
	You will be prompted by users looking to create detailed, amazing images. The way to accomplish this is to refine their short prompts and make them extremely detailed and descriptive.
	- You will only ever output a single image description sentence per user request.
	- Each image description sentence should be consist of "<prompt:xxx><cfg:xxx>", where <prompt:xxx> is the image description, <cfg:xxx> is the parameter that control the image generation.
	Here are the guidelines to generate image description <prompt:xxx> :
	- Refine users' prompts and make them extremely detailed and descriptive but keep the meaning unchanged (very important).
	- For particularly long users' prompts (>50 words), they can be outputted directly without refining. Image descriptions must be between 8-512 words. Extra words will be ignored.
	- If the user's prompt requires rendering text, enclose the text with single quotation marks and prefix it with "the text".
	Here are the guidelines to set <cfg:xxx> :
	- Please first determine whether the image to be generated based on the user prompt is likely to contain a clear face. If it does, set <cfg:1>; if not, set <cfg:3>.
	"""

	# Few-shot demonstrations as (user prompt, expected assistant reply) pairs.
	# Every reply follows the "<prompt:...><cfg:...>" output format.
	_FEW_SHOT_EXAMPLES = (
	    (
	        "a tree",
	        "<prompt:A photo of a majestic oak tree stands proudly in the middle of a sunlit meadow, its branches stretching out like welcoming arms. The leaves shimmer in shades of vibrant green, casting dappled shadows on the soft grass below.><cfg:3>",
	    ),
	    (
	        "a young girl with red hair",
	        "<prompt:A young girl with vibrant red hair, close-up face, in the style of hyper-realistic portraiture, warm and inviting atmosphere, soft lighting, freckles, vintage effect><cfg:1>",
	    ),
	    (
	        "a man, close-up",
	        "<prompt:close-up portrait of a young man with freckles and curly hair, in the style of chiaroscuro, strong light and shadow contrast, intense gaze, background fades into darkness><cfg:1>",
	    ),
	    (
	        "Generate Never Stop Learning",
	        "<prompt:Generate an image with the text 'Never Stop Learning' in chalkboard style.><cfg:3>",
	    ),
	)

	# Default chat history: the pairs above flattened into alternating
	# user / assistant messages, in the order listed.
	FEW_SHOT_HISTORY = [
	    {"role": role, "content": content}
	    for user_prompt, assistant_reply in _FEW_SHOT_EXAMPLES
	    for role, content in (("user", user_prompt), ("assistant", assistant_reply))
	]

	class PromptRewriter(object):
	    """
	    Rewrites a short user prompt by sending it to a chat model.

	    Each request consists of the system message, then the few-shot history,
	    then the user's prompt as the final message.
	    """

	    def __init__(self, system=None, few_shot_history=None):
	        """
	        Args:
	            system (str, optional): System prompt text. The module-level SYSTEM
	                is used when this is empty or None.
	            few_shot_history (list, optional): Chat messages (role/content dicts)
	                placed between the system message and the user prompt. The
	                module-level FEW_SHOT_HISTORY is used when this is empty or None.
	        """
	        if not system:
	            system = SYSTEM
	        # Truthiness rather than len() so that None also selects the default.
	        if not few_shot_history:
	            few_shot_history = FEW_SHOT_HISTORY
	        self.system = [{"role": "system", "content": system}]
	        self.few_shot_history = few_shot_history

	    def _build_messages(self, prompt):
	        """Return the message list for `prompt`: system, few-shot turns, user turn."""
	        return [*self.system, *self.few_shot_history, {"role": "user", "content": prompt}]

	    def rewrite(self, prompt, model_name='gpt-4o-2024-08-06'):
	        """
	        Send `prompt` to the model and return its reply text.

	        Args:
	            prompt (str): The user's prompt to rewrite.
	            model_name (str, optional): Model name forwarded to get_gpt_result.

	        Returns:
	            The content returned by get_gpt_result.

	        Raises:
	            AssertionError: If get_gpt_result returned no content.
	        """
	        messages = self._build_messages(prompt)
	        result, _ = get_gpt_result(model_name=model_name, messages=messages, retry=5, ak=GPT_AK, return_json=False)
	        # Explicit raise instead of `assert` so the check still runs under
	        # `python -O`; AssertionError is kept so callers see the same type.
	        if not result:
	            raise AssertionError(f'no rewrite result returned for prompt: {prompt!r}')
	        return result


	def get_gpt_result(model_name='gpt-4o-2024-05-13', messages=None, retry=5, ak=None, return_json=False):
	    """
	    Request a chat completion through the Azure OpenAI client, retrying on failure.

	    Args:
	        model_name (str, optional): Model name passed to the API.
	            Defaults to 'gpt-4o-2024-05-13'.
	        messages (list, optional): Chat messages to send. When empty or None,
	            no request is made and the failure value is returned.
	        retry (int, optional): Maximum number of attempts. Defaults to 5.
	        ak (str, optional): API key handed to the client.
	        return_json (bool, optional): When True, the request asks for
	            response_format={"type": "json_object"}.
	    Returns:
	        tuple: (content, None) on success, where content is the message content
	        of the first choice; (None, -1) when no attempt succeeded.
	    Raises:
	        Exceptions raised by a request are printed and retried, not re-raised.
	        Errors from constructing the client, and BaseExceptions such as
	        KeyboardInterrupt, are not caught here and propagate to the caller.
	    """
	    # Nothing to send: every attempt would fail, so skip the retries and sleeps.
	    if not messages:
	        return None, -1

	    client = openai.AzureOpenAI(
	        azure_endpoint="https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl",
	        api_version="2023-07-01-preview",
	        api_key=ak,
	    )

	    request_kwargs = {"model": model_name, "messages": messages}
	    if return_json:
	        request_kwargs["response_format"] = {"type": "json_object"}

	    for attempt in range(retry):
	        try:
	            completion = client.chat.completions.create(**request_kwargs)
	            return completion.choices[0].message.content, None
	        except Exception:
	            # KeyboardInterrupt is not an Exception subclass, so Ctrl-C is never
	            # swallowed here and interrupts the retry loop immediately.
	            traceback.print_exc()
	            # Back off before the next attempt; after the last one there is
	            # nothing left to wait for.
	            if attempt + 1 < retry:
	                time.sleep(10 + random.randint(2, 5) ** (attempt + 1))
	    return None, -1

	if __name__ == '__main__':
	    # Manual smoke run: rewrite a few sample prompts and print each result.
	    # Empty `system` / `few_shot_history` select the module-level defaults.
	    var_t2i_prompt_rewriter = PromptRewriter(system='', few_shot_history=[])

	    prompt_list = [
	        'a tree',
	        'two dogs',
	        'an oil painting of a house',
	        'a Chinese model sits in the train. Magazine style',
	        'two girls',
	        'countryside',
	        'a rabbit fights with a tiger',
	        'a beach in Hawaii',
	    ]

	    for prompt in prompt_list:
	        result = var_t2i_prompt_rewriter.rewrite(prompt)
	        print(f'prompt: {prompt}, result: {result}')