# MagicTime demo app — Hugging Face Space "BestWishYsh/MagicTime" (commit ea4e66d)
import spaces  # NOTE: keep first — ZeroGPU Spaces require importing `spaces` before torch/CUDA init

# Standard library
import copy
import json  # used to serialize the generation config for the gr.Code output
import os
import random
import shutil
import time
from glob import glob

# Third-party
import gradio as gr
import torch
from diffusers import AutoencoderKL
from diffusers import DDIMScheduler
from diffusers.utils.import_utils import is_xformers_available
from huggingface_hub import snapshot_download
from omegaconf import OmegaConf
from safetensors import safe_open
from transformers import CLIPTextModel, CLIPTokenizer

# Local
from utils.unet import UNet3DConditionModel
from utils.pipeline_magictime import MagicTimePipeline
from utils.util import save_videos_grid, convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint, load_diffusers_lora_unet, convert_ldm_clip_text_model
# Root directory for all checkpoints; fetched from the Hub on first run.
model_path = "ckpts"

# Download only when the checkpoint root or either auxiliary model dir is missing.
if all(os.path.exists(p) for p in (model_path, f"{model_path}/model_real_esran", f"{model_path}/model_rife")):
    print(f"Model already exists in {model_path}, skipping download.")
else:
    print("Model not found, downloading from Hugging Face...")
    snapshot_download(repo_id="BestWishYsh/MagicTime", local_dir=model_path)

# Derived paths into the downloaded snapshot.
pretrained_model_path = f"{model_path}/Base_Model/stable-diffusion-v1-5"
inference_config_path = "sample_configs/RealisticVision.yaml"
magic_adapter_s_path = f"{model_path}/Magic_Weights/magic_adapter_s/magic_adapter_s.ckpt"
magic_adapter_t_path = f"{model_path}/Magic_Weights/magic_adapter_t"
magic_text_encoder_path = f"{model_path}/Magic_Weights/magic_text_encoder"
# Styling for the dice (random seed) button.
# FIX: the original rule was `margin-buttom: 0em 0em 0em 0em;` — an invalid
# property name (and `margin-bottom` takes one value, not four), so browsers
# silently dropped it. The four-value form indicates the `margin` shorthand
# was intended.
css = """
.toolbutton {
    margin: 0em 0em 0em 0em;
    max-width: 2.5em;
    min-width: 2.5em !important;
    height: 2.5em;
}
"""
# Preset demo rows for gr.Examples. Column order must match the `inputs` list
# in ui(): [dreambooth, motion module, prompt, negative prompt, width, height, seed].
# All presets share the same motion module, negative prompt, and 512x512 size.
examples = [
    [dreambooth, "motion_module.ckpt", prompt, "worst quality, low quality, letterboxed", 512, 512, seed]
    for dreambooth, prompt, seed in (
        # 1 — RealisticVision: cherry-blossom bloom time-lapse
        ("RealisticVisionV60B1_v51VAE.safetensors",
         "Cherry blossoms transitioning from tightly closed buds to a peak state of bloom. The progression moves through stages of bud swelling, petal exposure, and gradual opening, culminating in a full and vibrant display of open blossoms.",
         "1534851746"),
        # 2 — RCNZ Cartoon: Minecraft house construction
        ("RcnzCartoon.safetensors",
         "Time-lapse of a simple modern house's construction in a Minecraft virtual environment: beginning with an avatar laying a white foundation, progressing through wall erection and interior furnishing, to adding roof and exterior details, and completed with landscaping and a tall chimney.",
         "3480796026"),
        # 3 — ToonYou: bean sprouts growing
        ("ToonYou_beta6.safetensors",
         "Bean sprouts grow and mature from seeds.",
         "1496541313"),
    )
]
# Clear Gradio's cached examples from any previous run.
# FIX: replaced `os.system("rm -rf ...")` (shells out, POSIX-only, ignores
# failures) with shutil.rmtree, which is portable and explicit about errors.
print("### Cleaning cached examples ...")
shutil.rmtree("gradio_cached_examples", ignore_errors=True)

device = "cuda"
def random_seed():
    """Draw a fresh random seed in [1, 10**16] and return it as a decimal string."""
    seed_value = random.randint(1, 10 ** 16)
    return str(seed_value)
class MagicTimeController:
    """Owns all MagicTime model components and runs video generation.

    A single instance is created at module import time and shared by every
    Gradio callback. Besides the working components (``vae``, ``unet``,
    ``text_encoder``), the controller keeps pristine CPU templates
    (``unet_model``, ``text_model``) so that switching DreamBooth checkpoints
    can rebuild the working models from clean weights each time.
    """

    def __init__(self):
        # Directory layout, rooted at the current working directory.
        self.basedir = os.getcwd()
        self.stable_diffusion_dir = os.path.join(self.basedir, model_path, "Base_Model")
        self.motion_module_dir = os.path.join(self.basedir, model_path, "Base_Model", "motion_module")
        self.personalized_model_dir = os.path.join(self.basedir, model_path, "DreamBooth")
        self.savedir = os.path.join(self.basedir, "outputs")
        os.makedirs(self.savedir, exist_ok=True)

        # Checkpoint inventories and currently loaded selections.
        self.dreambooth_list = []
        self.motion_module_list = []
        self.selected_dreambooth = None
        self.selected_motion_module = None
        self.refresh_motion_module()
        self.refresh_personalized_model()

        # Model components.
        # NOTE(review): `[1]` selects the second document/entry of the loaded
        # YAML config — confirm against sample_configs/RealisticVision.yaml
        # before changing that file's layout.
        self.inference_config = OmegaConf.load(inference_config_path)[1]
        self.tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer")
        self.text_encoder = CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder").to(device)
        self.vae = AutoencoderKL.from_pretrained(pretrained_model_path, subfolder="vae").to(device)
        self.unet = UNet3DConditionModel.from_pretrained_2d(
            pretrained_model_path, subfolder="unet",
            unet_additional_kwargs=OmegaConf.to_container(self.inference_config.unet_additional_kwargs),
        ).to(device)
        # Pristine CPU templates, deep-copied whenever a DreamBooth checkpoint is loaded.
        self.text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
        self.unet_model = UNet3DConditionModel.from_pretrained_2d(
            pretrained_model_path, subfolder="unet",
            unet_additional_kwargs=OmegaConf.to_container(self.inference_config.unet_additional_kwargs),
        )

        # Load defaults: first motion module (into both working and template
        # UNets) and first DreamBooth checkpoint.
        self.update_motion_module(self.motion_module_list[0])
        self.update_motion_module_2(self.motion_module_list[0])
        self.update_dreambooth(self.dreambooth_list[0])

    def refresh_motion_module(self):
        """Re-scan the motion-module directory for *.ckpt files."""
        ckpt_paths = glob(os.path.join(self.motion_module_dir, "*.ckpt"))
        self.motion_module_list = [os.path.basename(p) for p in ckpt_paths]

    def refresh_personalized_model(self):
        """Re-scan the DreamBooth directory for *.safetensors checkpoints."""
        checkpoint_paths = glob(os.path.join(self.personalized_model_dir, "*.safetensors"))
        self.dreambooth_list = [os.path.basename(p) for p in checkpoint_paths]

    def update_dreambooth(self, dreambooth_dropdown, motion_module_dropdown=None):
        """Load a DreamBooth checkpoint into the VAE / UNet / text encoder.

        Rebuilds ``self.unet`` and ``self.text_encoder`` from the pristine
        templates, loads the checkpoint weights, then applies the MagicTime
        adapters (spatial LoRA, temporal Swift adapter, text-encoder adapter).

        Args:
            dreambooth_dropdown: basename of a *.safetensors checkpoint in
                the DreamBooth directory.
            motion_module_dropdown: unused; kept for signature compatibility
                with callers that pass both dropdown values.
        """
        self.selected_dreambooth = dreambooth_dropdown
        dreambooth_path = os.path.join(self.personalized_model_dir, dreambooth_dropdown)

        dreambooth_state_dict = {}
        with safe_open(dreambooth_path, framework="pt", device="cpu") as f:
            for key in f.keys():
                dreambooth_state_dict[key] = f.get_tensor(key)

        converted_vae_checkpoint = convert_ldm_vae_checkpoint(dreambooth_state_dict, self.vae.config)
        self.vae.load_state_dict(converted_vae_checkpoint)

        # Drop the old UNet before cloning the template to limit peak GPU memory.
        del self.unet
        self.unet = None
        torch.cuda.empty_cache()
        time.sleep(1)  # give the allocator a beat before the next large allocation
        converted_unet_checkpoint = convert_ldm_unet_checkpoint(dreambooth_state_dict, self.unet_model.config)
        self.unet = copy.deepcopy(self.unet_model)
        self.unet.load_state_dict(converted_unet_checkpoint, strict=False)

        # Same dance for the text encoder.
        del self.text_encoder
        self.text_encoder = None
        torch.cuda.empty_cache()
        time.sleep(1)
        text_model = copy.deepcopy(self.text_model)
        self.text_encoder = convert_ldm_clip_text_model(text_model, dreambooth_state_dict)

        # Apply the MagicTime adapters on top of the freshly loaded weights.
        from swift import Swift  # local import: swift is only needed here
        magic_adapter_s_state_dict = torch.load(magic_adapter_s_path, map_location="cpu")
        self.unet = load_diffusers_lora_unet(self.unet, magic_adapter_s_state_dict, alpha=1.0)
        self.unet = Swift.from_pretrained(self.unet, magic_adapter_t_path)
        self.text_encoder = Swift.from_pretrained(self.text_encoder, magic_text_encoder_path)

    def update_motion_module(self, motion_module_dropdown):
        """Load a motion-module checkpoint into the working UNet."""
        self.selected_motion_module = motion_module_dropdown
        module_path = os.path.join(self.motion_module_dir, motion_module_dropdown)
        motion_module_state_dict = torch.load(module_path, map_location="cpu")
        _, unexpected = self.unet.load_state_dict(motion_module_state_dict, strict=False)
        assert len(unexpected) == 0

    def update_motion_module_2(self, motion_module_dropdown):
        """Load the same motion-module checkpoint into the pristine UNet template."""
        self.selected_motion_module = motion_module_dropdown
        module_path = os.path.join(self.motion_module_dir, motion_module_dropdown)
        motion_module_state_dict = torch.load(module_path, map_location="cpu")
        _, unexpected = self.unet_model.load_state_dict(motion_module_state_dict, strict=False)
        assert len(unexpected) == 0

    @spaces.GPU(duration=120)
    def magictime(
        self,
        dreambooth_dropdown,
        motion_module_dropdown,
        prompt_textbox,
        negative_prompt_textbox,
        width_slider,
        height_slider,
        seed_textbox,
    ):
        """Generate a 16-frame time-lapse video for the given prompt.

        Returns:
            (path to the saved .mp4, JSON string of the generation config) —
            matching the (gr.Video, gr.Code) outputs wired up in ui().
        """
        torch.cuda.empty_cache()
        time.sleep(1)

        # FIX: evaluate the "module changed" condition once. The original code
        # re-checked `self.selected_motion_module != motion_module_dropdown`
        # on the next line, but the first update_motion_module() call already
        # set selected_motion_module, so update_motion_module_2() (which keeps
        # the pristine UNet template in sync) was never invoked on a change.
        if self.selected_motion_module != motion_module_dropdown:
            self.update_motion_module(motion_module_dropdown)
            self.update_motion_module_2(motion_module_dropdown)
        if self.selected_dreambooth != dreambooth_dropdown:
            self.update_dreambooth(dreambooth_dropdown)
        # update_dreambooth() transiently sets these to None; retry until both exist.
        while self.text_encoder is None or self.unet is None:
            self.update_dreambooth(dreambooth_dropdown, motion_module_dropdown)

        if is_xformers_available():
            self.unet.enable_xformers_memory_efficient_attention()

        pipeline = MagicTimePipeline(
            vae=self.vae, text_encoder=self.text_encoder, tokenizer=self.tokenizer, unet=self.unet,
            scheduler=DDIMScheduler(**OmegaConf.to_container(self.inference_config.noise_scheduler_kwargs)),
        ).to(device)

        # A positive seed is honored; anything else (e.g. the "-1" default)
        # draws a fresh random seed.
        if int(seed_textbox) > 0:
            seed = int(seed_textbox)
        else:
            seed = int(random_seed())
        torch.manual_seed(seed)
        assert seed == torch.initial_seed()
        print(f"### seed: {seed}")
        generator = torch.Generator(device=device)
        generator.manual_seed(seed)

        sample = pipeline(
            prompt_textbox,
            negative_prompt=negative_prompt_textbox,
            num_inference_steps=25,
            guidance_scale=8.,
            width=width_slider,
            height=height_slider,
            video_length=16,
            generator=generator,
        ).videos

        save_sample_path = os.path.join(self.savedir, "sample.mp4")
        save_videos_grid(sample, save_sample_path)

        json_config = {
            "prompt": prompt_textbox,
            "n_prompt": negative_prompt_textbox,
            "width": width_slider,
            "height": height_slider,
            "seed": seed,
            "dreambooth": dreambooth_dropdown,
        }
        # Serialize to a JSON string so the gr.Code component can display it.
        json_config_str = json.dumps(json_config, indent=4)

        torch.cuda.empty_cache()
        time.sleep(1)
        return save_sample_path, json_config_str
# Global controller: loads every model component once at startup and is shared
# by all Gradio callbacks.
controller = MagicTimeController()
def ui():
    """Build the Gradio Blocks interface wired to the global ``controller``.

    Returns the un-launched ``gr.Blocks`` app.
    """
    with gr.Blocks(css=css) as demo:
        # Logo banner.
        gr.HTML("""
        <div style='display: flex; align-items: center; justify-content: center; text-align: center;'>
            <img src='https://raw.githubusercontent.com/SHYuanBest/shyuanbest_media/main/MagicTime/MagicTime_logo.png' style='width: 200px; height: auto; margin-right: 10px;' />
        </div>
        """)
        gr.Markdown(
            """
            <h2 align="center"> <a href="https://github.com/PKU-YuanGroup/MagicTime">[TPAMI 2025] MagicTime: Time-lapse Video Generation Models as Metamorphic Simulators</a></h2>
            <h5 style="text-align:left;">If you like our project, please give us a star ⭐ on GitHub for the latest update.</h5>
            [GitHub](https://github.com/PKU-YuanGroup/MagicTime) | [arXiv](https://arxiv.org/abs/2404.05014) | [Home Page](https://pku-yuangroup.github.io/MagicTime/) | [Dataset](https://huggingface.co/datasets/BestWishYsh/ChronoMagic)
            """
        )
        with gr.Row():
            with gr.Column():
                # Model selectors default to the first discovered checkpoint of each kind.
                dreambooth_dropdown = gr.Dropdown(label="DreamBooth Model", choices=controller.dreambooth_list, value=controller.dreambooth_list[0], interactive=True)
                motion_module_dropdown = gr.Dropdown(label="Motion Module", choices=controller.motion_module_list, value=controller.motion_module_list[0], interactive=True)
                prompt_textbox = gr.Textbox(label="Prompt", lines=3)
                negative_prompt_textbox = gr.Textbox(label="Negative Prompt", lines=3, value="worst quality, low quality, nsfw, logo")
                with gr.Accordion("Advance", open=False):
                    with gr.Row():
                        width_slider = gr.Slider(label="Width", value=512, minimum=256, maximum=1024, step=64)
                        height_slider = gr.Slider(label="Height", value=512, minimum=256, maximum=1024, step=64)
                    with gr.Row():
                        # "-1" requests a random seed; the dice button fills in a fresh one.
                        seed_textbox = gr.Textbox(label="Seed (-1 means random)", value="-1")
                        seed_button = gr.Button(value="\U0001F3B2", elem_classes="toolbutton")
                        seed_button.click(fn=random_seed, inputs=[], outputs=[seed_textbox])
                generate_button = gr.Button(value="Generate", variant='primary')
            with gr.Column():
                result_video = gr.Video(label="Generated Animation", interactive=False)
                # gr.Code receives the config as a pre-serialized JSON string
                # (highlighted as JSON), working around a dict-parsing bug in
                # the dict-based component.
                json_config = gr.Code(label="Config", language="json", interactive=False)
        # Order must match the parameters of MagicTimeController.magictime.
        inputs = [dreambooth_dropdown, motion_module_dropdown, prompt_textbox, negative_prompt_textbox, width_slider, height_slider, seed_textbox]
        outputs = [result_video, json_config]
        generate_button.click(fn=controller.magictime, inputs=inputs, outputs=outputs)
        gr.Markdown("""
        <h5 style="text-align:left;">⚠ Warning: Even if you use the same seed and prompt, changing machines may produce different results.
        If you find a better seed and prompt, please submit an issue on GitHub.</h5>
        """)
        gr.Examples(fn=controller.magictime, examples=examples, inputs=inputs, outputs=outputs, cache_examples=False)
    return demo
if __name__ == "__main__":
    demo = ui()
    demo.queue(max_size=20)  # cap the number of pending generation requests
    # share=True is intentionally omitted: on Hugging Face Spaces it only
    # produces a warning and can cause network conflicts.
    demo.launch()