| |
|
|
| import sys |
| import os |
| import time |
| import glob |
| import gc |
| import torch |
| import subprocess |
| import random |
| import argparse |
| import shutil |
| from typing import Sequence, Mapping, Any, Union |
|
|
| |
|
|
def to_bool(s: str) -> bool:
    """Interpret a command-line flag value as a boolean.

    Args:
        s: Flag text such as ``"on"``, ``"off"``, ``"true"`` or ``"1"``.
            Matching ignores case and surrounding whitespace. A real
            ``bool`` is returned unchanged.

    Returns:
        ``True`` for ``true``, ``1``, ``t``, ``y``, ``yes`` or ``on``;
        ``False`` for any other text (including the empty string).
    """
    if isinstance(s, bool):
        return s
    # Values forwarded from notebook forms or shell wrappers may carry stray
    # padding or a trailing newline, so normalise before comparing.
    return s.strip().lower() in {'true', '1', 't', 'y', 'yes', 'on'}
|
|
def clear_memory():
    """Free unreachable Python objects, then release cached CUDA memory."""
    # Collect first: tensors that are only kept alive by reference cycles are
    # freed here, so the empty_cache() call below can actually hand their
    # blocks back instead of leaving them in PyTorch's caching allocator.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
|
|
# Root directory of the ComfyUI checkout. It is added to sys.path and searched
# for extra_model_paths.yaml. Defaults to the Colab location; set the
# COMFYUI_BASE_PATH environment variable to point at a different install.
COMFYUI_BASE_PATH = os.environ.get('COMFYUI_BASE_PATH') or '/content/ComfyUI'
|
|
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Safely fetch a value from a ComfyUI node output.

    Tries ``obj[index]`` first. If that raises ``KeyError`` or ``TypeError``
    and ``obj`` is a dict holding a ``"result"`` entry, the value is read from
    ``obj["result"][index]`` instead; otherwise the original error propagates.
    """
    try:
        value = obj[index]
    except (KeyError, TypeError):
        wraps_result = isinstance(obj, dict) and "result" in obj
        if not wraps_result:
            raise
        value = obj["result"][index]
    return value
|
|
def add_comfyui_directory_to_sys_path() -> None:
    """Append the ComfyUI directory to ``sys.path`` if it is not there yet.

    When ``COMFYUI_BASE_PATH`` is not an existing directory nothing is added
    and a warning is written to stderr, so that the import errors which follow
    can be traced back to the missing install.
    """
    if not os.path.isdir(COMFYUI_BASE_PATH):
        print(
            f"Warning: ComfyUI directory '{COMFYUI_BASE_PATH}' not found; "
            "ComfyUI modules will not be importable.",
            file=sys.stderr,
        )
        return
    if COMFYUI_BASE_PATH not in sys.path:
        sys.path.append(COMFYUI_BASE_PATH)
        print(f"'{COMFYUI_BASE_PATH}' added to sys.path")
|
|
def import_custom_nodes() -> None:
    """Initialise the async environment needed to load ComfyUI custom nodes.

    (Per the original note: identical to ``import_custom_nodes`` in the I2V
    script.)

    Steps performed:
      1. Apply ``nest_asyncio``, pip-installing it first if it is missing.
      2. Get the current event loop, or create and register a new one when
         there is none or it is closed.
      3. Construct a ``PromptServer`` on that loop and a ``PromptQueue`` for it.
      4. Run ``init_extra_nodes()`` on the loop.
    """
    try:
        import nest_asyncio
        nest_asyncio.apply()
    except ImportError:
        print("nest_asyncio not found, installing...")
        try:
            subprocess.run([sys.executable, "-m", "pip", "install", "-q", "nest_asyncio"], check=True)
            import nest_asyncio
            nest_asyncio.apply()
            print("nest_asyncio installed and applied.")
        except Exception as e:
            # Best effort: report the failure and carry on without nest_asyncio.
            print(f"Failed to install or apply nest_asyncio: {e}")

    # Imported here rather than at module level; presumably these resolve only
    # after the ComfyUI directory has been put on sys.path - verify with caller.
    import asyncio, execution, server
    from nodes import init_extra_nodes
    try:
        loop = asyncio.get_event_loop()
        if loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
    except RuntimeError:
        # No usable current loop: create one and make it current.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    # The instances are created for their side effects; the queue object itself
    # is not kept.
    server_instance = server.PromptServer(loop)
    execution.PromptQueue(server_instance)

    if not loop.is_running():
        try:
            loop.run_until_complete(init_extra_nodes())
        except RuntimeError as e:
            print(f"Note: Could not run init_extra_nodes synchronously: {e}")
            # NOTE(review): ensure_future only schedules the coroutine; nothing
            # in this function drives the loop afterwards, so the nodes may not
            # be registered when it returns - confirm this fallback is wanted.
            try: asyncio.ensure_future(init_extra_nodes())
            except Exception as fut_e: print(f"Error trying async init_extra_nodes: {fut_e}")
    else:
        # The loop is already running: schedule the initialisation on it.
        # NOTE(review): the result is not awaited here either - confirm.
        try: asyncio.ensure_future(init_extra_nodes())
        except Exception as fut_e: print(f"Error trying async init_extra_nodes on running loop: {fut_e}")
|
|
|
|
| |
|
|
def parse_args(argv: Union[Sequence[str], None] = None) -> argparse.Namespace:
    """Parse the command-line options of the V2A generation script.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what a plain ``parse_args()`` call
            has always done; passing a list allows programmatic use.

    Returns:
        The populated ``argparse.Namespace``. On/off style options
        (``--mask_away_clip``, ``--force_offload``, ``--save_metadata``,
        ``--trim_to_audio``, ``--pingpong``) are kept as strings and are
        converted by the caller.
    """
    parser = argparse.ArgumentParser(description="ComfyUI V2A (Video-to-Audio) Generation Script")

    # Input video and text conditioning.
    parser.add_argument("--input_video_path", type=str, required=True, help="오디오를 생성할 입력 비디오 파일 경로")
    parser.add_argument("--prompt", type=str, default="")
    parser.add_argument("--negative_prompt", type=str, default="")

    # Sampler settings.
    parser.add_argument("--steps", type=int, default=25)
    parser.add_argument("--cfg", type=float, default=4.5)
    parser.add_argument("--seed", type=int, default=-1)
    parser.add_argument("--mask_away_clip", type=str, default="off")
    parser.add_argument("--force_offload", type=str, default="off")

    # Model files and precision.
    parser.add_argument("--mmaudio_model", type=str, default="mmaudio_large_44k_v2_fp16.safetensors")
    parser.add_argument("--base_precision", type=str, default="fp16")
    parser.add_argument("--vae_model", type=str, default="mmaudio_vae_44k_fp16.safetensors")
    parser.add_argument("--synchformer_model", type=str, default="mmaudio_synchformer_fp16.safetensors")
    parser.add_argument("--clip_model", type=str, default="apple_DFN5B-CLIP-ViT-H-14-384_fp16.safetensors")
    parser.add_argument("--mode", type=str, default="44k")
    parser.add_argument("--precision", type=str, default="fp16", help="Feature Utils Precision")

    # Video loading options.
    parser.add_argument("--force_rate", type=int, default=0)
    parser.add_argument("--custom_width", type=int, default=0)
    parser.add_argument("--custom_height", type=int, default=0)
    parser.add_argument("--frame_load_cap", type=int, default=0)
    parser.add_argument("--skip_first_frames", type=int, default=0)
    parser.add_argument("--select_every_nth", type=int, default=1)
    parser.add_argument("--load_format", type=str, default="AnimateDiff")

    # Output / video combine options.
    parser.add_argument("--loop_count", type=int, default=0)
    parser.add_argument("--filename_prefix", type=str, default="MMaudio")
    parser.add_argument("--combine_format", type=str, default="video/h264-mp4")
    parser.add_argument("--pix_fmt", type=str, default="yuv420p")
    parser.add_argument("--crf", type=int, default=19)
    parser.add_argument("--save_metadata", type=str, default="on")
    parser.add_argument("--trim_to_audio", type=str, default="off")
    parser.add_argument("--pingpong", type=str, default="off")

    return parser.parse_args(argv)
|
|
|
|
| |
|
|
| |
|
|
| def main(): |
| args = parse_args() |
| print("๐ V2A ์ค๋์ค ์์ฑ์ ์์ํฉ๋๋ค (VRAM Optimized)...") |
|
|
| |
| add_comfyui_directory_to_sys_path() |
|
|
| try: |
| from utils.extra_config import load_extra_path_config |
| except ImportError: |
| print("โ ๏ธ ComfyUI์ extra_model_paths.yaml ๋ก๋ฉ ์คํจ (๋ฌด์ํ๊ณ ์งํ)") |
| load_extra_path_config = lambda x: None |
|
|
| extra_model_paths_file = os.path.join(COMFYUI_BASE_PATH, "extra_model_paths.yaml") |
| if os.path.exists(extra_model_paths_file): |
| load_extra_path_config(extra_model_paths_file) |
|
|
| print("ComfyUI ์ปค์คํ
๋
ธ๋ ์ด๊ธฐํ ์ค...") |
| import_custom_nodes() |
| from nodes import NODE_CLASS_MAPPINGS |
| print("์ปค์คํ
๋
ธ๋ ์ด๊ธฐํ ์๋ฃ.") |
|
|
| |
| mmaudiomodelloader = NODE_CLASS_MAPPINGS["MMAudioModelLoader"]() |
| vhs_loadvideo = NODE_CLASS_MAPPINGS["VHS_LoadVideo"]() |
| mmaudiofeatureutilsloader = NODE_CLASS_MAPPINGS["MMAudioFeatureUtilsLoader"]() |
| vhs_videoinfo = NODE_CLASS_MAPPINGS["VHS_VideoInfo"]() |
| mmaudiosampler = NODE_CLASS_MAPPINGS["MMAudioSampler"]() |
| vhs_videocombine = NODE_CLASS_MAPPINGS["VHS_VideoCombine"]() |
|
|
| |
| if args.seed == -1: |
| final_seed = random.randint(1, 2**64) |
| print(f" - ๋๋ค ์๋ ์์ฑ: {final_seed}") |
| else: |
| final_seed = args.seed |
| print(f" - ๊ณ ์ ์๋ ์ฌ์ฉ: {final_seed}") |
|
|
| |
| with torch.inference_mode(): |
|
|
| |
| print(f"\n1๋จ๊ณ: ์ค๋์ค ์์ฑ์ ์ํ ๋น๋์ค ๋ก๋ (25 FPS ๊ฐ์ )... ({args.input_video_path})") |
| vhs_loadvideo_91_audio = vhs_loadvideo.load_video( |
| video=args.input_video_path, |
| force_rate=25, |
| custom_width=args.custom_width, |
| custom_height=args.custom_height, |
| frame_load_cap=args.frame_load_cap, |
| skip_first_frames=args.skip_first_frames, |
| select_every_nth=args.select_every_nth, |
| format=args.load_format, |
| unique_id=random.randint(1, 2**64) |
| ) |
| images_for_audio = get_value_at_index(vhs_loadvideo_91_audio, 0) |
|
|
| |
| vhs_videoinfo_105 = vhs_videoinfo.get_video_info( |
| video_info=get_value_at_index(vhs_loadvideo_91_audio, 3) |
| ) |
| del vhs_loadvideo_91_audio |
|
|
| duration = get_value_at_index(vhs_videoinfo_105, 7) |
| original_frame_rate = get_value_at_index(vhs_videoinfo_105, 0) |
| print(f" - ๋น๋์ค ์ ๋ณด: {duration}์ด, ์๋ณธ {original_frame_rate} FPS") |
| clear_memory() |
|
|
| |
| print(f"\n2๋จ๊ณ: ์ค๋์ค ๋ชจ๋ธ ๋ก๋ฉ ์ค...") |
| print(f" - MMAudio ๋ชจ๋ธ: {args.mmaudio_model} ({args.base_precision})") |
| mmaudiomodelloader_85 = mmaudiomodelloader.loadmodel( |
| mmaudio_model=args.mmaudio_model, |
| base_precision=args.base_precision |
| ) |
| mmaudio_model = get_value_at_index(mmaudiomodelloader_85, 0) |
|
|
| print(f" - ์ ํธ๋ฆฌํฐ ๋ชจ๋ธ: (Mode: {args.mode}, Precision: {args.precision})") |
| mmaudiofeatureutilsloader_102 = mmaudiofeatureutilsloader.loadmodel( |
| vae_model=args.vae_model, |
| synchformer_model=args.synchformer_model, |
| clip_model=args.clip_model, |
| mode=args.mode, |
| precision=args.precision |
| ) |
| feature_utils = get_value_at_index(mmaudiofeatureutilsloader_102, 0) |
|
|
| |
| print(f"\n3๋จ๊ณ: ์ค๋์ค ์์ฑ ์ค... (Steps: {args.steps}, CFG: {args.cfg})") |
| mmaudiosampler_92 = mmaudiosampler.sample( |
| duration=duration, |
| steps=args.steps, |
| cfg=args.cfg, |
| seed=final_seed, |
| prompt=args.prompt, |
| negative_prompt=args.negative_prompt, |
| mask_away_clip=to_bool(args.mask_away_clip), |
| force_offload=to_bool(args.force_offload), |
| mmaudio_model=mmaudio_model, |
| feature_utils=feature_utils, |
| images=images_for_audio |
| ) |
| generated_audio = get_value_at_index(mmaudiosampler_92, 0) |
|
|
| |
| print(f"\n4๋จ๊ณ: ๋ชจ๋ธ ๋ฐ ์ค๋์ค์ฉ ์ด๋ฏธ์ง ๋ฉ๋ชจ๋ฆฌ ํด์ ์ค...") |
| del mmaudiomodelloader_85, mmaudio_model, mmaudiofeatureutilsloader_102, feature_utils |
| del images_for_audio |
| clear_memory() |
|
|
| |
| print(f"\n5๋จ๊ณ: ๋น๋์ค ๊ฒฐํฉ์ ์ํ ์๋ณธ ๋น๋์ค ๋ก๋ (์ฌ์ฉ์ ์ค์ FPS: {args.force_rate})...") |
| vhs_loadvideo_91_combine = vhs_loadvideo.load_video( |
| video=args.input_video_path, |
| force_rate=args.force_rate, |
| custom_width=args.custom_width, |
| custom_height=args.custom_height, |
| frame_load_cap=args.frame_load_cap, |
| skip_first_frames=args.skip_first_frames, |
| select_every_nth=args.select_every_nth, |
| format=args.load_format, |
| unique_id=random.randint(1, 2**64) |
| ) |
| images_for_combine = get_value_at_index(vhs_loadvideo_91_combine, 0) |
| del vhs_loadvideo_91_combine |
| clear_memory() |
|
|
| |
| print(f"\n6๋จ๊ณ: ๋น๋์ค + ์ค๋์ค ๊ฒฐํฉ ๋ฐ ์ ์ฅ ์ค...") |
| timestamp = time.strftime("%Y%m%d-%H%M%S") |
| final_filename_prefix = f"{args.filename_prefix}_{timestamp}" |
|
|
| vhs_videocombine_97 = vhs_videocombine.combine_video( |
| frame_rate=original_frame_rate, |
| loop_count=args.loop_count, |
| filename_prefix=final_filename_prefix, |
| format=args.combine_format, |
| pix_fmt=args.pix_fmt, |
| crf=args.crf, |
| save_metadata=to_bool(args.save_metadata), |
| trim_to_audio=to_bool(args.trim_to_audio), |
| pingpong=to_bool(args.pingpong), |
| save_output=True, |
| images=images_for_combine, |
| audio=generated_audio, |
| unique_id=random.randint(1, 2**64) |
| ) |
|
|
| |
| del images_for_combine, generated_audio |
| clear_memory() |
|
|
| |
| try: |
| |
| |
| file_path_list = vhs_videocombine_97['result'][0][1] |
| final_video_path = file_path_list[2] |
| except Exception as e: |
| print(f"โ [์ค๋ฅ] ์ต์ข
ํ์ผ ๊ฒฝ๋ก๋ฅผ ์ถ์ถํ๋ ๋ฐ ์คํจํ์ต๋๋ค: {e}") |
| print(f" - ์ ์ฒด ๋ฐํ๊ฐ: {vhs_videocombine_97}") |
| final_video_path = None |
|
|
| if final_video_path and os.path.exists(final_video_path): |
| print(f"โ
์ค๋์ค ์์ฑ ๋ฐ ๊ฒฐํฉ ์๋ฃ!") |
| print(f"LATEST_VIDEO_PATH:{final_video_path}") |
|
|
| |
| base, ext = os.path.splitext(final_video_path) |
| original_copy_path = f"{base}_original{ext}" |
| try: |
| shutil.copy2(final_video_path, original_copy_path) |
| print(f"โ
์๋ณธ ๋ณต์ฌ๋ณธ ์์ฑ ์๋ฃ: {original_copy_path}") |
| print(f"ORIGINAL_COPY_PATH:{original_copy_path}") |
| except Exception as e: |
| print(f"โ ์๋ณธ ๋ณต์ฌ๋ณธ ์์ฑ ์คํจ: {e}") |
| else: |
| print(f"โ ์ต์ข
๋น๋์ค ํ์ผ ๊ฒฝ๋ก๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.") |
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|