import gradio as gr
import torch
import numpy as np
from PIL import Image
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = None


# ----------------------------
# Load model (lazy loading)
# ----------------------------
def load_model():
    global pipe
    if pipe is None:
        pipe = StableVideoDiffusionPipeline.from_pretrained(
            "stabilityai/stable-video-diffusion-img2vid-xt",
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )

        if device == "cuda":
            pipe.to(device)
            pipe.enable_model_cpu_offload()
            pipe.unet.enable_forward_chunking()

        pipe.enable_attention_slicing()

    return pipe


# ----------------------------
# Resize helper
# ----------------------------
def resize_image(image, size=(576, 1024)):
    return image.resize(size)


# ----------------------------
# Interpolation function
# ----------------------------
def generate_video(
    start_image,
    end_image,
    num_frames,
    fps,
    motion_bucket_id,
    seed,
):
    if start_image is None or end_image is None:
        return None, "Please upload both start and end images."

    pipe = load_model()

    generator = torch.manual_seed(int(seed))

    start = resize_image(start_image)
    end = resize_image(end_image)

    # simple blending (basic interpolation conditioning)
    blend = Image.blend(start, end, alpha=0.5)

    frames = pipe(
        blend,
        num_frames=int(num_frames),
        motion_bucket_id=int(motion_bucket_id),
        generator=generator,
        decode_chunk_size=1,  # low VRAM
    ).frames[0]

    video_path = export_to_video(frames, fps=int(fps))

    return video_path, "✅ Done!"


# ----------------------------
# UI
# ----------------------------
with gr.Blocks(title="SVD Keyframe Interpolation") as demo:

    gr.Markdown(
        """
        # 🎥 SVD Keyframe Interpolation
        Generate smooth video between two images using Stable Video Diffusion.

        Upload a start and end frame → generate motion between them.
        """
    )

    with gr.Row():
        start_image = gr.Image(label="Start Image", type="pil")
        end_image = gr.Image(label="End Image", type="pil")

    with gr.Row():
        num_frames = gr.Slider(8, 32, value=16, step=1, label="Number of Frames")
        fps = gr.Slider(4, 24, value=8, step=1, label="FPS")

    with gr.Row():
        motion_bucket_id = gr.Slider(1, 255, value=127, step=1, label="Motion Strength")
        seed = gr.Number(value=42, label="Seed")

    run_btn = gr.Button("🚀 Generate Video")

    with gr.Row():
        output_video = gr.Video(label="Output Video")
        status = gr.Textbox(label="Status")

    run_btn.click(
        fn=generate_video,
        inputs=[
            start_image,
            end_image,
            num_frames,
            fps,
            motion_bucket_id,
            seed,
        ],
        outputs=[output_video, status],
    )

demo.queue().launch()