Spaces:

cobramv12
/

image-processor-v2

Runtime error

File size: 4,543 Bytes

d66d21f
 
 
b552fde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ae7c5d
 
b552fde
0c871a6
fe964d2
 
726aa0c
fe964d2
d66d21f
b70cde5
 
 
c52be34
fe964d2
b70cde5
 
726aa0c
 
 
6f96683
b70cde5
726aa0c
 
c52be34
726aa0c
fe964d2
b70cde5
 
726aa0c
 
 
 
 
fe964d2
0c5ac88
6f96683
 
726aa0c
0c5ac88
 
726aa0c
fe964d2
 
6f96683
fe964d2
b70cde5
726aa0c
0c5ac88
b70cde5
 
fe964d2
 
 
 
726aa0c
fe964d2
 
6f96683
 
fe964d2
c52be34
0c871a6
fe964d2
4282ad2
c52be34
726aa0c
6f96683
fe964d2
6f96683
 
726aa0c
c52be34
726aa0c
fe964d2
c52be34
fe964d2
 
c52be34
 
 
726aa0c
c52be34
726aa0c
0c871a6
b552fde

import sys
import os

# --- RESEARCHED COMPATIBILITY PATCH (CRITICAL) ---
# Works around the 'bool' schema bug in gradio_client that affects Gradio 5:
# both helpers below are wrapped so that a boolean schema yields "Any"
# instead of being handed to the original implementation.
try:
    import gradio_client.utils as client_utils

    old_get_type = client_utils.get_type

    def new_get_type(schema):
        """Return "Any" for a boolean schema, otherwise defer to the original."""
        return "Any" if isinstance(schema, bool) else old_get_type(schema)

    client_utils.get_type = new_get_type

    # Looked up only after get_type has been replaced, so a failure from here
    # on leaves the first patch in place.
    old_json_to_python = client_utils._json_schema_to_python_type

    def new_json_to_python(schema, defs=None):
        """Return "Any" for a boolean schema, otherwise defer to the original."""
        return "Any" if isinstance(schema, bool) else old_json_to_python(schema, defs)

    client_utils._json_schema_to_python_type = new_json_to_python
except Exception as e:
    # Best effort: startup continues even when the patch cannot be applied.
    print(f"Aviso: No se pudo aplicar el parche de gradio_client: {e}")
# ------------------------------------------------------

import spaces
import gradio as gr
import torch
import numpy as np
from PIL import Image
import tempfile

# CONFIG
# Model identifier passed to StableDiffusionXLPipeline.from_pretrained in load_t2i.
BASE_MODEL = "cyberdelia/CyberRealisticPony"
# Model identifier passed to LTXPipeline.from_pretrained in load_video.
LTX_MODEL  = "Lightricks/LTX-Video"
# LoRA identifier passed to load_lora_weights in load_video.
# NOTE(review): the name indicates adult content — confirm this complies with
# the hosting platform's content policy.
LTX_NSFW_LORA = "Lora-Daddy/Ltx2.3-real-nudity-early-alpha-30k-steps"
# Default negative prompt: pre-fills the "Excluded Data" textbox and is always
# used as the negative prompt in generate_video.
NEG_DEFAULT = "blurry, low quality, bad anatomy, deformed, ugly, watermark, text"

def load_t2i(lora_id=None, lora_scale=1.0):
    """Build the SDXL text-to-image pipeline, optionally with a fused LoRA.

    Args:
        lora_id: Optional LoRA identifier handed to ``load_lora_weights``.
            Ignored when falsy or when the stripped value has five or fewer
            characters.
        lora_scale: Scale passed to ``fuse_lora`` when a LoRA is loaded.

    Returns:
        The ``StableDiffusionXLPipeline`` loaded from ``BASE_MODEL`` in
        float16. It is not moved to any device here; the caller does that.
    """
    # Imported inside the function, as in the rest of this file, so diffusers
    # is only imported when a pipeline is actually built.
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        BASE_MODEL, torch_dtype=torch.float16, use_safetensors=True, variant="fp16"
    )

    lora_name = lora_id.strip() if lora_id else ""
    if len(lora_name) > 5:
        try:
            pipe.load_lora_weights(lora_name)
            pipe.fuse_lora(lora_scale=lora_scale)
        except Exception as e:
            # LoRA loading stays best-effort (the base pipeline is still
            # returned), but the failure is now reported instead of being
            # silently discarded. Unlike the previous bare ``except``, this
            # no longer traps KeyboardInterrupt or SystemExit.
            print(f"Warning: could not apply LoRA {lora_name!r}: {e}")
    return pipe

def load_video():
    """Build the LTX video pipeline and try to attach the configured LoRA.

    Returns:
        The ``LTXPipeline`` loaded from ``LTX_MODEL`` in bfloat16. It is not
        moved to any device here; the caller does that.
    """
    from diffusers import LTXPipeline

    pipe = LTXPipeline.from_pretrained(LTX_MODEL, torch_dtype=torch.bfloat16)
    try:
        pipe.load_lora_weights(LTX_NSFW_LORA)
    except Exception as e:
        # Still best-effort (the pipeline is returned without the LoRA), but
        # the failure is reported rather than silently discarded, and
        # KeyboardInterrupt/SystemExit are no longer trapped.
        print(f"Warning: could not load LoRA {LTX_NSFW_LORA!r}: {e}")
    return pipe

@spaces.GPU(duration=100)
def generate_t2i(prompt, neg, lora_id, lora_scale, w, h):
    """Generate a single image from a text prompt on the GPU.

    Args:
        prompt: Positive prompt text.
        neg: Negative prompt text.
        lora_id: Optional LoRA identifier, forwarded to ``load_t2i``.
        lora_scale: LoRA scale, forwarded to ``load_t2i``.
        w: Output width; converted with ``int``.
        h: Output height; converted with ``int``.

    Returns:
        The first image of the pipeline result.
    """
    # A fresh pipeline is built and moved to CUDA on every call.
    pipeline = load_t2i(lora_id, lora_scale).to("cuda")

    width, height = int(w), int(h)
    # The seed is fixed at 42.
    rng = torch.Generator("cuda").manual_seed(42)

    result = pipeline(
        prompt=prompt,
        negative_prompt=neg,
        num_inference_steps=30,
        guidance_scale=7.0,
        width=width,
        height=height,
        generator=rng,
    )
    return result.images[0]

@spaces.GPU(duration=200)
def generate_video(prompt, init_image, lora_scale):
    """Run the video pipeline and write the result to a temporary .mp4 file.

    Args:
        prompt: Prompt text for the pipeline.
        init_image: Optional array; when not ``None`` it is converted with
            ``Image.fromarray``, forced to RGB and resized to 768x512.
        lora_scale: When greater than 0, forwarded as ``{"scale": lora_scale}``.

    Returns:
        Path of the written .mp4 file. The file is created with
        ``delete=False`` and is not removed by this function.
    """
    from diffusers.utils import export_to_video

    # A fresh pipeline is built and moved to CUDA on every call.
    pipe = load_video().to("cuda")
    kwargs = {"prompt": prompt, "negative_prompt": NEG_DEFAULT, "num_frames": 49,
              "num_inference_steps": 30, "generator": torch.Generator("cuda").manual_seed(42)}
    if init_image is not None:
        kwargs["image"] = Image.fromarray(init_image).convert("RGB").resize((768, 512))
    if lora_scale > 0:
        kwargs["cross_attention_kwargs"] = {"scale": lora_scale}
    # NOTE(review): "image" and "cross_attention_kwargs" are forwarded as-is;
    # confirm the pipeline's __call__ accepts both keyword names.
    output = pipe(**kwargs)

    # Reserve the output path, then close the handle before the exporter
    # writes to it. Previously the handle was never closed, which leaked a
    # file descriptor per call and kept the file open during the export.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    export_to_video(output.frames[0], video_path, fps=24)
    return video_path

# Gradio UI with two tabs: one wired to generate_t2i, one to generate_video.
# NOTE(review): the visible labels are generic; the comments below state which
# function parameter each control feeds.
with gr.Blocks(title="Image Utility v2.1") as demo:
    gr.HTML("<h1 style='text-align:center;'>🛠 Image Processing Utility v2.1.4</h1>")
    with gr.Tabs():
        with gr.Tab("D-Processor (T2I)"):
            with gr.Row():
                with gr.Column():
                    # -> generate_t2i(prompt)
                    t2i_p = gr.Textbox(label="Input Data String", lines=3)
                    # -> generate_t2i(neg); pre-filled with NEG_DEFAULT
                    t2i_n = gr.Textbox(label="Excluded Data", value=NEG_DEFAULT)
                    # -> generate_t2i(lora_id)
                    t2i_lora = gr.Textbox(label="Extension ID")
                    # -> generate_t2i(lora_scale)
                    t2i_ls = gr.Slider(0, 1.5, 0.8, label="Extension Weight")
                    with gr.Row():
                        # -> generate_t2i(w) and generate_t2i(h)
                        t2i_w = gr.Slider(512, 1024, 1024, step=64, label="X-Axis")
                        t2i_h = gr.Slider(512, 1024, 1024, step=64, label="Y-Axis")
                    t2i_btn = gr.Button("Execute Process")
                t2i_out = gr.Image(label="Output Preview")
            # Inputs are passed positionally in generate_t2i's parameter order.
            t2i_btn.click(generate_t2i, [t2i_p, t2i_n, t2i_lora, t2i_ls, t2i_w, t2i_h], t2i_out)

        with gr.Tab("M-Sequence (Video)"):
            with gr.Row():
                with gr.Column():
                    # -> generate_video(prompt)
                    v_p = gr.Textbox(label="Motion Vector String", lines=3)
                    # -> generate_video(init_image); type="numpy" matches the
                    # Image.fromarray call made there.
                    v_img = gr.Image(label="Source Buffer", type="numpy")
                    # -> generate_video(lora_scale)
                    v_ls = gr.Slider(0, 1.5, 0.8, label="Motion Weight")
                    v_btn = gr.Button("Process Sequence")
                v_out = gr.Video(label="Sequence Output")
            # Inputs are passed positionally in generate_video's parameter order.
            v_btn.click(generate_video, [v_p, v_img, v_ls], v_out)

# With the native SDK we do not set server_name or port; Hugging Face injects them itself.
demo.launch(show_api=False)