Spaces: Runtime error
Upload folder using huggingface_hub
README.md CHANGED
@@ -1,12 +1,6 @@
 ---
 title: ControlNetV1.1
-emoji: 😻
-colorFrom: gray
-colorTo: indigo
-sdk: gradio
-sdk_version: 3.44.4
 app_file: app.py
-pinned: false
+sdk: gradio
+sdk_version: 3.42.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,215 @@
#!/usr/bin/env python

import math

import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    DiffusionPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLControlNetPipeline,
    UniPCMultistepScheduler,
)
from diffusers.utils import load_image

import gradio as gr

# NOTE: this VAE is loaded but never attached to a pipeline below; each
# pipeline relies on the VAE bundled with its checkpoint.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)

# SD 1.5 text-to-image pipeline conditioned on canny edge maps.
canny_controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE",
    controlnet=canny_controlnet,
    torch_dtype=torch.float16,
    use_safetensors=True,
)
canny_pipe.scheduler = UniPCMultistepScheduler.from_config(canny_pipe.scheduler.config)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()

# SD 1.5 img2img pipeline with the tile ControlNet (used by process_canny_tile).
canny_controlnet_tile = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16
)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE",
    controlnet=canny_controlnet_tile,
    torch_dtype=torch.float16,
    use_safetensors=True,
)
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()

# SDXL base pipeline with the canny SDXL ControlNet and the fp16-safe VAE.
controlnet_xl = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)
vae_xl = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_xl,
    vae=vae_xl,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()

# SDXL refiner sharing the base pipeline's second text encoder and VAE
# (used by process_canny_sdxl).
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_xl.text_encoder_2,
    vae=pipe_xl.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()

def resize_image_output(im, width, height):
    # Resize a PIL image to the requested output size and return a PIL image.
    im = np.array(im)
    img = cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
    return Image.fromarray(img)

def resize_image(im, max_size=590000):
    # Rescale a numpy image (H, W, C) so its pixel count falls between
    # min_size and max_size, then snap both sides down to multiples of 8,
    # as the diffusion pipelines require.
    [x, y, z] = im.shape
    new_size = [0, 0]

    min_size = 262144
    if x * y > max_size:
        scale_ratio = math.sqrt((x * y) / max_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    elif x * y <= min_size:
        scale_ratio = math.sqrt((x * y) / min_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    else:
        new_size[0] = int(x)
        new_size[1] = int(y)

    height = (new_size[0] // 8) * 8
    width = (new_size[1] // 8) * 8

    # cv2.resize expects (width, height).
    img = cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
    return img

def process_canny_tile(input_image, control_image, x, y, prompt, a_prompt, n_prompt,
                       num_samples, image_resolution, ddim_steps, guess_mode,
                       strength_conditioning, scale, seed, eta, low_threshold, high_threshold):
    # Tile-ControlNet img2img refinement pass. Not wired to the UI below;
    # most parameters are accepted only for interface parity and are ignored.
    image = input_image

    return canny_pipe_img2img(
        prompt="",
        image=image,
        control_image=image,
        num_inference_steps=20,
        guidance_scale=4,
        strength=0.3,
        guess_mode=True,
        negative_prompt=n_prompt,
        num_images_per_prompt=1,
        eta=eta,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    )

def process_canny(input_image, x, y, prompt, a_prompt, n_prompt, num_samples,
                  image_resolution, ddim_steps, guess_mode, strength, scale, seed,
                  eta, low_threshold, high_threshold):
    # SD 1.5 ControlNet text-to-image; input_image is the canny edge map.
    image = input_image

    return canny_pipe(
        prompt=",".join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    )

def process_canny_sdxl(input_image, x, y, prompt, a_prompt, n_prompt, num_samples,
                       image_resolution, ddim_steps, guess_mode, strength, scale,
                       seed, eta, low_threshold, high_threshold):
    # SDXL base (kept in latent space) followed by the refiner.
    # Not wired to the UI below.
    image = input_image

    image = pipe_xl(
        prompt=",".join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(seed),
        output_type="latent",
    ).images

    return refiner(
        prompt=prompt,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        denoising_start=0.8,
        image=image,
    )

def process(image, prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta,
            low_threshold, high_threshold):
    # Load the input as RGB, resize to a model-friendly resolution, build a
    # 3-channel canny edge map, and run the SD 1.5 ControlNet pipeline.
    image = load_image(image)
    image = np.array(image)
    [x_orig, y_orig, z_orig] = image.shape
    image = resize_image(image)
    [x, y, z] = image.shape

    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    image = Image.fromarray(image)

    output = process_canny(image, x, y, prompt, a_prompt, n_prompt, 1, None,
                           ddim_steps, False, float(strength), scale, seed, eta,
                           low_threshold, high_threshold)
    # The pipeline returns an output object; the single Image component below
    # expects one PIL image.
    return output.images[0]

demo = gr.Blocks().queue()

with demo:
    with gr.Row():
        gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox(label="Prompt")
            run_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options"):
                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1)  # default value was 9.0
                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
                eta = gr.Number(label="eta (DDIM)", value=0.0)
                a_prompt = gr.Textbox(label="Added Prompt", value="best quality, extremely detailed")
                n_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
                )

        with gr.Column():
            # gr.outputs.Image is deprecated in Gradio 3; use gr.Image directly.
            result = gr.Image(label="Output", type="pil")

    ips = [input_image, input_prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold]
    run_button.click(fn=process, inputs=ips, outputs=[result])

demo.launch()
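
The commit adds no requirements.txt, yet app.py needs several packages beyond the stock Gradio image: torch, diffusers, opencv-python, and Pillow are imported directly, the Stable Diffusion pipelines pull in transformers, and enable_model_cpu_offload() / enable_xformers_memory_efficient_attention() require accelerate and xformers respectively. A missing or incomplete dependency list is one plausible cause of the "Runtime error" status shown above. A minimal sketch of such a file, inferred only from the code (the exact package set and the absence of version pins are assumptions, not part of this commit):

# requirements.txt — assumed sketch, not included in this commit
# (gradio itself is installed by the Space from sdk_version in README.md)
torch
diffusers
transformers
accelerate
xformers
opencv-python
Pillow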