{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c7dbfe5c",
   "metadata": {},
   "source": [
    "## Texture Synthesis\n",
    "\n",
    "Each code cell in this notebook is self-contained: it re-imports its dependencies and builds its own pipeline, so any single demo can be run on a fresh kernel."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8a06ff4b",
   "metadata": {},
   "source": [
    "### Texture Synthesis via Optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6349220d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Select the GPU before torch is imported.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "# torch is imported explicitly instead of relying on the star import below.\n",
    "import torch\n",
    "from accelerate.utils import set_seed\n",
    "from diffusers import AutoencoderKL, DDIMScheduler, UNet2DConditionModel\n",
    "from pipeline_sd import ADPipeline\n",
    "from utils import *\n",
    "\n",
    "# ---- configuration ----\n",
    "model_name = \"/root/models/stable-diffusion-v1-5\"  # handed to from_pretrained\n",
    "vae = \"\"  # optional replacement VAE; empty string keeps the pipeline's own VAE\n",
    "lr = 0.05\n",
    "iters = 1\n",
    "seed = 42\n",
    "width, height = 512, 512\n",
    "weight = 0  # NOTE(review): presumably the content weight (no content image here) - confirm\n",
    "batch_size = 3\n",
    "mixed_precision = \"bf16\"\n",
    "num_inference_steps = 300\n",
    "enable_gradient_checkpoint = False\n",
    "start_layer, end_layer = 10, 16  # forwarded to Controller(self_layers=...)\n",
    "\n",
    "style_image = [\"./data/texture/4.jpg\"]\n",
    "content_image = \"\"  # empty string means no content image\n",
    "\n",
    "# ---- pipeline setup ----\n",
    "scheduler = DDIMScheduler.from_pretrained(model_name, subfolder=\"scheduler\")\n",
    "pipe = ADPipeline.from_pretrained(\n",
    "    model_name, scheduler=scheduler, safety_checker=None\n",
    ")\n",
    "if vae != \"\":\n",
    "    # Assign directly so the config name `vae` is not rebound to a model object.\n",
    "    pipe.vae = AutoencoderKL.from_pretrained(vae)\n",
    "# The UNet is also exposed as the pipeline's `classifier` attribute.\n",
    "pipe.classifier = pipe.unet\n",
    "set_seed(seed)\n",
    "\n",
    "# ---- inputs ----\n",
    "style_image = torch.cat([load_image(path, size=(512, 512)) for path in style_image])\n",
    "# NOTE(review): rec_style_image is never read later in this cell; kept as-is\n",
    "# in case the VAE round trip matters for reproducibility - confirm, then drop.\n",
    "rec_style_image = pipe.latent2image(pipe.image2latent(style_image))\n",
    "if content_image == \"\":\n",
    "    content_image = None\n",
    "else:\n",
    "    content_image = load_image(content_image, size=(width, height))\n",
    "controller = Controller(self_layers=(start_layer, end_layer))\n",
    "\n",
    "# ---- run ----\n",
    "result = pipe.optimize(\n",
    "    lr=lr,\n",
    "    batch_size=batch_size,\n",
    "    iters=iters,\n",
    "    width=width,\n",
    "    height=height,\n",
    "    weight=weight,\n",
    "    controller=controller,\n",
    "    style_image=style_image,\n",
    "    content_image=content_image,\n",
    "    mixed_precision=mixed_precision,\n",
    "    num_inference_steps=num_inference_steps,\n",
    "    enable_gradient_checkpoint=enable_gradient_checkpoint,\n",
    ")\n",
    "\n",
    "# ---- save and display ----\n",
    "save_image(style_image, \"style.png\")\n",
    "save_image(result, \"output.png\")\n",
    "show_image(\"style.png\", title=\"style image\")\n",
    "show_image(\"output.png\", title=\"generated\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4ba9b1d",
   "metadata": {},
   "source": [
    "### Texture Synthesis via Sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67a535c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Select the GPU before torch is imported.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "# torch is imported explicitly instead of relying on the star import below.\n",
    "import torch\n",
    "from accelerate.utils import set_seed\n",
    "from diffusers import AutoencoderKL, DDIMScheduler\n",
    "from pipeline_sd import ADPipeline\n",
    "from utils import *\n",
    "\n",
    "# ---- configuration ----\n",
    "model_name = \"/root/models/stable-diffusion-v1-5\"  # handed to from_pretrained\n",
    "vae = \"\"  # optional replacement VAE; empty string keeps the pipeline's own VAE\n",
    "lr = 0.05\n",
    "iters = 3\n",
    "seed = 42\n",
    "width, height = 512, 512\n",
    "weight = 0.  # NOTE(review): presumably the content weight (no content image here) - confirm\n",
    "mixed_precision = \"bf16\"\n",
    "num_inference_steps = 50\n",
    "guidance_scale = 1\n",
    "num_images_per_prompt = 3\n",
    "enable_gradient_checkpoint = False\n",
    "start_layer, end_layer = 10, 16  # forwarded to Controller(self_layers=...)\n",
    "\n",
    "style_image = [\"./data/texture/8.jpg\"]\n",
    "content_image = \"\"  # empty string means no content image\n",
    "\n",
    "# ---- pipeline setup ----\n",
    "scheduler = DDIMScheduler.from_pretrained(model_name, subfolder=\"scheduler\")\n",
    "pipe = ADPipeline.from_pretrained(model_name, scheduler=scheduler, safety_checker=None)\n",
    "if vae != \"\":\n",
    "    # Assign directly so the config name `vae` is not rebound to a model object.\n",
    "    pipe.vae = AutoencoderKL.from_pretrained(vae)\n",
    "# The UNet is also exposed as the pipeline's `classifier` attribute.\n",
    "pipe.classifier = pipe.unet\n",
    "set_seed(seed)\n",
    "\n",
    "# ---- inputs ----\n",
    "style_image = torch.cat([load_image(path, size=(512, 512)) for path in style_image])\n",
    "# NOTE(review): rec_style_image is never read later in this cell; kept as-is\n",
    "# in case the VAE round trip matters for reproducibility - confirm, then drop.\n",
    "rec_style_image = pipe.latent2image(pipe.image2latent(style_image))\n",
    "if content_image == \"\":\n",
    "    content_image = None\n",
    "else:\n",
    "    content_image = load_image(content_image, size=(width, height))\n",
    "controller = Controller(self_layers=(start_layer, end_layer))\n",
    "\n",
    "# ---- run ----\n",
    "result = pipe.sample(\n",
    "    lr=lr,\n",
    "    adain=False,\n",
    "    iters=iters,\n",
    "    width=width,\n",
    "    height=height,\n",
    "    weight=weight,\n",
    "    controller=controller,\n",
    "    style_image=style_image,\n",
    "    content_image=content_image,\n",
    "    prompt=\"\",\n",
    "    negative_prompt=\"\",\n",
    "    mixed_precision=mixed_precision,\n",
    "    num_inference_steps=num_inference_steps,\n",
    "    guidance_scale=guidance_scale,\n",
    "    num_images_per_prompt=num_images_per_prompt,\n",
    "    enable_gradient_checkpoint=enable_gradient_checkpoint,\n",
    ")\n",
    "\n",
    "# ---- save and display ----\n",
    "save_image(style_image, \"style.png\")\n",
    "save_image(result, \"output.png\")\n",
    "show_image(\"style.png\", title=\"style image\")\n",
    "show_image(\"output.png\", title=\"generated\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c69ae623",
   "metadata": {},
   "source": [
    "### Texture Synthesis via MultiDiffusion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d059173",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Select the GPU before torch is imported.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "# torch is imported explicitly instead of relying on the star import below.\n",
    "import torch\n",
    "from accelerate.utils import set_seed\n",
    "from diffusers import AutoencoderKL, DDIMScheduler\n",
    "from pipeline_sd import ADPipeline\n",
    "from utils import *\n",
    "\n",
    "# ---- configuration ----\n",
    "model_name = \"/root/models/stable-diffusion-v1-5\"  # handed to from_pretrained\n",
    "vae = \"\"  # optional replacement VAE; empty string keeps the pipeline's own VAE\n",
    "lr = 0.05\n",
    "iters = 2\n",
    "seed = 42\n",
    "width, height = 512*2, 512  # output canvas is twice as wide as the 512x512 style image\n",
    "weight = 0.0  # NOTE(review): presumably the content weight (no content image here) - confirm\n",
    "mixed_precision = \"bf16\"\n",
    "num_inference_steps = 50\n",
    "guidance_scale = 1\n",
    "num_images_per_prompt = 1\n",
    "enable_gradient_checkpoint = False\n",
    "start_layer, end_layer = 10, 16  # forwarded to Controller(self_layers=...)\n",
    "\n",
    "style_image = [\"./data/texture/17.jpg\"]\n",
    "content_image = \"\"  # empty string means no content image\n",
    "\n",
    "# ---- pipeline setup ----\n",
    "scheduler = DDIMScheduler.from_pretrained(model_name, subfolder=\"scheduler\")\n",
    "pipe = ADPipeline.from_pretrained(model_name, scheduler=scheduler, safety_checker=None)\n",
    "if vae != \"\":\n",
    "    # Assign directly so the config name `vae` is not rebound to a model object.\n",
    "    pipe.vae = AutoencoderKL.from_pretrained(vae)\n",
    "\n",
    "# The UNet is also exposed as the pipeline's `classifier` attribute.\n",
    "pipe.classifier = pipe.unet\n",
    "set_seed(seed)\n",
    "\n",
    "# ---- inputs ----\n",
    "style_image = torch.cat([load_image(path, size=(512, 512)) for path in style_image])\n",
    "if content_image == \"\":\n",
    "    content_image = None\n",
    "else:\n",
    "    content_image = load_image(content_image, size=(width, height))\n",
    "controller = Controller(self_layers=(start_layer, end_layer))\n",
    "\n",
    "# ---- run ----\n",
    "result = pipe.panorama(\n",
    "    lr=lr,\n",
    "    iters=iters,\n",
    "    width=width,\n",
    "    height=height,\n",
    "    weight=weight,\n",
    "    controller=controller,\n",
    "    style_image=style_image,\n",
    "    content_image=content_image,\n",
    "    prompt=\"\",\n",
    "    negative_prompt=\"\",\n",
    "    stride=8,\n",
    "    view_batch_size=8,\n",
    "    mixed_precision=mixed_precision,\n",
    "    num_inference_steps=num_inference_steps,\n",
    "    guidance_scale=guidance_scale,\n",
    "    num_images_per_prompt=num_images_per_prompt,\n",
    "    enable_gradient_checkpoint=enable_gradient_checkpoint,\n",
    ")\n",
    "\n",
    "# ---- save and display ----\n",
    "save_image(style_image, \"style.png\")\n",
    "save_image(result, \"output.png\")\n",
    "show_image(\"style.png\", title=\"style image\")\n",
    "show_image(\"output.png\", title=\"generated\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25e4e702",
   "metadata": {},
   "source": [
    "## Style/Appearance Transfer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4badee8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Select the GPU before torch is imported.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "# torch is imported explicitly instead of relying on the star import below.\n",
    "import torch\n",
    "from accelerate.utils import set_seed\n",
    "from diffusers import AutoencoderKL, DDIMScheduler\n",
    "from pipeline_sd import ADPipeline\n",
    "from utils import *\n",
    "\n",
    "# ---- configuration ----\n",
    "model_name = \"/root/models/stable-diffusion-v1-5\"  # handed to from_pretrained\n",
    "vae = \"\"  # optional replacement VAE; empty string keeps the pipeline's own VAE\n",
    "lr = 0.05\n",
    "iters = 1\n",
    "seed = 42\n",
    "width = 512\n",
    "height = 512\n",
    "weight = 0.25  # NOTE(review): presumably the content weight - confirm in pipeline_sd\n",
    "batch_size = 1\n",
    "mixed_precision = \"bf16\"\n",
    "num_inference_steps = 200\n",
    "# NOTE(review): the next two values are not passed to pipe.optimize below.\n",
    "guidance_scale = 1\n",
    "num_images_per_prompt = 1\n",
    "enable_gradient_checkpoint = False\n",
    "start_layer, end_layer = 10, 16  # forwarded to Controller(self_layers=...)\n",
    "\n",
    "style_image = [\"./data/style/12.jpg\"]\n",
    "content_image = \"./data/content/deer.jpg\"  # empty string means no content image\n",
    "\n",
    "# ---- pipeline setup ----\n",
    "scheduler = DDIMScheduler.from_pretrained(model_name, subfolder=\"scheduler\")\n",
    "pipe = ADPipeline.from_pretrained(\n",
    "    model_name, scheduler=scheduler, safety_checker=None\n",
    ")\n",
    "if vae != \"\":\n",
    "    # Assign directly so the config name `vae` is not rebound to a model object.\n",
    "    pipe.vae = AutoencoderKL.from_pretrained(vae)\n",
    "\n",
    "# The UNet is also exposed as the pipeline's `classifier` attribute.\n",
    "pipe.classifier = pipe.unet\n",
    "set_seed(seed)\n",
    "\n",
    "# ---- inputs ----\n",
    "style_image = torch.cat([load_image(path, size=(512, 512)) for path in style_image])\n",
    "if content_image == \"\":\n",
    "    content_image = None\n",
    "else:\n",
    "    content_image = load_image(content_image, size=(width, height))\n",
    "controller = Controller(self_layers=(start_layer, end_layer))\n",
    "\n",
    "# ---- run ----\n",
    "result = pipe.optimize(\n",
    "    lr=lr,\n",
    "    batch_size=batch_size,\n",
    "    iters=iters,\n",
    "    width=width,\n",
    "    height=height,\n",
    "    weight=weight,\n",
    "    controller=controller,\n",
    "    style_image=style_image,\n",
    "    content_image=content_image,\n",
    "    mixed_precision=mixed_precision,\n",
    "    num_inference_steps=num_inference_steps,\n",
    "    enable_gradient_checkpoint=enable_gradient_checkpoint,\n",
    ")\n",
    "\n",
    "# ---- save and display ----\n",
    "# content_image is None when the path above is left empty, so the content\n",
    "# panel is only written and shown when an image was actually loaded.\n",
    "save_image(style_image, \"style.png\")\n",
    "if content_image is not None:\n",
    "    save_image(content_image, \"content.png\")\n",
    "save_image(result, \"output.png\")\n",
    "show_image(\"style.png\", title=\"style image\")\n",
    "if content_image is not None:\n",
    "    show_image(\"content.png\", title=\"content image\")\n",
    "show_image(\"output.png\", title=\"generated\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "088ca839",
   "metadata": {},
   "source": [
    "## Style-specific T2I Generation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02efdafd",
   "metadata": {},
   "source": [
    "### Style-specific T2I Generation with SD1.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15c9fb96",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Select the GPU before torch is imported.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "from pipeline_sd import ADPipeline\n",
    "from diffusers import DDIMScheduler, AutoencoderKL\n",
    "import torch\n",
    "from utils import *\n",
    "from accelerate.utils import set_seed\n",
    "\n",
    "# ---- configuration ----\n",
    "model_name = \"/root/models/stable-diffusion-v1-5\"  # handed to from_pretrained\n",
    "vae = \"\"  # optional replacement VAE; empty string keeps the pipeline's own VAE\n",
    "lr = 0.015\n",
    "iters = 3\n",
    "seed = 42\n",
    "width, height = 512, 512\n",
    "mixed_precision = \"bf16\"\n",
    "num_inference_steps = 50\n",
    "guidance_scale = 7.5\n",
    "num_images_per_prompt = 3\n",
    "enable_gradient_checkpoint = False\n",
    "start_layer, end_layer = 10, 16  # forwarded to Controller(self_layers=...)\n",
    "\n",
    "prompt = \"A deer\"\n",
    "style_image = [\"./data/style/1.jpg\"]\n",
    "\n",
    "# ---- pipeline setup ----\n",
    "scheduler = DDIMScheduler.from_pretrained(model_name, subfolder=\"scheduler\")\n",
    "pipe = ADPipeline.from_pretrained(\n",
    "    model_name, scheduler=scheduler, safety_checker=None\n",
    ")\n",
    "if vae != \"\":\n",
    "    # Assign directly so the config name `vae` is not rebound to a model object.\n",
    "    pipe.vae = AutoencoderKL.from_pretrained(vae)\n",
    "\n",
    "# The UNet is also exposed as the pipeline's `classifier` attribute.\n",
    "pipe.classifier = pipe.unet\n",
    "set_seed(seed)\n",
    "\n",
    "# ---- inputs ----\n",
    "style_image = torch.cat([load_image(path, size=(512, 512)) for path in style_image])\n",
    "controller = Controller(self_layers=(start_layer, end_layer))\n",
    "\n",
    "# ---- run ----\n",
    "# Size and precision come from the configuration above rather than literals.\n",
    "result = pipe.sample(\n",
    "    controller=controller,\n",
    "    iters=iters,\n",
    "    lr=lr,\n",
    "    adain=True,\n",
    "    height=height,\n",
    "    width=width,\n",
    "    mixed_precision=mixed_precision,\n",
    "    style_image=style_image,\n",
    "    prompt=prompt,\n",
    "    negative_prompt=\"\",\n",
    "    guidance_scale=guidance_scale,\n",
    "    num_inference_steps=num_inference_steps,\n",
    "    num_images_per_prompt=num_images_per_prompt,\n",
    "    enable_gradient_checkpoint=enable_gradient_checkpoint,\n",
    ")\n",
    "\n",
    "# ---- save and display ----\n",
    "save_image(style_image, \"style.png\")\n",
    "save_image(result, \"output.png\")\n",
    "show_image(\"style.png\", title=\"style image\")\n",
    "show_image(\"output.png\", title=prompt)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dd75eac7",
   "metadata": {},
   "source": [
    "### Style-specific T2I Generation with SDXL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1541fd6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Select the GPU before torch is imported.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "\n",
    "# This cell uses the SDXL pipeline module; the name ADPipeline is rebound here.\n",
    "from pipeline_sdxl import ADPipeline\n",
    "from diffusers import DDIMScheduler, AutoencoderKL\n",
    "import torch\n",
    "from utils import *\n",
    "from accelerate.utils import set_seed\n",
    "\n",
    "# ---- configuration ----\n",
    "model_name = \"/root/models/stable-diffusion-xl-base-1.0/\"  # handed to from_pretrained\n",
    "vae = \"\"  # optional replacement VAE; empty string keeps the pipeline's own VAE\n",
    "lr = 0.015\n",
    "iters = 2\n",
    "seed = 42\n",
    "width, height = 1024, 1024\n",
    "mixed_precision = \"bf16\"\n",
    "num_inference_steps = 50\n",
    "guidance_scale = 7\n",
    "num_images_per_prompt = 5\n",
    "enable_gradient_checkpoint = True\n",
    "start_layer, end_layer = 64, 70  # forwarded to Controller(self_layers=...)\n",
    "\n",
    "prompt = \"A rocket\"\n",
    "style_image = [\"./data/style/1.png\"]\n",
    "\n",
    "# ---- pipeline setup ----\n",
    "scheduler = DDIMScheduler.from_pretrained(model_name, subfolder=\"scheduler\")\n",
    "pipe = ADPipeline.from_pretrained(\n",
    "    model_name, scheduler=scheduler, safety_checker=None\n",
    ")\n",
    "if vae != \"\":\n",
    "    # Assign directly so the config name `vae` is not rebound to a model object.\n",
    "    pipe.vae = AutoencoderKL.from_pretrained(vae)\n",
    "\n",
    "# The UNet is also exposed as the pipeline's `classifier` attribute.\n",
    "pipe.classifier = pipe.unet\n",
    "set_seed(seed)\n",
    "\n",
    "# ---- inputs ----\n",
    "style_image = torch.cat([load_image(path, size=(1024, 1024)) for path in style_image])\n",
    "controller = Controller(self_layers=(start_layer, end_layer))\n",
    "\n",
    "# ---- run ----\n",
    "# Size and precision come from the configuration above rather than literals.\n",
    "result = pipe.sample(\n",
    "    controller=controller,\n",
    "    iters=iters,\n",
    "    lr=lr,\n",
    "    adain=True,\n",
    "    height=height,\n",
    "    width=width,\n",
    "    mixed_precision=mixed_precision,\n",
    "    style_image=style_image,\n",
    "    prompt=prompt,\n",
    "    negative_prompt=\"\",\n",
    "    guidance_scale=guidance_scale,\n",
    "    num_inference_steps=num_inference_steps,\n",
    "    num_images_per_prompt=num_images_per_prompt,\n",
    "    enable_gradient_checkpoint=enable_gradient_checkpoint,\n",
    ")\n",
    "\n",
    "# ---- save and display ----\n",
    "save_image(style_image, \"style.png\")\n",
    "save_image(result, \"output.png\")\n",
    "show_image(\"style.png\", title=\"style image\")\n",
    "show_image(\"output.png\", title=prompt)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ad",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}