ControlNet Tile SDXL 1.0
A ControlNet Tile model based on Stable Diffusion XL, focused on image detail restoration, variation generation, and super-resolution upscaling
Downloads: 20.64k
Released: 6/26/2024
Model Overview
The model uses a Tile ControlNet for fine-grained control over images and is mainly used for image deblurring, detail restoration, style variation generation, and super-resolution upscaling.
Model Highlights
Image detail restoration
Restores and enhances detail in blurry or low-quality images
Style variation generation
Generates variants in different styles while preserving the main content of the original image
Super-resolution upscaling
Supports super-resolution upscaling at arbitrary scale factors, e.g. the 3 × 3 tiled upscale demonstrated below
Aspect-ratio flexibility
Handles images of any aspect ratio
Model Capabilities
Image deblurring
Detail enhancement
Style transfer
Super-resolution upscaling
Image variation generation
Use Cases
Image restoration
Blurry image repair
Restores detail in blurry or low-quality images
Noticeably improves sharpness and detail rendition
Creative design
Image style variations
Generates variants that keep the original content but differ in style
Yields artwork in a range of styles
Image enhancement
Super-resolution upscaling
High-quality upscaling of low-resolution images
Detail remains well preserved after a 3× upscale
🚀 ControlNet Tile SDXL
ControlNet Tile SDXL is a text-to-image generation project that supports image deblurring, image variation generation, and image super-resolution, providing a versatile set of solutions for image generation and processing.
🚀 Quick Start
The project covers image deblurring, image variation generation, and image super-resolution; the examples and usage code are given below.
✨ Key Features
- Image deblurring: removes blur from degraded images and restores fine detail.
- Image variation generation: similar to Midjourney, generates different variants of an input image.
- Image super-resolution: supports any aspect ratio and any upscale factor, e.g. a 3 × 3 tiled upscale (see the sketch below and the Tile super example).
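The "3 × 3" above refers to regenerating the image as a 3 × 3 grid of tiles, each rendered at the full working resolution, so the stitched result is 3× larger per side (this is exactly what the Tile super example does). A quick arithmetic sketch, using the 1024 × 1024 working size assumed throughout this card:

n = 3                          # tiles per side
work_w, work_h = 1024, 1024    # assumed per-tile working resolution
out_w, out_h = work_w * n, work_h * n
print(out_w, out_h)            # 3072 3072, i.e. 9x the pixel count of a single 1024 x 1024 generation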
📦 Installation
The upstream documentation does not give explicit installation steps; the preprocessing code it references is linked below:
- https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/main/TTP_tile_preprocessor_v5.py
- https://github.com/lllyasviel/ControlNet-v1-1-nightly/blob/main/gradio_tile.py
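No package list is given upstream either; judging from the imports in the examples below, a typical environment would be something like pip install diffusers transformers accelerate safetensors opencv-python pillow huggingface_hub (an assumption, not an official requirement). The Tile blur example also imports guided_filter, which the author notes is uploaded in this repo; the sketch below assumes the file is stored as guided_filter.py at the root of xinsir/controlnet-tile-sdxl-1.0 and copies it next to your script:

# Sketch: download the guided_filter helper referenced by the Tile blur example.
# Assumption: it is stored as "guided_filter.py" at the root of the model repo.
import shutil
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="xinsir/controlnet-tile-sdxl-1.0", filename="guided_filter.py")
shutil.copy(path, "guided_filter.py")  # so that `from guided_filter import FastGuidedFilter` resolves
print("saved helper to ./guided_filter.py")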
💻 Usage Examples
Basic Usage
Image deblurring (Tile blur)
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
from guided_filter import FastGuidedFilter  # this helper file is provided in this repo
import torch
import random
import numpy as np
import cv2

def resize_image_control(control_image, resolution):
    HH, WW, _ = control_image.shape
    crop_h = random.randint(0, HH - resolution[1])
    crop_w = random.randint(0, WW - resolution[0])
    crop_image = control_image[crop_h:crop_h + resolution[1], crop_w:crop_w + resolution[0], :]
    return crop_image, crop_w, crop_h

def apply_gaussian_blur(image_np, ksize=5, sigmaX=1.0):
    if ksize % 2 == 0:
        ksize += 1  # ksize must be odd
    blurred_image = cv2.GaussianBlur(image_np, (ksize, ksize), sigmaX=sigmaX)
    return blurred_image

def apply_guided_filter(image_np, radius, eps, scale):
    filter = FastGuidedFilter(image_np, radius, eps, scale)
    return filter.filter(image_np)

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, remember to change the vae as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))

# randomly sample degradation parameters (blur strength, guided-filter radius/eps, downscale factor)
blur_strength = random.sample([i / 10. for i in range(10, 201, 2)], k=1)[0]
radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]
eps = random.sample([i / 1000. for i in range(1, 101, 2)], k=1)[0]
scale_factor = random.sample([i / 10. for i in range(10, 181, 5)], k=1)[0]

if random.random() > 0.5:
    controlnet_img = apply_gaussian_blur(controlnet_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)

if random.random() > 0.5:
    # apply guided filter
    controlnet_img = apply_guided_filter(controlnet_img, radius, eps, scale_factor)

# downscale then upscale to simulate a low-quality source
controlnet_img = cv2.resize(controlnet_img, (int(W / scale_factor), int(H / scale_factor)), interpolation=cv2.INTER_AREA)
controlnet_img = cv2.resize(controlnet_img, (W, H), interpolation=cv2.INTER_CUBIC)

controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# the pipeline performs best at 1024 * 1024 or the matching bucket resolution
new_width, new_height = W, H

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

images[0].save("your image save path")  # png usually preserves more quality than jpg/webp but produces larger files
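The random degradation above mirrors how the conditioning images were degraded for training. For repeatable results at inference time it can be preferable to fix the preprocessing; the snippet below is a minimal sketch of that idea, reusing apply_gaussian_blur and the W/H values defined above, and its parameter values are illustrative assumptions rather than recommended defaults:

# deterministic preprocessing sketch (assumed parameter values; reuses names defined above)
fixed_blur_ksize = 9       # assumption: moderate blur
fixed_sigma = 4.5          # assumption: sigma roughly ksize / 2
fixed_scale_factor = 2.0   # assumption: simulate a 2x-downscaled source

det_img = cv2.imread("your original image path")
det_img = cv2.resize(det_img, (W, H))
det_img = apply_gaussian_blur(det_img, ksize=fixed_blur_ksize, sigmaX=fixed_sigma)
det_img = cv2.resize(det_img, (int(W / fixed_scale_factor), int(H / fixed_scale_factor)), interpolation=cv2.INTER_AREA)
det_img = cv2.resize(det_img, (W, H), interpolation=cv2.INTER_CUBIC)
det_img = Image.fromarray(cv2.cvtColor(det_img, cv2.COLOR_BGR2RGB))
# pass det_img to the pipe call above in place of controlnet_img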
Image variation generation (Tile var)
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import numpy as np
import cv2

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, remember to change the vae as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))

controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# the pipeline performs best at 1024 * 1024 or the matching bucket resolution
new_width, new_height = W, H

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
).images

images[0].save("your image save path")  # png usually preserves more quality than jpg/webp but produces larger files
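The comment above recommends working at 1024 × 1024 or a matching bucket resolution. The helper below is a small sketch of one way to compute such a size: scale to roughly a 1024² area while keeping the aspect ratio, then snap each side to a multiple of 64 (a common SDXL bucket granularity; the exact rounding rule is an assumption, not something this model card specifies):

import numpy as np

def bucket_resolution(width, height, target_area=1024 * 1024, multiple=64):
    # scale to roughly `target_area` pixels at the same aspect ratio,
    # then snap each side to a multiple of `multiple`
    ratio = np.sqrt(target_area / (width * height))
    new_w = max(multiple, int(round(width * ratio / multiple)) * multiple)
    new_h = max(multiple, int(round(height * ratio / multiple)) * multiple)
    return new_w, new_h

print(bucket_resolution(1920, 1080))  # -> (1344, 768)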
Image super-resolution (Tile super)
from diffusers import ControlNetModel, StableDiffusionXLControlNetImg2ImgPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import random
import numpy as np
import cv2

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, remember to change the vae as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# the img2img ControlNet pipeline is used here because each tile is passed both as the
# init image and as the control image in the call below.
pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
# round to a multiple of 48 so each side of the 3 x 3 tiles stays divisible by 16
W, H = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
controlnet_img = cv2.resize(controlnet_img, (W, H))

controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# the pipeline performs best at 1024 * 1024 or the matching bucket resolution
target_width = W // 3
target_height = H // 3

# split the image into a 3 x 3 grid of tiles, each resized back to the full working size
images = []
for i in range(3):      # 3 rows
    for j in range(3):  # 3 columns
        left = j * target_width
        top = i * target_height
        right = left + target_width
        bottom = top + target_height

        # crop the tile at the computed bounds
        cropped_image = controlnet_img.crop((left, top, right, bottom))
        cropped_image = cropped_image.resize((W, H))
        images.append(cropped_image)

seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)

result_images = []
for sub_img in images:
    new_width, new_height = W, H
    out = pipe(prompt=[prompt]*1,
               image=sub_img,
               control_image=sub_img,
               negative_prompt=[negative_prompt]*1,
               generator=generator,
               width=new_width,
               height=new_height,
               num_inference_steps=30,
               crops_coords_top_left=(W, H),
               target_size=(W, H),
               original_size=(W * 2, H * 2),
               )
    result_images.append(out.images[0])

# stitch the 3 x 3 tiles back onto the upscaled canvas
new_im = Image.new('RGB', (new_width * 3, new_height * 3))
for idx, tile in enumerate(result_images):
    row, col = divmod(idx, 3)
    new_im.paste(tile, (col * new_width, row * new_height))

new_im.save("your image save path")  # png usually preserves more quality than jpg/webp but produces larger files
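The 3 × 3 layout above is hardcoded. Below is a minimal sketch that generalizes the same split, regenerate, and stitch idea to an arbitrary n × n grid; it reuses pipe, prompt, negative_prompt and the preprocessed controlnet_img from the example above, and the tile_upscale helper (its name and defaults) is an illustrative assumption rather than part of the upstream code:

def tile_upscale(img, n=3, steps=30, seed=0):
    # img: a PIL image whose width and height are divisible by n
    # (see the // 48 * 48 rounding above for the n = 3 case)
    W, H = img.size
    tile_w, tile_h = W // n, H // n
    generator = torch.Generator('cuda').manual_seed(seed)
    canvas = Image.new('RGB', (W * n, H * n))
    for idx in range(n * n):
        row, col = divmod(idx, n)
        tile = img.crop((col * tile_w, row * tile_h, (col + 1) * tile_w, (row + 1) * tile_h)).resize((W, H))
        out = pipe(prompt=[prompt],
                   image=tile,
                   control_image=tile,
                   negative_prompt=[negative_prompt],
                   generator=generator,
                   width=W,
                   height=H,
                   num_inference_steps=steps,
                   target_size=(W, H),
                   original_size=(W * n, H * n),
                   ).images[0]
        canvas.paste(out, (col * W, row * H))
    return canvas

upscaled = tile_upscale(controlnet_img, n=3)
upscaled.save("your image save path")

As in the 3 × 3 example, tiles are generated independently, so visible seams can appear at tile borders.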
Advanced Usage
The upstream documentation does not provide advanced usage examples.
📄 License
This project is released under the Apache-2.0 license.