Controlnet Tile Sdxl 1.0

xinsirによって開発

Stable Diffusion XLをベースにしたControlNet Tileモデルで、画像のディテール修復、バリエーション生成、超解像度処理に特化

画像生成オープンソースライセンス:Apache-2.0 #画像のぼかし除去 #ディテール修復 #画像バリエーション生成

ダウンロード数 20.64k

リリース日 : 6/26/2024

モデル概要

このモデルはTile制御ネットワークを通じて画像の精密な制御を実現し、主に画像のぼかし除去、ディテール修復、スタイルバリエーション生成、超解像度拡大などのタスクに使用

モデル特徴

画像ディテール修復

ぼやけた画像や低品質な画像のディテール修復と強化が可能

スタイルバリエーション生成

元画像の主要な内容を保持しながら異なるスタイルのバリエーションを生成

超解像度拡大

任意の比率での超解像度拡大をサポート、最大3倍まで可能

アスペクト比適応

任意のアスペクト比の画像処理をサポート

モデル能力

画像のぼかし除去

ディテール強化

スタイル変換

超解像度拡大

画像バリエーション生成

使用事例

画像修復

ぼやけた画像修復

ぼやけた画像や低品質な画像のディテール修復

画像の鮮明さとディテール表現を大幅に向上

クリエイティブデザイン

画像スタイルバリエーション

元画像の内容を保持しつつ異なるスタイルのバリエーションを生成

多様なスタイルのアート作品を獲得

画像強化

超解像度拡大

低解像度画像を高品質に拡大

3倍拡大後も良好なディテールを保持

license: apache-2.0 pipeline_tag: text-to-image

ControlNet Tile SDXL

images

Image Deblur Example(Repaint Detail)

images_0)

images_1)

images_2)

images_3)

images_4)

Image Variation Example(like midjourney)

images_5)

images_6)

images_7)

images_8)

images_9)

Image Super-resolution(like realESRGAN)

Supports any aspect ratio and any upscale factor; the following examples are upscaled 3 * 3 times.

images_5)

images_6)

images_7)

images_8)

Code to Use Tile blur

code reference: https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/main/TTP_tile_preprocessor_v5.py
https://github.com/lllyasviel/ControlNet-v1-1-nightly/blob/main/gradio_tile.py

from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
from guided_filter import FastGuidedFilter # I have upload this file in this repo
import torch
import numpy as np
import cv2

def resize_image_control(control_image, resolution):
    """Cut a randomly positioned window out of ``control_image``.

    Args:
        control_image: Array whose ``shape`` unpacks to three values; the
            first two are used as height and width.
        resolution: Sequence ``(width, height)`` giving the window size.

    Returns:
        A tuple ``(crop_image, crop_w, crop_h)``: the cropped array followed
        by the left and top offsets of the window.

    Raises:
        ValueError: If the requested window does not fit inside the image.
    """
    # Imported locally because the snippet's import list does not provide it.
    import random

    HH, WW, _ = control_image.shape
    window_w, window_h = resolution[0], resolution[1]
    if window_h > HH or window_w > WW:
        # random.randint would raise ValueError on the empty range anyway;
        # raise the same exception type with a message that names the cause.
        raise ValueError(
            f"crop window {window_w}x{window_h} does not fit into image {WW}x{HH}"
        )
    crop_h = random.randint(0, HH - window_h)
    crop_w = random.randint(0, WW - window_w)
    crop_image = control_image[crop_h:crop_h + window_h, crop_w:crop_w + window_w, :]
    return crop_image, crop_w, crop_h

def apply_gaussian_blur(image_np, ksize=5, sigmaX=1.0):
    """Return ``image_np`` blurred by ``cv2.GaussianBlur`` with a square kernel.

    Args:
        image_np: Image array passed straight to ``cv2.GaussianBlur``.
        ksize: Kernel side length; an even value is bumped to the next odd one.
        sigmaX: Forwarded to ``cv2.GaussianBlur`` as ``sigmaX``.
    """
    # ksize must be odd
    kernel_side = ksize if ksize % 2 else ksize + 1
    return cv2.GaussianBlur(image_np, (kernel_side, kernel_side), sigmaX=sigmaX)

def apply_guided_filter(image_np, radius, eps, scale):
    """Run ``FastGuidedFilter`` over ``image_np`` and return its output.

    The same array is handed to the constructor and to ``filter``; ``radius``,
    ``eps`` and ``scale`` are forwarded to the constructor unchanged. Their
    exact meaning is defined by ``guided_filter.FastGuidedFilter``, which is
    not part of this snippet.
    """
    guided = FastGuidedFilter(image_np, radius, eps, scale)
    result = guided.filter(image_np)
    return result


# Tile blur example: randomly degrade the input image and pass the degraded
# copy to the pipeline as the ControlNet condition.

# `random` is used below but is not in this snippet's import list.
import random

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")


controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when test with other base model, you need to change the vae also.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
# NOTE(review): the pipeline is never moved to an accelerator in this snippet;
# fp16 weights normally call for `pipe.to("cuda")` — confirm before running.

image_path = "your original image path"
controlnet_img = cv2.imread(image_path)
if controlnet_img is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError(f"cannot read image: {image_path}")

height, width, _ = controlnet_img.shape
# Rescale to an area of about 1024 * 1024 pixels, keeping the aspect ratio.
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))


# Draw the degradation parameters at random from fixed value grids.
blur_strength = random.choice([i / 10. for i in range(10, 201, 2)])
radius = random.choice(list(range(1, 40, 2)))
eps = random.choice([i / 1000. for i in range(1, 101, 2)])
scale_factor = random.choice([i / 10. for i in range(10, 181, 5)])


# Each degradation is applied with probability 0.5.
if random.random() > 0.5:
    controlnet_img = apply_gaussian_blur(controlnet_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)

if random.random() > 0.5:
    # Apply Guided Filter
    controlnet_img = apply_guided_filter(controlnet_img, radius, eps, scale_factor)

# Resize image: shrink by scale_factor, then enlarge back to W x H.
controlnet_img = cv2.resize(controlnet_img, (int(W / scale_factor), int(H / scale_factor)), interpolation=cv2.INTER_AREA)
controlnet_img = cv2.resize(controlnet_img, (W, H), interpolation=cv2.INTER_CUBIC)

# OpenCV loads BGR; convert to RGB before wrapping in a PIL image.
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or same bucket resolution to get the best performance
# The output size was referenced without being defined; generate at the
# control image's size.
new_width, new_height = W, H

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
    ).images

images[0].save(f"your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")

Code to Use Tile var

Using a more detailed prompt when regenerating can help!

from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import numpy as np
import cv2

# Tile variation example: the resized input image is passed to the pipeline
# unchanged as the ControlNet condition.

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")


controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when test with other base model, you need to change the vae also.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
# NOTE(review): the pipeline is never moved to an accelerator in this snippet;
# fp16 weights normally call for `pipe.to("cuda")` — confirm before running.

image_path = "your original image path"
controlnet_img = cv2.imread(image_path)
if controlnet_img is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError(f"cannot read image: {image_path}")

height, width, _ = controlnet_img.shape
# Rescale to an area of about 1024 * 1024 pixels, keeping the aspect ratio.
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))
# OpenCV loads BGR; convert to RGB before wrapping in a PIL image.
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or same bucket resolution to get the best performance
# The output size was referenced without being defined; generate at the
# control image's size.
new_width, new_height = W, H

images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=new_width,
    height=new_height,
    num_inference_steps=30,
    ).images

images[0].save(f"your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")

Code to Use Tile super

Performance may be unstable; the next version is being optimized.

from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
from PIL import Image
import torch
import numpy as np
import cv2

# Tile super-resolution example: split the input into a 3 x 3 grid, run the
# pipeline on every enlarged tile, then stitch the outputs into one image
# three times as wide and three times as high.

# `random` is used below but is not in this snippet's import list.
import random

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it as detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")


controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when test with other base model, you need to change the vae also.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
# The random generator below is created on CUDA, so the pipeline has to run
# on the same device.
pipe.to("cuda")

image_path = "your original image path"
controlnet_img = cv2.imread(image_path)
if controlnet_img is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError(f"cannot read image: {image_path}")

height, width, _ = controlnet_img.shape
# Rescale to an area of about 1024 * 1024 pixels, then round both sides down
# to a multiple of 48 so they divide evenly by 3 for the grid below.
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
controlnet_img = cv2.resize(controlnet_img, (W, H))
# OpenCV loads BGR; convert to RGB before wrapping in a PIL image.
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image resolution to 1024 * 1024 or same bucket resolution to get the best performance
target_width = W // 3
target_height = H // 3

# Tiles are collected row by row (row-major order).
images = []
for i in range(3):  # three rows
    for j in range(3):  # three columns
        left = j * target_width
        top = i * target_height
        right = left + target_width
        bottom = top + target_height

        # Crop the tile at the computed bounds, then enlarge it to W x H.
        cropped_image = controlnet_img.crop((left, top, right, bottom))
        cropped_image = cropped_image.resize((W, H))

        images.append(cropped_image)

seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)

# Every tile is generated at the full W x H size.
new_width, new_height = W, H

result_images = []
for sub_img in images:
    # NOTE(review): `image` carries the ControlNet condition for this
    # pipeline; `control_image` looks redundant here — confirm it can go.
    out = pipe(prompt=[prompt]*1,
               image=sub_img,
               control_image=sub_img,
               negative_prompt=[negative_prompt]*1,
               generator=generator,
               width=new_width,
               height=new_height,
               num_inference_steps=30,
               crops_coords_top_left=(W, H),
               target_size=(W, H),
               original_size=(W * 2, H * 2),
               )
    result_images.append(out.images[0])

new_im = Image.new('RGB', (new_width*3, new_height*3))
# Paste the tiles onto the new image in the same row-major order.
for index, tile in enumerate(result_images):
    row, col = divmod(index, 3)
    new_im.paste(tile, (col * new_width, row * new_height))

new_im.save(f"your image save path, png format is usually better than jpg or webp in terms of image quality but got much bigger")