BRIA-2.3-ControlNet-Inpaintingオープンソース画像修復モデル

ホーム

BRIA 2.3 ControlNet Inpainting

briaaiによって開発

商用ライセンスデータセットでトレーニングされたスマート画像修復モデル、法的責任保証を提供

画像生成オープンソースライセンス:その他 #商用ライセンス修復 #法的責任保証 #ControlNetアーキテクチャ

ダウンロード数 25

リリース日 : 2024年6月13日

モデル概要

BRIA 2.3はユーザーのテキストプロンプトに基づき画像のマスク領域をインテリジェントに補完する修復モデルで、オブジェクト削除・置換・追加・修正及び画像拡張機能をサポート

モデル特徴

商用法的保証

トレーニングデータは完全にコンプライアンスを満たし、著作権侵害・プライバシー侵害・有害コンテンツに対する完全な法的責任カバレッジを提供

超高速修復

FAST-LORA技術を組み合わせ、A10 GPUでわずか1.6秒で修復を完了

多様なシーン対応

オブジェクト削除・置換・追加・修正及び画像拡張など様々な編集ニーズをサポート

モデル能力

画像修復

オブジェクト削除

コンテンツ置換

画像拡張

使用事例

画像編集

不要なオブジェクト削除

写真から不要な人物や物体を削除

自然な修復効果を生成

コンテンツ置換

画像内の特定要素を置換

画像全体のスタイルを一貫して保持

クリエイティブデザイン

画像拡張

画像境界の拡張や欠損領域の補完

元画像のスタイルに合った拡張コンテンツを生成

🚀 BRIA 2.3 ControlNet Inpainting Fast

BRIA 2.3は、最大規模のマルチソース商用グレードのライセンス付きデータセットのみでトレーニングされています。商用利用に安全で、最高品質を保証します。データセットには、架空のキャラクター、ロゴ、商標、著名人などの著作権保護された素材や、有害コンテンツ、プライバシー侵害コンテンツが含まれていません。そのため、このモデルは著作権侵害・プライバシー侵害および有害コンテンツの軽減について、完全な法的責任補償を提供します。

BRIA 2.3は、ユーザーが提供するテキストプロンプトに基づいて、画像内のマスクされた領域を埋めるために設計されたインペインティングモデルです。このモデルは、画像内のオブジェクトの削除、置換、追加、変更などのさまざまなシナリオに適用でき、画像を拡張する機能も備えています。

詳細情報、チュートリアル、ツール、および他のユーザーとの交流を目的として、Discordコミュニティに参加しましょう！

🚀 クイックスタート

モデルのダウンロード

from huggingface_hub import hf_hub_download
import os

# Download the custom pipeline sources next to this script so that they can be
# imported as plain modules afterwards.
try:
    # `or '.'` covers the case where `__file__` is a bare file name and
    # `dirname` therefore returns an empty string.
    local_dir = os.path.dirname(__file__) or '.'
except NameError:
    # `__file__` is not defined in interactive sessions (notebook / REPL);
    # use the current working directory there.
    local_dir = '.'

for filename in (
    'controlnet.py',
    'config.json',
    'image_processor.py',
    'pipeline_controlnet_sd_xl.py',
):
    hf_hub_download(repo_id="briaai/BRIA-2.3-ControlNet-Inpainting", filename=filename, local_dir=local_dir)

モデルの実行

from diffusers import (
    AutoencoderKL,
    LCMScheduler,
)
from pipeline_controlnet_sd_xl import StableDiffusionXLControlNetPipeline
from controlnet import ControlNetModel
import torch
import numpy as np
from PIL import Image
import requests
import PIL
from io import BytesIO
from torchvision import transforms
import os 


def resize_image_to_retain_ratio(image):
    """Resize *image* to about 1024 * 1024 pixels, keeping its aspect ratio.

    Both output dimensions are rounded down to a multiple of 8, so the
    resulting ratio only approximates the input ratio.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method (a PIL image in this script).

    Returns:
        The value returned by ``image.resize`` for the computed size.
    """
    target_area = 1024 * 1024
    multiple = 8

    src_width, src_height = image.size[0], image.size[1]
    aspect = src_width / src_height

    # width = sqrt(area * aspect) so that width / height matches `aspect`.
    new_width = int((target_area * aspect) ** 0.5)
    new_width -= new_width % multiple

    # Height is derived from the already-rounded width, then rounded itself.
    new_height = int(target_area / new_width)
    new_height -= new_height % multiple

    return image.resize((new_width, new_height))


def download_image(url, timeout=30):
    """Fetch an image over HTTP and return it converted to RGB.

    Args:
        url: Address of the image to download.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the script indefinitely.

    Returns:
        The downloaded image opened with PIL and converted to ``"RGB"``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of handing an error page to the image
    # decoder, which would report a misleading "cannot identify image" error.
    response.raise_for_status()
    return PIL.Image.open(BytesIO(response.content)).convert("RGB")


def get_masked_image(image, image_mask, width, height):
    """Grey out the masked region of *image*.

    Args:
        image: PIL image to inpaint. It is NOT resized here, so it must
            already measure ``width`` x ``height``.
        image_mask: PIL mask; white marks the area to inpaint / the object
            to remove. It is resized to ``(width, height)``.
        width: Target mask width in pixels.
        height: Target mask height in pixels.

    Returns:
        A tuple ``(masked_image, resized_mask, masked_image_to_present)``:
        the image with masked pixels set to mid-grey, the resized PIL mask,
        and a second, independent copy of the masked image for display.

    Raises:
        AssertionError: If image and resized mask differ in height or width.
    """
    image_mask_pil = image_mask.resize((width, height))  # object to remove is white (1)
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask_pil.convert("L")).astype(np.float32) / 255.0
    # Compare height AND width; the slice [0:1] would only cover the height.
    assert image.shape[0:2] == image_mask.shape[0:2], "image and image_mask must have the same image size"

    image[image_mask > 0.5] = 0.5  # set as masked pixel - s.t. will be grey
    # Both returned images carry the same pixels; copy so they stay independent.
    masked_image_to_present = image.copy()

    image = Image.fromarray((image * 255.0).astype(np.uint8))
    masked_image_to_present = Image.fromarray((masked_image_to_present * 255.0).astype(np.uint8))
    return image, image_mask_pil, masked_image_to_present


# Transform applied to the masked PIL image before it is fed to the VAE.
image_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)

default_negative_prompt = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers"

img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

# Scale the source image directly to the working size. Forcing it to
# 1024x1024 first would throw away the aspect ratio that
# `resize_image_to_retain_ratio` is meant to keep.
init_image = resize_image_to_retain_ratio(download_image(img_url))
width, height = init_image.size

# The mask has to match the working image pixel for pixel.
mask_image = download_image(mask_url).convert("L").resize(init_image.size)

# Load, init model
# Call `from_pretrained` on the class rather than on a freshly constructed
# instance: `ControlNetModel()` would first build a complete default model
# that is thrown away immediately.
# NOTE(review): assumes `from_pretrained` is the classmethod inherited from
# diffusers; controlnet.py is not visible here - confirm.
controlnet = ControlNetModel.from_pretrained("briaai/BRIA-2.3-ControlNet-Inpainting", torch_dtype=torch.float16)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained("briaai/BRIA-2.3", controlnet=controlnet.to(dtype=torch.float16), torch_dtype=torch.float16, vae=vae)

# Swap in the LCM scheduler and fuse the FAST-LORA weights into the pipeline.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("briaai/BRIA-2.3-FAST-LORA")
pipe.fuse_lora()
pipe = pipe.to(device="cuda")

# Optional:
# pipe.enable_xformers_memory_efficient_attention()

# Fixed seed for the generator handed to the pipeline call.
generator = torch.Generator(device="cuda").manual_seed(123456)

# Use the VAE instance owned by the pipeline for encoding the control image.
vae = pipe.vae


masked_image, image_mask, masked_image_to_present = get_masked_image(init_image, mask_image, width, height)

# Shift and scale the tensor values: x -> (x - 0.5) / 0.5.
masked_image_tensor = image_transforms(masked_image)
masked_image_tensor = (masked_image_tensor - 0.5) / 0.5

# Add the batch dimension and move to the GPU.
masked_image_tensor = masked_image_tensor.unsqueeze(0).to(device="cuda")

# Inference only: no gradients are needed for the encoded control image, so
# skip autograd bookkeeping while running the VAE encoder.
with torch.no_grad():
    control_latents = vae.encode(
        masked_image_tensor[:, :3, :, :].to(vae.dtype)
    ).latent_dist.sample()
    control_latents = control_latents * vae.config.scaling_factor

image_mask = np.array(image_mask)[:, :]
mask_tensor = torch.tensor(image_mask, dtype=torch.float32)[None, ...]
# binarize the mask: values above 128 become 1.0, everything else 0.0
mask_tensor = (mask_tensor > 128.0).to(torch.float32)

mask_tensor = mask_tensor.to(device="cuda")
# Bring the mask down to the spatial size of the latents.
mask_resized = torch.nn.functional.interpolate(mask_tensor[None, ...], size=(control_latents.shape[2], control_latents.shape[3]), mode='nearest')

# Control input = masked-image latents with the mask appended as an extra channel.
masked_image = torch.cat([control_latents, mask_resized], dim=1)

prompt = ""

gen_img = pipe(negative_prompt=default_negative_prompt, prompt=prompt,
            controlnet_conditioning_scale=1.0,
            num_inference_steps=12,
            height=height, width=width,
            image = masked_image, # control image
            init_image = init_image,
            mask_image = mask_tensor,
            guidance_scale = 1.2,
            generator=generator).images[0]


# `display` is only defined inside IPython/Jupyter. When this runs as a plain
# script, write the result to disk instead of failing with NameError after
# the image has already been generated.
try:
    display(gen_img)
except NameError:
    gen_img.save("bria_inpainting_result.png")
    print("Saved result to bria_inpainting_result.png")