đ DanbooruCLIP
A fine - tuned CLIP (ViT - L/14) model using the danburoo2021 dataset for vision - related tasks
đ Quick Start
The following is a basic example of using the model:
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
model = CLIPModel.from_pretrained("OysterQAQ/DanbooruCLIP")
processor = CLIPProcessor.from_pretrained("OysterQAQ/DanbooruCLIP")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
outputs = model(**inputs)
logits_per_image = outputs.logits_per_image
probs = logits_per_image.softmax(dim=1)
⨠Features
- Updated on July 17, 2023: Added the pixiv dataset for training.
- Fine - tuned the CLIP (ViT - L/14) model using the danburoo2021 dataset.
- Different learning rates and weight decays were used for different epochs during training.
đ§ Technical Details
Training Parameters
- For epochs 0 - 3, the learning rate is 4e - 6, and the weight decay is 1e - 3.
- For epochs 4 - 8, the learning rate is 1e - 6, and the weight decay is 1e - 3.
Label Pre - processing
for i in range(length):
if not is_image(data_from_db.path[i]):
continue
try:
img = self.preprocess(
Image.open(data_from_db.path[i].replace("./", "/mnt/lvm/danbooru2021/danbooru2021/")))
except Exception as e:
continue
tags = json.loads(data_from_db.tags[i])
category_group = {}
for tag in tags:
category_group.setdefault(tag["category"], []).append(tag)
character_list = category_group[4] if 4 in category_group else []
work_list = list(filter(
lambda e:
e["name"] != "original"
, category_group[3])) if 3 in category_group else []
general_list = category_group[0] if 0 in category_group else []
caption = ""
caption_2 = None
for character in character_list:
if len(work_list) != 0:
character["name"] = re.sub(u"\\(.*?\\)", "", character["name"])
caption += character["name"].replace("_", " ")
caption += ","
caption = caption[:-1]
caption += " "
if len(work_list) != 0:
caption += "from "
for work in work_list:
caption += work["name"].replace("_", " ")
caption += " "
if len(general_list) != 0:
caption += "with "
if len(general_list) > 20:
general_list_1 = general_list[:int(len(general_list) / 2)]
general_list_2 = general_list[int(len(general_list) / 2):]
caption_2 = caption
for general in general_list_1:
if general["name"].find("girl") == -1 and general["name"].find("boy") == -1 and len(
re.findall(is_contain, general["name"])) != 0:
caption_2 += general["name"].replace("_", " ")
caption_2 += ","
caption_2 = caption_2[:-1]
for general in general_list_2:
if general["name"].find("girl") == -1 and general["name"].find("boy") == -1 and len(
re.findall(is_contain, general["name"])) != 0:
caption += general["name"].replace("_", " ")
caption += ","
caption = caption[:-1]
else:
for general in general_list:
if general["name"].find("girl") == -1 and general["name"].find("boy") == -1 and len(
re.findall(is_contain, general["name"])) != 0:
caption += general["name"].replace("_", " ")
caption += ","
caption = caption[:-1]
text_1 = clip.tokenize(texts=caption, truncate=True)
text_2= None
if caption_2 is not None:
text_2 = clip.tokenize(texts=caption_2, truncate=True)
yield img, text_1[0]
if text_2 is not None:
yield img, text_2[0]
đ Feedback
Where to send questions or comments about the model
Please use this Google Form