Upload
StartHua committed Feb 24, 2024
1 parent 70f8e39 commit 06956ad
Showing 582 changed files with 75,693 additions and 0 deletions.
86 changes: 86 additions & 0 deletions OodGenerater.py
@@ -0,0 +1,86 @@
import numpy as np
from PIL import Image
from torchvision.transforms.functional import to_pil_image, to_tensor

from .preprocess.openpose.run_openpose import OpenPose
from .preprocess.humanparsing.aigc_run_parsing import Parsing
from .ootd.inference_ootd_hd import OOTDiffusionHD
from .ootd.inference_ootd_dc import OOTDiffusionDC

from .utils_ootd import get_mask_location

# CUDA device index; the preprocessing and diffusion models are loaded once at import time.
cuda_device = 0
openpose_model_hd = OpenPose(cuda_device)
parsing_model_hd = Parsing(cuda_device)
ootd_model_hd = OOTDiffusionHD(cuda_device)

# Convert a ComfyUI IMAGE tensor (float values in [0, 1]) to a PIL image.
def tensor2pil(image):
    return Image.fromarray(np.clip(255. * image.cpu().numpy().squeeze(), 0, 255).astype(np.uint8))

category_dict_utils = ['upper_body', 'lower_body', 'dresses']

class Ood_hd_CXH:
@classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "cloth_image": ("IMAGE",),
                "model_image": ("IMAGE",),
                "category": (["upperbody", "lowerbody", "dress"], {"default": "upperbody"}),
                "steps": ("INT", {"default": 20, "min": 20, "max": 40, "step": 1}),
                "scale": ("FLOAT", {"default": 2.0, "min": 1.0, "max": 5.0, "step": 0.1}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            }
        }

RETURN_TYPES = ("IMAGE", "IMAGE")
RETURN_NAMES = ("image", "image_masked")
FUNCTION = "generate"

CATEGORY = "CXH"

    def generate(self, cloth_image, model_image, category, steps, scale, seed):

model_type = 'hd'
garm_img = tensor2pil(cloth_image)
garm_img = garm_img.resize((768, 1024))

        vton_img = tensor2pil(model_image)
        vton_img = vton_img.resize((768, 1024))
        # Pose estimation and human parsing run at 384x512; the resulting mask is scaled back up below.
        keypoints = openpose_model_hd(vton_img.resize((384, 512)))
        model_parse, _ = parsing_model_hd(vton_img.resize((384, 512)))

        # Map the node's category choice to the matching entry in category_dict_utils.
        dictype = 0
        if category == "lowerbody":
            dictype = 1
        elif category == "dress":
            dictype = 2
mask, mask_gray = get_mask_location(model_type, category_dict_utils[dictype], model_parse, keypoints)
mask = mask.resize((768, 1024), Image.NEAREST)
mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)

        # Grey out the garment region of the person image; the diffusion model repaints inside the mask.
        masked_vton_img = Image.composite(mask_gray, vton_img, mask)

images = ootd_model_hd(
model_type=model_type,
category=category,
image_garm=garm_img,
image_vton=masked_vton_img,
mask=mask,
image_ori=vton_img,
num_samples=1,
num_steps=steps,
image_scale=scale,
seed=seed,
)

output_image = to_tensor(images[0])
output_image = output_image.permute((1, 2, 0))
masked_vton_img = masked_vton_img.convert("RGB")
masked_vton_img = to_tensor(masked_vton_img)
masked_vton_img = masked_vton_img.permute((1, 2, 0))
return ([output_image], [masked_vton_img])


6 changes: 6 additions & 0 deletions __init__.py
@@ -0,0 +1,6 @@
from .OodGenerater import Ood_hd_CXH

NODE_CLASS_MAPPINGS = {
    "Ood_hd_CXH": Ood_hd_CXH,
}
1 change: 1 addition & 0 deletions checkpoints/README.txt
@@ -0,0 +1 @@
Put checkpoints here, including ootd, humanparsing, openpose and clip-vit-large-patch14
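
A minimal sketch of the layout this expects under checkpoints/, inferred from the paths hard-coded in the inference files added in this commit (VIT_PATH, VAE_PATH, UNET_PATH, MODEL_PATH); the internal contents of humanparsing/ and openpose/ are not spelled out here and are assumptions:

checkpoints/
    clip-vit-large-patch14/              (VIT_PATH: CLIP image processor and vision encoder)
    humanparsing/                        (weights for the Parsing preprocessor; contents assumed)
    openpose/                            (weights for the OpenPose preprocessor; contents assumed)
    ootd/                                (MODEL_PATH / VAE_PATH, diffusers-style layout)
        vae/
        tokenizer/
        text_encoder/
        ootd_hd/checkpoint-36000/        (UNET_PATH for the HD pipeline: unet_garm/, unet_vton/)
        ootd_dc/checkpoint-36000/        (UNET_PATH for the DC pipeline: unet_garm/, unet_vton/)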
Binary file added images/demo.png
Binary file added images/workflow.png
Binary file not shown.
Binary file not shown.
133 changes: 133 additions & 0 deletions ootd/inference_ootd.py
@@ -0,0 +1,133 @@
import pdb
from pathlib import Path
import sys
PROJECT_ROOT = Path(__file__).absolute().parents[0].absolute()
sys.path.insert(0, str(PROJECT_ROOT))
import os

import torch
import numpy as np
from PIL import Image
import cv2

import random
import time

from pipelines_ootd.pipeline_ootd import OotdPipeline
from pipelines_ootd.unet_garm_2d_condition import UNetGarm2DConditionModel
from pipelines_ootd.unet_vton_2d_condition import UNetVton2DConditionModel
from diffusers import UniPCMultistepScheduler
from diffusers import AutoencoderKL

import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoProcessor, CLIPVisionModelWithProjection
from transformers import CLIPTextModel, CLIPTokenizer

VIT_PATH = "../checkpoints/clip-vit-large-patch14"
VAE_PATH = "../checkpoints/ootd"
UNET_PATH = "../checkpoints/ootd/ootd_hd/checkpoint-36000"
MODEL_PATH = "../checkpoints/ootd"

class OOTDiffusion:

def __init__(self, gpu_id):
self.gpu_id = 'cuda:' + str(gpu_id)

vae = AutoencoderKL.from_pretrained(
VAE_PATH,
subfolder="vae",
torch_dtype=torch.float16,
)

unet_garm = UNetGarm2DConditionModel.from_pretrained(
UNET_PATH,
subfolder="unet_garm",
torch_dtype=torch.float16,
use_safetensors=True,
)
unet_vton = UNetVton2DConditionModel.from_pretrained(
UNET_PATH,
subfolder="unet_vton",
torch_dtype=torch.float16,
use_safetensors=True,
)

self.pipe = OotdPipeline.from_pretrained(
MODEL_PATH,
unet_garm=unet_garm,
unet_vton=unet_vton,
vae=vae,
torch_dtype=torch.float16,
variant="fp16",
use_safetensors=True,
safety_checker=None,
requires_safety_checker=False,
).to(self.gpu_id)

self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)

self.auto_processor = AutoProcessor.from_pretrained(VIT_PATH)
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(VIT_PATH).to(self.gpu_id)

self.tokenizer = CLIPTokenizer.from_pretrained(
MODEL_PATH,
subfolder="tokenizer",
)
self.text_encoder = CLIPTextModel.from_pretrained(
MODEL_PATH,
subfolder="text_encoder",
).to(self.gpu_id)


def tokenize_captions(self, captions, max_length):
inputs = self.tokenizer(
captions, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
)
return inputs.input_ids


def __call__(self,
model_type='hd',
category='upperbody',
image_garm=None,
image_vton=None,
mask=None,
image_ori=None,
num_samples=1,
num_steps=20,
image_scale=1.0,
seed=-1,
):
if seed == -1:
random.seed(time.time())
seed = random.randint(0, 2147483647)
print('Initial seed: ' + str(seed))
generator = torch.manual_seed(seed)

with torch.no_grad():
prompt_image = self.auto_processor(images=image_garm, return_tensors="pt").to(self.gpu_id)
prompt_image = self.image_encoder(prompt_image.data['pixel_values']).image_embeds
prompt_image = prompt_image.unsqueeze(1)
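            # 'hd' conditions on an empty text prompt and overwrites the token slots after BOS with
            # the garment's CLIP image embedding; 'dc' tokenizes the category word and appends the
            # image embedding as an additional token.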
if model_type == 'hd':
prompt_embeds = self.text_encoder(self.tokenize_captions([""], 2).to(self.gpu_id))[0]
prompt_embeds[:, 1:] = prompt_image[:]
elif model_type == 'dc':
prompt_embeds = self.text_encoder(self.tokenize_captions([category], 3).to(self.gpu_id))[0]
prompt_embeds = torch.cat([prompt_embeds, prompt_image], dim=1)
else:
raise ValueError("model_type must be \'hd\' or \'dc\'!")

images = self.pipe(prompt_embeds=prompt_embeds,
image_garm=image_garm,
image_vton=image_vton,
mask=mask,
image_ori=image_ori,
num_inference_steps=num_steps,
image_guidance_scale=image_scale,
num_images_per_prompt=num_samples,
generator=generator,
).images

return images
132 changes: 132 additions & 0 deletions ootd/inference_ootd_dc.py
@@ -0,0 +1,132 @@
import pdb
from pathlib import Path
import sys
PROJECT_ROOT = Path(__file__).absolute().parents[0].absolute()
sys.path.insert(0, str(PROJECT_ROOT))
import os
import torch
import numpy as np
from PIL import Image
import cv2

import random
import time

from pipelines_ootd.pipeline_ootd import OotdPipeline
from pipelines_ootd.unet_garm_2d_condition import UNetGarm2DConditionModel
from pipelines_ootd.unet_vton_2d_condition import UNetVton2DConditionModel
from diffusers import UniPCMultistepScheduler
from diffusers import AutoencoderKL

import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoProcessor, CLIPVisionModelWithProjection
from transformers import CLIPTextModel, CLIPTokenizer

VIT_PATH = "../checkpoints/clip-vit-large-patch14"
VAE_PATH = "../checkpoints/ootd"
UNET_PATH = "../checkpoints/ootd/ootd_dc/checkpoint-36000"
MODEL_PATH = "../checkpoints/ootd"

class OOTDiffusionDC:

def __init__(self, gpu_id):
self.gpu_id = 'cuda:' + str(gpu_id)

vae = AutoencoderKL.from_pretrained(
VAE_PATH,
subfolder="vae",
torch_dtype=torch.float16,
)

unet_garm = UNetGarm2DConditionModel.from_pretrained(
UNET_PATH,
subfolder="unet_garm",
torch_dtype=torch.float16,
use_safetensors=True,
)
unet_vton = UNetVton2DConditionModel.from_pretrained(
UNET_PATH,
subfolder="unet_vton",
torch_dtype=torch.float16,
use_safetensors=True,
)

self.pipe = OotdPipeline.from_pretrained(
MODEL_PATH,
unet_garm=unet_garm,
unet_vton=unet_vton,
vae=vae,
torch_dtype=torch.float16,
variant="fp16",
use_safetensors=True,
safety_checker=None,
requires_safety_checker=False,
).to(self.gpu_id)

self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)

self.auto_processor = AutoProcessor.from_pretrained(VIT_PATH)
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(VIT_PATH).to(self.gpu_id)

self.tokenizer = CLIPTokenizer.from_pretrained(
MODEL_PATH,
subfolder="tokenizer",
)
self.text_encoder = CLIPTextModel.from_pretrained(
MODEL_PATH,
subfolder="text_encoder",
).to(self.gpu_id)


def tokenize_captions(self, captions, max_length):
inputs = self.tokenizer(
captions, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
)
return inputs.input_ids


def __call__(self,
model_type='hd',
category='upperbody',
image_garm=None,
image_vton=None,
mask=None,
image_ori=None,
num_samples=1,
num_steps=20,
image_scale=1.0,
seed=-1,
):
if seed == -1:
random.seed(time.time())
seed = random.randint(0, 2147483647)
print('Initial seed: ' + str(seed))
generator = torch.manual_seed(seed)

with torch.no_grad():
prompt_image = self.auto_processor(images=image_garm, return_tensors="pt").to(self.gpu_id)
prompt_image = self.image_encoder(prompt_image.data['pixel_values']).image_embeds
prompt_image = prompt_image.unsqueeze(1)
if model_type == 'hd':
prompt_embeds = self.text_encoder(self.tokenize_captions([""], 2).to(self.gpu_id))[0]
prompt_embeds[:, 1:] = prompt_image[:]
elif model_type == 'dc':
prompt_embeds = self.text_encoder(self.tokenize_captions([category], 3).to(self.gpu_id))[0]
prompt_embeds = torch.cat([prompt_embeds, prompt_image], dim=1)
else:
raise ValueError("model_type must be \'hd\' or \'dc\'!")

images = self.pipe(prompt_embeds=prompt_embeds,
image_garm=image_garm,
image_vton=image_vton,
mask=mask,
image_ori=image_ori,
num_inference_steps=num_steps,
image_guidance_scale=image_scale,
num_images_per_prompt=num_samples,
generator=generator,
).images

return images