Commit
Showing 582 changed files with 75,693 additions and 0 deletions.
OodGenerater.py
@@ -0,0 +1,86 @@
import numpy as np
from PIL import Image
from torchvision.transforms.functional import to_pil_image, to_tensor

from .preprocess.openpose.run_openpose import OpenPose
from .preprocess.humanparsing.aigc_run_parsing import Parsing
from .ootd.inference_ootd_hd import OOTDiffusionHD
from .ootd.inference_ootd_dc import OOTDiffusionDC

from .utils_ootd import get_mask_location

# CUDA device index shared by all models (0 = first GPU).
gpu_id = 0
openpose_model_hd = OpenPose(gpu_id)
parsing_model_hd = Parsing(gpu_id)
ootd_model_hd = OOTDiffusionHD(gpu_id)


def tensor2pil(image):
    """Convert a ComfyUI IMAGE tensor (batched H x W x C, float in [0, 1]) to a PIL image."""
    return Image.fromarray(np.clip(255. * image.cpu().numpy().squeeze(), 0, 255).astype(np.uint8))


category_dict_utils = ['upper_body', 'lower_body', 'dresses']


class Ood_hd_CXH:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "cloth_image": ("IMAGE",),
                "model_image": ("IMAGE",),
                "category": (["upperbody", "lowerbody", "dress"], {"default": "upperbody"}),
                "steps": ("INT", {"default": 20, "min": 20, "max": 40, "step": 1}),
                "scale": ("FLOAT", {"default": 2, "min": 1, "max": 5, "step": 0.1}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
            }
        }

    RETURN_TYPES = ("IMAGE", "IMAGE")
    RETURN_NAMES = ("image", "image_masked")
    FUNCTION = "generate"

    CATEGORY = "CXH"

    def generate(self, cloth_image, model_image, category, steps, scale, seed):
        model_type = 'hd'
        # The OOTD pipeline expects 768x1024 inputs.
        garm_img = tensor2pil(cloth_image)
        garm_img = garm_img.resize((768, 1024))

        vton_img = tensor2pil(model_image)
        vton_img = vton_img.resize((768, 1024))
        # Pose estimation and human parsing run at half resolution (384x512).
        keypoints = openpose_model_hd(vton_img.resize((384, 512)))
        model_parse, _ = parsing_model_hd(vton_img.resize((384, 512)))

        # Map the node's category choice to the parsing-label group.
        dictype = {"upperbody": 0, "lowerbody": 1, "dress": 2}[category]
        mask, mask_gray = get_mask_location(model_type, category_dict_utils[dictype], model_parse, keypoints)
        mask = mask.resize((768, 1024), Image.NEAREST)
        mask_gray = mask_gray.resize((768, 1024), Image.NEAREST)

        # Grey out the region to be re-dressed on the person image.
        masked_vton_img = Image.composite(mask_gray, vton_img, mask)

        images = ootd_model_hd(
            model_type=model_type,
            category=category,
            image_garm=garm_img,
            image_vton=masked_vton_img,
            mask=mask,
            image_ori=vton_img,
            num_samples=1,
            num_steps=steps,
            image_scale=scale,
            seed=seed,
        )

        # Convert back to ComfyUI's H x W x C float layout.
        output_image = to_tensor(images[0])
        output_image = output_image.permute((1, 2, 0))
        masked_vton_img = masked_vton_img.convert("RGB")
        masked_vton_img = to_tensor(masked_vton_img)
        masked_vton_img = masked_vton_img.permute((1, 2, 0))
        return ([output_image], [masked_vton_img])
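
For reference, ComfyUI IMAGE tensors are batched H x W x C floats in [0, 1]; here is a minimal standalone sketch of the conversion round-trip that tensor2pil and the return path above perform (dummy tensor, no ComfyUI required):

import torch
import numpy as np
from PIL import Image
from torchvision.transforms.functional import to_tensor

# A ComfyUI-style IMAGE: batch of 1, H x W x C, float32 in [0, 1].
img = torch.rand(1, 1024, 768, 3)

# tensor2pil: squeeze the batch, scale to 0..255, clip, cast to uint8.
pil = Image.fromarray(np.clip(255. * img.cpu().numpy().squeeze(), 0, 255).astype(np.uint8))

# Back again: to_tensor yields C x H x W in [0, 1]; permute restores H x W x C.
back = to_tensor(pil).permute((1, 2, 0))
print(back.shape)  # torch.Size([1024, 768, 3])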
__init__.py
@@ -0,0 +1,6 @@
from .OodGenerater import Ood_hd_CXH

# Register the node with ComfyUI.
NODE_CLASS_MAPPINGS = {
    "Ood_hd_CXH": Ood_hd_CXH,
}
@@ -0,0 +1 @@
Put checkpoints here, including ootd, humanparsing, openpose and clip-vit-large-patch14
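
A minimal download sketch for populating this directory, assuming the weights are mirrored on the Hugging Face Hub — the repo id levihsu/OOTDiffusion is an assumption, so substitute wherever the checkpoints are actually hosted; the expected layout is reconstructed from the *_PATH constants in the inference files below:

# A sketch, not part of the commit: fetch the weights with huggingface_hub.
# Repo ids are assumptions -- point them at wherever the checkpoints live.
from huggingface_hub import snapshot_download

snapshot_download(repo_id="levihsu/OOTDiffusion", local_dir="checkpoints")
snapshot_download(repo_id="openai/clip-vit-large-patch14",
                  local_dir="checkpoints/clip-vit-large-patch14")

# Expected layout (from the *_PATH constants in the inference files):
# checkpoints/
#   clip-vit-large-patch14/
#   humanparsing/
#   openpose/
#   ootd/
#     vae/, tokenizer/, text_encoder/, ...
#     ootd_hd/checkpoint-36000/{unet_garm, unet_vton}/
#     ootd_dc/checkpoint-36000/{unet_garm, unet_vton}/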
Binary file not shown.
Binary file not shown.
ootd/inference_ootd_hd.py
@@ -0,0 +1,133 @@
import pdb
from pathlib import Path
import sys

# Make the pipelines_ootd package importable regardless of the working directory.
PROJECT_ROOT = Path(__file__).absolute().parents[0].absolute()
sys.path.insert(0, str(PROJECT_ROOT))
import os

import torch
import numpy as np
from PIL import Image
import cv2

import random
import time

from pipelines_ootd.pipeline_ootd import OotdPipeline
from pipelines_ootd.unet_garm_2d_condition import UNetGarm2DConditionModel
from pipelines_ootd.unet_vton_2d_condition import UNetVton2DConditionModel
from diffusers import UniPCMultistepScheduler
from diffusers import AutoencoderKL

import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoProcessor, CLIPVisionModelWithProjection
from transformers import CLIPTextModel, CLIPTokenizer

VIT_PATH = "../checkpoints/clip-vit-large-patch14"
VAE_PATH = "../checkpoints/ootd"
UNET_PATH = "../checkpoints/ootd/ootd_hd/checkpoint-36000"
MODEL_PATH = "../checkpoints/ootd"


class OOTDiffusionHD:

    def __init__(self, gpu_id):
        self.gpu_id = 'cuda:' + str(gpu_id)

        vae = AutoencoderKL.from_pretrained(
            VAE_PATH,
            subfolder="vae",
            torch_dtype=torch.float16,
        )

        # Two UNets: one encodes the garment, one denoises the try-on image.
        unet_garm = UNetGarm2DConditionModel.from_pretrained(
            UNET_PATH,
            subfolder="unet_garm",
            torch_dtype=torch.float16,
            use_safetensors=True,
        )
        unet_vton = UNetVton2DConditionModel.from_pretrained(
            UNET_PATH,
            subfolder="unet_vton",
            torch_dtype=torch.float16,
            use_safetensors=True,
        )

        self.pipe = OotdPipeline.from_pretrained(
            MODEL_PATH,
            unet_garm=unet_garm,
            unet_vton=unet_vton,
            vae=vae,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
            safety_checker=None,
            requires_safety_checker=False,
        ).to(self.gpu_id)

        self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)

        # The CLIP vision tower produces the garment embedding used as the prompt.
        self.auto_processor = AutoProcessor.from_pretrained(VIT_PATH)
        self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(VIT_PATH).to(self.gpu_id)

        self.tokenizer = CLIPTokenizer.from_pretrained(
            MODEL_PATH,
            subfolder="tokenizer",
        )
        self.text_encoder = CLIPTextModel.from_pretrained(
            MODEL_PATH,
            subfolder="text_encoder",
        ).to(self.gpu_id)

    def tokenize_captions(self, captions, max_length):
        inputs = self.tokenizer(
            captions, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
        )
        return inputs.input_ids

    def __call__(self,
                 model_type='hd',
                 category='upperbody',
                 image_garm=None,
                 image_vton=None,
                 mask=None,
                 image_ori=None,
                 num_samples=1,
                 num_steps=20,
                 image_scale=1.0,
                 seed=-1,
                 ):
        # seed == -1 means "pick a random seed"; it is printed for reproducibility.
        if seed == -1:
            random.seed(time.time())
            seed = random.randint(0, 2147483647)
        print('Initial seed: ' + str(seed))
        generator = torch.manual_seed(seed)

        with torch.no_grad():
            # Embed the garment image with CLIP; shape (1, 1, 768) after unsqueeze.
            prompt_image = self.auto_processor(images=image_garm, return_tensors="pt").to(self.gpu_id)
            prompt_image = self.image_encoder(prompt_image.data['pixel_values']).image_embeds
            prompt_image = prompt_image.unsqueeze(1)
            if model_type == 'hd':
                # hd: splice the image embedding in after the BOS token of an empty caption.
                prompt_embeds = self.text_encoder(self.tokenize_captions([""], 2).to(self.gpu_id))[0]
                prompt_embeds[:, 1:] = prompt_image[:]
            elif model_type == 'dc':
                # dc: append the image embedding after the category caption.
                prompt_embeds = self.text_encoder(self.tokenize_captions([category], 3).to(self.gpu_id))[0]
                prompt_embeds = torch.cat([prompt_embeds, prompt_image], dim=1)
            else:
                raise ValueError("model_type must be 'hd' or 'dc'!")

            images = self.pipe(prompt_embeds=prompt_embeds,
                               image_garm=image_garm,
                               image_vton=image_vton,
                               mask=mask,
                               image_ori=image_ori,
                               num_inference_steps=num_steps,
                               image_guidance_scale=image_scale,
                               num_images_per_prompt=num_samples,
                               generator=generator,
                               ).images

        return images
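
The two conditioning branches differ only in how the CLIP garment embedding is merged with the text embedding; a standalone shape sketch with dummy tensors (CLIP-L hidden size is 768) makes the difference concrete:

import torch

text_hd = torch.randn(1, 2, 768)      # text_encoder output for "" padded to 2 tokens
text_dc = torch.randn(1, 3, 768)      # text_encoder output for the category word, padded to 3 tokens
image_embed = torch.randn(1, 1, 768)  # CLIP image_embeds, unsqueezed to a length-1 sequence

# hd: overwrite everything after the BOS token with the garment embedding.
hd = text_hd.clone()
hd[:, 1:] = image_embed
print(hd.shape)  # torch.Size([1, 2, 768])

# dc: keep the category caption and append the garment embedding.
dc = torch.cat([text_dc, image_embed], dim=1)
print(dc.shape)  # torch.Size([1, 4, 768])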
ootd/inference_ootd_dc.py
@@ -0,0 +1,132 @@
import pdb
from pathlib import Path
import sys

# Make the pipelines_ootd package importable regardless of the working directory.
PROJECT_ROOT = Path(__file__).absolute().parents[0].absolute()
sys.path.insert(0, str(PROJECT_ROOT))
import os

import torch
import numpy as np
from PIL import Image
import cv2

import random
import time

from pipelines_ootd.pipeline_ootd import OotdPipeline
from pipelines_ootd.unet_garm_2d_condition import UNetGarm2DConditionModel
from pipelines_ootd.unet_vton_2d_condition import UNetVton2DConditionModel
from diffusers import UniPCMultistepScheduler
from diffusers import AutoencoderKL

import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoProcessor, CLIPVisionModelWithProjection
from transformers import CLIPTextModel, CLIPTokenizer

VIT_PATH = "../checkpoints/clip-vit-large-patch14"
VAE_PATH = "../checkpoints/ootd"
UNET_PATH = "../checkpoints/ootd/ootd_dc/checkpoint-36000"
MODEL_PATH = "../checkpoints/ootd"


class OOTDiffusionDC:

    def __init__(self, gpu_id):
        self.gpu_id = 'cuda:' + str(gpu_id)

        vae = AutoencoderKL.from_pretrained(
            VAE_PATH,
            subfolder="vae",
            torch_dtype=torch.float16,
        )

        # Two UNets: one encodes the garment, one denoises the try-on image.
        unet_garm = UNetGarm2DConditionModel.from_pretrained(
            UNET_PATH,
            subfolder="unet_garm",
            torch_dtype=torch.float16,
            use_safetensors=True,
        )
        unet_vton = UNetVton2DConditionModel.from_pretrained(
            UNET_PATH,
            subfolder="unet_vton",
            torch_dtype=torch.float16,
            use_safetensors=True,
        )

        self.pipe = OotdPipeline.from_pretrained(
            MODEL_PATH,
            unet_garm=unet_garm,
            unet_vton=unet_vton,
            vae=vae,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
            safety_checker=None,
            requires_safety_checker=False,
        ).to(self.gpu_id)

        self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)

        # The CLIP vision tower produces the garment embedding used as the prompt.
        self.auto_processor = AutoProcessor.from_pretrained(VIT_PATH)
        self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(VIT_PATH).to(self.gpu_id)

        self.tokenizer = CLIPTokenizer.from_pretrained(
            MODEL_PATH,
            subfolder="tokenizer",
        )
        self.text_encoder = CLIPTextModel.from_pretrained(
            MODEL_PATH,
            subfolder="text_encoder",
        ).to(self.gpu_id)

    def tokenize_captions(self, captions, max_length):
        inputs = self.tokenizer(
            captions, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
        )
        return inputs.input_ids

    def __call__(self,
                 model_type='hd',
                 category='upperbody',
                 image_garm=None,
                 image_vton=None,
                 mask=None,
                 image_ori=None,
                 num_samples=1,
                 num_steps=20,
                 image_scale=1.0,
                 seed=-1,
                 ):
        # seed == -1 means "pick a random seed"; it is printed for reproducibility.
        if seed == -1:
            random.seed(time.time())
            seed = random.randint(0, 2147483647)
        print('Initial seed: ' + str(seed))
        generator = torch.manual_seed(seed)

        with torch.no_grad():
            # Embed the garment image with CLIP; shape (1, 1, 768) after unsqueeze.
            prompt_image = self.auto_processor(images=image_garm, return_tensors="pt").to(self.gpu_id)
            prompt_image = self.image_encoder(prompt_image.data['pixel_values']).image_embeds
            prompt_image = prompt_image.unsqueeze(1)
            if model_type == 'hd':
                # hd: splice the image embedding in after the BOS token of an empty caption.
                prompt_embeds = self.text_encoder(self.tokenize_captions([""], 2).to(self.gpu_id))[0]
                prompt_embeds[:, 1:] = prompt_image[:]
            elif model_type == 'dc':
                # dc: append the image embedding after the category caption.
                prompt_embeds = self.text_encoder(self.tokenize_captions([category], 3).to(self.gpu_id))[0]
                prompt_embeds = torch.cat([prompt_embeds, prompt_image], dim=1)
            else:
                raise ValueError("model_type must be 'hd' or 'dc'!")

            images = self.pipe(prompt_embeds=prompt_embeds,
                               image_garm=image_garm,
                               image_vton=image_vton,
                               mask=mask,
                               image_ori=image_ori,
                               num_inference_steps=num_steps,
                               image_guidance_scale=image_scale,
                               num_images_per_prompt=num_samples,
                               generator=generator,
                               ).images

        return images
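
Outside ComfyUI, the DC pipeline can be driven directly through its __call__ signature; a hedged sketch follows (the file names and the import path are hypothetical, and inputs follow the 768x1024 convention the node above uses):

# A usage sketch under stated assumptions; file names are placeholders.
from PIL import Image
from ootd.inference_ootd_dc import OOTDiffusionDC  # import path assumes the package root

model = OOTDiffusionDC(0)  # first GPU

garm = Image.open("cloth.jpg").resize((768, 1024))         # garment image
vton = Image.open("model_masked.jpg").resize((768, 1024))  # person with try-on region greyed out
mask = Image.open("mask.png").resize((768, 1024))          # binary try-on mask
ori = Image.open("model.jpg").resize((768, 1024))          # original person image

images = model(model_type='dc', category='dress',
               image_garm=garm, image_vton=vton, mask=mask, image_ori=ori,
               num_samples=1, num_steps=20, image_scale=2.0, seed=0)
images[0].save("result.png")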