diff --git a/stablepy_demo.ipynb b/stablepy_demo.ipynb index 802c3fb..84c54f0 100644 --- a/stablepy_demo.ipynb +++ b/stablepy_demo.ipynb @@ -5,7 +5,9 @@ "colab": { "provenance": [], "gpuType": "T4", - "authorship_tag": "ABX9TyMTtwAB3UX9r02s0CiGOolX" + "toc_visible": true, + "authorship_tag": "ABX9TyNBGv+xFiyr9kclvWrZxoFa", + "include_colab_link": true }, "kernelspec": { "name": "python3", @@ -17,6 +19,16 @@ "accelerator": "GPU" }, "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, { "cell_type": "markdown", "source": [ @@ -43,9 +55,20 @@ }, "outputs": [], "source": [ - "!pip install -q git+https://github.com/R3gm/stablepy.git@v0.2.0" + "!pip install stablepy==0.3.0 -q" ] }, + { + "cell_type": "markdown", + "source": [ + "To use the version with the latest changes, you can install directly from the repository.\n", + "\n", + "`pip install -q git+https://github.com/R3gm/stablepy.git`" + ], + "metadata": { + "id": "7YIpscy8sjgs" + } + }, { "cell_type": "markdown", "source": [ @@ -58,6 +81,8 @@ { "cell_type": "code", "source": [ + "%cd /content/\n", + "\n", "# Model\n", "!wget https://huggingface.co/frankjoshua/toonyou_beta6/resolve/main/toonyou_beta6.safetensors\n", "\n", @@ -84,7 +109,7 @@ { "cell_type": "markdown", "source": [ - "## Inference with Stable diffusion 1.5" + "# Inference with Stable diffusion 1.5" ], "metadata": { "id": "amYJfvMwOKnL" @@ -95,7 +120,7 @@ "source": [ "First, we pass the path of the model we will use.\n", "\n", - "The default task is txt2img but it can be changed to: openpose, canny, mlsd, scribble, softedge, segmentation, depth, normalbae, lineart, shuffle, ip2p or inpaint" + "The default task is txt2img but it can be changed to: openpose, canny, mlsd, scribble, softedge, segmentation, depth, normalbae, lineart, shuffle, ip2p, img2img or inpaint" ], "metadata": { "id": "PvTT0AiKRX0m" @@ -167,15 +192,15 @@ "images, image_list = model(\n", " prompt = \"pixel art (masterpiece, best quality), 1girl, collarbone, wavy hair, looking at viewer, blurry foreground, upper body, necklace, contemporary, plain pants, ((intricate, print, pattern)), ponytail, freckles, red hair, dappled sunlight, smile, happy,\",\n", " negative_prompt = \"(worst quality, low quality, letterboxed), bad_artist_token, bad_hand_token\",\n", - " img_width = 512,\n", - " img_height = 1024,\n", + " img_width = 513,\n", + " img_height = 1022,\n", " num_images = 1,\n", " num_steps = 30,\n", " guidance_scale = 8.0,\n", - " clip_skip = True,\n", + " clip_skip = True, # Clip skip to the penultimate layer, in other implementations it is equivalent to use clipskip 2.\n", " seed = -1, # random seed\n", " sampler=\"DPM++ SDE Karras\",\n", - " syntax_weights=\"Compel\",\n", + " syntax_weights=\"Compel\", # (word)weight and (word)+ for prompts weights\n", "\n", " lora_A = lora1_path,\n", " lora_scale_A = 0.8,\n", @@ -184,8 +209,19 @@ "\n", " textual_inversion=[(\"bad_artist_token\", \"./bad-artist.pt\"), (\"bad_hand_token\", \"./bad-hands-5.pt\")], # Is a list of tuples with [(\"\",\"\"),...]\n", "\n", - " upscaler_model_path = upscaler_path, # High res fix\n", + " upscaler_model_path = upscaler_path, # Upscale the image and Hires-fix\n", " upscaler_increases_size=1.5,\n", + " hires_steps = 25,\n", + " hires_denoising_strength = 0.35,\n", + " hires_prompt = \"\", # If this is left as is, the main prompt will be used instead.\n", + " hires_negative_prompt = \"\",\n", + " hires_sampler = \"Use same sampler\",\n", + "\n", + " #By default, the generated images are saved in the current location within the 'images' folder.\n", + " image_storage_location = \"./images\",\n", + "\n", + " #You can disable saving the images with this parameter.\n", + " save_generated_images = False,\n", ")\n", "\n", "for image in images:\n", @@ -200,7 +236,7 @@ { "cell_type": "markdown", "source": [ - "# ControlNet" + "## ControlNet" ], "metadata": { "id": "zSYYzJ7FXO2d" @@ -235,7 +271,7 @@ "source": [ "from PIL import Image\n", "\n", - "!wget https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png\n", + "!wget https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png -q\n", "\n", "control_image = \"bird.png\"\n", "image = Image.open(control_image)\n", @@ -263,7 +299,7 @@ " prompt = \"(masterpiece, best quality), bird\",\n", " negative_prompt = \"(worst quality, low quality, letterboxed)\",\n", " image = control_image,\n", - " # preprocessor_name=\"None\", only canny not need the preprocessor_name, active by default\n", + " # preprocessor_name = \"None\", canny not need the preprocessor_name, active by default\n", " preprocess_resolution = 512, # It is the resize of the image that will be obtained from the preprocessor.\n", " image_resolution = 768, # The equivalent resolution to be used for inference.\n", " controlnet_conditioning_scale = 1.0, # ControlNet Output Scaling in UNet\n", @@ -271,7 +307,10 @@ " control_guidance_end= 1.0, # ControlNet Stop Threshold (%)\n", "\n", " upscaler_model_path = upscaler_path,\n", - " upscaler_increases_size=1.5,\n", + " upscaler_increases_size=1.4,\n", + "\n", + " # By default, 'hires-fix' is applied when we use an upscaler; to deactivate it, we can set 'hires steps' to 0\n", + " hires_steps = 0,\n", ")\n", "\n", "for image in images:\n", @@ -286,7 +325,22 @@ { "cell_type": "markdown", "source": [ - "valid `preprocessor_name` depending on the task: HED, HED safe, Midas, MLSD, Openpose, PidiNet, PidiNet safe, NormalBae, Lineart, Lineart coarse, LineartAnime, None (anime), ContentShuffle, DPT or UPerNet" + "Valid `preprocessor_name` depending on the task:\n", + "\n", + "\n", + "| Task name | Preprocessor Name |\n", + "|----------|-------------------|\n", + "| openpose | \"None\" \"Openpose\" |\n", + "|scribble|\"None\" \"HED\" \"Pidinet\"|\n", + "|softedge|\"None\" \"HED\" \"Pidinet\" \"HED safe\" \"Pidinet safe\"|\n", + "|segmentation|\"None\" \"UPerNet\"|\n", + "|depth|\"None\" \"DPT\" \"Midas\"|\n", + "|normalbae|\"None\" \"NormalBae\"|\n", + "|lineart|\"None\" \"Lineart\" \"Lineart coarse\" \"None (anime)\" \"LineartAnime\"|\n", + "|shuffle|\"None\" \"ContentShuffle\"|\n", + "|canny||\n", + "|mlsd||\n", + "|ip2p||\n" ], "metadata": { "id": "_4ETyFwVm4wt" @@ -295,7 +349,7 @@ { "cell_type": "markdown", "source": [ - "# Adetailer" + "## Adetailer" ], "metadata": { "id": "fcuPwmjudAc5" @@ -327,49 +381,42 @@ { "cell_type": "code", "source": [ - "img_width = 512\n", - "img_height = 1024\n", - "num_steps = 30\n", - "CFG = 8.0\n", - "\n", - "adetailer_inpaint_params = {\n", - " \"prompt\": None, # main prompt will be use\n", - " \"negative_prompt\" : None,\n", - " \"strength\" : 0.35, # need low values\n", - " \"num_inference_steps\": num_steps,\n", - " \"width\": img_width,\n", - " \"height\": img_height,\n", - " \"guidance_scale\" : CFG,\n", - "}\n", - "\n", - "adetailer_params = {\n", + "# These are the parameters that adetailer A uses by default, but we can modify them if needed, the same applies to adetailer B.\n", + "adetailer_params_A = {\n", " \"face_detector_ad\" : True,\n", " \"person_detector_ad\" : True,\n", " \"hand_detector_ad\" : False,\n", - " \"inpaint_only\" : adetailer_inpaint_params,\n", + " \"prompt\": \"\", # The main prompt will be used if left empty\n", + " \"negative_prompt\" : \"\",\n", + " \"strength\" : 0.35, # need low values\n", " \"mask_dilation\" : 4,\n", " \"mask_blur\" : 4,\n", " \"mask_padding\" : 32,\n", + " \"inpaint_only\" : True, # better\n", + " \"sampler\" : \"Use same sampler\",\n", "}\n", "\n", "images, image_list = model(\n", " prompt = \"(masterpiece, best quality), 1girl, collarbone, wavy hair, looking at viewer, blurry foreground, upper body, necklace, contemporary, plain pants, ((intricate, print, pattern)), ponytail, freckles, red hair, dappled sunlight, smile, happy,\",\n", " negative_prompt = \"(worst quality, low quality, letterboxed)\",\n", - " img_width = img_width,\n", - " img_height = img_height,\n", + " img_width = 512,\n", + " img_height = 1024,\n", " num_images = 1,\n", - " num_steps = num_steps,\n", - " guidance_scale = CFG,\n", + " num_steps = 30,\n", + " guidance_scale = 8.0,\n", " clip_skip = True,\n", " seed = 33,\n", " sampler=\"DPM++ SDE Karras\",\n", "\n", " FreeU=True, # Improves diffusion model sample quality at no costs.\n", - " adetailer_active=True,\n", - " adetailer_params=adetailer_params,\n", + " adetailer_A=True,\n", + " adetailer_A_params=adetailer_params_A,\n", + "\n", + " adetailer_B=True, # \"If we don't use adetailer_B_params, it will use default values.\n", "\n", - " # upscaler_model_path = upscaler_path,\n", - " # upscaler_increases_size=1.5,\n", + " # By default, the upscaler will be deactivated if we don't pass a model to it.\n", + " # It's also valid to use a url to the model, Lanczos or Nearest.\n", + " #upscaler_model_path = \"Lanczos\",\n", ")\n", "\n", "for image in images:\n", @@ -381,6 +428,406 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "markdown", + "source": [ + "## Inpaint" + ], + "metadata": { + "id": "aDnuReyiiB7i" + } + }, + { + "cell_type": "code", + "source": [ + "model.load_pipe(\n", + " base_model_id = model_path,\n", + " task_name = \"inpaint\",\n", + ")" + ], + "metadata": { + "id": "9YlKpQ9liDbx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "We can specify the directory of our mask image, but we can also generate it, which is what we'll do in this example\n", + "\n", + "You need a mouse to draw on this canvas." + ], + "metadata": { + "id": "GF2hjejxjPgp" + } + }, + { + "cell_type": "code", + "source": [ + "images, image_list = model(\n", + " image = control_image,\n", + " # image_mask = \"/mask.png\",\n", + " prompt = \"a blue bird\",\n", + " strength = 0.5,\n", + " negative_prompt = \"(worst quality, low quality, letterboxed)\",\n", + " image_resolution = 768, # The equivalent resolution to be used for inference.\n", + " sampler=\"DPM++ SDE Karras\",\n", + ")\n", + "\n", + "for image in images:\n", + " display(image)" + ], + "metadata": { + "id": "3G0rt5yPiGmu" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "If you're using a device without a mouse or Jupyter Notebook outside of Colab, the function to create a mask automatically won't work correctly. Therefore, you'll need to specify the path of your mask image manually." + ], + "metadata": { + "id": "a3Bci8VHmG5N" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Styles\n", + "These are additions to the prompt and negative prompt to utilize a specific style in generation. By default, there are only 9 of these, and we can know their names by using:" + ], + "metadata": { + "id": "U8mc0S5vmQ7s" + } + }, + { + "cell_type": "code", + "source": [ + "model.STYLE_NAMES" + ], + "metadata": { + "id": "o8bnYMsXm9o2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "But if we want to use other styles, we can load them through a JSON, like this one for example.\n", + "Here are more JSON style files: [PromptStylers](https://github.com/wolfden/ComfyUi_PromptStylers), [sdxl_prompt_styler](https://github.com/ali1234/sdxl_prompt_styler/tree/main)" + ], + "metadata": { + "id": "gl5IE01_nSTl" + } + }, + { + "cell_type": "code", + "source": [ + "!wget https://raw.githubusercontent.com/ahgsql/StyleSelectorXL/main/sdxl_styles.json" + ], + "metadata": { + "id": "eM3aiE1RjoQN" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model.load_style_file(\"sdxl_styles.json\")" + ], + "metadata": { + "id": "V-IfivFJnijz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The file was loaded with 77 styles replacing the previous ones, now we can see the new names:" + ], + "metadata": { + "id": "FY5LCRDRoWBM" + } + }, + { + "cell_type": "code", + "source": [ + "model.STYLE_NAMES" + ], + "metadata": { + "id": "ldNxLj_ooXxX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we can use the style in the inference." + ], + "metadata": { + "id": "FQkfWmvRow7Q" + } + }, + { + "cell_type": "code", + "source": [ + "# Image to Image task.\n", + "model.load_pipe(\n", + " base_model_id = model_path,\n", + " task_name = \"img2img\",\n", + ")\n", + "\n", + "# We can also use multiple styles in a list [\"Silhouette\", \"Kirigami\"]\n", + "images, image_list = model(\n", + " style_prompt = \"Silhouette\", # The style will be added to the prompt and negative prompt\n", + " image = control_image,\n", + " prompt = \"a bird\",\n", + " negative_prompt = \"worst quality\",\n", + " strength = 0.48,\n", + " image_resolution = 512,\n", + " sampler=\"DPM++ SDE Karras\",\n", + ")\n", + "\n", + "for image in images:\n", + " display(image)" + ], + "metadata": { + "id": "NKYtqaxlo1_f" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#Verbosity Level\n", + "To change the verbosity level, you can use the logger from StablePy\n" + ], + "metadata": { + "id": "1v2n80ujO6My" + } + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "from stablepy import logger\n", + "\n", + "logging_level_mapping = {\n", + " 'DEBUG': logging.DEBUG,\n", + " 'INFO': logging.INFO,\n", + " 'WARNING': logging.WARNING,\n", + " 'ERROR': logging.ERROR,\n", + " 'CRITICAL': logging.CRITICAL\n", + "}\n", + "\n", + "Verbosity_Level = \"WARNING\" # Messages INFO and DEBUG will not be printed\n", + "\n", + "logger.setLevel(logging_level_mapping.get(Verbosity_Level, logging.INFO))" + ], + "metadata": { + "id": "97XN4eNfsgWu" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# LCM\n", + "\n", + "Latent Consistency Models (LCM) can generate images in a few steps. When selecting the 'LCM' sampler, the model automatically loads the LCM_LoRA for the task. Generally, guidance_scale is used at 1.0 or a maximum of 2.0, with steps between 4 and 8" + ], + "metadata": { + "id": "6YY8c3T2gcpq" + } + }, + { + "cell_type": "code", + "source": [ + "# Generating an image with txt2img\n", + "model.load_pipe(\n", + " base_model_id = model_path,\n", + " task_name = \"txt2img\",\n", + ")\n", + "images, image_list = model(\n", + " prompt = \"(masterpiece, best quality), 1girl, collarbone, wavy hair, looking at viewer, blurry foreground, upper body, necklace, contemporary, plain pants, ((intricate, print, pattern)), ponytail, freckles, red hair, dappled sunlight, smile, happy,\",\n", + " negative_prompt = \"(worst quality, low quality, letterboxed)\",\n", + " num_images = 1,\n", + " num_steps = 7,\n", + " guidance_scale = 1.0,\n", + " sampler=\"LCM\",\n", + " syntax_weights=\"Classic\", # (word:weight) and (word) for prompts weights\n", + " disable_progress_bar = True,\n", + " save_generated_images = False,\n", + " display_images = True,\n", + ")\n", + "\n", + "# Using the image generated in img2img\n", + "# If we use the same model and VAE, we can switch tasks quickly\n", + "model.load_pipe(\n", + " base_model_id = model_path,\n", + " task_name = \"img2img\",\n", + ")\n", + "images_i2i, image_list = model(\n", + " prompt = \"masterpiece, sunlight\",\n", + " image = images[0], # only one image\n", + " style_prompt = \"Disco\", # Apply a style\n", + " strength = 0.70,\n", + " num_steps = 4,\n", + " guidance_scale = 1.0,\n", + " sampler=\"LCM\",\n", + " disable_progress_bar = True,\n", + " save_generated_images = False,\n", + " display_images = True,\n", + ")" + ], + "metadata": { + "id": "O3TVADzdge3T" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "logger.setLevel(logging.INFO) # return info" + ], + "metadata": { + "id": "vsC_l6lxhj7r" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Inference with SDXL" + ], + "metadata": { + "id": "GY0skyTXntas" + } + }, + { + "cell_type": "markdown", + "source": [ + "If you are using Colab with a T4, you might encounter OOM (Out Of Memory) issues when loading an SDXL safetensor model. However, you can still use it in the following way\n", + "\n", + "```\n", + "from stablepy import Model_Diffusers\n", + "\n", + "model_path = \"./my_sdxl_model.safetensors\"\n", + "\n", + "model = Model_Diffusers(\n", + " base_model_id = model_path,\n", + " task_name = \"txt2img\",\n", + " sdxl_safetensors = True\n", + ")\n", + "```\n", + "\n", + "If you change tasks, you don't need to specify this parameter `sdxl_safetensors = True` unless you change the model.\n", + "```\n", + "second_model_path = \"./second_sdxl_model.safetensors\"\n", + "\n", + "model.load_pipe(\n", + " base_model_id = second_model_path,\n", + " task_name = \"img2img\",\n", + " sdxl_safetensors = True\n", + ")\n", + "```\n", + "\n" + ], + "metadata": { + "id": "9MIl4OjayKL7" + } + }, + { + "cell_type": "markdown", + "source": [ + "Currently, SDXL models in fp16 Diffusers format can be used in Colab with a T4. You only need to specify the repository name to load the model from Hugging Face. You can search for specifically compatible models by looking for [XL FP16 on Hugging Face](https://huggingface.co/models?search=-xl-fp16) ." + ], + "metadata": { + "id": "rsX3n69x1izX" + } + }, + { + "cell_type": "code", + "source": [ + "repo = \"SG161222/RealVisXL_V2.0\"\n", + "\n", + "model.load_pipe(\n", + " base_model_id = repo,\n", + " task_name = \"txt2img\",\n", + ")" + ], + "metadata": { + "id": "Dzpva586tcvT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "At the moment, SDXL is compatible with the following tasks:\n", + "\n", + "\n", + "* txt2img\n", + "* inpaint\n", + "* img2img\n", + "* sdxl_canny\n", + "* sdxl_sketch\n", + "* sdxl_lineart\n", + "* sdxl_depth-midas\n", + "* sdxl_openpose\n" + ], + "metadata": { + "id": "DBjfbqNZ3hM4" + } + }, + { + "cell_type": "code", + "source": [ + "# Example sdxl_depth-midas\n", + "model.load_pipe(\n", + " base_model_id = repo, # sdxl repo\n", + " task_name = \"sdxl_depth-midas\",\n", + ")\n", + "\n", + "# We can also use multiple styles in a list [\"Silhouette\", \"Kirigami\"]\n", + "images, image_list = model(\n", + " image = control_image,\n", + " prompt = \"a green bird\",\n", + " negative_prompt = \"worst quality\",\n", + "\n", + " # If we want to use the preprocessor\n", + " t2i_adapter_preprocessor = True,\n", + " preprocess_resolution = 1024,\n", + "\n", + " # Relative resolution\n", + " image_resolution = 1024,\n", + "\n", + " sampler=\"DPM++ 2M SDE Lu\", # Specific variant for SDXL. We can also use euler at final with \"DPM++ 2M SDE Ef\"\n", + "\n", + " t2i_adapter_conditioning_scale = 1.0,\n", + " t2i_adapter_conditioning_factor = 1.0,\n", + "\n", + " display_images = True,\n", + ")" + ], + "metadata": { + "id": "PJ3nryKR4f8A" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ @@ -388,7 +835,7 @@ "help(model.__call__)" ], "metadata": { - "id": "iMxDxgJIVoQC" + "id": "3H3SWosLTgpF" }, "execution_count": null, "outputs": []