diff --git a/README.md b/README.md index 5a88179..fffba0e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The goal of this project is to make Stable Diffusion more accessible, simple and **Installation:** ``` -pip install stablepy==0.4.0 +pip install stablepy==0.4.1 ``` **Usage:** diff --git a/pyproject.toml b/pyproject.toml index d8b243c..e3411c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "stablepy" -version = "0.4.0" +version = "0.4.1" description = "A tool for easy use of stable diffusion" authors = ["Roger Condori(R3gm) "] readme = "README.md" diff --git a/stablepy/__version__.py b/stablepy/__version__.py index 6a9beea..3d26edf 100644 --- a/stablepy/__version__.py +++ b/stablepy/__version__.py @@ -1 +1 @@ -__version__ = "0.4.0" +__version__ = "0.4.1" diff --git a/stablepy/diffusers_vanilla/model.py b/stablepy/diffusers_vanilla/model.py index e316163..e0f306e 100644 --- a/stablepy/diffusers_vanilla/model.py +++ b/stablepy/diffusers_vanilla/model.py @@ -1959,6 +1959,8 @@ def __call__( if self.task_name == "inpaint": pipe_params_config["strength"] = strength pipe_params_config["mask_image"] = control_mask + pipe_params_config["height"] = control_image.size[1] + pipe_params_config["width"] = control_image.size[0] elif self.task_name not in ["txt2img", "inpaint", "img2img"]: if "t2i" not in self.task_name: pipe_params_config[ diff --git a/stablepy_demo.ipynb b/stablepy_demo.ipynb index 6f3008e..5077816 100644 --- a/stablepy_demo.ipynb +++ b/stablepy_demo.ipynb @@ -1,28 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4", - "authorship_tag": "ABX9TyNrQ2PmvGY+vHcRVb/Bd+U5", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, "cells": [ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", - "source": [ - "# Stablepy" - ], "metadata": { "id": "BQeX1ykNNMla" - } + }, + "source": [ + "# Stablepy" + ] }, { "cell_type": "markdown", - "source": [ - "Install dependencies" - ], "metadata": { "id": "TYqyA785NZF8" - } + }, + "source": [ + "Install dependencies" + ] }, { "cell_type": "code", @@ -54,31 +36,36 @@ }, "outputs": [], "source": [ - "!pip install stablepy==0.4.0 -q" + "!pip install stablepy==0.4.1 -q" ] }, { "cell_type": "markdown", + "metadata": { + "id": "7YIpscy8sjgs" + }, "source": [ "To use the version with the latest changes, you can install directly from the repository.\n", "\n", "`pip install -q git+https://github.com/R3gm/stablepy.git`" - ], - "metadata": { - "id": "7YIpscy8sjgs" - } + ] }, { "cell_type": "markdown", - "source": [ - "Download our models and other stuff" - ], "metadata": { "id": "KdNshU7kNbLj" - } + }, + "source": [ + "Download our models and other stuff" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GtTdza7SNexT" + }, + "outputs": [], "source": [ "%cd /content/\n", "\n", @@ -99,35 +86,35 @@ "\n", "# Upscaler\n", "!wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth" - ], - "metadata": { - "id": "GtTdza7SNexT" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "# Inference with Stable diffusion 1.5" - ], "metadata": { "id": "amYJfvMwOKnL" - } + },
"source": [ + "# Inference with Stable diffusion 1.5" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "PvTT0AiKRX0m" + }, "source": [ "First, we pass the path of the model we will use.\n", "\n", "The default task is txt2img but it can be changed to other tasks like canny" - ], - "metadata": { - "id": "PvTT0AiKRX0m" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9hvZngGDOhh5" + }, + "outputs": [], "source": [ "from stablepy import Model_Diffusers\n", "import torch\n", @@ -140,70 +127,70 @@ " task_name = \"canny\", # task\n", " vae_model = vae_path, # path vae\n", ")" - ], - "metadata": { - "id": "9hvZngGDOhh5" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "You can see the different tasks that can be used with sd1.5 with the following code:" - ], "metadata": { "id": "HFJUWOzmMElv" - } + }, + "source": [ + "You can see the different tasks that can be used with sd1.5 with the following code:" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i8WGUSPN05RD" + }, + "outputs": [], "source": [ "from stablepy import SD15_TASKS\n", "\n", "SD15_TASKS" - ], - "metadata": { - "id": "i8WGUSPN05RD" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "To switch tasks or models, we can call `model.load_pipe()` and specify the new task or model. This will load the necessary components." - ], "metadata": { "id": "CQNbptvYSs4z" - } + }, + "source": [ + "To switch tasks or models, we can call `model.load_pipe()` and specify the new task or model. This will load the necessary components." + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jnO6p80qPRS9" + }, + "outputs": [], "source": [ "model.load_pipe(\n", " base_model_id = model_path, # path to the model\n", " task_name = \"txt2img\", # task\n", " vae_model = None, # Use default VAE\n", ")" - ], - "metadata": { - "id": "jnO6p80qPRS9" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Simple generation using a sampler and a specified prompt weight." - ], "metadata": { "id": "hNdnzYZXMZwO" - } + }, + "source": [ + "Simple generation using a sampler and a specified prompt weight." + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FCYd5Ya5z1Ic" + }, + "outputs": [], "source": [ "images, image_list = model(\n", " prompt = \"cat, (masterpiece), (best quality)\",\n", @@ -212,68 +199,68 @@ ")\n", "\n", "images[0]" - ], - "metadata": { - "id": "FCYd5Ya5z1Ic" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "The different samplers that can be used can be checked in the following way:" - ], "metadata": { "id": "scqyCMnoNJ1Q" - } + }, + "source": [ + "The different samplers that can be used can be checked in the following way:" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZJz5pVDx1-UJ" + }, + "outputs": [], "source": [ "from stablepy import scheduler_names\n", "\n", "scheduler_names" - ], - "metadata": { - "id": "ZJz5pVDx1-UJ" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Prompt weight is the syntax and method used to emphasize certain parts of the prompt. If you want to get results similar to other popular implementations, you can use \"Classic-original\" with a SD1.5 model." 
- ], "metadata": { "id": "ZStd44a_NRIU" - } + }, + "source": [ + "Prompt weight is the syntax and method used to emphasize certain parts of the prompt. If you want to get results similar to other popular implementations, you can use \"Classic-original\" with a SD1.5 model." + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4oesBsst1u7i" + }, + "outputs": [], "source": [ "from stablepy import ALL_PROMPT_WEIGHT_OPTIONS\n", "\n", "ALL_PROMPT_WEIGHT_OPTIONS" - ], - "metadata": { - "id": "4oesBsst1u7i" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "We will use a basic txt2img task in which we can specify different common parameters, such as Loras, embeddings, upscaler, etc." - ], "metadata": { "id": "fqmU2o0fTUvZ" - } + }, + "source": [ + "We will use a basic txt2img task in which we can specify different common parameters, such as Loras, embeddings, upscaler, etc." + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zVrtRLuFPSiQ" + }, + "outputs": [], "source": [ "from IPython.display import display\n", "\n", @@ -318,48 +305,48 @@ "\n", "for image in images:\n", " display(image)" - ], - "metadata": { - "id": "zVrtRLuFPSiQ" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "## ControlNet" - ], "metadata": { "id": "zSYYzJ7FXO2d" - } + }, + "source": [ + "## ControlNet" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4BsMsk0SXjtN" + }, + "outputs": [], "source": [ "model.load_pipe(\n", " base_model_id = model_path,\n", " task_name = \"canny\",\n", " # Use default VAE\n", ")" - ], - "metadata": { - "id": "4BsMsk0SXjtN" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Our control image will be this one, to which the processor will apply Canny, and then use ControlNet to generate the final image." - ], "metadata": { "id": "PF1PCOyKXvSx" - } + }, + "source": [ + "Our control image will be this one, to which the processor will apply Canny, and then use ControlNet to generate the final image." 
+ ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ia5fu84QW1MM" + }, + "outputs": [], "source": [ "from PIL import Image\n", "import os\n", @@ -373,24 +360,24 @@ " return filename # return the path\n", "\n", "control_image = download_image(\"https://huggingface.co/lllyasviel/sd-controlnet-canny/resolve/main/images/bird.png\")" - ], - "metadata": { - "id": "ia5fu84QW1MM" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Inference with canny" - ], "metadata": { "id": "3AjvhCa3dsBn" - } + }, + "source": [ + "Inference with canny" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NefvFTvCWL8v" + }, + "outputs": [], "source": [ "images, image_list = model(\n", " prompt = \"(masterpiece, best quality), bird\",\n", @@ -412,15 +399,13 @@ "\n", "for image in images:\n", " display(image)" - ], - "metadata": { - "id": "NefvFTvCWL8v" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "_4ETyFwVm4wt" + }, "source": [ "Valid `preprocessor_name` depending on the task:\n", "\n", @@ -439,45 +424,47 @@ "|shuffle|\"None\" \"ContentShuffle\"|\n", "|ip2p||\n", "|pattern||\n" - ], - "metadata": { - "id": "_4ETyFwVm4wt" - } + ] }, { "cell_type": "markdown", - "source": [ - "## Adetailer" - ], "metadata": { "id": "fcuPwmjudAc5" - } + }, + "source": [ + "## Adetailer" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o_0OE-PaPGOl" + }, + "outputs": [], "source": [ "model.load_pipe(\n", " base_model_id = model_path,\n", " task_name = \"txt2img\",\n", ")" - ], - "metadata": { - "id": "o_0OE-PaPGOl" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "The parameters must be well matched for adetailer to produce good results; it is also useful to set `strength` in adetailer_inpaint_params to low values below 0.4." - ], "metadata": { "id": "RxBK7tcLdENa" - } + }, + "source": [ + "The parameters must be well matched for adetailer to produce good results; it is also useful to set `strength` in adetailer_inpaint_params to low values below 0.4." + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F_b2jCSBcVBH" + }, + "outputs": [], "source": [ "# These are the parameters that adetailer A uses by default, but we can modify them if needed; the same applies to adetailer B.\n", "adetailer_params_A = {\n", @@ -519,49 +506,49 @@ "\n", "for image in images:\n", " display(image)" - ], - "metadata": { - "id": "F_b2jCSBcVBH" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "## Inpaint" - ], "metadata": { "id": "aDnuReyiiB7i" - } + }, + "source": [ + "## Inpaint" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9YlKpQ9liDbx" + }, + "outputs": [], "source": [ "model.load_pipe(\n", " base_model_id = model_path,\n", " task_name = \"inpaint\",\n", ")" - ], - "metadata": { - "id": "9YlKpQ9liDbx" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "GF2hjejxjPgp" + }, "source": [ "We can specify the path of our mask image, but we can also generate it, which is what we'll do in this example.\n", "\n", "You need a mouse to draw on this canvas."
- ], "metadata": { "id": "GF2hjejxjPgp" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TTnxYmsGJYj9" + }, + "outputs": [], "source": [ "images, image_list = model(\n", " image = control_image,\n", @@ -575,106 +562,106 @@ "\n", "for image in images:\n", " display(image)" - ], - "metadata": { - "id": "TTnxYmsGJYj9" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "If you're using a device without a mouse, or a Jupyter Notebook outside of Colab, the function for drawing a mask won't work correctly. Therefore, you'll need to specify the path of your mask image manually." - ], "metadata": { "id": "a3Bci8VHmG5N" - } + }, + "source": [ + "If you're using a device without a mouse, or a Jupyter Notebook outside of Colab, the function for drawing a mask won't work correctly. Therefore, you'll need to specify the path of your mask image manually." + ] }, { "cell_type": "markdown", + "metadata": { + "id": "U8mc0S5vmQ7s" + }, "source": [ "# Styles\n", "These are additions to the prompt and negative prompt to utilize a specific style in generation. By default, there are only 9 of these, and we can list their names by using:" - ], - "metadata": { - "id": "U8mc0S5vmQ7s" - } + ] }, { "cell_type": "code", - "source": [ - "model.STYLE_NAMES" - ], + "execution_count": null, "metadata": { "id": "o8bnYMsXm9o2" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "model.STYLE_NAMES" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "gl5IE01_nSTl" + }, "source": [ "But if we want to use other styles, we can load them from a JSON file, like this one for example.\n", "Here are more JSON style files: [PromptStylers](https://github.com/wolfden/ComfyUi_PromptStylers), [sdxl_prompt_styler](https://github.com/ali1234/sdxl_prompt_styler/tree/main)" - ], - "metadata": { - "id": "gl5IE01_nSTl" - } + ] }, { "cell_type": "code", - "source": [ - "!wget https://raw.githubusercontent.com/ahgsql/StyleSelectorXL/main/sdxl_styles.json" - ], + "execution_count": null, "metadata": { "id": "eM3aiE1RjoQN" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/ahgsql/StyleSelectorXL/main/sdxl_styles.json" + ] }, { "cell_type": "code", - "source": [ - "model.load_style_file(\"sdxl_styles.json\")" - ], + "execution_count": null, "metadata": { "id": "V-IfivFJnijz" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "model.load_style_file(\"sdxl_styles.json\")" + ] }, { "cell_type": "markdown", - "source": [ - "The file was loaded with 77 styles, replacing the previous ones; now we can see the new names:" - ], "metadata": { "id": "FY5LCRDRoWBM" - } + }, + "source": [ + "The file was loaded with 77 styles, replacing the previous ones; now we can see the new names:" + ] }, { "cell_type": "code", - "source": [ - "model.STYLE_NAMES" - ], + "execution_count": null, "metadata": { "id": "ldNxLj_ooXxX" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "model.STYLE_NAMES" + ] }, { "cell_type": "markdown", - "source": [ - "Now we can use the style in inference." - ], "metadata": { "id": "FQkfWmvRow7Q" - } + }, + "source": [ + "Now we can use the style in inference."
+ ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NKYtqaxlo1_f" + }, + "outputs": [], "source": [ "# Image to Image task.\n", "model.load_pipe(\n", @@ -695,25 +682,25 @@ "\n", "for image in images:\n", " display(image)" - ], - "metadata": { - "id": "NKYtqaxlo1_f" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "1v2n80ujO6My" + }, "source": [ "# Verbosity Level\n", "To change the verbosity level, you can use the logger from StablePy.\n" - ], - "metadata": { - "id": "1v2n80ujO6My" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "97XN4eNfsgWu" + }, + "outputs": [], "source": [ "import logging\n", "from stablepy import logger\n", @@ -729,27 +716,27 @@ "Verbosity_Level = \"WARNING\" # Messages INFO and DEBUG will not be printed\n", "\n", "logger.setLevel(logging_level_mapping.get(Verbosity_Level, logging.INFO))" - ], - "metadata": { - "id": "97XN4eNfsgWu" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "6YY8c3T2gcpq" + }, "source": [ "# LCM and TCD\n", "\n", "Latent Consistency Models (LCM) can generate images in a few steps. When selecting the 'LCM Auto-Loader' or 'TCD Auto-Loader' sampler, the model automatically loads the LCM_LoRA for the task. Generally, guidance_scale is used at 1.0 or a maximum of 2.0, with steps between 4 and 8.\n", "\n" - ], - "metadata": { - "id": "6YY8c3T2gcpq" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O3TVADzdge3T" + }, + "outputs": [], "source": [ "# Generating an image with txt2img\n", "model.load_pipe(\n", @@ -787,67 +774,67 @@ " save_generated_images = False,\n", " display_images = True,\n", ")" - ], - "metadata": { - "id": "O3TVADzdge3T" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "logger.setLevel(logging.INFO) # restore INFO verbosity" - ], + "execution_count": null, "metadata": { "id": "vsC_l6lxhj7r" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "logger.setLevel(logging.INFO) # restore INFO verbosity" + ] }, { "cell_type": "markdown", - "source": [ - "# Inference with SDXL" - ], "metadata": { "id": "GY0skyTXntas" - } + }, + "source": [ + "# Inference with SDXL" + ] }, { "cell_type": "markdown", - "source": [ - "The tasks that can be used with SDXL:" - ], "metadata": { "id": "ue9h7FKGWc1j" - } + }, + "source": [ + "The tasks that can be used with SDXL:" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iXAiSLXx94-3" + }, + "outputs": [], "source": [ "from stablepy import SDXL_TASKS\n", "\n", "SDXL_TASKS" - ], - "metadata": { - "id": "iXAiSLXx94-3" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "zsqajNfYX44N" + }, "source": [ "\n", "When switching between different tasks, you may encounter an out-of-memory (OOM) issue if there isn't enough GPU memory available, particularly with SDXL. To avoid this, you can set `retain_task_model_in_cache=False` in `model.load_pipe` or `Model_Diffusers` to save some VRAM. However, in many cases, this may not be sufficient, and you may need to restart the kernel or runtime in Colab to resolve the issue."
- ], - "metadata": { - "id": "zsqajNfYX44N" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H5L6K3dr9c3y" + }, + "outputs": [], "source": [ "model_name = \"./Juggernaut-XL_v9_RunDiffusionPhoto_v2.safetensors\" # SDXL safetensors\n", "\n", @@ -856,35 +843,35 @@ " task_name = \"openpose\",\n", " retain_task_model_in_cache=False,\n", ")" - ], - "metadata": { - "id": "H5L6K3dr9c3y" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "We will perform OpenPose with the following image using SDXL." - ], "metadata": { "id": "SUAHnEhfZIkv" - } + }, + "source": [ + "We will perform OpenPose with the following image using SDXL." + ] }, { "cell_type": "code", - "source": [ - "control_image_2 = download_image(\"https://upload.wikimedia.org/wikipedia/commons/f/f8/Model_Posing_On_Typical_Studio_Set.jpg\")" - ], + "execution_count": null, "metadata": { "id": "G7OxI1mWqDwL" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "control_image_2 = download_image(\"https://upload.wikimedia.org/wikipedia/commons/f/f8/Model_Posing_On_Typical_Studio_Set.jpg\")" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3eKyULPjCJuG" + }, + "outputs": [], "source": [ "images, image_list = model(\n", " image = control_image_2,\n", @@ -899,82 +886,82 @@ "\n", "for image in images:\n", " display(image)" - ], - "metadata": { - "id": "3eKyULPjCJuG" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "# IP Adapter" - ], "metadata": { "id": "3UuGqmo9gwaH" - } + }, + "source": [ + "# IP Adapter" + ] }, { "cell_type": "markdown", - "source": [ - "IP-Adapter enhances diffusion models by adding a dedicated image cross-attention layer for better image-specific feature learning and adaptability." - ], "metadata": { "id": "MJoMRPFAbUMw" - } + }, + "source": [ + "IP-Adapter enhances diffusion models by adding a dedicated image cross-attention layer for better image-specific feature learning and adaptability." + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5vaBv_1NgyYp" + }, + "outputs": [], "source": [ "from stablepy import IP_ADAPTERS_SD, IP_ADAPTERS_SDXL\n", "\n", "IP_ADAPTERS_SDXL" - ], - "metadata": { - "id": "5vaBv_1NgyYp" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "kROso95NcMCl" + }, "source": [ "\n", "You can specify a list of different IP adapter models with their respective images, but combinations between normal IP adapters and FaceID adapters are not enabled. Additionally, base_vit_G models cannot be combined with other models because they use a different image encoder. 
This image encoder is necessary for most IP adapter models and will occupy additional space in VRAM.\n" - ], - "metadata": { - "id": "kROso95NcMCl" - } + ] }, { "cell_type": "code", - "source": [ - "img_ip = download_image(\"https://upload.wikimedia.org/wikipedia/commons/3/3f/TechCrunch_Disrupt_2019_%2848834434641%29_%28cropped%29.jpg\")" - ], + "execution_count": null, "metadata": { "id": "aKtEMwB4nC0g" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "img_ip = download_image(\"https://upload.wikimedia.org/wikipedia/commons/3/3f/TechCrunch_Disrupt_2019_%2848834434641%29_%28cropped%29.jpg\")" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lfnxb92TlncN" + }, + "outputs": [], "source": [ "model.load_pipe(\n", " base_model_id = model_name,\n", " task_name = \"txt2img\",\n", " retain_task_model_in_cache=False,\n", ")" - ], - "metadata": { - "id": "Lfnxb92TlncN" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vTsErl_3iNU2" + }, + "outputs": [], "source": [ "images, image_list = model(\n", " prompt = \"a man with a pink jacket in the jungle\",\n", @@ -995,25 +982,25 @@ "\n", " display_images = True,\n", ")" - ], - "metadata": { - "id": "vTsErl_3iNU2" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "oWJGI6IdtxW1" + }, "source": [ "#### Multi IP-Adapter\n", "\n" - ], - "metadata": { - "id": "oWJGI6IdtxW1" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XCVO3eEutxHS" + }, + "outputs": [], "source": [ "images, image_list = model(\n", " prompt = \"a man with a pink jacket in the jungle\",\n", @@ -1034,45 +1021,45 @@ "\n", " display_images = True,\n", ")" - ], - "metadata": { - "id": "XCVO3eEutxHS" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "`ip_adapter_mode` specifies which layers are active in the IP adapter model; certain layers have a specific influence on how features are extracted from the IP image. The valid options are \"original\", \"style\", \"layout\" and \"style+layout\"." - ], "metadata": { "id": "dF8D0zej2xOc" - } + }, + "source": [ + "`ip_adapter_mode` specifies which layers are active in the IP adapter model; certain layers have a specific influence on how features are extracted from the IP image. The valid options are \"original\", \"style\", \"layout\" and \"style+layout\"." + ] }, { "cell_type": "markdown", - "source": [ - "# Diffusers format" - ], "metadata": { "id": "Bjfi-n3ShMzb" - } + }, + "source": [ + "# Diffusers format" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "KjHk_2ZOjVrQ" + }, "source": [ "\n", "You can also load models in the Diffusers format. This format divides the model into different parts, which allows you to load individual sections from various models more easily. For instance, models like SD 1.5 and SDXL can be loaded using the repository name as shown in this example: [RealVisXL_V2.0](https://huggingface.co/SG161222/RealVisXL_V2.0/tree/main). This repository contains folders corresponding to each section of the model such as unet, vae, text encoder, and more.\n", "\n", "Another characteristic of the diffusers format is that it can use either the safetensors or bin extension. Currently, you can only use diffuser models with the safetensors extension because they offer better performance and are safer than bin files.
To verify if a diffuser model is in safetensors format, check the [unet folder](https://huggingface.co/SG161222/RealVisXL_V2.0/tree/main/unet) and see if it ends with the safetensors extension." - ], - "metadata": { - "id": "KjHk_2ZOjVrQ" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dzpva586tcvT" + }, + "outputs": [], "source": [ "repo = \"SG161222/RealVisXL_V2.0\"\n", "\n", @@ -1080,24 +1067,24 @@ " base_model_id = repo,\n", " task_name = \"txt2img\",\n", ")" - ], - "metadata": { - "id": "Dzpva586tcvT" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "The T2I-Adapter depth is similar to that of ControlNet and uses less VRAM" - ], "metadata": { "id": "mLzuQDBfny1x" - } + }, + "source": [ + "The T2I-Adapter depth is similar to that of ControlNet and uses less VRAM" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PJ3nryKR4f8A" + }, + "outputs": [], "source": [ "# Example sdxl_depth-midas\n", "model.load_pipe(\n", @@ -1126,44 +1113,44 @@ "\n", " display_images = True,\n", ")" - ], - "metadata": { - "id": "PJ3nryKR4f8A" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "# ControlNet pattern" - ], "metadata": { "id": "TTl8AaZ9RNzo" - } + }, + "source": [ + "# ControlNet pattern" + ] }, { "cell_type": "markdown", - "source": [ - "It is used to generate images with a QR code but can also be used to generate optical patterns." - ], "metadata": { "id": "FqdEZa6BRQSS" - } + }, + "source": [ + "It is used to generate images with a QR code but can also be used to generate optical patterns." + ] }, { "cell_type": "code", - "source": [ - "spiral_image = download_image(\"https://upload.wikimedia.org/wikipedia/en/6/6c/Screwtop_spiral.jpg\")" - ], + "execution_count": null, "metadata": { "id": "j8Yws7siRza9" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "spiral_image = download_image(\"https://upload.wikimedia.org/wikipedia/en/6/6c/Screwtop_spiral.jpg\")" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7qXf9YsvSQmm" + }, + "outputs": [], "source": [ "model.load_pipe(\n", " base_model_id = repo,\n", @@ -1180,59 +1167,57 @@ "\n", "for image in images:\n", " display(image)" - ], - "metadata": { - "id": "7qXf9YsvSQmm" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "# Utils" - ], "metadata": { "id": "Nlyj1jqro3lR" - } + }, + "source": [ + "# Utils" + ] }, { "cell_type": "code", - "source": [ - "# Load beta styles\n", - "model.load_beta_styles()" - ], + "execution_count": null, "metadata": { "id": "OISDbhbljYNu" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Load beta styles\n", + "model.load_beta_styles()" + ] }, { "cell_type": "code", - "source": [ - "model.STYLE_NAMES" - ], + "execution_count": null, "metadata": { "id": "C_tzObXyxGic" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "model.STYLE_NAMES" + ] }, { "cell_type": "code", - "source": [ - "# For more details about the parameters\n", - "help(model.__call__)" - ], + "execution_count": null, "metadata": { "id": "3H3SWosLTgpF" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# For more details about the parameters\n", + "help(model.__call__)" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "JwtC4KLm3bte" + }, "source": [ "# Common issues and potential solutions:\n", 
"\n", @@ -1241,10 +1226,25 @@ "- **Distorted or very strange images**: This usually occurs due to prompt weight. In this implementation, the emphasis level set using Compel or Classic is particularly sensitive. It's best to use low prompt weights. Similarly, for LoRAs, it's recommended to use low scales. Also, using Classic variants like Classic-original can help; It has a normalization method to avoid extreme peaks that can greatly distort the outcome.\n", "\n", "- **Pony Diffusion not producing good images**: Compatibility with the model can be tricky. However, you can try using sampler DPM++ 1s or DPM2 with Compel or Classic prompt weights to improve results.\n" - ], - "metadata": { - "id": "JwtC4KLm3bte" - } + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyNrQ2PmvGY+vHcRVb/Bd+U5", + "gpuType": "T4", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}