From 29e53de3adabef57001bae2b954212666ab5669f Mon Sep 17 00:00:00 2001 From: Roger Condori <114810545+R3gm@users.noreply.github.com> Date: Tue, 24 Dec 2024 15:45:53 -0400 Subject: [PATCH] colab version 0.6.0 --- stablepy_demo.ipynb | 636 ++++++++++++++++++++++++++++++++------------ 1 file changed, 466 insertions(+), 170 deletions(-) diff --git a/stablepy_demo.ipynb b/stablepy_demo.ipynb index b2b8492..7aa9db1 100644 --- a/stablepy_demo.ipynb +++ b/stablepy_demo.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "view-in-github" + "id": "view-in-github", + "colab_type": "text" }, "source": [ "\"Open" ] }, @@ -36,7 +36,7 @@ }, "outputs": [], "source": [ - "!pip install stablepy==0.5.1 -q" + "!pip install stablepy==0.6.0 -q" ] }, { @@ -192,15 +192,54 @@ }, "outputs": [], "source": [ - "images, image_list = model(\n", + "images, img_info = model(\n", " prompt = \"cat, (masterpiece), (best quality)\",\n", - " sampler=\"DPM++ SDE Karras\",\n", + " sampler=\"DPM++ SDE\",\n", + " schedule_type=\"Karras\",\n", " syntax_weights=\"Classic-original\",\n", - ")\n", - "\n", - "images[0]" + ")" ] }, + { + "cell_type": "markdown", + "source": [ + "The output consists of a list of the generated images." + ], + "metadata": { + "id": "A5ZXM34tjfYU" + } + }, + { + "cell_type": "code", + "source": [ + "images[0]" + ], + "metadata": { + "id": "p2-OHc1Dje55" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The output also includes `img_info`, which contains details like the **seed**, the **path** where the image was saved, and the **generation metadata**." + ], + "metadata": { + "id": "6k8hSFEIkWzo" + } + }, + { + "cell_type": "code", + "source": [ + "img_info" + ], + "metadata": { + "id": "w707FqK4jMoh" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "markdown", "metadata": { @@ -223,13 +262,35 @@ "scheduler_names" ] }, + { + "cell_type": "markdown", + "source": [ + "And these are the available schedule types:" + ], + "metadata": { + "id": "eKMErUczihf-" + } + }, + { + "cell_type": "code", + "source": [ + "from stablepy import SCHEDULE_TYPE_OPTIONS\n", + "\n", + "SCHEDULE_TYPE_OPTIONS" + ], + "metadata": { + "id": "akYwc8mWie_9" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "markdown", "metadata": { "id": "ZStd44a_NRIU" }, "source": [ - "Prompt weight is the syntax and method used to emphasize certain parts of the prompt. If you want to get results similar to other popular implementations, you can use \"Classic-original\" with a SD1.5 model." + "Prompt weight is the syntax and method used to emphasize certain parts of the prompt. If you want to get results similar to other popular implementations, you can use \"Classic-original\"."
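For instance, a minimal sketch of this weighting syntax, assuming the `model` instance created in the earlier cells and that "Classic-original" follows the usual conventions, where `(word:1.2)` raises attention, stacked parentheses like `((word))` raise it further, and `[word]` lowers it:

```python
# A sketch of "Classic-original" prompt weighting (assumes the `model`
# instance from the earlier cells; the bracket conventions below are the
# common ones from other popular implementations).
images, img_info = model(
    prompt="cat, (masterpiece:1.3), ((best quality)), [blurry]",
    sampler="DPM++ SDE",
    schedule_type="Karras",
    syntax_weights="Classic-original",
)

images[0]
```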
] }, { @@ -278,7 +339,8 @@ " guidance_scale = 8.0,\n", " clip_skip = True, # Clip skip to the penultimate layer, in other implementations it is equivalent to use clipskip 2.\n", " seed = -1, # random seed\n", - " sampler=\"DPM++ SDE Karras\",\n", + " sampler=\"DPM++ SDE\",\n", + " schedule_type=\"Karras\",\n", " syntax_weights=\"Classic-original\",\n", "\n", " lora_A = lora1_path,\n", @@ -313,7 +375,7 @@ "id": "V0-kKkqzbLwa" }, "source": [ - "The upscaler_model_path can be used with different ESRGAN models and can also be used with Lanczos, Nearest, or the variants of latent upscaler.\n", + "The upscaler_model_path can be used with different upscaler models, and it also accepts any of the built-in upscalers.\n", "Example: `upscaler_model_path=\"Latent (bicubic)\",`" ] }, @@ -325,9 +387,9 @@ }, "outputs": [], "source": [ - "from stablepy import LATENT_UPSCALERS\n", + "from stablepy import ALL_BUILTIN_UPSCALERS\n", "\n", - "list(LATENT_UPSCALERS)" + "ALL_BUILTIN_UPSCALERS" ] }, { @@ -408,7 +470,7 @@ " image = control_image,\n", " preprocessor_name = \"Canny\", # Needed to activate the Canny preprocessor\n", " preprocess_resolution = 512, # It is the resize of the image that will be obtained from the preprocessor.\n", - " image_resolution = 768, # The equivalent resolution to be used for inference.\n", + " image_resolution = 768, # The maximum proportional final resolution, based on the provided image.\n", " controlnet_conditioning_scale = 1.0, # ControlNet Output Scaling in UNet\n", " control_guidance_start = 0.0, # ControlNet Start Threshold (%)\n", " control_guidance_end= 1.0, # ControlNet Stop Threshold (%)\n", @@ -437,17 +499,18 @@ "|----------|-------------------|\n", "|canny|\"None\" \"Canny\"|\n", "|mlsd|\"None\" \"MLSD\"|\n", - "| openpose | \"None\" \"Openpose\" |\n", - "|scribble|\"None\" \"HED\" \"PidiNet\"|\n", - "|softedge|\"None\" \"HED\" \"PidiNet\" \"HED safe\" \"PidiNet safe\"|\n", - "|segmentation|\"None\" \"UPerNet\"|\n", - "|depth|\"None\" \"DPT\" \"Midas\"|\n", + "| openpose | \"None\" \"Openpose\" \"Openpose core\" |\n", + "|scribble|\"None\" \"HED\" \"PidiNet\" \"TEED\" |\n", + "|softedge|\"None\" \"HED\" \"PidiNet\" \"HED safe\" \"PidiNet safe\" \"TEED\" |\n", + "|segmentation|\"None\" \"UPerNet\" \"SegFormer\"|\n", + "|depth|\"None\" \"DPT\" \"Midas\" \"ZoeDepth\" \"DepthAnything\"|\n", "|normalbae|\"None\" \"NormalBae\"|\n", - "|lineart|\"None\" \"Lineart\" \"Lineart coarse\" \"None (anime)\" \"LineartAnime\"|\n", - "|lineart_anime|\"None\" \"Lineart\" \"Lineart coarse\" \"None (anime)\" \"LineartAnime\"|\n", - "|tile|\"None\" \"Mild Blur\" \"Moderate Blur\" \"Heavy Blur\"|\n", + "|lineart|\"None\" \"Lineart\" \"Lineart coarse\" \"None (anime)\" \"LineartAnime\" \"Lineart standard\" \"Anyline\"|\n", + "|lineart_anime|\"None\" \"Lineart\" \"Lineart coarse\" \"None (anime)\" \"LineartAnime\" \"Lineart standard\" \"Anyline\"|\n", + "|tile|\"None\" \"Blur\"|\n", "|recolor|\"None\" \"Recolor luminance\" \"Recolor intensity\"|\n", "|shuffle|\"None\" \"ContentShuffle\"|\n", + "|repaint|\"None\"|\n", "|pattern|\"None\"|\n", "|ip2p|\"None\"|\n", "\n" ] }, @@ -518,7 +581,8 @@ " guidance_scale = 8.0,\n", " clip_skip = True,\n", " seed = 33,\n", - " sampler=\"DPM++ SDE Karras\",\n", + " sampler=\"DPM++ SDE\",\n", + " schedule_type=\"Karras\",\n", "\n", " FreeU=True, # Improves diffusion model sample quality at no costs.\n", " pag_scale=3.0, # PAG enhances image quality and is typically set to 3.0, which can be effective in some cases.\n", @@ -585,7 +649,8 @@ " strength = 0.5,\n", " negative_prompt 
= \"(worst quality, low quality, letterboxed)\",\n", " image_resolution = 768, # The equivalent resolution to be used for inference.\n", - " sampler=\"DPM++ SDE Karras\",\n", + " sampler=\"DPM++ SDE\",\n", + " schedule_type=\"Karras\",\n", ")\n", "\n", "for image in images:\n", @@ -705,7 +770,8 @@ " negative_prompt = \"worst quality\",\n", " strength = 0.48,\n", " image_resolution = 512,\n", - " sampler=\"DPM++ SDE Karras\",\n", + " sampler=\"DPM++ SDE\",\n", + " schedule_type=\"Karras\",\n", ")\n", "\n", "for image in images:\n", @@ -869,8 +935,10 @@ "model.load_pipe(\n", " base_model_id = model_name,\n", " task_name = \"openpose\",\n", - " retain_task_model_in_cache=False,\n", - ")" + " retain_task_model_in_cache=False, # version 0.6.0 default is False\n", + ")\n", + "\n", + "model.advanced_params(image_preprocessor_cuda_active=True) # Default is False" ] }, { @@ -916,6 +984,148 @@ " display(image)" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bjfi-n3ShMzb" + }, + "source": [ + "# Diffusers format" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KjHk_2ZOjVrQ" + }, + "source": [ + "\n", + "You can also load models in the Diffusers format. This format divides the model into different parts, which allows you to load individual sections from various models more easily. For instance, models like SD 1.5 and SDXL can be loaded using the repository name as shown in this example: [RealVisXL_V2.0](https://huggingface.co/SG161222/RealVisXL_V2.0/tree/main). This repository contains folders corresponding to each section of the model such as unet, vae, text encoder, and more.\n", + "\n", + "Another characteristic of the diffusers format is that it can use either the safetensors or bin extension. Currently, you can only use diffuser models in the safetensors extension because they offer better performance and are safer than bin files. To verify if a diffuser model is in safetensors format, check the [unet folder](https://huggingface.co/SG161222/RealVisXL_V2.0/tree/main/unet) and see if it ends with the safetensors extension." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dzpva586tcvT" + }, + "outputs": [], + "source": [ + "repo = \"SG161222/RealVisXL_V2.0\"\n", + "\n", + "model.load_pipe(\n", + " base_model_id = repo, # path to the model\n", + " task_name = \"canny\", # task\n", + " vae_model = repo, # baked VAE\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mLzuQDBfny1x" + }, + "source": [ + "The T2I-Adapter depth task is similar to its ControlNet counterpart but uses less VRAM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PJ3nryKR4f8A" + }, + "outputs": [], + "source": [ + "# Example sdxl_depth-midas\n", + "model.load_pipe(\n", + " base_model_id = repo, # sdxl repo\n", + " task_name = \"sdxl_depth-midas_t2i\",\n", + " retain_task_model_in_cache=False,\n", + ")\n", + "\n", + "# We can also use multiple styles in a list [\"Silhouette\", \"Kirigami\"]\n", + "images, image_list = model(\n", + " image = control_image,\n", + " prompt = \"a green bird\",\n", + " negative_prompt = \"worst quality\",\n", + "\n", + " # If we want to use the preprocessor\n", + " t2i_adapter_preprocessor = True,\n", + " preprocess_resolution = 1024,\n", + "\n", + " # Relative resolution\n", + " image_resolution = 1024,\n", + "\n", + " sampler=\"DPM++ 2M SDE\", # We can also use Euler for the final steps with \"DPM++ 2M SDE Ef\"\n", + "\n", + " t2i_adapter_conditioning_scale = 1.0,\n", + " t2i_adapter_conditioning_factor = 1.0,\n", + "\n", + " display_images = True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TTl8AaZ9RNzo" + }, + "source": [ + "# ControlNet pattern" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FqdEZa6BRQSS" + }, + "source": [ + "This task is used to generate images from a QR code, but it can also be used to generate optical patterns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j8Yws7siRza9" + }, + "outputs": [], + "source": [ + "spiral_image = download_image(\"https://upload.wikimedia.org/wikipedia/en/6/6c/Screwtop_spiral.jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7qXf9YsvSQmm" + }, + "outputs": [], + "source": [ + "repo = \"SG161222/RealVisXL_V2.0\"\n", + "\n", + "model.load_pipe(\n", + " base_model_id = repo,\n", + " task_name = \"pattern\",\n", + " retain_task_model_in_cache=False,\n", + ")\n", + "\n", + "images, image_list = model(\n", + " image = spiral_image,\n", + " prompt = \"a jungle landscape\",\n", + " negative_prompt = \"worst quality\",\n", + " sampler=\"DPM++ 2M SDE\",\n", + " schedule_type=\"Lambdas\",\n", + " image_resolution = 1024,\n", + ")\n", + "\n", + "for image in images:\n", + " display(image)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -976,6 +1186,8 @@ }, "outputs": [], "source": [ + "model_name = \"./Juggernaut-XL_v9_RunDiffusionPhoto_v2.safetensors\" # SDXL safetensors\n", + "\n", "model.load_pipe(\n", " base_model_id = model_name,\n", " task_name = \"txt2img\",\n", @@ -1081,178 +1293,49 @@ { "cell_type": "markdown", "metadata": { - "id": "Bjfi-n3ShMzb" + "id": "8XbyGTTlDTPc" }, "source": [ - "# Diffusers format" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KjHk_2ZOjVrQ" - }, - "source": [ - "\n", - "You can also load models in the Diffusers format. This format divides the model into different parts, which allows you to load individual sections from various models more easily. 
For instance, models like SD 1.5 and SDXL can be loaded using the repository name as shown in this example: [RealVisXL_V2.0](https://huggingface.co/SG161222/RealVisXL_V2.0/tree/main). This repository contains folders corresponding to each section of the model such as unet, vae, text encoder, and more.\n", - "\n", - "Another characteristic of the diffusers format is that it can use either the safetensors or bin extension. Currently, you can only use diffuser models in the safetensors extension because they offer better performance and are safer than bin files. To verify if a diffuser model is in safetensors format, check the [unet folder](https://huggingface.co/SG161222/RealVisXL_V2.0/tree/main/unet) and see if it ends with the safetensors extension." + "# Displaying preview images during generation steps" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Dzpva586tcvT" + "id": "L1xb0G07DTPe" }, "outputs": [], "source": [ - "repo = \"SG161222/RealVisXL_V2.0\"\n", - "\n", "model.load_pipe(\n", " base_model_id = repo,\n", " task_name = \"txt2img\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mLzuQDBfny1x" - }, - "source": [ - "The T2I-Adapter depth is similar to that of ControlNet and uses less VRAM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PJ3nryKR4f8A" - }, - "outputs": [], - "source": [ - "# Example sdxl_depth-midas\n", - "model.load_pipe(\n", - " base_model_id = repo, # sdxl repo\n", - " task_name = \"sdxl_depth-midas_t2i\",\n", " retain_task_model_in_cache=False,\n", - ")\n", - "\n", - "# We can also use multiple styles in a list [\"Silhouette\", \"Kirigami\"]\n", - "images, image_list = model(\n", - " image = control_image,\n", - " prompt = \"a green bird\",\n", - " negative_prompt = \"worst quality\",\n", - "\n", - " # If we want to use the preprocessor\n", - " t2i_adapter_preprocessor = True,\n", - " preprocess_resolution = 1024,\n", - "\n", - " # Relative resolution\n", - " image_resolution = 1024,\n", - "\n", - " sampler=\"DPM++ 2M SDE Lu\", # Specific variant for SDXL. We can also use euler at final with \"DPM++ 2M SDE Ef\"\n", - "\n", - " t2i_adapter_conditioning_scale = 1.0,\n", - " t2i_adapter_conditioning_factor = 1.0,\n", - "\n", - " display_images = True,\n", ")" ] }, { "cell_type": "markdown", "metadata": { - "id": "TTl8AaZ9RNzo" + "id": "10DTNoR3DTPe" }, "source": [ - "# ControlNet pattern" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FqdEZa6BRQSS" - }, - "source": [ - "It is used to generate images with a QR code but can also be used to generate optical patterns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "j8Yws7siRza9" - }, - "outputs": [], - "source": [ - "spiral_image = download_image(\"https://upload.wikimedia.org/wikipedia/en/6/6c/Screwtop_spiral.jpg\")" + "By setting `image_previews=True`, an iterable generator object for image previews will be created." 
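The next cell creates that generator as `stream`. Once it exists, it can be iterated; a sketch of consuming it, under the assumption that each iteration yields a list of PIL images (mirroring the `images` list of a regular call), with previews first and the finished images in the last iteration:

```python
# A sketch of consuming the `stream` generator created in the next cell.
# Assumption: each iteration yields a list of PIL images, matching the
# `images` list returned by a non-streaming call.
for preview_images in stream:
    for image in preview_images:
        display(image)  # IPython's display, as used elsewhere in this notebook
```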
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "7qXf9YsvSQmm" + "id": "8TgHk9SxDTPf" }, "outputs": [], - "source": [ - "model.load_pipe(\n", - " base_model_id = repo,\n", - " task_name = \"pattern\",\n", - " retain_task_model_in_cache=False,\n", - ")\n", - "\n", - "images, image_list = model(\n", - " image = spiral_image,\n", - " prompt = \"a jungle landscape\",\n", - " negative_prompt = \"worst quality\",\n", - " sampler=\"DPM++ 2M SDE Lu\",\n", - " image_resolution = 1024,\n", - ")\n", - "\n", - "for image in images:\n", - " display(image)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Displaying preview images during generation steps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model.load_pipe(\n", - " base_model_id = repo,\n", - " task_name = \"txt2img\",\n", - " retain_task_model_in_cache=False,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By setting `image_previews=True`, an iterable generator object for image previews will be created." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], "source": [ "stream = model(\n", " prompt = \"a cat\",\n", " negative_prompt = \"worst quality\",\n", - " sampler=\"DPM++ 2M SDE Lu\",\n", + " sampler=\"DPM++ 2M SDE\",\n", + " schedule_type=\"Lambdas\",\n", " img_width = 768,\n", " img_height = 768,\n", " image_previews=True,\n", @@ -1267,7 +1350,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "YqETYZi3DTPf" + }, "source": [ "### Config the stream parameters\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "21-BMmjyDTPg" + }, "outputs": [], "source": [ "model.stream_config(\n", @@ -1287,7 +1374,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "TJjHktt4DTPh" + }, "source": [ "- **concurrency**: `int`\n", " - **Default**: 5\n", @@ -1346,6 +1435,213 @@ "help(model.__call__)" ] }, + { + "cell_type": "code", + "source": [ + "help(Model_Diffusers.load_pipe)" + ], + "metadata": { + "id": "OiYJ3TxI3IAw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run the upscaling tool" + ], + "metadata": { + "id": "tm4qRiNh51kI" + } + }, + { + "cell_type": "markdown", + "source": [ + "Load the model" + ], + "metadata": { + "id": "_CapLd-oAdiv" + } + }, + { + "cell_type": "code", + "source": [ + "from PIL import Image\n", + "from stablepy import BUILTIN_UPSCALERS, load_upscaler_model\n", + "\n", + "scaler_beta = load_upscaler_model(\n", + " model=\"./RealESRGAN_x4plus.pth\",\n", + " tile=192,\n", + " tile_overlap=8,\n", + " device=\"cuda\",\n", + " half=True\n", + ")" + ], + "metadata": { + "id": "AaVwojmt6CMD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Upscale the image" + ], + "metadata": { + "id": "n440GHcLAiN8" + } + }, + { + "cell_type": "code", + "source": [ + "image_path = \"bird.png\"\n", + "image_pil_base = Image.open(image_path)\n", + "image_pil_base = image_pil_base.convert(\"RGB\")\n", + "upscaler_size = 1.3\n", + "\n", + "image_upscaler = scaler_beta.upscale(image_pil_base, upscaler_size)\n", + "\n", + "image_upscaler" + ], + "metadata": { + "id": "e4_uI6b97XNo" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can also use any of the built-in upscalers." 
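For example, a sketch of passing a built-in upscaler by name instead of a local model file, assuming "Lanczos" is one of the names listed in `BUILTIN_UPSCALERS` (shown in the next cell):

```python
# A sketch of using a built-in upscaler by name instead of a model path.
# Assumption: "Lanczos" appears in BUILTIN_UPSCALERS.
from stablepy import load_upscaler_model

scaler_builtin = load_upscaler_model(
    model="Lanczos",
    tile=192,
    tile_overlap=8,
    device="cuda",
    half=True,
)

# Reuses image_pil_base from the previous cells.
image_upscaled = scaler_builtin.upscale(image_pil_base, 2.0)
image_upscaled
```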
+ ], + "metadata": { + "id": "IKXcMzrX6tn3" + } + }, + { + "cell_type": "code", + "source": [ + "BUILTIN_UPSCALERS" + ], + "metadata": { + "id": "YuXbE1RH6s7K" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "More usage info:" + ], + "metadata": { + "id": "AmI3d70L9myP" + } + }, + { + "cell_type": "code", + "source": [ + "help(load_upscaler_model)" + ], + "metadata": { + "id": "AgrvQt5u9pBK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run the preprocessing tool" + ], + "metadata": { + "id": "YzDoSWRg59kc" + } + }, + { + "cell_type": "markdown", + "source": [ + "Load the model and set it to run on CUDA" + ], + "metadata": { + "id": "R1iMXysqGGBw" + } + }, + { + "cell_type": "code", + "source": [ + "from stablepy import Preprocessor, ALL_PREPROCESSOR_TASKS\n", + "\n", + "preprocessor = Preprocessor()\n", + "\n", + "preprocessor.load(\"Openpose\", use_cuda=True)\n", + "# preprocessor.to(\"cuda\")" + ], + "metadata": { + "id": "k5P3JUkh6DHf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Inference" + ], + "metadata": { + "id": "bzoC4clhGNnL" + } + }, + { + "cell_type": "code", + "source": [ + "result_image = preprocessor(\n", + " image=\"Model_Posing_On_Typical_Studio_Set.jpg\",\n", + " image_resolution=1024, # The final proportional resolution based on the provided image\n", + " detect_resolution=512, # The resolution at which the detector will perform the inference\n", + ")\n", + "\n", + "result_image" + ], + "metadata": { + "id": "kml7pIZ09-FD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Valid preprocessor names:" + ], + "metadata": { + "id": "0Xs6g2ijGQcf" + } + }, + { + "cell_type": "code", + "source": [ + "ALL_PREPROCESSOR_TASKS" + ], + "metadata": { + "id": "2gRq-7uQAcD1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "help(Preprocessor)" + ], + "metadata": { + "id": "_fALLNaa_DwO" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "markdown", "metadata": { @@ -1366,8 +1662,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "include_colab_link": true, - "provenance": [] + "provenance": [], + "include_colab_link": true }, "kernelspec": { "display_name": "Python 3", @@ -1379,4 +1675,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file
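As a closing hypothetical sketch combining the pieces above: the standalone preprocessor output can be fed back into a ControlNet task with the pipeline's own preprocessing disabled via `preprocessor_name="None"`. All names here (`model`, `model_name`, `preprocessor`, the sample image path) are assumed from the previous cells:

```python
# Hypothetical end-to-end sketch: run the standalone Openpose preprocessor,
# then reuse its output as the control image for the openpose task, skipping
# the pipeline's own preprocessing since the image is already a pose map.
pose_map = preprocessor(
    image="Model_Posing_On_Typical_Studio_Set.jpg",
    image_resolution=1024,
    detect_resolution=512,
)

model.load_pipe(
    base_model_id=model_name,  # the checkpoint loaded earlier in the notebook
    task_name="openpose",
)

images, img_info = model(
    prompt="a person posing in a studio, (best quality)",
    image=pose_map,
    preprocessor_name="None",  # pose_map is already a processed control image
    image_resolution=1024,
)

images[0]
```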