Port LDM Tutorials (Project-MONAI#1775)

Addresses part of Project-MONAI#1769.

### Description

This updates the tutorials for LDM models and copies over the notebooks from the GenerativeModels repo. These notebooks have been checked; the 3D one has had its code updated, but its cell outputs were left as-is to save time, so they may need checking later. The MAISI tutorial was left untouched but should be updated as well.

### Checks

- [ ] Avoid including large-size files in the PR.
- [ ] Clean up long text outputs from code cells in the notebook.
- [ ] For security purposes, please check the contents and remove any sensitive info such as user names and private keys.
- [ ] Ensure (1) hyperlinks and markdown anchors are working, (2) relative paths are used for tutorial repo files, and (3) figures and graphs are put in the `./figure` folder.
- [ ] Notebook runs automatically with `./runner.sh -t <path to .ipynb file>`.

---------

Signed-off-by: Eric Kerfoot <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
dongyang0122 · Aug 13, 2024 · 7d2c225 · 7d2c225
1 parent a9f547c
commit 7d2c225
Show file tree

Hide file tree

Showing 15 changed files with 2,069 additions and 66 deletions.
diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,8 @@ deployment/ray/mednist_classifier_start.py
 3d_segmentation/out
 *.nsys-rep
 auto3dseg/notebooks/datalist.json
+
+*.jpeg
+*.png
+*.np*
+*.pt
diff --git a/generation/2d_ldm/2d_ldm_tutorial.ipynb b/generation/2d_ldm/2d_ldm_tutorial.ipynb
diff --git a/generation/2d_ldm/README.md b/generation/2d_ldm/README.md
@@ -1,5 +1,5 @@
 # 2D Latent Diffusion Example
-This folder contains an example for training and validating a 2D Latent Diffusion Model on Brats axial slices. The example includes support for multi-GPU training with distributed data parallelism.
+This folder contains examples for training and validating a 2D Latent Diffusion Model on MedNIST and Brats axial slice data. The notebook [2d_ldm_tutorial.ipynb](./2d_ldm_tutorial.ipynb) demonstrates these concepts with the MedNIST dataset. The larger example given in Python files and explained here uses Brats and includes support for multi-GPU training with distributed data parallelism.
 
 The workflow of the Latent Diffusion Model is depicted in the figure below. It begins by training an autoencoder in pixel space to encode images into latent features. Following that, it trains a diffusion model in the latent space to denoise the noisy latent features. During inference, it first generates latent features from random noise by applying multiple denoising steps using the trained diffusion model. Finally, it decodes the denoised latent features into images using the trained autoencoder.
 <p align="center">

diff --git a/generation/2d_ldm/config/config_train_16g.json b/generation/2d_ldm/config/config_train_16g.json
@@ -5,12 +5,12 @@
     "latent_channels": 1,
     "sample_axis": 2,
     "autoencoder_def": {
-        "_target_": "generative.networks.nets.AutoencoderKL",
+        "_target_": "monai.networks.nets.AutoencoderKL",
         "spatial_dims": "@spatial_dims",
         "in_channels": "$@image_channels",
         "out_channels": "@image_channels",
         "latent_channels": "@latent_channels",
-        "num_channels": [
+        "channels": [
             64,
             128,
             256
@@ -33,15 +33,15 @@
         "perceptual_weight": 1.0,
         "kl_weight": 1e-6,
         "recon_loss": "l1",
-        "n_epochs": 1000,
+        "max_epochs": 1000,
         "val_interval": 1
     },
     "diffusion_def": {
-        "_target_": "generative.networks.nets.DiffusionModelUNet",
+        "_target_": "monai.networks.nets.DiffusionModelUNet",
         "spatial_dims": "@spatial_dims",
         "in_channels": "@latent_channels",
         "out_channels": "@latent_channels",
-        "num_channels":[32, 64, 128, 256],
+        "channels":[32, 64, 128, 256],
         "attention_levels":[false, true, true, true],
         "num_head_channels":[0, 32, 32, 32],
         "num_res_blocks": 2
@@ -50,7 +50,7 @@
         "batch_size": 50,
         "patch_size": [256,256],
         "lr": 1e-5,
-        "n_epochs": 1500,
+        "max_epochs": 1500,
         "val_interval": 2,
         "lr_scheduler_milestones": [1000]
     },

diff --git a/generation/2d_ldm/config/config_train_32g.json b/generation/2d_ldm/config/config_train_32g.json
@@ -5,12 +5,12 @@
     "latent_channels": 1,
     "sample_axis": 2,
     "autoencoder_def": {
-        "_target_": "generative.networks.nets.AutoencoderKL",
+        "_target_": "monai.networks.nets.AutoencoderKL",
         "spatial_dims": "@spatial_dims",
         "in_channels": "$@image_channels",
         "out_channels": "@image_channels",
         "latent_channels": "@latent_channels",
-        "num_channels": [
+        "channels": [
             64,
             128,
             256
@@ -33,15 +33,15 @@
         "perceptual_weight": 1.0,
         "kl_weight": 1e-6,
         "recon_loss": "l1",
-        "n_epochs": 1000,
+        "max_epochs": 1000,
         "val_interval": 1
     },
     "diffusion_def": {
-        "_target_": "generative.networks.nets.DiffusionModelUNet",
+        "_target_": "monai.networks.nets.DiffusionModelUNet",
         "spatial_dims": "@spatial_dims",
         "in_channels": "@latent_channels",
         "out_channels": "@latent_channels",
-        "num_channels":[32, 64, 128, 256],
+        "channels":[32, 64, 128, 256],
         "attention_levels":[false, true, true, true],
         "num_head_channels":[0, 32, 32, 32],
         "num_res_blocks": 2
@@ -50,7 +50,7 @@
         "batch_size": 80,
         "patch_size": [256,256],
         "lr": 1e-5,
-        "n_epochs": 1500,
+        "max_epochs": 1500,
         "val_interval": 2,
         "lr_scheduler_milestones": [1000]
     },

diff --git a/generation/2d_ldm/inference.py b/generation/2d_ldm/inference.py
@@ -19,8 +19,8 @@
 
 import numpy as np
 import torch
-from generative.inferers import LatentDiffusionInferer
-from generative.networks.schedulers import DDPMScheduler
+from monai.inferers import LatentDiffusionInferer
+from monai.networks.schedulers import DDPMScheduler
 from monai.config import print_config
 from monai.utils import set_determinism
 from PIL import Image

diff --git a/generation/2d_ldm/train_autoencoder.py b/generation/2d_ldm/train_autoencoder.py
@@ -17,8 +17,8 @@
 from pathlib import Path
 
 import torch
-from generative.losses import PatchAdversarialLoss, PerceptualLoss
-from generative.networks.nets import PatchDiscriminator
+from monai.losses import PatchAdversarialLoss, PerceptualLoss
+from monai.networks.nets import PatchDiscriminator
 from monai.config import print_config
 from monai.utils import set_determinism
 from torch.nn import L1Loss, MSELoss
@@ -75,7 +75,7 @@ def main():
     set_determinism(42)
 
     # Step 1: set data loader
-    size_divisible = 2 ** (len(args.autoencoder_def["num_channels"]) - 1)
+    size_divisible = 2 ** (len(args.autoencoder_def["channels"]) - 1)
     train_loader, val_loader = prepare_brats2d_dataloader(
         args,
         args.autoencoder_train["batch_size"],
@@ -95,7 +95,7 @@ def main():
     discriminator = PatchDiscriminator(
         spatial_dims=args.spatial_dims,
         num_layers_d=3,
-        num_channels=32,
+        channels=32,
         in_channels=1,
         out_channels=1,
         norm=discriminator_norm,
@@ -172,12 +172,12 @@ def main():
 
     # Step 4: training
     autoencoder_warm_up_n_epochs = 5
-    n_epochs = args.autoencoder_train["n_epochs"]
+    max_epochs = args.autoencoder_train["max_epochs"]
     val_interval = args.autoencoder_train["val_interval"]
     best_val_recon_epoch_loss = 100.0
     total_step = 0
 
-    for epoch in range(n_epochs):
+    for epoch in range(max_epochs):
         # train
         autoencoder.train()
         discriminator.train()

diff --git a/generation/2d_ldm/train_diffusion.py b/generation/2d_ldm/train_diffusion.py
@@ -18,11 +18,11 @@
 
 import torch
 import torch.nn.functional as F
-from generative.inferers import LatentDiffusionInferer
-from generative.networks.schedulers import DDPMScheduler
+from monai.inferers import LatentDiffusionInferer
+from monai.networks.schedulers import DDPMScheduler
 from monai.config import print_config
 from monai.utils import first, set_determinism
-from torch.cuda.amp import GradScaler, autocast
+from torch.amp import GradScaler, autocast
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.utils.tensorboard import SummaryWriter
 from utils import define_instance, prepare_brats2d_dataloader, setup_ddp
@@ -75,7 +75,7 @@ def main():
     set_determinism(42)
 
     # Step 1: set data loader
-    size_divisible = 2 ** (len(args.autoencoder_def["num_channels"]) + len(args.diffusion_def["num_channels"]) - 2)
+    size_divisible = 2 ** (len(args.autoencoder_def["channels"]) + len(args.diffusion_def["channels"]) - 2)
     train_loader, val_loader = prepare_brats2d_dataloader(
         args,
         args.diffusion_train["batch_size"],
@@ -114,7 +114,7 @@ def main():
     # and the results will not differ from those obtained when it is not used._
 
     with torch.no_grad():
-        with autocast(enabled=True):
+        with autocast("cuda", enabled=True):
             check_data = first(train_loader)
             z = autoencoder.encode_stage_2_inputs(check_data["image"].to(device))
             if rank == 0:
@@ -179,14 +179,14 @@ def main():
     )
 
     # Step 4: training
-    n_epochs = args.diffusion_train["n_epochs"]
+    max_epochs = args.diffusion_train["max_epochs"]
     val_interval = args.diffusion_train["val_interval"]
     autoencoder.eval()
     scaler = GradScaler()
     total_step = 0
     best_val_recon_epoch_loss = 100.0
 
-    for epoch in range(start_epoch, n_epochs):
+    for epoch in range(start_epoch, max_epochs):
         unet.train()
         lr_scheduler.step()
         if ddp_bool:
@@ -196,7 +196,7 @@ def main():
             images = batch["image"].to(device)
             optimizer_diff.zero_grad(set_to_none=True)
 
-            with autocast(enabled=True):
+            with autocast("cuda", enabled=True):
                 # Generate random noise
                 noise_shape = [images.shape[0]] + list(z.shape[1:])
                 noise = torch.randn(noise_shape, dtype=images.dtype).to(device)
@@ -239,7 +239,7 @@ def main():
             unet.eval()
             val_recon_epoch_loss = 0
             with torch.no_grad():
-                with autocast(enabled=True):
+                with autocast("cuda", enabled=True):
                     # compute val loss
                     for step, batch in enumerate(val_loader):
                         images = batch["image"].to(device)