From e544fe0425266aee198f255a326e32248acf7b41 Mon Sep 17 00:00:00 2001
From: Lukas Kuhn
Date: Mon, 13 Nov 2023 20:09:39 +0100
Subject: [PATCH 1/6] feat: added tracemalloc arg to train_dreambooth

---
 examples/lora_dreambooth/train_dreambooth.py | 40 ++++++++++++--------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/examples/lora_dreambooth/train_dreambooth.py b/examples/lora_dreambooth/train_dreambooth.py
index 3e350b0313..eefcffbf5e 100644
--- a/examples/lora_dreambooth/train_dreambooth.py
+++ b/examples/lora_dreambooth/train_dreambooth.py
@@ -9,6 +9,7 @@
 import warnings
 from pathlib import Path
 from typing import Optional
+from contextlib import nullcontext
 
 import datasets
 import diffusers
@@ -217,6 +218,13 @@ def parse_args(input_args=None):
         "--num_dataloader_workers", type=int, default=1, help="Num of workers for the training dataloader."
     )
 
+    parser.add_argument(
+        "--trace_memory_allocation",
+        default=True,
+        action="store_true",
+        help="Flag to track memory allocation during training.",
+    )
+
     parser.add_argument(
         "--train_batch_size", type=int, default=4, help="Batch size (per device) for the training dataloader."
     )
@@ -897,7 +905,7 @@ def main(args):
         unet.train()
         if args.train_text_encoder:
             text_encoder.train()
-        with TorchTracemalloc() as tracemalloc:
+        with TorchTracemalloc() if args.trace_memory_allocation else nullcontext() as tracemalloc:
             for step, batch in enumerate(train_dataloader):
                 # Skip steps until we reach the resumed step
                 if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step:
@@ -1038,23 +1046,25 @@ def main(args):
             if global_step >= args.max_train_steps:
                 break
         # Printing the GPU memory usage details such as allocated memory, peak memory, and total memory usage
-        accelerator.print("GPU Memory before entering the train : {}".format(b2mb(tracemalloc.begin)))
-        accelerator.print("GPU Memory consumed at the end of the train (end-begin): {}".format(tracemalloc.used))
-        accelerator.print("GPU Peak Memory consumed during the train (max-begin): {}".format(tracemalloc.peaked))
-        accelerator.print(
-            "GPU Total Peak Memory consumed during the train (max): {}".format(
-                tracemalloc.peaked + b2mb(tracemalloc.begin)
+
+        if args.trace_memory_allocation:
+            accelerator.print("GPU Memory before entering the train : {}".format(b2mb(tracemalloc.begin)))
+            accelerator.print("GPU Memory consumed at the end of the train (end-begin): {}".format(tracemalloc.used))
+            accelerator.print("GPU Peak Memory consumed during the train (max-begin): {}".format(tracemalloc.peaked))
+            accelerator.print(
+                "GPU Total Peak Memory consumed during the train (max): {}".format(
+                    tracemalloc.peaked + b2mb(tracemalloc.begin)
+                )
             )
-        )
-        accelerator.print("CPU Memory before entering the train : {}".format(b2mb(tracemalloc.cpu_begin)))
-        accelerator.print("CPU Memory consumed at the end of the train (end-begin): {}".format(tracemalloc.cpu_used))
-        accelerator.print("CPU Peak Memory consumed during the train (max-begin): {}".format(tracemalloc.cpu_peaked))
-        accelerator.print(
-            "CPU Total Peak Memory consumed during the train (max): {}".format(
-                tracemalloc.cpu_peaked + b2mb(tracemalloc.cpu_begin)
+            accelerator.print("CPU Memory before entering the train : {}".format(b2mb(tracemalloc.cpu_begin)))
+            accelerator.print("CPU Memory consumed at the end of the train (end-begin): {}".format(tracemalloc.cpu_used))
+            accelerator.print("CPU Peak Memory consumed during the train (max-begin): {}".format(tracemalloc.cpu_peaked))
+            accelerator.print(
+                "CPU Total Peak Memory consumed during the train (max): {}".format(
+                    tracemalloc.cpu_peaked + b2mb(tracemalloc.cpu_begin)
+                )
             )
-        )
 
     # Create the pipeline using using the trained modules and save it.
     accelerator.wait_for_everyone()
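Note on the pattern introduced in PATCH 1/6: the expression "with A() if cond else nullcontext() as x" picks the context manager at runtime. When the flag is off, contextlib.nullcontext() is entered instead; it binds None to the "as" target, which is why the memory-report block above is guarded by the same flag. A minimal, self-contained sketch of the control flow (the Tracker class here is a hypothetical stand-in, not the script's TorchTracemalloc helper):

    from contextlib import nullcontext

    class Tracker:
        # Hypothetical stand-in for TorchTracemalloc, only to show the flow.
        def __enter__(self):
            print("tracking on")   # a real tracker would snapshot memory here
            return self

        def __exit__(self, *exc):
            print("tracking off")  # ... and compute the deltas here

    trace_memory_allocation = False
    with Tracker() if trace_memory_allocation else nullcontext() as tracemalloc:
        # tracemalloc is a Tracker when tracing, None otherwise, so any
        # tracemalloc.* access after the loop must stay behind the flag.
        pass  # training loop would run here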
From abd01deb8f9596fc40c5ed8a482cac6537cb0215 Mon Sep 17 00:00:00 2001
From: Lukas Kuhn
Date: Mon, 13 Nov 2023 20:13:21 +0100
Subject: [PATCH 2/6] fix: added help for arg

---
 examples/lora_dreambooth/train_dreambooth.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/lora_dreambooth/train_dreambooth.py b/examples/lora_dreambooth/train_dreambooth.py
index eefcffbf5e..be092225d8 100644
--- a/examples/lora_dreambooth/train_dreambooth.py
+++ b/examples/lora_dreambooth/train_dreambooth.py
@@ -222,7 +222,7 @@ def parse_args(input_args=None):
         "--trace_memory_allocation",
         default=True,
         action="store_true",
-        help="Flag to track memory allocation during training.",
+        help="Flag to track memory allocation during training. This could slow down training on Windows.",
     )
 
     parser.add_argument(

From 3087dfa1cfcdddeac71b86410ecde4069e6a5aa1 Mon Sep 17 00:00:00 2001
From: Lukas Kuhn
Date: Mon, 13 Nov 2023 20:25:06 +0100
Subject: [PATCH 3/6] fix: changed arg name

---
 examples/lora_dreambooth/train_dreambooth.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/lora_dreambooth/train_dreambooth.py b/examples/lora_dreambooth/train_dreambooth.py
index be092225d8..0819661d13 100644
--- a/examples/lora_dreambooth/train_dreambooth.py
+++ b/examples/lora_dreambooth/train_dreambooth.py
@@ -219,10 +219,10 @@ def parse_args(input_args=None):
     )
 
     parser.add_argument(
-        "--trace_memory_allocation",
-        default=True,
+        "--no_tracemalloc",
+        default=False,
         action="store_true",
-        help="Flag to track memory allocation during training. This could slow down training on Windows.",
+        help="Flag to stop memory allocation tracing during training. This could speed up training on Windows.",
     )
 
     parser.add_argument(
@@ -905,7 +905,7 @@ def main(args):
         unet.train()
         if args.train_text_encoder:
             text_encoder.train()
-        with TorchTracemalloc() if args.trace_memory_allocation else nullcontext() as tracemalloc:
+        with TorchTracemalloc() if not args.no_tracemalloc else nullcontext() as tracemalloc:
             for step, batch in enumerate(train_dataloader):
                 # Skip steps until we reach the resumed step
                 if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step:
@@ -1047,7 +1047,7 @@ def main(args):
                 break
 
         # Printing the GPU memory usage details such as allocated memory, peak memory, and total memory usage
-        if args.trace_memory_allocation:
+        if not args.no_tracemalloc:
             accelerator.print("GPU Memory before entering the train : {}".format(b2mb(tracemalloc.begin)))
             accelerator.print("GPU Memory consumed at the end of the train (end-begin): {}".format(tracemalloc.used))
             accelerator.print("GPU Peak Memory consumed during the train (max-begin): {}".format(tracemalloc.peaked))
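Note on PATCH 3/6: the rename is not cosmetic. An option declared with action="store_true" together with default=True can never evaluate to False — passing the flag stores True, and omitting it keeps the True default — so the tracing added in PATCH 1/6 and 2/6 could not actually be switched off. Inverting the option into a "--no_*" flag restores both states. A short standalone sketch of the two behaviors, assuming nothing beyond argparse:

    import argparse

    parser = argparse.ArgumentParser()

    # Broken variant from PATCH 1/6: both parses below would yield True.
    # parser.add_argument("--trace_memory_allocation", default=True, action="store_true")

    # Fixed variant from PATCH 3/6: the flag's presence toggles the value.
    parser.add_argument("--no_tracemalloc", default=False, action="store_true")

    assert parser.parse_args([]).no_tracemalloc is False                   # tracing on
    assert parser.parse_args(["--no_tracemalloc"]).no_tracemalloc is True  # tracing off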
From 439410030be66f40bd1c1adbc088efdf225d8d96 Mon Sep 17 00:00:00 2001
From: Lukas Kuhn
Date: Tue, 14 Nov 2023 09:12:28 +0100
Subject: [PATCH 4/6] fix formatting

---
 examples/lora_dreambooth/train_dreambooth.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/lora_dreambooth/train_dreambooth.py b/examples/lora_dreambooth/train_dreambooth.py
index 0819661d13..ede2cb9448 100644
--- a/examples/lora_dreambooth/train_dreambooth.py
+++ b/examples/lora_dreambooth/train_dreambooth.py
@@ -1058,8 +1058,12 @@ def main(args):
             )
 
             accelerator.print("CPU Memory before entering the train : {}".format(b2mb(tracemalloc.cpu_begin)))
-            accelerator.print("CPU Memory consumed at the end of the train (end-begin): {}".format(tracemalloc.cpu_used))
-            accelerator.print("CPU Peak Memory consumed during the train (max-begin): {}".format(tracemalloc.cpu_peaked))
+            accelerator.print(
+                "CPU Memory consumed at the end of the train (end-begin): {}".format(tracemalloc.cpu_used)
+            )
+            accelerator.print(
+                "CPU Peak Memory consumed during the train (max-begin): {}".format(tracemalloc.cpu_peaked)
+            )
             accelerator.print(
                 "CPU Total Peak Memory consumed during the train (max): {}".format(
                     tracemalloc.cpu_peaked + b2mb(tracemalloc.cpu_begin)

From 3abfc90466b879939f9438b04c2c6e6be4895c41 Mon Sep 17 00:00:00 2001
From: Lukas Kuhn
Date: Tue, 14 Nov 2023 11:28:13 +0100
Subject: [PATCH 5/6] fix: import order

---
 examples/lora_dreambooth/train_dreambooth.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/lora_dreambooth/train_dreambooth.py b/examples/lora_dreambooth/train_dreambooth.py
index ede2cb9448..73d827a4c3 100644
--- a/examples/lora_dreambooth/train_dreambooth.py
+++ b/examples/lora_dreambooth/train_dreambooth.py
@@ -7,9 +7,9 @@
 import os
 import threading
 import warnings
+from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional
-from contextlib import nullcontext
 
 import datasets
 import diffusers
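Note on PATCH 5/6: the move restores the sorted standard-library block that the repository's style checks appear to enforce (isort-style grouping is an assumption here): plain "import" statements first, then "from ... import ..." statements alphabetized by module, with third-party imports in a separate block. The block as it reads after the patch:

    import os
    import threading
    import warnings
    from contextlib import nullcontext
    from pathlib import Path
    from typing import Optional

    import datasets
    import diffusers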
From 00564f4f3d08b4b08113a42cb306ad1ee418a43c Mon Sep 17 00:00:00 2001
From: Lukas Kuhn
Date: Tue, 21 Nov 2023 22:17:35 +0100
Subject: [PATCH 6/6] fix: dataset was loaded twice in ft script

---
 examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py b/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py
index f0fc29d8e1..018cc53b05 100755
--- a/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py
+++ b/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py
@@ -131,11 +131,6 @@ def print_trainable_parameters(model):
 
 """### Training"""
 
-
-data = load_dataset("Abirate/english_quotes")
-data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
-
-
 data = load_dataset("Abirate/english_quotes")
 data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
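Note on PATCH 6/6: the fp4 fine-tuning script built the tokenized dataset twice back to back; the first load_dataset/map pair was immediately overwritten by the identical second one, so the removed lines only repeated the Hub download (on a cold cache) and the tokenization pass. A standalone sketch of the surviving logic (the OPT tokenizer name below is an assumption for illustration, not taken from the script):

    from datasets import load_dataset
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")  # assumed model id

    # Load once, tokenize once; rebinding "data" with a second identical
    # load/map pair, as the removed lines did, would discard this result
    # and redo the same work.
    data = load_dataset("Abirate/english_quotes")
    data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)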