From c5e71d3a3856db12c7bc459080528702fba9a494 Mon Sep 17 00:00:00 2001
From: Wesley
Date: Thu, 31 Mar 2022 17:30:46 +0800
Subject: [PATCH 1/5] test pp + tp1d

---
 bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py b/bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py
index 8875ed5..6e869d6 100644
--- a/bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py
+++ b/bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py
@@ -195,8 +195,8 @@ class BertOutput(nn.Module):
     def __init__(self, config):
         super().__init__()
         self.dense = col_nn.Linear(config.intermediate_size, config.hidden_size)
-        self.LayerNorm = col_nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.dropout = col_nn.Dropout(config.hidden_dropout_prob)
+        self.LayerNorm = col_nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
 
     def forward(self, hidden_states, input_tensor):
         hidden_states = self.dense(hidden_states)

From 582084791a1bebe350db27293bcc6bf224beeb7f Mon Sep 17 00:00:00 2001
From: Wesley
Date: Thu, 31 Mar 2022 17:51:47 +0800
Subject: [PATCH 2/5] add tp1dpp

---
 bert/colossalai_utils/bert_config_tp1dpp.json | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 bert/colossalai_utils/bert_config_tp1dpp.json

diff --git a/bert/colossalai_utils/bert_config_tp1dpp.json b/bert/colossalai_utils/bert_config_tp1dpp.json
new file mode 100644
index 0000000..fcc687c
--- /dev/null
+++ b/bert/colossalai_utils/bert_config_tp1dpp.json
@@ -0,0 +1,20 @@
+{
+    "method": "colossalai",
+    "model": {
+        "type": "bert_small"
+    },
+    "hyperparameter": {
+        "batch_size": 8,
+        "num_epochs": 20,
+        "steps_per_epoch": 10
+    },
+    "gradient_clipping": 1.0,
+    "parallel": {
+        "pipeline": 2,
+        "tensor": {
+            "mode": "1d",
+            "size": 2
+        }
+    },
+    "use_mem_monitor": true
+}

From 0031bfb553e50ca171eced9c58d9da22b2ede47b Mon Sep 17 00:00:00 2001
From: Wesley
Date: Fri, 1 Apr 2022 17:34:04 +0800
Subject: [PATCH 3/5] update vocab size to pass check in divide()

---
 bert/common/helper.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/bert/common/helper.py b/bert/common/helper.py
index 8a783aa..0ed946d 100644
--- a/bert/common/helper.py
+++ b/bert/common/helper.py
@@ -9,7 +9,7 @@
 
 _bert_small = dict(
     seq_length=512,
-    vocab_size=32400,
+    vocab_size=50304,
     hidden_size=768,
     num_heads=12,
     depth=12,
@@ -77,8 +77,6 @@ def tokenize(examples, mode='concat'):
                                   keep_in_memory=True,
                                   remove_columns='text')
 
-    CONFIG['model']['vocab_size'] = len(tokenizer)
-
     def seed_worker(_):
         worker_seed = 1024
         np.random.seed(worker_seed)

From 21ab7bcfe579487bc30353be5d65bc9a58fdf0e8 Mon Sep 17 00:00:00 2001
From: Wesley
Date: Sat, 2 Apr 2022 16:26:48 +0800
Subject: [PATCH 4/5] refine code

---
 bert/colossalai_utils/bert_config_pp.json     |  2 +-
 bert/colossalai_utils/bert_config_tp1d.json   |  7 ++--
 bert/colossalai_utils/bert_config_tp1dpp.json |  2 +-
 bert/colossalai_utils/bert_config_tp2d.json   |  2 +-
 bert/colossalai_utils/bert_config_tp2p5d.json |  2 +-
 bert/colossalai_utils/bert_config_tp3d.json   |  2 +-
 ...nfig_zerov2.json => bert_config_zero.json} |  2 +-
 .../bert_config_zerotppp.json                 | 35 +++++++++++++++++++
 bert/colossalai_utils/model_zoo/__init__.py   |  2 +-
 .../{colo_tp1dcol_bert.py => colo_bert.py}    |  5 +--
 bert/colossalai_utils/utils.py                |  6 ++--
 bert/common/helper.py                         | 20 ++++++++---
 bert/torch_utils/bert_config.json             | 20 -----------
 13 files changed, 66 insertions(+), 41 deletions(-)
 rename bert/colossalai_utils/{bert_config_zerov2.json => bert_config_zero.json} (95%)
 create mode 100644 bert/colossalai_utils/bert_config_zerotppp.json
 rename bert/colossalai_utils/model_zoo/{colo_tp1dcol_bert.py => colo_bert.py} (97%)
 delete mode 100644 bert/torch_utils/bert_config.json

diff --git a/bert/colossalai_utils/bert_config_pp.json b/bert/colossalai_utils/bert_config_pp.json
index e8e4734..e6eedec 100644
--- a/bert/colossalai_utils/bert_config_pp.json
+++ b/bert/colossalai_utils/bert_config_pp.json
@@ -1,7 +1,7 @@
 {
     "method": "colossalai",
     "model": {
-        "type": "bert_small"
+        "type": "bert_base"
     },
     "hyperparameter": {
         "batch_size": 8,
diff --git a/bert/colossalai_utils/bert_config_tp1d.json b/bert/colossalai_utils/bert_config_tp1d.json
index db1acd6..f78347c 100644
--- a/bert/colossalai_utils/bert_config_tp1d.json
+++ b/bert/colossalai_utils/bert_config_tp1d.json
@@ -1,19 +1,22 @@
 {
     "method": "colossalai",
     "model": {
-        "type": "bert_small"
+        "type": "bert_base"
     },
     "hyperparameter": {
         "batch_size": 8,
         "num_epochs": 20,
         "steps_per_epoch": 10
     },
+    "fp16": {
+        "mode": "AMP_TYPE.NAIVE"
+    },
     "gradient_clipping": 1.0,
     "parallel": {
         "pipeline": 1,
         "tensor": {
             "mode": "1d",
-            "size": 2
+            "size": 1
         }
     },
     "use_mem_monitor": true
diff --git a/bert/colossalai_utils/bert_config_tp1dpp.json b/bert/colossalai_utils/bert_config_tp1dpp.json
index fcc687c..11bbdb8 100644
--- a/bert/colossalai_utils/bert_config_tp1dpp.json
+++ b/bert/colossalai_utils/bert_config_tp1dpp.json
@@ -1,7 +1,7 @@
 {
     "method": "colossalai",
     "model": {
-        "type": "bert_small"
+        "type": "bert_base"
     },
     "hyperparameter": {
         "batch_size": 8,
diff --git a/bert/colossalai_utils/bert_config_tp2d.json b/bert/colossalai_utils/bert_config_tp2d.json
index b3cebbf..260dd20 100644
--- a/bert/colossalai_utils/bert_config_tp2d.json
+++ b/bert/colossalai_utils/bert_config_tp2d.json
@@ -1,7 +1,7 @@
 {
     "method": "colossalai",
     "model": {
-        "type": "bert_small"
+        "type": "bert_base"
     },
     "hyperparameter": {
         "batch_size": 8,
diff --git a/bert/colossalai_utils/bert_config_tp2p5d.json b/bert/colossalai_utils/bert_config_tp2p5d.json
index e1bf528..3cfebea 100644
--- a/bert/colossalai_utils/bert_config_tp2p5d.json
+++ b/bert/colossalai_utils/bert_config_tp2p5d.json
@@ -1,7 +1,7 @@
 {
     "method": "colossalai",
     "model": {
-        "type": "bert_small"
+        "type": "bert_base"
    },
     "hyperparameter": {
         "batch_size": 8,
diff --git a/bert/colossalai_utils/bert_config_tp3d.json b/bert/colossalai_utils/bert_config_tp3d.json
index daca835..f877f81 100644
--- a/bert/colossalai_utils/bert_config_tp3d.json
+++ b/bert/colossalai_utils/bert_config_tp3d.json
@@ -1,7 +1,7 @@
 {
     "method": "colossalai",
     "model": {
-        "type": "bert_small"
+        "type": "bert_base"
     },
     "hyperparameter": {
         "batch_size": 8,
diff --git a/bert/colossalai_utils/bert_config_zerov2.json b/bert/colossalai_utils/bert_config_zero.json
similarity index 95%
rename from bert/colossalai_utils/bert_config_zerov2.json
rename to bert/colossalai_utils/bert_config_zero.json
index 286f9df..b6626bf 100644
--- a/bert/colossalai_utils/bert_config_zerov2.json
+++ b/bert/colossalai_utils/bert_config_zero.json
@@ -1,7 +1,7 @@
 {
     "method": "colossalai",
     "model": {
-        "type": "bert_small"
+        "type": "bert_base"
     },
     "hyperparameter": {
         "batch_size": 8,
diff --git a/bert/colossalai_utils/bert_config_zerotppp.json b/bert/colossalai_utils/bert_config_zerotppp.json
new file mode 100644
index 0000000..46b1cdd
--- /dev/null
+++ b/bert/colossalai_utils/bert_config_zerotppp.json
@@ -0,0 +1,35 @@
+{
+    "method": "colossalai",
+    "model": {
+        "type": "bert_base"
+    },
+    "hyperparameter": {
+        "batch_size": 8,
+        "num_epochs": 100,
+        "steps_per_epoch": 10
+    },
+    "gradient_clipping": 1.0,
+    "zero": {
+        "model_config": {
+            "offload_config": {
+                "device": "cpu"
+            }
+        },
+        "optimizer_config": {
+            "cpu_offload": true,
+            "initial_scale": 256,
+            "min_scale": 1,
+            "growth_factor": 2.0,
+            "backoff_factor": 0.5,
+            "growth_interval": 1000
+        }
+    },
+    "parallel": {
+        "pipeline":1,
+        "tensor": {
+            "mode": "1d",
+            "size": 2
+        }
+    },
+    "use_mem_monitor": true
+}
diff --git a/bert/colossalai_utils/model_zoo/__init__.py b/bert/colossalai_utils/model_zoo/__init__.py
index 2799632..b1df787 100644
--- a/bert/colossalai_utils/model_zoo/__init__.py
+++ b/bert/colossalai_utils/model_zoo/__init__.py
@@ -1,3 +1,3 @@
-from .colo_tp1dcol_bert import create_colo_bert_pipeline_model, ColoBertForMaskedLM, ColoBertMaskedLMLoss
+from .colo_bert import create_colo_bert_pipeline_model, ColoBertForMaskedLM, ColoBertMaskedLMLoss
 
 __all__ = ['create_colo_bert_pipeline_model', 'ColoBertForMaskedLM', 'ColoBertMaskedLMLoss']
diff --git a/bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py b/bert/colossalai_utils/model_zoo/colo_bert.py
similarity index 97%
rename from bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py
rename to bert/colossalai_utils/model_zoo/colo_bert.py
index 6e869d6..3a48031 100644
--- a/bert/colossalai_utils/model_zoo/colo_tp1dcol_bert.py
+++ b/bert/colossalai_utils/model_zoo/colo_bert.py
@@ -682,7 +682,7 @@ def __init__(self, config):
 
         # The output weights are the same as the input embeddings, but there is
         # an output-only bias for each token.
-        self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=True)
+        self.decoder = col_nn.Classifier(config.hidden_size, config.vocab_size, bias=True)
 
     def forward(self, hidden_states):
         ###print("BertLMPredictionHead:input:", hidden_states.shape)
@@ -712,9 +712,6 @@ def __init__(self, config):
         self.bert = BertModel(config, add_pooling_layer=False)
         self.cls = BertOnlyMLMHead(config)
 
-        # Initialize weights and apply final processing
-        self.post_init()
-
     def get_output_embeddings(self):
         return self.cls.predictions.decoder
 
diff --git a/bert/colossalai_utils/utils.py b/bert/colossalai_utils/utils.py
index 669819e..4084052 100644
--- a/bert/colossalai_utils/utils.py
+++ b/bert/colossalai_utils/utils.py
@@ -8,10 +8,8 @@ def init_w_col(builder):
     from colossalai.core import global_context as gpc
     from colossalai.nn.optimizer import CPUAdam
     from colossalai.zero.init_ctx import ZeroInitContext
-    from colossalai.zero.shard_utils import (BucketTensorShardStrategy,
-                                             TensorShardStrategy)
-    from colossalai.zero.sharded_model import ShardedModelV2
-    from colossalai.zero.sharded_optim import ShardedOptimizerV2
+    from colossalai.zero.shard_utils import (BucketTensorShardStrategy)
+    from colossalai.amp import AMP_TYPE
 
     colossalai.launch_from_torch(config=CONFIG)
 
diff --git a/bert/common/helper.py b/bert/common/helper.py
index 0ed946d..e726832 100644
--- a/bert/common/helper.py
+++ b/bert/common/helper.py
@@ -5,9 +5,9 @@
 from transformers import BertConfig, BertTokenizer
 
 from zero.common.utils import CONFIG, ModelFromHF, get_model_size
-from bert.colossalai_utils.model_zoo.colo_tp1dcol_bert import ColoBertMaskedLMLoss, ColoBertForMaskedLM, create_colo_bert_pipeline_model
+from bert.colossalai_utils.model_zoo.colo_bert import ColoBertMaskedLMLoss, ColoBertForMaskedLM, create_colo_bert_pipeline_model
 
-_bert_small = dict(
+_bert_base = dict(
     seq_length=512,
     vocab_size=50304,
     hidden_size=768,
@@ -18,9 +18,21 @@
     evaluation='ppl',
 )
 
+_bert_large = dict(
+    seq_length=512,
+    vocab_size=50304,
+    hidden_size=1024,
+    num_heads=16,
+    depth=24,
+    ff_size=3072,
+    checkpoint=False,
+    evaluation='ppl',
+)
+
 _bert_configurations = dict(
-    bert=_bert_small,
-    bert_small=_bert_small,
+    bert=_bert_base,
+    bert_base=_bert_base,
+    bert_large=_bert_large
 )
 
 _default_hyperparameters = dict(
diff --git a/bert/torch_utils/bert_config.json b/bert/torch_utils/bert_config.json
deleted file mode 100644
index dc0640a..0000000
--- a/bert/torch_utils/bert_config.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-    "method": "torch",
-    "model": {
-        "type": "bert_small"
-    },
-    "hyperparameter": {
-        "batch_size": 8,
-        "num_epochs": 300,
-        "steps_per_epoch": 10
-    },
-    "fp16": {
-        "enabled": true,
-        "init_scale": 32768,
-        "growth_factor": 2.0,
-        "backoff_factor": 0.5,
-        "growth_interval": 1000
-    },
-    "gradient_clipping": 1.0,
-    "use_mem_monitor": true
-}

From 5ce8f1b09f8de68387a67d9e788797be76134c99 Mon Sep 17 00:00:00 2001
From: Wesley
Date: Sat, 2 Apr 2022 18:07:59 +0800
Subject: [PATCH 5/5] Add README

---
 bert/README.md                              | 22 +++++++++++++++++++
 bert/colossalai_utils/bert_config_tp1d.json |  7 ++-----
 bert/colossalai_utils/requirement.txt       |  7 +++++++
 bert/colossalai_utils/utils.py              |  4 +++-
 4 files changed, 34 insertions(+), 6 deletions(-)
 create mode 100644 bert/README.md
 create mode 100644 bert/colossalai_utils/requirement.txt

diff --git a/bert/README.md b/bert/README.md
new file mode 100644
index 0000000..4924ad9
--- /dev/null
+++ b/bert/README.md
@@ -0,0 +1,22 @@
+# Bert Benchmark
+Bert Benchmark with data parallel, tensor parallel(tp), pipeline parallel(pp) and ZeRO.
+
+## Setup
+1. Install dependencies if you do not have them
+```
+pip install -r requirement.txt
+```
+
+2. Add root dir into PYTHONPATH
+```
+export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
+```
+
+## Bert Usage
+
+1. Prepare datasets and tokenizers from HuggingFace Hub if necessary (e.g. we provide an example of training `wikitext-2`).
+
+2. Run benchmark with one of the systems to evaluate
+```
+DATA=/PATH/TO/DATASET TOKENIZER=/PATH/TO/TOKENIZER LOG=/PATH/TO/LOG torchrun --nproc_per_node=NUM_GPUS run.py --config=CONFIG_FILE
+```
\ No newline at end of file
diff --git a/bert/colossalai_utils/bert_config_tp1d.json b/bert/colossalai_utils/bert_config_tp1d.json
index f78347c..a835a03 100644
--- a/bert/colossalai_utils/bert_config_tp1d.json
+++ b/bert/colossalai_utils/bert_config_tp1d.json
@@ -5,18 +5,15 @@
     },
     "hyperparameter": {
         "batch_size": 8,
-        "num_epochs": 20,
+        "num_epochs": 10,
         "steps_per_epoch": 10
     },
-    "fp16": {
-        "mode": "AMP_TYPE.NAIVE"
-    },
     "gradient_clipping": 1.0,
     "parallel": {
         "pipeline": 1,
         "tensor": {
             "mode": "1d",
-            "size": 1
+            "size": 2
         }
     },
     "use_mem_monitor": true
diff --git a/bert/colossalai_utils/requirement.txt b/bert/colossalai_utils/requirement.txt
new file mode 100644
index 0000000..24fcf65
--- /dev/null
+++ b/bert/colossalai_utils/requirement.txt
@@ -0,0 +1,7 @@
+
+torch>=1.10 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+torchvision -f https://download.pytorch.org/whl/cu113/torch_stable.html
+transformers
+datasets
+colossalai
+rich
\ No newline at end of file
diff --git a/bert/colossalai_utils/utils.py b/bert/colossalai_utils/utils.py
index 4084052..abad460 100644
--- a/bert/colossalai_utils/utils.py
+++ b/bert/colossalai_utils/utils.py
@@ -9,7 +9,9 @@ def init_w_col(builder):
     from colossalai.nn.optimizer import CPUAdam
     from colossalai.zero.init_ctx import ZeroInitContext
     from colossalai.zero.shard_utils import (BucketTensorShardStrategy)
-    from colossalai.amp import AMP_TYPE
+
+    from colossalai.utils.memory_utils.utils import colo_set_process_memory_fraction
+    colo_set_process_memory_fraction(0.2)
 
     colossalai.launch_from_torch(config=CONFIG)
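
Reviewer note, appended for context and not part of the patch series: the `parallel` block that these configs introduce is what `colossalai.launch_from_torch(config=CONFIG)` in `bert/colossalai_utils/utils.py` consumes, and the product of `pipeline` and `tensor.size` is the number of ranks one model replica occupies, so for a single-node run the `--nproc_per_node` value in the README command has to be a multiple of that product (4 for `bert_config_tp1dpp.json`). The sketch below only illustrates that relationship; loading `CONFIG` with `json.load` is an assumption about how the `--config` file is read, since that code is not shown in this series.

```
# Illustrative sketch only. Reading the JSON config directly is an assumption;
# the patches above only show that utils.py calls launch_from_torch(config=CONFIG).
import json

import colossalai

with open('bert/colossalai_utils/bert_config_tp1dpp.json') as f:
    CONFIG = json.load(f)

# pipeline=2 and tensor size=2 -> each model replica spans 2 * 2 = 4 ranks,
# so torchrun must start a multiple of 4 processes for this config.
parallel = CONFIG['parallel']
print('ranks per replica:', parallel['pipeline'] * parallel['tensor']['size'])

# Reads RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT set by torchrun, then builds the
# data-, pipeline- and tensor-parallel process groups described in CONFIG['parallel'].
colossalai.launch_from_torch(config=CONFIG)
```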