This repository has been archived by the owner on Oct 16, 2023. It is now read-only.

refine code and add README to Bert #41

Merged · 6 commits · Apr 2, 2022
22 changes: 22 additions & 0 deletions bert/README.md
@@ -0,0 +1,22 @@
# BERT Benchmark
A BERT benchmark covering data parallelism, tensor parallelism (TP), pipeline parallelism (PP), and ZeRO.

## Setup
1. Install the dependencies if you do not already have them:
```
pip install -r requirement.txt
```

2. Add the root directory to `PYTHONPATH`:
```
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
```

## BERT Usage

1. Prepare the datasets and tokenizers from the Hugging Face Hub if necessary (e.g., we provide an example of training on `wikitext-2`).

2. Run the benchmark with one of the systems to evaluate:
```
DATA=/PATH/TO/DATASET TOKENIZER=/PATH/TO/TOKENIZER LOG=/PATH/TO/LOG torchrun --nproc_per_node=NUM_GPUS run.py --config=CONFIG_FILE
```
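
For example, a hypothetical single-node run on 4 GPUs using the 1D tensor-parallel config added in this PR (the dataset, tokenizer, and log paths are placeholders):
```
DATA=./data/wikitext-2 TOKENIZER=./tokenizers/bert-base-uncased LOG=./logs/tp1d \
torchrun --nproc_per_node=4 run.py --config=colossalai_utils/bert_config_tp1d.json
```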
2 changes: 1 addition & 1 deletion bert/colossalai_utils/bert_config_pp.json
@@ -1,7 +1,7 @@
{
"method": "colossalai",
"model": {
"type": "bert_small"
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
4 changes: 2 additions & 2 deletions bert/colossalai_utils/bert_config_tp1d.json
@@ -1,11 +1,11 @@
{
"method": "colossalai",
"model": {
"type": "bert_small"
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
"num_epochs": 20,
"num_epochs": 10,
"steps_per_epoch": 10
},
"gradient_clipping": 1.0,
2 changes: 1 addition & 1 deletion bert/colossalai_utils/bert_config_tp1dpp.json
@@ -1,7 +1,7 @@
{
"method": "colossalai",
"model": {
"type": "bert_small"
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
2 changes: 1 addition & 1 deletion bert/colossalai_utils/bert_config_tp2d.json
@@ -1,7 +1,7 @@
{
"method": "colossalai",
"model": {
"type": "bert_small"
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
2 changes: 1 addition & 1 deletion bert/colossalai_utils/bert_config_tp2p5d.json
@@ -1,7 +1,7 @@
{
"method": "colossalai",
"model": {
"type": "bert_small"
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
2 changes: 1 addition & 1 deletion bert/colossalai_utils/bert_config_tp3d.json
@@ -1,7 +1,7 @@
{
"method": "colossalai",
"model": {
"type": "bert_small"
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
@@ -1,7 +1,7 @@
{
"method": "colossalai",
"model": {
"type": "bert_small"
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
35 changes: 35 additions & 0 deletions bert/colossalai_utils/bert_config_zerotppp.json
@@ -0,0 +1,35 @@
{
"method": "colossalai",
"model": {
"type": "bert_base"
},
"hyperparameter": {
"batch_size": 8,
"num_epochs": 100,
"steps_per_epoch": 10
},
"gradient_clipping": 1.0,
"zero": {
"model_config": {
"offload_config": {
"device": "cpu"
}
},
"optimizer_config": {
"cpu_offload": true,
"initial_scale": 256,
"min_scale": 1,
"growth_factor": 2.0,
"backoff_factor": 0.5,
"growth_interval": 1000
}
},
"parallel": {
"pipeline":1,
"tensor": {
"mode": "1d",
"size": 2
}
},
"use_mem_monitor": true
}
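
The `optimizer_config` block above describes a standard dynamic loss-scale schedule: back off when gradients overflow, grow again after a run of clean steps. A minimal sketch of the update rule these keys imply (an illustration of the technique, not ColossalAI's actual implementation):
```
# Illustrative dynamic loss scaling driven by the config keys above.
class DynamicLossScale:
    def __init__(self, initial_scale=256, min_scale=1, growth_factor=2.0,
                 backoff_factor=0.5, growth_interval=1000):
        self.scale = float(initial_scale)
        self.min_scale = min_scale
        self.growth_factor = growth_factor
        self.backoff_factor = backoff_factor
        self.growth_interval = growth_interval
        self.good_steps = 0  # consecutive steps without overflow

    def update(self, overflow: bool):
        if overflow:
            # Back off, but never drop below min_scale.
            self.scale = max(self.scale * self.backoff_factor, self.min_scale)
            self.good_steps = 0
        else:
            self.good_steps += 1
            if self.good_steps >= self.growth_interval:
                # Grow the scale after growth_interval clean steps.
                self.scale *= self.growth_factor
                self.good_steps = 0
```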
2 changes: 1 addition & 1 deletion bert/colossalai_utils/model_zoo/__init__.py
@@ -1,3 +1,3 @@
- from .colo_tp1dcol_bert import create_colo_bert_pipeline_model, ColoBertForMaskedLM, ColoBertMaskedLMLoss
+ from .colo_bert import create_colo_bert_pipeline_model, ColoBertForMaskedLM, ColoBertMaskedLMLoss

__all__ = ['create_colo_bert_pipeline_model', 'ColoBertForMaskedLM', 'ColoBertMaskedLMLoss']
@@ -682,7 +682,7 @@ def __init__(self, config):

# The output weights are the same as the input embeddings, but there is
# an output-only bias for each token.
- self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=True)
+ self.decoder = col_nn.Classifier(config.hidden_size, config.vocab_size, bias=True)

def forward(self, hidden_states):
###print("BertLMPredictionHead:input:", hidden_states.shape)
@@ -712,9 +712,6 @@ def __init__(self, config):
self.bert = BertModel(config, add_pooling_layer=False)
self.cls = BertOnlyMLMHead(config)

- # Initialize weights and apply final processing
- self.post_init()

def get_output_embeddings(self):
return self.cls.predictions.decoder

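For context, the `get_output_embeddings` hook kept above is what HuggingFace-style weight tying uses to share the decoder weight with the input embeddings. A minimal sketch of that standard pattern, assuming the usual `BertModel` attribute layout (not code from this PR):
```
# Standard HF-style weight tying: decoder and input embeddings share one tensor.
def tie_output_embeddings(model):
    decoder = model.get_output_embeddings()            # cls.predictions.decoder
    word_emb = model.bert.embeddings.word_embeddings   # input embedding layer
    decoder.weight = word_emb.weight                   # same Parameter object
```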
7 changes: 7 additions & 0 deletions bert/colossalai_utils/requirement.txt
@@ -0,0 +1,7 @@

torch>=1.10 -f https://download.pytorch.org/whl/cu113/torch_stable.html
torchvision -f https://download.pytorch.org/whl/cu113/torch_stable.html
transformers
datasets
colossalai
rich
8 changes: 4 additions & 4 deletions bert/colossalai_utils/utils.py
@@ -8,10 +8,10 @@ def init_w_col(builder):
from colossalai.core import global_context as gpc
from colossalai.nn.optimizer import CPUAdam
from colossalai.zero.init_ctx import ZeroInitContext
- from colossalai.zero.shard_utils import (BucketTensorShardStrategy,
- TensorShardStrategy)
- from colossalai.zero.sharded_model import ShardedModelV2
- from colossalai.zero.sharded_optim import ShardedOptimizerV2
+ from colossalai.zero.shard_utils import (BucketTensorShardStrategy)

+ from colossalai.utils.memory_utils.utils import colo_set_process_memory_fraction
+ colo_set_process_memory_fraction(0.2)

colossalai.launch_from_torch(config=CONFIG)

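For reference, a minimal sketch of how the imports that survive this change are typically wired together in ColossalAI of this era; the signatures here are assumptions inferred from the import names, not verified against this PR:
```
# Sketch only (assumed ColossalAI ~0.1.x API). Parameters are sharded as they
# are created inside ZeroInitContext; CPUAdam keeps optimizer state on the CPU.
shard_strategy = BucketTensorShardStrategy()
with ZeroInitContext(target_device=torch.device('cuda'),
                     shard_strategy=shard_strategy,
                     shard_param=True):
    model = builder()  # hypothetical model builder, as passed to init_w_col
optimizer = CPUAdam(model.parameters(), lr=1e-3)
```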
20 changes: 16 additions & 4 deletions bert/common/helper.py
@@ -5,9 +5,9 @@
from transformers import BertConfig, BertTokenizer

from zero.common.utils import CONFIG, ModelFromHF, get_model_size
- from bert.colossalai_utils.model_zoo.colo_tp1dcol_bert import ColoBertMaskedLMLoss, ColoBertForMaskedLM, create_colo_bert_pipeline_model
+ from bert.colossalai_utils.model_zoo.colo_bert import ColoBertMaskedLMLoss, ColoBertForMaskedLM, create_colo_bert_pipeline_model

- _bert_small = dict(
+ _bert_base = dict(
seq_length=512,
vocab_size=50304,
hidden_size=768,
@@ -18,9 +18,21 @@
evaluation='ppl',
)

+ _bert_large = dict(
+ seq_length=512,
+ vocab_size=50304,
+ hidden_size=1024,
+ num_heads=16,
+ depth=24,
+ ff_size=3072,
+ checkpoint=False,
+ evaluation='ppl',
+ )

_bert_configurations = dict(
- bert=_bert_small,
- bert_small=_bert_small,
+ bert=_bert_base,
+ bert_base=_bert_base,
+ bert_large=_bert_large
)

_default_hyperparameters = dict(
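With the table above, a config file selects a model by name. A minimal sketch of the lookup, assuming `CONFIG` is the parsed JSON config imported at the top of this file:
```
# Hypothetical lookup: map the config's "model.type" string to its dict.
model_type = CONFIG['model']['type']           # e.g. "bert_base" or "bert_large"
model_cfg = _bert_configurations[model_type]   # seq_length, hidden_size, depth, ...
```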
20 changes: 0 additions & 20 deletions bert/torch_utils/bert_config.json

This file was deleted.