This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

[Bug Fix] trainer.update(1) should be used after loss.mean() is called #1000
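The fix follows the usual Gluon pattern the title describes: once the per-sample loss has been averaged with `loss.mean()`, the gradient is already normalized over the batch, so the optimizer step should use a batch size of 1 rather than the actual batch size. A minimal sketch of that pattern, using a toy network and placeholder data (none of this code is taken from the PR itself):

```python
# Minimal sketch of "call trainer.update(1) after loss.mean()".
# The network, optimizer settings, and data below are placeholders.
import mxnet as mx
from mxnet import autograd, gluon

net = gluon.nn.Dense(2)
net.initialize()
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
# update_on_kvstore=False so that trainer.update() can be called manually.
trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': 1e-3}, update_on_kvstore=False)

x = mx.nd.random.uniform(shape=(8, 4))
y = mx.nd.array([0, 1, 0, 1, 0, 1, 0, 1])

with autograd.record():
    loss = loss_fn(net(x), y).mean()   # average the loss over the batch first
loss.backward()
trainer.allreduce_grads()
trainer.update(1)                      # gradient is already averaged, so step with batch_size=1
```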

Open

wants to merge 49 commits into base: v0.x

Changes from 1 commit

Commits (49)
de7b23d
clean slate for 1.x
szha Mar 18, 2020
01122db
[Numpy] Numpy version of GluonNLP (#1225)
sxjscience Jun 10, 2020
982a416
Fix bert cfg (#1245)
zheyuye Jun 11, 2020
789e2b9
fix download
sxjscience Jun 11, 2020
b714eac
[Numpy] Try to fix the CI (#1248)
sxjscience Jun 11, 2020
85b6f09
[Numpy] Add "match_tokens_with_char_spans" + Enable downloading from …
sxjscience Jun 16, 2020
ee1f0e3
[Numpy] Update QA Dataset and revise run_squad (#1250)
zheyuye Jun 18, 2020
e06ff01
Pin mxnet version range on CI (#1257)
leezu Jul 7, 2020
689eba9
[CI] AWS batch job tool for GluonNLP (Part I) (#1251)
szha Jul 7, 2020
cd48efd
Update codecov action to handle different OS and Python versions (#1254)
leezu Jul 8, 2020
83e1f13
Use Amazon S3 Transfer Acceleration (#1260)
leezu Jul 10, 2020
a646c34
[FEATURE] update backtranslation and add multinomial sampler (#1259)
hutao965 Jul 11, 2020
ea9152b
Fixes to make the CI more stable (#1265)
sxjscience Jul 16, 2020
70a1887
Update for Block API (#1261)
leezu Jul 17, 2020
9d83fe6
Fix parameter share regex (#1267)
leezu Jul 17, 2020
4743afc
Add fp16 support for Bert QA inference (#1264)
MoisesHer Jul 17, 2020
e78a24e
[CI] update batch to gluonnlp-dev (#1268)
szha Jul 18, 2020
3a0ed9f
[Numpy] Refactor Roberta (#1269)
zheyuye Jul 21, 2020
f407b8e
[CI] Batch cpu version (#1275)
szha Jul 22, 2020
57eb411
[Numpy] Fix conversion toolkits (#1274)
zheyuye Jul 23, 2020
74bd2ce
[Feature] Add FP16 inference support to NMT + Add BoundedBudgetSample…
hutao965 Jul 23, 2020
d76897b
Add embedding related methods in numpy version (#1263)
acphile Jul 28, 2020
4d43f82
add subversion/wget to docker, add readme (#1279)
szha Jul 28, 2020
3c87457
Add layout + compute_layout support: TransformerNMT, BERT, ALBERT, EL…
sxjscience Jul 29, 2020
033214e
[Numpy] Fix SQuAD + Fix GLUE downloading (#1280)
sxjscience Jul 29, 2020
2294421
[Numpy Refactor] BART (#1282)
zheyuye Jul 30, 2020
1f9ad44
Horovod support for pretraining and fine-tuning squad (#1276)
zheyuye Aug 1, 2020
7e1f9d0
[DOC] Add the basic documentation for the embedding API (#1281)
acphile Aug 4, 2020
20af58f
Fix gelu (#1287)
zheyuye Aug 5, 2020
ded0f99
fix prepare_openwebtext (#1289)
ZiyueHuang Aug 6, 2020
c33e62e
[FEATURE]Horovod support for training transformer + add mirror data f…
hutao965 Aug 7, 2020
9e268c0
Fix electra (#1291)
zheyuye Aug 8, 2020
32e87d4
[Numpy] Benchmark the backbone models + Some fixes + Always use pytho…
sxjscience Aug 14, 2020
6ae558e
[FEATURE]Horovod support for training transformer (PART 2) (#1301)
hutao965 Aug 20, 2020
d8b68c6
[Numpy] Fix AWS Batch + Add Docker Support (#1302)
sxjscience Aug 20, 2020
d17ec4c
minor fix for run_electra.py & remove hybridization in the constructi…
ZiyueHuang Aug 22, 2020
99b35d8
Add Intro for batch + upload squad training command (#1305)
zheyuye Aug 22, 2020
d93356f
[MODEL] make beam search a hybrid block (#1310)
szha Aug 23, 2020
210dd0c
[Numpy] [Fix] Update README.md (#1306)
sxjscience Aug 23, 2020
b324ee6
[CI] Add GPU pytest + Append AWS Batch job submission to current pipe…
barry-jin Aug 24, 2020
3b14d69
[CI] Update unittests-gpu (#1313)
barry-jin Aug 24, 2020
dca17ee
automatically generate date suffix for dev versions (#1314)
szha Aug 25, 2020
39ec921
fix typo (#1317)
liuzh47 Aug 26, 2020
970318d
fix typo (#1318)
liuzh47 Aug 26, 2020
bba8697
[CI] Update GPU Test Workflow + Update Some Tests and README (#1316)
barry-jin Aug 28, 2020
66e5e05
fix https://github.com/dmlc/gluon-nlp/issues/1315 (#1319)
ZiyueHuang Aug 28, 2020
ff95fb4
[CI] Fix Source Reference Issues (#1332)
barry-jin Sep 1, 2020
1bd85b6
[BUGFIX] fix valid candidates issue (#1323)
liuzh47 Sep 1, 2020
189bbdc
[MODEL] convert gpt2 model (#1328)
hutao965 Sep 1, 2020
[Numpy Refactor] BART (#1282)
* init

* fix convert roberta

* rename TransformerNMTModel as TransformerModel

* update bart

* fix

* fix

* update init

* add layernorm_embedding for transformer

* convert script

* encoder

* fix

* fix vocab

* fix roberta

* fix

* fix electra

* add conversion bash for roberta and xlmr

* ELECTRA SETUP

* convert bart decoder

* fix

* update

* testing output

* remove arange_like for embeddings

* fix

* update

* use_pooler for bart

* fix

* upload params for bart

* add test_models_bart

* fix cfg

* test bart

* update

* fix transformer

* Squashed commit of the following:

commit 9e1ffde
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 11:42:01 2020 +0800

    todo

commit 9a7c343
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 10:53:15 2020 +0800

    revert gelu

commit 0425346
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 10:49:52 2020 +0800

    re-upload bart

commit 516ae84
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 03:32:35 2020 +0800

    use_qkv_bias for transformer

commit 9d60cda
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 03:17:28 2020 +0800

    classifier_activation

commit 510d991
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 02:33:22 2020 +0800

    test

commit 1b5fa7b
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 01:48:01 2020 +0800

    fix comment1

commit 6533601
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 01:27:44 2020 +0800

    fix comment

commit a8853f9
Author: ZheyuYe <[email protected]>
Date:   Thu Jul 30 01:10:06 2020 +0800

    Squashed commit of the following:

    commit 232e0b6
    Author: ZheyuYe <[email protected]>
    Date:   Thu Jul 30 01:05:17 2020 +0800

        update

    commit 995e5d7
    Author: ZheyuYe <[email protected]>
    Date:   Thu Jul 30 01:01:56 2020 +0800

        fix

    commit 9623240
    Author: ZheyuYe <[email protected]>
    Date:   Thu Jul 30 00:52:17 2020 +0800

        fix

    commit d9c4140
    Author: ZheyuYe <[email protected]>
    Date:   Wed Jul 29 23:07:10 2020 +0800

        fix transformer

    commit e49fbe1
    Author: ZheyuYe <[email protected]>
    Date:   Wed Jul 29 22:18:12 2020 +0800

        update

    commit 1f75b26
    Author: ZheyuYe <[email protected]>
    Date:   Wed Jul 29 22:04:08 2020 +0800

        test bart

    commit 5bab516
    Author: ZheyuYe <[email protected]>
    Date:   Wed Jul 29 21:34:47 2020 +0800

        fix cfg

    commit 6c62a29
    Merge: 3366cf3 033214e
    Author: ZheyuYe <[email protected]>
    Date:   Wed Jul 29 21:33:10 2020 +0800

        Merge remote-tracking branch 'upstream/numpy' into bart

    commit 033214e
    Author: Xingjian Shi <[email protected]>
    Date:   Wed Jul 29 00:36:57 2020 -0700

        [Numpy] Fix SQuAD + Fix GLUE downloading (#1280)

        * Update run_squad.py

        * Update run_squad.py

        * Update prepare_glue.py

    commit 3c87457
    Author: Xingjian Shi <[email protected]>
    Date:   Tue Jul 28 18:03:21 2020 -0700

        Add layout + compute_layout support: TransformerNMT, BERT, ALBERT, ELECTRA, MobileBERT, RoBERTA, XLMR (#1258)

        * Add layout support

        * fix test

        * Update transformer.py

        * Update transformer.py

        * Update README.md

        * try to add set_layout

        * update test case

        * fix

        * update

        * update

        * update

        * Update bert.py

        * fix bug

        * update

        * Update test_models_bert.py

        * Update tokenizers.py

        * add compute layout

        * Update xlmr.py

        * Update test_models_bert.py

        * revise test cases

        * Update layers.py

        * move jieba to try import

        * fix

        * Update transformer.py

        * fix

        * Update bert.py

        * Update setup.py

        * Update test_models_bert.py

        * Update test_models_bert.py

        * fix

        * update

        * Revise

        * Update electra.py

        * Update electra.py

        * Update test_models_electra.py

        * fix

        * fix bug

        * Update test_models_albert.py

        * add more testcases

        * fix

        * Update albert.py

        * Update albert.py

        * fix bug

        * fix testcase

        * Update test_models_electra.py

        * Update bert.py

        * update

        * Update test_models_electra.py

        * Update mobilebert.py

        * Update mobilebert.py

        * update mobilebert

        * Update test_models_mobilebert.py

        * Update mobilebert.py

        * fix bug

        * Update roberta.py

        * fix roberta

        * update

        * update

        * fix import

        * fix bug

        * update

        * reduce test workloads

        * address comment

        * address comment

    commit 4d43f82
    Author: Sheng Zha <[email protected]>
    Date:   Mon Jul 27 20:21:00 2020 -0700

        add subversion/wget to docker, add readme (#1279)

    commit d76897b
    Author: phile <[email protected]>
    Date:   Tue Jul 28 10:10:13 2020 +0800

        Add embedding related methods in numpy version (#1263)

        * A draft for embedding

        * fix embed_loader

        * add hyperbolic space and some updates

        * revise evaluation

        * fix

        * simple fixes

        * move l2norm to op.py

        * new features

        * fix

        * update

        * add tests, update

        * newline

* fix comment

* use xavier for embedding initializer
zheyuye authored Jul 30, 2020

commit 2294421b990ce92fedfb5876aa3ee4dd119d83b4
55 changes: 32 additions & 23 deletions scripts/conversion_toolkits/README.md
@@ -12,6 +12,8 @@ The testing step mentioned above are controlled by the flag `--test`, in which t
tolerance of 1e-3 between gluon model with converted weights and original tensorflow model.
In addition, we can use GPU in all converting scripts by adding `--gpu 0`.

For the RoBERTa, XLM-R and BART models, please install the [fairseq](https://github.com/pytorch/fairseq#requirements-and-installation) package locally as `pip install git+https://github.com/pytorch/fairseq.git@master`.

## BERT
Convert model from [BERT LIST](https://tfhub.dev/google/collections/bert/1).

@@ -37,25 +39,42 @@ do
done
```

## RoBERTa
## ELECTRA
A TF Hub module is not currently available for the ELECTRA model.
Thus, you will need to clone the [electra repository](https://github.com/ZheyuYe/electra)
and download the checkpoint. The parameters are converted from the local checkpoints.
By running the following command, you can convert and verify the ELECTRA model with both the discriminator and the generator.

Notice: please set `--electra_path` to the cloned path ~~or get this electra repository packaged by `pip install -e .`.~~

```bash
# Need to use TF 1.13.2 to use contrib layer
pip uninstall tensorflow
pip install tensorflow==1.13.2

# Actual conversion
bash convert_electra.sh
```

## Mobile Bert
```bash
pip install fairseq==0.9.0
bash convert_mobilebert.sh
```

## RoBERTa
```bash
for model in base large
do
mkdir roberta_${model}
wget "https://dl.fbaipublicfiles.com/fairseq/models/roberta.${model}.tar.gz"
tar zxf roberta.${model}.tar.gz --directory roberta_${model}
python convert_fairseq_roberta.py --fairseq_model_path roberta_${model}/roberta.${model} --model_size ${model} --test
python convert_fairseq_roberta.py --fairseq_model_path roberta_${model}/roberta.${model} --test
done
```

## XLM-R

```bash
pip install fairseq==0.9.0

for model in base large
do
mkdir xlmr_${model}
@@ -65,23 +84,13 @@ do
done
```

## ELECTRA
The TF Hub is not available for ELECTRA model currently.
Thus, you will need to clone the [electra repository](https://github.com/ZheyuYe/electra)
and download the checkpoint. The parameters are converted from local checkpoints.
By running the following command, you can convert + verify the ELECTRA model with both the discriminator and the generator.

Notice: pleas set up the `--electra_path` with the cloned path or get this electra repository packaged by `pip install -e .`.

## BART
```bash
# Need to use TF 1.13.2 to use contrib layer
pip install tensorflow==1.13.2 --upgrade --force-reinstall

# Actual conversion
bash convert_electra.sh
```

## Mobile Bert
```bash
bash convert_mobilebert.sh
for model in base large
do
mkdir bart_${model}
wget "https://dl.fbaipublicfiles.com/fairseq/models/bart.${model}.tar.gz"
tar zxf bart.${model}.tar.gz --directory bart_${model}
python convert_fairseq_bart.py --fairseq_model_path bart_${model}/bart.${model} --test
done
```
7 changes: 7 additions & 0 deletions scripts/conversion_toolkits/convert_bart.sh
@@ -0,0 +1,7 @@
for model in base large
do
mkdir bart_${model}
wget "https://dl.fbaipublicfiles.com/fairseq/models/bart.${model}.tar.gz"
tar zxf bart.${model}.tar.gz --directory bart_${model}
python convert_fairseq_bart.py --fairseq_model_path bart_${model}/bart.${model} --test
done
17 changes: 10 additions & 7 deletions scripts/conversion_toolkits/convert_electra.py
@@ -53,7 +53,9 @@ def read_tf_checkpoint(path):
return tensors


def get_dict_config(model_size, electra_dir):
def get_dict_config(model_size, electra_path):
sys.path.append(electra_path)
electra_dir = os.path.abspath(os.path.join(os.path.dirname(electra_path), os.path.pardir))
sys.path.append(electra_dir)
from electra.util.training_utils import get_bert_config
from electra.configure_pretraining import PretrainingConfig
@@ -100,7 +102,7 @@ def convert_tf_config(config_dict, vocab_size):
return cfg


def convert_tf_assets(tf_assets_dir, model_size, electra_dir):
def convert_tf_assets(tf_assets_dir, model_size, electra_path):
"""Convert the assets file including config, vocab and tokenizer model"""
file_names = os.listdir(tf_assets_dir)
vocab_path = None
@@ -113,7 +115,7 @@ def convert_tf_assets(tf_assets_dir, model_size, electra_dir):
if vocab_path:
vocab_path = os.path.join(tf_assets_dir, vocab_path)
vocab_size = len(open(vocab_path, 'rU').readlines())
config_dict = get_dict_config(model_size, electra_dir)
config_dict = get_dict_config(model_size, electra_path)
cfg = convert_tf_config(config_dict, vocab_size)
return cfg, vocab_path

@@ -190,12 +192,12 @@ def get_name_map(tf_names, convert_type='backbone'):
return name_map


def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, electra_dir):
def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, electra_path):
ctx = mx.gpu(gpu) if gpu is not None else mx.cpu()
if not os.path.exists(save_dir):
os.makedirs(save_dir)

cfg, vocab_path = convert_tf_assets(model_dir, model_size, electra_dir)
cfg, vocab_path = convert_tf_assets(model_dir, model_size, electra_path)
with open(os.path.join(save_dir, 'model.yml'), 'w') as of:
of.write(cfg.dump())
new_vocab = HuggingFaceWordPieceTokenizer(
@@ -234,6 +236,8 @@ def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, elec
tf_names = list(tf_names)

# reload the electra module for this local scope
sys.path.append(electra_path)
electra_dir = os.path.abspath(os.path.join(os.path.dirname(electra_path), os.path.pardir))
sys.path.append(electra_dir)
from electra.util.training_utils import get_bert_config
from electra.configure_pretraining import PretrainingConfig
@@ -426,11 +430,10 @@ def convert_qkv_weights(tf_prefix, mx_prefix):
logging_config()
save_dir = args.save_dir if args.save_dir is not None else os.path.basename(
args.tf_model_path) + '_gluon'
electra_dir = os.path.abspath(os.path.join(os.path.dirname(args.electra_path), os.path.pardir))
convert_tf_model(
args.tf_model_path,
save_dir,
args.test,
args.model_size,
args.gpu,
electra_dir)
args.electra_path)
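For reference, the `electra_path` handling added above boils down to making the cloned electra repository importable before pulling in its config helpers. A standalone sketch that mirrors the added lines (the local path is a placeholder and must point at a real clone for the imports to resolve):

```python
import os
import sys

def make_electra_importable(electra_path):
    # Mirror of the lines added in convert_electra.py: put the cloned repo path
    # and a directory above it on sys.path so the `electra.*` imports resolve.
    sys.path.append(electra_path)
    electra_dir = os.path.abspath(os.path.join(os.path.dirname(electra_path), os.path.pardir))
    sys.path.append(electra_dir)

# Placeholder path to a local clone of https://github.com/ZheyuYe/electra
make_electra_importable('/path/to/electra')
from electra.util.training_utils import get_bert_config        # noqa: E402
from electra.configure_pretraining import PretrainingConfig    # noqa: E402
```

With this in place, `--electra_path` only needs to point at the cloned repository; the script derives the extra `sys.path` entry itself instead of requiring a precomputed `electra_dir`.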