Merge branch 'dev'

# Conflicts: # setup.py
fastnlp · Dec 6, 2021 · fb645d3 · fb645d3
2 parents 3cb01d1 + 2e3a689
commit fb645d3
Show file tree

Hide file tree

Showing 14 changed files with 325 additions and 43 deletions.
diff --git a/README.md b/README.md
@@ -39,30 +39,30 @@ python -m spacy download en
 
 
 ## fastNLP教程
-中文[文档](https://fastnlp.readthedocs.io/)、[教程](https://fastnlp.readthedocs.io/zh/latest/user/tutorials.html)
+中文[文档](http://www.fastnlp.top/docs/fastNLP/)、 [教程](http://www.fastnlp.top/docs/fastNLP/user/quickstart.html)
 
 ### 快速入门
 
-- [0. 快速入门](https://fastnlp.readthedocs.io/zh/latest/user/quickstart.html)
+- [Quick-1. 文本分类](http://www.fastnlp.top/docs/fastNLP/tutorials/%E6%96%87%E6%9C%AC%E5%88%86%E7%B1%BB.html)
+- [Quick-2. 序列标注](http://www.fastnlp.top/docs/fastNLP/tutorials/%E5%BA%8F%E5%88%97%E6%A0%87%E6%B3%A8.html)
 
 ### 详细使用教程
 
-- [1. 使用DataSet预处理文本](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_1_data_preprocess.html)
-- [2. 使用Vocabulary转换文本与index](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_2_vocabulary.html)
-- [3. 使用Embedding模块将文本转成向量](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_3_embedding.html)
-- [4. 使用Loader和Pipe加载并处理数据集](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_4_load_dataset.html)
-- [5. 动手实现一个文本分类器I-使用Trainer和Tester快速训练和测试](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_5_loss_optimizer.html)
-- [6. 动手实现一个文本分类器II-使用DataSetIter实现自定义训练过程](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_6_datasetiter.html)
-- [7. 使用Metric快速评测你的模型](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_7_metrics.html)
-- [8. 使用Modules和Models快速搭建自定义模型](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_8_modules_models.html)
-- [9. 快速实现序列标注模型](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_9_seq_labeling.html)
-- [10. 使用Callback自定义你的训练过程](https://fastnlp.readthedocs.io/zh/latest/tutorials/tutorial_10_callback.html)
+- [1. 使用DataSet预处理文本](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_1_data_preprocess.html)
+- [2. 使用Vocabulary转换文本与index](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_2_vocabulary.html)
+- [3. 使用Embedding模块将文本转成向量](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_3_embedding.html)
+- [4. 使用Loader和Pipe加载并处理数据集](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_4_load_dataset.html)
+- [5. 动手实现一个文本分类器I-使用Trainer和Tester快速训练和测试](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_5_loss_optimizer.html)
+- [6. 动手实现一个文本分类器II-使用DataSetIter实现自定义训练过程](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_6_datasetiter.html)
+- [7. 使用Metric快速评测你的模型](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_7_metrics.html)
+- [8. 使用Modules和Models快速搭建自定义模型](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_8_modules_models.html)
+- [9. 使用Callback自定义你的训练过程](http://www.fastnlp.top/docs/fastNLP/tutorials/tutorial_9_callback.html)
 
 ### 扩展教程
 
-- [Extend-1. BertEmbedding的各种用法](https://fastnlp.readthedocs.io/zh/latest/tutorials/extend_1_bert_embedding.html)
-- [Extend-2. 分布式训练简介](https://fastnlp.readthedocs.io/zh/latest/tutorials/extend_2_dist.html)
-- [Extend-3. 使用fitlog 辅助 fastNLP 进行科研](https://fastnlp.readthedocs.io/zh/latest/tutorials/extend_3_fitlog.html)
+- [Extend-1. BertEmbedding的各种用法](http://www.fastnlp.top/docs/fastNLP/tutorials/extend_1_bert_embedding.html)
+- [Extend-2. 分布式训练简介](http://www.fastnlp.top/docs/fastNLP/tutorials/extend_2_dist.html)
+- [Extend-3. 使用fitlog 辅助 fastNLP 进行科研](http://www.fastnlp.top/docs/fastNLP/tutorials/extend_3_fitlog.html)
 
 
 ## 内置组件

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,8 +1,4 @@
-numpy>=1.14.2
-http://download.pytorch.org/whl/cpu/torch-0.4.1-cp36-cp36m-linux_x86_64.whl
-torchvision>=0.1.8
-sphinx-rtd-theme==0.4.1
-tensorboardX>=1.4
-tqdm>=4.28.1
-ipython>=6.4.0
-ipython-genutils>=0.2.0
+sphinx==3.2.1
+docutils==0.16
+sphinx-rtd-theme==0.5.0
+readthedocs-sphinx-search==0.1.0rc3
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -4,6 +4,10 @@ fastNLP 中文文档
 `fastNLP <https://github.com/fastnlp/fastNLP/>`_ 是一款轻量级的自然语言处理（NLP）工具包。你既可以用它来快速地完成一个NLP任务，
 也可以用它在研究中快速构建更复杂的模型。
 
+.. hint::
+
+    如果你是从 readthedocs 访问的该文档，请跳转到我们的 `最新网站 <http://www.fastnlp.top/docs/fastNLP/>`_
+
 fastNLP具有如下的特性：
 
 - 统一的Tabular式数据容器，简化数据预处理过程;
@@ -41,7 +45,7 @@ API 文档
 fitlog文档
 ----------
 
-您可以 `点此 <https://fitlog.readthedocs.io/zh/latest/>`_  查看fitlog的文档。
+您可以 `点此 <http://www.fastnlp.top/docs/fitlog/>`_  查看fitlog的文档。
 fitlog 是由我们团队开发的日志记录+代码管理的工具。
 
 索引与搜索

diff --git a/docs/source/tutorials/extend_3_fitlog.rst b/docs/source/tutorials/extend_3_fitlog.rst
@@ -4,7 +4,7 @@
 
 本文介绍结合使用 fastNLP 和 fitlog 进行科研的方法。
 
-首先，我们需要安装 `fitlog <https://fitlog.readthedocs.io/>`_ 。你需要确认你的电脑中没有其它名为 `fitlog` 的命令。
+首先，我们需要安装 `fitlog <http://www.fastnlp.top/docs/fitlog/>`_ 。你需要确认你的电脑中没有其它名为 `fitlog` 的命令。
 
 我们从命令行中进入到一个文件夹，现在我们要在文件夹中创建我们的 fastNLP 项目。你可以在命令行输入 `fitlog init test1` ，
 然后你会看到如下提示::
@@ -15,7 +15,7 @@
     Fitlog project test1 is initialized.
 
 这表明你已经创建成功了项目文件夹，并且在项目文件夹中已经初始化了 Git。如果你不想初始化 Git，
-可以参考文档 `命令行工具 <https://fitlog.readthedocs.io/zh/latest/user/command_line.html>`_
+可以参考文档 `命令行工具 <http://www.fastnlp.top/docs/fitlog/user/command_line.html>`_
 
 现在我们进入你创建的项目文件夹 test1 中，可以看到有一个名为 logs 的文件夹，后面我们将会在里面存放你的实验记录。
 同时也有一个名为 main.py 的文件，这是我们推荐你使用的训练入口文件。文件的内容如下::
@@ -37,7 +37,7 @@
     fitlog.finish()                     # finish the logging
 
 我们推荐你保留除注释外的四行代码，它们有助于你的实验，
-他们的具体用处参见文档 `用户 API <https://fitlog.readthedocs.io/zh/latest/fitlog.html>`_
+它们的具体用处参见文档 `用户 API <http://www.fastnlp.top/docs/fitlog/>`_
 
 我们假定你要进行前两个教程中的实验，并已经把数据复制到了项目根目录下的 tutorial_sample_dataset.csv 文件中。
 现在我们编写如下的训练代码，使用 :class:`~fastNLP.core.callback.FitlogCallback` 进行实验记录保存::

diff --git a/docs/source/tutorials/文本分类.rst b/docs/source/tutorials/文本分类.rst
@@ -291,7 +291,7 @@ fastNLP提供了Trainer对象来组织训练过程，包括完成loss计算(所
 
 
 PS: 使用Bert进行文本分类
-~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 .. code-block:: python
 
@@ -368,7 +368,7 @@ PS: 使用Bert进行文本分类
 
 
 PS: 基于词进行文本分类
-~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 由于汉字中没有显式的字与字的边界，一般需要通过分词器先将句子进行分词操作。
 下面的例子演示了如何不基于fastNLP已有的数据读取、预处理代码进行文本分类。

diff --git a/fastNLP/core/dataset.py b/fastNLP/core/dataset.py
@@ -53,7 +53,7 @@
         from fastNLP import DataSet
         from fastNLP import Instance
         instances = []
-        winstances.append(Instance(sentence="This is the first instance",
+        instances.append(Instance(sentence="This is the first instance",
                             words=['this', 'is', 'the', 'first', 'instance', '.'],
                             seq_len=6))
         instances.append(Instance(sentence="Second instance .",

diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py
@@ -148,7 +148,7 @@ def __init__(self, data, model, metrics, batch_size=16, num_workers=0, device=No
                 self._predict_func = self._model.predict
                 self._predict_func_wrapper = self._model.predict
         else:
-            if _model_contains_inner_module(model):
+            if _model_contains_inner_module(self._model):
                 self._predict_func_wrapper = self._model.forward
                 self._predict_func = self._model.module.forward
             else:

diff --git a/fastNLP/io/file_utils.py b/fastNLP/io/file_utils.py
@@ -103,6 +103,11 @@
     "yelp-review-polarity": "yelp_review_polarity.tar.gz",
     "sst-2": "SST-2.zip",
     "sst": "SST.zip",
+    'mr': 'mr.zip',
+    "R8": "R8.zip",
+    "R52": "R52.zip",
+    "20ng": "20ng.zip",
+    "ohsumed": "ohsumed.zip",
 
     # Classification, Chinese
     "chn-senti-corp": "chn_senti_corp.zip",

diff --git a/fastNLP/io/pipe/__init__.py b/fastNLP/io/pipe/__init__.py
@@ -23,15 +23,15 @@
     "ChnSentiCorpPipe",
     "THUCNewsPipe",
     "WeiboSenti100kPipe",
-    "MRPipe", "R52Pipe", "R8Pipe", "OhsumedPipe", "NG20Loader",
-    
+    "MRPipe", "R52Pipe", "R8Pipe", "OhsumedPipe", "NG20Pipe",
+
     "Conll2003NERPipe",
     "OntoNotesNERPipe",
     "MsraNERPipe",
     "WeiboNERPipe",
     "PeopleDailyPipe",
     "Conll2003Pipe",
-    
+
     "MatchingBertPipe",
     "RTEBertPipe",
     "SNLIBertPipe",
@@ -53,14 +53,20 @@
     "RenamePipe",
     "GranularizePipe",
     "MachingTruncatePipe",
-    
+
     "CoReferencePipe",
 
-    "CMRC2018BertPipe"
+    "CMRC2018BertPipe",
+
+    "R52PmiGraphPipe",
+    "R8PmiGraphPipe",
+    "OhsumedPmiGraphPipe",
+    "NG20PmiGraphPipe",
+    "MRPmiGraphPipe"
 ]
 
 from .classification import CLSBasePipe, YelpFullPipe, YelpPolarityPipe, SSTPipe, SST2Pipe, IMDBPipe, ChnSentiCorpPipe, THUCNewsPipe, \
-    WeiboSenti100kPipe, AGsNewsPipe, DBPediaPipe, MRPipe, R8Pipe, R52Pipe, OhsumedPipe, NG20Loader
+    WeiboSenti100kPipe, AGsNewsPipe, DBPediaPipe, MRPipe, R8Pipe, R52Pipe, OhsumedPipe, NG20Pipe
 from .conll import Conll2003NERPipe, OntoNotesNERPipe, MsraNERPipe, WeiboNERPipe, PeopleDailyPipe
 from .conll import Conll2003Pipe
 from .coreference import CoReferencePipe
@@ -70,3 +76,5 @@
     LCQMCPipe, BQCorpusPipe, LCQMCBertPipe, RenamePipe, GranularizePipe, MachingTruncatePipe
 from .pipe import Pipe
 from .qa import CMRC2018BertPipe
+
+from .construct_graph import MRPmiGraphPipe, R8PmiGraphPipe, R52PmiGraphPipe, NG20PmiGraphPipe, OhsumedPmiGraphPipe