diff --git a/.github/scripts/doc_link_checker.py b/.github/scripts/doc_link_checker.py
index f2acc7b63c..00e2fbc9bc 100644
--- a/.github/scripts/doc_link_checker.py
+++ b/.github/scripts/doc_link_checker.py
@@ -24,16 +24,15 @@ def make_parser():
def analyze_doc(home, path):
print('analyze {}'.format(path))
problem_list = []
- code_block = False
+ code_block = 0
with open(path) as f:
lines = f.readlines()
for line in lines:
line = line.strip()
if line.startswith('```'):
- code_block = not code_block
- continue
+ code_block = 1 - code_block
- if code_block is True:
+ if code_block > 0:
continue
if '[' in line and ']' in line and '(' in line and ')' in line:
@@ -62,7 +61,7 @@ def analyze_doc(home, path):
def traverse(target):
if os.path.isfile(target):
- analyze_doc('./', target)
+ analyze_doc(os.path.dirname(target), target)
return
for home, dirs, files in os.walk(target):
for filename in files:
diff --git a/README.md b/README.md
index ab966e8bed..01d804ecae 100644
--- a/README.md
+++ b/README.md
@@ -63,9 +63,9 @@ Models can be exported and run in the following backends, and more will be compa
All kinds of modules in the SDK can be extended, such as `Transform` for image processing, `Net` for Neural Network inference, `Module` for postprocessing and so on
-## Get Started
+## [Documentation](https://mmdeploy.readthedocs.io/en/latest/)
-Please read [getting_started.md](docs/en/get_started.md) for the basic usage of MMDeploy. We also provide tutoials about:
+Please read [getting_started](docs/en/get_started.md) for the basic usage of MMDeploy. We also provide tutorials about:
- [Build](docs/en/01-how-to-build/build_from_source.md)
- [Build from Docker](docs/en/01-how-to-build/build_from_docker.md)
@@ -77,11 +77,20 @@ Please read [getting_started.md](docs/en/get_started.md) for the basic usage of
- User Guide
- [How to convert model](docs/en/02-how-to-run/convert_model.md)
- [How to write config](docs/en/02-how-to-run/write_config.md)
- - [How to evaluate deployed models](docs/en/02-how-to-run/how_to_evaluate_a_model.md)
- - [How to measure performance of deployed models](docs/en/02-how-to-run/how_to_measure_performance_of_models.md)
+ - [How to profile model](docs/en/02-how-to-run/profile_model.md)
+ - [How to quantize model](docs/en/02-how-to-run/quantize_model.md)
+ - [Useful tools](docs/en/02-how-to-run/useful_tools.md)
- Developer Guide
- - [How to support new models](docs/en/06-developer-guide/support_new_model.md)
- - [How to support new backends](docs/en/06-developer-guide/support_new_backend.md)
+ - [How to support new models](docs/en/07-developer-guide/support_new_model.md)
+ - [How to support new backends](docs/en/07-developer-guide/support_new_backend.md)
+ - [How to partition model](docs/en/07-developer-guide/partition_model.md)
+ - [How to test rewritten model](docs/en/07-developer-guide/test_rewritten_models.md)
+ - [How to test backend ops](docs/en/07-developer-guide/add_backend_ops_unittest.md)
+ - [How to do regression test](docs/en/07-developer-guide/regression_test.md)
+- Custom Backend Ops
+ - [ncnn](docs/en/06-custom-ops/ncnn.md)
+ - [onnxruntime](docs/en/06-custom-ops/onnxruntime.md)
+ - [tensorrt](docs/en/06-custom-ops/tensorrt.md)
- [FAQ](docs/en/faq.md)
- [Contributing](.github/CONTRIBUTING.md)
diff --git a/README_zh-CN.md b/README_zh-CN.md
index e77b4da141..7c7dd4d0f2 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -63,8 +63,9 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
- Net 推理
- Module 后处理
-## [快速上手](docs/zh_cn/get_started.md)
+## [中文文档](https://mmdeploy.readthedocs.io/zh_CN/latest/)
+- [快速上手](docs/zh_cn/get_started.md)
- [编译](docs/zh_cn/01-how-to-build/build_from_source.md)
- [Build from Docker](docs/zh_cn/01-how-to-build/build_from_docker.md)
- [Build for Linux](docs/zh_cn/01-how-to-build/linux-x86_64.md)
@@ -77,17 +78,28 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
- [配置转换参数](docs/zh_cn/02-how-to-run/write_config.md)
- [量化](docs/zh_cn/02-how-to-run/quantize_model.md)
- [测试转换完成的模型](docs/zh_cn/02-how-to-run/profile_model.md)
+ - [工具集介绍](docs/zh_cn/02-how-to-run/useful_tools.md)
- 开发指南
- - [支持新模型](docs/zh_cn/04-developer-guide/support_new_model.md)
- - [增加推理 Backend](docs/zh_cn/04-developer-guide/support_new_backend.md)
- - [回归测试](docs/zh_cn/04-developer-guide/do_regression_test.md)
+ - [支持新模型](docs/zh_cn/07-developer-guide/support_new_model.md)
+ - [增加推理 backend](docs/zh_cn/07-developer-guide/support_new_backend.md)
+ - [模型分块](docs/zh_cn/07-developer-guide/partition_model.md)
+ - [测试重写模型](docs/zh_cn/07-developer-guide/test_rewritten_models.md)
+ - [backend 算子测试](docs/zh_cn/07-developer-guide/add_backend_ops_unittest.md)
+ - [回归测试](docs/zh_cn/07-developer-guide/regression_test.md)
+- 各 backend 自定义算子列表
+ - [ncnn](docs/zh_cn/06-custom-ops/ncnn.md)
+ - [onnxruntime](docs/zh_cn/06-custom-ops/onnxruntime.md)
+ - [tensorrt](docs/zh_cn/06-custom-ops/tensorrt.md)
- [FAQ](docs/zh_cn/faq.md)
- [贡献者手册](.github/CONTRIBUTING.md)
## 新人解说
-- [01 术语解释、加载第一个模型](docs/zh_cn/05-tutorial/01_introduction_to_model_deployment.md)
-- [02 转成 onnx](docs/zh_cn/05-tutorial/02_challenges.md)
+- [01 术语解释、加载第一个模型](docs/zh_cn/tutorial/01_introduction_to_model_deployment.md)
+- [02 部署常见问题](docs/zh_cn/tutorial/02_challenges.md)
+- [03 torch转onnx](docs/zh_cn/tutorial/03_pytorch2onnx.md)
+- [04 让torch支持更多onnx算子](docs/zh_cn/tutorial/04_onnx_custom_op.md)
+- [05 调试onnx模型](docs/zh_cn/tutorial/05_onnx_model_editing.md)
## 基准与模型库
diff --git a/docs/en/01-how-to-build/android.md b/docs/en/01-how-to-build/android.md
index 0285f9cf27..ec959fbdf8 100644
--- a/docs/en/01-how-to-build/android.md
+++ b/docs/en/01-how-to-build/android.md
@@ -17,7 +17,7 @@ Model converter is executed on linux platform, and SDK is executed on android pl
Here are two steps for android build.
-1. Build model converter on linux, please refer to [How to build linux](./linux-x86_64.md)
+1. Build model converter on linux, please refer to [How to build linux](linux-x86_64.md)
2. Build SDK using android toolchain on linux.
diff --git a/docs/en/01-how-to-build/build_from_docker.md b/docs/en/01-how-to-build/build_from_docker.md
index b2f759bfae..816c2c77b1 100644
--- a/docs/en/01-how-to-build/build_from_docker.md
+++ b/docs/en/01-how-to-build/build_from_docker.md
@@ -51,7 +51,7 @@ docker run --gpus all -it mmdeploy:master-gpu
As described [here](https://forums.developer.nvidia.com/t/cuda-error-the-provided-ptx-was-compiled-with-an-unsupported-toolchain/185754), update the GPU driver to the latest one for your GPU.
-2. docker: Error response from daemon: could not select device driver "" with capabilities: \[\[gpu\]\].
+2. docker: Error response from daemon: could not select device driver "" with capabilities: \[gpu\].
```
# Add the package repositories
diff --git a/docs/en/01-how-to-build/jetsons.md b/docs/en/01-how-to-build/jetsons.md
index 2accf7534a..f995dea3ad 100644
--- a/docs/en/01-how-to-build/jetsons.md
+++ b/docs/en/01-how-to-build/jetsons.md
@@ -229,7 +229,7 @@ export MMDEPLOY_DIR=$(pwd)
### Install Model Converter
Since some operators adopted by OpenMMLab codebases are not supported by TensorRT, we build the custom TensorRT plugins to make it up, such as `roi_align`, `scatternd`, etc.
-You can find a full list of custom plugins from [here](../ops/tensorrt.md).
+You can find a full list of custom plugins from [here](../06-custom-ops/tensorrt.md).
```shell
# build TensorRT custom operators
diff --git a/docs/en/02-how-to-run/convert_model.md b/docs/en/02-how-to-run/convert_model.md
index edefd3731e..1c8614abd8 100644
--- a/docs/en/02-how-to-run/convert_model.md
+++ b/docs/en/02-how-to-run/convert_model.md
@@ -65,7 +65,7 @@ python ./tools/deploy.py \
## How to evaluate the exported models
-You can try to evaluate model, referring to [how_to_evaluate_a_model](./how_to_evaluate_a_model.md).
+You can try to evaluate the model, referring to [profile_model](profile_model.md).
## List of supported models exportable to other backends
diff --git a/docs/en/02-how-to-run/how_to_measure_performance_of_models.md b/docs/en/02-how-to-run/how_to_measure_performance_of_models.md
deleted file mode 100644
index 5402e9f9f8..0000000000
--- a/docs/en/02-how-to-run/how_to_measure_performance_of_models.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# How to profile model
-
-After converting a PyTorch model to a backend model, you can profile inference speed using `tools/test.py`.
-
-## Prerequisite
-
-Install MMDeploy according to [get-started](../get_started.md) instructions.
-And convert the PyTorch model or ONNX model to the backend model by following the [guide](convert_model.md).
-
-## Profile
-
-```shell
-python tools/test.py \
-${DEPLOY_CFG} \
-${MODEL_CFG} \
---model ${BACKEND_MODEL_FILES} \
-[--speed-test] \
-[--warmup ${WARM_UP}] \
-[--log-interval ${LOG_INTERVERL}] \
-[--log2file ${LOG_RESULT_TO_FILE}]
-```
-
-## Description of all arguments
-
-- `deploy_cfg`: The config for deployment.
-- `model_cfg`: The config of the model in OpenMMLab codebases.
-- `--model`: The backend model files. For example, if we convert a model to ncnn, we need to pass a ".param" file and a ".bin" file. If we convert a model to TensorRT, we need to pass the model file with ".engine" suffix.
-- `--log2file`: log evaluation results and speed to file.
-- `--speed-test`: Whether to activate speed test.
-- `--warmup`: warmup before counting inference elapse, require setting speed-test first.
-- `--log-interval`: The interval between each log, require setting speed-test first.
-
-\* Other arguments in `tools/test.py` are used for performance test. They have no concern with speed test.
-
-## Example
-
-```shell
-python tools/test.py \
- configs/mmcls/classification_onnxruntime_static.py \
- {MMCLS_DIR}/configs/resnet/resnet50_b32x8_imagenet.py \
- --model model.onnx \
- --speed-test \
- --device cpu
-```
diff --git a/docs/en/02-how-to-run/how_to_evaluate_a_model.md b/docs/en/02-how-to-run/profile_model.md
similarity index 89%
rename from docs/en/02-how-to-run/how_to_evaluate_a_model.md
rename to docs/en/02-how-to-run/profile_model.md
index b6cea24293..da248f4bb6 100644
--- a/docs/en/02-how-to-run/how_to_evaluate_a_model.md
+++ b/docs/en/02-how-to-run/profile_model.md
@@ -25,6 +25,9 @@ ${MODEL_CFG} \
[--metric-options ${METRIC_OPTIONS}]
[--log2file work_dirs/output.txt]
[--batch-size ${BATCH_SIZE}]
+[--speed-test] \
+[--warmup ${WARM_UP}] \
+[--log-interval ${LOG_INTERVAL}]
```
## Description of all arguments
@@ -44,6 +47,9 @@ ${MODEL_CFG} \
format will be kwargs for dataset.evaluate() function.
- `--log2file`: log evaluation results (and speed) to file.
- `--batch-size`: the batch size for inference, which would override `samples_per_gpu` in data config. Default is `1`. Note that not all models support `batch_size>1`.
+- `--speed-test`: Whether to activate speed test.
+- `--warmup`: Warm-up iterations before timing inference; requires `--speed-test` to be set.
+- `--log-interval`: The interval between each log; requires `--speed-test` to be set.
\* Other arguments in `tools/test.py` are used for speed test. They have no concern with evaluation.
@@ -55,7 +61,8 @@ python tools/test.py \
{MMCLS_DIR}/configs/resnet/resnet50_b32x8_imagenet.py \
--model model.onnx \
--out out.pkl \
- --device cuda:0
+ --device cpu \
+ --speed-test
```
## Note
diff --git a/docs/en/02-how-to-run/quantize_model.md b/docs/en/02-how-to-run/quantize_model.md
new file mode 100644
index 0000000000..445facd98f
--- /dev/null
+++ b/docs/en/02-how-to-run/quantize_model.md
@@ -0,0 +1,67 @@
+# Quantize model
+
+## Why quantization?
+
+The fixed-point model has many advantages over the fp32 model:
+
+- Smaller size: an 8-bit model reduces the file size by 75%
+- Thanks to the smaller model, the cache hit rate improves and inference runs faster
+- Chips tend to have dedicated fixed-point acceleration instructions, which are faster and consume less energy (int8 on a common CPU requires only about 10% of the energy)
+
+Package size and heat generation are key metrics when evaluating a mobile app;
+on the server side, quantization lets you keep the same QPS while improving model accuracy.
+
+## Post training quantization scheme
+
+Taking ncnn backend as an example, the complete workflow is as follows:
+
+
+
+
+
+mmdeploy generates the quantization table from the static graph (onnx) and uses backend tools to convert the fp32 model to fixed point.
+
+Currently mmdeploy supports PTQ with the ncnn backend.
+
+## How to convert model
+
+After [installing mmdeploy](../01-how-to-build/build_from_source.md), install ppq:
+
+```bash
+git clone https://github.com/openppl-public/ppq.git
+cd ppq
+git checkout edbecf4 # import some feature
+pip install -r requirements.txt
+python3 setup.py install
+```
+
+Back in mmdeploy, enable quantization with the `--quant` option of `tools/deploy.py`.
+
+```bash
+cd /path/to/mmdeploy
+export MODEL_CONFIG=/path/to/mmclassification/configs/resnet/resnet18_8xb16_cifar10.py
+export MODEL_PATH=https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth
+
+python3 tools/deploy.py configs/mmcls/classification_ncnn-int8_static.py ${MODEL_CONFIG} ${MODEL_PATH} /path/to/self-test.png --work-dir work_dir --device cpu --quant --quant-image-dir /path/to/images
+...
+```
+
+Description
+
+| Parameter | Meaning |
+| :---------------: | :--------------------------------------------------------------: |
+| --quant | Enable quantization, the default value is False |
+| --quant-image-dir | Calibration dataset; the validation set in MODEL_CONFIG is used by default |
+
+## Custom calibration dataset
+
+The calibration set is used to compute the quantization parameters of each layer. Some DFQ (Data Free Quantization) methods do not even require a dataset.
+
+- Create a new folder and just put images in it (no directory structure, negative examples, or particular filename format required)
+- The images should come from a real deployment scenario; otherwise the accuracy will drop
+- Do not quantize the model with the test dataset
+ | Type | Train dataset | Validation dataset | Test dataset | Calibration dataset |
+ | ----- | ------------- | ------------------ | ------------- | ------------------- |
+ | Usage | QAT | PTQ | Test accuracy | PTQ |
+
+It is highly recommended to [verify model precision](profile_model.md) after quantization. [Here](../03-benchmark/quantization.md) are some quantization test results.
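+
+As a minimal sketch of such a check (file names here are illustrative; the ncnn model produced by `deploy.py` is passed to `--model` as a `.param` plus a `.bin` file, following the `tools/test.py` usage documented in [profile_model.md](profile_model.md)):
+
+```bash
+python3 tools/test.py \
+    configs/mmcls/classification_ncnn-int8_static.py \
+    ${MODEL_CONFIG} \
+    --model work_dir/end2end.param work_dir/end2end.bin \
+    --out quant_test.pkl \
+    --device cpu
+```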
diff --git a/docs/en/useful_tools.md b/docs/en/02-how-to-run/useful_tools.md
similarity index 98%
rename from docs/en/useful_tools.md
rename to docs/en/02-how-to-run/useful_tools.md
index 92b0121108..7472f32565 100644
--- a/docs/en/useful_tools.md
+++ b/docs/en/02-how-to-run/useful_tools.md
@@ -1,3 +1,5 @@
+# Useful Tools
+
Apart from `deploy.py`, there are other useful tools under the `tools/` directory.
## torch2onnx
@@ -96,7 +98,8 @@ python tools/onnx2tensorrt.py \
${ONNX_PATH} \
${OUTPUT} \
--device-id 0 \
- --log-level INFO
+ --log-level INFO \
+ --calib-file /path/to/file
```
### Description of all arguments
diff --git a/docs/en/03-benchmark/benchmark.md b/docs/en/03-benchmark/benchmark.md
index 8b44c32eec..79024effc4 100644
--- a/docs/en/03-benchmark/benchmark.md
+++ b/docs/en/03-benchmark/benchmark.md
@@ -26,7 +26,7 @@ GPU: ncnn, TensorRT, PPLNN
- Warm up. For ncnn, we warm up 30 iters for all codebases. As for other backends: for classification, we warm up 1010 iters; for other codebases, we warm up 10 iters.
- Input resolution varies for different datasets of different codebases. All inputs are real images except for `mmediting` because the dataset is not large enough.
-Users can directly test the speed through [model profiling](../02-how-to-run/how_to_measure_performance_of_models.md). And here is the benchmark in our environment.
+Users can directly test the speed through [model profiling](../02-how-to-run/profile_model.md). And here is the benchmark in our environment.
@@ -407,7 +407,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/how
## Performance benchmark
-Users can directly test the performance through [how_to_evaluate_a_model.md](../02-how-to-run/how_to_evaluate_a_model.md). And here is the benchmark in our environment.
+Users can directly test the performance through [profile_model.md](../02-how-to-run/profile_model.md). And here is the benchmark in our environment.
diff --git a/docs/en/03-benchmark/benchmark_edge.md b/docs/en/03-benchmark/benchmark_edge.md
index 5f9ec0782c..5145c429d3 100644
--- a/docs/en/03-benchmark/benchmark_edge.md
+++ b/docs/en/03-benchmark/benchmark_edge.md
@@ -1,6 +1,6 @@
# Test on embedded device
-Here are the test conclusions of our edge devices. You can directly obtain the results of your own environment with [model profiling](../02-how-to-run/how_to_evaluate_a_model.md).
+Here are the test conclusions for our edge devices. You can directly obtain results for your own environment with [model profiling](../02-how-to-run/profile_model.md).
## Software and hardware environment
diff --git a/docs/en/03-benchmark/quantization.md b/docs/en/03-benchmark/quantization.md
new file mode 100644
index 0000000000..4d9bf09da5
--- /dev/null
+++ b/docs/en/03-benchmark/quantization.md
@@ -0,0 +1,27 @@
+# Quantization test result
+
+Currently mmdeploy supports ncnn quantization.
+
+## Quantize with ncnn
+
+### mmcls
+
+| model | dataset | fp32 top-1 (%) | int8 top-1 (%) |
+| :--------------------------------------------------------------------------------------------------------------------------: | :---------: | :------------: | :------------: |
+| [ResNet-18](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb16_cifar10.py) | Cifar10 | 94.82 | 94.83 |
+| [ResNeXt-32x4d-50](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnext/resnext50-32x4d_8xb32_in1k.py) | ImageNet-1k | 77.90 | 78.20\* |
+| [MobileNet V2](https://github.com/open-mmlab/mmclassification/blob/master/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py) | ImageNet-1k | 71.86 | 71.43\* |
+| [HRNet-W18\*](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w18_4xb32_in1k.py) | ImageNet-1k | 76.75 | 76.25\* |
+
+Note:
+
+- Because the ImageNet-1k dataset is large and ncnn has not released a Vulkan int8 version, only part of the test set (4000/50000) is used.
+- Accuracy changes after quantization; a change of less than 1% for classification models is normal.
+
+### OCR detection
+
+| model | dataset | fp32 hmean | int8 hmean |
+| :---------------------------------------------------------------------------------------------------------------: | :-------: | :--------: | :------------: |
+| [PANet](https://github.com/open-mmlab/mmocr/blob/main/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py) | ICDAR2015 | 0.795 | 0.792 @thr=0.9 |
+
+Note: [mmocr](https://github.com/open-mmlab/mmocr) uses `shapely` to compute IoU, which results in a slight difference in accuracy
diff --git a/docs/en/03-benchmark/supported_models.md b/docs/en/03-benchmark/supported_models.md
index bace6e2a46..0be5c21923 100644
--- a/docs/en/03-benchmark/supported_models.md
+++ b/docs/en/03-benchmark/supported_models.md
@@ -1,4 +1,4 @@
-## Supported Models
+## Supported models
The table below lists the models that are guaranteed to be exportable to other backends.
diff --git a/docs/en/04-supported-codebases/mmocr.md b/docs/en/04-supported-codebases/mmocr.md
index 97334baca1..490b5fdb84 100644
--- a/docs/en/04-supported-codebases/mmocr.md
+++ b/docs/en/04-supported-codebases/mmocr.md
@@ -21,7 +21,7 @@ Please refer to [install.md](https://mmocr.readthedocs.io/en/latest/install.html
Note that ncnn, pplnn, and OpenVINO only support the configs of DBNet18 for DBNet.
-For the PANet with the [checkpoint](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth) pretrained on ICDAR dateset, if you want to convert the model to TensorRT with 16 bits float point, please try the following script.
+For the PANet with the [checkpoint](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth) pretrained on ICDAR dataset, if you want to convert the model to TensorRT with 16 bits float point, please try the following script.
```python
# Copyright (c) OpenMMLab. All rights reserved.
diff --git a/docs/en/05-supported-backends/ncnn.md b/docs/en/05-supported-backends/ncnn.md
index 789ed3ef1a..a4043e3ffb 100644
--- a/docs/en/05-supported-backends/ncnn.md
+++ b/docs/en/05-supported-backends/ncnn.md
@@ -1,92 +1,18 @@
-# ncnn Support
+# Supported ncnn features
-MMDeploy now supports ncnn version == 1.0.20220216
+The ncnn features currently usable through mmdeploy are as follows:
-## Installation
+| feature | windows | linux | mac | android |
+| :----------------: | :-----: | :---: | :-: | :-----: |
+| fp32 inference | ✔️ | ✔️ | ✔️ | ✔️ |
+| int8 model convert | - | ✔️ | ✔️ | - |
+| nchw layout | ✔️ | ✔️ | ✔️ | ✔️ |
+| Vulkan support | - | ✔️ | ✔️ | ✔️ |
-### Install ncnn
+The following features cannot be enabled automatically by mmdeploy; you need to modify the ncnn build options manually or adjust the runtime parameters in the SDK (a build sketch follows the list):
-- Download VulkanTools for the compilation of ncnn.
-
- ```bash
- wget https://sdk.lunarg.com/sdk/download/1.2.176.1/linux/vulkansdk-linux-x86_64-1.2.176.1.tar.gz?Human=true -O vulkansdk-linux-x86_64-1.2.176.1.tar.gz
- tar -xf vulkansdk-linux-x86_64-1.2.176.1.tar.gz
- export VULKAN_SDK=$(pwd)/1.2.176.1/x86_64
- export LD_LIBRARY_PATH=$VULKAN_SDK/lib:$LD_LIBRARY_PATH
- ```
-
-- Check your gcc version.
- You should ensure your gcc satisfies `gcc >= 6`.
-
-- Install Protocol Buffers through:
-
- ```bash
- apt-get install libprotobuf-dev protobuf-compiler
- ```
-
-- Prepare ncnn Framework
-
- - Download ncnn source code
-
- ```bash
- git clone -b 20220216 git@github.com:Tencent/ncnn.git
- ```
-
- - Make install ncnn library
-
- ```bash
- cd ncnn
- export NCNN_DIR=$(pwd)
- git submodule update --init
- mkdir -p build && cd build
- cmake -DNCNN_VULKAN=ON -DNCNN_SYSTEM_GLSLANG=ON -DNCNN_BUILD_EXAMPLES=ON -DNCNN_PYTHON=ON -DNCNN_BUILD_TOOLS=ON -DNCNN_BUILD_BENCHMARK=ON -DNCNN_BUILD_TESTS=ON ..
- make install
- ```
-
- - Install pyncnn module
-
- ```bash
- cd ${NCNN_DIR} # To ncnn root directory
- cd python
- pip install -e .
- ```
-
-### Build custom ops
-
-Some custom ops are created to support models in OpenMMLab, the custom ops can be built as follows:
-
-```bash
-cd ${MMDEPLOY_DIR}
-mkdir -p build && cd build
-cmake -DMMDEPLOY_TARGET_BACKENDS=ncnn ..
-make -j$(nproc)
-```
-
-If you haven't installed ncnn in the default path, please add `-Dncnn_DIR` flag in cmake.
-
-```bash
- cmake -DMMDEPLOY_TARGET_BACKENDS=ncnn -Dncnn_DIR=${NCNN_DIR}/build/install/lib/cmake/ncnn ..
- make -j$(nproc)
-```
-
-## Convert model
-
-- This follows the tutorial on [How to convert model](../02-how-to-run/convert_model.md).
-- The converted model has two files: `.param` and `.bin`, as model structure file and weight file respectively.
-
-## Reminder
-
-- In ncnn version >= 1.0.20220216, the dimension of ncnn.Mat should be no more than 4.
-
-## FAQs
-
-1. When running ncnn models for inference with custom ops, it fails and shows the error message like:
-
- ```bash
- TypeError: register mm custom layers(): incompatible function arguments. The following argument types are supported:
- 1.(ar0: ncnn:Net) -> int
-
- Invoked with:
- ```
-
- This is because of the failure to bind ncnn C++ library to pyncnn. You should build pyncnn from C++ ncnn source code, but not by `pip install`
+- bf16 inference
+- nc4hw4 layout
+- Profiling per layer
+- Turn off NCNN_STRING to reduce .so file size
+- Set thread number and CPU affinity
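+
+For example, a sketch of rebuilding ncnn with one of these options changed (this assumes ncnn has already been cloned; `NCNN_STRING` and the build flow are ncnn build options, not mmdeploy ones):
+
+```bash
+cd ncnn && mkdir -p build && cd build
+# NCNN_STRING=OFF drops layer/blob name strings, which shrinks the library size
+cmake -DNCNN_STRING=OFF ..
+make -j$(nproc) && make install
+```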
diff --git a/docs/en/05-supported-backends/onnxruntime.md b/docs/en/05-supported-backends/onnxruntime.md
index 4c292c2a6c..330df092f8 100644
--- a/docs/en/05-supported-backends/onnxruntime.md
+++ b/docs/en/05-supported-backends/onnxruntime.md
@@ -29,17 +29,6 @@ export ONNXRUNTIME_DIR=$(pwd)
export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
```
-Note:
-
-- If you want to save onnxruntime env variables to bashrc, you could run
-
- ```bash
- echo '# set env for onnxruntime' >> ~/.bashrc
- echo "export ONNXRUNTIME_DIR=${ONNXRUNTIME_DIR}" >> ~/.bashrc
- echo 'export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH' >> ~/.bashrc
- source ~/.bashrc
- ```
-
### Build on Linux
```bash
diff --git a/docs/en/05-supported-backends/snpe.md b/docs/en/05-supported-backends/snpe.md
new file mode 100644
index 0000000000..65705d13b2
--- /dev/null
+++ b/docs/en/05-supported-backends/snpe.md
@@ -0,0 +1,8 @@
+# SNPE feature support
+
+Currently mmdeploy integrates the onnx2dlc model conversion and SDK inference, but the following features are not yet supported:
+
+- GPU_FP16 mode
+- DSP/AIP quantization
+- Operator internal profiling
+- UDO operator
diff --git a/docs/en/ops/ncnn.md b/docs/en/06-custom-ops/ncnn.md
similarity index 100%
rename from docs/en/ops/ncnn.md
rename to docs/en/06-custom-ops/ncnn.md
diff --git a/docs/en/ops/onnxruntime.md b/docs/en/06-custom-ops/onnxruntime.md
similarity index 100%
rename from docs/en/ops/onnxruntime.md
rename to docs/en/06-custom-ops/onnxruntime.md
diff --git a/docs/en/ops/tensorrt.md b/docs/en/06-custom-ops/tensorrt.md
similarity index 100%
rename from docs/en/ops/tensorrt.md
rename to docs/en/06-custom-ops/tensorrt.md
diff --git a/docs/en/06-developer-guide/add_test_units_for_backend_ops.md b/docs/en/07-developer-guide/add_backend_ops_unittest.md
similarity index 98%
rename from docs/en/06-developer-guide/add_test_units_for_backend_ops.md
rename to docs/en/07-developer-guide/add_backend_ops_unittest.md
index 8c517857b2..3926c6dbf5 100644
--- a/docs/en/06-developer-guide/add_test_units_for_backend_ops.md
+++ b/docs/en/07-developer-guide/add_backend_ops_unittest.md
@@ -15,7 +15,7 @@ You can put unit test for ops in `tests/test_ops/`. Usually, the following progr
```python
@pytest.mark.parametrize('backend', [TEST_TENSORRT, TEST_ONNXRT]) # 1.1 backend test class
@pytest.mark.parametrize('pool_h,pool_w,spatial_scale,sampling_ratio', # 1.2 set parameters of op
- [(2, 2, 1.0, 2), (4, 4, 2.0, 4)]) # [(# Examples of op test parameters),...]
+ [(2, 2, 1.0, 2), (4, 4, 2.0, 4)]) # [(# Examples of op test parameters),...]
def test_roi_align(backend,
pool_h, # set parameters of op
pool_w,
diff --git a/docs/en/06-developer-guide/partition_model.md b/docs/en/07-developer-guide/partition_model.md
similarity index 94%
rename from docs/en/06-developer-guide/partition_model.md
rename to docs/en/07-developer-guide/partition_model.md
index 3cfd27c1f8..7c981af364 100644
--- a/docs/en/06-developer-guide/partition_model.md
+++ b/docs/en/07-developer-guide/partition_model.md
@@ -73,7 +73,7 @@ partition_config = dict(
## Step 3: Get partitioned onnx models
-Once we have marks of nodes and the deployment config with `parition_config` being set properly, we could use the [tool](../useful_tools.md) `torch2onnx` to export the model to onnx and get the partition onnx files.
+Once the nodes are marked and the deployment config has `partition_config` set properly, we can use the [tool](../02-how-to-run/useful_tools.md) `torch2onnx` to export the model to onnx and get the partitioned onnx files.
```shell
python tools/torch2onnx.py \
@@ -86,4 +86,4 @@ https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e
After run the script above, we would have the partitioned onnx file `yolov3.onnx` in the `work-dir`. You can use the visualization tool [netron](https://netron.app/) to check the model structure.
-With the partitioned onnx file, you could refer to [useful_tools.md](../useful_tools.md) to do the following procedures such as `mmdeploy_onnx2ncnn`, `onnx2tensorrt`.
+With the partitioned onnx files, you can refer to [useful_tools.md](../02-how-to-run/useful_tools.md) for follow-up procedures such as `mmdeploy_onnx2ncnn` and `onnx2tensorrt`, as sketched below.
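+
+For instance, a sketch of feeding the partitioned file to the ncnn conversion tool described there (paths and output names are illustrative):
+
+```shell
+python tools/onnx2ncnn.py \
+    work-dir/yolov3.onnx \
+    work-dir/yolov3.param \
+    work-dir/yolov3.bin \
+    --log-level INFO
+```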
diff --git a/docs/en/07-developer-guide/regression_test.md b/docs/en/07-developer-guide/regression_test.md
new file mode 100644
index 0000000000..3b46c5637d
--- /dev/null
+++ b/docs/en/07-developer-guide/regression_test.md
@@ -0,0 +1,237 @@
+# How to do regression test
+
+This tutorial describes how to run regression tests. The deployment configuration file contains the codebase config and the inference config.
+
+## 1. Python Environment
+
+```shell
+pip install -r requirements/tests.txt
+```
+
+If pip throws an exception, try upgrading numpy.
+
+```shell
+pip install -U numpy
+```
+
+## 2. Usage
+
+```shell
+python ./tools/regression_test.py \
+ --codebase "${CODEBASE_NAME}" \
+ --backends "${BACKEND}" \
+ [--models "${MODELS}"] \
+ --work-dir "${WORK_DIR}" \
+ --device "${DEVICE}" \
+ --log-level INFO \
+ [--performance or -p] \
+ [--checkpoint-dir "$CHECKPOINT_DIR"]
+```
+
+### Description
+
+- `--codebase` : The codebase to test, e.g. `mmdet`. To test multiple codebases, use `mmcls mmdet ...`
+- `--backends` : The backends to test. By default, all backends are tested. You can use `onnxruntime tensorrt` to choose several of them. If you also need to test the SDK, configure `sdk_config` in `tests/regression/${codebase}.yml`.
+- `--models` : Specify the models to be tested. All models in the `yml` are tested by default. You can also give some model names; for the exact names, refer to the relevant yml configuration file, e.g. `ResNet SE-ResNet "Mask R-CNN"`. Model names can only contain numbers and letters.
+- `--work-dir` : The directory for model conversion and reports, `../mmdeploy_regression_working_dir` by default.
+- `--checkpoint-dir`: The path of the downloaded torch models, `../mmdeploy_checkpoints` by default.
+- `--device` : The device type, `cuda` by default.
+- `--log-level` : These options are available: `'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'`. The default value is `INFO`.
+- `-p` or `--performance` : Whether to test precision. If not enabled, only model conversion is tested.
+
+### Notes
+
+For Windows user:
+
+1. To use the `&&` connector in shell commands, you need to download `PowerShell 7 Preview 5+`.
+2. If you are using a conda env, you may need to change `python3` to `python` in regression_test.py, because there is a `python3.exe` in the `%USERPROFILE%\AppData\Local\Microsoft\WindowsApps` directory.
+
+## Example
+
+1. Test all backends of mmdet and mmpose for **model conversion and precision**
+
+```shell
+python ./tools/regression_test.py \
+ --codebase mmdet mmpose \
+ --work-dir "../mmdeploy_regression_working_dir" \
+ --device "cuda" \
+ --log-level INFO \
+ --performance
+```
+
+2. Test **model conversion and precision** for some backends of mmdet and mmpose
+
+```shell
+python ./tools/regression_test.py \
+ --codebase mmdet mmpose \
+ --backends onnxruntime tensorrt \
+ --work-dir "../mmdeploy_regression_working_dir" \
+ --device "cuda" \
+ --log-level INFO \
+ -p
+```
+
+3. Test some backends of mmdet and mmpose, **only test model conversion**
+
+```shell
+python ./tools/regression_test.py \
+ --codebase mmdet mmpose \
+ --backends onnxruntime tensorrt \
+ --work-dir "../mmdeploy_regression_working_dir" \
+ --device "cuda" \
+ --log-level INFO
+```
+
+4. Test some models of mmdet and mmpose, **only test model conversion**
+
+```shell
+python ./tools/regression_test.py \
+ --codebase mmdet mmpose \
+ --models ResNet SE-ResNet "Mask R-CNN" \
+ --work-dir "../mmdeploy_regression_working_dir" \
+ --device "cuda" \
+ --log-level INFO
+```
+
+## 3. Regression Test Configuration
+
+### Example and parameter description
+
+```yaml
+globals:
+ codebase_dir: ../mmocr # codebase path to test
+ checkpoint_force_download: False # whether to redownload the model even if it already exists
+ images:
+ img_densetext_det: &img_densetext_det ../mmocr/demo/demo_densetext_det.jpg
+ img_demo_text_det: &img_demo_text_det ../mmocr/demo/demo_text_det.jpg
+ img_demo_text_ocr: &img_demo_text_ocr ../mmocr/demo/demo_text_ocr.jpg
+ img_demo_text_recog: &img_demo_text_recog ../mmocr/demo/demo_text_recog.jpg
+ metric_info: &metric_info
+ hmean-iou: # metafile.Results.Metrics
+ eval_name: hmean-iou # test.py --metrics args
+ metric_key: 0_hmean-iou:hmean # the key name of eval log
+ tolerance: 0.1 # tolerated threshold interval
+ task_name: Text Detection # the name of metafile.Results.Task
+ dataset: ICDAR2015 # the name of metafile.Results.Dataset
+ word_acc: # same as hmean-iou, also a kind of metric
+ eval_name: acc
+ metric_key: 0_word_acc_ignore_case
+ tolerance: 0.2
+ task_name: Text Recognition
+ dataset: IIIT5K
+ convert_image_det: &convert_image_det # the image that will be used by detection model convert
+ input_img: *img_densetext_det
+ test_img: *img_demo_text_det
+ convert_image_rec: &convert_image_rec
+ input_img: *img_demo_text_recog
+ test_img: *img_demo_text_recog
+ backend_test: &default_backend_test True # whether test model precision for backend
+ sdk: # SDK config
+ sdk_detection_dynamic: &sdk_detection_dynamic configs/mmocr/text-detection/text-detection_sdk_dynamic.py
+ sdk_recognition_dynamic: &sdk_recognition_dynamic configs/mmocr/text-recognition/text-recognition_sdk_dynamic.py
+
+onnxruntime:
+ pipeline_ort_recognition_static_fp32: &pipeline_ort_recognition_static_fp32
+ convert_image: *convert_image_rec # the image used by model conversion
+ backend_test: *default_backend_test # whether inference on the backend
+ sdk_config: *sdk_recognition_dynamic # test SDK or not. If it exists, use a specific SDK config for testing
+ deploy_config: configs/mmocr/text-recognition/text-recognition_onnxruntime_static.py # the deploy cfg path to use, based on mmdeploy path
+
+ pipeline_ort_recognition_dynamic_fp32: &pipeline_ort_recognition_dynamic_fp32
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_onnxruntime_dynamic.py
+
+ pipeline_ort_detection_dynamic_fp32: &pipeline_ort_detection_dynamic_fp32
+ convert_image: *convert_image_det
+ deploy_config: configs/mmocr/text-detection/text-detection_onnxruntime_dynamic.py
+
+tensorrt:
+ pipeline_trt_recognition_dynamic_fp16: &pipeline_trt_recognition_dynamic_fp16
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-1x32x32-1x32x640.py
+
+ pipeline_trt_detection_dynamic_fp16: &pipeline_trt_detection_dynamic_fp16
+ convert_image: *convert_image_det
+ backend_test: *default_backend_test
+ sdk_config: *sdk_detection_dynamic
+ deploy_config: configs/mmocr/text-detection/text-detection_tensorrt-fp16_dynamic-320x320-2240x2240.py
+
+openvino:
+ # same as onnxruntime backend configuration
+ncnn:
+ # same as onnxruntime backend configuration
+pplnn:
+ # same as onnxruntime backend configuration
+torchscript:
+ # same as onnxruntime backend configuration
+
+
+models:
+ - name: crnn # model name
+ metafile: configs/textrecog/crnn/metafile.yml # the path of model metafile, based on codebase path
+ codebase_model_config_dir: configs/textrecog/crnn # the basepath of `model_configs`, based on codebase path
+ model_configs: # the config name to test
+ - crnn_academic_dataset.py
+ pipelines: # pipeline name
+ - *pipeline_ort_recognition_dynamic_fp32
+
+ - name: dbnet
+ metafile: configs/textdet/dbnet/metafile.yml
+ codebase_model_config_dir: configs/textdet/dbnet
+ model_configs:
+ - dbnet_r18_fpnc_1200e_icdar2015.py
+ pipelines:
+ - *pipeline_ort_detection_dynamic_fp32
+ - *pipeline_trt_detection_dynamic_fp16
+
+ # special pipeline can be added like this
+ - convert_image: xxx
+ backend_test: xxx
+ sdk_config: xxx
+ deploy_config: configs/mmocr/text-detection/xxx
+```
+
+## 4. Generated Report
+
+This is an example of an mmocr regression test report.
+
+| | Model | Model Config | Task | Checkpoint | Dataset | Backend | Deploy Config | Static or Dynamic | Precision Type | Conversion Result | hmean-iou | word_acc | Test Pass |
+| --- | ----- | ---------------------------------------------------------------- | ---------------- | ------------------------------------------------------------------------------------------------------------ | --------- | --------------- | -------------------------------------------------------------------------------------- | ----------------- | -------------- | ----------------- | --------- | -------- | --------- |
+| 0 | crnn | ../mmocr/configs/textrecog/crnn/crnn_academic_dataset.py | Text Recognition | ../mmdeploy_checkpoints/mmocr/crnn/crnn_academic-a723a1c5.pth | IIIT5K | Pytorch | - | - | - | - | - | 80.5 | - |
+| 1 | crnn | ../mmocr/configs/textrecog/crnn/crnn_academic_dataset.py | Text Recognition | ${WORK_DIR}/mmocr/crnn/onnxruntime/static/crnn_academic-a723a1c5/end2end.onnx | x | onnxruntime | configs/mmocr/text-recognition/text-recognition_onnxruntime_dynamic.py | static | fp32 | True | - | 80.67 | True |
+| 2 | crnn | ../mmocr/configs/textrecog/crnn/crnn_academic_dataset.py | Text Recognition | ${WORK_DIR}/mmocr/crnn/onnxruntime/static/crnn_academic-a723a1c5 | x | SDK-onnxruntime | configs/mmocr/text-recognition/text-recognition_sdk_dynamic.py | static | fp32 | True | - | x | False |
+| 3 | dbnet | ../mmocr/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py | Text Detection | ../mmdeploy_checkpoints/mmocr/dbnet/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth | ICDAR2015 | Pytorch | - | - | - | - | 0.795 | - | - |
+| 4 | dbnet | ../mmocr/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py | Text Detection | ../mmdeploy_checkpoints/mmocr/dbnet/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth | ICDAR | onnxruntime | configs/mmocr/text-detection/text-detection_onnxruntime_dynamic.py | dynamic | fp32 | True | - | - | True |
+| 5 | dbnet | ../mmocr/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py | Text Detection | ${WORK_DIR}/mmocr/dbnet/tensorrt/dynamic/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597/end2end.engine | ICDAR | tensorrt | configs/mmocr/text-detection/text-detection_tensorrt-fp16_dynamic-320x320-2240x2240.py | dynamic | fp16 | True | 0.793302 | - | True |
+| 6 | dbnet | ../mmocr/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py | Text Detection | ${WORK_DIR}/mmocr/dbnet/tensorrt/dynamic/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597 | ICDAR | SDK-tensorrt | configs/mmocr/text-detection/text-detection_sdk_dynamic.py | dynamic | fp16 | True | 0.795073 | - | True |
+
+## 5. Supported Backends
+
+- [x] ONNX Runtime
+- [x] TensorRT
+- [x] PPLNN
+- [x] ncnn
+- [x] OpenVINO
+- [x] TorchScript
+- [x] SNPE
+- [x] MMDeploy SDK
+
+## 6. Supported Codebase and Metrics
+
+| Codebase | Metric | Support |
+| -------- | -------- | ------------------ |
+| mmdet | bbox | :heavy_check_mark: |
+| | segm | :heavy_check_mark: |
+| | PQ | :x: |
+| mmcls | accuracy | :heavy_check_mark: |
+| mmseg | mIoU | :heavy_check_mark: |
+| mmpose | AR | :heavy_check_mark: |
+| | AP | :heavy_check_mark: |
+| mmocr | hmean | :heavy_check_mark: |
+| | acc | :heavy_check_mark: |
+| mmedit | PSNR | :heavy_check_mark: |
+| | SSIM | :heavy_check_mark: |
diff --git a/docs/en/06-developer-guide/support_new_backend.md b/docs/en/07-developer-guide/support_new_backend.md
similarity index 100%
rename from docs/en/06-developer-guide/support_new_backend.md
rename to docs/en/07-developer-guide/support_new_backend.md
diff --git a/docs/en/06-developer-guide/support_new_model.md b/docs/en/07-developer-guide/support_new_model.md
similarity index 100%
rename from docs/en/06-developer-guide/support_new_model.md
rename to docs/en/07-developer-guide/support_new_model.md
diff --git a/docs/en/06-developer-guide/test_rewritten_models.md b/docs/en/07-developer-guide/test_rewritten_models.md
similarity index 100%
rename from docs/en/06-developer-guide/test_rewritten_models.md
rename to docs/en/07-developer-guide/test_rewritten_models.md
diff --git a/docs/en/_static/image/quant_model.png b/docs/en/_static/image/quant_model.png
new file mode 100644
index 0000000000..ef4f8f5216
Binary files /dev/null and b/docs/en/_static/image/quant_model.png differ
diff --git a/docs/en/appendix/cross_build_snpe_service.md b/docs/en/appendix/cross_build_snpe_service.md
index f5aba17d87..042e1c5be8 100644
--- a/docs/en/appendix/cross_build_snpe_service.md
+++ b/docs/en/appendix/cross_build_snpe_service.md
@@ -73,7 +73,7 @@ $ tree -L 1
└── share
```
-## 3. \[Skipable\] Self-test whether NDK gRPC is available
+## 3. (Skippable) Self-test whether NDK gRPC is available
1. Compile the helloworld that comes with gRPC
diff --git a/docs/en/get_started.md b/docs/en/get_started.md
index e5a004c5c9..bb41daad32 100644
--- a/docs/en/get_started.md
+++ b/docs/en/get_started.md
@@ -109,7 +109,7 @@ The supported platform and device matrix is presented as following:
-**Note: if MMDeploy prebuilt package doesn't meet your target platforms or devices, please [build MMDeploy from source](./01-how-to-build/build_from_source.md)**
+**Note: if MMDeploy prebuilt package doesn't meet your target platforms or devices, please [build MMDeploy from source](01-how-to-build/build_from_source.md)**
Take the latest precompiled package as example, you can install it as follows:
@@ -162,7 +162,7 @@ export LD_LIBRARY_PATH=$CUDNN_DIR/lib64:$LD_LIBRARY_PATH
Windows-x86_64
-Please learn its prebuilt package from [this](./02-how-to-run/prebuilt_package_windows.md) guide.
+Please learn its prebuilt package from [this](02-how-to-run/prebuilt_package_windows.md) guide.
## Convert Model
@@ -197,7 +197,7 @@ python mmdeploy/tools/deploy.py \
The converted model and its meta info will be found in the path specified by `--work-dir`.
And they make up of MMDeploy Model that can be fed to MMDeploy SDK to do model inference.
-For more details about model conversion, you can read [how_to_convert_model](./02-how-to-run/convert_model.md). If you want to customize the conversion pipeline, you can edit the config file by following [this](./02-how-to-run/write_config.md) tutorial.
+For more details about model conversion, you can read [how_to_convert_model](02-how-to-run/convert_model.md). If you want to customize the conversion pipeline, you can edit the config file by following [this](02-how-to-run/write_config.md) tutorial.
```{tip}
If MMDeploy-ONNXRuntime prebuild package is installed, you can convert the above model to onnx model and perform ONNX Runtime inference
@@ -343,4 +343,4 @@ python ${MMDEPLOY_DIR}/tools/test.py \
Regarding the --model option, it represents the converted engine files path when using Model Converter to do performance test. But when you try to test the metrics by Inference SDK, this option refers to the directory path of MMDeploy Model.
```
-You can read [how to evaluate a model](02-how-to-run/how_to_evaluate_a_model.md) for more details.
+You can read [how to evaluate a model](02-how-to-run/profile_model.md) for more details.
diff --git a/docs/en/index.rst b/docs/en/index.rst
index 0bc36a6425..297117a05a 100644
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -22,8 +22,9 @@ You can switch between Chinese and English documents in the lower-left corner of
02-how-to-run/convert_model.md
02-how-to-run/write_config.md
- 02-how-to-run/how_to_evaluate_a_model.md
- 02-how-to-run/how_to_measure_performance_of_models.md
+ 02-how-to-run/profile_model.md
+ 02-how-to-run/quantize_model.md
+ 02-how-to-run/useful_tools.md
.. toctree::
:maxdepth: 1
@@ -32,6 +33,7 @@ You can switch between Chinese and English documents in the lower-left corner of
03-benchmark/supported_models.md
03-benchmark/benchmark.md
03-benchmark/benchmark_edge.md
+ 03-benchmark/quantization.md
.. toctree::
:maxdepth: 1
@@ -50,34 +52,38 @@ You can switch between Chinese and English documents in the lower-left corner of
:maxdepth: 1
:caption: Backend Support
+ 05-supported-backends/ncnn.md
05-supported-backends/onnxruntime.md
- 05-supported-backends/tensorrt.md
05-supported-backends/openvino.md
- 05-supported-backends/ncnn.md
05-supported-backends/pplnn.md
+ 05-supported-backends/snpe.md
+ 05-supported-backends/tensorrt.md
05-supported-backends/torchscript.md
.. toctree::
:maxdepth: 1
:caption: Custom Ops
- ops/onnxruntime.md
- ops/tensorrt.md
- ops/ncnn.md
+ 06-custom-ops/onnxruntime.md
+ 06-custom-ops/tensorrt.md
+ 06-custom-ops/ncnn.md
.. toctree::
:maxdepth: 1
:caption: Developer Guide
- 06-developer-guide/support_new_model.md
- 06-developer-guide/support_new_backend.md
- 06-developer-guide/add_test_units_for_backend_ops.md
- 06-developer-guide/test_rewritten_models.md
- 06-developer-guide/partition_model.md
+ 07-developer-guide/support_new_model.md
+ 07-developer-guide/support_new_backend.md
+ 07-developer-guide/add_backend_ops_unittest.md
+ 07-developer-guide/test_rewritten_models.md
+ 07-developer-guide/partition_model.md
+ 07-developer-guide/regression_test.md
.. toctree::
:maxdepth: 1
- :caption: Tutorials on Model Deployment
+ :caption: Experimental feature
+
+ experimental/onnx_optimizer.md
.. toctree::
:maxdepth: 1
diff --git a/docs/zh_cn/01-how-to-build/android.md b/docs/zh_cn/01-how-to-build/android.md
index 40828a9b77..2056ead7f0 100644
--- a/docs/zh_cn/01-how-to-build/android.md
+++ b/docs/zh_cn/01-how-to-build/android.md
@@ -17,7 +17,7 @@ MMDeploy converter 部分在 linux 平台上执行,SDK 部分在 android 平台
MMDeploy 的交叉编译分为两步:
-1. 在 linux 平台上构建 MMDeploy converter. 请根据 [How to build linux](./linux-x86_64.md) 进行构建.
+1. 在 linux 平台上构建 MMDeploy converter. 请根据 [How to build linux](linux-x86_64.md) 进行构建.
2. 使用 android 工具链构建 MMDeploy SDK.
diff --git a/docs/zh_cn/01-how-to-build/build_from_docker.md b/docs/zh_cn/01-how-to-build/build_from_docker.md
index 0825262909..7cc44072f7 100644
--- a/docs/zh_cn/01-how-to-build/build_from_docker.md
+++ b/docs/zh_cn/01-how-to-build/build_from_docker.md
@@ -51,7 +51,7 @@ docker run --gpus all -it mmdeploy:master-gpu
如 [这里](https://forums.developer.nvidia.com/t/cuda-error-the-provided-ptx-was-compiled-with-an-unsupported-toolchain/185754)所说,更新 GPU 的驱动到您的GPU能使用的最新版本。
-2. docker: Error response from daemon: could not select device driver "" with capabilities: \[\[gpu\]\].
+2. docker: Error response from daemon: could not select device driver "" with capabilities: \[gpu\].
```
# Add the package repositories
diff --git a/docs/zh_cn/01-how-to-build/build_from_source.md b/docs/zh_cn/01-how-to-build/build_from_source.md
index 66b6907d03..d43d80a316 100644
--- a/docs/zh_cn/01-how-to-build/build_from_source.md
+++ b/docs/zh_cn/01-how-to-build/build_from_source.md
@@ -1,6 +1,6 @@
# 源码安装
-如果环境允许(网络良好且宿主机强劲),我们建议使用[docker 方式](build_from_docker.md)。
+如果环境允许(网络良好且宿主机强劲),我们建议使用 [docker 方式](build_from_docker.md) 。
## 下载
diff --git a/docs/zh_cn/01-how-to-build/jetsons.md b/docs/zh_cn/01-how-to-build/jetsons.md
index c952da8980..accd986fbb 100644
--- a/docs/zh_cn/01-how-to-build/jetsons.md
+++ b/docs/zh_cn/01-how-to-build/jetsons.md
@@ -207,7 +207,7 @@ export MMDEPLOY_DIR=$(pwd)
由于一些算子采用的是 OpenMMLab 代码库中的实现,并不被 TenorRT 支持,
因此我们需要自定义 TensorRT 插件,例如 `roi_align`, `scatternd` 等。
-你可以从[这里](../../en/ops/tensorrt.md)找到完整的自定义插件列表。
+你可以从[这里](../06-custom-ops/tensorrt.md)找到完整的自定义插件列表。
```shell
# 编译 TensorRT 自定义算子
diff --git a/docs/zh_cn/01-how-to-build/windows.md b/docs/zh_cn/01-how-to-build/windows.md
index 0257a2fd7a..2d9d4b4def 100644
--- a/docs/zh_cn/01-how-to-build/windows.md
+++ b/docs/zh_cn/01-how-to-build/windows.md
@@ -266,7 +266,7 @@ $env:MMDEPLOY_DIR="$pwd"
##### 编译自定义算子
-如果您选择了ONNXRuntime,TensorRT 和 ncnn 任一种推理后端,您需要编译对应的自定义算子库。
+如果您选择了 ONNXRuntime,TensorRT 和 ncnn 任一种推理后端,您需要编译对应的自定义算子库。
- **ONNXRuntime** 自定义算子
diff --git a/docs/zh_cn/02-how-to-run/convert_model.md b/docs/zh_cn/02-how-to-run/convert_model.md
index 8ca64ce7be..54e17fc51a 100644
--- a/docs/zh_cn/02-how-to-run/convert_model.md
+++ b/docs/zh_cn/02-how-to-run/convert_model.md
@@ -82,7 +82,7 @@ python ./tools/deploy.py \
## 如何评测模型
-您可以尝试去评测转换出来的模型 ,参考 [profile 模型](./profile_model.md)。
+您可以尝试去评测转换出来的模型 ,参考 [profile 模型](profile_model.md)。
## 各后端已支持导出的模型列表
diff --git a/docs/zh_cn/02-how-to-run/quantize_model.md b/docs/zh_cn/02-how-to-run/quantize_model.md
index adea0236dd..144cc3f043 100644
--- a/docs/zh_cn/02-how-to-run/quantize_model.md
+++ b/docs/zh_cn/02-how-to-run/quantize_model.md
@@ -14,14 +14,9 @@
以 ncnn backend 为例,完整的工作流如下:
-```{mermaid}
-flowchart TD;
- torch模型-->非标准onnx;
- 非标准onnx-->ncnn-fp32;
- 非标准onnx-->量化表;
- 量化表-->ncnn-int8;
- ncnn-fp32-->ncnn-int8;
-```
+
+
+
mmdeploy 基于静态图(onnx)生成推理框架所需的量化表,再用后端工具把浮点模型转为定点。
@@ -68,4 +63,4 @@ python3 tools/deploy.py configs/mmcls/classification_ncnn-int8_static.py ${MOD
| ---- | ------ | ------ | -------- | ------ |
| 用法 | QAT | PTQ | 测试精度 | PTQ |
-**强烈建议**量化结束后,[按此文档](./profile_model.md)验证模型精度。[这里](../03-benchmark/quantization.md)是一些量化模型测试结果。
+**强烈建议**量化结束后,[按此文档](profile_model.md) 验证模型精度。[这里](../03-benchmark/quantization.md) 是一些量化模型测试结果。
diff --git a/docs/zh_cn/02-how-to-run/useful_tools.md b/docs/zh_cn/02-how-to-run/useful_tools.md
new file mode 100644
index 0000000000..a21841456b
--- /dev/null
+++ b/docs/zh_cn/02-how-to-run/useful_tools.md
@@ -0,0 +1,199 @@
+# 更多工具介绍
+
+除 `deploy.py` 以外, tools 目录下有很多实用工具
+
+## torch2onnx
+
+把 OpenMMLab 模型转 onnx 格式。
+
+### 用法
+
+```bash
+python tools/torch2onnx.py \
+ ${DEPLOY_CFG} \
+ ${MODEL_CFG} \
+ ${CHECKPOINT} \
+ ${INPUT_IMG} \
+ --work-dir ${WORK_DIR} \
+ --device cpu \
+ --log-level INFO
+```
+
+### 参数说明
+
+- `deploy_cfg` : The path of the deploy config file in MMDeploy codebase.
+- `model_cfg` : The path of model config file in OpenMMLab codebase.
+- `checkpoint` : The path of the model checkpoint file.
+- `img` : The path of the image file used to convert the model.
+- `--work-dir` : Directory to save output ONNX models. Default is `./work-dir`.
+- `--device` : The device used for conversion. If not specified, it will be set to `cpu`.
+- `--log-level` : Set the log level; one of `'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'`. If not specified, it will be set to `INFO`.
+
+## extract
+
+有 `Mark` 节点的 onnx 模型会被分成多个子图,这个工具用来提取 onnx 模型中的子图。
+
+### 用法
+
+```bash
+python tools/extract.py \
+ ${INPUT_MODEL} \
+ ${OUTPUT_MODEL} \
+ --start ${PARITION_START} \
+ --end ${PARITION_END} \
+ --log-level INFO
+```
+
+### 参数说明
+
+- `input_model` : The path of input ONNX model. The output ONNX model will be extracted from this model.
+- `output_model` : The path of output ONNX model.
+- `--start` : The start point of the extracted model, with format `<function_name>:<input/output>`. The `function_name` comes from the decorator `@mark`.
+- `--end` : The end point of the extracted model, with format `<function_name>:<input/output>`. The `function_name` comes from the decorator `@mark`.
+- `--log-level` : Set the log level; one of `'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'`. If not specified, it will be set to `INFO`.
+
+### 注意事项
+
+要支持模型分块,必须在 onnx 模型中添加 mark 节点,用`@mark` 修饰。
+下面这个例子里 mark 了 `multiclass_nms`,在 NMS 前设置 `end=multiclass_nms:input` 提取子图。
+
+```python
+@mark('multiclass_nms', inputs=['boxes', 'scores'], outputs=['dets', 'labels'])
+def multiclass_nms(*args, **kwargs):
+ """Wrapper function for `_multiclass_nms`."""
+```
+
+## onnx2pplnn
+
+这个工具可以把 onnx 模型转成 pplnn 格式。
+
+### 用法
+
+```bash
+python tools/onnx2pplnn.py \
+ ${ONNX_PATH} \
+ ${OUTPUT_PATH} \
+ --device cuda:0 \
+ --opt-shapes [224,224] \
+ --log-level INFO
+```
+
+### 参数说明
+
+- `onnx_path`: The path of the `ONNX` model to convert.
+- `output_path`: The converted `PPLNN` algorithm path in json format.
+- `device`: The device of the model during conversion.
+- `opt-shapes`: Optimal shapes for PPLNN optimization. The shape of each tensor should be wrapped with "\[\]" or "()" and the shapes of tensors should be separated by ",".
+- `--log-level`: Set the log level; one of `'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'`. If not specified, it will be set to `INFO`.
+
+## onnx2tensorrt
+
+这个工具把 onnx 转成 trt .engine 格式。
+
+### 用法
+
+```bash
+python tools/onnx2tensorrt.py \
+ ${DEPLOY_CFG} \
+ ${ONNX_PATH} \
+ ${OUTPUT} \
+ --device-id 0 \
+ --log-level INFO \
+ --calib-file /path/to/file
+```
+
+### 参数说明
+
+- `deploy_cfg` : The path of the deploy config file in MMDeploy codebase.
+- `onnx_path` : The ONNX model path to convert.
+- `output` : The path of output TensorRT engine.
+- `--device-id` : The device index, default to `0`.
+- `--calib-file` : The calibration data used to calibrate engine to int8.
+- `--log-level` : Set the log level; one of `'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'`. If not specified, it will be set to `INFO`.
+
+## onnx2ncnn
+
+onnx 转 ncnn
+
+### 用法
+
+```bash
+python tools/onnx2ncnn.py \
+ ${ONNX_PATH} \
+ ${NCNN_PARAM} \
+ ${NCNN_BIN} \
+ --log-level INFO
+```
+
+### 参数说明
+
+- `onnx_path` : The path of the `ONNX` model to convert from.
+- `output_param` : The converted `ncnn` param path.
+- `output_bin` : The converted `ncnn` bin path.
+- `--log-level` : Set the log level; one of `'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'`. If not specified, it will be set to `INFO`.
+
+## profile
+
+这个工具用来测试 torch 和 trt 等后端的速度,注意测试不包含前后处理。
+
+### 用法
+
+```bash
+python tools/profile.py \
+ ${DEPLOY_CFG} \
+ ${MODEL_CFG} \
+ ${IMAGE_DIR} \
+ --model ${MODEL} \
+ --device ${DEVICE} \
+ --shape ${SHAPE} \
+ --num-iter ${NUM_ITER} \
+ --warmup ${WARMUP} \
+ --cfg-options ${CFG_OPTIONS}
+```
+
+### 参数说明
+
+- `deploy_cfg` : The path of the deploy config file in MMDeploy codebase.
+- `model_cfg` : The path of model config file in OpenMMLab codebase.
+- `image_dir` : The directory of image files used to test the model.
+- `--model` : The path of the model to be tested.
+- `--shape` : Input shape of the model by `HxW`, e.g., `800x1344`. If not specified, it would use `input_shape` from deploy config.
+- `--num-iter` : Number of iteration to run inference. Default is `100`.
+- `--warmup` : Number of iteration to warm-up the machine. Default is `10`.
+- `--device` : The device type. If not specified, it will be set to `cuda:0`.
+- `--cfg-options` : Optional key-value pairs to override in the model config.
+
+### 使用举例
+
+```shell
+python tools/profile.py \
+ configs/mmcls/classification_tensorrt_dynamic-224x224-224x224.py \
+ ../mmclassification/configs/resnet/resnet18_8xb32_in1k.py \
+ ../mmdetection/demo \
+ --model work-dirs/mmcls/resnet/trt/end2end.engine \
+ --device cuda \
+ --shape 224x224 \
+ --num-iter 100 \
+ --warmup 10
+```
+
+输出:
+
+```text
+----- Settings:
++------------+---------+
+| batch size | 1 |
+| shape | 224x224 |
+| iterations | 100 |
+| warmup | 10 |
++------------+---------+
+----- Results:
++--------+------------+---------+
+| Stats | Latency/ms | FPS |
++--------+------------+---------+
+| Mean | 1.535 | 651.656 |
+| Median | 1.665 | 600.569 |
+| Min | 1.308 | 764.341 |
+| Max | 1.689 | 591.983 |
++--------+------------+---------+
+```
diff --git a/docs/zh_cn/03-benchmark/benchmark.md b/docs/zh_cn/03-benchmark/benchmark.md
index b97777afa0..72ffe195e5 100644
--- a/docs/zh_cn/03-benchmark/benchmark.md
+++ b/docs/zh_cn/03-benchmark/benchmark.md
@@ -1,4 +1,4 @@
-# Benchmark
+# 精度速度测试结果
## Backends
diff --git a/docs/zh_cn/04-supported-codebases/mmcls.md b/docs/zh_cn/04-supported-codebases/mmcls.md
new file mode 100644
index 0000000000..1bfa37118d
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmcls.md
@@ -0,0 +1,19 @@
+# mmcls 模型支持列表
+
+[MMClassification](https://github.com/open-mmlab/mmclassification) 是基于 Python 的图像分类工具,属于 [OpenMMLab](https://openmmlab.com)。
+
+## 安装 mmcls
+
+请参考 [install.md](https://github.com/open-mmlab/mmclassification/blob/master/docs/en/install.md) 进行安装。
+
+## 支持列表
+
+| Model | ONNX Runtime | TensorRT | ncnn | PPLNN | OpenVINO | Model config |
+| :---------------- | :----------: | :------: | :--: | :---: | :------: | :---------------------------------------------------------------------------------------------: |
+| ResNet | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) |
+| ResNeXt | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) |
+| SE-ResNet | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) |
+| MobileNetV2 | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) |
+| ShuffleNetV1 | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) |
+| ShuffleNetV2 | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) |
+| VisionTransformer | Y | Y | Y | ? | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) |
diff --git a/docs/zh_cn/04-supported-codebases/mmdet.md b/docs/zh_cn/04-supported-codebases/mmdet.md
new file mode 100644
index 0000000000..cbc883e53c
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmdet.md
@@ -0,0 +1,29 @@
+# mmdet 模型支持列表
+
+mmdet 是基于 PyTorch 的检测工具箱,属于 [OpenMMLab](https://openmmlab.com/)。
+
+## 安装 mmdet
+
+请参照 [get_started.md](https://github.com/open-mmlab/mmdetection/blob/master/docs/en/get_started.md)。
+
+## 支持列表
+
+| Model | Task | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Model config |
+| :----------------: | :------------------: | :---------: | :------: | :--: | :---: | :------: | :----------------------------------------------------------------------------------: |
+| ATSS | ObjectDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) |
+| FCOS | ObjectDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) |
+| FoveaBox | ObjectDetection | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) |
+| FSAF | ObjectDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) |
+| RetinaNet | ObjectDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) |
+| SSD | ObjectDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) |
+| VFNet | ObjectDetection | N | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) |
+| YOLOv3 | ObjectDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) |
+| YOLOX | ObjectDetection | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) |
+| Cascade R-CNN | ObjectDetection | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) |
+| Faster R-CNN | ObjectDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) |
+| Faster R-CNN + DCN | ObjectDetection | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) |
+| GFL | ObjectDetection | Y | Y | N | ? | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) |
+| RepPoints | ObjectDetection | N | Y | N | ? | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints) |
+| Cascade Mask R-CNN | InstanceSegmentation | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) |
+| Mask R-CNN | InstanceSegmentation | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) |
+| Swin Transformer | InstanceSegmentation | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/swin) |
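+
+### 使用举例
+
+下面给出一个把 RetinaNet 转成 TensorRT 模型的示意命令。其中部署配置 `configs/mmdet/detection/detection_tensorrt_dynamic-320x320-1344x1344.py`、`${MMDET_DIR}` 与 `${CHECKPOINT}` 均为示例假设,请按实际环境调整:
+
+```bash
+python tools/deploy.py \
+    configs/mmdet/detection/detection_tensorrt_dynamic-320x320-1344x1344.py \
+    ${MMDET_DIR}/configs/retinanet/retinanet_r50_fpn_1x_coco.py \
+    ${CHECKPOINT} \
+    ${MMDET_DIR}/demo/demo.jpg \
+    --work-dir work-dirs/mmdet/retinanet/trt \
+    --device cuda:0
+```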
diff --git a/docs/zh_cn/04-supported-codebases/mmdet3d.md b/docs/zh_cn/04-supported-codebases/mmdet3d.md
new file mode 100644
index 0000000000..4ced6e16af
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmdet3d.md
@@ -0,0 +1,40 @@
+# mmdet3d 模型支持列表
+
+MMDetection3d 是一个通用 3D 物体检测平台,属于 [OpenMMLab](https://openmmlab.com/)。
+
+## 安装 mmdet3d
+
+参照 [getting_started.md](https://github.com/open-mmlab/mmdetection3d/blob/master/docs/en/getting_started.md)。
+
+## 示例
+
+```bash
+python tools/deploy.py \
+ configs/mmdet3d/voxel-detection/voxel-detection_tensorrt_dynamic.py \
+ ${MMDET3D_DIR}/configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py \
+ checkpoints/point_pillars.pth \
+ ${MMDET3D_DIR}/demo/data/kitti/kitti_000008.bin \
+ --work-dir \
+ work_dir \
+ --show \
+ --device \
+ cuda:0
+```
+
+## 支持列表
+
+| Model | Task | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Model config |
+| :----------: | :------------: | :---------: | :------: | :--: | :---: | :------: | :------------------------------------------------------------------------------------: |
+| PointPillars | VoxelDetection | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/pointpillars) |
+
+## 注意事项
+
+体素检测 onnx 模型不包含 model.voxelize 层和模型后处理,可以用 Python API 调用这些函数。
+
+示例:
+
+```python
+from mmdeploy.codebase.mmdet3d.deploy import VoxelDetectionModel
+VoxelDetectionModel.voxelize(...)
+VoxelDetectionModel.post_process(...)
+```
diff --git a/docs/zh_cn/04-supported-codebases/mmedit.md b/docs/zh_cn/04-supported-codebases/mmedit.md
new file mode 100644
index 0000000000..fedc8d13a5
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmedit.md
@@ -0,0 +1,20 @@
+# mmedit 模型支持列表
+
+[mmedit](https://github.com/open-mmlab/mmediting) 是基于 PyTorch 的开源图像和视频编辑工具箱,属于 [OpenMMLab](https://openmmlab.com/)。
+
+## 安装 mmedit
+
+参照 [official installation guide](https://mmediting.readthedocs.io/en/latest/install.html#installation)。
+
+## 支持列表
+
+| Model | Task | ONNX Runtime | TensorRT | ncnn | PPLNN | OpenVINO | Model config |
+| :---------- | :--------------- | :----------: | :------: | :--: | :---: | :------: | :--------------------------------------------------------------------------------------------: |
+| SRCNN | super-resolution | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) |
+| ESRGAN | super-resolution | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) |
+| ESRGAN-PSNR | super-resolution | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) |
+| SRGAN | super-resolution | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) |
+| SRResNet | super-resolution | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) |
+| Real-ESRGAN | super-resolution | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) |
+| EDSR | super-resolution | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) |
+| RDN | super-resolution | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) |
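+
+### 使用举例
+
+下面给出一个把 SRCNN 转成 ONNX Runtime 模型的示意命令。其中部署配置 `configs/mmedit/super-resolution/super-resolution_onnxruntime_dynamic.py`、`${MMEDIT_DIR}`、`${CHECKPOINT}` 与输入图片 `${IMAGE}` 均为示例假设,请按实际环境调整:
+
+```bash
+python tools/deploy.py \
+    configs/mmedit/super-resolution/super-resolution_onnxruntime_dynamic.py \
+    ${MMEDIT_DIR}/configs/restorers/srcnn/srcnn_x4k915_g1_1000k_div2k.py \
+    ${CHECKPOINT} \
+    ${IMAGE} \
+    --work-dir work-dirs/mmedit/srcnn/ort \
+    --device cpu
+```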
diff --git a/docs/zh_cn/04-supported-codebases/mmocr.md b/docs/zh_cn/04-supported-codebases/mmocr.md
new file mode 100644
index 0000000000..022b313526
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmocr.md
@@ -0,0 +1,163 @@
+# mmocr 模型支持列表
+
+mmocr 是一个基于 PyTorch 和 mmdetection 的开源工具箱,用于文本检测,文本识别以及相应的下游任务,例如关键信息提取,是 [OpenMMLab](https://openmmlab.com/)项目的一部分。
+
+## 安装
+
+参照 [install.md](https://mmocr.readthedocs.io/en/latest/install.html)。
+
+## 支持列表
+
+| Model | Task | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Model config |
+| :----- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: | :-----------------------------------------------------------------------------: |
+| DBNet | text-detection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) |
+| PSENet | text-detection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/psenet) |
+| PANet | text-detection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/panet) |
+| CRNN | text-recognition | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) |
+| SAR | text-recognition | N | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) |
+| SATRN | text-recognition | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/satrn) |
+
+## 注意事项
+
+请注意,ncnn、pplnn 和 OpenVINO 仅支持 DBNet 的 DBNet18 配置。
+
+对于在 ICDAR 数据集上预训练的 PANet [checkpoint](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth),如果要把模型转换为 fp16 精度的 TensorRT 模型,请尝试以下重写脚本,转换命令可参考脚本后面的示例。
+
+```python
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Sequence
+
+import torch
+import torch.nn.functional as F
+
+from mmdeploy.core import FUNCTION_REWRITER
+from mmdeploy.utils.constants import Backend
+
+FACTOR = 32
+ENABLE = False
+CHANNEL_THRESH = 400
+
+
+@FUNCTION_REWRITER.register_rewriter(
+ func_name='mmocr.models.textdet.necks.FPEM_FFM.forward',
+ backend=Backend.TENSORRT.value)
+def fpem_ffm__forward__trt(ctx, self, x: Sequence[torch.Tensor], *args,
+ **kwargs) -> Sequence[torch.Tensor]:
+ """Rewrite `forward` of FPEM_FFM for tensorrt backend.
+
+ Rewrite this function avoid overflow for tensorrt-fp16 with the checkpoint
+ `https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm
+ _sbn_600e_icdar2015_20210219-42dbe46a.pth`
+
+ Args:
+ ctx (ContextCaller): The context with additional information.
+ self: The instance of the class FPEM_FFM.
+ x (List[Tensor]): A list of feature maps of shape (N, C, H, W).
+
+ Returns:
+ outs (List[Tensor]): A list of feature maps of shape (N, C, H, W).
+ """
+ c2, c3, c4, c5 = x
+ # reduce channel
+ c2 = self.reduce_conv_c2(c2)
+ c3 = self.reduce_conv_c3(c3)
+ c4 = self.reduce_conv_c4(c4)
+
+ if ENABLE:
+ bn_w = self.reduce_conv_c5[1].weight / torch.sqrt(
+ self.reduce_conv_c5[1].running_var + self.reduce_conv_c5[1].eps)
+ bn_b = self.reduce_conv_c5[
+ 1].bias - self.reduce_conv_c5[1].running_mean * bn_w
+ bn_w = bn_w.reshape(1, -1, 1, 1).repeat(1, 1, c5.size(2), c5.size(3))
+ bn_b = bn_b.reshape(1, -1, 1, 1).repeat(1, 1, c5.size(2), c5.size(3))
+ conv_b = self.reduce_conv_c5[0].bias.reshape(1, -1, 1, 1).repeat(
+ 1, 1, c5.size(2), c5.size(3))
+ c5 = FACTOR * (self.reduce_conv_c5[:-1](c5)) - (FACTOR - 1) * (
+ bn_w * conv_b + bn_b)
+ c5 = self.reduce_conv_c5[-1](c5)
+ else:
+ c5 = self.reduce_conv_c5(c5)
+
+ # FPEM
+ for i, fpem in enumerate(self.fpems):
+ c2, c3, c4, c5 = fpem(c2, c3, c4, c5)
+ if i == 0:
+ c2_ffm = c2
+ c3_ffm = c3
+ c4_ffm = c4
+ c5_ffm = c5
+ else:
+ c2_ffm += c2
+ c3_ffm += c3
+ c4_ffm += c4
+ c5_ffm += c5
+
+ # FFM
+ c5 = F.interpolate(
+ c5_ffm,
+ c2_ffm.size()[-2:],
+ mode='bilinear',
+ align_corners=self.align_corners)
+ c4 = F.interpolate(
+ c4_ffm,
+ c2_ffm.size()[-2:],
+ mode='bilinear',
+ align_corners=self.align_corners)
+ c3 = F.interpolate(
+ c3_ffm,
+ c2_ffm.size()[-2:],
+ mode='bilinear',
+ align_corners=self.align_corners)
+ outs = [c2_ffm, c3, c4, c5]
+ return tuple(outs)
+
+
+@FUNCTION_REWRITER.register_rewriter(
+ func_name='mmdet.models.backbones.resnet.BasicBlock.forward',
+ backend=Backend.TENSORRT.value)
+def basic_block__forward__trt(ctx, self, x: torch.Tensor) -> torch.Tensor:
+ """Rewrite `forward` of BasicBlock for tensorrt backend.
+
+ Rewrite this function avoid overflow for tensorrt-fp16 with the checkpoint
+ `https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm
+ _sbn_600e_icdar2015_20210219-42dbe46a.pth`
+
+ Args:
+ ctx (ContextCaller): The context with additional information.
+        self: The instance of the class BasicBlock.
+ x (Tensor): The input tensor of shape (N, C, H, W).
+
+ Returns:
+ outs (Tensor): The output tensor of shape (N, C, H, W).
+ """
+ if self.conv1.in_channels < CHANNEL_THRESH:
+ return ctx.origin_func(self, x)
+
+ identity = x
+
+ out = self.conv1(x)
+ out = self.norm1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+
+ if torch.abs(self.norm2(out)).max() < 65504:
+ out = self.norm2(out)
+ out += identity
+ out = self.relu(out)
+ return out
+ else:
+ global ENABLE
+ ENABLE = True
+ # the output of the last bn layer exceeds the range of fp16
+ w1 = self.norm2.weight / torch.sqrt(self.norm2.running_var +
+ self.norm2.eps)
+ bias = self.norm2.bias - self.norm2.running_mean * w1
+ w1 = w1.reshape(1, -1, 1, 1).repeat(1, 1, out.size(2), out.size(3))
+ bias = bias.reshape(1, -1, 1, 1).repeat(1, 1, out.size(2),
+ out.size(3)) + identity
+ out = self.relu(w1 * (out / FACTOR) + bias / FACTOR)
+
+ return out
+
+```
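+
+在保证上面的重写脚本在模型转换前被导入(从而完成重写器注册)的前提下,可以参考下面的示意命令完成 fp16 TensorRT 转换。其中部署配置 `configs/mmocr/text-detection/text-detection_tensorrt-fp16_dynamic-320x320-2240x2240.py`、模型配置、`${MMOCR_DIR}` 与 `${CHECKPOINT}` 均为示例假设,请按实际环境调整:
+
+```bash
+python tools/deploy.py \
+    configs/mmocr/text-detection/text-detection_tensorrt-fp16_dynamic-320x320-2240x2240.py \
+    ${MMOCR_DIR}/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py \
+    ${CHECKPOINT} \
+    ${MMOCR_DIR}/demo/demo_text_det.jpg \
+    --work-dir work-dirs/mmocr/panet/trt-fp16 \
+    --device cuda:0
+```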
diff --git a/docs/zh_cn/04-supported-codebases/mmpose.md b/docs/zh_cn/04-supported-codebases/mmpose.md
new file mode 100644
index 0000000000..641ca28673
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmpose.md
@@ -0,0 +1,31 @@
+# mmpose 模型支持列表
+
+[mmpose](https://github.com/open-mmlab/mmpose) 是一个基于 PyTorch 的姿态估计的开源工具箱,也是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。
+
+## 安装 mmpose
+
+参照 [official installation guide](https://mmpose.readthedocs.io/en/latest/install.html)。
+
+## 支持列表
+
+| Model | Task | ONNX Runtime | TensorRT | ncnn | PPLNN | OpenVINO | Model config |
+| :-------- | :------------ | :----------: | :------: | :--: | :---: | :------: | :-----------------------------------------------------------------------------------------: |
+| HRNet | PoseDetection | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#hrnet-cvpr-2019) |
+| MSPN | PoseDetection | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#mspn-arxiv-2019) |
+| LiteHRNet | PoseDetection | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#litehrnet-cvpr-2021) |
+
+### 使用方法
+
+```bash
+python tools/deploy.py \
+configs/mmpose/posedetection_tensorrt_static-256x192.py \
+$MMPOSE_DIR/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py \
+$MMPOSE_DIR/checkpoints/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth \
+$MMDEPLOY_DIR/demo/resources/human-pose.jpg \
+--work-dir work-dirs/mmpose/topdown/hrnet/trt \
+--device cuda
+```
+
+注意事项
+
+- mmpose 模型需要额外的输入,但我们无法直接获取它。在导出模型时,可以使用 `$MMDEPLOY_DIR/demo/resources/human-pose.jpg` 作为输入。
diff --git a/docs/zh_cn/04-supported-codebases/mmrotate.md b/docs/zh_cn/04-supported-codebases/mmrotate.md
new file mode 100644
index 0000000000..5f87f091d9
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmrotate.md
@@ -0,0 +1,48 @@
+# mmrotate 模型支持列表
+
+[mmrotate](https://github.com/open-mmlab/mmrotate) 是一个基于 PyTorch 的旋转物体检测的开源工具箱,也是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。
+
+## 安装 mmrotate
+
+参照 [official installation guide](https://mmrotate.readthedocs.io/en/latest/install.html)。
+
+## 支持列表
+
+| Model | Task | ONNX Runtime | TensorRT | NCNN | PPLNN | OpenVINO | Model config |
+| :--------------- | :--------------- | :----------: | :------: | :--: | :---: | :------: | :--------------------------------------------------------------------------------------------: |
+| RotatedRetinaNet | RotatedDetection | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/rotated_retinanet/README.md) |
+| Oriented RCNN | RotatedDetection | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/oriented_rcnn/README.md) |
+| Gliding Vertex | RotatedDetection | N | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/gliding_vertex/README.md) |
+| RoI Transformer | RotatedDetection | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/roi_trans/README.md) |
+
+### 使用举例
+
+```bash
+# convert ort
+python tools/deploy.py \
+configs/mmrotate/rotated-detection_onnxruntime_dynamic.py \
+$MMROTATE_DIR/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le135.py \
+$MMROTATE_DIR/checkpoints/rotated_retinanet_obb_r50_fpn_1x_dota_le135-e4131166.pth \
+$MMROTATE_DIR/demo/demo.jpg \
+--work-dir work-dirs/mmrotate/rotated_retinanet/ort \
+--device cpu
+
+# compute metric
+python tools/test.py \
+ configs/mmrotate/rotated-detection_onnxruntime_dynamic.py \
+ $MMROTATE_DIR/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le135.py \
+ --model work-dirs/mmrotate/rotated_retinanet/ort/end2end.onnx \
+ --metrics mAP
+
+# generate submit file
+python tools/test.py \
+ configs/mmrotate/rotated-detection_onnxruntime_dynamic.py \
+ $MMROTATE_DIR/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le135.py \
+ --model work-dirs/mmrotate/rotated_retinanet/ort/end2end.onnx \
+ --format-only \
+ --metric-options submission_dir=work-dirs/mmrotate/rotated_retinanet/ort/Task1_results
+```
+
+注意:
+
+- mmrotate 模型需要额外输入,但我们无法直接获取它。在导出模型时,可以使用 `$MMROTATE_DIR/demo/demo.jpg` 作为输入。
diff --git a/docs/zh_cn/04-supported-codebases/mmseg.md b/docs/zh_cn/04-supported-codebases/mmseg.md
new file mode 100644
index 0000000000..845aabd780
--- /dev/null
+++ b/docs/zh_cn/04-supported-codebases/mmseg.md
@@ -0,0 +1,53 @@
+# mmseg 模型支持列表
+
+mmseg 是一个基于 PyTorch 的开源语义分割工具箱,也是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。
+
+## 安装 mmseg
+
+参照 [get_started.md](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/get_started.md#installation)。
+
+## 支持列表
+
+| Model | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVino | Model config |
+| :--------------------------- | :---------: | :------: | :--: | :---: | :------: | :--------------------------------------------------------------------------------------: |
+| FCN | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) |
+| PSPNet[\*](#static_shape) | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) |
+| DeepLabV3 | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) |
+| DeepLabV3+ | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) |
+| Fast-SCNN[\*](#static_shape) | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) |
+| UNet | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) |
+| ANN[\*](#static_shape) | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) |
+| APCNet | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) |
+| BiSeNetV1 | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) |
+| BiSeNetV2 | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) |
+| CGNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) |
+| DMNet | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) |
+| DNLNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) |
+| EMANet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) |
+| EncNet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) |
+| ERFNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) |
+| FastFCN | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) |
+| GCNet | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) |
+| ICNet[\*](#static_shape) | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) |
+| ISANet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) |
+| NonLocal Net | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) |
+| OCRNet | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) |
+| PointRend | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) |
+| Semantic FPN | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) |
+| STDC | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) |
+| UPerNet[\*](#static_shape) | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) |
+| DANet | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) |
+| Segmenter[\*](#static_shape) | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segmenter) |
+| SegFormer[\*](#static_shape) | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segformer) |
+| SETR | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/setr) |
+| CCNet | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ccnet) |
+| PSANet | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/psanet) |
+| DPT | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dpt) |
+
+## 注意事项
+
+- 所有 mmseg 模型仅支持 "whole" 推理模式。
+
+- PSPNet,Fast-SCNN 仅支持静态输入,因为多数推理框架的 [nn.AdaptiveAvgPool2d](https://github.com/open-mmlab/mmsegmentation/blob/97f9670c5a4a2a3b4cfb411bcc26db16b23745f7/mmseg/models/decode_heads/psp_head.py#L38) 不支持动态输入。
+
+- 对于仅支持静态形状的模型,应使用静态形状的部署配置文件,例如 `configs/mmseg/segmentation_tensorrt_static-1024x2048.py`,转换命令可参考下面的示例。
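+
+下面是一个使用静态形状配置转换 PSPNet 的示意命令。其中 `${MMSEG_DIR}`、`${CHECKPOINT}` 与模型配置文件名均为示例假设,请按实际环境调整:
+
+```bash
+python tools/deploy.py \
+    configs/mmseg/segmentation_tensorrt_static-1024x2048.py \
+    ${MMSEG_DIR}/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+    ${CHECKPOINT} \
+    ${MMSEG_DIR}/demo/demo.png \
+    --work-dir work-dirs/mmseg/pspnet/trt \
+    --device cuda:0
+```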
diff --git a/docs/zh_cn/05-supported-backends/ncnn.md b/docs/zh_cn/05-supported-backends/ncnn.md
new file mode 100644
index 0000000000..8505761384
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/ncnn.md
@@ -0,0 +1,18 @@
+# ncnn 支持情况
+
+目前对 ncnn 特性使用情况如下:
+
+| feature | windows | linux | mac | android |
+| :----------------: | :-----: | :---: | :-: | :-----: |
+| fp32 inference | ✔️ | ✔️ | ✔️ | ✔️ |
+| int8 model convert | - | ✔️ | ✔️ | - |
+| nchw layout | ✔️ | ✔️ | ✔️ | ✔️ |
+| Vulkan support | - | ✔️ | ✔️ | ✔️ |
+
+以下特性还不能由 mmdeploy 自动开启,需要手动修改 ncnn 编译参数,或在 SDK 中调整运行参数;编译参数的修改方式可参考文末的示例。
+
+- bf16 inference
+- nc4hw4 layout
+- profiling per layer
+- 关闭 NCNN_STRING 以减小 so 体积
+- 设置线程数和 CPU 亲和力
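+
+下面是一个手动修改 ncnn 编译参数的示意(以开启 Vulkan、关闭 NCNN_STRING 为例;具体的 CMake 选项名以 ncnn 仓库为准):
+
+```bash
+cd ${NCNN_DIR}
+mkdir -p build && cd build
+# 开启 Vulkan 推理,关闭 NCNN_STRING 以减小 so 体积
+cmake -DNCNN_VULKAN=ON -DNCNN_STRING=OFF ..
+make -j$(nproc) && make install
+```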
diff --git a/docs/zh_cn/05-supported-backends/onnxruntime.md b/docs/zh_cn/05-supported-backends/onnxruntime.md
new file mode 100644
index 0000000000..099b64f238
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/onnxruntime.md
@@ -0,0 +1,66 @@
+# onnxruntime 支持情况
+
+## Introduction of ONNX Runtime
+
+**ONNX Runtime** is a cross-platform inference and training accelerator compatible with many popular ML/DNN frameworks. Check its [github](https://github.com/microsoft/onnxruntime) for more information.
+
+## Installation
+
+*Please note that only the CPU version of **onnxruntime>=1.8.1** on the Linux platform is supported for now.*
+
+- Install ONNX Runtime python package
+
+```bash
+pip install onnxruntime==1.8.1
+```
+
+## Build custom ops
+
+### Prerequisite
+
+- Download `onnxruntime-linux` from ONNX Runtime [releases](https://github.com/microsoft/onnxruntime/releases/tag/v1.8.1), extract it, expose `ONNXRUNTIME_DIR` and finally add the lib path to `LD_LIBRARY_PATH` as below:
+
+```bash
+wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz
+
+tar -zxvf onnxruntime-linux-x64-1.8.1.tgz
+cd onnxruntime-linux-x64-1.8.1
+export ONNXRUNTIME_DIR=$(pwd)
+export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
+```
+
+### Build on Linux
+
+```bash
+cd ${MMDEPLOY_DIR} # To MMDeploy root directory
+mkdir -p build && cd build
+cmake -DMMDEPLOY_TARGET_BACKENDS=ort -DONNXRUNTIME_DIR=${ONNXRUNTIME_DIR} ..
+make -j$(nproc)
+```
+
+## How to convert a model
+
+- You can follow the instructions in the tutorial [How to convert model](../02-how-to-run/convert_model.md); a minimal example is sketched below.
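+
+  A minimal conversion sketch, assuming an MMDetection checkout next to MMDeploy and a deployment config named `configs/mmdet/detection/detection_onnxruntime_dynamic.py` (adjust the config, checkpoint and image paths to your setup):
+
+  ```bash
+  python tools/deploy.py \
+      configs/mmdet/detection/detection_onnxruntime_dynamic.py \
+      ../mmdetection/configs/retinanet/retinanet_r50_fpn_1x_coco.py \
+      ${CHECKPOINT} \
+      ../mmdetection/demo/demo.jpg \
+      --work-dir work-dirs/mmdet/retinanet/ort \
+      --device cpu
+  ```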
+
+## How to add a new custom op
+
+### Reminder
+
+- The custom operator is not included in [supported operator list](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md) in ONNX Runtime.
+- The custom operator should be able to be exported to ONNX.
+
+### Main procedures
+
+Take custom operator `roi_align` for example.
+
+1. Create a `roi_align` directory in the ONNX Runtime custom-op source directory `${MMDEPLOY_DIR}/csrc/backend_ops/onnxruntime/`
+2. Add the header and source files to the `roi_align` directory `${MMDEPLOY_DIR}/csrc/backend_ops/onnxruntime/roi_align/`
+3. Add a unit test to `tests/test_ops/test_ops.py`
+   Check [here](../../../tests/test_ops/test_ops.py) for examples; a sketch of running the test follows this list.
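+
+After rebuilding the custom-op library, the new op's unit test can be run roughly as below (the `-k` filter name is an assumption; use the real test name added in step 3):
+
+```bash
+cd ${MMDEPLOY_DIR}
+# run only the tests matching the new op's name
+pytest tests/test_ops/test_ops.py -k roi_align
+```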
+
+**Finally, welcome to send us PR of adding custom operators for ONNX Runtime in MMDeploy.** :nerd_face:
+
+## References
+
+- [How to export Pytorch model with custom op to ONNX and run it in ONNX Runtime](https://github.com/onnx/tutorials/blob/master/PyTorchCustomOperator/README.md)
+- [How to add a custom operator/kernel in ONNX Runtime](https://github.com/microsoft/onnxruntime/blob/master/docs/AddingCustomOp.md)
diff --git a/docs/zh_cn/05-supported-backends/openvino.md b/docs/zh_cn/05-supported-backends/openvino.md
new file mode 100644
index 0000000000..9eccc3cc44
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/openvino.md
@@ -0,0 +1,95 @@
+# OpenVINO 支持情况
+
+This tutorial is based on Linux systems like Ubuntu-18.04.
+
+## Installation
+
+It is recommended to create a virtual environment for the project.
+
+1. Install [OpenVINO](https://docs.openvino.ai/2021.4/get_started.html). It is recommended to use the installer or install using pip.
+ Installation example using [pip](https://pypi.org/project/openvino-dev/):
+
+```bash
+pip install openvino-dev
+```
+
+2. *Optional:* If you want to use OpenVINO in the SDK, you need to install OpenVINO following the [install_guides](https://docs.openvino.ai/2021.4/openvino_docs_install_guides_installing_openvino_linux.html#install-openvino).
+
+3. Install MMDeploy following the [instructions](../01-how-to-build/build_from_source.md).
+
+To work with models from [MMDetection](https://github.com/open-mmlab/mmdetection/blob/master/docs/get_started.md), you may need to install it additionally.
+
+## Usage
+
+Example:
+
+```bash
+python tools/deploy.py \
+ configs/mmdet/detection/detection_openvino_static-300x300.py \
+ /mmdetection_dir/mmdetection/configs/ssd/ssd300_coco.py \
+ /tmp/snapshots/ssd300_coco_20210803_015428-d231a06e.pth \
+ tests/data/tiger.jpeg \
+ --work-dir ../deploy_result \
+ --device cpu \
+ --log-level INFO
+```
+
+## List of supported models exportable to OpenVINO from MMDetection
+
+The table below lists the models that are guaranteed to be exportable to OpenVINO from MMDetection.
+
+| Model name | Config | Dynamic Shape |
+| :----------------: | :-----------------------------------------------------------------------: | :-----------: |
+| ATSS | `configs/atss/atss_r50_fpn_1x_coco.py` | Y |
+| Cascade Mask R-CNN | `configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py` | Y |
+| Cascade R-CNN | `configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py` | Y |
+| Faster R-CNN | `configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py` | Y |
+| FCOS | `configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_4x2_2x_coco.py` | Y |
+| FoveaBox | `configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py ` | Y |
+| FSAF | `configs/fsaf/fsaf_r50_fpn_1x_coco.py` | Y |
+| Mask R-CNN | `configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py` | Y |
+| RetinaNet | `configs/retinanet/retinanet_r50_fpn_1x_coco.py` | Y |
+| SSD | `configs/ssd/ssd300_coco.py` | Y |
+| YOLOv3 | `configs/yolo/yolov3_d53_mstrain-608_273e_coco.py` | Y |
+| YOLOX | `configs/yolox/yolox_tiny_8x8_300e_coco.py` | Y |
+| Faster R-CNN + DCN | `configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py` | Y |
+| VFNet | `configs/vfnet/vfnet_r50_fpn_1x_coco.py` | Y |
+
+Notes:
+
+- Custom operations from OpenVINO use the domain `org.openvinotoolkit`.
+- For faster inference in OpenVINO, the RoiAlign operation in the Faster-RCNN, Mask-RCNN, Cascade-RCNN and Cascade-Mask-RCNN models
+  is replaced with the [ExperimentalDetectronROIFeatureExtractor](https://docs.openvinotoolkit.org/latest/openvino_docs_ops_detection_ExperimentalDetectronROIFeatureExtractor_6.html) operation in the ONNX graph.
+- Models "VFNet" and "Faster R-CNN + DCN" use the custom "DeformableConv2D" operation.
+
+## Deployment config
+
+With the deployment config, you can specify additional options for the Model Optimizer.
+To do this, add the necessary parameters to the `backend_config.mo_options` in the fields `args` (for parameters with values) and `flags` (for flags).
+
+Example:
+
+```python
+backend_config = dict(
+ mo_options=dict(
+ args=dict({
+ '--mean_values': [0, 0, 0],
+ '--scale_values': [255, 255, 255],
+ '--data_type': 'FP32',
+ }),
+ flags=['--disable_fusing'],
+ )
+)
+```
+
+Information about the possible parameters for the Model Optimizer can be found in the [documentation](https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model.html).
+
+## Troubleshooting
+
+- ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory
+
+  To resolve the missing external dependency on Ubuntu, execute the following command:
+
+ ```bash
+ sudo apt-get install libpython3.7
+ ```
diff --git a/docs/zh_cn/05-supported-backends/pplnn.md b/docs/zh_cn/05-supported-backends/pplnn.md
new file mode 100644
index 0000000000..2dcca8e05f
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/pplnn.md
@@ -0,0 +1,24 @@
+# PPLNN 支持情况
+
+MMDeploy supports ppl.nn v0.8.1 and later. This tutorial is based on Linux systems like Ubuntu-18.04.
+
+## Installation
+
+1. Please install [pyppl](https://github.com/openppl-public/ppl.nn) following [install-guide](https://github.com/openppl-public/ppl.nn/blob/master/docs/en/building-from-source.md).
+
+2. Install MMDeploy following the [instructions](../01-how-to-build/build_from_source.md).
+
+## Usage
+
+Example:
+
+```bash
+python tools/deploy.py \
+ configs/mmdet/detection/detection_pplnn_dynamic-800x1344.py \
+ /mmdetection_dir/mmdetection/configs/retinanet/retinanet_r50_fpn_1x_coco.py \
+ /tmp/snapshots/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth \
+ tests/data/tiger.jpeg \
+ --work-dir ../deploy_result \
+ --device cuda \
+ --log-level INFO
+```
diff --git a/docs/zh_cn/05-supported-backends/snpe.md b/docs/zh_cn/05-supported-backends/snpe.md
new file mode 100644
index 0000000000..07f0055489
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/snpe.md
@@ -0,0 +1,8 @@
+# SNPE 支持情况
+
+目前 mmdeploy 集成了 onnx2dlc 模型转换和 SDK 推理,但以下特性还不支持:
+
+- GPU_FP16 模式
+- DSP/AIP 量化
+- 算子内部 profiling
+- UDO 算子
diff --git a/docs/zh_cn/05-supported-backends/tensorrt.md b/docs/zh_cn/05-supported-backends/tensorrt.md
new file mode 100644
index 0000000000..6d4202e613
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/tensorrt.md
@@ -0,0 +1,139 @@
+# TensorRT 支持情况
+
+## Installation
+
+### Install TensorRT
+
+Please install TensorRT 8 following the [install-guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing).
+
+**Note**:
+
+- `pip Wheel File Installation` is not supported yet in this repo.
+
+- We strongly suggest you install TensorRT through [tar file](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html#installing-tar)
+
+- After installation, you'd better add TensorRT environment variables to bashrc by:
+
+ ```bash
+ cd ${TENSORRT_DIR} # To TensorRT root directory
+ echo '# set env for TensorRT' >> ~/.bashrc
+ echo "export TENSORRT_DIR=${TENSORRT_DIR}" >> ~/.bashrc
+  echo 'export LD_LIBRARY_PATH=$TENSORRT_DIR/lib:$LD_LIBRARY_PATH' >> ~/.bashrc
+ source ~/.bashrc
+ ```
+
+### Build custom ops
+
+Some custom ops are created to support models in OpenMMLab, and the custom ops can be built as follows:
+
+```bash
+cd ${MMDEPLOY_DIR} # To MMDeploy root directory
+mkdir -p build && cd build
+cmake -DMMDEPLOY_TARGET_BACKENDS=trt ..
+make -j$(nproc)
+```
+
+If you haven't installed TensorRT in the default path, please add the `-DTENSORRT_DIR` flag to CMake.
+
+```bash
+ cmake -DMMDEPLOY_TARGET_BACKENDS=trt -DTENSORRT_DIR=${TENSORRT_DIR} ..
+ make -j$(nproc)
+```
+
+## Convert model
+
+Please follow the tutorial in [How to convert model](../02-how-to-run/convert_model.md). **Note** that the device must be `cuda` device.
+
+### Int8 Support
+
+Since TensorRT supports INT8 mode, a custom dataset config can be given to calibrate the model. The following is an example for MMDetection:
+
+```python
+# calibration_dataset.py
+
+# dataset settings, same format as the codebase in OpenMMLab
+dataset_type = 'CalibrationDataset'
+data_root = 'calibration/dataset/root'
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(1333, 800),
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size_divisor=32),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=2,
+ workers_per_gpu=2,
+ val=dict(
+ type=dataset_type,
+ ann_file=data_root + 'val_annotations.json',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ ann_file=data_root + 'test_annotations.json',
+ pipeline=test_pipeline))
+evaluation = dict(interval=1, metric='bbox')
+```
+
+Convert your model with this calibration dataset:
+
+```bash
+python tools/deploy.py \
+ ...
+ --calib-dataset-cfg calibration_dataset.py
+```
+
+If the calibration dataset is not given, the data will be calibrated with the dataset in model config.
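+
+Putting it together, a sketch of a full int8 conversion command, assuming an int8 deployment config such as `configs/mmdet/detection/detection_tensorrt-int8_dynamic-320x320-1344x1344.py` exists for the target task (model config, checkpoint and image paths are placeholders):
+
+```bash
+python tools/deploy.py \
+    configs/mmdet/detection/detection_tensorrt-int8_dynamic-320x320-1344x1344.py \
+    ${MODEL_CFG} \
+    ${CHECKPOINT} \
+    ${IMAGE} \
+    --work-dir work-dirs/trt-int8 \
+    --device cuda:0 \
+    --calib-dataset-cfg calibration_dataset.py
+```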
+
+## FAQs
+
+- Error `Cannot found TensorRT headers` or `Cannot found TensorRT libs`
+
+ Try cmake with flag `-DTENSORRT_DIR`:
+
+ ```bash
+ cmake -DBUILD_TENSORRT_OPS=ON -DTENSORRT_DIR=${TENSORRT_DIR} ..
+ make -j$(nproc)
+ ```
+
+ Please make sure there are libs and headers in `${TENSORRT_DIR}`.
+
+- Error `error: parameter check failed at: engine.cpp::setBindingDimensions::1046, condition: profileMinDims.d[i] <= dimensions.d[i]`
+
+ There is an input shape limit in deployment config:
+
+ ```python
+ backend_config = dict(
+ # other configs
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 320, 320],
+ opt_shape=[1, 3, 800, 1344],
+ max_shape=[1, 3, 1344, 1344])))
+ ])
+ # other configs
+ ```
+
+ The shape of the tensor `input` must be limited between `input_shapes["input"]["min_shape"]` and `input_shapes["input"]["max_shape"]`.
+
+- Error `error: [TensorRT] INTERNAL ERROR: Assertion failed: cublasStatus == CUBLAS_STATUS_SUCCESS`
+
+ TRT 7.2.1 switches to use cuBLASLt (previously it was cuBLAS). cuBLASLt is the default choice for SM version >= 7.0. However, you may need CUDA-10.2 Patch 1 (Released Aug 26, 2020) to resolve some cuBLASLt issues. Another option is to use the new TacticSource API and disable cuBLASLt tactics if you don't want to upgrade.
+
+ Read [this](https://forums.developer.nvidia.com/t/matrixmultiply-failed-on-tensorrt-7-2-1/158187/4) for detail.
+
+- Install mmdeploy on Jetson
+
+  We provide a tutorial to get started on Jetsons [here](../01-how-to-build/jetsons.md).
diff --git a/docs/zh_cn/05-supported-backends/torchscript.md b/docs/zh_cn/05-supported-backends/torchscript.md
new file mode 100644
index 0000000000..a86f048d9b
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/torchscript.md
@@ -0,0 +1,54 @@
+# TorchScript 支持情况
+
+## Introduction of TorchScript
+
+**TorchScript** is a way to create serializable and optimizable models from PyTorch code. Any TorchScript program can be saved from a Python process and loaded in a process where there is no Python dependency. Check the [Introduction to TorchScript](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html) for more details.
+
+## Build custom ops
+
+### Prerequisite
+
+- Download libtorch from the official website [here](https://pytorch.org/get-started/locally/).
+
+*Please note that only **Pre-cxx11 ABI** and **version 1.8.1+** on the Linux platform are supported for now.*
+
+For previous versions of libtorch, the download links can be found in this [issue comment](https://github.com/pytorch/pytorch/issues/40961#issuecomment-1017317786). Taking libtorch 1.8.1+cu111 as an example, download and extract it, expose `Torch_DIR` and add the lib path to `LD_LIBRARY_PATH` as below:
+
+```bash
+wget https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.8.1%2Bcu111.zip
+
+unzip libtorch-shared-with-deps-1.8.1+cu111.zip
+cd libtorch
+export Torch_DIR=$(pwd)
+export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH
+```
+
+Note:
+
+- If you want to save libtorch env variables to bashrc, you could run
+
+ ```bash
+ echo '# set env for libtorch' >> ~/.bashrc
+ echo "export Torch_DIR=${Torch_DIR}" >> ~/.bashrc
+ echo 'export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH' >> ~/.bashrc
+ source ~/.bashrc
+ ```
+
+### Build on Linux
+
+```bash
+cd ${MMDEPLOY_DIR} # To MMDeploy root directory
+mkdir -p build && cd build
+cmake -DMMDEPLOY_TARGET_BACKENDS=torchscript -DTorch_DIR=${Torch_DIR} ..
+make -j$(nproc)
+```
+
+## How to convert a model
+
+- You can follow the instructions in the tutorial [How to convert model](../02-how-to-run/convert_model.md); a minimal example is sketched below.
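+
+  A minimal sketch, assuming a TorchScript deployment config such as `configs/mmcls/classification_torchscript.py` exists for the target codebase (model config, checkpoint and image paths are placeholders):
+
+  ```bash
+  python tools/deploy.py \
+      configs/mmcls/classification_torchscript.py \
+      ${MODEL_CFG} \
+      ${CHECKPOINT} \
+      ${IMAGE} \
+      --work-dir work-dirs/torchscript \
+      --device cpu
+  ```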
+
+## FAQs
+
+- Error: `projects/thirdparty/libtorch/share/cmake/Caffe2/Caffe2Config.cmake:96 (message):Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN libraries. Please set the proper cuDNN prefixes and / or install cuDNN.`
+
+  You may export `CUDNN_ROOT=/root/path/to/cudnn` to resolve the build error.
diff --git a/docs/zh_cn/06-custom-ops/ncnn.md b/docs/zh_cn/06-custom-ops/ncnn.md
new file mode 100644
index 0000000000..df0f22b33c
--- /dev/null
+++ b/docs/zh_cn/06-custom-ops/ncnn.md
@@ -0,0 +1,158 @@
+## ncnn 自定义算子
+
+
+
+- [ncnn Ops](#ncnn-ops)
+ - [Expand](#expand)
+ - [Description](#description)
+ - [Parameters](#parameters)
+ - [Inputs](#inputs)
+ - [Outputs](#outputs)
+ - [Type Constraints](#type-constraints)
+ - [Gather](#gather)
+    - [Description](#description-1)
+    - [Parameters](#parameters-1)
+    - [Inputs](#inputs-1)
+    - [Outputs](#outputs-1)
+    - [Type Constraints](#type-constraints-1)
+  - [Shape](#shape)
+    - [Description](#description-2)
+    - [Parameters](#parameters-2)
+    - [Inputs](#inputs-2)
+    - [Outputs](#outputs-2)
+    - [Type Constraints](#type-constraints-2)
+  - [TopK](#topk)
+    - [Description](#description-3)
+    - [Parameters](#parameters-3)
+    - [Inputs](#inputs-3)
+    - [Outputs](#outputs-3)
+    - [Type Constraints](#type-constraints-3)
+
+
+
+### Expand
+
+#### Description
+
+Broadcast the input blob following the given shape and the broadcast rule of ncnn.
+
+#### Parameters
+
+Expand has no parameters.
+
+#### Inputs
+
+- `inputs[0]`: ncnn.Mat
+  - bottom_blobs[0]; An ncnn.Mat of input data.
+- `inputs[1]`: ncnn.Mat
+  - bottom_blobs[1]; An 1-dim ncnn.Mat. A valid shape of ncnn.Mat.
+
+#### Outputs
+
+- `outputs[0]`: T
+  - top_blob; The blob of ncnn.Mat which expanded by given shape and broadcast rule of ncnn.
+
+#### Type Constraints
+
+- ncnn.Mat: Mat(float32)
+
+### Gather
+
+#### Description
+
+Given the data and indices blobs, gather entries along the given axis of data, indexed by indices.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ----- | --------- | -------------------------------------- |
+| `int` | `axis` | Which axis to gather on. Default is 0. |
+
+#### Inputs
+
+- `inputs[0]`: ncnn.Mat
+  - bottom_blobs[0]; An ncnn.Mat of input data.
+- `inputs[1]`: ncnn.Mat
+  - bottom_blobs[1]; An 1-dim ncnn.Mat of indices on given axis.
+
+#### Outputs
+
+- `outputs[0]`: T
+  - top_blob; The blob of ncnn.Mat which gathered by given data and indice blob.
+
+#### Type Constraints
+
+- ncnn.Mat: Mat(float32)
+
+### Shape
+
+#### Description
+
+Get the shape of the ncnn blobs.
+
+#### Parameters
+
+Shape has no parameters.
+
+#### Inputs
+
+- `inputs[0]`: ncnn.Mat
+  - bottom_blob; An ncnn.Mat of input data.
+
+#### Outputs
+
+- `outputs[0]`: T
+  - top_blob; 1-D ncnn.Mat of shape (bottom_blob.dims,), `bottom_blob.dims` is the input blob dimensions.
+
+#### Type Constraints
+
+- ncnn.Mat: Mat(float32)
+
+### TopK
+
+#### Description
+
+Get the indices and (optionally) the values of the largest or smallest k entries along the given axis. This op maps to the ONNX ops `TopK`, `ArgMax`, and `ArgMin`.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ----- | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `int` | `axis` | The axis of data which topk calculate on. Default is -1, indicates the last dimension. |
+| `int` | `largest` | The binary value which indicates the TopK operator selects the largest or smallest K values. Default is 1, the TopK selects the largest K values. |
+| `int` | `sorted` | The binary value of whether returning sorted topk value or not. If not, the topk returns topk values in any order. Default is 1, this operator returns sorted topk values. |
+| `int` | `keep_dims` | The binary value of whether keep the reduced dimension or not. Default is 1, each output blob has the same dimension as input blob. |
+
+#### Inputs
+
+- `inputs[0]`: ncnn.Mat
+  - bottom_blob[0]; An ncnn.Mat of input data.
+- `inputs[1]` (optional): ncnn.Mat
+  - bottom_blob[1]; An optional ncnn.Mat holding K of TopK. If this blob does not exist, K is 1.
+
+#### Outputs
+
+- `outputs[0]`: T
+  - top_blob[0]; If outputs has only 1 blob, outputs[0] is the indices blob of topk; if outputs has 2 blobs, outputs[0] is the value blob of topk. This blob is in ncnn.Mat format with the shape of bottom_blob[0] or the reduced shape of bottom_blob[0].
+- `outputs[1]`: T
+  - top_blob[1] (optional); If outputs has 2 blobs, outputs[1] is the indices blob of topk. This blob is in ncnn.Mat format with the shape of bottom_blob[0] or the reduced shape of bottom_blob[0].
+
+#### Type Constraints
+
+- ncnn.Mat: Mat(float32)
diff --git a/docs/zh_cn/06-custom-ops/onnxruntime.md b/docs/zh_cn/06-custom-ops/onnxruntime.md
new file mode 100644
index 0000000000..e4f0779efa
--- /dev/null
+++ b/docs/zh_cn/06-custom-ops/onnxruntime.md
@@ -0,0 +1,176 @@
+## onnxruntime 自定义算子
+
+
+
+- [ONNX Runtime Ops](#onnx-runtime-ops)
+ - [grid_sampler](#grid_sampler)
+ - [Description](#description)
+ - [Parameters](#parameters)
+ - [Inputs](#inputs)
+ - [Outputs](#outputs)
+ - [Type Constraints](#type-constraints)
+ - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d)
+ - [Description](#description-1)
+ - [Parameters](#parameters-1)
+ - [Inputs](#inputs-1)
+ - [Outputs](#outputs-1)
+ - [Type Constraints](#type-constraints-1)
+  - [NMSRotated](#nmsrotated)
+    - [Description](#description-2)
+    - [Parameters](#parameters-2)
+    - [Inputs](#inputs-2)
+    - [Outputs](#outputs-2)
+    - [Type Constraints](#type-constraints-2)
+ - [RoIAlignRotated](#roialignrotated)
+ - [Description](#description-3)
+ - [Parameters](#parameters-3)
+ - [Inputs](#inputs-3)
+ - [Outputs](#outputs-3)
+ - [Type Constraints](#type-constraints-3)
+
+
+
+### grid_sampler
+
+#### Description
+
+Perform sample from `input` with pixel locations from `grid`.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ----- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`) |
+| `int` | `padding_mode` | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`) |
+| `int` | `align_corners` | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. |
+
+#### Inputs
+
+- `input`: T
+  - Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the numbers of channels, inH and inW are the height and width of the data.
+- `grid`: T
+  - Input offset; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of offset and output.
+
+#### Outputs
+
+- `output`: T
+  - Output feature; 4-D tensor of shape (N, C, outH, outW).
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### MMCVModulatedDeformConv2d
+
+#### Description
+
+Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| -------------- | ------------------- | ------------------------------------------------------------------------------------- |
+| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) |
+| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) |
+| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) |
+| `int` | `deformable_groups` | Groups of deformable offset. |
+| `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. |
+
+#### Inputs
+
+- `inputs[0]`: T
+  - Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+- `inputs[1]`: T
+  - Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
+- `inputs[2]`: T
+  - Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
+- `inputs[3]`: T
+  - Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
+- `inputs[4]`: T, optional
+  - Input bias; 1-D tensor of shape (output_channel).
+
+#### Outputs
+
+- `outputs[0]`: T
+  - Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### NMSRotated
+
+#### Description
+
+Non Max Suppression for rotated bboxes.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ------- | --------------- | -------------------------- |
+| `float` | `iou_threshold` | The IoU threshold for NMS. |
+
+#### Inputs
+
+- `inputs[0]`: T
+  - Input boxes; 2-D tensor of shape (N, 5), where N is the number of rotated bboxes.
+- `inputs[1]`: T
+  - Input scores; 1-D tensor of shape (N, ), where N is the number of rotated bboxes.
+
+#### Outputs
+
+- `outputs[0]`: T
+  - Output indices; 1-D tensor of shape (K, ), where K is the number of kept bboxes.
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### RoIAlignRotated
+
+#### Description
+
+Perform RoIAlignRotated on output feature, used in bbox_head of most two-stage rotated object detectors.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
+| `int` | `output_height` | height of output roi |
+| `int` | `output_width` | width of output roi |
+| `float` | `spatial_scale` | used to scale the input boxes |
+| `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
+| `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. |
+| `int` | `clockwise` | If True, the angle in each proposal follows a clockwise fashion in image space, otherwise, the angle is counterclockwise. Default: False. |
+
+#### Inputs
+
+- `input`: T
+  - Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.
+- `rois`: T
+  - RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 6) given as [[batch_index, cx, cy, w, h, theta], ...]. The RoIs' coordinates are the coordinate system of input.
+
+#### Outputs
+
+- `feat`: T
+  - RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].
+
+#### Type Constraints
+
+- T:tensor(float32)
diff --git a/docs/zh_cn/06-custom-ops/tensorrt.md b/docs/zh_cn/06-custom-ops/tensorrt.md
new file mode 100644
index 0000000000..3297bf7778
--- /dev/null
+++ b/docs/zh_cn/06-custom-ops/tensorrt.md
@@ -0,0 +1,407 @@
+## TRT 自定义算子
+
+
+
+- [TensorRT Ops](#tensorrt-ops)
+ - [TRTBatchedNMS](#trtbatchednms)
+ - [Description](#description)
+ - [Parameters](#parameters)
+ - [Inputs](#inputs)
+ - [Outputs](#outputs)
+ - [Type Constraints](#type-constraints)
+ - [grid_sampler](#grid_sampler)
+ - [Description](#description-1)
+ - [Parameters](#parameters-1)
+ - [Inputs](#inputs-1)
+ - [Outputs](#outputs-1)
+ - [Type Constraints](#type-constraints-1)
+ - [MMCVInstanceNormalization](#mmcvinstancenormalization)
+ - [Description](#description-2)
+ - [Parameters](#parameters-2)
+ - [Inputs](#inputs-2)
+ - [Outputs](#outputs-2)
+ - [Type Constraints](#type-constraints-2)
+ - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d)
+ - [Description](#description-3)
+ - [Parameters](#parameters-3)
+ - [Inputs](#inputs-3)
+ - [Outputs](#outputs-3)
+ - [Type Constraints](#type-constraints-3)
+ - [MMCVMultiLevelRoiAlign](#mmcvmultilevelroialign)
+ - [Description](#description-4)
+ - [Parameters](#parameters-4)
+ - [Inputs](#inputs-4)
+ - [Outputs](#outputs-4)
+ - [Type Constraints](#type-constraints-4)
+ - [MMCVRoIAlign](#mmcvroialign)
+ - [Description](#description-5)
+ - [Parameters](#parameters-5)
+ - [Inputs](#inputs-5)
+ - [Outputs](#outputs-5)
+ - [Type Constraints](#type-constraints-5)
+ - [ScatterND](#scatternd)
+ - [Description](#description-6)
+ - [Parameters](#parameters-6)
+ - [Inputs](#inputs-6)
+ - [Outputs](#outputs-6)
+ - [Type Constraints](#type-constraints-6)
+ - [TRTBatchedRotatedNMS](#trtbatchedrotatednms)
+ - [Description](#description-7)
+ - [Parameters](#parameters-7)
+ - [Inputs](#inputs-7)
+ - [Outputs](#outputs-7)
+ - [Type Constraints](#type-constraints-7)
+ - [GridPriorsTRT](#gridpriorstrt)
+ - [Description](#description-8)
+ - [Parameters](#parameters-8)
+ - [Inputs](#inputs-8)
+ - [Outputs](#outputs-8)
+ - [Type Constraints](#type-constraints-8)
+
+
+
+### TRTBatchedNMS
+
+#### Description
+
+Batched NMS with a fixed number of output bounding boxes.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ------- | --------------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
+| `int` | `background_label_id` | The label ID for the background class. If there is no background class, set it to `-1`. |
+| `int` | `num_classes` | The number of classes. |
+| `int` | `topK` | The number of bounding boxes to be fed into the NMS step. |
+| `int` | `keepTopK` | The number of total bounding boxes to be kept per-image after the NMS step. Should be less than or equal to the `topK` value. |
+| `float` | `scoreThreshold` | The scalar threshold for score (low scoring boxes are removed). |
+| `float` | `iouThreshold` | The scalar threshold for IoU (new boxes that have high IoU overlap with previously selected boxes are removed). |
+| `int` | `isNormalized` | Set to `false` if the box coordinates are not normalized, meaning they are not in the range `[0,1]`. Defaults to `true`. |
+| `int` | `clipBoxes` | Forcibly restrict bounding boxes to the normalized range `[0,1]`. Only applicable if `isNormalized` is also `true`. Defaults to `true`. |
+
+#### Inputs
+
+- `inputs[0]`: T
+  - boxes; 4-D tensor of shape (N, num_boxes, num_classes, 4), where N is the batch size; `num_boxes` is the number of boxes; `num_classes` is the number of classes, which could be 1 if the boxes are shared between all classes.
+- `inputs[1]`: T
+  - scores; 4-D tensor of shape (N, num_boxes, 1, num_classes).
+
+#### Outputs
+
+- `outputs[0]`: T
+  - dets; 3-D tensor of shape (N, valid_num_boxes, 5), `valid_num_boxes` is the number of boxes after NMS. For each row `dets[i,j,:] = [x0, y0, x1, y1, score]`
+- `outputs[1]`: tensor(int32, Linear)
+  - labels; 2-D tensor of shape (N, valid_num_boxes).
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### grid_sampler
+
+#### Description
+
+Perform sample from `input` with pixel locations from `grid`.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ----- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`) |
+| `int` | `padding_mode` | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`) |
+| `int` | `align_corners` | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. |
+
+#### Inputs
+
+- `inputs[0]`: T
+  - Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the numbers of channels, inH and inW are the height and width of the data.
+- `inputs[1]`: T
+  - Input offset; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of offset and output.
+
+#### Outputs
+
+- `outputs[0]`: T
+  - Output feature; 4-D tensor of shape (N, C, outH, outW).
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### MMCVInstanceNormalization
+
+#### Description
+
+Carry out instance normalization as described in the paper https://arxiv.org/abs/1607.08022.
+
+y = scale * (x - mean) / sqrt(variance + epsilon) + B, where mean and variance are computed per instance per channel.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ------- | --------- | -------------------------------------------------------------------- |
+| `float` | `epsilon` | The epsilon value to use to avoid division by zero. Default is 1e-05 |
+
+#### Inputs
+
+- `input`: T
+  - Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size.
+- `scale`: T
+  - The input 1-dimensional scale tensor of size C.
+- `B`: T
+  - The input 1-dimensional bias tensor of size C.
+
+#### Outputs
+
+- `output`: T
+  - The output tensor of the same shape as input.
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### MMCVModulatedDeformConv2d
+
+#### Description
+
+Perform Modulated Deformable Convolution on input feature. Read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| -------------- | ------------------ | ------------------------------------------------------------------------------------- |
+| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) |
+| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) |
+| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) |
+| `int` | `deformable_group` | Groups of deformable offset. |
+| `int` | `group` | Split input into groups. `input_channel` should be divisible by the number of groups. |
+
+#### Inputs
+
+- `inputs[0]`: T
+  - Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+- `inputs[1]`: T
+  - Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
+- `inputs[2]`: T
+  - Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
+- `inputs[3]`: T
+  - Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
+- `inputs[4]`: T, optional
+  - Input bias; 1-D tensor of shape (output_channel).
+
+#### Outputs
+
+- `outputs[0]`: T
+  - Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
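+Because the offset and mask channel counts are easy to get wrong, here is a small shape-bookkeeping sketch. It only constructs tensors of the documented shapes; the concrete sizes and the standard convolution output-size formula are illustrative assumptions, and the kernel itself is not invoked.
+
+```python
+import torch
+
+N, C, inH, inW = 1, 8, 32, 32
+output_channel, kH, kW = 16, 3, 3
+sH, sW, padH, padW, dH, dW = 1, 1, 1, 1, 1, 1
+deformable_group, group = 1, 1
+
+outH = (inH + 2 * padH - dH * (kH - 1) - 1) // sH + 1
+outW = (inW + 2 * padW - dW * (kW - 1) - 1) // sW + 1
+
+feat = torch.rand(N, C, inH, inW)                                   # inputs[0]
+offset = torch.rand(N, deformable_group * 2 * kH * kW, outH, outW)  # inputs[1]
+mask = torch.rand(N, deformable_group * kH * kW, outH, outW)        # inputs[2]
+weight = torch.rand(output_channel, C, kH, kW)                      # inputs[3] (group == 1)
+bias = torch.rand(output_channel)                                   # inputs[4], optional
+# outputs[0] then has shape (N, output_channel, outH, outW)
+```
+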
+### MMCVMultiLevelRoiAlign
+
+#### Description
+
+Perform RoIAlign on features from multiple levels. Used in bbox_head of most two-stage detectors.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ---------------- | ------------------ | ------------------------------------------------------------------------------------------------------------- |
+| `int` | `output_height` | height of output roi. |
+| `int` | `output_width` | width of output roi. |
+| `list of floats` | `featmap_strides` | feature map stride of each level. |
+| `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
+| `float` | `roi_scale_factor` | RoIs will be scaled by this factor before RoI Align. |
+| `int` | `finest_scale` | Scale threshold of mapping to level 0. Default: 56. |
+| `int`            | `aligned`          | If `aligned=0`, use the legacy implementation in MMDetection. Otherwise, use the more precisely aligned implementation.       |
+
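+The `finest_scale` attribute is easiest to understand through the level-assignment rule it is assumed to implement. The sketch below mirrors `SingleRoIExtractor.map_roi_levels` from MMDetection; treat it as an illustration rather than the authoritative plugin logic.
+
+```python
+import torch
+
+def map_roi_levels(rois, num_levels, finest_scale=56):
+    # rois follow the documented layout [batch_index, x1, y1, x2, y2]
+    scale = torch.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
+    target_lvls = torch.floor(torch.log2(scale / finest_scale + 1e-6))
+    return target_lvls.clamp(min=0, max=num_levels - 1).long()
+
+rois = torch.tensor([[0., 0., 0., 56., 56.],     # 56x56 RoI   -> level 0
+                     [0., 0., 0., 448., 448.]])  # 448x448 RoI -> level 3
+print(map_roi_levels(rois, num_levels=4))
+```
+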
+#### Inputs
+
+
inputs[0]: T
+
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...].
+
inputs[1~]: T
+
Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.
+
+#### Outputs
+
+
+
outputs[0]: T
+
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element output[0][r-1] is a pooled feature map corresponding to the r-th RoI inputs[1][r-1].
+
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### MMCVRoIAlign
+
+#### Description
+
+Perform RoIAlign on output feature, used in bbox_head of most two-stage detectors.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- |
+| `int` | `output_height` | height of output roi |
+| `int` | `output_width` | width of output roi |
+| `float` | `spatial_scale` | used to scale the input boxes |
+| `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. |
+| `str` | `mode` | pooling mode in each bin. `avg` or `max` |
+| `int`   | `aligned`        | If `aligned=0`, use the legacy implementation in MMDetection. Otherwise, use the more precisely aligned implementation.       |
+
+#### Inputs
+
+
+
inputs[0]: T
+
Input feature map; 4D tensor of shape (N, C, H, W), where N is the batch size, C is the numbers of channels, H and W are the height and width of the data.
+
inputs[1]: T
+
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are the coordinate system of inputs[0].
+
+
+#### Outputs
+
+
+
outputs[0]: T
+
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element output[0][r-1] is a pooled feature map corresponding to the r-th RoI inputs[1][r-1].
+
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
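+For reference, the attributes above map directly onto `mmcv.ops.roi_align`, which is assumed to be the Python counterpart of this op; the sizes and coordinates below are made up.
+
+```python
+import torch
+from mmcv.ops import roi_align
+
+feat = torch.rand(1, 16, 32, 32)               # inputs[0]: (N, C, H, W)
+rois = torch.tensor([[0., 4., 4., 20., 20.]])  # inputs[1]: [batch_index, x1, y1, x2, y2]
+
+# (output_height, output_width), spatial_scale, sampling_ratio, mode, aligned
+roi_feat = roi_align(feat, rois, (7, 7), 1.0, 2, 'avg', True)
+print(roi_feat.shape)  # (num_rois, C, output_height, output_width)
+```
+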
+### ScatterND
+
+#### Description
+
+ScatterND takes three inputs `data` tensor of rank r >= 1, `indices` tensor of rank q >= 1, and `updates` tensor of rank q + r - indices.shape\[-1\] - 1. The output of the operation is produced by creating a copy of the input `data`, and then updating its value to values specified by `updates` at specific index positions specified by `indices`. Its output shape is the same as the shape of `data`. Note that `indices` should not have duplicate entries. That is, two or more updates for the same index-location are not supported.
+
+The `output` is calculated via the following equation:
+
+```python
+ output = np.copy(data)
+ update_indices = indices.shape[:-1]
+ for idx in np.ndindex(update_indices):
+ output[indices[idx]] = updates[idx]
+```
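+
+A small worked example of the update rule above, with made-up rank-1 `data`:
+
+```python
+import numpy as np
+
+data = np.array([1., 2., 3., 4., 5.], dtype=np.float32)
+indices = np.array([[1], [3]], dtype=np.int32)
+updates = np.array([10., 30.], dtype=np.float32)
+
+output = np.copy(data)
+for idx in np.ndindex(indices.shape[:-1]):
+    output[tuple(indices[idx])] = updates[idx]
+print(output)  # [ 1. 10.  3. 30.  5.]
+```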
+
+#### Parameters
+
+None
+
+#### Inputs
+
+
+
inputs[0]: T
+
Tensor of rank r>=1.
+
+
inputs[1]: tensor(int32, Linear)
+
Tensor of rank q>=1.
+
+
inputs[2]: T
+
Tensor of rank q + r - indices_shape[-1] - 1.
+
+
+#### Outputs
+
+
+
outputs[0]: T
+
Tensor of rank r >= 1.
+
+
+#### Type Constraints
+
+- T:tensor(float32, Linear), tensor(int32, Linear)
+
+### TRTBatchedRotatedNMS
+
+#### Description
+
+Batched rotated NMS with a fixed number of output bounding boxes.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ------- | --------------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
+| `int` | `background_label_id` | The label ID for the background class. If there is no background class, set it to `-1`. |
+| `int` | `num_classes` | The number of classes. |
+| `int` | `topK` | The number of bounding boxes to be fed into the NMS step. |
+| `int` | `keepTopK` | The number of total bounding boxes to be kept per-image after the NMS step. Should be less than or equal to the `topK` value. |
+| `float` | `scoreThreshold` | The scalar threshold for score (low scoring boxes are removed). |
+| `float` | `iouThreshold` | The scalar threshold for IoU (new boxes that have high IoU overlap with previously selected boxes are removed). |
+| `int` | `isNormalized` | Set to `false` if the box coordinates are not normalized, meaning they are not in the range `[0,1]`. Defaults to `true`. |
+| `int` | `clipBoxes` | Forcibly restrict bounding boxes to the normalized range `[0,1]`. Only applicable if `isNormalized` is also `true`. Defaults to `true`. |
+
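+The sketch below walks through the score-threshold → `topK` → NMS → `keepTopK` flow for a single image and class. It only illustrates the bookkeeping: the rotated-box IoU is replaced by a trivial axis-aligned IoU on made-up `(x, y, w, h, theta)` boxes, with `(x, y)` assumed to be the box center.
+
+```python
+import numpy as np
+
+def simplified_nms(boxes, scores, topK, keepTopK, scoreThreshold, iouThreshold):
+    def iou(a, b):  # axis-aligned stand-in; theta is ignored
+        ax1, ay1, ax2, ay2 = a[0] - a[2] / 2, a[1] - a[3] / 2, a[0] + a[2] / 2, a[1] + a[3] / 2
+        bx1, by1, bx2, by2 = b[0] - b[2] / 2, b[1] - b[3] / 2, b[0] + b[2] / 2, b[1] + b[3] / 2
+        iw = max(0.0, min(ax2, bx2) - max(ax1, bx1))
+        ih = max(0.0, min(ay2, by2) - max(ay1, by1))
+        inter = iw * ih
+        return inter / (a[2] * a[3] + b[2] * b[3] - inter + 1e-6)
+
+    keep = scores >= scoreThreshold                # drop low-scoring boxes
+    order = np.argsort(-scores[keep])[:topK]       # feed at most topK boxes to NMS
+    selected = []
+    for box in boxes[keep][order]:
+        if all(iou(box, s) <= iouThreshold for s in selected):
+            selected.append(box)
+        if len(selected) == keepTopK:              # keep at most keepTopK boxes
+            break
+    return np.array(selected)
+```
+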
+#### Inputs
+
+
+
inputs[0]: T
+
boxes; 4-D tensor of shape (N, num_boxes, num_classes, 5), where N is the batch size; `num_boxes` is the number of boxes; `num_classes` is the number of classes, which could be 1 if the boxes are shared between all classes.
+
inputs[1]: T
+
scores; 4-D tensor of shape (N, num_boxes, 1, num_classes).
+
+
+#### Outputs
+
+
+
outputs[0]: T
+
+dets; 3-D tensor of shape (N, valid_num_boxes, 6), where `valid_num_boxes` is the number of boxes after NMS. Each row is `dets[i, j, :] = [x0, y0, width, height, theta, score]`.
+
outputs[1]: tensor(int32, Linear)
+
labels; 2-D tensor of shape (N, valid_num_boxes).
+
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+
+### GridPriorsTRT
+
+#### Description
+
+Generate the anchors for object detection task.
+
+#### Parameters
+
+| Type | Parameter | Description |
+| ----- | ---------- | --------------------------------- |
+| `int` | `stride_w` | The stride of the feature width. |
+| `int` | `stride_h` | The stride of the feature height. |
+
+#### Inputs
+
+
+
inputs[0]: T
+
The base anchors; 2-D tensor with shape [num_base_anchor, 4].
+
inputs[1]: TAny
+
+Height provider; 1-D tensor with shape [featmap_height]. The data will never be used.
+
inputs[2]: TAny
+
+Width provider; 1-D tensor with shape [featmap_width]. The data will never be used.
+
+
+#### Outputs
+
+
+
outputs[0]: T
+
+Output anchors; 2-D tensor of shape (num_base_anchor * featmap_height * featmap_width, 4).
+
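+As an illustration of how such a grid of anchors is conventionally generated (mirroring MMDetection's `AnchorGenerator`; the exact memory ordering of the plugin's output is not guaranteed by this sketch):
+
+```python
+import torch
+
+def grid_priors(base_anchors, featmap_height, featmap_width, stride_w, stride_h):
+    shift_x = torch.arange(featmap_width) * stride_w
+    shift_y = torch.arange(featmap_height) * stride_h
+    yy, xx = torch.meshgrid(shift_y, shift_x, indexing='ij')
+    shifts = torch.stack((xx, yy, xx, yy), dim=-1).reshape(-1, 4).float()
+    # every base anchor is shifted to every feature-map location
+    return (shifts[:, None, :] + base_anchors[None, :, :]).reshape(-1, 4)
+
+base_anchors = torch.tensor([[-16., -16., 16., 16.]])
+anchors = grid_priors(base_anchors, featmap_height=2, featmap_width=3,
+                      stride_w=16, stride_h=16)
+print(anchors.shape)  # torch.Size([6, 4])
+```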
+
+#### Type Constraints
+
+- T:tensor(float32, Linear)
+- TAny: Any
diff --git a/docs/zh_cn/07-developer-guide/add_backend_ops_unittest.md b/docs/zh_cn/07-developer-guide/add_backend_ops_unittest.md
new file mode 100644
index 0000000000..d861a2e452
--- /dev/null
+++ b/docs/zh_cn/07-developer-guide/add_backend_ops_unittest.md
@@ -0,0 +1,86 @@
+# 为推理 ops 添加测试单元
+
+本教程介绍如何为后端 ops 添加单元测试。在 backend_ops 目录下添加自定义 op 时,需要添加相应的测试单元。op 的单元测试在 `tests/test_ops/test_ops.py` 中。
+
+添加新的自定义 op 后,需要重新编译,可参考 [build.md](../01-how-to-build/build_from_source.md)。
+
+## ops 单元测试样例
+
+```python
+@pytest.mark.parametrize('backend', [TEST_TENSORRT, TEST_ONNXRT]) # 1.1 backend test class
+@pytest.mark.parametrize('pool_h,pool_w,spatial_scale,sampling_ratio', # 1.2 set parameters of op
+                         [(2, 2, 1.0, 2), (4, 4, 2.0, 4)])  # examples of op test parameters
+def test_roi_align(backend,
+ pool_h, # set parameters of op
+ pool_w,
+ spatial_scale,
+ sampling_ratio,
+ input_list=None,
+ save_dir=None):
+ backend.check_env()
+
+ if input_list is None:
+ input = torch.rand(1, 1, 16, 16, dtype=torch.float32) # 1.3 op input data initialization
+ single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
+ else:
+ input = torch.tensor(input_list[0], dtype=torch.float32)
+ single_roi = torch.tensor(input_list[1], dtype=torch.float32)
+
+ from mmcv.ops import roi_align
+
+ def wrapped_function(torch_input, torch_rois): # 1.4 initialize op model to be tested
+ return roi_align(torch_input, torch_rois, (pool_w, pool_h),
+ spatial_scale, sampling_ratio, 'avg', True)
+
+ wrapped_model = WrapFunction(wrapped_function).eval()
+
+ with RewriterContext(cfg={}, backend=backend.backend_name, opset=11): # 1.5 call the backend test class interface
+ backend.run_and_validate(
+ wrapped_model, [input, single_roi],
+ 'roi_align',
+ input_names=['input', 'rois'],
+ output_names=['roi_feat'],
+ save_dir=save_dir)
+```
+
+mmdeploy 支持的模型有两种格式:
+
+- torch 模型:参考 roi_align 单元测试,需要有 op 对应的 Python 调用代码
+- onnx 模型:参考 multi_level_roi_align 单元测试,需要调用 onnx api 进行构建
+
+调用 `run_and_validate` 即可运行测试,其签名如下:
+
+```python
+ def run_and_validate(self,
+ model,
+ input_list,
+ model_name='tmp',
+ tolerate_small_mismatch=False,
+ do_constant_folding=True,
+ dynamic_axes=None,
+ output_names=None,
+ input_names=None,
+ expected_result=None,
+ save_dir=None):
+```
+
+#### Parameter Description
+
+| 参数 | 说明 |
+| :---------------------: | :-----------------------------------: |
+| model | 要测试的输入模型 |
+| input_list | 测试数据列表,顺序与 input_names 对应 |
+| tolerate_small_mismatch | 是否允许验证结果出现精度误差 |
+| do_constant_folding | 是否使用常量折叠 |
+| output_names | 输出节点名字 |
+| input_names | 输入节点名字 |
+| expected_result | 期望的 ground truth |
+| save_dir | 结果保存目录 |
+
+## 测试模型
+
+用 `pytest` 调用 ops 测试
+
+```bash
+pytest tests/test_ops/test_ops.py::test_XXXX
+```
diff --git a/docs/zh_cn/04-developer-guide/partition_model.md b/docs/zh_cn/07-developer-guide/partition_model.md
similarity index 99%
rename from docs/zh_cn/04-developer-guide/partition_model.md
rename to docs/zh_cn/07-developer-guide/partition_model.md
index f1d6280fa1..a9104173ad 100644
--- a/docs/zh_cn/04-developer-guide/partition_model.md
+++ b/docs/zh_cn/07-developer-guide/partition_model.md
@@ -1,4 +1,4 @@
-# How to get partitioned ONNX models
+# 如何拆分 onnx 模型
MMDeploy 支持将PyTorch模型导出到onnx模型并进行拆分得到多个onnx模型文件,用户可以自由的对模型图节点进行标记并根据这些标记的节点定制任意的onnx模型拆分策略。在这个教程中,我们将通过具体例子来展示如何进行onnx模型拆分。在这个例子中,我们的目标是将YOLOV3模型拆分成两个部分,保留不带后处理的onnx模型,丢弃包含Anchor生成,NMS的后处理部分。
diff --git a/docs/zh_cn/04-developer-guide/do_regression_test.md b/docs/zh_cn/07-developer-guide/regression_test.md
similarity index 98%
rename from docs/zh_cn/04-developer-guide/do_regression_test.md
rename to docs/zh_cn/07-developer-guide/regression_test.md
index d1b33d0076..0153459609 100644
--- a/docs/zh_cn/04-developer-guide/do_regression_test.md
+++ b/docs/zh_cn/07-developer-guide/regression_test.md
@@ -65,8 +65,8 @@ python ./tools/regression_test.py \
- `--codebase` : 需要测试的 codebase,eg.`mmdet`, 测试多个 `mmcls mmdet ...`
- `--backends` : 筛选测试的后端, 默认测全部`backend`, 也可传入若干个后端,例如 `onnxruntime tesnsorrt`。如果需要一同进行 SDK 的测试,需要在 `tests/regression/${codebase}.yml` 里面的 `sdk_config` 进行配置。
- `--models` : 指定测试的模型, 默认测试 `yml` 中所有模型, 也可传入若干个模型名称,模型名称可参考相关yml配置文件。例如 `ResNet SE-ResNet "Mask R-CNN"`。注意的是,可传入只有字母和数字组成模型名称,例如 `resnet seresnet maskrcnn`。
-- `--work-dir` : 模型转换、报告生成的路径,默认是`../mmdeploy_regression_working_dir`,注意路径中不要不含空格等特殊字符。
-- `--checkpoint-dir`: PyTorch 模型文件下载保存路径,默认是`../mmdeploy_checkpoints`,注意路径中不要不含空格等特殊字符。
+- `--work-dir` : 模型转换、报告生成的路径,默认是`../mmdeploy_regression_working_dir`,注意路径中不要含空格等特殊字符。
+- `--checkpoint-dir`: PyTorch 模型文件下载保存路径,默认是`../mmdeploy_checkpoints`,注意路径中不要含空格等特殊字符。
- `--device` : 使用的设备,默认 `cuda`。
- `--log-level` : 设置日记的等级,选项包括`'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'`。默认是`INFO`。
- `-p` 或 `--performance` : 是否测试精度,加上则测试转换+精度,不加上则只测试转换
@@ -257,7 +257,6 @@ models:
- [x] ncnn
- [x] OpenVINO
- [x] TorchScript
-- [x] SNPE
- [x] MMDeploy SDK
## 6. 支持的Codebase及其Metric
diff --git a/docs/zh_cn/04-developer-guide/support_new_backend.md b/docs/zh_cn/07-developer-guide/support_new_backend.md
similarity index 98%
rename from docs/zh_cn/04-developer-guide/support_new_backend.md
rename to docs/zh_cn/07-developer-guide/support_new_backend.md
index 50ae937397..d881bfc49e 100644
--- a/docs/zh_cn/04-developer-guide/support_new_backend.md
+++ b/docs/zh_cn/07-developer-guide/support_new_backend.md
@@ -151,7 +151,7 @@ MMDeploy 中的后端必须支持 ONNX,因此后端能直接加载“.onnx”
# ...
```
-6. 将 OpenMMLab 的模型转换后(如有必要)并在后端引擎上进行推理。如果在测试时发现一些不兼容的算子,可以尝试按照[重写器教程](../04-developer-guide/support_new_model.md)为后端重写原始模型或添加自定义算子。
+6. 将 OpenMMLab 的模型转换后(如有必要)并在后端引擎上进行推理。如果在测试时发现一些不兼容的算子,可以尝试按照[重写器教程](support_new_model.md)为后端重写原始模型或添加自定义算子。
7. 为新后端引擎代码添加相关注释和单元测试:).
diff --git a/docs/zh_cn/04-developer-guide/support_new_model.md b/docs/zh_cn/07-developer-guide/support_new_model.md
similarity index 100%
rename from docs/zh_cn/04-developer-guide/support_new_model.md
rename to docs/zh_cn/07-developer-guide/support_new_model.md
diff --git a/docs/zh_cn/07-developer-guide/test_rewritten_models.md b/docs/zh_cn/07-developer-guide/test_rewritten_models.md
new file mode 100644
index 0000000000..0ae0111de4
--- /dev/null
+++ b/docs/zh_cn/07-developer-guide/test_rewritten_models.md
@@ -0,0 +1,126 @@
+# 测试模型重写
+
+模型 [rewriter](support_new_model.md) 完成后,还需编写对应的测试用例,以验证重写是否生效。通常我们需要对比原始模型和重写后模型的输出。原始模型的输出可以直接调用模型的 forward 函数获取,而重写模型输出的获取方式则取决于重写的复杂程度。
+
+## 测试简单的重写
+
+如果对模型的更改很小(例如,仅更改一个或两个变量且无副作用),则可为重写函数/模块构造输入,在 `RewriterContext` 中运行推理并检查结果。
+
+```python
+# mmcls.models.classifiers.base.py
+class BaseClassifier(BaseModule, metaclass=ABCMeta):
+ def forward(self, img, return_loss=True, **kwargs):
+ if return_loss:
+ return self.forward_train(img, **kwargs)
+ else:
+ return self.forward_test(img, **kwargs)
+
+# Custom rewritten function
+@FUNCTION_REWRITER.register_rewriter(
+ 'mmcls.models.classifiers.BaseClassifier.forward', backend='default')
+def forward_of_base_classifier(ctx, self, img, *args, **kwargs):
+ """Rewrite `forward` for default backend."""
+ return self.simple_test(img, {})
+```
+
+在示例中,我们仅更改 forward 函数。我们可以通过编写以下函数来测试这个重写:
+
+```python
+def test_baseclassfier_forward():
+ input = torch.rand(1)
+ from mmcls.models.classifiers import BaseClassifier
+ class DummyClassifier(BaseClassifier):
+
+ def __init__(self, init_cfg=None):
+ super().__init__(init_cfg=init_cfg)
+
+ def extract_feat(self, imgs):
+ pass
+
+ def forward_train(self, imgs):
+ return 'train'
+
+ def simple_test(self, img, tmp, **kwargs):
+ return 'simple_test'
+
+ model = DummyClassifier().eval()
+
+ model_output = model(input)
+ with RewriterContext(cfg=dict()), torch.no_grad():
+ backend_output = model(input)
+
+ assert model_output == 'train'
+ assert backend_output == 'simple_test'
+```
+
+在这个测试函数中,我们构造 `BaseClassifier` 的派生类来测试重写能否生效:通过直接调用 `model(input)` 获得原始输出,在 `RewriterContext` 中调用 `model(input)` 获取重写后的输出,最后断言检查两个输出。
+
+## 测试复杂重写
+
+有时我们可能会对原始模型函数进行重大更改(例如,消除分支语句以生成正确的计算图)。即使运行在Python中的重写模型的输出是正确的,我们也不能保证重写的模型可以在后端按预期工作。因此,我们需要在后端测试重写的模型。
+
+```python
+# Custom rewritten function
+@FUNCTION_REWRITER.register_rewriter(
+ func_name='mmseg.models.segmentors.BaseSegmentor.forward')
+def base_segmentor__forward(ctx, self, img, img_metas=None, **kwargs):
+ if img_metas is None:
+ img_metas = {}
+ assert isinstance(img_metas, dict)
+ assert isinstance(img, torch.Tensor)
+
+ deploy_cfg = ctx.cfg
+ is_dynamic_flag = is_dynamic_shape(deploy_cfg)
+ img_shape = img.shape[2:]
+ if not is_dynamic_flag:
+ img_shape = [int(val) for val in img_shape]
+ img_metas['img_shape'] = img_shape
+ return self.simple_test(img, img_metas, **kwargs)
+
+```
+
+此重写函数的行为很复杂,我们应该按如下方式测试它:
+
+```python
+def test_basesegmentor_forward():
+ from mmdeploy.utils.test import (WrapModel, get_model_outputs,
+ get_rewrite_outputs)
+
+ segmentor = get_model()
+ segmentor.cpu().eval()
+
+ # Prepare data
+ # ...
+
+ # Get the outputs of original model
+ model_inputs = {
+ 'img': [imgs],
+ 'img_metas': [img_metas],
+ 'return_loss': False
+ }
+ model_outputs = get_model_outputs(segmentor, 'forward', model_inputs)
+
+ # Get the outputs of rewritten model
+ wrapped_model = WrapModel(segmentor, 'forward', img_metas = None, return_loss = False)
+ rewrite_inputs = {'img': imgs}
+ rewrite_outputs, is_backend_output = get_rewrite_outputs(
+ wrapped_model=wrapped_model,
+ model_inputs=rewrite_inputs,
+ deploy_cfg=deploy_cfg)
+ if is_backend_output:
+ # If the backend plugins have been installed, the rewrite outputs are
+ # generated by backend.
+ rewrite_outputs = torch.tensor(rewrite_outputs)
+ model_outputs = torch.tensor(model_outputs)
+ model_outputs = model_outputs.unsqueeze(0).unsqueeze(0)
+ assert torch.allclose(rewrite_outputs, model_outputs)
+ else:
+ # Otherwise, the outputs are generated by python.
+ assert rewrite_outputs is not None
+```
+
+我们提供了一些测试辅助函数:可以先 build 模型,用 `get_model_outputs` 获取原始输出;然后用 `WrapModel` 包装重写后的函数,再用 `get_rewrite_outputs` 获取重写后的结果。`get_rewrite_outputs` 会返回输出内容以及输出是否来自后端两个结果。
+
+因为我们也不确定用户是否正确安装后端,所以得检查结果来自 Python 还是真实后端推理结果。单元测试必须涵盖这两种结果,最后用`torch.allclose` 对比两种结果的差异。
+
+API 文档中有测试用例完整用法。
diff --git a/docs/zh_cn/_static/image/quant_model.png b/docs/zh_cn/_static/image/quant_model.png
new file mode 100644
index 0000000000..c2462ab13f
Binary files /dev/null and b/docs/zh_cn/_static/image/quant_model.png differ
diff --git a/docs/zh_cn/experimental/onnx_optimizer.md b/docs/zh_cn/experimental/onnx_optimizer.md
new file mode 100644
index 0000000000..a40939d183
--- /dev/null
+++ b/docs/zh_cn/experimental/onnx_optimizer.md
@@ -0,0 +1,50 @@
+# ONNX Export Optimizer
+
+This is a tool to optimize the ONNX model when exporting from PyTorch.
+
+## Installation
+
+Build MMDeploy with `torchscript` support:
+
+```shell
+export Torch_DIR=$(python -c "import torch;print(torch.utils.cmake_prefix_path + '/Torch')")
+
+cmake \
+ -DTorch_DIR=${Torch_DIR} \
+ -DMMDEPLOY_TARGET_BACKENDS="${your_backend};torchscript" \
+ .. # You can also add other build flags if you need
+
+cmake --build . -- -j$(nproc) && cmake --install .
+```
+
+## Usage
+
+```python
+# import model_to_graph_custom_optimizer so we can hijack onnx.export
+from mmdeploy.apis.onnx.optimizer import model_to_graph__custom_optimizer # noqa
+from mmdeploy.core import RewriterContext
+from mmdeploy.apis.onnx.passes import optimize_onnx
+
+# load your model here
+model = create_model()
+
+# export with ONNX Optimizer
+x = create_dummy_input()
+with RewriterContext({}, onnx_custom_passes=optimize_onnx):
+ torch.onnx.export(model, x, output_path)
+```
+
+The model will be optimized during export.
+
+You can also define your own optimizer:
+
+```python
+# create the optimize callback
+def _optimize_onnx(graph, params_dict, torch_out):
+ from mmdeploy.backend.torchscript import ts_optimizer
+ ts_optimizer.onnx._jit_pass_onnx_peephole(graph)
+ return graph, params_dict, torch_out
+
+with RewriterContext({}, onnx_custom_passes=_optimize_onnx):
+    # export your model as in the example above
+    torch.onnx.export(model, x, output_path)
+```
diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md
index c0e2e84d88..45fd897293 100644
--- a/docs/zh_cn/get_started.md
+++ b/docs/zh_cn/get_started.md
@@ -104,7 +104,7 @@ mim install mmcv-full